From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- lib/stdlib/src/Makefile | 227 ++ lib/stdlib/src/array.erl | 1926 +++++++++++++++++ lib/stdlib/src/base64.erl | 304 +++ lib/stdlib/src/beam_lib.erl | 1027 +++++++++ lib/stdlib/src/c.erl | 700 +++++++ lib/stdlib/src/calendar.erl | 459 +++++ lib/stdlib/src/dets.erl | 2989 +++++++++++++++++++++++++++ lib/stdlib/src/dets.hrl | 126 ++ lib/stdlib/src/dets_server.erl | 402 ++++ lib/stdlib/src/dets_sup.erl | 31 + lib/stdlib/src/dets_utils.erl | 1801 ++++++++++++++++ lib/stdlib/src/dets_v8.erl | 1591 ++++++++++++++ lib/stdlib/src/dets_v9.erl | 2761 +++++++++++++++++++++++++ lib/stdlib/src/dict.erl | 547 +++++ lib/stdlib/src/digraph.erl | 570 +++++ lib/stdlib/src/digraph_utils.erl | 338 +++ lib/stdlib/src/edlin.erl | 575 ++++++ lib/stdlib/src/edlin_expand.erl | 168 ++ lib/stdlib/src/epp.erl | 1146 +++++++++++ lib/stdlib/src/erl_bits.erl | 186 ++ lib/stdlib/src/erl_compile.erl | 233 +++ lib/stdlib/src/erl_eval.erl | 1108 ++++++++++ lib/stdlib/src/erl_expand_records.erl | 808 ++++++++ lib/stdlib/src/erl_internal.erl | 351 ++++ lib/stdlib/src/erl_lint.erl | 3489 +++++++++++++++++++++++++++++++ lib/stdlib/src/erl_parse.yrl | 1028 ++++++++++ lib/stdlib/src/erl_posix_msg.erl | 166 ++ lib/stdlib/src/erl_pp.erl | 992 +++++++++ lib/stdlib/src/erl_scan.erl | 1307 ++++++++++++ lib/stdlib/src/erl_tar.erl | 959 +++++++++ lib/stdlib/src/error_logger_file_h.erl | 265 +++ lib/stdlib/src/error_logger_tty_h.erl | 261 +++ lib/stdlib/src/escript.erl | 694 +++++++ lib/stdlib/src/ets.erl | 1269 ++++++++++++ lib/stdlib/src/eval_bits.erl | 348 ++++ lib/stdlib/src/file_sorter.erl | 1500 ++++++++++++++ lib/stdlib/src/filelib.erl | 443 ++++ lib/stdlib/src/filename.erl | 787 +++++++ lib/stdlib/src/gb_sets.erl | 812 ++++++++ lib/stdlib/src/gb_trees.erl | 515 +++++ lib/stdlib/src/gen.erl | 320 +++ lib/stdlib/src/gen_event.erl | 721 +++++++ lib/stdlib/src/gen_fsm.erl | 623 ++++++ lib/stdlib/src/gen_server.erl | 853 ++++++++ lib/stdlib/src/io.erl | 578 ++++++ lib/stdlib/src/io_lib.erl | 688 +++++++ lib/stdlib/src/io_lib_format.erl | 678 ++++++ lib/stdlib/src/io_lib_fread.erl | 466 +++++ lib/stdlib/src/io_lib_pretty.erl | 646 ++++++ lib/stdlib/src/lib.erl | 452 ++++ lib/stdlib/src/lists.erl | 2462 ++++++++++++++++++++++ lib/stdlib/src/log_mf_h.erl | 202 ++ lib/stdlib/src/math.erl | 25 + lib/stdlib/src/ms_transform.erl | 992 +++++++++ lib/stdlib/src/orddict.erl | 173 ++ lib/stdlib/src/ordsets.erl | 220 ++ lib/stdlib/src/otp_internal.erl | 384 ++++ lib/stdlib/src/pg.erl | 172 ++ lib/stdlib/src/pool.erl | 212 ++ lib/stdlib/src/proc_lib.erl | 624 ++++++ lib/stdlib/src/proplists.erl | 686 +++++++ lib/stdlib/src/qlc.erl | 3540 ++++++++++++++++++++++++++++++++ lib/stdlib/src/qlc_pt.erl | 2746 +++++++++++++++++++++++++ lib/stdlib/src/queue.erl | 487 +++++ lib/stdlib/src/random.erl | 124 ++ lib/stdlib/src/re.erl | 751 +++++++ lib/stdlib/src/regexp.erl | 490 +++++ lib/stdlib/src/sets.erl | 417 ++++ lib/stdlib/src/shell.erl | 1440 +++++++++++++ lib/stdlib/src/shell_default.erl | 131 ++ lib/stdlib/src/slave.erl | 332 +++ lib/stdlib/src/sofs.erl | 2502 ++++++++++++++++++++++ lib/stdlib/src/stdlib.app.src | 105 + lib/stdlib/src/stdlib.appup.src | 1 + lib/stdlib/src/string.erl | 394 ++++ lib/stdlib/src/supervisor.erl | 889 ++++++++ lib/stdlib/src/supervisor_bridge.erl | 116 ++ lib/stdlib/src/sys.erl | 391 ++++ lib/stdlib/src/timer.erl | 364 ++++ lib/stdlib/src/unicode.erl | 677 ++++++ lib/stdlib/src/win32reg.erl | 386 ++++ lib/stdlib/src/zip.erl | 1600 +++++++++++++++ 82 files changed, 65269 insertions(+) create mode 100644 lib/stdlib/src/Makefile create mode 100644 lib/stdlib/src/array.erl create mode 100644 lib/stdlib/src/base64.erl create mode 100644 lib/stdlib/src/beam_lib.erl create mode 100644 lib/stdlib/src/c.erl create mode 100644 lib/stdlib/src/calendar.erl create mode 100644 lib/stdlib/src/dets.erl create mode 100644 lib/stdlib/src/dets.hrl create mode 100644 lib/stdlib/src/dets_server.erl create mode 100644 lib/stdlib/src/dets_sup.erl create mode 100644 lib/stdlib/src/dets_utils.erl create mode 100644 lib/stdlib/src/dets_v8.erl create mode 100644 lib/stdlib/src/dets_v9.erl create mode 100644 lib/stdlib/src/dict.erl create mode 100644 lib/stdlib/src/digraph.erl create mode 100644 lib/stdlib/src/digraph_utils.erl create mode 100644 lib/stdlib/src/edlin.erl create mode 100644 lib/stdlib/src/edlin_expand.erl create mode 100644 lib/stdlib/src/epp.erl create mode 100644 lib/stdlib/src/erl_bits.erl create mode 100644 lib/stdlib/src/erl_compile.erl create mode 100644 lib/stdlib/src/erl_eval.erl create mode 100644 lib/stdlib/src/erl_expand_records.erl create mode 100644 lib/stdlib/src/erl_internal.erl create mode 100644 lib/stdlib/src/erl_lint.erl create mode 100644 lib/stdlib/src/erl_parse.yrl create mode 100644 lib/stdlib/src/erl_posix_msg.erl create mode 100644 lib/stdlib/src/erl_pp.erl create mode 100644 lib/stdlib/src/erl_scan.erl create mode 100644 lib/stdlib/src/erl_tar.erl create mode 100644 lib/stdlib/src/error_logger_file_h.erl create mode 100644 lib/stdlib/src/error_logger_tty_h.erl create mode 100644 lib/stdlib/src/escript.erl create mode 100644 lib/stdlib/src/ets.erl create mode 100644 lib/stdlib/src/eval_bits.erl create mode 100644 lib/stdlib/src/file_sorter.erl create mode 100644 lib/stdlib/src/filelib.erl create mode 100644 lib/stdlib/src/filename.erl create mode 100644 lib/stdlib/src/gb_sets.erl create mode 100644 lib/stdlib/src/gb_trees.erl create mode 100644 lib/stdlib/src/gen.erl create mode 100644 lib/stdlib/src/gen_event.erl create mode 100644 lib/stdlib/src/gen_fsm.erl create mode 100644 lib/stdlib/src/gen_server.erl create mode 100644 lib/stdlib/src/io.erl create mode 100644 lib/stdlib/src/io_lib.erl create mode 100644 lib/stdlib/src/io_lib_format.erl create mode 100644 lib/stdlib/src/io_lib_fread.erl create mode 100644 lib/stdlib/src/io_lib_pretty.erl create mode 100644 lib/stdlib/src/lib.erl create mode 100644 lib/stdlib/src/lists.erl create mode 100644 lib/stdlib/src/log_mf_h.erl create mode 100644 lib/stdlib/src/math.erl create mode 100644 lib/stdlib/src/ms_transform.erl create mode 100644 lib/stdlib/src/orddict.erl create mode 100644 lib/stdlib/src/ordsets.erl create mode 100644 lib/stdlib/src/otp_internal.erl create mode 100644 lib/stdlib/src/pg.erl create mode 100644 lib/stdlib/src/pool.erl create mode 100644 lib/stdlib/src/proc_lib.erl create mode 100644 lib/stdlib/src/proplists.erl create mode 100644 lib/stdlib/src/qlc.erl create mode 100644 lib/stdlib/src/qlc_pt.erl create mode 100644 lib/stdlib/src/queue.erl create mode 100644 lib/stdlib/src/random.erl create mode 100644 lib/stdlib/src/re.erl create mode 100644 lib/stdlib/src/regexp.erl create mode 100644 lib/stdlib/src/sets.erl create mode 100644 lib/stdlib/src/shell.erl create mode 100644 lib/stdlib/src/shell_default.erl create mode 100644 lib/stdlib/src/slave.erl create mode 100644 lib/stdlib/src/sofs.erl create mode 100644 lib/stdlib/src/stdlib.app.src create mode 100644 lib/stdlib/src/stdlib.appup.src create mode 100644 lib/stdlib/src/string.erl create mode 100644 lib/stdlib/src/supervisor.erl create mode 100644 lib/stdlib/src/supervisor_bridge.erl create mode 100644 lib/stdlib/src/sys.erl create mode 100644 lib/stdlib/src/timer.erl create mode 100644 lib/stdlib/src/unicode.erl create mode 100644 lib/stdlib/src/win32reg.erl create mode 100644 lib/stdlib/src/zip.erl (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile new file mode 100644 index 0000000000..68708d6b02 --- /dev/null +++ b/lib/stdlib/src/Makefile @@ -0,0 +1,227 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1996-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifdef BOOTSTRAP +EGEN=$(BOOTSTRAP_TOP)/lib/stdlib/egen +EBIN=$(BOOTSTRAP_TOP)/lib/stdlib/ebin +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(STDLIB_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/stdlib-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES= \ + array \ + base64 \ + beam_lib \ + c \ + calendar \ + dets \ + dets_server \ + dets_sup \ + dets_utils \ + dets_v8 \ + dets_v9 \ + dict \ + digraph \ + digraph_utils \ + edlin \ + edlin_expand \ + epp \ + erl_bits \ + erl_compile \ + erl_eval \ + erl_expand_records \ + erl_internal \ + erl_lint \ + erl_parse \ + erl_posix_msg \ + erl_pp \ + erl_scan \ + erl_tar \ + error_logger_file_h \ + error_logger_tty_h \ + escript \ + ets \ + eval_bits \ + file_sorter \ + filelib \ + filename \ + gb_trees \ + gb_sets \ + gen \ + gen_event \ + gen_fsm \ + gen_server \ + io \ + io_lib \ + io_lib_format \ + io_lib_fread \ + io_lib_pretty \ + lib \ + lists \ + log_mf_h \ + math \ + ms_transform \ + otp_internal \ + orddict \ + ordsets \ + pg \ + re \ + pool \ + proc_lib \ + proplists \ + qlc \ + qlc_pt \ + queue \ + random \ + regexp \ + sets \ + shell \ + shell_default \ + slave \ + sofs \ + string \ + supervisor \ + supervisor_bridge \ + sys \ + timer \ + unicode \ + win32reg \ + zip + +HRL_FILES= \ + ../include/erl_compile.hrl \ + ../include/erl_bits.hrl \ + ../include/ms_transform.hrl \ + ../include/qlc.hrl \ + ../include/zip.hrl + +INTERNAL_HRL_FILES= dets.hrl + +ERL_FILES= $(MODULES:%=%.erl) + +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET) + +APP_FILE= stdlib.app + +APP_SRC= $(APP_FILE).src +APP_TARGET= $(EBIN)/$(APP_FILE) + +APPUP_FILE= stdlib.appup + +APPUP_SRC= $(APPUP_FILE).src +APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +ERL_COMPILE_FLAGS += -I../include -I../../kernel/include + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + rm -f erl_parse.erl + +docs: + +# This is a trick so that the preloaded files will get the correct type +# specifications. +primary_bootstrap_compiler: \ + $(BOOTSTRAP_COMPILER)/ebin/erl_scan.beam \ + $(BOOTSTRAP_COMPILER)/ebin/erl_parse.beam \ + $(BOOTSTRAP_COMPILER)/ebin/erl_lint.beam \ + $(BOOTSTRAP_COMPILER)/ebin/otp_internal.beam + +$(BOOTSTRAP_COMPILER)/ebin/erl_parse.beam: erl_parse.yrl + $(ERLC) -o $(BOOTSTRAP_COMPILER)/egen erl_parse.yrl + $(ERLC) -o $(BOOTSTRAP_COMPILER)/ebin $(BOOTSTRAP_COMPILER)/egen/erl_parse.erl + +#$(BOOTSTRAP_COMPILER)/ebin/erl_lint.beam: erl_lint.erl +# $(ERLC) -o $(BOOTSTRAP_COMPILER)/ebin erl_lint.erl + +$(BOOTSTRAP_COMPILER)/ebin/%.beam: %.erl + $(ERLC) -o $(BOOTSTRAP_COMPILER)/ebin $< + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +$(APP_TARGET): $(APP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/src + $(INSTALL_DATA) $(ERL_FILES) erl_parse.yrl $(RELSYSDIR)/src + $(INSTALL_DATA) $(INTERNAL_HRL_FILES) $(RELSYSDIR)/src + $(INSTALL_DIR) $(RELSYSDIR)/include + $(INSTALL_DATA) $(HRL_FILES) $(RELSYSDIR)/include + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +# ---------------------------------------------------- +# Dependencies -- alphabetically, please +# ---------------------------------------------------- + +$(EBIN)/beam_lib.beam: ../include/erl_compile.hrl ../../kernel/include/file.hrl +$(EBIN)/dets.beam: dets.hrl ../../kernel/include/file.hrl +$(EBIN)/dets_server.beam: dets.hrl +$(EBIN)/dets_utils.beam: dets.hrl +$(EBIN)/dets_v8.beam: dets.hrl +$(EBIN)/dets_v9.beam: dets.hrl +$(EBIN)/erl_bits.beam: ../include/erl_bits.hrl +$(EBIN)/erl_compile.beam: ../include/erl_compile.hrl ../../kernel/include/file.hrl +$(EBIN)/erl_lint.beam: ../include/erl_bits.hrl +$(EBIN)/erl_tar.beam: ../../kernel/include/file.hrl +$(EBIN)/file_sorter.beam: ../../kernel/include/file.hrl +$(EBIN)/filelib.beam: ../../kernel/include/file.hrl +$(EBIN)/filename.beam: ../../kernel/include/file.hrl +$(EBIN)/qlc_pt.beam: ../include/ms_transform.hrl +$(EBIN)/shell.beam: ../../kernel/include/file.hrl +$(EBIN)/zip.beam: ../include/zip.hrl ../../kernel/include/file.hrl diff --git a/lib/stdlib/src/array.erl b/lib/stdlib/src/array.erl new file mode 100644 index 0000000000..295eeac221 --- /dev/null +++ b/lib/stdlib/src/array.erl @@ -0,0 +1,1926 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% @author Richard Carlsson +%% @author Dan Gudmundsson +%% @version 1.0 + +%% @doc Functional, extendible arrays. Arrays can have fixed size, or +%% can grow automatically as needed. A default value is used for entries +%% that have not been explicitly set. +%% +%% Arrays uses zero based indexing. This is a deliberate design +%% choice and differs from other erlang datastructures, e.g. tuples. +%% +%% Unless specified by the user when the array is created, the default +%% value is the atom `undefined'. There is no difference between an +%% unset entry and an entry which has been explicitly set to the same +%% value as the default one (cf. {@link reset/2}). If you need to +%% differentiate between unset and set entries, you must make sure that +%% the default value cannot be confused with the values of set entries. +%% +%% The array never shrinks automatically; if an index `I' has been used +%% successfully to set an entry, all indices in the range [0,`I'] will +%% stay accessible unless the array size is explicitly changed by +%% calling {@link resize/2}. +%% +%% Examples: +%% ``` +%% %% Create a fixed-size array with entries 0-9 set to 'undefined' +%% A0 = array:new(10). +%% 10 = array:size(A0). +%% +%% %% Create an extendible array and set entry 17 to 'true', +%% %% causing the array to grow automatically +%% A1 = array:set(17, true, array:new()). +%% 18 = array:size(A1). +%% +%% %% Read back a stored value +%% true = array:get(17, A1). +%% +%% %% Accessing an unset entry returns the default value +%% undefined = array:get(3, A1). +%% +%% %% Accessing an entry beyond the last set entry also returns the +%% %% default value, if the array does not have fixed size +%% undefined = array:get(18, A1). +%% +%% %% "sparse" functions ignore default-valued entries +%% A2 = array:set(4, false, A1). +%% [{4, false}, {17, true}] = array:sparse_to_orddict(A2). +%% +%% %% An extendible array can be made fixed-size later +%% A3 = array:fix(A2). +%% +%% %% A fixed-size array does not grow automatically and does not +%% %% allow accesses beyond the last set entry +%% {'EXIT',{badarg,_}} = (catch array:set(18, true, A3)). +%% {'EXIT',{badarg,_}} = (catch array:get(18, A3)). +%% ''' + +%% @type array(). A functional, extendible array. The representation is +%% not documented and is subject to change without notice. Note that +%% arrays cannot be directly compared for equality. + +-module(array). + +-export([new/0, new/1, new/2, is_array/1, set/3, get/2, size/1, + sparse_size/1, default/1, reset/2, to_list/1, sparse_to_list/1, + from_list/1, from_list/2, to_orddict/1, sparse_to_orddict/1, + from_orddict/1, from_orddict/2, map/2, sparse_map/2, foldl/3, + foldr/3, sparse_foldl/3, sparse_foldr/3, fix/1, relax/1, is_fix/1, + resize/1, resize/2]). + +%%-define(TEST,1). +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + + +%% Developers: +%% +%% For OTP devs: Both tests and documentation is extracted from this +%% file, keep and update this file, +%% test are extracted with array_SUITE:extract_tests(). +%% Doc with docb_gen array.erl +%% +%% The key to speed is to minimize the number of tests, on +%% large input. Always make the most probable path as short as possible. +%% In particular, keep in mind that for large trees, the probability of +%% a leaf node is small relative to that of an internal node. +%% +%% If you try to tweak the set_1 and get_1 loops: Measure, look at the +%% generated Beam code, and measure again! The argument order matters! + + +%% Representation: +%% +%% A tree is either a leaf, with LEAFSIZE elements (the "base"), an +%% internal node with LEAFSIZE+1 elements, or an unexpanded tree, +%% represented by a single integer: the number of elements that may be +%% stored in the tree when it is expanded. The last element of an +%% internal node caches the number of elements that may be stored in +%% each of its subtrees. +%% +%% Note that to update an entry in a tree of height h = log[b] n, the +%% total number of written words is (b+1)+(h-1)*(b+2), since tuples use +%% a header word on the heap. 4 is the optimal base for minimizing the +%% number of words written, but causes higher trees, which takes time. +%% The best compromise between speed and memory usage seems to lie +%% around 8-10. Measurements indicate that the optimum base for speed is +%% 24 - above that, it gets slower again due to the high memory usage. +%% Base 10 is a good choice, giving 2/3 of the possible speedup from +%% base 4, but only using 1/3 more memory. (Base 24 uses 65% more memory +%% per write than base 10, but the speedup is only 21%.) + +-define(DEFAULT, undefined). +-define(LEAFSIZE, 10). % the "base" +-define(NODESIZE, ?LEAFSIZE). % (no reason to have a different size) +-define(NODEPATTERN(S), {_,_,_,_,_,_,_,_,_,_,S}). % NODESIZE+1 elements! +-define(NEW_NODE(S), % beware of argument duplication! + setelement((?NODESIZE+1),erlang:make_tuple((?NODESIZE+1),(S)),(S))). +-define(NEW_LEAF(D), erlang:make_tuple(?LEAFSIZE,(D))). +-define(NODELEAFS, ?NODESIZE*?LEAFSIZE). + +%% These make the code a little easier to experiment with. +%% It turned out that using shifts (when NODESIZE=2^n) was not faster. +-define(reduce(X), ((X) div (?NODESIZE))). +-define(extend(X), ((X) * (?NODESIZE))). + +%%-------------------------------------------------------------------------- + +-record(array, {size :: non_neg_integer(), %% number of defined entries + max :: non_neg_integer(), %% maximum number of entries + %% in current tree + default, %% the default value (usually 'undefined') + elements %% the tuple tree + }). +%% A declaration equivalent to the following one is hard-coded in erl_types. +%% That declaration contains hard-coded information about the #array{} +%% structure and the types of its fields. So, please make sure that any +%% changes to its structure are also propagated to erl_types.erl. +%% +%% -opaque array() :: #array{}. + +%% +%% Types +%% + +-type array_indx() :: non_neg_integer(). + +-type array_opt() :: 'fixed' | non_neg_integer() + | {'default', term()} | {'fixed', boolean()} + | {'size', non_neg_integer()}. +-type array_opts() :: array_opt() | [array_opt()]. + +-type indx_pair() :: {array_indx(), term()}. +-type indx_pairs() :: [indx_pair()]. + +%%-------------------------------------------------------------------------- + +%% @spec () -> array() +%% @doc Create a new, extendible array with initial size zero. +%% @equiv new([]) +%% +%% @see new/1 +%% @see new/2 + +-spec new() -> array(). + +new() -> + new([]). + +%% @spec (Options::term()) -> array() +%% @doc Create a new array according to the given options. By default, +%% the array is extendible and has initial size zero. Array indices +%% start at 0. +%% +%% `Options' is a single term or a list of terms, selected from the +%% following: +%%
+%%
`N::integer()' or `{size, N::integer()}'
+%%
Specifies the initial size of the array; this also implies +%% `{fixed, true}'. If `N' is not a nonnegative integer, the call +%% fails with reason `badarg'.
+%%
`fixed' or `{fixed, true}'
+%%
Creates a fixed-size array; see also {@link fix/1}.
+%%
`{fixed, false}'
+%%
Creates an extendible (non fixed-size) array.
+%%
`{default, Value}'
+%%
Sets the default value for the array to `Value'.
+%%
+%% Options are processed in the order they occur in the list, i.e., +%% later options have higher precedence. +%% +%% The default value is used as the value of uninitialized entries, and +%% cannot be changed once the array has been created. +%% +%% Examples: +%% ```array:new(100)''' creates a fixed-size array of size 100. +%% ```array:new({default,0})''' creates an empty, extendible array +%% whose default value is 0. +%% ```array:new([{size,10},{fixed,false},{default,-1}])''' creates an +%% extendible array with initial size 10 whose default value is -1. +%% +%% @see new/0 +%% @see new/2 +%% @see set/3 +%% @see get/2 +%% @see from_list/2 +%% @see fix/1 + +-spec new(array_opts()) -> array(). + +new(Options) -> + new_0(Options, 0, false). + +%% @spec (Size::integer(), Options::term()) -> array() +%% @doc Create a new array according to the given size and options. If +%% `Size' is not a nonnegative integer, the call fails with reason +%% `badarg'. By default, the array has fixed size. Note that any size +%% specifications in `Options' will override the `Size' parameter. +%% +%% If `Options' is a list, this is simply equivalent to `new([{size, +%% Size} | Options]', otherwise it is equivalent to `new([{size, Size} | +%% [Options]]'. However, using this function directly is more efficient. +%% +%% Example: +%% ```array:new(100, {default,0})''' creates a fixed-size array of size +%% 100, whose default value is 0. +%% +%% @see new/1 + +-spec new(non_neg_integer(), array_opts()) -> array(). + +new(Size, Options) when is_integer(Size), Size >= 0 -> + new_0(Options, Size, true); +new(_, _) -> + erlang:error(badarg). + +new_0(Options, Size, Fixed) when is_list(Options) -> + new_1(Options, Size, Fixed, ?DEFAULT); +new_0(Options, Size, Fixed) -> + new_1([Options], Size, Fixed, ?DEFAULT). + +new_1([fixed | Options], Size, _, Default) -> + new_1(Options, Size, true, Default); +new_1([{fixed, Fixed} | Options], Size, _, Default) + when is_boolean(Fixed) -> + new_1(Options, Size, Fixed, Default); +new_1([{default, Default} | Options], Size, Fixed, _) -> + new_1(Options, Size, Fixed, Default); +new_1([{size, Size} | Options], _, _, Default) + when is_integer(Size), Size >= 0 -> + new_1(Options, Size, true, Default); +new_1([Size | Options], _, _, Default) + when is_integer(Size), Size >= 0 -> + new_1(Options, Size, true, Default); +new_1([], Size, Fixed, Default) -> + new(Size, Fixed, Default); +new_1(_Options, _Size, _Fixed, _Default) -> + erlang:error(badarg). + +new(0, false, undefined) -> + %% Constant empty array + #array{size=0, max=?LEAFSIZE, elements=?LEAFSIZE}; +new(Size, Fixed, Default) -> + E = find_max(Size - 1, ?LEAFSIZE), + M = if Fixed -> 0; + true -> E + end, + #array{size = Size, max = M, default = Default, elements = E}. + +-spec find_max(integer(), integer()) -> integer(). + +find_max(I, M) when I >= M -> + find_max(I, ?extend(M)); +find_max(_I, M) -> + M. + + +%% @spec (X::term()) -> boolean() +%% @doc Returns `true' if `X' appears to be an array, otherwise `false'. +%% Note that the check is only shallow; there is no guarantee that `X' +%% is a well-formed array representation even if this function returns +%% `true'. + +-spec is_array(term()) -> boolean(). + +is_array(#array{size = Size, max = Max}) + when is_integer(Size), is_integer(Max) -> + true; +is_array(_) -> + false. + + +%% @spec (array()) -> integer() +%% @doc Get the number of entries in the array. Entries are numbered +%% from 0 to `size(Array)-1'; hence, this is also the index of the first +%% entry that is guaranteed to not have been previously set. +%% @see set/3 +%% @see sparse_size/1 + +-spec size(array()) -> non_neg_integer(). + +size(#array{size = N}) -> N; +size(_) -> erlang:error(badarg). + + +%% @spec (array()) -> term() +%% @doc Get the value used for uninitialized entries. +%% +%% @see new/2 + +-spec default(array()) -> term(). + +default(#array{default = D}) -> D; +default(_) -> erlang:error(badarg). + + +-ifdef(EUNIT). +new_test_() -> + N0 = ?LEAFSIZE, + N01 = N0+1, + N1 = ?NODESIZE*N0, + N11 = N1+1, + N2 = ?NODESIZE*N1, + [?_test(new()), + + ?_test(new([])), + ?_test(new(10)), + ?_test(new({size,10})), + ?_test(new(fixed)), + ?_test(new({fixed,true})), + ?_test(new({fixed,false})), + ?_test(new({default,undefined})), + ?_test(new([{size,100},{fixed,false},{default,undefined}])), + ?_test(new([100,fixed,{default,0}])), + + ?_assert(new() =:= new([])), + ?_assert(new() =:= new([{size,0},{default,undefined},{fixed,false}])), + ?_assert(new() =:= new(0, {fixed,false})), + ?_assert(new(fixed) =:= new(0)), + ?_assert(new(fixed) =:= new(0, [])), + ?_assert(new(10) =:= new([{size,0},{size,5},{size,10}])), + ?_assert(new(10) =:= new(0, {size,10})), + ?_assert(new(10, []) =:= new(10, [{default,undefined},{fixed,true}])), + + ?_assertError(badarg, new(-1)), + ?_assertError(badarg, new(10.0)), + ?_assertError(badarg, new(undefined)), + ?_assertError(badarg, new([undefined])), + ?_assertError(badarg, new([{default,0} | fixed])), + + ?_assertError(badarg, new(-1, [])), + ?_assertError(badarg, new(10.0, [])), + ?_assertError(badarg, new(undefined, [])), + + ?_assertMatch(#array{size=0,max=N0,default=undefined,elements=N0}, + new()), + ?_assertMatch(#array{size=0,max=0,default=undefined,elements=N0}, + new(fixed)), + ?_assertMatch(#array{size=N0,max=N0,elements=N0}, + new(N0, {fixed,false})), + ?_assertMatch(#array{size=N01,max=N1,elements=N1}, + new(N01, {fixed,false})), + ?_assertMatch(#array{size=N1,max=N1,elements=N1}, + new(N1, {fixed,false})), + ?_assertMatch(#array{size=N11,max=N2,elements=N2}, + new(N11, {fixed,false})), + ?_assertMatch(#array{size=N2, max=N2, default=42,elements=N2}, + new(N2, [{fixed,false},{default,42}])), + + ?_assert(0 =:= array:size(new())), + ?_assert(17 =:= array:size(new(17))), + ?_assert(100 =:= array:size(array:set(99,0,new()))), + ?_assertError(badarg, array:size({bad_data,gives_error})), + + ?_assert(undefined =:= default(new())), + ?_assert(4711 =:= default(new({default,4711}))), + ?_assert(0 =:= default(new(10, {default,0}))), + ?_assertError(badarg, default({bad_data,gives_error})), + + ?_assert(is_array(new())), + ?_assert(false =:= is_array({foobar, 23, 23})), + ?_assert(false =:= is_array(#array{size=bad})), + ?_assert(false =:= is_array(#array{max=bad})), + ?_assert(is_array(new(10))), + ?_assert(is_array(new(10, {fixed,false}))) + ]. +-endif. + + +%% @spec (array()) -> array() +%% @doc Fix the size of the array. This prevents it from growing +%% automatically upon insertion; see also {@link set/3}. +%% @see relax/1 + +-spec fix(array()) -> array(). + +fix(#array{}=A) -> + A#array{max = 0}. + + +%% @spec (array()) -> boolean() +%% @doc Check if the array has fixed size. +%% Returns `true' if the array is fixed, otherwise `false'. +%% @see fix/1 + +-spec is_fix(array()) -> boolean(). + +is_fix(#array{max = 0}) -> true; +is_fix(#array{}) -> false. + + +-ifdef(EUNIT). +fix_test_() -> + [?_assert(is_array(fix(new()))), + ?_assert(fix(new()) =:= new(fixed)), + + ?_assertNot(is_fix(new())), + ?_assertNot(is_fix(new([]))), + ?_assertNot(is_fix(new({fixed,false}))), + ?_assertNot(is_fix(new(10, {fixed,false}))), + ?_assert(is_fix(new({fixed,true}))), + ?_assert(is_fix(new(fixed))), + ?_assert(is_fix(new(10))), + ?_assert(is_fix(new(10, []))), + ?_assert(is_fix(new(10, {fixed,true}))), + ?_assert(is_fix(fix(new()))), + ?_assert(is_fix(fix(new({fixed,false})))), + + ?_test(set(0, 17, new())), + ?_assertError(badarg, set(0, 17, new(fixed))), + ?_assertError(badarg, set(1, 42, fix(set(0, 17, new())))), + + ?_test(set(9, 17, new(10))), + ?_assertError(badarg, set(10, 17, new(10))), + ?_assertError(badarg, set(10, 17, fix(new(10, {fixed,false})))) + ]. +-endif. + + +%% @spec (array()) -> array() +%% @doc Make the array resizable. (Reverses the effects of {@link +%% fix/1}.) +%% @see fix/1 + +-spec relax(array()) -> array(). + +relax(#array{size = N}=A) -> + A#array{max = find_max(N-1, ?LEAFSIZE)}. + + +-ifdef(EUNIT). +relax_test_() -> + [?_assert(is_array(relax(new(fixed)))), + ?_assertNot(is_fix(relax(fix(new())))), + ?_assertNot(is_fix(relax(new(fixed)))), + + ?_assert(new() =:= relax(new(fixed))), + ?_assert(new() =:= relax(new(0))), + ?_assert(new(17, {fixed,false}) =:= relax(new(17))), + ?_assert(new(100, {fixed,false}) + =:= relax(fix(new(100, {fixed,false})))) + ]. +-endif. + + +%% @spec (integer(), array()) -> array() +%% @doc Change the size of the array. If `Size' is not a nonnegative +%% integer, the call fails with reason `badarg'. If the given array has +%% fixed size, the resulting array will also have fixed size. + +-spec resize(non_neg_integer(), array()) -> array(). + +resize(Size, #array{size = N, max = M, elements = E}=A) + when is_integer(Size), Size >= 0 -> + if Size > N -> + {E1, M1} = grow(Size-1, E, + if M > 0 -> M; + true -> find_max(N-1, ?LEAFSIZE) + end), + A#array{size = Size, + max = if M > 0 -> M1; + true -> M + end, + elements = E1}; + Size < N -> + %% TODO: shrink physical representation when shrinking the array + A#array{size = Size}; + true -> + A + end; +resize(_Size, _) -> + erlang:error(badarg). + + +%% @spec (array()) -> array() + +%% @doc Change the size of the array to that reported by {@link +%% sparse_size/1}. If the given array has fixed size, the resulting +%% array will also have fixed size. +%% @equiv resize(sparse_size(Array), Array) +%% @see resize/2 +%% @see sparse_size/1 + +-spec resize(array()) -> array(). + +resize(Array) -> + resize(sparse_size(Array), Array). + + +-ifdef(EUNIT). +resize_test_() -> + [?_assert(resize(0, new()) =:= new()), + ?_assert(resize(99, new(99)) =:= new(99)), + ?_assert(resize(99, relax(new(99))) =:= relax(new(99))), + ?_assert(is_fix(resize(100, new(10)))), + ?_assertNot(is_fix(resize(100, relax(new(10))))), + + ?_assert(array:size(resize(100, new())) =:= 100), + ?_assert(array:size(resize(0, new(100))) =:= 0), + ?_assert(array:size(resize(99, new(10))) =:= 99), + ?_assert(array:size(resize(99, new(1000))) =:= 99), + + ?_assertError(badarg, set(99, 17, new(10))), + ?_test(set(99, 17, resize(100, new(10)))), + ?_assertError(badarg, set(100, 17, resize(100, new(10)))), + + ?_assert(array:size(resize(new())) =:= 0), + ?_assert(array:size(resize(new(8))) =:= 0), + ?_assert(array:size(resize(array:set(7, 0, new()))) =:= 8), + ?_assert(array:size(resize(array:set(7, 0, new(10)))) =:= 8), + ?_assert(array:size(resize(array:set(99, 0, new(10,{fixed,false})))) + =:= 100), + ?_assert(array:size(resize(array:set(7, undefined, new()))) =:= 0), + ?_assert(array:size(resize(array:from_list([1,2,3,undefined]))) + =:= 3), + ?_assert(array:size( + resize(array:from_orddict([{3,0},{17,0},{99,undefined}]))) + =:= 18), + ?_assertError(badarg, resize(foo, bad_argument)) + ]. +-endif. + + +%% @spec (integer(), term(), array()) -> array() +%% @doc Set entry `I' of the array to `Value'. If `I' is not a +%% nonnegative integer, or if the array has fixed size and `I' is larger +%% than the maximum index, the call fails with reason `badarg'. +%% +%% If the array does not have fixed size, and `I' is greater than +%% `size(Array)-1', the array will grow to size `I+1'. +%% +%% @see get/2 +%% @see reset/2 + +-spec set(array_indx(), term(), array()) -> array(). + +set(I, Value, #array{size = N, max = M, default = D, elements = E}=A) + when is_integer(I), I >= 0 -> + if I < N -> + A#array{elements = set_1(I, E, Value, D)}; + I < M -> + %% (note that this cannot happen if M == 0, since N >= 0) + A#array{size = I+1, elements = set_1(I, E, Value, D)}; + M > 0 -> + {E1, M1} = grow(I, E, M), + A#array{size = I+1, max = M1, + elements = set_1(I, E1, Value, D)}; + true -> + erlang:error(badarg) + end; +set(_I, _V, _A) -> + erlang:error(badarg). + +%% See get_1/3 for details about switching and the NODEPATTERN macro. + +set_1(I, E=?NODEPATTERN(S), X, D) -> + I1 = I div S + 1, + setelement(I1, E, set_1(I rem S, element(I1, E), X, D)); +set_1(I, E, X, D) when is_integer(E) -> + expand(I, E, X, D); +set_1(I, E, X, _D) -> + setelement(I+1, E, X). + + +%% Enlarging the array upwards to accommodate an index `I' + +grow(I, E, _M) when is_integer(E) -> + M1 = find_max(I, E), + {M1, M1}; +grow(I, E, M) -> + grow_1(I, E, M). + +grow_1(I, E, M) when I >= M -> + grow(I, setelement(1, ?NEW_NODE(M), E), ?extend(M)); +grow_1(_I, E, M) -> + {E, M}. + + +%% Insert an element in an unexpanded node, expanding it as necessary. + +expand(I, S, X, D) when S > ?LEAFSIZE -> + S1 = ?reduce(S), + setelement(I div S1 + 1, ?NEW_NODE(S1), + expand(I rem S1, S1, X, D)); +expand(I, _S, X, D) -> + setelement(I+1, ?NEW_LEAF(D), X). + + +%% @spec (integer(), array()) -> term() +%% @doc Get the value of entry `I'. If `I' is not a nonnegative +%% integer, or if the array has fixed size and `I' is larger than the +%% maximum index, the call fails with reason `badarg'. +%% +%% If the array does not have fixed size, this function will return the +%% default value for any index `I' greater than `size(Array)-1'. + +%% @see set/3 + +-spec get(array_indx(), array()) -> term(). + +get(I, #array{size = N, max = M, elements = E, default = D}) + when is_integer(I), I >= 0 -> + if I < N -> + get_1(I, E, D); + M > 0 -> + D; + true -> + erlang:error(badarg) + end; +get(_I, _A) -> + erlang:error(badarg). + +%% The use of NODEPATTERN(S) to select the right clause is just a hack, +%% but it is the only way to get the maximum speed out of this loop +%% (using the Beam compiler in OTP 11). + +get_1(I, E=?NODEPATTERN(S), D) -> + get_1(I rem S, element(I div S + 1, E), D); +get_1(_I, E, D) when is_integer(E) -> + D; +get_1(I, E, _D) -> + element(I+1, E). + + +%% @spec (integer(), array()) -> array() +%% @doc Reset entry `I' to the default value for the array. +%% If the value of entry `I' is the default value the array will be +%% returned unchanged. Reset will never change size of the array. +%% Shrinking can be done explicitly by calling {@link resize/2}. +%% +%% If `I' is not a nonnegative integer, or if the array has fixed size +%% and `I' is larger than the maximum index, the call fails with reason +%% `badarg'; cf. {@link set/3} +%% +%% @see new/2 +%% @see set/3 + +%% TODO: a reset_range function + +-spec reset(array_indx(), array()) -> array(). + +reset(I, #array{size = N, max = M, default = D, elements = E}=A) + when is_integer(I), I >= 0 -> + if I < N -> + try A#array{elements = reset_1(I, E, D)} + catch throw:default -> A + end; + M > 0 -> + A; + true -> + erlang:error(badarg) + end; +reset(_I, _A) -> + erlang:error(badarg). + +reset_1(I, E=?NODEPATTERN(S), D) -> + I1 = I div S + 1, + setelement(I1, E, reset_1(I rem S, element(I1, E), D)); +reset_1(_I, E, _D) when is_integer(E) -> + throw(default); +reset_1(I, E, D) -> + Indx = I+1, + case element(Indx, E) of + D -> throw(default); + _ -> setelement(I+1, E, D) + end. + + +-ifdef(EUNIT). +set_get_test_() -> + N0 = ?LEAFSIZE, + N1 = ?NODESIZE*N0, + [?_assert(array:get(0, new()) =:= undefined), + ?_assert(array:get(1, new()) =:= undefined), + ?_assert(array:get(99999, new()) =:= undefined), + + ?_assert(array:get(0, new(1)) =:= undefined), + ?_assert(array:get(0, new(1,{default,0})) =:= 0), + ?_assert(array:get(9, new(10)) =:= undefined), + + ?_assertError(badarg, array:get(0, new(fixed))), + ?_assertError(badarg, array:get(1, new(1))), + ?_assertError(badarg, array:get(-1, new(1))), + ?_assertError(badarg, array:get(10, new(10))), + ?_assertError(badarg, array:set(-1, foo, new(10))), + ?_assertError(badarg, array:set(10, foo, no_array)), + + ?_assert(array:size(set(0, 17, new())) =:= 1), + ?_assert(array:size(set(N1-1, 17, new())) =:= N1), + ?_assert(array:size(set(0, 42, set(0, 17, new()))) =:= 1), + ?_assert(array:size(set(9, 42, set(0, 17, new()))) =:= 10), + + ?_assert(array:get(0, set(0, 17, new())) =:= 17), + ?_assert(array:get(0, set(1, 17, new())) =:= undefined), + ?_assert(array:get(1, set(1, 17, new())) =:= 17), + + ?_assert(array:get(0, fix(set(0, 17, new()))) =:= 17), + ?_assertError(badarg, array:get(1, fix(set(0, 17, new())))), + + ?_assert(array:get(N1-2, set(N1-1, 17, new())) =:= undefined), + ?_assert(array:get(N1-1, set(N1-1, 17, new())) =:= 17), + ?_assertError(badarg, array:get(N1, fix(set(N1-1, 17, new())))), + + ?_assert(array:get(0, set(0, 42, set(0, 17, new()))) =:= 42), + + ?_assert(array:get(0, reset(0, new())) =:= undefined), + ?_assert(array:get(0, reset(0, set(0, 17, new()))) =:= undefined), + ?_assert(array:get(0, reset(0, new({default,42}))) =:= 42), + ?_assert(array:get(0, reset(0, set(0, 17, new({default,42})))) + =:= 42) + ]. +-endif. + + +%% @spec (array()) -> list() +%% @doc Converts the array to a list. +%% +%% @see from_list/2 +%% @see sparse_to_list/1 + +-spec to_list(array()) -> list(). + +to_list(#array{size = 0}) -> + []; +to_list(#array{size = N, elements = E, default = D}) -> + to_list_1(E, D, N - 1); +to_list(_) -> + erlang:error(badarg). + +%% this part handles the rightmost subtrees + +to_list_1(E=?NODEPATTERN(S), D, I) -> + N = I div S, + to_list_3(N, D, to_list_1(element(N+1, E), D, I rem S), E); +to_list_1(E, D, I) when is_integer(E) -> + push(I+1, D, []); +to_list_1(E, _D, I) -> + push_tuple(I+1, E, []). + +%% this part handles full trees only + +to_list_2(E=?NODEPATTERN(_S), D, L) -> + to_list_3(?NODESIZE, D, L, E); +to_list_2(E, D, L) when is_integer(E) -> + push(E, D, L); +to_list_2(E, _D, L) -> + push_tuple(?LEAFSIZE, E, L). + +to_list_3(0, _D, L, _E) -> + L; +to_list_3(N, D, L, E) -> + to_list_3(N-1, D, to_list_2(element(N, E), D, L), E). + +push(0, _E, L) -> + L; +push(N, E, L) -> + push(N - 1, E, [E | L]). + +push_tuple(0, _T, L) -> + L; +push_tuple(N, T, L) -> + push_tuple(N - 1, T, [element(N, T) | L]). + + +-ifdef(EUNIT). +to_list_test_() -> + N0 = ?LEAFSIZE, + [?_assert([] =:= to_list(new())), + ?_assert([undefined] =:= to_list(new(1))), + ?_assert([undefined,undefined] =:= to_list(new(2))), + ?_assert(lists:duplicate(N0,0) =:= to_list(new(N0,{default,0}))), + ?_assert(lists:duplicate(N0+1,1) =:= to_list(new(N0+1,{default,1}))), + ?_assert(lists:duplicate(N0+2,2) =:= to_list(new(N0+2,{default,2}))), + ?_assert(lists:duplicate(666,6) =:= to_list(new(666,{default,6}))), + ?_assert([1,2,3] =:= to_list(set(2,3,set(1,2,set(0,1,new()))))), + ?_assert([3,2,1] =:= to_list(set(0,3,set(1,2,set(2,1,new()))))), + ?_assert([1|lists:duplicate(N0-2,0)++[1]] =:= + to_list(set(N0-1,1,set(0,1,new({default,0}))))), + ?_assert([1|lists:duplicate(N0-1,0)++[1]] =:= + to_list(set(N0,1,set(0,1,new({default,0}))))), + ?_assert([1|lists:duplicate(N0,0)++[1]] =:= + to_list(set(N0+1,1,set(0,1,new({default,0}))))), + ?_assert([1|lists:duplicate(N0*3,0)++[1]] =:= + to_list(set((N0*3)+1,1,set(0,1,new({default,0}))))), + ?_assertError(badarg, to_list(no_array)) + ]. +-endif. + + +%% @spec (array()) -> list() +%% @doc Converts the array to a list, skipping default-valued entries. +%% +%% @see to_list/1 + +-spec sparse_to_list(array()) -> list(). + +sparse_to_list(#array{size = 0}) -> + []; +sparse_to_list(#array{size = N, elements = E, default = D}) -> + sparse_to_list_1(E, D, N - 1); +sparse_to_list(_) -> + erlang:error(badarg). + +%% see to_list/1 for details + +sparse_to_list_1(E=?NODEPATTERN(S), D, I) -> + N = I div S, + sparse_to_list_3(N, D, + sparse_to_list_1(element(N+1, E), D, I rem S), + E); +sparse_to_list_1(E, _D, _I) when is_integer(E) -> + []; +sparse_to_list_1(E, D, I) -> + sparse_push_tuple(I+1, D, E, []). + +sparse_to_list_2(E=?NODEPATTERN(_S), D, L) -> + sparse_to_list_3(?NODESIZE, D, L, E); +sparse_to_list_2(E, _D, L) when is_integer(E) -> + L; +sparse_to_list_2(E, D, L) -> + sparse_push_tuple(?LEAFSIZE, D, E, L). + +sparse_to_list_3(0, _D, L, _E) -> + L; +sparse_to_list_3(N, D, L, E) -> + sparse_to_list_3(N-1, D, sparse_to_list_2(element(N, E), D, L), E). + +sparse_push_tuple(0, _D, _T, L) -> + L; +sparse_push_tuple(N, D, T, L) -> + case element(N, T) of + D -> sparse_push_tuple(N - 1, D, T, L); + E -> sparse_push_tuple(N - 1, D, T, [E | L]) + end. + + +-ifdef(EUNIT). +sparse_to_list_test_() -> + N0 = ?LEAFSIZE, + [?_assert([] =:= sparse_to_list(new())), + ?_assert([] =:= sparse_to_list(new(1))), + ?_assert([] =:= sparse_to_list(new(1,{default,0}))), + ?_assert([] =:= sparse_to_list(new(2))), + ?_assert([] =:= sparse_to_list(new(2,{default,0}))), + ?_assert([] =:= sparse_to_list(new(N0,{default,0}))), + ?_assert([] =:= sparse_to_list(new(N0+1,{default,1}))), + ?_assert([] =:= sparse_to_list(new(N0+2,{default,2}))), + ?_assert([] =:= sparse_to_list(new(666,{default,6}))), + ?_assert([1,2,3] =:= sparse_to_list(set(2,3,set(1,2,set(0,1,new()))))), + ?_assert([3,2,1] =:= sparse_to_list(set(0,3,set(1,2,set(2,1,new()))))), + ?_assert([0,1] =:= sparse_to_list(set(N0-1,1,set(0,0,new())))), + ?_assert([0,1] =:= sparse_to_list(set(N0,1,set(0,0,new())))), + ?_assert([0,1] =:= sparse_to_list(set(N0+1,1,set(0,0,new())))), + ?_assert([0,1,2] =:= sparse_to_list(set(N0*10+1,2,set(N0*2+1,1,set(0,0,new()))))), + ?_assertError(badarg, sparse_to_list(no_array)) + ]. +-endif. + + +%% @spec (list()) -> array() +%% @equiv from_list(List, undefined) + +-spec from_list(list()) -> array(). + +from_list(List) -> + from_list(List, undefined). + +%% @spec (list(), term()) -> array() +%% @doc Convert a list to an extendible array. `Default' is used as the value +%% for uninitialized entries of the array. If `List' is not a proper list, +%% the call fails with reason `badarg'. +%% +%% @see new/2 +%% @see to_list/1 + +-spec from_list(list(), term()) -> array(). + +from_list([], Default) -> + new({default,Default}); +from_list(List, Default) when is_list(List) -> + {E, N, M} = from_list_1(?LEAFSIZE, List, Default, 0, [], []), + #array{size = N, max = M, default = Default, elements = E}; +from_list(_, _) -> + erlang:error(badarg). + +%% Note: A cleaner but slower algorithm is to first take the length of +%% the list and compute the max size of the final tree, and then +%% decompose the list. The below algorithm is almost twice as fast, +%% however. + +%% Building the leaf nodes (padding the last one as necessary) and +%% counting the total number of elements. +from_list_1(0, Xs, D, N, As, Es) -> + E = list_to_tuple(lists:reverse(As)), + case Xs of + [] -> + case Es of + [] -> + {E, N, ?LEAFSIZE}; + _ -> + from_list_2_0(N, [E | Es], ?LEAFSIZE) + end; + [_|_] -> + from_list_1(?LEAFSIZE, Xs, D, N, [], [E | Es]); + _ -> + erlang:error(badarg) + end; +from_list_1(I, Xs, D, N, As, Es) -> + case Xs of + [X | Xs1] -> + from_list_1(I-1, Xs1, D, N+1, [X | As], Es); + _ -> + from_list_1(I-1, Xs, D, N, [D | As], Es) + end. + +%% Building the internal nodes (note that the input is reversed). +from_list_2_0(N, Es, S) -> + from_list_2(?NODESIZE, pad((N-1) div S + 1, ?NODESIZE, S, Es), + S, N, [S], []). + +from_list_2(0, Xs, S, N, As, Es) -> + E = list_to_tuple(As), + case Xs of + [] -> + case Es of + [] -> + {E, N, ?extend(S)}; + _ -> + from_list_2_0(N, lists:reverse([E | Es]), + ?extend(S)) + end; + _ -> + from_list_2(?NODESIZE, Xs, S, N, [S], [E | Es]) + end; +from_list_2(I, [X | Xs], S, N, As, Es) -> + from_list_2(I-1, Xs, S, N, [X | As], Es). + + +%% left-padding a list Es with elements P to the nearest multiple of K +%% elements from N (adding 0 to K-1 elements). +pad(N, K, P, Es) -> + push((K - (N rem K)) rem K, P, Es). + + +-ifdef(EUNIT). +from_list_test_() -> + N0 = ?LEAFSIZE, + N1 = ?NODESIZE*N0, + N2 = ?NODESIZE*N1, + N3 = ?NODESIZE*N2, + N4 = ?NODESIZE*N3, + [?_assert(array:size(from_list([])) =:= 0), + ?_assert(array:is_fix(from_list([])) =:= false), + ?_assert(array:size(from_list([undefined])) =:= 1), + ?_assert(array:is_fix(from_list([undefined])) =:= false), + ?_assert(array:size(from_list(lists:seq(1,N1))) =:= N1), + ?_assert(to_list(from_list(lists:seq(1,N0))) =:= lists:seq(1,N0)), + ?_assert(to_list(from_list(lists:seq(1,N0+1))) =:= lists:seq(1,N0+1)), + ?_assert(to_list(from_list(lists:seq(1,N0+2))) =:= lists:seq(1,N0+2)), + ?_assert(to_list(from_list(lists:seq(1,N2))) =:= lists:seq(1,N2)), + ?_assert(to_list(from_list(lists:seq(1,N2+1))) =:= lists:seq(1,N2+1)), + ?_assert(to_list(from_list(lists:seq(0,N3))) =:= lists:seq(0,N3)), + ?_assert(to_list(from_list(lists:seq(0,N4))) =:= lists:seq(0,N4)), + ?_assertError(badarg, from_list([a,b,a,c|d])), + ?_assertError(badarg, from_list(no_array)) + ]. +-endif. + + +%% @spec (array()) -> [{Index::integer(), Value::term()}] +%% @doc Convert the array to an ordered list of pairs `{Index, Value}'. +%% +%% @see from_orddict/2 +%% @see sparse_to_orddict/1 + +-spec to_orddict(array()) -> indx_pairs(). + +to_orddict(#array{size = 0}) -> + []; +to_orddict(#array{size = N, elements = E, default = D}) -> + I = N - 1, + to_orddict_1(E, I, D, I); +to_orddict(_) -> + erlang:error(badarg). + +%% see to_list/1 for comparison + +to_orddict_1(E=?NODEPATTERN(S), R, D, I) -> + N = I div S, + I1 = I rem S, + to_orddict_3(N, R - I1 - 1, D, + to_orddict_1(element(N+1, E), R, D, I1), + E, S); +to_orddict_1(E, R, D, I) when is_integer(E) -> + push_pairs(I+1, R, D, []); +to_orddict_1(E, R, _D, I) -> + push_tuple_pairs(I+1, R, E, []). + +to_orddict_2(E=?NODEPATTERN(S), R, D, L) -> + to_orddict_3(?NODESIZE, R, D, L, E, S); +to_orddict_2(E, R, D, L) when is_integer(E) -> + push_pairs(E, R, D, L); +to_orddict_2(E, R, _D, L) -> + push_tuple_pairs(?LEAFSIZE, R, E, L). + +to_orddict_3(0, _R, _D, L, _E, _S) -> %% when is_integer(R) -> + L; +to_orddict_3(N, R, D, L, E, S) -> + to_orddict_3(N-1, R - S, D, + to_orddict_2(element(N, E), R, D, L), + E, S). + +-spec push_pairs(non_neg_integer(), array_indx(), term(), indx_pairs()) -> + indx_pairs(). + +push_pairs(0, _I, _E, L) -> + L; +push_pairs(N, I, E, L) -> + push_pairs(N-1, I-1, E, [{I, E} | L]). + +-spec push_tuple_pairs(non_neg_integer(), array_indx(), term(), indx_pairs()) -> + indx_pairs(). + +push_tuple_pairs(0, _I, _T, L) -> + L; +push_tuple_pairs(N, I, T, L) -> + push_tuple_pairs(N-1, I-1, T, [{I, element(N, T)} | L]). + + +-ifdef(EUNIT). +to_orddict_test_() -> + N0 = ?LEAFSIZE, + [?_assert([] =:= to_orddict(new())), + ?_assert([{0,undefined}] =:= to_orddict(new(1))), + ?_assert([{0,undefined},{1,undefined}] =:= to_orddict(new(2))), + ?_assert([{N,0}||N<-lists:seq(0,N0-1)] + =:= to_orddict(new(N0,{default,0}))), + ?_assert([{N,1}||N<-lists:seq(0,N0)] + =:= to_orddict(new(N0+1,{default,1}))), + ?_assert([{N,2}||N<-lists:seq(0,N0+1)] + =:= to_orddict(new(N0+2,{default,2}))), + ?_assert([{N,6}||N<-lists:seq(0,665)] + =:= to_orddict(new(666,{default,6}))), + ?_assert([{0,1},{1,2},{2,3}] =:= + to_orddict(set(2,3,set(1,2,set(0,1,new()))))), + ?_assert([{0,3},{1,2},{2,1}] =:= + to_orddict(set(0,3,set(1,2,set(2,1,new()))))), + ?_assert([{0,1}|[{N,0}||N<-lists:seq(1,N0-2)]++[{N0-1,1}]] + =:= to_orddict(set(N0-1,1,set(0,1,new({default,0}))))), + ?_assert([{0,1}|[{N,0}||N<-lists:seq(1,N0-1)]++[{N0,1}]] + =:= to_orddict(set(N0,1,set(0,1,new({default,0}))))), + ?_assert([{0,1}|[{N,0}||N<-lists:seq(1,N0)]++[{N0+1,1}]] + =:= to_orddict(set(N0+1,1,set(0,1,new({default,0}))))), + ?_assert([{0,0} | [{N,undefined}||N<-lists:seq(1,N0*2)]] ++ + [{N0*2+1,1} | [{N,undefined}||N<-lists:seq(N0*2+2,N0*10)]] ++ + [{N0*10+1,2}] =:= + to_orddict(set(N0*10+1,2,set(N0*2+1,1,set(0,0,new()))))), + ?_assertError(badarg, to_orddict(no_array)) + ]. +-endif. + + +%% @spec (array()) -> [{Index::integer(), Value::term()}] +%% @doc Convert the array to an ordered list of pairs `{Index, Value}', +%% skipping default-valued entries. +%% +%% @see to_orddict/1 + +-spec sparse_to_orddict(array()) -> indx_pairs(). + +sparse_to_orddict(#array{size = 0}) -> + []; +sparse_to_orddict(#array{size = N, elements = E, default = D}) -> + I = N - 1, + sparse_to_orddict_1(E, I, D, I); +sparse_to_orddict(_) -> + erlang:error(badarg). + +%% see to_orddict/1 for details + +sparse_to_orddict_1(E=?NODEPATTERN(S), R, D, I) -> + N = I div S, + I1 = I rem S, + sparse_to_orddict_3(N, R - I1 - 1, D, + sparse_to_orddict_1(element(N+1, E), R, D, I1), + E, S); +sparse_to_orddict_1(E, _R, _D, _I) when is_integer(E) -> + []; +sparse_to_orddict_1(E, R, D, I) -> + sparse_push_tuple_pairs(I+1, R, D, E, []). + +sparse_to_orddict_2(E=?NODEPATTERN(S), R, D, L) -> + sparse_to_orddict_3(?NODESIZE, R, D, L, E, S); +sparse_to_orddict_2(E, _R, _D, L) when is_integer(E) -> + L; +sparse_to_orddict_2(E, R, D, L) -> + sparse_push_tuple_pairs(?LEAFSIZE, R, D, E, L). + +sparse_to_orddict_3(0, _R, _D, L, _E, _S) -> % when is_integer(R) -> + L; +sparse_to_orddict_3(N, R, D, L, E, S) -> + sparse_to_orddict_3(N-1, R - S, D, + sparse_to_orddict_2(element(N, E), R, D, L), + E, S). + +-spec sparse_push_tuple_pairs(non_neg_integer(), array_indx(), + _, _, indx_pairs()) -> indx_pairs(). + +sparse_push_tuple_pairs(0, _I, _D, _T, L) -> + L; +sparse_push_tuple_pairs(N, I, D, T, L) -> + case element(N, T) of + D -> sparse_push_tuple_pairs(N-1, I-1, D, T, L); + E -> sparse_push_tuple_pairs(N-1, I-1, D, T, [{I, E} | L]) + end. + + +-ifdef(EUNIT). +sparse_to_orddict_test_() -> + N0 = ?LEAFSIZE, + [?_assert([] =:= sparse_to_orddict(new())), + ?_assert([] =:= sparse_to_orddict(new(1))), + ?_assert([] =:= sparse_to_orddict(new(1,{default,0}))), + ?_assert([] =:= sparse_to_orddict(new(2))), + ?_assert([] =:= sparse_to_orddict(new(2,{default,0}))), + ?_assert([] =:= sparse_to_orddict(new(N0,{default,0}))), + ?_assert([] =:= sparse_to_orddict(new(N0+1,{default,1}))), + ?_assert([] =:= sparse_to_orddict(new(N0+2,{default,2}))), + ?_assert([] =:= sparse_to_orddict(new(666,{default,6}))), + ?_assert([{0,1},{1,2},{2,3}] =:= + sparse_to_orddict(set(2,3,set(1,2,set(0,1,new()))))), + ?_assert([{0,3},{1,2},{2,1}] =:= + sparse_to_orddict(set(0,3,set(1,2,set(2,1,new()))))), + ?_assert([{0,1},{N0-1,1}] =:= + sparse_to_orddict(set(N0-1,1,set(0,1,new({default,0}))))), + ?_assert([{0,1},{N0,1}] =:= + sparse_to_orddict(set(N0,1,set(0,1,new({default,0}))))), + ?_assert([{0,1},{N0+1,1}] =:= + sparse_to_orddict(set(N0+1,1,set(0,1,new({default,0}))))), + ?_assert([{0,0},{N0*2+1,1},{N0*10+1,2}] =:= + sparse_to_orddict(set(N0*10+1,2,set(N0*2+1,1,set(0,0,new()))))), + ?_assertError(badarg, sparse_to_orddict(no_array)) + ]. +-endif. + + +%% @spec (list()) -> array() +%% @equiv from_orddict(Orddict, undefined) + +-spec from_orddict(indx_pairs()) -> array(). + +from_orddict(Orddict) -> + from_orddict(Orddict, undefined). + +%% @spec (list(), term()) -> array() +%% @doc Convert an ordered list of pairs `{Index, Value}' to a +%% corresponding extendible array. `Default' is used as the value for +%% uninitialized entries of the array. If `List' is not a proper, +%% ordered list of pairs whose first elements are nonnegative +%% integers, the call fails with reason `badarg'. +%% +%% @see new/2 +%% @see to_orddict/1 + +-spec from_orddict(indx_pairs(), term()) -> array(). + +from_orddict([], Default) -> + new({default,Default}); +from_orddict(List, Default) when is_list(List) -> + {E, N, M} = from_orddict_0(List, 0, ?LEAFSIZE, Default, []), + #array{size = N, max = M, default = Default, elements = E}; +from_orddict(_, _) -> + erlang:error(badarg). + +%% 2 pass implementation, first pass builds the needed leaf nodes +%% and adds hole sizes. +%% (inserts default elements for missing list entries in the leafs +%% and pads the last tuple if necessary). +%% Second pass builds the tree from the leafs and the holes. +%% +%% Doesn't build/expand unnecessary leaf nodes which costs memory +%% and time for sparse arrays. + +from_orddict_0([], N, _Max, _D, Es) -> + %% Finished, build the resulting tree + case Es of + [E] -> + {E, N, ?LEAFSIZE}; + _ -> + collect_leafs(N, Es, ?LEAFSIZE) + end; + +from_orddict_0(Xs=[{Ix1, _}|_], Ix, Max0, D, Es0) + when Ix1 > Max0, is_integer(Ix1) -> + %% We have a hole larger than a leaf + Hole = Ix1-Ix, + Step = Hole - (Hole rem ?LEAFSIZE), + Next = Ix+Step, + from_orddict_0(Xs, Next, Next+?LEAFSIZE, D, [Step|Es0]); +from_orddict_0(Xs0=[{_, _}|_], Ix0, Max, D, Es) -> + %% Fill a leaf + {Xs,E,Ix} = from_orddict_1(Ix0, Max, Xs0, Ix0, D, []), + from_orddict_0(Xs, Ix, Ix+?LEAFSIZE, D, [E|Es]); +from_orddict_0(Xs, _, _, _,_) -> + erlang:error({badarg, Xs}). + +from_orddict_1(Ix, Ix, Xs, N, _D, As) -> + %% Leaf is full + E = list_to_tuple(lists:reverse(As)), + {Xs, E, N}; +from_orddict_1(Ix, Max, Xs, N0, D, As) -> + case Xs of + [{Ix, Val} | Xs1] -> + N = Ix+1, + from_orddict_1(N, Max, Xs1, N, D, [Val | As]); + [{Ix1, _} | _] when is_integer(Ix1), Ix1 > Ix -> + N = Ix+1, + from_orddict_1(N, Max, Xs, N, D, [D | As]); + [_ | _] -> + erlang:error({badarg, Xs}); + _ -> + from_orddict_1(Ix+1, Max, Xs, N0, D, [D | As]) + end. + +%% Es is reversed i.e. starting from the largest leafs +collect_leafs(N, Es, S) -> + I = (N-1) div S + 1, + Pad = ((?NODESIZE - (I rem ?NODESIZE)) rem ?NODESIZE) * S, + case Pad of + 0 -> + collect_leafs(?NODESIZE, Es, S, N, [S], []); + _ -> %% Pad the end + collect_leafs(?NODESIZE, [Pad|Es], S, N, [S], []) + end. + +collect_leafs(0, Xs, S, N, As, Es) -> + E = list_to_tuple(As), + case Xs of + [] -> + case Es of + [] -> + {E, N, ?extend(S)}; + _ -> + collect_leafs(N, lists:reverse([E | Es]), + ?extend(S)) + end; + _ -> + collect_leafs(?NODESIZE, Xs, S, N, [S], [E | Es]) + end; +collect_leafs(I, [X | Xs], S, N, As0, Es0) + when is_integer(X) -> + %% A hole, pad accordingly. + Step0 = (X div S), + if + Step0 < I -> + As = push(Step0, S, As0), + collect_leafs(I-Step0, Xs, S, N, As, Es0); + I =:= ?NODESIZE -> + Step = Step0 rem ?NODESIZE, + As = push(Step, S, As0), + collect_leafs(I-Step, Xs, S, N, As, [X|Es0]); + I =:= Step0 -> + As = push(I, S, As0), + collect_leafs(0, Xs, S, N, As, Es0); + true -> + As = push(I, S, As0), + Step = Step0 - I, + collect_leafs(0, [Step*S|Xs], S, N, As, Es0) + end; +collect_leafs(I, [X | Xs], S, N, As, Es) -> + collect_leafs(I-1, Xs, S, N, [X | As], Es); +collect_leafs(?NODESIZE, [], S, N, [_], Es) -> + collect_leafs(N, lists:reverse(Es), ?extend(S)). + +-ifdef(EUNIT). +from_orddict_test_() -> + N0 = ?LEAFSIZE, + N1 = ?NODESIZE*N0, + N2 = ?NODESIZE*N1, + N3 = ?NODESIZE*N2, + N4 = ?NODESIZE*N3, + [?_assert(array:size(from_orddict([])) =:= 0), + ?_assert(array:is_fix(from_orddict([])) =:= false), + ?_assert(array:size(from_orddict([{0,undefined}])) =:= 1), + ?_assert(array:is_fix(from_orddict([{0,undefined}])) =:= false), + ?_assert(array:size(from_orddict([{N0-1,undefined}])) =:= N0), + ?_assert(array:size(from_orddict([{N,0}||N<-lists:seq(0,N1-1)])) + =:= N1), + ?_assertError({badarg,_}, from_orddict([foo])), + ?_assertError({badarg,_}, from_orddict([{200,foo},{1,bar}])), + ?_assertError({badarg,_}, from_orddict([{N,0}||N<-lists:seq(0,N0-1)] ++ not_a_list)), + ?_assertError(badarg, from_orddict(no_array)), + + + ?_assert(?LET(L, [{N,0}||N<-lists:seq(0,N0-1)], + L =:= to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N,0}||N<-lists:seq(0,N0)], + L =:= to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N,0}||N<-lists:seq(0,N2-1)], + L =:= to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N,0}||N<-lists:seq(0,N2)], + L =:= to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N,0}||N<-lists:seq(0,N3-1)], + L =:= to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N,0}||N<-lists:seq(0,N4-1)], + L =:= to_orddict(from_orddict(L)))), + + %% Hole in the begining + ?_assert(?LET(L, [{0,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N0,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N1,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N3,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N4,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N0-1,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N1-1,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N3-1,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{N4-1,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + + %% Hole in middle + + ?_assert(?LET(L, [{0,0},{N0,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{0,0},{N1,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{0,0},{N3,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{0,0},{N4,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{0,0},{N0-1,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{0,0},{N1-1,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{0,0},{N3-1,0}], + L =:= sparse_to_orddict(from_orddict(L)))), + ?_assert(?LET(L, [{0,0},{N4-1,0}], + L =:= sparse_to_orddict(from_orddict(L)))) + + ]. +-endif. + + +%% @spec (Function, array()) -> array() +%% Function = (Index::integer(), Value::term()) -> term() +%% @doc Map the given function onto each element of the array. The +%% elements are visited in order from the lowest index to the highest. +%% If `Function' is not a function, the call fails with reason `badarg'. +%% +%% @see foldl/3 +%% @see foldr/3 +%% @see sparse_map/2 + +-spec map(fun((array_indx(), _) -> _), array()) -> array(). + +map(Function, Array=#array{size = N, elements = E, default = D}) + when is_function(Function, 2) -> + if N > 0 -> + A = Array#array{elements = []}, % kill reference, for GC + A#array{elements = map_1(N-1, E, 0, Function, D)}; + true -> + Array + end; +map(_, _) -> + erlang:error(badarg). + +%% It might be simpler to traverse the array right-to-left, as done e.g. +%% in the to_orddict/1 function, but it is better to guarantee +%% left-to-right application over the elements - that is more likely to +%% be a generally useful property. + +map_1(N, E=?NODEPATTERN(S), Ix, F, D) -> + list_to_tuple(lists:reverse([S | map_2(1, E, Ix, F, D, [], + N div S + 1, N rem S, S)])); +map_1(N, E, Ix, F, D) when is_integer(E) -> + map_1(N, unfold(E, D), Ix, F, D); +map_1(N, E, Ix, F, D) -> + list_to_tuple(lists:reverse(map_3(1, E, Ix, F, D, N+1, []))). + +map_2(I, E, Ix, F, D, L, I, R, _S) -> + map_2_1(I+1, E, [map_1(R, element(I, E), Ix, F, D) | L]); +map_2(I, E, Ix, F, D, L, N, R, S) -> + map_2(I+1, E, Ix + S, F, D, + [map_1(S-1, element(I, E), Ix, F, D) | L], + N, R, S). + +map_2_1(I, E, L) when I =< ?NODESIZE -> + map_2_1(I+1, E, [element(I, E) | L]); +map_2_1(_I, _E, L) -> + L. + +-spec map_3(pos_integer(), _, array_indx(), + fun((array_indx(),_) -> _), _, non_neg_integer(), [X]) -> [X]. + +map_3(I, E, Ix, F, D, N, L) when I =< N -> + map_3(I+1, E, Ix+1, F, D, N, [F(Ix, element(I, E)) | L]); +map_3(I, E, Ix, F, D, N, L) when I =< ?LEAFSIZE -> + map_3(I+1, E, Ix+1, F, D, N, [D | L]); +map_3(_I, _E, _Ix, _F, _D, _N, L) -> + L. + + +unfold(S, _D) when S > ?LEAFSIZE -> + ?NEW_NODE(?reduce(S)); +unfold(_S, D) -> + ?NEW_LEAF(D). + + +-ifdef(EUNIT). +map_test_() -> + N0 = ?LEAFSIZE, + Id = fun (_,X) -> X end, + Plus = fun(N) -> fun (_,X) -> X+N end end, + Default = fun(_K,undefined) -> no_value; + (K,V) -> K+V + end, + [?_assertError(badarg, map([], new())), + ?_assertError(badarg, map([], new(10))), + ?_assert(to_list(map(Id, new())) =:= []), + ?_assert(to_list(map(Id, new(1))) =:= [undefined]), + ?_assert(to_list(map(Id, new(5,{default,0}))) =:= [0,0,0,0,0]), + ?_assert(to_list(map(Id, from_list([1,2,3,4]))) =:= [1,2,3,4]), + ?_assert(to_list(map(Plus(1), from_list([0,1,2,3]))) =:= [1,2,3,4]), + ?_assert(to_list(map(Plus(-1), from_list(lists:seq(1,11)))) + =:= lists:seq(0,10)), + ?_assert(to_list(map(Plus(11), from_list(lists:seq(0,99999)))) + =:= lists:seq(11,100010)), + ?_assert([{0,0},{N0*2+1,N0*2+1+1},{N0*100+1,N0*100+1+2}] =:= + sparse_to_orddict((map(Default, + set(N0*100+1,2, + set(N0*2+1,1, + set(0,0,new())))))#array{default = no_value})) + ]. +-endif. + + +%% @spec (Function, array()) -> array() +%% Function = (Index::integer(), Value::term()) -> term() +%% @doc Map the given function onto each element of the array, skipping +%% default-valued entries. The elements are visited in order from the +%% lowest index to the highest. If `Function' is not a function, the +%% call fails with reason `badarg'. +%% +%% @see map/2 + +-spec sparse_map(fun((array_indx(), _) -> _), array()) -> array(). + +sparse_map(Function, Array=#array{size = N, elements = E, default = D}) + when is_function(Function, 2) -> + if N > 0 -> + A = Array#array{elements = []}, % kill reference, for GC + A#array{elements = sparse_map_1(N-1, E, 0, Function, D)}; + true -> + Array + end; +sparse_map(_, _) -> + erlang:error(badarg). + +%% see map/2 for details +%% TODO: we can probably optimize away the use of div/rem here + +sparse_map_1(N, E=?NODEPATTERN(S), Ix, F, D) -> + list_to_tuple(lists:reverse([S | sparse_map_2(1, E, Ix, F, D, [], + N div S + 1, + N rem S, S)])); +sparse_map_1(_N, E, _Ix, _F, _D) when is_integer(E) -> + E; +sparse_map_1(_N, E, Ix, F, D) -> + list_to_tuple(lists:reverse(sparse_map_3(1, E, Ix, F, D, []))). + +sparse_map_2(I, E, Ix, F, D, L, I, R, _S) -> + sparse_map_2_1(I+1, E, + [sparse_map_1(R, element(I, E), Ix, F, D) | L]); +sparse_map_2(I, E, Ix, F, D, L, N, R, S) -> + sparse_map_2(I+1, E, Ix + S, F, D, + [sparse_map_1(S-1, element(I, E), Ix, F, D) | L], + N, R, S). + +sparse_map_2_1(I, E, L) when I =< ?NODESIZE -> + sparse_map_2_1(I+1, E, [element(I, E) | L]); +sparse_map_2_1(_I, _E, L) -> + L. + +-spec sparse_map_3(pos_integer(), _, array_indx(), + fun((array_indx(),_) -> _), _, [X]) -> [X]. + +sparse_map_3(I, T, Ix, F, D, L) when I =< ?LEAFSIZE -> + case element(I, T) of + D -> sparse_map_3(I+1, T, Ix+1, F, D, [D | L]); + E -> sparse_map_3(I+1, T, Ix+1, F, D, [F(Ix, E) | L]) + end; +sparse_map_3(_I, _E, _Ix, _F, _D, L) -> + L. + + +-ifdef(EUNIT). +sparse_map_test_() -> + N0 = ?LEAFSIZE, + Id = fun (_,X) -> X end, + Plus = fun(N) -> fun (_,X) -> X+N end end, + KeyPlus = fun (K,X) -> K+X end, + [?_assertError(badarg, sparse_map([], new())), + ?_assertError(badarg, sparse_map([], new(10))), + ?_assert(to_list(sparse_map(Id, new())) =:= []), + ?_assert(to_list(sparse_map(Id, new(1))) =:= [undefined]), + ?_assert(to_list(sparse_map(Id, new(5,{default,0}))) =:= [0,0,0,0,0]), + ?_assert(to_list(sparse_map(Id, from_list([1,2,3,4]))) =:= [1,2,3,4]), + ?_assert(to_list(sparse_map(Plus(1), from_list([0,1,2,3]))) + =:= [1,2,3,4]), + ?_assert(to_list(sparse_map(Plus(-1), from_list(lists:seq(1,11)))) + =:= lists:seq(0,10)), + ?_assert(to_list(sparse_map(Plus(11), from_list(lists:seq(0,99999)))) + =:= lists:seq(11,100010)), + ?_assert(to_list(sparse_map(Plus(1), set(1,1,new({default,0})))) + =:= [0,2]), + ?_assert(to_list(sparse_map(Plus(1), + set(3,4,set(0,1,new({default,0}))))) + =:= [2,0,0,5]), + ?_assert(to_list(sparse_map(Plus(1), + set(9,9,set(1,1,new({default,0}))))) + =:= [0,2,0,0,0,0,0,0,0,10]), + ?_assert([{0,0},{N0*2+1,N0*2+1+1},{N0*100+1,N0*100+1+2}] =:= + sparse_to_orddict(sparse_map(KeyPlus, + set(N0*100+1,2, + set(N0*2+1,1, + set(0,0,new())))))) + + ]. +-endif. + + +%% @spec (Function, InitialAcc::term(), array()) -> term() +%% Function = (Index::integer(), Value::term(), Acc::term()) -> +%% term() +%% @doc Fold the elements of the array using the given function and +%% initial accumulator value. The elements are visited in order from the +%% lowest index to the highest. If `Function' is not a function, the +%% call fails with reason `badarg'. +%% +%% @see foldr/3 +%% @see map/2 +%% @see sparse_foldl/3 + +-spec foldl(fun((array_indx(), _, A) -> B), A, array()) -> B. + +foldl(Function, A, #array{size = N, elements = E, default = D}) + when is_function(Function, 3) -> + if N > 0 -> + foldl_1(N-1, E, A, 0, Function, D); + true -> + A + end; +foldl(_, _, _) -> + erlang:error(badarg). + +foldl_1(N, E=?NODEPATTERN(S), A, Ix, F, D) -> + foldl_2(1, E, A, Ix, F, D, N div S + 1, N rem S, S); +foldl_1(N, E, A, Ix, F, D) when is_integer(E) -> + foldl_1(N, unfold(E, D), A, Ix, F, D); +foldl_1(N, E, A, Ix, F, _D) -> + foldl_3(1, E, A, Ix, F, N+1). + +foldl_2(I, E, A, Ix, F, D, I, R, _S) -> + foldl_1(R, element(I, E), A, Ix, F, D); +foldl_2(I, E, A, Ix, F, D, N, R, S) -> + foldl_2(I+1, E, foldl_1(S-1, element(I, E), A, Ix, F, D), + Ix + S, F, D, N, R, S). + +-spec foldl_3(pos_integer(), _, A, array_indx(), + fun((array_indx, _, A) -> B), integer()) -> B. + +foldl_3(I, E, A, Ix, F, N) when I =< N -> + foldl_3(I+1, E, F(Ix, element(I, E), A), Ix+1, F, N); +foldl_3(_I, _E, A, _Ix, _F, _N) -> + A. + + +-ifdef(EUNIT). +foldl_test_() -> + N0 = ?LEAFSIZE, + Count = fun (_,_,N) -> N+1 end, + Sum = fun (_,X,N) -> N+X end, + Reverse = fun (_,X,L) -> [X|L] end, + Vals = fun(_K,undefined,{C,L}) -> {C+1,L}; + (K,X,{C,L}) -> {C,[K+X|L]} + end, + [?_assertError(badarg, foldl([], 0, new())), + ?_assertError(badarg, foldl([], 0, new(10))), + ?_assert(foldl(Count, 0, new()) =:= 0), + ?_assert(foldl(Count, 0, new(1)) =:= 1), + ?_assert(foldl(Count, 0, new(10)) =:= 10), + ?_assert(foldl(Count, 0, from_list([1,2,3,4])) =:= 4), + ?_assert(foldl(Count, 10, from_list([0,1,2,3,4,5,6,7,8,9])) =:= 20), + ?_assert(foldl(Count, 1000, from_list(lists:seq(0,999))) =:= 2000), + ?_assert(foldl(Sum, 0, from_list(lists:seq(0,10))) =:= 55), + ?_assert(foldl(Reverse, [], from_list(lists:seq(0,1000))) + =:= lists:reverse(lists:seq(0,1000))), + ?_assert({999,[N0*100+1+2,N0*2+1+1,0]} =:= + foldl(Vals, {0,[]}, + set(N0*100+1,2, + set(N0*2+1,1, + set(0,0,new()))))) + + ]. +-endif. + + +%% @spec (Function, InitialAcc::term(), array()) -> term() +%% Function = (Index::integer(), Value::term(), Acc::term()) -> +%% term() +%% @doc Fold the elements of the array using the given function and +%% initial accumulator value, skipping default-valued entries. The +%% elements are visited in order from the lowest index to the highest. +%% If `Function' is not a function, the call fails with reason `badarg'. +%% +%% @see foldl/3 +%% @see sparse_foldr/3 + +-spec sparse_foldl(fun((array_indx(), _, A) -> B), A, array()) -> B. + +sparse_foldl(Function, A, #array{size = N, elements = E, default = D}) + when is_function(Function, 3) -> + if N > 0 -> + sparse_foldl_1(N-1, E, A, 0, Function, D); + true -> + A + end; +sparse_foldl(_, _, _) -> + erlang:error(badarg). + +%% see foldl/3 for details +%% TODO: this can be optimized + +sparse_foldl_1(N, E=?NODEPATTERN(S), A, Ix, F, D) -> + sparse_foldl_2(1, E, A, Ix, F, D, N div S + 1, N rem S, S); +sparse_foldl_1(_N, E, A, _Ix, _F, _D) when is_integer(E) -> + A; +sparse_foldl_1(N, E, A, Ix, F, D) -> + sparse_foldl_3(1, E, A, Ix, F, D, N+1). + +sparse_foldl_2(I, E, A, Ix, F, D, I, R, _S) -> + sparse_foldl_1(R, element(I, E), A, Ix, F, D); +sparse_foldl_2(I, E, A, Ix, F, D, N, R, S) -> + sparse_foldl_2(I+1, E, sparse_foldl_1(S-1, element(I, E), A, Ix, F, D), + Ix + S, F, D, N, R, S). + +sparse_foldl_3(I, T, A, Ix, F, D, N) when I =< N -> + case element(I, T) of + D -> sparse_foldl_3(I+1, T, A, Ix+1, F, D, N); + E -> sparse_foldl_3(I+1, T, F(Ix, E, A), Ix+1, F, D, N) + end; +sparse_foldl_3(_I, _T, A, _Ix, _F, _D, _N) -> + A. + + +-ifdef(EUNIT). +sparse_foldl_test_() -> + N0 = ?LEAFSIZE, + Count = fun (_,_,N) -> N+1 end, + Sum = fun (_,X,N) -> N+X end, + Reverse = fun (_,X,L) -> [X|L] end, + Vals = fun(_K,undefined,{C,L}) -> {C+1,L}; + (K,X,{C,L}) -> {C,[K+X|L]} + end, + [?_assertError(badarg, sparse_foldl([], 0, new())), + ?_assertError(badarg, sparse_foldl([], 0, new(10))), + ?_assert(sparse_foldl(Count, 0, new()) =:= 0), + ?_assert(sparse_foldl(Count, 0, new(1)) =:= 0), + ?_assert(sparse_foldl(Count, 0, new(10,{default,1})) =:= 0), + ?_assert(sparse_foldl(Count, 0, from_list([0,1,2,3,4],0)) =:= 4), + ?_assert(sparse_foldl(Count, 0, from_list([0,1,2,3,4,5,6,7,8,9,0],0)) + =:= 9), + ?_assert(sparse_foldl(Count, 0, from_list(lists:seq(0,999),0)) + =:= 999), + ?_assert(sparse_foldl(Sum, 0, from_list(lists:seq(0,10), 5)) =:= 50), + ?_assert(sparse_foldl(Reverse, [], from_list(lists:seq(0,1000), 0)) + =:= lists:reverse(lists:seq(1,1000))), + ?_assert({0,[N0*100+1+2,N0*2+1+1,0]} =:= + sparse_foldl(Vals, {0,[]}, + set(N0*100+1,2, + set(N0*2+1,1, + set(0,0,new()))))) + ]. +-endif. + + +%% @spec (Function, InitialAcc::term(), array()) -> term() +%% Function = (Index::integer(), Value::term(), Acc::term()) -> +%% term() +%% @doc Fold the elements of the array right-to-left using the given +%% function and initial accumulator value. The elements are visited in +%% order from the highest index to the lowest. If `Function' is not a +%% function, the call fails with reason `badarg'. +%% +%% @see foldl/3 +%% @see map/2 + +-spec foldr(fun((array_indx(), _, A) -> B), A, array()) -> B. + +foldr(Function, A, #array{size = N, elements = E, default = D}) + when is_function(Function, 3) -> + if N > 0 -> + I = N - 1, + foldr_1(I, E, I, A, Function, D); + true -> + A + end; +foldr(_, _, _) -> + erlang:error(badarg). + +%% this is based on to_orddict/1 + +foldr_1(I, E=?NODEPATTERN(S), Ix, A, F, D) -> + foldr_2(I div S + 1, E, Ix, A, F, D, I rem S, S-1); +foldr_1(I, E, Ix, A, F, D) when is_integer(E) -> + foldr_1(I, unfold(E, D), Ix, A, F, D); +foldr_1(I, E, Ix, A, F, _D) -> + I1 = I+1, + foldr_3(I1, E, Ix-I1, A, F). + +foldr_2(0, _E, _Ix, A, _F, _D, _R, _R0) -> + A; +foldr_2(I, E, Ix, A, F, D, R, R0) -> + foldr_2(I-1, E, Ix - R - 1, + foldr_1(R, element(I, E), Ix, A, F, D), + F, D, R0, R0). + +-spec foldr_3(array_indx(), term(), integer(), A, + fun((array_indx(), _, A) -> B)) -> B. + +foldr_3(0, _E, _Ix, A, _F) -> + A; +foldr_3(I, E, Ix, A, F) -> + foldr_3(I-1, E, Ix, F(Ix+I, element(I, E), A), F). + + +-ifdef(EUNIT). +foldr_test_() -> + N0 = ?LEAFSIZE, + Count = fun (_,_,N) -> N+1 end, + Sum = fun (_,X,N) -> N+X end, + List = fun (_,X,L) -> [X|L] end, + Vals = fun(_K,undefined,{C,L}) -> {C+1,L}; + (K,X,{C,L}) -> {C,[K+X|L]} + end, + [?_assertError(badarg, foldr([], 0, new())), + ?_assertError(badarg, foldr([], 0, new(10))), + ?_assert(foldr(Count, 0, new()) =:= 0), + ?_assert(foldr(Count, 0, new(1)) =:= 1), + ?_assert(foldr(Count, 0, new(10)) =:= 10), + ?_assert(foldr(Count, 0, from_list([1,2,3,4])) =:= 4), + ?_assert(foldr(Count, 10, from_list([0,1,2,3,4,5,6,7,8,9])) =:= 20), + ?_assert(foldr(Count, 1000, from_list(lists:seq(0,999))) =:= 2000), + ?_assert(foldr(Sum, 0, from_list(lists:seq(0,10))) =:= 55), + ?_assert(foldr(List, [], from_list(lists:seq(0,1000))) + =:= lists:seq(0,1000)), + ?_assert({999,[0,N0*2+1+1,N0*100+1+2]} =:= + foldr(Vals, {0,[]}, + set(N0*100+1,2, + set(N0*2+1,1, + set(0,0,new()))))) + + ]. +-endif. + + +%% @spec (Function, InitialAcc::term(), array()) -> term() +%% Function = (Index::integer(), Value::term(), Acc::term()) -> +%% term() +%% @doc Fold the elements of the array right-to-left using the given +%% function and initial accumulator value, skipping default-valued +%% entries. The elements are visited in order from the highest index to +%% the lowest. If `Function' is not a function, the call fails with +%% reason `badarg'. +%% +%% @see foldr/3 +%% @see sparse_foldl/3 + +-spec sparse_foldr(fun((array_indx(), _, A) -> B), A, array()) -> B. + +sparse_foldr(Function, A, #array{size = N, elements = E, default = D}) + when is_function(Function, 3) -> + if N > 0 -> + I = N - 1, + sparse_foldr_1(I, E, I, A, Function, D); + true -> + A + end; +sparse_foldr(_, _, _) -> + erlang:error(badarg). + +%% see foldr/3 for details +%% TODO: this can be optimized + +sparse_foldr_1(I, E=?NODEPATTERN(S), Ix, A, F, D) -> + sparse_foldr_2(I div S + 1, E, Ix, A, F, D, I rem S, S-1); +sparse_foldr_1(_I, E, _Ix, A, _F, _D) when is_integer(E) -> + A; +sparse_foldr_1(I, E, Ix, A, F, D) -> + I1 = I+1, + sparse_foldr_3(I1, E, Ix-I1, A, F, D). + +sparse_foldr_2(0, _E, _Ix, A, _F, _D, _R, _R0) -> + A; +sparse_foldr_2(I, E, Ix, A, F, D, R, R0) -> + sparse_foldr_2(I-1, E, Ix - R - 1, + sparse_foldr_1(R, element(I, E), Ix, A, F, D), + F, D, R0, R0). + +-spec sparse_foldr_3(array_indx(), _, array_indx(), A, + fun((array_indx(), _, A) -> B), _) -> B. + +sparse_foldr_3(0, _T, _Ix, A, _F, _D) -> + A; +sparse_foldr_3(I, T, Ix, A, F, D) -> + case element(I, T) of + D -> sparse_foldr_3(I-1, T, Ix, A, F, D); + E -> sparse_foldr_3(I-1, T, Ix, F(Ix+I, E, A), F, D) + end. + + +%% @spec (array()) -> integer() +%% @doc Get the number of entries in the array up until the last +%% non-default valued entry. In other words, returns `I+1' if `I' is the +%% last non-default valued entry in the array, or zero if no such entry +%% exists. +%% @see size/1 +%% @see resize/1 + +-spec sparse_size(array()) -> non_neg_integer(). + +sparse_size(A) -> + F = fun (I, _V, _A) -> throw({value, I}) end, + try sparse_foldr(F, [], A) of + [] -> 0 + catch + {value, I} -> + I + 1 + end. + + +-ifdef(EUNIT). +sparse_foldr_test_() -> + N0 = ?LEAFSIZE, + Count = fun (_,_,N) -> N+1 end, + Sum = fun (_,X,N) -> N+X end, + List = fun (_,X,L) -> [X|L] end, + Vals = fun(_K,undefined,{C,L}) -> {C+1,L}; + (K,X,{C,L}) -> {C,[K+X|L]} + end, + [?_assertError(badarg, sparse_foldr([], 0, new())), + ?_assertError(badarg, sparse_foldr([], 0, new(10))), + ?_assert(sparse_foldr(Count, 0, new()) =:= 0), + ?_assert(sparse_foldr(Count, 0, new(1)) =:= 0), + ?_assert(sparse_foldr(Count, 0, new(10,{default,1})) =:= 0), + ?_assert(sparse_foldr(Count, 0, from_list([0,1,2,3,4],0)) =:= 4), + ?_assert(sparse_foldr(Count, 0, from_list([0,1,2,3,4,5,6,7,8,9,0],0)) + =:= 9), + ?_assert(sparse_foldr(Count, 0, from_list(lists:seq(0,999),0)) + =:= 999), + ?_assert(sparse_foldr(Sum, 0, from_list(lists:seq(0,10),5)) =:= 50), + ?_assert(sparse_foldr(List, [], from_list(lists:seq(0,1000),0)) + =:= lists:seq(1,1000)), + + ?_assert(sparse_size(new()) =:= 0), + ?_assert(sparse_size(new(8)) =:= 0), + ?_assert(sparse_size(array:set(7, 0, new())) =:= 8), + ?_assert(sparse_size(array:set(7, 0, new(10))) =:= 8), + ?_assert(sparse_size(array:set(99, 0, new(10,{fixed,false}))) + =:= 100), + ?_assert(sparse_size(array:set(7, undefined, new())) =:= 0), + ?_assert(sparse_size(array:from_list([1,2,3,undefined])) =:= 3), + ?_assert(sparse_size(array:from_orddict([{3,0},{17,0},{99,undefined}])) + =:= 18), + ?_assert({0,[0,N0*2+1+1,N0*100+1+2]} =:= + sparse_foldr(Vals, {0,[]}, + set(N0*100+1,2, + set(N0*2+1,1, + set(0,0,new()))))) + ]. +-endif. diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl new file mode 100644 index 0000000000..ebef998ee1 --- /dev/null +++ b/lib/stdlib/src/base64.erl @@ -0,0 +1,304 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Description: Implements base 64 encode and decode. See RFC4648. + +-module(base64). + +-export([encode/1, decode/1, mime_decode/1, + encode_to_string/1, decode_to_string/1, mime_decode_to_string/1]). + +%%------------------------------------------------------------------------- +%% The following type is a subtype of string() for return values +%% of (some) functions of this module. +%%------------------------------------------------------------------------- + +-type ascii_string() :: [1..255]. + +%%------------------------------------------------------------------------- +%% encode_to_string(ASCII) -> Base64String +%% ASCII - string() | binary() +%% Base64String - string() +%% +%% Description: Encodes a plain ASCII string (or binary) into base64. +%%------------------------------------------------------------------------- + +-spec encode_to_string(string() | binary()) -> ascii_string(). + +encode_to_string(Bin) when is_binary(Bin) -> + encode_to_string(binary_to_list(Bin)); +encode_to_string(List) when is_list(List) -> + encode_l(List). + +%%------------------------------------------------------------------------- +%% encode(ASCII) -> Base64 +%% ASCII - string() | binary() +%% Base64 - binary() +%% +%% Description: Encodes a plain ASCII string (or binary) into base64. +%%------------------------------------------------------------------------- + +-spec encode(string() | binary()) -> binary(). + +encode(Bin) when is_binary(Bin) -> + encode_binary(Bin); +encode(List) when is_list(List) -> + list_to_binary(encode_l(List)). + +-spec encode_l(string()) -> ascii_string(). + +encode_l([]) -> + []; +encode_l([A]) -> + [b64e(A bsr 2), + b64e((A band 3) bsl 4), $=, $=]; +encode_l([A,B]) -> + [b64e(A bsr 2), + b64e(((A band 3) bsl 4) bor (B bsr 4)), + b64e((B band 15) bsl 2), $=]; +encode_l([A,B,C|Ls]) -> + BB = (A bsl 16) bor (B bsl 8) bor C, + [b64e(BB bsr 18), + b64e((BB bsr 12) band 63), + b64e((BB bsr 6) band 63), + b64e(BB band 63) | encode_l(Ls)]. + +encode_binary(Bin) -> + Split = 3*(byte_size(Bin) div 3), + <> = Bin, + Main = << <<(b64e(C)):8>> || <> <= Main0 >>, + case Rest of + <> -> + <
>; + <> -> + <
>; + <<>> -> + Main + end. + +%%------------------------------------------------------------------------- +%% mime_decode(Base64) -> ASCII +%% decode(Base64) -> ASCII +%% Base64 - string() | binary() +%% ASCII - binary() +%% +%% Description: Decodes an base64 encoded string to plain ASCII. +%% mime_decode strips away all characters not Base64 before converting, +%% whereas decode crashes if an illegal character is found +%%------------------------------------------------------------------------- + +-spec decode(string() | binary()) -> binary(). + +decode(Bin) when is_binary(Bin) -> + decode_binary(<<>>, Bin); +decode(List) when is_list(List) -> + list_to_binary(decode_l(List)). + +-spec mime_decode(string() | binary()) -> binary(). + +mime_decode(Bin) when is_binary(Bin) -> + mime_decode_binary(<<>>, Bin); +mime_decode(List) when is_list(List) -> + list_to_binary(mime_decode_l(List)). + +-spec decode_l(string()) -> string(). + +decode_l(List) -> + L = strip_spaces(List, []), + decode(L, []). + +-spec mime_decode_l(string()) -> string(). + +mime_decode_l(List) -> + L = strip_illegal(List, []), + decode(L, []). + +%%------------------------------------------------------------------------- +%% mime_decode_to_string(Base64) -> ASCII +%% decode_to_string(Base64) -> ASCII +%% Base64 - string() | binary() +%% ASCII - binary() +%% +%% Description: Decodes an base64 encoded string to plain ASCII. +%% mime_decode strips away all characters not Base64 before converting, +%% whereas decode crashes if an illegal character is found +%%------------------------------------------------------------------------- + +-spec decode_to_string(string() | binary()) -> string(). + +decode_to_string(Bin) when is_binary(Bin) -> + decode_to_string(binary_to_list(Bin)); +decode_to_string(List) when is_list(List) -> + decode_l(List). + +-spec mime_decode_to_string(string() | binary()) -> string(). + +mime_decode_to_string(Bin) when is_binary(Bin) -> + mime_decode_to_string(binary_to_list(Bin)); +mime_decode_to_string(List) when is_list(List) -> + mime_decode_l(List). + +%% One-based decode map. +-define(DECODE_MAP, + {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15 + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31 + ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47 + 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63 + bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad, + bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}). + +decode_binary(Result0, <>) -> + case element(C, ?DECODE_MAP) of + bad -> + erlang:error({badarg,C}); + ws -> + decode_binary(Result0, T0); + eq -> + case strip_ws(T0) of + <<$=:8,T/binary>> -> + <<>> = strip_ws(T), + Split = byte_size(Result0) - 1, + <> = Result0, + Result; + T -> + <<>> = strip_ws(T), + Split = byte_size(Result0) - 1, + <> = Result0, + Result + end; + Bits -> + decode_binary(<>, T0) + end; +decode_binary(Result, <<>>) -> + true = is_binary(Result), + Result. + +mime_decode_binary(Result, <<0:8,T/bits>>) -> + mime_decode_binary(Result, T); +mime_decode_binary(Result0, <>) -> + case element(C, ?DECODE_MAP) of + Bits when is_integer(Bits) -> + mime_decode_binary(<>, T); + eq -> + case tail_contains_equal(T) of + true -> + Split = byte_size(Result0) - 1, + <> = Result0, + Result; + false -> + Split = byte_size(Result0) - 1, + <> = Result0, + Result + end; + _ -> + mime_decode_binary(Result0, T) + end; +mime_decode_binary(Result, <<>>) -> + true = is_binary(Result), + Result. + +decode([], A) -> A; +decode([$=,$=,C2,C1|Cs], A) -> + Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12), + Octet1 = Bits2x6 bsr 16, + decode(Cs, [Octet1|A]); +decode([$=,C3,C2,C1|Cs], A) -> + Bits3x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12) + bor (b64d(C3) bsl 6), + Octet1 = Bits3x6 bsr 16, + Octet2 = (Bits3x6 bsr 8) band 16#ff, + decode(Cs, [Octet1,Octet2|A]); +decode([C4,C3,C2,C1| Cs], A) -> + Bits4x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12) + bor (b64d(C3) bsl 6) bor b64d(C4), + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + decode(Cs, [Octet1,Octet2,Octet3|A]). + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + +strip_spaces([], A) -> A; +strip_spaces([$\s|Cs], A) -> strip_spaces(Cs, A); +strip_spaces([$\t|Cs], A) -> strip_spaces(Cs, A); +strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A); +strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A); +strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]). + +strip_ws(<<$\t,T/binary>>) -> + strip_ws(T); +strip_ws(<<$\n,T/binary>>) -> + strip_ws(T); +strip_ws(<<$\r,T/binary>>) -> + strip_ws(T); +strip_ws(<<$\s,T/binary>>) -> + strip_ws(T); +strip_ws(T) -> T. + +strip_illegal([0|Cs], A) -> + strip_illegal(Cs, A); +strip_illegal([C|Cs], A) -> + case element(C, ?DECODE_MAP) of + bad -> strip_illegal(Cs, A); + ws -> strip_illegal(Cs, A); + eq -> strip_illegal_end(Cs, [$=|A]); + _ -> strip_illegal(Cs, [C|A]) + end; +strip_illegal([], A) -> A. + +strip_illegal_end([0|Cs], A) -> + strip_illegal_end(Cs, A); +strip_illegal_end([C|Cs], A) -> + case element(C, ?DECODE_MAP) of + bad -> strip_illegal(Cs, A); + ws -> strip_illegal(Cs, A); + eq -> [C|A]; + _ -> strip_illegal(Cs, [C|A]) + end; +strip_illegal_end([], A) -> A. + +tail_contains_equal(<<$=,_/binary>>) -> true; +tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T); +tail_contains_equal(<<>>) -> false. + +%% accessors +b64e(X) -> + element(X+1, + {$A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, $N, + $O, $P, $Q, $R, $S, $T, $U, $V, $W, $X, $Y, $Z, + $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, + $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z, + $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $+, $/}). + + +b64d(X) -> + b64d_ok(element(X, ?DECODE_MAP)). + +b64d_ok(I) when is_integer(I) -> I. diff --git a/lib/stdlib/src/beam_lib.erl b/lib/stdlib/src/beam_lib.erl new file mode 100644 index 0000000000..820afd3739 --- /dev/null +++ b/lib/stdlib/src/beam_lib.erl @@ -0,0 +1,1027 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2000-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(beam_lib). +-behaviour(gen_server). + +-export([info/1, + cmp/2, + cmp_dirs/2, + chunks/2, + chunks/3, + all_chunks/1, + diff_dirs/2, + strip/1, + strip_files/1, + strip_release/1, + build_module/1, + version/1, + md5/1, + format_error/1]). + +%% The following functions implement encrypted debug info. + +-export([crypto_key_fun/1, clear_crypto_key_fun/0]). +-export([init/1,handle_call/3,handle_cast/2,handle_info/2, + terminate/2,code_change/3]). +-export([make_crypto_key/2, get_crypto_key/1]). %Utilities used by compiler + +-import(lists, [append/1, delete/2, foreach/2, keysort/2, + member/2, reverse/1, sort/1, splitwith/2]). + +-include_lib("kernel/include/file.hrl"). +-include("erl_compile.hrl"). + +%%------------------------------------------------------------------------- + +-type beam() :: module() | file:filename() | binary(). + +%% XXX: THE FOLLOWING SHOULD BE IMPORTED FROM SOMEWHERE ELSE +-type forms() :: term(). + +-type abst_vsn() :: atom(). +-type abst_code() :: {abst_vsn(), forms()} | 'no_abstract_code'. +-type attribute() :: atom(). +-type attrvalue() :: term(). +-type dataB() :: binary(). +-type index() :: non_neg_integer(). +-type label() :: integer(). + +-type chunkid() :: nonempty_string(). % approximation of the strings below +%% "Abst" | "Attr" | "CInf" | "ExpT" | "ImpT" | "LocT" | "Atom". +-type chunkname() :: 'abstract_code' | 'attributes' | 'compile_info' + | 'exports' | 'labeled_exports' + | 'imports' | 'indexed_imports' + | 'locals' | 'labeled_locals' + | 'atoms'. +-type chunkref() :: chunkname() | chunkid(). + +-type attrib_entry() :: {attribute(), [attrvalue()]}. +-type compinfo_entry() :: {atom(), term()}. +-type labeled_entry() :: {atom(), arity(), label()}. + +-type chunkdata() :: {chunkid(), dataB()} + | {'abstract_code', abst_code()} + | {'attributes', [attrib_entry()]} + | {'compile_info', [compinfo_entry()]} + | {'exports', [{atom(), arity()}]} + | {'labeled_exports', [labeled_entry()]} + | {'imports', [mfa()]} + | {'indexed_imports', [{index(), module(), atom(), arity()}]} + | {'locals', [{atom(), arity()}]} + | {'labeled_locals', [labeled_entry()]} + | {'atoms', [{integer(), atom()}]}. + +-type info_pair() :: {'file', file:filename()} + | {'binary', binary()} + | {'module', module()} + | {'chunks', [{chunkid(), integer(), integer()}]}. + +%% Error reasons +-type info_rsn() :: {'chunk_too_big', file:filename(), + chunkid(), integer(), integer()} + | {'invalid_beam_file', file:filename(), integer()} + | {'invalid_chunk', file:filename(), chunkid()} + | {'missing_chunk', file:filename(), chunkid()} + | {'not_a_beam_file', file:filename()} + | {'file_error', file:filename(), file:posix()}. +-type chnk_rsn() :: {'unknown_chunk', file:filename(), atom()} + | {'key_missing_or_invalid', file:filename(), + 'abstract_code'} + | info_rsn(). +-type cmp_rsn() :: {'modules_different', module(), module()} + | {'chunks_different', chunkid()} + | info_rsn(). + +%%------------------------------------------------------------------------- + +%% +%% Exported functions +%% + +-spec info(beam()) -> [info_pair()] | {'error', 'beam_lib', info_rsn()}. + +info(File) -> + read_info(beam_filename(File)). + +-spec chunks(beam(), [chunkref()]) -> + {'ok', {module(), [chunkdata()]}} | {'error', 'beam_lib', chnk_rsn()}. + +chunks(File, Chunks) -> + read_chunk_data(File, Chunks). + +-spec chunks(beam(), [chunkref()], ['allow_missing_chunks']) -> + {'ok', {module(), [{chunkref(), chunkdata() | 'missing_chunk'}]}} + | {'error', 'beam_lib', chnk_rsn()}. + +chunks(File, Chunks, Options) -> + try read_chunk_data(File, Chunks, Options) + catch Error -> Error end. + +-spec all_chunks(beam()) -> {'ok', 'beam_lib', [{chunkid(), dataB()}]}. + +all_chunks(File) -> + read_all_chunks(File). + +-spec cmp(beam(), beam()) -> 'ok' | {'error', 'beam_lib', cmp_rsn()}. + +cmp(File1, File2) -> + try cmp_files(File1, File2) + catch Error -> Error end. + +-spec cmp_dirs(atom() | file:filename(), atom() | file:filename()) -> + {[file:filename()], [file:filename()], + [{file:filename(), file:filename()}]} + | {'error', 'beam_lib', {'not_a_directory', term()} | info_rsn()}. + +cmp_dirs(Dir1, Dir2) -> + catch compare_dirs(Dir1, Dir2). + +-spec diff_dirs(atom() | file:filename(), atom() | file:filename()) -> + 'ok' | {'error', 'beam_lib', {'not_a_directory', term()} | info_rsn()}. + +diff_dirs(Dir1, Dir2) -> + catch diff_directories(Dir1, Dir2). + +-spec strip(beam()) -> + {'ok', {module(), beam()}} | {'error', 'beam_lib', info_rsn()}. + +strip(FileName) -> + try strip_file(FileName) + catch Error -> Error end. + +-spec strip_files([beam()]) -> + {'ok', [{module(), beam()}]} | {'error', 'beam_lib', info_rsn()}. + +strip_files(Files) when is_list(Files) -> + try strip_fils(Files) + catch Error -> Error end. + +-spec strip_release(atom() | file:filename()) -> + {'ok', [{module(), file:filename()}]} + | {'error', 'beam_lib', {'not_a_directory', term()} | info_rsn()}. + +strip_release(Root) -> + catch strip_rel(Root). + +-spec version(beam()) -> + {'ok', {module(), [term()]}} | {'error', 'beam_lib', chnk_rsn()}. + +version(File) -> + case catch read_chunk_data(File, [attributes]) of + {ok, {Module, [{attributes, Attrs}]}} -> + {vsn, Version} = lists:keyfind(vsn, 1, Attrs), + {ok, {Module, Version}}; + Error -> + Error + end. + +-spec md5(beam()) -> + {'ok', {module(), binary()}} | {'error', 'beam_lib', chnk_rsn()}. + +md5(File) -> + case catch read_significant_chunks(File) of + {ok, {Module, Chunks0}} -> + Chunks = filter_funtab(Chunks0), + {ok, {Module, erlang:md5([C || {_Id, C} <- Chunks])}}; + Error -> + Error + end. + +-spec format_error(term()) -> [char() | string()]. + +format_error({error, Error}) -> + format_error(Error); +format_error({error, Module, Error}) -> + Module:format_error(Error); +format_error({unknown_chunk, File, ChunkName}) -> + io_lib:format("~p: Cannot find chunk ~p~n", [File, ChunkName]); +format_error({invalid_chunk, File, ChunkId}) -> + io_lib:format("~p: Invalid contents of chunk ~p~n", [File, ChunkId]); +format_error({not_a_beam_file, File}) -> + io_lib:format("~p: Not a BEAM file~n", [File]); +format_error({file_error, File, Reason}) -> + io_lib:format("~p: ~p~n", [File, file:format_error(Reason)]); +format_error({missing_chunk, File, ChunkId}) -> + io_lib:format("~p: Not a BEAM file: no IFF \"~s\" chunk~n", + [File, ChunkId]); +format_error({invalid_beam_file, File, Pos}) -> + io_lib:format("~p: Invalid format of BEAM file near byte number ~p~n", + [File, Pos]); +format_error({chunk_too_big, File, ChunkId, Size, Len}) -> + io_lib:format("~p: Size of chunk \"~s\" is ~p bytes, " + "but only ~p bytes could be read~n", + [File, ChunkId, Size, Len]); +format_error({chunks_different, Id}) -> + io_lib:format("Chunk \"~s\" differs in the two files~n", [Id]); +format_error(different_chunks) -> + "The two files have different chunks\n"; +format_error({modules_different, Module1, Module2}) -> + io_lib:format("Module names ~p and ~p differ in the two files~n", + [Module1, Module2]); +format_error({not_a_directory, Name}) -> + io_lib:format("~p: Not a directory~n", [Name]); +format_error({key_missing_or_invalid, File, abstract_code}) -> + io_lib:format("~p: Cannot decrypt abstract code because key is missing or invalid", + [File]); +format_error(badfun) -> + "not a fun or the fun has the wrong arity"; +format_error(exists) -> + "a fun has already been installed"; +format_error(E) -> + io_lib:format("~p~n", [E]). + +%% +%% Exported functions for encrypted debug info. +%% + +-type mode() :: 'des3_cbc'. +-type crypto_fun_arg() :: 'init' + | 'clear' + | {'debug_info', mode(), module(), file:filename()}. +-type crypto_fun() :: fun((crypto_fun_arg()) -> term()). + +-spec crypto_key_fun(crypto_fun()) -> 'ok' | {'error', term()}. + +crypto_key_fun(F) -> + call_crypto_server({crypto_key_fun, F}). + +-spec clear_crypto_key_fun() -> 'undefined' | {'ok', term()}. + +clear_crypto_key_fun() -> + call_crypto_server(clear_crypto_key_fun). + +-spec make_crypto_key(mode(), string()) -> + {binary(), binary(), binary(), binary()}. + +make_crypto_key(des3_cbc, String) -> + <> = First = erlang:md5(String), + <> = erlang:md5([First|reverse(String)]), + {K1,K2,K3,IVec}. + +%% +%% Local functions +%% + +read_info(File) -> + try + {ok, Module, Data} = scan_beam(File, info), + [if + is_binary(File) -> {binary, File}; + true -> {file, File} + end, {module, Module}, {chunks, Data}] + catch Error -> Error end. + +diff_directories(Dir1, Dir2) -> + {OnlyDir1, OnlyDir2, Diff} = compare_dirs(Dir1, Dir2), + diff_only(Dir1, OnlyDir1), + diff_only(Dir2, OnlyDir2), + foreach(fun(D) -> io:format("** different: ~p~n", [D]) end, Diff), + ok. + +diff_only(_Dir, []) -> + ok; +diff_only(Dir, Only) -> + io:format("Only in ~p: ~p~n", [Dir, Only]). + +%% -> {OnlyInDir1, OnlyInDir2, Different} | throw(Error) +compare_dirs(Dir1, Dir2) -> + R1 = sofs:relation(beam_files(Dir1)), + R2 = sofs:relation(beam_files(Dir2)), + F1 = sofs:domain(R1), + F2 = sofs:domain(R2), + {O1, Both, O2} = sofs:symmetric_partition(F1, F2), + OnlyL1 = sofs:image(R1, O1), + OnlyL2 = sofs:image(R2, O2), + B1 = sofs:to_external(sofs:restriction(R1, Both)), + B2 = sofs:to_external(sofs:restriction(R2, Both)), + Diff = compare_files(B1, B2, []), + {sofs:to_external(OnlyL1), sofs:to_external(OnlyL2), Diff}. + +compare_files([], [], Acc) -> + lists:reverse(Acc); +compare_files([{_,F1} | R1], [{_,F2} | R2], Acc) -> + NAcc = case catch cmp_files(F1, F2) of + {error, _Mod, _Reason} -> + [{F1, F2} | Acc]; + ok -> + Acc + end, + compare_files(R1, R2, NAcc). + +beam_files(Dir) -> + ok = assert_directory(Dir), + L = filelib:wildcard(filename:join(Dir, "*.beam")), + [{filename:basename(Path), Path} || Path <- L]. + +%% -> ok | throw(Error) +cmp_files(File1, File2) -> + {ok, {M1, L1}} = read_significant_chunks(File1), + {ok, {M2, L2}} = read_significant_chunks(File2), + if + M1 =:= M2 -> + List1 = filter_funtab(L1), + List2 = filter_funtab(L2), + cmp_lists(List1, List2); + true -> + error({modules_different, M1, M2}) + end. + +cmp_lists([], []) -> + ok; +cmp_lists([{Id, C1} | R1], [{Id, C2} | R2]) -> + if + C1 =:= C2 -> + cmp_lists(R1, R2); + true -> + error({chunks_different, Id}) + end; +cmp_lists(_, _) -> + error(different_chunks). + +strip_rel(Root) -> + ok = assert_directory(Root), + strip_fils(filelib:wildcard(filename:join(Root, "lib/*/ebin/*.beam"))). + +%% -> {ok, [{Mod, BinaryOrFileName}]} | throw(Error) +strip_fils(Files) -> + {ok, [begin {ok, Reply} = strip_file(F), Reply end || F <- Files]}. + +%% -> {ok, {Mod, FileName}} | {ok, {Mod, binary()}} | throw(Error) +strip_file(File) -> + {ok, {Mod, Chunks}} = read_significant_chunks(File), + {ok, Stripped0} = build_module(Chunks), + Stripped = compress(Stripped0), + case File of + _ when is_binary(File) -> + {ok, {Mod, Stripped}}; + _ -> + FileName = beam_filename(File), + case file:open(FileName, [raw, binary, write]) of + {ok, Fd} -> + case file:write(Fd, Stripped) of + ok -> + ok = file:close(Fd), + {ok, {Mod, FileName}}; + Error -> + ok = file:close(Fd), + file_error(FileName, Error) + end; + Error -> + file_error(FileName, Error) + end + end. + +build_module(Chunks0) -> + Chunks = list_to_binary(build_chunks(Chunks0)), + Size = byte_size(Chunks), + 0 = Size rem 4, % Assertion: correct padding? + {ok, <<"FOR1", (Size+4):32, "BEAM", Chunks/binary>>}. + +build_chunks([{Id, Data} | Chunks]) -> + BId = list_to_binary(Id), + Size = byte_size(Data), + Chunk = [<>, Data | pad(Size)], + [Chunk | build_chunks(Chunks)]; +build_chunks([]) -> + []. + +pad(Size) -> + case Size rem 4 of + 0 -> []; + Rem -> lists:duplicate(4 - Rem, 0) + end. + +%% -> {ok, {Module, Chunks}} | throw(Error) +read_significant_chunks(File) -> + case read_chunk_data(File, significant_chunks(), [allow_missing_chunks]) of + {ok, {Module, Chunks0}} -> + Mandatory = mandatory_chunks(), + Chunks = filter_significant_chunks(Chunks0, Mandatory, File, Module), + {ok, {Module, Chunks}} + end. + +filter_significant_chunks([{_, Data}=Pair|Cs], Mandatory, File, Mod) + when is_binary(Data) -> + [Pair|filter_significant_chunks(Cs, Mandatory, File, Mod)]; +filter_significant_chunks([{Id, missing_chunk}|Cs], Mandatory, File, Mod) -> + case member(Id, Mandatory) of + false -> + filter_significant_chunks(Cs, Mandatory, File, Mod); + true -> + error({missing_chunk, File, Id}) + end; +filter_significant_chunks([], _, _, _) -> []. + +filter_funtab([{"FunT"=Tag, <>}|Cs]) -> + Data = filter_funtab_1(Data0, <<0:32>>), + Funtab = <>, + [{Tag, Funtab}|filter_funtab(Cs)]; +filter_funtab([H|T]) -> + [H|filter_funtab(T)]; +filter_funtab([]) -> []. + +filter_funtab_1(<>, Zero) -> + [Important,Zero|filter_funtab_1(T, Zero)]; +filter_funtab_1(Tail, _) when is_binary(Tail) -> [Tail]. + +read_all_chunks(File0) when is_atom(File0); + is_list(File0); + is_binary(File0) -> + try + File = beam_filename(File0), + {ok, Module, ChunkIds0} = scan_beam(File, info), + ChunkIds = [Name || {Name,_,_} <- ChunkIds0], + {ok, Module, Chunks} = scan_beam(File, ChunkIds), + {ok, Module, lists:reverse(Chunks)} + catch Error -> Error end. + +read_chunk_data(File0, ChunkNames) -> + try read_chunk_data(File0, ChunkNames, []) + catch Error -> Error end. + +%% -> {ok, {Module, Symbols}} | throw(Error) +read_chunk_data(File0, ChunkNames0, Options) + when is_atom(File0); is_list(File0); is_binary(File0) -> + File = beam_filename(File0), + {ChunkIds, Names} = check_chunks(ChunkNames0, File, [], []), + AllowMissingChunks = member(allow_missing_chunks, Options), + {ok, Module, Chunks} = scan_beam(File, ChunkIds, AllowMissingChunks), + AT = ets:new(beam_symbols, []), + T = {empty, AT}, + try chunks_to_data(Names, Chunks, File, Chunks, Module, T, []) + after ets:delete(AT) + end. + +%% -> {ok, list()} | throw(Error) +check_chunks([ChunkName | Ids], File, IL, L) when is_atom(ChunkName) -> + ChunkId = chunk_name_to_id(ChunkName, File), + check_chunks(Ids, File, [ChunkId | IL], [{ChunkId, ChunkName} | L]); +check_chunks([ChunkId | Ids], File, IL, L) -> % when is_list(ChunkId) + check_chunks(Ids, File, [ChunkId | IL], [{ChunkId, ChunkId} | L]); +check_chunks([], _File, IL, L) -> + {lists:usort(IL), reverse(L)}. + +%% -> {ok, Module, Data} | throw(Error) +scan_beam(File, What) -> + scan_beam(File, What, false). + +%% -> {ok, Module, Data} | throw(Error) +scan_beam(File, What0, AllowMissingChunks) -> + case scan_beam1(File, What0) of + {missing, _FD, Mod, Data, What} when AllowMissingChunks -> + {ok, Mod, [{Id, missing_chunk} || Id <- What] ++ Data}; + {missing, FD, _Mod, _Data, What} -> + error({missing_chunk, filename(FD), hd(What)}); + R -> + R + end. + +%% -> {ok, Module, Data} | throw(Error) +scan_beam1(File, What) -> + FD = open_file(File), + case catch scan_beam2(FD, What) of + Error when error =:= element(1, Error) -> + throw(Error); + R -> + R + end. + +scan_beam2(FD, What) -> + case pread(FD, 0, 12) of + {NFD, {ok, <<"FOR1", _Size:32, "BEAM">>}} -> + Start = 12, + scan_beam(NFD, Start, What, 17, []); + _Error -> + error({not_a_beam_file, filename(FD)}) + end. + +scan_beam(_FD, _Pos, [], Mod, Data) when Mod =/= 17 -> + {ok, Mod, Data}; +scan_beam(FD, Pos, What, Mod, Data) -> + case pread(FD, Pos, 8) of + {_NFD, eof} when Mod =:= 17 -> + error({missing_chunk, filename(FD), "Atom"}); + {_NFD, eof} when What =:= info -> + {ok, Mod, reverse(Data)}; + {NFD, eof} -> + {missing, NFD, Mod, Data, What}; + {NFD, {ok, <>}} -> + Id = binary_to_list(IdL), + Pos1 = Pos + 8, + Pos2 = (4 * trunc((Sz+3) / 4)) + Pos1, + get_data(What, Id, NFD, Sz, Pos1, Pos2, Mod, Data); + {_NFD, {ok, _ChunkHead}} -> + error({invalid_beam_file, filename(FD), Pos}) + end. + +get_data(Cs, "Atom"=Id, FD, Size, Pos, Pos2, _Mod, Data) -> + NewCs = del_chunk(Id, Cs), + {NFD, Chunk} = get_chunk(Id, Pos, Size, FD), + <<_Num:32, Chunk2/binary>> = Chunk, + {Module, _} = extract_atom(Chunk2), + C = case Cs of + info -> + {Id, Pos, Size}; + _ -> + {Id, Chunk} + end, + scan_beam(NFD, Pos2, NewCs, Module, [C | Data]); +get_data(info, Id, FD, Size, Pos, Pos2, Mod, Data) -> + scan_beam(FD, Pos2, info, Mod, [{Id, Pos, Size} | Data]); +get_data(Chunks, Id, FD, Size, Pos, Pos2, Mod, Data) -> + {NFD, NewData} = case member(Id, Chunks) of + true -> + {FD1, Chunk} = get_chunk(Id, Pos, Size, FD), + {FD1, [{Id, Chunk} | Data]}; + false -> + {FD, Data} + end, + NewChunks = del_chunk(Id, Chunks), + scan_beam(NFD, Pos2, NewChunks, Mod, NewData). + +del_chunk(_Id, info) -> + info; +del_chunk(Id, Chunks) -> + delete(Id, Chunks). + +%% -> {NFD, binary()} | throw(Error) +get_chunk(Id, Pos, Size, FD) -> + case pread(FD, Pos, Size) of + {NFD, eof} when Size =:= 0 -> % cannot happen + {NFD, <<>>}; + {_NFD, eof} when Size > 0 -> + error({chunk_too_big, filename(FD), Id, Size, 0}); + {_NFD, {ok, Chunk}} when Size > byte_size(Chunk) -> + error({chunk_too_big, filename(FD), Id, Size, byte_size(Chunk)}); + {NFD, {ok, Chunk}} -> % when Size =:= size(Chunk) + {NFD, Chunk} + end. + +chunks_to_data([{Id, Name} | CNs], Chunks, File, Cs, Module, Atoms, L) -> + {_Id, Chunk} = lists:keyfind(Id, 1, Chunks), + {NewAtoms, Ret} = chunk_to_data(Name, Chunk, File, Cs, Atoms, Module), + chunks_to_data(CNs, Chunks, File, Cs, Module, NewAtoms, [Ret | L]); +chunks_to_data([], _Chunks, _File, _Cs, Module, _Atoms, L) -> + {ok, {Module, reverse(L)}}. + +chunk_to_data(attributes=Id, Chunk, File, _Cs, AtomTable, _Mod) -> + try + Term = binary_to_term(Chunk), + {AtomTable, {Id, attributes(Term)}} + catch + error:badarg -> + error({invalid_chunk, File, chunk_name_to_id(Id, File)}) + end; +chunk_to_data(compile_info=Id, Chunk, File, _Cs, AtomTable, _Mod) -> + try + {AtomTable, {Id, binary_to_term(Chunk)}} + catch + error:badarg -> + error({invalid_chunk, File, chunk_name_to_id(Id, File)}) + end; +chunk_to_data(abstract_code=Id, Chunk, File, _Cs, AtomTable, Mod) -> + case Chunk of + <<>> -> + {AtomTable, {Id, no_abstract_code}}; + <<0:8,N:8,Mode0:N/binary,Rest/binary>> -> + Mode = list_to_atom(binary_to_list(Mode0)), + decrypt_abst(Mode, Mod, File, Id, AtomTable, Rest); + _ -> + case catch binary_to_term(Chunk) of + {'EXIT', _} -> + error({invalid_chunk, File, chunk_name_to_id(Id, File)}); + Term -> + {AtomTable, {Id, Term}} + end + end; +chunk_to_data(atoms=Id, _Chunk, _File, Cs, AtomTable0, _Mod) -> + AtomTable = ensure_atoms(AtomTable0, Cs), + Atoms = ets:tab2list(AtomTable), + {AtomTable, {Id, lists:sort(Atoms)}}; +chunk_to_data(ChunkName, Chunk, File, + Cs, AtomTable, _Mod) when is_atom(ChunkName) -> + case catch symbols(Chunk, AtomTable, Cs, ChunkName) of + {ok, NewAtomTable, S} -> + {NewAtomTable, {ChunkName, S}}; + {'EXIT', _} -> + error({invalid_chunk, File, chunk_name_to_id(ChunkName, File)}) + end; +chunk_to_data(ChunkId, Chunk, _File, + _Cs, AtomTable, _Module) when is_list(ChunkId) -> + {AtomTable, {ChunkId, Chunk}}. % Chunk is a binary + +chunk_name_to_id(atoms, _) -> "Atom"; +chunk_name_to_id(indexed_imports, _) -> "ImpT"; +chunk_name_to_id(imports, _) -> "ImpT"; +chunk_name_to_id(exports, _) -> "ExpT"; +chunk_name_to_id(labeled_exports, _) -> "ExpT"; +chunk_name_to_id(locals, _) -> "LocT"; +chunk_name_to_id(labeled_locals, _) -> "LocT"; +chunk_name_to_id(attributes, _) -> "Attr"; +chunk_name_to_id(abstract_code, _) -> "Abst"; +chunk_name_to_id(compile_info, _) -> "CInf"; +chunk_name_to_id(Other, File) -> + error({unknown_chunk, File, Other}). + +%% Extract attributes + +attributes(Attrs) -> + attributes(keysort(1, Attrs), []). + +attributes([], R) -> + reverse(R); +attributes(L, R) -> + K = element(1, hd(L)), + {L1, L2} = splitwith(fun(T) -> element(1, T) =:= K end, L), + V = append([A || {_, A} <- L1]), + attributes(L2, [{K, V} | R]). + +%% Extract symbols + +symbols(<<_Num:32, B/binary>>, AT0, Cs, Name) -> + AT = ensure_atoms(AT0, Cs), + symbols1(B, AT, Name, [], 1). + +symbols1(<>, AT, Name, S, Cnt) -> + Symbol = symbol(Name, AT, I1, I2, I3, Cnt), + symbols1(B, AT, Name, [Symbol|S], Cnt+1); +symbols1(<<>>, AT, _Name, S, _Cnt) -> + {ok, AT, sort(S)}. + +symbol(indexed_imports, AT, I1, I2, I3, Cnt) -> + {Cnt, atm(AT, I1), atm(AT, I2), I3}; +symbol(imports, AT, I1, I2, I3, _Cnt) -> + {atm(AT, I1), atm(AT, I2), I3}; +symbol(labeled_exports, AT, I1, I2, I3, _Cnt) -> + {atm(AT, I1), I2, I3}; +symbol(labeled_locals, AT, I1, I2, I3, _Cnt) -> + {atm(AT, I1), I2, I3}; +symbol(_, AT, I1, I2, _I3, _Cnt) -> + {atm(AT, I1), I2}. + +atm(AT, N) -> + [{_N, S}] = ets:lookup(AT, N), + S. + +%% AT is updated. +ensure_atoms({empty, AT}, Cs) -> + {_Id, AtomChunk} = lists:keyfind("Atom", 1, Cs), + extract_atoms(AtomChunk, AT), + AT; +ensure_atoms(AT, _Cs) -> + AT. + +extract_atoms(<<_Num:32, B/binary>>, AT) -> + extract_atoms(B, 1, AT). + +extract_atoms(<<>>, _I, _AT) -> + true; +extract_atoms(B, I, AT) -> + {Atom, B1} = extract_atom(B), + true = ets:insert(AT, {I, Atom}), + extract_atoms(B1, I+1, AT). + +extract_atom(<>) -> + <> = B, + {list_to_atom(binary_to_list(SB)), Tail}. + +%%% Utils. + +-record(bb, {pos = 0 :: integer(), + bin :: binary(), + source :: binary() | string()}). + +open_file(<<"FOR1",_/binary>>=Binary) -> + #bb{bin = Binary, source = Binary}; +open_file(Binary0) when is_binary(Binary0) -> + Binary = uncompress(Binary0), + #bb{bin = Binary, source = Binary}; +open_file(FileName) -> + case file:open(FileName, [read, raw, binary]) of + {ok, Fd} -> + read_all(Fd, FileName, []); + Error -> + file_error(FileName, Error) + end. + +read_all(Fd, FileName, Bins) -> + case file:read(Fd, 1 bsl 18) of + {ok, Bin} -> + read_all(Fd, FileName, [Bin | Bins]); + eof -> + ok = file:close(Fd), + #bb{bin = uncompress(reverse(Bins)), source = FileName}; + Error -> + ok = file:close(Fd), + file_error(FileName, Error) + end. + +pread(FD, AtPos, Size) -> + #bb{pos = Pos, bin = Binary} = FD, + Skip = AtPos-Pos, + case Binary of + <<_:Skip/binary, B:Size/binary, Bin/binary>> -> + NFD = FD#bb{pos = AtPos+Size, bin = Bin}, + {NFD, {ok, B}}; + <<_:Skip/binary, Bin/binary>> when byte_size(Bin) > 0 -> + NFD = FD#bb{pos = AtPos+byte_size(Bin), bin = <<>>}, + {NFD, {ok, Bin}}; + _ -> + {FD, eof} + end. + +filename(BB) when is_binary(BB#bb.source) -> + BB#bb.source; +filename(BB) -> + list_to_atom(BB#bb.source). + +beam_filename(Bin) when is_binary(Bin) -> + Bin; +beam_filename(File) -> + filename:rootname(File, ".beam") ++ ".beam". + + +uncompress(Binary0) -> + {ok, Fd} = ram_file:open(Binary0, [write, binary]), + {ok, _} = ram_file:uncompress(Fd), + {ok, Binary} = ram_file:get_file(Fd), + ok = ram_file:close(Fd), + Binary. + +compress(Binary0) -> + {ok, Fd} = ram_file:open(Binary0, [write, binary]), + {ok, _} = ram_file:compress(Fd), + {ok, Binary} = ram_file:get_file(Fd), + ok = ram_file:close(Fd), + Binary. + +%% -> ok | throw(Error) +assert_directory(FileName) -> + case filelib:is_dir(FileName) of + true -> + ok; + false -> + error({not_a_directory, FileName}) + end. + +-spec file_error(file:filename(), {'error',atom()}) -> no_return(). + +file_error(FileName, {error, Reason}) -> + error({file_error, FileName, Reason}). + +-spec error(term()) -> no_return(). + +error(Reason) -> + throw({error, ?MODULE, Reason}). + + +%% The following chunks are significant when calculating the MD5 for a module, +%% and also the modules that must be retained when stripping a file. +%% They are listed in the order that they should be MD5:ed. + +significant_chunks() -> + ["Atom", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT"]. + +%% The following chunks are mandatory in every Beam file. + +mandatory_chunks() -> + ["Code", "ExpT", "ImpT", "StrT", "Atom"]. + +%%% ==================================================================== +%%% The rest of the file handles encrypted debug info. +%%% +%%% Encrypting the debug info is only useful if you want to +%%% have the debug info available all the time (maybe even in a live +%%% system), but don't want to risk that anyone else but yourself +%%% can use it. +%%% ==================================================================== + +-record(state, {crypto_key_f :: crypto_fun()}). + +-define(CRYPTO_KEY_SERVER, beam_lib__crypto_key_server). + +decrypt_abst(Mode, Module, File, Id, AtomTable, Bin) -> + try + KeyString = get_crypto_key({debug_info, Mode, Module, File}), + Key = make_crypto_key(des3_cbc, KeyString), + Term = decrypt_abst_1(Mode, Key, Bin), + {AtomTable, {Id, Term}} + catch + _:_ -> + error({key_missing_or_invalid, File, Id}) + end. + +decrypt_abst_1(des3_cbc, {K1, K2, K3, IVec}, Bin) -> + ok = start_crypto(), + NewBin = crypto:des3_cbc_decrypt(K1, K2, K3, IVec, Bin), + binary_to_term(NewBin). + +start_crypto() -> + case crypto:start() of + {error, {already_started, _}} -> + ok; + ok -> + ok + end. + +get_crypto_key(What) -> + call_crypto_server({get_crypto_key, What}). + +call_crypto_server(Req) -> + try + gen_server:call(?CRYPTO_KEY_SERVER, Req, infinity) + catch + exit:{noproc,_} -> + start_crypto_server(), + erlang:yield(), + call_crypto_server(Req) + end. + +start_crypto_server() -> + gen_server:start({local,?CRYPTO_KEY_SERVER}, ?MODULE, [], []). + +-spec init([]) -> {'ok', #state{}}. + +init([]) -> + {ok, #state{}}. + +-type calls() :: 'clear_crypto_key_fun' + | {'crypto_key_fun', _} + | {'get_crypto_key', _}. + +-spec handle_call(calls(), {pid(), term()}, #state{}) -> + {'noreply', #state{}} | + {'reply', 'error' | {'error','badfun' | 'exists'}, #state{}} | + {'stop', 'normal', 'undefined' | {'ok', term()}, #state{}}. + +handle_call({get_crypto_key, _}=R, From, #state{crypto_key_f=undefined}=S) -> + case crypto_key_fun_from_file() of + error -> + {reply, error, S}; + F when is_function(F) -> + %% The init function for the fun has already been called. + handle_call(R, From, S#state{crypto_key_f=F}) + end; +handle_call({get_crypto_key, What}, From, #state{crypto_key_f=F}=S) -> + try + Result = F(What), + %% The result may hold information that we don't want + %% lying around. Reply first, then GC, then noreply. + gen_server:reply(From, Result), + erlang:garbage_collect(), + {noreply, S} + catch + _:_ -> + {reply, error, S} + end; +handle_call({crypto_key_fun, F}, {_,_} = From, S) -> + case S#state.crypto_key_f of + undefined -> + %% Don't allow tuple funs here. (They weren't allowed before, + %% so there is no reason to allow them now.) + if is_function(F), is_function(F, 1) -> + {Result, Fun, Reply} = + case catch F(init) of + ok -> + {true, F, ok}; + {ok, F1} when is_function(F1) -> + if + is_function(F1, 1) -> + {true, F1, ok}; + true -> + {false, undefined, + {error, badfun}} + end; + {error, Reason} -> + {false, undefined, {error, Reason}}; + {'EXIT', Reason} -> + {false, undefined, {error, Reason}} + end, + gen_server:reply(From, Reply), + erlang:garbage_collect(), + NewS = case Result of + true -> + S#state{crypto_key_f = Fun}; + false -> + S + end, + {noreply, NewS}; + true -> + {reply, {error, badfun}, S} + end; + OtherF when is_function(OtherF) -> + {reply, {error, exists}, S} + end; +handle_call(clear_crypto_key_fun, _From, S) -> + case S#state.crypto_key_f of + undefined -> + {stop,normal,undefined,S}; + F -> + Result = (catch F(clear)), + {stop,normal,{ok,Result},S} + end. + +-spec handle_cast(term(), #state{}) -> {'noreply', #state{}}. + +handle_cast(_, State) -> + {noreply, State}. + +-spec handle_info(term(), #state{}) -> {'noreply', #state{}}. + +handle_info(_, State) -> + {noreply, State}. + +-spec code_change(term(), #state{}, term()) -> {'ok', #state{}}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +-spec terminate(term(), #state{}) -> 'ok'. + +terminate(_Reason, _State) -> + ok. + +crypto_key_fun_from_file() -> + case init:get_argument(home) of + {ok,[[Home]]} -> + crypto_key_fun_from_file_1([".",Home]); + _ -> + crypto_key_fun_from_file_1(["."]) + end. + +crypto_key_fun_from_file_1(Path) -> + case f_p_s(Path, ".erlang.crypt") of + {ok, KeyInfo, _} -> + try_load_crypto_fun(KeyInfo); + _ -> + error + end. + +f_p_s(P, F) -> + case file:path_script(P, F) of + {error, enoent} -> + {error, enoent}; + {error, {Line, _Mod, _Term}=E} -> + error("file:path_script(~p,~p): error on line ~p: ~s~n", + [P, F, Line, file:format_error(E)]), + ok; + {error, E} when is_atom(E) -> + error("file:path_script(~p,~p): ~s~n", + [P, F, file:format_error(E)]), + ok; + Other -> + Other + end. + +try_load_crypto_fun(KeyInfo) when is_list(KeyInfo) -> + T = ets:new(keys, [private, set]), + foreach( + fun({debug_info, Mode, M, Key}) when is_atom(M) -> + ets:insert(T, {{debug_info,Mode,M,[]}, Key}); + ({debug_info, Mode, [], Key}) -> + ets:insert(T, {{debug_info, Mode, [], []}, Key}); + (Other) -> + error("unknown key: ~p~n", [Other]) + end, KeyInfo), + fun({debug_info, Mode, M, F}) -> + alt_lookup_key( + [{debug_info,Mode,M,F}, + {debug_info,Mode,M,[]}, + {debug_info,Mode,[],[]}], T); + (clear) -> + ets:delete(T); + (_) -> + error + end; +try_load_crypto_fun(KeyInfo) -> + error("unrecognized crypto key info: ~p\n", [KeyInfo]). + +alt_lookup_key([H|T], Tab) -> + case ets:lookup(Tab, H) of + [] -> + alt_lookup_key(T, Tab); + [{_, Val}] -> + Val + end; +alt_lookup_key([], _) -> + error. + +error(Fmt, Args) -> + error_logger:error_msg(Fmt, Args), + error. diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl new file mode 100644 index 0000000000..9e4cec5db2 --- /dev/null +++ b/lib/stdlib/src/c.erl @@ -0,0 +1,700 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(c). + +%% Utilities to use from shell. + +-export([help/0,lc/1,c/1,c/2,nc/1,nc/2, nl/1,l/1,i/0,i/1,ni/0, + y/1, y/2, + lc_batch/0, lc_batch/1, + i/3,pid/3,m/0,m/1, + bt/1, q/0, + erlangrc/0,erlangrc/1,bi/1, flush/0, regs/0, + nregs/0,pwd/0,ls/0,ls/1,cd/1,memory/1,memory/0, xm/1]). + +-export([display_info/1]). +-export([appcall/4]). + +-import(lists, [reverse/1,flatten/1,sublist/3,sort/1,keysearch/3,keysort/2, + concat/1,max/1,min/1,foreach/2,foldl/3,flatmap/2]). +-import(io, [format/1, format/2]). + +help() -> + format("bt(Pid) -- stack backtrace for a process\n" + "c(File) -- compile and load code in \n" + "cd(Dir) -- change working directory\n" + "flush() -- flush any messages sent to the shell\n" + "help() -- help info\n" + "i() -- information about the system\n" + "ni() -- information about the networked system\n" + "i(X,Y,Z) -- information about pid \n" + "l(Module) -- load or reload module\n" + "lc([File]) -- compile a list of Erlang modules\n" + "ls() -- list files in the current directory\n" + "ls(Dir) -- list files in directory \n" + "m() -- which modules are loaded\n" + "m(Mod) -- information about module \n" + "memory() -- memory allocation information\n" + "memory(T) -- memory allocation information of type \n" + "nc(File) -- compile and load code in on all nodes\n" + "nl(Module) -- load module on all nodes\n" + "pid(X,Y,Z) -- convert X,Y,Z to a Pid\n" + "pwd() -- print working directory\n" + "q() -- quit - shorthand for init:stop()\n" + "regs() -- information about registered processes\n" + "nregs() -- information about all registered processes\n" + "xm(M) -- cross reference check a module\n" + "y(File) -- generate a Yecc parser\n"). + +%% c(FileName) +%% Compile a file/module. + +c(File) -> c(File, []). + +c(File, Opts0) when is_list(Opts0) -> + Opts = [report_errors,report_warnings|Opts0], + case compile:file(File, Opts) of + {ok,Mod} -> %Listing file. + machine_load(Mod, File, Opts); + {ok,Mod,_Ws} -> %Warnings maybe turned on. + machine_load(Mod, File, Opts); + Other -> %Errors go here + Other + end; +c(File, Opt) -> + c(File, [Opt]). + +%%% Obtain the 'outdir' option from the argument. Return "." if no +%%% such option was given. +outdir([]) -> + "."; +outdir([Opt|Rest]) -> + case Opt of + {outdir, D} -> + D; + _ -> + outdir(Rest) + end. + +%%% We have compiled File with options Opts. Find out where the +%%% output file went to, and load it. +machine_load(Mod, File, Opts) -> + Dir = outdir(Opts), + File2 = filename:join(Dir, filename:basename(File, ".erl")), + case compile:output_generated(Opts) of + true -> + Base = packages:last(Mod), + case filename:basename(File, ".erl") of + Base -> + code:purge(Mod), + check_load(code:load_abs(File2,Mod), Mod); + _OtherMod -> + format("** Module name '~p' does not match file name '~p' **~n", + [Mod,File]), + {error, badfile} + end; + false -> + format("** Warning: No object file created - nothing loaded **~n", []), + ok + end. + +%%% This function previously warned if the loaded module was +%%% loaded from some other place than current directory. +%%% Now, loading from other than current directory is supposed to work. +%%% so this function does nothing special. +check_load({error, R}, _) -> {error, R}; +check_load(_, X) -> {ok, X}. + +%% Compile a list of modules +%% enables the nice unix shell cmd +%% erl -s c lc f1 f2 f3 @d c1=v1 @c2 @i IDir @o ODir -s erlang halt +%% to compile files f1.erl , f2.erl ....... from a unix shell +%% with constant c2 defined, c1=v1 (v1 must be a term!), include dir +%% IDir, outdir ODir. + +lc(Args) -> + case catch split(Args, [], []) of + error -> error; + {Opts, Files} -> + COpts = [report_errors, report_warnings | reverse(Opts)], + foreach(fun(File) -> compile:file(File, COpts) end, reverse(Files)) + end. + +%%% lc_batch/1 works like lc/1, but halts afterwards, with appropriate +%%% exit code. This is meant to be called by "erl -compile". + +-spec lc_batch() -> no_return(). + +lc_batch() -> + io:format("Error: no files to compile~n"), + halt(1). + +-spec lc_batch([_]) -> no_return(). + +lc_batch(Args) -> + try split(Args, [], []) of + {Opts, Files} -> + COpts = [report_errors, report_warnings | reverse(Opts)], + Res = [compile:file(File, COpts) || File <- reverse(Files)], + case lists:member(error, Res) of + true -> + halt(1); + false -> + halt(0) + end + catch + throw:error -> halt(1) + end. + +split(['@i', Dir | T], Opts, Files) -> + split(T, [{i, atom_to_list(Dir)} | Opts], Files); +split(['@o', Dir | T], Opts, Files) -> + split(T, [{outdir, atom_to_list(Dir)} | Opts], Files); +split(['@d', Def | T], Opts, Files) -> + split(T, [split_def(atom_to_list(Def), []) | Opts], Files); +split([File | T], Opts, Files) -> + split(T, Opts, [File | Files]); +split([], Opts, Files) -> + {Opts, Files}. + +split_def([$= | T], Res) -> {d, list_to_atom(reverse(Res)),make_term(T)}; +split_def([H | T], Res) -> split_def(T, [H | Res]); +split_def([], Res) -> {d, list_to_atom(reverse(Res))}. + +make_term(Str) -> + case erl_scan:string(Str) of + {ok, Tokens, _} -> + case erl_parse:parse_term(Tokens ++ [{dot, 1}]) of + {ok, Term} -> Term; + {error, {_,_,Reason}} -> + io:format("~s: ~s~n", [Reason, Str]), + throw(error) + end; + {error, {_,_,Reason}, _} -> + io:format("~s: ~s~n", [Reason, Str]), + throw(error) + end. + +nc(File) -> nc(File, []). + +nc(File, Opts0) when is_list(Opts0) -> + Opts = Opts0 ++ [report_errors, report_warnings], + case compile:file(File, Opts) of + {ok,Mod} -> + Fname = concat([File, code:objfile_extension()]), + case file:read_file(Fname) of + {ok,Bin} -> + rpc:eval_everywhere(code,load_binary,[Mod,Fname,Bin]), + {ok,Mod}; + Other -> + Other + end; + Other -> %Errors go here + Other + end; +nc(File, Opt) when is_atom(Opt) -> + nc(File, [Opt]). + +%% l(Mod) +%% Reload module Mod from file of same name + +l(Mod) -> + code:purge(Mod), + code:load_file(Mod). + +%% Network version of l/1 +nl(Mod) -> + case code:get_object_code(Mod) of + {_Module, Bin, Fname} -> + rpc:eval_everywhere(code,load_binary,[Mod,Fname,Bin]); + Other -> + Other + end. + +i() -> i(processes()). +ni() -> i(all_procs()). + +i(Ps) -> + i(Ps, length(Ps)). + +i(Ps, N) when N =< 100 -> + iformat("Pid", "Initial Call", "Heap", "Reds", + "Msgs"), + iformat("Registered", "Current Function", "Stack", "", + ""), + {R,M,H,S} = foldl(fun(Pid, {R0,M0,H0,S0}) -> + {A,B,C,D} = display_info(Pid), + {R0+A,M0+B,H0+C,S0+D} + end, {0,0,0,0}, Ps), + iformat("Total", "", w(H), w(R), w(M)), + iformat("", "", w(S), "", ""); +i(Ps, N) -> + iformat("Pid", "Initial Call", "Heap", "Reds", + "Msgs"), + iformat("Registered", "Current Function", "Stack", "", + ""), + paged_i(Ps, {0,0,0,0}, N, 50). + +paged_i([], {R,M,H,S}, _, _) -> + iformat("Total", "", w(H), w(R), w(M)), + iformat("", "", w(S), "", ""); +paged_i(Ps, Acc, N, Page) -> + {Pids, Rest, N1} = + if N > Page -> + {L1,L2} = lists:split(Page, Ps), + {L1,L2,N-Page}; + true -> + {Ps, [], 0} + end, + NewAcc = foldl(fun(Pid, {R,M,H,S}) -> + {A,B,C,D} = display_info(Pid), + {R+A,M+B,H+C,S+D} + end, Acc, Pids), + case Rest of + [_|_] -> + choice(fun() -> paged_i(Rest, NewAcc, N1, Page) end); + [] -> + paged_i([], NewAcc, 0, Page) + end. + + +choice(F) -> + case get_line('(c)ontinue (q)uit -->', "c\n") of + "c\n" -> + F(); + "q\n" -> + quit; + _ -> + choice(F) + end. + + +get_line(P, Default) -> + case io:get_line(P) of + "\n" -> + Default; + L -> + L + end. + +mfa_string(Fun) when is_function(Fun) -> + {module,M} = erlang:fun_info(Fun, module), + {name,F} = erlang:fun_info(Fun, name), + {arity,A} = erlang:fun_info(Fun, arity), + mfa_string({M,F,A}); +mfa_string({M,F,A}) -> + io_lib:format("~w:~w/~w", [M,F,A]); +mfa_string(X) -> + w(X). + + +display_info(Pid) -> + case pinfo(Pid) of + undefined -> {0,0,0,0}; + Info -> + Call = initial_call(Info), + Curr = case fetch(current_function, Info) of + {Mod,F,Args} when is_list(Args) -> + {Mod,F,length(Args)}; + Other -> + Other + end, + Reds = fetch(reductions, Info), + LM = length(fetch(messages, Info)), + HS = fetch(heap_size, Info), + SS = fetch(stack_size, Info), + iformat(w(Pid), mfa_string(Call), + w(HS), + w(Reds), w(LM)), + iformat(case fetch(registered_name, Info) of + 0 -> ""; + X -> w(X) + end, + mfa_string(Curr), + w(SS), + "", + ""), + {Reds, LM, HS, SS} + end. + +%% We have to do some assumptions about the initial call. +%% If the initial call is proc_lib:init_p/3,5 we can find more information +%% calling the function proc_lib:initial_call/1. + +initial_call(Info) -> + case fetch(initial_call, Info) of + {proc_lib, init_p, _} -> + proc_lib:translate_initial_call(Info); + ICall -> + ICall + end. + +iformat(A1, A2, A3, A4, A5) -> + format("~-21s ~-33s ~8s ~8s ~4s~n", [A1,A2,A3,A4,A5]). + +all_procs() -> + case is_alive() of + true -> flatmap(fun (N) -> rpc:call(N,erlang,processes,[]) end, + [node()|nodes()]); + false -> processes() + end. + +pinfo(Pid) -> + case is_alive() of + true -> rpc:call(node(Pid), erlang, process_info, [Pid]); + false -> process_info(Pid) + end. + +fetch(Key, Info) -> + case keysearch(Key, 1, Info) of + {value, {_, Val}} -> Val; + false -> 0 + end. + +pid(X,Y,Z) -> + list_to_pid("<" ++ integer_to_list(X) ++ "." ++ + integer_to_list(Y) ++ "." ++ + integer_to_list(Z) ++ ">"). + +i(X,Y,Z) -> pinfo(pid(X,Y,Z)). + +q() -> + init:stop(). + +bt(Pid) -> + case catch erlang:process_display(Pid, backtrace) of + {'EXIT', _} -> + undefined; + _ -> + ok + end. + +m() -> + mformat("Module", "File"), + foreach(fun ({Mod,File}) -> mformat(Mod, File) end, sort(code:all_loaded())). + +mformat(A1, A2) -> + format("~-20s ~s\n", [A1,A2]). + +%% erlangrc(Home) +%% Try to run a ".erlang" file, first in the current directory +%% else in home directory. + +erlangrc() -> + case init:get_argument(home) of + {ok,[[Home]]} -> + erlangrc([Home]); + _ -> + f_p_e(["."], ".erlang") + end. + +erlangrc([Home]) -> + f_p_e([".",Home], ".erlang"). + +error(Fmt, Args) -> + error_logger:error_msg(Fmt, Args). + +f_p_e(P, F) -> + case file:path_eval(P, F) of + {error, enoent} -> + {error, enoent}; + {error, E={Line, _Mod, _Term}} -> + error("file:path_eval(~p,~p): error on line ~p: ~s~n", + [P, F, Line, file:format_error(E)]), + ok; + {error, E} -> + error("file:path_eval(~p,~p): ~s~n", + [P, F, file:format_error(E)]), + ok; + Other -> + Other + end. + +bi(I) -> + case erlang:system_info(I) of + X when is_binary(X) -> io:put_chars(binary_to_list(X)); + X when is_list(X) -> io:put_chars(X); + X -> format("~w", [X]) + end. + +%% +%% Short and nice form of module info +%% + +m(M) -> + L = M:module_info(), + {value,{exports,E}} = keysearch(exports, 1, L), + Time = get_compile_time(L), + COpts = get_compile_options(L), + format("Module ~w compiled: ",[M]), print_time(Time), + format("Compiler options: ~p~n", [COpts]), + print_object_file(M), + format("Exports: ~n",[]), print_exports(keysort(1, E)). + +print_object_file(Mod) -> + case code:is_loaded(Mod) of + {file,File} -> + format("Object file: ~s\n", [File]); + _ -> + ignore + end. + +get_compile_time(L) -> + case get_compile_info(L, time) of + {ok,Val} -> Val; + error -> notime + end. + +get_compile_options(L) -> + case get_compile_info(L, options) of + {ok,Val} -> Val; + error -> [] + end. + +get_compile_info(L, Tag) -> + case keysearch(compile, 1, L) of + {value, {compile, I}} -> + case keysearch(Tag, 1, I) of + {value, {Tag, Val}} -> {ok,Val}; + false -> error + end; + false -> error + end. + +print_exports(X) when length(X) > 16 -> + split_print_exports(X); +print_exports([]) -> ok; +print_exports([{F, A} |Tail]) -> + format(" ~w/~w~n",[F, A]), + print_exports(Tail). + +split_print_exports(L) -> + Len = length(L), + Mid = Len div 2, + L1 = sublist(L, 1, Mid), + L2 = sublist(L, Mid +1, Len - Mid + 1), + split_print_exports(L1, L2). + +split_print_exports([], [{F, A}|T]) -> + Str = " ", + format("~-30s~w/~w~n", [Str, F, A]), + split_print_exports([], T); +split_print_exports([{F1, A1}|T1], [{F2, A2} | T2]) -> + Str = flatten(io_lib:format("~w/~w", [F1, A1])), + format("~-30s~w/~w~n", [Str, F2, A2]), + split_print_exports(T1, T2); +split_print_exports([], []) -> ok. + +print_time({Year,Month,Day,Hour,Min,_Secs}) -> + format("Date: ~s ~w ~w, ", [month(Month),Day,Year]), + format("Time: ~.2.0w.~.2.0w~n", [Hour,Min]); +print_time(notime) -> + format("No compile time info available~n",[]). + +month(1) -> "January"; +month(2) -> "February"; +month(3) -> "March"; +month(4) -> "April"; +month(5) -> "May"; +month(6) -> "June"; +month(7) -> "July"; +month(8) -> "August"; +month(9) -> "September"; +month(10) -> "October"; +month(11) -> "November"; +month(12) -> "December". + +%% Just because we can't eval receive statements... +flush() -> + receive + X -> + format("Shell got ~p~n",[X]), + flush() + after 0 -> + ok + end. + +%% Print formatted info about all registered names in the system +nregs() -> + foreach(fun (N) -> print_node_regs(N) end, all_regs()). + +regs() -> + print_node_regs({node(),registered()}). + +all_regs() -> + case is_alive() of + true -> [{N,rpc:call(N, erlang, registered, [])} || + N <- [node()|nodes()]]; + false -> [{node(),registered()}] + end. + +print_node_regs({N, List}) when is_list(List) -> + {Pids,Ports,_Dead} = pids_and_ports(N, sort(List), [], [], []), + %% print process info + format("~n** Registered procs on node ~w **~n",[N]), + procformat("Name", "Pid", "Initial Call", "Reds", "Msgs"), + foreach(fun({Name,PI,Pid}) -> procline(Name, PI, Pid) end, Pids), + %% print port info + format("~n** Registered ports on node ~w **~n",[N]), + portformat("Name", "Id", "Command"), + foreach(fun({Name,PI,Id}) -> portline(Name, PI, Id) end, Ports). + +pids_and_ports(_, [], Pids, Ports, Dead) -> + {reverse(Pids),reverse(Ports),reverse(Dead)}; + +pids_and_ports(Node, [Name|Names], Pids, Ports, Dead) -> + case pwhereis(Node, Name) of + Pid when is_pid(Pid) -> + pids_and_ports(Node, Names, [{Name,pinfo(Pid),Pid}|Pids], + Ports, Dead); + Id when is_port(Id) -> + pids_and_ports(Node, Names, Pids, + [{Name,portinfo(Id),Id}|Ports], Dead); + undefined -> + pids_and_ports(Node, Names, Pids, Ports, [Name|Dead]) + end. + +pwhereis(Node, Name) -> + case is_alive() of + true -> rpc:call(Node, erlang, whereis, [Name]); + false -> whereis(Name) + end. + +portinfo(Id) -> + case is_alive() of + true -> [ rpc:call(node(Id), erlang, port_info, [Id,name]) ]; + false -> [ erlang:port_info(Id, name) ] + end. + +procline(Name, Info, Pid) -> + Call = initial_call(Info), + Reds = fetch(reductions, Info), + LM = length(fetch(messages, Info)), + procformat(io_lib:format("~w",[Name]), + io_lib:format("~w",[Pid]), + io_lib:format("~s",[mfa_string(Call)]), + integer_to_list(Reds), integer_to_list(LM)). + +procformat(Name, Pid, Call, Reds, LM) -> + format("~-21s ~-12s ~-25s ~12s ~4s~n", [Name,Pid,Call,Reds,LM]). + +portline(Name, Info, Id) -> + Cmd = fetch(name, Info), + portformat(io_lib:format("~w",[Name]), + erlang:port_to_list(Id), + Cmd). + +portformat(Name, Id, Cmd) -> + format("~-21s ~-15s ~-40s~n", [Name,Id,Cmd]). + +%% pwd() +%% cd(Directory) +%% These are just wrappers around the file:get/set_cwd functions. + +pwd() -> + case file:get_cwd() of + {ok, Str} -> + ok = io:format("~s\n", [Str]); + {error, _} -> + ok = io:format("Cannot determine current directory\n") + end. + +cd(Dir) -> + file:set_cwd(Dir), + pwd(). + +%% ls() +%% ls(Directory) +%% The strategy is to print in fixed width files. + +ls() -> + ls("."). + +ls(Dir) -> + case file:list_dir(Dir) of + {ok, Entries} -> + ls_print(sort(Entries)); + {error,_E} -> + format("Invalid directory\n") + end. + +ls_print([]) -> ok; +ls_print(L) -> + Width = min([max(lengths(L, [])), 40]) + 5, + ls_print(L, Width, 0). + +ls_print(X, Width, Len) when Width + Len >= 80 -> + io:nl(), + ls_print(X, Width, 0); +ls_print([H|T], Width, Len) -> + io:format("~-*s",[Width,H]), + ls_print(T, Width, Len+Width); +ls_print([], _, _) -> + io:nl(). + +lengths([H|T], L) -> lengths(T, [length(H)|L]); +lengths([], L) -> L. + +w(X) -> + io_lib:write(X). + +%% +%% memory/[0,1] +%% + +memory() -> erlang:memory(). +memory(TypeSpec) -> erlang:memory(TypeSpec). + +%% +%% Cross Reference Check +%% + +xm(M) -> + appcall(tools, xref, m, [M]). + +%% +%% Call yecc +%% + +y(File) -> y(File, []). + +y(File, Opts) -> + appcall(parsetools, yecc, file, [File,Opts]). + + +%% +%% Avoid creating strong components in xref and dialyzer by making calls +%% from helper functions to other applications indirect. +%% + +appcall(App, M, F, Args) -> + try + apply(M, F, Args) + catch + error:undef -> + case erlang:get_stacktrace() of + [{M,F,Args}|_] -> + Arity = length(Args), + io:format("Call to ~w:~w/~w in application ~w failed.\n", + [M,F,Arity,App]); + Stk -> + erlang:raise(error, undef, Stk) + end + end. + diff --git a/lib/stdlib/src/calendar.erl b/lib/stdlib/src/calendar.erl new file mode 100644 index 0000000000..ddc0666f77 --- /dev/null +++ b/lib/stdlib/src/calendar.erl @@ -0,0 +1,459 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(calendar). + +%% local and universal time, time conversions + +-export([date_to_gregorian_days/1, + date_to_gregorian_days/3, + datetime_to_gregorian_seconds/1, + day_of_the_week/1, + day_of_the_week/3, + gregorian_days_to_date/1, + gregorian_seconds_to_datetime/1, + is_leap_year/1, + last_day_of_the_month/2, + local_time/0, + local_time_to_universal_time/1, + local_time_to_universal_time/2, + local_time_to_universal_time_dst/1, + now_to_datetime/1, % = now_to_universal_time/1 + now_to_local_time/1, + now_to_universal_time/1, + seconds_to_daystime/1, + seconds_to_time/1, + time_difference/2, + time_to_seconds/1, + universal_time/0, + universal_time_to_local_time/1, + valid_date/1, + valid_date/3]). + +-deprecated([{local_time_to_universal_time,1}]). + +-define(SECONDS_PER_MINUTE, 60). +-define(SECONDS_PER_HOUR, 3600). +-define(SECONDS_PER_DAY, 86400). +-define(DAYS_PER_YEAR, 365). +-define(DAYS_PER_LEAP_YEAR, 366). +-define(DAYS_PER_4YEARS, 1461). +-define(DAYS_PER_100YEARS, 36524). +-define(DAYS_PER_400YEARS, 146097). +-define(DAYS_FROM_0_TO_1970, 719528). + +%%---------------------------------------------------------------------- +%% Types +%%---------------------------------------------------------------------- + +-type year() :: non_neg_integer(). +-type year1970() :: 1970..10000. % should probably be 1970.. +-type month() :: 1..12. +-type day() :: 1..31. +-type hour() :: 0..23. +-type minute() :: 0..59. +-type second() :: 0..59. +-type daynum() :: 1..7. +-type ldom() :: 28 | 29 | 30 | 31. % last day of month + +-type t_now() :: {non_neg_integer(),non_neg_integer(),non_neg_integer()}. + +-type t_date() :: {year(),month(),day()}. +-type t_time() :: {hour(),minute(),second()}. +-type t_datetime() :: {t_date(),t_time()}. +-type t_datetime1970() :: {{year1970(),month(),day()},t_time()}. + +%%---------------------------------------------------------------------- + +%% All dates are according the the Gregorian calendar. In this module +%% the Gregorian calendar is extended back to year 0 for convenience. +%% +%% A year Y is a leap year if and only if either +%% +%% (1) Y is divisible by 4, but not by 100, or +%% (2) Y is divisible by 400. +%% +%% Hence, e.g. 1996 is a leap year, 1900 is not, but 2000 is. +%% + +%% +%% EXPORTS +%% + +%% date_to_gregorian_days(Year, Month, Day) = Integer +%% date_to_gregorian_days({Year, Month, Day}) = Integer +%% +%% Computes the total number of days starting from year 0, +%% January 1st. +%% +%% df/2 catches the case Year<0 +-spec date_to_gregorian_days(year(),month(),day()) -> non_neg_integer(). +date_to_gregorian_days(Year, Month, Day) when is_integer(Day), Day > 0 -> + Last = last_day_of_the_month(Year, Month), + if + Day =< Last -> + dy(Year) + dm(Month) + df(Year, Month) + Day - 1 + end. + +-spec date_to_gregorian_days(t_date()) -> non_neg_integer(). +date_to_gregorian_days({Year, Month, Day}) -> + date_to_gregorian_days(Year, Month, Day). + + +%% datetime_to_gregorian_seconds(DateTime) = Integer +%% +%% Computes the total number of seconds starting from year 0, +%% January 1st. +%% +-spec datetime_to_gregorian_seconds(t_datetime()) -> non_neg_integer(). +datetime_to_gregorian_seconds({Date, Time}) -> + ?SECONDS_PER_DAY*date_to_gregorian_days(Date) + + time_to_seconds(Time). + + +%% day_of_the_week(Year, Month, Day) +%% day_of_the_week({Year, Month, Day}) +%% +%% Returns: 1 | .. | 7. Monday = 1, Tuesday = 2, ..., Sunday = 7. +%% +-spec day_of_the_week(year(), month(), day()) -> daynum(). +day_of_the_week(Year, Month, Day) -> + (date_to_gregorian_days(Year, Month, Day) + 5) rem 7 + 1. + +-spec day_of_the_week(t_date()) -> daynum(). +day_of_the_week({Year, Month, Day}) -> + day_of_the_week(Year, Month, Day). + + +%% gregorian_days_to_date(Days) = {Year, Month, Day} +%% +-spec gregorian_days_to_date(non_neg_integer()) -> t_date(). +gregorian_days_to_date(Days) -> + {Year, DayOfYear} = day_to_year(Days), + {Month, DayOfMonth} = year_day_to_date(Year, DayOfYear), + {Year, Month, DayOfMonth}. + + +%% gregorian_seconds_to_datetime(Secs) +%% +-spec gregorian_seconds_to_datetime(non_neg_integer()) -> t_datetime(). +gregorian_seconds_to_datetime(Secs) when Secs >= 0 -> + Days = Secs div ?SECONDS_PER_DAY, + Rest = Secs rem ?SECONDS_PER_DAY, + {gregorian_days_to_date(Days), seconds_to_time(Rest)}. + + +%% is_leap_year(Year) = true | false +%% +-spec is_leap_year(year()) -> boolean(). +is_leap_year(Y) when is_integer(Y), Y >= 0 -> + is_leap_year1(Y). + +-spec is_leap_year1(year()) -> boolean(). +is_leap_year1(Year) when Year rem 4 =:= 0, Year rem 100 > 0 -> + true; +is_leap_year1(Year) when Year rem 400 =:= 0 -> + true; +is_leap_year1(_) -> false. + + +%% last_day_of_the_month(Year, Month) +%% +%% Returns the number of days in a month. +%% +-spec last_day_of_the_month(year(), month()) -> ldom(). +last_day_of_the_month(Y, M) when is_integer(Y), Y >= 0 -> + last_day_of_the_month1(Y, M). + +-spec last_day_of_the_month1(year(),month()) -> ldom(). +last_day_of_the_month1(_, 4) -> 30; +last_day_of_the_month1(_, 6) -> 30; +last_day_of_the_month1(_, 9) -> 30; +last_day_of_the_month1(_,11) -> 30; +last_day_of_the_month1(Y, 2) -> + case is_leap_year(Y) of + true -> 29; + _ -> 28 + end; +last_day_of_the_month1(_, M) when is_integer(M), M > 0, M < 13 -> + 31. + + +%% local_time() +%% +%% Returns: {date(), time()}, date() = {Y, M, D}, time() = {H, M, S}. +-spec local_time() -> t_datetime(). +local_time() -> + erlang:localtime(). + + +%% local_time_to_universal_time(DateTime) +%% +-spec local_time_to_universal_time(t_datetime1970()) -> t_datetime1970(). +local_time_to_universal_time(DateTime) -> + erlang:localtime_to_universaltime(DateTime). + +-spec local_time_to_universal_time(t_datetime1970(), + 'true' | 'false' | 'undefined') -> + t_datetime1970(). +local_time_to_universal_time(DateTime, IsDst) -> + erlang:localtime_to_universaltime(DateTime, IsDst). + +-spec local_time_to_universal_time_dst(t_datetime1970()) -> [t_datetime1970()]. +local_time_to_universal_time_dst(DateTime) -> + UtDst = erlang:localtime_to_universaltime(DateTime, true), + Ut = erlang:localtime_to_universaltime(DateTime, false), + %% Reverse check the universal times + LtDst = erlang:universaltime_to_localtime(UtDst), + Lt = erlang:universaltime_to_localtime(Ut), + %% Return the valid universal times + case {LtDst,Lt} of + {DateTime,DateTime} when UtDst =/= Ut -> + [UtDst,Ut]; + {DateTime,_} -> + [UtDst]; + {_,DateTime} -> + [Ut]; + {_,_} -> + [] + end. + +%% now_to_universal_time(Now) +%% now_to_datetime(Now) +%% +%% Convert from now() to UTC. +%% +%% Args: Now = now(); now() = {MegaSec, Sec, MilliSec}, MegaSec = Sec +%% = MilliSec = integer() +%% Returns: {date(), time()}, date() = {Y, M, D}, time() = {H, M, S}. +%% +-spec now_to_datetime(t_now()) -> t_datetime1970(). +now_to_datetime({MSec, Sec, _uSec}) -> + Sec0 = MSec*1000000 + Sec + ?DAYS_FROM_0_TO_1970*?SECONDS_PER_DAY, + gregorian_seconds_to_datetime(Sec0). + +-spec now_to_universal_time(t_now()) -> t_datetime1970(). +now_to_universal_time(Now) -> + now_to_datetime(Now). + + +%% now_to_local_time(Now) +%% +%% Args: Now = now() +%% +-spec now_to_local_time(t_now()) -> t_datetime1970(). +now_to_local_time({MSec, Sec, _uSec}) -> + erlang:universaltime_to_localtime( + now_to_universal_time({MSec, Sec, _uSec})). + + + +%% seconds_to_daystime(Secs) = {Days, {Hour, Minute, Second}} +%% +-spec seconds_to_daystime(integer()) -> {integer(), t_time()}. +seconds_to_daystime(Secs) -> + Days0 = Secs div ?SECONDS_PER_DAY, + Secs0 = Secs rem ?SECONDS_PER_DAY, + if + Secs0 < 0 -> + {Days0 - 1, seconds_to_time(Secs0 + ?SECONDS_PER_DAY)}; + true -> + {Days0, seconds_to_time(Secs0)} + end. + + +%% +%% seconds_to_time(Secs) +%% +%% Wraps. +%% +-type secs_per_day() :: 0..?SECONDS_PER_DAY. +-spec seconds_to_time(secs_per_day()) -> t_time(). +seconds_to_time(Secs) when Secs >= 0, Secs < ?SECONDS_PER_DAY -> + Secs0 = Secs rem ?SECONDS_PER_DAY, + Hour = Secs0 div ?SECONDS_PER_HOUR, + Secs1 = Secs0 rem ?SECONDS_PER_HOUR, + Minute = Secs1 div ?SECONDS_PER_MINUTE, + Second = Secs1 rem ?SECONDS_PER_MINUTE, + {Hour, Minute, Second}. + +%% time_difference(T1, T2) = Tdiff +%% +%% Returns the difference between two {Date, Time} structures. +%% +%% T1 = T2 = {Date, Time}, Tdiff = {Day, {Hour, Min, Sec}}, +%% Date = {Year, Month, Day}, Time = {Hour, Minute, Sec}, +%% Year = Month = Day = Hour = Minute = Sec = integer() +%% +-type timediff() :: {integer(), t_time()}. +-spec time_difference(t_datetime(), t_datetime()) -> timediff(). +time_difference({{Y1, Mo1, D1}, {H1, Mi1, S1}}, + {{Y2, Mo2, D2}, {H2, Mi2, S2}}) -> + Secs = datetime_to_gregorian_seconds({{Y2, Mo2, D2}, {H2, Mi2, S2}}) - + datetime_to_gregorian_seconds({{Y1, Mo1, D1}, {H1, Mi1, S1}}), + seconds_to_daystime(Secs). + + +%% +%% time_to_seconds(Time) +%% +-spec time_to_seconds(t_time()) -> secs_per_day(). +time_to_seconds({H, M, S}) when is_integer(H), is_integer(M), is_integer(S) -> + H * ?SECONDS_PER_HOUR + + M * ?SECONDS_PER_MINUTE + S. + + +%% universal_time() +%% +%% Returns: {date(), time()}, date() = {Y, M, D}, time() = {H, M, S}. +-spec universal_time() -> t_datetime(). +universal_time() -> + erlang:universaltime(). + + +%% universal_time_to_local_time(DateTime) +%% +-spec universal_time_to_local_time(t_datetime()) -> t_datetime(). +universal_time_to_local_time(DateTime) -> + erlang:universaltime_to_localtime(DateTime). + + +%% valid_date(Year, Month, Day) = true | false +%% valid_date({Year, Month, Day}) = true | false +%% +-spec valid_date(integer(), integer(), integer()) -> boolean(). +valid_date(Y, M, D) when is_integer(Y), is_integer(M), is_integer(D) -> + valid_date1(Y, M, D). + +-spec valid_date1(integer(), integer(), integer()) -> boolean(). +valid_date1(Y, M, D) when Y >= 0, M > 0, M < 13, D > 0 -> + D =< last_day_of_the_month(Y, M); +valid_date1(_, _, _) -> + false. + +-spec valid_date({integer(),integer(),integer()}) -> boolean(). +valid_date({Y, M, D}) -> + valid_date(Y, M, D). + + +%% +%% LOCAL FUNCTIONS +%% +-type day_of_year() :: 0..365. + +%% day_to_year(DayOfEpoch) = {Year, DayOfYear} +%% +%% The idea here is to first guess a year, and then adjust. Although +%% the implementation is recursive, at most 1 or 2 recursive steps +%% are taken. +%% If DayOfEpoch is very large, we need far more than 1 or 2 iterations, +%% since we just subtract a yearful of days at a time until we're there. +%% +-spec day_to_year(non_neg_integer()) -> {year(), day_of_year()}. +day_to_year(DayOfEpoch) when DayOfEpoch >= 0 -> + Y0 = DayOfEpoch div ?DAYS_PER_YEAR, + {Y1, D1} = dty(Y0, DayOfEpoch, dy(Y0)), + {Y1, DayOfEpoch - D1}. + +-spec dty(year(), non_neg_integer(), non_neg_integer()) -> + {year(), non_neg_integer()}. +dty(Y, D1, D2) when D1 < D2 -> + dty(Y-1, D1, dy(Y-1)); +dty(Y, _D1, D2) -> + {Y, D2}. + +%% year_day_to_date(Year, DayOfYear) = {Month, DayOfMonth} +%% +%% Note: 1 is the first day of the month. +%% +-spec year_day_to_date(year(), day_of_year()) -> {month(), day()}. +year_day_to_date(Year, DayOfYear) -> + ExtraDay = case is_leap_year(Year) of + true -> + 1; + false -> + 0 + end, + {Month, Day} = year_day_to_date2(ExtraDay, DayOfYear), + {Month, Day + 1}. + + +%% Note: 0 is the first day of the month +%% +-spec year_day_to_date2(0 | 1, day_of_year()) -> {month(), 0..30}. +year_day_to_date2(_, Day) when Day < 31 -> + {1, Day}; +year_day_to_date2(E, Day) when 31 =< Day, Day < 59 + E -> + {2, Day - 31}; +year_day_to_date2(E, Day) when 59 + E =< Day, Day < 90 + E -> + {3, Day - (59 + E)}; +year_day_to_date2(E, Day) when 90 + E =< Day, Day < 120 + E -> + {4, Day - (90 + E)}; +year_day_to_date2(E, Day) when 120 + E =< Day, Day < 151 + E -> + {5, Day - (120 + E)}; +year_day_to_date2(E, Day) when 151 + E =< Day, Day < 181 + E -> + {6, Day - (151 + E)}; +year_day_to_date2(E, Day) when 181 + E =< Day, Day < 212 + E -> + {7, Day - (181 + E)}; +year_day_to_date2(E, Day) when 212 + E =< Day, Day < 243 + E -> + {8, Day - (212 + E)}; +year_day_to_date2(E, Day) when 243 + E =< Day, Day < 273 + E -> + {9, Day - (243 + E)}; +year_day_to_date2(E, Day) when 273 + E =< Day, Day < 304 + E -> + {10, Day - (273 + E)}; +year_day_to_date2(E, Day) when 304 + E =< Day, Day < 334 + E -> + {11, Day - (304 + E)}; +year_day_to_date2(E, Day) when 334 + E =< Day -> + {12, Day - (334 + E)}. + +%% dy(Year) +%% +%% Days in previous years. +%% +-spec dy(integer()) -> non_neg_integer(). +dy(Y) when Y =< 0 -> + 0; +dy(Y) -> + X = Y - 1, + (X div 4) - (X div 100) + (X div 400) + + X*?DAYS_PER_YEAR + ?DAYS_PER_LEAP_YEAR. + +%% dm(Month) +%% +%% Returns the total number of days in all months +%% preceeding Month, for an ordinary year. +%% +-spec dm(month()) -> + 0 | 31 | 59 | 90 | 120 | 151 | 181 | 212 | 243 | 273 | 304 | 334. +dm(1) -> 0; dm(2) -> 31; dm(3) -> 59; dm(4) -> 90; +dm(5) -> 120; dm(6) -> 151; dm(7) -> 181; dm(8) -> 212; +dm(9) -> 243; dm(10) -> 273; dm(11) -> 304; dm(12) -> 334. + +%% df(Year, Month) +%% +%% Accounts for an extra day in February if Year is +%% a leap year, and if Month > 2. +%% +-spec df(year(), month()) -> 0 | 1. +df(_, Month) when Month < 3 -> + 0; +df(Year, _) -> + case is_leap_year(Year) of + true -> 1; + false -> 0 + end. diff --git a/lib/stdlib/src/dets.erl b/lib/stdlib/src/dets.erl new file mode 100644 index 0000000000..7f1c13770b --- /dev/null +++ b/lib/stdlib/src/dets.erl @@ -0,0 +1,2989 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(dets). + +%% Disk based linear hashing lookup dictionary. + +%% Public. +-export([all/0, + bchunk/2, + close/1, + delete/2, + delete_all_objects/1, + delete_object/2, + first/1, + foldl/3, + foldr/3, + from_ets/2, + info/1, + info/2, + init_table/2, + init_table/3, + insert/2, + insert_new/2, + is_compatible_bchunk_format/2, + is_dets_file/1, + lookup/2, + match/1, + match/2, + match/3, + match_delete/2, + match_object/1, + match_object/2, + match_object/3, + member/2, + next/2, + open_file/1, + open_file/2, + pid2name/1, + repair_continuation/2, + safe_fixtable/2, + select/1, + select/2, + select/3, + select_delete/2, + slot/2, + sync/1, + table/1, + table/2, + to_ets/2, + traverse/2, + update_counter/3]). + +%% Server export. +-export([start/0, stop/0]). + +%% Internal exports. +-export([istart_link/1, init/2, internal_open/3, add_user/3, + internal_close/1, remove_user/2, + system_continue/3, system_terminate/4, system_code_change/4]). + +%% Debug. +-export([file_info/1, + fsck/1, + fsck/2, + get_head_field/2, + view/1, + where/2, + verbose/0, + verbose/1 + ]). + +%% Not documented, or not ready for publication. +-export([lookup_keys/2]). + + +-compile({inline, [{einval,2},{badarg,2},{undefined,1}, + {badarg_exit,2},{lookup_reply,2}]}). + +-include_lib("kernel/include/file.hrl"). + +-include("dets.hrl"). + +-type object() :: tuple(). +-type pattern() :: atom() | tuple(). +-type tab_name() :: atom() | reference(). + +%%% This is the implementation of the mnesia file storage. Each (non +%%% ram-copy) table is maintained in a corresponding .DAT file. The +%%% dat file is organized as a segmented linear hashlist. The head of +%%% the file with the split indicator, size etc is held in ram by the +%%% server at all times. +%%% +%%% The parts specific for formats up to and including 8(c) are +%%% implemented in dets_v8.erl, parts specific for format 9 are +%%% implemented in dets_v9.erl. + +%% The method of hashing is the so called linear hashing algorithm +%% with segments. +%% +%% Linear hashing: +%% +%% - n indicates next bucket to split (initially zero); +%% - m is the size of the hash table +%% - initially next = m and n = 0 +%% +%% - to insert: +%% - hash = key mod m +%% - if hash < n then hash = key mod 2m +%% - when the number of objects exceeds the initial size +%% of the hash table, each insertion of an object +%% causes bucket n to be split: +%% - add a new bucket to the end of the table +%% - redistribute the contents of bucket n +%% using hash = key mod 2m +%% - increment n +%% - if n = m then m = 2m, n = 0 +%% - to search: +%% hash = key mod m +%% if hash < n then hash = key mod 2m +%% do linear scan of the bucket +%% + +%%% If a file error occurs on a working dets file, update_mode is set +%%% to the error tuple. When in 'error' mode, the free lists are not +%%% written, and a repair is forced next time the file is opened. + +-record(dets_cont, { + what, % object | bindings | select | bchunk + no_objs, % requested number of objects: default | integer() > 0 + bin, % small chunk not consumed, or 'eof' at end-of-file + alloc, % the part of the file not yet scanned, mostly a binary + tab, + match_program % true | compiled_match_spec() | undefined + }). + +-record(open_args, { + file, + type, + keypos, + repair, + min_no_slots, + max_no_slots, + ram_file, + delayed_write, + auto_save, + access, + version, + debug + }). + +-define(PATTERN_TO_OBJECT_MATCH_SPEC(Pat), [{Pat,[],['$_']}]). +-define(PATTERN_TO_BINDINGS_MATCH_SPEC(Pat), [{Pat,[],['$$']}]). +-define(PATTERN_TO_TRUE_MATCH_SPEC(Pat), [{Pat,[],[true]}]). + +%%-define(DEBUGM(X, Y), io:format(X, Y)). +-define(DEBUGM(X, Y), true). + +%%-define(DEBUGF(X,Y), io:format(X, Y)). +-define(DEBUGF(X,Y), void). + +%%-define(PROFILE(C), C). +-define(PROFILE(C), void). + +%%% Some further debug code was added in R12B-1 (stdlib-1.15.1): +%%% - there is a new open_file() option 'debug'; +%%% - there is a new OS environment variable 'DETS_DEBUG'; +%%% - verbose(true) implies that info messages are written onto +%%% the error log whenever an unsafe traversal is started. +%%% The 'debug' mode (set by the open_file() option 'debug' or +%%% by os:putenv("DETS_DEBUG", "true")) implies that the results of +%%% calling pwrite() and pread() are tested to some extent. It also +%%% means a considerable overhead when it comes to RAM usage. The +%%% operation of Dets is also slowed down a bit. Note that in debug +%%% mode terms will be output on the error logger. + +%%%---------------------------------------------------------------------- +%%% API +%%%---------------------------------------------------------------------- + +add_user(Pid, Tab, Args) -> + req(Pid, {add_user, Tab, Args}). + +-spec all() -> [tab_name()]. + +all() -> + dets_server:all(). + +-type cont() :: #dets_cont{}. +-spec bchunk(tab_name(), 'start' | cont()) -> + {cont(), binary() | tuple()} | '$end_of_table' | {'error', term()}. + +bchunk(Tab, start) -> + badarg(treq(Tab, {bchunk_init, Tab}), [Tab, start]); +bchunk(Tab, #dets_cont{bin = eof, tab = Tab}) -> + '$end_of_table'; +bchunk(Tab, #dets_cont{what = bchunk, tab = Tab} = State) -> + badarg(treq(Tab, {bchunk, State}), [Tab, State]); +bchunk(Tab, Term) -> + erlang:error(badarg, [Tab, Term]). + +-spec close(tab_name()) -> 'ok' | {'error', term()}. + +close(Tab) -> + case dets_server:close(Tab) of + badarg -> % Should not happen. + {error, not_owner}; % Backwards compatibility... + Reply -> + Reply + end. + +-spec delete(tab_name(), term()) -> 'ok' | {'error', term()}. + +delete(Tab, Key) -> + badarg(treq(Tab, {delete_key, [Key]}), [Tab, Key]). + +-spec delete_all_objects(tab_name()) -> 'ok' | {'error', term()}. + +delete_all_objects(Tab) -> + case treq(Tab, delete_all_objects) of + badarg -> + erlang:error(badarg, [Tab]); + fixed -> + match_delete(Tab, '_'); + Reply -> + Reply + end. + +-spec delete_object(tab_name(), object()) -> 'ok' | {'error', term()}. + +delete_object(Tab, O) -> + badarg(treq(Tab, {delete_object, [O]}), [Tab, O]). + +%% Given a filename, fsck it. Debug. +fsck(Fname) -> + fsck(Fname, default). + +fsck(Fname, Version) -> + catch begin + {ok, Fd, FH} = read_file_header(Fname, read, false), + ?DEBUGF("FileHeader: ~p~n", [FH]), + case (FH#fileheader.mod):check_file_header(FH, Fd) of + {error, not_closed} -> + fsck(Fd, make_ref(), Fname, FH, default, default, Version); + {ok, _Head, _Extra} -> + fsck(Fd, make_ref(), Fname, FH, default, default, Version); + Error -> + Error + end + end. + +-spec first(tab_name()) -> term() | '$end_of_table'. + +first(Tab) -> + badarg_exit(treq(Tab, first), [Tab]). + +-spec foldr(fun((object(), Acc) -> Acc), Acc, tab_name()) -> Acc | {'error', term()}. + +foldr(Fun, Acc, Tab) -> + foldl(Fun, Acc, Tab). + +-spec foldl(fun((object(), Acc) -> Acc), Acc, tab_name()) -> Acc | {'error', term()}. + +foldl(Fun, Acc, Tab) -> + Ref = make_ref(), + do_traverse(Fun, Acc, Tab, Ref). + +-spec from_ets(tab_name(), ets:tab()) -> 'ok' | {'error', term()}. + +from_ets(DTab, ETab) -> + ets:safe_fixtable(ETab, true), + Spec = ?PATTERN_TO_OBJECT_MATCH_SPEC('_'), + LC = ets:select(ETab, Spec, 100), + InitFun = from_ets_fun(LC, ETab), + Reply = treq(DTab, {initialize, InitFun, term, default}), + ets:safe_fixtable(ETab, false), + case Reply of + {thrown, Thrown} -> throw(Thrown); + Else -> badarg(Else, [DTab, ETab]) + end. + +from_ets_fun(LC, ETab) -> + fun(close) -> + ok; + (read) when LC =:= '$end_of_table' -> + end_of_input; + (read) -> + {L, C} = LC, + {L, from_ets_fun(ets:select(C), ETab)} + end. + +info(Tab) -> + case catch dets_server:get_pid(Tab) of + {'EXIT', _Reason} -> + undefined; + Pid -> + undefined(req(Pid, info)) + end. + +info(Tab, owner) -> + case catch dets_server:get_pid(Tab) of + Pid when is_pid(Pid) -> + Pid; + _ -> + undefined + end; +info(Tab, users) -> % undocumented + case dets_server:users(Tab) of + [] -> + undefined; + Users -> + Users + end; +info(Tab, Tag) -> + case catch dets_server:get_pid(Tab) of + {'EXIT', _Reason} -> + undefined; + Pid -> + undefined(req(Pid, {info, Tag})) + end. + +init_table(Tab, InitFun) -> + init_table(Tab, InitFun, []). + +init_table(Tab, InitFun, Options) when is_function(InitFun) -> + case options(Options, [format, min_no_slots]) of + {badarg,_} -> + erlang:error(badarg, [Tab, InitFun, Options]); + [Format, MinNoSlots] -> + case treq(Tab, {initialize, InitFun, Format, MinNoSlots}) of + {thrown, Thrown} -> throw(Thrown); + Else -> badarg(Else, [Tab, InitFun, Options]) + end + end; +init_table(Tab, InitFun, Options) -> + erlang:error(badarg, [Tab, InitFun, Options]). + +insert(Tab, Objs) when is_list(Objs) -> + badarg(treq(Tab, {insert, Objs}), [Tab, Objs]); +insert(Tab, Obj) -> + badarg(treq(Tab, {insert, [Obj]}), [Tab, Obj]). + +insert_new(Tab, Objs) when is_list(Objs) -> + badarg(treq(Tab, {insert_new, Objs}), [Tab, Objs]); +insert_new(Tab, Obj) -> + badarg(treq(Tab, {insert_new, [Obj]}), [Tab, Obj]). + +internal_close(Pid) -> + req(Pid, close). + +internal_open(Pid, Ref, Args) -> + req(Pid, {internal_open, Ref, Args}). + +is_compatible_bchunk_format(Tab, Term) -> + badarg(treq(Tab, {is_compatible_bchunk_format, Term}), [Tab, Term]). + +is_dets_file(FileName) -> + case catch read_file_header(FileName, read, false) of + {ok, Fd, FH} -> + file:close(Fd), + FH#fileheader.cookie =:= ?MAGIC; + {error, {tooshort, _}} -> + false; + {error, {not_a_dets_file, _}} -> + false; + Other -> + Other + end. + +lookup(Tab, Key) -> + badarg(treq(Tab, {lookup_keys, [Key]}), [Tab, Key]). + +%% Not public. +lookup_keys(Tab, Keys) -> + case catch lists:usort(Keys) of + UKeys when is_list(UKeys), UKeys =/= [] -> + badarg(treq(Tab, {lookup_keys, UKeys}), [Tab, Keys]); + _Else -> + erlang:error(badarg, [Tab, Keys]) + end. + +match(Tab, Pat) -> + badarg(safe_match(Tab, Pat, bindings), [Tab, Pat]). + +match(Tab, Pat, N) -> + badarg(init_chunk_match(Tab, Pat, bindings, N), [Tab, Pat, N]). + +match(State) when State#dets_cont.what =:= bindings -> + badarg(chunk_match(State), [State]); +match(Term) -> + erlang:error(badarg, [Term]). + +-spec match_delete(tab_name(), pattern()) -> + non_neg_integer() | 'ok' | {'error', term()}. + +match_delete(Tab, Pat) -> + badarg(match_delete(Tab, Pat, delete), [Tab, Pat]). + +match_delete(Tab, Pat, What) -> + safe_fixtable(Tab, true), + case compile_match_spec(What, Pat) of + {Spec, MP} -> + Proc = dets_server:get_pid(Tab), + R = req(Proc, {match_delete_init, MP, Spec}), + do_match_delete(Tab, Proc, R, What, 0); + badarg -> + badarg + end. + +do_match_delete(Tab, _Proc, {done, N1}, select, N) -> + safe_fixtable(Tab, false), + N + N1; +do_match_delete(Tab, _Proc, {done, _N1}, _What, _N) -> + safe_fixtable(Tab, false), + ok; +do_match_delete(Tab, Proc, {cont, State, N1}, What, N) -> + do_match_delete(Tab, Proc, req(Proc, {match_delete, State}), What, N+N1); +do_match_delete(Tab, _Proc, Error, _What, _N) -> + safe_fixtable(Tab, false), + Error. + +match_object(Tab, Pat) -> + badarg(safe_match(Tab, Pat, object), [Tab, Pat]). + +match_object(Tab, Pat, N) -> + badarg(init_chunk_match(Tab, Pat, object, N), [Tab, Pat, N]). + +match_object(State) when State#dets_cont.what =:= object -> + badarg(chunk_match(State), [State]); +match_object(Term) -> + erlang:error(badarg, [Term]). + +member(Tab, Key) -> + badarg(treq(Tab, {member, Key}), [Tab, Key]). + +next(Tab, Key) -> + badarg_exit(treq(Tab, {next, Key}), [Tab, Key]). + +%% Assuming that a file already exists, open it with the +%% parameters as already specified in the file itself. +%% Return a ref leading to the file. +open_file(File) -> + case dets_server:open_file(to_list(File)) of + badarg -> % Should not happen. + erlang:error(dets_process_died, [File]); + Reply -> + einval(Reply, [File]) + end. + +open_file(Tab, Args) when is_list(Args) -> + case catch defaults(Tab, Args) of + OpenArgs when is_record(OpenArgs, open_args) -> + case dets_server:open_file(Tab, OpenArgs) of + badarg -> % Should not happen. + erlang:error(dets_process_died, [Tab, Args]); + Reply -> + einval(Reply, [Tab, Args]) + end; + _ -> + erlang:error(badarg, [Tab, Args]) + end; +open_file(Tab, Arg) -> + open_file(Tab, [Arg]). + +pid2name(Pid) -> + dets_server:pid2name(Pid). + +remove_user(Pid, From) -> + req(Pid, {close, From}). + +repair_continuation(#dets_cont{match_program = B}=Cont, MS) + when is_binary(B) -> + case ets:is_compiled_ms(B) of + true -> + Cont; + false -> + Cont#dets_cont{match_program = ets:match_spec_compile(MS)} + end; +repair_continuation(#dets_cont{}=Cont, _MS) -> + Cont; +repair_continuation(T, MS) -> + erlang:error(badarg, [T, MS]). + +safe_fixtable(Tab, Bool) when Bool; not Bool -> + badarg(treq(Tab, {safe_fixtable, Bool}), [Tab, Bool]); +safe_fixtable(Tab, Term) -> + erlang:error(badarg, [Tab, Term]). + +select(Tab, Pat) -> + badarg(safe_match(Tab, Pat, select), [Tab, Pat]). + +select(Tab, Pat, N) -> + badarg(init_chunk_match(Tab, Pat, select, N), [Tab, Pat, N]). + +select(State) when State#dets_cont.what =:= select -> + badarg(chunk_match(State), [State]); +select(Term) -> + erlang:error(badarg, [Term]). + +select_delete(Tab, Pat) -> + badarg(match_delete(Tab, Pat, select), [Tab, Pat]). + +slot(Tab, Slot) when is_integer(Slot), Slot >= 0 -> + badarg(treq(Tab, {slot, Slot}), [Tab, Slot]); +slot(Tab, Term) -> + erlang:error(badarg, [Tab, Term]). + +start() -> + dets_server:start(). + +stop() -> + dets_server:stop(). + +istart_link(Server) -> + {ok, proc_lib:spawn_link(dets, init, [self(), Server])}. + +sync(Tab) -> + badarg(treq(Tab, sync), [Tab]). + +table(Tab) -> + table(Tab, []). + +table(Tab, Opts) -> + case options(Opts, [traverse, n_objects]) of + {badarg,_} -> + erlang:error(badarg, [Tab, Opts]); + [Traverse, NObjs] -> + TF = case Traverse of + first_next -> + fun() -> qlc_next(Tab, first(Tab)) end; + select -> + fun(MS) -> qlc_select(select(Tab, MS, NObjs)) end; + {select, MS} -> + fun() -> qlc_select(select(Tab, MS, NObjs)) end + end, + PreFun = fun(_) -> safe_fixtable(Tab, true) end, + PostFun = fun() -> safe_fixtable(Tab, false) end, + InfoFun = fun(Tag) -> table_info(Tab, Tag) end, + %% lookup_keys is not public, but convenient + LookupFun = + case Traverse of + {select, _MS} -> + undefined; + _ -> + fun(_KeyPos, [K]) -> lookup(Tab, K); + (_KeyPos, Ks) -> lookup_keys(Tab, Ks) + end + end, + FormatFun = + fun({all, _NElements, _ElementFun}) -> + As = [Tab | [Opts || _ <- [[]], Opts =/= []]], + {?MODULE, table, As}; + ({match_spec, MS}) -> + {?MODULE, table, [Tab, [{traverse, {select, MS}} | + listify(Opts)]]}; + ({lookup, _KeyPos, [Value], _NElements, ElementFun}) -> + io_lib:format("~w:lookup(~w, ~w)", + [?MODULE, Tab, ElementFun(Value)]); + ({lookup, _KeyPos, Values, _NElements, ElementFun}) -> + Vals = [ElementFun(V) || V <- Values], + io_lib:format("lists:flatmap(fun(V) -> " + "~w:lookup(~w, V) end, ~w)", + [?MODULE, Tab, Vals]) + end, + qlc:table(TF, [{pre_fun, PreFun}, {post_fun, PostFun}, + {info_fun, InfoFun}, {format_fun, FormatFun}, + {key_equality, '=:='}, + {lookup_fun, LookupFun}]) + end. + +qlc_next(_Tab, '$end_of_table') -> + []; +qlc_next(Tab, Key) -> + case lookup(Tab, Key) of + Objects when is_list(Objects) -> + Objects ++ fun() -> qlc_next(Tab, next(Tab, Key)) end; + Error -> + %% Do what first and next do. + exit(Error) + end. + +qlc_select('$end_of_table') -> + []; +qlc_select({Objects, Cont}) when is_list(Objects) -> + Objects ++ fun() -> qlc_select(select(Cont)) end; +qlc_select(Error) -> + Error. + +table_info(Tab, num_of_objects) -> + info(Tab, size); +table_info(Tab, keypos) -> + info(Tab, keypos); +table_info(Tab, is_unique_objects) -> + info(Tab, type) =/= duplicate_bag; +table_info(_Tab, _) -> + undefined. + +%% End of table/2. + +to_ets(DTab, ETab) -> + case ets:info(ETab, protection) of + undefined -> + erlang:error(badarg, [DTab, ETab]); + _ -> + Fun = fun(X, T) -> true = ets:insert(T, X), T end, + foldl(Fun, ETab, DTab) + end. + +traverse(Tab, Fun) -> + Ref = make_ref(), + TFun = + fun(O, Acc) -> + case Fun(O) of + continue -> + Acc; + {continue, Val} -> + [Val | Acc]; + {done, Value} -> + throw({Ref, [Value | Acc]}); + Other -> + throw({Ref, Other}) + end + end, + do_traverse(TFun, [], Tab, Ref). + +update_counter(Tab, Key, C) -> + badarg(treq(Tab, {update_counter, Key, C}), [Tab, Key, C]). + +verbose() -> + verbose(true). + +verbose(What) -> + ok = dets_server:verbose(What), + All = dets_server:all(), + Fun = fun(Tab) -> treq(Tab, {set_verbose, What}) end, + lists:foreach(Fun, All), + All. + +%% Where in the (open) table is Object located? +%% The address of the first matching object is returned. +%% Format 9 returns the address of the object collection. +%% -> {ok, Address} | false +where(Tab, Object) -> + badarg(treq(Tab, {where, Object}), [Tab, Object]). + +do_traverse(Fun, Acc, Tab, Ref) -> + safe_fixtable(Tab, true), + Proc = dets_server:get_pid(Tab), + try + do_trav(Proc, Acc, Fun) + catch {Ref, Result} -> + Result + after + safe_fixtable(Tab, false) + end. + +do_trav(Proc, Acc, Fun) -> + {Spec, MP} = compile_match_spec(object, '_'), + %% MP not used + case req(Proc, {match, MP, Spec, default}) of + {cont, State} -> + do_trav(State, Proc, Acc, Fun); + Error -> + Error + end. + +do_trav(#dets_cont{bin = eof}, _Proc, Acc, _Fun) -> + Acc; +do_trav(State, Proc, Acc, Fun) -> + case req(Proc, {match_init, State}) of + {cont, {Bins, NewState}} -> + do_trav_bins(NewState, Proc, Acc, Fun, lists:reverse(Bins)); + Error -> + Error + end. + +do_trav_bins(State, Proc, Acc, Fun, []) -> + do_trav(State, Proc, Acc, Fun); +do_trav_bins(State, Proc, Acc, Fun, [Bin | Bins]) -> + %% Unpack one binary at a time, using the client's heap. + case catch binary_to_term(Bin) of + {'EXIT', _} -> + req(Proc, {corrupt, dets_utils:bad_object(do_trav_bins, Bin)}); + Term -> + NewAcc = Fun(Term, Acc), + do_trav_bins(State, Proc, NewAcc, Fun, Bins) + end. + +safe_match(Tab, Pat, What) -> + safe_fixtable(Tab, true), + R = do_safe_match(init_chunk_match(Tab, Pat, What, default), []), + safe_fixtable(Tab, false), + R. + +do_safe_match({error, Error}, _L) -> + {error, Error}; +do_safe_match({L, C}, LL) -> + do_safe_match(chunk_match(C), L++LL); +do_safe_match('$end_of_table', L) -> + L; +do_safe_match(badarg, _L) -> + badarg. + +%% What = object | bindings | select +init_chunk_match(Tab, Pat, What, N) when is_integer(N), N >= 0; + N =:= default -> + case compile_match_spec(What, Pat) of + {Spec, MP} -> + case req(dets_server:get_pid(Tab), {match, MP, Spec, N}) of + {done, L} -> + {L, #dets_cont{tab = Tab, what = What, bin = eof}}; + {cont, State} -> + chunk_match(State#dets_cont{what = What, tab = Tab}); + Error -> + Error + end; + badarg -> + badarg + end; +init_chunk_match(_Tab, _Pat, _What, _) -> + badarg. + +chunk_match(State) -> + case catch dets_server:get_pid(State#dets_cont.tab) of + {'EXIT', _Reason} -> + badarg; + _Proc when State#dets_cont.bin =:= eof -> + '$end_of_table'; + Proc -> + case req(Proc, {match_init, State}) of + {cont, {Bins, NewState}} -> + MP = NewState#dets_cont.match_program, + case catch do_foldl_bins(Bins, MP) of + {'EXIT', _} -> + case ets:is_compiled_ms(MP) of + true -> + Bad = dets_utils:bad_object(chunk_match, + Bins), + req(Proc, {corrupt, Bad}); + false -> + badarg + end; + [] -> + chunk_match(NewState); + Terms -> + {Terms, NewState} + end; + Error -> + Error + end + end. + +do_foldl_bins(Bins, true) -> + foldl_bins(Bins, []); +do_foldl_bins(Bins, MP) -> + foldl_bins(Bins, MP, []). + +foldl_bins([], Terms) -> + %% Preserve time order (version 9). + Terms; +foldl_bins([Bin | Bins], Terms) -> + foldl_bins(Bins, [binary_to_term(Bin) | Terms]). + +foldl_bins([], _MP, Terms) -> + %% Preserve time order (version 9). + Terms; +foldl_bins([Bin | Bins], MP, Terms) -> + Term = binary_to_term(Bin), + case ets:match_spec_run([Term], MP) of + [] -> + foldl_bins(Bins, MP, Terms); + [Result] -> + foldl_bins(Bins, MP, [Result | Terms]) + end. + +%% -> {Spec, binary()} | badarg +compile_match_spec(select, ?PATTERN_TO_OBJECT_MATCH_SPEC('_') = Spec) -> + {Spec, true}; +compile_match_spec(select, Spec) -> + case catch ets:match_spec_compile(Spec) of + X when is_binary(X) -> + {Spec, X}; + _ -> + badarg + end; +compile_match_spec(object, Pat) -> + compile_match_spec(select, ?PATTERN_TO_OBJECT_MATCH_SPEC(Pat)); +compile_match_spec(bindings, Pat) -> + compile_match_spec(select, ?PATTERN_TO_BINDINGS_MATCH_SPEC(Pat)); +compile_match_spec(delete, Pat) -> + compile_match_spec(select, ?PATTERN_TO_TRUE_MATCH_SPEC(Pat)). + +%% Process the args list as provided to open_file/2. +defaults(Tab, Args) -> + Defaults0 = #open_args{file = to_list(Tab), + type = set, + keypos = 1, + repair = true, + min_no_slots = default, + max_no_slots = default, + ram_file = false, + delayed_write = ?DEFAULT_CACHE, + auto_save = timer:minutes(?DEFAULT_AUTOSAVE), + access = read_write, + version = default, + debug = false}, + Fun = fun repl/2, + Defaults = lists:foldl(Fun, Defaults0, Args), + case Defaults#open_args.version of + 8 -> + Defaults#open_args{max_no_slots = default}; + _ -> + is_comp_min_max(Defaults) + end. + +to_list(T) when is_atom(T) -> atom_to_list(T); +to_list(T) -> T. + +repl({access, A}, Defs) -> + mem(A, [read, read_write]), + Defs#open_args{access = A}; +repl({auto_save, Int}, Defs) when is_integer(Int), Int >= 0 -> + Defs#open_args{auto_save = Int}; +repl({auto_save, infinity}, Defs) -> + Defs#open_args{auto_save =infinity}; +repl({cache_size, Int}, Defs) when is_integer(Int), Int >= 0 -> + %% Recognized, but ignored. + Defs; +repl({cache_size, infinity}, Defs) -> + Defs; +repl({delayed_write, default}, Defs) -> + Defs#open_args{delayed_write = ?DEFAULT_CACHE}; +repl({delayed_write, {Delay,Size} = C}, Defs) + when is_integer(Delay), Delay >= 0, is_integer(Size), Size >= 0 -> + Defs#open_args{delayed_write = C}; +repl({estimated_no_objects, I}, Defs) -> + repl({min_no_slots, I}, Defs); +repl({file, File}, Defs) -> + Defs#open_args{file = to_list(File)}; +repl({keypos, P}, Defs) when is_integer(P), P > 0 -> + Defs#open_args{keypos =P}; +repl({max_no_slots, I}, Defs) -> + %% Version 9 only. + MaxSlots = is_max_no_slots(I), + Defs#open_args{max_no_slots = MaxSlots}; +repl({min_no_slots, I}, Defs) -> + MinSlots = is_min_no_slots(I), + Defs#open_args{min_no_slots = MinSlots}; +repl({ram_file, Bool}, Defs) -> + mem(Bool, [true, false]), + Defs#open_args{ram_file = Bool}; +repl({repair, T}, Defs) -> + mem(T, [true, false, force]), + Defs#open_args{repair = T}; +repl({type, T}, Defs) -> + mem(T, [set, bag, duplicate_bag]), + Defs#open_args{type =T}; +repl({version, Version}, Defs) -> + V = is_version(Version), + Defs#open_args{version = V}; +repl({debug, Bool}, Defs) -> + %% Not documented. + mem(Bool, [true, false]), + Defs#open_args{debug = Bool}; +repl({_, _}, _) -> + exit(badarg). + +is_min_no_slots(default) -> default; +is_min_no_slots(I) when is_integer(I), I >= ?DEFAULT_MIN_NO_SLOTS -> I; +is_min_no_slots(I) when is_integer(I), I >= 0 -> ?DEFAULT_MIN_NO_SLOTS. + +is_max_no_slots(default) -> default; +is_max_no_slots(I) when is_integer(I), I > 0, I < 1 bsl 31 -> I. + +is_comp_min_max(Defs) -> + #open_args{max_no_slots = Max, min_no_slots = Min, version = V} = Defs, + case V of + _ when Min =:= default -> Defs; + _ when Max =:= default -> Defs; + _ -> true = Min =< Max, Defs + end. + +is_version(default) -> default; +is_version(8) -> 8; +is_version(9) -> 9. + +mem(X, L) -> + case lists:member(X, L) of + true -> true; + false -> exit(badarg) + end. + +options(Options, Keys) when is_list(Options) -> + options(Options, Keys, []); +options(Option, Keys) -> + options([Option], Keys, []). + +options(Options, [Key | Keys], L) when is_list(Options) -> + V = case lists:keysearch(Key, 1, Options) of + {value, {format, Format}} when Format =:= term; + Format =:= bchunk -> + {ok, Format}; + {value, {min_no_slots, I}} -> + case catch is_min_no_slots(I) of + {'EXIT', _} -> badarg; + MinNoSlots -> {ok, MinNoSlots} + end; + {value, {n_objects, default}} -> + {ok, default_option(Key)}; + {value, {n_objects, NObjs}} when is_integer(NObjs), + NObjs >= 1 -> + {ok, NObjs}; + {value, {traverse, select}} -> + {ok, select}; + {value, {traverse, {select, MS}}} -> + {ok, {select, MS}}; + {value, {traverse, first_next}} -> + {ok, first_next}; + {value, {Key, _}} -> + badarg; + false -> + Default = default_option(Key), + {ok, Default} + end, + case V of + badarg -> + {badarg, Key}; + {ok, Value} -> + NewOptions = lists:keydelete(Key, 1, Options), + options(NewOptions, Keys, [Value | L]) + end; +options([], [], L) -> + lists:reverse(L); +options(Options, _, _L) -> + {badarg,Options}. + +default_option(format) -> term; +default_option(min_no_slots) -> default; +default_option(traverse) -> select; +default_option(n_objects) -> default. + +listify(L) when is_list(L) -> + L; +listify(T) -> + [T]. + +treq(Tab, R) -> + case catch dets_server:get_pid(Tab) of + Pid when is_pid(Pid) -> + req(Pid, R); + _ -> + badarg + end. + +req(Proc, R) -> + Ref = erlang:monitor(process, Proc), + Proc ! ?DETS_CALL(self(), R), + receive + {'DOWN', Ref, process, Proc, _Info} -> + badarg; + {Proc, Reply} -> + erlang:demonitor(Ref), + receive + {'DOWN', Ref, process, Proc, _Reason} -> + Reply + after 0 -> + Reply + end + end. + +%% Inlined. +einval({error, {file_error, _, einval}}, A) -> + erlang:error(badarg, A); +einval({error, {file_error, _, badarg}}, A) -> + erlang:error(badarg, A); +einval(Reply, _A) -> + Reply. + +%% Inlined. +badarg(badarg, A) -> + erlang:error(badarg, A); +badarg(Reply, _A) -> + Reply. + +%% Inlined. +undefined(badarg) -> + undefined; +undefined(Reply) -> + Reply. + +%% Inlined. +badarg_exit(badarg, A) -> + erlang:error(badarg, A); +badarg_exit({ok, Reply}, _A) -> + Reply; +badarg_exit(Reply, _A) -> + exit(Reply). + +%%%----------------------------------------------------------------- +%%% Server functions +%%%----------------------------------------------------------------- + +init(Parent, Server) -> + process_flag(trap_exit, true), + open_file_loop(#head{parent = Parent, server = Server}). + +open_file_loop(Head) -> + open_file_loop(Head, 0). + +open_file_loop(Head, N) when element(1, Head#head.update_mode) =:= error -> + open_file_loop2(Head, N); +open_file_loop(Head, N) -> + receive + %% When the table is fixed it can be assumed that at least one + %% traversal is in progress. To speed the traversal up three + %% things have been done: + %% - prioritize match_init, bchunk, next, and match_delete_init; + %% - do not peek the message queue for updates; + %% - wait 1 ms after each update. + %% next is normally followed by lookup, but since lookup is also + %% used when not traversing the table, it is not prioritized. + ?DETS_CALL(From, {match_init, _State} = Op) -> + do_apply_op(Op, From, Head, N); + ?DETS_CALL(From, {bchunk, _State} = Op) -> + do_apply_op(Op, From, Head, N); + ?DETS_CALL(From, {next, _Key} = Op) -> + do_apply_op(Op, From, Head, N); + ?DETS_CALL(From, {match_delete_init, _MP, _Spec} = Op) -> + do_apply_op(Op, From, Head, N); + {'EXIT', Pid, Reason} when Pid =:= Head#head.parent -> + %% Parent orders shutdown. + _NewHead = do_stop(Head), + exit(Reason); + {'EXIT', Pid, Reason} when Pid =:= Head#head.server -> + %% The server is gone. + _NewHead = do_stop(Head), + exit(Reason); + {'EXIT', Pid, _Reason} -> + %% A process fixing the table exits. + H2 = remove_fix(Head, Pid, close), + open_file_loop(H2, N); + {system, From, Req} -> + sys:handle_system_msg(Req, From, Head#head.parent, + ?MODULE, [], Head) + after 0 -> + open_file_loop2(Head, N) + end. + +open_file_loop2(Head, N) -> + receive + ?DETS_CALL(From, Op) -> + do_apply_op(Op, From, Head, N); + {'EXIT', Pid, Reason} when Pid =:= Head#head.parent -> + %% Parent orders shutdown. + _NewHead = do_stop(Head), + exit(Reason); + {'EXIT', Pid, Reason} when Pid =:= Head#head.server -> + %% The server is gone. + _NewHead = do_stop(Head), + exit(Reason); + {'EXIT', Pid, _Reason} -> + %% A process fixing the table exits. + H2 = remove_fix(Head, Pid, close), + open_file_loop(H2, N); + {system, From, Req} -> + sys:handle_system_msg(Req, From, Head#head.parent, + ?MODULE, [], Head); + Message -> + error_logger:format("** dets: unexpected message" + "(ignored): ~w~n", [Message]), + open_file_loop(Head, N) + end. + +do_apply_op(Op, From, Head, N) -> + try apply_op(Op, From, Head, N) of + ok -> + open_file_loop(Head, N); + {N2, H2} when is_record(H2, head), is_integer(N2) -> + open_file_loop(H2, N2); + H2 when is_record(H2, head) -> + open_file_loop(H2, N) + catch + exit:normal -> + exit(normal); + _:Bad -> + Name = Head#head.name, + case dets_utils:debug_mode() of + true -> + %% If stream_op/5 found more requests, this is not + %% the last operation. + error_logger:format + ("** dets: Bug was found when accessing table ~w,~n" + "** dets: operation was ~p and reply was ~w.~n" + "** dets: Stacktrace: ~w~n", + [Name, Op, Bad, erlang:get_stacktrace()]); + false -> + error_logger:format + ("** dets: Bug was found when accessing table ~w~n", + [Name]) + end, + if + From =/= self() -> + From ! {self(), {error, {dets_bug, Name, Op, Bad}}}; + true -> % auto_save | may_grow | {delayed_write, _} + ok + end, + open_file_loop(Head, N) + end. + +apply_op(Op, From, Head, N) -> + case Op of + {add_user, Tab, OpenArgs}-> + #open_args{file = Fname, type = Type, keypos = Keypos, + ram_file = Ram, access = Access, + version = Version} = OpenArgs, + VersionOK = (Version =:= default) or + (Head#head.version =:= Version), + %% min_no_slots and max_no_slots are not tested + Res = if + Tab =:= Head#head.name, + Head#head.keypos =:= Keypos, + Head#head.type =:= Type, + Head#head.ram_file =:= Ram, + Head#head.access =:= Access, + VersionOK, + Fname =:= Head#head.filename -> + ok; + true -> + err({error, incompatible_arguments}) + end, + From ! {self(), Res}, + ok; + auto_save -> + case Head#head.update_mode of + saved -> + Head; + {error, _Reason} -> + Head; + _Dirty when N =:= 0 -> % dirty or new_dirty + %% The updates seems to have declined + dets_utils:vformat("** dets: Auto save of ~p\n", + [Head#head.name]), + {NewHead, _Res} = perform_save(Head, true), + erlang:garbage_collect(), + {0, NewHead}; + dirty -> + %% Reset counter and try later + start_auto_save_timer(Head), + {0, Head} + end; + close -> + From ! {self(), fclose(Head)}, + _NewHead = unlink_fixing_procs(Head), + ?PROFILE(ep:done()), + exit(normal); + {close, Pid} -> + %% Used from dets_server when Pid has closed the table, + %% but the table is still opened by some process. + NewHead = remove_fix(Head, Pid, close), + From ! {self(), status(NewHead)}, + NewHead; + {corrupt, Reason} -> + {H2, Error} = dets_utils:corrupt_reason(Head, Reason), + From ! {self(), Error}, + H2; + {delayed_write, WrTime} -> + delayed_write(Head, WrTime); + info -> + {H2, Res} = finfo(Head), + From ! {self(), Res}, + H2; + {info, Tag} -> + {H2, Res} = finfo(Head, Tag), + From ! {self(), Res}, + H2; + {is_compatible_bchunk_format, Term} -> + Res = test_bchunk_format(Head, Term), + From ! {self(), Res}, + ok; + {internal_open, Ref, Args} -> + ?PROFILE(ep:do()), + case do_open_file(Args, Head#head.parent, Head#head.server,Ref) of + {ok, H2} -> + From ! {self(), ok}, + H2; + Error -> + From ! {self(), Error}, + exit(normal) + end; + may_grow when Head#head.update_mode =/= saved -> + if + Head#head.update_mode =:= dirty -> + %% Won't grow more if the table is full. + {H2, _Res} = + (Head#head.mod):may_grow(Head, 0, many_times), + {N + 1, H2}; + true -> + ok + end; + {set_verbose, What} -> + set_verbose(What), + From ! {self(), ok}, + ok; + {where, Object} -> + {H2, Res} = where_is_object(Head, Object), + From ! {self(), Res}, + H2; + _Message when element(1, Head#head.update_mode) =:= error -> + From ! {self(), status(Head)}, + ok; + %% The following messages assume that the status of the table is OK. + {bchunk_init, Tab} -> + {H2, Res} = do_bchunk_init(Head, Tab), + From ! {self(), Res}, + H2; + {bchunk, State} -> + {H2, Res} = do_bchunk(Head, State), + From ! {self(), Res}, + H2; + delete_all_objects -> + {H2, Res} = fdelete_all_objects(Head), + From ! {self(), Res}, + erlang:garbage_collect(), + {0, H2}; + {delete_key, Keys} when Head#head.update_mode =:= dirty -> + if + Head#head.version =:= 8 -> + {H2, Res} = fdelete_key(Head, Keys), + From ! {self(), Res}, + {N + 1, H2}; + true -> + stream_op(Op, From, [], Head, N) + end; + {delete_object, Objs} when Head#head.update_mode =:= dirty -> + case check_objects(Objs, Head#head.keypos) of + true when Head#head.version =:= 8 -> + {H2, Res} = fdelete_object(Head, Objs), + From ! {self(), Res}, + {N + 1, H2}; + true -> + stream_op(Op, From, [], Head, N); + false -> + From ! {self(), badarg}, + ok + end; + first -> + {H2, Res} = ffirst(Head), + From ! {self(), Res}, + H2; + {initialize, InitFun, Format, MinNoSlots} -> + {H2, Res} = finit(Head, InitFun, Format, MinNoSlots), + From ! {self(), Res}, + erlang:garbage_collect(), + H2; + {insert, Objs} when Head#head.update_mode =:= dirty -> + case check_objects(Objs, Head#head.keypos) of + true when Head#head.version =:= 8 -> + {H2, Res} = finsert(Head, Objs), + From ! {self(), Res}, + {N + 1, H2}; + true -> + stream_op(Op, From, [], Head, N); + false -> + From ! {self(), badarg}, + ok + end; + {insert_new, Objs} when Head#head.update_mode =:= dirty -> + {H2, Res} = finsert_new(Head, Objs), + From ! {self(), Res}, + {N + 1, H2}; + {lookup_keys, Keys} when Head#head.version =:= 8 -> + {H2, Res} = flookup_keys(Head, Keys), + From ! {self(), Res}, + H2; + {lookup_keys, _Keys} -> + stream_op(Op, From, [], Head, N); + {match_init, State} -> + {H2, Res} = fmatch_init(Head, State), + From ! {self(), Res}, + H2; + {match, MP, Spec, NObjs} -> + {H2, Res} = fmatch(Head, MP, Spec, NObjs), + From ! {self(), Res}, + H2; + {member, Key} when Head#head.version =:= 8 -> + {H2, Res} = fmember(Head, Key), + From ! {self(), Res}, + H2; + {member, _Key} = Op -> + stream_op(Op, From, [], Head, N); + {next, Key} -> + {H2, Res} = fnext(Head, Key), + From ! {self(), Res}, + H2; + {match_delete, State} when Head#head.update_mode =:= dirty -> + {H2, Res} = fmatch_delete(Head, State), + From ! {self(), Res}, + {N + 1, H2}; + {match_delete_init, MP, Spec} when Head#head.update_mode =:= dirty -> + {H2, Res} = fmatch_delete_init(Head, MP, Spec), + From ! {self(), Res}, + {N + 1, H2}; + {safe_fixtable, Bool} -> + NewHead = do_safe_fixtable(Head, From, Bool), + From ! {self(), ok}, + NewHead; + {slot, Slot} -> + {H2, Res} = fslot(Head, Slot), + From ! {self(), Res}, + H2; + sync -> + {NewHead, Res} = perform_save(Head, true), + From ! {self(), Res}, + erlang:garbage_collect(), + {0, NewHead}; + {update_counter, Key, Incr} when Head#head.update_mode =:= dirty -> + {NewHead, Res} = do_update_counter(Head, Key, Incr), + From ! {self(), Res}, + {N + 1, NewHead}; + WriteOp when Head#head.update_mode =:= new_dirty -> + H2 = Head#head{update_mode = dirty}, + apply_op(WriteOp, From, H2, 0); + WriteOp when Head#head.access =:= read_write, + Head#head.update_mode =:= saved -> + case catch (Head#head.mod):mark_dirty(Head) of + ok -> + start_auto_save_timer(Head), + H2 = Head#head{update_mode = dirty}, + apply_op(WriteOp, From, H2, 0); + {NewHead, Error} when is_record(NewHead, head) -> + From ! {self(), Error}, + NewHead + end; + WriteOp when is_tuple(WriteOp), Head#head.access =:= read -> + Reason = {access_mode, Head#head.filename}, + From ! {self(), err({error, Reason})}, + ok + end. + +start_auto_save_timer(Head) when Head#head.auto_save =:= infinity -> + ok; +start_auto_save_timer(Head) -> + Millis = Head#head.auto_save, + erlang:send_after(Millis, self(), ?DETS_CALL(self(), auto_save)). + +%% Version 9: Peek the message queue and try to evaluate several +%% lookup requests in parallel. Evalute delete_object, delete and +%% insert as well. +stream_op(Op, Pid, Pids, Head, N) -> + stream_op(Head, Pids, [], N, Pid, Op, Head#head.fixed). + +stream_loop(Head, Pids, C, N, false = Fxd) -> + receive + ?DETS_CALL(From, Message) -> + stream_op(Head, Pids, C, N, From, Message, Fxd) + after 0 -> + stream_end(Head, Pids, C, N, no_more) + end; +stream_loop(Head, Pids, C, N, _Fxd) -> + stream_end(Head, Pids, C, N, no_more). + +stream_op(Head, Pids, C, N, Pid, {lookup_keys,Keys}, Fxd) -> + NC = [{{lookup,Pid},Keys} | C], + stream_loop(Head, Pids, NC, N, Fxd); +stream_op(Head, Pids, C, N, Pid, {insert, _Objects} = Op, Fxd) -> + NC = [Op | C], + stream_loop(Head, [Pid | Pids], NC, N, Fxd); +stream_op(Head, Pids, C, N, Pid, {insert_new, _Objects} = Op, Fxd) -> + NC = [Op | C], + stream_loop(Head, [Pid | Pids], NC, N, Fxd); +stream_op(Head, Pids, C, N, Pid, {delete_key, _Keys} = Op, Fxd) -> + NC = [Op | C], + stream_loop(Head, [Pid | Pids], NC, N, Fxd); +stream_op(Head, Pids, C, N, Pid, {delete_object, _Objects} = Op, Fxd) -> + NC = [Op | C], + stream_loop(Head, [Pid | Pids], NC, N, Fxd); +stream_op(Head, Pids, C, N, Pid, {member, Key}, Fxd) -> + NC = [{{lookup,[Pid]},[Key]} | C], + stream_loop(Head, Pids, NC, N, Fxd); +stream_op(Head, Pids, C, N, Pid, Op, _Fxd) -> + stream_end(Head, Pids, C, N, {Pid,Op}). + +stream_end(Head, Pids0, C, N, Next) -> + case catch update_cache(Head, lists:reverse(C)) of + {Head1, [], PwriteList} -> + stream_end1(Pids0, Next, N, C, Head1, PwriteList); + {Head1, Found, PwriteList} -> + %% Possibly an optimization: reply to lookup requests + %% first, then write stuff. This makes it possible for + %% clients to continue while the disk is accessed. + %% (Replies to lookup requests are sent earlier than + %% replies to delete and insert requests even if the + %% latter requests were made before the lookup requests, + %% which can be confusing.) + lookup_replies(Found), + stream_end1(Pids0, Next, N, C, Head1, PwriteList); + Head1 when is_record(Head1, head) -> + stream_end2(Pids0, Pids0, Next, N, C, Head1, ok); + {Head1, Error} when is_record(Head1, head) -> + %% Dig out the processes that did lookup or member. + Fun = fun({{lookup,[Pid]},_Keys}, L) -> [Pid | L]; + ({{lookup,Pid},_Keys}, L) -> [Pid | L]; + (_, L) -> L + end, + LPs0 = lists:foldl(Fun, [], C), + LPs = lists:usort(lists:flatten(LPs0)), + stream_end2(Pids0 ++ LPs, Pids0, Next, N, C, Head1, Error); + DetsError -> + throw(DetsError) + end. + +stream_end1(Pids, Next, N, C, Head, []) -> + stream_end2(Pids, Pids, Next, N, C, Head, ok); +stream_end1(Pids, Next, N, C, Head, PwriteList) -> + {Head1, PR} = (catch dets_utils:pwrite(Head, PwriteList)), + stream_end2(Pids, Pids, Next, N, C, Head1, PR). + +stream_end2([Pid | Pids], Ps, Next, N, C, Head, Reply) -> + Pid ! {self(), Reply}, + stream_end2(Pids, Ps, Next, N+1, C, Head, Reply); +stream_end2([], Ps, no_more, N, C, Head, _Reply) -> + penalty(Head, Ps, C), + {N, Head}; +stream_end2([], _Ps, {From, Op}, N, _C, Head, _Reply) -> + apply_op(Op, From, Head, N). + +penalty(H, _Ps, _C) when H#head.fixed =:= false -> + ok; +penalty(_H, _Ps, [{{lookup,_Pids},_Keys}]) -> + ok; +penalty(#head{fixed = {_,[{Pid,_}]}}, [Pid], _C) -> + ok; +penalty(_H, _Ps, _C) -> + timer:sleep(1). + +lookup_replies([{P,O}]) -> + lookup_reply(P, O); +lookup_replies(Q) -> + [{P,O} | L] = dets_utils:family(Q), + lookup_replies(P, lists:append(O), L). + +lookup_replies(P, O, []) -> + lookup_reply(P, O); +lookup_replies(P, O, [{P2,O2} | L]) -> + lookup_reply(P, O), + lookup_replies(P2, lists:append(O2), L). + +%% If a list of Pid then op was {member, Key}. Inlined. +lookup_reply([P], O) -> + P ! {self(), O =/= []}; +lookup_reply(P, O) -> + P ! {self(), O}. + +%%----------------------------------------------------------------- +%% Callback functions for system messages handling. +%%----------------------------------------------------------------- +system_continue(_Parent, _, Head) -> + open_file_loop(Head). + +system_terminate(Reason, _Parent, _, Head) -> + _NewHead = do_stop(Head), + exit(Reason). + +%%----------------------------------------------------------------- +%% Code for upgrade. +%%----------------------------------------------------------------- +system_code_change(State, _Module, _OldVsn, _Extra) -> + {ok, State}. + + +%%%---------------------------------------------------------------------- +%%% Internal functions +%%%---------------------------------------------------------------------- + +constants(FH, FileName) -> + Version = FH#fileheader.version, + if + Version =< 8 -> + dets_v8:constants(); + Version =:= 9 -> + dets_v9:constants(); + true -> + throw({error, {not_a_dets_file, FileName}}) + end. + +%% -> {ok, Fd, fileheader()} | throw(Error) +read_file_header(FileName, Access, RamFile) -> + BF = if + RamFile -> + case file:read_file(FileName) of + {ok, B} -> B; + Err -> dets_utils:file_error(FileName, Err) + end; + true -> + FileName + end, + {ok, Fd} = dets_utils:open(BF, open_args(Access, RamFile)), + {ok, <>} = + dets_utils:pread_close(Fd, FileName, ?FILE_FORMAT_VERSION_POS, 4), + if + Version =< 8 -> + dets_v8:read_file_header(Fd, FileName); + Version =:= 9 -> + dets_v9:read_file_header(Fd, FileName); + true -> + throw({error, {not_a_dets_file, FileName}}) + end. + +fclose(Head) -> + {Head1, Res} = perform_save(Head, false), + case Head1#head.ram_file of + true -> + ignore; + false -> + dets_utils:stop_disk_map(), + file:close(Head1#head.fptr) + end, + Res. + +%% -> {NewHead, Res} +perform_save(Head, DoSync) when Head#head.update_mode =:= dirty; + Head#head.update_mode =:= new_dirty -> + case catch begin + {Head1, []} = write_cache(Head), + {Head2, ok} = (Head1#head.mod):do_perform_save(Head1), + ok = ensure_written(Head2, DoSync), + {Head2#head{update_mode = saved}, ok} + end of + {NewHead, _} = Reply when is_record(NewHead, head) -> + Reply + end; +perform_save(Head, _DoSync) -> + {Head, status(Head)}. + +ensure_written(Head, DoSync) when Head#head.ram_file -> + {ok, EOF} = dets_utils:position(Head, eof), + {ok, Bin} = dets_utils:pread(Head, 0, EOF, 0), + if + DoSync -> + dets_utils:write_file(Head, Bin); + not DoSync -> + case file:write_file(Head#head.filename, Bin) of + ok -> + ok; + Error -> + dets_utils:corrupt_file(Head, Error) + end + end; +ensure_written(Head, true) when not Head#head.ram_file -> + dets_utils:sync(Head); +ensure_written(Head, false) when not Head#head.ram_file -> + ok. + +%% -> {NewHead, {cont(), [binary()]}} | {NewHead, Error} +do_bchunk_init(Head, Tab) -> + case catch write_cache(Head) of + {H2, []} -> + case (H2#head.mod):table_parameters(H2) of + undefined -> + {H2, {error, old_version}}; + Parms -> + L = dets_utils:all_allocated(H2), + C0 = #dets_cont{no_objs = default, bin = <<>>, alloc = L}, + BinParms = term_to_binary(Parms), + {H2, {C0#dets_cont{tab = Tab, what = bchunk}, [BinParms]}} + end; + {NewHead, _} = HeadError when is_record(NewHead, head) -> + HeadError + end. + +%% -> {NewHead, {cont(), [binary()]}} | {NewHead, Error} +do_bchunk(Head, State) -> + case dets_v9:read_bchunks(Head, State#dets_cont.alloc) of + {error, Reason} -> + dets_utils:corrupt_reason(Head, Reason); + {finished, Bins} -> + {Head, {State#dets_cont{bin = eof}, Bins}}; + {Bins, NewL} -> + {Head, {State#dets_cont{alloc = NewL}, Bins}} + end. + +%% -> {NewHead, Result} +fdelete_all_objects(Head) when Head#head.fixed =:= false -> + case catch do_delete_all_objects(Head) of + {ok, NewHead} -> + start_auto_save_timer(NewHead), + {NewHead, ok}; + {error, Reason} -> + dets_utils:corrupt_reason(Head, Reason) + end; +fdelete_all_objects(Head) -> + {Head, fixed}. + +do_delete_all_objects(Head) -> + #head{fptr = Fd, name = Tab, filename = Fname, type = Type, keypos = Kp, + ram_file = Ram, auto_save = Auto, min_no_slots = MinSlots, + max_no_slots = MaxSlots, cache = Cache} = Head, + CacheSz = dets_utils:cache_size(Cache), + ok = dets_utils:truncate(Fd, Fname, bof), + (Head#head.mod):initiate_file(Fd, Tab, Fname, Type, Kp, MinSlots, MaxSlots, + Ram, CacheSz, Auto, true). + +%% -> {NewHead, Reply}, Reply = ok | Error. +fdelete_key(Head, Keys) -> + do_delete(Head, Keys, delete_key). + +%% -> {NewHead, Reply}, Reply = ok | badarg | Error. +fdelete_object(Head, Objects) -> + do_delete(Head, Objects, delete_object). + +ffirst(H) -> + Ref = make_ref(), + case catch {Ref, ffirst1(H)} of + {Ref, {NH, R}} -> + {NH, {ok, R}}; + {NH, R} when is_record(NH, head) -> + {NH, {error, R}} + end. + +ffirst1(H) -> + check_safe_fixtable(H), + {NH, []} = write_cache(H), + ffirst(NH, 0). + +ffirst(H, Slot) -> + case (H#head.mod):slot_objs(H, Slot) of + '$end_of_table' -> {H, '$end_of_table'}; + [] -> ffirst(H, Slot+1); + [X|_] -> {H, element(H#head.keypos, X)} + end. + +%% -> {NewHead, Reply}, Reply = ok | badarg | Error. +finsert(Head, Objects) -> + case catch update_cache(Head, Objects, insert) of + {NewHead, []} -> + {NewHead, ok}; + {NewHead, _} = HeadError when is_record(NewHead, head) -> + HeadError + end. + +%% -> {NewHead, Reply}, Reply = ok | badarg | Error. +finsert_new(Head, Objects) -> + KeyPos = Head#head.keypos, + case catch lists:map(fun(Obj) -> element(KeyPos, Obj) end, Objects) of + Keys when is_list(Keys) -> + case catch update_cache(Head, Keys, {lookup, nopid}) of + {Head1, PidObjs} when is_list(PidObjs) -> + case lists:all(fun({_P,OL}) -> OL =:= [] end, PidObjs) of + true -> + case catch update_cache(Head1, Objects, insert) of + {NewHead, []} -> + {NewHead, true}; + {NewHead, Error} when is_record(NewHead, head) -> + {NewHead, Error} + end; + false=Reply -> + {Head1, Reply} + end; + {NewHead, _} = HeadError when is_record(NewHead, head) -> + HeadError + end; + _ -> + {Head, badarg} + end. + +do_safe_fixtable(Head, Pid, true) -> + case Head#head.fixed of + false -> + link(Pid), + Fixed = {erlang:now(), [{Pid, 1}]}, + Ftab = dets_utils:get_freelists(Head), + Head#head{fixed = Fixed, freelists = {Ftab, Ftab}}; + {TimeStamp, Counters} -> + case lists:keysearch(Pid, 1, Counters) of + {value, {Pid, Counter}} -> % when Counter > 1 + NewCounters = lists:keyreplace(Pid, 1, Counters, + {Pid, Counter+1}), + Head#head{fixed = {TimeStamp, NewCounters}}; + false -> + link(Pid), + Fixed = {TimeStamp, [{Pid, 1} | Counters]}, + Head#head{fixed = Fixed} + end + end; +do_safe_fixtable(Head, Pid, false) -> + remove_fix(Head, Pid, false). + +remove_fix(Head, Pid, How) -> + case Head#head.fixed of + false -> + Head; + {TimeStamp, Counters} -> + case lists:keysearch(Pid, 1, Counters) of + %% How =:= close when Pid closes the table. + {value, {Pid, Counter}} when Counter =:= 1; How =:= close -> + unlink(Pid), + case lists:keydelete(Pid, 1, Counters) of + [] -> + check_growth(Head), + erlang:garbage_collect(), + Head#head{fixed = false, + freelists = dets_utils:get_freelists(Head)}; + NewCounters -> + Head#head{fixed = {TimeStamp, NewCounters}} + end; + {value, {Pid, Counter}} -> + NewCounters = lists:keyreplace(Pid, 1, Counters, + {Pid, Counter-1}), + Head#head{fixed = {TimeStamp, NewCounters}}; + false -> + Head + end + end. + +do_stop(Head) -> + unlink_fixing_procs(Head), + fclose(Head). + +unlink_fixing_procs(Head) -> + case Head#head.fixed of + false -> + Head; + {_, Counters} -> + lists:map(fun({Pid, _Counter}) -> unlink(Pid) end, Counters), + Head#head{fixed = false, + freelists = dets_utils:get_freelists(Head)} + end. + +check_growth(#head{access = read}) -> + ok; +check_growth(Head) -> + NoThings = no_things(Head), + if + NoThings > Head#head.next -> + erlang:send_after(200, self(), + ?DETS_CALL(self(), may_grow)); % Catch up. + true -> + ok + end. + +finfo(H) -> + case catch write_cache(H) of + {H2, []} -> + Info = (catch [{type, H2#head.type}, + {keypos, H2#head.keypos}, + {size, H2#head.no_objects}, + {file_size, + file_size(H2#head.fptr, H2#head.filename)}, + {filename, H2#head.filename}]), + {H2, Info}; + {H2, _} = HeadError when is_record(H2, head) -> + HeadError + end. + +finfo(H, access) -> {H, H#head.access}; +finfo(H, auto_save) -> {H, H#head.auto_save}; +finfo(H, bchunk_format) -> + case catch write_cache(H) of + {H2, []} -> + case (H2#head.mod):table_parameters(H2) of + undefined = Undef -> + {H2, Undef}; + Parms -> + {H2, term_to_binary(Parms)} + end; + {H2, _} = HeadError when is_record(H2, head) -> + HeadError + end; +finfo(H, delayed_write) -> % undocumented + {H, dets_utils:cache_size(H#head.cache)}; +finfo(H, filename) -> {H, H#head.filename}; +finfo(H, file_size) -> + case catch write_cache(H) of + {H2, []} -> + {H2, catch file_size(H#head.fptr, H#head.filename)}; + {H2, _} = HeadError when is_record(H2, head) -> + HeadError + end; +finfo(H, fixed) -> + %% true if fixtable/2 has been called + {H, not (H#head.fixed =:= false)}; +finfo(H, hash) -> {H, H#head.hash_bif}; +finfo(H, keypos) -> {H, H#head.keypos}; +finfo(H, memory) -> finfo(H, file_size); +finfo(H, no_objects) -> finfo(H, size); +finfo(H, no_keys) -> + case catch write_cache(H) of + {H2, []} -> + {H2, H2#head.no_keys}; + {H2, _} = HeadError when is_record(H2, head) -> + HeadError + end; +finfo(H, no_slots) -> {H, (H#head.mod):no_slots(H)}; +finfo(H, pid) -> {H, self()}; +finfo(H, ram_file) -> {H, H#head.ram_file}; +finfo(H, safe_fixed) -> {H, H#head.fixed}; +finfo(H, size) -> + case catch write_cache(H) of + {H2, []} -> + {H2, H2#head.no_objects}; + {H2, _} = HeadError when is_record(H2, head) -> + HeadError + end; +finfo(H, type) -> {H, H#head.type}; +finfo(H, version) -> {H, H#head.version}; +finfo(H, _) -> {H, undefined}. + +file_size(Fd, FileName) -> + {ok, Pos} = dets_utils:position(Fd, FileName, eof), + Pos. + +test_bchunk_format(_Head, undefined) -> + false; +test_bchunk_format(Head, _Term) when Head#head.version =:= 8 -> + false; +test_bchunk_format(Head, Term) -> + dets_v9:try_bchunk_header(Term, Head) =/= not_ok. + +do_open_file([Fname, Verbose], Parent, Server, Ref) -> + case catch fopen2(Fname, Ref) of + {error, _Reason} = Error -> + err(Error); + {ok, Head} -> + maybe_put(verbose, Verbose), + {ok, Head#head{parent = Parent, server = Server}}; + {'EXIT', _Reason} = Error -> + Error; + Bad -> + error_logger:format + ("** dets: Bug was found in open_file/1, reply was ~w.~n", + [Bad]), + {error, {dets_bug, Fname, Bad}} + end; +do_open_file([Tab, OpenArgs, Verb], Parent, Server, Ref) -> + case catch fopen3(Tab, OpenArgs) of + {error, {tooshort, _}} -> + file:delete(OpenArgs#open_args.file), + do_open_file([Tab, OpenArgs, Verb], Parent, Server, Ref); + {error, _Reason} = Error -> + err(Error); + {ok, Head} -> + maybe_put(verbose, Verb), + {ok, Head#head{parent = Parent, server = Server}}; + {'EXIT', _Reason} = Error -> + Error; + Bad -> + error_logger:format + ("** dets: Bug was found in open_file/2, arguments were~n" + "** dets: ~w and reply was ~w.~n", + [OpenArgs, Bad]), + {error, {dets_bug, Tab, {open_file, OpenArgs}, Bad}} + end. + +maybe_put(_, undefined) -> + ignore; +maybe_put(K, V) -> + put(K, V). + +%% -> {Head, Result}, Result = ok | Error | {thrown, Error} | badarg +finit(Head, InitFun, _Format, _NoSlots) when Head#head.access =:= read -> + _ = (catch InitFun(close)), + {Head, {error, {access_mode, Head#head.filename}}}; +finit(Head, InitFun, _Format, _NoSlots) when Head#head.fixed =/= false -> + _ = (catch InitFun(close)), + {Head, {error, {fixed_table, Head#head.name}}}; +finit(Head, InitFun, Format, NoSlots) -> + case catch do_finit(Head, InitFun, Format, NoSlots) of + {ok, NewHead} -> + check_growth(NewHead), + start_auto_save_timer(NewHead), + {NewHead, ok}; + badarg -> + {Head, badarg}; + Error -> + dets_utils:corrupt(Head, Error) + end. + +%% -> {ok, NewHead} | throw(badarg) | throw(Error) +do_finit(Head, Init, Format, NoSlots) -> + #head{fptr = Fd, type = Type, keypos = Kp, auto_save = Auto, + cache = Cache, filename = Fname, ram_file = Ram, + min_no_slots = MinSlots0, max_no_slots = MaxSlots, + name = Tab, update_mode = UpdateMode, mod = HMod} = Head, + CacheSz = dets_utils:cache_size(Cache), + {How, Head1} = + case Format of + term when is_integer(NoSlots), NoSlots > MaxSlots -> + throw(badarg); + term -> + MinSlots = choose_no_slots(NoSlots, MinSlots0), + if + UpdateMode =:= new_dirty, MinSlots =:= MinSlots0 -> + {general_init, Head}; + true -> + ok = dets_utils:truncate(Fd, Fname, bof), + {ok, H} = HMod:initiate_file(Fd, Tab, Fname, Type, Kp, + MinSlots, MaxSlots, Ram, + CacheSz, Auto, false), + {general_init, H} + end; + bchunk -> + ok = dets_utils:truncate(Fd, Fname, bof), + {bchunk_init, Head} + end, + case How of + bchunk_init -> + case HMod:bchunk_init(Head1, Init) of + {ok, NewHead} -> + {ok, NewHead#head{update_mode = dirty}}; + Error -> + Error + end; + general_init -> + Cntrs = ets:new(dets_init, []), + Input = HMod:bulk_input(Head1, Init, Cntrs), + SlotNumbers = {Head1#head.min_no_slots, bulk_init, MaxSlots}, + {Reply, SizeData} = + do_sort(Head1, SlotNumbers, Input, Cntrs, Fname, not_used), + Bulk = true, + case Reply of + {ok, NoDups, H1} -> + fsck_copy(SizeData, H1, Bulk, NoDups); + Else -> + close_files(Bulk, SizeData, Head1), + Else + end + end. + +%% -> {NewHead, [LookedUpObject]} | {NewHead, Error} +flookup_keys(Head, Keys) -> + case catch update_cache(Head, Keys, {lookup, nopid}) of + {NewHead, [{_NoPid,Objs}]} -> + {NewHead, Objs}; + {NewHead, L} when is_list(L) -> + {NewHead, lists:flatmap(fun({_Pid,OL}) -> OL end, L)}; + {NewHead, _} = HeadError when is_record(NewHead, head) -> + HeadError + end. + +%% -> {NewHead, Result} +fmatch_init(Head, C) -> + case scan(Head, C) of + {scan_error, Reason} -> + dets_utils:corrupt_reason(Head, Reason); + {Ts, NC} -> + {Head, {cont, {Ts, NC}}} + end. + +%% -> {NewHead, Result} +fmatch(Head, MP, Spec, N) -> + KeyPos = Head#head.keypos, + case find_all_keys(Spec, KeyPos, []) of + [] -> + %% Complete match + case catch write_cache(Head) of + {NewHead, []} -> + C0 = init_scan(NewHead, N), + {NewHead, {cont, C0#dets_cont{match_program = MP}}}; + {NewHead, _} = HeadError when is_record(NewHead, head) -> + HeadError + end; + List -> + Keys = lists:usort(List), + {NewHead, Reply} = flookup_keys(Head, Keys), + case Reply of + Objs when is_list(Objs) -> + MatchingObjs = ets:match_spec_run(Objs, MP), + {NewHead, {done, MatchingObjs}}; + Error -> + {NewHead, Error} + end + end. + +find_all_keys([], _, Ks) -> + Ks; +find_all_keys([{H,_,_} | T], KeyPos, Ks) when is_tuple(H) -> + case tuple_size(H) of + Enough when Enough >= KeyPos -> + Key = element(KeyPos, H), + case contains_variable(Key) of + true -> + []; + false -> + find_all_keys(T, KeyPos, [Key | Ks]) + end; + _ -> + find_all_keys(T, KeyPos, Ks) + end; +find_all_keys(_, _, _) -> + []. + +contains_variable('_') -> + true; +contains_variable(A) when is_atom(A) -> + case atom_to_list(A) of + [$$ | T] -> + case (catch list_to_integer(T)) of + {'EXIT', _} -> + false; + _ -> + true + end; + _ -> + false + end; +contains_variable(T) when is_tuple(T) -> + contains_variable(tuple_to_list(T)); +contains_variable([]) -> + false; +contains_variable([H|T]) -> + case contains_variable(H) of + true -> + true; + false -> + contains_variable(T) + end; +contains_variable(_) -> + false. + +%% -> {NewHead, Res} +fmatch_delete_init(Head, MP, Spec) -> + KeyPos = Head#head.keypos, + case catch + case find_all_keys(Spec, KeyPos, []) of + [] -> + do_fmatch_delete_var_keys(Head, MP, Spec); + List -> + Keys = lists:usort(List), + do_fmatch_constant_keys(Head, Keys, MP) + end of + {NewHead, _} = Reply when is_record(NewHead, head) -> + Reply + end. + +%% A note: If deleted objects reside in a bucket with other objects +%% that are not deleted, the bucket is moved. If the address of the +%% moved bucket is greater than original bucket address the kept +%% objects will be read once again later on. +%% -> {NewHead, Res} +fmatch_delete(Head, C) -> + case scan(Head, C) of + {scan_error, Reason} -> + dets_utils:corrupt_reason(Head, Reason); + {[], _} -> + {Head, {done, 0}}; + {RTs, NC} -> + MP = C#dets_cont.match_program, + case catch filter_binary_terms(RTs, MP, []) of + {'EXIT', _} -> + Bad = dets_utils:bad_object(fmatch_delete, RTs), + dets_utils:corrupt_reason(Head, Bad); + Terms -> + do_fmatch_delete(Head, Terms, NC) + end + end. + +do_fmatch_delete_var_keys(Head, _MP, ?PATTERN_TO_TRUE_MATCH_SPEC('_')) + when Head#head.fixed =:= false -> + %% Handle the case where the file is emptied efficiently. + %% Empty the cache just to get the number of objects right. + {Head1, []} = write_cache(Head), + N = Head1#head.no_objects, + case fdelete_all_objects(Head1) of + {NewHead, ok} -> + {NewHead, {done, N}}; + Reply -> + Reply + end; +do_fmatch_delete_var_keys(Head, MP, _Spec) -> + {NewHead, []} = write_cache(Head), + C0 = init_scan(NewHead, default), + {NewHead, {cont, C0#dets_cont{match_program = MP}, 0}}. + +do_fmatch_constant_keys(Head, Keys, MP) -> + case flookup_keys(Head, Keys) of + {NewHead, ReadTerms} when is_list(ReadTerms) -> + Terms = filter_terms(ReadTerms, MP, []), + do_fmatch_delete(NewHead, Terms, fixed); + Reply -> + Reply + end. + +filter_binary_terms([Bin | Bins], MP, L) -> + Term = binary_to_term(Bin), + case ets:match_spec_run([Term], MP) of + [true] -> + filter_binary_terms(Bins, MP, [Term | L]); + _ -> + filter_binary_terms(Bins, MP, L) + end; +filter_binary_terms([], _MP, L) -> + L. + +filter_terms([Term | Terms], MP, L) -> + case ets:match_spec_run([Term], MP) of + [true] -> + filter_terms(Terms, MP, [Term | L]); + _ -> + filter_terms(Terms, MP, L) + end; +filter_terms([], _MP, L) -> + L. + +do_fmatch_delete(Head, Terms, What) -> + N = length(Terms), + case do_delete(Head, Terms, delete_object) of + {NewHead, ok} when What =:= fixed -> + {NewHead, {done, N}}; + {NewHead, ok} -> + {NewHead, {cont, What, N}}; + Reply -> + Reply + end. + +do_delete(Head, Things, What) -> + case catch update_cache(Head, Things, What) of + {NewHead, []} -> + {NewHead, ok}; + {NewHead, _} = HeadError when is_record(NewHead, head) -> + HeadError + end. + +fmember(Head, Key) -> + case catch begin + {Head2, [{_NoPid,Objs}]} = + update_cache(Head, [Key], {lookup, nopid}), + {Head2, Objs =/= []} + end of + {NewHead, _} = Reply when is_record(NewHead, head) -> + Reply + end. + +fnext(Head, Key) -> + Slot = (Head#head.mod):db_hash(Key, Head), + Ref = make_ref(), + case catch {Ref, fnext(Head, Key, Slot)} of + {Ref, {H, R}} -> + {H, {ok, R}}; + {NewHead, _} = HeadError when is_record(NewHead, head) -> + HeadError + end. + +fnext(H, Key, Slot) -> + {NH, []} = write_cache(H), + case (H#head.mod):slot_objs(NH, Slot) of + '$end_of_table' -> {NH, '$end_of_table'}; + L -> fnext_search(NH, Key, Slot, L) + end. + +fnext_search(H, K, Slot, L) -> + Kp = H#head.keypos, + case beyond_key(K, Kp, L) of + [] -> fnext_slot(H, K, Slot+1); + L2 -> {H, element(H#head.keypos, hd(L2))} + end. + +%% We've got to continue to search for the next key in the next slot +fnext_slot(H, K, Slot) -> + case (H#head.mod):slot_objs(H, Slot) of + '$end_of_table' -> {H, '$end_of_table'}; + [] -> fnext_slot(H, K, Slot+1); + L -> {H, element(H#head.keypos, hd(L))} + end. + +beyond_key(_K, _Kp, []) -> []; +beyond_key(K, Kp, [H|T]) -> + case dets_utils:cmp(element(Kp, H), K) of + 0 -> beyond_key2(K, Kp, T); + _ -> beyond_key(K, Kp, T) + end. + +beyond_key2(_K, _Kp, []) -> []; +beyond_key2(K, Kp, [H|T]=L) -> + case dets_utils:cmp(element(Kp, H), K) of + 0 -> beyond_key2(K, Kp, T); + _ -> L + end. + +%% Open an already existing file, no arguments +%% -> {ok, head()} | throw(Error) +fopen2(Fname, Tab) -> + case file:read_file_info(Fname) of + {ok, _} -> + Acc = read_write, + Ram = false, + %% Fd is not always closed upon error, but exit is soon called. + {ok, Fd, FH} = read_file_header(Fname, Acc, Ram), + Mod = FH#fileheader.mod, + case Mod:check_file_header(FH, Fd) of + {error, not_closed} -> + io:format(user,"dets: file ~p not properly closed, " + "repairing ...~n", [Fname]), + Version = default, + case fsck(Fd, Tab, Fname, FH, default, default, Version) of + ok -> + fopen2(Fname, Tab); + Error -> + throw(Error) + end; + {ok, Head, ExtraInfo} -> + open_final(Head, Fname, Acc, Ram, ?DEFAULT_CACHE, + Tab, ExtraInfo, false); + {error, Reason} -> + throw({error, {Reason, Fname}}) + end; + Error -> + dets_utils:file_error(Fname, Error) + end. + +%% Open and possibly create and initialize a file +%% -> {ok, head()} | throw(Error) +fopen3(Tab, OpenArgs) -> + FileName = OpenArgs#open_args.file, + case file:read_file_info(FileName) of + {ok, _} -> + fopen_existing_file(Tab, OpenArgs); + Error when OpenArgs#open_args.access =:= read -> + dets_utils:file_error(FileName, Error); + _Error -> + fopen_init_file(Tab, OpenArgs) + end. + +fopen_existing_file(Tab, OpenArgs) -> + #open_args{file = Fname, type = Type, keypos = Kp, repair = Rep, + min_no_slots = MinSlots, max_no_slots = MaxSlots, + ram_file = Ram, delayed_write = CacheSz, auto_save = + Auto, access = Acc, version = Version, debug = Debug} = + OpenArgs, + %% Fd is not always closed upon error, but exit is soon called. + {ok, Fd, FH} = read_file_header(Fname, Acc, Ram), + V9 = (Version =:= 9) or (Version =:= default), + MinF = (MinSlots =:= default) or (MinSlots =:= FH#fileheader.min_no_slots), + MaxF = (MaxSlots =:= default) or (MaxSlots =:= FH#fileheader.max_no_slots), + Do = case (FH#fileheader.mod):check_file_header(FH, Fd) of + {ok, Head, true} when Rep =:= force, Acc =:= read_write, + FH#fileheader.version =:= 9, + FH#fileheader.no_colls =/= undefined, + MinF, MaxF, V9 -> + {compact, Head}; + {ok, _Head, _Extra} when Rep =:= force, Acc =:= read -> + throw({error, {access_mode, Fname}}); + {ok, Head, need_compacting} when Acc =:= read -> + {final, Head, true}; % Version 8 only. + {ok, _Head, need_compacting} when Rep =:= true -> + %% The file needs to be compacted due to a very big + %% and fragmented free_list. Version 8 only. + M = " is now compacted ...", + {repair, M}; + {ok, _Head, _Extra} when Rep =:= force -> + M = ", repair forced.", + {repair, M}; + {ok, Head, ExtraInfo} -> + {final, Head, ExtraInfo}; + {error, not_closed} when Rep =:= force, Acc =:= read_write -> + M = ", repair forced.", + {repair, M}; + {error, not_closed} when Rep =:= true, Acc =:= read_write -> + M = " not properly closed, repairing ...", + {repair, M}; + {error, not_closed} when Rep =:= false -> + throw({error, {needs_repair, Fname}}); + {error, version_bump} when Rep =:= true, Acc =:= read_write -> + %% Version 8 only + M = " old version, upgrading ...", + {repair, M}; + {error, Reason} -> + throw({error, {Reason, Fname}}) + end, + case Do of + _ when FH#fileheader.type =/= Type -> + throw({error, {type_mismatch, Fname}}); + _ when FH#fileheader.keypos =/= Kp -> + throw({error, {keypos_mismatch, Fname}}); + {compact, SourceHead} -> + io:format(user, "dets: file ~p is now compacted ...~n", [Fname]), + {ok, NewSourceHead} = open_final(SourceHead, Fname, read, false, + ?DEFAULT_CACHE, Tab, true, + Debug), + case catch compact(NewSourceHead) of + ok -> + erlang:garbage_collect(), + fopen3(Tab, OpenArgs#open_args{repair = false}); + _Err -> + _ = file:close(Fd), + dets_utils:stop_disk_map(), + io:format(user, "dets: compaction of file ~p failed, " + "now repairing ...~n", [Fname]), + {ok, Fd2, _FH} = read_file_header(Fname, Acc, Ram), + do_repair(Fd2, Tab, Fname, FH, MinSlots, MaxSlots, + Version, OpenArgs) + end; + {repair, Mess} -> + io:format(user, "dets: file ~p~s~n", [Fname, Mess]), + do_repair(Fd, Tab, Fname, FH, MinSlots, MaxSlots, + Version, OpenArgs); + _ when FH#fileheader.version =/= Version, Version =/= default -> + throw({error, {version_mismatch, Fname}}); + {final, H, EI} -> + H1 = H#head{auto_save = Auto}, + open_final(H1, Fname, Acc, Ram, CacheSz, Tab, EI, Debug) + end. + +do_repair(Fd, Tab, Fname, FH, MinSlots, MaxSlots, Version, OpenArgs) -> + case fsck(Fd, Tab, Fname, FH, MinSlots, MaxSlots, Version) of + ok -> + %% No need to update 'version'. + erlang:garbage_collect(), + fopen3(Tab, OpenArgs#open_args{repair = false}); + Error -> + throw(Error) + end. + +%% -> {ok, head()} | throw(Error) +open_final(Head, Fname, Acc, Ram, CacheSz, Tab, ExtraInfo, Debug) -> + Head1 = Head#head{access = Acc, + ram_file = Ram, + filename = Fname, + name = Tab, + cache = dets_utils:new_cache(CacheSz)}, + init_disk_map(Head1#head.version, Tab, Debug), + Mod = Head#head.mod, + Mod:cache_segps(Head1#head.fptr, Fname, Head1#head.next), + Ftab = Mod:init_freelist(Head1, ExtraInfo), + check_growth(Head1), + NewHead = Head1#head{freelists = Ftab}, + {ok, NewHead}. + +%% -> {ok, head()} | throw(Error) +fopen_init_file(Tab, OpenArgs) -> + #open_args{file = Fname, type = Type, keypos = Kp, + min_no_slots = MinSlotsArg, max_no_slots = MaxSlotsArg, + ram_file = Ram, delayed_write = CacheSz, auto_save = Auto, + version = UseVersion, debug = Debug} = OpenArgs, + MinSlots = choose_no_slots(MinSlotsArg, ?DEFAULT_MIN_NO_SLOTS), + MaxSlots = choose_no_slots(MaxSlotsArg, ?DEFAULT_MAX_NO_SLOTS), + FileSpec = if + Ram -> []; + true -> Fname + end, + {ok, Fd} = dets_utils:open(FileSpec, open_args(read_write, Ram)), + Version = if + UseVersion =:= default -> + case os:getenv("DETS_USE_FILE_FORMAT") of + "8" -> 8; + _ -> 9 + end; + true -> + UseVersion + end, + Mod = version2module(Version), + %% No need to truncate an empty file. + init_disk_map(Version, Tab, Debug), + case catch Mod:initiate_file(Fd, Tab, Fname, Type, Kp, MinSlots, MaxSlots, + Ram, CacheSz, Auto, true) of + {error, Reason} when Ram -> + file:close(Fd), + throw({error, Reason}); + {error, Reason} -> + file:close(Fd), + file:delete(Fname), + throw({error, Reason}); + {ok, Head} -> + start_auto_save_timer(Head), + %% init_table does not need to truncate and write header + {ok, Head#head{update_mode = new_dirty}} + end. + +%% Debug. +init_disk_map(9, Name, Debug) -> + case Debug orelse dets_utils:debug_mode() of + true -> + dets_utils:init_disk_map(Name); + false -> + ok + end; +init_disk_map(_Version, _Name, _Debug) -> + ok. + +open_args(Access, RamFile) -> + A1 = case Access of + read -> []; + read_write -> [write] + end, + A2 = case RamFile of + true -> [ram]; + false -> [raw] + end, + A1 ++ A2 ++ [binary, read]. + +version2module(V) when V =< 8 -> dets_v8; +version2module(9) -> dets_v9. + +module2version(dets_v8) -> 8; +module2version(dets_v9) -> 9; +module2version(not_used) -> 9. + +%% -> ok | throw(Error) +%% For version 9 tables only. +compact(SourceHead) -> + #head{name = Tab, filename = Fname, fptr = SFd, type = Type, keypos = Kp, + ram_file = Ram, auto_save = Auto} = SourceHead, + Tmp = tempfile(Fname), + TblParms = dets_v9:table_parameters(SourceHead), + {ok, Fd} = dets_utils:open(Tmp, open_args(read_write, false)), + CacheSz = ?DEFAULT_CACHE, + %% It is normally not possible to have two open tables in the same + %% process since the process dictionary is used for caching + %% segment pointers, but here is works anyway--when reading a file + %% serially the pointers to not need to be used. + Head = case catch dets_v9:prep_table_copy(Fd, Tab, Tmp, Type, Kp, Ram, + CacheSz, Auto, TblParms) of + {ok, H} -> + H; + Error -> + file:close(Fd), + file:delete(Tmp), + throw(Error) + end, + + case dets_v9:compact_init(SourceHead, Head, TblParms) of + {ok, NewHead} -> + R = case fclose(NewHead) of + ok -> + ok = file:close(SFd), + %% Save (rename) Fname first? + dets_utils:rename(Tmp, Fname); + E -> + E + end, + if + R =:= ok -> ok; + true -> + file:delete(Tmp), + throw(R) + end; + Err -> + file:close(Fd), + file:delete(Tmp), + throw(Err) + end. + +%% -> ok | Error +%% Closes Fd. +fsck(Fd, Tab, Fname, FH, MinSlotsArg, MaxSlotsArg, Version) -> + %% MinSlots and MaxSlots are the option values. + #fileheader{min_no_slots = MinSlotsFile, + max_no_slots = MaxSlotsFile} = FH, + EstNoSlots0 = file_no_things(FH), + MinSlots = choose_no_slots(MinSlotsArg, MinSlotsFile), + MaxSlots = choose_no_slots(MaxSlotsArg, MaxSlotsFile), + EstNoSlots = erlang:min(MaxSlots, erlang:max(MinSlots, EstNoSlots0)), + SlotNumbers = {MinSlots, EstNoSlots, MaxSlots}, + %% When repairing: We first try and sort on slots using MinSlots. + %% If the number of objects (keys) turns out to be significantly + %% different from NoSlots, we try again with the correct number of + %% objects (keys). + case fsck_try(Fd, Tab, FH, Fname, SlotNumbers, Version) of + {try_again, BetterNoSlots} -> + BetterSlotNumbers = {MinSlots, BetterNoSlots, MaxSlots}, + case fsck_try(Fd, Tab, FH, Fname, BetterSlotNumbers, Version) of + {try_again, _} -> + file:close(Fd), + {error, {cannot_repair, Fname}}; + Else -> + Else + end; + Else -> + Else + end. + +choose_no_slots(default, NoSlots) -> NoSlots; +choose_no_slots(NoSlots, _) -> NoSlots. + +%% -> ok | {try_again, integer()} | Error +%% Closes Fd unless {try_again, _} is returned. +%% Initiating a table using a fun and repairing (or converting) a +%% file are completely different things, but nevertheless the same +%% method is used in both cases... +fsck_try(Fd, Tab, FH, Fname, SlotNumbers, Version) -> + Tmp = tempfile(Fname), + #fileheader{type = Type, keypos = KeyPos} = FH, + {_MinSlots, EstNoSlots, MaxSlots} = SlotNumbers, + OpenArgs = #open_args{file = Tmp, type = Type, keypos = KeyPos, + repair = false, min_no_slots = EstNoSlots, + max_no_slots = MaxSlots, + ram_file = false, delayed_write = ?DEFAULT_CACHE, + auto_save = infinity, access = read_write, + version = Version, debug = false}, + case catch fopen3(Tab, OpenArgs) of + {ok, Head} -> + case fsck_try_est(Head, Fd, Fname, SlotNumbers, FH) of + {ok, NewHead} -> + R = case fclose(NewHead) of + ok -> + %% Save (rename) Fname first? + dets_utils:rename(Tmp, Fname); + Error -> + Error + end, + if + R =:= ok -> ok; + true -> + file:delete(Tmp), + R + end; + TryAgainOrError -> + file:delete(Tmp), + TryAgainOrError + end; + Error -> + file:close(Fd), + Error + end. + +tempfile(Fname) -> + Tmp = lists:concat([Fname, ".TMP"]), + case file:delete(Tmp) of + {error, eacces} -> % 'dets_process_died' happened anyway... (W-nd-ws) + timer:sleep(5000), + file:delete(Tmp); + _ -> + ok + end, + Tmp. + +%% -> {ok, NewHead} | {try_again, integer()} | Error +fsck_try_est(Head, Fd, Fname, SlotNumbers, FH) -> + %% Mod is the module to use for reading input when repairing. + Mod = FH#fileheader.mod, + Cntrs = ets:new(dets_repair, []), + Input = Mod:fsck_input(Head, Fd, Cntrs, FH), + {Reply, SizeData} = do_sort(Head, SlotNumbers, Input, Cntrs, Fname, Mod), + Bulk = false, + case Reply of + {ok, NoDups, H1} -> + file:close(Fd), + fsck_copy(SizeData, H1, Bulk, NoDups); + {try_again, _} = Return -> + close_files(Bulk, SizeData, Head), + Return; + Else -> + file:close(Fd), + close_files(Bulk, SizeData, Head), + Else + end. + +do_sort(Head, SlotNumbers, Input, Cntrs, Fname, Mod) -> + OldV = module2version(Mod), + %% output_objs/4 replaces {LogSize,NoObjects} in Cntrs by + %% {LogSize,Position,Data,NoObjects | NoCollections}. + %% Data = {FileName,FileDescriptor} | [object()] + %% For small tables Data may be a list of objects which is more + %% efficient since no temporary files are created. + Output = (Head#head.mod):output_objs(OldV, Head, SlotNumbers, Cntrs), + TmpDir = filename:dirname(Fname), + Reply = (catch file_sorter:sort(Input, Output, + [{format, binary},{tmpdir, TmpDir}])), + L = ets:tab2list(Cntrs), + ets:delete(Cntrs), + {Reply, lists:reverse(lists:keysort(1, L))}. + +fsck_copy([{_LogSz, Pos, Bins, _NoObjects} | SizeData], Head, _Bulk, NoDups) + when is_list(Bins) -> + true = NoDups =:= 0, + PWs = [{Pos,Bins} | lists:map(fun({_, P, B, _}) -> {P, B} end, SizeData)], + #head{fptr = Fd, filename = FileName} = Head, + dets_utils:pwrite(Fd, FileName, PWs), + {ok, Head#head{update_mode = dirty}}; +fsck_copy(SizeData, Head, Bulk, NoDups) -> + catch fsck_copy1(SizeData, Head, Bulk, NoDups). + +fsck_copy1([SzData | L], Head, Bulk, NoDups) -> + Out = Head#head.fptr, + {LogSz, Pos, {FileName, Fd}, NoObjects} = SzData, + Size = if NoObjects =:= 0 -> 0; true -> ?POW(LogSz-1) end, + ExpectedSize = Size * NoObjects, + close_tmp(Fd), + case file:position(Out, Pos) of + {ok, Pos} -> ok; + PError -> dets_utils:file_error(FileName, PError) + end, + {ok, Pos} = file:position(Out, Pos), + CR = file:copy({FileName, [raw,binary]}, Out), + file:delete(FileName), + case CR of + {ok, Copied} when Copied =:= ExpectedSize; + NoObjects =:= 0 -> % the segments + fsck_copy1(L, Head, Bulk, NoDups); + {ok, Copied} when Bulk, Head#head.version =:= 8 -> + NoZeros = ExpectedSize - Copied, + Dups = NoZeros div Size, + Addr = Pos+Copied, + NewHead = free_n_objects(Head, Addr, Size-1, NoDups), + NewNoDups = NoDups - Dups, + fsck_copy1(L, NewHead, Bulk, NewNoDups); + {ok, _Copied} -> % should never happen + close_files(Bulk, L, Head), + Reason = if Bulk -> initialization_failed; + true -> repair_failed end, + {error, {Reason, Head#head.filename}}; + FError -> + close_files(Bulk, L, Head), + dets_utils:file_error(FileName, FError) + end; +fsck_copy1([], Head, _Bulk, NoDups) when NoDups =/= 0 -> + {error, {initialization_failed, Head#head.filename}}; +fsck_copy1([], Head, _Bulk, _NoDups) -> + {ok, Head#head{update_mode = dirty}}. + +free_n_objects(Head, _Addr, _Size, 0) -> + Head; +free_n_objects(Head, Addr, Size, N) -> + {NewHead, _} = dets_utils:free(Head, Addr, Size), + NewAddr = Addr + Size + 1, + free_n_objects(NewHead, NewAddr, Size, N-1). + +close_files(false, SizeData, Head) -> + file:close(Head#head.fptr), + close_files(true, SizeData, Head); +close_files(true, SizeData, _Head) -> + Fun = fun({_Size, _Pos, {FileName, Fd}, _No}) -> + close_tmp(Fd), + file:delete(FileName); + (_) -> + ok + end, + lists:foreach(Fun, SizeData). + +close_tmp(Fd) -> + file:close(Fd). + +fslot(H, Slot) -> + case catch begin + {NH, []} = write_cache(H), + Objs = (NH#head.mod):slot_objs(NH, Slot), + {NH, Objs} + end of + {NewHead, _Objects} = Reply when is_record(NewHead, head) -> + Reply + end. + +do_update_counter(Head, _Key, _Incr) when Head#head.type =/= set -> + {Head, badarg}; +do_update_counter(Head, Key, Incr) -> + case flookup_keys(Head, [Key]) of + {H1, [O]} -> + Kp = H1#head.keypos, + case catch try_update_tuple(O, Kp, Incr) of + {'EXIT', _} -> + {H1, badarg}; + {New, Term} -> + case finsert(H1, [Term]) of + {H2, ok} -> + {H2, New}; + Reply -> + Reply + end + end; + {H1, []} -> + {H1, badarg}; + HeadError -> + HeadError + end. + +try_update_tuple(O, _Kp, {Pos, Incr}) -> + try_update_tuple2(O, Pos, Incr); +try_update_tuple(O, Kp, Incr) -> + try_update_tuple2(O, Kp+1, Incr). + +try_update_tuple2(O, Pos, Incr) -> + New = element(Pos, O) + Incr, + {New, setelement(Pos, O, New)}. + +set_verbose(true) -> + put(verbose, yes); +set_verbose(_) -> + erase(verbose). + +where_is_object(Head, Object) -> + Keypos = Head#head.keypos, + case check_objects([Object], Keypos) of + true -> + case catch write_cache(Head) of + {NewHead, []} -> + {NewHead, (Head#head.mod):find_object(NewHead, Object)}; + {NewHead, _} = HeadError when is_record(NewHead, head) -> + HeadError + end; + false -> + {Head, badarg} + end. + +check_objects([T | Ts], Kp) when tuple_size(T) >= Kp -> + check_objects(Ts, Kp); +check_objects(L, _Kp) -> + L =:= []. + +no_things(Head) when Head#head.no_keys =:= undefined -> + Head#head.no_objects; +no_things(Head) -> + Head#head.no_keys. + +file_no_things(FH) when FH#fileheader.no_keys =:= undefined -> + FH#fileheader.no_objects; +file_no_things(FH) -> + FH#fileheader.no_keys. + +%%% The write cache is list of {Key, [Item]} where Item is one of +%%% {Seq, delete_key}, {Seq, {lookup,Pid}}, {Seq, {delete_object,object()}}, +%%% or {Seq, {insert,object()}}. Seq is a number that increases +%%% monotonically for each item put in the cache. The purpose is to +%%% make sure that items are sorted correctly. Sequences of delete and +%%% insert operations are inserted in the cache without doing any file +%%% operations. When the cache is considered full, a lookup operation +%%% is requested, or after some delay, the contents of the cache are +%%% written to the file, and the cache emptied. +%%% +%%% Data is not allowed to linger more than 'delay' milliseconds in +%%% the write cache. A delayed_write message is received when some +%%% datum has become too old. If 'wrtime' is equal to 'undefined', +%%% then the cache is empty and no such delayed_write message has been +%%% scheduled. Otherwise there is a delayed_write message scheduled, +%%% and the value of 'wrtime' is the time when the cache was last +%%% written, or when it was first updated after the cache was last +%%% written. + +update_cache(Head, KeysOrObjects, What) -> + {Head1, LU, PwriteList} = update_cache(Head, [{What,KeysOrObjects}]), + {NewHead, ok} = dets_utils:pwrite(Head1, PwriteList), + {NewHead, LU}. + +%% -> {NewHead, [object()], pwrite_list()} | throw({Head, Error}) +update_cache(Head, ToAdd) -> + Cache = Head#head.cache, + #cache{cache = C, csize = Size0, inserts = Ins} = Cache, + NewSize = Size0 + erlang:external_size(ToAdd), + %% The size is used as a sequence number here; it increases monotonically. + {NewC, NewIns, Lookup, Found} = + cache_binary(Head, ToAdd, C, Size0, Ins, false, []), + NewCache = Cache#cache{cache = NewC, csize = NewSize, inserts = NewIns}, + Head1 = Head#head{cache = NewCache}, + if + Lookup; NewSize >= Cache#cache.tsize -> + %% The cache is considered full, or some lookup. + {NewHead, LU, PwriteList} = (Head#head.mod):write_cache(Head1), + {NewHead, Found ++ LU, PwriteList}; + NewC =:= [] -> + {Head1, Found, []}; + Cache#cache.wrtime =:= undefined -> + %% Empty cache. Schedule a delayed write. + Now = now(), Me = self(), + Call = ?DETS_CALL(Me, {delayed_write, Now}), + erlang:send_after(Cache#cache.delay, Me, Call), + {Head1#head{cache = NewCache#cache{wrtime = Now}}, Found, []}; + Size0 =:= 0 -> + %% Empty cache that has been written after the + %% currently scheduled delayed write. + {Head1#head{cache = NewCache#cache{wrtime = now()}}, Found, []}; + true -> + %% Cache is not empty, delayed write has been scheduled. + {Head1, Found, []} + end. + +cache_binary(Head, [{Q,Os} | L], C, Seq, Ins, Lu,F) when Q =:= delete_object -> + cache_obj_op(Head, L, C, Seq, Ins, Lu, F, Os, Head#head.keypos, Q); +cache_binary(Head, [{Q,Os} | L], C, Seq, Ins, Lu, F) when Q =:= insert -> + NewIns = Ins + length(Os), + cache_obj_op(Head, L, C, Seq, NewIns, Lu, F, Os, Head#head.keypos, Q); +cache_binary(Head, [{Q,Ks} | L], C, Seq, Ins, Lu, F) when Q =:= delete_key -> + cache_key_op(Head, L, C, Seq, Ins, Lu, F, Ks, Q); +cache_binary(Head, [{Q,Ks} | L], C, Seq, Ins, _Lu, F) when C =:= [] -> % lookup + cache_key_op(Head, L, C, Seq, Ins, true, F, Ks, Q); +cache_binary(Head, [{Q,Ks} | L], C, Seq, Ins, Lu, F) -> % lookup + case dets_utils:cache_lookup(Head#head.type, Ks, C, []) of + false -> + cache_key_op(Head, L, C, Seq, Ins, true, F, Ks, Q); + Found -> + {lookup,Pid} = Q, + cache_binary(Head, L, C, Seq, Ins, Lu, [{Pid,Found} | F]) + end; +cache_binary(_Head, [], C, _Seq, Ins, Lu, F) -> + {C, Ins, Lu, F}. + +cache_key_op(Head, L, C, Seq, Ins, Lu, F, [K | Ks], Q) -> + E = {K, {Seq, Q}}, + cache_key_op(Head, L, [E | C], Seq+1, Ins, Lu, F, Ks, Q); +cache_key_op(Head, L, C, Seq, Ins, Lu, F, [], _Q) -> + cache_binary(Head, L, C, Seq, Ins, Lu, F). + +cache_obj_op(Head, L, C, Seq, Ins, Lu, F, [O | Os], Kp, Q) -> + E = {element(Kp, O), {Seq, {Q, O}}}, + cache_obj_op(Head, L, [E | C], Seq+1, Ins, Lu, F, Os, Kp, Q); +cache_obj_op(Head, L, C, Seq, Ins, Lu, F, [], _Kp, _Q) -> + cache_binary(Head, L, C, Seq, Ins, Lu, F). + +%% Called after some delay. +%% -> NewHead +delayed_write(Head, WrTime) -> + Cache = Head#head.cache, + LastWrTime = Cache#cache.wrtime, + if + LastWrTime =:= WrTime -> + %% The cache was not emptied during the last delay. + case catch write_cache(Head) of + {Head2, []} -> + NewCache = (Head2#head.cache)#cache{wrtime = undefined}, + Head2#head{cache = NewCache}; + {NewHead, _Error} -> % Head.update_mode has been updated + NewHead + end; + true -> + %% The cache was emptied during the delay. + %% Has anything been written since then? + if + Cache#cache.csize =:= 0 -> + %% No, further delayed write not needed. + NewCache = Cache#cache{wrtime = undefined}, + Head#head{cache = NewCache}; + true -> + %% Yes, schedule a new delayed write. + {MS1,S1,M1} = WrTime, + {MS2,S2,M2} = LastWrTime, + WrT = M1+1000000*(S1+1000000*MS1), + LastWrT = M2+1000000*(S2+1000000*MS2), + When = round((LastWrT - WrT)/1000), Me = self(), + Call = ?DETS_CALL(Me, {delayed_write, LastWrTime}), + erlang:send_after(When, Me, Call), + Head + end + end. + +%% -> {NewHead, [LookedUpObject]} | throw({NewHead, Error}) +write_cache(Head) -> + {Head1, LU, PwriteList} = (Head#head.mod):write_cache(Head), + {NewHead, ok} = dets_utils:pwrite(Head1, PwriteList), + {NewHead, LU}. + +status(Head) -> + case Head#head.update_mode of + saved -> ok; + dirty -> ok; + new_dirty -> ok; + Error -> Error + end. + +%%% Scan the file from start to end by reading chunks. + +%% -> dets_cont() +init_scan(Head, NoObjs) -> + check_safe_fixtable(Head), + FreeLists = dets_utils:get_freelists(Head), + Base = Head#head.base, + {From, To} = dets_utils:find_next_allocated(FreeLists, Base, Base), + #dets_cont{no_objs = NoObjs, bin = <<>>, alloc = {From, To, <<>>}}. + +check_safe_fixtable(Head) -> + case (Head#head.fixed =:= false) andalso + ((get(verbose) =:= yes) orelse dets_utils:debug_mode()) of + true -> + error_logger:format + ("** dets: traversal of ~p needs safe_fixtable~n", + [Head#head.name]); + false -> + ok + end. + +%% -> {[RTerm], dets_cont()} | {scan_error, Reason} +%% RTerm = {Pos, Next, Size, Status, Term} +scan(_Head, #dets_cont{alloc = <<>>}=C) -> + {[], C}; +scan(Head, C) -> % when is_record(C, dets_cont) + #dets_cont{no_objs = No, alloc = L0, bin = Bin} = C, + {From, To, L} = L0, + R = case No of + default -> + 0; + _ when is_integer(No) -> + -No-1 + end, + scan(Bin, Head, From, To, L, [], R, {C, Head#head.type}). + +scan(Bin, H, From, To, L, Ts, R, {C0, Type} = C) -> + case (H#head.mod):scan_objs(H, Bin, From, To, L, Ts, R, Type) of + {more, NFrom, NTo, NL, NTs, NR, Sz} -> + scan_read(H, NFrom, NTo, Sz, NL, NTs, NR, C); + {stop, <<>>=B, NFrom, NTo, <<>>=NL, NTs} -> + Ftab = dets_utils:get_freelists(H), + case dets_utils:find_next_allocated(Ftab, NFrom, H#head.base) of + none -> + {NTs, C0#dets_cont{bin = eof, alloc = B}}; + _ -> + {NTs, C0#dets_cont{bin = B, alloc = {NFrom, NTo, NL}}} + end; + {stop, B, NFrom, NTo, NL, NTs} -> + {NTs, C0#dets_cont{bin = B, alloc = {NFrom, NTo, NL}}}; + bad_object -> + {scan_error, dets_utils:bad_object(scan, {From, To, Bin})} + end. + +scan_read(_H, From, To, _Min, L0, Ts, + R, {C, _Type}) when R >= ?CHUNK_SIZE -> + %% We may have read (much) more than CHUNK_SIZE, if there are holes. + L = {From, To, L0}, + {Ts, C#dets_cont{bin = <<>>, alloc = L}}; +scan_read(H, From, _To, Min, _L, Ts, R, C) -> + Max = if + Min < ?CHUNK_SIZE -> ?CHUNK_SIZE; + true -> Min + end, + FreeLists = dets_utils:get_freelists(H), + case dets_utils:find_allocated(FreeLists, From, Max, H#head.base) of + <<>>=Bin0 -> + {Cont, _} = C, + {Ts, Cont#dets_cont{bin = eof, alloc = Bin0}}; + <> -> + case dets_utils:pread_n(H#head.fptr, From1, Max) of + eof -> + {scan_error, premature_eof}; + NewBin -> + scan(NewBin, H, From1, To1, L1, Ts, R, C) + end + end. + +err(Error) -> + case get(verbose) of + yes -> + error_logger:format("** dets: failed with ~w~n", [Error]), + Error; + undefined -> + Error + end. + +%%%%%%%%%%%%%%%%% DEBUG functions %%%%%%%%%%%%%%%% + +file_info(FileName) -> + case catch read_file_header(FileName, read, false) of + {ok, Fd, FH} -> + file:close(Fd), + (FH#fileheader.mod):file_info(FH); + Other -> + Other + end. + +get_head_field(Fd, Field) -> + dets_utils:read_4(Fd, Field). + +%% Dump the contents of a DAT file to the tty +%% internal debug function which ignores the closed properly thingie +%% and just tries anyway + +view(FileName) -> + case catch read_file_header(FileName, read, false) of + {ok, Fd, FH} -> + Mod = FH#fileheader.mod, + case Mod:check_file_header(FH, Fd) of + {ok, H0, ExtraInfo} -> + Ftab = Mod:init_freelist(H0, ExtraInfo), + {_Bump, Base} = constants(FH, FileName), + H = H0#head{freelists=Ftab, base = Base}, + v_free_list(H), + Mod:v_segments(H), + file:close(Fd); + X -> + file:close(Fd), + X + end; + X -> + X + end. + +v_free_list(Head) -> + io:format("FREE LIST ...... \n",[]), + io:format("~p~n", [dets_utils:all_free(Head)]), + io:format("END OF FREE LIST \n",[]). diff --git a/lib/stdlib/src/dets.hrl b/lib/stdlib/src/dets.hrl new file mode 100644 index 0000000000..6e59770753 --- /dev/null +++ b/lib/stdlib/src/dets.hrl @@ -0,0 +1,126 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-define(DEFAULT_MIN_NO_SLOTS, 256). +-define(DEFAULT_MAX_NO_SLOTS, 2*1024*1024). +-define(DEFAULT_AUTOSAVE, 3). % minutes +-define(DEFAULT_CACHE, {3000, 14000}). % {delay,size} in {milliseconds,bytes} + +%% Type. +-define(SET, 1). +-define(BAG, 2). +-define(DUPLICATE_BAG, 3). + +-define(MAGIC, 16#0abcdef). % dets cookie, won't ever change. +%% Status values. +-define(FREE, 16#3abcdef). +-define(ACTIVE, 16#12345678). + +-define(FILE_FORMAT_VERSION_POS, 16). + +-define(CHUNK_SIZE, 8192). + +-define(SERVER_NAME, dets). + +-define(POW(X), (1 bsl (X))). + +%% REM2(A,B) = A rem B, if B is a power of 2. +-define(REM2(A, B), ((A) band ((B)-1))). + +-define(DETS_CALL(Pid, Req), {'$dets_call', Pid, Req}). + +%% Record holding the file header and more. +-record(head, { + m, % size + m2, % m * 2 + next, % next position for growth (segm mgmt only) + fptr, % the file descriptor + no_objects, % number of objects in table, + no_keys, % number of keys (version 9 only) + maxobjsize, % 2-log of the size of the biggest object + % collection (version 9 only) + n, % split indicator + type, % set | bag | duplicate_bag + keypos, % default is 1 as for ets + freelists, % tuple of free lists of buddies + % if fixed =/= false, then a pair of freelists + freelists_p, % cached FreelistsPointer + no_collections, % [{LogSize,NoCollections}] | undefined; number of + % object collections per size (version 9(b)) + auto_save, % Integer | infinity + update_mode, % saved | dirty | new_dirty | {error, Reason} + fixed = false, % false | {now_time(), [{pid(),Counter}]} + % time of first fix, and number of fixes per process + hash_bif, % hash bif used for this file (phash2, phash, hash) + has_md5, % whether the header has an MD5 sum (version 9(c)) + min_no_slots, % minimum number of slots (default or integer) + max_no_slots, % maximum number of slots (default or integer) + cache, % cache(). Write cache. + + filename, % name of the file being used + access = read_write, % read | read_write + ram_file = false, % true | false + name, % the name of the table + + parent, % The supervisor of Dets processes. + server, % The creator of Dets processes. + + %% Depending on the file format: + version, + mod, + bump, + base + + }). + +%% Info extracted from the file header. +-record(fileheader, { + freelist, + cookie, + closed_properly, + type, + version, + m, + next, + keypos, + no_objects, + no_keys, + min_no_slots, + max_no_slots, + no_colls, + hash_method, + read_md5, + has_md5, + md5, + trailer, + eof, + n, + mod + }). + +%% Write Cache. +-record(cache, { + cache, % [{Key,{Seq,Item}}], write cache, last item first + csize, % current size of the cached items + inserts, % upper limit on number of inserted keys + wrtime, % last write or update time + tsize, % threshold size of cache, in bytes + delay % max time items are kept in RAM only, in milliseconds + }). + diff --git a/lib/stdlib/src/dets_server.erl b/lib/stdlib/src/dets_server.erl new file mode 100644 index 0000000000..931112088e --- /dev/null +++ b/lib/stdlib/src/dets_server.erl @@ -0,0 +1,402 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(dets_server). + +%% Disk based linear hashing lookup dictionary. Server part. + +-behaviour(gen_server). + +%% External exports. +-export([all/0, close/1, get_pid/1, open_file/1, open_file/2, pid2name/1, + users/1, verbose/1]). + +%% Internal. +-export([start_link/0, start/0, stop/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +%% record for not yet handled reqeusts to open or close files +-record(pending, {tab, ref, pid, from, reqtype, clients}). % [{From,Args}] + +%% state for the dets server +-record(state, {store, parent, pending}). % [pending()] + +-include("dets.hrl"). + +-define(REGISTRY, dets_registry). % {Table, NoUsers, TablePid} +-define(OWNERS, dets_owners). % {TablePid, Table} +-define(STORE, dets). % {User, Table} and {{links,User}, NoLinks} + +%%-define(DEBUGF(X,Y), io:format(X, Y)). +-define(DEBUGF(X,Y), void). + +-compile({inline, [{pid2name_1,1}]}). + +%%%---------------------------------------------------------------------- +%%% API +%%%---------------------------------------------------------------------- + +%% Internal. +start_link() -> + gen_server:start_link({local, ?SERVER_NAME}, dets_server, [self()], []). + +start() -> + ensure_started(). + +stop() -> + case whereis(?SERVER_NAME) of + undefined -> + stopped; + _Pid -> + gen_server:call(?SERVER_NAME, stop, infinity) + end. + +all() -> + call(all). + +close(Tab) -> + call({close, Tab}). + +get_pid(Tab) -> + ets:lookup_element(?REGISTRY, Tab, 3). + +open_file(File) -> + call({open, File}). + +open_file(Tab, OpenArgs) -> + call({open, Tab, OpenArgs}). + +pid2name(Pid) -> + ensure_started(), + pid2name_1(Pid). + +users(Tab) -> + call({users, Tab}). + +verbose(What) -> + call({set_verbose, What}). + +call(Message) -> + ensure_started(), + gen_server:call(?SERVER_NAME, Message, infinity). + +%%%---------------------------------------------------------------------- +%%% Callback functions from gen_server +%%%---------------------------------------------------------------------- + +%%---------------------------------------------------------------------- +%% Func: init/1 +%% Returns: {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%%---------------------------------------------------------------------- +init(Parent) -> + Store = init(), + {ok, #state{store=Store, parent=Parent, pending = []}}. + +%%---------------------------------------------------------------------- +%% Func: handle_call/3 +%% Returns: {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | (terminate/2 is called) +%% {stop, Reason, State} (terminate/2 is called) +%%---------------------------------------------------------------------- +handle_call(all, _From, State) -> + F = fun(X, A) -> [element(1, X) | A] end, + {reply, ets:foldl(F, [], ?REGISTRY), State}; +handle_call({close, Tab}, From, State) -> + request([{{close, Tab}, From}], State); +handle_call({open, File}, From, State) -> + request([{{open, File}, From}], State); +handle_call({open, Tab, OpenArgs}, From, State) -> + request([{{open, Tab, OpenArgs}, From}], State); +handle_call(stop, _From, State) -> + {stop, normal, stopped, State}; +handle_call({set_verbose, What}, _From, State) -> + set_verbose(What), + {reply, ok, State}; +handle_call({users, Tab}, _From, State) -> + Users = ets:select(State#state.store, [{{'$1', Tab}, [], ['$1']}]), + {reply, Users, State}. + +%%---------------------------------------------------------------------- +%% Func: handle_cast/2 +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%---------------------------------------------------------------------- +handle_cast(_Msg, State) -> + {noreply, State}. + +%%---------------------------------------------------------------------- +%% Func: handle_info/2 +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%---------------------------------------------------------------------- +handle_info({pending_reply, {Ref, Result0}}, State) -> + {value, #pending{tab = Tab, pid = Pid, from = {FromPid,_Tag}=From, + reqtype = ReqT, clients = Clients}} = + lists:keysearch(Ref, #pending.ref, State#state.pending), + Store = State#state.store, + Result = + case {Result0, ReqT} of + {ok, add_user} -> + do_link(Store, FromPid), + true = ets:insert(Store, {FromPid, Tab}), + ets:update_counter(?REGISTRY, Tab, 1), + {ok, Tab}; + {ok, internal_open} -> + link(Pid), + do_link(Store, FromPid), + true = ets:insert(Store, {FromPid, Tab}), + true = ets:insert(?REGISTRY, {Tab, 1, Pid}), + true = ets:insert(?OWNERS, {Pid, Tab}), + {ok, Tab}; + {Reply, _} -> % ok or Error + Reply + end, + gen_server:reply(From, Result), + NP = lists:keydelete(Pid, #pending.pid, State#state.pending), + State1 = State#state{pending = NP}, + request(Clients, State1); +handle_info({'EXIT', Pid, _Reason}, State) -> + Store = State#state.store, + case pid2name_1(Pid) of + {ok, Tab} -> + %% A table was killed. + true = ets:delete(?REGISTRY, Tab), + true = ets:delete(?OWNERS, Pid), + Users = ets:select(State#state.store, [{{'$1', Tab}, [], ['$1']}]), + true = ets:match_delete(Store, {'_', Tab}), + lists:foreach(fun(User) -> do_unlink(Store, User) end, Users), + {noreply, State}; + undefined -> + %% Close all tables used by Pid. + F = fun({FromPid, Tab}, S) -> + {_, S1} = handle_close(S, {close, Tab}, + {FromPid, notag}, Tab), + S1 + end, + State1 = lists:foldl(F, State, ets:lookup(Store, Pid)), + {noreply, State1} + end; +handle_info(_Message, State) -> + {noreply, State}. + +%%---------------------------------------------------------------------- +%% Func: terminate/2 +%% Purpose: Shutdown the server +%% Returns: any (ignored by gen_server) +%%---------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + +%%---------------------------------------------------------------------- +%% Func: code_change/3 +%% Purpose: Convert process state when code is changed +%% Returns: {ok, NewState} +%%---------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%---------------------------------------------------------------------- +%%% Internal functions +%%%---------------------------------------------------------------------- + +ensure_started() -> + case whereis(?SERVER_NAME) of + undefined -> + DetsSup = {dets_sup, {dets_sup, start_link, []}, permanent, + 1000, supervisor, [dets_sup]}, + _ = supervisor:start_child(kernel_safe_sup, DetsSup), + DetsServer = {?SERVER_NAME, {?MODULE, start_link, []}, + permanent, 2000, worker, [?MODULE]}, + _ = supervisor:start_child(kernel_safe_sup, DetsServer), + ok; + _ -> ok + end. + +init() -> + set_verbose(verbose_flag()), + process_flag(trap_exit, true), + ets:new(?REGISTRY, [set, named_table]), + ets:new(?OWNERS, [set, named_table]), + ets:new(?STORE, [duplicate_bag]). + +verbose_flag() -> + case init:get_argument(dets) of + {ok, Args} -> + lists:member(["verbose"], Args); + _ -> + false + end. + +set_verbose(true) -> + put(verbose, yes); +set_verbose(_) -> + erase(verbose). + +%% Inlined. +pid2name_1(Pid) -> + case ets:lookup(?OWNERS, Pid) of + [] -> undefined; + [{_Pid,Tab}] -> {ok, Tab} + end. + +request([{Req, From} | L], State) -> + Res = case Req of + {close, Tab} -> + handle_close(State, Req, From, Tab); + {open, File} -> + do_internal_open(State, From, [File, get(verbose)]); + {open, Tab, OpenArgs} -> + do_open(State, Req, From, OpenArgs, Tab) + end, + State2 = case Res of + {pending, State1} -> + State1; + {Reply, State1} -> + gen_server:reply(From, Reply), + State1 + end, + request(L, State2); +request([], State) -> + {noreply, State}. + +%% -> {pending, NewState} | {Reply, NewState} +do_open(State, Req, From, Args, Tab) -> + case check_pending(Tab, From, State, Req) of + {pending, NewState} -> {pending, NewState}; + false -> + case ets:lookup(?REGISTRY, Tab) of + [] -> + A = [Tab, Args, get(verbose)], + do_internal_open(State, From, A); + [{Tab, _Counter, Pid}] -> + pending_call(Tab, Pid, make_ref(), From, Args, + add_user, State) + end + end. + +%% -> {pending, NewState} | {Reply, NewState} +do_internal_open(State, From, Args) -> + case supervisor:start_child(dets_sup, [self()]) of + {ok, Pid} -> + Ref = make_ref(), + Tab = case Args of + [T, _, _] -> T; + [_, _] -> Ref + end, + pending_call(Tab, Pid, Ref, From, Args, internal_open, State); + Error -> + {Error, State} + end. + +%% -> {pending, NewState} | {Reply, NewState} +handle_close(State, Req, {FromPid,_Tag}=From, Tab) -> + case check_pending(Tab, From, State, Req) of + {pending, NewState} -> {pending, NewState}; + false -> + Store = State#state.store, + case ets:match_object(Store, {FromPid, Tab}) of + [] -> + ?DEBUGF("DETS: Table ~w close attempt by non-owner~w~n", + [Tab, FromPid]), + {{error, not_owner}, State}; + [_ | Keep] -> + case ets:lookup(?REGISTRY, Tab) of + [{Tab, 1, Pid}] -> + do_unlink(Store, FromPid), + true = ets:delete(?REGISTRY, Tab), + true = ets:delete(?OWNERS, Pid), + true = ets:match_delete(Store, {FromPid, Tab}), + unlink(Pid), + pending_call(Tab, Pid, make_ref(), From, [], + internal_close, State); + [{Tab, _Counter, Pid}] -> + do_unlink(Store, FromPid), + true = ets:match_delete(Store, {FromPid, Tab}), + [true = ets:insert(Store, K) || K <- Keep], + ets:update_counter(?REGISTRY, Tab, -1), + pending_call(Tab, Pid, make_ref(), From, [], + remove_user, State) + end + end + end. + +%% Links with counters +do_link(Store, Pid) -> + Key = {links, Pid}, + case ets:lookup(Store, Key) of + [] -> + true = ets:insert(Store, {Key, 1}), + link(Pid); + [{_, C}] -> + true = ets:delete(Store, Key), + true = ets:insert(Store, {Key, C+1}) + end. + +do_unlink(Store, Pid) -> + Key = {links, Pid}, + case ets:lookup(Store, Key) of + [{_, C}] when C > 1 -> + true = ets:delete(Store, Key), + true = ets:insert(Store, {Key, C-1}); + _ -> + true = ets:delete(Store, Key), + unlink(Pid) + + end. + +pending_call(Tab, Pid, Ref, {FromPid, _Tag}=From, Args, ReqT, State) -> + Server = self(), + F = fun() -> + Res = case ReqT of + add_user -> + dets:add_user(Pid, Tab, Args); + internal_open -> + dets:internal_open(Pid, Ref, Args); + internal_close -> + dets:internal_close(Pid); + remove_user -> + dets:remove_user(Pid, FromPid) + end, + Server ! {pending_reply, {Ref, Res}} + end, + _ = spawn(F), + PD = #pending{tab = Tab, ref = Ref, pid = Pid, reqtype = ReqT, + from = From, clients = []}, + P = [PD | State#state.pending], + {pending, State#state{pending = P}}. + +check_pending(Tab, From, State, Req) -> + case lists:keysearch(Tab, #pending.tab, State#state.pending) of + {value, #pending{tab = Tab, clients = Clients}=P} -> + NP = lists:keyreplace(Tab, #pending.tab, State#state.pending, + P#pending{clients = Clients++[{Req,From}]}), + {pending, State#state{pending = NP}}; + false -> + false + end. diff --git a/lib/stdlib/src/dets_sup.erl b/lib/stdlib/src/dets_sup.erl new file mode 100644 index 0000000000..5c6caa787d --- /dev/null +++ b/lib/stdlib/src/dets_sup.erl @@ -0,0 +1,31 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(dets_sup). + +-behaviour(supervisor). + +-export([start_link/0, init/1]). + +start_link() -> + supervisor:start_link({local, dets_sup}, dets_sup, []). + +init([]) -> + SupFlags = {simple_one_for_one, 4, 3600}, + Child = {dets, {dets, istart_link, []}, temporary, 30000, worker, [dets]}, + {ok, {SupFlags, [Child]}}. diff --git a/lib/stdlib/src/dets_utils.erl b/lib/stdlib/src/dets_utils.erl new file mode 100644 index 0000000000..5db2ad3049 --- /dev/null +++ b/lib/stdlib/src/dets_utils.erl @@ -0,0 +1,1801 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(dets_utils). + +%% Utility functions common to several dets file formats. +%% To be used from dets, dets_v8 and dets_v9 only. + +-export([cmp/2, msort/1, mkeysort/2, mkeysearch/3, family/1]). + +-export([rename/2, pread/2, pread/4, ipread/3, pwrite/2, write/2, + truncate/2, position/2, sync/1, open/2, truncate/3, fwrite/3, + write_file/2, position/3, position_close/3, pwrite/4, + pwrite/3, pread_close/4, read_n/2, pread_n/3, read_4/2]). + +-export([code_to_type/1, type_to_code/1]). + +-export([corrupt_reason/2, corrupt/2, corrupt_file/2, + vformat/2, file_error/2]). + +-export([debug_mode/0, bad_object/2]). + +-export([cache_lookup/4, cache_size/1, new_cache/1, + reset_cache/1, is_empty_cache/1]). + +-export([empty_free_lists/0, init_alloc/1, alloc_many/4, alloc/2, + free/3, get_freelists/1, all_free/1, all_allocated/1, + all_allocated_as_list/1, find_allocated/4, find_next_allocated/3, + log2/1, make_zeros/1]). + +-export([init_slots_from_old_file/2]). + +-export([list_to_tree/1, tree_to_bin/5]). + +-compile({inline, [{sz2pos,1}, {adjust_addr,3}]}). +-compile({inline, [{bplus_mk_leaf,1}, {bplus_get_size,1}, + {bplus_get_tree,2}, {bplus_get_lkey,2}, + {bplus_get_rkey,2}]}). + +%% Debug +-export([init_disk_map/1, stop_disk_map/0, + disk_map_segment_p/2, disk_map_segment/2]). + +-include("dets.hrl"). + +%%% A total ordering of all Erlang terms. + +%% -> -1 | 0 | 1. T1 is (smaller than | equal | greater than) T2. +%% If is_integer(I), is_float(F), I == F then I is deemed smaller than F. +cmp(T, T) -> + 0; +cmp([E1 | T1], [E2 | T2]) -> + case cmp(E1, E2) of + 0 -> cmp(T1, T2); + R -> R + end; +cmp(T1, T2) when tuple_size(T1) =:= tuple_size(T2) -> + tcmp(T1, T2, 1, tuple_size(T1)); +cmp(I, F) when is_integer(I), is_float(F) -> + -1; +cmp(F, I) when is_float(F), is_integer(I) -> + 1; +cmp(T1, T2) when T1 < T2 -> + -1; +cmp(_T1, _T2) -> % when _T1 > _T2 + 1. + +tcmp(T1, T2, I, I) -> + cmp(element(I, T1), element(I, T2)); +tcmp(T1, T2, I, N) -> + case cmp(element(I, T1), element(I, T2)) of + 0 -> tcmp(T1, T2, I + 1, N); + R -> R + end. + +msort(L) -> + %% sort is very much faster than msort, let it do most of the work. + F = fun(X, Y) -> cmp(X, Y) =< 0 end, + lists:sort(F, lists:sort(L)). + +mkeysort(I, L) -> + F = fun(X, Y) -> cmp(element(I, X), element(I, Y)) =< 0 end, + %% keysort is much faster than mkeysort, let it do most of the work. + lists:sort(F, lists:keysort(I, L)). + +mkeysearch(Key, I, L) -> + case lists:keysearch(Key, I, L) of + {value, Value}=Reply when element(I, Value) =:= Key -> + Reply; + false -> + false; + _ -> + mkeysearch2(Key, I, L) + end. + +mkeysearch2(_Key, _I, []) -> + false; +mkeysearch2(Key, I, [E | _L]) when element(I, E) =:= Key -> + {value, E}; +mkeysearch2(Key, I, [_ | L]) -> + mkeysearch2(Key, I, L). + +%% Be careful never to compare keys, but use matching instead. +%% Otherwise sofs could have been used: +%% sofs:to_external(sofs:relation_to_family(sofs:relation(L, 2))). +family([]) -> + []; +family(L) -> + [{K,V}|KVL] = mkeysort(1, L), + per_key(KVL, K, [V], []). + +per_key([], K, Vs, KVs) -> + lists:reverse(KVs, [{K,msort(Vs)}]); +per_key([{K,V}|L], K, Vs, KVs) -> % match + per_key(L, K, [V|Vs], KVs); +per_key([{K1,V}|L], K, Vs, KVs) -> + per_key(L, K1, [V], [{K,msort(Vs)}|KVs]). + +rename(From, To) -> + case file:rename(From, To) of + ok -> + ok; + {error, Reason} -> + {error, {file_error, {From, To}, Reason}} + end. + +%% -> {ok, Bins} | throw({NewHead, Error}) +pread(Positions, Head) -> + R = case file:pread(Head#head.fptr, Positions) of + {ok, Bins} -> + %% file:pread/2 can return 'eof' as "data". + case lists:member(eof, Bins) of + true -> + {error, {premature_eof, Head#head.filename}}; + false -> + {ok, Bins} + end; + {error, Reason} when enomem =:= Reason; einval =:= Reason -> + {error, {bad_object_header, Head#head.filename}}; + {error, Reason} -> + {file_error, Head#head.filename, Reason} + end, + case R of + {ok, _Bins} -> + R; + Error -> + throw(corrupt(Head, Error)) + end. + +%% -> {ok, binary()} | throw({NewHead, Error}) +pread(Head, Pos, Min, Extra) -> + R = case file:pread(Head#head.fptr, Pos, Min+Extra) of + {error, Reason} when enomem =:= Reason; einval =:= Reason -> + {error, {bad_object_header, Head#head.filename}}; + {error, Reason} -> + {file_error, Head#head.filename, Reason}; + {ok, Bin} when byte_size(Bin) < Min -> + {error, {premature_eof, Head#head.filename}}; + OK -> OK + end, + case R of + {ok, _Bin} -> + R; + Error -> + throw(corrupt(Head, Error)) + end. + +%% -> eof | [] | {ok, {Size, Pointer, binary()}} +ipread(Head, Pos1, MaxSize) -> + try + disk_map_pread(Pos1) + catch Bad -> + throw(corrupt_reason(Head, {disk_map, Bad})) + end, + case file:ipread_s32bu_p32bu(Head#head.fptr, Pos1, MaxSize) of + {ok, {0, 0, eof}} -> + []; + {ok, Reply} -> + {ok, Reply}; + _Else -> + eof + end. + +%% -> {Head, ok} | throw({Head, Error}) +pwrite(Head, []) -> + {Head, ok}; +pwrite(Head, Bins) -> + try + disk_map(Bins) + catch Bad -> + throw(corrupt_reason(Head, {disk_map, Bad, Bins})) + end, + case file:pwrite(Head#head.fptr, Bins) of + ok -> + {Head, ok}; + Error -> + corrupt_file(Head, Error) + end. + +%% -> ok | throw({Head, Error}) +write(_Head, []) -> + ok; +write(Head, Bins) -> + case file:write(Head#head.fptr, Bins) of + ok -> + ok; + Error -> + corrupt_file(Head, Error) + end. + +%% -> ok | throw({Head, Error}) +%% Same as file:write_file/2, but calls file:sync/1. +write_file(Head, Bin) -> + R = case file:open(Head#head.filename, [binary, raw, write]) of + {ok, Fd} -> + R1 = file:write(Fd, Bin), + R2 = file:sync(Fd), + file:close(Fd), + if R1 =:= ok -> R2; true -> R1 end; + Else -> + Else + end, + case R of + ok -> + ok; + Error -> + corrupt_file(Head, Error) + end. + +%% -> ok | throw({Head, Error}) +truncate(Head, Pos) -> + case catch truncate(Head#head.fptr, Head#head.filename, Pos) of + ok -> + ok; + Error -> + throw(corrupt(Head, Error)) + end. + +%% -> {ok, Pos} | throw({Head, Error}) +position(Head, Pos) -> + case file:position(Head#head.fptr, Pos) of + {error, _Reason} = Error -> + corrupt_file(Head, Error); + OK -> OK + end. + +%% -> ok | throw({Head, Error}) +sync(Head) -> + case file:sync(Head#head.fptr) of + ok -> + ok; + Error -> + corrupt_file(Head, Error) + end. + +open(FileSpec, Args) -> + case file:open(FileSpec, Args) of + {ok, Fd} -> + {ok, Fd}; + Error -> + file_error(FileSpec, Error) + end. + +truncate(Fd, FileName, Pos) -> + if + Pos =:= cur -> + ok; + true -> + position(Fd, FileName, Pos) + end, + case file:truncate(Fd) of + ok -> + ok; + Error -> + file_error(FileName, {error, Error}) + end. + +fwrite(Fd, FileName, B) -> + case file:write(Fd, B) of + ok -> ok; + Error -> file_error_close(Fd, FileName, Error) + end. + +position(Fd, FileName, Pos) -> + case file:position(Fd, Pos) of + {error, Error} -> file_error(FileName, {error, Error}); + OK -> OK + end. + +position_close(Fd, FileName, Pos) -> + case file:position(Fd, Pos) of + {error, Error} -> file_error_close(Fd, FileName, {error, Error}); + OK -> OK + end. + +pwrite(Fd, FileName, Position, B) -> + case file:pwrite(Fd, Position, B) of + ok -> ok; + Error -> file_error(FileName, {error, Error}) + end. + +pwrite(Fd, FileName, Bins) -> + case file:pwrite(Fd, Bins) of + ok -> + ok; + {error, {_NoWrites, Reason}} -> + file_error(FileName, {error, Reason}) + end. + +pread_close(Fd, FileName, Pos, Size) -> + case file:pread(Fd, Pos, Size) of + {error, Error} -> + file_error_close(Fd, FileName, {error, Error}); + {ok, Bin} when byte_size(Bin) < Size -> + file:close(Fd), + throw({error, {tooshort, FileName}}); + eof -> + file:close(Fd), + throw({error, {tooshort, FileName}}); + OK -> OK + end. + +file_error(FileName, {error, Reason}) -> + throw({error, {file_error, FileName, Reason}}). + +file_error_close(Fd, FileName, {error, Reason}) -> + file:close(Fd), + throw({error, {file_error, FileName, Reason}}). + +debug_mode() -> + os:getenv("DETS_DEBUG") =:= "true". + +bad_object(Where, Extra) -> + case debug_mode() of + true -> + {bad_object, Where, Extra}; + false -> + %% Avoid showing possibly secret data on the error logger. + {bad_object, Where} + end. + +read_n(Fd, Max) -> + case file:read(Fd, Max) of + {ok, Bin} -> + Bin; + _Else -> + eof + end. + +pread_n(Fd, Position, Max) -> + case file:pread(Fd, Position, Max) of + {ok, Bin} -> + Bin; + _ -> + eof + end. + +read_4(Fd, Position) -> + {ok, _} = file:position(Fd, Position), + <> = dets_utils:read_n(Fd, 4), + Four. + +corrupt_file(Head, {error, Reason}) -> + Error = {error, {file_error, Head#head.filename, Reason}}, + throw(corrupt(Head, Error)). + +%% -> {NewHead, Error} +corrupt_reason(Head, Reason0) -> + Reason = case get_disk_map() of + no_disk_map -> + Reason0; + DM -> + ST = erlang:get_stacktrace(), + PD = get(), + {Reason0, ST, PD, DM} + end, + Error = {error, {Reason, Head#head.filename}}, + corrupt(Head, Error). + +corrupt(Head, Error) -> + case get(verbose) of + yes -> + error_logger:format("** dets: Corrupt table ~p: ~p\n", + [Head#head.name, Error]); + _ -> ok + end, + case Head#head.update_mode of + {error, _} -> + {Head, Error}; + _ -> + {Head#head{update_mode = Error}, Error} + end. + +vformat(F, As) -> + case get(verbose) of + yes -> error_logger:format(F, As); + _ -> ok + end. + +code_to_type(?SET) -> set; +code_to_type(?BAG) -> bag; +code_to_type(?DUPLICATE_BAG) -> duplicate_bag; +code_to_type(_Type) -> badtype. + +type_to_code(set) -> ?SET; +type_to_code(bag) -> ?BAG; +type_to_code(duplicate_bag) -> ?DUPLICATE_BAG. + +%%% +%%% Write Cache +%%% + +cache_size(C) -> + {C#cache.delay, C#cache.tsize}. + +%% -> [object()] | false +cache_lookup(Type, [Key | Keys], CL, LU) -> + %% mkeysearch returns the _first_ tuple with a matching key. + case mkeysearch(Key, 1, CL) of + {value, {Key,{_Seq,{insert,Object}}}} when Type =:= set -> + cache_lookup(Type, Keys, CL, [Object | LU]); + {value, {Key,{_Seq,delete_key}}} -> + cache_lookup(Type, Keys, CL, LU); + _ -> + false + end; +cache_lookup(_Type, [], _CL, LU) -> + LU. + +reset_cache(C) -> + WrTime = C#cache.wrtime, + NewWrTime = if + WrTime =:= undefined -> + WrTime; + true -> + now() + end, + PK = family(C#cache.cache), + NewC = C#cache{cache = [], csize = 0, inserts = 0, wrtime = NewWrTime}, + {NewC, C#cache.inserts, PK}. + +is_empty_cache(Cache) -> + Cache#cache.cache =:= []. + +new_cache({Delay, Size}) -> + #cache{cache = [], csize = 0, inserts = 0, + tsize = Size, wrtime = undefined, delay = Delay}. + +%%% +%%% Buddy System +%%% + +%% Definitions for the buddy allocator. +-define(MAXBUD, 32). % 2 GB is maximum file size +-define(MAXFREELISTS, 50000000). % Bytes reserved for the free lists (at end). + +%%-define(DEBUG(X, Y), io:format(X, Y)). +-define(DEBUG(X, Y), true). + +%%% Algorithm : We use a buddy system on each file. This is nicely described +%%% in i.e. the last chapter of the first-grade text book +%%% Data structures and algorithms by Aho, Hopcroft and +%%% Ullman. I think buddy systems were invented by Knuth, a long +%%% time ago. + +init_slots_from_old_file([{Slot,Addr} | T], Ftab) -> + init_slot(Slot+1,[{Slot,Addr} | T], Ftab); +init_slots_from_old_file([], Ftab) -> + Ftab. + +init_slot(_Slot,[], Ftab) -> + Ftab; % should never happen +init_slot(_Slot,[{_Addr,0}|T], Ftab) -> + init_slots_from_old_file(T, Ftab); +init_slot(Slot,[{_Slot1,Addr}|T], Ftab) -> + Stree = element(Slot, Ftab), + %% io:format("init_slot ~p:~p~n",[Slot, Addr]), + init_slot(Slot,T,setelement(Slot, Ftab, bplus_insert(Stree, Addr))). + +%%% The free lists are kept in RAM, and written to the end of the file +%%% from time to time. It is possible that a considerable amount of +%%% memory is used for a fragmented file. +%%% +%%% To make things (slightly) worse (from a memory usage point of +%%% view), each traversal of the file starts with making a "map" of +%%% the allocated areas; only the allocated areas will be +%%% traversed. Creating a map involves inspecting and sorting the free +%%% lists. Since the map is passed on between client and server, it +%%% has to be a binary (to avoid copying a possibly huge term). +%%% +%%% An active map should always be protected by fixing the table. This +%%% prevents insertion of objects into the mapped area (where some +%%% objects may have been deleted). The means for implementing this +%%% protection is a copy of the free lists (using even more memory, if +%%% objects are inserted). The position to write an inserted object is +%%% found by looking at the free lists from the time when the table +%%% was fixed; areas within the mapped area that have been freed are +%%% hidden from the allocator. + +%% -> free_table() +%% A free table is a tuple of ?MAXBUD elements, element i handling +%% buddies of size 2^(i-1). +init_alloc(Base) -> + Ftab = empty_free_lists(), + Empty = bplus_empty_tree(), + setelement(?MAXBUD, Ftab, bplus_insert(Empty, Base)). + +empty_free_lists() -> + Empty = bplus_empty_tree(), + %% initiate a tuple with ?MAXBUD "Empty" elements + erlang:make_tuple(?MAXBUD, Empty). + +%% Only used when repairing or initiating. +alloc_many(Head, _Sz, 0, _A0) -> + Head; +alloc_many(Head, Sz, N, A0) -> + Ftab = Head#head.freelists, + Head#head{freelists = alloc_many1(Ftab, 1, Sz * N, A0, Head)}. + +%% -> NewFtab | throw(Error) +alloc_many1(Ftab, Pos, Size, A0, H) -> + {FPos, Addr} = find_first_free(Ftab, Pos, Pos, H), + true = Addr >= A0, % assertion + if + ?POW(FPos - 1) >= Size -> + alloc_many2(Ftab, sz2pos(Size), Size, A0, H); + true -> + NewFtab = reserve_buddy(Ftab, FPos, FPos, Addr), + NSize = Size - ?POW(FPos-1), + alloc_many1(NewFtab, FPos, NSize, Addr, H) + end. + +alloc_many2(Ftab, _Pos, 0, _A0, _H) -> + Ftab; +alloc_many2(Ftab, Pos, Size, A0, H) when Size band ?POW(Pos-1) > 0 -> + {FPos, Addr} = find_first_free(Ftab, Pos, Pos, H), + true = Addr >= A0, % assertion + NewFtab = reserve_buddy(Ftab, FPos, Pos, Addr), + NSize = Size - ?POW(Pos - 1), + alloc_many2(NewFtab, Pos-1, NSize, Addr, H); +alloc_many2(Ftab, Pos, Size, A0, H) -> + alloc_many2(Ftab, Pos-1, Size, A0, H). + +%% -> {NewHead, Addr, Log2} | throw(Error) +alloc(Head, Sz) when Head#head.fixed =/= false -> % when Sz > 0 + ?DEBUG("alloc of size ~p (fixed)", [Sz]), + Pos = sz2pos(Sz), + {Frozen, Ftab} = Head#head.freelists, + {FPos, Addr} = find_first_free(Frozen, Pos, Pos, Head), + NewFrozen = reserve_buddy(Frozen, FPos, Pos, Addr), + Ftab1 = undo_free(Ftab, FPos, Addr, Head#head.base), + NewFtab = move_down(Ftab1, FPos, Pos, Addr), + NewFreelists = {NewFrozen, NewFtab}, + {Head#head{freelists = NewFreelists}, Addr, Pos}; +alloc(Head, Sz) when Head#head.fixed =:= false -> % when Sz > 0 + ?DEBUG("alloc of size ~p", [Sz]), + Pos = sz2pos(Sz), + Ftab = Head#head.freelists, + {FPos, Addr} = find_first_free(Ftab, Pos, Pos, Head), + NewFtab = reserve_buddy(Ftab, FPos, Pos, Addr), + {Head#head{freelists = NewFtab}, Addr, Pos}. + +find_first_free(_Ftab, Pos, _Pos0, Head) when Pos > ?MAXBUD -> + throw({error, {no_more_space_on_file, Head#head.filename}}); +find_first_free(Ftab, Pos, Pos0, Head) -> + PosTab = element(Pos, Ftab), + case bplus_lookup_first(PosTab) of + undefined -> + find_first_free(Ftab, Pos+1, Pos0, Head); + {ok, Addr} when Addr + ?POW(Pos0-1) > ?POW(?MAXBUD-1)-?MAXFREELISTS -> + %% We would occupy (some of) the area reserved for the free lists. + throw({error, {no_more_space_on_file, Head#head.filename}}); + {ok, Addr} -> + {Pos, Addr} + end. + +%% When the table is fixed, free/4 may have joined buddies so that the +%% requested block is now part of some larger block. We have to find +%% that block, and insert free buddies along the way. +undo_free(Ftab, Pos, Addr, Base) -> + PosTab = element(Pos, Ftab), + case bplus_lookup(PosTab, Addr) of + undefined -> + {BuddyAddr, MoveUpAddr} = my_buddy(Addr, ?POW(Pos-1), Base), + NewFtab = setelement(Pos, Ftab, bplus_insert(PosTab, BuddyAddr)), + undo_free(NewFtab, Pos+1, MoveUpAddr, Base); + {ok, Addr} -> + NewPosTab = bplus_delete(PosTab, Addr), + setelement(Pos, Ftab, NewPosTab) + end. + +reserve_buddy(Ftab, Pos, Pos0, Addr) -> + PosTab = element(Pos, Ftab), + NewPosTab = bplus_delete(PosTab, Addr), + NewFtab = setelement(Pos, Ftab, NewPosTab), + move_down(NewFtab, Pos, Pos0, Addr). + +move_down(Ftab, Pos, Pos, _Addr) -> + ?DEBUG(" to address ~p, table ~p (~p bytes)~n", + [_Addr, Pos, ?POW(Pos-1)]), + Ftab; +move_down(Ftab, Pos, Pos0, Addr) -> + Pos_1 = Pos - 1, + Size = ?POW(Pos_1), + HighBuddy = (Addr + (Size bsr 1)), + NewPosTab_1 = bplus_insert(element(Pos_1, Ftab), HighBuddy), + NewFtab = setelement(Pos_1, Ftab, NewPosTab_1), + move_down(NewFtab, Pos_1, Pos0, Addr). + +%% -> {Head, Log2} +free(Head, Addr, Sz) -> + ?DEBUG("free of size ~p at address ~p~n", [Sz, Addr]), + Ftab = get_freelists(Head), + Pos = sz2pos(Sz), + {set_freelists(Head, free_in_pos(Ftab, Addr, Pos, Head#head.base)), Pos}. + +free_in_pos(Ftab, _Addr, Pos, _Base) when Pos > ?MAXBUD -> + Ftab; +free_in_pos(Ftab, Addr, Pos, Base) -> + PosTab = element(Pos, Ftab), + {BuddyAddr, MoveUpAddr} = my_buddy(Addr, ?POW(Pos-1), Base), + case bplus_lookup(PosTab, BuddyAddr) of + undefined -> % no buddy found + ?DEBUG(" table ~p, no buddy~n", [Pos]), + setelement(Pos, Ftab, bplus_insert(PosTab, Addr)); + {ok, BuddyAddr} -> % buddy found + PosTab1 = bplus_delete(PosTab, Addr), + PosTab2 = bplus_delete(PosTab1, BuddyAddr), + ?DEBUG(" table ~p, with buddy ~p~n", [Pos, BuddyAddr]), + NewFtab = setelement(Pos, Ftab, PosTab2), + free_in_pos(NewFtab, MoveUpAddr, Pos+1, Base) + end. + +get_freelists(Head) when Head#head.fixed =:= false -> + Head#head.freelists; +get_freelists(Head) when Head#head.fixed =/= false -> + {_Frozen, Current} = Head#head.freelists, + Current. + +set_freelists(Head, Ftab) when Head#head.fixed =:= false -> + Head#head{freelists = Ftab}; +set_freelists(Head, Ftab) when Head#head.fixed =/= false -> + {Frozen, _} = Head#head.freelists, + Head#head{freelists = {Frozen,Ftab}}. + +%% Bug: If Sz0 is equal to 2^k for some k, then 2^(k+1) bytes are +%% allocated (wasting 2^k bytes). Inlined. +sz2pos(N) when N > 0 -> + 1 + log2(N+1). + +%% Returns the i such that 2^(i-1) < N =< 2^i. +log2(N) when is_integer(N), N >= 0 -> + if N > ?POW(8) -> + if N > ?POW(10) -> + if N > ?POW(11) -> + if N > ?POW(12) -> + 12 + if N band (?POW(12)-1) =:= 0 -> + log2(N bsr 12); + true -> log2(1 + (N bsr 12)) + end; + true -> 12 + end; + true -> 11 + end; + N > ?POW(9) -> 10; + true -> 9 + end; + N > ?POW(4) -> + if N > ?POW(6) -> + if N > ?POW(7) -> 8; + true -> 7 + end; + N > ?POW(5) -> 6; + true -> 5 + end; + N > ?POW(2) -> + if + N > ?POW(3) -> 4; + true -> 3 + end; + N > ?POW(1) -> 2; + N >= ?POW(0) -> 1; + true -> 0 + end. + +make_zeros(0) -> []; +make_zeros(N) when N rem 2 =:= 0 -> + P = make_zeros(N div 2), + [P|P]; +make_zeros(N) -> + P = make_zeros(N div 2), + [0,P|P]. + +%% Calculate the buddy of Addr +my_buddy(Addr, Sz, Base) -> + case (Addr - Base) band Sz of + 0 -> % even, buddy is higher addr + {Addr+Sz, Addr}; + _ -> % odd, buddy is lower addr + T = Addr-Sz, + {T, T} + end. + +all_free(Head) -> + Tab = get_freelists(Head), + Base = Head#head.base, + case all_free(all(Tab), Base, Base, []) of + [{Base,Base} | L] -> L; + L -> L + end. + +all_free([], X0, Y0, F) -> + lists:reverse([{X0,Y0} | F]); +all_free([{X,Y} | L], X0, Y0, F) when Y0 =:= X -> + all_free(L, X0, Y, F); +all_free([{X,Y} | L], X0, Y0, F) when Y0 < X -> + all_free(L, X, Y, [{X0,Y0} | F]). + +all_allocated(Head) -> + all_allocated(all(get_freelists(Head)), 0, Head#head.base, []). + +all_allocated([], _X0, _Y0, []) -> + <<>>; +all_allocated([], _X0, _Y0, A0) -> + [<> | A] = lists:reverse(A0), + {From, To, list_to_binary(A)}; +all_allocated([{X,Y} | L], X0, Y0, A) when Y0 =:= X -> + all_allocated(L, X0, Y, A); +all_allocated([{X,Y} | L], _X0, Y0, A) when Y0 < X -> + all_allocated(L, X, Y, [<> | A]). + +all_allocated_as_list(Head) -> + all_allocated_as_list(all(get_freelists(Head)), 0, Head#head.base, []). + +all_allocated_as_list([], _X0, _Y0, []) -> + []; +all_allocated_as_list([], _X0, _Y0, A) -> + lists:reverse(A); +all_allocated_as_list([{X,Y} | L], X0, Y0, A) when Y0 =:= X -> + all_allocated_as_list(L, X0, Y, A); +all_allocated_as_list([{X,Y} | L], _X0, Y0, A) when Y0 < X -> + all_allocated_as_list(L, X, Y, [[Y0 | X] | A]). + +all(Tab) -> + all(Tab, tuple_size(Tab), []). + +all(_Tab, 0, L) -> + %% This is not as bad as it looks. L contains less than 32 runs, + %% so there will be only a small number of merges. + lists:sort(L); +all(Tab, I, L) -> + LL = collect_tree(element(I, Tab), I, L), + all(Tab, I-1, LL). + +%% Finds allocated areas between Addr (approx.) and Addr+Length. +find_allocated(Ftab, Addr, Length, Base) -> + MaxAddr = Addr + Length, + Ints = collect_all_interval(Ftab, Addr, MaxAddr, Base), + allocated(Ints, Addr, MaxAddr, Ftab, Base). + +allocated(Some, Addr, Max, Ftab, Base) -> + case allocated1(Some, Addr, Max, []) of + [] -> + case find_next_allocated(Ftab, Addr, Base) of + {From,_} -> + find_allocated(Ftab, From, ?CHUNK_SIZE, Base); + none -> + <<>> + end; + L -> + list_to_binary(lists:reverse(L)) + end. + +allocated1([], Y0, Max, A) when Y0 < Max -> + [<> | A]; +allocated1([], _Y0, _Max, A) -> + A; +allocated1([{X,Y} | L], Y0, Max, A) when Y0 >= X -> + allocated1(L, Y, Max, A); +allocated1([{X,Y} | L], Y0, Max, A) -> % when Y0 < X + allocated1(L, Y, Max, [<> | A]). + +%% Finds the first allocated area starting at Addr or later. +find_next_allocated(Ftab, Addr, Base) -> + case find_next_free(Ftab, Addr, Base) of + none -> + none; + {Addr1, Pos} when Addr1 =< Addr -> + find_next_allocated(Ftab, Addr1 + ?POW(Pos-1), Base); + {Next, _Pos} -> + {Addr, Next} + end. + +%% Finds the first free address starting att Addr or later. +%% -> none | {FirstFreeAddress, FtabPosition} +find_next_free(Ftab, Addr, Base) -> + MaxBud = tuple_size(Ftab), + find_next_free(Ftab, Addr, 1, MaxBud, -1, -1, Base). + +find_next_free(Ftab, Addr0, Pos, MaxBud, Next, PosN, Base) + when Pos =< MaxBud -> + Addr = adjust_addr(Addr0, Pos, Base), + PosTab = element(Pos, Ftab), + case bplus_lookup_next(PosTab, Addr-1) of + undefined -> + find_next_free(Ftab, Addr0, Pos+1, MaxBud, Next, PosN, Base); + {ok, Next1} when PosN =:= -1; Next1 < Next -> + find_next_free(Ftab, Addr0, Pos+1, MaxBud, Next1, Pos, Base); + {ok, _} -> + find_next_free(Ftab, Addr0, Pos+1, MaxBud, Next, PosN, Base) + end; +find_next_free(_Ftab, _Addr, _Pos, _MaxBud, -1, _PosN, _Base) -> + none; +find_next_free(_Ftab, _Addr, _Pos, _MaxBud, Next, PosN, _Base) -> + {Next, PosN}. + +collect_all_interval(Ftab, Addr, MaxAddr, Base) -> + MaxBud = tuple_size(Ftab), + collect_all_interval(Ftab, Addr, MaxAddr, 1, MaxBud, Base, []). + +collect_all_interval(Ftab, L0, U, Pos, MaxBud, Base, Acc0) when Pos =< MaxBud -> + PosTab = element(Pos, Ftab), + L = adjust_addr(L0, Pos, Base), + Acc = collect_interval(PosTab, Pos, L, U, Acc0), + collect_all_interval(Ftab, L0, U, Pos+1, MaxBud, Base, Acc); +collect_all_interval(_Ftab, _L, _U, _Pos, _MaxBud, _Base, Acc) -> + lists:sort(Acc). + +%% It could be that Addr is inside a free area. This function adjusts +%% the address so that is placed on a boundary in the Pos tree. Inlined. +adjust_addr(Addr, Pos, Base) -> + Pow = ?POW(Pos - 1), + Rem = (Addr - Base) rem Pow, + if + Rem =:= 0 -> + Addr; + Addr < Pow -> + Addr; + true -> + Addr - Rem + end. + +%%%----------------------------------------------------------------- +%%% The Disk Map is used for debugging only. +%%% Very tightly coupled to the way dets_v9 works. +%%%----------------------------------------------------------------- + +-define(DM, disk_map). + +get_disk_map() -> + case get(?DM) of + undefined -> no_disk_map; + T -> {disk_map, ets:tab2list(T)} + end. + +init_disk_map(Name) -> + error_logger:info_msg("** dets: (debug) using disk map for ~p~n", [Name]), + put(?DM, ets:new(any,[ordered_set])). + +stop_disk_map() -> + catch ets:delete(erase(?DM)). + +disk_map_segment_p(Fd, P) -> + case get(?DM) of + undefined -> + ok; + _T -> + disk_map_segment(P, pread_n(Fd, P, 8*256)) + end. + +disk_map_segment(P, Segment) -> + case get(?DM) of + undefined -> + ok; + T -> + Ps = segment_fragment_to_pointers(P, iolist_to_binary(Segment)), + Ss = [{X,<>} || + {_P1,<>} <- Ps, + X > 0], % optimization + dm(Ps ++ Ss, T) + end. + +disk_map_pread(P) -> + case get(?DM) of + undefined -> + ok; + T -> + case ets:lookup(T, P) of + [] -> + throw({pread, P, 8}); + [{P,{pointer,0,0}}] -> + ok; + [{P,{pointer,Pointer,Sz}}] -> + case ets:lookup(T, Pointer) of + %% _P =/= P after re-hash... + [{Pointer,{slot,_P,Sz}}] -> + ok; + Got -> + throw({pread, P, Pointer, Got}) + end; + Got -> + throw({pread, P, Got}) + end + end. + +-define(STATUS_POS, 4). +-define(BASE, 1336). +disk_map(Bins) -> + case get(?DM) of + undefined -> + ok; + T -> + Bs = [{P,iolist_to_binary(Io)} || {P,Io} <- Bins], + dm(Bs, T) + end. + +dm([{P,_Header} | Bs], T) when P < ?BASE -> + dm(Bs, T); +dm([{P0,<>} | Bs], T) -> + P = P0 - ?STATUS_POS, + case ets:lookup(T, P) of + [] -> + throw({free, P0}); + [{P,_OldSz}] -> + true = ets:delete(T, P) + end, + dm(Bs, T); +dm([{SlotP,<>} | Bs], T) -> + Ptr = case ets:lookup(T, {pointer,SlotP}) of + [{{pointer,SlotP}, Pointer}] -> + case ets:lookup(T, Pointer) of + [{Pointer,{pointer,SlotP,Sz2}}] -> + case log2(Sz) =:= log2(Sz2) of + true -> + Pointer; + false -> + throw({active, SlotP, Sz, Pointer, Sz2}) + end; + Got -> + throw({active, SlotP, Sz, Got}) + end; + [] -> + throw({active, SlotP, Sz}) + end, + true = ets:insert(T, {SlotP,{slot,Ptr,Sz}}), + dm(Bs, T); +dm([{P,<>} | Bs], T) -> + %% Look for slot object in Bs? + case prev(P, T) of + {Prev, PrevSz} -> + throw({prev, P, Sz, X, Prev, PrevSz}); + ok -> + ok + end, + case next(P, 8, T) of + {next, Next} -> + %% Can (should?) do more... + throw({next, P, Sz, X, Next}); + ok -> + ok + end, + true = ets:insert(T, {P,{pointer,X,Sz}}), + if + Sz =:= 0 -> + X = 0; + true -> + true = ets:insert(T, {{pointer,X}, P}) + end, + dm(Bs, T); +dm([{P,<>} | Bs], T) -> + case ets:lookup(T, X) of + [] -> throw({segment, P, X}); + [{X,{pointer,0,0}}] -> ok; + [{X,{pointer,P,X}}] -> ok + end, + dm(Bs, T); +dm([{P,<<_Sz:32,B0/binary>>=B} | Bs], T) -> + Overwrite = + case catch binary_to_term(B0) of % accepts garbage at end of binary + {'EXIT', _} -> + <<_Sz1:32,B1/binary>> = B0, + case catch binary_to_term(B1) of + {'EXIT', _} -> + false; + _ -> + true + end; + _ -> + true + end, + if + Overwrite -> + %% overwrite same + dm([{P-8,<<(byte_size(B) + 8):32,?ACTIVE:32,B/binary>>} | Bs], T); + true -> + dm(segment_fragment_to_pointers(P, B)++Bs, T) + end; +dm([], _T) -> + ok. + +segment_fragment_to_pointers(_P, <<>>) -> + []; +segment_fragment_to_pointers(P, <>) -> + [{P,SzP} | segment_fragment_to_pointers(P+8, B)]. + +prev(P, T) -> + case ets:prev(T, P) of + '$end_of_table' -> ok; + Prev -> + case ets:lookup(T, Prev) of + [{Prev,{pointer,_Ptr,_}}] when Prev + 8 > P -> + {Prev, 8}; + [{Prev,{slot,_,Sz}}] when Prev + Sz > P -> + {Prev, Sz}; + _ -> + ok + end + end. + +next(P, PSz, T) -> + case ets:next(T, P) of + '$end_of_table' -> ok; + Next when P + PSz > Next -> + {next, Next}; + _ -> + ok + end. + +%%%----------------------------------------------------------------- +%%% These functions implement a B+ tree. +%%%----------------------------------------------------------------- + +-define(max_size, 16). +-define(min_size, 8). +%%----------------------------------------------------------------- +%% Finds out the type of the node: 'l' or 'n'. +%%----------------------------------------------------------------- +-define(NODE_TYPE(Tree), element(1, Tree)). +%% Finds out if a node/leaf is full or not. +-define(FULL(Tree), (bplus_get_size(Tree) >= ?max_size)). +%% Finds out if a node/leaf is filled up over its limit. +-define(OVER_FULL(Tree), (bplus_get_size(Tree) > ?max_size)). +%% Finds out if a node/leaf has less items than allowed. +-define(UNDER_FILLED(Tree), (bplus_get_size(Tree) < ?min_size)). +%% Finds out if a node/leaf has as few items as minimum allowed. +-define(LOW_FILLED(Tree), (bplus_get_size(Tree) =< ?min_size)). +%%Returns a key in a leaf at position Pos. +-define(GET_LEAF_KEY(Leaf, Pos), element(Pos+1, Leaf)). + +%% Special for dets. +collect_tree(v, _TI, Acc) -> Acc; +collect_tree(T, TI, Acc) -> + Pow = ?POW(TI-1), + collect_tree2(T, Pow, Acc). + +collect_tree2(Tree, Pow, Acc) -> + S = bplus_get_size(Tree), + case ?NODE_TYPE(Tree) of + l -> + collect_leaf(Tree, S, Pow, Acc); + n -> + collect_node(Tree, S, Pow, Acc) + end. + +collect_leaf(_Leaf, 0, _Pow, Acc) -> + Acc; +collect_leaf(Leaf, I, Pow, Acc) -> + Key = ?GET_LEAF_KEY(Leaf, I), + V = {Key, Key+Pow}, + collect_leaf(Leaf, I-1, Pow, [V | Acc]). + +collect_node(_Node, 0, _Pow, Acc) -> + Acc; +collect_node(Node, I, Pow, Acc) -> + Acc1 = collect_tree2(bplus_get_tree(Node, I), Pow, Acc), + collect_node(Node, I-1, Pow, Acc1). + +%% Special for dets. +tree_to_bin(v, _F, _Max, Ws, WsSz) -> {Ws, WsSz}; +tree_to_bin(T, F, Max, Ws, WsSz) -> + {N, L1, Ws1, WsSz1} = tree_to_bin2(T, F, Max, 0, [], Ws, WsSz), + {N1, L2, Ws2, WsSz2} = F(N, lists:reverse(L1), Ws1, WsSz1), + {0, [], NWs, NWsSz} = F(N1, L2, Ws2, WsSz2), + {NWs, NWsSz}. + +tree_to_bin2(Tree, F, Max, N, Acc, Ws, WsSz) when N >= Max -> + {NN, NAcc, NWs, NWsSz} = F(N, lists:reverse(Acc), Ws, WsSz), + tree_to_bin2(Tree, F, Max, NN, lists:reverse(NAcc), NWs, NWsSz); +tree_to_bin2(Tree, F, Max, N, Acc, Ws, WsSz) -> + S = bplus_get_size(Tree), + case ?NODE_TYPE(Tree) of + l -> + {N+S, leaf_to_bin(bplus_leaf_to_list(Tree), Acc), Ws, WsSz}; + n -> + node_to_bin(Tree, F, Max, N, Acc, 1, S, Ws, WsSz) + end. + +node_to_bin(_Node, _F, _Max, N, Acc, I, S, Ws, WsSz) when I > S -> + {N, Acc, Ws, WsSz}; +node_to_bin(Node, F, Max, N, Acc, I, S, Ws, WsSz) -> + {N1,Acc1,Ws1,WsSz1} = + tree_to_bin2(bplus_get_tree(Node, I), F, Max, N, Acc, Ws, WsSz), + node_to_bin(Node, F, Max, N1, Acc1, I+1, S, Ws1, WsSz1). + +leaf_to_bin([N | L], Acc) -> + leaf_to_bin(L, [<> | Acc]); +leaf_to_bin([], Acc) -> + Acc. + +%% Special for dets. +list_to_tree(L) -> + leafs_to_nodes(L, length(L), fun bplus_mk_leaf/1, []). + +leafs_to_nodes([], 0, _F, [T]) -> + T; +leafs_to_nodes([], 0, _F, L) -> + leafs_to_nodes(lists:reverse(L), length(L), fun mk_node/1, []); +leafs_to_nodes(Ls, Sz, F, L) -> + I = if + Sz =< 16 -> Sz; + Sz =< 32 -> Sz div 2; + true -> 12 + end, + {L1, R} = split_list(Ls, I, []), + N = F(L1), + Sz1 = Sz - I, + leafs_to_nodes(R, Sz1, F, [N | L]). + +mk_node([E | Es]) -> + NL = [E | lists:foldr(fun(X, A) -> [get_first_key(X), X | A] end, [], Es)], + bplus_mk_node(NL). + +split_list(L, 0, SL) -> + {SL, L}; +split_list([E | Es], I, SL) -> + split_list(Es, I-1, [E | SL]). + +get_first_key(T) -> + case ?NODE_TYPE(T) of + l -> + ?GET_LEAF_KEY(T, 1); + n -> + get_first_key(bplus_get_tree(T, 1)) + end. + +%% Special for dets. +collect_interval(v, _TI, _L, _U, Acc) -> Acc; +collect_interval(T, TI, L, U, Acc) -> + Pow = ?POW(TI-1), + collect_interval2(T, Pow, L, U, Acc). + +collect_interval2(Tree, Pow, L, U, Acc) -> + S = bplus_get_size(Tree), + case ?NODE_TYPE(Tree) of + l -> + collect_leaf_interval(Tree, S, Pow, L, U, Acc); + n -> + {Max, _} = bplus_select_sub_tree(Tree, U), + {Min, _} = bplus_select_sub_tree_2(Tree, L, Max), + collect_node_interval(Tree, Min, Max, Pow, L, U, Acc) + end. + +collect_leaf_interval(_Leaf, 0, _Pow, _L, _U, Acc) -> + Acc; +collect_leaf_interval(Leaf, I, Pow, L, U, Acc) -> + Key = ?GET_LEAF_KEY(Leaf, I), + if + Key < L -> + Acc; + Key > U -> + collect_leaf_interval(Leaf, I-1, Pow, L, U, Acc); + true -> + collect_leaf_interval(Leaf, I-1, Pow, L, U, [{Key,Key+Pow} | Acc]) + end. + +collect_node_interval(_Node, I, UP, _Pow, _L, _U, Acc) when I > UP -> + Acc; +collect_node_interval(Node, I, UP, Pow, L, U, Acc) -> + Acc1 = collect_interval2(bplus_get_tree(Node, I), Pow, L, U, Acc), + collect_node_interval(Node, I+1, UP, Pow, L, U, Acc1). + +%%----------------------------------------------------------------- +%% Func: empty_tree/0 +%% Purpose: Creates a new empty tree. +%% Returns: tree() +%%----------------------------------------------------------------- +bplus_empty_tree() -> v. + +%%----------------------------------------------------------------- +%% Func: lookup/2 +%% Purpose: Looks for Key in the Tree. +%% Returns: {ok, {Key, Val}} | 'undefined'. +%%----------------------------------------------------------------- +bplus_lookup(v, _Key) -> undefined; +bplus_lookup(Tree, Key) -> + case ?NODE_TYPE(Tree) of + l -> + bplus_lookup_leaf(Key, Tree); + n -> + {_, SubTree} = bplus_select_sub_tree(Tree, Key), + bplus_lookup(SubTree, Key) + end. + +%%----------------------------------------------------------------- +%% Searches through a leaf until the Key is ok or +%% when it is determined that it does not exist. +%%----------------------------------------------------------------- +bplus_lookup_leaf(Key, Leaf) -> + bplus_lookup_leaf_2(Key, Leaf, bplus_get_size(Leaf)). + +bplus_lookup_leaf_2(_, _, 0) -> undefined; +bplus_lookup_leaf_2(Key, Leaf, N) -> + case ?GET_LEAF_KEY(Leaf, N) of + Key -> {ok, Key}; + _ -> + bplus_lookup_leaf_2(Key, Leaf, N-1) + end. + +%%----------------------------------------------------------------- +%% Func: lookup_first/1 +%% Purpose: Finds the smallest key in the entire Tree. +%% Returns: {ok, {Key, Val}} | 'undefined'. +%%----------------------------------------------------------------- +bplus_lookup_first(v) -> undefined; +bplus_lookup_first(Tree) -> + case ?NODE_TYPE(Tree) of + l -> + % Then it is the leftmost key here. + {ok, ?GET_LEAF_KEY(Tree, 1)}; + n -> + % Look in the leftmost subtree. + bplus_lookup_first(bplus_get_tree(Tree, 1)) + end. + + +%%----------------------------------------------------------------- +%% Func: lookup_next/2 +%% Purpose: Finds the next key nearest after Key. +%% Returns: {ok, {Key, Val}} | 'undefined'. NIX!!! +%%----------------------------------------------------------------- +bplus_lookup_next(v, _) -> undefined; +bplus_lookup_next(Tree, Key) -> + case ?NODE_TYPE(Tree) of + l -> + lookup_next_leaf(Key, Tree); + n -> + {Pos, SubTree} = bplus_select_sub_tree(Tree, Key), + case bplus_lookup_next(SubTree, Key) of + undefined -> + S = bplus_get_size(Tree), + if + % There is a right brother. + S > Pos -> + bplus_lookup_first(bplus_get_tree(Tree, Pos+1)); + % No there is no right brother. + true -> + undefined + end; + % We ok a next item. + Result -> + Result + end + end. + +%%----------------------------------------------------------------- +%% Returns {ok, NextKey} if there is a key in the leaf which is greater. +%% If there is no such key we return 'undefined' instead. +%% Key does not have to be a key in the structure, just a search value. +%%----------------------------------------------------------------- +lookup_next_leaf(Key, Leaf) -> + lookup_next_leaf_2(Key, Leaf, bplus_get_size(Leaf), 1). + +lookup_next_leaf_2(Key, Leaf, Size, Size) -> + % This is the rightmost key. + K = ?GET_LEAF_KEY(Leaf, Size), + if + K > Key -> + {ok, ?GET_LEAF_KEY(Leaf, Size)}; + true -> + undefined + end; +lookup_next_leaf_2(Key, Leaf, Size, N) -> + K = ?GET_LEAF_KEY(Leaf, N), + if + K < Key -> + % K is still smaller, try next in the leaf. + lookup_next_leaf_2(Key, Leaf, Size, N+1); + Key == K -> + % Since this is exact Key it must be the next. + {ok, ?GET_LEAF_KEY(Leaf, N+1)}; + true -> + % Key was not an exact specification. + % It must be K that is next greater. + {ok, ?GET_LEAF_KEY(Leaf, N)} + end. + +%%----------------------------------------------------------------- +%% Func: insert/3 +%% Purpose: Inserts a new {Key, Value} into the tree. +%% Returns: tree() +%%----------------------------------------------------------------- +bplus_insert(v, Key) -> bplus_mk_leaf([Key]); +bplus_insert(Tree, Key) -> + NewTree = bplus_insert_in(Tree, Key), + case ?OVER_FULL(NewTree) of + false -> + NewTree; + % If the node is over-full the tree will grow. + true -> + {LTree, DKey, RTree} = + case ?NODE_TYPE(NewTree) of + l -> + bplus_split_leaf(NewTree); + n -> + bplus_split_node(NewTree) + end, + bplus_mk_node([LTree, DKey, RTree]) + end. + +%%----------------------------------------------------------------- +%% Func: delete/2 +%% Purpose: Deletes a key from the tree (if present). +%% Returns: tree() +%%----------------------------------------------------------------- +bplus_delete(v, _Key) -> v; +bplus_delete(Tree, Key) -> + NewTree = bplus_delete_in(Tree, Key), + S = bplus_get_size(NewTree), + case ?NODE_TYPE(NewTree) of + l -> + if + S =:= 0 -> + v; + true -> + NewTree + end; + n -> + if + S =:= 1 -> + bplus_get_tree(NewTree, 1); + true -> + NewTree + end + end. + + +%%% ----------------------- +%%% Help function to insert. +%%% ----------------------- + +bplus_insert_in(Tree, Key) -> + case ?NODE_TYPE(Tree) of + l -> + bplus_insert_in_leaf(Tree, Key); + n -> + {Pos, SubTree} = bplus_select_sub_tree(Tree, Key), + % Pos = "the position of the subtree". + NewSubTree = bplus_insert_in(SubTree, Key), + case ?OVER_FULL(NewSubTree) of + false -> + bplus_put_subtree(Tree, [NewSubTree, Pos]); + true -> + case bplus_reorganize_tree_ins(Tree, NewSubTree, Pos) of + {left, {LeftT, DKey, MiddleT}} -> + bplus_put_subtree(bplus_put_lkey(Tree, DKey, Pos), + [LeftT, Pos-1, MiddleT, Pos]); + {right, {MiddleT, DKey, RightT}} -> + bplus_put_subtree(bplus_put_rkey(Tree, DKey, Pos), + [MiddleT, Pos, RightT, Pos+1]); + {split, {LeftT, DKey, RightT}} -> + bplus_extend_tree(Tree, {LeftT, DKey, RightT}, Pos) + end + end + end. + +%%----------------------------------------------------------------- +%% Inserts a key in correct position in a leaf. +%%----------------------------------------------------------------- +bplus_insert_in_leaf(Leaf, Key) -> + bplus_insert_in_leaf_2(Leaf, Key, bplus_get_size(Leaf), []). + +bplus_insert_in_leaf_2(Leaf, Key, 0, Accum) -> + bplus_insert_in_leaf_3(Leaf, 0, [Key|Accum]); +bplus_insert_in_leaf_2(Leaf, Key, N, Accum) -> + K = ?GET_LEAF_KEY(Leaf, N), + if + Key < K -> + % Not here! + bplus_insert_in_leaf_2(Leaf, Key, N-1, [K|Accum]); + K < Key -> + % Insert here. + bplus_insert_in_leaf_3(Leaf, N-1, [K, Key|Accum]); + K == Key -> + % Replace (?). + bplus_insert_in_leaf_3(Leaf, N-1, [ Key|Accum]) + end. + +bplus_insert_in_leaf_3(_Leaf, 0, LeafList) -> + bplus_mk_leaf(LeafList); +bplus_insert_in_leaf_3(Leaf, N, LeafList) -> + bplus_insert_in_leaf_3(Leaf, N-1, [?GET_LEAF_KEY(Leaf, N)|LeafList]). + + +%%% ------------------------- +%%% Help functions for delete. +%%% ------------------------- + +bplus_delete_in(Tree, Key) -> + case ?NODE_TYPE(Tree) of + l -> + bplus_delete_in_leaf(Tree, Key); + n -> + {Pos, SubTree} = bplus_select_sub_tree(Tree, Key), + % Pos = "the position of the subtree". + NewSubTree = bplus_delete_in(SubTree, Key), + % Check if it has become to small now + case ?UNDER_FILLED(NewSubTree) of + false -> + bplus_put_subtree(Tree, [NewSubTree, Pos]); + true -> + case bplus_reorganize_tree_del(Tree, NewSubTree, Pos) of + {left, {LeftT, DKey, MiddleT}} -> + bplus_put_subtree(bplus_put_lkey(Tree, DKey, Pos), + [LeftT, Pos-1, MiddleT, Pos]); + {right, {MiddleT, DKey, RightT}} -> + bplus_put_subtree(bplus_put_rkey(Tree, DKey, Pos), + [MiddleT, Pos, RightT, Pos+1]); + {join_left, JoinedTree} -> + bplus_joinleft_tree(Tree, JoinedTree, Pos); + {join_right, JoinedTree} -> + bplus_joinright_tree(Tree, JoinedTree, Pos) + end + end + end. + +%%----------------------------------------------------------------- +%% Deletes a key from the leaf returning a new (smaller) leaf. +%%----------------------------------------------------------------- +bplus_delete_in_leaf(Leaf, Key) -> + bplus_delete_in_leaf_2(Leaf, Key, bplus_get_size(Leaf), []). + +bplus_delete_in_leaf_2(Leaf, _, 0, _) -> Leaf; +bplus_delete_in_leaf_2(Leaf, Key, N, Accum) -> + K = ?GET_LEAF_KEY(Leaf, N), + if + Key == K -> + % Remove this one! + bplus_delete_in_leaf_3(Leaf, N-1, Accum); + true -> + bplus_delete_in_leaf_2(Leaf, Key, N-1, [K|Accum]) + end. + +bplus_delete_in_leaf_3(_Leaf, 0, LeafList) -> + bplus_mk_leaf(LeafList); +bplus_delete_in_leaf_3(Leaf, N, LeafList) -> + bplus_delete_in_leaf_3(Leaf, N-1, [?GET_LEAF_KEY(Leaf, N)|LeafList]). + + + +%%----------------------------------------------------------------- +%% Selects and returns which subtree the search should continue in. +%%----------------------------------------------------------------- +bplus_select_sub_tree(Tree, Key) -> + bplus_select_sub_tree_2(Tree, Key, bplus_get_size(Tree)). + +bplus_select_sub_tree_2(Tree, _Key, 1) -> {1, bplus_get_tree(Tree, 1)}; +bplus_select_sub_tree_2(Tree, Key, N) -> + K = bplus_get_lkey(Tree, N), + if + K > Key -> + bplus_select_sub_tree_2(Tree, Key, N-1); + K =< Key -> + % Here it is! + {N, bplus_get_tree(Tree, N)} + end. + +%%----------------------------------------------------------------- +%% Selects which brother that should take over some of our items. +%% Or if they are both full makes a split. +%%----------------------------------------------------------------- +bplus_reorganize_tree_ins(Tree, NewSubTree, 1) -> + RTree = bplus_get_tree(Tree, 2), % 2 = Pos+1 = 1+1. + case ?FULL(RTree) of + false -> + bplus_reorganize_tree_r(Tree, NewSubTree, 1, RTree); + true -> + % It is full, we must split this one! + bplus_reorganize_tree_s(NewSubTree) + end; +bplus_reorganize_tree_ins(Tree, NewSubTree, Pos) -> + Size = bplus_get_size(Tree), + if + Pos == Size -> + % Pos is the rightmost postion!. + % Our only chance is the left one. + LTree = bplus_get_tree(Tree, Pos-1), + case ?FULL(LTree) of + false -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + % It is full, we must split this one! + bplus_reorganize_tree_s(NewSubTree) + end; + true -> + % Pos is somewhere inside the node. + LTree = bplus_get_tree(Tree, Pos-1), + RTree = bplus_get_tree(Tree, Pos+1), + SL = bplus_get_size(LTree), + SR = bplus_get_size(RTree), + if + SL > SR -> + bplus_reorganize_tree_r(Tree, NewSubTree, Pos, RTree); + SL < SR -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + case ?FULL(LTree) of + false -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + bplus_reorganize_tree_s(NewSubTree) + end + end + end. + +%%----------------------------------------------------------------- +%% This function fills over items from brothers to maintain the minimum +%% number of items per node/leaf. +%%----------------------------------------------------------------- +bplus_reorganize_tree_del(Tree, NewSubTree, 1) -> + % The case when Pos is at leftmost position. + RTree = bplus_get_tree(Tree, 2), % 2 = Pos+1 = 1+1. + case ?LOW_FILLED(RTree) of + false -> + bplus_reorganize_tree_r(Tree, NewSubTree, 1, RTree); + true -> + % It is to small, we must join them! + bplus_reorganize_tree_jr(Tree, NewSubTree, 1, RTree) + end; +bplus_reorganize_tree_del(Tree, NewSubTree, Pos) -> + Size = bplus_get_size(Tree), + if + Pos == Size -> + % Pos is the rightmost postion!. + % Our only chance is the left one. + LTree = bplus_get_tree(Tree, Pos-1), + case ?LOW_FILLED(LTree) of + false -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + % It is to small, we must join this one! + bplus_reorganize_tree_jl(Tree, NewSubTree, Pos, LTree) + end; + true -> + % Pos is somewhere inside the node. + LTree = bplus_get_tree(Tree, Pos-1), + RTree = bplus_get_tree(Tree, Pos+1), + SL = bplus_get_size(LTree), + SR = bplus_get_size(RTree), + if + SL>SR -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + SL < SR -> + bplus_reorganize_tree_r(Tree, NewSubTree, Pos, RTree); + true -> + case ?LOW_FILLED(LTree) of + false -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + bplus_reorganize_tree_jl(Tree, NewSubTree, Pos, LTree) + end + end + end. + + +bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {left, bplus_split_leaf( + bplus_mk_leaf( + lists:append(bplus_leaf_to_list(LTree), + bplus_leaf_to_list(NewSubTree))))}; + n -> + {left, bplus_split_node( + bplus_mk_node( + lists:append([bplus_node_to_list(LTree), + [bplus_get_lkey(Tree, Pos)], + bplus_node_to_list(NewSubTree)])))} + end. + +bplus_reorganize_tree_r(Tree, NewSubTree, Pos, RTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {right, + bplus_split_leaf( + bplus_mk_leaf( + lists:append([bplus_leaf_to_list(NewSubTree), + bplus_leaf_to_list(RTree)])))}; + n -> + {right, + bplus_split_node( + bplus_mk_node( + lists:append([bplus_node_to_list(NewSubTree), + [bplus_get_rkey(Tree, Pos)], + bplus_node_to_list(RTree)])))} + end. + +bplus_reorganize_tree_s(NewSubTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {split, bplus_split_leaf(NewSubTree)}; + n -> + {split, bplus_split_node(NewSubTree)} + end. + +bplus_reorganize_tree_jl(Tree, NewSubTree, Pos, LTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {join_left, + bplus_mk_leaf(lists:append([bplus_leaf_to_list(LTree), + bplus_leaf_to_list(NewSubTree)]))}; + n -> + {join_left, + bplus_mk_node(lists:append([bplus_node_to_list(LTree), + [bplus_get_lkey(Tree, Pos)], + bplus_node_to_list(NewSubTree)]))} + end. + +bplus_reorganize_tree_jr(Tree, NewSubTree, Pos, RTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {join_right, + bplus_mk_leaf(lists:append([bplus_leaf_to_list(NewSubTree), + bplus_leaf_to_list(RTree)]))}; + n -> + {join_right, + bplus_mk_node(lists:append([bplus_node_to_list(NewSubTree), + [bplus_get_rkey(Tree, Pos)], + bplus_node_to_list(RTree)]))} + end. + + +%%----------------------------------------------------------------- +%% Takes a leaf and divides it into two equal big leaves. +%% The result is returned in a tuple. The dividing key is also returned. +%%----------------------------------------------------------------- +bplus_split_leaf(Leaf) -> + S = bplus_get_size(Leaf), + bplus_split_leaf_2(Leaf, S, S div 2, []). + +bplus_split_leaf_2(Leaf, Pos, 1, Accum) -> + K = ?GET_LEAF_KEY(Leaf, Pos), + bplus_split_leaf_3(Leaf, Pos-1, [], K, [K|Accum]); +bplus_split_leaf_2(Leaf, Pos, N, Accum) -> + bplus_split_leaf_2(Leaf, Pos-1, N-1, [?GET_LEAF_KEY(Leaf, Pos)|Accum]). + +bplus_split_leaf_3(_, 0, LeftAcc, DKey, RightAcc) -> + {bplus_mk_leaf(LeftAcc), DKey, bplus_mk_leaf(RightAcc)}; +bplus_split_leaf_3(Leaf, Pos, LeftAcc, DKey, RightAcc) -> + bplus_split_leaf_3(Leaf, Pos-1, [?GET_LEAF_KEY(Leaf, Pos)|LeftAcc], + DKey, RightAcc). + +%%----------------------------------------------------------------- +%% Takes a node and divides it into two equal big nodes. +%% The result is returned in a tuple. The dividing key is also returned. +%%----------------------------------------------------------------- +bplus_split_node(Node) -> + S = bplus_get_size(Node), + bplus_split_node_2(Node, S, S div 2, []). + +bplus_split_node_2(Node, Pos, 1, Accum) -> + bplus_split_node_3(Node, Pos-1, [], bplus_get_lkey(Node, Pos), + [bplus_get_tree(Node, Pos)|Accum]); +bplus_split_node_2(Node, Pos, N, Accum) -> + bplus_split_node_2(Node, Pos-1, N-1, [bplus_get_lkey(Node, Pos), + bplus_get_tree(Node, Pos)|Accum]). + +bplus_split_node_3(Node, 1, LeftAcc, DKey, RightAcc) -> + {bplus_mk_node([bplus_get_tree(Node, 1)|LeftAcc]), DKey, + bplus_mk_node(RightAcc)}; +bplus_split_node_3(Node, Pos, LeftAcc, DKey, RightAcc) -> + bplus_split_node_3(Node, Pos-1, + [bplus_get_lkey(Node, Pos), + bplus_get_tree(Node, Pos)|LeftAcc], + DKey, RightAcc). + +%%----------------------------------------------------------------- +%% Inserts a joined tree insted of the old one at position Pos and +%% the one nearest left/right brother. +%%----------------------------------------------------------------- +bplus_joinleft_tree(Tree, JoinedTree, Pos) -> + bplus_join_tree_2(Tree, JoinedTree, Pos, bplus_get_size(Tree), []). +bplus_joinright_tree(Tree, JoinedTree, Pos) -> + bplus_join_tree_2(Tree, JoinedTree, Pos+1, bplus_get_size(Tree), []). + +bplus_join_tree_2(Tree, JoinedTree, Pos, Pos, Accum) -> + bplus_join_tree_3(Tree, Pos-2, [JoinedTree|Accum]); +bplus_join_tree_2(Tree, JoinedTree, Pos, N, Accum) -> + bplus_join_tree_2(Tree, JoinedTree, Pos, N-1, + [bplus_get_lkey(Tree, N), bplus_get_tree(Tree, N)|Accum]). + +bplus_join_tree_3(_Tree, 0, Accum) -> bplus_mk_node(Accum); +bplus_join_tree_3(Tree, Pos, Accum) -> + bplus_join_tree_3(Tree, Pos-1, [bplus_get_tree(Tree, Pos), + bplus_get_rkey(Tree, Pos)|Accum]). + +%%% --------------------------------- +%%% Primitive datastructure functions. +%%% --------------------------------- + +%%----------------------------------------------------------------- +%% Constructs a node out of list format. +%%----------------------------------------------------------------- +bplus_mk_node(NodeList) -> list_to_tuple([ n |NodeList]). + +%%----------------------------------------------------------------- +%% Converts the node into list format. +%%----------------------------------------------------------------- +bplus_node_to_list(Node) -> + [_|NodeList] = tuple_to_list(Node), + NodeList. + +%%----------------------------------------------------------------- +%% Constructs a leaf out of list format. +%%----------------------------------------------------------------- +bplus_mk_leaf(KeyList) -> list_to_tuple([l|KeyList]). + +%%----------------------------------------------------------------- +%% Converts a leaf into list format. +%%----------------------------------------------------------------- +bplus_leaf_to_list(Leaf) -> + [_|LeafList] = tuple_to_list(Leaf), + LeafList. + +%%----------------------------------------------------------------- +%% Changes subtree "pointers" in a node. +%%----------------------------------------------------------------- +bplus_put_subtree(Tree, []) -> Tree; +bplus_put_subtree(Tree, [NewSubTree, Pos|Rest]) -> + bplus_put_subtree(setelement(Pos*2, Tree, NewSubTree), Rest). + +%%----------------------------------------------------------------- +%% Replaces the tree at position Pos with two new trees. +%%----------------------------------------------------------------- +bplus_extend_tree(Tree, Inserts, Pos) -> + bplus_extend_tree_2(Tree, Inserts, Pos, bplus_get_size(Tree), []). + +bplus_extend_tree_2(Tree, {T1, DKey, T2}, Pos, Pos, Accum) -> + bplus_extend_tree_3(Tree, Pos-1, [T1, DKey, T2|Accum]); +bplus_extend_tree_2(Tree, Inserts, Pos, N, Accum) -> + bplus_extend_tree_2(Tree, Inserts, Pos, N-1, + [bplus_get_lkey(Tree, N), bplus_get_tree(Tree, N)|Accum]). + +bplus_extend_tree_3(_, 0, Accum) -> bplus_mk_node(Accum); +bplus_extend_tree_3(Tree, N, Accum) -> + bplus_extend_tree_3(Tree, N-1, [bplus_get_tree(Tree, N), + bplus_get_rkey(Tree, N)|Accum]). + +%%----------------------------------------------------------------- +%% Changes the dividing key between two trees. +%%----------------------------------------------------------------- +bplus_put_lkey(Tree, DKey, Pos) -> setelement(Pos*2-1, Tree, DKey). +bplus_put_rkey(Tree, DKey, Pos) -> setelement(Pos*2+1, Tree, DKey). + + +%%----------------------------------------------------------------- +%% Calculates the number of items in a node/leaf. +%%----------------------------------------------------------------- +bplus_get_size(Tree) -> + case ?NODE_TYPE(Tree) of + l -> + tuple_size(Tree)-1; + n -> + tuple_size(Tree) div 2 + end. + +%%----------------------------------------------------------------- +%% Returns a tree at position Pos from an internal node. +%%----------------------------------------------------------------- +bplus_get_tree(Tree, Pos) -> element(Pos*2, Tree). + +%%----------------------------------------------------------------- +%% Returns dividing keys, left of or right of a tree. +%%----------------------------------------------------------------- +bplus_get_lkey(Tree, Pos) -> element(Pos*2-1, Tree). +bplus_get_rkey(Tree, Pos) -> element(Pos*2+1, Tree). + diff --git a/lib/stdlib/src/dets_v8.erl b/lib/stdlib/src/dets_v8.erl new file mode 100644 index 0000000000..b24df02882 --- /dev/null +++ b/lib/stdlib/src/dets_v8.erl @@ -0,0 +1,1591 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(dets_v8). + +%% Dets files, implementation part. This module handles versions up to +%% and including 8(c). To be called from dets.erl only. + +-export([constants/0, mark_dirty/1, read_file_header/2, + check_file_header/2, do_perform_save/1, initiate_file/11, + init_freelist/2, fsck_input/4, + bulk_input/3, output_objs/4, write_cache/1, may_grow/3, + find_object/2, re_hash/2, slot_objs/2, scan_objs/8, + db_hash/2, no_slots/1, table_parameters/1]). + +-export([file_info/1, v_segments/1]). + +-export([cache_segps/3]). + +%% For backward compatibility. +-export([sz2pos/1]). + +-compile({inline, [{sz2pos,1},{scan_skip,7}]}). +-compile({inline, [{skip_bytes,5}, {get_segp,1}]}). +-compile({inline, [{wl_lookup,5}]}). +-compile({inline, [{actual_seg_size,0}]}). + +-include("dets.hrl"). + +%% The layout of the file is : +%% +%% bytes decsription +%% ---------------------- File header +%% 4 FreelistsPointer +%% 4 Cookie +%% 4 ClosedProperly (pos=8) +%% 4 Type (pos=12) +%% 4 Version (pos=16) +%% 4 M +%% 4 Next +%% 4 KeyPos +%% 4 NoObjects +%% 4 N +%% ------------------ end of file header +%% 4*8192 SegmentArray +%% ------------------ +%% 4*256 First segment +%% ----------------------------- This is BASE. +%% ??? Objects (free and alive) +%% 4*256 Second segment (2 kB now, due to a bug) +%% ??? Objects (free and alive) +%% ... more objects and segments ... +%% ----------------------------- +%% ??? Free lists +%% ----------------------------- +%% 4 File size, in bytes. + +%% The first slot (0) in the segment array always points to the +%% pre-allocated first segment. +%% Before we can find an object we must find the slot where the +%% object resides. Each slot is a (possibly empty) list (or chain) of +%% objects that hash to the same slot. If the value stored in the +%% slot is zero, the slot chain is empty. If the slot value is +%% non-zero, the value points to a position in the file where the +%% chain starts. Each object in a chain has the following layout: +%% +%% bytes decsription +%% -------------------- +%% 4 Pointer to the next object of the chain. +%% 4 Size of the object in bytes (Sz). +%% 4 Status (FREE or ACTIVE) +%% Sz Binary representing the object +%% +%% The status field is used while repairing a file (but not next or size). +%% +%%|---------------| +%%| head | +%%| | +%%| | +%%|_______________| +%%| |------| +%%|___seg ptr1____| | +%%| | | +%%|__ seg ptr 2___| | +%%| | | segment 1 +%%| .... | V _____________ +%% | | +%% | | +%% |___slot 0 ____| +%% | | +%% |___slot 1 ____|-----| +%% | | | +%% | ..... | | 1:st obj in slot 1 +%% V segment 1 +%% |-----------| +%% | next | +%% |___________| +%% | size | +%% |___________| +%% | status | +%% |___________| +%% | | +%% | | +%% | obj | +%% | | + +%%% +%%% File header +%%% + +-define(HEADSZ, 40). % The size of the file header, in bytes. +-define(SEGSZ, 256). % Size of a segment, in words. +-define(SEGSZ_LOG2, 8). +-define(SEGARRSZ, 8192). % Maximal number of segments. +-define(SEGADDR(SegN), (?HEADSZ + (4 * (SegN)))). +-define(BASE, ?SEGADDR((?SEGSZ + ?SEGARRSZ))). +-define(MAXOBJS, (?SEGSZ * ?SEGARRSZ)). % 2 M objects + +-define(SLOT2SEG(S), ((S) bsr ?SEGSZ_LOG2)). + +%% BIG is used for hashing. BIG must be greater than the maximum +%% number of slots, currently MAXOBJS. +-define(BIG, 16#ffffff). + +%% Hard coded positions into the file header: +-define(FREELIST_POS, 0). +-define(CLOSED_PROPERLY_POS, 8). +-define(D_POS, 20). +-define(NO_OBJECTS_POS, (?D_POS + 12)). + +%% The version of a dets file is indicated by the ClosedProperly +%% field. Version 6 was used in the R1A release, and version 7 in the +%% R1B release up to and including the R3B01 release. Both version 6 +%% and version 7 indicate properly closed files by the value +%% CLOSED_PROPERLY. +%% +%% The current version, 8, has three sub-versions: +%% +%% - 8(a), indicated by the value CLOSED_PROPERLY (same as in versions 6 +%% and 7), introduced in R3B02; +%% - 8(b), indicated by the value CLOSED_PROPERLY2(_NEED_COMPACTING), +%% introduced in R5A and used up to and including R6A; +%% - 8(c), indicated by the value CLOSED_PROPERLY_NEW_HASH(_NEED_COMPACTING), +%% in use since R6B. +%% +%% The difference between the 8(a) and the 8(b) versions is the format +%% used for free lists saved on dets files. +%% The 8(c) version uses a different hashing algorithm, erlang:phash +%% (former versions use erlang:hash). +%% Version 8(b) files are only converted to version 8(c) if repair is +%% done, so we need compatability with 8(b) for a _long_ time. +%% +%% There are known bugs due to the fact that keys and objects are +%% sometimes compared (==) and sometimes matched (=:=). The version +%% used by default (9, see dets_v9.erl) does not have this problem. + +-define(NOT_PROPERLY_CLOSED,0). +-define(CLOSED_PROPERLY,1). +-define(CLOSED_PROPERLY2,2). +-define(CLOSED_PROPERLY2_NEED_COMPACTING,3). +-define(CLOSED_PROPERLY_NEW_HASH,4). +-define(CLOSED_PROPERLY_NEW_HASH_NEED_COMPACTING,5). + +-define(FILE_FORMAT_VERSION, 8). +-define(CAN_BUMP_BY_REPAIR, [6, 7]). +-define(CAN_CONVERT_FREELIST, [8]). + +%%% +%%% Object header (next, size, status). +%%% + +-define(OHDSZ, 12). % The size of the object header, in bytes. +-define(STATUS_POS, 8). % Position of the status field. + +%% The size of each object is a multiple of 16. +%% BUMP is used when repairing files. +-define(BUMP, 16). + +-define(ReadAhead, 512). + +%%-define(DEBUGF(X,Y), io:format(X, Y)). +-define(DEBUGF(X,Y), void). + +%% {Bump} +constants() -> + {?BUMP, ?BASE}. + +%% -> ok | throw({NewHead,Error}) +mark_dirty(Head) -> + Dirty = [{?CLOSED_PROPERLY_POS, <>}], + dets_utils:pwrite(Head, Dirty), + dets_utils:sync(Head), + dets_utils:position(Head, Head#head.freelists_p), + dets_utils:truncate(Head, cur). + +%% -> {ok, head()} | throw(Error) +initiate_file(Fd, Tab, Fname, Type, Kp, MinSlots, MaxSlots, + Ram, CacheSz, Auto, _DoInitSegments) -> + Freelist = 0, + Cookie = ?MAGIC, + ClosedProperly = ?NOT_PROPERLY_CLOSED, % immediately overwritten + Version = ?FILE_FORMAT_VERSION, + Factor = est_no_segments(MinSlots), + N = 0, + M = Next = ?SEGSZ * Factor, + NoObjects = 0, + dets_utils:pwrite(Fd, Fname, 0, + <>), % Initialize first segment + %% We must set the first slot of the segment pointer array to + %% point to the first segment + Pos = ?SEGADDR(0), + SegP = (?HEADSZ + (4 * ?SEGARRSZ)), + dets_utils:pwrite(Fd, Fname, Pos, <>), + segp_cache(Pos, SegP), + + Ftab = dets_utils:init_alloc(?BASE), + H0 = #head{freelists=Ftab, fptr = Fd, base = ?BASE}, + {H1, Ws} = init_more_segments(H0, 1, Factor, undefined, []), + + %% This is not optimal but simple: always initiate the segments. + dets_utils:pwrite(Fd, Fname, Ws), + + %% Return a new nice head structure + Head = #head{ + m = M, + m2 = M * 2, + next = Next, + fptr = Fd, + no_objects = NoObjects, + n = N, + type = Type, + update_mode = dirty, + freelists = H1#head.freelists, + auto_save = Auto, + hash_bif = phash, + keypos = Kp, + min_no_slots = Factor * ?SEGSZ, + max_no_slots = no_segs(MaxSlots) * ?SEGSZ, + + ram_file = Ram, + filename = Fname, + name = Tab, + cache = dets_utils:new_cache(CacheSz), + version = Version, + bump = ?BUMP, + base = ?BASE, + mod = ?MODULE + }, + {ok, Head}. + +est_no_segments(MinSlots) when 1 + ?SLOT2SEG(MinSlots) > ?SEGARRSZ -> + ?SEGARRSZ; +est_no_segments(MinSlots) -> + 1 + ?SLOT2SEG(MinSlots). + +init_more_segments(Head, SegNo, Factor, undefined, Ws) when SegNo < Factor -> + init_more_segments(Head, SegNo, Factor, seg_zero(), Ws); +init_more_segments(Head, SegNo, Factor, SegZero, Ws) when SegNo < Factor -> + {NewHead, W} = allocate_segment(Head, SegZero, SegNo), + init_more_segments(NewHead, SegNo+1, Factor, SegZero, W++Ws); +init_more_segments(Head, _SegNo, _Factor, _SegZero, Ws) -> + {Head, Ws}. + +allocate_segment(Head, SegZero, SegNo) -> + %% may throw error: + {NewHead, Segment, _} = dets_utils:alloc(Head, 4 * ?SEGSZ), + InitSegment = {Segment, SegZero}, + Pos = ?SEGADDR(SegNo), + segp_cache(Pos, Segment), + SegPointer = {Pos, <>}, + {NewHead, [InitSegment, SegPointer]}. + +%% Read free lists (using a Buddy System) from file. +init_freelist(Head, {convert_freelist,_Version}) -> + %% This function converts the saved freelist of the form + %% [{Slot1,Addr1},{Addr1,Addr2},...,{AddrN,0},{Slot2,Addr},...] + %% i.e each slot is a linked list which ends with a 0. + %% This is stored in a bplus_tree per Slot. + %% Each Slot is a position in a tuple. + + Ftab = dets_utils:empty_free_lists(), + Pos = Head#head.freelists_p, + case catch prterm(Head, Pos, ?OHDSZ) of + {0, _Sz, Term} -> + FreeList = lists:reverse(Term), + dets_utils:init_slots_from_old_file(FreeList, Ftab); + _ -> + throw({error, {bad_freelists, Head#head.filename}}) + end; +init_freelist(Head, _) -> + %% bplus_tree stored as is + Pos = Head#head.freelists_p, + case catch prterm(Head, Pos, ?OHDSZ) of + {0, _Sz, Term} -> + Term; + _ -> + throw({error, {bad_freelists, Head#head.filename}}) + end. + +%% -> {ok, Fd, fileheader()} | throw(Error) +read_file_header(Fd, FileName) -> + {ok, Bin} = dets_utils:pread_close(Fd, FileName, 0, ?HEADSZ), + [Freelist, Cookie, CP, Type2, Version, M, Next, Kp, NoObjects, N] = + bin2ints(Bin), + {ok, EOF} = dets_utils:position_close(Fd, FileName, eof), + {ok, <>} = dets_utils:pread_close(Fd, FileName, EOF-4, 4), + FH = #fileheader{freelist = Freelist, + cookie = Cookie, + closed_properly = CP, + type = dets_utils:code_to_type(Type2), + version = Version, + m = M, + next = Next, + keypos = Kp, + no_objects = NoObjects, + min_no_slots = ?DEFAULT_MIN_NO_SLOTS, + max_no_slots = ?DEFAULT_MAX_NO_SLOTS, + trailer = FileSize, + eof = EOF, + n = N, + mod = ?MODULE}, + {ok, Fd, FH}. + +%% -> {ok, head(), ExtraInfo} | {error, Reason} (Reason lacking file name) +%% ExtraInfo = {convert_freelist, Version} | true | need_compacting +check_file_header(FH, Fd) -> + Test = + if + FH#fileheader.cookie =/= ?MAGIC -> + {error, not_a_dets_file}; + FH#fileheader.type =:= badtype -> + {error, invalid_type_code}; + FH#fileheader.version =/= ?FILE_FORMAT_VERSION -> + case lists:member(FH#fileheader.version, + ?CAN_BUMP_BY_REPAIR) of + true -> + {error, version_bump}; + false -> + {error, bad_version} + end; + FH#fileheader.trailer =/= FH#fileheader.eof -> + {error, not_closed}; + FH#fileheader.closed_properly =:= ?CLOSED_PROPERLY -> + case lists:member(FH#fileheader.version, + ?CAN_CONVERT_FREELIST) of + true -> + {ok, {convert_freelist, FH#fileheader.version}, hash}; + false -> + {error, not_closed} % should not happen + end; + FH#fileheader.closed_properly =:= ?CLOSED_PROPERLY2 -> + {ok, true, hash}; + FH#fileheader.closed_properly =:= + ?CLOSED_PROPERLY2_NEED_COMPACTING -> + {ok, need_compacting, hash}; + FH#fileheader.closed_properly =:= ?CLOSED_PROPERLY_NEW_HASH -> + {ok, true, phash}; + FH#fileheader.closed_properly =:= + ?CLOSED_PROPERLY_NEW_HASH_NEED_COMPACTING -> + {ok, need_compacting, phash}; + FH#fileheader.closed_properly =:= ?NOT_PROPERLY_CLOSED -> + {error, not_closed}; + FH#fileheader.closed_properly > + ?CLOSED_PROPERLY_NEW_HASH_NEED_COMPACTING -> + {error, not_closed}; + true -> + {error, not_a_dets_file} + end, + case Test of + {ok, ExtraInfo, HashAlg} -> + H = #head{ + m = FH#fileheader.m, + m2 = FH#fileheader.m * 2, + next = FH#fileheader.next, + fptr = Fd, + no_objects= FH#fileheader.no_objects, + n = FH#fileheader.n, + type = FH#fileheader.type, + update_mode = saved, + auto_save = infinity, % not saved on file + fixed = false, % not saved on file + freelists_p = FH#fileheader.freelist, + hash_bif = HashAlg, + keypos = FH#fileheader.keypos, + min_no_slots = FH#fileheader.min_no_slots, + max_no_slots = FH#fileheader.max_no_slots, + version = ?FILE_FORMAT_VERSION, + mod = ?MODULE, + bump = ?BUMP, + base = ?BASE}, + {ok, H, ExtraInfo}; + Error -> + Error + end. + +cache_segps(Fd, FileName, M) -> + NSegs = no_segs(M), + {ok, Bin} = dets_utils:pread_close(Fd, FileName, ?HEADSZ, 4 * NSegs), + Fun = fun(S, P) -> segp_cache(P, S), P+4 end, + lists:foldl(Fun, ?HEADSZ, bin2ints(Bin)). + +no_segs(NoSlots) -> + ?SLOT2SEG(NoSlots - 1) + 1. + +bin2ints(<>) -> + [Int | bin2ints(B)]; +bin2ints(<<>>) -> + []. + +%%% +%%% Repair, conversion and initialization of a dets file. +%%% + +bulk_input(Head, InitFun, Cntrs) -> + bulk_input(Head, InitFun, Cntrs, make_ref()). + +bulk_input(Head, InitFun, Cntrs, Ref) -> + fun(close) -> + ok; + (read) -> + case catch {Ref, InitFun(read)} of + {Ref, end_of_input} -> + end_of_input; + {Ref, {L0, NewInitFun}} when is_list(L0), + is_function(NewInitFun) -> + Kp = Head#head.keypos, + case catch bulk_objects(L0, Head, Cntrs, Kp, []) of + {'EXIT', _Error} -> + _ = (catch NewInitFun(close)), + {error, invalid_objects_list}; + L -> + {L, bulk_input(Head, NewInitFun, Cntrs, Ref)} + end; + {Ref, Value} -> + {error, {init_fun, Value}}; + Error -> + throw({thrown, Error}) + end + end. + +bulk_objects([T | Ts], Head, Cntrs, Kp, L) -> + BT = term_to_binary(T), + Sz = byte_size(BT), + LogSz = sz2pos(Sz+?OHDSZ), + count_object(Cntrs, LogSz), + Key = element(Kp, T), + bulk_objects(Ts, Head, Cntrs, Kp, [make_object(Head, Key, LogSz, BT) | L]); +bulk_objects([], _Head, _Cntrs, _Kp, L) -> + L. + +-define(FSCK_SEGMENT, 10000). + +-define(DCT(D, CT), [D | CT]). + +-define(VNEW(N, E), erlang:make_tuple(N, E)). +-define(VSET(I, V, E), setelement(I, V, E)). +-define(VGET(I, V), element(I, V)). + +%% OldVersion not used, assuming later versions have been converted already. +output_objs(OldVersion, Head, SlotNumbers, Cntrs) -> + fun(close) -> + {ok, 0, Head}; + ([]) -> + output_objs(OldVersion, Head, SlotNumbers, Cntrs); + (L) -> + %% Descending sizes. + Count = lists:sort(ets:tab2list(Cntrs)), + RCount = lists:reverse(Count), + NoObjects = lists:foldl(fun({_Sz,No}, A) -> A + No end, 0, Count), + {_, MinSlots, _} = SlotNumbers, + if + %% Using number of objects for bags and duplicate bags + %% is not ideal; number of (unique) keys should be + %% used instead. The effect is that there will be more + %% segments than "necessary". + MinSlots =/= bulk_init, + abs(?SLOT2SEG(NoObjects) - ?SLOT2SEG(MinSlots)) > 5, + (NoObjects < ?MAXOBJS) -> + {try_again, NoObjects}; + true -> + Head1 = Head#head{no_objects = NoObjects}, + SegSz = actual_seg_size(), + {_, End, _} = dets_utils:alloc(Head, SegSz-1), + %% Now {LogSize,NoObjects} in Cntrs is replaced by + %% {LogSize,Position,{FileName,FileDescriptor},NoObjects}. + {Head2, CT} = allocate_all_objects(Head1, RCount, Cntrs), + [E | Es] = bin2term(L, []), + {NE, Acc, DCT1} = + output_slots(E, Es, [E], Head2, ?DCT(0, CT)), + NDCT = write_all_sizes(DCT1, Cntrs), + Max = ets:info(Cntrs, size), + output_objs2(NE, Acc, Head2, Cntrs, NDCT, End, Max,Max) + end + end. + +output_objs2(E, Acc, Head, Cntrs, DCT, End, 0, MaxNoChunks) -> + NDCT = write_all_sizes(DCT, Cntrs), + output_objs2(E, Acc, Head, Cntrs, NDCT, End, MaxNoChunks, MaxNoChunks); +output_objs2(E, Acc, Head, Cntrs, DCT, End, ChunkI, MaxNoChunks) -> + fun(close) -> + DCT1 = output_slot(Acc, Head, DCT), + NDCT = write_all_sizes(DCT1, Cntrs), + ?DCT(NoDups, CT) = NDCT, + [SegAddr | []] = ?VGET(tuple_size(CT), CT), + FinalZ = End - SegAddr, + [{?FSCK_SEGMENT, _, {FileName, Fd}, _}] = + ets:lookup(Cntrs, ?FSCK_SEGMENT), + ok = dets_utils:fwrite(Fd, FileName, + dets_utils:make_zeros(FinalZ)), + NewHead = Head#head{no_objects = Head#head.no_objects - NoDups}, + {ok, NoDups, NewHead}; + (L) -> + Es = bin2term(L, []), + {NE, NAcc, NDCT} = output_slots(E, Es, Acc, Head, DCT), + output_objs2(NE, NAcc, Head, Cntrs, NDCT, End, + ChunkI-1, MaxNoChunks) + end. + +%% By allocating bigger objects before smaller ones, holes in the +%% buddy system memory map are avoided. Unfortunately, the segments +%% are always allocated first, so if there are objects bigger than a +%% segment, there is a hole to handle. (Haven't considered placing the +%% segments among other objects of the same size.) +allocate_all_objects(Head, Count, Cntrs) -> + SegSize = actual_seg_size(), + {Head1, HSz, HN, HA} = alloc_hole(Count, Head, SegSize), + {Max, _} = hd(Count), + CT = ?VNEW(Max+1, not_used), + {Head2, NCT} = allocate_all(Head1, Count, Cntrs, CT), + Head3 = free_hole(Head2, HSz, HN, HA), + {Head3, NCT}. + +alloc_hole([{LSize,_} | _], Head, SegSz) when ?POW(LSize-1) > SegSz -> + {_, SegAddr, _} = dets_utils:alloc(Head, SegSz-1), + Size = ?POW(LSize-1)-1, + {_, Addr, _} = dets_utils:alloc(Head, Size), + N = (Addr - SegAddr) div SegSz, + Head1 = dets_utils:alloc_many(Head, SegSz, N, SegAddr), + {Head1, SegSz-1, N, SegAddr}; +alloc_hole(_Count, Head, _SegSz) -> + {Head, 0, 0, 0}. + +free_hole(Head, _Size, 0, _Addr) -> + Head; +free_hole(Head, Size, N, Addr) -> + {Head1, _} = dets_utils:free(Head, Addr, Size), + free_hole(Head1, Size, N-1, Addr+Size+1). + +%% One (temporary) file for each buddy size, write all objects of that +%% size to the file. +allocate_all(Head, [{LSize,NoObjects} | Count], Cntrs, CT) -> + Size = ?POW(LSize-1)-1, + {_Head, Addr, _} = dets_utils:alloc(Head, Size), + NewHead = dets_utils:alloc_many(Head, Size+1, NoObjects, Addr), + {FileName, Fd} = temp_file(Head, LSize), + true = ets:insert(Cntrs, {LSize, Addr, {FileName, Fd}, NoObjects}), + NCT = ?VSET(LSize, CT, [Addr | []]), + allocate_all(NewHead, Count, Cntrs, NCT); +allocate_all(Head, [], Cntrs, CT) -> + %% Note that space for the segments has been allocated already. + %% And one file for the segments... + {FileName, Fd} = temp_file(Head, ?FSCK_SEGMENT), + Addr = ?SEGADDR(?SEGARRSZ), + true = ets:insert(Cntrs, {?FSCK_SEGMENT, Addr, {FileName, Fd}, 0}), + NCT = ?VSET(tuple_size(CT), CT, [Addr | []]), + {Head, NCT}. + +temp_file(Head, N) -> + TmpName = lists:concat([Head#head.filename, '.', N]), + {ok, Fd} = dets_utils:open(TmpName, [raw, binary, write]), + {TmpName, Fd}. + +bin2term([<> | BTs], L) -> + bin2term(BTs, [{Slot, LogSize, BinTerm} | L]); +bin2term([], L) -> + lists:reverse(L). + +write_all_sizes(?DCT(D, CT), Cntrs) -> + ?DCT(D, write_sizes(1, tuple_size(CT), CT, Cntrs)). + +write_sizes(Sz, Sz, CT, Cntrs) -> + write_size(Sz, ?FSCK_SEGMENT, CT, Cntrs); +write_sizes(Sz, MaxSz, CT, Cntrs) -> + NCT = write_size(Sz, Sz, CT, Cntrs), + write_sizes(Sz+1, MaxSz, NCT, Cntrs). + +write_size(Sz, I, CT, Cntrs) -> + case ?VGET(Sz, CT) of + not_used -> + CT; + [Addr | L] -> + {FileName, Fd} = ets:lookup_element(Cntrs, I, 3), + case file:write(Fd, lists:reverse(L)) of + ok -> + ?VSET(Sz, CT, [Addr | []]); + Error -> + dets_utils:file_error(FileName, Error) + end + end. + +output_slots(E, [E1 | Es], Acc, Head, DCT) + when element(1, E) =:= element(1, E1) -> + output_slots(E1, Es, [E1 | Acc], Head, DCT); +output_slots(_E, [E | L], Acc, Head, DCT) -> + NDCT = output_slot(Acc, Head, DCT), + output_slots(E, L, [E], Head, NDCT); +output_slots(E, [], Acc, _Head, DCT) -> + {E, Acc, DCT}. + +output_slot([E], _Head, ?DCT(D, CT)) -> + ?DCT(D, output_slot([{foo, E}], 0, foo, CT)); +output_slot(Es0, Head, ?DCT(D, CT)) -> + Kp = Head#head.keypos, + Fun = fun({_Slot, _LSize, BinTerm} = E) -> + Key = element(Kp, binary_to_term(BinTerm)), + {Key, E} + end, + Es = lists:map(Fun, Es0), + NEs = case Head#head.type of + set -> + [{Key0,_} = E | L0] = lists:sort(Es), + choose_one(lists:sort(L0), Key0, [E]); + bag -> + lists:usort(Es); + duplicate_bag -> + lists:sort(Es) + end, + Dups = D + length(Es) - length(NEs), + ?DCT(Dups, output_slot(NEs, 0, foo, CT)). + +choose_one([{Key,_} | Es], Key, L) -> + choose_one(Es, Key, L); +choose_one([{Key,_} = E | Es], _Key, L) -> + choose_one(Es, Key, [E | L]); +choose_one([], _Key, L) -> + L. + +output_slot([E | Es], Next, _Slot, CT) -> + {_Key, {Slot, LSize, BinTerm}} = E, + Size = byte_size(BinTerm), + Size2 = ?POW(LSize-1), + Pad = <<0:(Size2-Size-?OHDSZ)/unit:8>>, + BinObject = [<>, BinTerm | Pad], + [Addr | L] = ?VGET(LSize, CT), + NCT = ?VSET(LSize, CT, [Addr+Size2 | [BinObject | L]]), + output_slot(Es, Addr, Slot, NCT); +output_slot([], Next, Slot, CT) -> + I = tuple_size(CT), + [Addr | L] = ?VGET(I, CT), + {Pos, _} = slot_position(Slot), + NoZeros = Pos - Addr, + BinObject = if + NoZeros > 100 -> + [dets_utils:make_zeros(NoZeros) | <>]; + true -> + <<0:NoZeros/unit:8,Next:32>> + end, + Size = NoZeros+4, + ?VSET(I, CT, [Addr+Size | [BinObject | L]]). + +%% Does not close Fd. +fsck_input(Head, Fd, Cntrs, _FileHeader) -> + %% The file is not compressed, so the object size cannot exceed + %% the filesize, for all objects. + MaxSz = case file:position(Fd, eof) of + {ok, Pos} -> + Pos; + _ -> + (1 bsl 32) - 1 + end, + State0 = fsck_read(?BASE, Fd, []), + fsck_input1(Head, State0, Fd, MaxSz, Cntrs). + +fsck_input1(Head, State, Fd, MaxSz, Cntrs) -> + fun(close) -> + ok; + (read) -> + case State of + done -> + end_of_input; + {done, L} -> + R = count_input(Cntrs, L, []), + {R, fsck_input1(Head, done, Fd, MaxSz, Cntrs)}; + {cont, L, Bin, Pos} -> + R = count_input(Cntrs, L, []), + FR = fsck_objs(Bin, Head#head.keypos, Head, []), + NewState = fsck_read(FR, Pos, Fd, MaxSz, Head), + {R, fsck_input1(Head, NewState, Fd, MaxSz, Cntrs)} + end + end. + +%% The ets table Cntrs is used for counting objects per size. +count_input(Cntrs, [[LogSz | B] | Ts], L) -> + count_object(Cntrs, LogSz), + count_input(Cntrs, Ts, [B | L]); +count_input(_Cntrs, [], L) -> + L. + +count_object(Cntrs, LogSz) -> + case catch ets:update_counter(Cntrs, LogSz, 1) of + N when is_integer(N) -> ok; + _Badarg -> true = ets:insert(Cntrs, {LogSz, 1}) + end. + +fsck_read(Pos, F, L) -> + case file:position(F, Pos) of + {ok, _} -> + read_more_bytes(<<>>, 0, Pos, F, L); + _Error -> + {done, L} + end. + +fsck_read({more, Bin, Sz, L}, Pos, F, MaxSz, Head) when Sz > MaxSz -> + FR = skip_bytes(Bin, ?BUMP, Head#head.keypos, Head, L), + fsck_read(FR, Pos, F, MaxSz, Head); +fsck_read({more, Bin, Sz, L}, Pos, F, _MaxSz, _Head) -> + read_more_bytes(Bin, Sz, Pos, F, L); +fsck_read({new, Skip, L}, Pos, F, _MaxSz, _Head) -> + NewPos = Pos + Skip, + fsck_read(NewPos, F, L). + +read_more_bytes(B, Min, Pos, F, L) -> + Max = if + Min < ?CHUNK_SIZE -> ?CHUNK_SIZE; + true -> Min + end, + case dets_utils:read_n(F, Max) of + eof -> + {done, L}; + Bin -> + NewPos = Pos + byte_size(Bin), + {cont, L, list_to_binary([B, Bin]), NewPos} + end. + +fsck_objs(Bin = <<_N:32, Sz:32, Status:32, Tail/binary>>, Kp, Head, L) -> + if + Status =:= ?ACTIVE -> + case Tail of + <> -> + case catch element(Kp, binary_to_term(BinTerm)) of + {'EXIT', _} -> + skip_bytes(Bin, ?BUMP, Kp, Head, L); + Key -> + LogSz = sz2pos(Sz+?OHDSZ), + Obj = make_object(Head, Key, LogSz, BinTerm), + NL = [[LogSz | Obj] | L], + Skip = ?POW(LogSz-1) - Sz - ?OHDSZ, + skip_bytes(Tail2, Skip, Kp, Head, NL) + end; + _ -> + {more, Bin, Sz, L} + end; + true -> + skip_bytes(Bin, ?BUMP, Kp, Head, L) + end; +fsck_objs(Bin, _Kp, _Head, L) -> + {more, Bin, 0, L}. + +%% Version 8 has to know about version 9. +make_object(Head, Key, _LogSz, BT) when Head#head.version =:= 9 -> + Slot = dets_v9:db_hash(Key, Head), + <>; +make_object(Head, Key, LogSz, BT) -> + Slot = db_hash(Key, Head), + <>. + +%% Inlined. +skip_bytes(Bin, Skip, Kp, Head, L) -> + case Bin of + <<_:Skip/binary, Tail/binary>> -> + fsck_objs(Tail, Kp, Head, L); + _ -> + {new, Skip - byte_size(Bin), L} + end. + +%% -> {NewHead, ok} | throw({Head, Error}) +do_perform_save(H) -> + FL = dets_utils:get_freelists(H), + B = term_to_binary(FL), + Size = byte_size(B), + ?DEBUGF("size of freelist = ~p~n", [Size]), + ?DEBUGF("head.m = ~p~n", [H#head.m]), + ?DEBUGF("head.no_objects = ~p~n", [H#head.no_objects]), + + {ok, Pos} = dets_utils:position(H, eof), + H1 = H#head{freelists_p = Pos}, + W1 = {?FREELIST_POS, <>}, + W2 = {Pos, [<<0:32, Size:32, ?FREE:32>>, B]}, + + W3 = {?D_POS, <<(H1#head.m):32, + (H1#head.next):32, + (H1#head.keypos):32, + (H1#head.no_objects):32, + (H1#head.n):32>>}, + {ClosedProperly, ClosedProperlyNeedCompacitng} = + case H1#head.hash_bif of + hash -> + {?CLOSED_PROPERLY2, ?CLOSED_PROPERLY2_NEED_COMPACTING}; + phash -> + {?CLOSED_PROPERLY_NEW_HASH, + ?CLOSED_PROPERLY_NEW_HASH_NEED_COMPACTING} + end, + W4 = + if + Size > 1000, Size > H1#head.no_objects -> + {?CLOSED_PROPERLY_POS, + <>}; + true -> + {?CLOSED_PROPERLY_POS, <>} + end, + W5 = {?FILE_FORMAT_VERSION_POS, <>}, + {H2, ok} = dets_utils:pwrite(H1, [W1,W2,W3,W4,W5]), + {ok, Pos2} = dets_utils:position(H2, eof), + ?DEBUGF("Writing file size ~p, eof at ~p~n", [Pos2+4, Pos2]), + dets_utils:pwrite(H2, [{Pos2, <<(Pos2 + 4):32>>}]). + +%% -> [term()] | throw({Head, Error}) +slot_objs(H, Slot) when Slot >= H#head.next -> + '$end_of_table'; +slot_objs(H, Slot) -> + {_Pos, Chain} = chain(H, Slot), + collect_chain(H, Chain). + +collect_chain(_H, 0) -> []; +collect_chain(H, Pos) -> + {Next, _Sz, Term} = prterm(H, Pos, ?ReadAhead), + [Term | collect_chain(H, Next)]. + +db_hash(Key, Head) -> + H = h(Key, Head#head.hash_bif), + Hash = H rem Head#head.m, + if + Hash < Head#head.n -> + H rem (Head#head.m2); % H rem (2 * m) + true -> + Hash + end. + +h(I, phash) -> erlang:phash(I, ?BIG) - 1; +h(I, HF) -> erlang:HF(I, ?BIG) - 1. %% stupid BIF has 1 counts. + +no_slots(_Head) -> + undefined. + +table_parameters(_Head) -> + undefined. + +%% Re-hashing a segment, starting with SlotStart. +%% +%% On the average, half of the objects of the chain are put into a new +%% chain. If the slot of the old chain is i, then the slot of the new +%% chain is i+m. +%% Note that the insertion of objects into the new chain is simplified +%% by the fact that the chains are not sorted on key, which means that +%% each moved object can be inserted first in the new chain. +%% (It is also a fact that the objects with the same key are not sorted.) +%% +%% -> {ok, Writes} | throw({Head, Error}) +re_hash(Head, SlotStart) -> + {SlotPos, _4} = slot_position(SlotStart), + {ok, Bin} = dets_utils:pread(Head, SlotPos, 4*?SEGSZ, 0), + {Read, Cs} = split_bin(SlotPos, Bin, [], []), + re_hash_read(Head, [], Read, Cs). + +split_bin(Pos, <>, R, Cs) -> + if + P =:= 0 -> + split_bin(Pos+4, B, R, Cs); + true -> + split_bin(Pos+4, B, [{P,?ReadAhead} | R], [[Pos] | Cs]) + end; +split_bin(_Pos, <<>>, R, Cs) -> + {R, Cs}. + +re_hash_read(Head, Cs, R, RCs) -> + {ok, Bins} = dets_utils:pread(R, Head), + re_hash_read(Head, R, RCs, Bins, Cs, [], []). + +re_hash_read(Head, [{Pos, Size} | Ps], [C | Cs], + [<> | Bins], + DoneCs, R, RCs) -> + case byte_size(Bin0) of + BinSz when BinSz >= Sz -> + case catch binary_to_term(Bin0) of + {'EXIT', _Error} -> + throw(dets_utils:corrupt_reason(Head, bad_object)); + Term -> + Key = element(Head#head.keypos, Term), + New = h(Key, Head#head.hash_bif) rem Head#head.m2, + NC = case New >= Head#head.m of + true -> [{Pos,New} | C]; + false -> [Pos | C] + end, + if + Next =:= 0 -> + NDoneCs = [NC | DoneCs], + re_hash_read(Head, Ps, Cs, Bins, NDoneCs, R, RCs); + true -> + NR = [{Next,?ReadAhead} | R], + NRCs = [NC | RCs], + re_hash_read(Head, Ps, Cs, Bins, DoneCs, NR, NRCs) + end + end; + BinSz when Size =:= BinSz+?OHDSZ -> + NR = [{Pos, Sz+?OHDSZ} | R], + re_hash_read(Head, Ps, Cs, Bins, DoneCs, NR, [C | RCs]); + _BinSz -> + throw({Head, {error, {premature_eof, Head#head.filename}}}) + end; +re_hash_read(Head, [], [], [], Cs, [], []) -> + re_hash_traverse_chains(Cs, Head, [], [], []); +re_hash_read(Head, [], [], [], Cs, R, RCs) -> + re_hash_read(Head, Cs, R, RCs). + +re_hash_traverse_chains([C | Cs], Head, Rs, Ns, Ws) -> + case re_hash_find_new(C, Rs, start, start) of + false -> + re_hash_traverse_chains(Cs, Head, Rs, Ns, Ws); + {NRs, FirstNew, LastNew} -> + LastInNew = case C of + [{_,_} | _] -> true; + _ -> false + end, + N = {FirstNew, LastNew, LastInNew}, + NWs = re_hash_link(C, start, start, start, Ws), + re_hash_traverse_chains(Cs, Head, NRs, [N | Ns], NWs) + end; +re_hash_traverse_chains([], Head, Rs, Ns, Ws) -> + {ok, Bins} = dets_utils:pread(Rs, Head), + {ok, insert_new(Rs, Bins, Ns, Ws)}. + +re_hash_find_new([{Pos,NewSlot} | C], R, start, start) -> + {SPos, _4} = slot_position(NewSlot), + re_hash_find_new(C, [{SPos,4} | R], Pos, Pos); +re_hash_find_new([{Pos,_SPos} | C], R, _FirstNew, LastNew) -> + re_hash_find_new(C, R, Pos, LastNew); +re_hash_find_new([_Pos | C], R, FirstNew, LastNew) -> + re_hash_find_new(C, R, FirstNew, LastNew); +re_hash_find_new([], _R, start, start) -> + false; +re_hash_find_new([], R, FirstNew, LastNew) -> + {R, FirstNew, LastNew}. + +re_hash_link([{Pos,_SPos} | C], LastOld, start, _LastInNew, Ws) -> + re_hash_link(C, LastOld, Pos, true, Ws); +re_hash_link([{Pos,_SPos} | C], LastOld, LastNew, false, Ws) -> + re_hash_link(C, LastOld, Pos, true, [{Pos,<>} | Ws]); +re_hash_link([{Pos,_SPos} | C], LastOld, _LastNew, LastInNew, Ws) -> + re_hash_link(C, LastOld, Pos, LastInNew, Ws); +re_hash_link([Pos | C], start, LastNew, true, Ws) -> + re_hash_link(C, Pos, LastNew, false, [{Pos,<<0:32>>} | Ws]); +re_hash_link([Pos | C], LastOld, LastNew, true, Ws) -> + re_hash_link(C, Pos, LastNew, false, [{Pos,<>} | Ws]); +re_hash_link([Pos | C], _LastOld, LastNew, LastInNew, Ws) -> + re_hash_link(C, Pos, LastNew, LastInNew, Ws); +re_hash_link([], _LastOld, _LastNew, _LastInNew, Ws) -> + Ws. + +insert_new([{NewSlotPos,_4} | Rs], [<> = PB | Bins], [N | Ns], Ws) -> + {FirstNew, LastNew, LastInNew} = N, + Ws1 = case P of + 0 when LastInNew -> + Ws; + 0 -> + [{LastNew, <<0:32>>} | Ws]; + _ -> + [{LastNew, PB} | Ws] + end, + NWs = [{NewSlotPos, <>} | Ws1], + insert_new(Rs, Bins, Ns, NWs); +insert_new([], [], [], Ws) -> + Ws. + +%% When writing the cache, a 'work list' is first created: +%% WorkList = [{Key, {Delete,Lookup,[Inserted]}}] +%% Delete = keep | delete +%% Lookup = skip | lookup +%% Inserted = {object(), No} +%% No = integer() +%% If No =< 0 then there will be -No instances of object() on the file +%% when the cache has been written. If No > 0 then No instances of +%% object() will be added to the file. +%% If Delete has the value 'delete', then all objects with the key Key +%% have been deleted. (This could be viewed as a shorthand for {Object,0} +%% for each object Object on the file not mentioned in some Inserted.) +%% If Lookup has the value 'lookup', all objects with the key Key will +%% be returned. +%% + +%% -> {NewHead, [LookedUpObject], pwrite_list()} | throw({NewHead, Error}) +write_cache(Head) -> + #head{cache = C, type = Type} = Head, + case dets_utils:is_empty_cache(C) of + true -> {Head, [], []}; + false -> + {NewC, _MaxInserts, PerKey} = dets_utils:reset_cache(C), + %% NoInsertedKeys is an upper limit on the number of new keys. + {WL, NoInsertedKeys} = make_wl(PerKey, Type), + Head1 = Head#head{cache = NewC}, + case may_grow(Head1, NoInsertedKeys, once) of + {Head2, ok} -> + eval_work_list(Head2, WL); + HeadError -> + throw(HeadError) + end + end. + +make_wl(PerKey, Type) -> + make_wl(PerKey, Type, [], 0). + +make_wl([{Key,L} | PerKey], Type, WL, Ins) -> + [Cs | I] = wl(L, Type), + make_wl(PerKey, Type, [{Key,Cs} | WL], Ins+I); +make_wl([], _Type, WL, Ins) -> + {WL, Ins}. + +wl(L, Type) -> + wl(L, Type, keep, skip, 0, []). + +wl([{_Seq, delete_key} | Cs], Type, _Del, Lookup, _I, _Objs) -> + wl(Cs, Type, delete, Lookup, 0, []); +wl([{_Seq, {delete_object, Object}} | Cs], Type, Del, Lookup, I, Objs) -> + NObjs = lists:keydelete(Object, 1, Objs), + wl(Cs, Type, Del, Lookup, I, [{Object,0} | NObjs]); +wl([{_Seq, {insert, Object}} | Cs], Type, _Del, Lookup, _I, _Objs) + when Type =:= set -> + wl(Cs, Type, delete, Lookup, 1, [{Object,-1}]); +wl([{_Seq, {insert, Object}} | Cs], Type, Del, Lookup, _I, Objs) -> + NObjs = + case lists:keysearch(Object, 1, Objs) of + {value, {_, 0}} -> + lists:keyreplace(Object, 1, Objs, {Object,-1}); + {value, {_, _C}} when Type =:= bag -> % C =:= 1; C =:= -1 + Objs; + {value, {_, C}} when C < 0 -> % when Type =:= duplicate_bag + lists:keyreplace(Object, 1, Objs, {Object,C-1}); + {value, {_, C}} -> % when C > 0, Type =:= duplicate_bag + lists:keyreplace(Object, 1, Objs, {Object,C+1}); + false when Del =:= delete -> + [{Object, -1} | Objs]; + false -> + [{Object, 1} | Objs] + end, + wl(Cs, Type, Del, Lookup, 1, NObjs); +wl([{_Seq, {lookup,_Pid}=Lookup} | Cs], Type, Del, _Lookup, I, Objs) -> + wl(Cs, Type, Del, Lookup, I, Objs); +wl([], _Type, Del, Lookup, I, Objs) -> + [{Del, Lookup, Objs} | I]. + +%% -> {NewHead, ok} | {NewHead, Error} +may_grow(Head, _N, _How) when Head#head.fixed =/= false -> + {Head, ok}; +may_grow(#head{access = read}=Head, _N, _How) -> + {Head, ok}; +may_grow(Head, _N, _How) when Head#head.next >= ?MAXOBJS -> + {Head, ok}; +may_grow(Head, N, How) -> + Extra = erlang:min(2*?SEGSZ, Head#head.no_objects + N - Head#head.next), + case catch may_grow1(Head, Extra, How) of + {error, Reason} -> % alloc may throw error + {Head, {error, Reason}}; + Reply -> + Reply + end. + +may_grow1(Head, Extra, many_times) when Extra > ?SEGSZ -> + Reply = grow(Head, 1, undefined), + self() ! ?DETS_CALL(self(), may_grow), + Reply; +may_grow1(Head, Extra, _How) -> + grow(Head, Extra, undefined). + +%% -> {Head, ok} | throw({Head, Error}) +grow(Head, Extra, _SegZero) when Extra =< 0 -> + {Head, ok}; +grow(Head, Extra, undefined) -> + grow(Head, Extra, seg_zero()); +grow(Head, Extra, SegZero) -> + #head{n = N, next = Next, m = M} = Head, + SegNum = ?SLOT2SEG(Next), + {Head0, Ws1} = allocate_segment(Head, SegZero, SegNum), + {Head1, ok} = dets_utils:pwrite(Head0, Ws1), + %% If re_hash fails, segp_cache has been called, but it does not matter. + {ok, Ws2} = re_hash(Head1, N), + {Head2, ok} = dets_utils:pwrite(Head1, Ws2), + NewHead = + if + N + ?SEGSZ =:= M -> + Head2#head{n = 0, next = Next + ?SEGSZ, m = 2 * M, m2 = 4 * M}; + true -> + Head2#head{n = N + ?SEGSZ, next = Next + ?SEGSZ} + end, + grow(NewHead, Extra - ?SEGSZ, SegZero). + +seg_zero() -> + <<0:(4*?SEGSZ)/unit:8>>. + +find_object(Head, Object) -> + Key = element(Head#head.keypos, Object), + Slot = db_hash(Key, Head), + find_object(Head, Object, Slot). + +find_object(H, _Obj, Slot) when Slot >= H#head.next -> + false; +find_object(H, Obj, Slot) -> + {_Pos, Chain} = chain(H, Slot), + case catch find_obj(H, Obj, Chain) of + {ok, Pos} -> + {ok, Pos}; + _Else -> + false + end. + +find_obj(H, Obj, Pos) when Pos > 0 -> + {Next, _Sz, Term} = prterm(H, Pos, ?ReadAhead), + if + Term == Obj -> + {ok, Pos}; + true -> + find_obj(H, Obj, Next) + end. + +%% Given, a slot, return the {Pos, Chain} in the file where the +%% objects hashed to this slot reside. Pos is the position in the +%% file where the chain pointer is written and Chain is the position +%% in the file where the first object resides. +chain(Head, Slot) -> + Pos = ?SEGADDR(?SLOT2SEG(Slot)), + Segment = get_segp(Pos), + FinalPos = Segment + (4 * ?REM2(Slot, ?SEGSZ)), + {ok, <>} = dets_utils:pread(Head, FinalPos, 4, 0), + {FinalPos, Chain}. + +%%% +%%% Cache routines depending on the dets file format. +%%% + +%% -> {Head, [LookedUpObject], pwrite_list()} | throw({Head, Error}) +eval_work_list(Head, WorkLists) -> + SWLs = tag_with_slot(WorkLists, Head, []), + P1 = dets_utils:family(SWLs), + {PerSlot, SlotPositions} = remove_slot_tag(P1, [], []), + {ok, Bins} = dets_utils:pread(SlotPositions, Head), + first_object(PerSlot, SlotPositions, Bins, Head, [], [], [], []). + +tag_with_slot([{K,_} = WL | WLs], Head, L) -> + tag_with_slot(WLs, Head, [{db_hash(K, Head), WL} | L]); +tag_with_slot([], _Head, L) -> + L. + +remove_slot_tag([{S,SWLs} | SSWLs], Ls, SPs) -> + remove_slot_tag(SSWLs, [SWLs | Ls], [slot_position(S) | SPs]); +remove_slot_tag([], Ls, SPs) -> + {Ls, SPs}. + +%% The initial chain pointers and the first object in each chain are +%% read "in parallel", that is, with one call to file:pread/2 (two +%% calls altogether). The following chain objects are read one by +%% one. This is a compromise: if the chains are long and threads are +%% active, it would be faster to keep a state for each chain and read +%% the objects of the chains in parallel, but the overhead would be +%% quite substantial. + +first_object([WorkLists | SPs], [{P1,_4} | Ss], [<> | Bs], Head, + ObjsToRead, ToRead, Ls, LU) when P2 =:= 0 -> + L0 = [{old,P1}], + {L, NLU} = eval_slot(Head, ?ReadAhead, P2, WorkLists, L0, LU), + first_object(SPs, Ss, Bs, Head, ObjsToRead, ToRead, [L | Ls], NLU); +first_object([WorkLists | SPs], [{P1,_4} | Ss], [<> | Bs], Head, + ObjsToRead, ToRead, Ls, LU) -> + E = {P1,P2,WorkLists}, + first_object(SPs, Ss, Bs, Head, + [E | ObjsToRead], [{P2, ?ReadAhead} | ToRead], Ls, LU); +first_object([], [], [], Head, ObjsToRead, ToRead, Ls, LU) -> + {ok, Bins} = dets_utils:pread(ToRead, Head), + case catch eval_first(Bins, ObjsToRead, Head, Ls, LU) of + {ok, NLs, NLU} -> + case create_writes(NLs, Head, [], 0) of + {Head1, [], 0} -> + {Head1, NLU, []}; + {Head1, Ws, No} -> + {NewHead, Ws2} = update_no_objects(Head1, Ws, No), + {NewHead, NLU, Ws2} + end; + _Error -> + throw(dets_utils:corrupt_reason(Head, bad_object)) + end. + +%% Update no_objects on the file too, if the number of segments that +%% dets:fsck/6 use for estimate has changed. +update_no_objects(Head, Ws, 0) -> {Head, Ws}; +update_no_objects(Head, Ws, Delta) -> + No = Head#head.no_objects, + NewNo = No + Delta, + NWs = + if + NewNo > ?MAXOBJS -> + Ws; + ?SLOT2SEG(No) =:= ?SLOT2SEG(NewNo) -> + Ws; + true -> + [{?NO_OBJECTS_POS, <>} | Ws] + end, + {Head#head{no_objects = NewNo}, NWs}. + +eval_first([<> | Bins], + [SP | SPs], Head, Ls, LU) -> + {P1, P2, WLs} = SP, + L0 = [{old,P1}], + case byte_size(Bin) of + BinSz when BinSz >= Sz -> + Term = binary_to_term(Bin), + Key = element(Head#head.keypos, Term), + {L, NLU} = find_key(Head, P2, Next, Sz, Term, Key, WLs, L0, LU), + eval_first(Bins, SPs, Head, [L | Ls], NLU); + _BinSz -> + {L, NLU} = eval_slot(Head, Sz+?OHDSZ, P2, WLs, L0, LU), + eval_first(Bins, SPs, Head, [L | Ls], NLU) + end; +eval_first([], [], _Head, Ls, LU) -> + {ok, Ls, LU}. + +eval_slot(_Head, _TrySize, _Pos=0, [], L, LU) -> + {L, LU}; +eval_slot(Head, _TrySize, Pos=0, [WL | WLs], L, LU) -> + {_Key, {_Delete, LookUp, Objects}} = WL, + {NL, NLU} = end_of_key(Objects, LookUp, L, []), + eval_slot(Head, ?ReadAhead, Pos, WLs, NL, NLU++LU); +eval_slot(Head, TrySize, Pos, WLs, L, LU) -> + {NextPos, Size, Term} = prterm(Head, Pos, TrySize), + Key = element(Head#head.keypos, Term), + find_key(Head, Pos, NextPos, Size, Term, Key, WLs, L, LU). + +find_key(Head, Pos, NextPos, Size, Term, Key, WLs, L, LU) -> + case lists:keysearch(Key, 1, WLs) of + {value, {_, {Delete, LookUp, Objects}} = WL} -> + NWLs = lists:delete(WL, WLs), + {NewObjects, NL, LUK} = eval_object(Size, Term, Delete, LookUp, + Objects, Head, Pos, L, []), + eval_key(Key, Delete, LookUp, NewObjects, Head, NextPos, + NWLs, NL, LU, LUK); + false -> + L0 = [{old,Pos} | L], + eval_slot(Head, ?ReadAhead, NextPos, WLs, L0, LU) + end. + +eval_key(_Key, _Delete, Lookup, _Objects, Head, Pos, WLs, L, LU, LUK) + when Head#head.type =:= set -> + NLU = case Lookup of + {lookup, Pid} -> [{Pid,LUK} | LU]; + skip -> LU + end, + eval_slot(Head, ?ReadAhead, Pos, WLs, L, NLU); +eval_key(_Key, _Delete, LookUp, Objects, Head, Pos, WLs, L, LU, LUK) + when Pos =:= 0 -> + {NL, NLU} = end_of_key(Objects, LookUp, L, LUK), + eval_slot(Head, ?ReadAhead, Pos, WLs, NL, NLU++LU); +eval_key(Key, Delete, LookUp, Objects, Head, Pos, WLs, L, LU, LUK) -> + {NextPos, Size, Term} = prterm(Head, Pos, ?ReadAhead), + case element(Head#head.keypos, Term) of + Key -> + {NewObjects, NL, LUK1} = + eval_object(Size, Term, Delete, LookUp,Objects,Head,Pos,L,LUK), + eval_key(Key, Delete, LookUp, NewObjects, Head, NextPos, WLs, + NL, LU, LUK1); + Key2 -> + {L1, NLU} = end_of_key(Objects, LookUp, L, LUK), + find_key(Head, Pos, NextPos, Size, Term, Key2, WLs, L1, NLU++LU) + end. + +%% All objects in Objects have the key Key. +eval_object(Size, Term, Delete, LookUp, Objects, Head, Pos, L, LU) -> + Type = Head#head.type, + case lists:keysearch(Term, 1, Objects) of + {value, {_Object, N}} when N =:= 0 -> + L1 = [{delete,Pos,Size} | L], + {Objects, L1, LU}; + {value, {_Object, N}} when N < 0, Type =:= set -> + L1 = [{old,Pos} | L], + wl_lookup(LookUp, Objects, Term, L1, LU); + {value, {Object, _N}} when Type =:= bag -> % when N =:= 1; N =:= -1 + L1 = [{old,Pos} | L], + Objects1 = lists:keydelete(Object, 1, Objects), + wl_lookup(LookUp, Objects1, Term, L1, LU); + {value, {Object, N}} when N < 0, Type =:= duplicate_bag -> + L1 = [{old,Pos} | L], + Objects1 = lists:keyreplace(Object, 1, Objects, {Object,N+1}), + wl_lookup(LookUp, Objects1, Term, L1, LU); + {value, {_Object, N}} when N > 0, Type =:= duplicate_bag -> + L1 = [{old,Pos} | L], + wl_lookup(LookUp, Objects, Term, L1, LU); + false when Type =:= set, Delete =:= delete -> + case lists:keysearch(-1, 2, Objects) of + false -> % no inserted object, perhaps deleted objects + L1 = [{delete,Pos,Size} | L], + {[], L1, LU}; + {value, {Term2,-1}} -> + Bin2 = term_to_binary(Term2), + NSize = byte_size(Bin2), + Overwrite = + if + NSize =:= Size -> + true; + true -> + SizePos = sz2pos(Size+?OHDSZ), + NSizePos = sz2pos(NSize+?OHDSZ), + SizePos =:= NSizePos + end, + E = if + Overwrite -> + {overwrite,Bin2,Pos}; + true -> + {replace,Bin2,Pos,Size} + end, + wl_lookup(LookUp, [], Term2, [E | L], LU) + end; + false when Delete =:= delete -> + L1 = [{delete,Pos,Size} | L], + {Objects, L1, LU}; + false -> + L1 = [{old,Pos} | L], + wl_lookup(LookUp, Objects, Term, L1, LU) + end. + +%% Inlined. +wl_lookup({lookup,_}, Objects, Term, L, LU) -> + {Objects, L, [Term | LU]}; +wl_lookup(skip, Objects, _Term, L, LU) -> + {Objects, L, LU}. + +end_of_key([{Object,N0} | Objs], LookUp, L, LU) when N0 =/= 0 -> + N = abs(N0), + NL = [{insert,N,term_to_binary(Object)} | L], + NLU = case LookUp of + {lookup, _} -> + lists:duplicate(N, Object) ++ LU; + skip -> + LU + end, + end_of_key(Objs, LookUp, NL, NLU); +end_of_key([_ | Objects], LookUp, L, LU) -> + end_of_key(Objects, LookUp, L, LU); +end_of_key([], {lookup,Pid}, L, LU) -> + {L, [{Pid,LU}]}; +end_of_key([], skip, L, LU) -> + {L, LU}. + +create_writes([L | Ls], H, Ws, No) -> + {NH, NWs, NNo} = create_writes(L, H, Ws, No, 0, true), + create_writes(Ls, NH, NWs, NNo); +create_writes([], H, Ws, No) -> + {H, lists:reverse(Ws), No}. + +create_writes([{old,Pos} | L], H, Ws, No, _Next, true) -> + create_writes(L, H, Ws, No, Pos, true); +create_writes([{old,Pos} | L], H, Ws, No, Next, false) -> + W = {Pos, <>}, + create_writes(L, H, [W | Ws], No, Pos, true); +create_writes([{insert,N,Bin} | L], H, Ws, No, Next, _NextIsOld) -> + {NH, NWs, Pos} = create_inserts(N, H, Ws, Next, byte_size(Bin), Bin), + create_writes(L, NH, NWs, No+N, Pos, false); +create_writes([{overwrite,Bin,Pos} | L], H, Ws, No, Next, _) -> + Size = byte_size(Bin), + W = {Pos, [<>, Bin]}, + create_writes(L, H, [W | Ws], No, Pos, true); +create_writes([{replace,Bin,Pos,OSize} | L], H, Ws, No, Next, _) -> + Size = byte_size(Bin), + {H1, _} = dets_utils:free(H, Pos, OSize+?OHDSZ), + {NH, NewPos, _} = dets_utils:alloc(H1, ?OHDSZ + Size), + W1 = {NewPos, [<>, Bin]}, + NWs = if + Pos =:= NewPos -> + [W1 | Ws]; + true -> + W2 = {Pos+?STATUS_POS, <>}, + [W1,W2 | Ws] + end, + create_writes(L, NH, NWs, No, NewPos, false); +create_writes([{delete,Pos,Size} | L], H, Ws, No, Next, _) -> + {NH, _} = dets_utils:free(H, Pos, Size+?OHDSZ), + NWs = [{Pos+?STATUS_POS,<>} | Ws], + create_writes(L, NH, NWs, No-1, Next, false); +create_writes([], H, Ws, No, _Next, _NextIsOld) -> + {H, Ws, No}. + +create_inserts(0, H, Ws, Next, _Size, _Bin) -> + {H, Ws, Next}; +create_inserts(N, H, Ws, Next, Size, Bin) -> + {NH, Pos, _} = dets_utils:alloc(H, ?OHDSZ + Size), + W = {Pos, [<>, Bin]}, + create_inserts(N-1, NH, [W | Ws], Pos, Size, Bin). + +slot_position(S) -> + Pos = ?SEGADDR(?SLOT2SEG(S)), + Segment = get_segp(Pos), + FinalPos = Segment + (4 * ?REM2(S, ?SEGSZ)), + {FinalPos, 4}. + +%% Twice the size of a segment due to the bug in sz2pos/1. Inlined. +actual_seg_size() -> + ?POW(sz2pos(?SEGSZ*4)-1). + +segp_cache(Pos, Segment) -> + put(Pos, Segment). + +%% Inlined. +get_segp(Pos) -> + get(Pos). + +%% Bug: If Sz0 is equal to 2**k for some k, then 2**(k+1) bytes are +%% allocated (wasting 2**k bytes). +sz2pos(N) -> + 1 + dets_utils:log2(N+1). + +scan_objs(_Head, Bin, From, To, L, Ts, R, _Type) -> + scan_objs(Bin, From, To, L, Ts, R). + +scan_objs(Bin, From, To, L, Ts, -1) -> + {stop, Bin, From, To, L, Ts}; +scan_objs(B = <<_N:32, Sz:32, St:32, T/binary>>, From, To, L, Ts, R) -> + if + St =:= ?ACTIVE; + St =:= ?FREE -> % deleted after scanning started + case T of + <> -> + NTs = [BinTerm | Ts], + OSz = Sz + ?OHDSZ, + Skip = ?POW(sz2pos(OSz)-1) - OSz, + F2 = From + OSz, + NR = if + R < 0 -> + R + 1; + true -> + R + OSz + Skip + end, + scan_skip(T2, F2, To, Skip, L, NTs, NR); + _ -> + {more, From, To, L, Ts, R, Sz+?OHDSZ} + end; + true -> % a segment + scan_skip(B, From, To, actual_seg_size(), L, Ts, R) + end; +scan_objs(_B, From, To, L, Ts, R) -> + {more, From, To, L, Ts, R, 0}. + +scan_skip(Bin, From, To, Skip, L, Ts, R) when From + Skip < To -> + SkipPos = From + Skip, + case Bin of + <<_:Skip/binary, Tail/binary>> -> + scan_objs(Tail, SkipPos, To, L, Ts, R); + _ -> + {more, SkipPos, To, L, Ts, R, 0} + end; +scan_skip(Bin, From, To, Skip, L, Ts, R) when From + Skip =:= To -> + scan_next_allocated(Bin, From, To, L, Ts, R); +scan_skip(_Bin, From, _To, Skip, L, Ts, R) -> % when From + Skip > _To + From1 = From + Skip, + {more, From1, From1, L, Ts, R, 0}. + +scan_next_allocated(_Bin, _From, To, <<>>=L, Ts, R) -> + {more, To, To, L, Ts, R, 0}; +scan_next_allocated(Bin, From0, _To, <>, Ts, R) -> + Skip = From - From0, + scan_skip(Bin, From0, To, Skip, L, Ts, R). + +%% Read term from file at position Pos +prterm(Head, Pos, ReadAhead) -> + Res = dets_utils:pread(Head, Pos, ?OHDSZ, ReadAhead), + ?DEBUGF("file:pread(~p, ~p, ?) -> ~p~n", [Head#head.filename, Pos, Res]), + {ok, <>} = Res, + ?DEBUGF("{Next, Sz} = ~p~n", [{Next, Sz}]), + Bin = case byte_size(Bin0) of + Actual when Actual >= Sz -> + Bin0; + _ -> + {ok, Bin1} = dets_utils:pread(Head, Pos + ?OHDSZ, Sz, 0), + Bin1 + end, + Term = binary_to_term(Bin), + {Next, Sz, Term}. + +%%%%%%%%%%%%%%%%% DEBUG functions %%%%%%%%%%%%%%%% + +file_info(FH) -> + #fileheader{closed_properly = CP, keypos = Kp, + m = M, next = Next, n = N, version = Version, + type = Type, no_objects = NoObjects} + = FH, + if + CP =:= 0 -> + {error, not_closed}; + FH#fileheader.cookie =/= ?MAGIC -> + {error, not_a_dets_file}; + FH#fileheader.version =/= ?FILE_FORMAT_VERSION -> + {error, bad_version}; + true -> + {ok, [{closed_properly,CP},{keypos,Kp},{m, M}, + {n,N},{next,Next},{no_objects,NoObjects}, + {type,Type},{version,Version}]} + end. + +v_segments(H) -> + v_segments(H, 0). + +v_segments(_H, ?SEGARRSZ) -> + done; +v_segments(H, SegNo) -> + Seg = dets_utils:read_4(H#head.fptr, ?SEGADDR(SegNo)), + if + Seg =:= 0 -> + done; + true -> + io:format("SEGMENT ~w ", [SegNo]), + io:format("At position ~w~n", [Seg]), + v_segment(H, SegNo, Seg, 0), + v_segments(H, SegNo+1) + end. + +v_segment(_H, _, _SegPos, ?SEGSZ) -> + done; +v_segment(H, SegNo, SegPos, SegSlot) -> + Slot = SegSlot + (SegNo * ?SEGSZ), + Chain = dets_utils:read_4(H#head.fptr, SegPos + (4 * SegSlot)), + if + Chain =:= 0 -> %% don't print empty chains + true; + true -> + io:format(" <~p>~p: [",[SegPos + (4 * SegSlot), Slot]), + print_chain(H, Chain) + end, + v_segment(H, SegNo, SegPos, SegSlot+1). + +print_chain(_H, 0) -> + io:format("] \n", []); +print_chain(H, Pos) -> + {ok, _} = file:position(H#head.fptr, Pos), + case rterm(H#head.fptr) of + {ok, 0, _Sz, Term} -> + io:format("<~p>~p] \n",[Pos, Term]); + {ok, Next, _Sz, Term} -> + io:format("<~p>~p, ", [Pos, Term]), + print_chain(H, Next); + Other -> + io:format("~nERROR ~p~n", [Other]) + end. + +%% Can't be used at the bucket level!!!! +%% Only when we go down a chain +rterm(F) -> + case catch rterm2(F) of + {'EXIT', Reason} -> %% truncated DAT file + dets_utils:vformat("** dets: Corrupt or truncated dets file~n", + []), + {error, Reason}; + Other -> + Other + end. + +rterm2(F) -> + {ok, <>} = file:read(F, ?OHDSZ), + {ok, Bin} = file:read(F, Sz), + Term = binary_to_term(Bin), + {ok, Next, Sz, Term}. + + diff --git a/lib/stdlib/src/dets_v9.erl b/lib/stdlib/src/dets_v9.erl new file mode 100644 index 0000000000..53238e962f --- /dev/null +++ b/lib/stdlib/src/dets_v9.erl @@ -0,0 +1,2761 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(dets_v9). + +%% Dets files, implementation part. This module handles version 9. +%% To be called from dets.erl only. + +-export([constants/0, mark_dirty/1, read_file_header/2, + check_file_header/2, do_perform_save/1, initiate_file/11, + prep_table_copy/9, init_freelist/2, fsck_input/4, + bulk_input/3, output_objs/4, bchunk_init/2, + try_bchunk_header/2, compact_init/3, read_bchunks/2, + write_cache/1, may_grow/3, find_object/2, slot_objs/2, + scan_objs/8, db_hash/2, no_slots/1, table_parameters/1]). + +-export([file_info/1, v_segments/1]). + +-export([cache_segps/3]). + +-compile({inline, [{max_objsize,1},{maxobjsize,1}]}). +-compile({inline, [{write_segment_file,6}]}). +-compile({inline, [{sz2pos,1},{adjsz,1}]}). +-compile({inline, [{skip_bytes,6},{make_object,4}]}). +-compile({inline, [{segp_cache,2},{get_segp,1},{get_arrpart,1}]}). +-compile({inline, [{h,2}]}). + +-include("dets.hrl"). + +%% The layout of the file is : +%% +%% bytes decsription +%% ---------------------- File header +%% 4 FreelistsPointer +%% 4 Cookie +%% 4 ClosedProperly (pos=8) +%% 4 Type (pos=12) +%% 4 Version (pos=16) +%% 4 M +%% 4 Next +%% 4 KeyPos +%% 4 NoObjects +%% 4 NoKeys +%% 4 MinNoSlots +%% 4 MaxNoSlots +%% 4 HashMethod +%% 4 N +%% --- +%% 256 Version 9(a): Reserved for future versions. Initially zeros. +%% Version 9(b) has instead: +%% 112 28 counters for the buddy system sizes 2^4 to 2^31. +%% 144 Reserved for future versions. Initially zeros. +%% Version 9(c) has instead: +%% 112 28 counters for the buddy system sizes (as for 9(b)). +%% 16 MD5-sum for the 44 plus 112 bytes before the MD5-sum. +%% (FreelistsPointer, Cookie and ClosedProperly are not digested.) +%% 128 Reserved for future versions. Initially zeros. +%% --- +%% ------------------ end of file header +%% 4*256 SegmentArray Pointers. +%% ------------------ This is BASE. +%% 4*512 SegmentArray Part 1 +%% ... More SegmentArray Parts +%% 8*256 First segment +%% ??? Objects (free and alive) +%% 4*512 Further SegmentArray Part. +%% ??? Objects (free and alive) +%% 8*256 Further segment. +%% ??? Objects (free and alive) +%% ... more objects, segment array parts, and segments ... +%% ----------------------------- +%% ??? Free lists +%% ----------------------------- +%% 4 File size, in bytes. + +%% Before we can find an object we must find the slot where the +%% object resides. Each slot is a (possibly empty) list (or chain) of +%% objects that hash to the same slot. If the value stored in the +%% slot is zero, the slot chain is empty. If the slot value is +%% non-zero, the value points to a position in the file where the +%% collection of objects resides. Each collection has the following +%% layout: +%% +%% bytes decsription +%% -------------------- +%% 4 Size of the area allocated for the collection (8+Sz) +%% 4 Status (FREE or ACTIVE). These two are the Object Header. +%% Sz A binary containing the objects per key, sorted on key. +%% +%% When repairing or converting a file, the status field is used. +%% +%% The binary containing the objects per key of a table of type 'set' +%% has the following layout: +%% +%% bytes decsription +%% -------------------- +%% 4 Size of the object of the first key (4+OSz1) +%% OSz1 The object of the first key +%% ... +%% 4 Size of the object of the ith key (4+OSzi) +%% OSzi The object of the ith key +%% +%% The binary containing the objects per key of a table of type 'bag' +%% or 'duplicate_bag' has the following layout: +%% +%% bytes decsription +%% ---------------------- +%% 4 Size of the objects of the first key (4 + OSz1_1+...+OSz1_j+...) +%% 4 Size of the first object of the first key (4+OSz1_1) +%% OSz1_1 The first object of the first key +%% ... +%% 4 Size of the jth object of the first key (4+OSz1_j) +%% OSz1_j The jth object of the first key +%% ... +%% 4 Size of the objects of the ith key (4 + OSzi_1+...+OSzi_k+...) +%% 4 Size of the first object of the ith key (4+OSzi_1) +%% OSzi_1 The first object of the ith key +%% ... +%% 4 Size of the kth object of the ith key (4+OSzi_k) +%% OSzi_k The kth object of the ith key +%% ... +%% +%% The objects of a key are placed in time order, that is, the older +%% objects come first. If a new object is inserted, it is inserted +%% last. +%% +%% +%% +%%|---------------| +%%| head | +%%| | +%%| | +%%|_______________| +%%| |--| +%%|___part ptr 1__| | +%%| | | segarr part 1 +%%|___part ptr 2__| V______________| +%%| | | p1 | +%%| | |______________|--| +%%| .... | | p2 | | +%% (256) |______________| | +%% | | | +%% | .... | | segment 1 +%% | (512) | V __slot 0 ____| +%% | size | +%% | pointer |--| +%% |___slot 1 ____| | +%% | | | +%% | .... | | objects in slot 0 +%% (256) V segment 1 +%% |___________| +%% | size | +%% |___________| +%% | status | +%% |___________| +%% | | +%% | object | +%% | collec. | +%% |___________| + +%%% +%%% File header +%%% + +-define(RESERVED, 128). % Reserved for future use. + +-define(COLL_CNTRS, (28*4)). % Counters for the buddy system. +-define(MD5SZ, 16). + +-define(HEADSZ, + 56+?COLL_CNTRS+?MD5SZ). % The size of the file header, in bytes, + % not including the reserved part. +-define(HEADEND, (?HEADSZ+?RESERVED)). + % End of header and reserved area. +-define(SEGSZ, 512). % Size of a segment, in words. SZOBJP*SEGSZP. +-define(SEGSZP, 256). % Size of a segment, in number of pointers. +-define(SEGSZP_LOG2, 8). +-define(SEGOBJSZ, (4 * ?SZOBJP)). +-define(SEGPARTSZ, 512). % Size of segment array part, in words. +-define(SEGPARTSZ_LOG2, 9). +-define(SEGARRSZ, 256). % Maximal number of segment array parts.. +-define(SEGARRADDR(PartN), (?HEADEND + (4 * (PartN)))). +-define(SEGPARTADDR(P,SegN), ((P) + (4 * ?REM2(SegN, ?SEGPARTSZ)))). +-define(BASE, ?SEGARRADDR(?SEGARRSZ)). +-define(MAXSLOTS, (?SEGARRSZ * ?SEGPARTSZ * ?SEGSZP)). + +-define(SLOT2SEG(S), ((S) bsr ?SEGSZP_LOG2)). +-define(SEG2SEGARRPART(S), ((S) bsr ?SEGPARTSZ_LOG2)). + +-define(PHASH, 0). +-define(PHASH2, 1). + +%% BIG is used for hashing. BIG must be greater than the maximum +%% number of slots, currently 32 M (MAXSLOTS). +-define(BIG, 16#3ffffff). % 64 M + +%% Hard coded positions into the file header: +-define(FREELIST_POS, 0). +-define(CLOSED_PROPERLY_POS, 8). +-define(D_POS, 20). + +%%% Dets file versions up to 8 are handled in dets_v8. This module +%%% handles version 9, introduced in R8. +%%% +%%% Version 9(a) tables have 256 reserved bytes in the file header, +%%% all initialized to zero. +%%% Version 9(b) tables use the first 112 of these bytes for storing +%%% number of objects for each size of the buddy system. An empty 9(b) +%%% table cannot be distinguished from an empty 9(a) table. +%%% 9(c) has an MD5-sum for the file header. + +-define(FILE_FORMAT_VERSION, 9). + +-define(NOT_PROPERLY_CLOSED,0). +-define(CLOSED_PROPERLY,1). + +%% Size of object pointer, in words. SEGSZ = SZOBJP * SEGSZP. +-define(SZOBJP, 2). + +-define(OHDSZ, 8). % The size of the object header, in bytes. +-define(STATUS_POS, 4). % Position of the status field. + +-define(OHDSZ_v8, 12). % The size of the version 8 object header. + +%% The size of each object is a multiple of 16. +%% BUMP is used when repairing files. +-define(BUMP, 16). + +%%% '$hash' is the value of HASH_PARMS in R8, '$hash2' is the value in R9. +%%% +%%% The fields of the ?HASH_PARMS records are the same, but having +%%% different tags makes bchunk_init on R8 nodes reject data from R9 +%%% nodes, and vice versa. This is overkill, and due to an oversight. +%%% What should have been done in R8 was to check the hash method, not +%%% only the type of the table and the key position. R8 nodes cannot +%%% handle the phash2 method. +-define(HASH_PARMS, '$hash2'). + +-define(BCHUNK_FORMAT_VERSION, 1). + +-record(?HASH_PARMS, { + file_format_version, + bchunk_format_version, + file, type, keypos, hash_method, + n,m,next, + min,max, + no_objects,no_keys, + no_colls % [{LogSz,NoColls}], NoColls >= 0 + }). + +-define(ACTUAL_SEG_SIZE, (?SEGSZ*4)). + +-define(MAXBUD, 32). + +%%-define(DEBUGF(X,Y), io:format(X, Y)). +-define(DEBUGF(X,Y), void). + +%% {Bump} +constants() -> + {?BUMP, ?BASE}. + +%% -> ok | throw({NewHead,Error}) +mark_dirty(Head) -> + Dirty = [{?CLOSED_PROPERLY_POS, <>}], + dets_utils:pwrite(Head, Dirty), + dets_utils:sync(Head), + dets_utils:position(Head, Head#head.freelists_p), + dets_utils:truncate(Head, cur). + +%% -> {ok, head()} | throw(Error) | throw(badarg) +prep_table_copy(Fd, Tab, Fname, Type, Kp, Ram, CacheSz, Auto, Parms) -> + case Parms of + #?HASH_PARMS{file_format_version = ?FILE_FORMAT_VERSION, + bchunk_format_version = ?BCHUNK_FORMAT_VERSION, + n = N, m = M, next = Next, + min = Min, max = Max, + hash_method = HashMethodCode, + no_objects = NoObjects, no_keys = NoKeys, + no_colls = _NoColls} + when is_integer(N), is_integer(M), is_integer(Next), + is_integer(Min), is_integer(Max), + is_integer(NoObjects), is_integer(NoKeys), + NoObjects >= NoKeys -> + HashMethod = code_to_hash_method(HashMethodCode), + case hash_invars(N, M, Next, Min, Max) of + false -> + throw(badarg); + true -> + init_file(Fd, Tab, Fname, Type, Kp, Min, Max, Ram, + CacheSz, Auto, false, M, N, Next, HashMethod, + NoObjects, NoKeys) + end; + _ -> + throw(badarg) + end. + +%% -> {ok, head()} | throw(Error) +%% The File header and the SegmentArray Pointers are written here. +%% SegmentArray Parts are also written, but the segments are are not +%% initialized on file unless DoInitSegments is 'true'. (When +%% initializing a file by calling init_table, some time is saved by +%% not writing the segments twice.) +initiate_file(Fd, Tab, Fname, Type, Kp, MinSlots0, MaxSlots0, + Ram, CacheSz, Auto, DoInitSegments) -> + MaxSlots1 = erlang:min(MaxSlots0, ?MAXSLOTS), + MinSlots1 = erlang:min(MinSlots0, MaxSlots1), + MinSlots = slots2(MinSlots1), + MaxSlots = slots2(MaxSlots1), + M = Next = MinSlots, + N = 0, + init_file(Fd, Tab, Fname, Type, Kp, MinSlots, MaxSlots, Ram, CacheSz, + Auto, DoInitSegments, M, N, Next, phash2, 0, 0). + +init_file(Fd, Tab, Fname, Type, Kp, MinSlots, MaxSlots, Ram, CacheSz, + Auto, DoInitSegments, M, N, Next, HashMethod, NoObjects, NoKeys) -> + Ftab = dets_utils:init_alloc(?BASE), + + Head0 = #head{ + m = M, + m2 = M * 2, + next = Next, + fptr = Fd, + no_objects = NoObjects, + no_keys = NoKeys, + maxobjsize = 0, + n = N, + type = Type, + update_mode = dirty, + freelists = Ftab, + no_collections = orddict:new(), + auto_save = Auto, + hash_bif = HashMethod, + has_md5 = true, + keypos = Kp, + min_no_slots = MinSlots, + max_no_slots = MaxSlots, + + ram_file = Ram, + filename = Fname, + name = Tab, + cache = dets_utils:new_cache(CacheSz), + version = ?FILE_FORMAT_VERSION, + bump = ?BUMP, + base = ?BASE, + mod = ?MODULE + }, + + FreeListsPointer = 0, + NoColls = <<0:?COLL_CNTRS/unit:8>>, %% Buddy system counters. + FileHeader = file_header(Head0, FreeListsPointer, + ?NOT_PROPERLY_CLOSED, NoColls), + W0 = {0, [FileHeader | + <<0:(4*?SEGARRSZ)/unit:8>>]}, %% SegmentArray Pointers + + %% Remove cached pointers to segment array parts and segments: + lists:foreach(fun({I1,I2}) when is_integer(I1), is_integer(I2) -> ok; + ({K,V}) -> put(K, V) + end, erase()), + + %% Initialize array parts. + %% All parts before segments, for the sake of repair and initialization. + Zero = seg_zero(), + {Head1, Ws1} = init_parts(Head0, 0, no_parts(Next), Zero, []), + NoSegs = no_segs(Next), + + {Head, WsI, WsP} = init_segments(Head1, 0, NoSegs, Zero, [], []), + Ws2 = if + DoInitSegments -> WsP ++ WsI; + true -> WsP + end, + dets_utils:pwrite(Fd, Fname, [W0 | lists:append(Ws1) ++ Ws2]), + true = hash_invars(Head), + {ok, Head}. + +%% Returns a power of two not less than 256. +slots2(NoSlots) when NoSlots >= 256 -> + ?POW(dets_utils:log2(NoSlots)). + +init_parts(Head, PartNo, NoParts, Zero, Ws) when PartNo < NoParts -> + PartPos = ?SEGARRADDR(PartNo), + {NewHead, W, _Part} = alloc_part(Head, Zero, PartPos), + init_parts(NewHead, PartNo+1, NoParts, Zero, [W | Ws]); +init_parts(Head, _PartNo, _NoParts, _Zero, Ws) -> + {Head, Ws}. + +%% -> {Head, SegInitList, OtherList}; +%% SegPtrList = SegInitList = pwrite_list(). +init_segments(Head, SegNo, NoSegs, SegZero, WsP, WsI) when SegNo < NoSegs -> + {NewHead, WI, Ws} = allocate_segment(Head, SegZero, SegNo), + init_segments(NewHead, SegNo+1, NoSegs, SegZero, Ws ++ WsP, [WI | WsI]); +init_segments(Head, _SegNo, _NoSegs, _SegZero, WsP, WsI) -> + {Head, WsI, WsP}. + +%% -> {NewHead, SegInit, [SegPtr | PartStuff]} +allocate_segment(Head, SegZero, SegNo) -> + PartPos = ?SEGARRADDR(SegNo div ?SEGPARTSZ), + case get_arrpart(PartPos) of + undefined -> + %% may throw error: + {Head1, [InitArrPart, ArrPartPointer], Part} = + alloc_part(Head, SegZero, PartPos), + {NewHead, InitSegment, [SegPointer]} = + alloc_seg(Head1, SegZero, SegNo, Part), + {NewHead, InitSegment, [InitArrPart, SegPointer, ArrPartPointer]}; + Part -> + alloc_seg(Head, SegZero, SegNo, Part) + end. + +alloc_part(Head, PartZero, PartPos) -> + %% may throw error: + {NewHead, Part, _} = dets_utils:alloc(Head, adjsz(4 * ?SEGPARTSZ)), + arrpart_cache(PartPos, Part), + InitArrPart = {Part, PartZero}, % same size as segment + ArrPartPointer = {PartPos, <>}, + {NewHead, [InitArrPart, ArrPartPointer], Part}. + +alloc_seg(Head, SegZero, SegNo, Part) -> + %% may throw error: + {NewHead, Segment, _} = dets_utils:alloc(Head, adjsz(4 * ?SEGSZ)), + InitSegment = {Segment, SegZero}, + Pos = ?SEGPARTADDR(Part, SegNo), + segp_cache(Pos, Segment), + dets_utils:disk_map_segment(Segment, SegZero), + SegPointer = {Pos, <>}, + {NewHead, InitSegment, [SegPointer]}. + +%% Read free lists (using a Buddy System) from file. +init_freelist(Head, true) -> + Pos = Head#head.freelists_p, + free_lists_from_file(Head, Pos). + +%% -> {ok, Fd, fileheader()} | throw(Error) +read_file_header(Fd, FileName) -> + {ok, Bin} = dets_utils:pread_close(Fd, FileName, 0, ?HEADSZ), + <> = Bin, + <<_:12/binary,MD5DigestedPart:(?HEADSZ-?MD5SZ-12)/binary,_/binary>> = Bin, + {ok, EOF} = dets_utils:position_close(Fd, FileName, eof), + {ok, <>} = dets_utils:pread_close(Fd, FileName, EOF-4, 4), + {CL, <<>>} = lists:foldl(fun(LSz, {Acc,<>}) -> + if + NN =:= 0 -> {Acc, R}; + true -> {[{LSz,NN} | Acc], R} + end + end, {[], NoCollsB}, lists:seq(4, ?MAXBUD-1)), + NoColls = + if + CL =:= [], NoObjects > 0 -> % Version 9(a) + undefined; + true -> + lists:reverse(CL) + end, + + FH = #fileheader{freelist = FreeList, + cookie = Cookie, + closed_properly = CP, + type = dets_utils:code_to_type(Type2), + version = Version, + m = M, + next = Next, + keypos = Kp, + no_objects = NoObjects, + no_keys = NoKeys, + min_no_slots = MinNoSlots, + max_no_slots = MaxNoSlots, + no_colls = NoColls, + hash_method = HashMethod, + read_md5 = MD5, + has_md5 = <<0:?MD5SZ/unit:8>> =/= MD5, + md5 = erlang:md5(MD5DigestedPart), + trailer = FileSize, + eof = EOF, + n = N, + mod = ?MODULE}, + {ok, Fd, FH}. + +%% -> {ok, head(), ExtraInfo} | {error, Reason} (Reason lacking file name) +%% ExtraInfo = true +check_file_header(FH, Fd) -> + HashBif = code_to_hash_method(FH#fileheader.hash_method), + Test = + if + FH#fileheader.cookie =/= ?MAGIC -> + {error, not_a_dets_file}; + FH#fileheader.type =:= badtype -> + {error, invalid_type_code}; + FH#fileheader.version =/= ?FILE_FORMAT_VERSION -> + {error, bad_version}; + FH#fileheader.has_md5, + FH#fileheader.read_md5 =/= FH#fileheader.md5 -> + {error, not_a_dets_file}; % harsh but fair + FH#fileheader.trailer =/= FH#fileheader.eof -> + {error, not_closed}; + HashBif =:= undefined -> + {error, bad_hash_bif}; + FH#fileheader.closed_properly =:= ?CLOSED_PROPERLY -> + {ok, true}; + FH#fileheader.closed_properly =:= ?NOT_PROPERLY_CLOSED -> + {error, not_closed}; + true -> + {error, not_a_dets_file} + end, + case Test of + {ok, ExtraInfo} -> + MaxObjSize = max_objsize(FH#fileheader.no_colls), + H = #head{ + m = FH#fileheader.m, + m2 = FH#fileheader.m * 2, + next = FH#fileheader.next, + fptr = Fd, + no_objects = FH#fileheader.no_objects, + no_keys = FH#fileheader.no_keys, + maxobjsize = MaxObjSize, + n = FH#fileheader.n, + type = FH#fileheader.type, + update_mode = saved, + auto_save = infinity, % not saved on file + fixed = false, % not saved on file + freelists_p = FH#fileheader.freelist, + hash_bif = HashBif, + has_md5 = FH#fileheader.has_md5, + keypos = FH#fileheader.keypos, + min_no_slots = FH#fileheader.min_no_slots, + max_no_slots = FH#fileheader.max_no_slots, + no_collections = FH#fileheader.no_colls, + version = ?FILE_FORMAT_VERSION, + mod = ?MODULE, + bump = ?BUMP, + base = ?BASE}, + {ok, H, ExtraInfo}; + Error -> + Error + end. + +%% Inlined. +max_objsize(NoColls = undefined) -> + NoColls; +max_objsize(NoColls) -> + max_objsize(NoColls, 0). + +max_objsize([], Max) -> + Max; +max_objsize([{_,0} | L], Max) -> + max_objsize(L, Max); +max_objsize([{I,_} | L], _Max) -> + max_objsize(L, I). + +cache_segps(Fd, FileName, M) -> + NoParts = no_parts(M), + ArrStart = ?SEGARRADDR(0), + {ok, Bin} = dets_utils:pread_close(Fd, FileName, ArrStart, 4 * NoParts), + cache_arrparts(Bin, ?HEADEND, Fd, FileName). + +cache_arrparts(<>, Pos, Fd, FileName) -> + arrpart_cache(Pos, ArrPartPos), + {ok, ArrPartBin} = dets_utils:pread_close(Fd, FileName, + ArrPartPos, + ?SEGPARTSZ*4), + cache_segps1(Fd, ArrPartBin, ArrPartPos), + cache_arrparts(B, Pos+4, Fd, FileName); +cache_arrparts(<<>>, _Pos, _Fd, _FileName) -> + ok. + +cache_segps1(_Fd, <<0:32,_/binary>>, _P) -> + ok; +cache_segps1(Fd, <>, P) -> + dets_utils:disk_map_segment_p(Fd, S), + segp_cache(P, S), + cache_segps1(Fd, B, P+4); +cache_segps1(_Fd, <<>>, _P) -> + ok. + +no_parts(NoSlots) -> + ((NoSlots - 1) div (?SEGSZP * ?SEGPARTSZ)) + 1. + +no_segs(NoSlots) -> + ((NoSlots - 1) div ?SEGSZP) + 1. + +%%% +%%% Repair, conversion and initialization of a dets file. +%%% + +%%% bulk_input/3. Initialization, the general case (any stream of objects). +%%% output_objs/4. Initialization (general case) and repair. +%%% bchunk_init/2. Initialization using bchunk. + +bulk_input(Head, InitFun, _Cntrs) -> + bulk_input(Head, InitFun, make_ref(), 0). + +bulk_input(Head, InitFun, Ref, Seq) -> + fun(close) -> + _ = (catch InitFun(close)); + (read) -> + case catch {Ref, InitFun(read)} of + {Ref, end_of_input} -> + end_of_input; + {Ref, {L0, NewInitFun}} when is_list(L0), + is_function(NewInitFun) -> + Kp = Head#head.keypos, + case catch bulk_objects(L0, Head, Kp, Seq, []) of + {'EXIT', _Error} -> + _ = (catch NewInitFun(close)), + {error, invalid_objects_list}; + {L, NSeq} -> + {L, bulk_input(Head, NewInitFun, Ref, NSeq)} + end; + {Ref, Value} -> + {error, {init_fun, Value}}; + Error -> + throw({thrown, Error}) + end + end. + +bulk_objects([T | Ts], Head, Kp, Seq, L) -> + BT = term_to_binary(T), + Key = element(Kp, T), + bulk_objects(Ts, Head, Kp, Seq+1, [make_object(Head, Key, Seq, BT) | L]); +bulk_objects([], _Head, Kp, Seq, L) when is_integer(Kp), is_integer(Seq) -> + {L, Seq}. + +-define(FSCK_SEGMENT, 1). +-define(FSCK_SEGMENT2, 10000). + +-define(VEMPTY, {}). +-define(VSET(I, V, E), setelement(I, V, E)). +-define(VGET(I, V), element(I, V)). +-define(VEXT(S, V, T), + list_to_tuple(tuple_to_list(V) ++ lists:duplicate(S-tuple_size(V), T))). + +%% Number of bytes that will be handled before the cache is written to +%% file. Used when compacting or writing chunks. +-define(CACHE_SIZE, (60*?CHUNK_SIZE)). + +%% {LogSize,NoObjects} in Cntrs is replaced by +%% {LogSize,Position,{FileName,FileDescriptor},NoCollections}. +%% There is also an object {no, NoObjects, NoKeys}. +-define(COUNTERS, no). +-define(OBJ_COUNTER, 2). +-define(KEY_COUNTER, 3). + +output_objs(OldV, Head, SlotNums, Cntrs) when OldV =< 9 -> + fun(close) -> + %% Make sure that the segments are initialized in case + %% init_table has been called. + Cache = ?VEMPTY, + Acc = [], % This is the only way Acc can be empty. + true = ets:insert(Cntrs, {?FSCK_SEGMENT,0,[],0}), + true = ets:insert(Cntrs, {?COUNTERS, 0, 0}), + Fun = output_objs2(foo, Acc, OldV, Head, Cache, Cntrs, + SlotNums, bar), + Fun(close); + ([]) -> + output_objs(OldV, Head, SlotNums, Cntrs); + (L) -> + %% Information about number of objects per size is not + %% relevant for version 9. It is the number of collections + %% that matters. + true = ets:delete_all_objects(Cntrs), + true = ets:insert(Cntrs, {?COUNTERS, 0, 0}), + Es = bin2term(L, OldV, Head#head.keypos), + %% The cache is a tuple indexed by the (log) size. An element + %% is [BinaryObject]. + Cache = ?VEMPTY, + {NE, NAcc, NCache} = output_slots(Es, Head, Cache, Cntrs, 0, 0), + output_objs2(NE, NAcc, OldV, Head, NCache, Cntrs, SlotNums, 1) + end. + +output_objs2(E, Acc, OldV, Head, Cache, SizeT, SlotNums, 0) -> + NCache = write_all_sizes(Cache, SizeT, Head, more), + %% Number of handled file_sorter chunks before writing: + Max = erlang:max(1, erlang:min(tuple_size(NCache), 10)), + output_objs2(E, Acc, OldV, Head, NCache, SizeT, SlotNums, Max); +output_objs2(E, Acc, OldV, Head, Cache, SizeT, SlotNums, ChunkI) -> + fun(close) -> + {_, [], Cache1} = + if + Acc =:= [] -> {foo, [], Cache}; + true -> output_slot(Acc, Head, Cache, [], SizeT, 0, 0) + end, + _NCache = write_all_sizes(Cache1, SizeT, Head, no_more), + SegSz = ?ACTUAL_SEG_SIZE, + {_, SegEnd, _} = dets_utils:alloc(Head, adjsz(SegSz)), + [{?COUNTERS,NoObjects,NoKeys}] = ets:lookup(SizeT, ?COUNTERS), + Head1 = Head#head{no_objects = NoObjects, no_keys = NoKeys}, + true = ets:delete(SizeT, ?COUNTERS), + {NewHead, NL, _MaxSz, _End} = allocate_all_objects(Head1, SizeT), + %% It is not known until all objects have been collected + %% how many object collections there are per size. Now + %% that is known and the absolute positions of the object + %% collections can be calculated. + segment_file(SizeT, NewHead, NL, SegEnd), + {MinSlots, EstNoSlots, MaxSlots} = SlotNums, + if + EstNoSlots =:= bulk_init -> + {ok, 0, NewHead}; + true -> + EstNoSegs = no_segs(EstNoSlots), + MinNoSegs = no_segs(MinSlots), + MaxNoSegs = no_segs(MaxSlots), + NoSegs = no_segs(NoKeys), + Diff = abs(NoSegs - EstNoSegs), + if + Diff > 5, NoSegs =< MaxNoSegs, NoSegs >= MinNoSegs -> + {try_again, NoKeys}; + true -> + {ok, 0, NewHead} + end + end; + (L) -> + Es = bin2term(L, OldV, Head#head.keypos), + {NE, NAcc, NCache} = + output_slots(E, Es, Acc, Head, Cache, SizeT, 0, 0), + output_objs2(NE, NAcc, OldV, Head, NCache, SizeT, SlotNums, + ChunkI-1) + end. + +%%% Compaction. + +compact_init(ReadHead, WriteHead, TableParameters) -> + SizeT = ets:new(dets_compact, []), + #head{no_keys = NoKeys, no_objects = NoObjects} = ReadHead, + + NoObjsPerSize = TableParameters#?HASH_PARMS.no_colls, + {NewWriteHead, Bases, SegAddr, SegEnd} = + prepare_file_init(NoObjects, NoKeys, NoObjsPerSize, SizeT, WriteHead), + + Input = compact_input(ReadHead, NewWriteHead, SizeT, tuple_size(Bases)), + Output = fast_output(NewWriteHead, SizeT, Bases, SegAddr, SegEnd), + TmpDir = filename:dirname(NewWriteHead#head.filename), + Reply = (catch file_sorter:sort(Input, Output, + [{format, binary},{tmpdir, TmpDir}, + {header, 1}])), % compact_objs/9: 13 bytes + ets:delete(SizeT), + Reply. + +compact_input(Head, WHead, SizeT, NoSizes) -> + L = dets_utils:all_allocated_as_list(Head), + Cache = ?VEXT(NoSizes, ?VEMPTY, [0 | []]), + compact_input(Head, WHead, SizeT, Cache, L). + +compact_input(Head, WHead, SizeT, Cache, L) -> + fun(close) -> + ok; + (read) -> + compact_read(Head, WHead, SizeT, Cache, L, 0, [], 0) + end. + +compact_read(_Head, WHead, SizeT, Cache, [], _Min, [], _ASz) -> + _ = fast_write_all_sizes(Cache, SizeT, WHead), + end_of_input; +compact_read(Head, WHead, SizeT, Cache, L, Min, SegBs, ASz) + when ASz + Min >= ?CACHE_SIZE, ASz > 0 -> + NCache = fast_write_all_sizes(Cache, SizeT, WHead), + {SegBs, compact_input(Head, WHead, SizeT, NCache, L)}; +compact_read(Head, WHead, SizeT, Cache, [[From | To] | L], Min, SegBs, ASz) -> + Max = erlang:max(?CHUNK_SIZE*3, Min), + case check_pread_arg(Max, Head) of + true -> + case dets_utils:pread_n(Head#head.fptr, From, Max) of + eof -> + %% Should never happen since compaction will not + %% be tried unless the file trailer is valid. + not_ok; % try a proper repair + Bin1 when byte_size(Bin1) < Min -> + %% The last object may not be padded. + Pad = Min - byte_size(Bin1), + NewBin = <>, + compact_objs(Head, WHead, SizeT, NewBin, L, + From, To, SegBs, Cache, ASz); + NewBin -> + compact_objs(Head, WHead, SizeT, NewBin, L, + From, To, SegBs, Cache, ASz) + end; + false -> + not_ok % try a proper repair + end. + +compact_objs(Head, WHead, SizeT, Bin, L, From, To, SegBs, Cache, ASz) + when From =:= To -> + case L of + [] -> + {SegBs, compact_input(Head, WHead, SizeT, Cache, L)}; + [[From1 | To1] | L1] -> + Skip1 = From1 - From, + case Bin of + <<_:Skip1/binary,NewBin/binary>> -> + compact_objs(Head, WHead, SizeT, NewBin, L1, From1, To1, + SegBs, Cache, ASz); + _ when byte_size(Bin) < Skip1 -> + compact_read(Head, WHead, SizeT, Cache, L, 0, SegBs, ASz) + end + end; +compact_objs(Head, WHead, SizeT, <> = Bin, + L, From, To, SegBs, Cache, ASz) when St =:= ?ACTIVE -> + LSize = sz2pos(Size), + Size2 = ?POW(LSize-1), + if + byte_size(Bin) >= Size2 -> + NASz = ASz + Size2, + <> = Bin, + Term = if + Head#head.type =:= set -> + binary_to_term(KO); + true -> + <<_KSz:32,B2/binary>> = KO, + binary_to_term(B2) + end, + Key = element(Head#head.keypos, Term), + Slot = db_hash(Key, Head), + From1 = From + Size2, + [Addr | AL] = ?VGET(LSize, Cache), + NCache = ?VSET(LSize, Cache, [Addr + Size2 | [SlotObjs | AL]]), + NSegBs = [<> | SegBs], + compact_objs(Head, WHead, SizeT, NewBin, L, From1, + To, NSegBs, NCache, NASz); + true -> + compact_read(Head, WHead, SizeT, Cache, [[From|To] | L], + Size2, SegBs, ASz) + end; +compact_objs(Head, WHead, SizeT, <<_:32, _St:32, _:32, _/binary>> = Bin, + L, From, To, SegBs, Cache, ASz) + when byte_size(Bin) >= ?ACTUAL_SEG_SIZE -> % , _St =/= ?ACTIVE + <<_:?ACTUAL_SEG_SIZE/binary, NewBin/binary>> = Bin, + compact_objs(Head, WHead, SizeT, NewBin, L, From + ?ACTUAL_SEG_SIZE, + To, SegBs, Cache, ASz); +compact_objs(Head, WHead, SizeT, <<_:32, _St:32, _:32, _/binary>> = Bin, + L, From, To, SegBs, Cache, ASz) + when byte_size(Bin) < ?ACTUAL_SEG_SIZE -> % , _St =/= ?ACTIVE + compact_read(Head, WHead, SizeT, Cache, [[From|To] | L], + ?ACTUAL_SEG_SIZE, SegBs, ASz); +compact_objs(Head, WHead, SizeT, _Bin, L, From, To, SegBs, Cache, ASz) -> + compact_read(Head, WHead, SizeT, Cache, [[From|To] | L], 0, SegBs, ASz). + +%%% End compaction. + +%%% Bchunk. + +read_bchunks(Head, L) -> + read_bchunks(Head, L, 0, [], 0). + +read_bchunks(_Head, L, Min, Bs, ASz) when ASz + Min >= 4*?CHUNK_SIZE, + Bs =/= [] -> + {lists:reverse(Bs), L}; +read_bchunks(Head, {From, To, L}, Min, Bs, ASz) -> + Max = erlang:max(?CHUNK_SIZE*2, Min), + case check_pread_arg(Max, Head) of + true -> + case dets_utils:pread_n(Head#head.fptr, From, Max) of + eof -> + %% Should never happen. + {error, premature_eof}; + NewBin when byte_size(NewBin) >= Min -> + bchunks(Head, L, NewBin, Bs, ASz, From, To); + Bin1 when To - From =:= Min, L =:= <<>> -> + %% when byte_size(Bin1) < Min. + %% The last object may not be padded. + Pad = Min - byte_size(Bin1), + NewBin = <>, + bchunks(Head, L, NewBin, Bs, ASz, From, To); + _ -> + {error, premature_eof} + end; + false -> + {error, dets_utils:bad_object(bad_object, {read_bchunks, Max})} + end. + +bchunks(Head, L, Bin, Bs, ASz, From, To) when From =:= To -> + if + L =:= <<>> -> + {finished, lists:reverse(Bs)}; + true -> + <> = L, + Skip1 = From1 - From, + case Bin of + <<_:Skip1/binary,NewBin/binary>> -> + bchunks(Head, L1, NewBin, Bs, ASz, From1, To1); + _ when byte_size(Bin) < Skip1 -> + read_bchunks(Head, {From1,To1,L1}, 0, Bs, ASz) + end + end; +bchunks(Head, L, <> = Bin, Bs, ASz, + From, To) when St =:= ?ACTIVE; St =:= ?FREE -> + LSize = sz2pos(Size), + Size2 = ?POW(LSize-1), + if + byte_size(Bin) >= Size2 -> + <> = Bin, + %% LSize and Slot are used in make_slots/6. The reason to + %% calculate Slot here is to reduce the CPU load in + %% make_slots/6. + Term = if + Head#head.type =:= set -> + binary_to_term(KO); + true -> + <<_KSz:32,B2/binary>> = KO, + binary_to_term(B2) + end, + Key = element(Head#head.keypos, Term), + Slot = db_hash(Key, Head), + B = {LSize,Slot,B0}, + bchunks(Head, L, NewBin, [B | Bs], ASz + Size2, From+Size2, To); + true -> + read_bchunks(Head, {From, To, L}, Size2, Bs, ASz) + end; +bchunks(Head, L, <<_:32, _St:32, _:32, _/binary>> = Bin, Bs, ASz, From, To) + when byte_size(Bin) >= ?ACTUAL_SEG_SIZE -> + <<_:?ACTUAL_SEG_SIZE/binary, NewBin/binary>> = Bin, + bchunks(Head, L, NewBin, Bs, ASz, From + ?ACTUAL_SEG_SIZE, To); +bchunks(Head, L, <<_:32, _St:32, _:32, _/binary>> = Bin, Bs, ASz, From, To) + when byte_size(Bin) < ?ACTUAL_SEG_SIZE -> + read_bchunks(Head, {From, To, L}, ?ACTUAL_SEG_SIZE, Bs, ASz); +bchunks(Head, L, _Bin, Bs, ASz, From, To) -> + read_bchunks(Head, {From, To, L}, 0, Bs, ASz). + +%%% End bchunk. + +%% -> {ok, NewHead} | throw(Error) | Error +bchunk_init(Head, InitFun) -> + Ref = make_ref(), + %% The non-empty list of data begins with the table parameters. + case catch {Ref, InitFun(read)} of + {Ref, end_of_input} -> + {error, {init_fun, end_of_input}}; + {Ref, {[], NInitFun}} when is_function(NInitFun) -> + bchunk_init(Head, NInitFun); + {Ref, {[ParmsBin | L], NInitFun}} + when is_list(L), is_function(NInitFun) -> + #head{fptr = Fd, type = Type, keypos = Kp, + auto_save = Auto, cache = Cache, + filename = Fname, ram_file = Ram, + name = Tab} = Head, + case try_bchunk_header(ParmsBin, Head) of + {ok, Parms} -> + #?HASH_PARMS{no_objects = NoObjects, + no_keys = NoKeys, + no_colls = NoObjsPerSize} = Parms, + CacheSz = dets_utils:cache_size(Cache), + {ok, Head1} = + prep_table_copy(Fd, Tab, Fname, Type, + Kp, Ram, CacheSz, + Auto, Parms), + SizeT = ets:new(dets_init, []), + {NewHead, Bases, SegAddr, SegEnd} = + prepare_file_init(NoObjects, NoKeys, + NoObjsPerSize, SizeT, Head1), + ECache = ?VEXT(tuple_size(Bases), ?VEMPTY, [0 | []]), + Input = + fun(close) -> + _ = (catch NInitFun(close)); + (read) -> + do_make_slots(L, ECache, SizeT, NewHead, Ref, + 0, NInitFun) + end, + Output = fast_output(NewHead, SizeT, Bases, SegAddr,SegEnd), + TmpDir = filename:dirname(Head#head.filename), + Reply = (catch file_sorter:sort(Input, Output, + [{format, binary}, + {tmpdir, TmpDir}, + {header, 1}])), + ets:delete(SizeT), + Reply; + not_ok -> + {error, {init_fun, ParmsBin}} + end; + {Ref, Value} -> + {error, {init_fun, Value}}; + Error -> + {thrown, Error} + end. + +try_bchunk_header(ParmsBin, Head) -> + #head{type = Type, keypos = Kp, hash_bif = HashBif} = Head, + HashMethod = hash_method_to_code(HashBif), + case catch binary_to_term(ParmsBin) of + Parms when is_record(Parms, ?HASH_PARMS), + Parms#?HASH_PARMS.type =:= Type, + Parms#?HASH_PARMS.keypos =:= Kp, + Parms#?HASH_PARMS.hash_method =:= HashMethod, + Parms#?HASH_PARMS.bchunk_format_version =:= + ?BCHUNK_FORMAT_VERSION -> + {ok, Parms}; + _ -> + not_ok + end. + +bchunk_input(InitFun, SizeT, Head, Ref, Cache, ASz) -> + fun(close) -> + _ = (catch InitFun(close)); + (read) -> + case catch {Ref, InitFun(read)} of + {Ref, end_of_input} -> + _ = fast_write_all_sizes(Cache, SizeT, Head), + end_of_input; + {Ref, {L, NInitFun}} when is_list(L), is_function(NInitFun) -> + do_make_slots(L, Cache, SizeT, Head, Ref, ASz, + NInitFun); + {Ref, Value} -> + {error, {init_fun, Value}}; + Error -> + throw({thrown, Error}) + end + end. + +do_make_slots(L, Cache, SizeT, Head, Ref, ASz, InitFun) -> + case catch make_slots(L, Cache, [], ASz) of + {'EXIT', _} -> + _ = (catch InitFun(close)), + {error, invalid_objects_list}; + {Cache1, SegBs, NASz} when NASz > ?CACHE_SIZE -> + NCache = fast_write_all_sizes(Cache1, SizeT, Head), + F = bchunk_input(InitFun, SizeT, Head, Ref, NCache, 0), + {SegBs, F}; + {NCache, SegBs, NASz} -> + F = bchunk_input(InitFun, SizeT, Head, Ref, NCache, NASz), + {SegBs, F} + end. + +make_slots([{LSize,Slot,<> = Bin0} | Bins], + Cache, SegBs, ASz) -> + Bin = if + St =:= ?ACTIVE -> + Bin0; + St =:= ?FREE -> + <> + end, + BSz = byte_size(Bin0), + true = (BSz =:= ?POW(LSize-1)), + NASz = ASz + BSz, + [Addr | L] = ?VGET(LSize, Cache), + NSegBs = [<> | SegBs], + NCache = ?VSET(LSize, Cache, [Addr + BSz | [Bin | L]]), + make_slots(Bins, NCache, NSegBs, NASz); +make_slots([], Cache, SegBs, ASz) -> + {Cache, SegBs, ASz}. + +fast_output(Head, SizeT, Bases, SegAddr, SegEnd) -> + fun(close) -> + fast_output_end(Head, SizeT); + (L) -> + case file:position(Head#head.fptr, SegAddr) of + {ok, SegAddr} -> + NewSegAddr = write_segment_file(L, Bases, Head, [], + SegAddr, SegAddr), + fast_output2(Head, SizeT, Bases, NewSegAddr, + SegAddr, SegEnd); + Error -> + catch dets_utils:file_error(Error, Head#head.filename) + end + end. + +fast_output2(Head, SizeT, Bases, SegAddr, SS, SegEnd) -> + fun(close) -> + FinalZ = SegEnd - SegAddr, + dets_utils:write(Head, dets_utils:make_zeros(FinalZ)), + fast_output_end(Head, SizeT); + (L) -> + NewSegAddr = write_segment_file(L, Bases, Head, [], SegAddr, SS), + fast_output2(Head, SizeT, Bases, NewSegAddr, SS, SegEnd) + end. + +fast_output_end(Head, SizeT) -> + case ets:foldl(fun({_Sz,_Pos,Cnt,NoC}, Acc) -> (Cnt =:= NoC) and Acc end, + true, SizeT) of + true -> {ok, Head}; + false -> {error, invalid_objects_list} + end. + +%% Inlined. +write_segment_file([<> | Bins], + Bases, Head, Ws, SegAddr, SS) -> + %% Should call slot_position/1, but since all segments are + %% allocated in a sequence, the position of a slot can be + %% calculated faster. + Pos = SS + ?SZOBJP*4 * Slot, % Same as Pos = slot_position(Slot). + write_segment_file(Bins, Bases, Head, Ws, SegAddr, SS, Pos, + BSize, AddrToBe, LSize); +write_segment_file([], _Bases, Head, Ws, SegAddr, _SS) -> + dets_utils:write(Head, Ws), + SegAddr. + +write_segment_file(Bins, Bases, Head, Ws, SegAddr, SS, Pos, BSize, + AddrToBe, LSize) when Pos =:= SegAddr -> + Addr = AddrToBe + element(LSize, Bases), + NWs = [Ws | <>], + write_segment_file(Bins, Bases, Head, NWs, SegAddr + ?SZOBJP*4, SS); +write_segment_file(Bins, Bases, Head, Ws, SegAddr, SS, Pos, BSize, + AddrToBe, LSize) when Pos - SegAddr < 100 -> + Addr = AddrToBe + element(LSize, Bases), + NoZeros = Pos - SegAddr, + NWs = [Ws | <<0:NoZeros/unit:8,BSize:32,Addr:32>>], + NSegAddr = SegAddr + NoZeros + ?SZOBJP*4, + write_segment_file(Bins, Bases, Head, NWs, NSegAddr, SS); +write_segment_file(Bins, Bases, Head, Ws, SegAddr, SS, Pos, BSize, + AddrToBe, LSize) -> + Addr = AddrToBe + element(LSize, Bases), + NoZeros = Pos - SegAddr, + NWs = [Ws, dets_utils:make_zeros(NoZeros) | <>], + NSegAddr = SegAddr + NoZeros + ?SZOBJP*4, + write_segment_file(Bins, Bases, Head, NWs, NSegAddr, SS). + +fast_write_all_sizes(Cache, SizeT, Head) -> + CacheL = lists:reverse(tuple_to_list(Cache)), + fast_write_sizes(CacheL, tuple_size(Cache), SizeT, Head, [], []). + +fast_write_sizes([], _Sz, _SizeT, Head, NCL, PwriteList) -> + #head{filename = FileName, fptr = Fd} = Head, + ok = dets_utils:pwrite(Fd, FileName, PwriteList), + list_to_tuple(NCL); +fast_write_sizes([[_Addr] = C | CL], Sz, SizeT, Head, NCL, PwriteList) -> + fast_write_sizes(CL, Sz-1, SizeT, Head, [C | NCL], PwriteList); +fast_write_sizes([[Addr | C] | CL], Sz, SizeT, Head, NCL, PwriteList) -> + case ets:lookup(SizeT, Sz) of + [] -> + throw({error, invalid_objects_list}); + [{Sz,Position,_ObjCounter,_NoCollections}] -> + %% Update ObjCounter: + NoColls = length(C), + _ = ets:update_counter(SizeT, Sz, {3, NoColls}), + Pos = Position + Addr - NoColls*?POW(Sz-1), + fast_write_sizes(CL, Sz-1, SizeT, Head, [[Addr] | NCL], + [{Pos,lists:reverse(C)} | PwriteList]) + end. + +prepare_file_init(NoObjects, NoKeys, NoObjsPerSize, SizeT, Head) -> + SegSz = ?ACTUAL_SEG_SIZE, + {_, SegEnd, _} = dets_utils:alloc(Head, adjsz(SegSz)), + Head1 = Head#head{no_objects = NoObjects, no_keys = NoKeys}, + true = ets:insert(SizeT, {?FSCK_SEGMENT,0,[],0}), + lists:foreach(fun({LogSz,NoColls}) -> + true = ets:insert(SizeT, {LogSz+1,0,0,NoColls}) + end, NoObjsPerSize), + {NewHead, NL0, MaxSz, EndOfFile} = allocate_all_objects(Head1, SizeT), + [{?FSCK_SEGMENT,SegAddr,[],0} | NL] = NL0, + true = ets:delete_all_objects(SizeT), + lists:foreach(fun(X) -> true = ets:insert(SizeT, X) end, NL), + Bases = lists:foldl(fun({LSz,P,_D,_N}, A) -> setelement(LSz,A,P) end, + erlang:make_tuple(MaxSz, 0), NL), + Est = lists:foldl(fun({LSz,_,_,N}, A) -> A + ?POW(LSz-1)*N end, 0, NL), + ok = write_bytes(NewHead, EndOfFile, Est), + {NewHead, Bases, SegAddr, SegEnd}. + +%% Writes "zeros" to the file. This ensures that the file blocks are +%% allocated more or less contiguously, which reduces the seek times +%% to a minimum when the file is later read serially from beginning to +%% end (as is done when calling select and the like). A well-formed +%% file will be created also if nothing is written (as is the case for +%% small files, for efficiency). +write_bytes(_Head, _EndOfFile, Est) when Est < ?CACHE_SIZE -> + ok; +write_bytes(Head, EndOfFile, _Est) -> + Fd = Head#head.fptr, + {ok, Start} = file:position(Fd, eof), + BytesToWrite = EndOfFile - Start, + SizeInKB = 64, + Bin = list_to_binary(lists:duplicate(SizeInKB * 4, lists:seq(0, 255))), + write_loop(Head, BytesToWrite, Bin). + +write_loop(Head, BytesToWrite, Bin) when BytesToWrite >= byte_size(Bin) -> + case file:write(Head#head.fptr, Bin) of + ok -> write_loop(Head, BytesToWrite - byte_size(Bin), Bin); + Error -> dets_utils:file_error(Error, Head#head.filename) + end; +write_loop(_Head, 0, _Bin) -> + ok; +write_loop(Head, BytesToWrite, Bin) -> + <> = Bin, + write_loop(Head, BytesToWrite, SmallBin). + +%% By allocating bigger objects before smaller ones, holes in the +%% buddy system memory map are avoided. Unfortunately, the segments +%% are always allocated first, so if there are objects bigger than a +%% segment, there is a hole to handle. (Haven't considered placing the +%% segments among other objects of the same size.) +allocate_all_objects(Head, SizeT) -> + DTL = lists:reverse(lists:keysort(1, ets:tab2list(SizeT))), + MaxSz = element(1, hd(DTL)), + SegSize = ?ACTUAL_SEG_SIZE, + {Head1, HSz, HN, HA} = alloc_hole(MaxSz, Head, SegSize), + {Head2, NL} = allocate_all(Head1, DTL, []), + %% Find the position that will be the end of the file by allocating + %% a minimal object. + {_Head, EndOfFile, _} = dets_utils:alloc(Head2, ?BUMP), + Head3 = free_hole(Head2, HSz, HN, HA), + NewHead = Head3#head{maxobjsize = max_objsize(Head3#head.no_collections)}, + {NewHead, NL, MaxSz, EndOfFile}. + +alloc_hole(LSize, Head, SegSz) when ?POW(LSize-1) > SegSz -> + Size = ?POW(LSize-1), + {_, SegAddr, _} = dets_utils:alloc(Head, adjsz(SegSz)), + {_, Addr, _} = dets_utils:alloc(Head, adjsz(Size)), + N = (Addr - SegAddr) div SegSz, + Head1 = dets_utils:alloc_many(Head, SegSz, N, SegAddr), + {Head1, SegSz, N, SegAddr}; +alloc_hole(_MaxSz, Head, _SegSz) -> + {Head, 0, 0, 0}. + +free_hole(Head, _Size, 0, _Addr) -> + Head; +free_hole(Head, Size, N, Addr) -> + {Head1, _} = dets_utils:free(Head, Addr, adjsz(Size)), + free_hole(Head1, Size, N-1, Addr+Size). + +%% One (temporary) file for each buddy size, write all objects of that +%% size to the file. +allocate_all(Head, [{?FSCK_SEGMENT,_,Data,_}], L) -> + %% And one file for the segments... + %% Note that space for the array parts and the segments has + %% already been allocated, but the segments have not been + %% initialized on disk. + NoParts = no_parts(Head#head.next), + %% All parts first, ensured by init_segments/6. + Addr = ?BASE + NoParts * 4 * ?SEGPARTSZ, + {Head, [{?FSCK_SEGMENT,Addr,Data,0} | L]}; +allocate_all(Head, [{LSize,_,Data,NoCollections} | DTL], L) -> + Size = ?POW(LSize-1), + {_Head, Addr, _} = dets_utils:alloc(Head, adjsz(Size)), + Head1 = dets_utils:alloc_many(Head, Size, NoCollections, Addr), + NoColls = Head1#head.no_collections, + NewNoColls = orddict:update_counter(LSize-1, NoCollections, NoColls), + NewHead = Head1#head{no_collections = NewNoColls}, + E = {LSize,Addr,Data,NoCollections}, + allocate_all(NewHead, DTL, [E | L]). + +bin2term(Bin, 9, Kp) -> + bin2term1(Bin, Kp, []); +bin2term(Bin, 8, Kp) -> + bin2term_v8(Bin, Kp, []). + +bin2term1([<> | BTs], Kp, L) -> + Term = binary_to_term(BinTerm), + Key = element(Kp, Term), + bin2term1(BTs, Kp, [{Slot, Key, Seq, Term, BinTerm} | L]); +bin2term1([], _Kp, L) -> + lists:reverse(L). + +bin2term_v8([<> | BTs], Kp, L) -> + Term = binary_to_term(BinTerm), + Key = element(Kp, Term), + bin2term_v8(BTs, Kp, [{Slot, Key, foo, Term, BinTerm} | L]); +bin2term_v8([], _Kp, L) -> + lists:reverse(L). + +write_all_sizes({}=Cache, _SizeT, _Head, _More) -> + Cache; +write_all_sizes(Cache, SizeT, Head, More) -> + CacheL = lists:reverse(tuple_to_list(Cache)), + Sz = length(CacheL), + NCL = case ets:info(SizeT, size) of + 1 when More =:= no_more -> % COUNTERS only... + all_sizes(CacheL, Sz, SizeT); + _ -> + write_sizes(CacheL, Sz, SizeT, Head) + end, + list_to_tuple(NCL). + +all_sizes([]=CL, _Sz, _SizeT) -> + CL; +all_sizes([[]=C | CL], Sz, SizeT) -> + [C | all_sizes(CL, Sz-1, SizeT)]; +all_sizes([C0 | CL], Sz, SizeT) -> + C = lists:reverse(C0), + NoCollections = length(C), + true = ets:insert(SizeT, {Sz,0,C,NoCollections}), + [[] | all_sizes(CL, Sz-1, SizeT)]. + +write_sizes([]=CL, _Sz, _SizeT, _Head) -> + CL; +write_sizes([[]=C | CL], Sz, SizeT, Head) -> + [C | write_sizes(CL, Sz-1, SizeT, Head)]; +write_sizes([C | CL], Sz, SizeT, Head) -> + {FileName, Fd} = + case ets:lookup(SizeT, Sz) of + [] -> + temp_file(Head, SizeT, Sz); + [{_,_,{FN,F},_}] -> + {FN, F} + end, + NoCollections = length(C), + _ = ets:update_counter(SizeT, Sz, {4,NoCollections}), + case file:write(Fd, lists:reverse(C)) of + ok -> + [[] | write_sizes(CL, Sz-1, SizeT, Head)]; + Error -> + dets_utils:file_error(FileName, Error) + end. + +output_slots([E | Es], Head, Cache, SizeT, NoKeys, NoObjs) -> + output_slots(E, Es, [E], Head, Cache, SizeT, NoKeys, NoObjs); +output_slots([], _Head, Cache, SizeT, NoKeys, NoObjs) -> + _ = ets:update_counter(SizeT, ?COUNTERS, {?OBJ_COUNTER,NoObjs}), + _ = ets:update_counter(SizeT, ?COUNTERS, {?KEY_COUNTER,NoKeys}), + {not_a_tuple, [], Cache}. + +output_slots(E, [E1 | Es], Acc, Head, Cache, SizeT, NoKeys, NoObjs) + when element(1, E) =:= element(1, E1) -> + output_slots(E1, Es, [E1 | Acc], Head, Cache, SizeT, NoKeys, NoObjs); +output_slots(E, [], Acc, _Head, Cache, SizeT, NoKeys, NoObjs) -> + _ = ets:update_counter(SizeT, ?COUNTERS, {?OBJ_COUNTER,NoObjs}), + _ = ets:update_counter(SizeT, ?COUNTERS, {?KEY_COUNTER,NoKeys}), + {E, Acc, Cache}; +output_slots(_E, L, Acc, Head, Cache, SizeT, NoKeys, NoObjs) -> + output_slot(Acc, Head, Cache, L, SizeT, NoKeys, NoObjs). + +output_slot(Es, Head, Cache, L, SizeT, NoKeys, NoObjs) -> + Slot = element(1, hd(Es)), + %% Plain lists:sort/1 will do. + {Bins, Size, No, KNo} = prep_slot(lists:sort(Es), Head), + NNoKeys = NoKeys + KNo, + NNoObjs = NoObjs + No, + + %% First the object collection. + BSize = Size + ?OHDSZ, + LSize = sz2pos(BSize), + Size2 = ?POW(LSize-1), + Pad = <<0:(Size2-BSize)/unit:8>>, + BinObject = [<>, Bins | Pad], + Cache1 = + if + LSize > tuple_size(Cache) -> + C1 = ?VEXT(LSize, Cache, []), + ?VSET(LSize, C1, [BinObject]); + true -> + CL = ?VGET(LSize, Cache), + ?VSET(LSize, Cache, [BinObject | CL]) + end, + + %% Then the pointer to the object collection. + %% Cannot yet determine the absolute pointers; segment_file/4 does that. + PBin = <>, + PL = ?VGET(?FSCK_SEGMENT, Cache1), + NCache = ?VSET(?FSCK_SEGMENT, Cache1, [PBin | PL]), + output_slots(L, Head, NCache, SizeT, NNoKeys, NNoObjs). + +prep_slot(L, Head) when Head#head.type =/= set -> + prep_slot(L, Head, []); +prep_slot([{_Slot,Key,_Seq,_T,BT} | L], _Head) -> + prep_set_slot(L, Key, BT, 0, 0, 0, []). + +prep_slot([{_Slot, Key, Seq, T, _BT} | L], Head, W) -> + prep_slot(L, Head, [{Key, {Seq, {insert,T}}} | W]); +prep_slot([], Head, W) -> + WLs = dets_utils:family(W), + {[], Bins, Size, No, KNo, _} = + eval_slot(WLs, [], Head#head.type, [], [], 0, 0, 0, false), + {Bins, Size, No, KNo}. + +%% Optimization, prep_slot/3 would work for set tables as well. +prep_set_slot([{_,K,_Seq,_T1,BT1} | L], K, _BT, Sz, NoKeys, NoObjs, Ws) -> + prep_set_slot(L, K, BT1, Sz, NoKeys, NoObjs, Ws); +prep_set_slot([{_,K1,_Seq,_T1,BT1} | L], _K, BT, Sz, NoKeys, NoObjs, Ws) -> + BSize = byte_size(BT) + 4, + NWs = [Ws,<>|BT], + prep_set_slot(L, K1, BT1, Sz+BSize, NoKeys+1, NoObjs+1, NWs); +prep_set_slot([], _K, BT, Sz, NoKeys, NoObjs, Ws) -> + BSize = byte_size(BT) + 4, + {[Ws, <> | BT], Sz + BSize, NoKeys+1, NoObjs+1}. + +segment_file(SizeT, Head, FileData, SegEnd) -> + I = 2, + true = ets:delete_all_objects(SizeT), + lists:foreach(fun(X) -> true = ets:insert(SizeT, X) end, FileData), + [{?FSCK_SEGMENT,SegAddr,Data,0} | FileData1] = FileData, + NewData = + case Data of + {InFile,In0} -> + {OutFile, Out} = temp_file(Head, SizeT, I), + file:close(In0), + {ok, In} = dets_utils:open(InFile, [raw,binary,read]), + {ok, 0} = dets_utils:position(In, InFile, bof), + seg_file(SegAddr, SegAddr, In, InFile, Out, OutFile, SizeT, + SegEnd), + file:close(In), + file:delete(InFile), + {OutFile,Out}; + Objects -> + {LastAddr, B} = seg_file(Objects, SegAddr, SegAddr, SizeT, []), + dets_utils:disk_map_segment(SegAddr, B), + FinalZ = SegEnd - LastAddr, + [B | dets_utils:make_zeros(FinalZ)] + end, + %% Restore the positions. + true = ets:delete_all_objects(SizeT), + %% To get the segments copied first by dets:fsck_copy/4, use a big + %% number here, FSCK_SEGMENT2. + lists:foreach(fun(X) -> true = ets:insert(SizeT, X) end, + [{?FSCK_SEGMENT2,SegAddr,NewData,0} | FileData1]), + ok. + +seg_file(Addr, SS, In, InFile, Out, OutFile, SizeT, SegEnd) -> + case dets_utils:read_n(In, 4500) of + eof -> + FinalZ = SegEnd - Addr, + dets_utils:fwrite(Out, OutFile, dets_utils:make_zeros(FinalZ)); + Bin -> + {NewAddr, L} = seg_file(Bin, Addr, SS, SizeT, []), + dets_utils:disk_map_segment(Addr, L), + ok = dets_utils:fwrite(Out, OutFile, L), + seg_file(NewAddr, SS, In, InFile, Out, OutFile, SizeT, SegEnd) + end. + +seg_file(<>, Addr, SS, SizeT, L) -> + seg_file_item(T, Addr, SS, SizeT, L, Slot, BSize, LSize); +seg_file([<> | T], Addr, SS, SizeT, L) -> + seg_file_item(T, Addr, SS, SizeT, L, Slot, BSize, LSize); +seg_file([], Addr, _SS, _SizeT, L) -> + {Addr, lists:reverse(L)}; +seg_file(<<>>, Addr, _SS, _SizeT, L) -> + {Addr, lists:reverse(L)}. + +seg_file_item(T, Addr, SS, SizeT, L, Slot, BSize, LSize) -> + %% Should call slot_position/1, but since all segments are + %% allocated in a sequence, the position of a slot can be + %% calculated faster. + SlotPos = SS + ?SZOBJP*4 * Slot, % SlotPos = slot_position(Slot) + NoZeros = SlotPos - Addr, + PSize = NoZeros+?SZOBJP*4, + Inc = ?POW(LSize-1), + CollP = ets:update_counter(SizeT, LSize, Inc) - Inc, + PointerBin = if + NoZeros =:= 0 -> + <>; + NoZeros > 100 -> + [dets_utils:make_zeros(NoZeros) | + <>]; + true -> + <<0:NoZeros/unit:8, BSize:32, CollP:32>> + end, + seg_file(T, Addr + PSize, SS, SizeT, [PointerBin | L]). + +temp_file(Head, SizeT, N) -> + TmpName = lists:concat([Head#head.filename, '.', N]), + {ok, Fd} = dets_utils:open(TmpName, [raw, binary, write]), + %% The file table is consulted when cleaning up. + true = ets:insert(SizeT, {N,0,{TmpName,Fd},0}), + {TmpName, Fd}. + +%% Does not close Fd. +fsck_input(Head, Fd, Cntrs, FileHeader) -> + MaxSz0 = case FileHeader#fileheader.has_md5 of + true when is_integer(FileHeader#fileheader.no_colls) -> + ?POW(max_objsize(FileHeader#fileheader.no_colls)); + _ -> + %% The file is not compressed, so the bucket size + %% cannot exceed the filesize, for all buckets. + case file:position(Fd, eof) of + {ok, Pos} -> + Pos; + _ -> + 1 bsl 32 + end + end, + MaxSz = erlang:max(MaxSz0, ?CHUNK_SIZE), + State0 = fsck_read(?BASE, Fd, [], 0), + fsck_input(Head, State0, Fd, MaxSz, Cntrs). + +fsck_input(Head, State, Fd, MaxSz, Cntrs) -> + fun(close) -> + ok; + (read) -> + case State of + done -> + end_of_input; + {done, L, _Seq} -> + R = count_input(Head, Cntrs, L), + {R, fsck_input(Head, done, Fd, MaxSz, Cntrs)}; + {cont, L, Bin, Pos, Seq} -> + R = count_input(Head, Cntrs, L), + FR = fsck_objs(Bin, Head#head.keypos, Head, [], Seq), + NewState = fsck_read(FR, Pos, Fd, MaxSz, Head), + {R, fsck_input(Head, NewState, Fd, MaxSz, Cntrs)} + end + end. + +%% The ets table Cntrs is used for counting objects per size. +count_input(Head, Cntrs, L) when Head#head.version =:= 8 -> + count_input1(Cntrs, L, []); +count_input(_Head, _Cntrs, L) -> + lists:reverse(L). + +count_input1(Cntrs, [[LogSz | B] | Ts], L) -> + case catch ets:update_counter(Cntrs, LogSz, 1) of + N when is_integer(N) -> ok; + _Badarg -> true = ets:insert(Cntrs, {LogSz, 1}) + end, + count_input1(Cntrs, Ts, [B | L]); +count_input1(_Cntrs, [], L) -> + L. + +fsck_read(Pos, F, L, Seq) -> + case file:position(F, Pos) of + {ok, _} -> + read_more_bytes([], 0, Pos, F, L, Seq); + _Error -> + {done, L, Seq} + end. + +fsck_read({more, Bin, Sz, L, Seq}, Pos, F, MaxSz, Head) when Sz > MaxSz -> + FR = skip_bytes(Bin, ?BUMP, Head#head.keypos, Head, L, Seq), + fsck_read(FR, Pos, F, MaxSz, Head); +fsck_read({more, Bin, Sz, L, Seq}, Pos, F, _MaxSz, _Head) -> + read_more_bytes(Bin, Sz, Pos, F, L, Seq); +fsck_read({new, Skip, L, Seq}, Pos, F, _MaxSz, _Head) -> + NewPos = Pos + Skip, + fsck_read(NewPos, F, L, Seq). + +read_more_bytes(B, Min, Pos, F, L, Seq) -> + Max = if + Min < ?CHUNK_SIZE -> ?CHUNK_SIZE; + true -> Min + end, + case dets_utils:read_n(F, Max) of + eof -> + {done, L, Seq}; + Bin -> + NewPos = Pos + byte_size(Bin), + {cont, L, list_to_binary([B, Bin]), NewPos, Seq} + end. + +fsck_objs(Bin = <>, Kp, Head, L, Seq) -> + if + Status =:= ?ACTIVE -> + Sz1 = Sz-?OHDSZ, + case Tail of + <> -> + case catch bin2keybins(BinTerm, Head) of + {'EXIT', _Reason} -> + %% The whole collection of objects is skipped. + skip_bytes(Bin, ?BUMP, Kp, Head, L, Seq); + BOs -> + {NL, NSeq} = make_objects(BOs, Seq, Kp, Head, L), + Skip = ?POW(sz2pos(Sz)-1) - Sz, + skip_bytes(Tail2, Skip, Kp, Head, NL, NSeq) + end; + _ when byte_size(Tail) < Sz1 -> + {more, Bin, Sz, L, Seq} + end; + true -> + skip_bytes(Bin, ?BUMP, Kp, Head, L, Seq) + end; +fsck_objs(Bin, _Kp, _Head, L, Seq) -> + {more, Bin, 0, L, Seq}. + +make_objects([{K,BT}|Os], Seq, Kp, Head, L) when Head#head.version =:= 8 -> + LogSz = dets_v8:sz2pos(byte_size(BT)+?OHDSZ_v8), + Slot = dets_v8:db_hash(K, Head), + Obj = [LogSz | <>], + make_objects(Os, Seq, Kp, Head, [Obj | L]); +make_objects([{K,BT} | Os], Seq, Kp, Head, L) -> + Obj = make_object(Head, K, Seq, BT), + make_objects(Os, Seq+1, Kp, Head, [Obj | L]); +make_objects([], Seq, _Kp, _Head, L) -> + {L, Seq}. + +%% Inlined. +make_object(Head, Key, Seq, BT) -> + Slot = db_hash(Key, Head), + <>. + +%% Inlined. +skip_bytes(Bin, Skip, Kp, Head, L, Seq) -> + case Bin of + <<_:Skip/binary, Tail/binary>> -> + fsck_objs(Tail, Kp, Head, L, Seq); + _ when byte_size(Bin) < Skip -> + {new, Skip - byte_size(Bin), L, Seq} + end. + +%%% +%%% End of repair, conversion and initialization of a dets file. +%%% + +%% -> {NewHead, ok} | throw({Head, Error}) +do_perform_save(H) -> + {ok, FreeListsPointer} = dets_utils:position(H, eof), + H1 = H#head{freelists_p = FreeListsPointer}, + {FLW, FLSize} = free_lists_to_file(H1), + FileSize = FreeListsPointer + FLSize + 4, + ok = dets_utils:write(H1, [FLW | <>]), + FileHeader = file_header(H1, FreeListsPointer, ?CLOSED_PROPERLY), + case dets_utils:debug_mode() of + true -> + TmpHead = H1#head{freelists = init_freelist(H1, true), + fixed = false}, + case + catch dets_utils:all_allocated_as_list(TmpHead) + =:= dets_utils:all_allocated_as_list(H1) + of + true -> + dets_utils:pwrite(H1, [{0, FileHeader}]); + _ -> + dets_utils:corrupt_reason(H1, {failed_to_save_free_lists, + FreeListsPointer, + TmpHead#head.freelists, + H1#head.freelists}) + end; + false -> + dets_utils:pwrite(H1, [{0, FileHeader}]) + end. + +file_header(Head, FreeListsPointer, ClosedProperly) -> + NoColls = case Head#head.no_collections of + undefined -> []; + NC -> NC + end, + L = orddict:merge(fun(_K, V1, V2) -> V1 + V2 end, + NoColls, + lists:map(fun(X) -> {X,0} end, lists:seq(4,?MAXBUD-1))), + CW = lists:map(fun({_LSz,N}) -> <> end, L), + file_header(Head, FreeListsPointer, ClosedProperly, CW). + +file_header(Head, FreeListsPointer, ClosedProperly, NoColls) -> + Cookie = ?MAGIC, + TypeCode = dets_utils:type_to_code(Head#head.type), + Version = ?FILE_FORMAT_VERSION, + HashMethod = hash_method_to_code(Head#head.hash_bif), + H1 = <>, + H2 = <>, + DigH = [H2 | NoColls], + MD5 = case Head#head.has_md5 of + true -> erlang:md5(DigH); + false -> <<0:?MD5SZ/unit:8>> + end, + [H1, DigH, MD5 | <<0:?RESERVED/unit:8>>]. + +%% Going through some trouble to avoid creating one single binary for +%% the free lists. If the free lists are huge, binary_to_term and +%% term_to_binary could otherwise stop the emulator for quite some time. + +-define(MAXFREEOBJ, 4096). +-define(ENDFREE, 12345). + +free_lists_to_file(H) -> + FL = dets_utils:get_freelists(H), + free_list_to_file(FL, H, 1, tuple_size(FL), [], 0). + +free_list_to_file(_Ftab, _H, Pos, Sz, Ws, WsSz) when Pos > Sz -> + {[Ws | <<(4+?OHDSZ):32, ?FREE:32, ?ENDFREE:32>>], WsSz+4+?OHDSZ}; +free_list_to_file(Ftab, H, Pos, Sz, Ws, WsSz) -> + Max = (?MAXFREEOBJ - 4 - ?OHDSZ) div 4, + F = fun(N, L, W, S) when N =:= 0 -> {N, L, W, S}; + (N, L, W, S) -> + {L1, N1, More} = + if + N > Max -> + {lists:sublist(L, Max), Max, + {N-Max, lists:nthtail(Max, L)}}; + true -> + {L, N, no_more} + end, + Size = N1*4 + 4 + ?OHDSZ, + Header = <>, + NW = [W, Header | L1], + case More of + no_more -> + {0, [], NW, S+Size}; + {NN, NL} -> + ok = dets_utils:write(H, NW), + {NN, NL, [], S+Size} + end + end, + {NWs,NWsSz} = dets_utils:tree_to_bin(element(Pos, Ftab), F, Max, Ws, WsSz), + free_list_to_file(Ftab, H, Pos+1, Sz, NWs, NWsSz). + +free_lists_from_file(H, Pos) -> + dets_utils:position(H#head.fptr, H#head.filename, Pos), + FL = dets_utils:empty_free_lists(), + case catch bin_to_tree([], H, start, FL, -1, []) of + {'EXIT', _} -> + throw({error, {bad_freelists, H#head.filename}}); + Reply -> + Reply + end. + +bin_to_tree(Bin, H, LastPos, Ftab, A0, L) -> + case Bin of + <<_Size:32,?FREE:32,?ENDFREE:32,_/binary>> when L =:= [] -> + Ftab; + <<_Size:32,?FREE:32,?ENDFREE:32,_/binary>> -> + setelement(LastPos, Ftab, dets_utils:list_to_tree(L)); + <> + when byte_size(T) >= Size-4-?OHDSZ -> + {NFtab, L1, A1} = + if + Pos =/= LastPos, LastPos =/= start -> + Tree = dets_utils:list_to_tree(L), + {setelement(LastPos, Ftab, Tree), [], -1}; + true -> + {Ftab, L, A0} + end, + {NL, B2, A2} = bin_to_tree1(T, Size-?OHDSZ-4, A1, L1), + bin_to_tree(B2, H, Pos, NFtab, A2, NL); + _ -> + Bin2 = dets_utils:read_n(H#head.fptr, ?MAXFREEOBJ), + bin_to_tree(list_to_binary([Bin | Bin2]), H, LastPos, Ftab, A0, L) + end. + +bin_to_tree1(<>, Size, A, L) + when Size >= 16, A < A1, A1 < A2, A2 < A3, A3 < A4 -> + bin_to_tree1(T, Size-16, A4, [A4, A3, A2, A1 | L]); +bin_to_tree1(<>, Size, A, L) when Size >= 4, A < A1 -> + bin_to_tree1(T, Size - 4, A1, [A1 | L]); +bin_to_tree1(B, 0, A, L) -> + {L, B, A}. + +%% -> [term()] | throw({Head, Error}) +slot_objs(H, Slot) when Slot >= H#head.next -> + '$end_of_table'; +slot_objs(H, Slot) -> + {ok, _Pointer, Objects} = slot_objects(H, Slot), + Objects. + +%% Inlined. +h(I, phash2) -> erlang:phash2(I); % -> [0..2^27-1] +h(I, phash) -> erlang:phash(I, ?BIG) - 1. + +db_hash(Key, Head) when Head#head.hash_bif =:= phash2 -> + H = erlang:phash2(Key), + Hash = ?REM2(H, Head#head.m), + if + Hash < Head#head.n -> + ?REM2(H, Head#head.m2); % H rem (2 * m) + true -> + Hash + end; +db_hash(Key, Head) -> + H = h(Key, Head#head.hash_bif), + Hash = H rem Head#head.m, + if + Hash < Head#head.n -> + H rem (Head#head.m2); % H rem (2 * m) + true -> + Hash + end. + +hash_method_to_code(phash2) -> ?PHASH2; +hash_method_to_code(phash) -> ?PHASH. + +code_to_hash_method(?PHASH2) -> phash2; +code_to_hash_method(?PHASH) -> phash; +code_to_hash_method(_) -> undefined. + +no_slots(Head) -> + {Head#head.min_no_slots, Head#head.next, Head#head.max_no_slots}. + +table_parameters(Head) -> + case Head#head.no_collections of + undefined -> + undefined; % Version 9(a) + CL -> + NoColls0 = lists:foldl(fun({_,0}, A) -> A; + (E, A) -> [E | A] + end, [], CL), + NoColls = lists:reverse(NoColls0), + #?HASH_PARMS{file_format_version = Head#head.version, + bchunk_format_version = ?BCHUNK_FORMAT_VERSION, + file = filename:basename(Head#head.filename), + type = Head#head.type, + keypos = Head#head.keypos, + hash_method = hash_method_to_code(Head#head.hash_bif), + n = Head#head.n, m = Head#head.m, + next = Head#head.next, + min = Head#head.min_no_slots, + max = Head#head.max_no_slots, + no_objects = Head#head.no_objects, + no_keys = Head#head.no_keys, no_colls = NoColls} + end. + +%% Allow quite a lot when reading object collections. +-define(MAXCOLL, (10 * ?CHUNK_SIZE)). + +%% Re-hashing a segment, starting with SlotStart. +%% +%% On the average, half of the keys of the slot are put in a new slot. +%% If the old slot is i, then the new slot is i+m. The new slots +%% reside in a newly allocated segment. +%% +%% -> {NewHead, ok} | throw({Head, Error}) +re_hash(Head, SlotStart) -> + FromSlotPos = slot_position(SlotStart), + ToSlotPos = slot_position(SlotStart + Head#head.m), + RSpec = [{FromSlotPos, 4 * ?SEGSZ}], + {ok, [FromBin]} = dets_utils:pread(RSpec, Head), + split_bins(FromBin, Head, FromSlotPos, ToSlotPos, [], [], 0). + +split_bins(<<>>, Head, _Pos1, _Pos2, _ToRead, _L, 0) -> + {Head, ok}; +split_bins(<<>>, Head, Pos1, Pos2, ToRead, L, _SoFar) -> + re_hash_write(Head, ToRead, L, Pos1, Pos2); +split_bins(FB, Head, Pos1, Pos2, ToRead, L, SoFar) -> + <> = FB, + <> = FB, + NSoFar = SoFar + Sz1, + NPos1 = Pos1 + ?SZOBJP*4, + NPos2 = Pos2 + ?SZOBJP*4, + if + NSoFar > ?MAXCOLL, ToRead =/= [] -> + {NewHead, ok} = re_hash_write(Head, ToRead, L, Pos1, Pos2), + split_bins(FB, NewHead, Pos1, Pos2, [], [], 0); + Sz1 =:= 0 -> + E = {skip,B1}, + split_bins(FT, Head, NPos1, NPos2, ToRead, [E | L], NSoFar); + true -> + E = {Sz1,P1,B1,Pos1,Pos2}, + NewToRead = [{P1,Sz1} | ToRead], + split_bins(FT, Head, NPos1, NPos2, NewToRead, [E | L], NSoFar) + end. + +re_hash_write(Head, ToRead, L, Pos1, Pos2) -> + check_pread2_arg(ToRead, Head), + {ok, Bins} = dets_utils:pread(ToRead, Head), + Z = <<0:32, 0:32>>, + {Head1, BinFS, BinTS, WsB} = re_hash_slots(Bins, L, Head, Z, [],[],[]), + WPos1 = Pos1 - ?SZOBJP*4*length(L), + WPos2 = Pos2 - ?SZOBJP*4*length(L), + ToWrite = [{WPos1,BinFS}, {WPos2, BinTS} | WsB], + dets_utils:pwrite(Head1, ToWrite). + +re_hash_slots(Bins, [{skip,B1} | L], Head, Z, BinFS, BinTS, WsB) -> + re_hash_slots(Bins, L, Head, Z, [B1 | BinFS], [Z | BinTS], WsB); +re_hash_slots([FB | Bins], [E | L], Head, Z, BinFS, BinTS, WsB) -> + {Sz1,P1,B1,Pos1,Pos2} = E, + KeyObjs = case catch per_key(Head, FB) of + {'EXIT', _Error} -> + Bad = dets_utils:bad_object(re_hash_slots, {FB, E}), + throw(dets_utils:corrupt_reason(Head, Bad)); + Else -> + Else + end, + case re_hash_split(KeyObjs, Head, [], 0, [], 0) of + {_KL, _KSz, [], 0} -> + Sz1 = _KSz + ?OHDSZ, + re_hash_slots(Bins, L, Head, Z, [B1 | BinFS], [Z | BinTS], WsB); + {[], 0, _ML, _MSz} -> %% Optimization. + Sz1 = _MSz + ?OHDSZ, + re_hash_slots(Bins, L, Head, Z, [Z | BinFS], [B1 | BinTS], WsB); + {KL, KSz, ML, MSz} when KL =/= [], KSz > 0, ML =/= [], MSz > 0 -> + {Head1, FS1, Ws1} = + updated(Head, P1, Sz1, KSz, Pos1, KL, true, foo, bar), + {NewHead, [{Pos2,Bin2}], Ws2} = + updated(Head1, 0, 0, MSz, Pos2, ML, true, foo, bar), + NewBinFS = case FS1 of + [{Pos1,Bin1}] -> [Bin1 | BinFS]; + [] -> [B1 | BinFS] % cannot happen + end, + NewBinTS = [Bin2 | BinTS], + NewWsB = Ws2 ++ Ws1 ++ WsB, + re_hash_slots(Bins, L, NewHead, Z, NewBinFS, NewBinTS, NewWsB) + end; +re_hash_slots([], [], Head, _Z, BinFS, BinTS, WsB) -> + {Head, BinFS, BinTS, lists:reverse(WsB)}. + +re_hash_split([E | KeyObjs], Head, KL, KSz, ML, MSz) -> + {Key,Sz,Bin,_Item,_Objs} = E, + New = h(Key, Head#head.hash_bif) rem Head#head.m2, % h(key) rem (m * 2) + if + New >= Head#head.m -> + re_hash_split(KeyObjs, Head, KL, KSz, [Bin | ML], MSz + Sz); + true -> + re_hash_split(KeyObjs, Head, [Bin | KL], KSz + Sz, ML, MSz) + end; +re_hash_split([], _Head, KL, KSz, ML, MSz) -> + {lists:reverse(KL), KSz, lists:reverse(ML), MSz}. + +%% -> {NewHead, [LookedUpObject], pwrite_list()} | throw({NewHead, Error}) +write_cache(Head) -> + C = Head#head.cache, + case dets_utils:is_empty_cache(C) of + true -> {Head, [], []}; + false -> + {NewC, MaxInserts, PerKey} = dets_utils:reset_cache(C), + %% MaxNoInsertedKeys is an upper limit on the number of new keys. + MaxNoInsertedKeys = erlang:min(MaxInserts, length(PerKey)), + Head1 = Head#head{cache = NewC}, + case may_grow(Head1, MaxNoInsertedKeys, once) of + {Head2, ok} -> + eval_work_list(Head2, PerKey); + HeadError -> + throw(HeadError) + end + end. + +%% -> {NewHead, ok} | {NewHead, Error} +may_grow(Head, _N, _How) when Head#head.fixed =/= false -> + {Head, ok}; +may_grow(#head{access = read}=Head, _N, _How) -> + {Head, ok}; +may_grow(Head, _N, _How) when Head#head.next >= Head#head.max_no_slots -> + {Head, ok}; +may_grow(Head, N, How) -> + Extra = erlang:min(2*?SEGSZP, Head#head.no_keys + N - Head#head.next), + case catch may_grow1(Head, Extra, How) of + {error, _Reason} = Error -> % alloc may throw error + dets_utils:corrupt(Head, Error); + {NewHead, Reply} when is_record(Head, head) -> + {NewHead, Reply} + end. + +may_grow1(Head, Extra, many_times) when Extra > ?SEGSZP -> + Reply = grow(Head, 1, undefined), + self() ! ?DETS_CALL(self(), may_grow), + Reply; +may_grow1(Head, Extra, _How) -> + grow(Head, Extra, undefined). + +%% -> {Head, ok} | throw({Head, Error}) +grow(Head, Extra, _SegZero) when Extra =< 0 -> + {Head, ok}; +grow(Head, Extra, undefined) -> + grow(Head, Extra, seg_zero()); +grow(Head, _Extra, _SegZero) when Head#head.next >= Head#head.max_no_slots -> + {Head, ok}; +grow(Head, Extra, SegZero) -> + #head{n = N, next = Next, m = M} = Head, + SegNum = Next div ?SEGSZP, + {Head0, W, Ws1} = allocate_segment(Head, SegZero, SegNum), + %% re_hash/2 will overwrite the segment, but initialize it anyway... + {Head1, ok} = dets_utils:pwrite(Head0, [W | Ws1]), + %% If re_hash fails, segp_cache has been called, but it does not matter. + {Head2, ok} = re_hash(Head1, N), + NewHead = + if + N + ?SEGSZP =:= M -> + Head2#head{n = 0, next = Next + ?SEGSZP, m = 2 * M, m2 = 4 * M}; + true -> + Head2#head{n = N + ?SEGSZP, next = Next + ?SEGSZP} + end, + true = hash_invars(NewHead), + grow(NewHead, Extra - ?SEGSZP, SegZero). + +hash_invars(H) -> + hash_invars(H#head.n, H#head.m, H#head.next, H#head.min_no_slots, + H#head.max_no_slots). + +-define(M8(X), (((X) band (?SEGSZP - 1)) =:= 0)). +hash_invars(N, M, Next, Min, Max) -> + ?M8(N) and ?M8(M) and ?M8(Next) and ?M8(Min) and ?M8(Max) + and (0 =< N) and (N =< M) and (N =< 2*Next) and (M =< Next) + and (Next =< 2*M) and (0 =< Min) and (Min =< Next) and (Next =< Max) + and (Min =< M). + +seg_zero() -> + <<0:(4*?SEGSZ)/unit:8>>. + +find_object(Head, Object) -> + Key = element(Head#head.keypos, Object), + Slot = db_hash(Key, Head), + find_object(Head, Object, Slot). + +find_object(H, _Obj, Slot) when Slot >= H#head.next -> + false; +find_object(H, Obj, Slot) -> + case catch slot_objects(H, Slot) of + {ok, Pointer, Objects} -> + case lists:member(Obj, Objects) of + true -> {ok, Pointer}; + false -> false + end; + _ -> false + end. + +%% -> {ok, BucketP, Objects} | throw({Head, Error}) +slot_objects(Head, Slot) -> + SlotPos = slot_position(Slot), + MaxSize = maxobjsize(Head), + case dets_utils:ipread(Head, SlotPos, MaxSize) of + {ok, {BucketSz, Pointer, <>}} -> + case catch bin2objs(KeysObjs, Head#head.type, []) of + {'EXIT', _Error} -> + Bad = dets_utils:bad_object(slot_objects, + {SlotPos, KeysObjs}), + throw(dets_utils:corrupt_reason(Head, Bad)); + Objs when is_list(Objs) -> + {ok, Pointer, lists:reverse(Objs)} + end; + [] -> + {ok, 0, []}; + BadRead -> % eof or bad badly formed binary + Bad = dets_utils:bad_object(slot_objects, {SlotPos, BadRead}), + throw(dets_utils:corrupt_reason(Head, Bad)) + end. + +%%% +%%% Cache routines depending on the dets file format. +%%% + +%% -> {Head, [LookedUpObject], pwrite_list()} | throw({Head, Error}) +eval_work_list(Head, [{Key,[{_Seq,{lookup,Pid}}]}]) -> + SlotPos = slot_position(db_hash(Key, Head)), + MaxSize = maxobjsize(Head), + Objs = case dets_utils:ipread(Head, SlotPos, MaxSize) of + {ok, {_BucketSz, _Pointer, Bin}} -> + case catch per_key(Head, Bin) of + {'EXIT', _Error} -> + Bad = dets_utils:bad_object(eval_work_list, + {SlotPos, Bin}), + throw(dets_utils:corrupt_reason(Head, Bad)); + KeyObjs when is_list(KeyObjs) -> + case dets_utils:mkeysearch(Key, 1, KeyObjs) of + false -> + []; + {value, {Key,_KS,_KB,O,Os}} -> + case catch binobjs2terms(Os) of + {'EXIT', _Error} -> + Bad = dets_utils:bad_object + (eval_work_list, + {SlotPos, Bin, KeyObjs}), + throw(dets_utils:corrupt_reason + (Head, Bad)); + Terms when is_list(Terms) -> + get_objects([O | Terms]) + end + end + end; + [] -> + []; + BadRead -> % eof or bad badly formed binary + Bad = dets_utils:bad_object(eval_work_list, + {SlotPos, BadRead}), + throw(dets_utils:corrupt_reason(Head, Bad)) + end, + {Head, [{Pid,Objs}], []}; +eval_work_list(Head, PerKey) -> + SWLs = tag_with_slot(PerKey, Head, []), + P1 = dets_utils:family(SWLs), + {PerSlot, SlotPositions} = remove_slot_tag(P1, [], []), + {ok, Bins} = dets_utils:pread(SlotPositions, Head), + read_buckets(PerSlot, SlotPositions, Bins, Head, [], [], [], [], 0, 0, 0). + +tag_with_slot([{K,_} = WL | WLs], Head, L) -> + tag_with_slot(WLs, Head, [{db_hash(K, Head), WL} | L]); +tag_with_slot([], _Head, L) -> + L. + +remove_slot_tag([{S,SWLs} | SSWLs], Ls, SPs) -> + remove_slot_tag(SSWLs, [SWLs | Ls], [{slot_position(S), ?SEGOBJSZ} | SPs]); +remove_slot_tag([], Ls, SPs) -> + {Ls, SPs}. + +read_buckets([WLs | SPs], [{P1,_8} | Ss], [<<_Zero:32,P2:32>> | Bs], Head, + PWLs, ToRead, LU, Ws, NoObjs, NoKeys, SoFar) when P2 =:= 0 -> + {NewHead, NLU, NWs, No, KNo} = + eval_bucket_keys(WLs, P1, 0, 0, [], Head, Ws, LU), + NewNoObjs = No + NoObjs, + NewNoKeys = KNo + NoKeys, + read_buckets(SPs, Ss, Bs, NewHead, PWLs, ToRead, NLU, NWs, + NewNoObjs, NewNoKeys, SoFar); +read_buckets([WorkLists| SPs], [{P1,_8} | Ss], [<> | Bs], Head, + PWLs, ToRead, LU, Ws, NoObjs, NoKeys, SoFar) + when SoFar + Size < ?MAXCOLL; ToRead =:= [] -> + NewToRead = [{P2, Size} | ToRead], + NewPWLs = [{P2,P1,WorkLists} | PWLs], + NewSoFar = SoFar + Size, + read_buckets(SPs, Ss, Bs, Head, NewPWLs, NewToRead, LU, Ws, + NoObjs, NoKeys, NewSoFar); +read_buckets(SPs, Ss, Bs, Head, PWLs0, ToRead0, LU, Ws, NoObjs, NoKeys, SoFar) + when SoFar > 0 -> + %% It pays off to sort the positions. The seek times are reduced, + %% at least if the file blocks are reasonably contiguous, as is + %% often the case. + PWLs = lists:keysort(1, PWLs0), + ToRead = lists:keysort(1, ToRead0), + check_pread2_arg(ToRead, Head), + {ok, Bins} = dets_utils:pread(ToRead, Head), + case catch eval_buckets(Bins, PWLs, Head, LU, Ws, 0, 0) of + {ok, NewHead, NLU, [], 0, 0} -> + read_buckets(SPs, Ss, Bs, NewHead, [], [], NLU, [], + NoObjs, NoKeys, 0); + {ok, Head1, NLU, NWs, No, KNo} -> + NewNoObjs = NoObjs + No, + NewNoKeys = NoKeys + KNo, + %% It does not seem to reduce seek times to sort positions + %% when writing (maybe because it takes several calls to + %% write_cache/1 to fill the file system's buffer cache). + {NewHead, ok} = dets_utils:pwrite(Head1, lists:reverse(NWs)), + read_buckets(SPs, Ss, Bs, NewHead, [], [], NLU, [], + NewNoObjs, NewNoKeys, 0); + Error -> + Bad = dets_utils:bad_object(read_buckets, {Bins, Error}), + throw(dets_utils:corrupt_reason(Head, Bad)) + end; +read_buckets([], [], [], Head, [], [], LU, Ws, NoObjs, NoKeys, 0) -> + {NewHead, NWs} = update_no_keys(Head, Ws, NoObjs, NoKeys), + {NewHead, LU, lists:reverse(NWs)}. + +eval_buckets([Bin | Bins], [SP | SPs], Head, LU, Ws, NoObjs, NoKeys) -> + {Pos, P1, WLs} = SP, + KeyObjs = per_key(Head, Bin), + {NewHead, NLU, NWs, No, KNo} = + eval_bucket_keys(WLs, P1, Pos, byte_size(Bin), KeyObjs, Head,Ws,LU), + eval_buckets(Bins, SPs, NewHead, NLU, NWs, NoObjs + No, NoKeys + KNo); +eval_buckets([], [], Head, LU, Ws, NoObjs, NoKeys) -> + {ok, Head, LU, Ws, NoObjs, NoKeys}. + +eval_bucket_keys(WLs, SlotPos, Pos, OldSize, KeyObjs, Head, Ws, LU) -> + {NLU, Bins, BSize, No, KNo, Ch} = + eval_slot(WLs, KeyObjs, Head#head.type, LU, [], 0, 0, 0, false), + {NewHead, W1, W2} = + updated(Head, Pos, OldSize, BSize, SlotPos, Bins, Ch, No, KNo), + {NewHead, NLU, W2++W1++Ws, No, KNo}. + +updated(Head, Pos, OldSize, BSize, SlotPos, Bins, Ch, DeltaNoOs, DeltaNoKs) -> + BinsSize = BSize + ?OHDSZ, + if + Pos =:= 0, BSize =:= 0 -> + {Head, [], []}; + Pos =:= 0, BSize > 0 -> + {Head1, NewPos, FPos} = dets_utils:alloc(Head, adjsz(BinsSize)), + NewHead = one_bucket_added(Head1, FPos-1), + W1 = {NewPos, [<> | Bins]}, + W2 = {SlotPos, <>}, + {NewHead, [W2], [W1]}; + Pos =/= 0, BSize =:= 0 -> + {Head1, FPos} = dets_utils:free(Head, Pos, adjsz(OldSize)), + NewHead = one_bucket_removed(Head1, FPos-1), + W1 = {Pos+?STATUS_POS, <>}, + W2 = {SlotPos, <<0:32, 0:32>>}, + {NewHead, [W2], [W1]}; + Pos =/= 0, BSize > 0, Ch =:= false -> + {Head, [], []}; + Pos =/= 0, BSize > 0 -> + %% Doubtful. The scan function has to be careful since + %% partly scanned objects may be overwritten. + Overwrite0 = if + OldSize =:= BinsSize -> same; + true -> sz2pos(OldSize) =:= sz2pos(BinsSize) + end, + Overwrite = if + Head#head.fixed =/= false -> + %% Make sure that if the table is + %% fixed, nothing is overwritten, + %% unless the number of objects and + %% the number of keys remain the same. + %% This is used by bchunk, which + %% assumes that it traverses exactly + %% the same number of objects and keys + %% (and collections) as were present + %% when chunking started (the table + %% must have been fixed). + (Overwrite0 =/= false) and + (DeltaNoOs =:= 0) and (DeltaNoKs =:= 0); + true -> + Overwrite0 + end, + if + Overwrite =:= same -> + W1 = {Pos+?OHDSZ, Bins}, + {Head, [], [W1]}; + Overwrite -> + W1 = {Pos, [<> | Bins]}, + %% Pos is already there, but return {SlotPos, <8 bytes>}. + W2 = {SlotPos, <>}, + {Head, [W2], [W1]}; + true -> + {Head1, FPosF} = dets_utils:free(Head, Pos, adjsz(OldSize)), + {Head2, NewPos, FPosA} = + dets_utils:alloc(Head1, adjsz(BinsSize)), + Head3 = one_bucket_added(Head2, FPosA-1), + NewHead = one_bucket_removed(Head3, FPosF-1), + W0 = {NewPos, [<> | Bins]}, + W2 = {SlotPos, <>}, + W1 = if + Pos =/= NewPos -> + %% W0 first. + [W0, {Pos+?STATUS_POS, <>}]; + true -> + [W0] + end, + {NewHead, [W2], W1} + end + end. + +one_bucket_added(H, _Log2) when H#head.no_collections =:= undefined -> + H; +one_bucket_added(H, Log2) when H#head.maxobjsize >= Log2 -> + NewNoColls = orddict:update_counter(Log2, 1, H#head.no_collections), + H#head{no_collections = NewNoColls}; +one_bucket_added(H, Log2) -> + NewNoColls = orddict:update_counter(Log2, 1, H#head.no_collections), + H#head{no_collections = NewNoColls, maxobjsize = Log2}. + +one_bucket_removed(H, _FPos) when H#head.no_collections =:= undefined -> + H; +one_bucket_removed(H, Log2) when H#head.maxobjsize > Log2 -> + NewNoColls = orddict:update_counter(Log2, -1, H#head.no_collections), + H#head{no_collections = NewNoColls}; +one_bucket_removed(H, Log2) when H#head.maxobjsize =:= Log2 -> + NewNoColls = orddict:update_counter(Log2, -1, H#head.no_collections), + MaxObjSize = max_objsize(NewNoColls), + H#head{no_collections = NewNoColls, maxobjsize = MaxObjSize}. + +eval_slot([{Key,Commands} | WLs] = WLs0, [{K,KS,KB,O,Os} | KOs1]=KOs, + Type, LU, Ws, No, KNo,BSz, Ch) -> + case dets_utils:cmp(K, Key) of + 0 -> + Old = [O | binobjs2terms(Os)], + {NLU, NWs, Sz, No1, KNo1, NCh} = + eval_key(Key, Commands, Old, Type, KB, KS, LU, Ws, Ch), + eval_slot(WLs, KOs1, Type, NLU, NWs, No1 + No, + KNo1 + KNo, Sz + BSz, NCh); + -1 -> + eval_slot(WLs0, KOs1, Type, LU, [Ws | KB], No, + KNo, KS + BSz, Ch); + 1 -> + {NLU, NWs, Sz, No1, KNo1, NCh} = + eval_key(Key, Commands, [], Type, [], 0, LU, Ws, Ch), + eval_slot(WLs, KOs, Type, NLU, NWs, No1 + No, + KNo1 + KNo, Sz + BSz, NCh) + end; +eval_slot([{Key,Commands} | WLs], [], Type, LU, Ws, No, KNo,BSz, Ch) -> + {NLU, NWs, Sz, No1, KNo1, NCh} = + eval_key(Key, Commands, [], Type, [], 0, LU, Ws, Ch), + eval_slot(WLs, [], Type, NLU, NWs, No1 + No, KNo1 + KNo, Sz + BSz, NCh); +eval_slot([], [{_Key,Size,KeyBin,_,_} | KOs], Type, LU, Ws, No, KNo,BSz, Ch) -> + eval_slot([], KOs, Type, LU, [Ws | KeyBin], No, KNo, Size + BSz, Ch); +eval_slot([], [], _Type, LU, Ws, No, KNo, BSz, Ch) -> + {LU, Ws, BSz, No, KNo, Ch}. + +eval_key(_K, [{_Seq,{lookup,Pid}}], [], _Type, _KeyBin, _KeySz, LU, Ws, Ch) -> + NLU = [{Pid, []} | LU], + {NLU, Ws, 0, 0, 0, Ch}; +eval_key(_K, [{_Seq,{lookup,Pid}}], Old0, _Type, KeyBin, KeySz, LU, Ws, Ch) -> + Old = lists:keysort(2, Old0), % sort on sequence number + Objs = get_objects(Old), + NLU = [{Pid, Objs} | LU], + {NLU, [Ws | KeyBin], KeySz, 0, 0, Ch}; +eval_key(K, Comms, Orig, Type, KeyBin, KeySz, LU, Ws, Ch) -> + Old = dets_utils:msort(Orig), + case eval_key1(Comms, [], Old, Type, K, LU, Ws, 0, Orig) of + {ok, NLU} when Old =:= [] -> + {NLU, Ws, 0, 0, 0, Ch}; + {ok, NLU} -> + {NLU, [Ws | KeyBin], KeySz, 0, 0, Ch}; + {NLU, NWs, NSz, No} when Old =:= [], NSz > 0 -> + {NLU, NWs, NSz, No, 1, true}; + {NLU, NWs, NSz, No} when Old =/= [], NSz =:= 0 -> + {NLU, NWs, NSz, No, -1, true}; + {NLU, NWs, NSz, No} -> + {NLU, NWs, NSz, No, 0, true} + end. + +%% First find 'delete_key' and 'lookup' commands, and handle the 'set' type. +eval_key1([{_Seq,{insert,Term}} | L], Cs, [{Term,_,_}] = Old, Type=set, K, + LU, Ws, No, Orig) -> + eval_key1(L, Cs, Old, Type, K, LU, Ws, No, Orig); +eval_key1([{Seq,{insert,Term}} | L], Cs, Old, Type=set, K, LU, Ws, No, Orig) + -> + NNo = No + 1 - length(Old), + eval_key1(L, Cs, [{Term,Seq,insert}], Type, K, LU, Ws, NNo, Orig); +eval_key1([{_Seq,{lookup,Pid}} | L], Cs, Old, Type, Key, LU, Ws, No, Orig) -> + {ok, New0, NewNo} = eval_comms(Cs, Old, Type, No), + New = lists:keysort(2, New0), % sort on sequence number + Objs = get_objects(New), + NLU = [{Pid, Objs} | LU], + if + L =:= [] -> + eval_end(New, NLU, Type, Ws, NewNo, Orig); + true -> + NewOld = dets_utils:msort(New), + eval_key1(L, [], NewOld, Type, Key, NLU, Ws, NewNo, Orig) + end; +eval_key1([{_Seq,delete_key} | L], _Cs, Old, Type, K, LU, Ws, No, Orig) -> + NewNo = No - length(Old), + eval_key1(L, [], [], Type, K, LU, Ws, NewNo, Orig); +eval_key1([{_Seq,{delete_object,Term}} | L], Cs, [{Term,_,_}], Type=set, K, + LU, Ws, No, Orig) -> + eval_key1(L, Cs, [], Type, K, LU, Ws, No-1, Orig); +eval_key1([{_Seq,{delete_object,_T}}| L], Cs, Old1, Type=set, K, LU, + Ws, No, Orig) -> + eval_key1(L, Cs, Old1, Type, K, LU, Ws, No, Orig); +eval_key1([{Seq,{Comm,Term}} | L], Cs, Old, Type, K, LU, Ws, No, Orig) + when Type =/= set -> + eval_key1(L, [{Term,Seq,Comm} | Cs], Old, Type, K, LU, Ws, No, Orig); +eval_key1([], Cs, Old, Type=set, _Key, LU, Ws, No, Orig) -> + [] = Cs, + eval_end(Old, LU, Type, Ws, No, Orig); +eval_key1([], Cs, Old, Type, _Key, LU, Ws, No, Orig) -> + {ok, New, NewNo} = eval_comms(Cs, Old, Type, No), + eval_end(New, LU, Type, Ws, NewNo, Orig). + +eval_comms([], L, _Type=set, No) -> + {ok, L, No}; +eval_comms(Cs, Old, Type, No) -> + Commands = dets_utils:msort(Cs), + case Type of + bag -> eval_bag(Commands, Old, [], No); + duplicate_bag -> eval_dupbag(Commands, Old, [], No) + end. + +eval_end(New0, LU, Type, Ws, NewNo, Orig) -> + New = lists:keysort(2, New0), % sort on sequence number + NoChange = if + length(New) =/= length(Orig) -> false; + true -> + same_terms(Orig, New) + end, + if + NoChange -> + %% The key's objects have not changed. + {ok, LU}; + New =:= [] -> + {LU, Ws, 0, NewNo}; + true -> + {Ws1, Sz} = make_bins(New, [], 0), + if + Type =:= set -> + {LU, [Ws | Ws1], Sz, NewNo}; + true -> + NSz = Sz + 4, + {LU, [Ws, <> | Ws1], NSz, NewNo} + end + end. + +same_terms([E1 | L1], [E2 | L2]) when element(1, E1) =:= element(1, E2) -> + same_terms(L1, L2); +same_terms([], []) -> + true; +same_terms(_L1, _L2) -> + false. + +make_bins([{_Term,_Seq,B} | L], W, Sz) when is_binary(B) -> + make_bins(L, [W | B], Sz + byte_size(B)); +make_bins([{Term,_Seq,insert} | L], W, Sz) -> + B = term_to_binary(Term), + BSize = byte_size(B) + 4, + make_bins(L, [W, [<> | B]], Sz + BSize); +make_bins([], W, Sz) -> + {W, Sz}. + +get_objects([{T,_S,_BT} | L]) -> + [T | get_objects(L)]; +get_objects([]) -> + []. + +eval_bag([{Term1,_S1,Op}=N | L]=L0, [{Term2,_,_}=O | Old]=Old0, New, No) -> + case {Op, dets_utils:cmp(Term1, Term2)} of + {delete_object, -1} -> + eval_bag(L, Old0, New, No); + {insert, -1} -> + bag_object(L, Old0, New, No, [N], Term1); + {delete_object, 0} -> + bag_object(L, Old, New, No-1, [], Term1); + {insert, 0} -> + bag_object(L, Old, New, No-1, [N], Term1); + {_, 1} -> + eval_bag(L0, Old, [O | New], No) + end; +eval_bag([{_Term1,_Seq1,delete_object} | L], []=Old, New, No) -> + eval_bag(L, Old, New, No); +eval_bag([{Term,_Seq1,insert} = N | L], []=Old, New, No) -> + bag_object(L, Old, New, No, [N], Term); +eval_bag([]=L, [O | Old], New, No) -> + eval_bag(L, Old, [O | New], No); +eval_bag([], [], New, No) -> + {ok, New, No}. + +bag_object([{Term,_,insert} = N | L], Old, New, No, _N, Term) -> + bag_object(L, Old, New, No, [N], Term); +bag_object([{Term,_,delete_object} | L], Old, New, No, _N, Term) -> + bag_object(L, Old, New, No, [], Term); +bag_object(L, Old, New, No, [], _Term) -> + eval_bag(L, Old, New, No); +bag_object(L, Old, New, No, [N], _Term) -> + eval_bag(L, Old, [N | New], No+1). + +eval_dupbag([{Term1,_S1,Op}=N | L]=L0, [{Term2,_,_}=O | Old]=Old0, New, No) -> + case {Op, dets_utils:cmp(Term1, Term2)} of + {delete_object, -1} -> + eval_dupbag(L, Old0, New, No); + {insert, -1} -> + dup_object(L, Old0, New, No+1, Term1, [N]); + {_, 0} -> + old_dup_object(L0, Old, New, No, Term1, [O]); + {_, 1} -> + eval_dupbag(L0, Old, [O | New], No) + end; +eval_dupbag([{_Term1,_Seq1,delete_object} | L], []=Old, New, No) -> + eval_dupbag(L, Old, New, No); +eval_dupbag([{Term,_Seq1,insert} = N | L], []=Old, New, No) -> + dup_object(L, Old, New, No+1, Term, [N]); +eval_dupbag([]=L, [O | Old], New, No) -> + eval_dupbag(L, Old, [O | New], No); +eval_dupbag([], [], New, No) -> + {ok, New, No}. + +old_dup_object(L, [{Term,_,_} = Obj | Old], New, No, Term, N) -> + old_dup_object(L, Old, New, No, Term, [Obj | N]); +old_dup_object(L, Old, New, No, Term, N) -> + dup_object(L, Old, New, No, Term, N). + +dup_object([{Term,_,insert} = Obj | L], Old, New, No, Term, Q) -> + dup_object(L, Old, New, No+1, Term, [Obj | Q]); +dup_object([{Term,_Seq,delete_object} | L], Old, New, No, Term, Q) -> + %% All objects are deleted. + NewNo = No - length(Q), + dup_object(L, Old, New, NewNo, Term, []); +dup_object(L, Old, New, No, _Term, Q) -> + eval_dupbag(L, Old, Q ++ New, No). + +%% Update no_keys on the file too, if the number of segments that +%% dets:fsck/6 uses for estimate has changed. +update_no_keys(Head, Ws, 0, 0) -> {Head, Ws}; +update_no_keys(Head, Ws, DeltaObjects, DeltaKeys) -> + NoKeys = Head#head.no_keys, + NewNoKeys = NoKeys + DeltaKeys, + NewNoObject = Head#head.no_objects + DeltaObjects, + NewHead = Head#head{no_objects = NewNoObject, no_keys = NewNoKeys}, + NWs = + if + NewNoKeys > NewHead#head.max_no_slots -> + Ws; + NoKeys div ?SEGSZP =:= NewNoKeys div ?SEGSZP -> + Ws; + true -> + [{0, file_header(NewHead, 0, ?NOT_PROPERLY_CLOSED)} | Ws] + end, + {NewHead, NWs}. + +slot_position(S) -> + SegNo = ?SLOT2SEG(S), % S div ?SEGSZP + PartPos = ?SEGARRADDR(?SEG2SEGARRPART(SegNo)), % SegNo div ?SEGPARTSZ + Part = get_arrpart(PartPos), + Pos = ?SEGPARTADDR(Part, SegNo), + get_segp(Pos) + (?SEGOBJSZ * ?REM2(S, ?SEGSZP)). + +check_pread2_arg([{_Pos,Sz}], Head) when Sz > ?MAXCOLL -> + case check_pread_arg(Sz, Head) of + true -> + ok; + false -> + Bad = dets_utils:bad_object(check_pread2_arg, Sz), + throw(dets_utils:corrupt_reason(Head, Bad)) + end; +check_pread2_arg(_ToRead, _Head) -> + ok. + +check_pread_arg(Sz, Head) when Sz > ?MAXCOLL -> + maxobjsize(Head) >= Sz; +check_pread_arg(_Sz, _Head) -> + true. + +%% Inlined. +segp_cache(Pos, Segment) -> + put(Pos, Segment). + +%% Inlined. +get_segp(Pos) -> + get(Pos). + +arrpart_cache(Pos, ArrPart) -> + put(Pos, ArrPart). + +%% Inlined. +get_arrpart(Pos) -> + get(Pos). + +sz2pos(N) -> + 1 + dets_utils:log2(N). + +%% Inlined. Compensates for the bug in dets_utils:sz2pos/1. +adjsz(N) -> + N-1. + +%% Inlined. +maxobjsize(Head) when Head#head.maxobjsize =:= undefined -> + ?POW(32); +maxobjsize(Head) -> + ?POW(Head#head.maxobjsize). + +scan_objs(Head, Bin, From, To, L, Ts, R, Type) -> + case catch scan_skip(Bin, From, To, L, Ts, R, Type, 0) of + {'EXIT', _Reason} -> + bad_object; + Reply = {more, _From1, _To, _L, _Ts, _R, Size} when Size > ?MAXCOLL -> + case check_pread_arg(Size, Head) of + true -> Reply; + false -> bad_object + end; + Reply -> + Reply + end. + +scan_skip(Bin, From, To, L, Ts, R, Type, Skip) -> + From1 = From + Skip, + case Bin of + _ when From1 >= To -> + if + From1 > To; L =:= <<>> -> + {more, From1, To, L, Ts, R, 0}; + true -> + <> = L, + Skip1 = From2 - From, + scan_skip(Bin, From, To1, L1, Ts, R, Type, Skip1) + end; + <<_:Skip/binary, _Size:32, St:32, _Sz:32, KO/binary>> + when St =/= ?ACTIVE, St =/= ?FREE -> + %% Neither ?ACTIVE nor ?FREE is a multiple of ?BUMP and + %% thus cannot be found in segments or segment array + %% parts. + scan_skip(KO, From1+12, To, L, Ts, R, Type, ?ACTUAL_SEG_SIZE-12); + <<_:Skip/binary, Size:32, _St:32, Sz:32, KO/binary>> + when Size-12 =< byte_size(KO) -> + %% St = ?FREE means that the object was deleted after + %% scanning started + bin2bins(KO, From1+12, To, L, Ts, R, Type, Size, Sz); + <<_:Skip/binary, Size:32, _St:32, _Sz:32, _KO/binary>> -> + {more, From1, To, L, Ts, R, Size}; + _ when Skip >= 0 -> + {more, From1, To, L, Ts, R, 0} + end. + +%% Appends objects in reversed order. All objects of the slot are +%% extracted. Note that binary_to_term/1 ignores garbage at the end. +bin2bins(Bin, From, To, L, Ts, R, Type=set, Size, ObjSz0) -> + ObjsSz1 = Size - ObjSz0, + if + ObjsSz1 =:= ?OHDSZ -> + slot_end(Bin, From, To, L, [Bin | Ts], R, Type, Size, 1); + true -> + ObjSz = ObjSz0-4, + <<_:ObjSz/binary, NObjSz:32, T/binary>> = Bin, + bins_set(T, From, To, L, [Bin | Ts], R, Type, Size, 2, + NObjSz, ObjsSz1-NObjSz, Bin) + end; +bin2bins(<> = KO, From, To, L, Ts, R, Type, Size, Sz) -> + bins_bag(Bin, From, To, L, Ts, R, Type, Size, 1, + Sz-ObjSz-4, ObjSz-4, Size-Sz, KO). + +bins_set(Bin, From, To, L, Ts, R, Type, Size, NoObjs, _ObjSz0, ?OHDSZ, KO) -> + slot_end(KO, From, To, L, [Bin | Ts], R, Type, Size, NoObjs); +bins_set(Bin, From, To, L, Ts, R, Type, Size, NoObjs, ObjSz0, ObjsSz, KO) -> + ObjSz = ObjSz0 - 4, + <<_:ObjSz/binary, NObjSz:32, T/binary>> = Bin, + bins_set(T, From, To, L, [Bin | Ts], R, Type, Size, NoObjs + 1, + NObjSz, ObjsSz-NObjSz, KO). + +bins_bag(Bin, From, To, L, Ts, R, Type, Size, NoObjs, Sz, ObjSz, ObjsSz, KO) + when Sz > 0 -> + <<_:ObjSz/binary, NObjSz:32, T/binary>> = Bin, + bins_bag(T, From, To, L, [Bin | Ts], R, Type, Size, NoObjs + 1, + Sz-NObjSz, NObjSz-4, ObjsSz, KO); +bins_bag(Bin, From, To, L, Ts, R, Type, Size, NoObjs, _Z, _ObjSz, ?OHDSZ, KO) -> + slot_end(KO, From, To, L, [Bin | Ts], R, Type, Size, NoObjs); +bins_bag(Bin, From, To, L, Ts, R, Type, Size, NoObjs, _Z, ObjSz, ObjsSz, KO) -> + <<_:ObjSz/binary, Sz:32, NObjSz:32, T/binary>> = Bin, + bins_bag(T, From, To, L, [Bin | Ts], R, Type, Size, NoObjs + 1, + Sz-NObjSz-4, NObjSz-4, ObjsSz-Sz, KO). + +slot_end(KO, From, To, L, Ts, R, Type, Size, NoObjs) -> + Skip = ?POW(dets_utils:log2(Size)) - 12, % expensive... + if + R >= 0 -> + scan_skip(KO, From, To, L, Ts, R+Size, Type, Skip); + true -> + %% Should check this at the end of every key. + case R + NoObjs of + R1 when R1 >= -1 -> + From1 = From + Skip, + Bin1 = case KO of + <<_:Skip/binary, B/binary>> -> B; + _ -> <<>> + end, + {stop, Bin1, From1, To, L, Ts}; + R1 -> + scan_skip(KO, From, To, L, Ts, R1, Type, Skip) + end + end. + +%%%%%%%%%%%%%%%%% DEBUG functions %%%%%%%%%%%%%%%% + +file_info(FH) -> + #fileheader{closed_properly = CP, keypos = Kp, + m = M, next = Next, n = N, version = Version, + type = Type, no_objects = NoObjects, no_keys = NoKeys} + = FH, + if + CP =:= 0 -> + {error, not_closed}; + FH#fileheader.cookie =/= ?MAGIC -> + {error, not_a_dets_file}; + FH#fileheader.version =/= ?FILE_FORMAT_VERSION -> + {error, bad_version}; + true -> + {ok, [{closed_properly,CP},{keypos,Kp},{m, M},{n,N}, + {next,Next},{no_objects,NoObjects},{no_keys,NoKeys}, + {type,Type},{version,Version}]} + end. + +v_segments(#head{}=H) -> + v_parts(H, 0, 0). + +v_parts(_H, ?SEGARRSZ, _SegNo) -> + done; +v_parts(H, PartNo, SegNo) -> + Fd = H#head.fptr, + PartPos = dets_utils:read_4(Fd, ?SEGARRADDR(PartNo)), + if + PartPos =:= 0 -> + done; + true -> + PartBin = dets_utils:pread_n(Fd, PartPos, ?SEGPARTSZ*4), + v_segments(H, PartBin, PartNo+1, SegNo) + end. + +v_segments(H, <<>>, PartNo, SegNo) -> + v_parts(H, PartNo, SegNo); +v_segments(_H, <<0:32,_/binary>>, _PartNo, _SegNo) -> + done; +v_segments(H, <>, PartNo, SegNo) -> + io:format("<~w>SEGMENT ~w~n", [Seg, SegNo]), + v_segment(H, SegNo, Seg, 0), + v_segments(H, T, PartNo, SegNo+1). + +v_segment(_H, _, _SegPos, ?SEGSZP) -> + done; +v_segment(H, SegNo, SegPos, SegSlot) -> + Slot = SegSlot + (SegNo * ?SEGSZP), + BucketP = SegPos + (4 * ?SZOBJP * SegSlot), + case catch read_bucket(H, BucketP, H#head.type) of + {'EXIT', Reason} -> + dets_utils:vformat("** dets: Corrupt or truncated dets file~n", + []), + io:format("~nERROR ~p~n", [Reason]); + [] -> %% don't print empty buckets + true; + {Size, CollP, Objects} -> + io:format(" <~w>~w: <~w:~p>~w~n", + [BucketP, Slot, CollP, Size, Objects]) + end, + v_segment(H, SegNo, SegPos, SegSlot+1). + +%% -> [] | {Pointer, [object()]} | throw(EXIT) +read_bucket(Head, Position, Type) -> + MaxSize = maxobjsize(Head), + case dets_utils:ipread(Head, Position, MaxSize) of + {ok, {Size, Pointer, <>}} -> + Objs = bin2objs(KeysObjs, Type, []), + {Size, Pointer, lists:reverse(Objs)}; + [] -> + [] + end. + +-define(SEQSTART, -(1 bsl 26)). + +%% -> [{Key,SizeOfWholeKey,WholeKeyBin,FirstObject,OtherObjects}] |throw(EXIT) +%% FirstObject = {Term, Seq, Binary} +%% Seq < 0 (and ascending). +per_key(Head, <> = B) -> + true = (byte_size(B) =:= BinSize), + if + Head#head.type =:= set -> + per_set_key(Bin, Head#head.keypos, []); + true -> + per_bag_key(Bin, Head#head.keypos, []) + end. + +per_set_key(<> = B, KeyPos, L) -> + <> = B, + Term = binary_to_term(T), + Key = element(KeyPos, Term), + Item = {Term, ?SEQSTART, KeyBin}, + per_set_key(R, KeyPos, [{Key,Size,KeyBin,Item,[]} | L]); +per_set_key(<<>>, KeyPos, L) when is_integer(KeyPos) -> + lists:reverse(L). + +per_bag_key(<> = B, KeyPos, L) -> + <> = B, + ObjSz1 = ObjSz - 4, + Size1 = Size - ObjSz - 4, + <<_:ObjSz1/binary, KeyObjs:Size1/binary, _/binary>> = T, + <<_Size:32, Bin:ObjSz/binary, _/binary>> = B, + Term = binary_to_term(T), + Key = element(KeyPos, Term), + Item = {Term, ?SEQSTART, Bin}, + per_bag_key(R, KeyPos, [{Key,Size,KeyBin,Item,KeyObjs} | L]); +per_bag_key(<<>>, KeyPos, L) when is_integer(KeyPos) -> + lists:reverse(L). + + +binobjs2terms(<> = B) -> + binobjs2terms(B, T, ObjSz, byte_size(B)-ObjSz, ?SEQSTART+1, []); +binobjs2terms([] = B) -> + B; +binobjs2terms(<<>>) -> + []. + +binobjs2terms(Bin, Obj, _ObjSz, _Size=0, N, L) -> + lists:reverse(L, [{binary_to_term(Obj), N, Bin}]); +binobjs2terms(Bin, Bin1, ObjSz, Size, N, L) -> + <> = Bin, + <> = T, + Item = {binary_to_term(Bin1), N, B}, + binobjs2terms(T, T1, NObjSz, Size-NObjSz, N+1, [Item | L]). + + +%% Appends objects in reversed order. +bin2objs(KeysObjs, set, Ts) -> + <> = KeysObjs, + bin2objs(T, ObjSz-4, byte_size(KeysObjs)-ObjSz, Ts); +bin2objs(KeysObjs, _Type, Ts) -> + bin2objs2(KeysObjs, Ts). + +bin2objs2(<>, Ts) -> + bin2objs(T, ObjSz-4, Size-ObjSz-4, Ts); +bin2objs2(<<>>, Ts) -> + Ts. + +bin2objs(Bin, ObjSz, _Size=0, Ts) -> + <<_:ObjSz/binary, T/binary>> = Bin, + bin2objs2(T, [binary_to_term(Bin) | Ts]); +bin2objs(Bin, ObjSz, Size, Ts) -> + <<_:ObjSz/binary, NObjSz:32, T/binary>> = Bin, + bin2objs(T, NObjSz-4, Size-NObjSz, [binary_to_term(Bin) | Ts]). + + +bin2keybins(KeysObjs, Head) when Head#head.type =:= set -> + <> = KeysObjs, + bin2keybins(T, Head#head.keypos, ObjSz-4, byte_size(KeysObjs)-ObjSz,[]); +bin2keybins(KeysObjs, Head) -> + bin2keybins2(KeysObjs, Head#head.keypos, []). + +bin2keybins2(<>, Kp, L) -> + bin2keybins(T, Kp, ObjSz-4, Size-ObjSz-4, L); +bin2keybins2(<<>>, Kp, L) when is_integer(Kp) -> + lists:reverse(L). + +bin2keybins(Bin, Kp, ObjSz, _Size=0, L) -> + <> = Bin, + Term = binary_to_term(Obj), + bin2keybins2(T, Kp, [{element(Kp, Term),Obj} | L]); +bin2keybins(Bin, Kp, ObjSz, Size, L) -> + <> = Bin, + Term = binary_to_term(Obj), + bin2keybins(T, Kp, NObjSz-4, Size-NObjSz, [{element(Kp,Term),Obj} | L]). diff --git a/lib/stdlib/src/dict.erl b/lib/stdlib/src/dict.erl new file mode 100644 index 0000000000..7e51141098 --- /dev/null +++ b/lib/stdlib/src/dict.erl @@ -0,0 +1,547 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2000-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% We use the dynamic hashing techniques by Per-Åke Larsson as +%% described in "The Design and Implementation of Dynamic Hashing for +%% Sets and Tables in Icon" by Griswold and Townsend. Much of the +%% terminology comes from that paper as well. +%% +%% The segments are all of the same fixed size and we just keep +%% increasing the size of the top tuple as the table grows. At the +%% end of the segments tuple we keep an empty segment which we use +%% when we expand the segments. The segments are expanded by doubling +%% every time n reaches maxn instead of increasing the tuple one +%% element at a time. It is easier and does not seem detrimental to +%% speed. The same applies when contracting the segments. +%% +%% Note that as the order of the keys is undefined we may freely +%% reorder keys within a bucket. + +-module(dict). + +%% Standard interface. +-export([new/0,is_key/2,to_list/1,from_list/1,size/1]). +-export([fetch/2,find/2,fetch_keys/1,erase/2]). +-export([store/3,append/3,append_list/3,update/3,update/4,update_counter/3]). +-export([fold/3,map/2,filter/2,merge/3]). + +%% Low-level interface. +%%-export([get_slot/2,get_bucket/2,on_bucket/3,fold_dict/3, +%% maybe_expand/2,maybe_contract/2]). + +%% Note: mk_seg/1 must be changed too if seg_size is changed. +-define(seg_size, 16). +-define(max_seg, 32). +-define(expand_load, 5). +-define(contract_load, 3). +-define(exp_size, (?seg_size * ?expand_load)). +-define(con_size, (?seg_size * ?contract_load)). + +%% Define a hashtable. The default values are the standard ones. +-record(dict, + {size=0 :: non_neg_integer(), % Number of elements + n=?seg_size :: non_neg_integer(), % Number of active slots + maxn=?seg_size :: non_neg_integer(), % Maximum slots + bso=?seg_size div 2 :: non_neg_integer(), % Buddy slot offset + exp_size=?exp_size :: non_neg_integer(), % Size to expand at + con_size=?con_size :: non_neg_integer(), % Size to contract at + empty :: tuple(), % Empty segment + segs :: tuple() % Segments + }). +%% A declaration equivalent to the following one is hard-coded in erl_types. +%% That declaration contains hard-coded information about the #dict{} +%% structure and the types of its fields. So, please make sure that any +%% changes to its structure are also propagated to erl_types.erl. +%% +%% -opaque dict() :: #dict{}. + +-define(kv(K,V), [K|V]). % Key-Value pair format +%%-define(kv(K,V), {K,V}). % Key-Value pair format + +-spec new() -> dict(). + +new() -> + Empty = mk_seg(?seg_size), + #dict{empty=Empty,segs={Empty}}. + +-spec is_key(term(), dict()) -> boolean(). + +is_key(Key, D) -> + Slot = get_slot(D, Key), + Bkt = get_bucket(D, Slot), + find_key(Key, Bkt). + +find_key(K, [?kv(K,_Val)|_]) -> true; +find_key(K, [_|Bkt]) -> find_key(K, Bkt); +find_key(_, []) -> false. + +-spec to_list(dict()) -> [{term(), term()}]. + +to_list(D) -> + fold(fun (Key, Val, List) -> [{Key,Val}|List] end, [], D). + +-spec from_list([{term(), term()}]) -> dict(). + +from_list(L) -> + lists:foldl(fun ({K,V}, D) -> store(K, V, D) end, new(), L). + +-spec size(dict()) -> non_neg_integer(). + +size(#dict{size=N}) when is_integer(N), N >= 0 -> N. + +-spec fetch(term(), dict()) -> term(). + +fetch(Key, D) -> + Slot = get_slot(D, Key), + Bkt = get_bucket(D, Slot), + try fetch_val(Key, Bkt) + catch + badarg -> erlang:error(badarg, [Key, D]) + end. + +fetch_val(K, [?kv(K,Val)|_]) -> Val; +fetch_val(K, [_|Bkt]) -> fetch_val(K, Bkt); +fetch_val(_, []) -> throw(badarg). + +-spec find(term(), dict()) -> {'ok', term()} | 'error'. + +find(Key, D) -> + Slot = get_slot(D, Key), + Bkt = get_bucket(D, Slot), + find_val(Key, Bkt). + +find_val(K, [?kv(K,Val)|_]) -> {ok,Val}; +find_val(K, [_|Bkt]) -> find_val(K, Bkt); +find_val(_, []) -> error. + +-spec fetch_keys(dict()) -> [term()]. + +fetch_keys(D) -> + fold(fun (Key, _Val, Keys) -> [Key|Keys] end, [], D). + +-spec erase(term(), dict()) -> dict(). +%% Erase all elements with key Key. + +erase(Key, D0) -> + Slot = get_slot(D0, Key), + {D1,Dc} = on_bucket(fun (B0) -> erase_key(Key, B0) end, + D0, Slot), + maybe_contract(D1, Dc). + +erase_key(Key, [?kv(Key,_Val)|Bkt]) -> {Bkt,1}; +erase_key(Key, [E|Bkt0]) -> + {Bkt1,Dc} = erase_key(Key, Bkt0), + {[E|Bkt1],Dc}; +erase_key(_, []) -> {[],0}. + +-spec store(term(), term(), dict()) -> dict(). + +store(Key, Val, D0) -> + Slot = get_slot(D0, Key), + {D1,Ic} = on_bucket(fun (B0) -> store_bkt_val(Key, Val, B0) end, + D0, Slot), + maybe_expand(D1, Ic). + +%% store_bkt_val(Key, Val, Bucket) -> {NewBucket,PutCount}. + +store_bkt_val(Key, New, [?kv(Key,_Old)|Bkt]) -> {[?kv(Key,New)|Bkt],0}; +store_bkt_val(Key, New, [Other|Bkt0]) -> + {Bkt1,Ic} = store_bkt_val(Key, New, Bkt0), + {[Other|Bkt1],Ic}; +store_bkt_val(Key, New, []) -> {[?kv(Key,New)],1}. + +-spec append(term(), term(), dict()) -> dict(). + +append(Key, Val, D0) -> + Slot = get_slot(D0, Key), + {D1,Ic} = on_bucket(fun (B0) -> append_bkt(Key, Val, B0) end, + D0, Slot), + maybe_expand(D1, Ic). + +%% append_bkt(Key, Val, Bucket) -> {NewBucket,PutCount}. + +append_bkt(Key, Val, [?kv(Key,Bag)|Bkt]) -> {[?kv(Key,Bag ++ [Val])|Bkt],0}; +append_bkt(Key, Val, [Other|Bkt0]) -> + {Bkt1,Ic} = append_bkt(Key, Val, Bkt0), + {[Other|Bkt1],Ic}; +append_bkt(Key, Val, []) -> {[?kv(Key,[Val])],1}. + +-spec append_list(term(), [term()], dict()) -> dict(). + +append_list(Key, L, D0) -> + Slot = get_slot(D0, Key), + {D1,Ic} = on_bucket(fun (B0) -> app_list_bkt(Key, L, B0) end, + D0, Slot), + maybe_expand(D1, Ic). + +%% app_list_bkt(Key, L, Bucket) -> {NewBucket,PutCount}. + +app_list_bkt(Key, L, [?kv(Key,Bag)|Bkt]) -> {[?kv(Key,Bag ++ L)|Bkt],0}; +app_list_bkt(Key, L, [Other|Bkt0]) -> + {Bkt1,Ic} = app_list_bkt(Key, L, Bkt0), + {[Other|Bkt1],Ic}; +app_list_bkt(Key, L, []) -> {[?kv(Key,L)],1}. + +%% %% first_key(Table) -> {ok,Key} | error. +%% %% Find the "first" key in a Table. + +%% first_key(T) -> +%% case next_bucket(T, 1) of +%% [?kv(K,Val)|Bkt] -> {ok,K}; +%% [] -> error %No elements +%% end. + +%% next_bucket(T, Slot) when Slot > T#dict.n -> []; +%% next_bucket(T, Slot) -> +%% case get_bucket(T, Slot) of +%% [] -> next_bucket(T, Slot+1); %Empty bucket +%% B -> B +%% end. + +%% %% next_key(Table, Key) -> {ok,NextKey} | error. + +%% next_key(T, Key) -> +%% Slot = get_slot(T, Key), +%% B = get_bucket(T, Slot), +%% %% Find a bucket with something in it. +%% Bkt = case bucket_after_key(Key, B) of +%% no_key -> exit(badarg); +%% [] -> next_bucket(T, Slot+1); +%% Rest -> Rest +%% end, +%% case Bkt of +%% [?kv(Next,Val)|_] -> {ok,Next}; +%% [] -> error %We have reached the end! +%% end. + +%% bucket_after_key(Key, [?kv(Key,Val)|Bkt]) -> Bkt; +%% bucket_after_key(Key, [Other|Bkt]) -> +%% bucket_after_key(Key, Bkt); +%% bucket_after_key(Key, []) -> no_key. %Key not found! + +%% %% on_key(Fun, Key, Dictionary) -> Dictionary. + +%% on_key(F, Key, D0) -> +%% Slot = get_slot(D0, Key), +%% {D1,Dc} = on_bucket(fun (B0) -> on_key_bkt(Key, F, B0) end, +%% D0, Slot), +%% maybe_contract(D1, Dc). + +%% on_key_bkt(Key, F, [?kv(Key,Val)|Bkt]) -> +%% case F(Val) of +%% {ok,New} -> {[?kv(Key,New)|Bkt],0}; +%% erase -> {Bkt,1} +%% end; +%% on_key_bkt(Key, F, [Other|Bkt0]) -> +%% {Bkt1,Dc} = on_key_bkt(Key, F, Bkt0), +%% {[Other|Bkt1],Dc}. + +-spec update(term(), fun((term()) -> term()), dict()) -> dict(). + +update(Key, F, D0) -> + Slot = get_slot(D0, Key), + try on_bucket(fun (B0) -> update_bkt(Key, F, B0) end, D0, Slot) of + {D1,_Uv} -> D1 + catch + badarg -> erlang:error(badarg, [Key, F, D0]) + end. + +update_bkt(Key, F, [?kv(Key,Val)|Bkt]) -> + Upd = F(Val), + {[?kv(Key,Upd)|Bkt],Upd}; +update_bkt(Key, F, [Other|Bkt0]) -> + {Bkt1,Upd} = update_bkt(Key, F, Bkt0), + {[Other|Bkt1],Upd}; +update_bkt(_Key, _F, []) -> + throw(badarg). + +-spec update(term(), fun((term()) -> term()), term(), dict()) -> dict(). + +update(Key, F, Init, D0) -> + Slot = get_slot(D0, Key), + {D1,Ic} = on_bucket(fun (B0) -> update_bkt(Key, F, Init, B0) end, + D0, Slot), + maybe_expand(D1, Ic). + +update_bkt(Key, F, _, [?kv(Key,Val)|Bkt]) -> + {[?kv(Key,F(Val))|Bkt],0}; +update_bkt(Key, F, I, [Other|Bkt0]) -> + {Bkt1,Ic} = update_bkt(Key, F, I, Bkt0), + {[Other|Bkt1],Ic}; +update_bkt(Key, F, I, []) when is_function(F, 1) -> {[?kv(Key,I)],1}. + +-spec update_counter(term(), number(), dict()) -> dict(). + +update_counter(Key, Incr, D0) when is_number(Incr) -> + Slot = get_slot(D0, Key), + {D1,Ic} = on_bucket(fun (B0) -> counter_bkt(Key, Incr, B0) end, + D0, Slot), + maybe_expand(D1, Ic). + +counter_bkt(Key, I, [?kv(Key,Val)|Bkt]) -> + {[?kv(Key,Val+I)|Bkt],0}; +counter_bkt(Key, I, [Other|Bkt0]) -> + {Bkt1,Ic} = counter_bkt(Key, I, Bkt0), + {[Other|Bkt1],Ic}; +counter_bkt(Key, I, []) -> {[?kv(Key,I)],1}. + +-spec fold(fun((term(), term(), term()) -> term()), term(), dict()) -> term(). +%% Fold function Fun over all "bags" in Table and return Accumulator. + +fold(F, Acc, D) -> fold_dict(F, Acc, D). + +-spec map(fun((term(), term()) -> term()), dict()) -> dict(). + +map(F, D) -> map_dict(F, D). + +-spec filter(fun((term(), term()) -> boolean()), dict()) -> dict(). + +filter(F, D) -> filter_dict(F, D). + +-spec merge(fun((term(), term(), term()) -> term()), dict(), dict()) -> dict(). + +merge(F, D1, D2) when D1#dict.size < D2#dict.size -> + fold_dict(fun (K, V1, D) -> + update(K, fun (V2) -> F(K, V1, V2) end, V1, D) + end, D2, D1); +merge(F, D1, D2) -> + fold_dict(fun (K, V2, D) -> + update(K, fun (V1) -> F(K, V1, V2) end, V2, D) + end, D1, D2). + + +%% get_slot(Hashdb, Key) -> Slot. +%% Get the slot. First hash on the new range, if we hit a bucket +%% which has not been split use the unsplit buddy bucket. + +get_slot(T, Key) -> + H = erlang:phash(Key, T#dict.maxn), + if + H > T#dict.n -> H - T#dict.bso; + true -> H + end. + +%% get_bucket(Hashdb, Slot) -> Bucket. + +get_bucket(T, Slot) -> get_bucket_s(T#dict.segs, Slot). + +%% on_bucket(Fun, Hashdb, Slot) -> {NewHashDb,Result}. +%% Apply Fun to the bucket in Slot and replace the returned bucket. + +on_bucket(F, T, Slot) -> + SegI = ((Slot-1) div ?seg_size) + 1, + BktI = ((Slot-1) rem ?seg_size) + 1, + Segs = T#dict.segs, + Seg = element(SegI, Segs), + B0 = element(BktI, Seg), + {B1,Res} = F(B0), %Op on the bucket. + {T#dict{segs=setelement(SegI, Segs, setelement(BktI, Seg, B1))},Res}. + +%% fold_dict(Fun, Acc, Dictionary) -> Acc. +%% map_dict(Fun, Dictionary) -> Dictionary. +%% filter_dict(Fun, Dictionary) -> Dictionary. +%% +%% Work functions for fold, map and filter operations. These +%% traverse the hash structure rebuilding as necessary. Note we +%% could have implemented map and filter using fold but these are +%% faster. We hope! + +fold_dict(F, Acc, D) -> + Segs = D#dict.segs, + fold_segs(F, Acc, Segs, tuple_size(Segs)). + +fold_segs(F, Acc, Segs, I) when I >= 1 -> + Seg = element(I, Segs), + fold_segs(F, fold_seg(F, Acc, Seg, tuple_size(Seg)), Segs, I-1); +fold_segs(F, Acc, _, 0) when is_function(F, 3) -> Acc. + +fold_seg(F, Acc, Seg, I) when I >= 1 -> + fold_seg(F, fold_bucket(F, Acc, element(I, Seg)), Seg, I-1); +fold_seg(F, Acc, _, 0) when is_function(F, 3) -> Acc. + +fold_bucket(F, Acc, [?kv(Key,Val)|Bkt]) -> + fold_bucket(F, F(Key, Val, Acc), Bkt); +fold_bucket(F, Acc, []) when is_function(F, 3) -> Acc. + +map_dict(F, D) -> + Segs0 = tuple_to_list(D#dict.segs), + Segs1 = map_seg_list(F, Segs0), + D#dict{segs=list_to_tuple(Segs1)}. + +map_seg_list(F, [Seg|Segs]) -> + Bkts0 = tuple_to_list(Seg), + Bkts1 = map_bkt_list(F, Bkts0), + [list_to_tuple(Bkts1)|map_seg_list(F, Segs)]; +map_seg_list(F, []) when is_function(F, 2) -> []. + +map_bkt_list(F, [Bkt0|Bkts]) -> + [map_bucket(F, Bkt0)|map_bkt_list(F, Bkts)]; +map_bkt_list(F, []) when is_function(F, 2) -> []. + +map_bucket(F, [?kv(Key,Val)|Bkt]) -> + [?kv(Key,F(Key, Val))|map_bucket(F, Bkt)]; +map_bucket(F, []) when is_function(F, 2) -> []. + +filter_dict(F, D) -> + Segs0 = tuple_to_list(D#dict.segs), + {Segs1,Fc} = filter_seg_list(F, Segs0, [], 0), + maybe_contract(D#dict{segs=list_to_tuple(Segs1)}, Fc). + +filter_seg_list(F, [Seg|Segs], Fss, Fc0) -> + Bkts0 = tuple_to_list(Seg), + {Bkts1,Fc1} = filter_bkt_list(F, Bkts0, [], Fc0), + filter_seg_list(F, Segs, [list_to_tuple(Bkts1)|Fss], Fc1); +filter_seg_list(F, [], Fss, Fc) when is_function(F, 2) -> + {lists:reverse(Fss, []),Fc}. + +filter_bkt_list(F, [Bkt0|Bkts], Fbs, Fc0) -> + {Bkt1,Fc1} = filter_bucket(F, Bkt0, [], Fc0), + filter_bkt_list(F, Bkts, [Bkt1|Fbs], Fc1); +filter_bkt_list(F, [], Fbs, Fc) when is_function(F, 2) -> + {lists:reverse(Fbs),Fc}. + +filter_bucket(F, [?kv(Key,Val)=E|Bkt], Fb, Fc) -> + case F(Key, Val) of + true -> filter_bucket(F, Bkt, [E|Fb], Fc); + false -> filter_bucket(F, Bkt, Fb, Fc+1) + end; +filter_bucket(F, [], Fb, Fc) when is_function(F, 2) -> + {lists:reverse(Fb),Fc}. + +%% get_bucket_s(Segments, Slot) -> Bucket. +%% put_bucket_s(Segments, Slot, Bucket) -> NewSegments. + +get_bucket_s(Segs, Slot) -> + SegI = ((Slot-1) div ?seg_size) + 1, + BktI = ((Slot-1) rem ?seg_size) + 1, + element(BktI, element(SegI, Segs)). + +put_bucket_s(Segs, Slot, Bkt) -> + SegI = ((Slot-1) div ?seg_size) + 1, + BktI = ((Slot-1) rem ?seg_size) + 1, + Seg = setelement(BktI, element(SegI, Segs), Bkt), + setelement(SegI, Segs, Seg). + +%% In maybe_expand(), the variable Ic only takes the values 0 or 1, +%% but type inference is not strong enough to infer this. Thus, the +%% use of explicit pattern matching and an auxiliary function. + +maybe_expand(T, 0) -> maybe_expand_aux(T, 0); +maybe_expand(T, 1) -> maybe_expand_aux(T, 1). + +maybe_expand_aux(T0, Ic) when T0#dict.size + Ic > T0#dict.exp_size -> + T = maybe_expand_segs(T0), %Do we need more segments. + N = T#dict.n + 1, %Next slot to expand into + Segs0 = T#dict.segs, + Slot1 = N - T#dict.bso, + B = get_bucket_s(Segs0, Slot1), + Slot2 = N, + [B1|B2] = rehash(B, Slot1, Slot2, T#dict.maxn), + Segs1 = put_bucket_s(Segs0, Slot1, B1), + Segs2 = put_bucket_s(Segs1, Slot2, B2), + T#dict{size=T#dict.size + Ic, + n=N, + exp_size=N * ?expand_load, + con_size=N * ?contract_load, + segs=Segs2}; +maybe_expand_aux(T, Ic) -> T#dict{size=T#dict.size + Ic}. + +maybe_expand_segs(T) when T#dict.n =:= T#dict.maxn -> + T#dict{maxn=2 * T#dict.maxn, + bso=2 * T#dict.bso, + segs=expand_segs(T#dict.segs, T#dict.empty)}; +maybe_expand_segs(T) -> T. + +maybe_contract(T, Dc) when T#dict.size - Dc < T#dict.con_size, + T#dict.n > ?seg_size -> + N = T#dict.n, + Slot1 = N - T#dict.bso, + Segs0 = T#dict.segs, + B1 = get_bucket_s(Segs0, Slot1), + Slot2 = N, + B2 = get_bucket_s(Segs0, Slot2), + Segs1 = put_bucket_s(Segs0, Slot1, B1 ++ B2), + Segs2 = put_bucket_s(Segs1, Slot2, []), %Clear the upper bucket + N1 = N - 1, + maybe_contract_segs(T#dict{size=T#dict.size - Dc, + n=N1, + exp_size=N1 * ?expand_load, + con_size=N1 * ?contract_load, + segs=Segs2}); +maybe_contract(T, Dc) -> T#dict{size=T#dict.size - Dc}. + +maybe_contract_segs(T) when T#dict.n =:= T#dict.bso -> + T#dict{maxn=T#dict.maxn div 2, + bso=T#dict.bso div 2, + segs=contract_segs(T#dict.segs)}; +maybe_contract_segs(T) -> T. + +%% rehash(Bucket, Slot1, Slot2, MaxN) -> [Bucket1|Bucket2]. +%% Yes, we should return a tuple, but this is more fun. + +rehash([?kv(Key,_Bag)=KeyBag|T], Slot1, Slot2, MaxN) -> + [L1|L2] = rehash(T, Slot1, Slot2, MaxN), + case erlang:phash(Key, MaxN) of + Slot1 -> [[KeyBag|L1]|L2]; + Slot2 -> [L1|[KeyBag|L2]] + end; +rehash([], _Slot1, _Slot2, _MaxN) -> [[]|[]]. + +%% mk_seg(Size) -> Segment. + +mk_seg(16) -> {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}. + +%% expand_segs(Segs, EmptySeg) -> NewSegs. +%% contract_segs(Segs) -> NewSegs. +%% Expand/contract the segment tuple by doubling/halving the number +%% of segments. We special case the powers of 2 upto 32, this should +%% catch most case. N.B. the last element in the segments tuple is +%% an extra element containing a default empty segment. + +expand_segs({B1}, Empty) -> + {B1,Empty}; +expand_segs({B1,B2}, Empty) -> + {B1,B2,Empty,Empty}; +expand_segs({B1,B2,B3,B4}, Empty) -> + {B1,B2,B3,B4,Empty,Empty,Empty,Empty}; +expand_segs({B1,B2,B3,B4,B5,B6,B7,B8}, Empty) -> + {B1,B2,B3,B4,B5,B6,B7,B8, + Empty,Empty,Empty,Empty,Empty,Empty,Empty,Empty}; +expand_segs({B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16}, Empty) -> + {B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16, + Empty,Empty,Empty,Empty,Empty,Empty,Empty,Empty, + Empty,Empty,Empty,Empty,Empty,Empty,Empty,Empty}; +expand_segs(Segs, Empty) -> + list_to_tuple(tuple_to_list(Segs) + ++ lists:duplicate(tuple_size(Segs), Empty)). + +contract_segs({B1,_}) -> + {B1}; +contract_segs({B1,B2,_,_}) -> + {B1,B2}; +contract_segs({B1,B2,B3,B4,_,_,_,_}) -> + {B1,B2,B3,B4}; +contract_segs({B1,B2,B3,B4,B5,B6,B7,B8,_,_,_,_,_,_,_,_}) -> + {B1,B2,B3,B4,B5,B6,B7,B8}; +contract_segs({B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16, + _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_}) -> + {B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16}; +contract_segs(Segs) -> + Ss = tuple_size(Segs) div 2, + list_to_tuple(lists:sublist(tuple_to_list(Segs), 1, Ss)). diff --git a/lib/stdlib/src/digraph.erl b/lib/stdlib/src/digraph.erl new file mode 100644 index 0000000000..9bdea671a9 --- /dev/null +++ b/lib/stdlib/src/digraph.erl @@ -0,0 +1,570 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(digraph). + +-export([new/0, new/1, delete/1, info/1]). + +-export([add_vertex/1, add_vertex/2, add_vertex/3]). +-export([del_vertex/2, del_vertices/2]). +-export([vertex/2, no_vertices/1, vertices/1]). +-export([source_vertices/1, sink_vertices/1]). + +-export([add_edge/3, add_edge/4, add_edge/5]). +-export([del_edge/2, del_edges/2, del_path/3]). +-export([edge/2, no_edges/1, edges/1]). + +-export([out_neighbours/2, in_neighbours/2]). +-export([out_edges/2, in_edges/2, edges/2]). +-export([out_degree/2, in_degree/2]). +-export([get_path/3, get_cycle/2]). + +-export([get_short_path/3, get_short_cycle/2]). + +-record(digraph, {vtab = notable :: ets:tab(), + etab = notable :: ets:tab(), + ntab = notable :: ets:tab(), + cyclic = true :: boolean()}). +%% A declaration equivalent to the following one is hard-coded in erl_types. +%% That declaration contains hard-coded information about the #digraph{} +%% record and the types of its fields. So, please make sure that any +%% changes to its structure are also propagated to erl_types.erl. +%% +%% -opaque digraph() :: #digraph{}. + +-type edge() :: term(). +-type label() :: term(). +-type vertex() :: term(). + +-type add_edge_err_rsn() :: {'bad_edge', [vertex()]} | {'bad_vertex', vertex()}. + +%% +%% Type is a list of +%% protected | private +%% acyclic | cyclic +%% +%% default is [cyclic,protected] +%% +-type d_protection() :: 'private' | 'protected'. +-type d_cyclicity() :: 'acyclic' | 'cyclic'. +-type d_type() :: d_cyclicity() | d_protection(). + +-spec new() -> digraph(). + +new() -> new([]). + +-spec new([d_type()]) -> digraph(). + +new(Type) -> + case check_type(Type, protected, []) of + {Access, Ts} -> + V = ets:new(vertices, [set, Access]), + E = ets:new(edges, [set, Access]), + N = ets:new(neighbours, [bag, Access]), + ets:insert(N, [{'$vid', 0}, {'$eid', 0}]), + set_type(Ts, #digraph{vtab=V, etab=E, ntab=N}); + error -> + erlang:error(badarg) + end. + +%% +%% Check type of graph +%% +%-spec check_type([d_type()], d_protection(), [{'cyclic', boolean()}]) -> +% {d_protection(), [{'cyclic', boolean()}]}. + +check_type([acyclic|Ts], A, L) -> + check_type(Ts, A,[{cyclic,false} | L]); +check_type([cyclic | Ts], A, L) -> + check_type(Ts, A, [{cyclic,true} | L]); +check_type([protected | Ts], _, L) -> + check_type(Ts, protected, L); +check_type([private | Ts], _, L) -> + check_type(Ts, private, L); +check_type([], A, L) -> {A, L}; +check_type(_, _, _) -> error. + +%% +%% Set graph type +%% +-spec set_type([{'cyclic', boolean()}], digraph()) -> digraph(). + +set_type([{cyclic,V} | Ks], G) -> + set_type(Ks, G#digraph{cyclic = V}); +set_type([], G) -> G. + + +%% Data access functions + +-spec delete(digraph()) -> 'true'. + +delete(G) -> + ets:delete(G#digraph.vtab), + ets:delete(G#digraph.etab), + ets:delete(G#digraph.ntab). + +-spec info(digraph()) -> [{'cyclicity', d_cyclicity()} | + {'memory', non_neg_integer()} | + {'protection', d_protection()}]. +info(G) -> + VT = G#digraph.vtab, + ET = G#digraph.etab, + NT = G#digraph.ntab, + Cyclicity = case G#digraph.cyclic of + true -> cyclic; + false -> acyclic + end, + Protection = ets:info(VT, protection), + Memory = ets:info(VT, memory) + ets:info(ET, memory) + ets:info(NT, memory), + [{cyclicity, Cyclicity}, {memory, Memory}, {protection, Protection}]. + +-spec add_vertex(digraph()) -> vertex(). + +add_vertex(G) -> + do_add_vertex({new_vertex_id(G), []}, G). + +-spec add_vertex(digraph(), vertex()) -> vertex(). + +add_vertex(G, V) -> + do_add_vertex({V, []}, G). + +-spec add_vertex(digraph(), vertex(), label()) -> vertex(). + +add_vertex(G, V, D) -> + do_add_vertex({V, D}, G). + +-spec del_vertex(digraph(), vertex()) -> 'true'. + +del_vertex(G, V) -> + do_del_vertex(V, G). + +-spec del_vertices(digraph(), [vertex()]) -> 'true'. + +del_vertices(G, Vs) -> + do_del_vertices(Vs, G). + +-spec vertex(digraph(), vertex()) -> {vertex(), label()} | 'false'. + +vertex(G, V) -> + case ets:lookup(G#digraph.vtab, V) of + [] -> false; + [Vertex] -> Vertex + end. + +-spec no_vertices(digraph()) -> non_neg_integer(). + +no_vertices(G) -> + ets:info(G#digraph.vtab, size). + +-spec vertices(digraph()) -> [vertex()]. + +vertices(G) -> + ets:select(G#digraph.vtab, [{{'$1', '_'}, [], ['$1']}]). + +-spec source_vertices(digraph()) -> [vertex()]. + +source_vertices(G) -> + collect_vertices(G, in). + +-spec sink_vertices(digraph()) -> [vertex()]. + +sink_vertices(G) -> + collect_vertices(G, out). + +-spec in_degree(digraph(), vertex()) -> non_neg_integer(). + +in_degree(G, V) -> + length(ets:lookup(G#digraph.ntab, {in, V})). + +-spec in_neighbours(digraph(), vertex()) -> [vertex()]. + +in_neighbours(G, V) -> + ET = G#digraph.etab, + NT = G#digraph.ntab, + collect_elems(ets:lookup(NT, {in, V}), ET, 2). + +-spec in_edges(digraph(), vertex()) -> [edge()]. + +in_edges(G, V) -> + ets:select(G#digraph.ntab, [{{{in, V}, '$1'}, [], ['$1']}]). + +-spec out_degree(digraph(), vertex()) -> non_neg_integer(). + +out_degree(G, V) -> + length(ets:lookup(G#digraph.ntab, {out, V})). + +-spec out_neighbours(digraph(), vertex()) -> [vertex()]. + +out_neighbours(G, V) -> + ET = G#digraph.etab, + NT = G#digraph.ntab, + collect_elems(ets:lookup(NT, {out, V}), ET, 3). + +-spec out_edges(digraph(), vertex()) -> [edge()]. + +out_edges(G, V) -> + ets:select(G#digraph.ntab, [{{{out, V}, '$1'}, [], ['$1']}]). + +-spec add_edge(digraph(), vertex(), vertex()) -> + edge() | {'error', add_edge_err_rsn()}. + +add_edge(G, V1, V2) -> + do_add_edge({new_edge_id(G), V1, V2, []}, G). + +-spec add_edge(digraph(), vertex(), vertex(), label()) -> + edge() | {'error', add_edge_err_rsn()}. + +add_edge(G, V1, V2, D) -> + do_add_edge({new_edge_id(G), V1, V2, D}, G). + +-spec add_edge(digraph(), edge(), vertex(), vertex(), label()) -> + edge() | {'error', add_edge_err_rsn()}. + +add_edge(G, E, V1, V2, D) -> + do_add_edge({E, V1, V2, D}, G). + +-spec del_edge(digraph(), edge()) -> 'true'. + +del_edge(G, E) -> + do_del_edges([E], G). + +-spec del_edges(digraph(), [edge()]) -> 'true'. + +del_edges(G, Es) -> + do_del_edges(Es, G). + +-spec no_edges(digraph()) -> non_neg_integer(). + +no_edges(G) -> + ets:info(G#digraph.etab, size). + +-spec edges(digraph()) -> [edge()]. + +edges(G) -> + ets:select(G#digraph.etab, [{{'$1', '_', '_', '_'}, [], ['$1']}]). + +-spec edges(digraph(), vertex()) -> [edge()]. + +edges(G, V) -> + ets:select(G#digraph.ntab, [{{{out, V},'$1'}, [], ['$1']}, + {{{in, V}, '$1'}, [], ['$1']}]). + +-spec edge(digraph(), edge()) -> {edge(),vertex(),vertex(),label()} | 'false'. + +edge(G, E) -> + case ets:lookup(G#digraph.etab,E) of + [] -> false; + [Edge] -> Edge + end. + +%% +%% Generate a "unique" edge identifier (relative to this graph) +%% +-spec new_edge_id(digraph()) -> nonempty_improper_list('$e', non_neg_integer()). + +new_edge_id(G) -> + NT = G#digraph.ntab, + [{'$eid', K}] = ets:lookup(NT, '$eid'), + true = ets:delete(NT, '$eid'), + true = ets:insert(NT, {'$eid', K+1}), + ['$e' | K]. + +%% +%% Generate a "unique" vertex identifier (relative to this graph) +%% +-spec new_vertex_id(digraph()) -> nonempty_improper_list('$v', non_neg_integer()). + +new_vertex_id(G) -> + NT = G#digraph.ntab, + [{'$vid', K}] = ets:lookup(NT, '$vid'), + true = ets:delete(NT, '$vid'), + true = ets:insert(NT, {'$vid', K+1}), + ['$v' | K]. + +%% +%% Collect elements for a index in a tuple +%% +collect_elems(Keys, Table, Index) -> + collect_elems(Keys, Table, Index, []). + +collect_elems([{_,Key}|Keys], Table, Index, Acc) -> + collect_elems(Keys, Table, Index, + [ets:lookup_element(Table, Key, Index)|Acc]); +collect_elems([], _, _, Acc) -> Acc. + +-spec do_add_vertex({vertex(), label()}, digraph()) -> vertex(). + +do_add_vertex({V, _Label} = VL, G) -> + ets:insert(G#digraph.vtab, VL), + V. + +%% +%% Collect either source or sink vertices. +%% +collect_vertices(G, Type) -> + Vs = vertices(G), + lists:foldl(fun(V, A) -> + case ets:member(G#digraph.ntab, {Type, V}) of + true -> A; + false -> [V|A] + end + end, [], Vs). + +%% +%% Delete vertices +%% +do_del_vertices([V | Vs], G) -> + do_del_vertex(V, G), + do_del_vertices(Vs, G); +do_del_vertices([], #digraph{}) -> true. + +do_del_vertex(V, G) -> + do_del_nedges(ets:lookup(G#digraph.ntab, {in, V}), G), + do_del_nedges(ets:lookup(G#digraph.ntab, {out, V}), G), + ets:delete(G#digraph.vtab, V). + +do_del_nedges([{_, E}|Ns], G) -> + case ets:lookup(G#digraph.etab, E) of + [{E, V1, V2, _}] -> + do_del_edge(E, V1, V2, G), + do_del_nedges(Ns, G); + [] -> % cannot happen + do_del_nedges(Ns, G) + end; +do_del_nedges([], #digraph{}) -> true. + +%% +%% Delete edges +%% +do_del_edges([E|Es], G) -> + case ets:lookup(G#digraph.etab, E) of + [{E,V1,V2,_}] -> + do_del_edge(E,V1,V2,G), + do_del_edges(Es, G); + [] -> + do_del_edges(Es, G) + end; +do_del_edges([], #digraph{}) -> true. + +do_del_edge(E, V1, V2, G) -> + ets:select_delete(G#digraph.ntab, [{{{in, V2}, E}, [], [true]}, + {{{out,V1}, E}, [], [true]}]), + ets:delete(G#digraph.etab, E). + +-spec rm_edges([vertex(),...], digraph()) -> 'true'. + +rm_edges([V1, V2|Vs], G) -> + rm_edge(V1, V2, G), + rm_edges([V2|Vs], G); +rm_edges(_, _) -> true. + +-spec rm_edge(vertex(), vertex(), digraph()) -> 'ok'. + +rm_edge(V1, V2, G) -> + Es = out_edges(G, V1), + rm_edge_0(Es, V1, V2, G). + +rm_edge_0([E|Es], V1, V2, G) -> + case ets:lookup(G#digraph.etab, E) of + [{E, V1, V2, _}] -> + do_del_edge(E, V1, V2, G), + rm_edge_0(Es, V1, V2, G); + _ -> + rm_edge_0(Es, V1, V2, G) + end; +rm_edge_0([], _, _, #digraph{}) -> ok. + +%% +%% Check that endpoints exist +%% +-spec do_add_edge({edge(), vertex(), vertex(), label()}, digraph()) -> + edge() | {'error', add_edge_err_rsn()}. + +do_add_edge({E, V1, V2, Label}, G) -> + case ets:member(G#digraph.vtab, V1) of + false -> {error, {bad_vertex, V1}}; + true -> + case ets:member(G#digraph.vtab, V2) of + false -> {error, {bad_vertex, V2}}; + true -> + case other_edge_exists(G, E, V1, V2) of + true -> {error, {bad_edge, [V1, V2]}}; + false when G#digraph.cyclic =:= false -> + acyclic_add_edge(E, V1, V2, Label, G); + false -> + do_insert_edge(E, V1, V2, Label, G) + end + end + end. + +other_edge_exists(#digraph{etab = ET}, E, V1, V2) -> + case ets:lookup(ET, E) of + [{E, Vert1, Vert2, _}] when Vert1 =/= V1; Vert2 =/= V2 -> + true; + _ -> + false + end. + +-spec do_insert_edge(edge(), vertex(), vertex(), label(), digraph()) -> edge(). + +do_insert_edge(E, V1, V2, Label, #digraph{ntab=NT, etab=ET}) -> + ets:insert(NT, [{{out, V1}, E}, {{in, V2}, E}]), + ets:insert(ET, {E, V1, V2, Label}), + E. + +-spec acyclic_add_edge(edge(), vertex(), vertex(), label(), digraph()) -> + edge() | {'error', {'bad_edge', [vertex()]}}. + +acyclic_add_edge(_E, V1, V2, _L, _G) when V1 =:= V2 -> + {error, {bad_edge, [V1, V2]}}; +acyclic_add_edge(E, V1, V2, Label, G) -> + case get_path(G, V2, V1) of + false -> do_insert_edge(E, V1, V2, Label, G); + Path -> {error, {bad_edge, Path}} + end. + +%% +%% Delete all paths from vertex V1 to vertex V2 +%% + +-spec del_path(digraph(), vertex(), vertex()) -> 'true'. + +del_path(G, V1, V2) -> + case get_path(G, V1, V2) of + false -> true; + Path -> + rm_edges(Path, G), + del_path(G, V1, V2) + end. + +%% +%% Find a cycle through V +%% return the cycle as list of vertices [V ... V] +%% if no cycle exists false is returned +%% if only a cycle of length one exists it will be +%% returned as [V] but only after longer cycles have +%% been searched. +%% + +-spec get_cycle(digraph(), vertex()) -> [vertex(),...] | 'false'. + +get_cycle(G, V) -> + case one_path(out_neighbours(G, V), V, [], [V], [V], 2, G, 1) of + false -> + case lists:member(V, out_neighbours(G, V)) of + true -> [V]; + false -> false + end; + Vs -> Vs + end. + +%% +%% Find a path from V1 to V2 +%% return the path as list of vertices [V1 ... V2] +%% if no path exists false is returned +%% + +-spec get_path(digraph(), vertex(), vertex()) -> [vertex(),...] | 'false'. + +get_path(G, V1, V2) -> + one_path(out_neighbours(G, V1), V2, [], [V1], [V1], 1, G, 1). + +%% +%% prune_short_path (evaluate conditions on path) +%% short : if path is too short +%% ok : if path is ok +%% +prune_short_path(Counter, Min) when Counter < Min -> + short; +prune_short_path(_Counter, _Min) -> + ok. + +one_path([W|Ws], W, Cont, Xs, Ps, Prune, G, Counter) -> + case prune_short_path(Counter, Prune) of + short -> one_path(Ws, W, Cont, Xs, Ps, Prune, G, Counter); + ok -> lists:reverse([W|Ps]) + end; +one_path([V|Vs], W, Cont, Xs, Ps, Prune, G, Counter) -> + case lists:member(V, Xs) of + true -> one_path(Vs, W, Cont, Xs, Ps, Prune, G, Counter); + false -> one_path(out_neighbours(G, V), W, + [{Vs,Ps} | Cont], [V|Xs], [V|Ps], + Prune, G, Counter+1) + end; +one_path([], W, [{Vs,Ps}|Cont], Xs, _, Prune, G, Counter) -> + one_path(Vs, W, Cont, Xs, Ps, Prune, G, Counter-1); +one_path([], _, [], _, _, _, _, _Counter) -> false. + +%% +%% Like get_cycle/2, but a cycle of length one is preferred. +%% + +-spec get_short_cycle(digraph(), vertex()) -> [vertex(),...] | 'false'. + +get_short_cycle(G, V) -> + get_short_path(G, V, V). + +%% +%% Like get_path/3, but using a breadth-first search makes it possible +%% to find a short path. +%% + +-spec get_short_path(digraph(), vertex(), vertex()) -> [vertex(),...] | 'false'. + +get_short_path(G, V1, V2) -> + T = new(), + add_vertex(T, V1), + Q = queue:new(), + Q1 = queue_out_neighbours(V1, G, Q), + L = spath(Q1, G, V2, T), + delete(T), + L. + +spath(Q, G, Sink, T) -> + case queue:out(Q) of + {{value, E}, Q1} -> + {_E, V1, V2, _Label} = edge(G, E), + if + Sink =:= V2 -> + follow_path(V1, T, [V2]); + true -> + case vertex(T, V2) of + false -> + add_vertex(T, V2), + add_edge(T, V2, V1), + NQ = queue_out_neighbours(V2, G, Q1), + spath(NQ, G, Sink, T); + _V -> + spath(Q1, G, Sink, T) + end + end; + {empty, _Q1} -> + false + end. + +follow_path(V, T, P) -> + P1 = [V | P], + case out_neighbours(T, V) of + [N] -> + follow_path(N, T, P1); + [] -> + P1 + end. + +queue_out_neighbours(V, G, Q0) -> + lists:foldl(fun(E, Q) -> queue:in(E, Q) end, Q0, out_edges(G, V)). diff --git a/lib/stdlib/src/digraph_utils.erl b/lib/stdlib/src/digraph_utils.erl new file mode 100644 index 0000000000..080cae4742 --- /dev/null +++ b/lib/stdlib/src/digraph_utils.erl @@ -0,0 +1,338 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1999-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(digraph_utils). + +%%% Operations on directed (and undirected) graphs. +%%% +%%% Implementation based on Launchbury, John: Graph Algorithms with a +%%% Functional Flavour, in Jeuring, Johan, and Meijer, Erik (Eds.): +%%% Advanced Functional Programming, Lecture Notes in Computer +%%% Science 925, Springer Verlag, 1995. + +-export([components/1, strong_components/1, cyclic_strong_components/1, + reachable/2, reachable_neighbours/2, + reaching/2, reaching_neighbours/2, + topsort/1, is_acyclic/1, + arborescence_root/1, is_arborescence/1, is_tree/1, + loop_vertices/1, + subgraph/2, subgraph/3, condensation/1, + preorder/1, postorder/1]). + +%% +%% A convenient type alias +%% + +-type vertices() :: [digraph:vertex()]. + +%% +%% Exported functions +%% + +-spec components(digraph()) -> vertices(). + +components(G) -> + forest(G, fun inout/3). + +-spec strong_components(digraph()) -> vertices(). + +strong_components(G) -> + forest(G, fun in/3, revpostorder(G)). + +-spec cyclic_strong_components(digraph()) -> vertices(). + +cyclic_strong_components(G) -> + remove_singletons(strong_components(G), G, []). + +-spec reachable(vertices(), digraph()) -> vertices(). + +reachable(Vs, G) when is_list(Vs) -> + lists:append(forest(G, fun out/3, Vs, first)). + +-spec reachable_neighbours(vertices(), digraph()) -> vertices(). + +reachable_neighbours(Vs, G) when is_list(Vs) -> + lists:append(forest(G, fun out/3, Vs, not_first)). + +-spec reaching(vertices(), digraph()) -> vertices(). + +reaching(Vs, G) when is_list(Vs) -> + lists:append(forest(G, fun in/3, Vs, first)). + +-spec reaching_neighbours(vertices(), digraph()) -> vertices(). + +reaching_neighbours(Vs, G) when is_list(Vs) -> + lists:append(forest(G, fun in/3, Vs, not_first)). + +-spec topsort(digraph()) -> vertices() | 'false'. + +topsort(G) -> + L = revpostorder(G), + case length(forest(G, fun in/3, L)) =:= length(digraph:vertices(G)) of + true -> L; + false -> false + end. + +-spec is_acyclic(digraph()) -> boolean(). + +is_acyclic(G) -> + loop_vertices(G) =:= [] andalso topsort(G) =/= false. + +-spec arborescence_root(digraph()) -> 'no' | {'yes', digraph:vertex()}. + +arborescence_root(G) -> + case digraph:no_edges(G) =:= digraph:no_vertices(G) - 1 of + true -> + try + F = fun(V, Z) -> + case digraph:in_degree(G, V) of + 1 -> Z; + 0 when Z =:= [] -> [V] + end + end, + [Root] = lists:foldl(F, [], digraph:vertices(G)), + {yes, Root} + catch _:_ -> + no + end; + false -> + no + end. + +-spec is_arborescence(digraph()) -> boolean(). + +is_arborescence(G) -> + arborescence_root(G) =/= no. + +-spec is_tree(digraph()) -> boolean(). + +is_tree(G) -> + (digraph:no_edges(G) =:= digraph:no_vertices(G) - 1) + andalso (length(components(G)) =:= 1). + +-spec loop_vertices(digraph()) -> vertices(). + +loop_vertices(G) -> + [V || V <- digraph:vertices(G), is_reflexive_vertex(V, G)]. + +-spec subgraph(digraph(), vertices()) -> digraph(). + +subgraph(G, Vs) -> + try + subgraph_opts(G, Vs, []) + catch + throw:badarg -> + erlang:error(badarg) + end. + +-type option() :: {'type', 'inherit' | [digraph:d_type()]} + | {'keep_labels', boolean()}. + +-spec subgraph(digraph(), vertices(), [option()]) -> digraph(). + +subgraph(G, Vs, Opts) -> + try + subgraph_opts(G, Vs, Opts) + catch + throw:badarg -> + erlang:error(badarg) + end. + +-spec condensation(digraph()) -> digraph(). + +condensation(G) -> + SCs = strong_components(G), + %% Each component is assigned a number. + %% V2I: from vertex to number. + %% I2C: from number to component. + V2I = ets:new(condensation, []), + I2C = ets:new(condensation, []), + CFun = fun(SC, N) -> lists:foreach(fun(V) -> + true = ets:insert(V2I, {V,N}) + end, + SC), + true = ets:insert(I2C, {N, SC}), + N + 1 + end, + lists:foldl(CFun, 1, SCs), + SCG = subgraph_opts(G, [], []), + lists:foreach(fun(SC) -> condense(SC, G, SCG, V2I, I2C) end, SCs), + ets:delete(V2I), + ets:delete(I2C), + SCG. + +-spec preorder(digraph()) -> vertices(). + +preorder(G) -> + lists:reverse(revpreorder(G)). + +-spec postorder(digraph()) -> vertices(). + +postorder(G) -> + lists:reverse(revpostorder(G)). + +%% +%% Local functions +%% + +forest(G, SF) -> + forest(G, SF, digraph:vertices(G)). + +forest(G, SF, Vs) -> + forest(G, SF, Vs, first). + +forest(G, SF, Vs, HandleFirst) -> + T = ets:new(forest, [set]), + F = fun(V, LL) -> pretraverse(HandleFirst, V, SF, G, T, LL) end, + LL = lists:foldl(F, [], Vs), + ets:delete(T), + LL. + +pretraverse(first, V, SF, G, T, LL) -> + ptraverse([V], SF, G, T, [], LL); +pretraverse(not_first, V, SF, G, T, LL) -> + case ets:member(T, V) of + false -> ptraverse(SF(G, V, []), SF, G, T, [], LL); + true -> LL + end. + +ptraverse([V | Vs], SF, G, T, Rs, LL) -> + case ets:member(T, V) of + false -> + ets:insert(T, {V}), + ptraverse(SF(G, V, Vs), SF, G, T, [V | Rs], LL); + true -> + ptraverse(Vs, SF, G, T, Rs, LL) + end; +ptraverse([], _SF, _G, _T, [], LL) -> + LL; +ptraverse([], _SF, _G, _T, Rs, LL) -> + [Rs | LL]. + +revpreorder(G) -> + lists:append(forest(G, fun out/3)). + +revpostorder(G) -> + T = ets:new(forest, [set]), + L = posttraverse(digraph:vertices(G), G, T, []), + ets:delete(T), + L. + +posttraverse([V | Vs], G, T, L) -> + L1 = case ets:member(T, V) of + false -> + ets:insert(T, {V}), + [V | posttraverse(out(G, V, []), G, T, L)]; + true -> + L + end, + posttraverse(Vs, G, T, L1); +posttraverse([], _G, _T, L) -> + L. + +in(G, V, Vs) -> + digraph:in_neighbours(G, V) ++ Vs. + +out(G, V, Vs) -> + digraph:out_neighbours(G, V) ++ Vs. + +inout(G, V, Vs) -> + in(G, V, out(G, V, Vs)). + +remove_singletons([C=[V] | Cs], G, L) -> + case is_reflexive_vertex(V, G) of + true -> remove_singletons(Cs, G, [C | L]); + false -> remove_singletons(Cs, G, L) + end; +remove_singletons([C | Cs], G, L) -> + remove_singletons(Cs, G, [C | L]); +remove_singletons([], _G, L) -> + L. + +is_reflexive_vertex(V, G) -> + lists:member(V, digraph:out_neighbours(G, V)). + +subgraph_opts(G, Vs, Opts) -> + subgraph_opts(Opts, inherit, true, G, Vs). + +subgraph_opts([{type, Type} | Opts], _Type0, Keep, G, Vs) + when Type =:= inherit; is_list(Type) -> + subgraph_opts(Opts, Type, Keep, G, Vs); +subgraph_opts([{keep_labels, Keep} | Opts], Type, _Keep0, G, Vs) + when is_boolean(Keep) -> + subgraph_opts(Opts, Type, Keep, G, Vs); +subgraph_opts([], inherit, Keep, G, Vs) -> + Info = digraph:info(G), + {_, {_, Cyclicity}} = lists:keysearch(cyclicity, 1, Info), + {_, {_, Protection}} = lists:keysearch(protection, 1, Info), + subgraph(G, Vs, [Cyclicity, Protection], Keep); +subgraph_opts([], Type, Keep, G, Vs) -> + subgraph(G, Vs, Type, Keep); +subgraph_opts(_, _Type, _Keep, _G, _Vs) -> + throw(badarg). + +subgraph(G, Vs, Type, Keep) -> + try digraph:new(Type) of + SG -> + lists:foreach(fun(V) -> subgraph_vertex(V, G, SG, Keep) end, Vs), + EFun = fun(V) -> lists:foreach(fun(E) -> + subgraph_edge(E, G, SG, Keep) + end, + digraph:out_edges(G, V)) + end, + lists:foreach(EFun, digraph:vertices(SG)), + SG + catch + error:badarg -> + throw(badarg) + end. + +subgraph_vertex(V, G, SG, Keep) -> + case digraph:vertex(G, V) of + false -> ok; + _ when not Keep -> digraph:add_vertex(SG, V); + {_V, Label} when Keep -> digraph:add_vertex(SG, V, Label) + end. + +subgraph_edge(E, G, SG, Keep) -> + {_E, V1, V2, Label} = digraph:edge(G, E), + case digraph:vertex(SG, V2) of + false -> ok; + _ when not Keep -> digraph:add_edge(SG, E, V1, V2, []); + _ when Keep -> digraph:add_edge(SG, E, V1, V2, Label) + end. + +condense(SC, G, SCG, V2I, I2C) -> + T = ets:new(condense, []), + NFun = fun(Neighbour) -> + [{_V,I}] = ets:lookup(V2I, Neighbour), + ets:insert(T, {I}) + end, + VFun = fun(V) -> lists:foreach(NFun, digraph:out_neighbours(G, V)) end, + lists:foreach(VFun, SC), + digraph:add_vertex(SCG, SC), + condense(ets:first(T), T, SC, G, SCG, I2C), + ets:delete(T). + +condense('$end_of_table', _T, _SC, _G, _SCG, _I2C) -> + ok; +condense(I, T, SC, G, SCG, I2C) -> + [{_,C}] = ets:lookup(I2C, I), + digraph:add_vertex(SCG, C), + digraph:add_edge(SCG, SC, C), + condense(ets:next(T, I), T, SC, G, SCG, I2C). diff --git a/lib/stdlib/src/edlin.erl b/lib/stdlib/src/edlin.erl new file mode 100644 index 0000000000..31a653bda0 --- /dev/null +++ b/lib/stdlib/src/edlin.erl @@ -0,0 +1,575 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(edlin). + +%% A simple Emacs-like line editor. +%% About Latin-1 characters: see the beginning of erl_scan.erl. + +-export([init/0,start/1,edit_line/2,prefix_arg/1]). +-export([erase_line/1,erase_inp/1,redraw_line/1]). +-export([length_before/1,length_after/1,prompt/1]). +%%-export([expand/1]). + +-export([edit_line1/2]). + +-import(lists, [reverse/1, reverse/2]). + +%-import([nthtail/2, keysearch/3, prefix/2]). + +-export([over_word/3]). + + +%% A Continuation has the structure: +%% {line,Prompt,CurrentLine,EditPrefix} + +%% init() +%% Initialise the line editor. This must be done once per process using +%% the editor. + +init() -> + put(kill_buffer, []). + +%% start(Prompt) +%% edit(Characters, Continuation) +%% Return +%% {done,Line,Rest,Requests} +%% {more_chars,Cont,Requests} +%% {blink,Cont,Requests} +%% {undefined,Char,Rest,Cont,Requests} + +start(Pbs) -> + {more_chars,{line,Pbs,{[],[]},none},[{put_chars,unicode,Pbs}]}. + +edit_line(Cs, {line,P,L,{blink,N}}) -> + edit(Cs, P, L, none, [{move_rel,N}]); +edit_line(Cs, {line,P,L,M}) -> + edit(Cs, P, L, M, []). + +edit_line1(Cs, {line,P,L,{blink,N}}) -> + edit(Cs, P, L, none, [{move_rel,N}]); +edit_line1(Cs, {line,P,{[],[]},none}) -> + {more_chars, {line,P,{lists:reverse(Cs),[]},none},[{put_chars, unicode, Cs}]}; +edit_line1(Cs, {line,P,L,M}) -> + edit(Cs, P, L, M, []). + +edit([C|Cs], P, Line, {blink,_}, [_|Rs]) -> %Remove blink here + edit([C|Cs], P, Line, none, Rs); +edit([C|Cs], P, {Bef,Aft}, Prefix, Rs0) -> + case key_map(C, Prefix) of + meta -> + edit(Cs, P, {Bef,Aft}, meta, Rs0); + meta_left_sq_bracket -> + edit(Cs, P, {Bef,Aft}, meta_left_sq_bracket, Rs0); + ctlx -> + edit(Cs, P, {Bef,Aft}, ctlx, Rs0); + new_line -> + {done, reverse(Bef, Aft ++ "\n"), Cs, + reverse(Rs0, [{move_rel,length(Aft)},{put_chars,unicode,"\n"}])}; + redraw_line -> + Rs1 = erase(P, Bef, Aft, Rs0), + Rs = redraw(P, Bef, Aft, Rs1), + edit(Cs, P, {Bef,Aft}, none, Rs); + tab_expand -> + {expand, Bef, Cs, + {line, P, {Bef, Aft}, none}, + reverse(Rs0)}; + +%% tab -> +%% %% Always redraw the line since expand/1 might have printed +%% %% possible expansions. +%% case expand(Bef) of +%% {yes,Str} -> +%% edit([redraw_line| +%% (Str ++ Cs)], P, {Bef,Aft}, none, Rs0); +%% no -> +%% %% don't beep if there's only whitespace before +%% %% us - user may have pasted in a lot of indented stuff. +%% case whitespace_only(Bef) of +%% false -> +%% edit([redraw_line|Cs], P, {Bef,Aft}, none, +%% [beep|Rs0]); +%% true -> +%% edit([redraw_line|Cs], P, {Bef,Aft}, none, [Rs0]) +%% end +%% end; + {undefined,C} -> + {undefined,{none,Prefix,C},Cs,{line,P,{Bef,Aft},none}, + reverse(Rs0)}; + Op -> + case do_op(Op, Bef, Aft, Rs0) of + {blink,N,Line,Rs} -> + edit(Cs, P, Line, {blink,N}, Rs); + {Line,Rs} -> + edit(Cs, P, Line, none, Rs) + end + end; +edit([], P, L, {blink,N}, Rs) -> + {blink,{line,P,L,{blink,N}},reverse(Rs)}; +edit([], P, L, Prefix, Rs) -> + {more_chars,{line,P,L,Prefix},reverse(Rs)}; +edit(eof, _, {Bef,Aft}, _, Rs) -> + {done,reverse(Bef, Aft),[],reverse(Rs, [{move_rel,length(Aft)}])}. + +%% %% Assumes that arg is a string +%% %% Horizontal whitespace only. +%% whitespace_only([]) -> +%% true; +%% whitespace_only([C|Rest]) -> +%% case C of +%% $\s -> +%% whitespace_only(Rest); +%% $\t -> +%% whitespace_only(Rest); +%% _ -> +%% false +%% end. + +%% prefix_arg(Argument) +%% Take a prefix argument and return its numeric value. + +prefix_arg(none) -> 1; +prefix_arg({ctlu,N}) -> N; +prefix_arg(N) -> N. + +%% key_map(Char, Prefix) +%% Map a character and a prefix to an action. + +key_map(A, _) when is_atom(A) -> A; % so we can push keywords +key_map($\^A, none) -> beginning_of_line; +key_map($\^B, none) -> backward_char; +key_map($\^D, none) -> forward_delete_char; +key_map($\^E, none) -> end_of_line; +key_map($\^F, none) -> forward_char; +key_map($\^H, none) -> backward_delete_char; +key_map($\t, none) -> tab_expand; +key_map($\^L, none) -> redraw_line; +key_map($\n, none) -> new_line; +key_map($\^K, none) -> kill_line; +key_map($\r, none) -> new_line; +key_map($\^T, none) -> transpose_char; +key_map($\^U, none) -> ctlu; +key_map($\^], none) -> auto_blink; +key_map($\^X, none) -> ctlx; +key_map($\^Y, none) -> yank; +key_map($\e, none) -> meta; +key_map($), Prefix) when Prefix =/= meta -> {blink,$),$(}; +key_map($}, Prefix) when Prefix =/= meta -> {blink,$},${}; +key_map($], Prefix) when Prefix =/= meta -> {blink,$],$[}; +key_map($B, meta) -> backward_word; +key_map($D, meta) -> kill_word; +key_map($F, meta) -> forward_word; +key_map($T, meta) -> transpose_word; +key_map($Y, meta) -> yank_pop; +key_map($b, meta) -> backward_word; +key_map($d, meta) -> kill_word; +key_map($f, meta) -> forward_word; +key_map($t, meta) -> transpose_word; +key_map($y, meta) -> yank_pop; +key_map($\177, none) -> backward_delete_char; +key_map($\177, meta) -> backward_kill_word; +key_map($[, meta) -> meta_left_sq_bracket; +key_map($D, meta_left_sq_bracket) -> backward_char; +key_map($C, meta_left_sq_bracket) -> forward_char; +key_map(C, none) when C >= $\s -> + {insert,C}; +key_map(C, _) -> {undefined,C}. + +%% do_op(Action, Before, After, Requests) + +do_op({insert,C}, Bef, [], Rs) -> + {{[C|Bef],[]},[{put_chars, unicode,[C]}|Rs]}; +do_op({insert,C}, Bef, Aft, Rs) -> + {{[C|Bef],Aft},[{insert_chars, unicode, [C]}|Rs]}; +%% do blink after $$ +do_op({blink,C,M}, Bef=[$$,$$|_], Aft, Rs) -> + N = over_paren(Bef, C, M), + {blink,N+1,{[C|Bef],Aft},[{move_rel,-(N+1)},{insert_chars, unicode,[C]}|Rs]}; +%% don't blink after a $ +do_op({blink,C,_}, Bef=[$$|_], Aft, Rs) -> + do_op({insert,C}, Bef, Aft, Rs); +%do_op({blink,C,M}, Bef, [], Rs) -> +% N = over_paren(Bef, C, M), +% {blink,N+1,{[C|Bef],[]},[{move_rel,-(N+1)},{put_chars,[C]}|Rs]}; +do_op({blink,C,M}, Bef, Aft, Rs) -> + case over_paren(Bef, C, M) of + beep -> + {{[C|Bef], Aft}, [beep,{insert_chars, unicode, [C]}|Rs]}; + N -> {blink,N+1,{[C|Bef],Aft}, + [{move_rel,-(N+1)},{insert_chars, unicode,[C]}|Rs]} + end; +do_op(auto_blink, Bef, Aft, Rs) -> + case over_paren_auto(Bef) of + {N, Paren} -> + {blink,N+1, + {[Paren|Bef], Aft},[{move_rel,-(N+1)},{insert_chars, unicode,[Paren]}|Rs]}; + % N is likely 0 + N -> {blink,N+1,{Bef,Aft}, + [{move_rel,-(N+1)}|Rs]} + end; +do_op(forward_delete_char, Bef, [_|Aft], Rs) -> + {{Bef,Aft},[{delete_chars,1}|Rs]}; +do_op(backward_delete_char, [_|Bef], Aft, Rs) -> + {{Bef,Aft},[{delete_chars,-1}|Rs]}; +do_op(transpose_char, [C1,C2|Bef], [], Rs) -> + {{[C2,C1|Bef],[]},[{put_chars, unicode,[C1,C2]},{move_rel,-2}|Rs]}; +do_op(transpose_char, [C2|Bef], [C1|Aft], Rs) -> + {{[C2,C1|Bef],Aft},[{put_chars, unicode,[C1,C2]},{move_rel,-1}|Rs]}; +do_op(kill_word, Bef, Aft0, Rs) -> + {Aft1,Kill0,N0} = over_non_word(Aft0, [], 0), + {Aft,Kill,N} = over_word(Aft1, Kill0, N0), + put(kill_buffer, reverse(Kill)), + {{Bef,Aft},[{delete_chars,N}|Rs]}; +do_op(backward_kill_word, Bef0, Aft, Rs) -> + {Bef1,Kill0,N0} = over_non_word(Bef0, [], 0), + {Bef,Kill,N} = over_word(Bef1, Kill0, N0), + put(kill_buffer, Kill), + {{Bef,Aft},[{delete_chars,-N}|Rs]}; +do_op(kill_line, Bef, Aft, Rs) -> + put(kill_buffer, Aft), + {{Bef,[]},[{delete_chars,length(Aft)}|Rs]}; +do_op(yank, Bef, [], Rs) -> + Kill = get(kill_buffer), + {{reverse(Kill, Bef),[]},[{put_chars, unicode,Kill}|Rs]}; +do_op(yank, Bef, Aft, Rs) -> + Kill = get(kill_buffer), + {{reverse(Kill, Bef),Aft},[{insert_chars, unicode,Kill}|Rs]}; +do_op(forward_char, Bef, [C|Aft], Rs) -> + {{[C|Bef],Aft},[{move_rel,1}|Rs]}; +do_op(backward_char, [C|Bef], Aft, Rs) -> + {{Bef,[C|Aft]},[{move_rel,-1}|Rs]}; +do_op(forward_word, Bef0, Aft0, Rs) -> + {Aft1,Bef1,N0} = over_non_word(Aft0, Bef0, 0), + {Aft,Bef,N} = over_word(Aft1, Bef1, N0), + {{Bef,Aft},[{move_rel,N}|Rs]}; +do_op(backward_word, Bef0, Aft0, Rs) -> + {Bef1,Aft1,N0} = over_non_word(Bef0, Aft0, 0), + {Bef,Aft,N} = over_word(Bef1, Aft1, N0), + {{Bef,Aft},[{move_rel,-N}|Rs]}; +do_op(beginning_of_line, [C|Bef], Aft, Rs) -> + {{[],reverse(Bef, [C|Aft])},[{move_rel,-(length(Bef)+1)}|Rs]}; +do_op(beginning_of_line, [], Aft, Rs) -> + {{[],Aft},Rs}; +do_op(end_of_line, Bef, [C|Aft], Rs) -> + {{reverse(Aft, [C|Bef]),[]},[{move_rel,length(Aft)+1}|Rs]}; +do_op(end_of_line, Bef, [], Rs) -> + {{Bef,[]},Rs}; +do_op(beep, Bef, Aft, Rs) -> + {{Bef,Aft},[beep|Rs]}; +do_op(_, Bef, Aft, Rs) -> + {{Bef,Aft},[beep|Rs]}. + +%% over_word(Chars, InitialStack, InitialCount) -> +%% {RemainingChars,CharStack,Count} +%% over_non_word(Chars, InitialStack, InitialCount) -> +%% {RemainingChars,CharStack,Count} +%% Step over word/non-word characters pushing the stepped over ones on +%% the stack. + +over_word([C|Cs], Stack, N) -> + case word_char(C) of + true -> over_word(Cs, [C|Stack], N+1); + false -> {[C|Cs],Stack,N} + end; +over_word([], Stack, N) when is_integer(N) -> + {[],Stack,N}. + +over_non_word([C|Cs], Stack, N) -> + case word_char(C) of + true -> {[C|Cs],Stack,N}; + false -> over_non_word(Cs, [C|Stack], N+1) + end; +over_non_word([], Stack, N) -> + {[],Stack,N}. + +word_char(C) when C >= $A, C =< $Z -> true; +word_char(C) when C >= $À, C =< $Þ, C =/= $× -> true; +word_char(C) when C >= $a, C =< $z -> true; +word_char(C) when C >= $ß, C =< $ÿ, C =/= $÷ -> true; +word_char(C) when C >= $0, C =< $9 -> true; +word_char(C) when C =:= $_ -> true; +word_char(C) when C =:= $. -> true; % accept dot-separated names +word_char(_) -> false. + +%% over_white(Chars, InitialStack, InitialCount) -> +%% {RemainingChars,CharStack,Count} + +%% over_white([$\s|Cs], Stack, N) -> +%% over_white(Cs, [$\s|Stack], N+1); +%% over_white([$\t|Cs], Stack, N) -> +%% over_white(Cs, [$\t|Stack], N+1); +%% over_white(Cs, Stack, N) -> +%% {Cs,Stack,N}. + +%% over_paren(Chars, Paren, Match) +%% over_paren(Chars, Paren, Match, Depth, N) +%% Step over parentheses until matching Paren is found at depth 0. Don't +%% do proper parentheses matching check. Paren has NOT been added. + +over_paren(Chars, Paren, Match) -> + over_paren(Chars, Paren, Match, 1, 1, []). + + +over_paren([C,$$,$$|Cs], Paren, Match, D, N, L) -> + over_paren([C|Cs], Paren, Match, D, N+2, L); +over_paren([_,$$|Cs], Paren, Match, D, N, L) -> + over_paren(Cs, Paren, Match, D, N+2, L); +over_paren([Match|_], _Paren, Match, 1, N, _) -> + N; +over_paren([Match|Cs], Paren, Match, D, N, [Match|L]) -> + over_paren(Cs, Paren, Match, D-1, N+1, L); +over_paren([Paren|Cs], Paren, Match, D, N, L) -> + over_paren(Cs, Paren, Match, D+1, N+1, [Match|L]); + +over_paren([$)|Cs], Paren, Match, D, N, L) -> + over_paren(Cs, Paren, Match, D, N+1, [$(|L]); +over_paren([$]|Cs], Paren, Match, D, N, L) -> + over_paren(Cs, Paren, Match, D, N+1, [$[|L]); +over_paren([$}|Cs], Paren, Match, D, N, L) -> + over_paren(Cs, Paren, Match, D, N+1, [${|L]); + +over_paren([$(|Cs], Paren, Match, D, N, [$(|L]) -> + over_paren(Cs, Paren, Match, D, N+1, L); +over_paren([$[|Cs], Paren, Match, D, N, [$[|L]) -> + over_paren(Cs, Paren, Match, D, N+1, L); +over_paren([${|Cs], Paren, Match, D, N, [${|L]) -> + over_paren(Cs, Paren, Match, D, N+1, L); + +over_paren([$(|_], _, _, _, _, _) -> + beep; +over_paren([$[|_], _, _, _, _, _) -> + beep; +over_paren([${|_], _, _, _, _, _) -> + beep; + +over_paren([_|Cs], Paren, Match, D, N, L) -> + over_paren(Cs, Paren, Match, D, N+1, L); +over_paren([], _, _, _, _, _) -> + 0. + +over_paren_auto(Chars) -> + over_paren_auto(Chars, 1, 1, []). + + +over_paren_auto([C,$$,$$|Cs], D, N, L) -> + over_paren_auto([C|Cs], D, N+2, L); +over_paren_auto([_,$$|Cs], D, N, L) -> + over_paren_auto(Cs, D, N+2, L); + +over_paren_auto([$(|_], _, N, []) -> + {N, $)}; +over_paren_auto([$[|_], _, N, []) -> + {N, $]}; +over_paren_auto([${|_], _, N, []) -> + {N, $}}; + +over_paren_auto([$)|Cs], D, N, L) -> + over_paren_auto(Cs, D, N+1, [$(|L]); +over_paren_auto([$]|Cs], D, N, L) -> + over_paren_auto(Cs, D, N+1, [$[|L]); +over_paren_auto([$}|Cs], D, N, L) -> + over_paren_auto(Cs, D, N+1, [${|L]); + +over_paren_auto([$(|Cs], D, N, [$(|L]) -> + over_paren_auto(Cs, D, N+1, L); +over_paren_auto([$[|Cs], D, N, [$[|L]) -> + over_paren_auto(Cs, D, N+1, L); +over_paren_auto([${|Cs], D, N, [${|L]) -> + over_paren_auto(Cs, D, N+1, L); + +over_paren_auto([_|Cs], D, N, L) -> + over_paren_auto(Cs, D, N+1, L); +over_paren_auto([], _, _, _) -> + 0. + +%% erase_line(Line) +%% erase_inp(Line) +%% redraw_line(Line) +%% length_before(Line) +%% length_after(Line) +%% prompt(Line) +%% Various functions for accessing bits of a line. + +erase_line({line,Pbs,{Bef,Aft},_}) -> + reverse(erase(Pbs, Bef, Aft, [])). + +erase_inp({line,_,{Bef,Aft},_}) -> + reverse(erase([], Bef, Aft, [])). + +erase(Pbs, Bef, Aft, Rs) -> + [{delete_chars,-length(Pbs)-length(Bef)},{delete_chars,length(Aft)}|Rs]. + +redraw_line({line,Pbs,{Bef,Aft},_}) -> + reverse(redraw(Pbs, Bef, Aft, [])). + +redraw(Pbs, Bef, Aft, Rs) -> + [{move_rel,-length(Aft)},{put_chars, unicode,reverse(Bef, Aft)},{put_chars, unicode,Pbs}|Rs]. + +length_before({line,Pbs,{Bef,_Aft},_}) -> + length(Pbs) + length(Bef). + +length_after({line,_,{_Bef,Aft},_}) -> + length(Aft). + +prompt({line,Pbs,_,_}) -> + Pbs. + +%% %% expand(CurrentBefore) -> +%% %% {yes,Expansion} | no +%% %% Try to expand the word before as either a module name or a function +%% %% name. We can handle white space around the seperating ':' but the +%% %% function name must be on the same line. CurrentBefore is reversed +%% %% and over_word/3 reverses the characters it finds. In certain cases +%% %% possible expansions are printed. + +%% expand(Bef0) -> +%% {Bef1,Word,_} = over_word(Bef0, [], 0), +%% case over_white(Bef1, [], 0) of +%% {[$:|Bef2],_White,_Nwh} -> +%% {Bef3,_White1,_Nwh1} = over_white(Bef2, [], 0), +%% {_,Mod,_Nm} = over_word(Bef3, [], 0), +%% expand_function_name(Mod, Word); +%% {_,_,_} -> +%% expand_module_name(Word) +%% end. + +%% expand_module_name(Prefix) -> +%% match(Prefix, code:all_loaded(), ":"). + +%% expand_function_name(ModStr, FuncPrefix) -> +%% Mod = list_to_atom(ModStr), +%% case erlang:module_loaded(Mod) of +%% true -> +%% L = apply(Mod, module_info, []), +%% case keysearch(exports, 1, L) of +%% {value, {_, Exports}} -> +%% match(FuncPrefix, Exports, "("); +%% _ -> +%% no +%% end; +%% false -> +%% no +%% end. + +%% match(Prefix, Alts, Extra) -> +%% Matches = match1(Prefix, Alts), +%% case longest_common_head([N || {N,_} <- Matches]) of +%% {partial, []} -> +%% print_matches(Matches), +%% no; +%% {partial, Str} -> +%% case nthtail(length(Prefix), Str) of +%% [] -> +%% print_matches(Matches), +%% {yes, []}; +%% Remain -> +%% {yes, Remain} +%% end; +%% {complete, Str} -> +%% {yes, nthtail(length(Prefix), Str) ++ Extra}; +%% no -> +%% no +%% end. + +%% %% Print the list of names L in multiple columns. +%% print_matches(L) -> +%% io:nl(), +%% col_print(lists:sort(L)), +%% ok. + +%% col_print([]) -> ok; +%% col_print(L) -> col_print(L, field_width(L), 0). + +%% col_print(X, Width, Len) when Width + Len > 79 -> +%% io:nl(), +%% col_print(X, Width, 0); +%% col_print([{H0,A}|T], Width, Len) -> +%% H = if +%% %% If the second element is an integer, we assume it's an +%% %% arity, and meant to be printed. +%% integer(A) -> +%% H0 ++ "/" ++ integer_to_list(A); +%% true -> +%% H0 +%% end, +%% io:format("~-*s",[Width,H]), +%% col_print(T, Width, Len+Width); +%% col_print([], _, _) -> +%% io:nl(). + +%% field_width([{H,_}|T]) -> field_width(T, length(H)). + +%% field_width([{H,_}|T], W) -> +%% case length(H) of +%% L when L > W -> field_width(T, L); +%% _ -> field_width(T, W) +%% end; +%% field_width([], W) when W < 40 -> +%% W + 4; +%% field_width([], _) -> +%% 40. + +%% match1(Prefix, Alts) -> +%% match1(Prefix, Alts, []). + +%% match1(Prefix, [{H,A}|T], L) -> +%% case prefix(Prefix, Str = atom_to_list(H)) of +%% true -> +%% match1(Prefix, T, [{Str,A}|L]); +%% false -> +%% match1(Prefix, T, L) +%% end; +%% match1(_, [], L) -> +%% L. + +%% longest_common_head([]) -> +%% no; +%% longest_common_head(LL) -> +%% longest_common_head(LL, []). + +%% longest_common_head([[]|_], L) -> +%% {partial, reverse(L)}; +%% longest_common_head(LL, L) -> +%% case same_head(LL) of +%% true -> +%% [[H|_]|_] = LL, +%% LL1 = all_tails(LL), +%% case all_nil(LL1) of +%% false -> +%% longest_common_head(LL1, [H|L]); +%% true -> +%% {complete, reverse([H|L])} +%% end; +%% false -> +%% {partial, reverse(L)} +%% end. + +%% same_head([[H|_]|T1]) -> same_head(H, T1). + +%% same_head(H, [[H|_]|T]) -> same_head(H, T); +%% same_head(_, []) -> true; +%% same_head(_, _) -> false. + +%% all_tails(LL) -> all_tails(LL, []). + +%% all_tails([[_|T]|T1], L) -> all_tails(T1, [T|L]); +%% all_tails([], L) -> L. + +%% all_nil([]) -> true; +%% all_nil([[] | Rest]) -> all_nil(Rest); +%% all_nil(_) -> false. diff --git a/lib/stdlib/src/edlin_expand.erl b/lib/stdlib/src/edlin_expand.erl new file mode 100644 index 0000000000..7ed76a6b09 --- /dev/null +++ b/lib/stdlib/src/edlin_expand.erl @@ -0,0 +1,168 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(edlin_expand). + +%% a default expand function for edlin, expanding modules and functions + +-export([expand/1, format_matches/1]). + +-import(lists, [reverse/1, nthtail/2, prefix/2]). + +%% expand(CurrentBefore) -> +%% {yes, Expansion, Matches} | {no, Matches} +%% Try to expand the word before as either a module name or a function +%% name. We can handle white space around the seperating ':' but the +%% function name must be on the same line. CurrentBefore is reversed +%% and over_word/3 reverses the characters it finds. In certain cases +%% possible expansions are printed. +expand(Bef0) -> + {Bef1,Word,_} = edlin:over_word(Bef0, [], 0), + case over_white(Bef1, [], 0) of + {[$:|Bef2],_White,_Nwh} -> + {Bef3,_White1,_Nwh1} = over_white(Bef2, [], 0), + {_,Mod,_Nm} = edlin:over_word(Bef3, [], 0), + expand_function_name(Mod, Word); + {_,_,_} -> + expand_module_name(Word) + end. + +expand_module_name(Prefix) -> + match(Prefix, code:all_loaded(), ":"). + +expand_function_name(ModStr, FuncPrefix) -> + Mod = list_to_atom(ModStr), + case erlang:module_loaded(Mod) of + true -> + L = Mod:module_info(), + case lists:keyfind(exports, 1, L) of + {_, Exports} -> + match(FuncPrefix, Exports, "("); + _ -> + {no, [], []} + end; + false -> + {no, [], []} + end. + +match(Prefix, Alts, Extra) -> + Len = length(Prefix), + Matches = [{S, A} || {H, A} <- Alts, prefix(Prefix, S=atom_to_list(H))], + case longest_common_head([N || {N, _} <- Matches]) of + {partial, []} -> + {no, [], Matches}; % format_matches(Matches)}; + {partial, Str} -> + case nthtail(Len, Str) of + [] -> + {yes, [], Matches}; % format_matches(Matches)}; + Remain -> + {yes, Remain, []} + end; + {complete, Str} -> + {yes, nthtail(Len, Str) ++ Extra, []}; + no -> + {no, [], []} + end. + +%% Return the list of names L in multiple columns. +format_matches(L) -> + S = format_col(lists:sort(L), []), + ["\n" | S]. + +format_col([], _) -> []; +format_col(L, Acc) -> format_col(L, field_width(L), 0, Acc). + +format_col(X, Width, Len, Acc) when Width + Len > 79 -> + format_col(X, Width, 0, ["\n" | Acc]); +format_col([A|T], Width, Len, Acc0) -> + H = case A of + %% If it's a tuple {string(), integer()}, we assume it's an + %% arity, and meant to be printed. + {H0, I} when is_integer(I) -> + H0 ++ "/" ++ integer_to_list(I); + {H1, _} -> H1; + H2 -> H2 + end, + Acc = [io_lib:format("~-*s", [Width,H]) | Acc0], + format_col(T, Width, Len+Width, Acc); +format_col([], _, _, Acc) -> + lists:reverse(Acc, "\n"). + +field_width(L) -> field_width(L, 0). + +field_width([{H,_}|T], W) -> + case length(H) of + L when L > W -> field_width(T, L); + _ -> field_width(T, W) + end; +field_width([H|T], W) -> + case length(H) of + L when L > W -> field_width(T, L); + _ -> field_width(T, W) + end; +field_width([], W) when W < 40 -> + W + 4; +field_width([], _) -> + 40. + +longest_common_head([]) -> + no; +longest_common_head(LL) -> + longest_common_head(LL, []). + +longest_common_head([[]|_], L) -> + {partial, reverse(L)}; +longest_common_head(LL, L) -> + case same_head(LL) of + true -> + [[H|_]|_] = LL, + LL1 = all_tails(LL), + case all_nil(LL1) of + false -> + longest_common_head(LL1, [H|L]); + true -> + {complete, reverse([H|L])} + end; + false -> + {partial, reverse(L)} + end. + +same_head([[H|_]|T1]) -> same_head(H, T1). + +same_head(H, [[H|_]|T]) -> same_head(H, T); +same_head(_, []) -> true; +same_head(_, _) -> false. + +all_tails(LL) -> all_tails(LL, []). + +all_tails([[_|T]|T1], L) -> all_tails(T1, [T|L]); +all_tails([], L) -> L. + +all_nil([]) -> true; +all_nil([[] | Rest]) -> all_nil(Rest); +all_nil(_) -> false. + +%% over_white(Chars, InitialStack, InitialCount) -> +%% {RemainingChars,CharStack,Count}. + +over_white([$\s|Cs], Stack, N) -> + over_white(Cs, [$\s|Stack], N+1); +over_white([$\t|Cs], Stack, N) -> + over_white(Cs, [$\t|Stack], N+1); +over_white(Cs, Stack, N) when is_list(Cs) -> + {Cs,Stack,N}. diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl new file mode 100644 index 0000000000..8b702c005b --- /dev/null +++ b/lib/stdlib/src/epp.erl @@ -0,0 +1,1146 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% + +-module(epp). + +%% An Erlang code preprocessor. + +-export([open/2,open/3,open/5,close/1,format_error/1]). +-export([scan_erl_form/1,parse_erl_form/1,macro_defs/1]). +-export([parse_file/1, parse_file/3]). +-export([interpret_file_attribute/1]). +-export([normalize_typed_record_fields/1,restore_typed_record_fields/1]). + +%%------------------------------------------------------------------------ + +-type macros() :: [{atom(), term()}]. + +%% Epp state record. +-record(epp, {file, %Current file + location, %Current location + name="", %Current file name + istk=[], %Ifdef stack + sstk=[], %State stack + path=[], %Include-path + macs = dict:new() :: dict(), %Macros (don't care locations) + uses = dict:new() :: dict(), %Macro use structure + pre_opened = false :: boolean() + }). + +%%% Note on representation: as tokens, both {var, Location, Name} and +%%% {atom, Location, Name} can occur as macro identifiers. However, keeping +%%% this distinction here is done for historical reasons only: previously, +%%% ?FOO and ?'FOO' were not the same, but now they are. Removing the +%%% distinction in the internal representation would simplify the code +%%% a little. + +%% open(FileName, IncludePath) +%% open(FileName, IncludePath, PreDefMacros) +%% open(FileName, IoDevice, StartLocation, IncludePath, PreDefMacros) +%% close(Epp) +%% scan_erl_form(Epp) +%% parse_erl_form(Epp) +%% parse_file(Epp) +%% parse_file(FileName, IncludePath, PreDefMacros) +%% macro_defs(Epp) + +-spec open(file:name(), [file:name()]) -> + {'ok', pid()} | {'error', term()}. + +open(Name, Path) -> + open(Name, Path, []). + +-spec open(file:name(), [file:name()], macros()) -> + {'ok', pid()} | {'error', term()}. + +open(Name, Path, Pdm) -> + Self = self(), + Epp = spawn(fun() -> server(Self, Name, Path, Pdm) end), + epp_request(Epp). + +open(Name, File, StartLocation, Path, Pdm) -> + Self = self(), + Epp = spawn(fun() -> server(Self, Name, File, StartLocation,Path,Pdm) end), + epp_request(Epp). + +-spec close(pid()) -> 'ok'. + +close(Epp) -> + %% Make sure that close is synchronous as a courtesy to test + %% cases that test for resource leaks. + Ref = erlang:monitor(process, Epp), + R = epp_request(Epp, close), + receive {'DOWN',Ref,_,_,_} -> ok end, + R. + +scan_erl_form(Epp) -> + epp_request(Epp, scan_erl_form). + +parse_erl_form(Epp) -> + case epp_request(Epp, scan_erl_form) of + {ok,Toks} -> + erl_parse:parse_form(Toks); + Other -> + Other + end. + +macro_defs(Epp) -> + epp_request(Epp, macro_defs). + +%% format_error(ErrorDescriptor) -> String +%% Return a string describing the error. + +format_error(cannot_parse) -> + io_lib:format("cannot parse file, giving up", []); +format_error({bad,W}) -> + io_lib:format("badly formed '~s'", [W]); +format_error({call,What}) -> + io_lib:format("illegal macro call '~s'",[What]); +format_error({undefined,M}) -> + io_lib:format("undefined macro '~w'", [M]); +format_error({depth,What}) -> + io_lib:format("~s too deep",[What]); +format_error({mismatch,M}) -> + io_lib:format("argument mismatch for macro '~w'", [M]); +format_error({arg_error,M}) -> + io_lib:format("badly formed argument for macro '~w'", [M]); +format_error({redefine,M}) -> + io_lib:format("redefining macro '~w'", [M]); +format_error({circular,M}) -> + io_lib:format("circular macro '~w'", [M]); +format_error({include,W,F}) -> + io_lib:format("can't find include ~s \"~s\"", [W,F]); +format_error({illegal,How,What}) -> + io_lib:format("~s '-~s'", [How,What]); +format_error({'NYI',What}) -> + io_lib:format("not yet implemented '~s'", [What]); +format_error(E) -> file:format_error(E). + +%% parse_file(FileName, IncludePath, [PreDefMacro]) -> +%% {ok,[Form]} | {error,OpenError} + +parse_file(Ifile, Path, Predefs) -> + case open(Ifile, Path, Predefs) of + {ok,Epp} -> + Forms = parse_file(Epp), + close(Epp), + {ok,Forms}; + {error,E} -> + {error,E} + end. + +%% parse_file(Epp) -> +%% [Form] + +parse_file(Epp) -> + case parse_erl_form(Epp) of + {ok,Form} -> + case Form of + {attribute,La,record,{Record, Fields}} -> + case normalize_typed_record_fields(Fields) of + {typed, NewFields} -> + [{attribute, La, record, {Record, NewFields}}, + {attribute, La, type, + {{record, Record}, Fields, []}} + |parse_file(Epp)]; + not_typed -> + [Form|parse_file(Epp)] + end; + _ -> + [Form|parse_file(Epp)] + end; + {error,E} -> + [{error,E}|parse_file(Epp)]; + {eof,Location} -> + [{eof,Location}] + end. + +normalize_typed_record_fields(Fields) -> + normalize_typed_record_fields(Fields, [], false). + +normalize_typed_record_fields([], NewFields, Typed) -> + case Typed of + true -> {typed, lists:reverse(NewFields)}; + false -> not_typed + end; +normalize_typed_record_fields([{typed_record_field,Field,_}|Rest], + NewFields, _Typed) -> + normalize_typed_record_fields(Rest, [Field|NewFields], true); +normalize_typed_record_fields([Field|Rest], NewFields, Typed) -> + normalize_typed_record_fields(Rest, [Field|NewFields], Typed). + +restore_typed_record_fields([]) -> + []; +restore_typed_record_fields([{attribute,La,record,{Record,_NewFields}}, + {attribute,La,type,{{record,Record},Fields,[]}}| + Forms]) -> + [{attribute,La,record,{Record,Fields}}| + restore_typed_record_fields(Forms)]; +restore_typed_record_fields([{attribute,La,type,{{record,Record},Fields,[]}}| + Forms]) -> + %% This clause is due to the compiler's 'E' option. + %% Record information kept by erl_expand_records. + [{attribute,La,record,{Record,Fields}}| + restore_typed_record_fields(Forms)]; +restore_typed_record_fields([Form|Forms]) -> + [Form|restore_typed_record_fields(Forms)]. + +%% server(StarterPid, FileName, Path, PreDefMacros) + +server(Pid, Name, Path, Pdm) -> + process_flag(trap_exit, true), + case file:open(Name, [read]) of + {ok,File} -> + Location = 1, + init_server(Pid, Name, File, Location, Path, Pdm, false); + {error,E} -> + epp_reply(Pid, {error,E}) + end. + +%% server(StarterPid, FileName, IoDevice, Location, Path, PreDefMacros) +server(Pid, Name, File, AtLocation, Path, Pdm) -> + process_flag(trap_exit, true), + init_server(Pid, Name, File, AtLocation, Path, Pdm, true). + +init_server(Pid, Name, File, AtLocation, Path, Pdm, Pre) -> + Ms0 = predef_macros(Name), + case user_predef(Pdm, Ms0) of + {ok,Ms1} -> + epp_reply(Pid, {ok,self()}), + St = #epp{file=File, location=AtLocation, name=Name, + path=Path, macs=Ms1, pre_opened = Pre}, + From = wait_request(St), + enter_file_reply(From, Name, AtLocation, AtLocation), + wait_req_scan(St); + {error,E} -> + epp_reply(Pid, {error,E}) + end. + +%% predef_macros(FileName) -> Macrodict +%% Initialise the macro dictionary with the default predefined macros, +%% FILE, LINE, MODULE as undefined, MACHINE and MACHINE value. + +predef_macros(File) -> + Machine = list_to_atom(erlang:system_info(machine)), + dict:from_list([ + {{atom,'FILE'}, {none,[{string,1,File}]}}, + {{atom,'LINE'}, {none,[{integer,1,1}]}}, + {{atom,'MODULE'}, undefined}, + {{atom,'MODULE_STRING'}, undefined}, + {{atom,'BASE_MODULE'}, undefined}, + {{atom,'BASE_MODULE_STRING'}, undefined}, + {{atom,'MACHINE'}, {none,[{atom,1,Machine}]}}, + {{atom,Machine}, {none,[{atom,1,true}]}} + ]). + +%% user_predef(PreDefMacros, Macros) -> +%% {ok,MacroDict} | {error,E} +%% Add the predefined macros to the macros dictionary. A macro without a +%% value gets the value 'true'. + +user_predef([{M,Val,redefine}|Pdm], Ms) when is_atom(M) -> + Exp = erl_parse:tokens(erl_parse:abstract(Val)), + user_predef(Pdm, dict:store({atom,M}, {none,Exp}, Ms)); +user_predef([{M,Val}|Pdm], Ms) when is_atom(M) -> + case dict:find({atom,M}, Ms) of + {ok,_Def} -> + {error,{redefine,M}}; + error -> + Exp = erl_parse:tokens(erl_parse:abstract(Val)), + user_predef(Pdm, dict:store({atom,M}, {none,Exp}, Ms)) + end; +user_predef([M|Pdm], Ms) when is_atom(M) -> + case dict:find({atom,M}, Ms) of + {ok,_Def} -> + {error,{redefine,M}}; + error -> + user_predef(Pdm, dict:store({atom,M}, {none,[{atom,1,true}]}, Ms)) + end; +user_predef([Md|_Pdm], _Ms) -> {error,{bad,Md}}; +user_predef([], Ms) -> {ok,Ms}. + +%% wait_request(EppState) -> RequestFrom +%% wait_req_scan(EppState) +%% wait_req_skip(EppState, SkipIstack) +%% Handle requests, processing trivial requests directly. Either return +%% requestor or scan/skip tokens. + +wait_request(St) -> + receive + {epp_request,From,scan_erl_form} -> From; + {epp_request,From,macro_defs} -> + epp_reply(From, dict:to_list(St#epp.macs)), + wait_request(St); + {epp_request,From,close} -> + close_file(St), + epp_reply(From, ok), + exit(normal); + {'EXIT',_,R} -> + exit(R); + Other -> + io:fwrite("Epp: unknown '~w'\n", [Other]), + wait_request(St) + end. + +close_file(#epp{pre_opened = true}) -> + ok; +close_file(#epp{pre_opened = false, file = File}) -> + ok = file:close(File). + +wait_req_scan(St) -> + From = wait_request(St), + scan_toks(From, St). + +wait_req_skip(St, Sis) -> + From = wait_request(St), + skip_toks(From, St, Sis). + +%% enter_file(Path, FileName, IncludeToken, From, EppState) +%% leave_file(From, EppState) +%% Handle entering and leaving included files. Notify caller when the +%% current file is changed. Note it is an error to exit a file if we are +%% in a conditional. These functions never return. + +enter_file(_Path, _NewName, Inc, From, St) + when length(St#epp.sstk) >= 8 -> + epp_reply(From, {error,{abs_loc(Inc),epp,{depth,"include"}}}), + wait_req_scan(St); +enter_file(Path, NewName, Inc, From, St) -> + case file:path_open(Path, NewName, [read]) of + {ok,NewF,Pname} -> + Loc = start_loc(St#epp.location), + wait_req_scan(enter_file2(NewF, Pname, From, St, Loc)); + {error,_E} -> + epp_reply(From, {error,{abs_loc(Inc),epp,{include,file,NewName}}}), + wait_req_scan(St) + end. + +%% enter_file2(File, FullName, From, EppState, AtLocation) -> EppState. +%% Set epp to use this file and "enter" it. + +enter_file2(NewF, Pname, From, St, AtLocation) -> + enter_file2(NewF, Pname, From, St, AtLocation, []). + +enter_file2(NewF, Pname, From, St, AtLocation, ExtraPath) -> + Loc = start_loc(AtLocation), + enter_file_reply(From, Pname, Loc, AtLocation), + Ms = dict:store({atom,'FILE'}, {none,[{string,Loc,Pname}]}, St#epp.macs), + Path = St#epp.path ++ ExtraPath, + #epp{location=Loc,file=NewF, + name=Pname,sstk=[St|St#epp.sstk],path=Path,macs=Ms}. + +enter_file_reply(From, Name, Location, AtLocation) -> + Attr = loc_attr(AtLocation), + Rep = {ok, [{'-',Attr},{atom,Attr,file},{'(',Attr}, + {string,Attr,file_name(Name)},{',',Attr}, + {integer,Attr,get_line(Location)},{')',Location}, + {dot,Attr}]}, + epp_reply(From, Rep). + +%% Flatten filename to a string. Must be a valid filename. + +file_name([C | T]) when is_integer(C), C > 0, C =< 255 -> + [C | file_name(T)]; +file_name([H|T]) -> + file_name(H) ++ file_name(T); +file_name([]) -> + []; +file_name(N) when is_atom(N) -> + atom_to_list(N). + +leave_file(From, St) -> + case St#epp.istk of + [I|Cis] -> + epp_reply(From, + {error,{St#epp.location,epp, + {illegal,"unterminated",I}}}), + leave_file(wait_request(St),St#epp{istk=Cis}); + [] -> + case St#epp.sstk of + [OldSt|Sts] -> + close_file(St), + enter_file_reply(From, OldSt#epp.name, + OldSt#epp.location, OldSt#epp.location), + Ms = dict:store({atom,'FILE'}, + {none, + [{string,OldSt#epp.location, + OldSt#epp.name}]}, + St#epp.macs), + wait_req_scan(OldSt#epp{sstk=Sts,macs=Ms}); + [] -> + epp_reply(From, {eof,St#epp.location}), + wait_req_scan(St) + end + end. + +%% scan_toks(From, EppState) +%% scan_toks(Tokens, From, EppState) + +scan_toks(From, St) -> + case io:scan_erl_form(St#epp.file, '', St#epp.location) of + {ok,Toks,Cl} -> + scan_toks(Toks, From, St#epp{location=Cl}); + {error,E,Cl} -> + epp_reply(From, {error,E}), + wait_req_scan(St#epp{location=Cl}); + {eof,Cl} -> + leave_file(From, St#epp{location=Cl}); + {error,_E} -> + epp_reply(From, {error,{St#epp.location,epp,cannot_parse}}), + leave_file(From, St) %This serious, just exit! + end. + +scan_toks([{'-',_Lh},{atom,_Ld,define}=Define|Toks], From, St) -> + scan_define(Toks, Define, From, St); +scan_toks([{'-',_Lh},{atom,_Ld,undef}=Undef|Toks], From, St) -> + scan_undef(Toks, Undef, From, St); +scan_toks([{'-',_Lh},{atom,_Li,include}=Inc|Toks], From, St) -> + scan_include(Toks, Inc, From, St); +scan_toks([{'-',_Lh},{atom,_Li,include_lib}=IncLib|Toks], From, St) -> + scan_include_lib(Toks, IncLib, From, St); +scan_toks([{'-',_Lh},{atom,_Li,ifdef}=IfDef|Toks], From, St) -> + scan_ifdef(Toks, IfDef, From, St); +scan_toks([{'-',_Lh},{atom,_Li,ifndef}=IfnDef|Toks], From, St) -> + scan_ifndef(Toks, IfnDef, From, St); +scan_toks([{'-',_Lh},{atom,_Le,'else'}=Else|Toks], From, St) -> + scan_else(Toks, Else, From, St); +scan_toks([{'-',_Lh},{'if',_Le}=If|Toks], From, St) -> + scan_if(Toks, If, From, St); +scan_toks([{'-',_Lh},{atom,_Le,elif}=Elif|Toks], From, St) -> + scan_elif(Toks, Elif, From, St); +scan_toks([{'-',_Lh},{atom,_Le,endif}=Endif|Toks], From, St) -> + scan_endif(Toks, Endif, From, St); +scan_toks([{'-',_Lh},{atom,_Lf,file}=FileToken|Toks0], From, St) -> + case catch expand_macros(Toks0, {St#epp.macs, St#epp.uses}) of + Toks1 when is_list(Toks1) -> + scan_file(Toks1, FileToken, From, St); + {error,ErrL,What} -> + epp_reply(From, {error,{ErrL,epp,What}}), + wait_req_scan(St) + end; +scan_toks(Toks0, From, St) -> + case catch expand_macros(Toks0, {St#epp.macs, St#epp.uses}) of + Toks1 when is_list(Toks1) -> + epp_reply(From, {ok,Toks1}), + wait_req_scan(St#epp{macs=scan_module(Toks1, St#epp.macs)}); + {error,ErrL,What} -> + epp_reply(From, {error,{ErrL,epp,What}}), + wait_req_scan(St) + end. + +scan_module([{'-',_Lh},{atom,_Lm,module},{'(',_Ll}|Ts], Ms) -> + scan_module_1(Ts, [], Ms); +scan_module([{'-',_Lh},{atom,_Lm,extends},{'(',_Ll}|Ts], Ms) -> + scan_extends(Ts, [], Ms); +scan_module(_Ts, Ms) -> Ms. + +scan_module_1([{atom,_,_}=A,{',',L}|Ts], As, Ms) -> + %% Parameterized modules. + scan_module_1([A,{')',L}|Ts], As, Ms); +scan_module_1([{atom,Ln,A},{')',_Lr}|_Ts], As, Ms0) -> + Mod = lists:concat(lists:reverse([A|As])), + Ms = dict:store({atom,'MODULE'}, + {none,[{atom,Ln,list_to_atom(Mod)}]}, Ms0), + dict:store({atom,'MODULE_STRING'}, {none,[{string,Ln,Mod}]}, Ms); +scan_module_1([{atom,_Ln,A},{'.',_Lr}|Ts], As, Ms) -> + scan_module_1(Ts, [".",A|As], Ms); +scan_module_1([{'.',_Lr}|Ts], As, Ms) -> + scan_module_1(Ts, As, Ms); +scan_module_1(_Ts, _As, Ms) -> Ms. + +scan_extends([{atom,Ln,A},{')',_Lr}|_Ts], As, Ms0) -> + Mod = lists:concat(lists:reverse([A|As])), + Ms = dict:store({atom,'BASE_MODULE'}, + {none,[{atom,Ln,list_to_atom(Mod)}]}, Ms0), + dict:store({atom,'BASE_MODULE_STRING'}, {none,[{string,Ln,Mod}]}, Ms); +scan_extends([{atom,_Ln,A},{'.',_Lr}|Ts], As, Ms) -> + scan_extends(Ts, [".",A|As], Ms); +scan_extends([{'.',_Lr}|Ts], As, Ms) -> + scan_extends(Ts, As, Ms); +scan_extends(_Ts, _As, Ms) -> Ms. + +%% scan_define(Tokens, DefineToken, From, EppState) + +scan_define([{'(',_Lp},{atom,_Lm,M}=Mac,{',',_Lc}|Toks], _Def, From, St) -> + case dict:find({atom,M}, St#epp.macs) of + {ok,_OldDef} -> + epp_reply(From, {error,{loc(Mac),epp,{redefine,M}}}), + wait_req_scan(St); + error -> + scan_define_cont(From, St, + {atom, M}, + {none,macro_expansion(Toks)}) + end; +scan_define([{'(',_Lp},{atom,_Lm,M}=Mac,{'(',_Lc}|Toks], Def, From, St) -> + case dict:find({atom,M}, St#epp.macs) of + {ok,_Def} -> + epp_reply(From, {error,{loc(Mac),epp,{redefine,M}}}), + wait_req_scan(St); + error -> + case catch macro_pars(Toks, []) of + {ok, {As, Me}} -> + scan_define_cont(From, St, + {atom, M}, + {As, Me}); + _ -> + epp_reply(From, {error,{loc(Def),epp,{bad,define}}}), + wait_req_scan(St) + end + end; +scan_define([{'(',_Lp},{var,_Lm,M}=Mac,{',',_Lc}|Toks], _Def, From, St) -> + case dict:find({atom,M}, St#epp.macs) of + {ok,_OldDef} -> + epp_reply(From, {error,{loc(Mac),epp,{redefine,M}}}), + wait_req_scan(St); + error -> + scan_define_cont(From, St, + {atom, M}, + {none,macro_expansion(Toks)}) + end; +scan_define([{'(',_Lp},{var,_Lm,M}=Mac,{'(',_Lc}|Toks], Def, From, St) -> + case dict:find({atom,M}, St#epp.macs) of + {ok,_Def} -> + epp_reply(From, {error,{loc(Mac),epp,{redefine,M}}}), + wait_req_scan(St); + error -> + case catch macro_pars(Toks, []) of + {ok, {As, Me}} -> + scan_define_cont(From, St, + {atom, M}, + {As, Me}); + _ -> + epp_reply(From, {error,{loc(Def),epp,{bad,define}}}), + wait_req_scan(St) + end + end; +scan_define(_Toks, Def, From, St) -> + epp_reply(From, {error,{loc(Def),epp,{bad,define}}}), + wait_req_scan(St). + +%%% Detection of circular macro expansions (which would either keep +%%% the compiler looping forever, or run out of memory): +%%% When a macro is defined, we store the names of other macros it +%%% uses in St#epp.uses. If any macro is undef'ed, that information +%%% becomes invalid, so we redo it for all remaining macros. +%%% The circularity detection itself is done when a macro is expanded: +%%% the information from St#epp.uses is traversed, and if a circularity +%%% is detected, an error message is thrown. + +scan_define_cont(F, St, M, Def) -> + Ms = dict:store(M, Def, St#epp.macs), + U = dict:store(M, macro_uses(Def), St#epp.uses), + scan_toks(F, St#epp{uses=U, macs=Ms}). + +macro_uses(undefined) -> + undefined; +macro_uses({_Args, Tokens}) -> + Uses0 = macro_ref(Tokens), + lists:usort(Uses0). + +macro_ref([]) -> + []; +macro_ref([{'?', _}, {'?', _} | Rest]) -> + macro_ref(Rest); +macro_ref([{'?', _}, {atom, _, A} | Rest]) -> + [{atom, A} | macro_ref(Rest)]; +macro_ref([{'?', _}, {var, _, A} | Rest]) -> + [{atom, A} | macro_ref(Rest)]; +macro_ref([_Token | Rest]) -> + macro_ref(Rest). + +all_macro_uses(D0) -> + L = dict:to_list(D0), + D = dict:new(), + add_macro_uses(L, D). + +add_macro_uses([], D) -> + D; +add_macro_uses([{Key, Def} | Rest], D0) -> + add_macro_uses(Rest, dict:store(Key, macro_uses(Def), D0)). + +%% scan_undef(Tokens, UndefToken, From, EppState) + +scan_undef([{'(',_Llp},{atom,_Lm,M},{')',_Lrp},{dot,_Ld}], _Undef, From, St) -> + scan_toks(From, St#epp{macs=dict:erase({atom,M}, St#epp.macs), + uses=all_macro_uses(St#epp.macs)}); +scan_undef([{'(',_Llp},{var,_Lm,M},{')',_Lrp},{dot,_Ld}], _Undef, From,St) -> + scan_toks(From, St#epp{macs=dict:erase({atom,M}, St#epp.macs), + uses=all_macro_uses(St#epp.macs)}); +scan_undef(_Toks, Undef, From, St) -> + epp_reply(From, {error,{loc(Undef),epp,{bad,undef}}}), + wait_req_scan(St). + +%% scan_include(Tokens, IncludeToken, From, St) + +scan_include([{'(',_Llp},{string,_Lf,NewName0},{')',_Lrp},{dot,_Ld}], Inc, + From, St) -> + NewName = expand_var(NewName0), + enter_file(St#epp.path, NewName, Inc, From, St); +scan_include(_Toks, Inc, From, St) -> + epp_reply(From, {error,{abs_loc(Inc),epp,{bad,include}}}), + wait_req_scan(St). + +%% scan_include_lib(Tokens, IncludeToken, From, EppState) +%% For include_lib we first test if we can find the file through the +%% normal search path, if not we assume that the first directory name +%% is a library name, find its true directory and try with that. + +find_lib_dir(NewName) -> + [Lib | Rest] = filename:split(NewName), + {code:lib_dir(list_to_atom(Lib)), Rest}. + +scan_include_lib([{'(',_Llp},{string,_Lf,_NewName0},{')',_Lrp},{dot,_Ld}], + Inc, From, St) + when length(St#epp.sstk) >= 8 -> + epp_reply(From, {error,{abs_loc(Inc),epp,{depth,"include_lib"}}}), + wait_req_scan(St); +scan_include_lib([{'(',_Llp},{string,_Lf,NewName0},{')',_Lrp},{dot,_Ld}], + Inc, From, St) -> + NewName = expand_var(NewName0), + Loc = start_loc(St#epp.location), + case file:path_open(St#epp.path, NewName, [read]) of + {ok,NewF,Pname} -> + wait_req_scan(enter_file2(NewF, Pname, From, St, Loc)); + {error,_E1} -> + case catch find_lib_dir(NewName) of + {LibDir, Rest} when is_list(LibDir) -> + LibName = filename:join([LibDir | Rest]), + case file:open(LibName, [read]) of + {ok,NewF} -> + ExtraPath = [filename:dirname(LibName)], + wait_req_scan(enter_file2(NewF, LibName, From, + St, Loc, ExtraPath)); + {error,_E2} -> + epp_reply(From, + {error,{abs_loc(Inc),epp, + {include,lib,NewName}}}), + wait_req_scan(St) + end; + _Error -> + epp_reply(From, {error,{abs_loc(Inc),epp, + {include,lib,NewName}}}), + wait_req_scan(St) + end + end; +scan_include_lib(_Toks, Inc, From, St) -> + epp_reply(From, {error,{abs_loc(Inc),epp,{bad,include_lib}}}), + wait_req_scan(St). + +%% scan_ifdef(Tokens, IfdefToken, From, EppState) +%% scan_ifndef(Tokens, IfdefToken, From, EppSate) +%% Handle the conditional parsing of a file. +%% Report a badly formed if[n]def test and then treat as undefined macro. + +scan_ifdef([{'(',_Llp},{atom,_Lm,M},{')',_Lrp},{dot,_Ld}], _IfD, From, St) -> + case dict:find({atom,M}, St#epp.macs) of + {ok,_Def} -> + scan_toks(From, St#epp{istk=[ifdef|St#epp.istk]}); + error -> + skip_toks(From, St, [ifdef]) + end; +scan_ifdef([{'(',_Llp},{var,_Lm,M},{')',_Lrp},{dot,_Ld}], _IfD, From, St) -> + case dict:find({atom,M}, St#epp.macs) of + {ok,_Def} -> + scan_toks(From, St#epp{istk=[ifdef|St#epp.istk]}); + error -> + skip_toks(From, St, [ifdef]) + end; +scan_ifdef(_Toks, IfDef, From, St) -> + epp_reply(From, {error,{loc(IfDef),epp,{bad,ifdef}}}), + wait_req_skip(St, [ifdef]). + +scan_ifndef([{'(',_Llp},{atom,_Lm,M},{')',_Lrp},{dot,_Ld}], _IfnD, From, St) -> + case dict:find({atom,M}, St#epp.macs) of + {ok,_Def} -> + skip_toks(From, St, [ifndef]); + error -> + scan_toks(From, St#epp{istk=[ifndef|St#epp.istk]}) + end; +scan_ifndef([{'(',_Llp},{var,_Lm,M},{')',_Lrp},{dot,_Ld}], _IfnD, From, St) -> + case dict:find({atom,M}, St#epp.macs) of + {ok,_Def} -> + skip_toks(From, St, [ifndef]); + error -> + scan_toks(From, St#epp{istk=[ifndef|St#epp.istk]}) + end; +scan_ifndef(_Toks, IfnDef, From, St) -> + epp_reply(From, {error,{loc(IfnDef),epp,{bad,ifndef}}}), + wait_req_skip(St, [ifndef]). + +%% scan_else(Tokens, ElseToken, From, EppState) +%% If we are in an if body then convert to else and skip, if we are in an +%% else or not in anything report an error. + +scan_else([{dot,_Ld}], Else, From, St) -> + case St#epp.istk of + ['else'|Cis] -> + epp_reply(From, {error,{loc(Else), + epp,{illegal,"repeated",'else'}}}), + wait_req_skip(St#epp{istk=Cis}, ['else']); + [_I|Cis] -> + skip_toks(From, St#epp{istk=Cis}, ['else']); + [] -> + epp_reply(From, {error,{loc(Else),epp, + {illegal,"unbalanced",'else'}}}), + wait_req_scan(St) + end; +scan_else(_Toks, Else, From, St) -> + epp_reply(From, {error,{loc(Else),epp,{bad,'else'}}}), + wait_req_scan(St). + +%% scan_if(Tokens, EndifToken, From, EppState) +%% Handle the conditional parsing of a file. +%% Report a badly formed if test and then treat as false macro. + +scan_if(_Toks, If, From, St) -> + epp_reply(From, {error,{loc(If),epp,{'NYI','if'}}}), + wait_req_skip(St, ['if']). + +%% scan_elif(Tokens, EndifToken, From, EppState) +%% Handle the conditional parsing of a file. +%% Report a badly formed if test and then treat as false macro. + +scan_elif(_Toks, Elif, From, St) -> + epp_reply(From, {error,{loc(Elif),epp,{'NYI','elif'}}}), + wait_req_scan(St). + +%% scan_endif(Tokens, EndifToken, From, EppState) +%% If we are in an if body then exit it, else report an error. + +scan_endif([{dot,_Ld}], Endif, From, St) -> + case St#epp.istk of + [_I|Cis] -> + scan_toks(From, St#epp{istk=Cis}); + [] -> + epp_reply(From, {error,{loc(Endif),epp, + {illegal,"unbalanced",endif}}}), + wait_req_scan(St) + end; +scan_endif(_Toks, Endif, From, St) -> + epp_reply(From, {error,{loc(Endif),epp,{bad,endif}}}), + wait_req_scan(St). + +%% scan_file(Tokens, FileToken, From, EppState) +%% Set the current file and line to the given file and line. +%% Note that the line of the attribute itself is kept. + +scan_file([{'(',_Llp},{string,_Ls,Name},{',',_Lc},{integer,_Li,Ln},{')',_Lrp}, + {dot,_Ld}], Tf, From, St) -> + enter_file_reply(From, Name, Ln, neg_line(abs_loc(Tf))), + Ms = dict:store({atom,'FILE'}, {none,[{string,1,Name}]}, St#epp.macs), + Locf = loc(Tf), + NewLoc = new_location(Ln, St#epp.location, Locf), + scan_toks(From, St#epp{name=Name,location=NewLoc,macs=Ms}); +scan_file(_Toks, Tf, From, St) -> + epp_reply(From, {error,{loc(Tf),epp,{bad,file}}}), + wait_req_scan(St). + +new_location(Ln, Le, Lf) when is_integer(Lf) -> + Ln+(Le-Lf); +new_location(Ln, {Le,_}, {Lf,_}) -> + {Ln+(Le-Lf),1}. + +%% skip_toks(From, EppState, SkipIstack) +%% Skip over forms until current conditional has been exited. Handle +%% nested conditionals and repeated 'else's. + +skip_toks(From, St, [I|Sis]) -> + case io:scan_erl_form(St#epp.file, '', St#epp.location) of + {ok,[{'-',_Lh},{atom,_Li,ifdef}|_Toks],Cl} -> + skip_toks(From, St#epp{location=Cl}, [ifdef,I|Sis]); + {ok,[{'-',_Lh},{atom,_Li,ifndef}|_Toks],Cl} -> + skip_toks(From, St#epp{location=Cl}, [ifndef,I|Sis]); + {ok,[{'-',_Lh},{'if',_Li}|_Toks],Cl} -> + skip_toks(From, St#epp{location=Cl}, ['if',I|Sis]); + {ok,[{'-',_Lh},{atom,_Le,'else'}=Else|_Toks],Cl}-> + skip_else(Else, From, St#epp{location=Cl}, [I|Sis]); + {ok,[{'-',_Lh},{atom,_Le,endif}|_Toks],Cl} -> + skip_toks(From, St#epp{location=Cl}, Sis); + {ok,_Toks,Cl} -> + skip_toks(From, St#epp{location=Cl}, [I|Sis]); + {error,_E,Cl} -> + skip_toks(From, St#epp{location=Cl}, [I|Sis]); + {eof,Cl} -> + leave_file(From, St#epp{location=Cl,istk=[I|Sis]}); + {error,_E} -> + epp_reply(From, {error,{St#epp.location,epp,cannot_parse}}), + leave_file(From, St) %This serious, just exit! + end; +skip_toks(From, St, []) -> + scan_toks(From, St). + +skip_else(Else, From, St, ['else'|Sis]) -> + epp_reply(From, {error,{loc(Else),epp,{illegal,"repeated",'else'}}}), + wait_req_skip(St, ['else'|Sis]); +skip_else(_Else, From, St, [_I]) -> + scan_toks(From, St#epp{istk=['else'|St#epp.istk]}); +skip_else(_Else, From, St, Sis) -> + skip_toks(From, St, Sis). + +%% macro_pars(Tokens, ArgStack) +%% macro_expansion(Tokens) +%% Extract the macro parameters and the expansion from a macro definition. + +macro_pars([{')',_Lp}, {',',_Ld}|Ex], Args) -> + {ok, {lists:reverse(Args), macro_expansion(Ex)}}; +macro_pars([{var,_,Name}, {')',_Lp}, {',',_Ld}|Ex], Args) -> + false = lists:member(Name, Args), %Prolog is nice + {ok, {lists:reverse([Name|Args]), macro_expansion(Ex)}}; +macro_pars([{var,_L,Name}, {',',_}|Ts], Args) -> + false = lists:member(Name, Args), + macro_pars(Ts, [Name|Args]). + +macro_expansion([{')',_Lp},{dot,_Ld}]) -> []; +macro_expansion([{dot,_Ld}]) -> []; %Be nice, allow no right paren! +macro_expansion([T|Ts]) -> + [T|macro_expansion(Ts)]. + +%% expand_macros(Tokens, Macros) +%% expand_macro(Tokens, MacroToken, RestTokens) +%% Expand the macros in a list of tokens, making sure that an expansion +%% gets the same location as the macro call. + +expand_macros(Type, MacT, M, Toks, Ms0) -> + %% (Type will always be 'atom') + {Ms, U} = Ms0, + Lm = loc(MacT), + check_uses([{Type,M}], [], U, Lm), + Tinfo = element(2, MacT), + case dict:find({Type,M}, Ms) of + {ok,{none,Exp}} -> + expand_macros(expand_macro(Exp, Tinfo, Toks, dict:new()), Ms0); + {ok,{As,Exp}} -> + {Bs,Toks1} = bind_args(Toks, Lm, M, As, dict:new()), + %%io:format("Bound arguments to macro ~w (~w)~n", [M,Bs]), + expand_macros(expand_macro(Exp, Tinfo, Toks1, Bs), Ms0); + {ok,undefined} -> + throw({error,Lm,{undefined,M}}); + error -> + throw({error,Lm,{undefined,M}}) + end. + +check_uses(undefined, _Anc, _U, _Lm) -> + ok; +check_uses([], _Anc, _U, _Lm) -> + ok; +check_uses([M|Rest], Anc, U, Lm) -> + case lists:member(M, Anc) of + true -> + {_, Name} = M, + throw({error,Lm,{circular,Name}}); + false -> + L = get_macro_uses(M, U), + check_uses(L, [M|Anc], U, Lm), + check_uses(Rest, Anc, U, Lm) + end. + +get_macro_uses(M, U) -> + case dict:find(M, U) of + error -> + []; + {ok, L} -> + L + end. + +%% Macro expansion +%% Note: io:scan_erl_form() does not return comments or white spaces. +expand_macros([{'?',_Lq},{atom,_Lm,M}=MacT|Toks], Ms) -> + expand_macros(atom, MacT, M, Toks, Ms); +%% Special macros +expand_macros([{'?',_Lq},{var,Lm,'LINE'}=Tok|Toks], Ms) -> + {line,Line} = erl_scan:token_info(Tok, line), + [{integer,Lm,Line}|expand_macros(Toks, Ms)]; +expand_macros([{'?',_Lq},{var,_Lm,M}=MacT|Toks], Ms) -> + expand_macros(atom, MacT, M, Toks, Ms); +%% Illegal macros +expand_macros([{'?',_Lq},Token|_Toks], _Ms) -> + T = case erl_scan:token_info(Token, text) of + {text,Text} -> + Text; + undefined -> + {symbol,Symbol} = erl_scan:token_info(Token, symbol), + io_lib:write(Symbol) + end, + throw({error,loc(Token),{call,[$?|T]}}); +expand_macros([T|Ts], Ms) -> + [T|expand_macros(Ts, Ms)]; +expand_macros([], _Ms) -> []. + +%% bind_args(Tokens, MacroLocation, MacroName, ArgumentVars, Bindings) +%% Collect the arguments to a macro call and check for correct number. + +bind_args([{'(',_Llp},{')',_Lrp}|Toks], _Lm, _M, [], Bs) -> + {Bs,Toks}; +bind_args([{'(',_Llp}|Toks0], Lm, M, [A|As], Bs) -> + {Arg,Toks1} = macro_arg(Toks0, [], []), + macro_args(Toks1, Lm, M, As, store_arg(Lm, M, A, Arg, Bs)); +bind_args(_Toks, Lm, M, _As, _Bs) -> + throw({error,Lm,{mismatch,M}}). + +macro_args([{')',_Lrp}|Toks], _Lm, _M, [], Bs) -> + {Bs,Toks}; +macro_args([{',',_Lc}|Toks0], Lm, M, [A|As], Bs) -> + {Arg,Toks1} = macro_arg(Toks0, [], []), + macro_args(Toks1, Lm, M, As, store_arg(Lm, M, A, Arg, Bs)); +macro_args([], Lm, M, _As, _Bs) -> + throw({error,Lm,{arg_error,M}}); +macro_args(_Toks, Lm, M, _As, _Bs) -> + throw({error,Lm,{mismatch,M}}). + +store_arg(L, M, _A, [], _Bs) -> + throw({error,L,{mismatch,M}}); +store_arg(_L, _M, A, Arg, Bs) -> + dict:store(A, Arg, Bs). + +%% macro_arg([Tok], [ClosePar], [ArgTok]) -> {[ArgTok],[RestTok]}. +%% Collect argument tokens until we hit a ',' or a ')'. We know a +%% enough about syntax to recognise "open parentheses" and keep +%% scanning until matching "close parenthesis". + +macro_arg([{',',Lc}|Toks], [], Arg) -> + {lists:reverse(Arg),[{',',Lc}|Toks]}; +macro_arg([{')',Lrp}|Toks], [], Arg) -> + {lists:reverse(Arg),[{')',Lrp}|Toks]}; +macro_arg([{'(',Llp}|Toks], E, Arg) -> + macro_arg(Toks, [')'|E], [{'(',Llp}|Arg]); +macro_arg([{'<<',Lls}|Toks], E, Arg) -> + macro_arg(Toks, ['>>'|E], [{'<<',Lls}|Arg]); +macro_arg([{'[',Lls}|Toks], E, Arg) -> + macro_arg(Toks, [']'|E], [{'[',Lls}|Arg]); +macro_arg([{'{',Llc}|Toks], E, Arg) -> + macro_arg(Toks, ['}'|E], [{'{',Llc}|Arg]); +macro_arg([{'begin',Lb}|Toks], E, Arg) -> + macro_arg(Toks, ['end'|E], [{'begin',Lb}|Arg]); +macro_arg([{'if',Li}|Toks], E, Arg) -> + macro_arg(Toks, ['end'|E], [{'if',Li}|Arg]); +macro_arg([{'case',Lc}|Toks], E, Arg) -> + macro_arg(Toks, ['end'|E], [{'case',Lc}|Arg]); +macro_arg([{'fun',Lc}|[{'(',_}|_]=Toks], E, Arg) -> + macro_arg(Toks, ['end'|E], [{'fun',Lc}|Arg]); +macro_arg([{'receive',Lr}|Toks], E, Arg) -> + macro_arg(Toks, ['end'|E], [{'receive',Lr}|Arg]); +macro_arg([{'try',Lr}|Toks], E, Arg) -> + macro_arg(Toks, ['end'|E], [{'try',Lr}|Arg]); +macro_arg([{'cond',Lr}|Toks], E, Arg) -> + macro_arg(Toks, ['end'|E], [{'cond',Lr}|Arg]); +macro_arg([{'query',Lr}|Toks], E, Arg) -> + macro_arg(Toks, ['end'|E], [{'query',Lr}|Arg]); +macro_arg([{Rb,Lrb}|Toks], [Rb|E], Arg) -> %Found matching close + macro_arg(Toks, E, [{Rb,Lrb}|Arg]); +macro_arg([T|Toks], E, Arg) -> + macro_arg(Toks, E, [T|Arg]); +macro_arg([], _E, Arg) -> + {lists:reverse(Arg),[]}. + +%% expand_macro(MacroDef, MacroTokenInfo, RestTokens, Bindings) +%% expand_arg(Argtokens, MacroTokens, MacroLocation, RestTokens, Bindings) +%% Insert the macro expansion replacing macro parameters with their +%% argument values, inserting the location of first the macro call +%% and then the macro arguments, i.e. simulate textual expansion. + +expand_macro([{var,_Lv,V}|Ts], L, Rest, Bs) -> + case dict:find(V, Bs) of + {ok,Val} -> + %% lists:append(Val, expand_macro(Ts, L, Rest, Bs)); + expand_arg(Val, Ts, L, Rest, Bs); + error -> + [{var,L,V}|expand_macro(Ts, L, Rest, Bs)] + end; +expand_macro([{'?', _}, {'?', _}, {var,_Lv,V}|Ts], L, Rest, Bs) -> + case dict:find(V, Bs) of + {ok,Val} -> + %% lists:append(Val, expand_macro(Ts, L, Rest, Bs)); + expand_arg(stringify(Val, L), Ts, L, Rest, Bs); + error -> + [{var,L,V}|expand_macro(Ts, L, Rest, Bs)] + end; +expand_macro([T|Ts], L, Rest, Bs) -> + [setelement(2, T, L)|expand_macro(Ts, L, Rest, Bs)]; +expand_macro([], _L, Rest, _Bs) -> Rest. + +expand_arg([A|As], Ts, _L, Rest, Bs) -> + %% It is not obvious that the location of arguments should replace L. + NextL = element(2, A), + [A|expand_arg(As, Ts, NextL, Rest, Bs)]; +expand_arg([], Ts, L, Rest, Bs) -> + expand_macro(Ts, L, Rest, Bs). + +%%% stringify(Ts, L) returns a list of one token: a string which when +%%% tokenized would yield the token list Ts. + +%% erl_scan:token_info(T, text) is not backward compatible with this. +token_src({dot, _}) -> + "."; +token_src({X, _}) when is_atom(X) -> + atom_to_list(X); +token_src({var, _, X}) -> + atom_to_list(X); +token_src({char,_,C}) -> + io_lib:write_char(C); +token_src({string, _, X}) -> + lists:flatten(io_lib:format("~p", [X])); +token_src({_, _, X}) -> + lists:flatten(io_lib:format("~w", [X])). + +stringify1([]) -> + []; +stringify1([T | Tokens]) -> + [io_lib:format(" ~s", [token_src(T)]) | stringify1(Tokens)]. + +stringify(Ts, L) -> + [$\s | S] = lists:flatten(stringify1(Ts)), + [{string, L, S}]. + +%% epp_request(Epp) +%% epp_request(Epp, Request) +%% epp_reply(From, Reply) +%% Handle communication with the epp. + +epp_request(Epp) -> + wait_epp_reply(Epp, erlang:monitor(process, Epp)). + +epp_request(Epp, Req) -> + Epp ! {epp_request,self(),Req}, + wait_epp_reply(Epp, erlang:monitor(process, Epp)). + +epp_reply(From, Rep) -> + From ! {epp_reply,self(),Rep}, + ok. + +wait_epp_reply(Epp, Mref) -> + receive + {epp_reply,Epp,Rep} -> + erlang:demonitor(Mref), + receive {'DOWN',Mref,_,_,_} -> ok after 0 -> ok end, + Rep; + {'DOWN',Mref,_,_,E} -> + receive {epp_reply,Epp,Rep} -> Rep + after 0 -> exit(E) + end + end. + +expand_var([$$ | _] = NewName) -> + case catch expand_var1(NewName) of + {ok, ExpName} -> + ExpName; + _ -> + NewName + end; +expand_var(NewName) -> + NewName. + +expand_var1(NewName) -> + [[$$ | Var] | Rest] = filename:split(NewName), + Value = os:getenv(Var), + true = Value =/= false, + {ok, filename:join([Value | Rest])}. + +%% The line only. (Other tokens may have the column and text as well...) +loc_attr(Line) when is_integer(Line) -> + Line; +loc_attr({Line,_Column}) -> + Line. + +loc(Token) -> + {location,Location} = erl_scan:token_info(Token, location), + Location. + +abs_loc(Token) -> + loc(setelement(2, Token, abs_line(element(2, Token)))). + +neg_line(L) -> + erl_scan:set_attribute(line, L, fun(Line) -> -abs(Line) end). + +abs_line(L) -> + erl_scan:set_attribute(line, L, fun(Line) -> abs(Line) end). + +start_loc(Line) when is_integer(Line) -> + 1; +start_loc({_Line, _Column}) -> + {1,1}. + +get_line(Line) when is_integer(Line) -> + Line; +get_line({Line,_Column}) -> + Line. + +%% epp has always output -file attributes when entering and leaving +%% included files (-include, -include_lib). Starting with R11B the +%% -file attribute is also recognized in the input file. This is +%% mainly aimed at yecc, the parser generator, which uses the -file +%% attribute to get correct lines in messages referring to code +%% supplied by the user (actions etc in .yrl files). +%% +%% In a perfect world (read: perfectly implemented applications such +%% as Xref, Cover, Debugger, etc.) it would not be necessary to +%% distinguish -file attributes from epp and the input file. The +%% Debugger for example could have one window for each referred file, +%% each window with its own set of breakpoints etc. The line numbers +%% of the abstract code would then point into different windows +%% depending on the -file attribute. [Note that if, as is the case for +%% yecc, code has been copied into the file, then it is possible that +%% the copied code differ from the one referred to by the -file +%% attribute, which means that line numbers can mismatch.] In practice +%% however it is very rare with Erlang functions in included files, so +%% only one window is used per module. This means that the line +%% numbers of the abstract code have to be adjusted to refer to the +%% top-most source file. The function interpret_file_attributes/1 +%% below interprets the -file attribute and returns forms where line +%% numbers refer to the top-most file. The -file attribute forms that +%% have been output by epp (corresponding to -include and +%% -include_lib) are kept, but the user's -file attributes are +%% removed. This seems sufficient for now. +%% +%% It turns out to be difficult to distinguish -file attributes in the +%% input file from the ones added by epp unless some action is taken. +%% The (less than perfect) solution employed is to let epp assign +%% negative line numbers to user supplied -file attributes. + +%% Note: it is assumed that the second element is a line or a key-list +%% where 'line' can be found. + +interpret_file_attribute(Forms) -> + interpret_file_attr(Forms, 0, []). + +interpret_file_attr([{attribute,Loc,file,{File,Line}}=Form | Forms], + Delta, Fs) -> + {line, L} = erl_scan:attributes_info(Loc, line), + if + L < 0 -> + %% -file attribute + interpret_file_attr(Forms, (abs(L) + Delta) - Line, Fs); + true -> + %% -include or -include_lib + % true = L =:= Line, + case Fs of + [_, Delta1, File | Fs1] -> % end of included file + [Form | interpret_file_attr(Forms, Delta1, [File | Fs1])]; + _ -> % start of included file + [Form | interpret_file_attr(Forms, 0, [File, Delta | Fs])] + end + end; +interpret_file_attr([Form0 | Forms], Delta, Fs) -> + F = fun(Attrs) -> + F2 = fun(L) -> abs(L) + Delta end, + erl_scan:set_attribute(line, Attrs, F2) + end, + Form = erl_lint:modify_line(Form0, F), + [Form | interpret_file_attr(Forms, Delta, Fs)]; +interpret_file_attr([], _Delta, _Fs) -> + []. + diff --git a/lib/stdlib/src/erl_bits.erl b/lib/stdlib/src/erl_bits.erl new file mode 100644 index 0000000000..62f6d00fae --- /dev/null +++ b/lib/stdlib/src/erl_bits.erl @@ -0,0 +1,186 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1999-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(erl_bits). + +-export([system_bittypes/0, + system_bitdefault/0, + set_bit_type/2, + as_list/1]). + +-include("../include/erl_bits.hrl"). + +%% Dummies. + +-spec system_bitdefault() -> 'no_system_bitdefault'. + +system_bitdefault() -> no_system_bitdefault. + +-spec system_bittypes() -> 'no_system_types'. + +system_bittypes() -> no_system_types. + +-spec as_list(#bittype{}) -> + [bt_endian() | bt_sign() | bt_type() | {'unit', 'undefined' | bt_unit()}]. + +as_list(Bt) -> + [Bt#bittype.type,{unit,Bt#bittype.unit},Bt#bittype.sign,Bt#bittype.endian]. + +%% XXX: tuple() below stands for what's produced by the parser +%% {integer,L,M} | {var,L,VAR} | {atom,L,ATOM} | {op,L,OP,OP1,OP2} | ... +-type size() :: 'all' | 'unknown' | non_neg_integer() | tuple(). % XXX: REFINE +-type type() :: 'bytes' | 'bitstring' | 'bits' + | bt_type() | bt_endian() | bt_sign() + | {'unit', 'undefined' | bt_unit()}. + +-spec set_bit_type('default' | size(), 'default' | [type()]) -> + {'ok', 'undefined' | size(), #bittype{}} | + {'error', {'undefined_bittype', term()}} | + {'error', {'bittype_mismatch', term(), term(), string()}}. + +set_bit_type(Size, default) -> + set_bit_type(Size, []); +set_bit_type(Size, TypeList) -> + try + #bittype{type=Type,unit=Unit,sign=Sign,endian=Endian} = + set_bit(TypeList), + apply_defaults(Type, Size, Unit, Sign, Endian) + catch + throw:Error -> Error + end. + +set_bit([]) -> #bittype{}; +set_bit([H|T]) -> set_bit_1(T, type_to_record(H)). + +set_bit_1([T0|Ts], Bt0) -> + Type = type_to_record(T0), + Bt = merge_bittype(Type, Bt0), + set_bit_1(Ts, Bt); +set_bit_1([], Bt) -> Bt. + +type_to_record(integer) -> #bittype{type = integer}; +type_to_record(utf8) -> #bittype{type = utf8}; +type_to_record(utf16) -> #bittype{type = utf16}; +type_to_record(utf32) -> #bittype{type = utf32}; +type_to_record(float) -> #bittype{type = float}; +type_to_record(binary) -> #bittype{type = binary}; +type_to_record(bytes) -> #bittype{type = binary, unit = 8}; +type_to_record(bitstring) -> #bittype{type = binary, unit = 1}; +type_to_record(bits) -> #bittype{type = binary, unit = 1}; + +type_to_record({unit,undefined}) -> + #bittype{unit=undefined}; +type_to_record({unit,Sz}) when is_integer(Sz), Sz > 0, Sz =< 256 -> + #bittype{unit=Sz}; + +type_to_record(big) -> #bittype{endian = big}; +type_to_record(little) -> #bittype{endian = little}; +type_to_record(native) -> #bittype{endian = native}; + +type_to_record(signed) -> #bittype{sign = signed}; +type_to_record(unsigned) -> #bittype{sign = unsigned}; + +type_to_record(Name) -> throw({error,{undefined_bittype,Name}}). + +%% +%% Merge two bit type specifications. +%% +merge_bittype(B1, B2) -> + Endian = merge_field(B1#bittype.endian, B2#bittype.endian, endianness), + Sign = merge_field(B1#bittype.sign, B2#bittype.sign, sign), + Type = merge_field(B1#bittype.type, B2#bittype.type, type), + Unit = merge_field(B1#bittype.unit, B2#bittype.unit, unit), + #bittype{type=Type,unit=Unit,endian=Endian,sign=Sign}. + +merge_field(undefined, B, _) -> B; +merge_field(A, undefined, _) -> A; +merge_field(A, A, _) -> A; +merge_field(X, Y, What) -> + throw({error,{bittype_mismatch,X,Y,atom_to_list(What)}}). + +%% +%% Defaults are as follows. +%% +%% The default is integer. +%% The default size is 'all' for binaries, 8 for integers, 64 for floats. +%% No unit must be given if the size is not given. +%% The default unit size is 8 for binaries, and 1 for integers and floats. +%% The default sign is always unsigned. +%% The default endian is always big. +%% + +apply_defaults(undefined, Size, Unit, Sign, Endian) -> %default type + apply_defaults(integer, Size, Unit, Sign, Endian); + +apply_defaults(binary, default, Unit, Sign, Endian) -> %default size + %% check_unit(Unit), removed to allow bitlevel binaries + apply_defaults(binary, all, Unit, Sign, Endian); +apply_defaults(integer, default, Unit, Sign, Endian) -> + check_unit(Unit), + apply_defaults(integer, 8, 1, Sign, Endian); +apply_defaults(utf8=Type, default, Unit, Sign, Endian) -> + apply_defaults(Type, undefined, Unit, Sign, Endian); +apply_defaults(utf16=Type, default, Unit, Sign, Endian) -> + apply_defaults(Type, undefined, Unit, Sign, Endian); +apply_defaults(utf32=Type, default, Unit, Sign, Endian) -> + apply_defaults(Type, undefined, Unit, Sign, Endian); +apply_defaults(float, default, Unit, Sign, Endian) -> + check_unit(Unit), + apply_defaults(float, 64, 1, Sign, Endian); + +apply_defaults(binary, Size, undefined, Sign, Endian) -> %default unit + apply_defaults(binary, Size, 8, Sign, Endian); +apply_defaults(integer, Size, undefined, Sign, Endian) -> + apply_defaults(integer, Size, 1, Sign, Endian); +apply_defaults(float, Size, undefined, Sign, Endian) -> + apply_defaults(float, Size, 1, Sign, Endian); + +apply_defaults(Type, Size, Unit, undefined, Endian) -> %default sign + apply_defaults(Type, Size, Unit, unsigned, Endian); + +apply_defaults(Type, Size, Unit, Sign, undefined) -> %default endian + apply_defaults(Type, Size, Unit, Sign, big); + +apply_defaults(Type, Size, Unit, Sign, Endian) -> %done + check_size_unit(Type, Size, Unit), + {ok,Size,#bittype{type=Type,unit=Unit,sign=Sign,endian=Endian}}. + +check_size_unit(utf8, Size, Unit) -> + check_size_unit_1(Size, Unit); +check_size_unit(utf16, Size, Unit) -> + check_size_unit_1(Size, Unit); +check_size_unit(utf32, Size, Unit) -> + check_size_unit_1(Size, Unit); +check_size_unit(_, _, _) -> ok. + +check_size_unit_1(Size, Unit) -> + case Size of + default -> ok; + undefined -> ok; + {atom,_,undefined} -> ok; + {value,_,undefined} -> ok; + _ -> throw({error,utf_bittype_size_or_unit}) + end, + case Unit of + undefined -> ok; + _ -> throw({error,utf_bittype_size_or_unit}) + end. + +check_unit(undefined) -> ok; +check_unit(_) -> throw({error,bittype_unit}). diff --git a/lib/stdlib/src/erl_compile.erl b/lib/stdlib/src/erl_compile.erl new file mode 100644 index 0000000000..d9d15e05f8 --- /dev/null +++ b/lib/stdlib/src/erl_compile.erl @@ -0,0 +1,233 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(erl_compile). + +-include("erl_compile.hrl"). +-include("file.hrl"). + +-export([compile_cmdline/1]). + +%% Mapping from extension to {M,F} to run the correct compiler. + +compiler(".erl") -> {compile, compile}; +compiler(".S") -> {compile, compile_asm}; +compiler(".beam") -> {compile, compile_beam}; +compiler(".core") -> {compile, compile_core}; +compiler(".mib") -> {snmpc, compile}; +compiler(".bin") -> {snmpc, mib_to_hrl}; +compiler(".xrl") -> {leex, compile}; +compiler(".yrl") -> {yecc, compile}; +compiler(".script") -> {systools, script2boot}; +compiler(".rel") -> {systools, compile_rel}; +compiler(".idl") -> {ic, compile}; +compiler(".asn1") -> {asn1ct, compile_asn1}; +compiler(".asn") -> {asn1ct, compile_asn}; +compiler(".py") -> {asn1ct, compile_py}; +compiler(".xml") -> {xmerl_scan, process}; +compiler(_) -> no. + +%% Entry from command line. + +-type cmd_line_arg() :: atom() | string(). + +-spec compile_cmdline([cmd_line_arg()]) -> no_return(). + +compile_cmdline(List) -> + case compile(List) of + ok -> my_halt(0); + error -> my_halt(1); + _ -> my_halt(2) + end. + +my_halt(Reason) -> + case process_info(group_leader(), status) of + {_,waiting} -> + %% Now all output data is down in the driver. + %% Give the driver some extra time before halting. + receive after 1 -> ok end, + halt(Reason); + _ -> + %% Probably still processing I/O requests. + erlang:yield(), + my_halt(Reason) + end. + +%% Run the the compiler in a separate process, trapping EXITs. + +compile(List) -> + process_flag(trap_exit, true), + Pid = spawn_link(fun() -> compiler_runner(List) end), + receive + {'EXIT', Pid, {compiler_result, Result}} -> + Result; + {'EXIT', Pid, Reason} -> + io:format("Runtime error: ~p~n", [Reason]), + error + end. + +-spec compiler_runner([cmd_line_arg()]) -> no_return(). + +compiler_runner(List) -> + %% We don't want the current directory in the code path. + %% Remove it. + Path = [D || D <- code:get_path(), D =/= "."], + true = code:set_path(Path), + exit({compiler_result, compile1(List)}). + +%% Parses the first part of the option list. + +compile1(['@cwd', Cwd|Rest]) -> + CwdL = atom_to_list(Cwd), + compile1(Rest, CwdL, #options{outdir=CwdL, cwd=CwdL}); +compile1(Args) -> + %% From R13B02, the @cwd argument is optional. + {ok, Cwd} = file:get_cwd(), + compile1(Args, Cwd, #options{outdir=Cwd, cwd=Cwd}). + +%% Parses all options. + +compile1(['@i', Dir|Rest], Cwd, Opts) -> + AbsDir = filename:absname(Dir, Cwd), + compile1(Rest, Cwd, Opts#options{includes=[AbsDir|Opts#options.includes]}); +compile1(['@outdir', Dir|Rest], Cwd, Opts) -> + AbsName = filename:absname(Dir, Cwd), + case file_or_directory(AbsName) of + file -> + compile1(Rest, Cwd, Opts#options{outfile=AbsName}); + directory -> + compile1(Rest, Cwd, Opts#options{outdir=AbsName}) + end; +compile1(['@d', Name|Rest], Cwd, Opts) -> + Defines = Opts#options.defines, + compile1(Rest, Cwd, Opts#options{defines=[Name|Defines]}); +compile1(['@dv', Name, Term|Rest], Cwd, Opts) -> + Defines = Opts#options.defines, + Value = make_term(atom_to_list(Term)), + compile1(Rest, Cwd, Opts#options{defines=[{Name, Value}|Defines]}); +compile1(['@warn', Level0|Rest], Cwd, Opts) -> + case catch list_to_integer(atom_to_list(Level0)) of + Level when is_integer(Level) -> + compile1(Rest, Cwd, Opts#options{warning=Level}); + _ -> + compile1(Rest, Cwd, Opts) + end; +compile1(['@verbose', false|Rest], Cwd, Opts) -> + compile1(Rest, Cwd, Opts#options{verbose=false}); +compile1(['@verbose', true|Rest], Cwd, Opts) -> + compile1(Rest, Cwd, Opts#options{verbose=true}); +compile1(['@optimize', Atom|Rest], Cwd, Opts) -> + Term = make_term(atom_to_list(Atom)), + compile1(Rest, Cwd, Opts#options{optimize=Term}); +compile1(['@option', Atom|Rest], Cwd, Opts) -> + Term = make_term(atom_to_list(Atom)), + Specific = Opts#options.specific, + compile1(Rest, Cwd, Opts#options{specific=[Term|Specific]}); +compile1(['@output_type', OutputType|Rest], Cwd, Opts) -> + compile1(Rest, Cwd, Opts#options{output_type=OutputType}); +compile1(['@files'|Rest], Cwd, Opts) -> + Includes = lists:reverse(Opts#options.includes), + compile2(Rest, Cwd, Opts#options{includes=Includes}). + +compile2(Files, Cwd, Opts) -> + case {Opts#options.outfile, length(Files)} of + {"", _} -> + compile3(Files, Cwd, Opts); + {[_|_], 1} -> + compile3(Files, Cwd, Opts); + {[_|_], _N} -> + io:format("Output file name given, but more than one input file.~n"), + error + end. + +%% Compiles the list of files, until done or compilation fails. + +compile3([File|Rest], Cwd, Options) -> + Ext = filename:extension(File), + Root = filename:rootname(File), + InFile = filename:absname(Root, Cwd), + OutFile = + case Options#options.outfile of + "" -> + filename:join(Options#options.outdir, filename:basename(Root)); + Outfile -> + filename:rootname(Outfile) + end, + case compile_file(Ext, InFile, OutFile, Options) of + ok -> + compile3(Rest, Cwd, Options); + Other -> + Other + end; +compile3([], _Cwd, _Options) -> ok. + +%% Invokes the appropriate compiler, depending on the file extension. + +compile_file("", Input, _Output, _Options) -> + io:format("File has no extension: ~s~n", [Input]), + error; +compile_file(Ext, Input, Output, Options) -> + case compiler(Ext) of + no -> + io:format("Unknown extension: '~s'\n", [Ext]), + error; + {M, F} -> + case catch M:F(Input, Output, Options) of + ok -> ok; + error -> error; + {'EXIT',Reason} -> + io:format("Compiler function ~w:~w/3 failed:\n~p~n", + [M,F,Reason]), + error; + Other -> + io:format("Compiler function ~w:~w/3 returned:\n~p~n", + [M,F,Other]), + error + end + end. + +%% Guesses if a give name refers to a file or a directory. + +file_or_directory(Name) -> + case file:read_file_info(Name) of + {ok, #file_info{type=regular}} -> + file; + {ok, _} -> + directory; + {error, _} -> + case filename:extension(Name) of + [] -> directory; + _Other -> file + end + end. + +%% Makes an Erlang term given a string. + +make_term(Str) -> + case erl_scan:string(Str) of + {ok, Tokens, _} -> + case erl_parse:parse_term(Tokens ++ [{dot, 1}]) of + {ok, Term} -> Term; + {error, {_,_,Reason}} -> + io:format("~s: ~s~n", [Reason, Str]), + throw(error) + end; + {error, {_,_,Reason}, _} -> + io:format("~s: ~s~n", [Reason, Str]), + throw(error) + end. diff --git a/lib/stdlib/src/erl_eval.erl b/lib/stdlib/src/erl_eval.erl new file mode 100644 index 0000000000..ea1b179ee5 --- /dev/null +++ b/lib/stdlib/src/erl_eval.erl @@ -0,0 +1,1108 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(erl_eval). + +%% An evaluator for Erlang abstract syntax. + +-export([exprs/2,exprs/3,exprs/4,expr/2,expr/3,expr/4,expr/5, + expr_list/2,expr_list/3,expr_list/4]). +-export([new_bindings/0,bindings/1,binding/2,add_binding/3,del_binding/2]). + +-export([is_constant_expr/1, partial_eval/1]). + +%% Is used by standalone Erlang (escript). +%% Also used by shell.erl. +-export([match_clause/4]). + +-export([check_command/2, fun_data/1]). + +-import(lists, [reverse/1,foldl/3,member/2]). + +%% exprs(ExpressionSeq, Bindings) +%% exprs(ExpressionSeq, Bindings, LocalFuncHandler) +%% exprs(ExpressionSeq, Bindings, LocalFuncHandler, ExternalFuncHandler) +%% Returns: +%% {value,Value,NewBindings} +%% or {'EXIT', Reason} +%% Only exprs/2 checks the command by calling erl_lint. The reason is +%% that if there is a function handler present, then it is possible +%% that there are valid constructs in Expression to be taken care of +%% by a function handler but considerad errors by erl_lint. + +exprs(Exprs, Bs) -> + case check_command(Exprs, Bs) of + ok -> + exprs(Exprs, Bs, none, none, none); + {error,{_Line,_Mod,Error}} -> + erlang:raise(error, Error, [{?MODULE,exprs,2}]) + end. + +exprs(Exprs, Bs, Lf) -> + exprs(Exprs, Bs, Lf, none, none). + +exprs(Exprs, Bs, Lf, Ef) -> + exprs(Exprs, Bs, Lf, Ef, none). + +exprs([E], Bs0, Lf, Ef, RBs) -> + expr(E, Bs0, Lf, Ef, RBs); +exprs([E|Es], Bs0, Lf, Ef, RBs) -> + RBs1 = none, + {value,_V,Bs} = expr(E, Bs0, Lf, Ef, RBs1), + exprs(Es, Bs, Lf, Ef, RBs). + +%% expr(Expression, Bindings) +%% expr(Expression, Bindings, LocalFuncHandler) +%% expr(Expression, Bindings, LocalFuncHandler, ExternalFuncHandler) +%% Returns: +%% {value,Value,NewBindings} +%% or {'EXIT', Reason} +%% +%% Only expr/2 checks the command by calling erl_lint. See exprs/2. + +expr(E, Bs) -> + case check_command([E], Bs) of + ok -> + expr(E, Bs, none, none, none); + {error,{_Line,_Mod,Error}} -> + erlang:raise(error, Error, [{?MODULE,expr,2}]) + end. + +expr(E, Bs, Lf) -> + expr(E, Bs, Lf, none, none). + +expr(E, Bs, Lf, Ef) -> + expr(E, Bs, Lf, Ef, none). + +%% Check a command (a list of expressions) by calling erl_lint. + +check_command(Es, Bs) -> + Opts = [bitlevel_binaries,binary_comprehension], + case erl_lint:exprs_opt(Es, bindings(Bs), Opts) of + {ok,_Ws} -> + ok; + {error,[{_File,[Error|_]}],_Ws} -> + {error,Error} + end. + +%% Check whether a term F is a function created by this module. +%% Returns 'false' if not, otherwise {fun_data,Imports,Clauses}. + +fun_data(F) when is_function(F) -> + case erlang:fun_info(F, module) of + {module,erl_eval} -> + {env, [FBs,_FEf,_FLf,FCs]} = erlang:fun_info(F, env), + {fun_data,FBs,FCs}; + _ -> + false + end; +fun_data(_T) -> + false. + +expr({var,_,V}, Bs, _Lf, _Ef, RBs) -> + case binding(V, Bs) of + {value,Val} -> + ret_expr(Val, Bs, RBs); + unbound -> % Should not happen. + erlang:raise(error, {unbound,V}, stacktrace()) + end; +expr({char,_,C}, Bs, _Lf, _Ef, RBs) -> + ret_expr(C, Bs, RBs); +expr({integer,_,I}, Bs, _Lf, _Ef, RBs) -> + ret_expr(I, Bs, RBs); +expr({float,_,F}, Bs, _Lf, _Ef, RBs) -> + ret_expr(F, Bs, RBs); +expr({atom,_,A}, Bs, _Lf, _Ef, RBs) -> + ret_expr(A, Bs, RBs); +expr({string,_,S}, Bs, _Lf, _Ef, RBs) -> + ret_expr(S, Bs, RBs); +expr({nil, _}, Bs, _Lf, _Ef, RBs) -> + ret_expr([], Bs, RBs); +expr({cons,_,H0,T0}, Bs0, Lf, Ef, RBs) -> + {value,H,Bs1} = expr(H0, Bs0, Lf, Ef, none), + {value,T,Bs2} = expr(T0, Bs0, Lf, Ef, none), + ret_expr([H|T], merge_bindings(Bs1, Bs2), RBs); +expr({lc,_,E,Qs}, Bs, Lf, Ef, RBs) -> + eval_lc(E, Qs, Bs, Lf, Ef, RBs); +expr({bc,_,E,Qs}, Bs, Lf, Ef, RBs) -> + eval_bc(E, Qs, Bs, Lf, Ef, RBs); +expr({tuple,_,Es}, Bs0, Lf, Ef, RBs) -> + {Vs,Bs} = expr_list(Es, Bs0, Lf, Ef), + ret_expr(list_to_tuple(Vs), Bs, RBs); +expr({record_field,_,_,_}=Mod, Bs, _Lf, _Ef, RBs) -> + case expand_module_name(Mod, Bs) of + {atom,_,A} -> + ret_expr(A, Bs, RBs); %% This is the "x.y" syntax + _ -> + erlang:raise(error, {badexpr, '.'}, stacktrace()) + end; +expr({record_field,_,_,Name,_}, _Bs, _Lf, _Ef, _RBs) -> + erlang:raise(error, {undef_record,Name}, stacktrace()); +expr({record_index,_,Name,_}, _Bs, _Lf, _Ef, _RBs) -> + erlang:raise(error, {undef_record,Name}, stacktrace()); +expr({record,_,Name,_}, _Bs, _Lf, _Ef, _RBs) -> + erlang:raise(error, {undef_record,Name}, stacktrace()); +expr({record,_,_,Name,_}, _Bs, _Lf, _Ef, _RBs) -> + erlang:raise(error, {undef_record,Name}, stacktrace()); +expr({block,_,Es}, Bs, Lf, Ef, RBs) -> + exprs(Es, Bs, Lf, Ef, RBs); +expr({'if',_,Cs}, Bs, Lf, Ef, RBs) -> + if_clauses(Cs, Bs, Lf, Ef, RBs); +expr({'case',_,E,Cs}, Bs0, Lf, Ef, RBs) -> + {value,Val,Bs} = expr(E, Bs0, Lf, Ef, none), + case_clauses(Val, Cs, Bs, Lf, Ef, RBs); +expr({'try',_,B,Cases,Catches,AB}, Bs, Lf, Ef, RBs) -> + try_clauses(B, Cases, Catches, AB, Bs, Lf, Ef, RBs); +expr({'receive',_,Cs}, Bs, Lf, Ef, RBs) -> + receive_clauses(Cs, Bs, Lf, Ef, [], RBs); +expr({'receive',_, Cs, E, TB}, Bs0, Lf, Ef, RBs) -> + {value,T,Bs} = expr(E, Bs0, Lf, Ef, none), + receive_clauses(T, Cs, {TB,Bs}, Bs0, Lf, Ef, [], RBs); +expr({'fun',_Line,{function,Mod,Name,Arity}}, Bs, _Lf, _Ef, RBs) -> + F = erlang:make_fun(Mod, Name, Arity), + ret_expr(F, Bs, RBs); +expr({'fun',_Line,{function,Name,Arity}}, _Bs0, _Lf, _Ef, _RBs) -> % R8 + %% Don't know what to do... + erlang:raise(error, undef, [{erl_eval,Name,Arity}|stacktrace()]); +expr({'fun',Line,{clauses,Cs}} = Ex, Bs, Lf, Ef, RBs) -> + %% Save only used variables in the function environment. + %% {value,L,V} are hidden while lint finds used variables. + {Ex1, _} = hide_calls(Ex, 0), + {ok,Used} = erl_lint:used_vars([Ex1], Bs), + En = orddict:filter(fun(K,_V) -> member(K,Used) end, Bs), + %% This is a really ugly hack! + F = + case length(element(3,hd(Cs))) of + 0 -> fun () -> eval_fun(Cs, [], En, Lf, Ef) end; + 1 -> fun (A) -> eval_fun(Cs, [A], En, Lf, Ef) end; + 2 -> fun (A,B) -> eval_fun(Cs, [A,B], En, Lf, Ef) end; + 3 -> fun (A,B,C) -> eval_fun(Cs, [A,B,C], En, Lf, Ef) end; + 4 -> fun (A,B,C,D) -> eval_fun(Cs, [A,B,C,D], En, Lf, Ef) end; + 5 -> fun (A,B,C,D,E) -> eval_fun(Cs, [A,B,C,D,E], En, Lf, Ef) end; + 6 -> fun (A,B,C,D,E,F) -> eval_fun(Cs, [A,B,C,D,E,F], En, Lf, Ef) end; + 7 -> fun (A,B,C,D,E,F,G) -> + eval_fun(Cs, [A,B,C,D,E,F,G], En, Lf, Ef) end; + 8 -> fun (A,B,C,D,E,F,G,H) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H], En, Lf, Ef) end; + 9 -> fun (A,B,C,D,E,F,G,H,I) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I], En, Lf, Ef) end; + 10 -> fun (A,B,C,D,E,F,G,H,I,J) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J], En, Lf, Ef) end; + 11 -> fun (A,B,C,D,E,F,G,H,I,J,K) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K], En, Lf, Ef) end; + 12 -> fun (A,B,C,D,E,F,G,H,I,J,K,L) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K,L], En, Lf, Ef) end; + 13 -> fun (A,B,C,D,E,F,G,H,I,J,K,L,M) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K,L,M], En, Lf, Ef) end; + 14 -> fun (A,B,C,D,E,F,G,H,I,J,K,L,M,N) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K,L,M,N], En, Lf, Ef) end; + 15 -> fun (A,B,C,D,E,F,G,H,I,J,K,L,M,N,O) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K,L,M,N,O], En, Lf, Ef) end; + 16 -> fun (A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P], En, Lf, Ef) end; + 17 -> fun (A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q], En, Lf, Ef) end; + 18 -> fun (A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R], En, Lf, Ef) end; + 19 -> fun (A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S], + En, Lf, Ef) end; + 20 -> fun (A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T) -> + eval_fun(Cs, [A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T], + En, Lf, Ef) end; + _Other -> + erlang:raise(error, {'argument_limit',{'fun',Line,Cs}}, + stacktrace()) + end, + ret_expr(F, Bs, RBs); +expr({call,_,{remote,_,{atom,_,qlc},{atom,_,q}},[{lc,_,_E,_Qs}=LC | As0]}, + Bs0, Lf, Ef, RBs) when length(As0) =< 1 -> + %% No expansion or evaluation of module name or function name. + MaxLine = find_maxline(LC), + {LC1, D} = hide_calls(LC, MaxLine), + case qlc:transform_from_evaluator(LC1, Bs0) of + {ok,{call,L,Remote,[QLC]}} -> + QLC1 = unhide_calls(QLC, MaxLine, D), + expr({call,L,Remote,[QLC1 | As0]}, Bs0, Lf, Ef, RBs); + {not_ok,Error} -> + ret_expr(Error, Bs0, RBs) + end; +expr({call,L1,{remote,L2,{record_field,_,{atom,_,''},{atom,_,qlc}=Mod}, + {atom,_,q}=Func}, + [{lc,_,_E,_Qs} | As0]=As}, + Bs, Lf, Ef, RBs) when length(As0) =< 1 -> + expr({call,L1,{remote,L2,Mod,Func},As}, Bs, Lf, Ef, RBs); +expr({call,_,{remote,_,Mod,Func},As0}, Bs0, Lf, Ef, RBs) -> + Mod1 = expand_module_name(Mod, Bs0), + {value,M,Bs1} = expr(Mod1, Bs0, Lf, Ef, none), + {value,F,Bs2} = expr(Func, Bs0, Lf, Ef, none), + {As,Bs3} = expr_list(As0, merge_bindings(Bs1, Bs2), Lf, Ef), + %% M could be a parameterized module (not an atom). + case is_atom(M) andalso erl_internal:bif(M, F, length(As)) of + true -> + bif(F, As, Bs3, Ef, RBs); + false -> + do_apply({M,F}, As, Bs3, Ef, RBs) + end; +expr({call,_,{atom,_,Func},As0}, Bs0, Lf, Ef, RBs) -> + case erl_internal:bif(Func, length(As0)) of + true -> + {As,Bs} = expr_list(As0, Bs0, Lf, Ef), + bif(Func, As, Bs, Ef, RBs); + false -> + local_func(Func, As0, Bs0, Lf, RBs) + end; +expr({call,_,Func0,As0}, Bs0, Lf, Ef, RBs) -> % function or {Mod,Fun} + {value,Func,Bs1} = expr(Func0, Bs0, Lf, Ef, none), + {As,Bs2} = expr_list(As0, Bs1, Lf, Ef), + do_apply(Func, As, Bs2, Ef, RBs); +expr({'catch',_,Expr}, Bs0, Lf, Ef, RBs) -> + Ref = make_ref(), + case catch {Ref,expr(Expr, Bs0, Lf, Ef, none)} of + {Ref,{value,V,Bs}} -> % Nothing was thrown (guaranteed). + ret_expr(V, Bs, RBs); + Other -> + ret_expr(Other, Bs0, RBs) + end; +expr({match,_,Lhs,Rhs0}, Bs0, Lf, Ef, RBs) -> + {value,Rhs,Bs1} = expr(Rhs0, Bs0, Lf, Ef, none), + case match(Lhs, Rhs, Bs1) of + {match,Bs} -> + ret_expr(Rhs, Bs, RBs); + nomatch -> + erlang:raise(error, {badmatch,Rhs}, stacktrace()) + end; +expr({op,_,Op,A0}, Bs0, Lf, Ef, RBs) -> + {value,A,Bs} = expr(A0, Bs0, Lf, Ef, none), + eval_op(Op, A, Bs, Ef, RBs); +expr({op,_,'andalso',L0,R0}, Bs0, Lf, Ef, RBs) -> + {value,L,Bs1} = expr(L0, Bs0, Lf, Ef, none), + V = case L of + true -> + {value,R,_} = expr(R0, Bs1, Lf, Ef, none), + R; + false -> false; + _ -> erlang:raise(error, {badarg,L}, stacktrace()) + end, + ret_expr(V, Bs1, RBs); +expr({op,_,'orelse',L0,R0}, Bs0, Lf, Ef, RBs) -> + {value,L,Bs1} = expr(L0, Bs0, Lf, Ef, none), + V = case L of + true -> true; + false -> + {value,R,_} = expr(R0, Bs1, Lf, Ef, none), + R; + _ -> erlang:raise(error, {badarg,L}, stacktrace()) + end, + ret_expr(V, Bs1, RBs); +expr({op,_,Op,L0,R0}, Bs0, Lf, Ef, RBs) -> + {value,L,Bs1} = expr(L0, Bs0, Lf, Ef, none), + {value,R,Bs2} = expr(R0, Bs0, Lf, Ef, none), + eval_op(Op, L, R, merge_bindings(Bs1, Bs2), Ef, RBs); +expr({bin,_,Fs}, Bs0, Lf, Ef, RBs) -> + EvalFun = fun(E, B) -> expr(E, B, Lf, Ef, none) end, + {value,V,Bs} = eval_bits:expr_grp(Fs, Bs0, EvalFun), + ret_expr(V, Bs, RBs); +expr({remote,_,_,_}, _Bs, _Lf, _Ef, _RBs) -> + erlang:raise(error, {badexpr,':'}, stacktrace()); +expr({value,_,Val}, Bs, _Lf, _Ef, RBs) -> % Special case straight values. + ret_expr(Val, Bs, RBs). + +find_maxline(LC) -> + put('$erl_eval_max_line', 0), + F = fun(L) -> + case is_integer(L) and (L > get('$erl_eval_max_line')) of + true -> put('$erl_eval_max_line', L); + false -> ok + end end, + _ = erl_lint:modify_line(LC, F), + erase('$erl_eval_max_line'). + +hide_calls(LC, MaxLine) -> + LineId0 = MaxLine + 1, + {NLC, _, D} = hide(LC, LineId0, dict:new()), + {NLC, D}. + +%% v/1 and local calls are hidden. +hide({value,L,V}, Id, D) -> + {{atom,Id,ok}, Id+1, dict:store(Id, {value,L,V}, D)}; +hide({call,L,{atom,_,N}=Atom,Args}, Id0, D0) -> + {NArgs, Id, D} = hide(Args, Id0, D0), + C = case erl_internal:bif(N, length(Args)) of + true -> + {call,L,Atom,NArgs}; + false -> + {call,Id,{remote,L,{atom,L,m},{atom,L,f}},NArgs} + end, + {C, Id+1, dict:store(Id, {call,Atom}, D)}; +hide(T0, Id0, D0) when is_tuple(T0) -> + {L, Id, D} = hide(tuple_to_list(T0), Id0, D0), + {list_to_tuple(L), Id, D}; +hide([E0 | Es0], Id0, D0) -> + {E, Id1, D1} = hide(E0, Id0, D0), + {Es, Id, D} = hide(Es0, Id1, D1), + {[E | Es], Id, D}; +hide(E, Id, D) -> + {E, Id, D}. + +unhide_calls({atom,Id,ok}, MaxLine, D) when Id > MaxLine -> + dict:fetch(Id, D); +unhide_calls({call,Id,{remote,L,_M,_F},Args}, MaxLine, D) when Id > MaxLine -> + {call,Atom} = dict:fetch(Id, D), + {call,L,Atom,unhide_calls(Args, MaxLine, D)}; +unhide_calls(T, MaxLine, D) when is_tuple(T) -> + list_to_tuple(unhide_calls(tuple_to_list(T), MaxLine, D)); +unhide_calls([E | Es], MaxLine, D) -> + [unhide_calls(E, MaxLine, D) | unhide_calls(Es, MaxLine, D)]; +unhide_calls(E, _MaxLine, _D) -> + E. + +%% local_func(Function, Arguments, Bindings, LocalFuncHandler, RBs) -> +%% {value,Value,Bindings} | Value when +%% LocalFuncHandler = {value,F} | {value,F,Eas} | +%% {eval,F} | {eval,F,Eas} | none. + +local_func(Func, As0, Bs0, {value,F}, value) -> + {As1,_Bs1} = expr_list(As0, Bs0, {value,F}), + %% Make tail recursive calls when possible. + F(Func, As1); +local_func(Func, As0, Bs0, {value,F}, RBs) -> + {As1,Bs1} = expr_list(As0, Bs0, {value,F}), + ret_expr(F(Func, As1), Bs1, RBs); +local_func(Func, As0, Bs0, {value,F,Eas}, value) -> + {As1,_Bs1} = expr_list(As0, Bs0, {value,F,Eas}), + apply(F, [Func,As1|Eas]); +local_func(Func, As0, Bs0, {value,F,Eas}, RBs) -> + {As1,Bs1} = expr_list(As0, Bs0, {value,F,Eas}), + ret_expr(apply(F, [Func,As1|Eas]), Bs1, RBs); +local_func(Func, As, Bs, {eval,F}, RBs) -> + local_func2(F(Func, As, Bs), RBs); +local_func(Func, As, Bs, {eval,F,Eas}, RBs) -> + local_func2(apply(F, [Func,As,Bs|Eas]), RBs); +%% These two clauses are for backwards compatibility. +local_func(Func, As0, Bs0, {M,F}, RBs) -> + {As1,Bs1} = expr_list(As0, Bs0, {M,F}), + ret_expr(M:F(Func,As1), Bs1, RBs); +local_func(Func, As, _Bs, {M,F,Eas}, RBs) -> + local_func2(apply(M, F, [Func,As|Eas]), RBs); +%% Default unknown function handler to undefined function. +local_func(Func, As0, _Bs0, none, _RBs) -> + erlang:raise(error, undef, [{erl_eval,Func,length(As0)}|stacktrace()]). + +local_func2({value,V,Bs}, RBs) -> + ret_expr(V, Bs, RBs); +local_func2({eval,F,As,Bs}, RBs) -> % This reply is not documented. + %% The shell found F. erl_eval tries to do a tail recursive call, + %% something the shell cannot do. Do not use Ef here. + do_apply(F, As, Bs, none, RBs). + +%% bif(Name, Arguments, RBs) +%% Evaluate the Erlang auto-imported function Name. erlang:apply/2,3 +%% are "hidden" from the external function handler. + +bif(apply, [erlang,apply,As], Bs, Ef, RBs) -> + bif(apply, As, Bs, Ef, RBs); +bif(apply, [M,F,As], Bs, Ef, RBs) -> + do_apply({M,F}, As, Bs, Ef, RBs); +bif(apply, [F,As], Bs, Ef, RBs) -> + do_apply(F, As, Bs, Ef, RBs); +bif(Name, As, Bs, Ef, RBs) -> + do_apply({erlang,Name}, As, Bs, Ef, RBs). + +%% do_apply(MF, Arguments, Bindings, ExternalFuncHandler, RBs) -> +%% {value,Value,Bindings} | Value when +%% ExternalFuncHandler = {value,F} | none. +%% MF is a tuple {Module,Function} or a fun. + +do_apply({M,F}=Func, As, Bs0, Ef, RBs) + when tuple_size(M) >= 1, is_atom(element(1, M)), is_atom(F) -> + case Ef of + none when RBs =:= value -> + %% Make tail recursive calls when possible. + apply(M, F, As); + none -> + ret_expr(apply(M, F, As), Bs0, RBs); + {value,Fun} when RBs =:= value -> + Fun(Func, As); + {value,Fun} -> + ret_expr(Fun(Func, As), Bs0, RBs) + end; +do_apply(Func, As, Bs0, Ef, RBs) -> + Env = if + is_function(Func) -> + case {erlang:fun_info(Func, module), + erlang:fun_info(Func, env)} of + {{module,?MODULE},{env,Env1}} when Env1 =/= [] -> + {env,Env1}; + _ -> + no_env + end; + true -> + no_env + end, + case {Env,Ef} of + {{env,[FBs, FEf, FLf, FCs]},_} -> + %% If we are evaluting within another function body + %% (RBs =/= none), we return RBs when this function body + %% has been evalutated, otherwise we return Bs0, the + %% bindings when evalution of this function body started. + NRBs = if + RBs =:= none -> Bs0; + true -> RBs + end, + case {erlang:fun_info(Func, arity), length(As)} of + {{arity, Arity}, Arity} -> + eval_fun(FCs, As, FBs, FLf, FEf, NRBs); + _ -> + erlang:raise(error, {badarity,{Func,As}},stacktrace()) + end; + {no_env,none} when RBs =:= value -> + %% Make tail recursive calls when possible. + apply(Func, As); + {no_env,none} -> + ret_expr(apply(Func, As), Bs0, RBs); + {no_env,{value,F}} when RBs =:= value -> + F(Func,As); + {no_env,{value,F}} -> + ret_expr(F(Func, As), Bs0, RBs) + end. + +%% eval_lc(Expr, [Qualifier], Bindings, LocalFunctionHandler, +%% ExternalFuncHandler, RetBindings) -> +%% {value,Value,Bindings} | Value + +eval_lc(E, Qs, Bs, Lf, Ef, RBs) -> + ret_expr(lists:reverse(eval_lc1(E, Qs, Bs, Lf, Ef, [])), Bs, RBs). + +eval_lc1(E, [{generate,_,P,L0}|Qs], Bs0, Lf, Ef, Acc0) -> + {value,L1,_Bs1} = expr(L0, Bs0, Lf, Ef, none), + CompFun = fun(Bs, Acc) -> eval_lc1(E, Qs, Bs, Lf, Ef, Acc) end, + eval_generate(L1, P, Bs0, Lf, Ef, CompFun, Acc0); +eval_lc1(E, [{b_generate,_,P,L0}|Qs], Bs0, Lf, Ef, Acc0) -> + {value,Bin,_Bs1} = expr(L0, Bs0, Lf, Ef, none), + CompFun = fun(Bs, Acc) -> eval_lc1(E, Qs, Bs, Lf, Ef, Acc) end, + eval_b_generate(Bin, P, Bs0, Lf, Ef, CompFun, Acc0); +eval_lc1(E, [F|Qs], Bs0, Lf, Ef, Acc) -> + CompFun = fun(Bs) -> eval_lc1(E, Qs, Bs, Lf, Ef, Acc) end, + eval_filter(F, Bs0, Lf, Ef, CompFun, Acc); +eval_lc1(E, [], Bs, Lf, Ef, Acc) -> + {value,V,_} = expr(E, Bs, Lf, Ef, none), + [V|Acc]. + +%% eval_bc(Expr, [Qualifier], Bindings, LocalFunctionHandler, +%% ExternalFuncHandler, RetBindings) -> +%% {value,Value,Bindings} | Value + +eval_bc(E, Qs, Bs, Lf, Ef, RBs) -> + ret_expr(eval_bc1(E, Qs, Bs, Lf, Ef, <<>>), Bs, RBs). + +eval_bc1(E, [{b_generate,_,P,L0}|Qs], Bs0, Lf, Ef, Acc0) -> + {value,Bin,_Bs1} = expr(L0, Bs0, Lf, Ef, none), + CompFun = fun(Bs, Acc) -> eval_bc1(E, Qs, Bs, Lf, Ef, Acc) end, + eval_b_generate(Bin, P, Bs0, Lf, Ef, CompFun, Acc0); +eval_bc1(E, [{generate,_,P,L0}|Qs], Bs0, Lf, Ef, Acc0) -> + {value,List,_Bs1} = expr(L0, Bs0, Lf, Ef, none), + CompFun = fun(Bs, Acc) -> eval_bc1(E, Qs, Bs, Lf, Ef, Acc) end, + eval_generate(List, P, Bs0, Lf, Ef, CompFun, Acc0); +eval_bc1(E, [F|Qs], Bs0, Lf, Ef, Acc) -> + CompFun = fun(Bs) -> eval_bc1(E, Qs, Bs, Lf, Ef, Acc) end, + eval_filter(F, Bs0, Lf, Ef, CompFun, Acc); +eval_bc1(E, [], Bs, Lf, Ef, Acc) -> + {value,V,_} = expr(E, Bs, Lf, Ef, none), + <>. + +eval_generate([V|Rest], P, Bs0, Lf, Ef, CompFun, Acc) -> + case match(P, V, new_bindings(), Bs0) of + {match,Bsn} -> + Bs2 = add_bindings(Bsn, Bs0), + NewAcc = CompFun(Bs2, Acc), + eval_generate(Rest, P, Bs0, Lf, Ef, CompFun, NewAcc); + nomatch -> + eval_generate(Rest, P, Bs0, Lf, Ef, CompFun, Acc) + end; +eval_generate([], _P, _Bs0, _Lf, _Ef, _CompFun, Acc) -> + Acc; +eval_generate(Term, _P, _Bs0, _Lf, _Ef, _CompFun, _Acc) -> + erlang:raise(error, {bad_generator,Term}, stacktrace()). + +eval_b_generate(<<_/bitstring>>=Bin, P, Bs0, Lf, Ef, CompFun, Acc) -> + Mfun = fun(L, R, Bs) -> match1(L, R, Bs, Bs0) end, + Efun = fun(Exp, Bs) -> expr(Exp, Bs, Lf, Ef, none) end, + case eval_bits:bin_gen(P, Bin, new_bindings(), Bs0, Mfun, Efun) of + {match, Rest, Bs1} -> + Bs2 = add_bindings(Bs1, Bs0), + NewAcc = CompFun(Bs2, Acc), + eval_b_generate(Rest, P, Bs0, Lf, Ef, CompFun, NewAcc); + {nomatch, Rest} -> + eval_b_generate(Rest, P, Bs0, Lf, Ef, CompFun, Acc); + done -> + Acc + end; +eval_b_generate(Term, _P, _Bs0, _Lf, _Ef, _CompFun, _Acc) -> + erlang:raise(error, {bad_generator,Term}, stacktrace()). + +eval_filter(F, Bs0, Lf, Ef, CompFun, Acc) -> + case erl_lint:is_guard_test(F) of + true -> + case guard_test(F, Bs0, Lf, Ef) of + {value,true,Bs1} -> CompFun(Bs1); + {value,false,_} -> Acc + end; + false -> + case expr(F, Bs0, Lf, Ef, none) of + {value,true,Bs1} -> CompFun(Bs1); + {value,false,_} -> Acc; + {value,V,_} -> + erlang:raise(error, {bad_filter,V}, stacktrace()) + end + end. + + +%% RBs is the bindings to return when the evalution of a function +%% (fun) has finished. If RBs =:= none, then the evalution took place +%% outside a function. If RBs =:= value, only the value (not the bindings) +%% is to be returned (to a compiled function). + +ret_expr(V, _Bs, value) -> + V; +ret_expr(V, Bs, none) -> + {value,V,Bs}; +ret_expr(V, _Bs, RBs) when is_list(RBs) -> + {value,V,RBs}. + +%% eval_fun(Clauses, Arguments, Bindings, LocalFunctionHandler, +%% ExternalFunctionHandler) -> Value +%% This function is called when the fun is called from compiled code +%% or from apply. + +eval_fun(Cs, As, Bs0, Lf, Ef) -> + eval_fun(Cs, As, Bs0, Lf, Ef, value). + +eval_fun([{clause,_,H,G,B}|Cs], As, Bs0, Lf, Ef, RBs) -> + case match_list(H, As, new_bindings(), Bs0) of + {match,Bsn} -> % The new bindings for the head + Bs1 = add_bindings(Bsn, Bs0), % which then shadow! + case guard(G, Bs1, Lf, Ef) of + true -> exprs(B, Bs1, Lf, Ef, RBs); + false -> eval_fun(Cs, As, Bs0, Lf, Ef, RBs) + end; + nomatch -> + eval_fun(Cs, As, Bs0, Lf, Ef, RBs) + end; +eval_fun([], As, _Bs, _Lf, _Ef, _RBs) -> + erlang:raise(error, function_clause, + [{?MODULE,'-inside-an-interpreted-fun-',As}|stacktrace()]). + +%% expr_list(ExpressionList, Bindings) +%% expr_list(ExpressionList, Bindings, LocalFuncHandler) +%% expr_list(ExpressionList, Bindings, LocalFuncHandler, ExternalFuncHandler) +%% Evaluate a list of expressions "in parallel" at the same level. + +expr_list(Es, Bs) -> + expr_list(Es, Bs, none, none). + +expr_list(Es, Bs, Lf) -> + expr_list(Es, Bs, Lf, none). + +expr_list(Es, Bs, Lf, Ef) -> + expr_list(Es, [], Bs, Bs, Lf, Ef). + +expr_list([E|Es], Vs, BsOrig, Bs0, Lf, Ef) -> + {value,V,Bs1} = expr(E, BsOrig, Lf, Ef, none), + expr_list(Es, [V|Vs], BsOrig, merge_bindings(Bs1, Bs0), Lf, Ef); +expr_list([], Vs, _, Bs, _Lf, _Ef) -> + {reverse(Vs),Bs}. + +eval_op(Op, Arg1, Arg2, Bs, Ef, RBs) -> + do_apply({erlang,Op}, [Arg1,Arg2], Bs, Ef, RBs). + +eval_op(Op, Arg, Bs, Ef, RBs) -> + do_apply({erlang,Op}, [Arg], Bs, Ef, RBs). + +%% if_clauses(Clauses, Bindings, LocalFuncHandler, ExtFuncHandler, RBs) + +if_clauses([{clause,_,[],G,B}|Cs], Bs, Lf, Ef, RBs) -> + case guard(G, Bs, Lf, Ef) of + true -> exprs(B, Bs, Lf, Ef, RBs); + false -> if_clauses(Cs, Bs, Lf, Ef, RBs) + end; +if_clauses([], _Bs, _Lf, _Ef, _RBs) -> + erlang:raise(error, if_clause, stacktrace()). + +%% try_clauses(Body, CaseClauses, CatchClauses, AfterBody, Bindings, +%% LocalFuncHandler, ExtFuncHandler, RBs) +%% When/if variable bindings between the different parts of a +%% try-catch expression are introduced this will have to be rewritten. +try_clauses(B, Cases, Catches, AB, Bs, Lf, Ef, RBs) -> + try exprs(B, Bs, Lf, Ef, none) of + {value,V,Bs1} when Cases =:= [] -> + ret_expr(V, Bs1, RBs); + {value,V,Bs1} -> + case match_clause(Cases, [V], Bs1, Lf, Ef) of + {B2,Bs2} -> + exprs(B2, Bs2, Lf, Ef, RBs); + nomatch -> + erlang:raise(error, {try_clause,V}, stacktrace()) + end + catch + Class:Reason when Catches =:= [] -> + %% Rethrow + erlang:raise(Class, Reason, stacktrace()); + Class:Reason -> +%%% %% Set stacktrace +%%% try erlang:raise(Class, Reason, stacktrace()) +%%% catch _:_ -> ok +%%% end, + V = {Class,Reason,erlang:get_stacktrace()}, + case match_clause(Catches, [V],Bs, Lf, Ef) of + {B2,Bs2} -> + exprs(B2, Bs2, Lf, Ef, RBs); + nomatch -> + erlang:raise(Class, Reason, stacktrace()) + end + after + if AB =:= [] -> + Bs; % any + true -> + exprs(AB, Bs, Lf, Ef, none) + end + end. + +%% case_clauses(Value, Clauses, Bindings, LocalFuncHandler, ExtFuncHandler, +%% RBs) + +case_clauses(Val, Cs, Bs, Lf, Ef, RBs) -> + case match_clause(Cs, [Val], Bs, Lf, Ef) of + {B, Bs1} -> + exprs(B, Bs1, Lf, Ef, RBs); + nomatch -> + erlang:raise(error, {case_clause,Val}, stacktrace()) + end. + +%% +%% receive_clauses(Clauses, Bindings, LocalFuncHnd,ExtFuncHnd, Messages, RBs) +%% +receive_clauses(Cs, Bs, Lf, Ef, Ms, RBs) -> + receive + Val -> + case match_clause(Cs, [Val], Bs, Lf, Ef) of + {B, Bs1} -> + merge_queue(Ms), + exprs(B, Bs1, Lf, Ef, RBs); + nomatch -> + receive_clauses(Cs, Bs, Lf, Ef, [Val|Ms], RBs) + end + end. +%% +%% receive_clauses(TimeOut, Clauses, TimeoutBody, Bindings, +%% ExternalFuncHandler, LocalFuncHandler, RBs) +%% +receive_clauses(T, Cs, TB, Bs, Lf, Ef, Ms, RBs) -> + {_,_} = statistics(runtime), + receive + Val -> + case match_clause(Cs, [Val], Bs, Lf, Ef) of + {B, Bs1} -> + merge_queue(Ms), + exprs(B, Bs1, Lf, Ef, RBs); + nomatch -> + {_,T1} = statistics(runtime), + if + T =:= infinity -> + receive_clauses(T, Cs, TB,Bs,Lf,Ef,[Val|Ms],RBs); + T-T1 =< 0 -> + receive_clauses(0, Cs, TB,Bs,Lf,Ef,[Val|Ms],RBs); + true -> + receive_clauses(T-T1, Cs,TB,Bs,Lf,Ef,[Val|Ms],RBs) + end + end + after T -> + merge_queue(Ms), + {B, Bs1} = TB, + exprs(B, Bs1, Lf, Ef, RBs) + end. + +merge_queue([]) -> + true; +merge_queue(Ms) -> + send_all(recv_all(Ms), self()). + +recv_all(Xs) -> + receive + X -> recv_all([X|Xs]) + after 0 -> + reverse(Xs) + end. + +send_all([X|Xs], Self) -> + Self ! X, + send_all(Xs, Self); +send_all([], _) -> true. + + +%% match_clause -> {Body, Bindings} or nomatch + +match_clause(Cs, Vs, Bs, Lf) -> + match_clause(Cs, Vs, Bs, Lf, none). + +match_clause([{clause,_,H,G,B}|Cs], Vals, Bs, Lf, Ef) -> + case match_list(H, Vals, Bs) of + {match, Bs1} -> + case guard(G, Bs1, Lf, Ef) of + true -> {B, Bs1}; + false -> match_clause(Cs, Vals, Bs, Lf, Ef) + end; + nomatch -> match_clause(Cs, Vals, Bs, Lf, Ef) + end; +match_clause([], _Vals, _Bs, _Lf, _Ef) -> + nomatch. + +%% guard(GuardTests, Bindings, LocalFuncHandler, ExtFuncHandler) -> bool() +%% Evaluate a guard. We test if the guard is a true guard. + +guard(L=[G|_], Bs0, Lf, Ef) when is_list(G) -> + guard1(L, Bs0, Lf, Ef); +guard(L, Bs0, Lf, Ef) -> + guard0(L, Bs0, Lf, Ef). + +%% disjunction of guard conjunctions +guard1([G|Gs], Bs0, Lf, Ef) when is_list(G) -> + case guard0(G, Bs0, Lf, Ef) of + true -> + true; + false -> + guard1(Gs, Bs0, Lf, Ef) + end; +guard1([], _Bs, _Lf, _Ef) -> false. + +%% guard conjunction +guard0([G|Gs], Bs0, Lf, Ef) -> + case erl_lint:is_guard_test(G) of + true -> + case guard_test(G, Bs0, Lf, Ef) of + {value,true,Bs} -> guard0(Gs, Bs, Lf, Ef); + {value,false,_} -> false + end; + false -> + erlang:raise(error, guard_expr, stacktrace()) + end; +guard0([], _Bs, _Lf, _Ef) -> true. + +%% guard_test(GuardTest, Bindings, LocalFuncHandler, ExtFuncHandler) -> +%% {value,bool(),NewBindings}. +%% Evaluate one guard test. Never fails, returns bool(). + +guard_test({call,L,{atom,Ln,F},As0}, Bs0, Lf, Ef) -> + TT = type_test(F), + guard_test({call,L,{tuple,Ln,[{atom,Ln,erlang},{atom,Ln,TT}]},As0}, + Bs0, Lf, Ef); +guard_test({call,L,{remote,_Lr,{atom,_Lm,erlang},{atom,_Lf,_F}=T},As0}, + Bs0, Lf, Ef) -> + guard_test({call,L,T,As0}, Bs0, Lf, Ef); +guard_test(G, Bs0, Lf, Ef) -> + try {value,true,_} = expr(G, Bs0, Lf, Ef, none) + catch error:_ -> {value,false,Bs0} end. + +type_test(integer) -> is_integer; +type_test(float) -> is_float; +type_test(number) -> is_number; +type_test(atom) -> is_atom; +type_test(constant) -> is_constant; +type_test(list) -> is_list; +type_test(tuple) -> is_tuple; +type_test(pid) -> is_pid; +type_test(reference) -> is_reference; +type_test(port) -> is_port; +type_test(function) -> is_function; +type_test(binary) -> is_binary; +type_test(record) -> is_record; +type_test(Test) -> Test. + + +%% match(Pattern, Term, Bindings) -> +%% {match,NewBindings} | nomatch +%% or erlang:error({illegal_pattern, Pattern}). +%% Try to match Pattern against Term with the current bindings. + +match(Pat, Term, Bs) -> + match(Pat, Term, Bs, Bs). + +%% Bs are the bindings that are augmented with new bindings. BBs are +%% the bindings used for "binsize" variables (in <>, Y is a +%% binsize variable). + +match(Pat, Term, Bs, BBs) -> + case catch match1(Pat, Term, Bs, BBs) of + invalid -> + erlang:raise(error, {illegal_pattern,Pat}, stacktrace()); + Other -> + Other + end. + +string_to_conses([], _, Tail) -> Tail; +string_to_conses([E|Rest], Line, Tail) -> + {cons, Line, {integer, Line, E}, string_to_conses(Rest, Line, Tail)}. + +match1({atom,_,A0}, A, Bs, _BBs) -> + case A of + A0 -> {match,Bs}; + _ -> throw(nomatch) + end; +match1({integer,_,I0}, I, Bs, _BBs) -> + case I of + I0 -> {match,Bs}; + _ -> throw(nomatch) + end; +match1({float,_,F0}, F, Bs, _BBs) -> + case F of + F0 -> {match,Bs}; + _ -> throw(nomatch) + end; +match1({char,_,C0}, C, Bs, _BBs) -> + case C of + C0 -> {match,Bs}; + _ -> throw(nomatch) + end; +match1({var,_,'_'}, _, Bs, _BBs) -> %Anonymous variable matches + {match,Bs}; % everything, no new bindings +match1({var,_,Name}, Term, Bs, _BBs) -> + case binding(Name, Bs) of + {value,Term} -> + {match,Bs}; + {value,_} -> + throw(nomatch); + unbound -> + {match,add_binding(Name, Term, Bs)} + end; +match1({match,_,Pat1,Pat2}, Term, Bs0, BBs) -> + {match, Bs1} = match1(Pat1, Term, Bs0, BBs), + match1(Pat2, Term, Bs1, BBs); +match1({string,_,S0}, S, Bs, _BBs) -> + case S of + S0 -> {match,Bs}; + _ -> throw(nomatch) + end; +match1({nil,_}, Nil, Bs, _BBs) -> + case Nil of + [] -> {match,Bs}; + _ -> throw(nomatch) + end; +match1({cons,_,H,T}, [H1|T1], Bs0, BBs) -> + {match,Bs} = match1(H, H1, Bs0, BBs), + match1(T, T1, Bs, BBs); +match1({cons,_,_,_}, _, _Bs, _BBs) -> + throw(nomatch); +match1({tuple,_,Elts}, Tuple, Bs, BBs) + when length(Elts) =:= tuple_size(Tuple) -> + match_tuple(Elts, Tuple, 1, Bs, BBs); +match1({tuple,_,_}, _, _Bs, _BBs) -> + throw(nomatch); +match1({bin, _, Fs}, <<_/bitstring>>=B, Bs0, BBs) -> + eval_bits:match_bits(Fs, B, Bs0, BBs, + fun(L, R, Bs) -> match1(L, R, Bs, BBs) end, + fun(E, Bs) -> expr(E, Bs, none, none, none) end); +match1({bin,_,_}, _, _Bs, _BBs) -> + throw(nomatch); +match1({op,_,'++',{nil,_},R}, Term, Bs, BBs) -> + match1(R, Term, Bs, BBs); +match1({op,_,'++',{cons,Li,{integer,L2,I},T},R}, Term, Bs, BBs) -> + match1({cons,Li,{integer,L2,I},{op,Li,'++',T,R}}, Term, Bs, BBs); +match1({op,_,'++',{cons,Li,{char,L2,C},T},R}, Term, Bs, BBs) -> + match1({cons,Li,{char,L2,C},{op,Li,'++',T,R}}, Term, Bs, BBs); +match1({op,_,'++',{string,Li,L},R}, Term, Bs, BBs) -> + match1(string_to_conses(L, Li, R), Term, Bs, BBs); +match1({op,Line,Op,A}, Term, Bs, BBs) -> + case partial_eval({op,Line,Op,A}) of + {op,Line,Op,A} -> + throw(invalid); + X -> + match1(X, Term, Bs, BBs) + end; +match1({op,Line,Op,L,R}, Term, Bs, BBs) -> + case partial_eval({op,Line,Op,L,R}) of + {op,Line,Op,L,R} -> + throw(invalid); + X -> + match1(X, Term, Bs, BBs) + end; +match1(_, _, _Bs, _BBs) -> + throw(invalid). + +match_tuple([E|Es], Tuple, I, Bs0, BBs) -> + {match,Bs} = match1(E, element(I, Tuple), Bs0, BBs), + match_tuple(Es, Tuple, I+1, Bs, BBs); +match_tuple([], _, _, Bs, _BBs) -> + {match,Bs}. + +%% match_list(PatternList, TermList, Bindings) -> +%% {match,NewBindings} | nomatch +%% Try to match a list of patterns against a list of terms with the +%% current bindings. + +match_list(Ps, Ts, Bs) -> + match_list(Ps, Ts, Bs, Bs). + +match_list([P|Ps], [T|Ts], Bs0, BBs) -> + case match(P, T, Bs0, BBs) of + {match,Bs1} -> match_list(Ps, Ts, Bs1, BBs); + nomatch -> nomatch + end; +match_list([], [], Bs, _BBs) -> + {match,Bs}; +match_list(_, _, _Bs, _BBs) -> + nomatch. + +%% new_bindings() +%% bindings(Bindings) +%% binding(Name, Bindings) +%% add_binding(Name, Value, Bindings) +%% del_binding(Name, Bindings) + +new_bindings() -> orddict:new(). + +bindings(Bs) -> orddict:to_list(Bs). + +binding(Name, Bs) -> + case orddict:find(Name, Bs) of + {ok,Val} -> {value,Val}; + error -> unbound + end. + +add_binding(Name, Val, Bs) -> orddict:store(Name, Val, Bs). + +del_binding(Name, Bs) -> orddict:erase(Name, Bs). + +add_bindings(Bs1, Bs2) -> + foldl(fun ({Name,Val}, Bs) -> orddict:store(Name, Val, Bs) end, + Bs2, orddict:to_list(Bs1)). + +merge_bindings(Bs1, Bs2) -> + foldl(fun ({Name,Val}, Bs) -> + case orddict:find(Name, Bs) of + {ok,Val} -> Bs; %Already with SAME value + {ok,V1} -> + erlang:raise(error, {badmatch,V1}, stacktrace()); + error -> orddict:store(Name, Val, Bs) + end end, + Bs2, orddict:to_list(Bs1)). + +%% del_bindings(Bs1, Bs2) -> % del all in Bs1 from Bs2 +%% orddict:fold( +%% fun (Name, Val, Bs) -> +%% case orddict:find(Name, Bs) of +%% {ok,Val} -> orddict:erase(Name, Bs); +%% {ok,V1} -> erlang:raise(error,{badmatch,V1},stacktrace()); +%% error -> Bs +%% end +%% end, Bs2, Bs1). +%%---------------------------------------------------------------------------- +%% +%% Evaluate expressions: +%% constants and +%% op A +%% L op R +%% Things that evaluate to constants are accepted +%% and guard_bifs are allowed in constant expressions +%%---------------------------------------------------------------------------- + +is_constant_expr(Expr) -> + case eval_expr(Expr) of + {ok, X} when is_number(X) -> true; + _ -> false + end. + +eval_expr(Expr) -> + case catch ev_expr(Expr) of + X when is_integer(X) -> {ok, X}; + X when is_float(X) -> {ok, X}; + X when is_atom(X) -> {ok,X}; + {'EXIT',Reason} -> {error, Reason}; + _ -> {error, badarg} + end. + +partial_eval(Expr) -> + Line = line(Expr), + case catch ev_expr(Expr) of + X when is_integer(X) -> ret_expr(Expr,{integer,Line,X}); + X when is_float(X) -> ret_expr(Expr,{float,Line,X}); + X when is_atom(X) -> ret_expr(Expr,{atom,Line,X}); + _ -> + Expr + end. + +ev_expr({op,_,Op,L,R}) -> erlang:Op(ev_expr(L), ev_expr(R)); +ev_expr({op,_,Op,A}) -> erlang:Op(ev_expr(A)); +ev_expr({integer,_,X}) -> X; +ev_expr({float,_,X}) -> X; +ev_expr({atom,_,X}) -> X; +ev_expr({tuple,_,Es}) -> + list_to_tuple([ev_expr(X) || X <- Es]); +ev_expr({nil,_}) -> []; +ev_expr({cons,_,H,T}) -> [ev_expr(H) | ev_expr(T)]. +%%ev_expr({call,Line,{atom,_,F},As}) -> +%% true = erl_internal:guard_bif(F, length(As)), +%% apply(erlang, F, [ev_expr(X) || X <- As]); +%%ev_expr({call,Line,{remote,_,{atom,_,erlang},{atom,_,F}},As}) -> +%% true = erl_internal:guard_bif(F, length(As)), +%% apply(erlang, F, [ev_expr(X) || X <- As]); + +ret_expr(_Old, New) -> + %% io:format("~w: reduced ~s => ~s~n", + %% [line(Old), erl_pp:expr(Old), erl_pp:expr(New)]), + New. + +line(Expr) -> element(2, Expr). + +%% In syntax trees, module/package names are atoms or lists of atoms. + +expand_module_name({atom,L,A} = M, Bs) -> + case binding({module,A}, Bs) of + {value, A1} -> + {atom,L,A1}; + unbound -> + case packages:is_segmented(A) of + true -> + M; + false -> +%%% P = case binding({module,'$package'}, Bs) of +%%% {value, P1} -> P1; +%%% unbound -> "" +%%% end, +%%% A1 = list_to_atom(packages:concat(P, A)), +%%% {atom,L,list_to_atom(A1)} + {atom,L,A} + end + end; +expand_module_name(M, _) -> + case erl_parse:package_segments(M) of + error -> + M; + M1 -> + L = element(2,M), + Mod = packages:concat(M1), + case packages:is_valid(Mod) of + true -> + {atom,L,list_to_atom(Mod)}; + false -> + erlang:raise(error, {bad_module_name, Mod}, stacktrace()) + end + end. + +%% {?MODULE,expr,3} is still the stacktrace, despite the +%% fact that expr() now takes two, three or four arguments... +stacktrace() -> [{?MODULE,expr,3}]. diff --git a/lib/stdlib/src/erl_expand_records.erl b/lib/stdlib/src/erl_expand_records.erl new file mode 100644 index 0000000000..6fa77f2c3b --- /dev/null +++ b/lib/stdlib/src/erl_expand_records.erl @@ -0,0 +1,808 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Purpose : Expand records into tuples. + +%% N.B. Although structs (tagged tuples) are not yet allowed in the +%% language there is code included in pattern/2 and expr/3 (commented out) +%% that handles them. + +-module(erl_expand_records). + +-export([module/2]). + +-import(lists, [map/2,foldl/3,foldr/3,sort/1,reverse/1,duplicate/2]). + +-record(exprec, {compile=[], % Compile flags + vcount=0, % Variable counter + imports=[], % Imports + records=dict:new(), % Record definitions + trecords=sets:new(), % Typed records + uses_types=false, % Are there -spec or -type in the module + strict_ra=[], % strict record accesses + checked_ra=[] % succesfully accessed records + }). + +%% Is is assumed that Fs is a valid list of forms. It should pass +%% erl_lint without errors. +module(Fs0, Opts0) -> + Opts = compiler_options(Fs0) ++ Opts0, + TRecs = typed_records(Fs0), + UsesTypes = uses_types(Fs0), + St0 = #exprec{compile = Opts, trecords = TRecs, uses_types = UsesTypes}, + {Fs,_St} = forms(Fs0, St0), + Fs. + +compiler_options(Forms) -> + lists:flatten([C || {attribute,_,compile,C} <- Forms]). + +typed_records(Fs) -> + typed_records(Fs, sets:new()). + +typed_records([{attribute,_L,type,{{record, Name},_Defs,[]}} | Fs], Trecs) -> + typed_records(Fs, sets:add_element(Name, Trecs)); +typed_records([_|Fs], Trecs) -> + typed_records(Fs, Trecs); +typed_records([], Trecs) -> + Trecs. + +uses_types([{attribute,_L,spec,_}|_]) -> true; +uses_types([{attribute,_L,type,_}|_]) -> true; +uses_types([{attribute,_L,opaque,_}|_]) -> true; +uses_types([_|Fs]) -> uses_types(Fs); +uses_types([]) -> false. + +forms([{attribute,L,record,{Name,Defs}} | Fs], St0) -> + NDefs = normalise_fields(Defs), + St = St0#exprec{records=dict:store(Name, NDefs, St0#exprec.records)}, + {Fs1, St1} = forms(Fs, St), + %% Check if we need to keep the record information for usage in types. + case St#exprec.uses_types of + true -> + case sets:is_element(Name, St#exprec.trecords) of + true -> {Fs1, St1}; + false -> {[{attribute,L,type,{{record,Name},Defs,[]}}|Fs1], St1} + end; + false -> + {Fs1, St1} + end; +forms([{attribute,L,import,Is} | Fs0], St0) -> + St1 = import(Is, St0), + {Fs,St2} = forms(Fs0, St1), + {[{attribute,L,import,Is} | Fs], St2}; +forms([{function,L,N,A,Cs0} | Fs0], St0) -> + {Cs,St1} = clauses(Cs0, St0), + {Fs,St2} = forms(Fs0, St1), + {[{function,L,N,A,Cs} | Fs],St2}; +forms([F | Fs0], St0) -> + {Fs,St} = forms(Fs0, St0), + {[F | Fs], St}; +forms([], St) -> {[],St}. + +clauses([{clause,Line,H0,G0,B0} | Cs0], St0) -> + {H,St1} = head(H0, St0), + {G,St2} = guard(G0, St1), + {B,St3} = exprs(B0, St2), + {Cs,St4} = clauses(Cs0, St3), + {[{clause,Line,H,G,B} | Cs],St4}; +clauses([], St) -> {[],St}. + +head(As, St) -> pattern_list(As, St). + +pattern({var,_,'_'}=Var, St) -> + {Var,St}; +pattern({var,_,_}=Var, St) -> + {Var,St}; +pattern({char,_,_}=Char, St) -> + {Char,St}; +pattern({integer,_,_}=Int, St) -> + {Int,St}; +pattern({float,_,_}=Float, St) -> + {Float,St}; +pattern({atom,_,_}=Atom, St) -> + {Atom,St}; +pattern({string,_,_}=String, St) -> + {String,St}; +pattern({nil,_}=Nil, St) -> + {Nil,St}; +pattern({cons,Line,H,T}, St0) -> + {TH,St1} = pattern(H, St0), + {TT,St2} = pattern(T, St1), + {{cons,Line,TH,TT},St2}; +pattern({tuple,Line,Ps}, St0) -> + {TPs,St1} = pattern_list(Ps, St0), + {{tuple,Line,TPs},St1}; +%%pattern({struct,Line,Tag,Ps}, St0) -> +%% {TPs,TPsvs,St1} = pattern_list(Ps, St0), +%% {{struct,Line,Tag,TPs},TPsvs,St1}; +pattern({record_field,_,_,_}=M, St) -> + {M,St}; % must be a package name +pattern({record_index,Line,Name,Field}, St) -> + {index_expr(Line, Field, Name, record_fields(Name, St)),St}; +pattern({record,Line,Name,Pfs}, St0) -> + Fs = record_fields(Name, St0), + {TMs,St1} = pattern_list(pattern_fields(Fs, Pfs), St0), + {{tuple,Line,[{atom,Line,Name} | TMs]},St1}; +pattern({bin,Line,Es0}, St0) -> + {Es1,St1} = pattern_bin(Es0, St0), + {{bin,Line,Es1},St1}; +pattern({match,Line,Pat1, Pat2}, St0) -> + {TH,St1} = pattern(Pat2, St0), + {TT,St2} = pattern(Pat1, St1), + {{match,Line,TT,TH},St2}; +pattern({op,Line,Op,A0}, St0) -> + {A,St1} = pattern(A0, St0), + {{op,Line,Op,A},St1}; +pattern({op,Line,Op,L0,R0}, St0) -> + {L,St1} = pattern(L0, St0), + {R,St2} = pattern(R0, St1), + {{op,Line,Op,L,R},St2}. + +pattern_list([P0 | Ps0], St0) -> + {P,St1} = pattern(P0, St0), + {Ps,St2} = pattern_list(Ps0, St1), + {[P | Ps],St2}; +pattern_list([], St) -> {[],St}. + +guard([G0 | Gs0], St0) -> + {G,St1} = guard_tests(G0, St0), + {Gs,St2} = guard(Gs0, St1), + {[G | Gs],St2}; +guard([], St) -> {[],St}. + +guard_tests(Gts0, St0) -> + {Gts1,St1} = guard_tests1(Gts0, St0), + {Gts1,St1#exprec{checked_ra = []}}. + +guard_tests1([Gt0 | Gts0], St0) -> + {Gt1,St1} = guard_test(Gt0, St0), + {Gts1,St2} = guard_tests1(Gts0, St1), + {[Gt1 | Gts1],St2}; +guard_tests1([], St) -> {[],St}. + +guard_test(G0, St0) -> + in_guard(fun() -> + {G1,St1} = guard_test1(G0, St0), + strict_record_access(G1, St1) + end). + +%% Normalising guard tests ensures that none of the Boolean operands +%% created by strict_record_access/2 calls any of the old guard tests. +guard_test1({call,Line,{atom,Lt,Tname},As}, St) -> + Test = {atom,Lt,normalise_test(Tname, length(As))}, + expr({call,Line,Test,As}, St); +guard_test1(Test, St) -> + expr(Test, St). + +normalise_test(atom, 1) -> is_atom; +normalise_test(binary, 1) -> is_binary; +normalise_test(constant, 1) -> is_constant; +normalise_test(float, 1) -> is_float; +normalise_test(function, 1) -> is_function; +normalise_test(integer, 1) -> is_integer; +normalise_test(list, 1) -> is_list; +normalise_test(number, 1) -> is_number; +normalise_test(pid, 1) -> is_pid; +normalise_test(port, 1) -> is_port; +normalise_test(record, 2) -> is_record; +normalise_test(reference, 1) -> is_reference; +normalise_test(tuple, 1) -> is_tuple; +normalise_test(Name, _) -> Name. + +is_in_guard() -> + get(erl_expand_records_in_guard) =/= undefined. + +in_guard(F) -> + undefined = put(erl_expand_records_in_guard, true), + Res = F(), + true = erase(erl_expand_records_in_guard), + Res. + +%% record_test(Line, Term, Name, Vs, St) -> TransformedExpr +%% Generate code for is_record/1. + +record_test(Line, Term, Name, St) -> + case is_in_guard() of + false -> + record_test_in_body(Line, Term, Name, St); + true -> + record_test_in_guard(Line, Term, Name, St) + end. + +record_test_in_guard(Line, Term, Name, St) -> + case not_a_tuple(Term) of + true -> + %% In case that later optimization passes have been turned off. + expr({atom,Line,false}, St); + false -> + Fs = record_fields(Name, St), + NLine = neg_line(Line), + expr({call,NLine,{remote,NLine,{atom,NLine,erlang},{atom,NLine,is_record}}, + [Term,{atom,Line,Name},{integer,Line,length(Fs)+1}]}, + St) + end. + +not_a_tuple({atom,_,_}) -> true; +not_a_tuple({integer,_,_}) -> true; +not_a_tuple({float,_,_}) -> true; +not_a_tuple({nil,_}) -> true; +not_a_tuple({cons,_,_,_}) -> true; +not_a_tuple({char,_,_}) -> true; +not_a_tuple({string,_,_}) -> true; +not_a_tuple({record_index,_,_,_}) -> true; +not_a_tuple({bin,_,_}) -> true; +not_a_tuple({op,_,_,_}) -> true; +not_a_tuple({op,_,_,_,_}) -> true; +not_a_tuple(_) -> false. + +record_test_in_body(Line, Expr, Name, St0) -> + %% As Expr may have side effects, we must evaluate it + %% first and bind the value to a new variable. + %% We must use also handle the case that Expr does not + %% evaluate to a tuple properly. + Fs = record_fields(Name, St0), + {Var,St} = new_var(Line, St0), + NLine = neg_line(Line), + expr({block,Line, + [{match,Line,Var,Expr}, + {call,NLine,{remote,NLine,{atom,NLine,erlang}, + {atom,NLine,is_record}}, + [Var,{atom,Line,Name},{integer,Line,length(Fs)+1}]}]}, St). + +exprs([E0 | Es0], St0) -> + {E,St1} = expr(E0, St0), + {Es,St2} = exprs(Es0, St1), + {[E | Es],St2}; +exprs([], St) -> {[],St}. + +expr({var,_,_}=Var, St) -> + {Var,St}; +expr({char,_,_}=Char, St) -> + {Char,St}; +expr({integer,_,_}=Int, St) -> + {Int,St}; +expr({float,_,_}=Float, St) -> + {Float,St}; +expr({atom,_,_}=Atom, St) -> + {Atom,St}; +expr({string,_,_}=String, St) -> + {String,St}; +expr({nil,_}=Nil, St) -> + {Nil,St}; +expr({cons,Line,H0,T0}, St0) -> + {H,St1} = expr(H0, St0), + {T,St2} = expr(T0, St1), + {{cons,Line,H,T},St2}; +expr({lc,Line,E0,Qs0}, St0) -> + {Qs1,St1} = lc_tq(Line, Qs0, St0), + {E1,St2} = expr(E0, St1), + {{lc,Line,E1,Qs1},St2}; +expr({bc,Line,E0,Qs0}, St0) -> + {Qs1,St1} = lc_tq(Line, Qs0, St0), + {E1,St2} = expr(E0, St1), + {{bc,Line,E1,Qs1},St2}; +expr({tuple,Line,Es0}, St0) -> + {Es1,St1} = expr_list(Es0, St0), + {{tuple,Line,Es1},St1}; +%%expr({struct,Line,Tag,Es0}, Vs, St0) -> +%% {Es1,Esvs,Esus,St1} = expr_list(Es0, Vs, St0), +%% {{struct,Line,Tag,Es1},Esvs,Esus,St1}; +expr({record_field,_,_,_}=M, St) -> + {M,St}; % must be a package name +expr({record_index,Line,Name,F}, St) -> + I = index_expr(Line, F, Name, record_fields(Name, St)), + expr(I, St); +expr({record,Line,Name,Is}, St) -> + expr({tuple,Line,[{atom,Line,Name} | + record_inits(record_fields(Name, St), Is)]}, + St); +expr({record_field,Line,R,Name,F}, St) -> + get_record_field(Line, R, F, Name, St); +expr({record,_,R,Name,Us}, St0) -> + {Ue,St1} = record_update(R, Name, record_fields(Name, St0), Us, St0), + expr(Ue, St1); +expr({bin,Line,Es0}, St0) -> + {Es1,St1} = expr_bin(Es0, St0), + {{bin,Line,Es1},St1}; +expr({block,Line,Es0}, St0) -> + {Es,St1} = exprs(Es0, St0), + {{block,Line,Es},St1}; +expr({'if',Line,Cs0}, St0) -> + {Cs,St1} = clauses(Cs0, St0), + {{'if',Line,Cs},St1}; +expr({'case',Line,E0,Cs0}, St0) -> + {E,St1} = expr(E0, St0), + {Cs,St2} = clauses(Cs0, St1), + {{'case',Line,E,Cs},St2}; +expr({'receive',Line,Cs0}, St0) -> + {Cs,St1} = clauses(Cs0, St0), + {{'receive',Line,Cs},St1}; +expr({'receive',Line,Cs0,To0,ToEs0}, St0) -> + {To,St1} = expr(To0, St0), + {ToEs,St2} = exprs(ToEs0, St1), + {Cs,St3} = clauses(Cs0, St2), + {{'receive',Line,Cs,To,ToEs},St3}; +expr({'fun',_,{function,_F,_A}}=Fun, St) -> + {Fun,St}; +expr({'fun',_,{function,_M,_F,_A}}=Fun, St) -> + {Fun,St}; +expr({'fun',Line,{clauses,Cs0}}, St0) -> + {Cs,St1} = clauses(Cs0, St0), + {{'fun',Line,{clauses,Cs}},St1}; +expr({call,Line,{atom,_,is_record},[A,{atom,_,Name}]}, St) -> + record_test(Line, A, Name, St); +expr({'cond',Line,Cs0}, St0) -> + {Cs,St1} = clauses(Cs0, St0), + {{'cond',Line,Cs},St1}; +expr({call,Line,{remote,_,{atom,_,erlang},{atom,_,is_record}}, + [A,{atom,_,Name}]}, St) -> + record_test(Line, A, Name, St); +expr({call,Line,{tuple,_,[{atom,_,erlang},{atom,_,is_record}]}, + [A,{atom,_,Name}]}, St) -> + record_test(Line, A, Name, St); +expr({call,Line,{atom,_La,N}=Atom,As0}, St0) -> + {As,St1} = expr_list(As0, St0), + Ar = length(As), + case erl_internal:bif(N, Ar) of + true -> + {{call,Line,Atom,As},St1}; + false -> + case imported(N, Ar, St1) of + {yes,_Mod} -> + {{call,Line,Atom,As},St1}; + no -> + case {N,Ar} of + {record_info,2} -> + record_info_call(Line, As, St1); + _ -> + {{call,Line,Atom,As},St1} + end + end + end; +expr({call,Line,{record_field,_,_,_}=M,As0}, St0) -> + {As,St1} = expr_list(As0, St0), + {{call,Line,M,As},St1}; +expr({call,Line,{remote,Lr,M,F},As0}, St0) -> + {[M1,F1 | As1],St1} = expr_list([M,F | As0], St0), + {{call,Line,{remote,Lr,M1,F1},As1},St1}; +expr({call,Line,{tuple,Lt,[{atom,_,_}=M,{atom,_,_}=F]},As0}, St0) -> + {As,St1} = expr_list(As0, St0), + {{call,Line,{tuple,Lt,[M,F]},As},St1}; +expr({call,Line,F,As0}, St0) -> + {[Fun1 | As1],St1} = expr_list([F | As0], St0), + {{call,Line,Fun1,As1},St1}; +expr({'try',Line,Es0,Scs0,Ccs0,As0}, St0) -> + {Es1,St1} = exprs(Es0, St0), + {Scs1,St2} = clauses(Scs0, St1), + {Ccs1,St3} = clauses(Ccs0, St2), + {As1,St4} = exprs(As0, St3), + {{'try',Line,Es1,Scs1,Ccs1,As1},St4}; +expr({'catch',Line,E0}, St0) -> + {E,St1} = expr(E0, St0), + {{'catch',Line,E},St1}; +expr({match,Line,P0,E0}, St0) -> + {E,St1} = expr(E0, St0), + {P,St2} = pattern(P0, St1), + {{match,Line,P,E},St2}; +expr({op,Line,'not',A0}, St0) -> + {A,St1} = bool_operand(A0, St0), + {{op,Line,'not',A},St1}; +expr({op,Line,Op,A0}, St0) -> + {A,St1} = expr(A0, St0), + {{op,Line,Op,A},St1}; +expr({op,Line,Op,L0,R0}, St0) when Op =:= 'and'; + Op =:= 'or' -> + {L,St1} = bool_operand(L0, St0), + {R,St2} = bool_operand(R0, St1), + {{op,Line,Op,L,R},St2}; +expr({op,Line,Op,L0,R0}, St0) when Op =:= 'andalso'; + Op =:= 'orelse' -> + {L,St1} = bool_operand(L0, St0), + {R,St2} = bool_operand(R0, St1), + {{op,Line,Op,L,R},St2#exprec{checked_ra = St1#exprec.checked_ra}}; +expr({op,Line,Op,L0,R0}, St0) -> + {L,St1} = expr(L0, St0), + {R,St2} = expr(R0, St1), + {{op,Line,Op,L,R},St2}. + +expr_list([E0 | Es0], St0) -> + {E,St1} = expr(E0, St0), + {Es,St2} = expr_list(Es0, St1), + {[E | Es],St2}; +expr_list([], St) -> {[],St}. + +bool_operand(E0, St0) -> + {E1,St1} = expr(E0, St0), + strict_record_access(E1, St1). + +strict_record_access(E, #exprec{strict_ra = []} = St) -> + {E, St}; +strict_record_access(E0, St0) -> + #exprec{strict_ra = StrictRA, checked_ra = CheckedRA} = St0, + {New,NC} = lists:foldl(fun ({Key,_L,_R,_Sz}=A, {L,C}) -> + case lists:keymember(Key, 1, C) of + true -> {L,C}; + false -> {[A|L],[A|C]} + end + end, {[],CheckedRA}, StrictRA), + E1 = if New =:= [] -> E0; true -> conj(New, E0) end, + St1 = St0#exprec{strict_ra = [], checked_ra = NC}, + expr(E1, St1). + +%% Make it look nice (?) when compiled with the 'E' flag +%% ('and'/2 is left recursive). +conj([], _E) -> + empty; +conj([{{Name,_Rp},L,R,Sz} | AL], E) -> + NL = neg_line(L), + T1 = {op,NL,'orelse', + {call,NL,{atom,NL,is_record},[R,{atom,NL,Name},{integer,NL,Sz}]}, + {atom,NL,fail}}, + T2 = case conj(AL, none) of + empty -> T1; + C -> {op,NL,'and',C,T1} + end, + case E of + none -> + case T2 of + {op,_,'and',_,_} -> + T2; + _ -> + %% Wrap the 'orelse' expression in an dummy 'and true' to make + %% sure that the entire guard fails if the 'orelse' + %% expression returns 'fail'. ('orelse' used to verify + %% that its right operand was a boolean, but that is no + %% longer the case.) + {op,NL,'and',T2,{atom,NL,true}} + end; + _ -> + {op,NL,'and',T2,E} + end. + +%% lc_tq(Line, Qualifiers, State) -> +%% {[TransQual],State'} + +lc_tq(Line, [{generate,Lg,P0,G0} | Qs0], St0) -> + {G1,St1} = expr(G0, St0), + {P1,St2} = pattern(P0, St1), + {Qs1,St3} = lc_tq(Line, Qs0, St2), + {[{generate,Lg,P1,G1} | Qs1],St3}; +lc_tq(Line, [{b_generate,Lg,P0,G0} | Qs0], St0) -> + {G1,St1} = expr(G0, St0), + {P1,St2} = pattern(P0, St1), + {Qs1,St3} = lc_tq(Line, Qs0, St2), + {[{b_generate,Lg,P1,G1} | Qs1],St3}; +lc_tq(Line, [F0 | Qs0], St0) -> + %% Allow record/2 and expand out as guard test. + case erl_lint:is_guard_test(F0) of + true -> + {F1,St1} = guard_test(F0, St0), + {Qs1,St2} = lc_tq(Line, Qs0, St1), + {[F1|Qs1],St2}; + false -> + {F1,St1} = expr(F0, St0), + {Qs1,St2} = lc_tq(Line, Qs0, St1), + {[F1 | Qs1],St2} + end; +lc_tq(_Line, [], St0) -> + {[],St0#exprec{checked_ra = []}}. + + +%% normalise_fields([RecDef]) -> [Field]. +%% Normalise the field definitions to always have a default value. If +%% none has been given then use 'undefined'. + +normalise_fields(Fs) -> + map(fun ({record_field,Lf,Field}) -> + {record_field,Lf,Field,{atom,Lf,undefined}}; + ({typed_record_field,{record_field,Lf,Field},_Type}) -> + {record_field,Lf,Field,{atom,Lf,undefined}}; + ({typed_record_field,Field,_Type}) -> + Field; + (F) -> F + end, Fs). + +%% record_fields(RecordName, State) +%% find_field(FieldName, Fields) + +record_fields(R, St) -> dict:fetch(R, St#exprec.records). + +find_field(F, [{record_field,_,{atom,_,F},Val} | _]) -> {ok,Val}; +find_field(F, [_ | Fs]) -> find_field(F, Fs); +find_field(_, []) -> error. + +%% field_names(RecFields) -> [Name]. +%% Return a list of the field names structures. + +field_names(Fs) -> + map(fun ({record_field,_,Field,_Val}) -> Field end, Fs). + +%% index_expr(Line, FieldExpr, Name, Fields) -> IndexExpr. +%% Return an expression which evaluates to the index of a +%% field. Currently only handle the case where the field is an +%% atom. This expansion must be passed through expr again. + +index_expr(Line, {atom,_,F}, _Name, Fs) -> + {integer,Line,index_expr(F, Fs, 2)}. + +index_expr(F, [{record_field,_,{atom,_,F},_} | _], I) -> I; +index_expr(F, [_ | Fs], I) -> index_expr(F, Fs, I+1). + +%% get_record_field(Line, RecExpr, FieldExpr, Name, St) -> {Expr,St'}. +%% Return an expression which verifies that the type of record +%% is correct and then returns the value of the field. +%% This expansion must be passed through expr again. + +get_record_field(Line, R, Index, Name, St) -> + case strict_record_tests(St#exprec.compile) of + false -> + sloppy_get_record_field(Line, R, Index, Name, St); + true -> + strict_get_record_field(Line, R, Index, Name, St) + end. + +strict_get_record_field(Line, R, {atom,_,F}=Index, Name, St0) -> + case is_in_guard() of + false -> %Body context. + {Var,St} = new_var(Line, St0), + Fs = record_fields(Name, St), + I = index_expr(F, Fs, 2), + P = record_pattern(2, I, Var, length(Fs)+1, Line, [{atom,Line,Name}]), + NLine = neg_line(Line), + E = {'case',NLine,R, + [{clause,NLine,[{tuple,NLine,P}],[],[Var]}, + {clause,NLine,[{var,NLine,'_'}],[], + [{call,NLine,{remote,NLine, + {atom,NLine,erlang}, + {atom,NLine,error}}, + [{tuple,NLine,[{atom,NLine,badrecord},{atom,NLine,Name}]}]}]}]}, + expr(E, St); + true -> %In a guard. + Fs = record_fields(Name, St0), + I = index_expr(Line, Index, Name, Fs), + {ExpR,St1} = expr(R, St0), + %% Just to make comparison simple: + ExpRp = erl_lint:modify_line(ExpR, fun(_L) -> 0 end), + RA = {{Name,ExpRp},Line,ExpR,length(Fs)+1}, + St2 = St1#exprec{strict_ra = [RA | St1#exprec.strict_ra]}, + {{call,Line,{atom,Line,element},[I,ExpR]},St2} + end. + +record_pattern(I, I, Var, Sz, Line, Acc) -> + record_pattern(I+1, I, Var, Sz, Line, [Var | Acc]); +record_pattern(Cur, I, Var, Sz, Line, Acc) when Cur =< Sz -> + record_pattern(Cur+1, I, Var, Sz, Line, [{var,Line,'_'} | Acc]); +record_pattern(_, _, _, _, _, Acc) -> reverse(Acc). + +sloppy_get_record_field(Line, R, Index, Name, St) -> + Fs = record_fields(Name, St), + I = index_expr(Line, Index, Name, Fs), + expr({call,Line,{atom,Line,element},[I,R]}, St). + +strict_record_tests([strict_record_tests | _]) -> true; +strict_record_tests([no_strict_record_tests | _]) -> false; +strict_record_tests([_ | Os]) -> strict_record_tests(Os); +strict_record_tests([]) -> true. %Default. + +strict_record_updates([strict_record_updates | _]) -> true; +strict_record_updates([no_strict_record_updates | _]) -> false; +strict_record_updates([_ | Os]) -> strict_record_updates(Os); +strict_record_updates([]) -> false. %Default. + +%% pattern_fields([RecDefField], [Match]) -> [Pattern]. +%% Build a list of match patterns for the record tuple elements. +%% This expansion must be passed through pattern again. N.B. We are +%% scanning the record definition field list! + +pattern_fields(Fs, Ms) -> + Wildcard = record_wildcard_init(Ms), + map(fun ({record_field,L,{atom,_,F},_}) -> + case find_field(F, Ms) of + {ok,Match} -> Match; + error when Wildcard =:= none -> {var,L,'_'}; + error -> Wildcard + end + end, Fs). + +%% record_inits([RecDefField], [Init]) -> [InitExpr]. +%% Build a list of initialisation expressions for the record tuple +%% elements. This expansion must be passed through expr +%% again. N.B. We are scanning the record definition field list! + +record_inits(Fs, Is) -> + WildcardInit = record_wildcard_init(Is), + map(fun ({record_field,_,{atom,_,F},D}) -> + case find_field(F, Is) of + {ok,Init} -> Init; + error when WildcardInit =:= none -> D; + error -> WildcardInit + end + end, Fs). + +record_wildcard_init([{record_field,_,{var,_,'_'},D} | _]) -> D; +record_wildcard_init([_ | Is]) -> record_wildcard_init(Is); +record_wildcard_init([]) -> none. + +%% record_update(Record, RecordName, [RecDefField], [Update], State) -> +%% {Expr,State'} +%% Build an expression to update fields in a record returning a new +%% record. Try to be smart and optimise this. This expansion must be +%% passed through expr again. + +record_update(R, Name, Fs, Us0, St0) -> + Line = element(2, R), + {Pre,Us,St1} = record_exprs(Us0, St0), + Nf = length(Fs), %# of record fields + Nu = length(Us), %# of update fields + Nc = Nf - Nu, %# of copy fields + + %% We need a new variable for the record expression + %% to guarantee that it is only evaluated once. + {Var,St2} = new_var(Line, St1), + + StrictUpdates = strict_record_updates(St2#exprec.compile), + + %% Try to be intelligent about which method of updating record to use. + {Update,St} = + if + Nu =:= 0 -> + record_match(Var, Name, Line, Fs, Us, St2); + Nu =< Nc, not StrictUpdates -> %Few fields updated + {record_setel(Var, Name, Fs, Us), St2}; + true -> %The wide area inbetween + record_match(Var, Name, element(2, hd(Us)), Fs, Us, St2) + end, + {{block,Line,Pre ++ [{match,Line,Var,R},Update]},St}. + +%% record_match(Record, RecordName, [RecDefField], [Update], State) +%% Build a 'case' expression to modify record fields. + +record_match(R, Name, Lr, Fs, Us, St0) -> + {Ps,News,St1} = record_upd_fs(Fs, Us, St0), + NLr = neg_line(Lr), + {{'case',Lr,R, + [{clause,Lr,[{tuple,Lr,[{atom,Lr,Name} | Ps]}],[], + [{tuple,Lr,[{atom,Lr,Name} | News]}]}, + {clause,NLr,[{var,NLr,'_'}],[], + [call_error(NLr, {tuple,NLr,[{atom,NLr,badrecord},{atom,NLr,Name}]})]} + ]}, + St1}. + +record_upd_fs([{record_field,Lf,{atom,_La,F},_Val} | Fs], Us, St0) -> + {P,St1} = new_var(Lf, St0), + {Ps,News,St2} = record_upd_fs(Fs, Us, St1), + case find_field(F, Us) of + {ok,New} -> {[P | Ps],[New | News],St2}; + error -> {[P | Ps],[P | News],St2} + end; +record_upd_fs([], _, St) -> {[],[],St}. + +%% record_setel(Record, RecordName, [RecDefField], [Update]) +%% Build a nested chain of setelement calls to build the +%% updated record tuple. + +record_setel(R, Name, Fs, Us0) -> + Us1 = foldl(fun ({record_field,Lf,Field,Val}, Acc) -> + {integer,_,FieldIndex} = I = index_expr(Lf, Field, Name, Fs), + [{FieldIndex,{I,Lf,Val}} | Acc] + end, [], Us0), + Us2 = sort(Us1), + Us = [T || {_,T} <- Us2], + Lr = element(2, hd(Us)), + Wildcards = duplicate(length(Fs), {var,Lr,'_'}), + NLr = neg_line(Lr), + {'case',Lr,R, + [{clause,Lr,[{tuple,Lr,[{atom,Lr,Name} | Wildcards]}],[], + [foldr(fun ({I,Lf,Val}, Acc) -> + {call,Lf,{atom,Lf,setelement},[I,Acc,Val]} end, + R, Us)]}, + {clause,NLr,[{var,NLr,'_'}],[], + [call_error(NLr, {tuple,NLr,[{atom,NLr,badrecord},{atom,NLr,Name}]})]}]}. + +%% Expand a call to record_info/2. We have checked that it is not +%% shadowed by an import. + +record_info_call(Line, [{atom,_Li,Info},{atom,_Ln,Name}], St) -> + case Info of + size -> + {{integer,Line,1+length(record_fields(Name, St))},St}; + fields -> + {make_list(field_names(record_fields(Name, St)), Line),St} + end. + +%% Break out expressions from an record update list and bind to new +%% variables. The idea is that we will evaluate all update expressions +%% before starting to update the record. + +record_exprs(Us, St) -> + record_exprs(Us, St, [], []). + +record_exprs([{record_field,Lf,{atom,_La,_F}=Name,Val}=Field0 | Us], St0, Pre, Fs) -> + case is_simple_val(Val) of + true -> + record_exprs(Us, St0, Pre, [Field0 | Fs]); + false -> + {Var,St} = new_var(Lf, St0), + Bind = {match,Lf,Var,Val}, + Field = {record_field,Lf,Name,Var}, + record_exprs(Us, St, [Bind | Pre], [Field | Fs]) + end; +record_exprs([], St, Pre, Fs) -> + {reverse(Pre),Fs,St}. + +is_simple_val({var,_,_}) -> true; +is_simple_val(Val) -> + try + erl_parse:normalise(Val), + true + catch error:_ -> + false + end. + +%% pattern_bin([Element], State) -> {[Element],[Variable],[UsedVar],State}. + +pattern_bin(Es0, St) -> + foldr(fun (E, Acc) -> pattern_element(E, Acc) end, {[],St}, Es0). + +pattern_element({bin_element,Line,Expr0,Size,Type}, {Es,St0}) -> + {Expr,St1} = pattern(Expr0, St0), + {[{bin_element,Line,Expr,Size,Type} | Es],St1}. + +%% expr_bin([Element], State) -> {[Element],State}. + +expr_bin(Es0, St) -> + foldr(fun (E, Acc) -> bin_element(E, Acc) end, {[],St}, Es0). + +bin_element({bin_element,Line,Expr,Size,Type}, {Es,St0}) -> + {Expr1,St1} = expr(Expr, St0), + {Size1,St2} = if Size =:= default -> {default,St1}; + true -> expr(Size, St1) + end, + {[{bin_element,Line,Expr1,Size1,Type} | Es],St2}. + +new_var(L, St0) -> + {New,St1} = new_var_name(St0), + {{var,L,New},St1}. + +new_var_name(St) -> + C = St#exprec.vcount, + {list_to_atom("rec" ++ integer_to_list(C)),St#exprec{vcount=C+1}}. + +make_list(Ts, Line) -> + foldr(fun (H, T) -> {cons,Line,H,T} end, {nil,Line}, Ts). + +call_error(L, R) -> + {call,L,{remote,L,{atom,L,erlang},{atom,L,error}},[R]}. + +import({Mod,Fs}, St) -> + St#exprec{imports=add_imports(Mod, Fs, St#exprec.imports)}; +import(_Mod0, St) -> + St. + +add_imports(Mod, [F | Fs], Is) -> + add_imports(Mod, Fs, orddict:store(F, Mod, Is)); +add_imports(_, [], Is) -> Is. + +imported(F, A, St) -> + case orddict:find({F,A}, St#exprec.imports) of + {ok,Mod} -> {yes,Mod}; + error -> no + end. + +neg_line(L) -> + erl_parse:set_line(L, fun(Line) -> -abs(Line) end). diff --git a/lib/stdlib/src/erl_internal.erl b/lib/stdlib/src/erl_internal.erl new file mode 100644 index 0000000000..16173d8210 --- /dev/null +++ b/lib/stdlib/src/erl_internal.erl @@ -0,0 +1,351 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1998-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(erl_internal). + +%% Define Erlang bifs, guard tests and other internal stuff. +%% +%% NOTE: All guard_bif(), arith_op(), bool_op() and comp_op() must be +%% defined in bif.tab as 'ubif', i.e bif without trace wrapper. +%% +%% Why? +%% +%% Because the compiler uses an optimized instruction for +%% the call to these bifs, which when loaded gets a direct +%% entry pointer inserted into itself by the loader, +%% instead of a bif table index as for regular bifs. +%% +%% If tracing is enabled on these bifs, when a module is loaded, +%% the direct entry pointer inserted into the call instruction +%% will be pointing to the trace wrapper, so even if tracing is +%% disabled for bifs, the loaded module will call these bifs through +%% the trace wrappers. +%% +%% The call instruction in question does not give enough information +%% to call trace match function {caller} for it to succeed +%% other then by chance, and the 'return_to' trace flag works just +%% as bad, so both will mostly say that the caller is 'undefined'. +%% Furthermore the calls to these bifs will still generate +%% trace messages from the loaded module even if tracing is disabled +%% for them, and no one knows what else might be messed up. +%% +%% That's why! +%% + +-export([bif/2,bif/3,guard_bif/2, + type_test/2,new_type_test/2,old_type_test/2]). +-export([arith_op/2,bool_op/2,comp_op/2,list_op/2,send_op/2,op_type/2]). + +%%--------------------------------------------------------------------------- + +%% Erlang builtin functions allowed in guards. +-spec guard_bif(Name::atom(), Arity::arity()) -> boolean(). + +guard_bif(abs, 1) -> true; +guard_bif(float, 1) -> true; +guard_bif(trunc, 1) -> true; +guard_bif(round, 1) -> true; +guard_bif(length, 1) -> true; +guard_bif(hd, 1) -> true; +guard_bif(tl, 1) -> true; +guard_bif(size, 1) -> true; +guard_bif(bit_size, 1) -> true; +guard_bif(byte_size, 1) -> true; +guard_bif(element, 2) -> true; +guard_bif(self, 0) -> true; +guard_bif(node, 0) -> true; +guard_bif(node, 1) -> true; +guard_bif(tuple_size, 1) -> true; +guard_bif(is_atom, 1) -> true; +guard_bif(is_binary, 1) -> true; +guard_bif(is_bitstring, 1) -> true; +guard_bif(is_boolean, 1) -> true; +guard_bif(is_float, 1) -> true; +guard_bif(is_function, 1) -> true; +guard_bif(is_function, 2) -> true; +guard_bif(is_integer, 1) -> true; +guard_bif(is_list, 1) -> true; +guard_bif(is_number, 1) -> true; +guard_bif(is_pid, 1) -> true; +guard_bif(is_port, 1) -> true; +guard_bif(is_reference, 1) -> true; +guard_bif(is_tuple, 1) -> true; +guard_bif(is_record, 2) -> true; +guard_bif(is_record, 3) -> true; +guard_bif(Name, A) when is_atom(Name), is_integer(A) -> false. + +%% Erlang type tests. +-spec type_test(Name::atom(), Arity::arity()) -> boolean(). + +type_test(Name, Arity) -> + new_type_test(Name, Arity) orelse old_type_test(Name, Arity). + +%% Erlang new-style type tests. +-spec new_type_test(Name::atom(), Arity::arity()) -> boolean(). + +new_type_test(is_atom, 1) -> true; +new_type_test(is_boolean, 1) -> true; +new_type_test(is_binary, 1) -> true; +new_type_test(is_bitstring, 1) -> true; +new_type_test(is_float, 1) -> true; +new_type_test(is_function, 1) -> true; +new_type_test(is_function, 2) -> true; +new_type_test(is_integer, 1) -> true; +new_type_test(is_list, 1) -> true; +new_type_test(is_number, 1) -> true; +new_type_test(is_pid, 1) -> true; +new_type_test(is_port, 1) -> true; +new_type_test(is_reference, 1) -> true; +new_type_test(is_tuple, 1) -> true; +new_type_test(is_record, 2) -> true; +new_type_test(is_record, 3) -> true; +new_type_test(Name, A) when is_atom(Name), is_integer(A) -> false. + +%% Erlang old-style type tests. +-spec old_type_test(Name::atom(), Arity::arity()) -> boolean(). + +old_type_test(integer, 1) -> true; +old_type_test(float, 1) -> true; +old_type_test(number, 1) -> true; +old_type_test(atom, 1) -> true; +old_type_test(list, 1) -> true; +old_type_test(tuple, 1) -> true; +old_type_test(pid, 1) -> true; +old_type_test(reference, 1) -> true; +old_type_test(port, 1) -> true; +old_type_test(binary, 1) -> true; +old_type_test(record, 2) -> true; +old_type_test(function, 1) -> true; +old_type_test(Name, A) when is_atom(Name), is_integer(A) -> false. + +-spec arith_op(Op::atom(), Arity::arity()) -> boolean(). + +arith_op('+', 1) -> true; +arith_op('-', 1) -> true; +arith_op('*', 2) -> true; +arith_op('/', 2) -> true; +arith_op('+', 2) -> true; +arith_op('-', 2) -> true; +arith_op('bnot', 1) -> true; +arith_op('div', 2) -> true; +arith_op('rem', 2) -> true; +arith_op('band', 2) -> true; +arith_op('bor', 2) -> true; +arith_op('bxor', 2) -> true; +arith_op('bsl', 2) -> true; +arith_op('bsr', 2) -> true; +arith_op(Op, A) when is_atom(Op), is_integer(A) -> false. + +-spec bool_op(Op::atom(), Arity::arity()) -> boolean(). + +bool_op('not', 1) -> true; +bool_op('and', 2) -> true; +bool_op('or', 2) -> true; +bool_op('xor', 2) -> true; +bool_op(Op, A) when is_atom(Op), is_integer(A) -> false. + +-spec comp_op(Op::atom(), Arity::arity()) -> boolean(). + +comp_op('==', 2) -> true; +comp_op('/=', 2) -> true; +comp_op('=<', 2) -> true; +comp_op('<', 2) -> true; +comp_op('>=', 2) -> true; +comp_op('>', 2) -> true; +comp_op('=:=', 2) -> true; +comp_op('=/=', 2) -> true; +comp_op(Op, A) when is_atom(Op), is_integer(A) -> false. + +-spec list_op(Op::atom(), Arity::arity()) -> boolean(). + +list_op('++', 2) -> true; +list_op('--', 2) -> true; +list_op(Op, A) when is_atom(Op), is_integer(A) -> false. + +-spec send_op(Op::atom(), Arity::arity()) -> boolean(). + +send_op('!', 2) -> true; +send_op(Op, A) when is_atom(Op), is_integer(A) -> false. + +-spec op_type(atom(), arity()) -> 'arith' | 'bool' | 'comp' | 'list' | 'send'. + +op_type('+', 1) -> arith; +op_type('-', 1) -> arith; +op_type('*', 2) -> arith; +op_type('/', 2) -> arith; +op_type('+', 2) -> arith; +op_type('-', 2) -> arith; +op_type('bnot', 1) -> arith; +op_type('div', 2) -> arith; +op_type('rem', 2) -> arith; +op_type('band', 2) -> arith; +op_type('bor', 2) -> arith; +op_type('bxor', 2) -> arith; +op_type('bsl', 2) -> arith; +op_type('bsr', 2) -> arith; +op_type('not', 1) -> bool; +op_type('and', 2) -> bool; +op_type('or', 2) -> bool; +op_type('xor', 2) -> bool; +op_type('==', 2) -> comp; +op_type('/=', 2) -> comp; +op_type('=<', 2) -> comp; +op_type('<', 2) -> comp; +op_type('>=', 2) -> comp; +op_type('>', 2) -> comp; +op_type('=:=', 2) -> comp; +op_type('=/=', 2) -> comp; +op_type('++', 2) -> list; +op_type('--', 2) -> list; +op_type('!', 2) -> send. + +-spec bif(Mod::atom(), Name::atom(), Arity::arity()) -> boolean(). + +bif(erlang, Name, Arity) -> bif(Name, Arity); +bif(M, F, A) when is_atom(M), is_atom(F), is_integer(A) -> false. + +-spec bif(Name::atom(), Arity::arity()) -> boolean(). +%% Returns true if erlang:Name/Arity is an auto-imported BIF, false otherwise. +%% Use erlang:is_bultin(Mod, Name, Arity) to find whether a function is a BIF +%% (meaning implemented in C) or not. + +bif(abs, 1) -> true; +bif(apply, 2) -> true; +bif(apply, 3) -> true; +bif(atom_to_binary, 2) -> true; +bif(atom_to_list, 1) -> true; +bif(binary_to_atom, 2) -> true; +bif(binary_to_existing_atom, 2) -> true; +bif(binary_to_list, 1) -> true; +bif(binary_to_list, 3) -> true; +bif(binary_to_term, 1) -> true; +bif(bitsize, 1) -> true; +bif(bit_size, 1) -> true; +bif(bitstring_to_list, 1) -> true; +bif(byte_size, 1) -> true; +bif(check_process_code, 2) -> true; +bif(concat_binary, 1) -> true; +bif(date, 0) -> true; +bif(delete_module, 1) -> true; +bif(disconnect_node, 1) -> true; +bif(element, 2) -> true; +bif(erase, 0) -> true; +bif(erase, 1) -> true; +bif(exit, 1) -> true; +bif(exit, 2) -> true; +bif(float, 1) -> true; +bif(float_to_list, 1) -> true; +bif(garbage_collect, 0) -> true; +bif(garbage_collect, 1) -> true; +bif(get, 0) -> true; +bif(get, 1) -> true; +bif(get_keys, 1) -> true; +bif(group_leader, 0) -> true; +bif(group_leader, 2) -> true; +bif(halt, 0) -> true; +bif(halt, 1) -> true; +bif(hd, 1) -> true; +bif(integer_to_list, 1) -> true; +bif(iolist_size, 1) -> true; +bif(iolist_to_binary, 1) -> true; +bif(is_alive, 0) -> true; +bif(is_process_alive, 1) -> true; +bif(is_atom, 1) -> true; +bif(is_boolean, 1) -> true; +bif(is_binary, 1) -> true; +bif(is_bitstr, 1) -> true; +bif(is_bitstring, 1) -> true; +bif(is_float, 1) -> true; +bif(is_function, 1) -> true; +bif(is_function, 2) -> true; +bif(is_integer, 1) -> true; +bif(is_list, 1) -> true; +bif(is_number, 1) -> true; +bif(is_pid, 1) -> true; +bif(is_port, 1) -> true; +bif(is_reference, 1) -> true; +bif(is_tuple, 1) -> true; +bif(is_record, 2) -> true; +bif(is_record, 3) -> true; +bif(length, 1) -> true; +bif(link, 1) -> true; +bif(list_to_atom, 1) -> true; +bif(list_to_binary, 1) -> true; +bif(list_to_bitstring, 1) -> true; +bif(list_to_existing_atom, 1) -> true; +bif(list_to_float, 1) -> true; +bif(list_to_integer, 1) -> true; +bif(list_to_pid, 1) -> true; +bif(list_to_tuple, 1) -> true; +bif(load_module, 2) -> true; +bif(make_ref, 0) -> true; +bif(module_loaded, 1) -> true; +bif(monitor_node, 2) -> true; +bif(node, 0) -> true; +bif(node, 1) -> true; +bif(nodes, 0) -> true; +bif(nodes, 1) -> true; +bif(now, 0) -> true; +bif(open_port, 2) -> true; +bif(pid_to_list, 1) -> true; +bif(port_close, 1) -> true; +bif(port_command, 2) -> true; +bif(port_connect, 2) -> true; +bif(port_control, 3) -> true; +bif(pre_loaded, 0) -> true; +bif(process_flag, 2) -> true; +bif(process_flag, 3) -> true; +bif(process_info, 1) -> true; +bif(process_info, 2) -> true; +bif(processes, 0) -> true; +bif(purge_module, 1) -> true; +bif(put, 2) -> true; +bif(register, 2) -> true; +bif(registered, 0) -> true; +bif(round, 1) -> true; +bif(self, 0) -> true; +bif(setelement, 3) -> true; +bif(size, 1) -> true; +bif(spawn, 1) -> true; +bif(spawn, 2) -> true; +bif(spawn, 3) -> true; +bif(spawn, 4) -> true; +bif(spawn_link, 1) -> true; +bif(spawn_link, 2) -> true; +bif(spawn_link, 3) -> true; +bif(spawn_link, 4) -> true; +bif(spawn_monitor, 1) -> true; +bif(spawn_monitor, 3) -> true; +bif(spawn_opt, 2) -> true; +bif(spawn_opt, 3) -> true; +bif(spawn_opt, 4) -> true; +bif(spawn_opt, 5) -> true; +bif(split_binary, 2) -> true; +bif(statistics, 1) -> true; +bif(term_to_binary, 1) -> true; +bif(term_to_binary, 2) -> true; +bif(throw, 1) -> true; +bif(time, 0) -> true; +bif(tl, 1) -> true; +bif(trunc, 1) -> true; +bif(tuple_size, 1) -> true; +bif(tuple_to_list, 1) -> true; +bif(unlink, 1) -> true; +bif(unregister, 1) -> true; +bif(whereis, 1) -> true; +bif(Name, A) when is_atom(Name), is_integer(A) -> false. diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl new file mode 100644 index 0000000000..156d68554e --- /dev/null +++ b/lib/stdlib/src/erl_lint.erl @@ -0,0 +1,3489 @@ +%% -*- erlang-indent-level: 4 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Do necessary checking of Erlang code. + +%% N.B. All the code necessary for checking structs (tagged tuples) is +%% here. Just comment out the lines in pattern/2, gexpr/3 and expr/3. + +-module(erl_lint). + +-export([module/1,module/2,module/3,format_error/1]). +-export([exprs/2,exprs_opt/3,used_vars/2]). % Used from erl_eval.erl. +-export([is_pattern_expr/1,is_guard_test/1,is_guard_test/2]). +-export([is_guard_expr/1]). +-export([bool_option/4,value_option/3,value_option/7]). + +-export([modify_line/2]). + +-import(lists, [member/2,map/2,foldl/3,foldr/3,mapfoldl/3,all/2,reverse/1]). + +%% bool_option(OnOpt, OffOpt, Default, Options) -> boolean(). +%% value_option(Flag, Default, Options) -> Value. +%% value_option(Flag, Default, OnOpt, OnVal, OffOpt, OffVal, Options) -> +%% Value. +%% The option handling functions. + +-spec bool_option(atom(), atom(), boolean(), [_]) -> boolean(). + +bool_option(On, Off, Default, Opts) -> + foldl(fun (Opt, _Def) when Opt =:= On -> true; + (Opt, _Def) when Opt =:= Off -> false; + (_Opt, Def) -> Def + end, Default, Opts). + +value_option(Flag, Default, Opts) -> + foldl(fun ({Opt,Val}, _Def) when Opt =:= Flag -> Val; + (_Opt, Def) -> Def + end, Default, Opts). + +value_option(Flag, Default, On, OnVal, Off, OffVal, Opts) -> + foldl(fun ({Opt,Val}, _Def) when Opt =:= Flag -> Val; + (Opt, _Def) when Opt =:= On -> OnVal; + (Opt, _Def) when Opt =:= Off -> OffVal; + (_Opt, Def) -> Def + end, Default, Opts). + +%% The error and warning info structures, {Line,Module,Descriptor}, +%% are kept in their seperate fields in the lint state record together +%% with the name of the file (when a new file is entered, marked by +%% the 'file' attribute, then the field 'file' of the lint record is +%% set). At the end of the run these lists are packed into a list of +%% {FileName,ErrorDescList} pairs which are returned. + +-include_lib("stdlib/include/erl_bits.hrl"). + +%%-define(DEBUGF(X,Y), io:format(X, Y)). +-define(DEBUGF(X,Y), void). + +%% Usage of records, functions, and imports. The variable table, which +%% is passed on as an argument, holds the usage of variables. +-record(usage, { + calls = dict:new(), %Who calls who + imported = [], %Actually imported functions + used_records=sets:new() :: set(), %Used record definitions + used_types = sets:new() :: set() %Used type definitions + }). + +%% Define the lint state record. +%% 'called' and 'exports' contain {Line, {Function, Arity}}, +%% the other function collections contain {Function, Arity}. +-record(lint, {state=start :: 'start' | 'attribute' | 'function', + module=[], %Module + package="", %Module package + extends=[], %Extends + behaviour=[], %Behaviour + exports=gb_sets:empty() :: gb_set(), %Exports + imports=[], %Imports + mod_imports=dict:new() :: dict(), %Module Imports + compile=[], %Compile flags + records=dict:new() :: dict(), %Record definitions + defined=gb_sets:empty() :: gb_set(), %Defined fuctions + on_load=[] :: [{atom(),integer()}], %On-load function + on_load_line=0 :: integer(), %Line for on_load + clashes=[], %Exported functions named as BIFs + not_deprecated=[], %Not considered deprecated + func=[], %Current function + warn_format=0, %Warn format calls + enabled_warnings=[], %All enabled warnings (ordset). + errors=[], %Current errors + warnings=[], %Current warnings + global_vt=[], %The global VarTable + file = "" :: string(), %From last file attribute + recdef_top=false :: boolean(), %true in record initialisation + %outside any fun or lc + xqlc= false :: boolean(), %true if qlc.hrl included + new = false :: boolean(), %Has user-defined 'new/N' + called= [], %Called functions + usage = #usage{} :: #usage{}, + specs = dict:new() :: dict(), %Type specifications + types = dict:new() :: dict() %Type definitions + }). + +-type lint_state() :: #lint{}. + +%% format_error(Error) +%% Return a string describing the error. + +format_error(undefined_module) -> + "no module definition"; +format_error({bad_module_name, M}) -> + io_lib:format("bad module name '~s'", [M]); +format_error(redefine_module) -> + "redefining module"; +format_error(redefine_extends) -> + "redefining extends attribute"; +format_error(extends_self) -> + "cannot extend from self"; +%% format_error({redefine_mod_import, M, P}) -> +%% io_lib:format("module '~s' already imported from package '~s'", [M, P]); + +format_error(invalid_call) -> + "invalid function call"; +format_error(invalid_record) -> + "invalid record expression"; + +format_error({attribute,A}) -> + io_lib:format("attribute '~w' after function definitions", [A]); +format_error({missing_qlc_hrl,A}) -> + io_lib:format("qlc:q/~w called, but \"qlc.hrl\" not included", [A]); +format_error({redefine_import,{bif,{F,A},M}}) -> + io_lib:format("function ~w/~w already auto-imported from ~w", [F,A,M]); +format_error({redefine_import,{{F,A},M}}) -> + io_lib:format("function ~w/~w already imported from ~w", [F,A,M]); +format_error({bad_inline,{F,A}}) -> + io_lib:format("inlined function ~w/~w undefined", [F,A]); +format_error({invalid_deprecated,D}) -> + io_lib:format("badly formed deprecated attribute ~w", [D]); +format_error(invalid_extends) -> + "badly formed extends attribute"; +format_error(define_instance) -> + "defining instance function not allowed in abstract module"; +format_error({bad_deprecated,{F,A}}) -> + io_lib:format("deprecated function ~w/~w undefined or not exported", [F,A]); +format_error({bad_nowarn_unused_function,{F,A}}) -> + io_lib:format("function ~w/~w undefined", [F,A]); +format_error({bad_nowarn_bif_clash,{F,A}}) -> + io_lib:format("function ~w/~w undefined", [F,A]); +format_error({bad_nowarn_deprecated_function,{M,F,A}}) -> + io_lib:format("~w:~w/~w is not a deprecated function", [M,F,A]); +format_error({bad_on_load,Term}) -> + io_lib:format("badly formed on_load attribute: ~w", [Term]); +format_error(multiple_on_loads) -> + "more than one on_load attribute"; +format_error({bad_on_load_arity,{F,A}}) -> + io_lib:format("function ~w/~w has wrong arity (must be 0)", [F,A]); +format_error({undefined_on_load,{F,A}}) -> + io_lib:format("function ~w/~w undefined", [F,A]); + +format_error(export_all) -> + "export_all flag enabled - all functions will be exported"; +format_error({duplicated_export, {F,A}}) -> + io_lib:format("function ~w/~w already exported", [F,A]); +format_error({unused_import,{{F,A},M}}) -> + io_lib:format("import ~w:~w/~w is unused", [M,F,A]); +format_error({undefined_function,{F,A}}) -> + io_lib:format("function ~w/~w undefined", [F,A]); +format_error({redefine_function,{F,A}}) -> + io_lib:format("function ~w/~w already defined", [F,A]); +format_error({define_import,{F,A}}) -> + io_lib:format("defining imported function ~w/~w", [F,A]); +format_error({unused_function,{F,A}}) -> + io_lib:format("function ~w/~w is unused", [F,A]); +format_error({redefine_bif,{F,A}}) -> + io_lib:format("defining BIF ~w/~w", [F,A]); +format_error({call_to_redefined_bif,{F,A}}) -> + io_lib:format("call to ~w/~w will call erlang:~w/~w; " + "not ~w/~w in this module \n" + " (add an explicit module name to the call to avoid this error)", + [F,A,F,A,F,A]); + +format_error({deprecated, MFA, ReplacementMFA, Rel}) -> + io_lib:format("~s is deprecated and will be removed in ~s; use ~s", + [format_mfa(MFA), Rel, format_mfa(ReplacementMFA)]); +format_error({deprecated, {M1, F1, A1}, String}) when is_list(String) -> + io_lib:format("~p:~p/~p: ~s", [M1, F1, A1, String]); +format_error({removed, MFA, ReplacementMFA, Rel}) -> + io_lib:format("call to ~s will fail, since it was removed in ~s; " + "use ~s", [format_mfa(MFA), Rel, format_mfa(ReplacementMFA)]); +format_error({removed, MFA, String}) when is_list(String) -> + io_lib:format("~s: ~s", [format_mfa(MFA), String]); +format_error({obsolete_guard, {F, A}}) -> + io_lib:format("~p/~p obsolete", [F, A]); +format_error({reserved_for_future,K}) -> + io_lib:format("atom ~w: future reserved keyword - rename or quote", [K]); +%% --- patterns and guards --- +format_error(illegal_pattern) -> "illegal pattern"; +format_error(illegal_bin_pattern) -> + "binary patterns cannot be matched in parallel using '='"; +format_error(illegal_expr) -> "illegal expression"; +format_error(illegal_guard_expr) -> "illegal guard expression"; +%% --- exports --- +format_error({explicit_export,F,A}) -> + io_lib:format("in this release, the call to ~w/~w must be written " + "like this: erlang:~w/~w", + [F,A,F,A]); +%% --- records --- +format_error({undefined_record,T}) -> + io_lib:format("record ~w undefined", [T]); +format_error({redefine_record,T}) -> + io_lib:format("record ~w already defined", [T]); +format_error({redefine_field,T,F}) -> + io_lib:format("field ~w already defined in record ~w", [F,T]); +format_error({undefined_field,T,F}) -> + io_lib:format("field ~w undefined in record ~w", [F,T]); +format_error(illegal_record_info) -> + "illegal record info"; +format_error({field_name_is_variable,T,F}) -> + io_lib:format("field ~w is not an atom or _ in record ~w", [F,T]); +format_error({wildcard_in_update,T}) -> + io_lib:format("meaningless use of _ in update of record ~w", [T]); +format_error({unused_record,T}) -> + io_lib:format("record ~w is unused", [T]); +format_error({untyped_record,T}) -> + io_lib:format("record ~w has field(s) without type information", [T]); +%% --- variables ---- +format_error({unbound_var,V}) -> + io_lib:format("variable ~w is unbound", [V]); +format_error({unsafe_var,V,{What,Where}}) -> + io_lib:format("variable ~w unsafe in ~w ~s", + [V,What,format_where(Where)]); +format_error({exported_var,V,{What,Where}}) -> + io_lib:format("variable ~w exported from ~w ~s", + [V,What,format_where(Where)]); +format_error({shadowed_var,V,In}) -> + io_lib:format("variable ~w shadowed in ~w", [V,In]); +format_error({unused_var, V}) -> + io_lib:format("variable ~w is unused", [V]); +format_error({variable_in_record_def,V}) -> + io_lib:format("variable ~w in record definition", [V]); +%% --- binaries --- +format_error({undefined_bittype,Type}) -> + io_lib:format("bit type ~w undefined", [Type]); +format_error({bittype_mismatch,T1,T2,What}) -> + io_lib:format("bit type mismatch (~s) between ~p and ~p", [What,T1,T2]); +format_error(bittype_unit) -> + "a bit unit size must not be specified unless a size is specified too"; +format_error(illegal_bitsize) -> + "illegal bit size"; +format_error(unsized_binary_not_at_end) -> + "a binary field without size is only allowed at the end of a binary pattern"; +format_error(typed_literal_string) -> + "a literal string in a binary pattern must not have a type or a size"; +format_error(utf_bittype_size_or_unit) -> + "neither size nor unit must be given for segments of type utf8/utf16/utf32"; +format_error({bad_bitsize,Type}) -> + io_lib:format("bad ~s bit size", [Type]); +%% --- behaviours --- +format_error({conflicting_behaviours,{Name,Arity},B,FirstL,FirstB}) -> + io_lib:format("conflicting behaviours - callback ~w/~w required by both '~p' " + "and '~p' ~s", [Name,Arity,B,FirstB,format_where(FirstL)]); +format_error({undefined_behaviour_func, {Func,Arity}, Behaviour}) -> + io_lib:format("undefined callback function ~w/~w (behaviour '~w')", + [Func,Arity,Behaviour]); +format_error({undefined_behaviour,Behaviour}) -> + io_lib:format("behaviour ~w undefined", [Behaviour]); +format_error({undefined_behaviour_callbacks,Behaviour}) -> + io_lib:format("behaviour ~w callback functions are undefined", + [Behaviour]); +format_error({ill_defined_behaviour_callbacks,Behaviour}) -> + io_lib:format("behaviour ~w callback functions erroneously defined", + [Behaviour]); +%% --- types and specs --- +format_error({singleton_typevar, Name}) -> + io_lib:format("type variable ~w is only used once (is unbound)", [Name]); +format_error({type_ref, {TypeName, Arity}}) -> + io_lib:format("type ~w~s undefined", [TypeName, gen_type_paren(Arity)]); +format_error({unused_type, {TypeName, Arity}}) -> + io_lib:format("type ~w~s is unused", [TypeName, gen_type_paren(Arity)]); +format_error({new_builtin_type, {TypeName, Arity}}) -> + io_lib:format("type ~w~s is a new builtin type; " + "its (re)definition is allowed only until the next release", + [TypeName, gen_type_paren(Arity)]); +format_error({builtin_type, {TypeName, Arity}}) -> + io_lib:format("type ~w~s is a builtin type; it cannot be redefined", + [TypeName, gen_type_paren(Arity)]); +format_error({renamed_type, OldName, NewName}) -> + io_lib:format("type ~w() is now called ~w(); " + "please use the new name instead", [OldName, NewName]); +format_error({redefine_type, {TypeName, Arity}}) -> + io_lib:format("type ~w~s already defined", + [TypeName, gen_type_paren(Arity)]); +format_error({type_syntax, Constr}) -> + io_lib:format("bad ~w type", [Constr]); +format_error({redefine_spec, {M, F, A}}) -> + io_lib:format("spec for ~w:~w/~w already defined", [M, F, A]); +format_error({spec_fun_undefined, {M, F, A}}) -> + io_lib:format("spec for undefined function ~w:~w/~w", [M, F, A]); +format_error({missing_spec, {F,A}}) -> + io_lib:format("missing specification for function ~w/~w", [F, A]); +format_error(spec_wrong_arity) -> + "spec has the wrong arity"; +format_error({imported_predefined_type, Name}) -> + io_lib:format("referring to built-in type ~w as a remote type; " + "please take out the module name", [Name]); +%% --- obsolete? unused? --- +format_error({format_error, {Fmt, Args}}) -> + io_lib:format(Fmt, Args); +format_error({mnemosyne, What}) -> + "mnemosyne " ++ What ++ ", missing transformation". + +gen_type_paren(Arity) when is_integer(Arity), Arity >= 0 -> + gen_type_paren_1(Arity, ")"). + +gen_type_paren_1(0, Acc) -> "(" ++ Acc; +gen_type_paren_1(1, Acc) -> "(_" ++ Acc; +gen_type_paren_1(N, Acc) -> gen_type_paren_1(N - 1, ",_" ++ Acc). + +format_mfa({M, F, [_|_]=As}) -> + ","++ArityString = lists:append([[$,|integer_to_list(A)] || A <- As]), + format_mf(M, F, ArityString); +format_mfa({M, F, A}) when is_integer(A) -> + format_mf(M, F, integer_to_list(A)). + +format_mf(M, F, ArityString) when is_atom(M), is_atom(F) -> + atom_to_list(M) ++ ":" ++ atom_to_list(F) ++ "/" ++ ArityString. + +format_where(L) when is_integer(L) -> + io_lib:format("(line ~p)", [L]); +format_where({L,C}) when is_integer(L), is_integer(C) -> + io_lib:format("(line ~p, column ~p)", [L, C]). + +%% Local functions that are somehow automatically generated. + +pseudolocals() -> + [{module_info,0}, {module_info,1}, {record_info,2}]. + +%% +%% Used by erl_eval.erl to check commands. +%% +exprs(Exprs, BindingsList) -> + exprs_opt(Exprs, BindingsList, []). + +exprs_opt(Exprs, BindingsList, Opts) -> + {St0,Vs} = foldl(fun({{record,_SequenceNumber,_Name},Attr0}, {St1,Vs1}) -> + Attr = zip_file_and_line(Attr0, "none"), + {attribute_state(Attr, St1),Vs1}; + ({V,_}, {St1,Vs1}) -> + {St1,[{V,{bound,unused,[]}} | Vs1]} + end, {start("nofile",Opts),[]}, BindingsList), + Vt = orddict:from_list(Vs), + {_Evt,St} = exprs(zip_file_and_line(Exprs, "nofile"), Vt, St0), + return_status(St). + +used_vars(Exprs, BindingsList) -> + Vs = foldl(fun({{record,_SequenceNumber,_Name},_Attr}, Vs0) -> Vs0; + ({V,_Val}, Vs0) -> [{V,{bound,unused,[]}} | Vs0] + end, [], BindingsList), + Vt = orddict:from_list(Vs), + {Evt,_St} = exprs(zip_file_and_line(Exprs, "nofile"), Vt, start()), + {ok, foldl(fun({V,{_,used,_}}, L) -> [V | L]; + (_, L) -> L + end, [], Evt)}. + +%% module([Form]) -> +%% module([Form], FileName) -> +%% module([Form], FileName, [CompileOption]) -> +%% {ok,[Warning]} | {error,[Error],[Warning]} +%% Start processing a module. Define predefined functions and exports and +%% apply_lambda/2 has been called to shut lint up. N.B. these lists are +%% really all ordsets! + +module(Forms) -> + Opts = compiler_options(Forms), + St = forms(Forms, start("nofile", Opts)), + return_status(St). + +module(Forms, FileName) -> + Opts = compiler_options(Forms), + St = forms(Forms, start(FileName, Opts)), + return_status(St). + +module(Forms, FileName, Opts0) -> + %% We want the options given on the command line to take + %% precedence over options in the module. + Opts = compiler_options(Forms) ++ Opts0, + St = forms(Forms, start(FileName, Opts)), + return_status(St). + +compiler_options(Forms) -> + lists:flatten([C || {attribute,_,compile,C} <- Forms]). + +%% start() -> State +%% start(FileName, [Option]) -> State + +start() -> + start("nofile", []). + +start(File, Opts) -> + Enabled0 = + [{unused_vars, + bool_option(warn_unused_vars, nowarn_unused_vars, + true, Opts)}, + {export_all, + bool_option(warn_export_all, nowarn_export_all, + false, Opts)}, + {export_vars, + bool_option(warn_export_vars, nowarn_export_vars, + false, Opts)}, + {shadow_vars, + bool_option(warn_shadow_vars, nowarn_shadow_vars, + true, Opts)}, + {unused_import, + bool_option(warn_unused_import, nowarn_unused_import, + false, Opts)}, + {unused_function, + bool_option(warn_unused_function, nowarn_unused_function, + true, Opts)}, + {bif_clash, + bool_option(warn_bif_clash, nowarn_bif_clash, + true, Opts)}, + {unused_record, + bool_option(warn_unused_record, nowarn_unused_record, + true, Opts)}, + {deprecated_function, + bool_option(warn_deprecated_function, nowarn_deprecated_function, + true, Opts)}, + {obsolete_guard, + bool_option(warn_obsolete_guard, nowarn_obsolete_guard, + true, Opts)}, + {untyped_record, + bool_option(warn_untyped_record, nowarn_untyped_record, + false, Opts)}, + {missing_spec, + bool_option(warn_missing_spec, nowarn_missing_spec, + false, Opts)}, + {missing_spec_all, + bool_option(warn_missing_spec_all, nowarn_missing_spec_all, + false, Opts)} + ], + Enabled1 = [Category || {Category,true} <- Enabled0], + Enabled = ordsets:from_list(Enabled1), + Calls = case ordsets:is_element(unused_function, Enabled) of + true -> + dict:from_list([{{module_info,1},pseudolocals()}]); + false -> + undefined + end, + #lint{state = start, + exports = gb_sets:from_list([{module_info,0},{module_info,1}]), + mod_imports = dict:from_list([{erlang,erlang}]), + compile = Opts, + %% Internal pseudo-functions must appear as defined/reached. + defined = gb_sets:from_list(pseudolocals()), + called = [{F,0} || F <- pseudolocals()], + usage = #usage{calls=Calls}, + warn_format = value_option(warn_format, 1, warn_format, 1, + nowarn_format, 0, Opts), + enabled_warnings = Enabled, + file = File, + types = default_types() + }. + +%% is_warn_enabled(Category, St) -> boolean(). +%% Check whether a warning of category Category is enabled. +is_warn_enabled(Type, #lint{enabled_warnings=Enabled}) -> + ordsets:is_element(Type, Enabled). + +%% return_status(State) -> +%% {ok,[Warning]} | {error,[Error],[Warning]} +%% Pack errors and warnings properly and return ok | error. + +return_status(St) -> + Ws = pack_warnings(St#lint.warnings), + case pack_errors(St#lint.errors) of + [] -> {ok,Ws}; + Es -> {error,Es,Ws} + end. + +%% pack_errors([{File,ErrD}]) -> [{File,[ErrD]}]. +%% Sort on (reversed) insertion order. + +pack_errors(Es) -> + {Es1,_} = mapfoldl(fun ({File,E}, I) -> {{File,{I,E}}, I-1} end, -1, Es), + map(fun ({File,EIs}) -> {File, map(fun ({_I,E}) -> E end, EIs)} end, + pack_warnings(Es1)). + +%% pack_warnings([{File,ErrD}]) -> [{File,[ErrD]}] +%% Sort on line number. + +pack_warnings(Ws) -> + [{File,lists:sort([W || {F,W} <- Ws, F =:= File])} || + File <- lists:usort([F || {F,_} <- Ws])]. + +%% add_error(ErrorDescriptor, State) -> State' +%% add_error(Line, Error, State) -> State' +%% add_warning(ErrorDescriptor, State) -> State' +%% add_warning(Line, Error, State) -> State' + +add_error(E, St) -> St#lint{errors=[{St#lint.file,E}|St#lint.errors]}. + +add_error(FileLine, E, St) -> + {File,Location} = loc(FileLine), + add_error({Location,erl_lint,E}, St#lint{file = File}). + +add_warning(W, St) -> St#lint{warnings=[{St#lint.file,W}|St#lint.warnings]}. + +add_warning(FileLine, W, St) -> + {File,Location} = loc(FileLine), + add_warning({Location,erl_lint,W}, St#lint{file = File}). + +loc(L) -> + case erl_parse:get_attribute(L, location) of + {location,{{File,Line},Column}} -> + {File,{Line,Column}}; + {location,{File,Line}} -> + {File,Line} + end. + +%% forms([Form], State) -> State' + +forms(Forms0, St0) -> + Forms = eval_file_attribute(Forms0, St0), + %% Line numbers are from now on pairs {File,Line}. + St1 = includes_qlc_hrl(Forms, St0), + St2 = bif_clashes(Forms, St1), + St3 = not_deprecated(Forms, St2), + St4 = foldl(fun form/2, pre_scan(Forms, St3), Forms), + post_traversal_check(Forms, St4). + +pre_scan([{function,_L,new,_A,_Cs} | Fs], St) -> + pre_scan(Fs, St#lint{new=true}); +pre_scan([{attribute,_L,extends,M} | Fs], St) when is_atom(M) -> + pre_scan(Fs, St#lint{extends=true}); +pre_scan([{attribute,L,compile,C} | Fs], St) -> + case is_warn_enabled(export_all, St) andalso + member(export_all, lists:flatten([C])) of + true -> + pre_scan(Fs, add_warning(L, export_all, St)); + false -> + pre_scan(Fs, St) + end; +pre_scan([_ | Fs], St) -> + pre_scan(Fs, St); +pre_scan([], St) -> + St. + +includes_qlc_hrl(Forms, St) -> + %% QLC calls erl_lint several times, sometimes with the compile + %% attribute removed. The file attribute, however, is left as is. + QH = [File || {attribute,_,file,{File,_line}} <- Forms, + filename:basename(File) =:= "qlc.hrl"], + St#lint{xqlc = QH =/= []}. + +eval_file_attribute(Forms, St) -> + eval_file_attr(Forms, St#lint.file). + +eval_file_attr([{attribute,_L,file,{File,_Line}}=Form | Forms], _File) -> + [Form | eval_file_attr(Forms, File)]; +eval_file_attr([Form0 | Forms], File) -> + Form = zip_file_and_line(Form0, File), + [Form | eval_file_attr(Forms, File)]; +eval_file_attr([], _File) -> + []. + +zip_file_and_line(T, File) -> + F0 = fun(Line) -> {File,Line} end, + F = fun(L) -> erl_parse:set_line(L, F0) end, + modify_line(T, F). + +%% form(Form, State) -> State' +%% Check a form returning the updated State. Handle generic cases here. + +form({error,E}, St) -> add_error(E, St); +form({warning,W}, St) -> add_warning(W, St); +form({attribute,_L,file,{File,_Line}}, St) -> + St#lint{file = File}; +form({attribute,_L,compile,_}, St) -> + St; +form(Form, #lint{state=State}=St) -> + case State of + start -> start_state(Form, St); + attribute -> attribute_state(Form, St); + function -> function_state(Form, St) + end. + +%% start_state(Form, State) -> State' + +start_state({attribute,L,module,{M,Ps}}, St) -> + St1 = set_module(M, L, St), + Arity = length(Ps), + Ps1 = if is_atom(St1#lint.extends) -> + ['BASE', 'THIS' | Ps]; + true -> + ['THIS' | Ps] + end, + Vt = orddict:from_list([{V, {bound, used, []}} || V <- Ps1]), + St2 = add_instance(Arity, St1), + St3 = ensure_new(Arity, St2), + St3#lint{state=attribute, extends=[], global_vt=Vt}; +start_state({attribute,L,module,M}, St) -> + St1 = set_module(M, L, St), + St1#lint{state=attribute, extends=[]}; +start_state(Form, St) -> + St1 = add_error(element(2, Form), undefined_module, St), + attribute_state(Form, St1#lint{state=attribute, extends=[]}). + +set_module(M, L, St) -> + M1 = package_to_string(M), + case packages:is_valid(M1) of + true -> + St#lint{module=list_to_atom(M1), + package=packages:strip_last(M1)}; + false -> + add_error(L, {bad_module_name, M1}, St) + end. + +ensure_new(Arity, St) -> + case St#lint.new of + true -> + St; + false -> + add_func(new, Arity, St) + end. + +add_instance(Arity, St) -> + A = Arity + (if is_atom(St#lint.extends) -> 1; true -> 0 end), + add_func(instance, A, St). + +add_func(Name, Arity, St) -> + F = {Name, Arity}, + St#lint{exports = gb_sets:add_element(F, St#lint.exports), + defined = gb_sets:add_element(F, St#lint.defined)}. + +%% attribute_state(Form, State) -> +%% State' + +attribute_state({attribute,_L,module,_M}, #lint{module=[]}=St) -> + St; +attribute_state({attribute,L,module,_M}, St) -> + add_error(L, redefine_module, St); +attribute_state({attribute,L,extends,M}, #lint{module=M}=St) when is_atom(M) -> + add_error(L, extends_self, St); +attribute_state({attribute,_L,extends,M}, #lint{extends=[]}=St) + when is_atom(M) -> + St#lint{extends=M}; +attribute_state({attribute,L,extends,M}, St) when is_atom(M) -> + add_error(L, redefine_extends, St); +attribute_state({attribute,L,extends,_M}, St) -> + add_error(L, invalid_extends, St); +attribute_state({attribute,L,export,Es}, St) -> + export(L, Es, St); +attribute_state({attribute,L,import,Is}, St) -> + import(L, Is, St); +attribute_state({attribute,L,record,{Name,Fields}}, St) -> + record_def(L, Name, Fields, St); +attribute_state({attribute,La,behaviour,Behaviour}, St) -> + St#lint{behaviour=St#lint.behaviour ++ [{La,Behaviour}]}; +attribute_state({attribute,La,behavior,Behaviour}, St) -> + St#lint{behaviour=St#lint.behaviour ++ [{La,Behaviour}]}; +attribute_state({attribute,L,type,{TypeName,TypeDef,Args}}, St) -> + type_def(type, L, TypeName, TypeDef, Args, St); +attribute_state({attribute,L,opaque,{TypeName,TypeDef,Args}}, St) -> + type_def(opaque, L, TypeName, TypeDef, Args, St); +attribute_state({attribute,L,spec,{Fun,Types}}, St) -> + spec_decl(L, Fun, Types, St); +attribute_state({attribute,L,on_load,Val}, St) -> + on_load(L, Val, St); +attribute_state({attribute,_L,_Other,_Val}, St) -> % Ignore others + St; +attribute_state(Form, St) -> + function_state(Form, St#lint{state=function}). + +%% function_state(Form, State) -> +%% State' +%% Allow for record, type and opaque type definitions and spec +%% declarations to be intersperced within function definitions. + +function_state({attribute,L,record,{Name,Fields}}, St) -> + record_def(L, Name, Fields, St); +function_state({attribute,L,type,{TypeName,TypeDef,Args}}, St) -> + type_def(type, L, TypeName, TypeDef, Args, St); +function_state({attribute,L,opaque,{TypeName,TypeDef,Args}}, St) -> + type_def(opaque, L, TypeName, TypeDef, Args, St); +function_state({attribute,L,spec,{Fun,Types}}, St) -> + spec_decl(L, Fun, Types, St); +function_state({attribute,La,Attr,_Val}, St) -> + add_error(La, {attribute,Attr}, St); +function_state({function,L,N,A,Cs}, St) -> + function(L, N, A, Cs, St); +function_state({rule,L,_N,_A,_Cs}, St) -> + add_error(L, {mnemosyne,"rule"}, St); +function_state({eof,L}, St) -> eof(L, St). + +%% eof(LastLine, State) -> +%% State' + +eof(_Line, St0) -> + St0. + +%% bif_clashes(Forms, State0) -> State. + +bif_clashes(Forms, St) -> + Nowarn = nowarn_function(nowarn_bif_clash, St#lint.compile), + Clashes0 = [{Name,Arity} || {function,_L,Name,Arity,_Cs} <- Forms, + erl_internal:bif(Name, Arity)], + Clashes = ordsets:subtract(ordsets:from_list(Clashes0), Nowarn), + St#lint{clashes=Clashes}. + +-spec is_bif_clash(atom(), byte(), lint_state()) -> boolean(). + +is_bif_clash(_Name, _Arity, #lint{clashes=[]}) -> + false; +is_bif_clash(Name, Arity, #lint{clashes=Clashes}) -> + ordsets:is_element({Name,Arity}, Clashes). + +%% not_deprecated(Forms, State0) -> State + +not_deprecated(Forms, St0) -> + %% There are no line numbers in St0#lint.compile. + MFAsL = [{MFA,L} || + {attribute, L, compile, Args} <- Forms, + {nowarn_deprecated_function, MFAs0} <- lists:flatten([Args]), + MFA <- lists:flatten([MFAs0])], + Nowarn = [MFA || {MFA,_L} <- MFAsL], + Bad = [MFAL || {{M,F,A},_L}=MFAL <- MFAsL, + otp_internal:obsolete(M, F, A) =:= no], + St1 = func_line_warning(bad_nowarn_deprecated_function, Bad, St0), + St1#lint{not_deprecated = ordsets:from_list(Nowarn)}. + +%% post_traversal_check(Forms, State0) -> State. +%% Do some further checking after the forms have been traversed and +%% data about calls etc. have been collected. + +post_traversal_check(Forms, St0) -> + St1 = check_behaviour(St0), + St2 = check_deprecated(Forms, St1), + St3 = check_imports(Forms, St2), + St4 = check_inlines(Forms, St3), + St5 = check_undefined_functions(St4), + St6 = check_unused_functions(Forms, St5), + St7 = check_bif_clashes(Forms, St6), + St8 = check_specs_without_function(St7), + St9 = check_functions_without_spec(Forms, St8), + StA = check_unused_types(Forms, St9), + StB = check_untyped_records(Forms, StA), + StC = check_on_load(StB), + check_unused_records(Forms, StC). + +%% check_behaviour(State0) -> State +%% Check that the behaviour attribute is valid. + +check_behaviour(St0) -> + behaviour_check(St0#lint.behaviour, St0). + +%% behaviour_check([{Line,Behaviour}], State) -> State' +%% Check behaviours for existence and defined functions. + +behaviour_check(Bs, St0) -> + {AllBfs,St1} = all_behaviour_callbacks(Bs, [], St0), + St = behaviour_missing_callbacks(AllBfs, St1), + behaviour_conflicting(AllBfs, St). + +all_behaviour_callbacks([{Line,B}|Bs], Acc, St0) -> + {Bfs0,St} = behaviour_callbacks(Line, B, St0), + all_behaviour_callbacks(Bs, [{{Line,B},Bfs0}|Acc], St); +all_behaviour_callbacks([], Acc, St) -> {reverse(Acc),St}. + +behaviour_callbacks(Line, B, St0) -> + try B:behaviour_info(callbacks) of + Funcs when is_list(Funcs) -> + All = all(fun({FuncName, Arity}) -> + is_atom(FuncName) andalso is_integer(Arity); + (_Other) -> + false + end, + Funcs), + if + All =:= true -> + {Funcs, St0}; + true -> + St1 = add_warning(Line, + {ill_defined_behaviour_callbacks,B}, + St0), + {[], St1} + end; + undefined -> + St1 = add_warning(Line, {undefined_behaviour_callbacks,B}, St0), + {[], St1}; + _Other -> + St1 = add_warning(Line, {ill_defined_behaviour_callbacks,B}, St0), + {[], St1} + catch + _:_ -> + St1 = add_warning(Line, {undefined_behaviour,B}, St0), + {[], St1} + end. + +behaviour_missing_callbacks([{{Line,B},Bfs}|T], #lint{exports=Exp}=St0) -> + Missing = ordsets:subtract(ordsets:from_list(Bfs), gb_sets:to_list(Exp)), + St = foldl(fun (F, S0) -> + add_warning(Line, {undefined_behaviour_func,F,B}, S0) + end, St0, Missing), + behaviour_missing_callbacks(T, St); +behaviour_missing_callbacks([], St) -> St. + +behaviour_conflicting(AllBfs, St) -> + R0 = sofs:relation(AllBfs, [{item,[callback]}]), + R1 = sofs:family_to_relation(R0), + R2 = sofs:converse(R1), + R3 = sofs:relation_to_family(R2), + R4 = sofs:family_specification(fun(S) -> sofs:no_elements(S) > 1 end, R3), + R = sofs:to_external(R4), + behaviour_add_conflicts(R, St). + +behaviour_add_conflicts([{Cb,[{FirstLoc,FirstB}|Cs]}|T], St0) -> + FirstL = element(2, loc(FirstLoc)), + St = behaviour_add_conflict(Cs, Cb, FirstL, FirstB, St0), + behaviour_add_conflicts(T, St); +behaviour_add_conflicts([], St) -> St. + +behaviour_add_conflict([{Line,B}|Cs], Cb, FirstL, FirstB, St0) -> + St = add_warning(Line, {conflicting_behaviours,Cb,B,FirstL,FirstB}, St0), + behaviour_add_conflict(Cs, Cb, FirstL, FirstB, St); +behaviour_add_conflict([], _, _, _, St) -> St. + +%% check_deprecated(Forms, State0) -> State + +check_deprecated(Forms, St0) -> + %% Get the correct list of exported functions. + Exports = case member(export_all, St0#lint.compile) of + true -> St0#lint.defined; + false -> St0#lint.exports + end, + X = gb_sets:to_list(Exports), + #lint{module = Mod} = St0, + Bad = [{E,L} || {attribute, L, deprecated, Depr} <- Forms, + D <- lists:flatten([Depr]), + E <- depr_cat(D, X, Mod)], + foldl(fun ({E,L}, St1) -> + add_error(L, E, St1) + end, St0, Bad). + +depr_cat({F, A, Flg}=D, X, Mod) -> + case deprecated_flag(Flg) of + false -> [{invalid_deprecated,D}]; + true -> depr_fa(F, A, X, Mod) + end; +depr_cat({F, A}, X, Mod) -> + depr_fa(F, A, X, Mod); +depr_cat(module, _X, _Mod) -> + []; +depr_cat(D, _X, _Mod) -> + [{invalid_deprecated,D}]. + +depr_fa('_', '_', _X, _Mod) -> + []; +depr_fa(F, '_', X, _Mod) when is_atom(F) -> + %% Don't use this syntax for built-in functions. + case lists:filter(fun({F1,_}) -> F1 =:= F end, X) of + [] -> [{bad_deprecated,{F,'_'}}]; + _ -> [] + end; +depr_fa(F, A, X, Mod) when is_atom(F), is_integer(A), A >= 0 -> + case lists:member({F,A}, X) of + true -> []; + false -> + case erlang:is_builtin(Mod, F, A) of + true -> []; + false -> [{bad_deprecated,{F,A}}] + end + end; +depr_fa(F, A, _X, _Mod) -> + [{invalid_deprecated,{F,A}}]. + +deprecated_flag(next_version) -> true; +deprecated_flag(next_major_release) -> true; +deprecated_flag(eventually) -> true; +deprecated_flag(_) -> false. + +%% check_imports(Forms, State0) -> State + +check_imports(Forms, St0) -> + case is_warn_enabled(unused_import, St0) of + false -> + St0; + true -> + Usage = St0#lint.usage, + Unused = ordsets:subtract(St0#lint.imports, Usage#usage.imported), + Imports = [{{FA,list_to_atom(package_to_string(Mod))},L} + || {attribute,L,import,{Mod,Fs}} <- Forms, + FA <- lists:usort(Fs)], + Bad = [{FM,L} || FM <- Unused, {FM2,L} <- Imports, FM =:= FM2], + func_line_warning(unused_import, Bad, St0) + end. + +%% check_inlines(Forms, State0) -> State + +check_inlines(Forms, St0) -> + check_option_functions(Forms, inline, bad_inline, St0). + +%% check_unused_functions(Forms, State0) -> State + +check_unused_functions(Forms, St0) -> + St1 = check_option_functions(Forms, nowarn_unused_function, + bad_nowarn_unused_function, St0), + Opts = St1#lint.compile, + case member(export_all, Opts) orelse + not is_warn_enabled(unused_function, St1) of + true -> + St1; + false -> + Nowarn = nowarn_function(nowarn_unused_function, Opts), + Usage = St1#lint.usage, + Used = reached_functions(initially_reached(St1), + Usage#usage.calls), + UsedOrNowarn = ordsets:union(Used, Nowarn), + Unused = ordsets:subtract(gb_sets:to_list(St1#lint.defined), + UsedOrNowarn), + Functions = [{{N,A},L} || {function,L,N,A,_} <- Forms], + Bad = [{FA,L} || FA <- Unused, {FA2,L} <- Functions, FA =:= FA2], + func_line_warning(unused_function, Bad, St1) + end. + +initially_reached(#lint{exports=Exp,on_load=OnLoad}) -> + OnLoad ++ gb_sets:to_list(Exp). + +%% reached_functions(RootSet, CallRef) -> [ReachedFunc]. +%% reached_functions(RootSet, CallRef, [ReachedFunc]) -> [ReachedFunc]. + +reached_functions(Root, Ref) -> + reached_functions(Root, [], Ref, gb_sets:empty()). + +reached_functions([R|Rs], More0, Ref, Reached0) -> + case gb_sets:is_element(R, Reached0) of + true -> reached_functions(Rs, More0, Ref, Reached0); + false -> + Reached = gb_sets:add_element(R, Reached0), %It IS reached + case dict:find(R, Ref) of + {ok,More} -> reached_functions(Rs, [More|More0], Ref, Reached); + error -> reached_functions(Rs, More0, Ref, Reached) + end + end; +reached_functions([], [_|_]=More, Ref, Reached) -> + reached_functions(lists:append(More), [], Ref, Reached); +reached_functions([], [], _Ref, Reached) -> gb_sets:to_list(Reached). + +%% check_undefined_functions(State0) -> State + +check_undefined_functions(#lint{called=Called0,defined=Def0}=St0) -> + Called = sofs:relation(Called0, [{func,location}]), + Def = sofs:from_external(gb_sets:to_list(Def0), [func]), + Undef = sofs:to_external(sofs:drestriction(Called, Def)), + foldl(fun ({NA,L}, St) -> + add_error(L, {undefined_function,NA}, St) + end, St0, Undef). + +%% check_bif_clashes(Forms, State0) -> State + +check_bif_clashes(Forms, St0) -> + %% St0#lint.defined is now complete. + check_option_functions(Forms, nowarn_bif_clash, + bad_nowarn_bif_clash, St0). + +check_option_functions(Forms, Tag0, Type, St0) -> + %% There are no line numbers in St0#lint.compile. + FAsL = [{FA,L} || {attribute, L, compile, Args} <- Forms, + {Tag, FAs0} <- lists:flatten([Args]), + Tag0 =:= Tag, + FA <- lists:flatten([FAs0])], + DefFunctions = gb_sets:to_list(St0#lint.defined) -- pseudolocals(), + Bad = [{FA,L} || {FA,L} <- FAsL, not member(FA, DefFunctions)], + func_line_error(Type, Bad, St0). + +nowarn_function(Tag, Opts) -> + ordsets:from_list([FA || {Tag1,FAs} <- Opts, + Tag1 =:= Tag, + FA <- lists:flatten([FAs])]). + +func_line_warning(Type, Fs, St) -> + foldl(fun ({F,Line}, St0) -> add_warning(Line, {Type,F}, St0) end, St, Fs). + +func_line_error(Type, Fs, St) -> + foldl(fun ({F,Line}, St0) -> add_error(Line, {Type,F}, St0) end, St, Fs). + +check_untyped_records(Forms, St0) -> + case is_warn_enabled(untyped_record, St0) of + true -> + %% One possibility is to use the names of all records + %% RecNames = dict:fetch_keys(St0#lint.records), + %% but I think it's better to keep those that are used by the file + Usage = St0#lint.usage, + UsedRecNames = sets:to_list(Usage#usage.used_records), + %% these are the records with field(s) containing type info + TRecNames = [Name || + {attribute,_,type,{{record,Name},Fields,_}} <- Forms, + lists:all(fun ({typed_record_field,_,_}) -> true; + (_) -> false + end, Fields)], + foldl(fun (N, St) -> + {L, Fields} = dict:fetch(N, St0#lint.records), + case Fields of + [] -> St; % exclude records with no fields + [_|_] -> add_warning(L, {untyped_record, N}, St) + end + end, St0, UsedRecNames -- TRecNames); + false -> + St0 + end. + +check_unused_records(Forms, St0) -> + AttrFiles = [File || {attribute,_L,file,{File,_Line}} <- Forms], + case {is_warn_enabled(unused_record, St0),AttrFiles} of + {true,[FirstFile|_]} -> + %% The check is a bit imprecise in that uses from unused + %% functions count. + Usage = St0#lint.usage, + UsedRecords = sets:to_list(Usage#usage.used_records), + URecs = foldl(fun (Used, Recs) -> + dict:erase(Used, Recs) + end, St0#lint.records, UsedRecords), + Unused = [{Name,FileLine} || + {Name,{FileLine,_Fields}} <- dict:to_list(URecs), + element(1, loc(FileLine)) =:= FirstFile], + foldl(fun ({N,L}, St) -> + add_warning(L, {unused_record, N}, St) + end, St0, Unused); + _ -> + St0 + end. + +%% For storing the import list we use the orddict module. +%% We know an empty set is []. + +%% export(Line, Exports, State) -> State. +%% Mark functions as exported, also as called from the export line. + +export(Line, Es, #lint{exports = Es0, called = Called} = St0) -> + {Es1,C1,St1} = + foldl(fun (NA, {E,C,St2}) -> + St = case gb_sets:is_element(NA, E) of + true -> + add_warning(Line, {duplicated_export, NA}, St2); + false -> + St2 + end, + {gb_sets:add_element(NA, E), [{NA,Line}|C], St} + end, + {Es0,Called,St0}, Es), + St1#lint{exports = Es1, called = C1}. + +%% import(Line, Imports, State) -> State. +%% imported(Name, Arity, State) -> {yes,Module} | no. + +import(Line, {Mod,Fs}, St) -> + Mod1 = package_to_string(Mod), + case packages:is_valid(Mod1) of + true -> + Mfs = ordsets:from_list(Fs), + case check_imports(Line, Mfs, St#lint.imports) of + [] -> + St#lint{imports=add_imports(list_to_atom(Mod1), Mfs, + St#lint.imports)}; + Efs -> + foldl(fun (Ef, St0) -> + add_error(Line, {redefine_import,Ef}, + St0) + end, + St, Efs) + end; + false -> + add_error(Line, {bad_module_name, Mod1}, St) + end; +import(Line, Mod, St) -> + Mod1 = package_to_string(Mod), + case packages:is_valid(Mod1) of + true -> + Key = list_to_atom(packages:last(Mod1)), + Imps = St#lint.mod_imports, +%%% case dict:is_key(Key, Imps) of +%%% true -> +%%% M = packages:last(Mod1), +%%% P = packages:strip_last(Mod1), +%%% add_error(Line, {redefine_mod_import, M, P}, St); +%%% false -> +%%% St#lint{mod_imports = +%%% dict:store(Key, list_to_atom(Mod1), Imps)} +%%% end; + St#lint{mod_imports = dict:store(Key, list_to_atom(Mod1), + Imps)}; + false -> + add_error(Line, {bad_module_name, Mod1}, St) + end. + +check_imports(_Line, Fs, Is) -> + foldl(fun (F, Efs) -> + case orddict:find(F, Is) of + {ok,Mod} -> [{F,Mod}|Efs]; + error -> + {N,A} = F, + case erl_internal:bif(N, A) of + true -> + [{bif,F,erlang}|Efs]; + false -> + Efs + end + end end, [], Fs). + +add_imports(Mod, Fs, Is) -> + foldl(fun (F, Is0) -> orddict:store(F, Mod, Is0) end, Is, Fs). + +imported(F, A, St) -> + case orddict:find({F,A}, St#lint.imports) of + {ok,Mod} -> {yes,Mod}; + error -> no + end. + +%% on_load(Line, Val, State) -> State. +%% Check an on_load directive and remember it. + +on_load(Line, {Name,Arity}=Fa, #lint{on_load=OnLoad0}=St0) + when is_atom(Name), is_integer(Arity) -> + %% Always add the function name (even if there is a problem), + %% to avoid irrelevant warnings for unused functions. + St = St0#lint{on_load=[Fa|OnLoad0],on_load_line=Line}, + case St of + #lint{on_load=[{_,0}]} -> + %% This is the first on_load attribute seen in the module + %% and it has the correct arity. + St; + #lint{on_load=[{_,_}]} -> + %% Wrong arity. + add_error(Line, {bad_on_load_arity,Fa}, St); + #lint{on_load=[_,_|_]} -> + %% Multiple on_load attributes. + add_error(Line, multiple_on_loads, St) + end; +on_load(Line, Val, St) -> + %% Bad syntax. + add_error(Line, {bad_on_load,Val}, St). + +check_on_load(#lint{defined=Defined,on_load=[{_,0}=Fa], + on_load_line=Line}=St) -> + case gb_sets:is_member(Fa, Defined) of + true -> St; + false -> add_error(Line, {undefined_on_load,Fa}, St) + end; +check_on_load(St) -> St. + +%% call_function(Line, Name, Arity, State) -> State. +%% Add to both called and calls. + +call_function(Line, F, A, #lint{usage=Usage0,called=Cd,func=Func}=St) -> + #usage{calls = Cs} = Usage0, + NA = {F,A}, + Usage = case Cs of + undefined -> Usage0; + _ -> Usage0#usage{calls=dict:append(Func, NA, Cs)} + end, + St#lint{called=[{NA,Line}|Cd], usage=Usage}. + +%% is_function_exported(Name, Arity, State) -> false|true. + +is_function_exported(Name, Arity, #lint{exports=Exports,compile=Compile}) -> + gb_sets:is_element({Name,Arity}, Exports) orelse + member(export_all, Compile). + +%% function(Line, Name, Arity, Clauses, State) -> State. + +function(Line, instance, _Arity, _Cs, St) when St#lint.global_vt =/= [] -> + add_error(Line, define_instance, St); +function(Line, Name, Arity, Cs, St0) -> + St1 = define_function(Line, Name, Arity, St0#lint{func={Name,Arity}}), + clauses(Cs, St1#lint.global_vt, St1). + +%% define_function(Line, Name, Arity, State) -> State. + +define_function(Line, Name, Arity, St0) -> + St1 = keyword_warning(Line, Name, St0), + NA = {Name,Arity}, + case gb_sets:is_member(NA, St1#lint.defined) of + true -> + add_error(Line, {redefine_function,NA}, St1); + false -> + St2 = St1#lint{defined=gb_sets:add_element(NA, St1#lint.defined)}, + St = case erl_internal:bif(Name, Arity) andalso + not is_function_exported(Name, Arity, St2) of + true -> add_warning(Line, {redefine_bif,NA}, St2); + false -> St2 + end, + case imported(Name, Arity, St) of + {yes,_M} -> add_error(Line, {define_import,NA}, St); + no -> St + end + end. + +%% clauses([Clause], VarTable, State) -> {VarTable, State}. + +clauses(Cs, Vt, St) -> + foldl(fun (C, St0) -> + {_,St1} = clause(C, Vt, St0), + St1 + end, St, Cs). + +clause({clause,_Line,H,G,B}, Vt0, St0) -> + {Hvt,Binvt,St1} = head(H, Vt0, St0), + %% Cannot ignore BinVt since "binsize variables" may have been used. + Vt1 = vtupdate(Hvt, vtupdate(Binvt, Vt0)), + {Gvt,St2} = guard(G, Vt1, St1), + Vt2 = vtupdate(Gvt, Vt1), + {Bvt,St3} = exprs(B, Vt2, St2), + Upd = vtupdate(Bvt, Vt2), + check_unused_vars(Upd, Vt0, St3). + +%% head([HeadPattern], VarTable, State) -> +%% {VarTable,BinVarTable,State} +%% Check a patterns in head returning "all" variables. Not updating the +%% known variable list will result in multiple error messages/warnings. + +head(Ps, Vt, St0) -> + head(Ps, Vt, Vt, St0). % Old = Vt + +head([P|Ps], Vt, Old, St0) -> + {Pvt,Bvt1,St1} = pattern(P, Vt, Old, [], St0), + {Psvt,Bvt2,St2} = head(Ps, Vt, Old, St1), + {vtmerge_pat(Pvt, Psvt),vtmerge_pat(Bvt1,Bvt2),St2}; +head([], _Vt, _Env, St) -> {[],[],St}. + +%% pattern(Pattern, VarTable, Old, BinVarTable, State) -> +%% {UpdVarTable,BinVarTable,State}. +%% Check pattern return variables. Old is the set of variables used for +%% deciding whether an occurrence is a binding occurrence or a use, and +%% VarTable is the set of variables used for arguments to binary +%% patterns. UpdVarTable is updated when same variable in VarTable is +%% used in the size part of a bit segment. All other information about +%% used variables are recorded in BinVarTable. The caller can then decide +%% what to do with it depending on whether variables in the pattern shadow +%% variabler or not. This separation is one way of dealing with these: +%% A = 4, fun(<>) -> % A #2 unused +%% A = 4, fun(<>) -> % A #1 unused + +pattern(P, Vt, St) -> + pattern(P, Vt, Vt, [], St). % Old = Vt + +pattern({var,_Line,'_'}, _Vt, _Old, _Bvt, St) -> + {[],[],St}; %Ignore anonymous variable +pattern({var,Line,V}, _Vt, Old, Bvt, St) -> + pat_var(V, Line, Old, Bvt, St); +pattern({char,_Line,_C}, _Vt, _Old, _Bvt, St) -> {[],[],St}; +pattern({integer,_Line,_I}, _Vt, _Old, _Bvt, St) -> {[],[],St}; +pattern({float,_Line,_F}, _Vt, _Old, _Bvt, St) -> {[],[],St}; +pattern({atom,Line,A}, _Vt, _Old, _Bvt, St) -> + {[],[],keyword_warning(Line, A, St)}; +pattern({string,_Line,_S}, _Vt, _Old, _Bvt, St) -> {[],[],St}; +pattern({nil,_Line}, _Vt, _Old, _Bvt, St) -> {[],[],St}; +pattern({cons,_Line,H,T}, Vt, Old, Bvt, St0) -> + {Hvt,Bvt1,St1} = pattern(H, Vt, Old, Bvt, St0), + {Tvt,Bvt2,St2} = pattern(T, Vt, Old, Bvt, St1), + {vtmerge_pat(Hvt, Tvt),vtmerge_pat(Bvt1,Bvt2),St2}; +pattern({tuple,_Line,Ps}, Vt, Old, Bvt, St) -> + pattern_list(Ps, Vt, Old, Bvt, St); +%%pattern({struct,_Line,_Tag,Ps}, Vt, Old, Bvt, St) -> +%% pattern_list(Ps, Vt, Old, Bvt, St); +pattern({record_index,Line,Name,Field}, _Vt, _Old, _Bvt, St) -> + {Vt1,St1} = + check_record(Line, Name, St, + fun (Dfs, St1) -> + pattern_field(Field, Name, Dfs, St1) + end), + {Vt1,[],St1}; +pattern({record_field,Line,_,_}=M, _Vt, _Old, _Bvt, St0) -> + case expand_package(M, St0) of + {error, St1} -> + {[],[],add_error(Line, illegal_expr, St1)}; + {_, St1} -> + {[],[],St1} + end; +pattern({record,Line,Name,Pfs}, Vt, Old, Bvt, St) -> + case dict:find(Name, St#lint.records) of + {ok,{_Line,Fields}} -> + St1 = used_record(Name, St), + pattern_fields(Pfs, Name, Fields, Vt, Old, Bvt, St1); + error -> {[],[],add_error(Line, {undefined_record,Name}, St)} + end; +pattern({bin,_,Fs}, Vt, Old, Bvt, St) -> + pattern_bin(Fs, Vt, Old, Bvt, St); +pattern({op,_Line,'++',{nil,_},R}, Vt, Old, Bvt, St) -> + pattern(R, Vt, Old, Bvt, St); +pattern({op,_Line,'++',{cons,Li,{char,_L2,_C},T},R}, Vt, Old, Bvt, St) -> + pattern({op,Li,'++',T,R}, Vt, Old, Bvt, St); %Char unimportant here +pattern({op,_Line,'++',{cons,Li,{integer,_L2,_I},T},R}, Vt, Old, Bvt, St) -> + pattern({op,Li,'++',T,R}, Vt, Old, Bvt, St); %Weird, but compatible! +pattern({op,_Line,'++',{string,_Li,_S},R}, Vt, Old, Bvt, St) -> + pattern(R, Vt, Old, Bvt, St); %String unimportant here +pattern({match,_Line,Pat1,Pat2}, Vt, Old, Bvt, St0) -> + {Lvt,Bvt1,St1} = pattern(Pat1, Vt, Old, Bvt, St0), + {Rvt,Bvt2,St2} = pattern(Pat2, Vt, Old, Bvt, St1), + St3 = reject_bin_alias(Pat1, Pat2, St2), + {vtmerge_pat(Lvt, Rvt),vtmerge_pat(Bvt1,Bvt2),St3}; +%% Catch legal constant expressions, including unary +,-. +pattern(Pat, _Vt, _Old, _Bvt, St) -> + case is_pattern_expr(Pat) of + true -> {[],[],St}; + false -> {[],[],add_error(element(2, Pat), illegal_pattern, St)} + end. + +pattern_list(Ps, Vt, Old, Bvt0, St) -> + foldl(fun (P, {Psvt,Bvt,St0}) -> + {Pvt,Bvt1,St1} = pattern(P, Vt, Old, Bvt0, St0), + {vtmerge_pat(Pvt, Psvt),vtmerge_pat(Bvt,Bvt1),St1} + end, {[],[],St}, Ps). + +%% reject_bin_alias(Pat, Expr, St) -> St' +%% Reject aliases for binary patterns at the top level. + +reject_bin_alias_expr({bin,_,_}=P, {match,_,P0,E}, St0) -> + St = reject_bin_alias(P, P0, St0), + reject_bin_alias_expr(P, E, St); +reject_bin_alias_expr({match,_,_,_}=P, {match,_,P0,E}, St0) -> + St = reject_bin_alias(P, P0, St0), + reject_bin_alias_expr(P, E, St); +reject_bin_alias_expr(_, _, St) -> St. + + +%% reject_bin_alias(Pat1, Pat2, St) -> St' +%% Aliases of binary patterns, such as <> = <> or even +%% <> = <>, are not allowed. Traverse the patterns in parallel +%% and generate an error if any binary aliases are found. +%% We generate an error even if is obvious that the overall pattern can't +%% possibly match, for instance, {a,<>,c}={x,<>} WILL generate an +%% error. + +reject_bin_alias({bin,Line,_}, {bin,_,_}, St) -> + add_error(Line, illegal_bin_pattern, St); +reject_bin_alias({cons,_,H1,T1}, {cons,_,H2,T2}, St0) -> + St = reject_bin_alias(H1, H2, St0), + reject_bin_alias(T1, T2, St); +reject_bin_alias({tuple,_,Es1}, {tuple,_,Es2}, St) -> + reject_bin_alias_list(Es1, Es2, St); +reject_bin_alias({record,_,Name1,Pfs1}, {record,_,Name2,Pfs2}, + #lint{records=Recs}=St) -> + case {dict:find(Name1, Recs),dict:find(Name2, Recs)} of + {{ok,{_Line1,Fields1}},{ok,{_Line2,Fields2}}} -> + reject_bin_alias_rec(Pfs1, Pfs2, Fields1, Fields2, St); + {_,_} -> + %% One or more non-existing records. (An error messages has + %% already been generated, so we are done here.) + St + end; +reject_bin_alias({match,_,P1,P2}, P, St0) -> + St = reject_bin_alias(P1, P, St0), + reject_bin_alias(P2, P, St); +reject_bin_alias(P, {match,_,_,_}=M, St) -> + reject_bin_alias(M, P, St); +reject_bin_alias(_P1, _P2, St) -> St. + +reject_bin_alias_list([E1|Es1], [E2|Es2], St0) -> + St = reject_bin_alias(E1, E2, St0), + reject_bin_alias_list(Es1, Es2, St); +reject_bin_alias_list(_, _, St) -> St. + +reject_bin_alias_rec(PfsA0, PfsB0, FieldsA0, FieldsB0, St) -> + %% We treat records as if they have been converted to tuples. + PfsA1 = rbia_field_vars(PfsA0), + PfsB1 = rbia_field_vars(PfsB0), + FieldsA1 = rbia_fields(lists:reverse(FieldsA0), 0, []), + FieldsB1 = rbia_fields(lists:reverse(FieldsB0), 0, []), + FieldsA = sofs:relation(FieldsA1), + PfsA = sofs:relation(PfsA1), + A = sofs:join(FieldsA, 1, PfsA, 1), + FieldsB = sofs:relation(FieldsB1), + PfsB = sofs:relation(PfsB1), + B = sofs:join(FieldsB, 1, PfsB, 1), + C = sofs:join(A, 2, B, 2), + D = sofs:projection({external,fun({_,_,P1,_,P2}) -> {P1,P2} end}, C), + E = sofs:to_external(D), + {Ps1,Ps2} = lists:unzip(E), + reject_bin_alias_list(Ps1, Ps2, St). + +rbia_field_vars(Fs) -> + [{Name,Pat} || {record_field,_,{atom,_,Name},Pat} <- Fs]. + +rbia_fields([{record_field,_,{atom,_,Name},_}|Fs], I, Acc) -> + rbia_fields(Fs, I+1, [{Name,I}|Acc]); +rbia_fields([_|Fs], I, Acc) -> + rbia_fields(Fs, I+1, Acc); +rbia_fields([], _, Acc) -> Acc. + +%% is_pattern_expr(Expression) -> boolean(). +%% Test if a general expression is a valid pattern expression. + +is_pattern_expr(Expr) -> + case is_pattern_expr_1(Expr) of + false -> false; + true -> + %% Expression is syntactically correct - make sure that it + %% also can be evaluated. + case erl_eval:partial_eval(Expr) of + {integer,_,_} -> true; + {char,_,_} -> true; + {float,_,_} -> true; + {atom,_,_} -> true; + _ -> false + end + end. + +is_pattern_expr_1({char,_Line,_C}) -> true; +is_pattern_expr_1({integer,_Line,_I}) -> true; +is_pattern_expr_1({float,_Line,_F}) -> true; +is_pattern_expr_1({atom,_Line,_A}) -> true; +is_pattern_expr_1({tuple,_Line,Es}) -> + all(fun is_pattern_expr/1, Es); +is_pattern_expr_1({nil,_Line}) -> true; +is_pattern_expr_1({cons,_Line,H,T}) -> + case is_pattern_expr_1(H) of + true -> is_pattern_expr_1(T); + false -> false + end; +is_pattern_expr_1({op,_Line,Op,A}) -> + case erl_internal:arith_op(Op, 1) of + true -> is_pattern_expr_1(A); + false -> false + end; +is_pattern_expr_1({op,_Line,Op,A1,A2}) -> + case erl_internal:arith_op(Op, 2) of + true -> all(fun is_pattern_expr/1, [A1,A2]); + false -> false + end; +is_pattern_expr_1(_Other) -> false. + +%% pattern_bin([Element], VarTable, Old, BinVarTable, State) -> +%% {UpdVarTable,UpdBinVarTable,State}. +%% Check a pattern group. BinVarTable are used binsize variables. + +pattern_bin(Es, Vt, Old, Bvt0, St0) -> + {_Sz,Esvt,Bvt,St1} = foldl(fun (E, Acc) -> + pattern_element(E, Vt, Old, Acc) + end, + {0,[],Bvt0,St0}, Es), + {Esvt,Bvt,St1}. + +pattern_element({bin_element,Line,{string,_,_},Size,Ts}=Be, Vt, + Old, {Sz,Esvt,Bvt,St0}=Acc) -> + case good_string_size_type(Size, Ts) of + true -> + pattern_element_1(Be, Vt, Old, Acc); + false -> + St = add_error(Line, typed_literal_string, St0), + {Sz,Esvt,Bvt,St} + end; +pattern_element(Be, Vt, Old, Acc) -> + pattern_element_1(Be, Vt, Old, Acc). + +pattern_element_1({bin_element,Line,E,Sz0,Ts}, Vt, Old, {Size0,Esvt,Bvt,St0}) -> + {Pevt,Bvt1,St1} = pat_bit_expr(E, Old, Bvt, St0), + %% vtmerge or vtmerge_pat doesn't matter here + {Sz1,Szvt,Bvt2,St2} = pat_bit_size(Sz0, vtmerge(Vt, Esvt), Bvt, St1), + {Sz2,Bt,St3} = bit_type(Line, Sz1, Ts, St2), + {Sz3,St4} = bit_size_check(Line, Sz2, Bt, St3), + Sz4 = case {E,Sz3} of + {{string,_,S},all} -> 8*length(S); + {_,_} -> Sz3 + end, + {Size1,St5} = add_bit_size(Line, Sz4, Size0, false, St4), + {Size1,vtmerge(Szvt,vtmerge(Pevt, Esvt)), + vtmerge(Bvt2,vtmerge(Bvt, Bvt1)), St5}. + +good_string_size_type(default, default) -> + true; +good_string_size_type(default, Ts) -> + lists:any(fun(utf8) -> true; + (utf16) -> true; + (utf32) -> true; + (_) -> false + end, Ts); +good_string_size_type(_, _) -> false. + +%% pat_bit_expr(Pattern, OldVarTable, BinVarTable,State) -> +%% {UpdVarTable,UpdBinVarTable,State}. +%% Check pattern bit expression, only allow really valid patterns! + +pat_bit_expr({var,_,'_'}, _Old, _Bvt, St) -> {[],[],St}; +pat_bit_expr({var,Ln,V}, Old, Bvt, St) -> pat_var(V, Ln, Old, Bvt, St); +pat_bit_expr({string,_,_}, _Old, _Bvt, St) -> {[],[],St}; +pat_bit_expr({bin,L,_}, _Old, _Bvt, St) -> + {[],[],add_error(L, illegal_pattern, St)}; +pat_bit_expr(P, _Old, _Bvt, St) -> + case is_pattern_expr(P) of + true -> {[],[],St}; + false -> {[],[],add_error(element(2, P), illegal_pattern, St)} + end. + +%% pat_bit_size(Size, VarTable, BinVarTable, State) -> +%% {Value,UpdVarTable,UpdBinVarTable,State}. +%% Check pattern size expression, only allow really valid sizes! + +pat_bit_size(default, _Vt, _Bvt, St) -> {default,[],[],St}; +pat_bit_size({atom,_Line,all}, _Vt, _Bvt, St) -> {all,[],[],St}; +pat_bit_size({var,Lv,V}, Vt0, Bvt0, St0) -> + {Vt,Bvt,St1} = pat_binsize_var(V, Lv, Vt0, Bvt0, St0), + {unknown,Vt,Bvt,St1}; +pat_bit_size(Size, _Vt, _Bvt, St) -> + Line = element(2, Size), + case is_pattern_expr(Size) of + true -> + case erl_eval:partial_eval(Size) of + {integer,Line,I} -> {I,[],[],St}; + _Other -> {unknown,[],[],add_error(Line, illegal_bitsize, St)} + end; + false -> {unknown,[],[],add_error(Line, illegal_bitsize, St)} + end. + +%% expr_bin(Line, [Element], VarTable, State, CheckFun) -> {UpdVarTable,State}. +%% Check an expression group. + +expr_bin(Es, Vt, St0, Check) -> + {_Sz,Esvt,St1} = foldl(fun (E, Acc) -> bin_element(E, Vt, Acc, Check) end, + {0,[],St0}, Es), + {Esvt,St1}. + +bin_element({bin_element,Line,E,Sz0,Ts}, Vt, {Size0,Esvt,St0}, Check) -> + {Vt1,St1} = Check(E, Vt, St0), + {Sz1,Vt2,St2} = bit_size(Sz0, Vt, St1, Check), + {Sz2,Bt,St3} = bit_type(Line, Sz1, Ts, St2), + {Sz3,St4} = bit_size_check(Line, Sz2, Bt, St3), + {Size1,St5} = add_bit_size(Line, Sz3, Size0, true, St4), + {Size1,vtmerge([Vt2,Vt1,Esvt]),St5}. + +bit_size(default, _Vt, St, _Check) -> {default,[],St}; +bit_size({atom,_Line,all}, _Vt, St, _Check) -> {all,[],St}; +bit_size(Size, Vt, St, Check) -> + %% Try to safely evaluate Size if constant to get size, + %% otherwise just treat it as an expression. + case is_gexpr(Size, St#lint.records) of + true -> + case erl_eval:partial_eval(Size) of + {integer,_ILn,I} -> {I,[],St}; + _Other -> + {Evt,St1} = Check(Size, Vt, St), + {unknown,Evt,St1} + end; + false -> + {Evt,St1} = Check(Size, Vt, St), + {unknown,Evt,St1} + end. + +%% bit_type(Line, Size, TypeList, State) -> {Size,#bittype,St}. +%% Perform warning check on type and size. + +bit_type(Line, Size0, Type, St) -> + case erl_bits:set_bit_type(Size0, Type) of + {ok,Size1,Bt} -> {Size1,Bt,St}; + {error,What} -> + %% Flag error and generate a default. + {ok,Size1,Bt} = erl_bits:set_bit_type(default, []), + {Size1,Bt,add_error(Line, What, St)} + end. + +%% bit_size_check(Line, Size, BitType, State) -> {BitSize,State}. +%% Do some checking & warnings on types +%% float == 32 or 64 + +bit_size_check(_Line, unknown, _, St) -> {unknown,St}; +bit_size_check(_Line, undefined, #bittype{type=Type}, St) -> + true = (Type =:= utf8) or (Type =:= utf16) or (Type =:= utf32), %Assertion. + {undefined,St}; +bit_size_check(Line, all, #bittype{type=Type}, St) -> + case Type of + binary -> {all,St}; + _ -> {unknown,add_error(Line, illegal_bitsize, St)} + end; +bit_size_check(Line, Size, #bittype{type=Type,unit=Unit}, St) -> + Sz = Unit * Size, %Total number of bits! + St2 = elemtype_check(Line, Type, Sz, St), + {Sz,St2}. + +elemtype_check(_Line, float, 32, St) -> St; +elemtype_check(_Line, float, 64, St) -> St; +elemtype_check(Line, float, _Size, St) -> + add_warning(Line, {bad_bitsize,"float"}, St); +elemtype_check(_Line, _Type, _Size, St) -> St. + + +%% add_bit_size(Line, ElementSize, BinSize, Build, State) -> {Size,State}. +%% Add bits to group size. + +add_bit_size(Line, _Sz1, all, false, St) -> + {all,add_error(Line, unsized_binary_not_at_end, St)}; +add_bit_size(_Line, _Sz1, all, true, St) -> + {all,St}; +add_bit_size(_Line, all, _Sz2, _B, St) -> {all,St}; +add_bit_size(_Line, undefined, _Sz2, _B, St) -> {undefined,St}; +add_bit_size(_Line, unknown, _Sz2, _B, St) -> {unknown,St}; +add_bit_size(_Line, _Sz1, undefined, _B, St) -> {unknown,St}; +add_bit_size(_Line, _Sz1, unknown, _B, St) -> {unknown,St}; +add_bit_size(_Line, Sz1, Sz2, _B, St) -> {Sz1 + Sz2,St}. + +%% guard([GuardTest], VarTable, State) -> +%% {UsedVarTable,State} +%% Check a guard, return all variables. + +%% Disjunction of guard conjunctions +guard([L|R], Vt, St0) when is_list(L) -> + {Gvt, St1} = guard_tests(L, Vt, St0), + {Gsvt, St2} = guard(R, vtupdate(Gvt, Vt), St1), + {vtupdate(Gvt, Gsvt),St2}; +guard(L, Vt, St0) -> + guard_tests(L, Vt, St0). + +%% guard conjunction +guard_tests([G|Gs], Vt, St0) -> + {Gvt,St1} = guard_test(G, Vt, St0), + {Gsvt,St2} = guard_tests(Gs, vtupdate(Gvt, Vt), St1), + {vtupdate(Gvt, Gsvt),St2}; +guard_tests([], _Vt, St) -> {[],St}. + +%% guard_test(Test, VarTable, State) -> +%% {UsedVarTable,State'} +%% Check one guard test, returns NewVariables. We now allow more +%% expressions in guards including the new is_XXX type tests, but +%% only allow the old type tests at the top level. + +guard_test(G, Vt, St0) -> + St1 = obsolete_guard(G, St0), + guard_test2(G, Vt, St1). + +%% Specially handle record type test here. +guard_test2({call,Line,{atom,Lr,record},[E,A]}, Vt, St0) -> + gexpr({call,Line,{atom,Lr,is_record},[E,A]}, Vt, St0); +guard_test2({call,_Line,{atom,_La,F},As}=G, Vt, St0) -> + {Asvt,St1} = gexpr_list(As, Vt, St0), %Always check this. + A = length(As), + case erl_internal:type_test(F, A) of + true when F =/= is_record -> {Asvt,St1}; + _ -> gexpr(G, Vt, St0) + end; +guard_test2(G, Vt, St) -> + %% Everything else is a guard expression. + gexpr(G, Vt, St). + +%% gexpr(GuardExpression, VarTable, State) -> +%% {UsedVarTable,State'} +%% Check a guard expression, returns NewVariables. + +gexpr({var,Line,V}, Vt, St) -> + expr_var(V, Line, Vt, St); +gexpr({char,_Line,_C}, _Vt, St) -> {[],St}; +gexpr({integer,_Line,_I}, _Vt, St) -> {[],St}; +gexpr({float,_Line,_F}, _Vt, St) -> {[],St}; +gexpr({atom,Line,A}, _Vt, St) -> + {[],keyword_warning(Line, A, St)}; +gexpr({string,_Line,_S}, _Vt, St) -> {[],St}; +gexpr({nil,_Line}, _Vt, St) -> {[],St}; +gexpr({cons,_Line,H,T}, Vt, St) -> + gexpr_list([H,T], Vt, St); +gexpr({tuple,_Line,Es}, Vt, St) -> + gexpr_list(Es, Vt, St); +%%gexpr({struct,_Line,_Tag,Es}, Vt, St) -> +%% gexpr_list(Es, Vt, St); +gexpr({record_index,Line,Name,Field}, _Vt, St) -> + check_record(Line, Name, St, + fun (Dfs, St1) -> record_field(Field, Name, Dfs, St1) end ); +gexpr({record_field,Line,_,_}=M, _Vt, St0) -> + case expand_package(M, St0) of + {error, St1} -> + {[],add_error(Line, illegal_expr, St1)}; + {_, St1} -> + {[], St1} + end; +gexpr({record_field,Line,Rec,Name,Field}, Vt, St0) -> + {Rvt,St1} = gexpr(Rec, Vt, St0), + {Fvt,St2} = check_record(Line, Name, St1, + fun (Dfs, St) -> + record_field(Field, Name, Dfs, St) + end), + {vtmerge(Rvt, Fvt),St2}; +gexpr({record,Line,Name,Inits}, Vt, St) -> + check_record(Line, Name, St, + fun (Dfs, St1) -> + ginit_fields(Inits, Line, Name, Dfs, Vt, St1) + end); +gexpr({bin,_Line,Fs}, Vt,St) -> + expr_bin(Fs, Vt, St, fun gexpr/3); +gexpr({call,_Line,{atom,_Lr,is_record},[E,{atom,Ln,Name}]}, Vt, St0) -> + {Rvt,St1} = gexpr(E, Vt, St0), + {Rvt,exist_record(Ln, Name, St1)}; +gexpr({call,Line,{atom,_Lr,is_record},[E,R]}, Vt, St0) -> + {Asvt,St1} = gexpr_list([E,R], Vt, St0), + {Asvt,add_error(Line, illegal_guard_expr, St1)}; +gexpr({call,Line,{remote,_Lr,{atom,_Lm,erlang},{atom,Lf,is_record}},[E,A]}, + Vt, St0) -> + gexpr({call,Line,{atom,Lf,is_record},[E,A]}, Vt, St0); +gexpr({call,_Line,{atom,_Lr,is_record},[E,{atom,_,_Name},{integer,_,_}]}, + Vt, St0) -> + gexpr(E, Vt, St0); +gexpr({call,Line,{atom,_Lr,is_record},[_,_,_]=Asvt0}, Vt, St0) -> + {Asvt,St1} = gexpr_list(Asvt0, Vt, St0), + {Asvt,add_error(Line, illegal_guard_expr, St1)}; +gexpr({call,Line,{remote,_,{atom,_,erlang},{atom,_,is_record}=Isr},[_,_,_]=Args}, + Vt, St0) -> + gexpr({call,Line,Isr,Args}, Vt, St0); +gexpr({call,Line,{atom,_La,F},As}, Vt, St0) -> + {Asvt,St1} = gexpr_list(As, Vt, St0), + A = length(As), + case erl_internal:guard_bif(F, A) of + true -> + %% Also check that it is auto-imported. + case erl_internal:bif(F, A) of + true -> {Asvt,St1}; + false -> {Asvt,add_error(Line, {explicit_export,F,A}, St1)} + end; + false -> {Asvt,add_error(Line, illegal_guard_expr, St1)} + end; +gexpr({call,Line,{remote,_Lr,{atom,_Lm,erlang},{atom,_Lf,F}},As}, Vt, St0) -> + {Asvt,St1} = gexpr_list(As, Vt, St0), + A = length(As), + case erl_internal:guard_bif(F, A) orelse is_gexpr_op(F, A) of + true -> {Asvt,St1}; + false -> {Asvt,add_error(Line, illegal_guard_expr, St1)} + end; +gexpr({call,L,{tuple,Lt,[{atom,Lm,erlang},{atom,Lf,F}]},As}, Vt, St) -> + gexpr({call,L,{remote,Lt,{atom,Lm,erlang},{atom,Lf,F}},As}, Vt, St); +gexpr({op,Line,Op,A}, Vt, St0) -> + {Avt,St1} = gexpr(A, Vt, St0), + case is_gexpr_op(Op, 1) of + true -> {Avt,St1}; + false -> {Avt,add_error(Line, illegal_guard_expr, St1)} + end; +gexpr({op,Line,Op,L,R}, Vt, St0) -> + {Avt,St1} = gexpr_list([L,R], Vt, St0), + case is_gexpr_op(Op, 2) of + true -> {Avt,St1}; + false -> {Avt,add_error(Line, illegal_guard_expr, St1)} + end; +%% Everything else is illegal! You could put explicit tests here to +%% better error diagnostics. +gexpr(E, _Vt, St) -> + {[],add_error(element(2, E), illegal_guard_expr, St)}. + +%% gexpr_list(Expressions, VarTable, State) -> +%% {UsedVarTable,State'} + +gexpr_list(Es, Vt, St) -> + foldl(fun (E, {Esvt,St0}) -> + {Evt,St1} = gexpr(E, Vt, St0), + {vtmerge(Evt, Esvt),St1} + end, {[],St}, Es). + +%% is_guard_test(Expression) -> boolean(). +%% Test if a general expression is a guard test. +is_guard_test(E) -> + is_guard_test2(E, dict:new()). + +%% is_guard_test(Expression, Forms) -> boolean(). +is_guard_test(Expression, Forms) -> + RecordAttributes = [A || A = {attribute, _, record, _D} <- Forms], + St0 = foldl(fun(Attr0, St1) -> + Attr = zip_file_and_line(Attr0, "none"), + attribute_state(Attr, St1) + end, start(), RecordAttributes), + is_guard_test2(zip_file_and_line(Expression, "nofile"), St0#lint.records). + +%% is_guard_test2(Expression, RecordDefs :: dict()) -> boolean(). +is_guard_test2({call,Line,{atom,Lr,record},[E,A]}, RDs) -> + is_gexpr({call,Line,{atom,Lr,is_record},[E,A]}, RDs); +is_guard_test2({call,_Line,{atom,_La,Test},As}=Call, RDs) -> + case erl_internal:type_test(Test, length(As)) of + true -> is_gexpr_list(As, RDs); + false -> is_gexpr(Call, RDs) + end; +is_guard_test2(G, RDs) -> + %%Everything else is a guard expression. + is_gexpr(G, RDs). + +%% is_guard_expr(Expression) -> boolean(). +%% Test if an expression is a guard expression. + +is_guard_expr(E) -> is_gexpr(E, []). + +is_gexpr({var,_L,_V}, _RDs) -> true; +is_gexpr({char,_L,_C}, _RDs) -> true; +is_gexpr({integer,_L,_I}, _RDs) -> true; +is_gexpr({float,_L,_F}, _RDs) -> true; +is_gexpr({atom,_L,_A}, _RDs) -> true; +is_gexpr({string,_L,_S}, _RDs) -> true; +is_gexpr({nil,_L}, _RDs) -> true; +is_gexpr({cons,_L,H,T}, RDs) -> is_gexpr_list([H,T], RDs); +is_gexpr({tuple,_L,Es}, RDs) -> is_gexpr_list(Es, RDs); +%%is_gexpr({struct,_L,_Tag,Es}, RDs) -> +%% is_gexpr_list(Es, RDs); +is_gexpr({record_index,_L,_Name,Field}, RDs) -> + is_gexpr(Field, RDs); +is_gexpr({record_field,_L,_,_}=M, _RDs) -> + erl_parse:package_segments(M) =/= error; +is_gexpr({record_field,_L,Rec,_Name,Field}, RDs) -> + is_gexpr_list([Rec,Field], RDs); +is_gexpr({record,L,Name,Inits}, RDs) -> + is_gexpr_fields(Inits, L, Name, RDs); +is_gexpr({bin,_L,Fs}, RDs) -> + all(fun ({bin_element,_Line,E,Sz,_Ts}) -> + is_gexpr(E, RDs) and (Sz =:= default orelse is_gexpr(Sz, RDs)) + end, Fs); +is_gexpr({call,_L,{atom,_Lf,F},As}, RDs) -> + A = length(As), + case erl_internal:guard_bif(F, A) of + true -> is_gexpr_list(As, RDs); + false -> false + end; +is_gexpr({call,_L,{remote,_Lr,{atom,_Lm,erlang},{atom,_Lf,F}},As}, RDs) -> + A = length(As), + case erl_internal:guard_bif(F, A) orelse is_gexpr_op(F, A) of + true -> is_gexpr_list(As, RDs); + false -> false + end; +is_gexpr({call,L,{tuple,Lt,[{atom,Lm,erlang},{atom,Lf,F}]},As}, RDs) -> + is_gexpr({call,L,{remote,Lt,{atom,Lm,erlang},{atom,Lf,F}},As}, RDs); +is_gexpr({op,_L,Op,A}, RDs) -> + case is_gexpr_op(Op, 1) of + true -> is_gexpr(A, RDs); + false -> false + end; +is_gexpr({op,_L,Op,A1,A2}, RDs) -> + case is_gexpr_op(Op, 2) of + true -> is_gexpr_list([A1,A2], RDs); + false -> false + end; +is_gexpr(_Other, _RDs) -> false. + +is_gexpr_op('andalso', 2) -> true; +is_gexpr_op('orelse', 2) -> true; +is_gexpr_op(Op, A) -> + try erl_internal:op_type(Op, A) of + arith -> true; + bool -> true; + comp -> true; + list -> false; + send -> false + catch _:_ -> false + end. + +is_gexpr_list(Es, RDs) -> all(fun (E) -> is_gexpr(E, RDs) end, Es). + +is_gexpr_fields(Fs, L, Name, RDs) -> + IFs = case dict:find(Name, RDs) of + {ok,{_Line,Fields}} -> Fs ++ init_fields(Fs, L, Fields); + error -> Fs + end, + all(fun ({record_field,_Lf,_Name,V}) -> is_gexpr(V, RDs); + (_Other) -> false end, IFs). + +%% exprs(Sequence, VarTable, State) -> +%% {UsedVarTable,State'} +%% Check a sequence of expressions, return all variables. + +exprs([E|Es], Vt, St0) -> + {Evt,St1} = expr(E, Vt, St0), + {Esvt,St2} = exprs(Es, vtupdate(Evt, Vt), St1), + {vtupdate(Evt, Esvt),St2}; +exprs([], _Vt, St) -> {[],St}. + +%% expr(Expression, VarTable, State) -> +%% {UsedVarTable,State'} +%% Check an expression, returns NewVariables. Assume naive users and +%% mark illegally exported variables, e.g. from catch, as unsafe to better +%% show why unbound. + +expr({var,Line,V}, Vt, St) -> + expr_var(V, Line, Vt, St); +expr({char,_Line,_C}, _Vt, St) -> {[],St}; +expr({integer,_Line,_I}, _Vt, St) -> {[],St}; +expr({float,_Line,_F}, _Vt, St) -> {[],St}; +expr({atom,Line,A}, _Vt, St) -> + {[],keyword_warning(Line, A, St)}; +expr({string,_Line,_S}, _Vt, St) -> {[],St}; +expr({nil,_Line}, _Vt, St) -> {[],St}; +expr({cons,_Line,H,T}, Vt, St) -> + expr_list([H,T], Vt, St); +expr({lc,_Line,E,Qs}, Vt0, St0) -> + {Vt,St} = handle_comprehension(E, Qs, Vt0, St0), + {vtold(Vt, Vt0),St}; %Don't export local variables +expr({bc,_Line,E,Qs}, Vt0, St0) -> + {Vt,St} = handle_comprehension(E, Qs, Vt0, St0), + {vtold(Vt,Vt0),St}; %Don't export local variables +expr({tuple,_Line,Es}, Vt, St) -> + expr_list(Es, Vt, St); +%%expr({struct,Line,Tag,Es}, Vt, St) -> +%% expr_list(Es, Vt, St); +expr({record_index,Line,Name,Field}, _Vt, St) -> + check_record(Line, Name, St, + fun (Dfs, St1) -> record_field(Field, Name, Dfs, St1) end); +expr({record,Line,Name,Inits}, Vt, St) -> + check_record(Line, Name, St, + fun (Dfs, St1) -> + init_fields(Inits, Line, Name, Dfs, Vt, St1) + end); +expr({record_field,Line,_,_}=M, _Vt, St0) -> + case expand_package(M, St0) of + {error, St1} -> + {[],add_error(Line, illegal_expr, St1)}; + {_, St1} -> + {[], St1} + end; +expr({record_field,Line,Rec,Name,Field}, Vt, St0) -> + {Rvt,St1} = record_expr(Line, Rec, Vt, St0), + {Fvt,St2} = check_record(Line, Name, St1, + fun (Dfs, St) -> + record_field(Field, Name, Dfs, St) + end), + {vtmerge(Rvt, Fvt),St2}; +expr({record,Line,Rec,Name,Upds}, Vt, St0) -> + {Rvt,St1} = record_expr(Line, Rec, Vt, St0), + {Usvt,St2} = check_record(Line, Name, St1, + fun (Dfs, St) -> + update_fields(Upds, Name, Dfs, Vt, St) + end ), + case has_wildcard_field(Upds) of + true -> {[],add_error(Line, {wildcard_in_update,Name}, St2)}; + false -> {vtmerge(Rvt, Usvt),St2} + end; +expr({bin,_Line,Fs}, Vt, St) -> + expr_bin(Fs, Vt, St, fun expr/3); +expr({block,_Line,Es}, Vt, St) -> + %% Unfold block into a sequence. + exprs(Es, Vt, St); +expr({'if',Line,Cs}, Vt, St) -> + icrt_clauses(Cs, {'if',Line}, Vt, St); +expr({'case',Line,E,Cs}, Vt, St0) -> + {Evt,St1} = expr(E, Vt, St0), + {Cvt,St2} = icrt_clauses(Cs, {'case',Line}, vtupdate(Evt, Vt), St1), + {vtmerge(Evt, Cvt),St2}; +expr({'cond',Line,Cs}, Vt, St) -> + cond_clauses(Cs,{'cond',Line}, Vt, St); +expr({'receive',Line,Cs}, Vt, St) -> + icrt_clauses(Cs, {'receive',Line}, Vt, St); +expr({'receive',Line,Cs,To,ToEs}, Vt, St0) -> + %% Are variables from the timeout expression visible in the clauses? NO! + {Tvt,St1} = expr(To, Vt, St0), + {Tevt,St2} = exprs(ToEs, Vt, St1), + {Cvt,St3} = icrt_clauses(Cs, Vt, St2), + %% Csvts = [vtnew(Tevt, Vt)|Cvt], %This is just NEW variables! + Csvts = [Tevt|Cvt], + {Rvt,St4} = icrt_export(Csvts, Vt, {'receive',Line}, St3), + {vtmerge([Tvt,Tevt,Rvt]),St4}; +expr({'fun',Line,Body}, Vt, St) -> + %%No one can think funs export! + case Body of + {clauses,Cs} -> + {Bvt, St1} = fun_clauses(Cs, Vt, St), + {vtupdate(Bvt, Vt), St1}; + {function,F,A} -> + %% N.B. Only allows BIFs here as well, NO IMPORTS!! + case erl_internal:bif(F, A) of + true -> {[],St}; + false -> {[],call_function(Line, F, A, St)} + end; + {function,_M,_F,_A} -> + {[],St} + end; +expr({call,_Line,{atom,_Lr,is_record},[E,{atom,Ln,Name}]}, Vt, St0) -> + {Rvt,St1} = expr(E, Vt, St0), + {Rvt,exist_record(Ln, Name, St1)}; +expr({call,Line,{remote,_Lr,{atom,_Lm,erlang},{atom,Lf,is_record}},[E,A]}, + Vt, St0) -> + expr({call,Line,{atom,Lf,is_record},[E,A]}, Vt, St0); +expr({call,L,{tuple,Lt,[{atom,Lm,erlang},{atom,Lf,is_record}]},As}, Vt, St) -> + expr({call,L,{remote,Lt,{atom,Lm,erlang},{atom,Lf,is_record}},As}, Vt, St); +expr({call,Line,{remote,_Lr,M,F},As}, Vt, St0) -> + case expand_package(M, St0) of + {error, _} -> + expr_list([M,F|As], Vt, St0); + {{atom,_La,M1}, St1} -> + case F of + {atom,Lf,F1} -> + St2 = keyword_warning(Lf, F1, St1), + St3 = check_remote_function(Line, M1, F1, As, St2), + expr_list(As, Vt, St3); + _ -> + expr_list([F|As], Vt, St1) + end + end; +expr({call,Line,{atom,La,F},As}, Vt, St0) -> + St1 = keyword_warning(La, F, St0), + {Asvt,St2} = expr_list(As, Vt, St1), + A = length(As), + case erl_internal:bif(F, A) of + true -> + St3 = deprecated_function(Line, erlang, F, As, St2), + {Asvt,case is_warn_enabled(bif_clash, St3) andalso + is_bif_clash(F, A, St3) of + false -> + St3; + true -> + add_error(Line, {call_to_redefined_bif,{F,A}}, St3) + end}; + false -> + {Asvt,case imported(F, A, St2) of + {yes,M} -> + St3 = check_remote_function(Line, M, F, As, St2), + U0 = St3#lint.usage, + Imp = ordsets:add_element({{F,A},M},U0#usage.imported), + St3#lint{usage=U0#usage{imported = Imp}}; + no -> + case {F,A} of + {record_info,2} -> + check_record_info_call(Line,La,As,St2); + N when N =:= St2#lint.func -> St2; + _ -> call_function(Line, F, A, St2) + end + end} + end; +expr({call,Line,{record_field,_,_,_}=F,As}, Vt, St0) -> + case expand_package(F, St0) of + {error, _} -> + expr_list([F|As], Vt, St0); + {A, St1} -> + expr({call,Line,A,As}, Vt, St1) + end; +expr({call,Line,F,As}, Vt, St0) -> + St = warn_invalid_call(Line,F,St0), + expr_list([F|As], Vt, St); %They see the same variables +expr({'try',Line,Es,Scs,Ccs,As}, Vt, St0) -> + %% Currently, we don't allow any exports because later + %% passes cannot handle exports in combination with 'after'. + {Evt0,St1} = exprs(Es, Vt, St0), + TryLine = {'try',Line}, + Uvt = vtunsafe(vtnames(vtnew(Evt0, Vt)), TryLine, []), + Evt1 = vtupdate(Uvt, vtupdate(Evt0, Vt)), + {Sccs,St2} = icrt_clauses(Scs++Ccs, TryLine, Evt1, St1), + Rvt0 = Sccs, + Rvt1 = vtupdate(vtunsafe(vtnames(vtnew(Rvt0, Vt)), TryLine, []), Rvt0), + Evt2 = vtmerge(Evt1, Rvt1), + {Avt0,St} = exprs(As, Evt2, St2), + Avt1 = vtupdate(vtunsafe(vtnames(vtnew(Avt0, Vt)), TryLine, []), Avt0), + Avt = vtmerge(Evt2, Avt1), + {Avt,St}; +expr({'catch',Line,E}, Vt, St0) -> + %% No new variables added, flag new variables as unsafe. + {Evt,St1} = expr(E, Vt, St0), + Uvt = vtunsafe(vtnames(vtnew(Evt, Vt)), {'catch',Line}, []), + {vtupdate(Uvt,vtupdate(Evt, Vt)),St1}; +expr({match,_Line,P,E}, Vt, St0) -> + {Evt,St1} = expr(E, Vt, St0), + {Pvt,Bvt,St2} = pattern(P, vtupdate(Evt, Vt), St1), + St = reject_bin_alias_expr(P, E, St2), + {vtupdate(Bvt, vtmerge(Evt, Pvt)),St}; +%% No comparison or boolean operators yet. +expr({op,_Line,_Op,A}, Vt, St) -> + expr(A, Vt, St); +expr({op,Line,Op,L,R}, Vt, St0) when Op =:= 'orelse'; Op =:= 'andalso' -> + {Evt1,St1} = expr(L, Vt, St0), + Vt1 = vtupdate(Evt1, Vt), + {Evt2,St2} = expr(R, Vt1, St1), + Vt2 = vtmerge(Evt2, Vt1), + {Vt3,St3} = icrt_export([Vt1,Vt2], Vt1, {Op,Line}, St2), + {vtmerge(Evt1, Vt3),St3}; +expr({op,_Line,_Op,L,R}, Vt, St) -> + expr_list([L,R], Vt, St); %They see the same variables +%% The following are not allowed to occur anywhere! +expr({remote,Line,_M,_F}, _Vt, St) -> + {[],add_error(Line, illegal_expr, St)}; +expr({'query',Line,_Q}, _Vt, St) -> + {[],add_error(Line, {mnemosyne,"query"}, St)}. + +%% expr_list(Expressions, Variables, State) -> +%% {UsedVarTable,State} + +expr_list(Es, Vt, St) -> + foldl(fun (E, {Esvt,St0}) -> + {Evt,St1} = expr(E, Vt, St0), + {vtmerge(Evt, Esvt),St1} + end, {[],St}, Es). + +record_expr(Line, Rec, Vt, St0) -> + St1 = warn_invalid_record(Line, Rec, St0), + expr(Rec, Vt, St1). + +%% warn_invalid_record(Line, Record, State0) -> State +%% Adds warning if the record is invalid. + +warn_invalid_record(Line, R, St) -> + case is_valid_record(R) of + true -> St; + false -> add_warning(Line, invalid_record, St) + end. + +%% is_valid_record(Record) -> boolean(). + +is_valid_record(Rec) -> + case Rec of + {char, _, _} -> false; + {integer, _, _} -> false; + {float, _, _} -> false; + {atom, _, _} -> false; + {string, _, _} -> false; + {cons, _, _, _} -> false; + {nil, _} -> false; + {lc, _, _, _} -> false; + {record_index, _, _, _} -> false; + {'fun', _, _} -> false; + _ -> true + end. + +%% warn_invalid_call(Line, Call, State0) -> State +%% Adds warning if the call is invalid. + +warn_invalid_call(Line, F, St) -> + case is_valid_call(F) of + true -> St; + false -> add_warning(Line, invalid_call, St) + end. + +%% is_valid_call(Call) -> boolean(). + +is_valid_call(Call) -> + case Call of + {char, _, _} -> false; + {integer, _, _} -> false; + {float, _, _} -> false; + {string, _, _} -> false; + {cons, _, _, _} -> false; + {nil, _} -> false; + {lc, _, _, _} -> false; + {record_index, _, _, _} -> false; + {tuple, _, Exprs} when length(Exprs) =/= 2 -> false; + _ -> true + end. + +%% record_def(Line, RecordName, [RecField], State) -> State. +%% Add a record definition if it does not already exist. Normalise +%% so that all fields have explicit initial value. + +record_def(Line, Name, Fs0, St0) -> + case dict:is_key(Name, St0#lint.records) of + true -> add_error(Line, {redefine_record,Name}, St0); + false -> + {Fs1,St1} = def_fields(normalise_fields(Fs0), Name, St0), + St1#lint{records=dict:store(Name, {Line,Fs1}, St1#lint.records)} + end. + +%% def_fields([RecDef], RecordName, State) -> {[DefField],State}. +%% Check (normalised) fields for duplicates. Return unduplicated +%% record and set State. + +def_fields(Fs0, Name, St0) -> + foldl(fun ({record_field,Lf,{atom,La,F},V}, {Fs,St}) -> + case exist_field(F, Fs) of + true -> {Fs,add_error(Lf, {redefine_field,Name,F}, St)}; + false -> + St1 = St#lint{recdef_top = true}, + {_,St2} = expr(V, [], St1), + %% Warnings and errors found are kept, but + %% updated calls, records, etc. are discarded. + St3 = St1#lint{warnings = St2#lint.warnings, + errors = St2#lint.errors, + called = St2#lint.called, + recdef_top = false}, + %% This is one way of avoiding a loop for + %% "recursive" definitions. + NV = case St2#lint.errors =:= St1#lint.errors of + true -> V; + false -> {atom,La,undefined} + end, + {[{record_field,Lf,{atom,La,F},NV}|Fs],St3} + end + end, {[],St0}, Fs0). + +%% normalise_fields([RecDef]) -> [Field]. +%% Normalise the field definitions to always have a default value. If +%% none has been given then use 'undefined'. +%% Also, strip type information from typed record fields. + +normalise_fields(Fs) -> + map(fun ({record_field,Lf,Field}) -> + {record_field,Lf,Field,{atom,Lf,undefined}}; + ({typed_record_field,{record_field,Lf,Field},_Type}) -> + {record_field,Lf,Field,{atom,Lf,undefined}}; + ({typed_record_field,Field,_Type}) -> + Field; + (F) -> F end, Fs). + +%% exist_record(Line, RecordName, State) -> State. +%% Check if a record exists. Set State. + +exist_record(Line, Name, St) -> + case dict:is_key(Name, St#lint.records) of + true -> used_record(Name, St); + false -> add_error(Line, {undefined_record,Name}, St) + end. + +%% check_record(Line, RecordName, State, CheckFun) -> +%% {UpdVarTable, State}. +%% The generic record checking function, first checks that the record +%% exists then calls the specific check function. N.B. the check +%% function can safely assume that the record exists. +%% +%% The check function is called: +%% CheckFun(RecordDefFields, State) +%% and must return +%% {UpdatedVarTable,State} + +check_record(Line, Name, St, CheckFun) -> + case dict:find(Name, St#lint.records) of + {ok,{_Line,Fields}} -> CheckFun(Fields, used_record(Name, St)); + error -> {[],add_error(Line, {undefined_record,Name}, St)} + end. + +used_record(Name, #lint{usage=Usage}=St) -> + UsedRecs = sets:add_element(Name, Usage#usage.used_records), + St#lint{usage = Usage#usage{used_records=UsedRecs}}. + +%%% Record check functions. + +%% check_fields([ChkField], RecordName, [RecDefField], VarTable, State, CheckFun) -> +%% {UpdVarTable,State}. + +check_fields(Fs, Name, Fields, Vt, St0, CheckFun) -> + {_SeenFields,Uvt,St1} = + foldl(fun (Field, {Sfsa,Vta,Sta}) -> + {Sfsb,{Vtb,Stb}} = check_field(Field, Name, Fields, + Vt, Sta, Sfsa, CheckFun), + {Sfsb,vtmerge_pat(Vta, Vtb),Stb} + end, {[],[],St0}, Fs), + {Uvt,St1}. + +check_field({record_field,Lf,{atom,La,F},Val}, Name, Fields, + Vt, St, Sfs, CheckFun) -> + case member(F, Sfs) of + true -> {Sfs,{Vt,add_error(Lf, {redefine_field,Name,F}, St)}}; + false -> + {[F|Sfs], + case find_field(F, Fields) of + {ok,_I} -> CheckFun(Val, Vt, St); + error -> {[],add_error(La, {undefined_field,Name,F}, St)} + end} + end; +check_field({record_field,_Lf,{var,_La,'_'},Val}, _Name, _Fields, + Vt, St, Sfs, CheckFun) -> + {Sfs,CheckFun(Val, Vt, St)}; +check_field({record_field,_Lf,{var,La,V},_Val}, Name, _Fields, + Vt, St, Sfs, _CheckFun) -> + {Sfs,{Vt,add_error(La, {field_name_is_variable,Name,V}, St)}}. + +%% pattern_field(Field, RecordName, [RecDefField], State) -> +%% {UpdVarTable,State}. +%% Test if record RecordName has field Field. Set State. + +pattern_field({atom,La,F}, Name, Fields, St) -> + case find_field(F, Fields) of + {ok,_I} -> {[],St}; + error -> {[],add_error(La, {undefined_field,Name,F}, St)} + end. + +%% pattern_fields([PatField],RecordName,[RecDefField], +%% VarTable,Old,Bvt,State) -> +%% {UpdVarTable,UpdBinVarTable,State}. + +pattern_fields(Fs, Name, Fields, Vt0, Old, Bvt, St0) -> + CheckFun = fun (Val, Vt, St) -> pattern(Val, Vt, Old, Bvt, St) end, + {_SeenFields,Uvt,Bvt1,St1} = + foldl(fun (Field, {Sfsa,Vta,Bvt1,Sta}) -> + case check_field(Field, Name, Fields, + Vt0, Sta, Sfsa, CheckFun) of + {Sfsb,{Vtb,Stb}} -> + {Sfsb,vtmerge_pat(Vta, Vtb),[],Stb}; + {Sfsb,{Vtb,Bvt2,Stb}} -> + {Sfsb,vtmerge_pat(Vta, Vtb), + vtmerge_pat(Bvt1,Bvt2),Stb} + end + end, {[],[],[],St0}, Fs), + {Uvt,Bvt1,St1}. + +%% record_field(Field, RecordName, [RecDefField], State) -> +%% {UpdVarTable,State}. +%% Test if record RecordName has field Field. Set State. + +record_field({atom,La,F}, Name, Fields, St) -> + case find_field(F, Fields) of + {ok,_I} -> {[],St}; + error -> {[],add_error(La, {undefined_field,Name,F}, St)} + end. + +%% init_fields([InitField], InitLine, RecordName, [DefField], VarTable, State) -> +%% {UpdVarTable,State}. +%% ginit_fields([InitField], InitLine, RecordName, [DefField], VarTable, State) -> +%% {UpdVarTable,State}. +%% Check record initialisation. Explicit initialisations are checked +%% as is, while default values are checked only if there are no +%% explicit inititialisations of the fields. Be careful not to +%% duplicate warnings (and possibly errors, but def_fields +%% substitutes 'undefined' for bogus inititialisations) from when the +%% record definitions were checked. Usage of records, imports, and +%% functions is collected. + +init_fields(Ifs, Line, Name, Dfs, Vt0, St0) -> + {Vt1,St1} = check_fields(Ifs, Name, Dfs, Vt0, St0, fun expr/3), + Defs = init_fields(Ifs, Line, Dfs), + {_,St2} = check_fields(Defs, Name, Dfs, Vt1, St1, fun expr/3), + {Vt1,St1#lint{usage = St2#lint.usage}}. + +ginit_fields(Ifs, Line, Name, Dfs, Vt0, St0) -> + {Vt1,St1} = check_fields(Ifs, Name, Dfs, Vt0, St0, fun gexpr/3), + Defs = init_fields(Ifs, Line, Dfs), + St2 = St1#lint{errors = []}, + {_,St3} = check_fields(Defs, Name, Dfs, Vt1, St2, fun gexpr/3), + #lint{usage = Usage, errors = Errors} = St3, + IllErrs = [E || {_File,{_Line,erl_lint,illegal_guard_expr}}=E <- Errors], + St4 = St1#lint{usage = Usage, errors = IllErrs ++ St1#lint.errors}, + {Vt1,St4}. + +%% Default initializations to be carried out +init_fields(Ifs, Line, Dfs) -> + [ {record_field,Lf,{atom,La,F},copy_expr(Di, Line)} || + {record_field,Lf,{atom,La,F},Di} <- Dfs, + not exist_field(F, Ifs) ]. + +%% update_fields(UpdFields, RecordName, RecDefFields, VarTable, State) -> +%% {UpdVarTable,State} + +update_fields(Ufs, Name, Dfs, Vt, St) -> + check_fields(Ufs, Name, Dfs, Vt, St, fun expr/3). + +%% exist_field(FieldName, [Field]) -> boolean(). +%% Find a record field in a field list. + +exist_field(F, [{record_field,_Lf,{atom,_La,F},_Val}|_Fs]) -> true; +exist_field(F, [_|Fs]) -> exist_field(F, Fs); +exist_field(_F, []) -> false. + +%% find_field(FieldName, [Field]) -> {ok,Val} | error. +%% Find a record field in a field list. + +find_field(_F, [{record_field,_Lf,{atom,_La,_F},Val}|_Fs]) -> {ok,Val}; +find_field(F, [_|Fs]) -> find_field(F, Fs); +find_field(_F, []) -> error. + +%% type_def(Attr, Line, TypeName, PatField, Args, State) -> State. +%% Attr :: 'type' | 'opaque' +%% Checks that a type definition is valid. + +type_def(_Attr, _Line, {record, _RecName}, Fields, [], St0) -> + %% The record field names and such are checked in the record format. + %% We only need to check the types. + Types = [T || {typed_record_field, _, T} <- Fields], + check_type({type, -1, product, Types}, St0); +type_def(_Attr, Line, TypeName, ProtoType, Args, St0) -> + TypeDefs = St0#lint.types, + Arity = length(Args), + TypePair = {TypeName, Arity}, + case (dict:is_key(TypePair, TypeDefs) orelse is_var_arity_type(TypeName)) of + true -> + case dict:is_key(TypePair, default_types()) of + true -> + case is_newly_introduced_builtin_type(TypePair) of + %% allow some types just for bootstrapping + true -> + Warn = {new_builtin_type, TypePair}, + St1 = add_warning(Line, Warn, St0), + NewDefs = dict:store(TypePair, Line, TypeDefs), + CheckType = {type, -1, product, [ProtoType|Args]}, + check_type(CheckType, St1#lint{types=NewDefs}); + false -> + add_error(Line, {builtin_type, TypePair}, St0) + end; + false -> add_error(Line, {redefine_type, TypePair}, St0) + end; + false -> + NewDefs = dict:store(TypePair, Line, TypeDefs), + CheckType = {type, -1, product, [ProtoType|Args]}, + check_type(CheckType, St0#lint{types=NewDefs}) + end. + +check_type(Types, St) -> + {SeenVars, St1} = check_type(Types, dict:new(), St), + dict:fold(fun(Var, {seen_once, Line}, AccSt) -> + case atom_to_list(Var) of + [$_|_] -> AccSt; + _ -> add_error(Line, {singleton_typevar, Var}, AccSt) + end; + (_Var, seen_multiple, AccSt) -> + AccSt + end, St1, SeenVars). + +check_type({ann_type, _L, [_Var, Type]}, SeenVars, St) -> + check_type(Type, SeenVars, St); +check_type({paren_type, _L, [Type]}, SeenVars, St) -> + check_type(Type, SeenVars, St); +check_type({remote_type, L, [{atom, _, Mod}, {atom, _, Name}, Args]}, + SeenVars, St = #lint{module=CurrentMod}) -> + St1 = + case (dict:is_key({Name, length(Args)}, default_types()) + orelse is_var_arity_type(Name)) of + true -> add_error(L, {imported_predefined_type, Name}, St); + false -> St + end, + case Mod =:= CurrentMod of + true -> check_type({type, L, Name, Args}, SeenVars, St1); + false -> + lists:foldl(fun(T, {AccSeenVars, AccSt}) -> + check_type(T, AccSeenVars, AccSt) + end, {SeenVars, St1}, Args) + end; +check_type({integer, _L, _}, SeenVars, St) -> {SeenVars, St}; +check_type({atom, _L, _}, SeenVars, St) -> {SeenVars, St}; +check_type({var, _L, '_'}, SeenVars, St) -> {SeenVars, St}; +check_type({var, L, Name}, SeenVars, St) -> + NewSeenVars = + case dict:find(Name, SeenVars) of + {ok, {seen_once, _}} -> dict:store(Name, seen_multiple, SeenVars); + {ok, seen_multiple} -> SeenVars; + error -> dict:store(Name, {seen_once, L}, SeenVars) + end, + {NewSeenVars, St}; +check_type({type, L, bool, []}, SeenVars, St) -> + {SeenVars, add_warning(L, {renamed_type, bool, boolean}, St)}; +check_type({type, L, 'fun', [Dom, Range]}, SeenVars, St) -> + St1 = + case Dom of + {type, _, product, _} -> St; + {type, _, any} -> St; + _ -> add_error(L, {type_syntax, 'fun'}, St) + end, + check_type({type, -1, product, [Dom, Range]}, SeenVars, St1); +check_type({type, L, range, [From, To]}, SeenVars, St) -> + St1 = + case {From, To} of + {{integer, _, X}, {integer, _, Y}} when X < Y -> St; + _ -> add_error(L, {type_syntax, range}, St) + end, + {SeenVars, St1}; +check_type({type, _L, tuple, any}, SeenVars, St) -> {SeenVars, St}; +check_type({type, _L, any}, SeenVars, St) -> {SeenVars, St}; +check_type({type, L, binary, [Base, Unit]}, SeenVars, St) -> + St1 = + case {Base, Unit} of + {{integer, _, BaseVal}, + {integer, _, UnitVal}} when BaseVal >= 0, UnitVal >= 0 -> St; + _ -> add_error(L, {type_syntax, binary}, St) + end, + {SeenVars, St1}; +check_type({type, L, record, [Name|Fields]}, SeenVars, St) -> + case Name of + {atom, _, Atom} -> + St1 = used_record(Atom, St), + check_record_types(L, Atom, Fields, SeenVars, St1); + _ -> {SeenVars, add_error(L, {type_syntax, record}, St)} + end; +check_type({type, _L, product, Args}, SeenVars, St) -> + lists:foldl(fun(T, {AccSeenVars, AccSt}) -> + check_type(T, AccSeenVars, AccSt) + end, {SeenVars, St}, Args); +check_type({type, La, TypeName, Args}, SeenVars, + St = #lint{types=Defs, usage=Usage}) -> + Arity = length(Args), + St1 = + case dict:is_key({TypeName, Arity}, Defs) of + true -> + UsedTypes1 = Usage#usage.used_types, + UsedTypes2 = sets:add_element({TypeName, Arity}, UsedTypes1), + St#lint{usage=Usage#usage{used_types=UsedTypes2}}; + false -> + case is_var_arity_type(TypeName) of + true -> St; + false -> add_error(La, {type_ref, {TypeName, Arity}}, St) + end + end, + check_type({type, -1, product, Args}, SeenVars, St1). + +check_record_types(Line, Name, Fields, SeenVars, St) -> + case dict:find(Name, St#lint.records) of + {ok,{_L,DefFields}} -> + case lists:all(fun({type, _, field_type, _}) -> true; + (_) -> false + end, Fields) of + true -> + check_record_types(Fields, Name, DefFields, SeenVars, St, []); + false -> + {SeenVars, add_error(Line, {type_syntax, record}, St)} + end; + error -> + {SeenVars, add_error(Line, {undefined_record, Name}, St)} + end. + +check_record_types([{type, _, field_type, [{atom, AL, FName}, Type]}|Left], + Name, DefFields, SeenVars, St, SeenFields) -> + %% Check that the field name is valid + St1 = case exist_field(FName, DefFields) of + true -> St; + false -> add_error(AL, {undefined_field, Name, FName}, St) + end, + %% Check for duplicates + St2 = case ordsets:is_element(FName, SeenFields) of + true -> add_error(AL, {redefine_field, Name, FName}, St1); + false -> St1 + end, + %% Check Type + {NewSeenVars, St3} = check_type(Type, SeenVars, St2), + NewSeenFields = ordsets:add_element(FName, SeenFields), + check_record_types(Left, Name, DefFields, NewSeenVars, St3, NewSeenFields); +check_record_types([], _Name, _DefFields, SeenVars, St, _SeenFields) -> + {SeenVars, St}. + +is_var_arity_type(tuple) -> true; +is_var_arity_type(product) -> true; +is_var_arity_type(union) -> true; +is_var_arity_type(record) -> true; +is_var_arity_type(_) -> false. + +default_types() -> + DefTypes = [{any, 0}, + {arity, 0}, + {array, 0}, + {atom, 0}, + {atom, 1}, + {binary, 0}, + {binary, 2}, + {bitstring, 0}, + {bool, 0}, + {boolean, 0}, + {byte, 0}, + {char, 0}, + {dict, 0}, + {digraph, 0}, + {float, 0}, + {'fun', 0}, + {'fun', 2}, + {function, 0}, + {gb_set, 0}, + {gb_tree, 0}, + {identifier, 0}, + {integer, 0}, + {integer, 1}, + {iodata, 0}, + {iolist, 0}, + {list, 0}, + {list, 1}, + {maybe_improper_list, 0}, + {maybe_improper_list, 2}, + {mfa, 0}, + {module, 0}, + {neg_integer, 0}, + {nil, 0}, + {no_return, 0}, + {node, 0}, + {non_neg_integer, 0}, + {none, 0}, + {nonempty_list, 0}, + {nonempty_list, 1}, + {nonempty_improper_list, 2}, + {nonempty_maybe_improper_list, 0}, + {nonempty_maybe_improper_list, 2}, + {nonempty_string, 0}, + {number, 0}, + {pid, 0}, + {port, 0}, + {pos_integer, 0}, + {queue, 0}, + {range, 2}, + {reference, 0}, + {set, 0}, + {string, 0}, + {term, 0}, + {tid, 0}, + {timeout, 0}, + {var, 1}], + dict:from_list([{T, -1} || T <- DefTypes]). + +%% R12B-5 +is_newly_introduced_builtin_type({module, 0}) -> true; +is_newly_introduced_builtin_type({node, 0}) -> true; +is_newly_introduced_builtin_type({nonempty_string, 0}) -> true; +is_newly_introduced_builtin_type({term, 0}) -> true; +is_newly_introduced_builtin_type({timeout, 0}) -> true; +%% R13 +is_newly_introduced_builtin_type({arity, 0}) -> true; +is_newly_introduced_builtin_type({array, 0}) -> true; % opaque +is_newly_introduced_builtin_type({bitstring, 0}) -> true; +is_newly_introduced_builtin_type({dict, 0}) -> true; % opaque +is_newly_introduced_builtin_type({digraph, 0}) -> true; % opaque +is_newly_introduced_builtin_type({gb_set, 0}) -> true; % opaque +is_newly_introduced_builtin_type({gb_tree, 0}) -> true; % opaque +is_newly_introduced_builtin_type({iodata, 0}) -> true; +is_newly_introduced_builtin_type({queue, 0}) -> true; % opaque +is_newly_introduced_builtin_type({set, 0}) -> true; % opaque +is_newly_introduced_builtin_type({tid, 0}) -> true; % opaque +%% R13B01 +is_newly_introduced_builtin_type({boolean, 0}) -> true; +is_newly_introduced_builtin_type({Name, _}) when is_atom(Name) -> false. + +%% spec_decl(Line, Fun, Types, State) -> State. + +spec_decl(Line, MFA0, TypeSpecs, St0 = #lint{specs = Specs, module = Mod}) -> + MFA = case MFA0 of + {F, Arity} -> {Mod, F, Arity}; + {_M, _F, Arity} -> MFA0 + end, + St1 = St0#lint{specs = dict:store(MFA, Line, Specs)}, + case dict:is_key(MFA, Specs) of + true -> add_error(Line, {redefine_spec, MFA}, St1); + false -> check_specs(TypeSpecs, Arity, St1) + end. + +check_specs([FunType|Left], Arity, St0) -> + {FunType1, CTypes} = + case FunType of + {type, _, bounded_fun, [FT = {type, _, 'fun', _}, Cs]} -> + Types0 = [T || {type, _, constraint, [_, T]} <- Cs], + {FT, lists:append(Types0)}; + {type, _, 'fun', _} = FT -> {FT, []} + end, + SpecArity = + case FunType1 of + {type, L, 'fun', [any, _]} -> any; + {type, L, 'fun', [{type, _, product, D}, _]} -> length(D) + end, + St1 = case Arity =:= SpecArity of + true -> St0; + false -> add_error(L, spec_wrong_arity, St0) + end, + St2 = check_type({type, -1, product, [FunType1|CTypes]}, St1), + check_specs(Left, Arity, St2); +check_specs([], _Arity, St) -> + St. + +check_specs_without_function(St = #lint{module=Mod, defined=Funcs}) -> + Fun = fun({M, F, A} = MFA, Line, AccSt) when M =:= Mod -> + case gb_sets:is_element({F, A}, Funcs) of + true -> AccSt; + false -> add_error(Line, {spec_fun_undefined, MFA}, AccSt) + end; + ({_M, _F, _A}, _Line, AccSt) -> AccSt + end, + dict:fold(Fun, St, St#lint.specs). + +%% This generates warnings for functions without specs; if the user has +%% specified both options, we do not generate the same warnings twice. +check_functions_without_spec(Forms, St0) -> + case is_warn_enabled(missing_spec_all, St0) of + true -> + add_missing_spec_warnings(Forms, St0, all); + false -> + case is_warn_enabled(missing_spec, St0) of + true -> + add_missing_spec_warnings(Forms, St0, exported); + false -> + St0 + end + end. + +add_missing_spec_warnings(Forms, St0, Type) -> + Specs = [{F,A} || {_M,F,A} <- dict:fetch_keys(St0#lint.specs)], + Warns = %% functions + line numbers for which we should warn + case Type of + all -> + [{FA,L} || {function,L,F,A,_} <- Forms, + not lists:member(FA = {F,A}, Specs)]; + exported -> + Exps = gb_sets:to_list(St0#lint.exports) -- pseudolocals(), + [{FA,L} || {function,L,F,A,_} <- Forms, + member(FA = {F,A}, Exps -- Specs)] + end, + foldl(fun ({FA,L}, St) -> + add_warning(L, {missing_spec,FA}, St) + end, St0, Warns). + +check_unused_types(Forms, St = #lint{usage=Usage, types=Types}) -> + case [File || {attribute,_L,file,{File,_Line}} <- Forms] of + [FirstFile|_] -> + UsedTypes = Usage#usage.used_types, + FoldFun = + fun(_Type, -1, AccSt) -> + %% Default type + AccSt; + (Type, FileLine, AccSt) -> + case loc(FileLine) of + {FirstFile, _} -> + case sets:is_element(Type, UsedTypes) of + true -> AccSt; + false -> + add_warning(FileLine, + {unused_type, Type}, + AccSt) + end; + _ -> + %% Don't warn about unused types in include file + AccSt + end + end, + dict:fold(FoldFun, St, Types); + [] -> + St + end. + +%% icrt_clauses(Clauses, In, ImportVarTable, State) -> +%% {NewVts,State}. + +icrt_clauses(Cs, In, Vt, St0) -> + {Csvt,St1} = icrt_clauses(Cs, Vt, St0), + icrt_export(Csvt, Vt, In, St1). + +%% icrt_clauses(Clauses, ImportVarTable, State) -> +%% {NewVts,State}. + +icrt_clauses(Cs, Vt, St) -> + mapfoldl(fun (C, St0) -> icrt_clause(C, Vt, St0) end, St, Cs). + +icrt_clause({clause,_Line,H,G,B}, Vt0, St0) -> + {Hvt,Binvt,St1} = head(H, Vt0, St0), + Vt1 = vtupdate(Hvt, vtupdate(Binvt, Vt0)), + {Gvt,St2} = guard(G, Vt1, St1), + Vt2 = vtupdate(Gvt, Vt1), + {Bvt,St3} = exprs(B, Vt2, St2), + {vtupdate(Bvt, Vt2),St3}. + +%% The tests of 'cond' clauses are normal expressions - not guards. +%% Variables bound in a test is visible both in the corresponding body +%% and in the tests and bodies of subsequent clauses: a 'cond' is +%% *equivalent* to nested case-switches on boolean expressions. + +cond_clauses([C], In, Vt, St) -> + last_cond_clause(C, In, Vt, St); +cond_clauses([C | Cs], In, Vt, St) -> + cond_clause(C, Cs, In, Vt, St). + +%% see expr/3 for 'case' +cond_clause({clause,_L,[],[[E]],B}, Cs, In, Vt, St0) -> + {Evt,St1} = expr(E, Vt, St0), + {Cvt, St2} = cond_cases(B, Cs, In, vtupdate(Evt, Vt), St1), + Mvt = vtmerge(Evt, Cvt), + {Mvt,St2}. + +%% see icrt_clauses/4 +cond_cases(B, Cs, In, Vt, St0) -> + %% note that Vt is used for both cases + {Bvt,St1} = exprs(B, Vt, St0), % true case + Vt1 = vtupdate(Bvt, Vt), + {Cvt, St2} = cond_clauses(Cs, In, Vt, St1), % false case + Vt2 = vtupdate(Cvt, Vt), + %% and this also uses Vt + icrt_export([Vt1,Vt2], Vt, In, St2). + +%% last case must call icrt_export/4 with only one vartable +last_cond_clause({clause,_L,[],[[E]],B}, In, Vt, St0) -> + {Evt,St1} = expr(E, Vt, St0), + {Cvt, St2} = last_cond_case(B, In, vtupdate(Evt, Vt), St1), + Mvt = vtmerge(Evt, Cvt), + {Mvt,St2}. + +last_cond_case(B, In, Vt, St0) -> + {Bvt,St1} = exprs(B, Vt, St0), + Vt1 = vtupdate(Bvt, Vt), + icrt_export([Vt1], Vt, In, St1). + +icrt_export(Csvt, Vt, In, St) -> + Vt1 = vtmerge(Csvt), + All = ordsets:subtract(vintersection(Csvt), vtnames(Vt)), + Some = ordsets:subtract(vtnames(Vt1), vtnames(Vt)), + Xvt = vtexport(All, In, []), + Evt = vtunsafe(ordsets:subtract(Some, All), In, Xvt), + Unused = vtmerge([unused_vars(Vt0, Vt, St) || Vt0 <- Csvt]), + %% Exported and unsafe variables may be unused: + Uvt = vtmerge(Evt, Unused), + %% Make exported and unsafe unused variables unused in subsequent code: + Vt2 = vtmerge(Uvt, vtsubtract(Vt1, Uvt)), + {Vt2,St}. + +handle_comprehension(E, Qs, Vt0, St0) -> + {Vt1, Uvt, St1} = lc_quals(Qs, Vt0, St0), + {Evt,St2} = expr(E, Vt1, St1), + Vt2 = vtupdate(Evt, Vt1), + %% Shadowed global variables. + {_,St3} = check_old_unused_vars(Vt2, Uvt, St2), + %% There may be local variables in Uvt that are not global. + {_,St4} = check_unused_vars(Uvt, Vt0, St3), + %% Local variables that have not been shadowed. + {_,St} = check_unused_vars(Vt2, Vt0, St4), + Vt3 = vtmerge(vtsubtract(Vt2, Uvt), Uvt), + {Vt3,St}. + +%% lc_quals(Qualifiers, ImportVarTable, State) -> +%% {VarTable,ShadowedVarTable,State} +%% Test list comprehension qualifiers, return all variables. Allow +%% filters to be both guard tests and general expressions, but the errors +%% will be for expressions. Return the complete updated vartable including +%% local variables and all updates. ShadowVarTable contains the state of +%% each shadowed variable. All variable states of variables in ImportVarTable +%% that have been shadowed are included in ShadowVarTable. In addition, all +%% shadowed variables that are not included in ImportVarTable are included +%% in ShadowVarTable (these are local variables that are not global variables). + +lc_quals(Qs, Vt0, St0) -> + OldRecDef = St0#lint.recdef_top, + {Vt,Uvt,St} = lc_quals(Qs, Vt0, [], St0#lint{recdef_top = false}), + {Vt,Uvt,St#lint{recdef_top = OldRecDef}}. + +lc_quals([{generate,_Line,P,E} | Qs], Vt0, Uvt0, St0) -> + {Vt,Uvt,St} = handle_generator(P,E,Vt0,Uvt0,St0), + lc_quals(Qs, Vt, Uvt, St); +lc_quals([{b_generate,_Line,P,E} | Qs], Vt0, Uvt0, St0) -> + {Vt,Uvt,St} = handle_generator(P,E,Vt0,Uvt0,St0), + lc_quals(Qs, Vt, Uvt, St); +lc_quals([F|Qs], Vt, Uvt, St0) -> + {Fvt,St1} = case is_guard_test2(F, St0#lint.records) of + true -> guard_test(F, Vt, St0); + false -> expr(F, Vt, St0) + end, + lc_quals(Qs, vtupdate(Fvt, Vt), Uvt, St1); +lc_quals([], Vt, Uvt, St) -> + {Vt, Uvt, St}. + +handle_generator(P,E,Vt,Uvt,St0) -> + {Evt,St1} = expr(E, Vt, St0), + %% Forget variables local to E immediately. + Vt1 = vtupdate(vtold(Evt, Vt), Vt), + {_, St2} = check_unused_vars(Evt, Vt, St1), + {Pvt,Binvt,St3} = pattern(P, Vt1, [], [], St2), + %% Have to keep fresh variables separated from used variables somehow + %% in order to handle for example X = foo(), [X || <> <- bar()]. + %% 1 2 2 1 + Vt2 = vtupdate(Pvt, Vt1), + St4 = shadow_vars(Binvt, Vt1, generate, St3), + Svt = vtold(Vt2, Binvt), + {_, St5} = check_old_unused_vars(Svt, Uvt, St4), + NUvt = vtupdate(vtnew(Svt, Uvt), Uvt), + Vt3 = vtupdate(vtsubtract(Vt2, Binvt), Binvt), + {Vt3,NUvt,St5}. + +%% fun_clauses(Clauses, ImportVarTable, State) -> +%% {UsedVars, State}. +%% Fun's cannot export any variables. + +%% It is an error if variable is bound inside a record definition +%% unless it was introduced in a fun or an lc. Only if pat_var finds +%% such variables can the correct line number be given. + +fun_clauses(Cs, Vt, St) -> + OldRecDef = St#lint.recdef_top, + {Bvt,St2} = foldl(fun (C, {Bvt0, St0}) -> + {Cvt,St1} = fun_clause(C, Vt, St0), + {vtmerge(Cvt, Bvt0),St1} + end, {[],St#lint{recdef_top = false}}, Cs), + {Bvt,St2#lint{recdef_top = OldRecDef}}. + +fun_clause({clause,_Line,H,G,B}, Vt0, St0) -> + {Hvt,Binvt,St1} = head(H, Vt0, [], St0), % No imported pattern variables + Vt1 = vtupdate(Hvt, Vt0), + St2 = shadow_vars(Binvt, Vt0, 'fun', St1), + Vt2 = vtupdate(vtsubtract(Vt1, Binvt), Binvt), + {Gvt,St3} = guard(G, Vt2, St2), + Vt3 = vtupdate(Gvt, Vt2), + {Bvt,St4} = exprs(B, Vt3, St3), + Cvt = vtupdate(Bvt, Vt3), + %% Check new local variables. + {_, St5} = check_unused_vars(Cvt, Vt0, St4), + %% Check all shadowing variables. + Svt = vtold(Vt1, Binvt), + {_, St6} = check_old_unused_vars(Cvt, Svt, St5), + Vt4 = vtmerge(Svt, vtsubtract(Cvt, Svt)), + {vtold(Vt4, Vt0),St6}. + +%% In the variable table we store information about variables. The +%% information is a tuple {State,Usage,Lines}, the variables state and +%% usage. A variable can be in the following states: +%% +%% bound everything is normal +%% {export,From} variable has been exported +%% {unsafe,In} variable is unsafe +%% +%% The usage information has the following form: +%% +%% used variable has been used +%% unused variable has been bound but not used +%% +%% Lines is a list of line numbers where the variable was bound. +%% +%% Report variable errors/warnings as soon as possible and then change +%% the state to ok. This simplifies the code and reports errors only +%% once. Having the usage information like this makes it easy too when +%% merging states. + +%% For keeping track of which variables are bound, ordsets are used. +%% In order to be able to give warnings about unused variables, a +%% possible value is {bound, unused, [Line]}. The usual value when a +%% variable is used is {bound, used, [Line]}. An exception occurs for +%% variables in the size position in a bin element in a pattern. +%% Currently, such a variable is never matched out, always used, and +%% therefore it makes no sense to warn for "variable imported in +%% match". + +%% For storing the variable table we use the orddict module. +%% We know an empty set is []. + +%% pat_var(Variable, LineNo, VarTable, State) -> {UpdVarTable,State} +%% A pattern variable has been found. Handle errors and warnings. Return +%% all variables as bound so errors and warnings are only reported once. +%% Bvt "shadows" Vt here, which is necessary in order to separate uses of +%% shadowed and shadowing variables. See also pat_binsize_var. + +pat_var(V, Line, Vt, Bvt, St) -> + case orddict:find(V, Bvt) of + {ok, {bound,_Usage,Ls}} -> + {[],[{V,{bound,used,Ls}}],St}; + error -> + case orddict:find(V, Vt) of + {ok,{bound,_Usage,Ls}} -> + {[{V,{bound,used,Ls}}],[],St}; + {ok,{{unsafe,In},_Usage,Ls}} -> + {[{V,{bound,used,Ls}}],[], + add_error(Line, {unsafe_var,V,In}, St)}; + {ok,{{export,From},_Usage,Ls}} -> + {[{V,{bound,used,Ls}}],[], + %% As this is matching, exported vars are risky. + add_warning(Line, {exported_var,V,From}, St)}; + error when St#lint.recdef_top -> + {[],[{V,{bound,unused,[Line]}}], + add_error(Line, {variable_in_record_def,V}, St)}; + error -> {[],[{V,{bound,unused,[Line]}}],St} + end + end. + +%% pat_binsize_var(Variable, LineNo, VarTable, BinVarTable, State) -> +%% {UpdVarTable,UpdBinVarTable,State'} +%% A pattern variable has been found. Handle errors and warnings. Return +%% all variables as bound so errors and warnings are only reported once. + +pat_binsize_var(V, Line, Vt, Bvt, St) -> + case orddict:find(V, Bvt) of + {ok,{bound,_Used,Ls}} -> + {[],[{V,{bound,used,Ls}}],St}; + error -> + case orddict:find(V, Vt) of + {ok,{bound,_Used,Ls}} -> + {[{V,{bound,used,Ls}}],[],St}; + {ok,{{unsafe,In},_Used,Ls}} -> + {[{V,{bound,used,Ls}}],[], + add_error(Line, {unsafe_var,V,In}, St)}; + {ok,{{export,From},_Used,Ls}} -> + {[{V,{bound,used,Ls}}],[], + %% As this is not matching, exported vars are + %% probably safe. + exported_var(Line, V, From, St)}; + error -> + {[{V,{bound,used,[Line]}}],[], + add_error(Line, {unbound_var,V}, St)} + end + end. + +%% expr_var(Variable, LineNo, VarTable, State) -> +%% {UpdVarTable,State} +%% Check if a variable is defined, or if there is an error or warning +%% connected to its usage. Return all variables as bound so errors +%% and warnings are only reported once. As this is not matching +%% exported vars are probably safe, warn only if warn_export_vars is +%% set. + +expr_var(V, Line, Vt, St0) -> + case orddict:find(V, Vt) of + {ok,{bound,_Usage,Ls}} -> + {[{V,{bound,used,Ls}}],St0}; + {ok,{{unsafe,In},_Usage,Ls}} -> + {[{V,{bound,used,Ls}}], + add_error(Line, {unsafe_var,V,In}, St0)}; + {ok,{{export,From},_Usage,Ls}} -> + {[{V,{bound,used,Ls}}], + exported_var(Line, V, From, St0)}; + error -> + {[{V,{bound,used,[Line]}}], + add_error(Line, {unbound_var,V}, St0)} + end. + +exported_var(Line, V, From, St) -> + case is_warn_enabled(export_vars, St) of + true -> add_warning(Line, {exported_var,V,From}, St); + false -> St + end. + +shadow_vars(Vt, Vt0, In, St0) -> + case is_warn_enabled(shadow_vars, St0) of + true -> + foldl(fun ({V,{_,_,[L | _]}}, St) -> + add_warning(L, {shadowed_var,V,In}, St); + (_, St) -> St + end, St0, vtold(Vt, vt_no_unsafe(Vt0))); + false -> St0 + end. + +check_unused_vars(Vt, Vt0, St0) -> + U = unused_vars(Vt, Vt0, St0), + warn_unused_vars(U, Vt, St0). + +check_old_unused_vars(Vt, Vt0, St0) -> + U = unused_vars(vtold(Vt, Vt0), [], St0), + warn_unused_vars(U, Vt, St0). + +unused_vars(Vt, Vt0, _St0) -> + U0 = orddict:filter(fun (V, {_State,unused,_Ls}) -> + case atom_to_list(V) of + [$_|_] -> false; + _ -> true + end; + (_V, _How) -> false + end, Vt), + vtnew(U0, Vt0). % Only new variables. + +warn_unused_vars([], Vt, St0) -> + {Vt,St0}; +warn_unused_vars(U, Vt, St0) -> + St1 = case is_warn_enabled(unused_vars, St0) of + false -> St0; + true -> + foldl(fun ({V,{_,unused,Ls}}, St) -> + foldl(fun (L, St2) -> + add_warning(L, {unused_var,V}, + St2) + end, St, Ls) + end, St0, U) + end, + %% Return all variables as bound so warnings are only reported once. + UVt = map(fun ({V,{State,_,Ls}}) -> {V,{State,used,Ls}} end, U), + {vtmerge(Vt, UVt), St1}. + +%% vtupdate(UpdVarTable, VarTable) -> VarTable. +%% Add the variables in the updated vartable to VarTable. The variables +%% will be updated with their property in UpdVarTable. The state of +%% the variables in UpdVarTable will be returned. + +vtupdate(Uvt, Vt0) -> + orddict:merge(fun (_V, {S,U1,L1}, {_S,U2,L2}) -> + {S, merge_used(U1, U2), merge_lines(L1, L2)} + end, Uvt, Vt0). + +%% vtexport([Variable], From, VarTable) -> VarTable. +%% vtunsafe([Variable], From, VarTable) -> VarTable. +%% Add the variables to VarTable either as exported from From or as unsafe. + +vtexport(Vs, {InTag,FileLine}, Vt0) -> + {_File,Line} = loc(FileLine), + vtupdate([{V,{{export,{InTag,Line}},unused,[]}} || V <- Vs], Vt0). + +vtunsafe(Vs, {InTag,FileLine}, Vt0) -> + {_File,Line} = loc(FileLine), + vtupdate([{V,{{unsafe,{InTag,Line}},unused,[]}} || V <- Vs], Vt0). + +%% vtmerge(VarTable, VarTable) -> VarTable. +%% Merge two variables tables generating a new vartable. Give priority to +%% errors then warnings. + +vtmerge(Vt1, Vt2) -> + orddict:merge(fun (_V, {S1,U1,L1}, {S2,U2,L2}) -> + {merge_state(S1, S2), + merge_used(U1, U2), + merge_lines(L1, L2)} + end, Vt1, Vt2). + +vtmerge(Vts) -> foldl(fun (Vt, Mvts) -> vtmerge(Vt, Mvts) end, [], Vts). + +vtmerge_pat(Vt1, Vt2) -> + orddict:merge(fun (_V, {S1,_Usage1,L1}, {S2,_Usage2,L2}) -> + {merge_state(S1, S2),used, merge_lines(L1, L2)} + end, Vt1, Vt2). + +merge_lines(Ls1, Ls2) -> + ordsets:union(Ls1,Ls2). + +merge_state({unsafe,_F1}=S1, _S2) -> S1; %Take the error case +merge_state(_S1, {unsafe,_F2}=S2) -> S2; +merge_state(bound, S2) -> S2; %Take the warning +merge_state(S1, bound) -> S1; +merge_state({export,F1},{export,_F2}) -> %Sanity check + %% We want to report the outermost construct + {export,F1}. + +merge_used(used, _Usage2) -> used; +merge_used(_Usage1, used) -> used; +merge_used(unused, unused) -> unused. + +%% vtnew(NewVarTable, OldVarTable) -> NewVarTable. +%% Return all the truly new variables in NewVarTable. + +vtnew(New, Old) -> + orddict:filter(fun (V, _How) -> not orddict:is_key(V, Old) end, New). + +%% vtsubtract(VarTable1, VarTable2) -> NewVarTable. +%% Return all the variables in VarTable1 which don't occur in VarTable2. +%% Same thing as vtnew, but a more intuitive name for some uses. +vtsubtract(New, Old) -> + vtnew(New, Old). + +%% vtold(NewVarTable, OldVarTable) -> OldVarTable. +%% Return all the truly old variables in NewVarTable. + +vtold(New, Old) -> + orddict:filter(fun (V, _How) -> orddict:is_key(V, Old) end, New). + +vtnames(Vt) -> [ V || {V,_How} <- Vt ]. + +vt_no_unsafe(Vt) -> [V || {_,{S,_U,_L}}=V <- Vt, + case S of + {unsafe,_} -> false; + _ -> true + end]. + +%% vunion(VarTable1, VarTable2) -> [VarName]. +%% vunion([VarTable]) -> [VarName]. +%% vintersection(VarTable1, VarTable2) -> [VarName]. +%% vintersection([VarTable]) -> [VarName]. +%% Union/intersection of names of vars in VarTable. + +-ifdef(NOTUSED). +vunion(Vs1, Vs2) -> ordsets:union(vtnames(Vs1), vtnames(Vs2)). + +vunion(Vss) -> foldl(fun (Vs, Uvs) -> + ordsets:union(vtnames(Vs), Uvs) + end, [], Vss). + +vintersection(Vs1, Vs2) -> ordsets:intersection(vtnames(Vs1), vtnames(Vs2)). +-endif. + +vintersection([Vs]) -> + vtnames(Vs); %Boundary conditions!!! +vintersection([Vs|Vss]) -> + ordsets:intersection(vtnames(Vs), vintersection(Vss)); +vintersection([]) -> + []. + +%% copy_expr(Expr, Line) -> Expr. +%% Make a copy of Expr converting all line numbers to Line. + +copy_expr(Expr, Line) -> + modify_line(Expr, fun(_L) -> Line end). + +%% modify_line(Form, Fun) -> Form +%% modify_line(Expression, Fun) -> Expression +%% Applies Fun to each line number occurrence. + +modify_line(T, F0) -> + modify_line1(T, F0). + +%% Forms. +modify_line1({function,F,A}, _Mf) -> {function,F,A}; +modify_line1({function,M,F,A}, _Mf) -> {function,M,F,A}; +modify_line1({attribute,L,record,{Name,Fields}}, Mf) -> + {attribute,Mf(L),record,{Name,modify_line1(Fields, Mf)}}; +modify_line1({attribute,L,spec,{Fun,Types}}, Mf) -> + {attribute,Mf(L),spec,{Fun,modify_line1(Types, Mf)}}; +modify_line1({attribute,L,type,{TypeName,TypeDef,Args}}, Mf) -> + {attribute,Mf(L),type,{TypeName,modify_line1(TypeDef, Mf), + modify_line1(Args, Mf)}}; +modify_line1({attribute,L,opaque,{TypeName,TypeDef,Args}}, Mf) -> + {attribute,Mf(L),opaque,{TypeName,modify_line1(TypeDef, Mf), + modify_line1(Args, Mf)}}; +modify_line1({attribute,L,Attr,Val}, Mf) -> {attribute,Mf(L),Attr,Val}; +modify_line1({warning,W}, _Mf) -> {warning,W}; +modify_line1({error,W}, _Mf) -> {error,W}; +%% Expressions. +modify_line1({clauses,Cs}, Mf) -> {clauses,modify_line1(Cs, Mf)}; +modify_line1({typed_record_field,Field,Type}, Mf) -> + {typed_record_field,modify_line1(Field, Mf),modify_line1(Type, Mf)}; +modify_line1({Tag,L}, Mf) -> {Tag,Mf(L)}; +modify_line1({Tag,L,E1}, Mf) -> + {Tag,Mf(L),modify_line1(E1, Mf)}; +modify_line1({Tag,L,E1,E2}, Mf) -> + {Tag,Mf(L),modify_line1(E1, Mf),modify_line1(E2, Mf)}; +modify_line1({bin_element,L,E1,E2,TSL}, Mf) -> + {bin_element,Mf(L),modify_line1(E1, Mf),modify_line1(E2, Mf), TSL}; +modify_line1({Tag,L,E1,E2,E3}, Mf) -> + {Tag,Mf(L),modify_line1(E1, Mf),modify_line1(E2, Mf),modify_line1(E3, Mf)}; +modify_line1({Tag,L,E1,E2,E3,E4}, Mf) -> + {Tag,Mf(L), + modify_line1(E1, Mf), + modify_line1(E2, Mf), + modify_line1(E3, Mf), + modify_line1(E4, Mf)}; +modify_line1([H|T], Mf) -> + [modify_line1(H, Mf)|modify_line1(T, Mf)]; +modify_line1([], _Mf) -> []; +modify_line1(E, _Mf) when not is_tuple(E), not is_list(E) -> E. + +%% Check a record_info call. We have already checked that it is not +%% shadowed by an import. + +check_record_info_call(_Line,La,[{atom,Li,Info},{atom,_Ln,Name}],St) -> + case member(Info, [fields,size]) of + true -> exist_record(La, Name, St); + false -> add_error(Li, illegal_record_info, St) + end; +check_record_info_call(Line,_La,_As,St) -> + add_error(Line, illegal_record_info, St). + +has_wildcard_field([{record_field,_Lf,{var,_La,'_'},_Val}|_Fs]) -> true; +has_wildcard_field([_|Fs]) -> has_wildcard_field(Fs); +has_wildcard_field([]) -> false. + +%% check_remote_function(Line, ModuleName, FuncName, [Arg], State) -> State. +%% Perform checks on known remote calls. + +check_remote_function(Line, M, F, As, St0) -> + St1 = deprecated_function(Line, M, F, As, St0), + St2 = check_qlc_hrl(Line, M, F, As, St1), + format_function(Line, M, F, As, St2). + +%% check_qlc_hrl(Line, ModName, FuncName, [Arg], State) -> State +%% Add warning if qlc:q/1,2 has been called but qlc.hrl has not +%% been included. + +check_qlc_hrl(Line, M, F, As, St) -> + Arity = length(As), + case As of + [{lc,_L,_E,_Qs}|_] when M =:= qlc, F =:= q, + Arity < 3, not St#lint.xqlc -> + add_warning(Line, {missing_qlc_hrl, Arity}, St); + _ -> + St + end. + +%% deprecated_function(Line, ModName, FuncName, [Arg], State) -> State. +%% Add warning for calls to deprecated functions. + +deprecated_function(Line, M, F, As, St) -> + Arity = length(As), + MFA = {M, F, Arity}, + case otp_internal:obsolete(M, F, Arity) of + {deprecated, String} when is_list(String) -> + case not is_warn_enabled(deprecated_function, St) orelse + ordsets:is_element(MFA, St#lint.not_deprecated) of + true -> + St; + false -> + add_warning(Line, {deprecated, MFA, String}, St) + end; + {deprecated, Replacement, Rel} -> + case not is_warn_enabled(deprecated_function, St) orelse + ordsets:is_element(MFA, St#lint.not_deprecated) of + true -> + St; + false -> + add_warning(Line, {deprecated, MFA, Replacement, Rel}, St) + end; + {removed, String} when is_list(String) -> + add_warning(Line, {removed, MFA, String}, St); + {removed, Replacement, Rel} -> + add_warning(Line, {removed, MFA, Replacement, Rel}, St); + no -> + St + end. + +obsolete_guard({call,Line,{atom,Lr,F},As}, St0) -> + Arity = length(As), + case erl_internal:old_type_test(F, Arity) of + false -> + deprecated_function(Line, erlang, F, As, St0); + true -> + St1 = case F of + constant -> + deprecated_function(Lr, erlang, is_constant, As, St0); + _ -> + St0 + end, + case is_warn_enabled(obsolete_guard, St1) of + true -> + add_warning(Lr,{obsolete_guard, {F, Arity}}, St1); + false -> + St1 + end + end; +obsolete_guard(_G, St) -> + St. + +%% keyword_warning(Line, Atom, State) -> State. +%% Add warning for atoms that will be reserved keywords in the future. +%% (Currently, no such keywords to warn for.) +keyword_warning(_Line, _A, St) -> St. + +%% format_function(Line, ModName, FuncName, [Arg], State) -> State. +%% Add warning for bad calls to io:fwrite/format functions. + +format_function(Line, M, F, As, St) -> + case is_format_function(M, F) of + true -> + case St#lint.warn_format of + Lev when Lev > 0 -> + case check_format_1(As) of + {warn,Level,Fmt,Fas} when Level =< Lev -> + add_warning(Line, {format_error,{Fmt,Fas}}, St); + _ -> St + end; + _Lev -> St + end; + false -> St + end. + +is_format_function(io, fwrite) -> true; +is_format_function(io, format) -> true; +is_format_function(io_lib, fwrite) -> true; +is_format_function(io_lib, format) -> true; +is_format_function(M, F) when is_atom(M), is_atom(F) -> false. + +%% check_format_1([Arg]) -> ok | {warn,Level,Format,[Arg]}. + +check_format_1([Fmt]) -> + check_format_1([Fmt,{nil,0}]); +check_format_1([Fmt,As]) -> + check_format_2(Fmt, canonicalize_string(As)); +check_format_1([_Dev,Fmt,As]) -> + check_format_1([Fmt,As]); +check_format_1(_As) -> + {warn,1,"format call with wrong number of arguments",[]}. + +canonicalize_string({string,Line,Cs}) -> + foldr(fun (C, T) -> {cons,Line,{integer,Line,C},T} end, {nil,Line}, Cs); +canonicalize_string(Term) -> + Term. + +%% check_format_2([Arg]) -> ok | {warn,Level,Format,[Arg]}. + +check_format_2(Fmt, As) -> + case Fmt of + {string,_L,S} -> check_format_2a(S, As); + {atom,_L,A} -> check_format_2a(atom_to_list(A), As); + _ -> {warn,2,"format string not a textual constant",[]} + end. + +check_format_2a(Fmt, As) -> + case args_list(As) of + true -> check_format_3(Fmt, As); + false -> {warn,1,"format arguments not a list",[]}; + maybe -> {warn,2,"format arguments perhaps not a list",[]} + end. + +%% check_format_3(FormatString, [Arg]) -> ok | {warn,Level,Format,[Arg]}. + +check_format_3(Fmt, As) -> + case check_format_string(Fmt) of + {ok,Need} -> + case args_length(As) of + Len when length(Need) =:= Len -> ok; + _Len -> {warn,1,"wrong number of arguments in format call",[]} + end; + {error,S} -> + {warn,1,"format string invalid (~s)",[S]} + end. + +args_list({cons,_L,_H,T}) -> args_list(T); +%% Strange case: user has written something like [a | "bcd"]; pretend +%% we don't know: +args_list({string,_L,_Cs}) -> maybe; +args_list({nil,_L}) -> true; +args_list({atom,_,_}) -> false; +args_list({integer,_,_}) -> false; +args_list({float,_,_}) -> false; +args_list(_Other) -> maybe. + +args_length({cons,_L,_H,T}) -> 1 + args_length(T); +args_length({nil,_L}) -> 0. + +check_format_string(Fmt) -> + extract_sequences(Fmt, []). + +extract_sequences(Fmt, Need0) -> + case string:chr(Fmt, $~) of + 0 -> {ok,lists:reverse(Need0)}; %That's it + Pos -> + Fmt1 = string:substr(Fmt, Pos+1), %Skip ~ + case extract_sequence(1, Fmt1, Need0) of + {ok,Need1,Rest} -> extract_sequences(Rest, Need1); + Error -> Error + end + end. + +extract_sequence(1, [$-,C|Fmt], Need) when C >= $0, C =< $9 -> + extract_sequence_digits(1, Fmt, Need); +extract_sequence(1, [C|Fmt], Need) when C >= $0, C =< $9 -> + extract_sequence_digits(1, Fmt, Need); +extract_sequence(1, [$-,$*|Fmt], Need) -> + extract_sequence(2, Fmt, [int|Need]); +extract_sequence(1, [$*|Fmt], Need) -> + extract_sequence(2, Fmt, [int|Need]); +extract_sequence(1, Fmt, Need) -> + extract_sequence(2, Fmt, Need); +extract_sequence(2, [$.,C|Fmt], Need) when C >= $0, C =< $9 -> + extract_sequence_digits(2, Fmt, Need); +extract_sequence(2, [$.,$*|Fmt], Need) -> + extract_sequence(3, Fmt, [int|Need]); +extract_sequence(2, [$.|Fmt], Need) -> + extract_sequence(3, Fmt, Need); +extract_sequence(2, Fmt, Need) -> + extract_sequence(4, Fmt, Need); +extract_sequence(3, [$.,$*|Fmt], Need) -> + extract_sequence(4, Fmt, [int|Need]); +extract_sequence(3, [$.,_|Fmt], Need) -> + extract_sequence(4, Fmt, Need); +extract_sequence(3, Fmt, Need) -> + extract_sequence(4, Fmt, Need); +extract_sequence(4, [$t, $c | Fmt], Need) -> + extract_sequence(5, [$c|Fmt], Need); +extract_sequence(4, [$t, $s | Fmt], Need) -> + extract_sequence(5, [$s|Fmt], Need); +extract_sequence(4, [$t, C | _Fmt], _Need) -> + {error,"invalid control ~t" ++ [C]}; +extract_sequence(4, Fmt, Need) -> + extract_sequence(5, Fmt, Need); +extract_sequence(5, [C|Fmt], Need0) -> + case control_type(C, Need0) of + error -> {error,"invalid control ~" ++ [C]}; + Need1 -> {ok,Need1,Fmt} + end; +extract_sequence(_, [], _Need) -> {error,"truncated"}. + +extract_sequence_digits(Fld, [C|Fmt], Need) when C >= $0, C =< $9 -> + extract_sequence_digits(Fld, Fmt, Need); +extract_sequence_digits(Fld, Fmt, Need) -> + extract_sequence(Fld+1, Fmt, Need). + +control_type($~, Need) -> Need; +control_type($c, Need) -> [int|Need]; +control_type($f, Need) -> [float|Need]; +control_type($e, Need) -> [float|Need]; +control_type($g, Need) -> [float|Need]; +control_type($s, Need) -> [string|Need]; +control_type($w, Need) -> [term|Need]; +control_type($p, Need) -> [term|Need]; +control_type($W, Need) -> [int,term|Need]; %% Note: reversed +control_type($P, Need) -> [int,term|Need]; %% Note: reversed +control_type($b, Need) -> [term|Need]; +control_type($B, Need) -> [term|Need]; +control_type($x, Need) -> [string,term|Need]; %% Note: reversed +control_type($X, Need) -> [string,term|Need]; %% Note: reversed +control_type($+, Need) -> [term|Need]; +control_type($#, Need) -> [term|Need]; +control_type($n, Need) -> Need; +control_type($i, Need) -> [term|Need]; +control_type(_C, _Need) -> error. + +%% In syntax trees, module/package names are atoms or lists of atoms. + +package_to_string(A) when is_atom(A) -> atom_to_list(A); +package_to_string(L) when is_list(L) -> packages:concat(L). + +expand_package({atom,L,A} = M, St0) -> + St1 = keyword_warning(L, A, St0), + case dict:find(A, St1#lint.mod_imports) of + {ok, A1} -> + {{atom,L,A1}, St1}; + error -> + Name = atom_to_list(A), + case packages:is_valid(Name) of + true -> + case packages:is_segmented(Name) of + true -> + {M, St1}; + false -> + M1 = packages:concat(St1#lint.package, + Name), + {{atom,L,list_to_atom(M1)}, St1} + end; + false -> + St2 = add_error(L, {bad_module_name, Name}, St1), + {error, St2} + end + end; +expand_package(M, St0) -> + L = element(2, M), + case erl_parse:package_segments(M) of + error -> + {error, St0}; + M1 -> + Name = package_to_string(M1), + case packages:is_valid(Name) of + true -> + {{atom,L,list_to_atom(Name)}, St0}; + false -> + St1 = add_error(L, {bad_module_name, Name}, St0), + {error, St1} + end + end. diff --git a/lib/stdlib/src/erl_parse.yrl b/lib/stdlib/src/erl_parse.yrl new file mode 100644 index 0000000000..fd5d905797 --- /dev/null +++ b/lib/stdlib/src/erl_parse.yrl @@ -0,0 +1,1028 @@ +%% -*- erlang -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% Definition of the Erlang grammar. + +Nonterminals +form +attribute attr_val +function function_clauses function_clause +clause_args clause_guard clause_body +expr expr_100 expr_150 expr_160 expr_200 expr_300 expr_400 expr_500 +expr_600 expr_700 expr_800 expr_900 +expr_max +list tail +list_comprehension lc_expr lc_exprs +binary_comprehension +tuple +atom1 +%struct +record_expr record_tuple record_field record_fields +if_expr if_clause if_clauses case_expr cr_clause cr_clauses receive_expr +fun_expr fun_clause fun_clauses +%% cond_expr cond_clause cond_clauses +try_expr try_catch try_clause try_clauses query_expr +function_call argument_list +exprs guard +atomic strings +prefix_op mult_op add_op list_op comp_op +rule rule_clauses rule_clause rule_body +binary bin_elements bin_element bit_expr +opt_bit_size_expr bit_size_expr opt_bit_type_list bit_type_list bit_type +top_type top_type_100 top_types type typed_expr typed_attr_val +type_sig type_sigs type_guard type_guards fun_type fun_type_100 binary_type +type_spec spec_fun typed_exprs typed_record_fields field_types field_type +bin_base_type bin_unit_type int_type. + +Terminals +char integer float atom string var + +'(' ')' ',' '->' ':-' '{' '}' '[' ']' '|' '||' '<-' ';' ':' '#' '.' +'after' 'begin' 'case' 'try' 'catch' 'end' 'fun' 'if' 'of' 'receive' 'when' +'andalso' 'orelse' 'query' 'spec' +%% 'cond' +'bnot' 'not' +'*' '/' 'div' 'rem' 'band' 'and' +'+' '-' 'bor' 'bxor' 'bsl' 'bsr' 'or' 'xor' +'++' '--' +'==' '/=' '=<' '<' '>=' '>' '=:=' '=/=' '<=' +'<<' '>>' +'!' '=' '::' +dot. + +Expect 2. + +Rootsymbol form. + +form -> attribute dot : '$1'. +form -> function dot : '$1'. +form -> rule dot : '$1'. + +attribute -> '-' atom attr_val : build_attribute('$2', '$3'). +attribute -> '-' atom typed_attr_val : build_typed_attribute('$2','$3'). +attribute -> '-' atom '(' typed_attr_val ')' : build_typed_attribute('$2','$4'). +attribute -> '-' 'spec' type_spec : build_type_spec('$2', '$3'). + +atom1 -> 'spec' : {atom, ?line('$1'), 'spec'}. +atom1 -> atom : '$1'. + +type_spec -> spec_fun type_sigs : {'$1', '$2'}. +type_spec -> '(' spec_fun type_sigs ')' : {'$2', '$3'}. + +spec_fun -> atom1 : '$1'. +spec_fun -> atom1 ':' atom1 : {'$1', '$3'}. +%% The following two are retained only for backwards compatibility; +%% they are not part of the EEP syntax and should be removed. +spec_fun -> atom1 '/' integer '::' : {'$1', '$3'}. +spec_fun -> atom1 ':' atom1 '/' integer '::' : {'$1', '$3', '$5'}. + +typed_attr_val -> expr ',' typed_record_fields : {typed_record, '$1', '$3'}. +typed_attr_val -> expr '::' top_type : {type_def, '$1', '$3'}. + +typed_record_fields -> '{' typed_exprs '}' : {tuple, ?line('$1'), '$2'}. + +typed_exprs -> typed_expr : ['$1']. +typed_exprs -> typed_expr ',' typed_exprs : ['$1'|'$3']. +typed_exprs -> expr ',' typed_exprs : ['$1'|'$3']. +typed_exprs -> typed_expr ',' exprs : ['$1'|'$3']. + +typed_expr -> expr '::' top_type : {typed,'$1','$3'}. + +type_sigs -> type_sig : ['$1']. +type_sigs -> type_sig ';' type_sigs : ['$1'|'$3']. + +type_sig -> fun_type : '$1'. +type_sig -> fun_type 'when' type_guards : {type, ?line('$1'), bounded_fun, + ['$1','$3']}. + +type_guards -> type_guard : ['$1']. +type_guards -> type_guard ',' type_guards : ['$1'|'$3']. + +type_guard -> atom1 '(' top_types ')' : {type, ?line('$1'), constraint, + ['$1', '$3']}. + +top_types -> top_type : ['$1']. +top_types -> top_type ',' top_types : ['$1'|'$3']. + +top_type -> var '::' top_type_100 : {ann_type, ?line('$1'), ['$1','$3']}. +top_type -> top_type_100 : '$1'. + +top_type_100 -> type : '$1'. +top_type_100 -> type '|' top_type_100 : lift_unions('$1','$3'). + +type -> '(' top_type ')' : {paren_type, ?line('$2'), ['$2']}. +type -> var : '$1'. +type -> atom1 : '$1'. +type -> atom1 '(' ')' : build_gen_type('$1'). +type -> atom1 '(' top_types ')' : {type, ?line('$1'), + normalise('$1'), '$3'}. +type -> atom1 ':' atom1 '(' ')' : {remote_type, ?line('$1'), + ['$1', '$3', []]}. +type -> atom1 ':' atom1 '(' top_types ')' : {remote_type, ?line('$1'), + ['$1', '$3', '$5']}. +type -> '[' ']' : {type, ?line('$1'), nil, []}. +type -> '[' top_type ']' : {type, ?line('$1'), list, ['$2']}. +type -> '[' top_type ',' '.' '.' '.' ']' : {type, ?line('$1'), + nonempty_list, ['$2']}. +type -> '{' '}' : {type, ?line('$1'), tuple, []}. +type -> '{' top_types '}' : {type, ?line('$1'), tuple, '$2'}. +type -> '#' atom1 '{' '}' : {type, ?line('$1'), record, ['$2']}. +type -> '#' atom1 '{' field_types '}' : {type, ?line('$1'), + record, ['$2'|'$4']}. +type -> binary_type : '$1'. +type -> int_type : '$1'. +type -> int_type '.' '.' int_type : {type, ?line('$1'), range, + ['$1', '$4']}. +type -> 'fun' '(' ')' : {type, ?line('$1'), 'fun', []}. +type -> 'fun' '(' fun_type_100 ')' : '$3'. + +int_type -> integer : '$1'. +int_type -> '-' integer : abstract(-normalise('$2'), + ?line('$2')). + +fun_type_100 -> '(' '.' '.' '.' ')' '->' top_type + : {type, ?line('$1'), 'fun', + [{type, ?line('$1'), any}, '$7']}. +fun_type_100 -> fun_type : '$1'. + +fun_type -> '(' ')' '->' top_type : {type, ?line('$1'), 'fun', + [{type, ?line('$1'), product, []}, '$4']}. +fun_type -> '(' top_types ')' '->' top_type + : {type, ?line('$1'), 'fun', + [{type, ?line('$1'), product, '$2'},'$5']}. + +field_types -> field_type : ['$1']. +field_types -> field_type ',' field_types : ['$1'|'$3']. + +field_type -> atom1 '::' top_type : {type, ?line('$1'), field_type, + ['$1', '$3']}. + +binary_type -> '<<' '>>' : {type, ?line('$1'),binary, + [abstract(0, ?line('$1')), + abstract(0, ?line('$1'))]}. +binary_type -> '<<' bin_base_type '>>' : {type, ?line('$1'),binary, + ['$2', abstract(0, ?line('$1'))]}. +binary_type -> '<<' bin_unit_type '>>' : {type, ?line('$1'),binary, + [abstract(0, ?line('$1')), '$2']}. +binary_type -> '<<' bin_base_type ',' bin_unit_type '>>' + : {type, ?line('$1'), binary, ['$2', '$4']}. + +bin_base_type -> var ':' integer : build_bin_type(['$1'], '$3'). + +bin_unit_type -> var ':' var '*' integer : build_bin_type(['$1', '$3'], '$5'). + +attr_val -> expr : ['$1']. +attr_val -> expr ',' exprs : ['$1' | '$3']. +attr_val -> '(' expr ',' exprs ')' : ['$2' | '$4']. + +function -> function_clauses : build_function('$1'). + +function_clauses -> function_clause : ['$1']. +function_clauses -> function_clause ';' function_clauses : ['$1'|'$3']. + +function_clause -> atom1 clause_args clause_guard clause_body : + {clause,?line('$1'),element(3, '$1'),'$2','$3','$4'}. + + +clause_args -> argument_list : element(1, '$1'). + +clause_guard -> 'when' guard : '$2'. +clause_guard -> '$empty' : []. + +clause_body -> '->' exprs: '$2'. + + +expr -> 'catch' expr : {'catch',?line('$1'),'$2'}. +expr -> expr_100 : '$1'. + +expr_100 -> expr_150 '=' expr_100 : {match,?line('$2'),'$1','$3'}. +expr_100 -> expr_150 '!' expr_100 : ?mkop2('$1', '$2', '$3'). +expr_100 -> expr_150 : '$1'. + +expr_150 -> expr_160 'orelse' expr_150 : ?mkop2('$1', '$2', '$3'). +expr_150 -> expr_160 : '$1'. + +expr_160 -> expr_200 'andalso' expr_160 : ?mkop2('$1', '$2', '$3'). +expr_160 -> expr_200 : '$1'. + +expr_200 -> expr_300 comp_op expr_300 : + ?mkop2('$1', '$2', '$3'). +expr_200 -> expr_300 : '$1'. + +expr_300 -> expr_400 list_op expr_300 : + ?mkop2('$1', '$2', '$3'). +expr_300 -> expr_400 : '$1'. + +expr_400 -> expr_400 add_op expr_500 : + ?mkop2('$1', '$2', '$3'). +expr_400 -> expr_500 : '$1'. + +expr_500 -> expr_500 mult_op expr_600 : + ?mkop2('$1', '$2', '$3'). +expr_500 -> expr_600 : '$1'. + +expr_600 -> prefix_op expr_700 : + ?mkop1('$1', '$2'). +expr_600 -> expr_700 : '$1'. + +expr_700 -> function_call : '$1'. +expr_700 -> record_expr : '$1'. +expr_700 -> expr_800 : '$1'. + +expr_800 -> expr_900 ':' expr_max : + {remote,?line('$2'),'$1','$3'}. +expr_800 -> expr_900 : '$1'. + +expr_900 -> '.' atom1 : + {record_field,?line('$1'),{atom,?line('$1'),''},'$2'}. +expr_900 -> expr_900 '.' atom1 : + {record_field,?line('$2'),'$1','$3'}. +expr_900 -> expr_max : '$1'. + +expr_max -> var : '$1'. +expr_max -> atomic : '$1'. +expr_max -> list : '$1'. +expr_max -> binary : '$1'. +expr_max -> list_comprehension : '$1'. +expr_max -> binary_comprehension : '$1'. +expr_max -> tuple : '$1'. +%%expr_max -> struct : '$1'. +expr_max -> '(' expr ')' : '$2'. +expr_max -> 'begin' exprs 'end' : {block,?line('$1'),'$2'}. +expr_max -> if_expr : '$1'. +expr_max -> case_expr : '$1'. +expr_max -> receive_expr : '$1'. +expr_max -> fun_expr : '$1'. +%%expr_max -> cond_expr : '$1'. +expr_max -> try_expr : '$1'. +expr_max -> query_expr : '$1'. + + +list -> '[' ']' : {nil,?line('$1')}. +list -> '[' expr tail : {cons,?line('$1'),'$2','$3'}. + +tail -> ']' : {nil,?line('$1')}. +tail -> '|' expr ']' : '$2'. +tail -> ',' expr tail : {cons,?line('$2'),'$2','$3'}. + + +binary -> '<<' '>>' : {bin,?line('$1'),[]}. +binary -> '<<' bin_elements '>>' : {bin,?line('$1'),'$2'}. + +bin_elements -> bin_element : ['$1']. +bin_elements -> bin_element ',' bin_elements : ['$1'|'$3']. + +bin_element -> bit_expr opt_bit_size_expr opt_bit_type_list : + {bin_element,?line('$1'),'$1','$2','$3'}. + +bit_expr -> prefix_op expr_max : ?mkop1('$1', '$2'). +bit_expr -> expr_max : '$1'. + +opt_bit_size_expr -> ':' bit_size_expr : '$2'. +opt_bit_size_expr -> '$empty' : default. + +opt_bit_type_list -> '/' bit_type_list : '$2'. +opt_bit_type_list -> '$empty' : default. + +bit_type_list -> bit_type '-' bit_type_list : ['$1' | '$3']. +bit_type_list -> bit_type : ['$1']. + +bit_type -> atom1 : element(3,'$1'). +bit_type -> atom1 ':' integer : { element(3,'$1'), element(3,'$3') }. + +bit_size_expr -> expr_max : '$1'. + + +list_comprehension -> '[' expr '||' lc_exprs ']' : + {lc,?line('$1'),'$2','$4'}. +binary_comprehension -> '<<' binary '||' lc_exprs '>>' : + {bc,?line('$1'),'$2','$4'}. +lc_exprs -> lc_expr : ['$1']. +lc_exprs -> lc_expr ',' lc_exprs : ['$1'|'$3']. + +lc_expr -> expr : '$1'. +lc_expr -> expr '<-' expr : {generate,?line('$2'),'$1','$3'}. +lc_expr -> binary '<=' expr : {b_generate,?line('$2'),'$1','$3'}. + +tuple -> '{' '}' : {tuple,?line('$1'),[]}. +tuple -> '{' exprs '}' : {tuple,?line('$1'),'$2'}. + + +%%struct -> atom1 tuple : +%% {struct,?line('$1'),element(3, '$1'),element(3, '$2')}. + + +%% N.B. This is called from expr_700. +%% N.B. Field names are returned as the complete object, even if they are +%% always atoms for the moment, this might change in the future. + +record_expr -> '#' atom1 '.' atom1 : + {record_index,?line('$1'),element(3, '$2'),'$4'}. +record_expr -> '#' atom1 record_tuple : + {record,?line('$1'),element(3, '$2'),'$3'}. +record_expr -> expr_max '#' atom1 '.' atom1 : + {record_field,?line('$2'),'$1',element(3, '$3'),'$5'}. +record_expr -> expr_max '#' atom1 record_tuple : + {record,?line('$2'),'$1',element(3, '$3'),'$4'}. + +record_tuple -> '{' '}' : []. +record_tuple -> '{' record_fields '}' : '$2'. + +record_fields -> record_field : ['$1']. +record_fields -> record_field ',' record_fields : ['$1' | '$3']. + +record_field -> var '=' expr : {record_field,?line('$1'),'$1','$3'}. +record_field -> atom1 '=' expr : {record_field,?line('$1'),'$1','$3'}. + +%% N.B. This is called from expr_700. + +function_call -> expr_800 argument_list : + {call,?line('$1'),'$1',element(1, '$2')}. + + +if_expr -> 'if' if_clauses 'end' : {'if',?line('$1'),'$2'}. + +if_clauses -> if_clause : ['$1']. +if_clauses -> if_clause ';' if_clauses : ['$1' | '$3']. + +if_clause -> guard clause_body : + {clause,?line(hd(hd('$1'))),[],'$1','$2'}. + + +case_expr -> 'case' expr 'of' cr_clauses 'end' : + {'case',?line('$1'),'$2','$4'}. + +cr_clauses -> cr_clause : ['$1']. +cr_clauses -> cr_clause ';' cr_clauses : ['$1' | '$3']. + +cr_clause -> expr clause_guard clause_body : + {clause,?line('$1'),['$1'],'$2','$3'}. + +receive_expr -> 'receive' cr_clauses 'end' : + {'receive',?line('$1'),'$2'}. +receive_expr -> 'receive' 'after' expr clause_body 'end' : + {'receive',?line('$1'),[],'$3','$4'}. +receive_expr -> 'receive' cr_clauses 'after' expr clause_body 'end' : + {'receive',?line('$1'),'$2','$4','$5'}. + + +fun_expr -> 'fun' atom1 '/' integer : + {'fun',?line('$1'),{function,element(3, '$2'),element(3, '$4')}}. +fun_expr -> 'fun' atom1 ':' atom1 '/' integer : + {'fun',?line('$1'),{function,element(3, '$2'),element(3, '$4'),element(3,'$6')}}. +fun_expr -> 'fun' fun_clauses 'end' : + build_fun(?line('$1'), '$2'). + +fun_clauses -> fun_clause : ['$1']. +fun_clauses -> fun_clause ';' fun_clauses : ['$1' | '$3']. + +fun_clause -> argument_list clause_guard clause_body : + {Args,Pos} = '$1', + {clause,Pos,'fun',Args,'$2','$3'}. + +try_expr -> 'try' exprs 'of' cr_clauses try_catch : + build_try(?line('$1'),'$2','$4','$5'). +try_expr -> 'try' exprs try_catch : + build_try(?line('$1'),'$2',[],'$3'). + +try_catch -> 'catch' try_clauses 'end' : + {'$2',[]}. +try_catch -> 'catch' try_clauses 'after' exprs 'end' : + {'$2','$4'}. +try_catch -> 'after' exprs 'end' : + {[],'$2'}. + +try_clauses -> try_clause : ['$1']. +try_clauses -> try_clause ';' try_clauses : ['$1' | '$3']. + +try_clause -> expr clause_guard clause_body : + L = ?line('$1'), + {clause,L,[{tuple,L,[{atom,L,throw},'$1',{var,L,'_'}]}],'$2','$3'}. +try_clause -> atom1 ':' expr clause_guard clause_body : + L = ?line('$1'), + {clause,L,[{tuple,L,['$1','$3',{var,L,'_'}]}],'$4','$5'}. +try_clause -> var ':' expr clause_guard clause_body : + L = ?line('$1'), + {clause,L,[{tuple,L,['$1','$3',{var,L,'_'}]}],'$4','$5'}. + +%%cond_expr -> 'cond' cond_clauses 'end' : {'cond',?line('$1'),'$2'}. + +%%cond_clauses -> cond_clause : ['$1']. +%%cond_clauses -> cond_clause ';' cond_clauses : ['$1' | '$3']. + +%%cond_clause -> expr clause_body : +%% {clause,?line('$1'),[],[['$1']],'$2'}. + +query_expr -> 'query' list_comprehension 'end' : + {'query',?line('$1'),'$2'}. + + +argument_list -> '(' ')' : {[],?line('$1')}. +argument_list -> '(' exprs ')' : {'$2',?line('$1')}. + + +exprs -> expr : ['$1']. +exprs -> expr ',' exprs : ['$1' | '$3']. + +guard -> exprs : ['$1']. +guard -> exprs ';' guard : ['$1'|'$3']. + +atomic -> char : '$1'. +atomic -> integer : '$1'. +atomic -> float : '$1'. +atomic -> atom1 : '$1'. +atomic -> strings : '$1'. + +strings -> string : '$1'. +strings -> string strings : + {string,?line('$1'),element(3, '$1') ++ element(3, '$2')}. + +prefix_op -> '+' : '$1'. +prefix_op -> '-' : '$1'. +prefix_op -> 'bnot' : '$1'. +prefix_op -> 'not' : '$1'. + +mult_op -> '/' : '$1'. +mult_op -> '*' : '$1'. +mult_op -> 'div' : '$1'. +mult_op -> 'rem' : '$1'. +mult_op -> 'band' : '$1'. +mult_op -> 'and' : '$1'. + +add_op -> '+' : '$1'. +add_op -> '-' : '$1'. +add_op -> 'bor' : '$1'. +add_op -> 'bxor' : '$1'. +add_op -> 'bsl' : '$1'. +add_op -> 'bsr' : '$1'. +add_op -> 'or' : '$1'. +add_op -> 'xor' : '$1'. + +list_op -> '++' : '$1'. +list_op -> '--' : '$1'. + +comp_op -> '==' : '$1'. +comp_op -> '/=' : '$1'. +comp_op -> '=<' : '$1'. +comp_op -> '<' : '$1'. +comp_op -> '>=' : '$1'. +comp_op -> '>' : '$1'. +comp_op -> '=:=' : '$1'. +comp_op -> '=/=' : '$1'. + +rule -> rule_clauses : build_rule('$1'). + +rule_clauses -> rule_clause : ['$1']. +rule_clauses -> rule_clause ';' rule_clauses : ['$1'|'$3']. + +rule_clause -> atom1 clause_args clause_guard rule_body : + {clause,?line('$1'),element(3, '$1'),'$2','$3','$4'}. + +rule_body -> ':-' lc_exprs: '$2'. + + +Erlang code. + +-export([parse_form/1,parse_exprs/1,parse_term/1]). +-export([normalise/1,abstract/1,tokens/1,tokens/2]). +-export([abstract/2, package_segments/1]). +-export([inop_prec/1,preop_prec/1,func_prec/0,max_prec/0]). +-export([set_line/2,get_attribute/2,get_attributes/1]). + +%% The following directive is needed for (significantly) faster compilation +%% of the generated .erl file by the HiPE compiler. Please do not remove. +-compile([{hipe,[{regalloc,linear_scan}]}]). + + +%% mkop(Op, Arg) -> {op,Line,Op,Arg}. +%% mkop(Left, Op, Right) -> {op,Line,Op,Left,Right}. + +-define(mkop2(L, OpPos, R), + begin + {Op,Pos} = OpPos, + {op,Pos,Op,L,R} + end). + +-define(mkop1(OpPos, A), + begin + {Op,Pos} = OpPos, + {op,Pos,Op,A} + end). + +%% keep track of line info in tokens +-define(line(Tup), element(2, Tup)). + +%% Entry points compatible to old erl_parse. +%% These really suck and are only here until Calle gets multiple +%% entry points working. + +parse_form(Tokens) -> + parse(Tokens). + +parse_exprs(Tokens) -> + case parse([{atom,0,f},{'(',0},{')',0},{'->',0}|Tokens]) of + {ok,{function,_Lf,f,0,[{clause,_Lc,[],[],Exprs}]}} -> + {ok,Exprs}; + {error,_} = Err -> Err + end. + +parse_term(Tokens) -> + case parse([{atom,0,f},{'(',0},{')',0},{'->',0}|Tokens]) of + {ok,{function,_Lf,f,0,[{clause,_Lc,[],[],[Expr]}]}} -> + try normalise(Expr) of + Term -> {ok,Term} + catch + _:_R -> {error,{?line(Expr),?MODULE,"bad term"}} + end; + {ok,{function,_Lf,f,0,[{clause,_Lc,[],[],[_E1,E2|_Es]}]}} -> + {error,{?line(E2),?MODULE,"bad term"}}; + {error,_} = Err -> Err + end. + +-type attributes() :: 'export' | 'file' | 'import' | 'module' + | 'opaque' | 'record' | 'type'. + +build_typed_attribute({atom,La,record}, + {typed_record, {atom,_Ln,RecordName}, RecTuple}) -> + {attribute,La,record,{RecordName,record_tuple(RecTuple)}}; +build_typed_attribute({atom,La,Attr}, + {type_def, {call,_,{atom,_,TypeName},Args}, Type}) + when Attr =:= 'type' ; Attr =:= 'opaque' -> + case lists:all(fun({var, _, _}) -> true; + (_) -> false + end, Args) of + true -> {attribute,La,Attr,{TypeName,Type,Args}}; + false -> error_bad_decl(La, Attr) + end; +build_typed_attribute({atom,La,Attr},_) -> + case Attr of + record -> error_bad_decl(La, record); + type -> error_bad_decl(La, type); + opaque -> error_bad_decl(La, opaque); + _ -> ret_err(La, "bad attribute") + end. + +build_type_spec({spec,La}, {SpecFun, TypeSpecs}) -> + NewSpecFun = + case SpecFun of + {atom, _, Fun} -> + {Fun, find_arity_from_specs(TypeSpecs)}; + {{atom,_, Mod}, {atom,_, Fun}} -> + {Mod,Fun,find_arity_from_specs(TypeSpecs)}; + {{atom, _, Fun}, {integer, _, Arity}} -> + %% Old style spec. Allow this for now. + {Fun,Arity}; + {{atom,_, Mod}, {atom, _, Fun}, {integer, _, Arity}} -> + %% Old style spec. Allow this for now. + {Mod,Fun,Arity} + end, + {attribute,La,spec,{NewSpecFun, TypeSpecs}}. + +find_arity_from_specs([Spec|_]) -> + %% Use the first spec to find the arity. If all are not the same, + %% erl_lint will find this. + Fun = case Spec of + {type, _, bounded_fun, [F, _]} -> F; + {type, _, 'fun', _} = F -> F + end, + {type, _, 'fun', [{type, _, product, Args},_]} = Fun, + length(Args). + +lift_unions(T1, {type, _La, union, List}) -> + {type, ?line(T1), union, [T1|List]}; +lift_unions(T1, T2) -> + {type, ?line(T1), union, [T1, T2]}. + +build_gen_type({atom, La, tuple}) -> + {type, La, tuple, any}; +build_gen_type({atom, La, Name}) -> + {type, La, Name, []}. + +build_bin_type([{var, _, '_'}|Left], Int) -> + build_bin_type(Left, Int); +build_bin_type([], Int) -> + Int; +build_bin_type([{var, La, _}|_], _) -> + ret_err(La, "Bad binary type"). + +%% build_attribute(AttrName, AttrValue) -> +%% {attribute,Line,module,Module} +%% {attribute,Line,export,Exports} +%% {attribute,Line,import,Imports} +%% {attribute,Line,record,{Name,Inits}} +%% {attribute,Line,file,{Name,Line}} +%% {attribute,Line,Name,Val} + +build_attribute({atom,La,module}, Val) -> + case Val of + [{atom,_Lm,Module}] -> + {attribute,La,module,Module}; + [{atom,_Lm,Module},ExpList] -> + {attribute,La,module,{Module,var_list(ExpList)}}; + [Name] -> + case package_segments(Name) of + error -> + error_bad_decl(La, module); + Module -> + {attribute,La,module,Module} + end; + [Name,ExpList] -> + case package_segments(Name) of + error -> + error_bad_decl(La, module); + Module -> + {attribute,La,module,{Module,var_list(ExpList)}} + end; + _Other -> + error_bad_decl(La, module) + end; +build_attribute({atom,La,export}, Val) -> + case Val of + [ExpList] -> + {attribute,La,export,farity_list(ExpList)}; + _Other -> error_bad_decl(La, export) + end; +build_attribute({atom,La,import}, Val) -> + case Val of + [Name] -> + case package_segments(Name) of + error -> + error_bad_decl(La, import); + Module -> + {attribute,La,import,Module} + end; + [{atom,_Lm,Mod},ImpList] -> + {attribute,La,import,{Mod,farity_list(ImpList)}}; + [Name, ImpList] -> + case package_segments(Name) of + error -> + error_bad_decl(La, import); + Module -> + {attribute,La,import,{Module,farity_list(ImpList)}} + end; + _Other -> error_bad_decl(La, import) + end; +build_attribute({atom,La,record}, Val) -> + case Val of + [{atom,_Ln,Record},RecTuple] -> + {attribute,La,record,{Record,record_tuple(RecTuple)}}; + _Other -> error_bad_decl(La, record) + end; +build_attribute({atom,La,file}, Val) -> + case Val of + [{string,_Ln,Name},{integer,_Ll,Line}] -> + {attribute,La,file,{Name,Line}}; + _Other -> error_bad_decl(La, file) + end; +build_attribute({atom,La,Attr}, Val) -> + case Val of + [Expr0] -> + Expr = attribute_farity(Expr0), + {attribute,La,Attr,term(Expr)}; + _Other -> ret_err(La, "bad attribute") + end. + +var_list({cons,_Lc,{var,_,V},Tail}) -> + [V|var_list(Tail)]; +var_list({nil,_Ln}) -> []; +var_list(Other) -> + ret_err(?line(Other), "bad variable list"). + +attribute_farity({cons,L,H,T}) -> + {cons,L,attribute_farity(H),attribute_farity(T)}; +attribute_farity({tuple,L,Args0}) -> + Args = attribute_farity_list(Args0), + {tuple,L,Args}; +attribute_farity({op,L,'/',{atom,_,_}=Name,{integer,_,_}=Arity}) -> + {tuple,L,[Name,Arity]}; +attribute_farity(Other) -> Other. + +attribute_farity_list(Args) -> + [attribute_farity(A) || A <- Args]. + +-spec error_bad_decl(integer(), attributes()) -> no_return(). + +error_bad_decl(L, S) -> + ret_err(L, io_lib:format("bad ~w declaration", [S])). + +farity_list({cons,_Lc,{op,_Lo,'/',{atom,_La,A},{integer,_Li,I}},Tail}) -> + [{A,I}|farity_list(Tail)]; +farity_list({nil,_Ln}) -> []; +farity_list(Other) -> + ret_err(?line(Other), "bad function arity"). + +record_tuple({tuple,_Lt,Fields}) -> + record_fields(Fields); +record_tuple(Other) -> + ret_err(?line(Other), "bad record declaration"). + +record_fields([{atom,La,A}|Fields]) -> + [{record_field,La,{atom,La,A}}|record_fields(Fields)]; +record_fields([{match,_Lm,{atom,La,A},Expr}|Fields]) -> + [{record_field,La,{atom,La,A},Expr}|record_fields(Fields)]; +record_fields([{typed,Expr,TypeInfo}|Fields]) -> + [Field] = record_fields([Expr]), + TypeInfo1 = + case Expr of + {match, _, _, _} -> TypeInfo; %% If we have an initializer. + {atom, La, _} -> + lift_unions(abstract(undefined, La), TypeInfo) + end, + [{typed_record_field,Field,TypeInfo1}|record_fields(Fields)]; +record_fields([Other|_Fields]) -> + ret_err(?line(Other), "bad record field"); +record_fields([]) -> []. + +term(Expr) -> + try normalise(Expr) + catch _:_R -> ret_err(?line(Expr), "bad attribute") + end. + +package_segments(Name) -> + package_segments(Name, [], []). + +package_segments({record_field, _, F1, F2}, Fs, As) -> + package_segments(F1, [F2 | Fs], As); +package_segments({atom, _, A}, [F | Fs], As) -> + package_segments(F, Fs, [A | As]); +package_segments({atom, _, A}, [], As) -> + lists:reverse([A | As]); +package_segments(_, _, _) -> + error. + +%% build_function([Clause]) -> {function,Line,Name,Arity,[Clause]} + +build_function(Cs) -> + Name = element(3, hd(Cs)), + Arity = length(element(4, hd(Cs))), + {function,?line(hd(Cs)),Name,Arity,check_clauses(Cs, Name, Arity)}. + +%% build_rule([Clause]) -> {rule,Line,Name,Arity,[Clause]'} + +build_rule(Cs) -> + Name = element(3, hd(Cs)), + Arity = length(element(4, hd(Cs))), + {rule,?line(hd(Cs)),Name,Arity,check_clauses(Cs, Name, Arity)}. + +%% build_fun(Line, [Clause]) -> {'fun',Line,{clauses,[Clause]}}. + +build_fun(Line, Cs) -> + Arity = length(element(4, hd(Cs))), + {'fun',Line,{clauses,check_clauses(Cs, 'fun', Arity)}}. + +check_clauses(Cs, Name, Arity) -> + mapl(fun ({clause,L,N,As,G,B}) when N =:= Name, length(As) =:= Arity -> + {clause,L,As,G,B}; + ({clause,L,_N,_As,_G,_B}) -> + ret_err(L, "head mismatch") end, Cs). + +build_try(L,Es,Scs,{Ccs,As}) -> + {'try',L,Es,Scs,Ccs,As}. + +ret_err(L, S) -> + {location,Location} = get_attribute(L, location), + return_error(Location, S). + +%% mapl(F,List) +%% an alternative map which always maps from left to right +%% and makes it possible to interrupt the mapping with throw on +%% the first occurence from left as expected. +%% can be removed when the jam machine (and all other machines) +%% uses the standardized (Erlang 5.0) evaluation order (from left to right) +mapl(F, [H|T]) -> + V = F(H), + [V | mapl(F,T)]; +mapl(_, []) -> + []. + +%% normalise(AbsTerm) +%% abstract(Term) +%% Convert between the abstract form of a term and a term. + +normalise({char,_,C}) -> C; +normalise({integer,_,I}) -> I; +normalise({float,_,F}) -> F; +normalise({atom,_,A}) -> A; +normalise({string,_,S}) -> S; +normalise({nil,_}) -> []; +normalise({bin,_,Fs}) -> + {value, B, _} = + eval_bits:expr_grp(Fs, [], + fun(E, _) -> + {value, normalise(E), []} + end, [], true), + B; +normalise({cons,_,Head,Tail}) -> + [normalise(Head)|normalise(Tail)]; +normalise({tuple,_,Args}) -> + list_to_tuple(normalise_list(Args)); +%% Atom dot-notation, as in 'foo.bar.baz' +normalise({record_field,_,_,_}=A) -> + case package_segments(A) of + error -> erlang:error({badarg, A}); + As -> list_to_atom(packages:concat(As)) + end; +%% Special case for unary +/-. +normalise({op,_,'+',{char,_,I}}) -> I; +normalise({op,_,'+',{integer,_,I}}) -> I; +normalise({op,_,'+',{float,_,F}}) -> F; +normalise({op,_,'-',{char,_,I}}) -> -I; %Weird, but compatible! +normalise({op,_,'-',{integer,_,I}}) -> -I; +normalise({op,_,'-',{float,_,F}}) -> -F; +normalise(X) -> erlang:error({badarg, X}). + +normalise_list([H|T]) -> + [normalise(H)|normalise_list(T)]; +normalise_list([]) -> + []. + +abstract(T) when is_integer(T) -> {integer,0,T}; +abstract(T) when is_float(T) -> {float,0,T}; +abstract(T) when is_atom(T) -> {atom,0,T}; +abstract([]) -> {nil,0}; +abstract(B) when is_bitstring(B) -> + {bin, 0, [abstract_byte(Byte, 0) || Byte <- bitstring_to_list(B)]}; +abstract([C|T]) when is_integer(C), 0 =< C, C < 256 -> + abstract_string(T, [C]); +abstract([H|T]) -> + {cons,0,abstract(H),abstract(T)}; +abstract(Tuple) when is_tuple(Tuple) -> + {tuple,0,abstract_list(tuple_to_list(Tuple))}. + +abstract_string([C|T], String) when is_integer(C), 0 =< C, C < 256 -> + abstract_string(T, [C|String]); +abstract_string([], String) -> + {string, 0, lists:reverse(String)}; +abstract_string(T, String) -> + not_string(String, abstract(T)). + +not_string([C|T], Result) -> + not_string(T, {cons, 0, {integer, 0, C}, Result}); +not_string([], Result) -> + Result. + +abstract_list([H|T]) -> + [abstract(H)|abstract_list(T)]; +abstract_list([]) -> + []. + +abstract_byte(Byte, Line) when is_integer(Byte) -> + {bin_element, Line, {integer, Line, Byte}, default, default}; +abstract_byte(Bits, Line) -> + Sz = bit_size(Bits), + <> = Bits, + {bin_element, Line, {integer, Line, Val}, {integer, Line, Sz}, default}. + +%%% abstract/2 keeps the line number +abstract(T, Line) when is_integer(T) -> {integer,Line,T}; +abstract(T, Line) when is_float(T) -> {float,Line,T}; +abstract(T, Line) when is_atom(T) -> {atom,Line,T}; +abstract([], Line) -> {nil,Line}; +abstract(B, Line) when is_bitstring(B) -> + {bin, Line, [abstract_byte(Byte, Line) || Byte <- bitstring_to_list(B)]}; +abstract([C|T], Line) when is_integer(C), 0 =< C, C < 256 -> + abstract_string(T, [C], Line); +abstract([H|T], Line) -> + {cons,Line,abstract(H, Line),abstract(T, Line)}; +abstract(Tuple, Line) when is_tuple(Tuple) -> + {tuple,Line,abstract_list(tuple_to_list(Tuple), Line)}. + +abstract_string([C|T], String, Line) when is_integer(C), 0 =< C, C < 256 -> + abstract_string(T, [C|String], Line); +abstract_string([], String, Line) -> + {string, Line, lists:reverse(String)}; +abstract_string(T, String, Line) -> + not_string(String, abstract(T, Line), Line). + +not_string([C|T], Result, Line) -> + not_string(T, {cons, Line, {integer, Line, C}, Result}, Line); +not_string([], Result, _Line) -> + Result. + +abstract_list([H|T], Line) -> + [abstract(H, Line)|abstract_list(T, Line)]; +abstract_list([], _Line) -> + []. + +%% tokens(AbsTerm) -> [Token] +%% tokens(AbsTerm, More) -> [Token] +%% Generate a list of tokens representing the abstract term. + +tokens(Abs) -> + tokens(Abs, []). + +tokens({char,L,C}, More) -> [{char,L,C}|More]; +tokens({integer,L,N}, More) -> [{integer,L,N}|More]; +tokens({float,L,F}, More) -> [{float,L,F}|More]; +tokens({atom,L,A}, More) -> [{atom,L,A}|More]; +tokens({var,L,V}, More) -> [{var,L,V}|More]; +tokens({string,L,S}, More) -> [{string,L,S}|More]; +tokens({nil,L}, More) -> [{'[',L},{']',L}|More]; +tokens({cons,L,Head,Tail}, More) -> + [{'[',L}|tokens(Head, tokens_tail(Tail, More))]; +tokens({tuple,L,[]}, More) -> + [{'{',L},{'}',L}|More]; +tokens({tuple,L,[E|Es]}, More) -> + [{'{',L}|tokens(E, tokens_tuple(Es, ?line(E), More))]. + +tokens_tail({cons,L,Head,Tail}, More) -> + [{',',L}|tokens(Head, tokens_tail(Tail, More))]; +tokens_tail({nil,L}, More) -> + [{']',L}|More]; +tokens_tail(Other, More) -> + L = ?line(Other), + [{'|',L}|tokens(Other, [{']',L}|More])]. + +tokens_tuple([E|Es], Line, More) -> + [{',',Line}|tokens(E, tokens_tuple(Es, ?line(E), More))]; +tokens_tuple([], Line, More) -> + [{'}',Line}|More]. + +%% Give the relative precedences of operators. + +inop_prec('=') -> {150,100,100}; +inop_prec('!') -> {150,100,100}; +inop_prec('orelse') -> {160,150,150}; +inop_prec('andalso') -> {200,160,160}; +inop_prec('==') -> {300,200,300}; +inop_prec('/=') -> {300,200,300}; +inop_prec('=<') -> {300,200,300}; +inop_prec('<') -> {300,200,300}; +inop_prec('>=') -> {300,200,300}; +inop_prec('>') -> {300,200,300}; +inop_prec('=:=') -> {300,200,300}; +inop_prec('=/=') -> {300,200,300}; +inop_prec('++') -> {400,300,300}; +inop_prec('--') -> {400,300,300}; +inop_prec('+') -> {400,400,500}; +inop_prec('-') -> {400,400,500}; +inop_prec('bor') -> {400,400,500}; +inop_prec('bxor') -> {400,400,500}; +inop_prec('bsl') -> {400,400,500}; +inop_prec('bsr') -> {400,400,500}; +inop_prec('or') -> {400,400,500}; +inop_prec('xor') -> {400,400,500}; +inop_prec('*') -> {500,500,600}; +inop_prec('/') -> {500,500,600}; +inop_prec('div') -> {500,500,600}; +inop_prec('rem') -> {500,500,600}; +inop_prec('band') -> {500,500,600}; +inop_prec('and') -> {500,500,600}; +inop_prec('#') -> {800,700,800}; +inop_prec(':') -> {900,800,900}; +inop_prec('.') -> {900,900,1000}. + +-type pre_op() :: 'catch' | '+' | '-' | 'bnot' | '#'. + +-spec preop_prec(pre_op()) -> {0 | 600 | 700, 100 | 700 | 800}. + +preop_prec('catch') -> {0,100}; +preop_prec('+') -> {600,700}; +preop_prec('-') -> {600,700}; +preop_prec('bnot') -> {600,700}; +preop_prec('not') -> {600,700}; +preop_prec('#') -> {700,800}. + +-spec func_prec() -> {800,700}. + +func_prec() -> {800,700}. + +-spec max_prec() -> 1000. + +max_prec() -> 1000. + +%%% [Experimental]. The parser just copies the attributes of the +%%% scanner tokens to the abstract format. This design decision has +%%% been hidden to some extent: use set_line() and get_attribute() to +%%% access the second element of (almost all) of the abstract format +%%% tuples. A typical use is to negate line numbers to prevent the +%%% compiler from emitting warnings and errors. The second element can +%%% (of course) be set to any value, but then these functions no +%%% longer apply. To get all present attributes as a property list +%%% get_attributes() should be used. + +set_line(L, F) -> + erl_scan:set_attribute(line, L, F). + +get_attribute(L, Name) -> + erl_scan:attributes_info(L, Name). + +get_attributes(L) -> + erl_scan:attributes_info(L). diff --git a/lib/stdlib/src/erl_posix_msg.erl b/lib/stdlib/src/erl_posix_msg.erl new file mode 100644 index 0000000000..fe981b23a7 --- /dev/null +++ b/lib/stdlib/src/erl_posix_msg.erl @@ -0,0 +1,166 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(erl_posix_msg). + +%% Converts from errno identifiers to error messages. + +-export([message/1]). + +-spec message(atom()) -> string(). + +message(e2big) -> "argument list too long"; +message(eacces) -> "permission denied"; +message(eaddrinuse) -> "address already in use"; +message(eaddrnotavail) -> "can't assign requested address"; +message(eadv) -> "advertise error"; +message(eafnosupport) -> "address family not supported by protocol family"; +message(eagain) -> "resource temporarily unavailable"; +message(ealign) -> "EALIGN"; +message(ealready) -> "operation already in progress"; +message(ebade) -> "bad exchange descriptor"; +message(ebadf) -> "bad file number"; +message(ebadfd) -> "file descriptor in bad state"; +message(ebadmsg) -> "not a data message"; +message(ebadr) -> "bad request descriptor"; +message(ebadrpc) -> "RPC structure is bad"; +message(ebadrqc) -> "bad request code"; +message(ebadslt) -> "invalid slot"; +message(ebfont) -> "bad font file format"; +message(ebusy) -> "file busy"; +message(echild) -> "no children"; +message(echrng) -> "channel number out of range"; +message(ecomm) -> "communication error on send"; +message(econnaborted) -> "software caused connection abort"; +message(econnrefused) -> "connection refused"; +message(econnreset) -> "connection reset by peer"; +message(edeadlk) -> "resource deadlock avoided"; +message(edeadlock) -> "resource deadlock avoided"; +message(edestaddrreq) -> "destination address required"; +message(edirty) -> "mounting a dirty fs w/o force"; +message(edom) -> "math argument out of range"; +message(edotdot) -> "cross mount point"; +message(edquot) -> "disk quota exceeded"; +message(eduppkg) -> "duplicate package name"; +message(eexist) -> "file already exists"; +message(efault) -> "bad address in system call argument"; +message(efbig) -> "file too large"; +message(ehostdown) -> "host is down"; +message(ehostunreach) -> "host is unreachable"; +message(eidrm) -> "identifier removed"; +message(einit) -> "initialization error"; +message(einprogress) -> "operation now in progress"; +message(eintr) -> "interrupted system call"; +message(einval) -> "invalid argument"; +message(eio) -> "I/O error"; +message(eisconn) -> "socket is already connected"; +message(eisdir) -> "illegal operation on a directory"; +message(eisnam) -> "is a name file"; +message(elbin) -> "ELBIN"; +message(el2hlt) -> "level 2 halted"; +message(el2nsync) -> "level 2 not synchronized"; +message(el3hlt) -> "level 3 halted"; +message(el3rst) -> "level 3 reset"; +message(elibacc) -> "can not access a needed shared library"; +message(elibbad) -> "accessing a corrupted shared library"; +message(elibexec) -> "can not exec a shared library directly"; +message(elibmax) -> + "attempting to link in more shared libraries than system limit"; +message(elibscn) -> ".lib section in a.out corrupted"; +message(elnrng) -> "link number out of range"; +message(eloop) -> "too many levels of symbolic links"; +message(emfile) -> "too many open files"; +message(emlink) -> "too many links"; +message(emsgsize) -> "message too long"; +message(emultihop) -> "multihop attempted"; +message(enametoolong) -> "file name too long"; +message(enavail) -> "not available"; +message(enet) -> "ENET"; +message(enetdown) -> "network is down"; +message(enetreset) -> "network dropped connection on reset"; +message(enetunreach) -> "network is unreachable"; +message(enfile) -> "file table overflow"; +message(enoano) -> "anode table overflow"; +message(enobufs) -> "no buffer space available"; +message(enocsi) -> "no CSI structure available"; +message(enodata) -> "no data available"; +message(enodev) -> "no such device"; +message(enoent) -> "no such file or directory"; +message(enoexec) -> "exec format error"; +message(enolck) -> "no locks available"; +message(enolink) -> "link has be severed"; +message(enomem) -> "not enough memory"; +message(enomsg) -> "no message of desired type"; +message(enonet) -> "machine is not on the network"; +message(enopkg) -> "package not installed"; +message(enoprotoopt) -> "bad proocol option"; +message(enospc) -> "no space left on device"; +message(enosr) -> "out of stream resources or not a stream device"; +message(enosym) -> "unresolved symbol name"; +message(enosys) -> "function not implemented"; +message(enotblk) -> "block device required"; +message(enotconn) -> "socket is not connected"; +message(enotdir) -> "not a directory"; +message(enotempty) -> "directory not empty"; +message(enotnam) -> "not a name file"; +message(enotsock) -> "socket operation on non-socket"; +message(enotsup) -> "operation not supported"; +message(enotty) -> "inappropriate device for ioctl"; +message(enotuniq) -> "name not unique on network"; +message(enxio) -> "no such device or address"; +message(eopnotsupp) -> "operation not supported on socket"; +message(eperm) -> "not owner"; +message(epfnosupport) -> "protocol family not supported"; +message(epipe) -> "broken pipe"; +message(eproclim) -> "too many processes"; +message(eprocunavail) -> "bad procedure for program"; +message(eprogmismatch) -> "program version wrong"; +message(eprogunavail) -> "RPC program not available"; +message(eproto) -> "protocol error"; +message(eprotonosupport) -> "protocol not suppored"; +message(eprototype) -> "protocol wrong type for socket"; +message(erange) -> "math result unrepresentable"; +message(erefused) -> "EREFUSED"; +message(eremchg) -> "remote address changed"; +message(eremdev) -> "remote device"; +message(eremote) -> "pathname hit remote file system"; +message(eremoteio) -> "remote i/o error"; +message(eremoterelease) -> "EREMOTERELEASE"; +message(erofs) -> "read-only file system"; +message(erpcmismatch) -> "RPC version is wrong"; +message(erremote) -> "object is remote"; +message(eshutdown) -> "can't send after socket shutdown"; +message(esocktnosupport) -> "socket type not supported"; +message(espipe) -> "invalid seek"; +message(esrch) -> "no such process"; +message(esrmnt) -> "srmount error"; +message(estale) -> "stale remote file handle"; +message(esuccess) -> "Error 0"; +message(etime) -> "timer expired"; +message(etimedout) -> "connection timed out"; +message(etoomanyrefs) -> "too many references: can't splice"; +message(etxtbsy) -> "text file or pseudo-device busy"; +message(euclean) -> "structure needs cleaning"; +message(eunatch) -> "protocol driver not attached"; +message(eusers) -> "too many users"; +message(eversion) -> "version mismatch"; +message(ewouldblock) -> "operation would block"; +message(exdev) -> "cross-domain link"; +message(exfull) -> "message tables full"; +message(nxdomain) -> "non-existing domain"; +message(_) -> "unknown POSIX error". diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl new file mode 100644 index 0000000000..b1b5bad294 --- /dev/null +++ b/lib/stdlib/src/erl_pp.erl @@ -0,0 +1,992 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(erl_pp). + +%%% Pretty printer for Erlang code in the same format as returned from +%%% the parser. It does not always produce pretty code. + +-export([form/1,form/2, + attribute/1,attribute/2,function/1,function/2,rule/1,rule/2, + guard/1,guard/2,exprs/1,exprs/2,exprs/3,expr/1,expr/2,expr/3,expr/4]). + +-import(lists, [append/1,foldr/3,mapfoldl/3,reverse/1,reverse/2]). +-import(io_lib, [write/1,format/2,write_char/1,write_string/1]). +-import(erl_parse, [inop_prec/1,preop_prec/1,func_prec/0,max_prec/0]). + +-define(MAXLINE, 72). + +%%% +%%% Exported functions +%%% + +form(Thing) -> + form(Thing, none). + +form(Thing, Hook) -> + frmt(lform(Thing, Hook)). + +attribute(Thing) -> + attribute(Thing, none). + +attribute(Thing, Hook) -> + frmt(lattribute(Thing, Hook)). + +function(F) -> + function(F, none). + +function(F, Hook) -> + frmt(lfunction(F, Hook)). + +rule(R) -> + rule(R, none). + +rule(R, Hook) -> + frmt(lrule(R, Hook)). + +guard(Gs) -> + guard(Gs, none). + +guard(Gs, Hook) -> + frmt(lguard(Gs, Hook)). + +exprs(Es) -> + exprs(Es, 0, none). + +exprs(Es, Hook) -> + exprs(Es, 0, Hook). + +exprs(Es, I, Hook) -> + frmt({seq,[],[],[$,],lexprs(Es, Hook)}, I). + +expr(E) -> + frmt(lexpr(E, 0, none)). + +expr(E, Hook) -> + frmt(lexpr(E, 0, Hook)). + +expr(E, I, Hook) -> + frmt(lexpr(E, 0, Hook), I). + +expr(E, I, P, Hook) -> + frmt(lexpr(E, P, Hook), I). + +%%% +%%% Local functions +%%% + +lform({attribute,Line,Name,Arg}, Hook) -> + lattribute({attribute,Line,Name,Arg}, Hook); +lform({function,Line,Name,Arity,Clauses}, Hook) -> + lfunction({function,Line,Name,Arity,Clauses}, Hook); +lform({rule,Line,Name,Arity,Clauses}, Hook) -> + lrule({rule,Line,Name,Arity,Clauses}, Hook); +%% These are specials to make it easier for the compiler. +lform({error,E}, _Hook) -> + leaf(format("~p\n", [{error,E}])); +lform({warning,W}, _Hook) -> + leaf(format("~p\n", [{warning,W}])); +lform({eof,_Line}, _Hook) -> + $\n. + +lattribute({attribute,_Line,type,Type}, Hook) -> + [typeattr(type, Type, Hook),leaf(".\n")]; +lattribute({attribute,_Line,opaque,Type}, Hook) -> + [typeattr(opaque, Type, Hook),leaf(".\n")]; +lattribute({attribute,_Line,spec,Arg}, _Hook) -> + [specattr(Arg),leaf(".\n")]; +lattribute({attribute,_Line,Name,Arg}, Hook) -> + [lattribute(Name, Arg, Hook),leaf(".\n")]. + +lattribute(module, {M,Vs}, _Hook) -> + attr("module",[{var,0,pname(M)}, + foldr(fun(V, C) -> {cons,0,{var,0,V},C} + end, {nil,0}, Vs)]); +lattribute(module, M, _Hook) -> + attr("module", [{var,0,pname(M)}]); +lattribute(export, Falist, _Hook) -> + call({var,0,"-export"}, [falist(Falist)], 0, none); +lattribute(import, Name, _Hook) when is_list(Name) -> + attr("import", [{var,0,pname(Name)}]); +lattribute(import, {From,Falist}, _Hook) -> + attr("import",[{var,0,pname(From)},falist(Falist)]); +lattribute(file, {Name,Line}, _Hook) -> + attr("file", [{var,0,format("~p", [Name])},{integer,0,Line}]); +lattribute(record, {Name,Is}, Hook) -> + Nl = leaf(format("-record(~w,", [Name])), + [{first,Nl,record_fields(Is, Hook)},$)]; +lattribute(Name, Arg, _Hook) -> + attr(write(Name), [erl_parse:abstract(Arg)]). + +typeattr(Tag, {TypeName,Type,Args}, _Hook) -> + {first,leaf("-"++atom_to_list(Tag)++" "), + typed(call({atom,0,TypeName}, Args, 0, none), Type)}. + +ltype({ann_type,_Line,[V,T]}) -> + typed(lexpr(V, none), T); +ltype({paren_type,_Line,[T]}) -> + [$(,ltype(T),$)]; +ltype({type,_Line,union,Ts}) -> + {seq,[],[],[' |'],ltypes(Ts)}; +ltype({type,_Line,list,[T]}) -> + {seq,$[,$],$,,[ltype(T)]}; +ltype({type,_Line,nonempty_list,[T]}) -> + {seq,$[,$],[$,],[ltype(T),leaf("...")]}; +ltype({type,Line,nil,[]}) -> + lexpr({nil,Line}, 0, none); +ltype({type,Line,tuple,any}) -> + simple_type({atom,Line,tuple}, []); +ltype({type,_Line,tuple,Ts}) -> + tuple_type(Ts, fun ltype/1); +ltype({type,_Line,record,[N|Fs]}) -> + record_type(N, Fs); +ltype({type,_Line,range,[_I1,_I2]=Es}) -> + expr_list(Es, '..', fun lexpr/2, none); +ltype({type,_Line,binary,[I1,I2]}) -> + binary_type(I1, I2); % except binary() +ltype({type,_Line,'fun',[]}) -> + leaf("fun()"); +ltype({type,_Line,'fun',_}=FunType) -> + [fun_type(['fun',$(], FunType),$)]; +ltype({type,Line,T,Ts}) -> + simple_type({atom,Line,T}, Ts); +ltype({remote_type,Line,[M,F,Ts]}) -> + simple_type({remote,Line,M,F}, Ts); +ltype({atom,_,T}) -> + %% Follow the convention to always quote atoms (in types): + leaf([$',atom_to_list(T),$']); +ltype(E) -> + lexpr(E, 0, none). + +binary_type({integer,_,Int1}=I1, {integer,_,Int2}=I2) -> + E1 = [[leaf("_:"),lexpr(I1, 0, none)] || Int1 =/= 0], + E2 = [[leaf("_:_*"),lexpr(I2, 0, none)] || Int2 =/= 0], + {seq,'<<','>>',[$,],E1++E2}. + +record_type({atom,_,Name}, Fields) -> + {first,[record_name(Name)],field_types(Fields)}. + +field_types(Fs) -> + tuple_type(Fs, fun field_type/1). + +field_type({type,_Line,field_type,[Name,Type]}) -> + typed(lexpr(Name, none), Type). + +typed(B, {type,_,union,Ts}) -> + %% Special layout for :: followed by union. + {first,[B,$\s],{seq,[],[],[],union_type(Ts)}}; +typed(B, Type) -> + {list,[{cstep,[B,' ::'],ltype(Type)}]}. + +union_type([T|Ts]) -> + [[leaf(":: "),ltype(T)] | ltypes(Ts, fun union_elem/1)]. + +union_elem(T) -> + [leaf(" | "),ltype(T)]. + +tuple_type(Ts, F) -> + {seq,${,$},[$,],ltypes(Ts, F)}. + +specattr({FuncSpec,TypeSpecs}) -> + Func = case FuncSpec of + {F,_A} -> + format("~w", [F]); + {M,F,_A} -> + format("~w:~w", [M, F]) + end, + {first,leaf("-spec "), + {list,[{first,leaf(Func),spec_clauses(TypeSpecs)}]}}. + +spec_clauses(TypeSpecs) -> + {prefer_nl,[$;],[sig_type(T) || T <- TypeSpecs]}. + +sig_type({type,_Line,bounded_fun,[T,Gs]}) -> + guard_type(fun_type([], T), Gs); +sig_type(FunType) -> + fun_type([], FunType). + +guard_type(Before, Gs) -> + Gl = {list,[{step,'when',expr_list(Gs, [$,], fun constraint/2, none)}]}, + {list,[{step,Before,Gl}]}. + +constraint({type,_Line,constraint,[Tag,As]}, _Hook) -> + simple_type(Tag, As). + +fun_type(Before, {type,_,'fun',[FType,Ret]}) -> + {first,Before,{step,[type_args(FType),' ->'],ltype(Ret)}}. + +type_args({type,_Line,any}) -> + leaf("(...)"); +type_args({type,_line,product,Ts}) -> + targs(Ts). + +simple_type(Tag, Types) -> + {first,lexpr(Tag, 0, none),targs(Types)}. + +targs(Ts) -> + {seq,$(,$),[$,],ltypes(Ts)}. + +ltypes(Ts) -> + ltypes(Ts, fun ltype/1). + +ltypes(Ts, F) -> + [F(T) || T <- Ts]. + +attr(Name, Args) -> + call({var,0,format("-~s", [Name])}, Args, 0, none). + +pname(['' | As]) -> + [$. | pname(As)]; +pname([A]) -> + write(A); +pname([A | As]) -> + [write(A),$.|pname(As)]; +pname(A) when is_atom(A) -> + write(A). + +falist([]) -> + {nil,0}; +falist([{Name,Arity}|Falist]) -> + {cons,0,{var,0,format("~w/~w", [Name,Arity])},falist(Falist)}. + +lfunction({function,_Line,Name,_Arity,Cs}, Hook) -> + Cll = nl_clauses(fun (C, H) -> func_clause(Name, C, H) end, $;, Hook, Cs), + [Cll,leaf(".\n")]. + +func_clause(Name, {clause,Line,Head,Guard,Body}, Hook) -> + Hl = call({atom,Line,Name}, Head, 0, Hook), + Gl = guard_when(Hl, Guard, Hook), + Bl = body(Body, Hook), + {step,Gl,Bl}. + +lrule({rule,_Line,Name,_Arity,Cs}, Hook) -> + Cll = nl_clauses(fun (C, H) -> rule_clause(Name, C, H) end, $;, Hook, Cs), + [Cll,leaf(".\n")]. + +rule_clause(Name, {clause,Line,Head,Guard,Body}, Hook) -> + Hl = call({atom,Line,Name}, Head, 0, Hook), + Gl = guard_when(Hl, Guard, Hook, leaf(" :-")), + Bl = rule_body(Body, Hook), + {step,Gl,Bl}. + +rule_body(Es, Hook) -> + lc_quals(Es, Hook). + +guard_when(Before, Guard, Hook) -> + guard_when(Before, Guard, Hook, ' ->'). + +guard_when(Before, Guard, Hook, After) -> + Gl = lguard(Guard, Hook), + [{list,[{step,Before,Gl}]},After]. + +lguard([E|Es], Hook) when is_list(E) -> + {list,[{step,'when',expr_list([E|Es], [$;], fun guard0/2, Hook)}]}; +lguard([E|Es], Hook) -> % before R6 + lguard([[E|Es]], Hook); +lguard([], _) -> + []. + +guard0(Es, Hook) -> + expr_list(Es, [$,], fun lexpr/2, Hook). + +%% body(Before, Es, Hook) -> [Char]. + +body([E], Hook) -> + lexpr(E, Hook); +body(Es, Hook) -> + {prefer_nl,[$,],lexprs(Es, Hook)}. + +lexpr(E, Hook) -> + lexpr(E, 0, Hook). + +lexpr({var,_,V}, _, _) when is_integer(V) -> %Special hack for Robert + leaf(format("_~w", [V])); +lexpr({var,_,V}, _, _) -> leaf(format("~s", [V])); +lexpr({char,_,C}, _, _) -> leaf(write_char(C)); +lexpr({integer,_,N}, _, _) -> leaf(write(N)); +lexpr({float,_,F}, _, _) -> leaf(write(F)); +lexpr({atom,_,A}, _, _) -> leaf(write(A)); +lexpr({string,_,S}, _, _) -> {string,S}; +lexpr({nil,_}, _, _) -> '[]'; +lexpr({cons,_,H,T}, _, Hook) -> + list(T, [H], Hook); +lexpr({lc,_,E,Qs}, _Prec, Hook) -> + Lcl = {list,[{step,[lexpr(E, Hook),leaf(" ||")],lc_quals(Qs, Hook)}]}, + {list,[{seq,$[,[],[[]],[{force_nl,leaf(" "),[Lcl]}]},$]]}; + %% {list,[{step,$[,Lcl},$]]}; +lexpr({bc,_,E,Qs}, _Prec, Hook) -> + Lcl = {list,[{step,[lexpr(E, Hook),leaf(" ||")],lc_quals(Qs, Hook)}]}, + {list,[{seq,'<<',[],[[]],[{force_nl,leaf(" "),[Lcl]}]},'>>']}; + %% {list,[{step,'<<',Lcl},'>>']}; +lexpr({tuple,_,Elts}, _, Hook) -> + tuple(Elts, Hook); +%%lexpr({struct,_,Tag,Elts}, _, Hook) -> +%% {first,format("~w", [Tag]),tuple(Elts, Hook)}; +lexpr({record_index, _, Name, F}, Prec, Hook) -> + {P,R} = preop_prec('#'), + Nl = record_name(Name), + El = [Nl,$.,lexpr(F, R, Hook)], + maybe_paren(P, Prec, El); +lexpr({record, _, Name, Fs}, Prec, Hook) -> + {P,_R} = preop_prec('#'), + Nl = record_name(Name), + El = {first,Nl,record_fields(Fs, Hook)}, + maybe_paren(P, Prec, El); +lexpr({record_field, _, Rec, Name, F}, Prec, Hook) -> + {L,P,R} = inop_prec('#'), + Rl = lexpr(Rec, L, Hook), + Nl = leaf(format("#~w.", [Name])), + El = [Rl,Nl,lexpr(F, R, Hook)], + maybe_paren(P, Prec, El); +lexpr({record, _, Rec, Name, Fs}, Prec, Hook) -> + {L,P,_R} = inop_prec('#'), + Rl = lexpr(Rec, L, Hook), + Nl = record_name(Name), + El = {first,[Rl,Nl],record_fields(Fs, Hook)}, + maybe_paren(P, Prec, El); +lexpr({record_field, _, {atom,_,''}, F}, Prec, Hook) -> + {_L,P,R} = inop_prec('.'), + El = [$.,lexpr(F, R, Hook)], + maybe_paren(P, Prec, El); +lexpr({record_field, _, Rec, F}, Prec, Hook) -> + {L,P,R} = inop_prec('.'), + El = [lexpr(Rec, L, Hook),$.,lexpr(F, R, Hook)], + maybe_paren(P, Prec, El); +lexpr({block,_,Es}, _, Hook) -> + {list,[{step,'begin',body(Es, Hook)},'end']}; +lexpr({'if',_,Cs}, _, Hook) -> + {list,[{step,'if',if_clauses(Cs, Hook)},'end']}; +lexpr({'case',_,Expr,Cs}, _, Hook) -> + {list,[{step,{list,[{step,'case',lexpr(Expr, Hook)},'of']}, + cr_clauses(Cs, Hook)}, + 'end']}; +lexpr({'cond',_,Cs}, _, Hook) -> + {list,[{step,leaf("cond"),cond_clauses(Cs, Hook)},'end']}; +lexpr({'receive',_,Cs}, _, Hook) -> + {list,[{step,'receive',cr_clauses(Cs, Hook)},'end']}; +lexpr({'receive',_,Cs,To,ToOpt}, _, Hook) -> + Al = {list,[{step,[lexpr(To, Hook),' ->'],body(ToOpt, Hook)}]}, + {list,[{step,'receive',cr_clauses(Cs, Hook)}, + {step,'after',Al}, + 'end']}; +lexpr({'fun',_,{function,F,A}}, _Prec, _Hook) -> + leaf(format("fun ~w/~w", [F,A])); +lexpr({'fun',_,{function,F,A},Extra}, _Prec, _Hook) -> + {force_nl,fun_info(Extra),leaf(format("fun ~w/~w", [F,A]))}; +lexpr({'fun',_,{function,M,F,A}}, _Prec, _Hook) -> + leaf(format("fun ~w:~w/~w", [M,F,A])); +lexpr({'fun',_,{clauses,Cs}}, _Prec, Hook) -> + {list,[{first,'fun',fun_clauses(Cs, Hook)},'end']}; +lexpr({'fun',_,{clauses,Cs},Extra}, _Prec, Hook) -> + {force_nl,fun_info(Extra), + {list,[{first,'fun',fun_clauses(Cs, Hook)},'end']}}; +lexpr({'query',_,Lc}, _Prec, Hook) -> + {list,[{step,leaf("query"),lexpr(Lc, 0, Hook)},'end']}; +lexpr({call,_,{remote,_,{atom,_,M},{atom,_,F}=N}=Name,Args}, Prec, Hook) -> + case erl_internal:bif(M, F, length(Args)) of + true -> + call(N, Args, Prec, Hook); + false -> + call(Name, Args, Prec, Hook) + end; +lexpr({call,_,Name,Args}, Prec, Hook) -> + call(Name, Args, Prec, Hook); +lexpr({'try',_,Es,Scs,Ccs,As}, _, Hook) -> + {list,[if + Scs =:= [] -> + {step,'try',body(Es, Hook)}; + true -> + {step,{list,[{step,'try',body(Es, Hook)},'of']}, + cr_clauses(Scs, Hook)} + end, + if + Ccs =:= [] -> + []; + true -> + {step,'catch',try_clauses(Ccs, Hook)} + end, + if + As =:= [] -> + []; + true -> + {step,'after',body(As, Hook)} + end, + 'end']}; +lexpr({'catch',_,Expr}, Prec, Hook) -> + {P,R} = preop_prec('catch'), + El = {list,[{step,'catch',lexpr(Expr, R, Hook)}]}, + maybe_paren(P, Prec, El); +lexpr({match,_,Lhs,Rhs}, Prec, Hook) -> + {L,P,R} = inop_prec('='), + Pl = lexpr(Lhs, L, Hook), + Rl = lexpr(Rhs, R, Hook), + El = {list,[{cstep,[Pl,' ='],Rl}]}, + maybe_paren(P, Prec, El); +lexpr({op,_,Op,Arg}, Prec, Hook) -> + {P,R} = preop_prec(Op), + Ol = leaf(format("~s ", [Op])), + El = [Ol,lexpr(Arg, R, Hook)], + maybe_paren(P, Prec, El); +lexpr({op,_,Op,Larg,Rarg}, Prec, Hook) when Op =:= 'orelse'; + Op =:= 'andalso' -> + %% Breaks lines since R12B. + {L,P,R} = inop_prec(Op), + Ll = lexpr(Larg, L, Hook), + Ol = leaf(format("~s", [Op])), + Lr = lexpr(Rarg, R, Hook), + El = {prefer_nl,[[]],[Ll,Ol,Lr]}, + maybe_paren(P, Prec, El); +lexpr({op,_,Op,Larg,Rarg}, Prec, Hook) -> + {L,P,R} = inop_prec(Op), + Ll = lexpr(Larg, L, Hook), + Ol = leaf(format("~s", [Op])), + Lr = lexpr(Rarg, R, Hook), + El = {list,[Ll,Ol,Lr]}, + maybe_paren(P, Prec, El); +%% Special expressions which are not really legal everywhere. +lexpr({remote,_,M,F}, Prec, Hook) -> + {L,P,R} = inop_prec(':'), + NameItem = lexpr(M, L, Hook), + CallItem = lexpr(F, R, Hook), + maybe_paren(P, Prec, [NameItem,$:,CallItem]); +%% BIT SYNTAX: +lexpr({bin,_,Fs}, _, Hook) -> + bit_grp(Fs, Hook); +%% Special case for straight values. +lexpr({value,_,Val}, _,_) -> + leaf(write(Val)); +%% Now do the hook. +lexpr(Other, _Precedence, none) -> + leaf(format("INVALID-FORM:~w:",[Other])); +lexpr(HookExpr, Precedence, {Mod,Func,Eas}) when Mod =/= 'fun' -> + {ehook,HookExpr,Precedence,{Mod,Func,Eas}}; +lexpr(HookExpr, Precedence, Func) -> + {hook,HookExpr,Precedence,Func}. + +call(Name, Args, Prec, Hook) -> + {F,P} = func_prec(), + Item = {first,lexpr(Name, F, Hook),args(Args, Hook)}, + maybe_paren(P, Prec, Item). + +fun_info(Extra) -> + leaf(format("% fun-info: ~w", [Extra])). + +%% BITS: + +bit_grp(Fs, Hook) -> + append([['<<'], + [try + true = Fs =/= [], + S = bin_string(Fs), + true = io_lib:printable_list(S), + {string,S} + catch _:_ -> + bit_elems(Fs, Hook) + end], + ['>>']]). + +bin_string([]) -> + []; +bin_string([{bin_element,_,{char,_,C},_,_}|Bin]) -> + [C | bin_string(Bin)]. + +bit_elems(Es, Hook) -> + expr_list(Es, $,, fun bit_elem/2, Hook). + +bit_elem({bin_element,_,Expr,Sz,Types}, Hook) -> + P = max_prec(), + VChars = lexpr(Expr, P, Hook), + SChars = if + Sz =/= default -> + [VChars,$:,lexpr(Sz, P, Hook)]; + true -> + VChars + end, + if + Types =/= default -> + [SChars,$/|bit_elem_types(Types)]; + true -> + SChars + end. + +bit_elem_types([T]) -> + [bit_elem_type(T)]; +bit_elem_types([T | Rest]) -> + [bit_elem_type(T), $-|bit_elem_types(Rest)]. + +bit_elem_type({A,B}) -> + [lexpr(erl_parse:abstract(A), none), + $:, + lexpr(erl_parse:abstract(B), none)]; +bit_elem_type(T) -> + lexpr(erl_parse:abstract(T), none). + +%% end of BITS + +record_name(Name) -> + leaf(format("#~w", [Name])). + +record_fields(Fs, Hook) -> + tuple(Fs, fun record_field/2, Hook). + +record_field({record_field,_,F,Val}, Hook) -> + {L,_P,R} = inop_prec('='), + Fl = lexpr(F, L, Hook), + Vl = lexpr(Val, R, Hook), + {list,[{cstep,[Fl,' ='],Vl}]}; +record_field({typed_record_field,{record_field,_,F,Val},Type}, Hook) -> + {L,_P,R} = inop_prec('='), + Fl = lexpr(F, L, Hook), + Vl = typed(lexpr(Val, R, Hook), Type), + {list,[{cstep,[Fl,' ='],Vl}]}; +record_field({typed_record_field,Field,Type0}, Hook) -> + Type = remove_undefined(Type0), + typed(record_field(Field, Hook), Type); +record_field({record_field,_,F}, Hook) -> + lexpr(F, 0, Hook). + +remove_undefined({type,L,union,[{atom,_,undefined}|T]}) -> + {type,L,union,T}; +remove_undefined(T) -> % cannot happen + T. + +list({cons,_,H,T}, Es, Hook) -> + list(T, [H|Es], Hook); +list({nil,_}, Es, Hook) -> + proper_list(reverse(Es), Hook); +list(Other, Es, Hook) -> + improper_list(reverse(Es, [Other]), Hook). + +%% if_clauses(Clauses, Hook) -> [Char]. +%% Print 'if' clauses. + +if_clauses(Cs, Hook) -> + clauses(fun if_clause/2, Hook, Cs). + +if_clause({clause,_,[],G,B}, Hook) -> + Gl = [guard_no_when(G, Hook),' ->'], + {step,Gl,body(B, Hook)}. + +guard_no_when([E|Es], Hook) when is_list(E) -> + expr_list([E|Es], $;, fun guard0/2, Hook); +guard_no_when([E|Es], Hook) -> % before R6 + guard_no_when([[E|Es]], Hook); +guard_no_when([], _) -> % cannot happen + leaf("true"). + +%% cr_clauses(Clauses, Hook) -> [Char]. +%% Print 'case'/'receive' clauses. + +cr_clauses(Cs, Hook) -> + clauses(fun cr_clause/2, Hook, Cs). + +cr_clause({clause,_,[T],G,B}, Hook) -> + El = lexpr(T, 0, Hook), + Gl = guard_when(El, G, Hook), + Bl = body(B, Hook), + {step,Gl,Bl}. + +%% try_clauses(Clauses, Hook) -> [Char]. +%% Print 'try' clauses. + +try_clauses(Cs, Hook) -> + clauses(fun try_clause/2, Hook, Cs). + +try_clause({clause,_,[{tuple,_,[{atom,_,throw},V,S]}],G,B}, Hook) -> + El = lexpr(V, 0, Hook), + Sl = stack_backtrace(S, [El], Hook), + Gl = guard_when(Sl, G, Hook), + Bl = body(B, Hook), + {step,Gl,Bl}; +try_clause({clause,_,[{tuple,_,[C,V,S]}],G,B}, Hook) -> + Cs = lexpr(C, 0, Hook), + El = lexpr(V, 0, Hook), + CsEl = [Cs,$:,El], + Sl = stack_backtrace(S, CsEl, Hook), + Gl = guard_when(Sl, G, Hook), + Bl = body(B, Hook), + {step,Gl,Bl}. + +stack_backtrace({var,_,'_'}, El, _Hook) -> + El; +stack_backtrace(S, El, Hook) -> + El++[$:,lexpr(S, 0, Hook)]. + +%% fun_clauses(Clauses, Hook) -> [Char]. +%% Print 'fun' clauses. + +fun_clauses(Cs, Hook) -> + nl_clauses(fun fun_clause/2, [$;], Hook, Cs). + +fun_clause({clause,_,A,G,B}, Hook) -> + El = args(A, Hook), + Gl = guard_when(El, G, Hook), + Bl = body(B, Hook), + {step,Gl,Bl}. + +%% cond_clauses(Clauses, Hook) -> [Char]. +%% Print 'cond' clauses. + +cond_clauses(Cs, Hook) -> + clauses(fun cond_clause/2, Hook, Cs). + +cond_clause({clause,_,[],[[E]],B}, Hook) -> + {step,[lexpr(E, Hook),' ->'],body(B, Hook)}. + +%% nl_clauses(Type, Hook, Clauses) -> [Char]. +%% Generic clause printing function (always breaks lines). + +nl_clauses(Type, Sep, Hook, Cs) -> + {prefer_nl,Sep,lexprs(Cs, Type, Hook)}. + +%% clauses(Type, Hook, Clauses) -> [Char]. +%% Generic clause printing function (breaks lines since R12B). + +clauses(Type, Hook, Cs) -> + {prefer_nl,[$;],lexprs(Cs, Type, Hook)}. + +%% lc_quals(Qualifiers, After, Hook) +%% List comprehension qualifiers (breaks lines since R12B). + +lc_quals(Qs, Hook) -> + {prefer_nl,[$,],lexprs(Qs, fun lc_qual/2, Hook)}. + +lc_qual({b_generate,_,Pat,E}, Hook) -> + Pl = lexpr(Pat, 0, Hook), + {list,[{step,[Pl,leaf(" <=")],lexpr(E, 0, Hook)}]}; +lc_qual({generate,_,Pat,E}, Hook) -> + Pl = lexpr(Pat, 0, Hook), + {list,[{step,[Pl,leaf(" <-")],lexpr(E, 0, Hook)}]}; +lc_qual(Q, Hook) -> + lexpr(Q, 0, Hook). + +proper_list(Es, Hook) -> + {seq,$[,$],$,,lexprs(Es, Hook)}. + +improper_list(Es, Hook) -> + {seq,$[,$],{$,,$|},lexprs(Es, Hook)}. + +tuple(L, Hook) -> + tuple(L, fun lexpr/2, Hook). + +tuple(Es, F, Hook) -> + {seq,${,$},$,,lexprs(Es, F, Hook)}. + +args(As, Hook) -> + {seq,$(,$),[$,],lexprs(As, Hook)}. + +expr_list(Es, Sep, F, Hook) -> + {seq,[],[],Sep,lexprs(Es, F, Hook)}. + +lexprs(Es, Hook) -> + lexprs(Es, fun lexpr/2, Hook). + +lexprs(Es, F, Hook) -> + [F(E, Hook) || E <- Es]. + +maybe_paren(P, Prec, Expr) when P < Prec -> + [$(,Expr,$)]; +maybe_paren(_P, _Prec, Expr) -> + Expr. + +leaf(S) -> + {leaf,iolist_size(S),S}. + +%%% Do the formatting. Currently nothing fancy. Could probably have +%%% done it in one single pass. + +frmt(Item) -> + frmt(Item, 0). + +frmt(Item, I) -> + ST = spacetab(), + WT = wordtable(), + {Chars,_Length} = f(Item, I, ST, WT), + [Chars]. + +%%% What the tags mean: +%%% - C: a character +%%% - [I|Is]: Is follow after I without newline or space +%%% - {list,IPs}: try to put all IPs on one line, if that fails newlines +%%% and indentation are inserted between IPs. +%%% - {first,I,IP2}: IP2 follows after I, and is output with an indentation +%%% updated with the width of I. +%%% - {seq,Before,After,Separator,IPs}: a sequence of Is separated by +%%% Separator. Before is output before IPs, and the indentation of IPs +%%% is updated with the width of Before. After follows after IPs. +%%% - {force_nl,ExtraInfo,I}: fun-info (a comment) forces linebreak before I. +%%% - {prefer_nl,Sep,IPs}: forces linebreak between Is unlesss negative +%%% indentation. +%%% - {string,S}: a string. +%%% - {hook,...}, {ehook,...}: hook expressions. +%%% +%%% list, first, seq, force_nl, and prefer_nl all accept IPs, where each +%%% element is either an item or a tuple {step|cstep,I1,I2}. step means +%%% that I2 is output after linebreak and an incremented indentation. +%%% cstep works similarly, but no linebreak if the width of I1 is less +%%% than the indentation (this is for "A = ). + +f([]=Nil, _I0, _ST, _WT) -> + {Nil,0}; +f(C, _I0, _ST, _WT) when is_integer(C) -> + {C,1}; +f({leaf,Length,Chars}, _I0, _ST, _WT) -> + {Chars,Length}; +f([Item|Items], I0, ST, WT) -> + consecutive(Items, f(Item, I0, ST, WT), I0, ST, WT); +f({list,Items}, I0, ST, WT) -> + f({seq,[],[],[[]],Items}, I0, ST, WT); +f({first,E,Item}, I0, ST, WT) -> + f({seq,E,[],[[]],[Item]}, I0, ST, WT); +f({seq,Before,After,Sep,LItems}, I0, ST, WT) -> + BCharsSize = f(Before, I0, ST, WT), + I = indent(BCharsSize, I0), + CharsSizeL = fl(LItems, Sep, I, After, ST, WT), + {CharsL,SizeL} = unz(CharsSizeL), + {BCharsL,BSizeL} = unz1([BCharsSize]), + Sizes = BSizeL ++ SizeL, + NSepChars = if + is_list(Sep), Sep =/= [] -> + erlang:max(0, length(CharsL)-1); + true -> + 0 + end, + case same_line(I0, Sizes, NSepChars) of + {yes,Size} -> + Chars = if + NSepChars > 0 -> insert_sep(CharsL, $\s); + true -> CharsL + end, + {BCharsL++Chars,Size}; + no -> + {BCharsL++insert_newlines(CharsSizeL, I, ST), + nsz(lists:last(Sizes), I0)} + end; +f({force_nl,_ExtraInfoItem,Item}, I, ST, WT) when I < 0 -> + %% Extra info is a comment; cannot have that on the same line + f(Item, I, ST, WT); +f({force_nl,ExtraInfoItem,Item}, I, ST, WT) -> + f({prefer_nl,[],[ExtraInfoItem,Item]}, I, ST, WT); +f({prefer_nl,Sep,LItems}, I, ST, WT) when I < 0 -> + f({seq,[],[],Sep,LItems}, I, ST, WT); +f({prefer_nl,Sep,LItems}, I0, ST, WT) -> + CharsSize2L = fl(LItems, Sep, I0, [], ST, WT), + {_CharsL,Sizes} = unz(CharsSize2L), + if + Sizes =:= [] -> + {[], 0}; + true -> + {insert_newlines(CharsSize2L, I0, ST),nsz(lists:last(Sizes), I0)} + end; +f({string,S}, I, ST, WT) -> + f(write_a_string(S, I), I, ST, WT); +f({hook,HookExpr,Precedence,Func}, I, _ST, _WT) -> + Chars = Func(HookExpr, I, Precedence, Func), + {Chars,indentation(Chars, I)}; +f({ehook,HookExpr,Precedence,{Mod,Func,Eas}=ModFuncEas}, I, _ST, _WT) -> + Chars = apply(Mod, Func, [HookExpr,I,Precedence,ModFuncEas|Eas]), + {Chars,indentation(Chars, I)}; +f(WordName, _I, _ST, WT) -> % when is_atom(WordName) + word(WordName, WT). + +-define(IND, 4). + +%% fl(ListItems, I0, ST, WT) -> [[CharsSize1,CharsSize2]] +%% ListItems = [{Item,Items}|Item] +fl([], _Sep, I0, After, ST, WT) -> + [[f(After, I0, ST, WT),{[],0}]]; +fl(CItems, Sep0, I0, After, ST, WT) -> + F = fun({step,Item1,Item2}, S) -> + [f(Item1, I0, ST, WT),f([Item2,S], incr(I0, ?IND), ST, WT)]; + ({cstep,Item1,Item2}, S) -> + {_,Sz1} = CharSize1 = f(Item1, I0, ST, WT), + if + is_integer(Sz1), Sz1 < ?IND -> + Item2p = [leaf("\s"),Item2,S], + [consecutive(Item2p, CharSize1, I0, ST, WT),{[],0}]; + true -> + [CharSize1,f([Item2,S], incr(I0, ?IND), ST, WT)] + end; + (Item, S) -> + [f([Item,S], I0, ST, WT),{[],0}] + end, + {Sep,LastSep} = case Sep0 of {_,_} -> Sep0; _ -> {Sep0,Sep0} end, + fl1(CItems, F, Sep, LastSep, After). + +fl1([CItem], F, _Sep, _LastSep, After) -> + [F(CItem,After)]; +fl1([CItem1,CItem2], F, _Sep, LastSep, After) -> + [F(CItem1, LastSep),F(CItem2, After)]; +fl1([CItem|CItems], F, Sep, LastSep, After) -> + [F(CItem, Sep)|fl1(CItems, F, Sep, LastSep, After)]. + +consecutive(Items, CharSize1, I0, ST, WT) -> + {CharsSizes,_Length} = + mapfoldl(fun(Item, Len) -> + CharsSize = f(Item, Len, ST, WT), + {CharsSize,indent(CharsSize, Len)} + end, indent(CharSize1, I0), Items), + {CharsL,SizeL} = unz1([CharSize1|CharsSizes]), + {CharsL,line_size(SizeL)}. + +unz(CharsSizesL) -> + unz1(append(CharsSizesL)). + +unz1(CharSizes) -> + lists:unzip(nonzero(CharSizes)). + +nonzero(CharSizes) -> + lists:filter(fun({_,Sz}) -> Sz =/= 0 end, CharSizes). + +insert_newlines(CharsSizesL, I, ST) when I >= 0 -> + insert_nl(foldr(fun([{_C1,0},{_C2,0}], A) -> + A; + ([{C1,_Sz1},{_C2,0}], A) -> + [C1|A]; + ([{C1,_Sz1},{C2,Sz2}], A) when Sz2 > 0 -> + [insert_nl([C1,C2], I+?IND, ST)|A] + end, [], CharsSizesL), I, ST). + + +insert_nl(CharsL, I, ST) -> + insert_sep(CharsL, nl_indent(I, ST)). + +insert_sep([Chars1 | CharsL], Sep) -> + [Chars1 | [[Sep,Chars] || Chars <- CharsL]]. + +nl_indent(0, _T) -> + $\n; +nl_indent(I, T) when I > 0 -> + [$\n|spaces(I, T)]. + +same_line(I0, SizeL, NSepChars) -> + try + Size = lists:sum(SizeL) + NSepChars, + true = incr(I0, Size) =< ?MAXLINE, + {yes,Size} + catch _:_ -> + no + end. + +line_size(SizeL) -> + line_size(SizeL, 0, false). + +line_size([], Size, false) -> + Size; +line_size([], Size, true) -> + {line,Size}; +line_size([{line,Len}|SizeL], _, _) -> + line_size(SizeL, Len, true); +line_size([Sz|SizeL], SizeSoFar, LF) -> + line_size(SizeL, SizeSoFar+Sz, LF). + +nsz({line,_Len}=Sz, _I) -> + Sz; +nsz(Size, I) when I >= 0 -> + {line,Size+I}. + +indent({_Chars,{line,Len}}, _I) -> + Len; +indent({_Chars,Size}, I) -> + incr(I, Size). + +incr(I, _Incr) when I < 0 -> + I; +incr(I, Incr) -> + I+Incr. + +indentation(E, I) when I < 0 -> + iolist_size(E); +indentation(E, I0) -> + I = io_lib_format:indentation(E, I0), + case has_nl(E) of + true -> {line,I}; + false -> I + end. + +has_nl([$\n|_]) -> + true; +has_nl([C|Cs]) when is_integer(C) -> + has_nl(Cs); +has_nl([C|Cs]) -> + has_nl(C) orelse has_nl(Cs); +has_nl([]) -> + false. + +-define(MIN_SUBSTRING, 5). + +write_a_string(S, I) when I < 0; S =:= [] -> + leaf(write_string(S)); +write_a_string(S, I) -> + Len = erlang:max(?MAXLINE-I, ?MIN_SUBSTRING), + {list,write_a_string(S, Len, Len)}. + +write_a_string([], _N, _Len) -> + []; +write_a_string(S, N, Len) -> + SS = string:sub_string(S, 1, N), + Sl = write_string(SS), + case (iolist_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of + true -> + write_a_string(S, N-1, Len); + false -> + [leaf(Sl)|write_a_string(lists:nthtail(length(SS), S), Len, Len)] + end. + +%% +%% Utilities +%% + +-define(N_SPACES, 30). + +spacetab() -> + {[_|L],_} = mapfoldl(fun(_, A) -> {A,[$\s|A]} + end, [], lists:seq(0, ?N_SPACES)), + list_to_tuple(L). + +spaces(N, T) when N =< ?N_SPACES -> + element(N, T); +spaces(N, T) -> + [element(?N_SPACES, T)|spaces(N-?N_SPACES, T)]. + +wordtable() -> + L = [begin {leaf,Sz,S} = leaf(W), {S,Sz} end || + W <- [" ->"," =","<<",">>","[]","after","begin","case","catch", + "end","fun","if","of","receive","try","when"," ::","..", + " |"]], + list_to_tuple(L). + +word(' ->', WT) -> element(1, WT); +word(' =', WT) -> element(2, WT); +word('<<', WT) -> element(3, WT); +word('>>', WT) -> element(4, WT); +word('[]', WT) -> element(5, WT); +word('after', WT) -> element(6, WT); +word('begin', WT) -> element(7, WT); +word('case', WT) -> element(8, WT); +word('catch', WT) -> element(9, WT); +word('end', WT) -> element(10, WT); +word('fun', WT) -> element(11, WT); +word('if', WT) -> element(12, WT); +word('of', WT) -> element(13, WT); +word('receive', WT) -> element(14, WT); +word('try', WT) -> element(15, WT); +word('when', WT) -> element(16, WT); +word(' ::', WT) -> element(17, WT); +word('..', WT) -> element(18, WT); +word(' |', WT) -> element(19, WT). diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl new file mode 100644 index 0000000000..52ec81a78b --- /dev/null +++ b/lib/stdlib/src/erl_scan.erl @@ -0,0 +1,1307 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% Erlang token scanning functions of io library. + +%% For handling ISO 8859-1 (Latin-1) we use the following type +%% information: +%% +%% 000 - 037 NUL - US control +%% 040 - 057 SPC - / punctuation +%% 060 - 071 0 - 9 digit +%% 072 - 100 : - @ punctuation +%% 101 - 132 A - Z uppercase +%% 133 - 140 [ - ` punctuation +%% 141 - 172 a - z lowercase +%% 173 - 176 { - ~ punctuation +%% 177 DEL control +%% 200 - 237 control +%% 240 - 277 NBSP - ¿ punctuation +%% 300 - 326 À - Ö uppercase +%% 327 × punctuation +%% 330 - 336 Ø - Þ uppercase +%% 337 - 366 ß - ö lowercase +%% 367 ÷ punctuation +%% 370 - 377 ø - ÿ lowercase +%% +%% Many punctuation characters have special meaning: +%% $\s, $_, $", $$, $%, $', $. +%% DEL is a punctuation. +%% +%% Must watch using × \327, very close to x \170. + +-module(erl_scan). + +%%% External exports + +-export([string/1,string/2,string/3,tokens/3,tokens/4, + format_error/1,reserved_word/1, + token_info/1,token_info/2, + attributes_info/1,attributes_info/2,set_attribute/3]). + +%%% Local record. +-record(erl_scan, + {resword_fun=fun reserved_word/1, + ws=false, + comment=false, + text=false}). + +%%% +%%% Exported functions +%%% + +-define(COLUMN(C), is_integer(C), C >= 1). +%% Line numbers less than zero have always been allowed: +-define(ALINE(L), is_integer(L)). +-define(STRING(S), is_list(S)). +-define(RESWORDFUN(F), is_function(F, 1)). +-define(SETATTRFUN(F), is_function(F, 1)). + +-type category() :: atom(). +-type column() :: pos_integer(). +-type line() :: integer(). +-type location() :: line() | {line(),column()}. +-type resword_fun() :: fun((atom()) -> boolean()). +-type option() :: 'return' | 'return_white_spaces' | 'return_comments' + | 'text' | {'reserved_word_fun', resword_fun()}. +-type options() :: option() | [option()]. +-type symbol() :: atom() | float() | integer() | string(). +-type info_line() :: integer() | term(). +-type attributes_data() + :: [{'column', column()} | {'line', info_line()} | {'text', string()}] + | {line(), column()}. +%% The fact that {line(),column()} is a possible attributes() type +%% is hidden. +-type attributes() :: line() | attributes_data(). +-type token() :: {category(), attributes(), symbol()} + | {category(), attributes()}. +-type tokens() :: [token()]. +-type error_description() :: term(). +-type error_info() :: {location(), module(), error_description()}. + +-spec format_error(Error :: term()) -> string(). +format_error({string,Quote,Head}) -> + lists:flatten(["unterminated " ++ string_thing(Quote) ++ + " starting with " ++ + io_lib:write_unicode_string(Head, Quote)]); +format_error({illegal,Type}) -> + lists:flatten(io_lib:fwrite("illegal ~w", [Type])); +format_error(char) -> "unterminated character"; +format_error({base,Base}) -> + lists:flatten(io_lib:fwrite("illegal base '~w'", [Base])); +format_error(Other) -> + lists:flatten(io_lib:write(Other)). + +-type string_return() :: {'ok', tokens(), location()} + | {'error', error_info(), location()}. + +-spec string(String :: string()) -> string_return(). +string(String) -> + string(String, 1, []). + +-spec string(String :: string(), StartLocation :: location()) -> + string_return(). +string(String, StartLocation) -> + string(String, StartLocation, []). + +-spec string(String :: string(), StartLocation :: location(), + Options :: options()) -> string_return(). +string(String, Line, Options) when ?STRING(String), ?ALINE(Line) -> + string1(String, options(Options), Line, no_col, []); +string(String, {Line,Column}, Options) when ?STRING(String), + ?ALINE(Line), + ?COLUMN(Column) -> + string1(String, options(Options), Line, Column, []). + +-type char_spec() :: string() | 'eof'. +-type cont_fun() :: fun((char_spec(), #erl_scan{}, line(), column(), + tokens(), any()) -> any()). +-opaque return_cont() :: {string(), column(), tokens(), line(), + #erl_scan{}, cont_fun(), any()}. +-type cont() :: return_cont() | []. +-type tokens_result() :: {'ok', tokens(), location()} + | {'eof', location()} + | {'error', error_info(), location()}. +-type tokens_return() :: {'done', tokens_result(), char_spec()} + | {'more', return_cont()}. + +-spec tokens(Cont :: cont(), CharSpec :: char_spec(), + StartLocation :: location()) -> tokens_return(). +tokens(Cont, CharSpec, StartLocation) -> + tokens(Cont, CharSpec, StartLocation, []). + +-spec tokens(Cont :: cont(), CharSpec :: char_spec(), + StartLocation :: location(), Options :: options()) -> + tokens_return(). +tokens([], CharSpec, Line, Options) when ?ALINE(Line) -> + tokens1(CharSpec, options(Options), Line, no_col, [], fun scan/6, []); +tokens([], CharSpec, {Line,Column}, Options) when ?ALINE(Line), + ?COLUMN(Column) -> + tokens1(CharSpec, options(Options), Line, Column, [], fun scan/6, []); +tokens({Cs,Col,Toks,Line,St,Any,Fun}, CharSpec, _Loc, _Opts) -> + tokens1(Cs++CharSpec, St, Line, Col, Toks, Fun, Any). + +-type attribute_item() :: 'column' | 'length' | 'line' + | 'location' | 'text'. +-type info_location() :: location() | term(). +-type attribute_info() :: {'column', column()}| {'length', pos_integer()} + | {'line', info_line()} + | {'location', info_location()} + | {'text', string()}. +-type token_item() :: 'category' | 'symbol' | attribute_item(). +-type token_info() :: {'category', category()} | {'symbol', symbol()} + | attribute_info(). + +-spec token_info(token()) -> [token_info()]. +token_info(Token) -> + Items = [category,column,length,line,symbol,text], % undefined order + token_info(Token, Items). + +-spec token_info(token(), token_item()) -> token_info() | 'undefined'; + (token(), [token_item()]) -> [token_info()]. +token_info(_Token, []) -> + []; +token_info(Token, [Item|Items]) when is_atom(Item) -> + case token_info(Token, Item) of + undefined -> + token_info(Token, Items); + TokenInfo when is_tuple(TokenInfo) -> + [TokenInfo|token_info(Token, Items)] + end; +token_info({Category,_Attrs}, category=Item) -> + {Item,Category}; +token_info({Category,_Attrs,_Symbol}, category=Item) -> + {Item,Category}; +token_info({Category,_Attrs}, symbol=Item) -> + {Item,Category}; +token_info({_Category,_Attrs,Symbol}, symbol=Item) -> + {Item,Symbol}; +token_info({_Category,Attrs}, Item) -> + attributes_info(Attrs, Item); +token_info({_Category,Attrs,_Symbol}, Item) -> + attributes_info(Attrs, Item). + +-spec attributes_info(attributes()) -> [attribute_info()]. +attributes_info(Attributes) -> + Items = [column,length,line,text], % undefined order + attributes_info(Attributes, Items). + +-spec attributes_info(attributes(), attribute_item()) -> + attribute_info() | 'undefined'; + (attributes(), [attribute_item()]) -> [attribute_info()]. +attributes_info(_Attrs, []) -> + []; +attributes_info(Attrs, [A|As]) when is_atom(A) -> + case attributes_info(Attrs, A) of + undefined -> + attributes_info(Attrs, As); + AttributeInfo when is_tuple(AttributeInfo) -> + [AttributeInfo|attributes_info(Attrs, As)] + end; +attributes_info({Line,Column}, column=Item) when ?ALINE(Line), + ?COLUMN(Column) -> + {Item,Column}; +attributes_info(Line, column) when ?ALINE(Line) -> + undefined; +attributes_info(Attrs, column=Item) -> + attr_info(Attrs, Item); +attributes_info(Attrs, length=Item) -> + case attributes_info(Attrs, text) of + undefined -> + undefined; + {text,Text} -> + {Item,length(Text)} + end; +attributes_info(Line, line=Item) when ?ALINE(Line) -> + {Item,Line}; +attributes_info({Line,Column}, line=Item) when ?ALINE(Line), + ?COLUMN(Column) -> + {Item,Line}; +attributes_info(Attrs, line=Item) -> + attr_info(Attrs, Item); +attributes_info({Line,Column}=Location, location=Item) when ?ALINE(Line), + ?COLUMN(Column) -> + {Item,Location}; +attributes_info(Line, location=Item) when ?ALINE(Line) -> + {Item,Line}; +attributes_info(Attrs, location=Item) -> + {line,Line} = attributes_info(Attrs, line), % assume line is present + case attributes_info(Attrs, column) of + undefined -> + %% If set_attribute() has assigned a term such as {17,42} + %% to 'line', then Line will look like {Line,Column}. One + %% should not use 'location' but 'line' and 'column' in + %% such special cases. + {Item,Line}; + {column,Column} -> + {Item,{Line,Column}} + end; +attributes_info({Line,Column}, text) when ?ALINE(Line), ?COLUMN(Column) -> + undefined; +attributes_info(Line, text) when ?ALINE(Line) -> + undefined; +attributes_info(Attrs, text=Item) -> + attr_info(Attrs, Item); +attributes_info(T1, T2) -> + erlang:error(badarg, [T1,T2]). + +-type setlineattr_fun() :: fun((info_line()) -> info_line()). + +-spec set_attribute('line', attributes(), setlineattr_fun()) -> attributes(). +set_attribute(Tag, Attributes, Fun) when ?SETATTRFUN(Fun) -> + set_attr(Tag, Attributes, Fun). + +%%% +%%% Local functions +%%% + +string_thing($') -> "atom"; %' Stupid Emacs +string_thing(_) -> "string". + +-define(WHITE_SPACE(C), + is_integer(C) andalso + (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)). +-define(DIGIT(C), C >= $0, C =< $9). +-define(CHAR(C), is_integer(C), C >= 0). + +%% A workaround: Unicode strings are not returned as strings, but as +%% lists of integers. For instance, "b\x{aaa}c" => [98,2730,99]. This +%% is to protect the system from character codes greater than 255. To +%% be removed. Search for UNI to find workaround code. +-define(NO_UNICODE, 0). +-define(UNI255(C), (C) =< 16#ff). + +options(Opts0) when is_list(Opts0) -> + Opts = lists:foldr(fun expand_opt/2, [], Opts0), + [RW_fun] = + case opts(Opts, [reserved_word_fun], []) of + badarg -> + erlang:error(badarg, [Opts0]); + R -> + R + end, + Comment = proplists:get_bool(return_comments, Opts), + WS = proplists:get_bool(return_white_spaces, Opts), + Txt = proplists:get_bool(text, Opts), + #erl_scan{resword_fun = RW_fun, + comment = Comment, + ws = WS, + text = Txt}; +options(Opt) -> + options([Opt]). + +opts(Options, [Key|Keys], L) -> + V = case lists:keysearch(Key, 1, Options) of + {value,{reserved_word_fun,F}} when ?RESWORDFUN(F) -> + {ok,F}; + {value,{Key,_}} -> + badarg; + false -> + {ok,default_option(Key)} + end, + case V of + badarg -> + badarg; + {ok,Value} -> + opts(Options, Keys, [Value|L]) + end; +opts(_Options, [], L) -> + lists:reverse(L). + +default_option(reserved_word_fun) -> + fun reserved_word/1. + +expand_opt(return, Os) -> + [return_comments,return_white_spaces|Os]; +expand_opt(O, Os) -> + [O|Os]. + +attr_info(Attrs, Item) -> + case catch lists:keysearch(Item, 1, Attrs) of + {value,{Item,Value}} -> + {Item,Value}; + false -> + undefined; + _ -> + erlang:error(badarg, [Attrs, Item]) + end. + +-spec set_attr('line', attributes(), fun((line()) -> line())) -> attributes(). + +set_attr(line, Line, Fun) when ?ALINE(Line) -> + Ln = Fun(Line), + if + ?ALINE(Ln) -> + Ln; + true -> + [{line,Ln}] + end; +set_attr(line, {Line,Column}, Fun) when ?ALINE(Line), ?COLUMN(Column) -> + Ln = Fun(Line), + if + ?ALINE(Ln) -> + {Ln,Column}; + true -> + [{line,Ln},{column,Column}] + end; +set_attr(line=Tag, Attrs, Fun) when is_list(Attrs) -> + {line,Line} = lists:keyfind(Tag, 1, Attrs), + lists:keyreplace(Tag, 1, Attrs, {line,Fun(Line)}); +set_attr(T1, T2, T3) -> + erlang:error(badarg, [T1,T2,T3]). + +tokens1(Cs, St, Line, Col, Toks, Fun, Any) when ?STRING(Cs); Cs =:= eof -> + case Fun(Cs, St, Line, Col, Toks, Any) of + {more,{Cs0,Ncol,Ntoks,Nline,Nany,Nfun}} -> + {more,{Cs0,Ncol,Ntoks,Nline,St,Nany,Nfun}}; + {ok,Toks0,eof,Nline,Ncol} -> + Res = case Toks0 of + [] -> + {eof,location(Nline, Ncol)}; + _ -> + {ok,lists:reverse(Toks0),location(Nline,Ncol)} + end, + {done,Res,eof}; + {ok,Toks0,Rest,Nline,Ncol} -> + {done,{ok,lists:reverse(Toks0),location(Nline, Ncol)},Rest}; + {{error,_,_}=Error,Rest} -> + {done,Error,Rest} + end. + +string1(Cs, St, Line, Col, Toks) -> + case scan1(Cs, St, Line, Col, Toks) of + {more,{Cs0,Ncol,Ntoks,Nline,Any,Fun}} -> + case Fun(Cs0++eof, St, Nline, Ncol, Ntoks, Any) of + {ok,Toks1,_Rest,Line2,Col2} -> + {ok,lists:reverse(Toks1),location(Line2, Col2)}; + {{error,_,_}=Error,_Rest} -> + Error + end; + {ok,Ntoks,[_|_]=Rest,Nline,Ncol} -> + string1(Rest, St, Nline, Ncol, Ntoks); + {ok,Ntoks,_,Nline,Ncol} -> + {ok,lists:reverse(Ntoks),location(Nline, Ncol)}; + {{error,_,_}=Error,_Rest} -> + Error + end. + +scan(Cs, St, Line, Col, Toks, _) -> + scan1(Cs, St, Line, Col, Toks). + +scan1([$\s|Cs], St, Line, Col, Toks) when St#erl_scan.ws -> + scan_spcs(Cs, St, Line, Col, Toks, 1); +scan1([$\s|Cs], St, Line, Col, Toks) -> + skip_white_space(Cs, St, Line, Col, Toks, 1); +scan1([$\n|Cs], St, Line, Col, Toks) when St#erl_scan.ws -> + scan_newline(Cs, St, Line, Col, Toks); +scan1([$\n|Cs], St, Line, Col, Toks) -> + skip_white_space(Cs, St, Line+1, new_column(Col, 1), Toks, 0); +scan1([C|Cs], St, Line, Col, Toks) when C >= $A, C =< $Z -> + scan_variable(Cs, St, Line, Col, Toks, [C]); +scan1([C|Cs], St, Line, Col, Toks) when C >= $a, C =< $z -> + scan_atom(Cs, St, Line, Col, Toks, [C]); +%% Optimization: some very common punctuation characters: +scan1([$,|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, ",", ',', 1); +scan1([$(|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "(", '(', 1); +scan1([$)|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, ")", ')', 1); +scan1([${|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "{", '{', 1); +scan1([$}|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "}", '}', 1); +scan1([$[|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "[", '[', 1); +scan1([$]|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "]", ']', 1); +scan1([$;|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, ";", ';', 1); +scan1([$_=C|Cs], St, Line, Col, Toks) -> + scan_variable(Cs, St, Line, Col, Toks, [C]); +%% More punctuation characters below. +scan1([$\%|Cs], St, Line, Col, Toks) when not St#erl_scan.comment -> + skip_comment(Cs, St, Line, Col, Toks, 1); +scan1([$\%=C|Cs], St, Line, Col, Toks) -> + scan_comment(Cs, St, Line, Col, Toks, [C]); +scan1([C|Cs], St, Line, Col, Toks) when ?DIGIT(C) -> + scan_number(Cs, St, Line, Col, Toks, [C]); +scan1([$.=C|Cs], St, Line, Col, Toks) -> + scan_dot(Cs, St, Line, Col, Toks, [C]); +scan1([$"|Cs], St, Line, Col, Toks) -> %" Emacs + State0 = {[],[],Line,Col,?NO_UNICODE}, + scan_string(Cs, St, Line, incr_column(Col, 1), Toks, State0); +scan1([$'|Cs], St, Line, Col, Toks) -> %' Emacs + State0 = {[],[],Line,Col,?NO_UNICODE}, + scan_qatom(Cs, St, Line, incr_column(Col, 1), Toks, State0); +scan1([$$|Cs], St, Line, Col, Toks) -> + scan_char(Cs, St, Line, Col, Toks); +scan1([$\r|Cs], St, Line, Col, Toks) when St#erl_scan.ws -> + white_space_end(Cs, St, Line, Col, Toks, 1, "\r"); +scan1([C|Cs], St, Line, Col, Toks) when C >= $ß, C =< $ÿ, C =/= $÷ -> + scan_atom(Cs, St, Line, Col, Toks, [C]); +scan1([C|Cs], St, Line, Col, Toks) when C >= $À, C =< $Þ, C /= $× -> + scan_variable(Cs, St, Line, Col, Toks, [C]); +scan1([$\t|Cs], St, Line, Col, Toks) when St#erl_scan.ws -> + scan_tabs(Cs, St, Line, Col, Toks, 1); +scan1([$\t|Cs], St, Line, Col, Toks) -> + skip_white_space(Cs, St, Line, Col, Toks, 1); +scan1([C|Cs], St, Line, Col, Toks) when ?WHITE_SPACE(C) -> + case St#erl_scan.ws of + true -> + scan_white_space(Cs, St, Line, Col, Toks, [C]); + false -> + skip_white_space(Cs, St, Line, Col, Toks, 1) + end; +%% Punctuation characters and operators, first recognise multiples. +%% << <- <= +scan1("<<"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "<<", '<<', 2); +scan1("<-"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "<-", '<-', 2); +scan1("<="++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "<=", '<=', 2); +scan1("<"=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +%% >> >= +scan1(">>"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, ">>", '>>', 2); +scan1(">="++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, ">=", '>=', 2); +scan1(">"=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +%% -> -- +scan1("->"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "->", '->', 2); +scan1("--"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "--", '--', 2); +scan1("-"=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +%% ++ +scan1("++"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "++", '++', 2); +scan1("+"=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +%% =:= =/= =< == +scan1("=:="++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "=:=", '=:=', 3); +scan1("=:"=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +scan1("=/="++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "=/=", '=/=', 3); +scan1("=/"=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +scan1("=<"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "=<", '=<', 2); +scan1("=="++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "==", '==', 2); +scan1("="=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +%% /= +scan1("/="++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "/=", '/=', 2); +scan1("/"=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +%% || +scan1("||"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "||", '||', 2); +scan1("|"=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +%% :- +scan1(":-"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, ":-", ':-', 2); +%% :: for typed records +scan1("::"++Cs, St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "::", '::', 2); +scan1(":"=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +%% Optimization: punctuation characters less than 127: +scan1([$=|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "=", '=', 1); +scan1([$:|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, ":", ':', 1); +scan1([$||Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "|", '|', 1); +scan1([$#|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "#", '#', 1); +scan1([$/|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "/", '/', 1); +scan1([$?|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "?", '?', 1); +scan1([$-|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "-", '-', 1); +scan1([$+|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "+", '+', 1); +scan1([$*|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "*", '*', 1); +scan1([$<|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "<", '<', 1); +scan1([$>|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, ">", '>', 1); +scan1([$!|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "!", '!', 1); +scan1([$@|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "@", '@', 1); +scan1([$\\|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "\\", '\\', 1); +scan1([$^|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "^", '^', 1); +scan1([$`|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "`", '`', 1); +scan1([$~|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "~", '~', 1); +scan1([$&|Cs], St, Line, Col, Toks) -> + tok2(Cs, St, Line, Col, Toks, "&", '&', 1); +%% End of optimization. +scan1([C|Cs], St, Line, Col, Toks) when ?CHAR(C) -> + Str = [C], + case catch list_to_atom(Str) of + Sym when is_atom(Sym) -> + tok2(Cs, St, Line, Col, Toks, Str, Sym, 1); + _ -> + Ncol = incr_column(Col, 1), + scan_error({illegal,character}, Line, Col, Line, Ncol, Cs) + end; +scan1([]=Cs, _St, Line, Col, Toks) -> + {more,{Cs,Col,Toks,Line,[],fun scan/6}}; +scan1(eof=Cs, _St, Line, Col, Toks) -> + {ok,Toks,Cs,Line,Col}. + +scan_atom(Cs0, St, Line, Col, Toks, Ncs0) -> + case scan_name(Cs0, Ncs0) of + {more,Ncs} -> + {more,{[],Col,Toks,Line,Ncs,fun scan_atom/6}}; + {Wcs,Cs} -> + case catch list_to_atom(Wcs) of + Name when is_atom(Name) -> + case (St#erl_scan.resword_fun)(Name) of + true -> + tok2(Cs, St, Line, Col, Toks, Wcs, Name); + false -> + tok3(Cs, St, Line, Col, Toks, atom, Wcs, Name) + end; + _Error -> + Ncol = incr_column(Col, length(Wcs)), + scan_error({illegal,atom}, Line, Col, Line, Ncol, Cs) + end + end. + +scan_variable(Cs0, St, Line, Col, Toks, Ncs0) -> + case scan_name(Cs0, Ncs0) of + {more,Ncs} -> + {more,{[],Col,Toks,Line,Ncs,fun scan_variable/6}}; + {Wcs,Cs} -> + case catch list_to_atom(Wcs) of + Name when is_atom(Name) -> + tok3(Cs, St, Line, Col, Toks, var, Wcs, Name); + _Error -> + Ncol = incr_column(Col, length(Wcs)), + scan_error({illegal,var}, Line, Col, Line, Ncol, Cs) + end + end. + +scan_name([C|Cs], Ncs) when C >= $a, C =< $z -> + scan_name(Cs, [C|Ncs]); +scan_name([C|Cs], Ncs) when C >= $A, C =< $Z -> + scan_name(Cs, [C|Ncs]); +scan_name([$_=C|Cs], Ncs) -> + scan_name(Cs, [C|Ncs]); +scan_name([C|Cs], Ncs) when ?DIGIT(C) -> + scan_name(Cs, [C|Ncs]); +scan_name([$@=C|Cs], Ncs) -> + scan_name(Cs, [C|Ncs]); +scan_name([C|Cs], Ncs) when C >= $ß, C =< $ÿ, C =/= $÷ -> + scan_name(Cs, [C|Ncs]); +scan_name([C|Cs], Ncs) when C >= $À, C =< $Þ, C =/= $× -> + scan_name(Cs, [C|Ncs]); +scan_name([], Ncs) -> + {more,Ncs}; +scan_name(Cs, Ncs) -> + {lists:reverse(Ncs),Cs}. + +scan_dot([$%|_]=Cs, St, Line, Col, Toks, Ncs) -> + Attrs = attributes(Line, Col, St, Ncs), + {ok,[{dot,Attrs}|Toks],Cs,Line,incr_column(Col, 1)}; +scan_dot([$\n=C|Cs], St, Line, Col, Toks, Ncs) -> + Attrs = attributes(Line, Col, St, Ncs++[C]), + {ok,[{dot,Attrs}|Toks],Cs,Line+1,new_column(Col, 1)}; +scan_dot([C|Cs], St, Line, Col, Toks, Ncs) when ?WHITE_SPACE(C) -> + Attrs = attributes(Line, Col, St, Ncs++[C]), + {ok,[{dot,Attrs}|Toks],Cs,Line,incr_column(Col, 2)}; +scan_dot([]=Cs, _St, Line, Col, Toks, Ncs) -> + {more,{Cs,Col,Toks,Line,Ncs,fun scan_dot/6}}; +scan_dot(eof=Cs, St, Line, Col, Toks, Ncs) -> + Attrs = attributes(Line, Col, St, Ncs), + {ok,[{dot,Attrs}|Toks],Cs,Line,incr_column(Col, 1)}; +scan_dot(Cs, St, Line, Col, Toks, Ncs) -> + tok2(Cs, St, Line, Col, Toks, Ncs, '.', 1). + +%%% White space characters are very common, so it is worthwhile to +%%% scan them fast and store them compactly. (The words "whitespace" +%%% and "white space" usually mean the same thing. The Erlang +%%% specification denotes the characters with ASCII code in the +%%% interval 0 to 32 as "white space".) +%%% +%%% Convention: if there is a white newline ($\n) it will always be +%%% the first character in the text string. As a consequence, there +%%% cannot be more than one newline in a white_space token string. +%%% +%%% Some common combinations are recognized, some are not. Examples +%%% of the latter are tab(s) followed by space(s), like "\t ". +%%% (They will be represented by two (or more) tokens.) +%%% +%%% Note: the character sequence "\r\n" is *not* recognized since it +%%% would violate the property that $\n will always be the first +%%% character. (But since "\r\n\r\n" is common, it pays off to +%%% recognize "\n\r".) + +scan_newline([$\s|Cs], St, Line, Col, Toks) -> + scan_nl_spcs(Cs, St, Line, Col, Toks, 2); +scan_newline([$\t|Cs], St, Line, Col, Toks) -> + scan_nl_tabs(Cs, St, Line, Col, Toks, 2); +scan_newline([$\r|Cs], St, Line, Col, Toks) -> + newline_end(Cs, St, Line, Col, Toks, 2, "\n\r"); +scan_newline([$\f|Cs], St, Line, Col, Toks) -> + newline_end(Cs, St, Line, Col, Toks, 2, "\n\f"); +scan_newline([], _St, Line, Col, Toks) -> + {more,{[$\n],Col,Toks,Line,[],fun scan/6}}; +scan_newline(Cs, St, Line, Col, Toks) -> + scan_nl_white_space(Cs, St, Line, Col, Toks, "\n"). + +scan_nl_spcs([$\s|Cs], St, Line, Col, Toks, N) when N < 17 -> + scan_nl_spcs(Cs, St, Line, Col, Toks, N+1); +scan_nl_spcs([]=Cs, _St, Line, Col, Toks, N) -> + {more,{Cs,Col,Toks,Line,N,fun scan_nl_spcs/6}}; +scan_nl_spcs(Cs, St, Line, Col, Toks, N) -> + newline_end(Cs, St, Line, Col, Toks, N, nl_spcs(N)). + +scan_nl_tabs([$\t|Cs], St, Line, Col, Toks, N) when N < 11 -> + scan_nl_tabs(Cs, St, Line, Col, Toks, N+1); +scan_nl_tabs([]=Cs, _St, Line, Col, Toks, N) -> + {more,{Cs,Col,Toks,Line,N,fun scan_nl_tabs/6}}; +scan_nl_tabs(Cs, St, Line, Col, Toks, N) -> + newline_end(Cs, St, Line, Col, Toks, N, nl_tabs(N)). + +%% Note: returning {more,Cont} is meaningless here; one could just as +%% well return several tokens. But since tokens() scans up to a full +%% stop anyway, nothing is gained by not collecting all white spaces. +scan_nl_white_space([$\n|Cs], #erl_scan{text = false}=St, Line, no_col=Col, + Toks0, Ncs) -> + Toks = [{white_space,Line,lists:reverse(Ncs)}|Toks0], + scan_newline(Cs, St, Line+1, Col, Toks); +scan_nl_white_space([$\n|Cs], St, Line, Col, Toks, Ncs0) -> + Ncs = lists:reverse(Ncs0), + Attrs = attributes(Line, Col, St, Ncs), + Token = {white_space,Attrs,Ncs}, + scan_newline(Cs, St, Line+1, new_column(Col, length(Ncs)), [Token|Toks]); +scan_nl_white_space([C|Cs], St, Line, Col, Toks, Ncs) when ?WHITE_SPACE(C) -> + scan_nl_white_space(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_nl_white_space([]=Cs, _St, Line, Col, Toks, Ncs) -> + {more,{Cs,Col,Toks,Line,Ncs,fun scan_nl_white_space/6}}; +scan_nl_white_space(Cs, #erl_scan{text = false}=St, Line, no_col=Col, + Toks, Ncs) -> + scan1(Cs, St, Line+1, Col, [{white_space,Line,lists:reverse(Ncs)}|Toks]); +scan_nl_white_space(Cs, St, Line, Col, Toks, Ncs0) -> + Ncs = lists:reverse(Ncs0), + Attrs = attributes(Line, Col, St, Ncs), + Token = {white_space,Attrs,Ncs}, + scan1(Cs, St, Line+1, new_column(Col, length(Ncs)), [Token|Toks]). + +newline_end(Cs, #erl_scan{text = false}=St, Line, no_col=Col, + Toks, _N, Ncs) -> + scan1(Cs, St, Line+1, Col, [{white_space,Line,Ncs}|Toks]); +newline_end(Cs, St, Line, Col, Toks, N, Ncs) -> + Attrs = attributes(Line, Col, St, Ncs), + scan1(Cs, St, Line+1, new_column(Col, N), [{white_space,Attrs,Ncs}|Toks]). + +scan_spcs([$\s|Cs], St, Line, Col, Toks, N) when N < 16 -> + scan_spcs(Cs, St, Line, Col, Toks, N+1); +scan_spcs([]=Cs, _St, Line, Col, Toks, N) -> + {more,{Cs,Col,Toks,Line,N,fun scan_spcs/6}}; +scan_spcs(Cs, St, Line, Col, Toks, N) -> + white_space_end(Cs, St, Line, Col, Toks, N, spcs(N)). + +scan_tabs([$\t|Cs], St, Line, Col, Toks, N) when N < 10 -> + scan_tabs(Cs, St, Line, Col, Toks, N+1); +scan_tabs([]=Cs, _St, Line, Col, Toks, N) -> + {more,{Cs,Col,Toks,Line,N,fun scan_tabs/6}}; +scan_tabs(Cs, St, Line, Col, Toks, N) -> + white_space_end(Cs, St, Line, Col, Toks, N, tabs(N)). + +skip_white_space([$\n|Cs], St, Line, Col, Toks, _N) -> + skip_white_space(Cs, St, Line+1, new_column(Col, 1), Toks, 0); +skip_white_space([C|Cs], St, Line, Col, Toks, N) when ?WHITE_SPACE(C) -> + skip_white_space(Cs, St, Line, Col, Toks, N+1); +skip_white_space([]=Cs, _St, Line, Col, Toks, N) -> + {more,{Cs,Col,Toks,Line,N,fun skip_white_space/6}}; +skip_white_space(Cs, St, Line, Col, Toks, N) -> + scan1(Cs, St, Line, incr_column(Col, N), Toks). + +%% Maybe \t and \s should break the loop. +scan_white_space([$\n|_]=Cs, St, Line, Col, Toks, Ncs) -> + white_space_end(Cs, St, Line, Col, Toks, length(Ncs), lists:reverse(Ncs)); +scan_white_space([C|Cs], St, Line, Col, Toks, Ncs) when ?WHITE_SPACE(C) -> + scan_white_space(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_white_space([]=Cs, _St, Line, Col, Toks, Ncs) -> + {more,{Cs,Col,Toks,Line,Ncs,fun scan_white_space/6}}; +scan_white_space(Cs, St, Line, Col, Toks, Ncs) -> + white_space_end(Cs, St, Line, Col, Toks, length(Ncs), lists:reverse(Ncs)). + +-compile({inline,[white_space_end/7]}). + +white_space_end(Cs, St, Line, Col, Toks, N, Ncs) -> + tok3(Cs, St, Line, Col, Toks, white_space, Ncs, Ncs, N). + +scan_char([$\\|Cs]=Cs0, St, Line, Col, Toks) -> + case scan_escape(Cs, incr_column(Col, 2)) of + more -> + {more,{[$$|Cs0],Col,Toks,Line,[],fun scan/6}}; + {error,Ncs,Error,Ncol} -> + scan_error(Error, Line, Col, Line, Ncol, Ncs); + {eof,Ncol} -> + scan_error(char, Line, Col, Line, Ncol, eof); + {nl,Val,Str,Ncs,Ncol} -> + Attrs = attributes(Line, Col, St, "$\\"++Str), + Ntoks = [{char,Attrs,Val}|Toks], + scan1(Ncs, St, Line+1, Ncol, Ntoks); + {unicode,Val,Str,Ncs,Ncol} -> + Attrs = attributes(Line, Col, St, "$\\"++Str), + Ntoks = [{integer,Attrs,Val}|Toks], % UNI + scan1(Ncs, St, Line, Ncol, Ntoks); + {Val,Str,Ncs,Ncol} -> + Attrs = attributes(Line, Col, St, "$\\"++Str), + Ntoks = [{char,Attrs,Val}|Toks], + scan1(Ncs, St, Line, Ncol, Ntoks) + end; +scan_char([$\n=C|Cs], St, Line, Col, Toks) -> + Attrs = attributes(Line, Col, St, [$$,C]), + scan1(Cs, St, Line+1, new_column(Col, 1), [{char,Attrs,C}|Toks]); +scan_char([C|Cs], St, Line, Col, Toks) when ?CHAR(C) -> + Tag = if ?UNI255(C) -> char; true -> integer end, % UNI + Attrs = attributes(Line, Col, St, [$$,C]), + scan1(Cs, St, Line, incr_column(Col, 2), [{Tag,Attrs,C}|Toks]); +scan_char([], _St, Line, Col, Toks) -> + {more,{[$$],Col,Toks,Line,[],fun scan/6}}; +scan_char(eof, _St, Line, Col, _Toks) -> + scan_error(char, Line, Col, Line, incr_column(Col, 1), eof). + +scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) -> + case scan_string0(Cs, St, Line, Col, $\", Str, Wcs, Uni0) of + {more,Ncs,Nline,Ncol,Nstr,Nwcs,Uni} -> + State = {Nwcs,Nstr,Line0,Col0,Uni}, + {more,{Ncs,Ncol,Toks,Nline,State,fun scan_string/6}}; + {char_error,Ncs,Error,Nline,Ncol,EndCol} -> + scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs); + {error,Nline,Ncol,Nwcs,Ncs} -> + Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars. + scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); + {Ncs,Nline,Ncol,Nstr,Nwcs,?NO_UNICODE} -> + Attrs = attributes(Line0, Col0, St, Nstr), + scan1(Ncs, St, Nline, Ncol, [{string,Attrs,Nwcs}|Toks]); + {Ncs,Nline,Ncol,Nstr,_Nwcs,_Uni} -> + Ntoks = unicode_string_to_list(Line0, Col0, St, Nstr, Toks), + scan1(Ncs, St, Nline, Ncol, Ntoks) + end. + +%% UNI +unicode_string_to_list(Line, Col, St, [$"=C|Nstr], Toks) -> %" Emacs + Paren = {'[',attributes(Line, Col, St, [C])}, + u2l(Nstr, Line, incr_column(Col, 1), St, [Paren|Toks]). + +u2l([$"]=Cs, Line, Col, St, Toks) -> %" Emacs + [{']',attributes(Line, Col, St, Cs)}|Toks]; +u2l([$\n=C|Cs], Line, Col, St, Toks) -> + Ntoks = unicode_nl_tokens(Line, Col, [C], C, St, Toks, Cs), + u2l(Cs, Line+1, new_column(Col, 1), St, Ntoks); +u2l([$\\|Cs], Line, Col, St, Toks) -> + case scan_escape(Cs, Col) of + {nl,Val,ValStr,Ncs,Ncol} -> + Nstr = [$\\|ValStr], + Ntoks = unicode_nl_tokens(Line, Col, Nstr, Val, St, Toks, Ncs), + u2l(Ncs, Line+1, Ncol, St, Ntoks); + {unicode,Val,ValStr,Ncs,Ncol} -> + Nstr = [$\\|ValStr], + Ntoks = unicode_tokens(Line, Col, Nstr, Val, St, Toks, Ncs), + u2l(Ncs, Line, incr_column(Ncol, 1), St, Ntoks); + {Val,ValStr,Ncs,Ncol} -> + Nstr = [$\\|ValStr], + Ntoks = unicode_tokens(Line, Col, Nstr, Val, St, Toks, Ncs), + u2l(Ncs, Line, incr_column(Ncol, 1), St, Ntoks) + end; +u2l([C|Cs], Line, Col, St, Toks) -> + Ntoks = unicode_tokens(Line, Col, [C], C, St, Toks, Cs), + u2l(Cs, Line, incr_column(Col, 1), St, Ntoks). + +unicode_nl_tokens(Line, Col, Str, Val, St, Toks, Cs) -> + Ccol = new_column(Col, 1), + unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Line+1, Ccol). + +unicode_tokens(Line, Col, Str, Val, St, Toks, Cs) -> + Ccol = incr_column(Col, length(Str)), + unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Line, Ccol). + +unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Cline, Ccol) -> + Attrs = attributes(Line, Col, St, Str), + Tag = if ?UNI255(Val) -> char; true -> integer end, + Token = {Tag,Attrs,Val}, + [{',',attributes(Cline, Ccol, St, "")} || Cs =/= "\""] ++ [Token|Toks]. + +scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) -> + case scan_string0(Cs, St, Line, Col, $\', Str, Wcs, Uni0) of + {more,Ncs,Nline,Ncol,Nstr,Nwcs,Uni} -> + State = {Nwcs,Nstr,Line0,Col0,Uni}, + {more,{Ncs,Ncol,Toks,Nline,State,fun scan_qatom/6}}; + {char_error,Ncs,Error,Nline,Ncol,EndCol} -> + scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs); + {error,Nline,Ncol,Nwcs,Ncs} -> + Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars. + scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); + {Ncs,Nline,Ncol,Nstr,Nwcs,?NO_UNICODE} -> + case catch list_to_atom(Nwcs) of + A when is_atom(A) -> + Attrs = attributes(Line0, Col0, St, Nstr), + scan1(Ncs, St, Nline, Ncol, [{atom,Attrs,A}|Toks]); + _ -> + scan_error({illegal,atom}, Line0, Col0, Nline, Ncol, Ncs) + end + end. + +scan_string0(Cs, #erl_scan{text=false}, Line, no_col=Col, Q, [], Wcs, Uni) -> + scan_string_no_col(Cs, Line, Col, Q, Wcs, Uni); +scan_string0(Cs, #erl_scan{text=true}, Line, no_col=Col, Q, Str, Wcs, Uni) -> + scan_string1(Cs, Line, Col, Q, Str, Wcs, Uni); +scan_string0(Cs, _St, Line, Col, Q, [], Wcs, Uni) -> + scan_string_col(Cs, Line, Col, Q, Wcs, Uni); +scan_string0(Cs, _St, Line, Col, Q, Str, Wcs, Uni) -> + scan_string1(Cs, Line, Col, Q, Str, Wcs, Uni). + +%% Optimization. Col =:= no_col. +scan_string_no_col([Q|Cs], Line, Col, Q, Wcs, Uni) -> + {Cs,Line,Col,_DontCare=[],lists:reverse(Wcs),Uni}; +scan_string_no_col([$\n=C|Cs], Line, Col, Q, Wcs, Uni) -> + scan_string_no_col(Cs, Line+1, Col, Q, [C|Wcs], Uni); +scan_string_no_col([C|Cs], Line, Col, Q, Wcs, Uni) when C =/= $\\, + ?CHAR(C), ?UNI255(C) -> + scan_string_no_col(Cs, Line, Col, Q, [C|Wcs], Uni); +scan_string_no_col(Cs, Line, Col, Q, Wcs, Uni) -> + scan_string1(Cs, Line, Col, Q, Wcs, Wcs, Uni). + +%% Optimization. Col =/= no_col. +scan_string_col([Q|Cs], Line, Col, Q, Wcs0, Uni) -> + Wcs = lists:reverse(Wcs0), + Str = [Q|Wcs++[Q]], + {Cs,Line,Col+1,Str,Wcs,Uni}; +scan_string_col([$\n=C|Cs], Line, _xCol, Q, Wcs, Uni) -> + scan_string_col(Cs, Line+1, 1, Q, [C|Wcs], Uni); +scan_string_col([C|Cs], Line, Col, Q, Wcs, Uni) when C =/= $\\, + ?CHAR(C), ?UNI255(C) -> + scan_string_col(Cs, Line, Col+1, Q, [C|Wcs], Uni); +scan_string_col(Cs, Line, Col, Q, Wcs, Uni) -> + scan_string1(Cs, Line, Col, Q, Wcs, Wcs, Uni). + +%% UNI_STR is to be replaced by STR when the Unicode-string-to-list +%% workaround is eventually removed. +-define(UNI_STR(Col, S), S). + +%% Note: in those cases when a 'char_error' tuple is returned below it +%% is tempting to skip over characters up to the first Q character, +%% but then the end location of the error tuple would not correspond +%% to the start location of the returned Rest string. (Maybe the end +%% location could be modified, but that too is ugly.) +scan_string1([Q|Cs], Line, Col, Q, Str0, Wcs0, Uni) -> + Wcs = lists:reverse(Wcs0), + Str = ?UNI_STR(Col, [Q|lists:reverse(Str0, [Q])]), + {Cs,Line,incr_column(Col, 1),Str,Wcs,Uni}; +scan_string1([$\n=C|Cs], Line, Col, Q, Str, Wcs, Uni) -> + Ncol = new_column(Col, 1), + scan_string1(Cs, Line+1, Ncol, Q, ?UNI_STR(Col, [C|Str]), [C|Wcs], Uni); +scan_string1([$\\|Cs]=Cs0, Line, Col, Q, Str, Wcs, Uni) -> + case scan_escape(Cs, Col) of + more -> + {more,Cs0,Line,Col,Str,Wcs,Uni}; + {error,Ncs,Error,Ncol} -> + {char_error,Ncs,Error,Line,Col,incr_column(Ncol, 1)}; + {eof,Ncol} -> + {error,Line,incr_column(Ncol, 1),lists:reverse(Wcs),eof}; + {nl,Val,ValStr,Ncs,Ncol} -> + Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])), + Nwcs = [Val|Wcs], + scan_string1(Ncs, Line+1, Ncol, Q, Nstr, Nwcs, Uni); + {unicode,_Val,_ValStr,Ncs,Ncol} when Q =:= $' -> %' Emacs + {char_error,Ncs,{illegal,character},Line,Col,incr_column(Ncol, 1)}; + {unicode,Val,ValStr,Ncs,Ncol} -> % UNI. Uni is set to Val. + Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])), + Nwcs = [Val|Wcs], % not used + scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, Nstr, Nwcs, Val); + {Val,ValStr,Ncs,Ncol} -> + Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])), + Nwcs = [Val|Wcs], + scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, Nstr, Nwcs, Uni) + end; +scan_string1([C|Cs], Line, no_col=Col, Q, Str, Wcs, Uni) when ?CHAR(C), + ?UNI255(C) -> + %% scan_string1(Cs, Line, Col, Q, Str, [C|Wcs], Uni); + scan_string1(Cs, Line, Col, Q, [C|Str], [C|Wcs], Uni); % UNI +scan_string1([C|Cs], Line, Col, Q, Str, Wcs, Uni) when ?CHAR(C), ?UNI255(C) -> + scan_string1(Cs, Line, Col+1, Q, [C|Str], [C|Wcs], Uni); +scan_string1([C|Cs], Line, Col, $', _Str, _Wcs, _Uni) when ?CHAR(C) -> %' UNI + {char_error,Cs,{illegal,character},Line,Col,incr_column(Col, 1)}; +scan_string1([C|Cs], Line, Col, Q, Str, Wcs, _Uni) when ?CHAR(C) -> % UNI + scan_string1(Cs, Line, incr_column(Col, 1), Q, [C|Str], [C|Wcs], C); +scan_string1([]=Cs, Line, Col, _Q, Str, Wcs, Uni) -> + {more,Cs,Line,Col,Str,Wcs,Uni}; +scan_string1(eof, Line, Col, _Q, _Str, Wcs, _Uni) -> + {error,Line,Col,lists:reverse(Wcs),eof}. + +-define(OCT(C), C >= $0, C =< $7). +-define(HEX(C), C >= $0 andalso C =< $9 orelse + C >= $A andalso C =< $F orelse + C >= $a andalso C =< $f). + +%% \<1-3> octal digits +scan_escape([O1,O2,O3|Cs], Col) when ?OCT(O1), ?OCT(O2), ?OCT(O3) -> + Val = (O1*8 + O2)*8 + O3 - 73*$0, + {Val,?UNI_STR(Col, [O1,O2,O3]),Cs,incr_column(Col, 3)}; +scan_escape([O1,O2], _Col) when ?OCT(O1), ?OCT(O2) -> + more; +scan_escape([O1,O2|Cs], Col) when ?OCT(O1), ?OCT(O2) -> + Val = (O1*8 + O2) - 9*$0, + {Val,?UNI_STR(Col, [O1,O2]),Cs,incr_column(Col, 2)}; +scan_escape([O1], _Col) when ?OCT(O1) -> + more; +scan_escape([O1|Cs], Col) when ?OCT(O1) -> + {O1 - $0,?UNI_STR(Col, [O1]),Cs,incr_column(Col, 1)}; +%% \x{} +scan_escape([$x,${|Cs], Col) -> + scan_hex(Cs, incr_column(Col, 2), []); +scan_escape([$x], _Col) -> + more; +scan_escape([$x|eof], Col) -> + {eof,incr_column(Col, 1)}; +%% \x<2> hexadecimal digits +scan_escape([$x,H1,H2|Cs], Col) when ?HEX(H1), ?HEX(H2) -> + Val = erlang:list_to_integer([H1,H2], 16), + {Val,?UNI_STR(Col, [$x,H1,H2]),Cs,incr_column(Col, 3)}; +scan_escape([$x,H1], _Col) when ?HEX(H1) -> + more; +scan_escape([$x|Cs], Col) -> + {error,Cs,{illegal,character},incr_column(Col, 1)}; +%% \^X -> CTL-X +scan_escape([$^=C0,$\n=C|Cs], Col) -> + {nl,C,?UNI_STR(Col, [C0,C]),Cs,new_column(Col, 1)}; +scan_escape([$^=C0,C|Cs], Col) when ?CHAR(C) -> + Val = C band 31, + {Val,?UNI_STR(Col, [C0,C]),Cs,incr_column(Col, 2)}; +scan_escape([$^], _Col) -> + more; +scan_escape([$^|eof], Col) -> + {eof,incr_column(Col, 1)}; +scan_escape([$\n=C|Cs], Col) -> + {nl,C,?UNI_STR(Col, [C]),Cs,new_column(Col, 1)}; +scan_escape([C0|Cs], Col) when ?CHAR(C0), ?UNI255(C0) -> + C = escape_char(C0), + {C,?UNI_STR(Col, [C0]),Cs,incr_column(Col, 1)}; +scan_escape([C|Cs], Col) when ?CHAR(C) -> % UNI + {unicode,C,?UNI_STR(Col, [C]),Cs,incr_column(Col, 1)}; +scan_escape([], _Col) -> + more; +scan_escape(eof, Col) -> + {eof,Col}. + +scan_hex([C|Cs], no_col=Col, Wcs) when ?HEX(C) -> + scan_hex(Cs, Col, [C|Wcs]); +scan_hex([C|Cs], Col, Wcs) when ?HEX(C) -> + scan_hex(Cs, Col+1, [C|Wcs]); +scan_hex(Cs, Col, Wcs) -> + scan_esc_end(Cs, Col, Wcs, 16, "x{"). + +scan_esc_end([$}|Cs], Col, Wcs0, B, Str0) -> + Wcs = lists:reverse(Wcs0), + case catch erlang:list_to_integer(Wcs, B) of + Val when Val =< 16#FF -> + {Val,?UNI_STR(Col, Str0++Wcs++[$}]),Cs,incr_column(Col, 1)}; + Val when Val =< 16#10FFFF -> + {unicode,Val,?UNI_STR(Col, Str0++Wcs++[$}]),Cs,incr_column(Col,1)}; + _ -> + {error,Cs,{illegal,character},incr_column(Col, 1)} + end; +scan_esc_end([], _Col, _Wcs, _B, _Str0) -> + more; +scan_esc_end(eof, Col, _Wcs, _B, _Str0) -> + {eof,Col}; +scan_esc_end(Cs, Col, _Wcs, _B, _Str0) -> + {error,Cs,{illegal,character},Col}. + +escape_char($n) -> $\n; % \n = LF +escape_char($r) -> $\r; % \r = CR +escape_char($t) -> $\t; % \t = TAB +escape_char($v) -> $\v; % \v = VT +escape_char($b) -> $\b; % \b = BS +escape_char($f) -> $\f; % \f = FF +escape_char($e) -> $\e; % \e = ESC +escape_char($s) -> $\s; % \s = SPC +escape_char($d) -> $\d; % \d = DEL +escape_char(C) -> C. + +scan_number([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> + scan_number(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> + scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs]); +scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs) -> + {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}}; +scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0) -> + Ncs = lists:reverse(Ncs0), + case catch list_to_integer(Ncs) of + B when B >= 2, B =< 1+$Z-$A+10 -> + Bcs = Ncs++[$#], + scan_based_int(Cs, St, Line, Col, Toks, {B,[],Bcs}); + B -> + Len = length(Ncs), + scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0) + end; +scan_number([]=Cs, _St, Line, Col, Toks, Ncs) -> + {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}}; +scan_number(Cs, St, Line, Col, Toks, Ncs0) -> + Ncs = lists:reverse(Ncs0), + case catch list_to_integer(Ncs) of + N when is_integer(N) -> + tok3(Cs, St, Line, Col, Toks, integer, Ncs, N); + _ -> + Ncol = incr_column(Col, length(Ncs)), + scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) + end. + +scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) + when ?DIGIT(C), C < $0+B -> + scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); +scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) + when C >= $A, B > 10, C < $A+B-10 -> + scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); +scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) + when C >= $a, B > 10, C < $a+B-10 -> + scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); +scan_based_int([]=Cs, _St, Line, Col, Toks, State) -> + {more,{Cs,Col,Toks,Line,State,fun scan_based_int/6}}; +scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) -> + Ncs = lists:reverse(Ncs0), + case catch erlang:list_to_integer(Ncs, B) of + N when is_integer(N) -> + tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N); + _ -> + Len = length(Bcs)+length(Ncs), + Ncol = incr_column(Col, Len), + scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) + end. + +scan_fraction([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> + scan_fraction(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_fraction([E|Cs], St, Line, Col, Toks, Ncs) when E =:= $e; E =:= $E -> + scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs]); +scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs) -> + {more,{Cs,Col,Toks,Line,Ncs,fun scan_fraction/6}}; +scan_fraction(Cs, St, Line, Col, Toks, Ncs) -> + float_end(Cs, St, Line, Col, Toks, Ncs). + +scan_exponent_sign([C|Cs], St, Line, Col, Toks, Ncs) when C =:= $+; C =:= $- -> + scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_exponent_sign([]=Cs, _St, Line, Col, Toks, Ncs) -> + {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent_sign/6}}; +scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs) -> + scan_exponent(Cs, St, Line, Col, Toks, Ncs). + +scan_exponent([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> + scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs) -> + {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent/6}}; +scan_exponent(Cs, St, Line, Col, Toks, Ncs) -> + float_end(Cs, St, Line, Col, Toks, Ncs). + +float_end(Cs, St, Line, Col, Toks, Ncs0) -> + Ncs = lists:reverse(Ncs0), + case catch list_to_float(Ncs) of + F when is_float(F) -> + tok3(Cs, St, Line, Col, Toks, float, Ncs, F); + _ -> + Ncol = incr_column(Col, length(Ncs)), + scan_error({illegal,float}, Line, Col, Line, Ncol, Cs) + end. + +skip_comment([C|Cs], St, Line, Col, Toks, N) when C =/= $\n, ?CHAR(C) -> + skip_comment(Cs, St, Line, Col, Toks, N+1); +skip_comment([]=Cs, _St, Line, Col, Toks, N) -> + {more,{Cs,Col,Toks,Line,N,fun skip_comment/6}}; +skip_comment(Cs, St, Line, Col, Toks, N) -> + scan1(Cs, St, Line, incr_column(Col, N), Toks). + +scan_comment([C|Cs], St, Line, Col, Toks, Ncs) when C =/= $\n, ?CHAR(C) -> + scan_comment(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_comment([]=Cs, _St, Line, Col, Toks, Ncs) -> + {more,{Cs,Col,Toks,Line,Ncs,fun scan_comment/6}}; +scan_comment(Cs, St, Line, Col, Toks, Ncs0) -> + Ncs = lists:reverse(Ncs0), + tok3(Cs, St, Line, Col, Toks, comment, Ncs, Ncs). + +tok2(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, _Wcs, P) -> + scan1(Cs, St, Line, Col, [{P,Line}|Toks]); +tok2(Cs, St, Line, Col, Toks, Wcs, P) -> + Attrs = attributes(Line, Col, St, Wcs), + scan1(Cs, St, Line, incr_column(Col, length(Wcs)), [{P,Attrs}|Toks]). + +tok2(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, _Wcs, P, _N) -> + scan1(Cs, St, Line, Col, [{P,Line}|Toks]); +tok2(Cs, St, Line, Col, Toks, Wcs, P, N) -> + Attrs = attributes(Line, Col, St, Wcs), + scan1(Cs, St, Line, incr_column(Col, N), [{P,Attrs}|Toks]). + +tok3(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, Item, _S, Sym) -> + scan1(Cs, St, Line, Col, [{Item,Line,Sym}|Toks]); +tok3(Cs, St, Line, Col, Toks, Item, String, Sym) -> + Token = {Item,attributes(Line, Col, St, String),Sym}, + scan1(Cs, St, Line, incr_column(Col, length(String)), [Token|Toks]). + +tok3(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, Item, + _String, Sym, _Length) -> + scan1(Cs, St, Line, Col, [{Item,Line,Sym}|Toks]); +tok3(Cs, St, Line, Col, Toks, Item, String, Sym, Length) -> + Token = {Item,attributes(Line, Col, St, String),Sym}, + scan1(Cs, St, Line, incr_column(Col, Length), [Token|Toks]). + +scan_error(Error, Line, Col, EndLine, EndCol, Rest) -> + Loc = location(Line, Col), + EndLoc = location(EndLine, EndCol), + scan_error(Error, Loc, EndLoc, Rest). + +scan_error(Error, ErrorLoc, EndLoc, Rest) -> + {{error,{ErrorLoc,?MODULE,Error},EndLoc},Rest}. + +-compile({inline,[attributes/4]}). + +attributes(Line, no_col, #erl_scan{text = false}, _String) -> + Line; +attributes(Line, no_col, #erl_scan{text = true}, String) -> + [{line,Line},{text,String}]; +attributes(Line, Col, #erl_scan{text = false}, _String) -> + {Line,Col}; +attributes(Line, Col, #erl_scan{text = true}, String) -> + [{line,Line},{column,Col},{text,String}]. + +location(Line, no_col) -> + Line; +location(Line, Col) when is_integer(Col) -> + {Line,Col}. + +-compile({inline,[incr_column/2,new_column/2]}). + +incr_column(no_col=Col, _N) -> + Col; +incr_column(Col, N) when is_integer(Col) -> + Col + N. + +new_column(no_col=Col, _Ncol) -> + Col; +new_column(Col, Ncol) when is_integer(Col) -> + Ncol. + +nl_spcs(2) -> "\n "; +nl_spcs(3) -> "\n "; +nl_spcs(4) -> "\n "; +nl_spcs(5) -> "\n "; +nl_spcs(6) -> "\n "; +nl_spcs(7) -> "\n "; +nl_spcs(8) -> "\n "; +nl_spcs(9) -> "\n "; +nl_spcs(10) -> "\n "; +nl_spcs(11) -> "\n "; +nl_spcs(12) -> "\n "; +nl_spcs(13) -> "\n "; +nl_spcs(14) -> "\n "; +nl_spcs(15) -> "\n "; +nl_spcs(16) -> "\n "; +nl_spcs(17) -> "\n ". + +spcs(1) -> " "; +spcs(2) -> " "; +spcs(3) -> " "; +spcs(4) -> " "; +spcs(5) -> " "; +spcs(6) -> " "; +spcs(7) -> " "; +spcs(8) -> " "; +spcs(9) -> " "; +spcs(10) -> " "; +spcs(11) -> " "; +spcs(12) -> " "; +spcs(13) -> " "; +spcs(14) -> " "; +spcs(15) -> " "; +spcs(16) -> " ". + +nl_tabs(2) -> "\n\t"; +nl_tabs(3) -> "\n\t\t"; +nl_tabs(4) -> "\n\t\t\t"; +nl_tabs(5) -> "\n\t\t\t\t"; +nl_tabs(6) -> "\n\t\t\t\t\t"; +nl_tabs(7) -> "\n\t\t\t\t\t\t"; +nl_tabs(8) -> "\n\t\t\t\t\t\t\t"; +nl_tabs(9) -> "\n\t\t\t\t\t\t\t\t"; +nl_tabs(10) -> "\n\t\t\t\t\t\t\t\t\t"; +nl_tabs(11) -> "\n\t\t\t\t\t\t\t\t\t\t". + +tabs(1) -> "\t"; +tabs(2) -> "\t\t"; +tabs(3) -> "\t\t\t"; +tabs(4) -> "\t\t\t\t"; +tabs(5) -> "\t\t\t\t\t"; +tabs(6) -> "\t\t\t\t\t\t"; +tabs(7) -> "\t\t\t\t\t\t\t"; +tabs(8) -> "\t\t\t\t\t\t\t\t"; +tabs(9) -> "\t\t\t\t\t\t\t\t\t"; +tabs(10) -> "\t\t\t\t\t\t\t\t\t\t". + +-spec reserved_word(atom()) -> boolean(). +reserved_word('after') -> true; +reserved_word('begin') -> true; +reserved_word('case') -> true; +reserved_word('try') -> true; +reserved_word('cond') -> true; +reserved_word('catch') -> true; +reserved_word('andalso') -> true; +reserved_word('orelse') -> true; +reserved_word('end') -> true; +reserved_word('fun') -> true; +reserved_word('if') -> true; +reserved_word('let') -> true; +reserved_word('of') -> true; +reserved_word('query') -> true; +reserved_word('receive') -> true; +reserved_word('when') -> true; +reserved_word('bnot') -> true; +reserved_word('not') -> true; +reserved_word('div') -> true; +reserved_word('rem') -> true; +reserved_word('band') -> true; +reserved_word('and') -> true; +reserved_word('bor') -> true; +reserved_word('bxor') -> true; +reserved_word('bsl') -> true; +reserved_word('bsr') -> true; +reserved_word('or') -> true; +reserved_word('xor') -> true; +reserved_word('spec') -> true; +reserved_word(_) -> false. diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl new file mode 100644 index 0000000000..fd85c7aef5 --- /dev/null +++ b/lib/stdlib/src/erl_tar.erl @@ -0,0 +1,959 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(erl_tar). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Purpose: Unix tar (tape archive) utility. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-export([create/2, create/3, extract/1, extract/2, table/1, table/2, + open/2, close/1, add/3, add/4, + t/1, tt/1, format_error/1]). + +-include_lib("kernel/include/file.hrl"). + +-record(add_opts, + {read_info, % Fun to use for read file/link info. + verbose = false :: boolean()}). % Verbose on/off. + +%% Opens a tar archive. + +open(Name, Mode) -> + case open_mode(Mode) of + {ok, Access, Raw, Opts} -> + open1(Name, Access, Raw, Opts); + {error, Reason} -> + {error, {Name, Reason}} + end. + +open1({binary,Bin}, read, _Raw, Opts) -> + case file:open(Bin, [ram,binary,read]) of + {ok,File} -> + case Opts of + [compressed] -> ram_file:uncompress(File); + [] -> ok + end, + {ok,{read,File}}; + Error -> + Error + end; +open1({file, Fd}, read, _Raw, _Opts) -> + {ok, {read, Fd}}; +open1(Name, Access, Raw, Opts) -> + case file:open(Name, Raw ++ [binary, Access|Opts]) of + {ok, File} -> + {ok, {Access, File}}; + {error, Reason} -> + {error, {Name, Reason}} + end. + +%% Closes a tar archive. + +close({read, File}) -> + ok = file:close(File); +close({write, File}) -> + PadResult = pad_file(File), + ok = file:close(File), + PadResult; +close(_) -> + {error, einval}. + +%% Adds a file to a tape archive. + +add(File, Name, Options) -> + add(File, Name, Name, Options). + +add({write, File}, Name, NameInArchive, Options) -> + Opts = #add_opts{read_info=fun(F) -> file:read_link_info(F) end}, + add1(File, Name, NameInArchive, add_opts(Options, Opts)); +add({read, _File}, _, _, _) -> + {error, eacces}; +add(_, _, _, _) -> + {error, einval}. + +add_opts([dereference|T], Opts) -> + add_opts(T, Opts#add_opts{read_info=fun(F) -> file:read_file_info(F) end}); +add_opts([verbose|T], Opts) -> + add_opts(T, Opts#add_opts{verbose=true}); +add_opts([_|T], Opts) -> + add_opts(T, Opts); +add_opts([], Opts) -> + Opts. + +%% Creates a tar file Name containing the given files. + +create(Name, Filenames) -> + create(Name, Filenames, []). + +%% Creates a tar archive Name containing the given files. +%% Accepted options: verbose, compressed, cooked + +create(Name, FileList, Options) -> + Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked) + end, Options), + case open(Name, [write|Mode]) of + {ok, TarFile} -> + Add = fun({NmInA, NmOrBin}) -> + add(TarFile, NmOrBin, NmInA, Options); + (Nm) -> + add(TarFile, Nm, Nm, Options) + end, + Result = foreach_while_ok(Add, FileList), + case {Result, close(TarFile)} of + {ok, Res} -> Res; + {Res, _} -> Res + end; + Reason -> + Reason + end. + +%% Extracts all files from the tar file Name. + +extract(Name) -> + extract(Name, []). + +%% Extracts (all) files from the tar file Name. +%% Options accepted: keep_old_files, {files, ListOfFilesToExtract}, verbose, +%% {cwd, AbsoluteDirectory} + +extract(Name, Opts) -> + foldl_read(Name, fun extract1/4, ok, extract_opts(Opts)). + +%% Returns a list of names of the files in the tar file Name. +%% Options accepted: verbose + +table(Name) -> + table(Name, []). + +%% Returns a list of names of the files in the tar file Name. +%% Options accepted: compressed, verbose, cooked. + +table(Name, Opts) -> + foldl_read(Name, fun table1/4, [], table_opts(Opts)). + + +%% Comments for printing the contents of a tape archive, +%% meant to be invoked from the shell. + +t(Name) -> + case table(Name) of + {ok, List} -> + lists:foreach(fun(N) -> ok = io:format("~s\n", [N]) end, List); + Error -> + Error + end. + +tt(Name) -> + case table(Name, [verbose]) of + {ok, List} -> + lists:foreach(fun print_header/1, List); + Error -> + Error + end. + +print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) -> + io:format("~s~s ~4w/~-4w ~7w ~s ~s\n", + [type_to_string(Type), mode_to_string(Mode), + Uid, Gid, Size, time_to_string(Mtime), Name]). + +type_to_string(regular) -> "-"; +type_to_string(directory) -> "d"; +type_to_string(link) -> "l"; +type_to_string(symlink) -> "s"; +type_to_string(char) -> "c"; +type_to_string(block) -> "b"; +type_to_string(fifo) -> "f"; +type_to_string(_) -> "?". + +mode_to_string(Mode) -> + mode_to_string(Mode, "xwrxwrxwr", []). + +mode_to_string(Mode, [C|T], Acc) when Mode band 1 =:= 1 -> + mode_to_string(Mode bsr 1, T, [C|Acc]); +mode_to_string(Mode, [_|T], Acc) -> + mode_to_string(Mode bsr 1, T, [$-|Acc]); +mode_to_string(_, [], Acc) -> + Acc. + +time_to_string({{Y, Mon, Day}, {H, Min, _}}) -> + io_lib:format("~s ~2w ~s:~s ~w", [month(Mon), Day, two_d(H), two_d(Min), Y]). + +two_d(N) -> + tl(integer_to_list(N + 100)). + +month(1) -> "Jan"; +month(2) -> "Feb"; +month(3) -> "Mar"; +month(4) -> "Apr"; +month(5) -> "May"; +month(6) -> "Jun"; +month(7) -> "Jul"; +month(8) -> "Aug"; +month(9) -> "Sep"; +month(10) -> "Oct"; +month(11) -> "Nov"; +month(12) -> "Dec". + +%% Converts the short error reason to a descriptive string. + +format_error(bad_header) -> "Bad directory header"; +format_error(eof) -> "Unexpected end of file"; +format_error(symbolic_link_too_long) -> "Symbolic link too long"; +format_error({Name,Reason}) -> + lists:flatten(io_lib:format("~s: ~s", [Name,format_error(Reason)])); +format_error(Atom) when is_atom(Atom) -> + file:format_error(Atom); +format_error(Term) -> + lists:flatten(io_lib:format("~p", [Term])). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% +%%% Useful definitions (also start of implementation). +%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Offset for fields in the tar header. +%% Note that these offsets are ZERO-based as in the POSIX standard +%% document, while binaries use ONE-base offset. Caveat Programmer. + +-define(th_name, 0). +-define(th_mode, 100). +-define(th_uid, 108). +-define(th_gid, 116). +-define(th_size, 124). +-define(th_mtime, 136). +-define(th_chksum, 148). +-define(th_typeflag, 156). +-define(th_linkname, 157). +-define(th_magic, 257). +-define(th_version, 263). +-define(th_prefix, 345). + +%% Length of these fields. + +-define(th_name_len, 100). +-define(th_mode_len, 8). +-define(th_uid_len, 8). +-define(th_gid_len, 8). +-define(th_size_len, 12). +-define(th_mtime_len, 12). +-define(th_chksum_len, 8). +-define(th_linkname_len, 100). +-define(th_magic_len, 6). +-define(th_version_len, 2). +-define(th_prefix_len, 167). + +-record(tar_header, + {name, % Name of file. + mode, % Mode bits. + uid, % User id. + gid, % Group id. + size, % Size of file + mtime, % Last modified (seconds since + % Jan 1, 1970). + chksum, % Checksum of header. + typeflag = [], % Type of file. + linkname = [], % Name of link. + filler = [], + prefix}). % Filename prefix. + +-define(record_size, 512). +-define(block_size, (512*20)). + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% +%%% Adding members to a tar archive. +%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +add1(TarFile, Bin, NameInArchive, Opts) when is_binary(Bin) -> + Now = calendar:now_to_local_time(now()), + Info = #file_info{size = byte_size(Bin), + type = regular, + access = read_write, + atime = Now, + mtime = Now, + ctime = Now, + mode = 8#100644, + links = 1, + major_device = 0, + minor_device = 0, + inode = 0, + uid = 0, + gid = 0}, + Header = create_header(NameInArchive, Info), + add1(TarFile, NameInArchive, Header, Bin, Opts); +add1(TarFile, Name, NameInArchive, Opts) -> + case read_file_and_info(Name, Opts) of + {ok, Bin, Info} when Info#file_info.type =:= regular -> + Header = create_header(NameInArchive, Info), + add1(TarFile, Name, Header, Bin, Opts); + {ok, PointsTo, Info} when Info#file_info.type =:= symlink -> + if + length(PointsTo) > 100 -> + {error,{PointsTo,symbolic_link_too_long}}; + true -> + Info2 = Info#file_info{size=0}, + Header = create_header(NameInArchive, Info2, PointsTo), + add1(TarFile, Name, Header, list_to_binary([]), Opts) + end; + {ok, _, Info} when Info#file_info.type =:= directory -> + add_directory(TarFile, Name, NameInArchive, Info, Opts); + {ok, _, #file_info{type=Type}} -> + {error, {bad_file_type, Name, Type}}; + {error, Reason} -> + {error, {Name, Reason}} + end. + +add1(Tar, Name, Header, Bin, Options) -> + add_verbose(Options, "a ~s~n", [Name]), + file:write(Tar, [Header, Bin, padding(byte_size(Bin), ?record_size)]). + +add_directory(TarFile, DirName, NameInArchive, Info, Options) -> + case file:list_dir(DirName) of + {ok, []} -> + add_verbose(Options, "a ~s~n", [DirName]), + Header = create_header(NameInArchive, Info), + file:write(TarFile, Header); + {ok, Files} -> + Add = fun (File) -> + add1(TarFile, + filename:join(DirName, File), + filename:join(NameInArchive, File), + Options) end, + foreach_while_ok(Add, Files); + {error, Reason} -> + {error, {DirName, Reason}} + end. + +%% Creates a header for file in a tar file. + +create_header(Name, Info) -> + create_header(Name, Info, []). +create_header(Name, #file_info {mode=Mode, uid=Uid, gid=Gid, + size=Size, mtime=Mtime0, type=Type}, Linkname) -> + Mtime = posix_time(erlang:localtime_to_universaltime(Mtime0)), + {Prefix,Suffix} = split_filename(Name), + H0 = [to_string(Suffix, 100), + to_octal(Mode, 8), + to_octal(Uid, 8), + to_octal(Gid, 8), + to_octal(Size, ?th_size_len), + to_octal(Mtime, ?th_mtime_len), + <<" ">>, + file_type(Type), + to_string(Linkname, ?th_linkname_len), + "ustar",0, + "00", + zeroes(?th_prefix-?th_version-?th_version_len), + to_string(Prefix, ?th_prefix_len)], + H = list_to_binary(H0), + 512 = byte_size(H), %Assertion. + ChksumString = to_octal(checksum(H), 6, [0,$\s]), + <> = H, + [Before,ChksumString,After]. + +file_type(regular) -> $0; +file_type(symlink) -> $2; +file_type(directory) -> $5. + +to_octal(Int, Count) when Count > 1 -> + to_octal(Int, Count-1, [0]). + +to_octal(_, 0, Result) -> Result; +to_octal(Int, Count, Result) -> + to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]). + +to_string(Str0, Count) -> + Str = list_to_binary(Str0), + case byte_size(Str) of + Size when Size < Count -> + [Str|zeroes(Count-Size)]; + _ -> Str + end. + +%% Pads out end of file. + +pad_file(File) -> + {ok,Position} = file:position(File, {cur,0}), + %% There must be at least one empty record at the end of the file. + Zeros = zeroes(?block_size - (Position rem ?block_size)), + file:write(File, Zeros). + +split_filename(Name) when length(Name) =< ?th_name_len -> + {"", Name}; +split_filename(Name0) -> + split_filename(lists:reverse(filename:split(Name0)), [], [], 0). + +split_filename([Comp|Rest], Prefix, Suffix, Len) + when Len+length(Comp) < ?th_name_len -> + split_filename(Rest, Prefix, [Comp|Suffix], Len+length(Comp)+1); +split_filename([Comp|Rest], Prefix, Suffix, Len) -> + split_filename(Rest, [Comp|Prefix], Suffix, Len+length(Comp)+1); +split_filename([], Prefix, Suffix, _) -> + {filename:join(Prefix),filename:join(Suffix)}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% +%%% Retrieving files from a tape archive. +%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Options used when reading a tar archive. + +-record(read_opts, + {cwd :: string(), % Current working directory. + keep_old_files = false :: boolean(), % Owerwrite or not. + files = all, % Set of files to extract + % (or all). + output = file :: 'file' | 'memory', + open_mode = [], % Open mode options. + verbose = false :: boolean()}). % Verbose on/off. + +extract_opts(List) -> + extract_opts(List, default_options()). + +table_opts(List) -> + read_opts(List, default_options()). + +default_options() -> + {ok, Cwd} = file:get_cwd(), + #read_opts{cwd=Cwd}. + +%% Parse options for extract. + +extract_opts([keep_old_files|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{keep_old_files=true}); +extract_opts([{cwd, Cwd}|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{cwd=Cwd}); +extract_opts([{files, Files}|Rest], Opts) -> + Set = ordsets:from_list(Files), + extract_opts(Rest, Opts#read_opts{files=Set}); +extract_opts([memory|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{output=memory}); +extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> + extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); +extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> + extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); +extract_opts([verbose|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{verbose=true}); +extract_opts([Other|Rest], Opts) -> + extract_opts(Rest, read_opts([Other], Opts)); +extract_opts([], Opts) -> + Opts. + +%% Common options for all read operations. + +read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> + read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); +read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> + read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); +read_opts([verbose|Rest], Opts) -> + read_opts(Rest, Opts#read_opts{verbose=true}); +read_opts([_|Rest], Opts) -> + read_opts(Rest, Opts); +read_opts([], Opts) -> + Opts. + +foldl_read(TarName, Fun, Accu, Opts) -> + case open(TarName, [read|Opts#read_opts.open_mode]) of + {ok, {read, File}} -> + Result = + case catch foldl_read1(Fun, Accu, File, Opts) of + {'EXIT', Reason} -> + exit(Reason); + {error, {Reason, Format, Args}} -> + read_verbose(Opts, Format, Args), + {error, Reason}; + {error, Reason} -> + {error, Reason}; + Ok -> + Ok + end, + ok = file:close(File), + Result; + Error -> + Error + end. + +foldl_read1(Fun, Accu0, File, Opts) -> + case get_header(File) of + eof -> + Fun(eof, File, Opts, Accu0); + Header -> + {ok, NewAccu} = Fun(Header, File, Opts, Accu0), + foldl_read1(Fun, NewAccu, File, Opts) + end. + +table1(eof, _, _, Result) -> + {ok, lists:reverse(Result)}; +table1(Header = #tar_header{}, File, #read_opts{verbose=true}, Result) -> + #tar_header{name=Name, size=Size, mtime=Mtime, typeflag=Type, + mode=Mode, uid=Uid, gid=Gid} = Header, + skip(File, Size), + {ok, [{Name, Type, Size, posix_to_erlang_time(Mtime), Mode, Uid, Gid}|Result]}; +table1(#tar_header{name=Name, size=Size}, File, _, Result) -> + skip(File, Size), + {ok, [Name|Result]}. + +extract1(eof, _, _, Acc) -> + if + is_list(Acc) -> + {ok, lists:reverse(Acc)}; + true -> + Acc + end; +extract1(Header, File, Opts, Acc) -> + Name = Header#tar_header.name, + case check_extract(Name, Opts) of + true -> + {ok, Bin} = get_element(File, Header), + case write_extracted_element(Header, Bin, Opts) of + ok -> + {ok, Acc}; + {ok, NameBin} when is_list(Acc) -> + {ok, [NameBin | Acc]}; + {ok, NameBin} when Acc =:= ok -> + {ok, [NameBin]} + end; + false -> + ok = skip(File, Header#tar_header.size), + {ok, Acc} + end. + +%% Checks if the file Name should be extracted. + +check_extract(_, #read_opts{files=all}) -> + true; +check_extract(Name, #read_opts{files=Files}) -> + ordsets:is_element(Name, Files). + +get_header(File) -> + case file:read(File, ?record_size) of + eof -> + throw({error,eof}); + {ok, Bin} when is_binary(Bin) -> + convert_header(Bin); + {ok, List} -> + convert_header(list_to_binary(List)); + {error, Reason} -> + throw({error, Reason}) + end. + +%% Converts the tar header to a record. + +convert_header(Bin) when byte_size(Bin) =:= ?record_size -> + case verify_checksum(Bin) of + ok -> + Hd = #tar_header{name=get_name(Bin), + mode=from_octal(Bin, ?th_mode, ?th_mode_len), + uid=from_octal(Bin, ?th_uid, ?th_uid_len), + gid=from_octal(Bin, ?th_gid, ?th_gid_len), + size=from_octal(Bin, ?th_size, ?th_size_len), + mtime=from_octal(Bin, ?th_mtime, ?th_mtime_len), + linkname=from_string(Bin, + ?th_linkname, ?th_linkname_len), + typeflag=typeflag(Bin)}, + convert_header1(Hd); + eof -> + eof + end; +convert_header(Bin) when byte_size(Bin) =:= 0 -> + eof; +convert_header(_Bin) -> + throw({error, eof}). + +%% Basic sanity. Better set the element size to zero here if the type +%% always is of zero length. + +convert_header1(H) when H#tar_header.typeflag =:= symlink, H#tar_header.size =/= 0 -> + convert_header1(H#tar_header{size=0}); +convert_header1(H) when H#tar_header.typeflag =:= directory, H#tar_header.size =/= 0 -> + convert_header1(H#tar_header{size=0}); +convert_header1(Header) -> + Header. + +typeflag(Bin) -> + [T] = binary_to_list(Bin, ?th_typeflag+1, ?th_typeflag+1), + case T of + 0 -> regular; + $0 -> regular; + $1 -> link; + $2 -> symlink; + $3 -> char; + $4 -> block; + $5 -> directory; + $6 -> fifo; + $7 -> regular; + _ -> unknown + end. + +%% Get the name of the file from the prefix and name fields of the +%% tar header. + +get_name(Bin) -> + Name = from_string(Bin, ?th_name, ?th_name_len), + case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of + [0] -> + Name; + [_] -> + Prefix = binary_to_list(Bin, ?th_prefix+1, byte_size(Bin)), + lists:reverse(remove_nulls(Prefix), [$/|Name]) + end. + +from_string(Bin, Pos, Len) -> + lists:reverse(remove_nulls(binary_to_list(Bin, Pos+1, Pos+Len))). + +%% Returns all characters up to (but not including) the first null +%% character, in REVERSE order. + +remove_nulls(List) -> + remove_nulls(List, []). + +remove_nulls([0|_], Result) -> + remove_nulls([], Result); +remove_nulls([C|Rest], Result) -> + remove_nulls(Rest, [C|Result]); +remove_nulls([], Result) -> + Result. + +from_octal(Bin, Pos, Len) -> + from_octal(binary_to_list(Bin, Pos+1, Pos+Len)). + +from_octal([$\s|Rest]) -> + from_octal(Rest); +from_octal([Digit|Rest]) when $0 =< Digit, Digit =< $7 -> + from_octal(Rest, Digit-$0); +from_octal(Bin) when is_binary(Bin) -> + from_octal(binary_to_list(Bin)); +from_octal(Other) -> + throw({error, {bad_header, "Bad octal number: ~p", [Other]}}). + +from_octal([Digit|Rest], Result) when $0 =< Digit, Digit =< $7 -> + from_octal(Rest, Result*8+Digit-$0); +from_octal([$\s|_], Result) -> + Result; +from_octal([0|_], Result) -> + Result; +from_octal(Other, _) -> + throw({error, {bad_header, "Bad contents in octal field: ~p", [Other]}}). + +%% Retrieves the next element from the archive. +%% Returns {ok, Bin} | eof | {error, Reason} + +get_element(File, #tar_header{size = 0}) -> + skip_to_next(File), + {ok,<<>>}; +get_element(File, #tar_header{size = Size}) -> + case file:read(File, Size) of + {ok,Bin}=Res when byte_size(Bin) =:= Size -> + skip_to_next(File), + Res; + {ok,List} when length(List) =:= Size -> + skip_to_next(File), + {ok,list_to_binary(List)}; + {ok,_} -> throw({error,eof}); + {error, Reason} -> throw({error, Reason}); + eof -> throw({error,eof}) + end. + +%% Verify the checksum in the header. First try an unsigned addition +%% of all bytes in the header (as it should be according to Posix). + +verify_checksum(Bin) -> + <> = Bin, + case checksum(H1) + checksum(H2) of + 0 -> eof; + Checksum0 -> + Csum = from_octal(CheckStr), + CsumInit = ?th_chksum_len * $\s, + case Checksum0 + CsumInit of + Csum -> ok; + Unsigned -> + verify_checksum(H1, H2, CsumInit, Csum, Unsigned) + end + end. + +%% The checksums didn't match. Now try a signed addition. + +verify_checksum(H1, H2, Csum, ShouldBe, Unsigned) -> + case signed_sum(binary_to_list(H1), signed_sum(binary_to_list(H2), Csum)) of + ShouldBe -> ok; + Signed -> + throw({error, + {bad_header, + "Incorrect directory checksum ~w (~w), should be ~w", + [Signed, Unsigned, ShouldBe]}}) + end. + +signed_sum([C|Rest], Sum) when C < 128 -> + signed_sum(Rest, Sum+C); +signed_sum([C|Rest], Sum) -> + signed_sum(Rest, Sum+C-256); +signed_sum([], Sum) -> Sum. + +write_extracted_element(Header, Bin, Opts) + when Opts#read_opts.output =:= memory -> + case Header#tar_header.typeflag of + regular -> + {ok, {Header#tar_header.name, Bin}}; + _ -> + ok + end; +write_extracted_element(Header, Bin, Opts) -> + Name = filename:absname(Header#tar_header.name, Opts#read_opts.cwd), + Created = + case Header#tar_header.typeflag of + regular -> + write_extracted_file(Name, Bin, Opts); + directory -> + create_extracted_dir(Name, Opts); + symlink -> + create_symlink(Name, Header, Opts); + Other -> % Ignore. + read_verbose(Opts, "x ~s - unsupported type ~p~n", + [Name, Other]), + not_written + end, + case Created of + ok -> set_extracted_file_info(Name, Header); + not_written -> ok + end. + +create_extracted_dir(Name, _Opts) -> + case file:make_dir(Name) of + ok -> ok; + {error,enotsup} -> not_written; + {error,eexist} -> not_written; + {error,enoent} -> make_dirs(Name, dir); + {error,Reason} -> throw({error, Reason}) + end. + +create_symlink(Name, #tar_header{linkname=Linkname}=Header, Opts) -> + case file:make_symlink(Linkname, Name) of + ok -> ok; + {error,enoent} -> + ok = make_dirs(Name, file), + create_symlink(Name, Header, Opts); + {error,eexist} -> not_written; + {error,enotsup} -> + read_verbose(Opts, "x ~s - symbolic links not supported~n", [Name]), + not_written; + {error,Reason} -> throw({error, Reason}) + end. + +write_extracted_file(Name, Bin, Opts) -> + Write = + case Opts#read_opts.keep_old_files of + true -> + case file:read_file_info(Name) of + {ok, _} -> false; + _ -> true + end; + false -> true + end, + case Write of + true -> + read_verbose(Opts, "x ~s~n", [Name]), + write_file(Name, Bin); + false -> + read_verbose(Opts, "x ~s - exists, not created~n", [Name]), + not_written + end. + +write_file(Name, Bin) -> + case file:write_file(Name, Bin) of + ok -> ok; + {error,enoent} -> + ok = make_dirs(Name, file), + write_file(Name, Bin); + {error,Reason} -> + throw({error, Reason}) + end. + +set_extracted_file_info(_, #tar_header{typeflag = symlink}) -> ok; +set_extracted_file_info(Name, #tar_header{mode=Mode, mtime=Mtime}) -> + Info = #file_info{mode=Mode, mtime=posix_to_erlang_time(Mtime)}, + file:write_file_info(Name, Info). + +%% Makes all directories leading up to the file. + +make_dirs(Name, Type) -> + make_dirs1(filename:split(Name), Type). + +make_dirs1([Dir, Next|Rest], Type) -> + case file:read_file_info(Dir) of + {ok, #file_info{type=directory}} -> + make_dirs1([filename:join(Dir, Next)|Rest], Type); + {ok, #file_info{}} -> + throw({error, enotdir}); + {error, _} -> + case file:make_dir(Dir) of + ok -> + make_dirs1([filename:join(Dir, Next)|Rest], Type); + {error, Reason} -> + throw({error, Reason}) + end + end; +make_dirs1([_], file) -> ok; +make_dirs1([Dir], dir) -> + file:make_dir(Dir); +make_dirs1([], _) -> + %% There must be something wrong here. The list was not supposed + %% to be empty. + throw({error, enoent}). + +%% Prints the message on if the verbose option is given (for reading). + +read_verbose(#read_opts{verbose=true}, Format, Args) -> + io:format(Format, Args), + io:nl(); +read_verbose(_, _, _) -> + ok. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% +%%% Utility functions. +%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Returns the checksum of a binary. + +checksum(Bin) -> checksum(Bin, 0). + +checksum(<>, Sum) -> + checksum(T, Sum+A+B+C+D+E+F+G+H); +checksum(<>, Sum) -> + checksum(T, Sum+A); +checksum(<<>>, Sum) -> Sum. + +%% Returns a list of zeroes to pad out to the given block size. + +padding(Size, BlockSize) -> + zeroes(pad_size(Size, BlockSize)). + +pad_size(Size, BlockSize) -> + case Size rem BlockSize of + 0 -> 0; + Rem -> BlockSize-Rem + end. + +zeroes(0) -> []; +zeroes(1) -> [0]; +zeroes(2) -> [0,0]; +zeroes(Number) -> + Half = zeroes(Number div 2), + case Number rem 2 of + 0 -> [Half|Half]; + 1 -> [Half|[0|Half]] + end. + +%% Skips the given number of bytes rounded up to an even record. + +skip(File, Size) -> + %% Note: There is no point in handling failure to get the current position + %% in the file. If it doesn't work, something serious is wrong. + Amount = ((Size + ?record_size - 1) div ?record_size) * ?record_size, + {ok,_} = file:position(File, {cur, Amount}), + ok. + +%% Skips to the next record in the file. + +skip_to_next(File) -> + %% Note: There is no point in handling failure to get the current position + %% in the file. If it doesn't work, something serious is wrong. + {ok, Position} = file:position(File, {cur, 0}), + NewPosition = ((Position + ?record_size - 1) div ?record_size) * ?record_size, + {ok,NewPosition} = file:position(File, NewPosition), + ok. + +%% Prints the message on if the verbose option is given. + +add_verbose(#add_opts{verbose=true}, Format, Args) -> + io:format(Format, Args); +add_verbose(_, _, _) -> + ok. + +%% Converts a tuple containing the time to a Posix time (seconds +%% since Jan 1, 1970). + +posix_time(Time) -> + EpochStart = {{1970,1,1},{0,0,0}}, + {Days,{Hour,Min,Sec}} = calendar:time_difference(EpochStart, Time), + 86400*Days + 3600*Hour + 60*Min + Sec. + +posix_to_erlang_time(Sec) -> + OneMillion = 1000000, + Time = calendar:now_to_datetime({Sec div OneMillion, Sec rem OneMillion, 0}), + erlang:universaltime_to_localtime(Time). + +read_file_and_info(Name, Opts) -> + ReadInfo = Opts#add_opts.read_info, + case ReadInfo(Name) of + {ok,Info} when Info#file_info.type =:= regular -> + case file:read_file(Name) of + {ok,Bin} -> + {ok,Bin,Info}; + Error -> + Error + end; + {ok,Info} when Info#file_info.type =:= symlink -> + case file:read_link(Name) of + {ok,PointsTo} -> + {ok,PointsTo,Info}; + Error -> + Error + end; + {ok, Info} -> + {ok,[],Info}; + Error -> + Error + end. + +foreach_while_ok(Fun, [First|Rest]) -> + case Fun(First) of + ok -> foreach_while_ok(Fun, Rest); + Other -> Other + end; +foreach_while_ok(_, []) -> ok. + +open_mode(Mode) -> + open_mode(Mode, false, [raw], []). + +open_mode(read, _, Raw, _) -> + {ok, read, Raw, []}; +open_mode(write, _, Raw, _) -> + {ok, write, Raw, []}; +open_mode([read|Rest], false, Raw, Opts) -> + open_mode(Rest, read, Raw, Opts); +open_mode([write|Rest], false, Raw, Opts) -> + open_mode(Rest, write, Raw, Opts); +open_mode([compressed|Rest], Access, Raw, Opts) -> + open_mode(Rest, Access, Raw, [compressed|Opts]); +open_mode([cooked|Rest], Access, _Raw, Opts) -> + open_mode(Rest, Access, [], Opts); +open_mode([], Access, Raw, Opts) -> + {ok, Access, Raw, Opts}; +open_mode(_, _, _, _) -> + {error, einval}. diff --git a/lib/stdlib/src/error_logger_file_h.erl b/lib/stdlib/src/error_logger_file_h.erl new file mode 100644 index 0000000000..ee4f0b3a51 --- /dev/null +++ b/lib/stdlib/src/error_logger_file_h.erl @@ -0,0 +1,265 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(error_logger_file_h). + +-behaviour(gen_event). + + +%%% +%%% A handler that can be connected to the error_logger +%%% event handler. +%%% Writes all events formatted to file. +%%% Handles events tagged error, emulator and info. +%%% +%%% It can only be started from error_logger:swap_handler({logfile, File}) +%%% or error_logger:logfile(File) +%%% + +-export([init/1, + handle_event/2, handle_call/2, handle_info/2, + terminate/2, code_change/3]). + +%% This one is used when we takeover from the simple error_logger. +init({File, {error_logger, Buf}}) -> + case init(File, error_logger) of + {ok, {Fd, File, PrevHandler}} -> + write_events(Fd, Buf), + {ok, {Fd, File, PrevHandler}}; + Error -> + Error + end; +%% This one is used when we are started directly. +init(File) -> + init(File, []). + +init(File, PrevHandler) -> + process_flag(trap_exit, true), + case file:open(File, [write]) of + {ok,Fd} -> + {ok, {Fd, File, PrevHandler}}; + Error -> + Error + end. + +handle_event({_Type, GL, _Msg}, State) when node(GL) =/= node() -> + {ok, State}; +handle_event(Event, {Fd, File, PrevHandler}) -> + write_event(Fd, tag_event(Event)), + {ok, {Fd, File, PrevHandler}}; +handle_event(_, State) -> + {ok, State}. + +handle_info({'EXIT', Fd, _Reason}, {Fd, _File, PrevHandler}) -> + case PrevHandler of + [] -> + remove_handler; + _ -> + {swap_handler, install_prev, [], PrevHandler, go_back} + end; +handle_info({emulator, GL, Chars}, {Fd, File, PrevHandler}) + when node(GL) == node() -> + write_event(Fd, tag_event({emulator, GL, Chars})), + {ok, {Fd, File, PrevHandler}}; +handle_info({emulator, noproc, Chars}, {Fd, File, PrevHandler}) -> + write_event(Fd, tag_event({emulator, noproc, Chars})), + {ok, {Fd, File, PrevHandler}}; +handle_info(_, State) -> + {ok, State}. + +handle_call(filename, {Fd, File, Prev}) -> + {ok, File, {Fd, File, Prev}}; +handle_call(_Query, State) -> + {ok, {error, bad_query}, State}. + +terminate(_Reason, State) -> + case State of + {Fd, _File, _Prev} -> + ok = file:close(Fd); + _ -> + ok + end, + []. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%% ------------------------------------------------------ +%%% Misc. functions. +%%% ------------------------------------------------------ + +tag_event(Event) -> + {erlang:localtime(), Event}. + +write_events(Fd, Events) -> write_events1(Fd, lists:reverse(Events)). + +write_events1(Fd, [Event|Es]) -> + write_event(Fd, Event), + write_events1(Fd, Es); +write_events1(_, []) -> + ok. + +write_event(Fd, {Time, {error, _GL, {Pid, Format, Args}}}) -> + T = write_time(maybe_utc(Time)), + case catch io_lib:format(add_node(Format,Pid), Args) of + S when is_list(S) -> + io:format(Fd, T ++ S, []); + _ -> + F = add_node("ERROR: ~p - ~p~n", Pid), + io:format(Fd, T ++ F, [Format,Args]) + end; +write_event(Fd, {Time, {emulator, _GL, Chars}}) -> + T = write_time(maybe_utc(Time)), + case catch io_lib:format(Chars, []) of + S when is_list(S) -> + io:format(Fd, T ++ S, []); + _ -> + io:format(Fd, T ++ "ERROR: ~p ~n", [Chars]) + end; +write_event(Fd, {Time, {info, _GL, {Pid, Info, _}}}) -> + T = write_time(maybe_utc(Time)), + io:format(Fd, T ++ add_node("~p~n",Pid),[Info]); +write_event(Fd, {Time, {error_report, _GL, {Pid, std_error, Rep}}}) -> + T = write_time(maybe_utc(Time)), + S = format_report(Rep), + io:format(Fd, T ++ S ++ add_node("", Pid), []); +write_event(Fd, {Time, {info_report, _GL, {Pid, std_info, Rep}}}) -> + T = write_time(maybe_utc(Time), "INFO REPORT"), + S = format_report(Rep), + io:format(Fd, T ++ S ++ add_node("", Pid), []); +write_event(Fd, {Time, {info_msg, _GL, {Pid, Format, Args}}}) -> + T = write_time(maybe_utc(Time), "INFO REPORT"), + case catch io_lib:format(add_node(Format,Pid), Args) of + S when is_list(S) -> + io:format(Fd, T ++ S, []); + _ -> + F = add_node("ERROR: ~p - ~p~n", Pid), + io:format(Fd, T ++ F, [Format,Args]) + end; +write_event(Fd, {Time, {warning_report, _GL, {Pid, std_warning, Rep}}}) -> + T = write_time(maybe_utc(Time), "WARNING REPORT"), + S = format_report(Rep), + io:format(Fd, T ++ S ++ add_node("", Pid), []); +write_event(Fd, {Time, {warning_msg, _GL, {Pid, Format, Args}}}) -> + T = write_time(maybe_utc(Time), "WARNING REPORT"), + case catch io_lib:format(add_node(Format,Pid), Args) of + S when is_list(S) -> + io:format(Fd, T ++ S, []); + _ -> + F = add_node("ERROR: ~p - ~p~n", Pid), + io:format(Fd, T ++ F, [Format,Args]) + end; +write_event(_, _) -> + ok. + +maybe_utc(Time) -> + UTC = case application:get_env(sasl, utc_log) of + {ok, Val} -> + Val; + undefined -> + %% Backwards compatible: + case application:get_env(stdlib, utc_log) of + {ok, Val} -> + Val; + undefined -> + false + end + end, + if + UTC =:= true -> + {utc, calendar:local_time_to_universal_time(Time)}; + true -> + Time + end. + +format_report(Rep) when is_list(Rep) -> + case string_p(Rep) of + true -> + io_lib:format("~s~n",[Rep]); + _ -> + format_rep(Rep) + end; +format_report(Rep) -> + io_lib:format("~p~n",[Rep]). + +format_rep([{Tag,Data}|Rep]) -> + io_lib:format(" ~p: ~p~n",[Tag,Data]) ++ format_rep(Rep); +format_rep([Other|Rep]) -> + io_lib:format(" ~p~n",[Other]) ++ format_rep(Rep); +format_rep(_) -> + []. + +add_node(X, Pid) when is_atom(X) -> + add_node(atom_to_list(X), Pid); +add_node(X, Pid) when node(Pid) =/= node() -> + lists:concat([X,"** at node ",node(Pid)," **~n"]); +add_node(X, _) -> + X. + +string_p([]) -> + false; +string_p(Term) -> + string_p1(Term). + +string_p1([H|T]) when is_integer(H), H >= $\s, H < 255 -> + string_p1(T); +string_p1([$\n|T]) -> string_p1(T); +string_p1([$\r|T]) -> string_p1(T); +string_p1([$\t|T]) -> string_p1(T); +string_p1([$\v|T]) -> string_p1(T); +string_p1([$\b|T]) -> string_p1(T); +string_p1([$\f|T]) -> string_p1(T); +string_p1([$\e|T]) -> string_p1(T); +string_p1([H|T]) when is_list(H) -> + case string_p1(H) of + true -> string_p1(T); + _ -> false + end; +string_p1([]) -> true; +string_p1(_) -> false. + +write_time(Time) -> write_time(Time, "ERROR REPORT"). + +write_time({utc,{{Y,Mo,D},{H,Mi,S}}}, Type) -> + io_lib:format("~n=~s==== ~p-~s-~p::~s:~s:~s UTC ===~n", + [Type,D,month(Mo),Y,t(H),t(Mi),t(S)]); +write_time({{Y,Mo,D},{H,Mi,S}}, Type) -> + io_lib:format("~n=~s==== ~p-~s-~p::~s:~s:~s ===~n", + [Type,D,month(Mo),Y,t(H),t(Mi),t(S)]). + +t(X) when is_integer(X) -> + t1(integer_to_list(X)); +t(_) -> + "". +t1([X]) -> [$0,X]; +t1(X) -> X. + +month(1) -> "Jan"; +month(2) -> "Feb"; +month(3) -> "Mar"; +month(4) -> "Apr"; +month(5) -> "May"; +month(6) -> "Jun"; +month(7) -> "Jul"; +month(8) -> "Aug"; +month(9) -> "Sep"; +month(10) -> "Oct"; +month(11) -> "Nov"; +month(12) -> "Dec". + + diff --git a/lib/stdlib/src/error_logger_tty_h.erl b/lib/stdlib/src/error_logger_tty_h.erl new file mode 100644 index 0000000000..435e57aa0e --- /dev/null +++ b/lib/stdlib/src/error_logger_tty_h.erl @@ -0,0 +1,261 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(error_logger_tty_h). + +-behaviour(gen_event). + +%%% +%%% A handler that can be connected to the error_logger +%%% event handler. +%%% Writes all events formatted to stdout. +%%% Handles events tagged error, emulator and info. +%%% +%%% It can only be started from error_logger:swap_handler(tty) +%%% or error_logger:tty(true) +%%% + +-export([init/1, + handle_event/2, handle_call/2, handle_info/2, + terminate/2, code_change/3]). + +%% This one is used when we takeover from the simple error_logger. +init({[], {error_logger, Buf}}) -> + User = set_group_leader(), + write_events(Buf), + {ok, {User, error_logger}}; +%% This one is used if someone took over from us, and now wants to +%% go back. +init({[], {error_logger_tty_h, PrevHandler}}) -> + User = set_group_leader(), + {ok, {User, PrevHandler}}; +%% This one is used when we are started directly. +init([]) -> + User = set_group_leader(), + {ok, {User, []}}. + +handle_event({_Type, GL, _Msg}, State) when node(GL) =/= node() -> + {ok, State}; +handle_event(Event, State) -> + write_event(tag_event(Event)), + {ok, State}. + +handle_info({'EXIT', User, _Reason}, {User, PrevHandler}) -> + case PrevHandler of + [] -> + remove_handler; + _ -> + {swap_handler, install_prev, {User, PrevHandler}, + PrevHandler, go_back} + end; +handle_info({emulator, GL, Chars}, State) when node(GL) == node() -> + write_event(tag_event({emulator, GL, Chars})), + {ok, State}; +handle_info({emulator, noproc, Chars}, State) -> + write_event(tag_event({emulator, noproc, Chars})), + {ok, State}; +handle_info(_, State) -> + {ok, State}. + +handle_call(_Query, State) -> {ok, {error, bad_query}, State}. + +% unfortunately, we can't unlink from User - links are not counted! +% if pid(User) -> unlink(User); true -> ok end, +terminate(install_prev, _State) -> + []; +terminate(_Reason, {_User, PrevHandler}) -> + {error_logger_tty_h, PrevHandler}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%% ------------------------------------------------------ +%%% Misc. functions. +%%% ------------------------------------------------------ + +set_group_leader() -> + case whereis(user) of + User when is_pid(User) -> link(User), group_leader(User,self()), User; + _ -> false + end. + +tag_event(Event) -> + {erlang:localtime(), Event}. + +write_events(Events) -> write_events1(lists:reverse(Events)). + +write_events1([Event|Es]) -> + write_event(Event), + write_events1(Es); +write_events1([]) -> + ok. + +write_event({Time, {error, _GL, {Pid, Format, Args}}}) -> + T = write_time(maybe_utc(Time)), + case catch io_lib:format(add_node(Format,Pid), Args) of + S when is_list(S) -> + format(T ++ S); + _ -> + F = add_node("ERROR: ~p - ~p~n", Pid), + format(T ++ F, [Format,Args]) + end; +write_event({Time, {emulator, _GL, Chars}}) -> + T = write_time(maybe_utc(Time)), + case catch io_lib:format(Chars, []) of + S when is_list(S) -> + format(T ++ S); + _ -> + format(T ++ "ERROR: ~p ~n", [Chars]) + end; +write_event({Time, {info, _GL, {Pid, Info, _}}}) -> + T = write_time(maybe_utc(Time)), + format(T ++ add_node("~p~n",Pid),[Info]); +write_event({Time, {error_report, _GL, {Pid, std_error, Rep}}}) -> + T = write_time(maybe_utc(Time)), + S = format_report(Rep), + format(T ++ S ++ add_node("", Pid)); +write_event({Time, {info_report, _GL, {Pid, std_info, Rep}}}) -> + T = write_time(maybe_utc(Time), "INFO REPORT"), + S = format_report(Rep), + format(T ++ S ++ add_node("", Pid)); +write_event({Time, {info_msg, _GL, {Pid, Format, Args}}}) -> + T = write_time(maybe_utc(Time), "INFO REPORT"), + case catch io_lib:format(add_node(Format,Pid), Args) of + S when is_list(S) -> + format(T ++ S); + _ -> + F = add_node("ERROR: ~p - ~p~n", Pid), + format(T ++ F, [Format,Args]) + end; +write_event({Time, {warning_report, _GL, {Pid, std_warning, Rep}}}) -> + T = write_time(maybe_utc(Time), "WARNING REPORT"), + S = format_report(Rep), + format(T ++ S ++ add_node("", Pid)); +write_event({Time, {warning_msg, _GL, {Pid, Format, Args}}}) -> + T = write_time(maybe_utc(Time), "WARNING REPORT"), + case catch io_lib:format(add_node(Format,Pid), Args) of + S when is_list(S) -> + format(T ++ S); + _ -> + F = add_node("ERROR: ~p - ~p~n", Pid), + format(T ++ F, [Format,Args]) + end; +write_event({_Time, _Error}) -> + ok. + +maybe_utc(Time) -> + UTC = case application:get_env(sasl, utc_log) of + {ok, Val} -> + Val; + undefined -> + %% Backwards compatible: + case application:get_env(stdlib, utc_log) of + {ok, Val} -> + Val; + undefined -> + false + end + end, + if + UTC =:= true -> + {utc, calendar:local_time_to_universal_time(Time)}; + true -> + Time + end. + +format(String) -> io:format(user, String, []). +format(String, Args) -> io:format(user, String, Args). + +format_report(Rep) when is_list(Rep) -> + case string_p(Rep) of + true -> + io_lib:format("~s~n",[Rep]); + _ -> + format_rep(Rep) + end; +format_report(Rep) -> + io_lib:format("~p~n",[Rep]). + +format_rep([{Tag,Data}|Rep]) -> + io_lib:format(" ~p: ~p~n",[Tag,Data]) ++ format_rep(Rep); +format_rep([Other|Rep]) -> + io_lib:format(" ~p~n",[Other]) ++ format_rep(Rep); +format_rep(_) -> + []. + +add_node(X, Pid) when is_atom(X) -> + add_node(atom_to_list(X), Pid); +add_node(X, Pid) when node(Pid) =/= node() -> + lists:concat([X,"** at node ",node(Pid)," **~n"]); +add_node(X, _) -> + X. + +string_p([]) -> + false; +string_p(Term) -> + string_p1(Term). + +string_p1([H|T]) when is_integer(H), H >= $\s, H < 255 -> + string_p1(T); +string_p1([$\n|T]) -> string_p1(T); +string_p1([$\r|T]) -> string_p1(T); +string_p1([$\t|T]) -> string_p1(T); +string_p1([$\v|T]) -> string_p1(T); +string_p1([$\b|T]) -> string_p1(T); +string_p1([$\f|T]) -> string_p1(T); +string_p1([$\e|T]) -> string_p1(T); +string_p1([H|T]) when is_list(H) -> + case string_p1(H) of + true -> string_p1(T); + _ -> false + end; +string_p1([]) -> true; +string_p1(_) -> false. + +write_time(Time) -> write_time(Time, "ERROR REPORT"). +write_time({utc,{{Y,Mo,D},{H,Mi,S}}},Type) -> + io_lib:format("~n=~s==== ~p-~s-~p::~s:~s:~s UTC ===~n", + [Type,D,month(Mo),Y,t(H),t(Mi),t(S)]); +write_time({{Y,Mo,D},{H,Mi,S}},Type) -> + io_lib:format("~n=~s==== ~p-~s-~p::~s:~s:~s ===~n", + [Type,D,month(Mo),Y,t(H),t(Mi),t(S)]). + +t(X) when is_integer(X) -> + t1(integer_to_list(X)); +t(_) -> + "". +t1([X]) -> [$0,X]; +t1(X) -> X. + +month(1) -> "Jan"; +month(2) -> "Feb"; +month(3) -> "Mar"; +month(4) -> "Apr"; +month(5) -> "May"; +month(6) -> "Jun"; +month(7) -> "Jul"; +month(8) -> "Aug"; +month(9) -> "Sep"; +month(10) -> "Oct"; +month(11) -> "Nov"; +month(12) -> "Dec". + + + + + diff --git a/lib/stdlib/src/escript.erl b/lib/stdlib/src/escript.erl new file mode 100644 index 0000000000..697a69b801 --- /dev/null +++ b/lib/stdlib/src/escript.erl @@ -0,0 +1,694 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% + +-module(escript). + +%% Useful functions that can be called from scripts. +-export([script_name/0, foldl/3]). + +%% Internal API. +-export([start/0, start/1]). + +-record(state, {file, + module, + forms_or_bin, + source, + n_errors, + mode, + exports_main, + has_records}). + +script_name() -> + [ScriptName|_] = init:get_plain_arguments(), + ScriptName. + +%% Apply Fun(Name, GetInfo, GetBin, Acc) for each file in the escript. +%% +%% Fun/2 must return a new accumulator which is passed to the next call. +%% The function returns the final value of the accumulator. Acc0 is +%% returned if the escript contain an empty archive. +%% +%% GetInfo/0 is a fun that returns a #file_info{} record for the file. +%% GetBin/0 is a fun that returns a the contents of the file as a binary. +%% +%% An escript may contain erlang code, beam code or an archive: +%% +%% archive - the Fun/2 will be applied for each file in the archive +%% beam - the Fun/2 will be applied once and GetInfo/0 returns the file +%% info for the (entire) escript file +%% erl - the Fun/2 will be applied once, GetInfo/0 returns the file +%% info for the (entire) escript file and the GetBin returns +%% the compiled beam code + +%%-spec foldl(fun((string(), +%% fun(() -> #file_info()), +%% fun(() -> binary() -> term()), +%% term()) -> term()), +%% term(), +%% string()). +foldl(Fun, Acc0, File) when is_function(Fun, 4) -> + case parse_file(File, false) of + {text, _, Forms, _Mode} when is_list(Forms) -> + GetInfo = fun() -> file:read_file_info(File) end, + GetBin = + fun() -> + case compile:forms(Forms, [return_errors, debug_info]) of + {ok, _, BeamBin} -> + BeamBin; + {error, _Errors, _Warnings} -> + fatal("There were compilation errors.") + end + end, + try + {ok, Fun(".", GetInfo, GetBin, Acc0)} + catch + throw:Reason -> + {error, Reason} + end; + {beam, _, BeamBin, _Mode} when is_binary(BeamBin) -> + GetInfo = fun() -> file:read_file_info(File) end, + GetBin = fun() -> BeamBin end, + try + {ok, Fun(".", GetInfo, GetBin, Acc0)} + catch + throw:Reason -> + {error, Reason} + end; + {archive, _, ArchiveBin, _Mode} when is_binary(ArchiveBin) -> + ZipFun = + fun({Name, GetInfo, GetBin}, A) -> + A2 = Fun(Name, GetInfo, GetBin, A), + {true, false, A2} + end, + case prim_zip:open(ZipFun, Acc0, {File, ArchiveBin}) of + {ok, PrimZip, Res} -> + ok = prim_zip:close(PrimZip), + {ok, Res}; + {error, bad_eocd} -> + {error, "Not an archive file"}; + {error, Reason} -> + {error, Reason} + end + end. + +%% +%% Internal API. +%% + +start() -> + start([]). + +start(EscriptOptions) -> + try + %% Commands run using -run or -s are run in a process + %% trap_exit set to false. Because this behaviour is + %% surprising for users of escript, make sure to reset + %% trap_exit to false. + process_flag(trap_exit, false), + case init:get_plain_arguments() of + [File|Args] -> + parse_and_run(File, Args, EscriptOptions); + [] -> + io:format("escript: Missing filename\n", []), + my_halt(127) + end + catch + throw:Str -> + io:format("escript: ~s\n", [Str]), + my_halt(127); + _:Reason -> + io:format("escript: Internal error: ~p\n", [Reason]), + io:format("~p\n", [erlang:get_stacktrace()]), + my_halt(127) + end. + +parse_and_run(File, Args, Options) -> + CheckOnly = lists:member("s", Options), + {Source, Module, FormsOrBin, Mode} = parse_file(File, CheckOnly), + Mode2 = + case lists:member("d", Options) of + true -> + debug; + false -> + case lists:member("c", Options) of + true -> + compile; + false -> + case lists:member("i", Options) of + true -> interpret; + false -> Mode + end + end + end, + if + is_list(FormsOrBin) -> + case Mode2 of + interpret -> + interpret(FormsOrBin, File, Args); + compile -> + case compile:forms(FormsOrBin, [report]) of + {ok, Module, BeamBin} -> + {module, Module} = code:load_binary(Module, File, BeamBin), + run(Module, Args); + _Other -> + fatal("There were compilation errors.") + end; + debug -> + case compile:forms(FormsOrBin, [report, debug_info]) of + {ok,Module,BeamBin} -> + {module, Module} = code:load_binary(Module, File, BeamBin), + debug(Module, {Module, File, File, BeamBin}, Args); + _Other -> + fatal("There were compilation errors.") + end + end; + is_binary(FormsOrBin) -> + case Source of + archive -> + case code:set_primary_archive(File, FormsOrBin) of + ok when CheckOnly -> + case code:load_file(Module) of + {module, _} -> + case erlang:function_exported(Module, main, 1) of + true -> + my_halt(0); + false -> + Text = lists:concat(["Function ", Module, ":main/1 is not exported"]), + fatal(Text) + end; + _ -> + Text = lists:concat(["Cannot load module ", Module, " from archive"]), + fatal(Text) + end; + ok -> + case Mode2 of + run -> run(Module, Args); + debug -> debug(Module, Module, Args) + end; + {error, bad_eocd} -> + fatal("Not an archive file"); + {error, Reason} -> + fatal(Reason) + end; + beam -> + case Mode2 of + run -> + {module, Module} = code:load_binary(Module, File, FormsOrBin), + run(Module, Args); + debug -> + [Base | Rest] = lists:reverse(filename:split(File)), + Base2 = filename:basename(Base, code:objfile_extension()), + Rest2 = + case Rest of + ["ebin" | Top] -> ["src" | Top]; + _ -> Rest + end, + SrcFile = filename:join(lists:reverse([Base2 ++ ".erl" | Rest2])), + debug(Module, {Module, SrcFile, File, FormsOrBin}, Args) + end + end + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Parse script +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +parse_file(File, CheckOnly) -> + S = #state{file = File, + n_errors = 0, + mode = interpret, + exports_main = false, + has_records = false}, + {ok, Fd} = + case file:open(File, [read]) of + {ok, Fd0} -> + {ok, Fd0}; + {error, R} -> + fatal(lists:concat([file:format_error(R), ": '", File, "'"])) + end, + {HeaderSz, StartLine, ScriptType} = skip_header(Fd, 1), + #state{mode = Mode, + source = Source, + module = Module, + forms_or_bin = FormsOrBin} = + case ScriptType of + archive -> + %% Archive file + ok = file:close(Fd), + parse_archive(S, File, HeaderSz); + beam -> + %% Beam file + ok = file:close(Fd), + parse_beam(S, File, HeaderSz, CheckOnly); + source -> + %% Source code + parse_source(S, File, Fd, StartLine, HeaderSz, CheckOnly) + end, + {Source, Module, FormsOrBin, Mode}. + +%% Skip header and make a heuristic guess about the script type +skip_header(P, LineNo) -> + %% Skip shebang on first line + {ok, HeaderSz0} = file:position(P, cur), + Line1 = get_line(P), + case classify_line(Line1) of + shebang -> + find_first_body_line(P, LineNo); + archive -> + {HeaderSz0, LineNo, archive}; + beam -> + {HeaderSz0, LineNo, beam}; + _ -> + find_first_body_line(P, LineNo) + end. + +find_first_body_line(P, LineNo) -> + {ok, HeaderSz1} = file:position(P, cur), + %% Look for special comment on second line + Line2 = get_line(P), + {ok, HeaderSz2} = file:position(P, cur), + case classify_line(Line2) of + emu_args -> + %% Skip special comment on second line + Line3 = get_line(P), + {HeaderSz2, LineNo + 2, guess_type(Line3)}; + _ -> + %% Look for special comment on third line + Line3 = get_line(P), + {ok, HeaderSz3} = file:position(P, cur), + case classify_line(Line3) of + emu_args -> + %% Skip special comment on third line + Line4 = get_line(P), + {HeaderSz3, LineNo + 3, guess_type(Line4)}; + _ -> + %% Just skip shebang on first line + {HeaderSz1, LineNo + 1, guess_type(Line2)} + end + end. + +classify_line(Line) -> + case Line of + [$\#, $\! | _] -> + shebang; + [$P, $K | _] -> + archive; + [$F, $O, $R, $1 | _] -> + beam; + [$\%, $\%, $\! | _] -> + emu_args; + _ -> + undefined + end. + +guess_type(Line) -> + case classify_line(Line) of + archive -> archive; + beam -> beam; + _ -> source + end. + +get_line(P) -> + case io:get_line(P, '') of + eof -> + fatal("Premature end of file reached"); + Line -> + Line + end. + +parse_archive(S, File, HeaderSz) -> + case file:read_file(File) of + {ok, <<_FirstLine:HeaderSz/binary, Bin/binary>>} -> + Mod = + case init:get_argument(escript) of + {ok, [["main", M]]} -> + %% Use explicit module name + list_to_atom(M); + _ -> + %% Use escript name without extension as module name + RevBase = lists:reverse(filename:basename(File)), + RevBase2 = + case lists:dropwhile(fun(X) -> X =/= $. end, RevBase) of + [$. | Rest] -> Rest; + [] -> RevBase + end, + list_to_atom(lists:reverse(RevBase2)) + end, + + S#state{source = archive, + mode = run, + module = Mod, + forms_or_bin = Bin}; + {ok, _} -> + fatal("Illegal archive format"); + {error, Reason} -> + fatal(file:format_error(Reason)) + end. + + +parse_beam(S, File, HeaderSz, CheckOnly) -> + {ok, <<_FirstLine:HeaderSz/binary, Bin/binary>>} = + file:read_file(File), + case beam_lib:chunks(Bin, [exports]) of + {ok, {Module, [{exports, Exports}]}} -> + case CheckOnly of + true -> + case lists:member({main, 1}, Exports) of + true -> + my_halt(0); + false -> + Text = lists:concat(["Function ", Module, ":main/1 is not exported"]), + fatal(Text) + end; + false -> + S#state{source = beam, + mode = run, + module = Module, + forms_or_bin = Bin} + end; + {error, beam_lib, Reason} when is_tuple(Reason) -> + fatal(element(1, Reason)); + {error, beam_lib, Reason} -> + fatal(Reason) + end. + +parse_source(S, File, Fd, StartLine, HeaderSz, CheckOnly) -> + {PreDefMacros, Module} = pre_def_macros(File), + IncludePath = [], + {ok, _} = file:position(Fd, {bof, HeaderSz}), + case epp:open(File, Fd, StartLine, IncludePath, PreDefMacros) of + {ok, Epp} -> + {ok, FileForm} = epp:parse_erl_form(Epp), + OptModRes = epp:parse_erl_form(Epp), + S2 = S#state{source = text, module = Module}, + S3 = + case OptModRes of + {ok, {attribute,_, module, M} = Form} -> + epp_parse_file(Epp, S2#state{module = M}, [Form, FileForm]); + {ok, _} -> + ModForm = {attribute,1,module, Module}, + epp_parse_file2(Epp, S2, [ModForm, FileForm], OptModRes); + {error, _} -> + epp_parse_file2(Epp, S2, [FileForm], OptModRes); + {eof,LastLine} -> + S#state{forms_or_bin = [FileForm, {eof,LastLine}]} + end, + ok = epp:close(Epp), + ok = file:close(Fd), + check_source(S3, CheckOnly); + {error, Reason} -> + io:format("escript: ~p\n", [Reason]), + fatal("Preprocessor error") + end. + +check_source(S, CheckOnly) -> + case S of + #state{n_errors = Nerrs} when Nerrs =/= 0 -> + fatal("There were compilation errors."); + #state{exports_main = ExpMain, + has_records = HasRecs, + forms_or_bin = [FileForm2, ModForm2 | Forms]} -> + %% Optionally add export of main/1 + Forms2 = + case ExpMain of + false -> [{attribute,0,export, [{main,1}]} | Forms]; + true -> Forms + end, + Forms3 = [FileForm2, ModForm2 | Forms2], + case CheckOnly of + true -> + %% Optionally expand records + Forms4 = + case HasRecs of + false -> Forms3; + true -> erl_expand_records:module(Forms3, []) + end, + %% Strong validation and halt + case compile:forms(Forms4, [report,strong_validation]) of + {ok,_} -> + my_halt(0); + _Other -> + fatal("There were compilation errors.") + end; + false -> + %% Basic validation before execution + case erl_lint:module(Forms3) of + {ok,Ws} -> + report_warnings(Ws); + {error,Es,Ws} -> + report_errors(Es), + report_warnings(Ws), + fatal("There were compilation errors.") + end, + %% Optionally expand records + Forms4 = + case HasRecs of + false -> Forms3; + true -> erl_expand_records:module(Forms3, []) + end, + S#state{forms_or_bin = Forms4} + end + end. + +pre_def_macros(File) -> + {MegaSecs, Secs, MicroSecs} = erlang:now(), + Replace = fun(Char) -> + case Char of + $\. -> $\_; + _ -> Char + end + end, + CleanBase = lists:map(Replace, filename:basename(File)), + ModuleStr = + CleanBase ++ "__" ++ + "escript__" ++ + integer_to_list(MegaSecs) ++ "__" ++ + integer_to_list(Secs) ++ "__" ++ + integer_to_list(MicroSecs), + Module = list_to_atom(ModuleStr), + PreDefMacros = [{'MODULE', Module, redefine}, + {'MODULE_STRING', ModuleStr, redefine}], + {PreDefMacros, Module}. + +epp_parse_file(Epp, S, Forms) -> + Parsed = epp:parse_erl_form(Epp), + epp_parse_file2(Epp, S, Forms, Parsed). + +epp_parse_file2(Epp, S, Forms, Parsed) -> + %% io:format("~p\n", [Parsed]), + case Parsed of + {ok, Form} -> + case Form of + {attribute,Ln,record,{Record,Fields}} -> + S2 = S#state{has_records = true}, + case epp:normalize_typed_record_fields(Fields) of + {typed, NewFields} -> + epp_parse_file(Epp, S2, + [{attribute, Ln, record, {Record, NewFields}}, + {attribute, Ln, type, + {{record, Record}, Fields, []}} | Forms]); + not_typed -> + epp_parse_file(Epp, S2, [Form | Forms]) + end; + {attribute,Ln,mode,NewMode} -> + S2 = S#state{mode = NewMode}, + if + NewMode =:= compile; NewMode =:= interpret; NewMode =:= debug -> + epp_parse_file(Epp, S2, [Form | Forms]); + true -> + Args = lists:flatten(io_lib:format("illegal mode attribute: ~p", [NewMode])), + io:format("~s:~w ~s\n", [S#state.file,Ln,Args]), + Error = {error,{Ln,erl_parse,Args}}, + Nerrs= S#state.n_errors + 1, + epp_parse_file(Epp, S2#state{n_errors = Nerrs}, [Error | Forms]) + end; + {attribute,_,export,Fs} -> + case lists:member({main,1}, Fs) of + false -> + epp_parse_file(Epp, S, [Form | Forms]); + true -> + epp_parse_file(Epp, S#state{exports_main = true}, [Form | Forms]) + end; + _ -> + epp_parse_file(Epp, S, [Form | Forms]) + end; + {error,{Ln,Mod,Args}} = Form -> + io:format("~s:~w: ~s\n", + [S#state.file,Ln,Mod:format_error(Args)]), + epp_parse_file(Epp, S#state{n_errors = S#state.n_errors + 1}, [Form | Forms]); + {eof,LastLine} -> + S#state{forms_or_bin = lists:reverse([{eof, LastLine} | Forms])} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Evaluate script +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +debug(Module, AbsMod, Args) -> + case hidden_apply(debugger, debugger, start, []) of + {ok, _} -> + case hidden_apply(debugger, int, i, [AbsMod]) of + {module, _} -> + hidden_apply(debugger, debugger, auto_attach, [[init]]), + run(Module, Args); + error -> + Text = lists:concat(["Cannot load the code for ", Module, " into the debugger"]), + fatal(Text) + end; + _ -> + fatal("Cannot start the debugger") + end. + +run(Module, Args) -> + try + Module:main(Args), + my_halt(0) + catch + Class:Reason -> + fatal(format_exception(Class, Reason)) + end. + +interpret(Forms, File, Args) -> + Dict = parse_to_dict(Forms), + ArgsA = erl_parse:abstract(Args, 0), + Call = {call,0,{atom,0,main},[ArgsA]}, + try + erl_eval:expr(Call, + erl_eval:new_bindings(), + {value,fun(I, J) -> code_handler(I, J, Dict, File) end}), + my_halt(0) + catch + Class:Reason -> + fatal(format_exception(Class, Reason)) + end. + +report_errors(Errors) -> + lists:foreach(fun ({{F,_L},Eds}) -> list_errors(F, Eds); + ({F,Eds}) -> list_errors(F, Eds) end, + Errors). + +list_errors(F, [{Line,Mod,E}|Es]) -> + io:fwrite("~s:~w: ~s\n", [F,Line,Mod:format_error(E)]), + list_errors(F, Es); +list_errors(F, [{Mod,E}|Es]) -> + io:fwrite("~s: ~s\n", [F,Mod:format_error(E)]), + list_errors(F, Es); +list_errors(_F, []) -> ok. + +report_warnings(Ws0) -> + Ws1 = lists:flatmap(fun({{F,_L},Eds}) -> format_message(F, Eds); + ({F,Eds}) -> format_message(F, Eds) end, + Ws0), + Ws = ordsets:from_list(Ws1), + lists:foreach(fun({_,Str}) -> io:put_chars(Str) end, Ws). + +format_message(F, [{Line,Mod,E}|Es]) -> + M = {{F,Line},io_lib:format("~s:~w: Warning: ~s\n", [F,Line,Mod:format_error(E)])}, + [M|format_message(F, Es)]; +format_message(F, [{Mod,E}|Es]) -> + M = {none,io_lib:format("~s: Warning: ~s\n", [F,Mod:format_error(E)])}, + [M|format_message(F, Es)]; +format_message(_, []) -> []. + +parse_to_dict(L) -> parse_to_dict(L, dict:new()). + +parse_to_dict([{function,_,Name,Arity,Clauses}|T], Dict0) -> + Dict = dict:store({local, Name,Arity}, Clauses, Dict0), + parse_to_dict(T, Dict); +parse_to_dict([{attribute,_,import,{Mod,Funcs}}|T], Dict0) -> + Dict = lists:foldl(fun(I, D) -> + dict:store({remote,I}, Mod, D) + end, Dict0, Funcs), + parse_to_dict(T, Dict); +parse_to_dict([_|T], Dict) -> + parse_to_dict(T, Dict); +parse_to_dict([], Dict) -> + Dict. + +code_handler(local, [file], _, File) -> + File; +code_handler(Name, Args, Dict, File) -> + %%io:format("code handler=~p~n",[{Name, Args}]), + Arity = length(Args), + case dict:find({local,Name,Arity}, Dict) of + {ok, Cs} -> + LF = {value,fun(I, J) -> code_handler(I, J, Dict, File) end}, + case erl_eval:match_clause(Cs, Args,erl_eval:new_bindings(),LF) of + {Body, Bs} -> + eval_exprs(Body, Bs, LF, none, none); + nomatch -> + erlang:error({function_clause,[{local,Name,Args}]}) + end; + error -> + case dict:find({remote,{Name,Arity}}, Dict) of + {ok, Mod} -> + %% io:format("Calling:~p~n",[{Mod,Name,Args}]), + apply(Mod, Name, Args); + error -> + io:format("Script does not export ~w/~w\n", [Name,Arity]), + my_halt(127) + end + end. + +eval_exprs([E], Bs0, Lf, Ef, _RBs) -> + RBs1 = value, + erl_eval:expr(E, Bs0, Lf, Ef, RBs1); +eval_exprs([E|Es], Bs0, Lf, Ef, RBs) -> + RBs1 = none, + {value,_V,Bs} = erl_eval:expr(E, Bs0, Lf, Ef, RBs1), + eval_exprs(Es, Bs, Lf, Ef, RBs). + +format_exception(Class, Reason) -> + PF = fun(Term, I) -> + io_lib:format("~." ++ integer_to_list(I) ++ "P", [Term, 50]) + end, + StackTrace = erlang:get_stacktrace(), + StackFun = fun(M, _F, _A) -> (M =:= erl_eval) or (M =:= ?MODULE) end, + lib:format_exception(1, Class, Reason, StackTrace, StackFun, PF). + +fatal(Str) -> + throw(Str). + +my_halt(Reason) -> + case process_info(group_leader(), status) of + {_,waiting} -> + %% Now all output data is down in the driver. + %% Give the driver some extra time before halting. + receive after 1 -> ok end, + halt(Reason); + _ -> + %% Probably still processing I/O requests. + erlang:yield(), + my_halt(Reason) + end. + +hidden_apply(App, M, F, Args) -> + try + apply(fun() -> M end(), F, Args) + catch + error:undef -> + case erlang:get_stacktrace() of + [{M,F,Args} | _] -> + Arity = length(Args), + Text = io_lib:format("Call to ~w:~w/~w in application ~w failed.\n", + [M, F, Arity, App]), + fatal(Text); + Stk -> + erlang:raise(error, undef, Stk) + end + end. diff --git a/lib/stdlib/src/ets.erl b/lib/stdlib/src/ets.erl new file mode 100644 index 0000000000..9f84e3639f --- /dev/null +++ b/lib/stdlib/src/ets.erl @@ -0,0 +1,1269 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(ets). + +%% Interface to the Term store BIF's +%% ets == Erlang Term Store + +-export([file2tab/1, + file2tab/2, + filter/3, + foldl/3, foldr/3, + match_delete/2, + tab2file/2, + tab2file/3, + tabfile_info/1, + from_dets/2, + to_dets/2, + init_table/2, + test_ms/2, + tab2list/1, + table/1, + table/2, + fun2ms/1, + match_spec_run/2, + repair_continuation/2]). + +-export([i/0, i/1, i/2, i/3]). + +%%------------------------------------------------------------------------------ + +-type tab() :: atom() | tid(). + +-type ext_info() :: 'md5sum' | 'object_count'. +-type protection() :: 'private' | 'protected' | 'public'. +-type type() :: 'bag' | 'duplicate_bag' | 'ordered_set' | 'set'. + +-type table_info() :: {'name', atom()} + | {'type', type()} + | {'protection', protection()} + | {'named_table', boolean()} + | {'keypos', non_neg_integer()} + | {'size', non_neg_integer()} + | {'extended_info', [ext_info()]} + | {'version', {non_neg_integer(), non_neg_integer()}}. + +%% these ones are also defined in erl_bif_types +-type match_pattern() :: atom() | tuple(). +-type match_specs() :: [{match_pattern(), [_], [_]}]. + +%%------------------------------------------------------------------------------ + +%% The following functions used to be found in this module, but +%% are now BIFs (i.e. implemented in C). +%% +%% all/0 +%% new/2 +%% delete/1 +%% delete/2 +%% first/1 +%% info/1 +%% info/2 +%% safe_fixtable/2 +%% lookup/2 +%% lookup_element/3 +%% insert/2 +%% is_compiled_ms/1 +%% last/1 +%% next/2 +%% prev/2 +%% rename/2 +%% slot/2 +%% match/1 +%% match/2 +%% match/3 +%% match_object/1 +%% match_object/2 +%% match_object/3 +%% match_spec_compile/1 +%% match_spec_run_r/3 +%% select/1 +%% select/2 +%% select/3 +%% select_reverse/1 +%% select_reverse/2 +%% select_reverse/3 +%% select_delete/2 +%% update_counter/3 +%% + +-opaque comp_match_spec() :: any(). %% this one is REALLY opaque + +-spec match_spec_run([tuple()], comp_match_spec()) -> [term()]. + +match_spec_run(List, CompiledMS) -> + lists:reverse(ets:match_spec_run_r(List, CompiledMS, [])). + +-type continuation() :: '$end_of_table' + | {tab(),integer(),integer(),binary(),list(),integer()} + | {tab(),_,_,integer(),binary(),list(),integer(),integer()}. + +-spec repair_continuation(continuation(), match_specs()) -> continuation(). + +%% $end_of_table is an allowed continuation in ets... +repair_continuation('$end_of_table', _) -> + '$end_of_table'; +%% ordered_set +repair_continuation(Untouched = {Table,Lastkey,EndCondition,N2,Bin,L2,N3,N4}, MS) + when %% (is_atom(Table) or is_integer(Table)), + is_integer(N2), + byte_size(Bin) =:= 0, + is_list(L2), + is_integer(N3), + is_integer(N4) -> + case ets:is_compiled_ms(Bin) of + true -> + Untouched; + false -> + {Table,Lastkey,EndCondition,N2,ets:match_spec_compile(MS),L2,N3,N4} + end; +%% set/bag/duplicate_bag +repair_continuation(Untouched = {Table,N1,N2,Bin,L,N3}, MS) + when %% (is_atom(Table) or is_integer(Table)), + is_integer(N1), + is_integer(N2), + byte_size(Bin) =:= 0, + is_list(L), + is_integer(N3) -> + case ets:is_compiled_ms(Bin) of + true -> + Untouched; + false -> + {Table,N1,N2,ets:match_spec_compile(MS),L,N3} + end. + +-spec fun2ms(function()) -> match_specs(). + +fun2ms(ShellFun) when is_function(ShellFun) -> + %% Check that this is really a shell fun... + case erl_eval:fun_data(ShellFun) of + {fun_data,ImportList,Clauses} -> + case ms_transform:transform_from_shell( + ?MODULE,Clauses,ImportList) of + {error,[{_,[{_,_,Code}|_]}|_],_} -> + io:format("Error: ~s~n", + [ms_transform:format_error(Code)]), + {error,transform_error}; + Else -> + Else + end; + false -> + exit({badarg,{?MODULE,fun2ms, + [function,called,with,real,'fun', + should,be,transformed,with, + parse_transform,'or',called,with, + a,'fun',generated,in,the, + shell]}}) + end. + +-spec foldl(fun((_, term()) -> term()), term(), tab()) -> term(). + +foldl(F, Accu, T) -> + ets:safe_fixtable(T, true), + First = ets:first(T), + try + do_foldl(F, Accu, First, T) + after + ets:safe_fixtable(T, false) + end. + +do_foldl(F, Accu0, Key, T) -> + case Key of + '$end_of_table' -> + Accu0; + _ -> + do_foldl(F, + lists:foldl(F, Accu0, ets:lookup(T, Key)), + ets:next(T, Key), T) + end. + +-spec foldr(fun((_, term()) -> term()), term(), tab()) -> term(). + +foldr(F, Accu, T) -> + ets:safe_fixtable(T, true), + Last = ets:last(T), + try + do_foldr(F, Accu, Last, T) + after + ets:safe_fixtable(T, false) + end. + +do_foldr(F, Accu0, Key, T) -> + case Key of + '$end_of_table' -> + Accu0; + _ -> + do_foldr(F, + lists:foldr(F, Accu0, ets:lookup(T, Key)), + ets:prev(T, Key), T) + end. + +-spec from_dets(tab(), dets:tab_name()) -> 'true'. + +from_dets(EtsTable, DetsTable) -> + case (catch dets:to_ets(DetsTable, EtsTable)) of + {error, Reason} -> + erlang:error(Reason, [EtsTable,DetsTable]); + {'EXIT', {Reason1, _Stack1}} -> + erlang:error(Reason1,[EtsTable,DetsTable]); + {'EXIT', EReason} -> + erlang:error(EReason,[EtsTable,DetsTable]); + EtsTable -> + true; + Unexpected -> %% Dets bug? + erlang:error(Unexpected,[EtsTable,DetsTable]) + end. + +-spec to_dets(tab(), dets:tab_name()) -> tab(). + +to_dets(EtsTable, DetsTable) -> + case (catch dets:from_ets(DetsTable, EtsTable)) of + {error, Reason} -> + erlang:error(Reason, [EtsTable,DetsTable]); + {'EXIT', {Reason1, _Stack1}} -> + erlang:error(Reason1,[EtsTable,DetsTable]); + {'EXIT', EReason} -> + erlang:error(EReason,[EtsTable,DetsTable]); + ok -> + DetsTable; + Unexpected -> %% Dets bug? + erlang:error(Unexpected,[EtsTable,DetsTable]) + end. + +-spec test_ms(tuple(), match_specs()) -> + {'ok', term()} | {'error', [{'warning'|'error', string()}]}. + +test_ms(Term, MS) -> + case erlang:match_spec_test(Term, MS, table) of + {ok, Result, _Flags, _Messages} -> + {ok, Result}; + {error, _Errors} = Error -> + Error + end. + +-spec init_table(tab(), fun(('read' | 'close') -> term())) -> 'true'. + +init_table(Table, Fun) -> + ets:delete_all_objects(Table), + init_table_continue(Table, Fun(read)). + +init_table_continue(_Table, end_of_input) -> + true; +init_table_continue(Table, {List, Fun}) when is_list(List), is_function(Fun) -> + case (catch init_table_sub(Table, List)) of + {'EXIT', Reason} -> + (catch Fun(close)), + exit(Reason); + true -> + init_table_continue(Table, Fun(read)) + end; +init_table_continue(_Table, Error) -> + exit(Error). + +init_table_sub(_Table, []) -> + true; +init_table_sub(Table, [H|T]) -> + ets:insert(Table, H), + init_table_sub(Table, T). + +-spec match_delete(tab(), match_pattern()) -> 'true'. + +match_delete(Table, Pattern) -> + ets:select_delete(Table, [{Pattern,[],[true]}]), + true. + +%% Produce a list of tuples from a table + +-spec tab2list(tab()) -> [tuple()]. + +tab2list(T) -> + ets:match_object(T, '_'). + +-spec filter(tab(), function(), [term()]) -> [term()]. + +filter(Tn, F, A) when is_atom(Tn) ; is_integer(Tn) -> + do_filter(Tn, ets:first(Tn), F, A, []). + +do_filter(_Tab, '$end_of_table', _, _, Ack) -> + Ack; +do_filter(Tab, Key, F, A, Ack) -> + case apply(F, [ets:lookup(Tab, Key)|A]) of + false -> + do_filter(Tab, ets:next(Tab, Key), F, A, Ack); + true -> + Ack2 = ets:lookup(Tab, Key) ++ Ack, + do_filter(Tab, ets:next(Tab, Key), F, A, Ack2); + {true, Value} -> + do_filter(Tab, ets:next(Tab, Key), F, A, [Value|Ack]) + end. + + +%% Dump a table to a file using the disk_log facility + +%% Options := [Option] +%% Option := {extended_info,[ExtInfo]} +%% ExtInfo := object_count | md5sum + +-define(MAJOR_F2T_VERSION,1). +-define(MINOR_F2T_VERSION,0). + +-record(filetab_options, + { + object_count = false :: boolean(), + md5sum = false :: boolean() + }). + +-type fname() :: string() | atom(). +-type t2f_option() :: {'extended_info', [ext_info()]}. + +-spec tab2file(tab(), fname()) -> 'ok' | {'error', term()}. + +tab2file(Tab, File) -> + tab2file(Tab, File, []). + +-spec tab2file(tab(), fname(), [t2f_option()]) -> 'ok' | {'error', term()}. + +tab2file(Tab, File, Options) -> + try + {ok, FtOptions} = parse_ft_options(Options), + file:delete(File), + case file:read_file_info(File) of + {error, enoent} -> ok; + _ -> throw(eaccess) + end, + Name = make_ref(), + case disk_log:open([{name, Name}, {file, File}]) of + {ok, Name} -> + ok; + {error, Reason} -> + throw(Reason) + end, + try + Info0 = case ets:info(Tab) of + undefined -> + %% erlang:error(badarg, [Tab, File, Options]); + throw(badtab); + I -> + I + end, + Info = [list_to_tuple(Info0 ++ + [{major_version,?MAJOR_F2T_VERSION}, + {minor_version,?MINOR_F2T_VERSION}, + {extended_info, + ft_options_to_list(FtOptions)}])], + {LogFun, InitState} = + case FtOptions#filetab_options.md5sum of + true -> + {fun(Oldstate,Termlist) -> + {NewState,BinList} = + md5terms(Oldstate,Termlist), + disk_log:blog_terms(Name,BinList), + NewState + end, + erlang:md5_init()}; + false -> + {fun(_,Termlist) -> + disk_log:log_terms(Name,Termlist), + true + end, + true} + end, + ets:safe_fixtable(Tab,true), + {NewState1,Num} = try + NewState = LogFun(InitState,Info), + dump_file( + ets:select(Tab,[{'_',[],['$_']}],100), + LogFun, NewState, 0) + after + (catch ets:safe_fixtable(Tab,false)) + end, + EndInfo = + case FtOptions#filetab_options.object_count of + true -> + [{count,Num}]; + false -> + [] + end ++ + case FtOptions#filetab_options.md5sum of + true -> + [{md5,erlang:md5_final(NewState1)}]; + false -> + [] + end, + case EndInfo of + [] -> + ok; + List -> + LogFun(NewState1,[['$end_of_table',List]]) + end, + disk_log:close(Name) + catch + throw:TReason -> + disk_log:close(Name), + file:delete(File), + throw(TReason); + exit:ExReason -> + disk_log:close(Name), + file:delete(File), + exit(ExReason); + error:ErReason -> + disk_log:close(Name), + file:delete(File), + erlang:raise(error,ErReason,erlang:get_stacktrace()) + end + catch + throw:TReason2 -> + {error,TReason2}; + exit:ExReason2 -> + {error,ExReason2} + end. + +dump_file('$end_of_table', _LogFun, State, Num) -> + {State,Num}; +dump_file({Terms, Context}, LogFun, State, Num) -> + Count = length(Terms), + NewState = LogFun(State, Terms), + dump_file(ets:select(Context), LogFun, NewState, Num + Count). + +ft_options_to_list(#filetab_options{md5sum = MD5, object_count = PS}) -> + case PS of + true -> + [object_count]; + _ -> + [] + end ++ + case MD5 of + true -> + [md5sum]; + _ -> + [] + end. + +md5terms(State, []) -> + {State, []}; +md5terms(State, [H|T]) -> + B = term_to_binary(H), + NewState = erlang:md5_update(State, B), + {FinState, TL} = md5terms(NewState, T), + {FinState, [B|TL]}. + +parse_ft_options(Options) when is_list(Options) -> + {Opt,Rest} = case (catch lists:keytake(extended_info,1,Options)) of + false -> + {[],Options}; + {value,{extended_info,L},R} when is_list(L) -> + {L,R} + end, + case Rest of + [] -> + parse_ft_info_options(#filetab_options{}, Opt); + Other -> + throw({unknown_option, Other}) + end; +parse_ft_options(Malformed) -> + throw({malformed_option, Malformed}). + +parse_ft_info_options(FtOpt,[]) -> + {ok,FtOpt}; +parse_ft_info_options(FtOpt,[object_count | T]) -> + parse_ft_info_options(FtOpt#filetab_options{object_count = true}, T); +parse_ft_info_options(FtOpt,[md5sum | T]) -> + parse_ft_info_options(FtOpt#filetab_options{md5sum = true}, T); +parse_ft_info_options(_,[Unexpected | _]) -> + throw({unknown_option,[{extended_info,[Unexpected]}]}); +parse_ft_info_options(_,Malformed) -> + throw({malformed_option,Malformed}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Read a dumped file from disk and create a corresponding table +%% Opts := [Opt] +%% Opt := {verify,boolean()} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-type f2t_option() :: {'verify', boolean()}. + +-spec file2tab(fname()) -> {'ok', tab()} | {'error', term()}. + +file2tab(File) -> + file2tab(File, []). + +-spec file2tab(fname(), [f2t_option()]) -> {'ok', tab()} | {'error', term()}. + +file2tab(File, Opts) -> + try + {ok,Verify} = parse_f2t_opts(Opts,false), + Name = make_ref(), + {ok, Major, Minor, FtOptions, MD5State, FullHeader, DLContext} = + case disk_log:open([{name, Name}, + {file, File}, + {mode, read_only}]) of + {ok, Name} -> + get_header_data(Name,Verify); + {repaired, Name, _,_} -> %Uh? cannot happen? + case Verify of + true -> + disk_log:close(Name), + throw(badfile); + false -> + get_header_data(Name,Verify) + end; + {error, Other1} -> + throw({read_error, Other1}); + Other2 -> + throw(Other2) + end, + try + if + Major > ?MAJOR_F2T_VERSION -> + throw({unsupported_file_version,{Major,Minor}}); + true -> + ok + end, + {ok, Tab, HeadCount} = create_tab(FullHeader), + StrippedOptions = + case Verify of + true -> + FtOptions; + false -> + #filetab_options{} + end, + {ReadFun,InitState} = + case StrippedOptions#filetab_options.md5sum of + true -> + {fun({OldMD5State,OldCount,_OL,ODLContext} = OS) -> + case wrap_bchunk(Name,ODLContext,100,Verify) of + eof -> + {OS,[]}; + {NDLContext,Blist} -> + {Termlist, NewMD5State, + NewCount,NewLast} = + md5_and_convert(Blist, + OldMD5State, + OldCount), + {{NewMD5State, NewCount, + NewLast,NDLContext}, + Termlist} + end + end, + {MD5State,0,[],DLContext}}; + false -> + {fun({_,OldCount,_OL,ODLContext} = OS) -> + case wrap_chunk(Name,ODLContext,100,Verify) of + eof -> + {OS,[]}; + {NDLContext,List} -> + {NewLast,NewCount,NewList} = + scan_for_endinfo(List, OldCount), + {{false,NewCount,NewLast,NDLContext}, + NewList} + end + end, + {false,0,[],DLContext}} + end, + try + do_read_and_verify(ReadFun,InitState,Tab, + StrippedOptions,HeadCount,Verify) + catch + throw:TReason -> + ets:delete(Tab), + throw(TReason); + exit:ExReason -> + ets:delete(Tab), + exit(ExReason); + error:ErReason -> + ets:delete(Tab), + erlang:raise(error,ErReason,erlang:get_stacktrace()) + end + after + disk_log:close(Name) + end + catch + throw:TReason2 -> + {error,TReason2}; + exit:ExReason2 -> + {error,ExReason2} + end. + +do_read_and_verify(ReadFun,InitState,Tab,FtOptions,HeadCount,Verify) -> + case load_table(ReadFun,InitState,Tab) of + {ok,{_,FinalCount,[],_}} -> + case {FtOptions#filetab_options.md5sum, + FtOptions#filetab_options.object_count} of + {false,false} -> + case Verify of + false -> + ok; + true -> + case FinalCount of + HeadCount -> + ok; + _ -> + throw(invalid_object_count) + end + end; + _ -> + throw(badfile) + end, + {ok,Tab}; + {ok,{FinalMD5State,FinalCount,['$end_of_table',LastInfo],_}} -> + ECount = case lists:keysearch(count,1,LastInfo) of + {value,{count,N}} -> + N; + _ -> + false + end, + EMD5 = case lists:keysearch(md5,1,LastInfo) of + {value,{md5,M}} -> + M; + _ -> + false + end, + case FtOptions#filetab_options.md5sum of + true -> + case erlang:md5_final(FinalMD5State) of + EMD5 -> + ok; + _MD5MisM -> + throw(checksum_error) + end; + false -> + ok + end, + case FtOptions#filetab_options.object_count of + true -> + case FinalCount of + ECount -> + ok; + _Other -> + throw(invalid_object_count) + end; + false -> + %% Only use header count if no extended info + %% at all is present and verification is requested. + case {Verify,FtOptions#filetab_options.md5sum} of + {true,false} -> + case FinalCount of + HeadCount -> + ok; + _Other2 -> + throw(invalid_object_count) + end; + _ -> + ok + end + end, + {ok,Tab} + end. + +parse_f2t_opts([],Verify) -> + {ok,Verify}; +parse_f2t_opts([{verify, true}|T],_OV) -> + parse_f2t_opts(T,true); +parse_f2t_opts([{verify,false}|T],OV) -> + parse_f2t_opts(T,OV); +parse_f2t_opts([Unexpected|_],_) -> + throw({unknown_option,Unexpected}); +parse_f2t_opts(Malformed,_) -> + throw({malformed_option,Malformed}). + +count_mandatory([]) -> + 0; +count_mandatory([{Tag,_}|T]) when Tag =:= name; + Tag =:= type; + Tag =:= protection; + Tag =:= named_table; + Tag =:= keypos; + Tag =:= size -> + 1+count_mandatory(T); +count_mandatory([_|T]) -> + count_mandatory(T). + +verify_header_mandatory(L) -> + count_mandatory(L) =:= 6. + +wrap_bchunk(Name,C,N,true) -> + case disk_log:bchunk(Name,C,N) of + {_,_,X} when X > 0 -> + throw(badfile); + {NC,Bin,_} -> + {NC,Bin}; + Y -> + Y + end; +wrap_bchunk(Name,C,N,false) -> + case disk_log:bchunk(Name,C,N) of + {NC,Bin,_} -> + {NC,Bin}; + Y -> + Y + end. + +wrap_chunk(Name,C,N,true) -> + case disk_log:chunk(Name,C,N) of + {_,_,X} when X > 0 -> + throw(badfile); + {NC,TL,_} -> + {NC,TL}; + Y -> + Y + end; +wrap_chunk(Name,C,N,false) -> + case disk_log:chunk(Name,C,N) of + {NC,TL,_} -> + {NC,TL}; + Y -> + Y + end. + +get_header_data(Name,true) -> + case wrap_bchunk(Name,start,1,true) of + {C,[Bin]} when is_binary(Bin) -> + T = binary_to_term(Bin), + case T of + Tup when is_tuple(Tup) -> + L = tuple_to_list(Tup), + case verify_header_mandatory(L) of + false -> + throw(badfile); + true -> + Major = case lists:keysearch(major,1,L) of + {value,{major,Maj}} -> + Maj; + _ -> + 0 + end, + Minor = case lists:keysearch(minor,1,L) of + {value,{minor,Min}} -> + Min; + _ -> + 0 + end, + FtOptions = + case lists:keysearch(extended_info,1,L) of + {value,{extended_info,I}} + when is_list(I) -> + #filetab_options + { + object_count = + lists:member(object_count,I), + md5sum = + lists:member(md5sum,I) + }; + _ -> + #filetab_options{} + end, + MD5Initial = + case FtOptions#filetab_options.md5sum of + true -> + X = erlang:md5_init(), + erlang:md5_update(X,Bin); + false -> + false + end, + {ok, Major, Minor, FtOptions, MD5Initial, L, C} + end; + _X -> + throw(badfile) + end; + _Y -> + throw(badfile) + end; + +get_header_data(Name, false) -> + case wrap_chunk(Name,start,1,false) of + {C,[Tup]} when is_tuple(Tup) -> + L = tuple_to_list(Tup), + case verify_header_mandatory(L) of + false -> + throw(badfile); + true -> + Major = case lists:keysearch(major_version,1,L) of + {value,{major_version,Maj}} -> + Maj; + _ -> + 0 + end, + Minor = case lists:keysearch(minor_version,1,L) of + {value,{minor_version,Min}} -> + Min; + _ -> + 0 + end, + FtOptions = + case lists:keysearch(extended_info,1,L) of + {value,{extended_info,I}} + when is_list(I) -> + #filetab_options + { + object_count = + lists:member(object_count,I), + md5sum = + lists:member(md5sum,I) + }; + _ -> + #filetab_options{} + end, + {ok, Major, Minor, FtOptions, false, L, C} + end; + _ -> + throw(badfile) + end. + +md5_and_convert([],MD5State,Count) -> + {[],MD5State,Count,[]}; +md5_and_convert([H|T],MD5State,Count) when is_binary(H) -> + case (catch binary_to_term(H)) of + {'EXIT', _} -> + md5_and_convert(T,MD5State,Count); + ['$end_of_table',Dat] -> + {[],MD5State,Count,['$end_of_table',Dat]}; + Term -> + X = erlang:md5_update(MD5State,H), + {Rest,NewMD5,NewCount,NewLast} = md5_and_convert(T,X,Count+1), + {[Term | Rest],NewMD5,NewCount,NewLast} + end. +scan_for_endinfo([],Count) -> + {[],Count,[]}; +scan_for_endinfo([['$end_of_table',Dat]],Count) -> + {['$end_of_table',Dat],Count,[]}; +scan_for_endinfo([Term|T],Count) -> + {NewLast,NCount,Rest} = scan_for_endinfo(T,Count+1), + {NewLast,NCount,[Term | Rest]}. + +load_table(ReadFun, State, Tab) -> + {NewState,NewData} = ReadFun(State), + case NewData of + [] -> + {ok,NewState}; + List -> + ets:insert(Tab,List), + load_table(ReadFun,NewState,Tab) + end. + +create_tab(I) -> + {value, {name, Name}} = lists:keysearch(name, 1, I), + {value, {type, Type}} = lists:keysearch(type, 1, I), + {value, {protection, P}} = lists:keysearch(protection, 1, I), + {value, {named_table, Val}} = lists:keysearch(named_table, 1, I), + {value, {keypos, Kp}} = lists:keysearch(keypos, 1, I), + {value, {size, Sz}} = lists:keysearch(size, 1, I), + try + Tab = ets:new(Name, [Type, P, {keypos, Kp} | named_table(Val)]), + {ok, Tab, Sz} + catch + _:_ -> + throw(cannot_create_table) + end. + +named_table(true) -> [named_table]; +named_table(false) -> []. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% tabfile_info/1 reads the head information in an ets table dumped to +%% disk by means of file2tab and returns a list of the relevant table +%% information +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec tabfile_info(fname()) -> {'ok', [table_info()]} | {'error', term()}. + +tabfile_info(File) when is_list(File) ; is_atom(File) -> + try + Name = make_ref(), + {ok, Major, Minor, _FtOptions, _MD5State, FullHeader, _DLContext} = + case disk_log:open([{name, Name}, + {file, File}, + {mode, read_only}]) of + {ok, Name} -> + get_header_data(Name,false); + {repaired, Name, _,_} -> %Uh? cannot happen? + get_header_data(Name,false); + {error, Other1} -> + throw({read_error, Other1}); + Other2 -> + throw(Other2) + end, + disk_log:close(Name), + {value, N} = lists:keysearch(name, 1, FullHeader), + {value, Type} = lists:keysearch(type, 1, FullHeader), + {value, P} = lists:keysearch(protection, 1, FullHeader), + {value, Val} = lists:keysearch(named_table, 1, FullHeader), + {value, Kp} = lists:keysearch(keypos, 1, FullHeader), + {value, Sz} = lists:keysearch(size, 1, FullHeader), + Ei = case lists:keysearch(extended_info, 1, FullHeader) of + {value, Ei0} -> Ei0; + _ -> {extended_info, []} + end, + {ok, [N,Type,P,Val,Kp,Sz,Ei,{version,{Major,Minor}}]} + catch + throw:TReason -> + {error,TReason}; + exit:ExReason -> + {error,ExReason} + end. + +-type qlc__query_handle() :: term(). %% XXX: belongs in 'qlc' + +-type num_objects() :: 'default' | pos_integer(). +-type trav_method() :: 'first_next' | 'last_prev' + | 'select' | {'select', match_specs()}. +-type table_option() :: {'n_objects', num_objects()} + | {'traverse', trav_method()}. + +-spec table(tab()) -> qlc__query_handle(). + +table(Tab) -> + table(Tab, []). + +-spec table(tab(), table_option() | [table_option()]) -> qlc__query_handle(). + +table(Tab, Opts) -> + case options(Opts, [traverse, n_objects]) of + {badarg,_} -> + erlang:error(badarg, [Tab, Opts]); + [[Traverse, NObjs], QlcOptions] -> + TF = case Traverse of + first_next -> + fun() -> qlc_next(Tab, ets:first(Tab)) end; + last_prev -> + fun() -> qlc_prev(Tab, ets:last(Tab)) end; + select -> + fun(MS) -> qlc_select(ets:select(Tab, MS, NObjs)) end; + {select, MS} -> + fun() -> qlc_select(ets:select(Tab, MS, NObjs)) end + end, + PreFun = fun(_) -> ets:safe_fixtable(Tab, true) end, + PostFun = fun() -> ets:safe_fixtable(Tab, false) end, + InfoFun = fun(Tag) -> table_info(Tab, Tag) end, + KeyEquality = case ets:info(Tab, type) of + ordered_set -> '=='; + _ -> '=:=' + end, + LookupFun = + case Traverse of + {select, _MS} -> + undefined; + _ -> + fun(_Pos, [K]) -> + ets:lookup(Tab, K); + (_Pos, Ks) -> + lists:flatmap(fun(K) -> ets:lookup(Tab, K) + end, Ks) + end + end, + FormatFun = + fun({all, _NElements, _ElementFun}) -> + As = [Tab | [Opts || _ <- [[]], Opts =/= []]], + {?MODULE, table, As}; + ({match_spec, MS}) -> + {?MODULE, table, + [Tab, [{traverse, {select, MS}} | + listify(Opts)]]}; + ({lookup, _KeyPos, [Value], _NElements, ElementFun}) -> + io_lib:format("~w:lookup(~w, ~w)", + [?MODULE, Tab, ElementFun(Value)]); + ({lookup, _KeyPos, Values, _NElements, ElementFun}) -> + Vals = [ElementFun(V) || V <- Values], + io_lib:format("lists:flatmap(fun(V) -> " + "~w:lookup(~w, V) end, ~w)", + [?MODULE, Tab, Vals]) + end, + qlc:table(TF, [{pre_fun, PreFun}, {post_fun, PostFun}, + {info_fun, InfoFun}, {format_fun, FormatFun}, + {key_equality, KeyEquality}, + {lookup_fun, LookupFun}] ++ QlcOptions) + end. + +table_info(Tab, num_of_objects) -> + ets:info(Tab, size); +table_info(Tab, keypos) -> + ets:info(Tab, keypos); +table_info(Tab, is_unique_objects) -> + ets:info(Tab, type) =/= duplicate_bag; +table_info(Tab, is_sorted_key) -> + ets:info(Tab, type) =:= ordered_set; +table_info(_Tab, _) -> + undefined. + +qlc_next(_Tab, '$end_of_table') -> + []; +qlc_next(Tab, Key) -> + ets:lookup(Tab, Key) ++ fun() -> qlc_next(Tab, ets:next(Tab, Key)) end. + +qlc_prev(_Tab, '$end_of_table') -> + []; +qlc_prev(Tab, Key) -> + ets:lookup(Tab, Key) ++ fun() -> qlc_prev(Tab, ets:prev(Tab, Key)) end. + +qlc_select('$end_of_table') -> + []; +qlc_select({Objects, Cont}) -> + Objects ++ fun() -> qlc_select(ets:select(Cont)) end. + +options(Options, Keys) when is_list(Options) -> + options(Options, Keys, []); +options(Option, Keys) -> + options([Option], Keys, []). + +options(Options, [Key | Keys], L) when is_list(Options) -> + V = case lists:keysearch(Key, 1, Options) of + {value, {n_objects, default}} -> + {ok, default_option(Key)}; + {value, {n_objects, NObjs}} when is_integer(NObjs), + NObjs >= 1 -> + {ok, NObjs}; + {value, {traverse, select}} -> + {ok, select}; + {value, {traverse, {select, MS}}} -> + {ok, {select, MS}}; + {value, {traverse, first_next}} -> + {ok, first_next}; + {value, {traverse, last_prev}} -> + {ok, last_prev}; + {value, {Key, _}} -> + badarg; + false -> + Default = default_option(Key), + {ok, Default} + end, + case V of + badarg -> + {badarg, Key}; + {ok,Value} -> + NewOptions = lists:keydelete(Key, 1, Options), + options(NewOptions, Keys, [Value | L]) + end; +options(Options, [], L) -> + [lists:reverse(L), Options]. + +default_option(traverse) -> select; +default_option(n_objects) -> 100. + +listify(L) when is_list(L) -> + L; +listify(T) -> + [T]. + +%% End of table/2. + +%% Print info about all tabs on the tty +-spec i() -> 'ok'. + +i() -> + hform('id', 'name', 'type', 'size', 'mem', 'owner'), + io:format(" -------------------------------------" + "---------------------------------------\n"), + lists:foreach(fun prinfo/1, tabs()), + ok. + +tabs() -> + lists:sort(ets:all()). + +prinfo(Tab) -> + case catch prinfo2(Tab) of + {'EXIT', _} -> + io:format("~-10s ... unreadable \n", [to_string(Tab)]); + ok -> + ok + end. +prinfo2(Tab) -> + Name = ets:info(Tab, name), + Type = ets:info(Tab, type), + Size = ets:info(Tab, size), + Mem = ets:info(Tab, memory), + Owner = ets:info(Tab, owner), + hform(Tab, Name, Type, Size, Mem, is_reg(Owner)). + +is_reg(Owner) -> + case process_info(Owner, registered_name) of + {registered_name, Name} -> Name; + _ -> Owner + end. + +%%% Arndt: this code used to truncate over-sized fields. Now it +%%% pushes the remaining entries to the right instead, rather than +%%% losing information. +hform(A0, B0, C0, D0, E0, F0) -> + [A,B,C,D,E,F] = [to_string(T) || T <- [A0,B0,C0,D0,E0,F0]], + A1 = pad_right(A, 15), + B1 = pad_right(B, 17), + C1 = pad_right(C, 5), + D1 = pad_right(D, 6), + E1 = pad_right(E, 8), + %% no need to pad the last entry on the line + io:format(" ~s ~s ~s ~s ~s ~s\n", [A1,B1,C1,D1,E1,F]). + +pad_right(String, Len) -> + if + length(String) >= Len -> + String; + true -> + [Space] = " ", + String ++ lists:duplicate(Len - length(String), Space) + end. + +to_string(X) -> + lists:flatten(io_lib:format("~p", [X])). + +%% view a specific table +-spec i(tab()) -> 'ok'. + +i(Tab) -> + i(Tab, 40). + +-spec i(tab(), pos_integer()) -> 'ok'. + +i(Tab, Height) -> + i(Tab, Height, 80). + +-spec i(tab(), pos_integer(), pos_integer()) -> 'ok'. + +i(Tab, Height, Width) -> + First = ets:first(Tab), + display_items(Height, Width, Tab, First, 1, 1). + +display_items(Height, Width, Tab, '$end_of_table', Turn, Opos) -> + P = 'EOT (q)uit (p)Digits (k)ill /Regexp -->', + choice(Height, Width, P, eot, Tab, '$end_of_table', Turn, Opos); +display_items(Height, Width, Tab, Key, Turn, Opos) when Turn < Height -> + do_display(Height, Width, Tab, Key, Turn, Opos); +display_items(Height, Width, Tab, Key, Turn, Opos) when Turn >= Height -> + P = '(c)ontinue (q)uit (p)Digits (k)ill /Regexp -->', + choice(Height, Width, P, normal, Tab, Key, Turn, Opos). + +choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) -> + case get_line(P, "c\n") of + "c\n" when Mode =:= normal -> + do_display(Height, Width, Tab, Key, 1, Opos); + "c\n" when is_tuple(Mode), element(1, Mode) =:= re -> + {re, Re} = Mode, + re_search(Height, Width, Tab, Key, Re, 1, Opos); + "q\n" -> + ok; + "k\n" -> + ets:delete(Tab), + ok; + [$p|Digs] -> + catch case catch list_to_integer(nonl(Digs)) of + {'EXIT', _} -> + io:put_chars("Bad digits\n"); + Number when Mode =:= normal -> + print_number(Tab, ets:first(Tab), Number); + Number when Mode =:= eot -> + print_number(Tab, ets:first(Tab), Number); + Number -> %% regexp + {re, Re} = Mode, + print_re_num(Tab, ets:first(Tab), Number, Re) + end, + choice(Height, Width, P, Mode, Tab, Key, Turn, Opos); + [$/|Regexp] -> %% from regexp + case re:compile(nonl(Regexp)) of + {ok,Re} -> + re_search(Height, Width, Tab, ets:first(Tab), Re, 1, 1); + {error,{ErrorString,_Pos}} -> + io:format("~s\n", [ErrorString]), + choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) + end; + _ -> + choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) + end. + +get_line(P, Default) -> + case io:get_line(P) of + "\n" -> + Default; + L -> + L + end. + +nonl(S) -> string:strip(S, right, $\n). + +print_number(Tab, Key, Num) -> + Os = ets:lookup(Tab, Key), + Len = length(Os), + if + (Num - Len) < 1 -> + O = lists:nth(Num, Os), + io:format("~p~n", [O]); %% use ppterm here instead + true -> + print_number(Tab, ets:next(Tab, Key), Num - Len) + end. + +do_display(Height, Width, Tab, Key, Turn, Opos) -> + Objs = ets:lookup(Tab, Key), + do_display_items(Height, Width, Objs, Opos), + Len = length(Objs), + display_items(Height, Width, Tab, ets:next(Tab, Key), Turn+Len, Opos+Len). + +do_display_items(Height, Width, [Obj|Tail], Opos) -> + do_display_item(Height, Width, Obj, Opos), + do_display_items(Height, Width, Tail, Opos+1); +do_display_items(_Height, _Width, [], Opos) -> + Opos. + +do_display_item(_Height, Width, I, Opos) -> + L = to_string(I), + L2 = if + length(L) > Width - 8 -> + string:substr(L, 1, Width-13) ++ " ..."; + true -> + L + end, + io:format("<~-4w> ~s~n", [Opos,L2]). + +re_search(Height, Width, Tab, '$end_of_table', Re, Turn, Opos) -> + P = 'EOT (q)uit (p)Digits (k)ill /Regexp -->', + choice(Height, Width, P, {re, Re}, Tab, '$end_of_table', Turn, Opos); +re_search(Height, Width, Tab, Key, Re, Turn, Opos) when Turn < Height -> + re_display(Height, Width, Tab, Key, ets:lookup(Tab, Key), Re, Turn, Opos); +re_search(Height, Width, Tab, Key, Re, Turn, Opos) -> + P = '(c)ontinue (q)uit (p)Digits (k)ill /Regexp -->', + choice(Height, Width, P, {re, Re}, Tab, Key, Turn, Opos). + +re_display(Height, Width, Tab, Key, [], Re, Turn, Opos) -> + re_search(Height, Width, Tab, ets:next(Tab, Key), Re, Turn, Opos); +re_display(Height, Width, Tab, Key, [H|T], Re, Turn, Opos) -> + Str = to_string(H), + case re:run(Str, Re, [{capture,none}]) of + match -> + do_display_item(Height, Width, H, Opos), + re_display(Height, Width, Tab, Key, T, Re, Turn+1, Opos+1); + nomatch -> + re_display(Height, Width, Tab, Key, T, Re, Turn, Opos) + end. + +print_re_num(_,'$end_of_table',_,_) -> ok; +print_re_num(Tab, Key, Num, Re) -> + Os = re_match(ets:lookup(Tab, Key), Re), + Len = length(Os), + if + (Num - Len) < 1 -> + O = lists:nth(Num, Os), + io:format("~p~n", [O]); %% use ppterm here instead + true -> + print_re_num(Tab, ets:next(Tab, Key), Num - Len, Re) + end. + +re_match([], _) -> []; +re_match([H|T], Re) -> + case re:run(to_string(H), Re, [{capture,none}]) of + match -> + [H|re_match(T,Re)]; + nomatch -> + re_match(T, Re) + end. diff --git a/lib/stdlib/src/eval_bits.erl b/lib/stdlib/src/eval_bits.erl new file mode 100644 index 0000000000..3671aecdcb --- /dev/null +++ b/lib/stdlib/src/eval_bits.erl @@ -0,0 +1,348 @@ +%% -*- erlang-indent-level: 4 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1999-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(eval_bits). + +-export([expr_grp/3,expr_grp/5,match_bits/6, + match_bits/7,bin_gen/6]). + +%% Types used in this module: +%% @type bindings(). An abstract structure for bindings between +%% variables and values (the environment) +%% +%% @type evalfun(). A closure which evaluates an expression given an +%% environment +%% +%% @type matchfun(). A closure which performs a match given a value, a +%% pattern and an environment +%% +%% @type field() represents a field in a "bin" + +%%% Part 1: expression evaluation (binary construction) + +%% @spec expr_grp(Fields::[field()], Bindings::bindings(), +%% EvalFun::evalfun()) -> +%% {value, binary(), bindings()} +%% +%% @doc Returns a tuple with {value,Bin,Bs} where Bin is the binary +%% constructed from form the Fields under the current Bindings. Bs +%% contains the present bindings. This function can also throw an +%% exception if the construction fails. + +expr_grp(Fields, Bindings, EvalFun, [], _) -> + expr_grp(Fields, Bindings, EvalFun, <<>>); +expr_grp(Fields, Bindings, EvalFun, ListOfBits, _) -> + Bin = convert_list(ListOfBits), + expr_grp(Fields, Bindings, EvalFun, Bin). + +convert_list(List) -> + << <> || X <- List >>. + +expr_grp(Fields, Bindings, EvalFun) -> + expr_grp(Fields, Bindings, EvalFun, <<>>). + +expr_grp([Field | FS], Bs0, Lf, Acc) -> + {Bin,Bs} = eval_field(Field, Bs0, Lf), + expr_grp(FS, Bs, Lf, <>); +expr_grp([], Bs0, _Lf, Acc) -> + {value,Acc,Bs0}. + +eval_field({bin_element, _, {string, _, S}, default, default}, Bs0, _Fun) -> + {list_to_binary(S),Bs0}; +eval_field({bin_element, Line, {string, _, S}, Size0, Options0}, Bs, _Fun) -> + {_Size,[Type,_Unit,_Sign,Endian]} = + make_bit_type(Line, Size0, Options0), + Res = << <<(eval_exp_field1(C, no_size, no_unit, + Type, Endian, no_sign))/binary>> || + C <- S >>, + {Res,Bs}; +eval_field({bin_element,Line,E,Size0,Options0}, Bs0, Fun) -> + {value,V,Bs1} = Fun(E, Bs0), + {Size1,[Type,{unit,Unit},Sign,Endian]} = + make_bit_type(Line, Size0, Options0), + {value,Size,Bs} = Fun(Size1, Bs1), + {eval_exp_field1(V, Size, Unit, Type, Endian, Sign),Bs}. + +eval_exp_field1(V, Size, Unit, Type, Endian, Sign) -> + try + eval_exp_field(V, Size, Unit, Type, Endian, Sign) + catch + error:system_limit -> + error(system_limit); + error:_ -> + error(badarg) + end. + +eval_exp_field(Val, Size, Unit, integer, little, signed) -> + <>; +eval_exp_field(Val, Size, Unit, integer, little, unsigned) -> + <>; +eval_exp_field(Val, Size, Unit, integer, native, signed) -> + <>; +eval_exp_field(Val, Size, Unit, integer, native, unsigned) -> + <>; +eval_exp_field(Val, Size, Unit, integer, big, signed) -> + <>; +eval_exp_field(Val, Size, Unit, integer, big, unsigned) -> + <>; +eval_exp_field(Val, _Size, _Unit, utf8, _, _) -> + <>; +eval_exp_field(Val, _Size, _Unit, utf16, big, _) -> + <>; +eval_exp_field(Val, _Size, _Unit, utf16, little, _) -> + <>; +eval_exp_field(Val, _Size, _Unit, utf32, big, _) -> + <>; +eval_exp_field(Val, _Size, _Unit, utf32, little, _) -> + <>; +eval_exp_field(Val, Size, Unit, float, little, _) -> + <>; +eval_exp_field(Val, Size, Unit, float, native, _) -> + <>; +eval_exp_field(Val, Size, Unit, float, big, _) -> + <>; +eval_exp_field(Val, all, Unit, binary, _, _) -> + case bit_size(Val) of + Size when Size rem Unit =:= 0 -> + <>; + _ -> + error(badarg) + end; +eval_exp_field(Val, Size, Unit, binary, _, _) -> + <>. + + +%%% Part 2: matching in binary comprehensions +%% @spec bin_gen(BinPattern::{bin,integer(),[field()]}, Bin::binary(), +%% GlobalEnv::bindings(), LocalEnv::bindings(), +%% MatchFun::matchfun(), EvalFun::evalfun()) -> +%% {match, binary(), bindings()} | {nomatch, binary()} | done +%% +%% @doc Used to perform matching in a comprehension. If the match +%% succeeds a new environment and what remains of the binary is +%% returned. If the match fails what remains of the binary is returned. +%% If nothing remains of the binary the atom 'done' is returned. + +bin_gen({bin,_,Fs}, Bin, Bs0, BBs0, Mfun, Efun) -> + bin_gen(Fs, Bin, Bs0, BBs0, Mfun, Efun, true). + +bin_gen([F|Fs], Bin, Bs0, BBs0, Mfun, Efun, Flag) -> + case bin_gen_field(F, Bin, Bs0, BBs0, Mfun, Efun) of + {match,Bs,BBs,Rest} -> + bin_gen(Fs, Rest, Bs, BBs, Mfun, Efun, Flag); + {nomatch,Rest} -> + bin_gen(Fs, Rest, Bs0, BBs0, Mfun, Efun, false); + done -> + done + end; +bin_gen([], Bin, Bs0, _BBs0, _Mfun, _Efun, true) -> + {match, Bin, Bs0}; +bin_gen([], Bin, _Bs0, _BBs0, _Mfun, _Efun, false) -> + {nomatch, Bin}. + +bin_gen_field({bin_element,_,{string,_,S},default,default}, + Bin, Bs, BBs, _Mfun, _Efun) -> + Bits = list_to_binary(S), + Size = byte_size(Bits), + case Bin of + <> -> + {match,Bs,BBs,Rest}; + <<_:Size/binary,Rest/bitstring>> -> + {nomatch,Rest}; + _ -> + done + end; +bin_gen_field({bin_element,Line,VE,Size0,Options0}, + Bin, Bs0, BBs0, Mfun, Efun) -> + {Size1, [Type,{unit,Unit},Sign,Endian]} = + make_bit_type(Line, Size0, Options0), + V = erl_eval:partial_eval(VE), + match_check_size(Size1, BBs0), + {value, Size, _BBs} = Efun(Size1, BBs0), + case catch get_value(Bin, Type, Size, Unit, Sign, Endian) of + {Val,<<_/bitstring>>=Rest} -> + NewV = coerce_to_float(V, Type), + case catch Mfun(NewV, Val, Bs0) of + {match,Bs} -> + BBs = add_bin_binding(NewV, Bs, BBs0), + {match,Bs,BBs,Rest}; + _ -> + {nomatch,Rest} + end; + _ -> + done + end. + +%%% Part 3: binary pattern matching +%% @spec match_bits(Fields::[field()], Bin::binary() +%% GlobalEnv::bindings(), LocalEnv::bindings(), +%% MatchFun::matchfun(),EvalFun::evalfun()) -> +%% {match, bindings()} +%% @doc Used to perform matching. If the match succeeds a new +%% environment is returned. If the match have some syntactic or +%% semantic problem which would have been caught at compile time this +%% function throws 'invalid', if the matching fails for other reasons +%% the function throws 'nomatch' + +match_bits(Fs, Bin, Bs0, BBs, Mfun, Efun, _) -> + match_bits(Fs, Bin, Bs0, BBs, Mfun, Efun). + +match_bits(Fs, Bin, Bs0, BBs, Mfun, Efun) -> + case catch match_bits_1(Fs, Bin, Bs0, BBs, Mfun, Efun) of + {match,Bs} -> {match,Bs}; + invalid -> throw(invalid); + _Error -> throw(nomatch) + end. + +match_bits_1([], <<>>, Bs, _BBs, _Mfun, _Efun) -> + {match,Bs}; +match_bits_1([F|Fs], Bits0, Bs0, BBs0, Mfun, Efun) -> + {Bs,BBs,Bits} = match_field_1(F, Bits0, Bs0, BBs0, Mfun, Efun), + match_bits_1(Fs, Bits, Bs, BBs, Mfun, Efun). + +match_field_1({bin_element,_,{string,_,S},default,default}, + Bin, Bs, BBs, _Mfun, _Efun) -> + Bits = list_to_binary(S), + Size = byte_size(Bits), + <> = Bin, + {Bs,BBs,Rest}; +match_field_1({bin_element,Line,VE,Size0,Options0}, + Bin, Bs0, BBs0, Mfun, Efun) -> + {Size1, [Type,{unit,Unit},Sign,Endian]} = + make_bit_type(Line, Size0, Options0), + V = erl_eval:partial_eval(VE), + Size2 = erl_eval:partial_eval(Size1), + match_check_size(Size2, BBs0), + {value, Size, _BBs} = Efun(Size2, BBs0), + {Val,Rest} = get_value(Bin, Type, Size, Unit, Sign, Endian), + NewV = coerce_to_float(V, Type), + {match,Bs} = Mfun(NewV, Val, Bs0), + BBs = add_bin_binding(NewV, Bs, BBs0), + {Bs,BBs,Rest}. + +%% Almost identical to the one in sys_pre_expand. +coerce_to_float({integer,L,I}=E, float) -> + try + {float,L,float(I)} + catch + error:badarg -> E; + error:badarith -> E + end; +coerce_to_float(E, _Type) -> + E. + +add_bin_binding({var,_,'_'}, _Bs, BBs) -> + BBs; +add_bin_binding({var,_,Name}, Bs, BBs) -> + {value,Value} = erl_eval:binding(Name, Bs), + erl_eval:add_binding(Name, Value, BBs); +add_bin_binding(_, _Bs, BBs) -> + BBs. + +get_value(Bin, integer, Size, Unit, Sign, Endian) -> + get_integer(Bin, Size*Unit, Sign, Endian); +get_value(Bin, float, Size, Unit, _Sign, Endian) -> + get_float(Bin, Size*Unit, Endian); +get_value(Bin, utf8, undefined, _Unit, _Sign, _Endian) -> + <> = Bin, + {I,Rest}; +get_value(Bin, utf16, undefined, _Unit, _Sign, big) -> + <> = Bin, + {I,Rest}; +get_value(Bin, utf16, undefined, _Unit, _Sign, little) -> + <> = Bin, + {I,Rest}; +get_value(Bin, utf32, undefined, _Unit, _Sign, big) -> + <> = Bin, + {Val,Rest}; +get_value(Bin, utf32, undefined, _Unit, _Sign, little) -> + <> = Bin, + {Val,Rest}; +get_value(Bin, binary, all, Unit, _Sign, _Endian) -> + 0 = (bit_size(Bin) rem Unit), + {Bin,<<>>}; +get_value(Bin, binary, Size, Unit, _Sign, _Endian) -> + TotSize = Size*Unit, + <> = Bin, + {Val,Rest}. + +get_integer(Bin, Size, signed, little) -> + <> = Bin, + {Val,Rest}; +get_integer(Bin, Size, unsigned, little) -> + <> = Bin, + {Val,Rest}; +get_integer(Bin, Size, signed, native) -> + <> = Bin, + {Val,Rest}; +get_integer(Bin, Size, unsigned, native) -> + <> = Bin, + {Val,Rest}; +get_integer(Bin, Size, signed, big) -> + <> = Bin, + {Val,Rest}; +get_integer(Bin, Size, unsigned, big) -> + <> = Bin, + {Val,Rest}. + +get_float(Bin, Size, little) -> + <> = Bin, + {Val,Rest}; +get_float(Bin, Size, native) -> + <> = Bin, + {Val,Rest}; +get_float(Bin, Size, big) -> + <> = Bin, + {Val,Rest}. + +%% Identical to the one in sys_pre_expand. +make_bit_type(Line, default, Type0) -> + case erl_bits:set_bit_type(default, Type0) of + {ok,all,Bt} -> {{atom,Line,all},erl_bits:as_list(Bt)}; + {ok,undefined,Bt} -> {{atom,Line,undefined},erl_bits:as_list(Bt)}; + {ok,Size,Bt} -> {{integer,Line,Size},erl_bits:as_list(Bt)}; + {error,Reason} -> error(Reason) + end; +make_bit_type(_Line, Size, Type0) -> %Size evaluates to an integer or 'all' + case erl_bits:set_bit_type(Size, Type0) of + {ok,Size,Bt} -> {Size,erl_bits:as_list(Bt)}; + {error,Reason} -> error(Reason) + end. + +match_check_size({var,_,V}, Bs) -> + case erl_eval:binding(V, Bs) of + {value,_} -> ok; + unbound -> throw(invalid) % or, rather, error({unbound,V}) + end; +match_check_size({atom,_,all}, _Bs) -> + ok; +match_check_size({atom,_,undefined}, _Bs) -> + ok; +match_check_size({integer,_,_}, _Bs) -> + ok; +match_check_size({value,_,_}, _Bs) -> + ok; %From the debugger. +match_check_size(_, _Bs) -> + throw(invalid). + +%% error(Reason) -> exception thrown +%% Throw a nice-looking exception, similar to exceptions from erl_eval. +error(Reason) -> + erlang:raise(error, Reason, [{erl_eval,expr,3}]). + diff --git a/lib/stdlib/src/file_sorter.erl b/lib/stdlib/src/file_sorter.erl new file mode 100644 index 0000000000..de9e628e22 --- /dev/null +++ b/lib/stdlib/src/file_sorter.erl @@ -0,0 +1,1500 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(file_sorter). + +-export([sort/1, sort/2, sort/3, + keysort/2, keysort/3, keysort/4, + merge/2, merge/3, + keymerge/3, keymerge/4, + check/1, check/2, + keycheck/2, keycheck/3]). + +-include_lib("kernel/include/file.hrl"). + +-define(CHUNKSIZE, 16384). +-define(RUNSIZE, 524288). +-define(NOMERGE, 16). +-define(MERGESIZE, ?CHUNKSIZE). + +-define(MAXSIZE, (1 bsl 31)). + +-record(w, {keypos, runs = [[]], seq = 1, in, out, fun_out, prefix, temp = [], + format, runsize, no_files, order, chunksize, wfd, ref, z, unique, + hdlen, inout_value}). + +-record(opts, {format = binary_term_fun(), size = ?RUNSIZE, + no_files = ?NOMERGE, tmpdir = default, order = ascending, + compressed = false, unique = false, header = 4}). + +-compile({inline, [{badarg, 2}, {make_key,2}, {make_stable_key,3}, {cfun,3}]}). + +%%% +%%% Exported functions +%%% + +sort(FileName) -> + sort([FileName], FileName). + +sort(Input, Output) -> + sort(Input, Output, []). + +sort(Input0, Output0, Options) -> + case {is_input(Input0), maybe_output(Output0), options(Options)} of + {{true,Input}, {true,Output}, #opts{}=Opts} -> + do_sort(0, Input, Output, Opts, sort); + T -> + badarg(culprit(tuple_to_list(T)), [Input0, Output0, Options]) + end. + +keysort(KeyPos, FileName) -> + keysort(KeyPos, [FileName], FileName). + +keysort(KeyPos, Input, Output) -> + keysort(KeyPos, Input, Output, []). + +keysort(KeyPos, Input0, Output0, Options) -> + R = case {is_keypos(KeyPos), is_input(Input0), + maybe_output(Output0), options(Options)} of + {_, _, _, #opts{format = binary}} -> + {Input0,Output0,[{badarg,format}]}; + {_, _, _, #opts{order = Order}} when is_function(Order) -> + {Input0,Output0,[{badarg,order}]}; + {true, {true,In}, {true,Out}, #opts{}=Opts} -> + {In,Out,Opts}; + T -> + {Input0,Output0,tuple_to_list(T)} + end, + case R of + {Input,Output,#opts{}=O} -> + do_sort(KeyPos, Input, Output, O, sort); + {_,_,O} -> + badarg(culprit(O), [KeyPos, Input0, Output0, Options]) + end. + +merge(Files, Output) -> + merge(Files, Output, []). + +merge(Files0, Output0, Options) -> + case {is_files(Files0), maybe_output(Output0), options(Options)} of + %% size not used + {{true,Files}, {true,Output}, #opts{}=Opts} -> + do_sort(0, Files, Output, Opts, merge); + T -> + badarg(culprit(tuple_to_list(T)), [Files0, Output0, Options]) + end. + +keymerge(KeyPos, Files, Output) -> + keymerge(KeyPos, Files, Output, []). + +keymerge(KeyPos, Files0, Output0, Options) -> + R = case {is_keypos(KeyPos), is_files(Files0), + maybe_output(Output0), options(Options)} of + {_, _, _, #opts{format = binary}} -> + {Files0,Output0,[{badarg,format}]}; + {_, _, _, #opts{order = Order}} when is_function(Order) -> + {Files0,Output0,[{badarg,order}]}; + {true, {true,Fs}, {true,Out}, #opts{}=Opts} -> + {Fs,Out,Opts}; + T -> + {Files0,Output0,tuple_to_list(T)} + end, + case R of + {Files,Output,#opts{}=O} -> + do_sort(KeyPos, Files, Output, O, merge); + {_,_,O} -> + badarg(culprit(O), [KeyPos, Files0, Output0, Options]) + end. + +check(FileName) -> + check([FileName], []). + +check(Files0, Options) -> + case {is_files(Files0), options(Options)} of + {{true,Files}, #opts{}=Opts} -> + do_sort(0, Files, undefined, Opts, check); + T -> + badarg(culprit(tuple_to_list(T)), [Files0, Options]) + end. + +keycheck(KeyPos, FileName) -> + keycheck(KeyPos, [FileName], []). + +keycheck(KeyPos, Files0, Options) -> + R = case {is_keypos(KeyPos), is_files(Files0), options(Options)} of + {_, _, #opts{format = binary}} -> + {Files0,[{badarg,format}]}; + {_, _, #opts{order = Order}} when is_function(Order) -> + {Files0,[{badarg,order}]}; + {true, {true,Fs}, #opts{}=Opts} -> + {Fs,Opts}; + T -> + {Files0,tuple_to_list(T)} + end, + case R of + {Files,#opts{}=O} -> + do_sort(KeyPos, Files, undefined, O, check); + {_,O} -> + badarg(culprit(O), [KeyPos, Files0, Options]) + end. + +%%% +%%% Local functions +%%% + +%%-define(debug, true). + +-ifdef(debug). +-define(DEBUG(S, A), io:format(S, A)). +-else. +-define(DEBUG(S, A), ok). +-endif. + +culprit([{error, _} = E | _]) -> + E; +culprit([{badarg, _} = B | _]) -> + B; +culprit([_ | B]) -> + culprit(B). + +%% Inlined. +badarg({error, _} = E, _Args) -> + E; +badarg({badarg, _} = B, Args) -> + erlang:error(B, Args). + +options(Options) when is_list(Options) -> + options(Options, #opts{}); +options(Option) -> + options([Option]). + +options([{format, Format} | L], Opts) when Format =:= binary; + Format =:= term; + is_function(Format), + is_function(Format, 1) -> + options(L, Opts#opts{format = Format}); +options([{format, binary_term} | L], Opts) -> + options(L, Opts#opts{format = binary_term_fun()}); +options([{size, Size} | L], Opts) when is_integer(Size), Size >= 0 -> + options(L, Opts#opts{size = max(Size, 1)}); +options([{no_files, NoFiles} | L], Opts) when is_integer(NoFiles), + NoFiles > 1 -> + options(L, Opts#opts{no_files = NoFiles}); +options([{tmpdir, ""} | L], Opts) -> + options(L, Opts#opts{tmpdir = default}); +options([{tmpdir, Dir} | L], Opts) -> + case catch filename:absname(Dir) of + {'EXIT', _} -> + {badarg, Dir}; + FileName -> + options(L, Opts#opts{tmpdir = {dir, FileName}}) + end; +options([{order, Fun} | L], Opts) when is_function(Fun), is_function(Fun, 2) -> + options(L, Opts#opts{order = Fun}); +options([{order, Order} | L], Opts) when Order =:= ascending; + Order =:= descending -> + options(L, Opts#opts{order = Order}); +options([{compressed, Bool} | L], Opts) when is_boolean(Bool) -> + options(L, Opts#opts{compressed = Bool}); +options([{unique, Bool} | L], Opts) when is_boolean(Bool) -> + options(L, Opts#opts{unique = Bool}); +options([{header, Len} | L], Opts) + when is_integer(Len), Len > 0, Len < ?MAXSIZE -> + options(L, Opts#opts{header = Len}); +options([], Opts) -> + if + Opts#opts.format =:= term, Opts#opts.header =/= 4 -> + {badarg, header}; + true -> + Opts + end; +options([Bad | _], _Opts) -> + {badarg, Bad}; +options(Bad, _Opts) -> + {badarg, Bad}. + +-define(OBJ(X, Y), {X, Y}). +-define(SK(T, I), [T | I]). % stable key + +do_sort(KeyPos0, Input0, Output0, Opts, Do) -> + #opts{format = Format0, size = Size, no_files = NoFiles, + tmpdir = TmpDir, order = Order, compressed = Compressed, + unique = Unique, header = HdLen} = Opts, + Prefix = tmp_prefix(Output0, TmpDir), + ChunkSize = ?CHUNKSIZE, + Ref = make_ref(), + KeyPos = case KeyPos0 of [Kp] -> Kp; _ -> KeyPos0 end, + {Format, Input} = wrap_input(Format0, Do, Input0), + Z = if Compressed -> [compressed]; true -> [] end, + {Output, FunOut} = wrap_output_terms(Format0, Output0, Z), + W = #w{keypos = KeyPos, out = Output, fun_out = FunOut, + prefix = Prefix, format = Format, runsize = Size, + no_files = NoFiles, order = Order, chunksize = ChunkSize, + ref = Ref, z = Z, unique = Unique, hdlen = HdLen, + inout_value = no_value}, + try + doit(Do, Input, W) + catch {Ref,Error} -> + Error + end. + +doit(sort, Input, W) -> + files(1, [], 0, W, Input); +doit(merge, Input, W) -> + last_merge(Input, W); +doit(check, Input, W) -> + check_files(Input, W, []). + +wrap_input(term, check, Files) -> + Fun = fun(File) -> + Fn = merge_terms_fun(file_rterms(no_file, [File])), + {fn, Fn, File} + end, + {binary_term_fun(), [Fun(F) || F <- Files]}; +wrap_input(Format, check, Files) -> + {Format, Files}; +wrap_input(term, merge, Files) -> + Fun = fun(File) -> merge_terms_fun(file_rterms(no_file, [File])) end, + Input = lists:reverse([Fun(F) || F <- Files]), + {binary_term_fun(), Input}; +wrap_input(Format, merge, Files) -> + Input = lists:reverse([merge_bins_fun(F) || F <- Files]), + {Format, Input}; +wrap_input(term, sort, InFun) when is_function(InFun, 1) -> + {binary_term_fun(), fun_rterms(InFun)}; +wrap_input(term, sort, Files) -> + {binary_term_fun(), file_rterms(no_file, Files)}; +wrap_input(Format, sort, Input) -> + {Format, Input}. + +merge_terms_fun(RFun) -> + fun(close) -> + RFun(close); + ({I, [], _LSz, W}) -> + case RFun(read) of + end_of_input -> + eof; + {Objs, NRFun} when is_function(NRFun), is_function(NRFun, 1) -> + {_, [], Ts, _} = fun_objs(Objs, [], 0, ?MAXSIZE, I, W), + {{I, Ts, ?CHUNKSIZE}, merge_terms_fun(NRFun)}; + Error -> + error(Error, W) + end + end. + +merge_bins_fun(FileName) -> + fun(close) -> + ok; + ({_I, _L, _LSz, W} = A) -> + Fun = read_fun(FileName, user, W), + Fun(A) + end. + +wrap_output_terms(term, OutFun, _Z) when is_function(OutFun), + is_function(OutFun, 1) -> + {fun_wterms(OutFun), true}; +wrap_output_terms(term, File, Z) when File =/= undefined -> + {file_wterms(name, File, Z++[write]), false}; +wrap_output_terms(_Format, Output, _Z) -> + {Output, is_function(Output) and is_function(Output, 1)}. + +binary_term_fun() -> + fun binary_to_term/1. + +check_files([], _W, L) -> + {ok, lists:reverse(L)}; +check_files([FN | FNs], W, L) -> + {IFun, FileName} = + case FN of + {fn, Fun, File} -> + {Fun, File}; + File -> + {read_fun(File, user, W), File} + end, + NW = W#w{in = IFun}, + check_run(IFun, FileName, FNs, NW, L, 2, nolast). + +check_run(IFun, F, FNs, W, L, I, Last) -> + case IFun({{merge,I}, [], 0, W}) of + {{_I, Objs, _LSz}, IFun1} -> + NW = W#w{in = IFun1}, + check_objs0(IFun1, F, FNs, NW, L, I, Last, lists:reverse(Objs)); + eof -> + NW = W#w{in = undefined}, + check_files(FNs, NW, L) + end. + +check_objs0(IFun, F, FNs, W, L, I, nolast, [?OBJ(T,_BT) | Os]) -> + check_objs1(IFun, F, FNs, W, L, I, T, Os); +check_objs0(IFun, F, FNs, W, L, I, Last, []) -> + check_run(IFun, F, FNs, W, L, I, Last); +check_objs0(IFun, F, FNs, W, L, I, {last, Last}, Os) -> + check_objs1(IFun, F, FNs, W, L, I, Last, Os). + +check_objs1(IFun, F, FNs, W, L, I, LastT, Os) -> + case W of + #w{order = ascending, unique = true} -> + ucheck_objs(IFun, F, FNs, W, L, I, LastT, Os); + #w{order = ascending, unique = false} -> + check_objs(IFun, F, FNs, W, L, I, LastT, Os); + #w{order = descending, unique = true} -> + rucheck_objs(IFun, F, FNs, W, L, I, LastT, Os); + #w{order = descending, unique = false} -> + rcheck_objs(IFun, F, FNs, W, L, I, LastT, Os); + #w{order = CF, unique = true} -> + uccheck_objs(IFun, F, FNs, W, L, I, LastT, Os, CF); + #w{order = CF, unique = false} -> + ccheck_objs(IFun, F, FNs, W, L, I, LastT, Os, CF) + end. + +check_objs(IFun, F, FNs, W, L, I, Last, [?OBJ(T,_BT) | Os]) when T >= Last -> + check_objs(IFun, F, FNs, W, L, I+1, T, Os); +check_objs(IFun, F, FNs, W, L, I, _Last, [?OBJ(_T,BT) | _]) -> + culprit_found(IFun, F, FNs, W, L, I, BT); +check_objs(IFun, F, FNs, W, L, I, Last, []) -> + check_run(IFun, F, FNs, W, L, I, {last, Last}). + +rcheck_objs(IFun, F, FNs, W, L, I, Last, [?OBJ(T,_BT) | Os]) when T =< Last -> + rcheck_objs(IFun, F, FNs, W, L, I+1, T, Os); +rcheck_objs(IFun, F, FNs, W, L, I, _Last, [?OBJ(_T,BT) | _]) -> + culprit_found(IFun, F, FNs, W, L, I, BT); +rcheck_objs(IFun, F, FNs, W, L, I, Last, []) -> + check_run(IFun, F, FNs, W, L, I, {last, Last}). + +ucheck_objs(IFun, F, FNs, W, L, I, LT, [?OBJ(T,_BT) | Os]) when T > LT -> + ucheck_objs(IFun, F, FNs, W, L, I+1, T, Os); +ucheck_objs(IFun, F, FNs, W, L, I, _LT, [?OBJ(_T,BT) | _]) -> + culprit_found(IFun, F, FNs, W, L, I, BT); +ucheck_objs(IFun, F, FNs, W, L, I, LT, []) -> + check_run(IFun, F, FNs, W, L, I, {last, LT}). + +rucheck_objs(IFun, F, FNs, W, L, I, LT, [?OBJ(T,_BT) | Os]) when T < LT -> + rucheck_objs(IFun, F, FNs, W, L, I+1, T, Os); +rucheck_objs(IFun, F, FNs, W, L, I, _LT, [?OBJ(_T,BT) | _]) -> + culprit_found(IFun, F, FNs, W, L, I, BT); +rucheck_objs(IFun, F, FNs, W, L, I, LT, []) -> + check_run(IFun, F, FNs, W, L, I, {last, LT}). + +ccheck_objs(IFun, F, FNs, W, L, I, LT, [?OBJ(T,BT) | Os], CF) -> + case CF(LT, T) of + true -> % LT =< T + ccheck_objs(IFun, F, FNs, W, L, I+1, T, Os, CF); + false -> % LT > T + culprit_found(IFun, F, FNs, W, L, I, BT) + end; +ccheck_objs(IFun, F, FNs, W, L, I, LT, [], _CF) -> + check_run(IFun, F, FNs, W, L, I, {last, LT}). + +uccheck_objs(IFun, F, FNs, W, L, I, LT, [?OBJ(T,BT) | Os], CF) -> + case CF(LT, T) of + true -> % LT =< T + case CF(T, LT) of + true -> % T equal to LT + culprit_found(IFun, F, FNs, W, L, I, BT); + false -> % LT < T + uccheck_objs(IFun, F, FNs, W, L, I+1, T, Os, CF) + end; + false -> % LT > T + culprit_found(IFun, F, FNs, W, L, I, BT) + end; +uccheck_objs(IFun, F, FNs, W, L, I, LT, [], _CF) -> + check_run(IFun, F, FNs, W, L, I, {last, LT}). + +culprit_found(IFun, F, FNs, W, L, I, [_Size | BT]) -> + IFun(close), + check_files(FNs, W, [{F,I,binary_to_term(BT)} | L]). + +files(_I, L, _LSz, #w{seq = 1}=W, []) -> + %% No temporary files created, everything in L. + case W#w.out of + Fun when is_function(Fun) -> + SL = internal_sort(L, W), + W1 = outfun(binterm_objects(SL, []), W), + NW = close_input(W1), + outfun(close, NW); + Out -> + write_run(L, W, Out), + ok + end; +files(_I, L, _LSz, W, []) -> + W1 = write_run(L, W), + last_merge(lists:append(W1#w.runs), W1); +files(I, L, LSz, W, Fun) when is_function(Fun) -> + NW = W#w{in = Fun}, + fun_run(I, L, LSz, NW, []); +files(I, L, LSz, W, [FileName | FileNames]) -> + InFun = read_fun(FileName, user, W), + NW = W#w{in = InFun}, + file_run(InFun, FileNames, I, L, LSz, NW). + +file_run(InFun, FileNames, I, L, LSz, W) when LSz < W#w.runsize -> + case InFun({I, L, LSz, W}) of + {{I1, L1, LSz1}, InFun1} -> + NW = W#w{in = InFun1}, + file_run(InFun1, FileNames, I1, L1, LSz1, NW); + eof -> + NW = W#w{in = undefined}, + files(I, L, LSz, NW, FileNames) + end; +file_run(InFun, FileNames, I, L, _LSz, W) -> + NW = write_run(L, W), + file_run(InFun, FileNames, I, [], 0, NW). + +fun_run(I, L, LSz, W, []) -> + case infun(W) of + {end_of_input, NW} -> + files(I, L, LSz, NW, []); + {cont, NW, Objs} -> + fun_run(I, L, LSz, NW, Objs) + end; +fun_run(I, L, LSz, W, Objs) when LSz < W#w.runsize -> + {NI, NObjs, NL, NLSz} = fun_objs(Objs, L, LSz, W#w.runsize, I, W), + fun_run(NI, NL, NLSz, W, NObjs); +fun_run(I, L, _LSz, W, Objs) -> + NW = write_run(L, W), + fun_run(I, [], 0, NW, Objs). + +write_run([], W) -> + W; +write_run(L, W) -> + {W1, Temp} = next_temp(W), + NW = write_run(L, W1, Temp), + [R | Rs] = NW#w.runs, + merge_runs([[Temp | R] | Rs], [], NW). + +write_run(L, W, FileName) -> + SL = internal_sort(L, W), + BTs = binterms(SL, []), + {Fd, W1} = open_file(FileName, W), + write(Fd, FileName, BTs, W1), + close_file(Fd, W1). + +%% Returns a list in reversed order. +internal_sort([]=L, _W) -> + L; +internal_sort(L, #w{order = CFun, unique = Unique}) when is_function(CFun) -> + Fun = fun(?OBJ(T1, _), ?OBJ(T2, _)) -> CFun(T1, T2) end, + RL = lists:reverse(L), + lists:reverse(if + Unique -> + lists:usort(Fun, RL); + true -> + lists:sort(Fun, RL) + end); +internal_sort(L, #w{unique = true, keypos = 0}=W) -> + rev(lists:usort(L), W); +internal_sort(L, #w{unique = false, keypos = 0}=W) -> + rev(lists:sort(L), W); +internal_sort(L, #w{unique = true}=W) -> + rev(lists:ukeysort(1, lists:reverse(L)), W); +internal_sort(L, #w{unique = false}=W) -> + rev(lists:keysort(1, lists:reverse(L)), W). + +rev(L, #w{order = ascending}) -> + lists:reverse(L); +rev(L, #w{order = descending}) -> + L. + +last_merge(R, W) when length(R) =< W#w.no_files -> + case W#w.out of + Fun when is_function(Fun) -> + {Fs, W1} = init_merge(lists:reverse(R), 1, [], W), + ?DEBUG("merging ~p~n", [lists:reverse(R)]), + W2 = merge_files(Fs, [], 0, nolast, W1), + NW = close_input(W2), + outfun(close, NW); + Out -> + merge_files(R, W, Out), + ok + end; +last_merge(R, W) -> + L = lists:sublist(R, W#w.no_files), + {M, NW} = merge_files(L, W), + last_merge([M | lists:nthtail(W#w.no_files, R)], NW). + +merge_runs([R | Rs], NRs0, W) when length(R) < W#w.no_files -> + NRs = lists:reverse(NRs0) ++ [R | Rs], + W#w{runs = NRs}; +merge_runs([R], NRs0, W) -> + {M, NW} = merge_files(R, W), + NRs = [[] | lists:reverse([[M] | NRs0])], + NW#w{runs = NRs}; +merge_runs([R, R1 | Rs], NRs0, W) -> + {M, NW} = merge_files(R, W), + merge_runs([[M | R1] | Rs], [[] | NRs0], NW). + +merge_files(R, W) -> + {W1, Temp} = next_temp(W), + ?DEBUG("merging ~p~nto ~p~n", [lists:reverse(R), Temp]), + {Temp, merge_files(R, W1, Temp)}. + +merge_files(R, W, FileName) -> + {Fs, W1} = init_merge(lists:reverse(R), 1, [], W), + {Fd, W2} = open_file(FileName, W1), + W3 = W2#w{wfd = {Fd, FileName}}, + W4 = merge_files(Fs, [], 0, nolast, W3), + NW = W4#w{wfd = undefined}, + close_file(Fd, NW). + +%% A file number, I, is used for making the merge phase stable. +init_merge([FN | FNs], I, Fs, W) -> + IFun = case FN of + _ when is_function(FN) -> + %% When and only when merge/2,3 or keymerge/3,4 was called. + FN; + _ -> + read_fun(FN, fsort, W) + end, + W1 = W#w{temp = [IFun | lists:delete(FN, W#w.temp)]}, + case read_more(IFun, I, 0, W1) of + {Ts, _LSz, NIFun, NW} -> + InEtc = {I, NIFun}, + init_merge(FNs, I+1, [[Ts | InEtc] | Fs], NW); + {eof, NW} -> % can only happen when merging files + init_merge(FNs, I+1, Fs, NW) + end; +init_merge([], _I, Fs0, #w{order = ascending}=W) -> + {lists:sort(Fs0), W}; +init_merge([], _I, Fs0, #w{order = descending}=W) -> + {lists:reverse(lists:sort(Fs0)), W}; +init_merge([], _I, Fs0, #w{order = Order}=W) when is_function(Order) -> + {lists:sort(cfun_files(W#w.order), lists:reverse(Fs0)), W}. + +cfun_files(CFun) -> + fun(F1, F2) -> + [[?OBJ(T1,_) | _] | _] = F1, + [[?OBJ(T2,_) | _] | _] = F2, + CFun(T1, T2) + end. + +%% The argument Last is used when unique = true. It is the last kept +%% element. +%% LSz is not the sum of the sizes of objects in L. Instead it is +%% the number of bytes read. After init_merge it is set to 0, which +%% means that the first chunk written may be quite large (it may take +%% a while before buffers are exhausted). +merge_files([F1, F2 | Fs], L0, LSz, Last0, W) when LSz < ?MERGESIZE -> + [Ts0 | InEtc] = F1, + Kind = merge_kind(W), + {Last, L, Ts} = case {Last0, Kind} of + {{last, Lst}, Kind} -> + {Lst, L0, Ts0}; + {nolast, {ukmerge, _Kp}} -> + [?OBJ(?SK(T, _I), BT) | Ts1] = Ts0, + {T, [BT], Ts1}; + {nolast, {rukmerge, _Kp}} -> + [?OBJ(?SK(T, _I), BT) | Ts1] = Ts0, + {{T, BT}, [], Ts1}; + {nolast, _} -> + [?OBJ(T, BT) | Ts1] = Ts0, + {T, [BT], Ts1} + end, + [[?OBJ(T2, BT2) | Ts2T] = Ts2 | InEtc2] = F2, + {NInEtc, NFs, NL, NLast} = + case Kind of + umerge -> + umerge_files(L, F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Last); + {ukmerge, Kp} -> + ukmerge_files(L, F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Kp, Last); + merge -> + merge_files(L, F2, Fs, InEtc2, BT2, Ts2T, Ts, InEtc, T2); + rumerge -> + rumerge_files(L, F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Last); + {rukmerge, Kp} -> + {Lt, LtBT} = Last, + rukmerge_files(L, F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Kp, + Lt, LtBT); + rmerge -> + rmerge_files(L, F2, Fs, InEtc2, BT2, Ts2T, Ts, InEtc, T2); + {ucmerge, CF} -> + {I2, _} = InEtc2, + {I, _} = InEtc, + ucmerge_files(L, F2, Fs, InEtc2, Ts2, I2, Ts, I, InEtc, T2, CF, + Last); + {cmerge, CF} -> + {I2, _} = InEtc2, + {I, _} = InEtc, + cmerge_files(L, F2, Fs, InEtc2, BT2, Ts2T, I2, Ts, I, InEtc, T2, + CF) + end, + read_chunk(NInEtc, NFs, NL, LSz, NLast, W); +merge_files([F1], L, LSz, Last, W) when LSz < ?MERGESIZE -> + [Ts | InEtc] = F1, + NL = last_file(Ts, L, Last, merge_kind(W), W), + read_chunk(InEtc, [], NL, LSz, nolast, W); +merge_files([], [], 0, nolast, W) -> + %% When merging files, ensure that the output fun (if there is + %% one) is called at least once before closing. + merge_write(W, []); +merge_files([], L, _LSz, Last, W) -> + Last = nolast, + merge_write(W, L); +merge_files(Fs, L, _LSz, Last, W) -> + NW = merge_write(W, L), + merge_files(Fs, [], 0, Last, NW). + +merge_kind(#w{order = ascending, unique = true, keypos = 0}) -> + umerge; +merge_kind(#w{order = ascending, unique = true, keypos = Kp}) -> + {ukmerge, Kp}; +merge_kind(#w{order = ascending, unique = false}) -> + merge; +merge_kind(#w{order = descending, unique = true, keypos = 0}) -> + rumerge; +merge_kind(#w{order = descending, unique = true, keypos = Kp}) -> + {rukmerge, Kp}; +merge_kind(#w{order = descending, unique = false}) -> + rmerge; +merge_kind(#w{order = CF, unique = true}) -> + {ucmerge, CF}; +merge_kind(#w{order = CF, unique = false}) -> + {cmerge, CF}. + +merge_write(W, L) -> + case {W#w.wfd, W#w.out} of + {undefined, Fun} when is_function(Fun) -> + outfun(objects(L, []), W); + {{Fd, FileName}, _} -> + write(Fd, FileName, lists:reverse(L), W), + W + end. + +umerge_files(L, F2, Fs, InEtc2, Ts2, [?OBJ(T, _BT) | Ts], InEtc, T2, Last) + when T == Last -> + umerge_files(L, F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Last); +umerge_files(L, F2, Fs, InEtc2, Ts2, [?OBJ(T, BT) | Ts], InEtc, T2, _Last) + when T =< T2 -> + umerge_files([BT | L], F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, T); +umerge_files(L, F2, Fs, _InEtc2, _Ts2, [], InEtc, _T2, Last) -> + {InEtc, [F2 | Fs], L, {last, Last}}; +umerge_files(L, _F2, Fs, InEtc2, Ts2, Ts, InEtc, _T2, Last) -> + [F3 | NFs] = insert([Ts | InEtc], Fs), + [[?OBJ(T3,_BT3) | _] = Ts3 | InEtc3] = F3, + umerge_files(L, F3, NFs, InEtc3, Ts3, Ts2, InEtc2, T3, Last). + +rumerge_files(L, F2, Fs, InEtc2, Ts2, [?OBJ(T, _BT) | Ts], InEtc, T2, Last) + when T == Last -> + rumerge_files(L, F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Last); +rumerge_files(L, F2, Fs, InEtc2, Ts2, [?OBJ(T, BT) | Ts], InEtc, T2, _Last) + when T >= T2 -> + rumerge_files([BT | L], F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, T); +rumerge_files(L, F2, Fs, _InEtc2, _Ts2, [], InEtc, _T2, Last) -> + {InEtc, [F2 | Fs], L, {last, Last}}; +rumerge_files(L, _F2, Fs, InEtc2, Ts2, Ts, InEtc, _T2, Last) -> + [F3 | NFs] = rinsert([Ts | InEtc], Fs), + [[?OBJ(T3,_BT3) | _] = Ts3 | InEtc3] = F3, + rumerge_files(L, F3, NFs, InEtc3, Ts3, Ts2, InEtc2, T3, Last). + +merge_files(L, F2, Fs, InEtc2, BT2, Ts2, [?OBJ(T, BT) | Ts], InEtc, T2) + when T =< T2 -> + merge_files([BT | L], F2, Fs, InEtc2, BT2, Ts2, Ts, InEtc, T2); +merge_files(L, F2, Fs, _InEtc2, _BT2, _Ts2, [], InEtc, _T2) -> + {InEtc, [F2 | Fs], L, {last, foo}}; +merge_files(L, _F2, Fs, InEtc2, BT2, Ts2, Ts, InEtc, _T2) -> + L1 = [BT2 | L], + [F3 | NFs] = insert([Ts | InEtc], Fs), + [[?OBJ(T3,BT3) | Ts3] | InEtc3] = F3, + merge_files(L1, F3, NFs, InEtc3, BT3, Ts3, Ts2, InEtc2, T3). + +rmerge_files(L, F2, Fs, InEtc2, BT2, Ts2, [?OBJ(T, BT) | Ts], InEtc, T2) + when T >= T2 -> + rmerge_files([BT | L], F2, Fs, InEtc2, BT2, Ts2, Ts, InEtc, T2); +rmerge_files(L, F2, Fs, _InEtc2, _BT2, _Ts2, [], InEtc, _T2) -> + {InEtc, [F2 | Fs], L, {last, foo}}; +rmerge_files(L, _F2, Fs, InEtc2, BT2, Ts2, Ts, InEtc, _T2) -> + L1 = [BT2 | L], + [F3 | NFs] = rinsert([Ts | InEtc], Fs), + [[?OBJ(T3,BT3) | Ts3] | InEtc3] = F3, + rmerge_files(L1, F3, NFs, InEtc3, BT3, Ts3, Ts2, InEtc2, T3). + +ukmerge_files(L, F2, Fs, InEtc2, Ts2, [?OBJ(?SK(T, _I),_BT) | Ts], InEtc, + T2, Kp, Last) when T == Last -> + ukmerge_files(L, F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Kp, Last); +ukmerge_files(L, F2, Fs, InEtc2, Ts2, [?OBJ(?SK(T0,_I)=T,BT) | Ts], InEtc, + T2, Kp, _Last) when T =< T2 -> + ukmerge_files([BT | L], F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Kp, T0); +ukmerge_files(L, F2, Fs, _InEtc2, _Ts2, [], InEtc, _T2, _Kp, Last) -> + {InEtc, [F2 | Fs], L, {last, Last}}; +ukmerge_files(L, _F2, Fs, InEtc2, Ts2, Ts, InEtc, _T2, Kp, Last) -> + [F3 | NFs] = insert([Ts | InEtc], Fs), + [[?OBJ(T3,_BT3) | _] = Ts3 | InEtc3] = F3, + ukmerge_files(L, F3, NFs, InEtc3, Ts3, Ts2, InEtc2, T3, Kp, Last). + +rukmerge_files(L, F2, Fs, InEtc2, Ts2, [?OBJ(?SK(T, _I), BT) | Ts], InEtc, + T2, Kp, Last, _LastBT) when T == Last -> + rukmerge_files(L, F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Kp, T, BT); +rukmerge_files(L, F2, Fs, InEtc2, Ts2, [?OBJ(?SK(T0, _I)=T, BT) | Ts], InEtc, + T2, Kp, _Last, LastBT) when T >= T2 -> + rukmerge_files([LastBT|L], F2, Fs, InEtc2, Ts2, Ts, InEtc, T2, Kp, T0,BT); +rukmerge_files(L, F2, Fs, _InEtc2, _Ts2, [], InEtc, _T2, _Kp, Last, LastBT) -> + {InEtc, [F2 | Fs], L, {last, {Last, LastBT}}}; +rukmerge_files(L, _F2, Fs, InEtc2, Ts2, Ts, InEtc, _T2, Kp, Last, LastBT) -> + [F3 | NFs] = rinsert([Ts | InEtc], Fs), + [[?OBJ(T3,_BT3) | _] = Ts3 | InEtc3] = F3, + rukmerge_files(L, F3, NFs, InEtc3, Ts3, Ts2, InEtc2, T3, Kp, Last,LastBT). + +ucmerge_files(L, F2, Fs, InEtc2, Ts2, I2, [?OBJ(T, BT) | Ts] = Ts0, I, + InEtc, T2, CF, Last) when I < I2 -> + case CF(T, T2) of + true -> % T =< T2 + case CF(T, Last) of + true -> + ucmerge_files(L, F2, Fs, InEtc2, Ts2, I2, Ts, I, InEtc, T2, + CF, Last); + false -> + ucmerge_files([BT | L], F2, Fs, InEtc2, Ts2, I2, Ts, I, + InEtc, T2, CF, T) + end; + false -> % T > T2 + [F3 | NFs] = cinsert([Ts0 | InEtc], Fs, CF), + [[?OBJ(T3,_BT3) | _] = Ts3 | {I3,_} = InEtc3] = F3, + ucmerge_files(L, F3, NFs, InEtc3, Ts3, I3, Ts2, I2, InEtc2, T3, CF, Last) + end; +ucmerge_files(L, F2, Fs, InEtc2, Ts2, I2, [?OBJ(T, BT) | Ts] = Ts0, I, + InEtc, T2, CF, Last) -> % when I2 < I + case CF(T2, T) of + true -> % T2 =< T + [F3 | NFs] = cinsert([Ts0 | InEtc], Fs, CF), + [[?OBJ(T3,_BT3) | _] = Ts3 | {I3,_} = InEtc3] = F3, + ucmerge_files(L, F3, NFs, InEtc3, Ts3, I3, Ts2, I2, InEtc2, T3, + CF, Last); + false -> % T < T2 + case CF(T, Last) of + true -> + ucmerge_files(L, F2, Fs, InEtc2, Ts2, I2, Ts, I, InEtc, T2, + CF, Last); + false -> + ucmerge_files([BT | L], F2, Fs, InEtc2, Ts2, I2, Ts, I, + InEtc, T2, CF, T) + end + end; +ucmerge_files(L, F2, Fs, _InEtc2, _Ts2, _I2, [], _I, InEtc, _T2, _CF, Last) -> + {InEtc, [F2 | Fs], L, {last, Last}}. + +cmerge_files(L, F2, Fs, InEtc2, BT2, Ts2, I2, [?OBJ(T, BT) | Ts] = Ts0, I, + InEtc, T2, CF) when I < I2 -> + case CF(T, T2) of + true -> % T =< T2 + cmerge_files([BT|L], F2, Fs, InEtc2, BT2, Ts2, I2, Ts, I, InEtc, T2, CF); + false -> % T > T2 + L1 = [BT2 | L], + [F3 | NFs] = cinsert([Ts0 | InEtc], Fs, CF), + [[?OBJ(T3,BT3) | Ts3] | {I3,_} = InEtc3] = F3, + cmerge_files(L1, F3, NFs, InEtc3, BT3, Ts3, I3, Ts2, I2, InEtc2, T3, CF) + end; +cmerge_files(L, F2, Fs, InEtc2, BT2, Ts2, I2, [?OBJ(T, BT) | Ts] = Ts0, I, + InEtc, T2, CF) -> % when I2 < I + case CF(T2, T) of + true -> % T2 =< T + L1 = [BT2 | L], + [F3 | NFs] = cinsert([Ts0 | InEtc], Fs, CF), + [[?OBJ(T3,BT3) | Ts3] | {I3,_} = InEtc3] = F3, + cmerge_files(L1, F3, NFs, InEtc3, BT3, Ts3, I3, Ts2, I2, InEtc2, T3, CF); + false -> % T < T2 + cmerge_files([BT|L], F2, Fs, InEtc2, BT2, Ts2, I2, Ts, I, InEtc, T2, CF) + end; +cmerge_files(L, F2, Fs, _InEtc2, _BT2, _Ts2, _I2, [], _I, InEtc, _T2, _CF) -> + {InEtc, [F2 | Fs], L, {last, foo}}. + +last_file(Ts, L, {last, T}, {ukmerge,_}, _W) -> + kulast_file(Ts, T, L); +last_file(Ts, L, {last, {T,BT}}, {rukmerge,_}, _W) -> + ruklast_file(Ts, T, BT, L); +last_file(Ts, L, {last, T}, {ucmerge,CF}, _W) -> + uclast_file(Ts, T, CF, L); +last_file(Ts, L, {last, T}, _Kind, #w{unique = true}) -> + ulast_file(Ts, T, L); +last_file(Ts, L, _Last, _Kind, _W) -> + last_file(Ts, L). + +ulast_file([?OBJ(T, _BT) | Ts], Last, L) when Last == T -> + last_file(Ts, L); +ulast_file(Ts, _Last, L) -> + last_file(Ts, L). + +kulast_file([?OBJ(?SK(T, _I), _BT) | Ts], Last, L) when Last == T -> + last_file(Ts, L); +kulast_file(Ts, _Last, L) -> + last_file(Ts, L). + +ruklast_file([?OBJ(?SK(T, _I), BT) | Ts], Last, _LastBT, L) when Last == T -> + last_file(Ts, [BT | L]); +ruklast_file(Ts, _Last, LastBT, L) -> + last_file(Ts, [LastBT | L]). + +uclast_file([?OBJ(T, BT) | Ts], Last, CF, L) -> + case CF(T, Last) of + true -> + last_file(Ts, L); + false -> + last_file(Ts, [BT | L]) + end. + +last_file([?OBJ(_Ta, BTa), ?OBJ(_Tb, BTb) | Ts], L) -> + last_file(Ts, [BTb, BTa | L]); +last_file([?OBJ(_T, BT) | Ts], L) -> + last_file(Ts, [BT | L]); +last_file([], L) -> + L. + +%% OK for 16 files. +insert(A, [X1, X2, X3, X4 | Xs]) when A > X4 -> + [X1, X2, X3, X4 | insert(A, Xs)]; +insert(A, [X1, X2, X3 | T]) when A > X3 -> + [X1, X2, X3, A | T]; +insert(A, [X1, X2 | Xs]) when A > X2 -> + [X1, X2, A | Xs]; +insert(A, [X1 | T]) when A > X1 -> + [X1, A | T]; +insert(A, Xs) -> + [A | Xs]. + +rinsert(A, [X1, X2, X3, X4 | Xs]) when A < X4 -> + [X1, X2, X3, X4 | rinsert(A, Xs)]; +rinsert(A, [X1, X2, X3 | T]) when A < X3 -> + [X1, X2, X3, A | T]; +rinsert(A, [X1, X2 | Xs]) when A < X2 -> + [X1, X2, A | Xs]; +rinsert(A, [X1 | T]) when A < X1 -> + [X1, A | T]; +rinsert(A, Xs) -> + [A | Xs]. + +-define(CINSERT(F, A, T1, T2), + case cfun(CF, F, A) of + true -> [F, A | T2]; + false -> [A | T1] + end). + +cinsert(A, [F1 | [F2 | [F3 | [F4 | Fs]=T4]=T3]=T2]=T1, CF) -> + case cfun(CF, F4, A) of + true -> [F1, F2, F3, F4 | cinsert(A, Fs, CF)]; + false -> + case cfun(CF, F2, A) of + true -> [F1, F2 | ?CINSERT(F3, A, T3, T4)]; + false -> ?CINSERT(F1, A, T1, T2) + end + end; +cinsert(A, [F1 | [F2 | Fs]=T2]=T1, CF) -> + case cfun(CF, F2, A) of + true -> [F1, F2 | cinsert(A, Fs, CF)]; + false -> ?CINSERT(F1, A, T1, T2) + end; +cinsert(A, [F | Fs]=T, CF) -> + ?CINSERT(F, A, T, Fs); +cinsert(A, _, _CF) -> + [A]. + +%% Inlined. +cfun(CF, F1, F2) -> + [[?OBJ(T1,_) | _] | {I1,_}] = F1, + [[?OBJ(T2,_) | _] | {I2,_}] = F2, + if + I1 < I2 -> + CF(T1, T2); + true -> % I2 < I1 + not CF(T2, T1) + end. + +binterm_objects([?OBJ(_T, [_Sz | BT]) | Ts], L) -> + binterm_objects(Ts, [BT | L]); +binterm_objects([], L) -> + L. + +objects([[_Sz | BT] | Ts], L) -> + objects(Ts, [BT | L]); +objects([], L) -> + L. + +binterms([?OBJ(_T1, BT1), ?OBJ(_T2, BT2) | Ts], L) -> + binterms(Ts, [BT2, BT1 | L]); +binterms([?OBJ(_T, BT) | Ts], L) -> + binterms(Ts, [BT | L]); +binterms([], L) -> + L. + +read_chunk(InEtc, Fs, L, LSz, Last, W) -> + {I, IFun} = InEtc, + case read_more(IFun, I, LSz, W) of + {Ts, NLSz, NIFun, #w{order = ascending}=NW} -> + NInEtc = {I, NIFun}, + NFs = insert([Ts | NInEtc], Fs), + merge_files(NFs, L, NLSz, Last, NW); + {Ts, NLSz, NIFun, #w{order = descending}=NW} -> + NInEtc = {I, NIFun}, + NFs = rinsert([Ts | NInEtc], Fs), + merge_files(NFs, L, NLSz, Last, NW); + {Ts, NLSz, NIFun, NW} -> + NInEtc = {I, NIFun}, + NFs = cinsert([Ts | NInEtc], Fs, NW#w.order), + merge_files(NFs, L, NLSz, Last, NW); + {eof, NW} -> + merge_files(Fs, L, LSz, Last, NW) + end. + +%% -> {[{term() | binary()}], NewLSz, NewIFun, NewW} | eof | throw(Error) +read_more(IFun, I, LSz, W) -> + case IFun({{merge, I}, [], LSz, W}) of + {{_, [], NLSz}, NIFun} -> + read_more(NIFun, I, NLSz, W); + {{_, L, NLSz}, NInFun} -> + NW = case lists:member(IFun, W#w.temp) of + true -> + %% temporary file + W#w{temp = [NInFun | lists:delete(IFun, W#w.temp)]}; + false -> + %% input file + W + end, + {lists:reverse(L), NLSz, NInFun, NW}; + eof -> + %% already closed. + NW = W#w{temp = lists:delete(IFun, W#w.temp)}, + {eof, NW} + end. + +read_fun(FileName, Owner, W) -> + case file:open(FileName, [raw, binary, read, compressed]) of + {ok, Fd} -> + read_fun2(Fd, <<>>, 0, FileName, Owner); + Error -> + file_error(FileName, Error, W) + end. + +read_fun2(Fd, Bin, Size, FileName, Owner) -> + fun(close) -> + close_read_fun(Fd, FileName, Owner); + ({I, L, LSz, W}) -> + case read_objs(Fd, FileName, I, L, Bin, Size, LSz, W) of + {{I1, L1, Bin1, Size1}, LSz1} -> + NIFun = read_fun2(Fd, Bin1, Size1, FileName, Owner), + {{I1, L1, LSz1}, NIFun}; + eof -> + close_read_fun(Fd, FileName, Owner), + eof + end + end. + +close_read_fun(Fd, _FileName, user) -> + file:close(Fd); +close_read_fun(Fd, FileName, fsort) -> + file:close(Fd), + file:delete(FileName). + +read_objs(Fd, FileName, I, L, Bin0, Size0, LSz, W) -> + Max = max(Size0, ?CHUNKSIZE), + BSz0 = byte_size(Bin0), + Min = Size0 - BSz0 + W#w.hdlen, % Min > 0 + NoBytes = max(Min, Max), + case read(Fd, FileName, NoBytes, W) of + {ok, Bin} -> + BSz = byte_size(Bin), + NLSz = LSz + BSz, + case catch file_loop(L, I, Bin0, Bin, Size0, BSz0, BSz, Min, W) + of + {'EXIT', _R} -> + error({error, {bad_object, FileName}}, W); + Reply -> + {Reply, NLSz} + end; + eof when byte_size(Bin0) =:= 0 -> + eof; + eof -> + error({error, {premature_eof, FileName}}, W) + end. + +file_loop(L, I, _B1, B2, Sz, 0, _B2Sz, _Min, W) -> + file_loop(L, I, B2, Sz, W); +file_loop(L, I, B1, B2, Sz, _B1Sz, B2Sz, Min, W) when B2Sz > Min -> + {B3, B4} = split_binary(B2, Min), + {I1, L1, <<>>, Sz1} = file_loop(L, I, list_to_binary([B1, B3]), Sz, W), + file_loop(L1, I1, B4, Sz1, W); +file_loop(L, I, B1, B2, Sz, _B1Sz, _B2Sz, _Min, W) -> + file_loop(L, I, list_to_binary([B1, B2]), Sz, W). + +file_loop(L, I, B, Sz, W) -> + #w{keypos = Kp, format = Format, hdlen = HdLen} = W, + file_loop1(L, I, B, Sz, Kp, Format, HdLen). + +file_loop1(L, I, HB, 0, Kp, F, HdLen) -> + <> = HB, + file_loop2(L, I, B, Size, <>, Kp, F, HdLen); +file_loop1(L, I, B, Sz, Kp, F, HdLen) -> + file_loop2(L, I, B, Sz, <>, Kp, F, HdLen). + +file_loop2(L, _I, B, Sz, SzB, 0, binary, HdLen) -> + {NL, NB, NSz, NSzB} = file_binloop(L, Sz, SzB, B, HdLen), + if + byte_size(NB) =:= NSz -> + <> = NB, + {0, [?OBJ(Bin, [NSzB | Bin]) | NL], <<>>, 0}; + true -> + {0, NL, NB, NSz} + end; +file_loop2(L, _I, B, Sz, SzB, 0, Fun, HdLen) -> + file_binterm_loop(L, Sz, SzB, B, Fun, HdLen); +file_loop2(L, {merge, I}, B, Sz, SzB, Kp, Fun, HdLen) -> % when Kp =/= 0 + merge_loop(Kp, I, L, Sz, SzB, B, Fun, HdLen); +file_loop2(L, I, B, Sz, SzB, Kp, Fun, HdLen) when is_integer(I) -> + key_loop(Kp, I, L, Sz, SzB, B, Fun, HdLen). + +file_binloop(L, Size, SizeB, B, HL) -> + case B of + <> -> + <> = NSizeB, + file_binloop([?OBJ(Bin, [SizeB | Bin]) | L], NSize, NSizeB, R, HL); + _ -> + {L, B, Size, SizeB} + end. + +file_binterm_loop(L, Size, SizeB, B, Fun, HL) -> + case B of + <> -> + <> = NSizeB, + BT = [SizeB | BinTerm], + Term = Fun(BinTerm), + file_binterm_loop([?OBJ(Term, BT) | L], NSize, NSizeB, R, Fun, HL); + <> -> + Term = Fun(BinTerm), + NL = [?OBJ(Term, [SizeB | BinTerm]) | L], + {0, NL, <<>>, 0}; + _ -> + {0, L, B, Size} + end. + +key_loop(KeyPos, I, L, Size, SizeB, B, Fun, HL) -> + case B of + <> -> + <> = NSizeB, + BT = [SizeB | BinTerm], + UniqueKey = make_key(KeyPos, Fun(BinTerm)), + E = ?OBJ(UniqueKey, BT), + key_loop(KeyPos, I+1, [E | L], NSize, NSizeB, R, Fun, HL); + <> -> + UniqueKey = make_key(KeyPos, Fun(BinTerm)), + NL = [?OBJ(UniqueKey, [SizeB | BinTerm]) | L], + {I+1, NL, <<>>, 0}; + _ -> + {I, L, B, Size} + end. + +merge_loop(KeyPos, I, L, Size, SizeB, B, Fun, HL) -> + case B of + <> -> + <> = NSizeB, + BT = [SizeB | BinTerm], + UniqueKey = make_stable_key(KeyPos, I, Fun(BinTerm)), + E = ?OBJ(UniqueKey, BT), + merge_loop(KeyPos, I, [E | L], NSize, NSizeB, R, Fun, HL); + <> -> + UniqueKey = make_stable_key(KeyPos, I, Fun(BinTerm)), + NL = [?OBJ(UniqueKey, [SizeB | BinTerm]) | L], + {{merge, I}, NL, <<>>, 0}; + _ -> + {{merge, I}, L, B, Size} + end. + +fun_objs(Objs, L, LSz, NoBytes, I, W) -> + #w{keypos = Keypos, format = Format, hdlen = HL} = W, + case catch fun_loop(Objs, L, LSz, NoBytes, I, Keypos, Format, HL) of + {'EXIT', _R} -> + error({error, bad_object}, W); + Reply -> + Reply + end. + +fun_loop(Objs, L, LSz, RunSize, _I, 0, binary, HdLen) -> + fun_binloop(Objs, L, LSz, RunSize, HdLen); +fun_loop(Objs, L, LSz, RunSize, _I, 0, Fun, HdLen) -> + fun_loop(Objs, L, LSz, RunSize, Fun, HdLen); +fun_loop(Objs, L, LSz, RunSize, {merge, I}, Keypos, Fun, HdLen) -> + fun_mergeloop(Objs, L, LSz, RunSize, I, Keypos, Fun, HdLen); +fun_loop(Objs, L, LSz, RunSize, I, Keypos, Fun, HdLen) when is_integer(I) -> + fun_keyloop(Objs, L, LSz, RunSize, I, Keypos, Fun, HdLen). + +fun_binloop([B | Bs], L, LSz, RunSize, HL) when LSz < RunSize -> + Size = byte_size(B), + Obj = ?OBJ(B, [<> | B]), + fun_binloop(Bs, [Obj | L], LSz+Size, RunSize, HL); +fun_binloop(Bs, L, LSz, _RunSize, _HL) -> + {0, Bs, L, LSz}. + +fun_loop([B | Bs], L, LSz, RunSize, Fun, HL) when LSz < RunSize -> + Size = byte_size(B), + Obj = ?OBJ(Fun(B), [<> | B]), + fun_loop(Bs, [Obj | L], LSz+Size, RunSize, Fun, HL); +fun_loop(Bs, L, LSz, _RunSize, _Fun, _HL) -> + {0, Bs, L, LSz}. + +fun_keyloop([B | Bs], L, LSz, RunSize, I, Kp, Fun, HL) when LSz < RunSize -> + Size = byte_size(B), + UniqueKey = make_key(Kp, Fun(B)), + E = ?OBJ(UniqueKey, [<> | B]), + fun_keyloop(Bs, [E | L], LSz+Size, RunSize, I+1, Kp, Fun, HL); +fun_keyloop(Bs, L, LSz, _RunSize, I, _Kp, _Fun, _HL) -> + {I, Bs, L, LSz}. + +fun_mergeloop([B | Bs], L, LSz, RunSize, I, Kp, Fun, HL) when LSz < RunSize -> + Size = byte_size(B), + UniqueKey = make_stable_key(Kp, I, Fun(B)), + E = ?OBJ(UniqueKey, [<> | B]), + fun_mergeloop(Bs, [E | L], LSz+Size, RunSize, I, Kp, Fun, HL); +fun_mergeloop(Bs, L, LSz, _RunSize, I, _Kp, _Fun, _HL) -> + {{merge, I}, Bs, L, LSz}. % any I would do + +%% Inlined. +make_key(Kp, T) when is_integer(Kp) -> + element(Kp, T); +make_key([Kp1, Kp2], T) -> + [element(Kp1, T), element(Kp2, T)]; +make_key([Kp1, Kp2 | Kps], T) -> + [element(Kp1, T), element(Kp2, T) | make_key2(Kps, T)]. + +%% Inlined. +%% A sequence number (I) is used for making the internal sort stable. +%% I is ordering number of the file from which T was read. +make_stable_key(Kp, I, T) when is_integer(Kp) -> + ?SK(element(Kp, T), I); +make_stable_key([Kp1, Kp2], I, T) -> + ?SK([element(Kp1, T) | element(Kp2, T)], I); +make_stable_key([Kp1, Kp2 | Kps], I, T) -> + ?SK([element(Kp1, T), element(Kp2, T) | make_key2(Kps, T)], I). + +make_key2([Kp], T) -> + [element(Kp, T)]; +make_key2([Kp | Kps], T) -> + [element(Kp, T) | make_key2(Kps, T)]. + +max(A, B) when A < B -> B; +max(A, _) -> A. + +infun(W) -> + W1 = W#w{in = undefined}, + try (W#w.in)(read) of + end_of_input -> + {end_of_input, W1}; + {end_of_input, Value} -> + {end_of_input, W1#w{inout_value = {value, Value}}}; + {Objs, NFun} when is_function(NFun), + is_function(NFun, 1), + is_list(Objs) -> + {cont, W#w{in = NFun}, Objs}; + Error -> + error(Error, W1) + catch Class:Reason -> + cleanup(W1), + erlang:raise(Class, Reason, erlang:get_stacktrace()) + end. + +outfun(A, W) when W#w.inout_value =/= no_value -> + W1 = W#w{inout_value = no_value}, + W2 = if + W1#w.fun_out -> + outfun(W#w.inout_value, W1); + true -> W1 + end, + outfun(A, W2); +outfun(A, W) -> + W1 = W#w{out = undefined}, + try (W#w.out)(A) of + Reply when A =:= close -> + Reply; + NF when is_function(NF), is_function(NF, 1) -> + W#w{out = NF}; + Error -> + error(Error, W1) + catch Class:Reason -> + cleanup(W1), + erlang:raise(Class, Reason, erlang:get_stacktrace()) + end. + +is_keypos(Keypos) when is_integer(Keypos), Keypos > 0 -> + true; +is_keypos([]) -> + {badarg, []}; +is_keypos(L) -> + is_keyposs(L). + +is_keyposs([Kp | Kps]) when is_integer(Kp), Kp > 0 -> + is_keyposs(Kps); +is_keyposs([]) -> + true; +is_keyposs([Bad | _]) -> + {badarg, Bad}; +is_keyposs(Bad) -> + {badarg, Bad}. + +is_input(Fun) when is_function(Fun), is_function(Fun, 1) -> + {true, Fun}; +is_input(Files) -> + is_files(Files). + +is_files(Fs) -> + is_files(Fs, []). + +is_files([F | Fs], L) -> + case read_file_info(F) of + {ok, File, _FI} -> + is_files(Fs, [File | L]); + Error -> + Error + end; +is_files([], L) -> + {true, lists:reverse(L)}; +is_files(Bad, _L) -> + {badarg, Bad}. + +maybe_output(Fun) when is_function(Fun), is_function(Fun, 1) -> + {true, Fun}; +maybe_output(File) -> + case read_file_info(File) of + {badarg, _File} = Badarg -> + Badarg; + {ok, FileName, _FileInfo} -> + {true, FileName}; + {error, {file_error, FileName, _Reason}} -> + {true, FileName} + end. + +read_file_info(File) -> + %% Absolute names in case some process should call file:set_cwd/1. + case catch filename:absname(File) of + {'EXIT', _} -> + {badarg, File}; + FileName -> + case file:read_file_info(FileName) of + {ok, FileInfo} -> + {ok, FileName, FileInfo}; + {error, einval} -> + {badarg, File}; + {error, Reason} -> + {error, {file_error, FileName, Reason}} + end + end. + +%% No attempt is made to avoid overwriting existing files. +next_temp(W) -> + Seq = W#w.seq, + NW = W#w{seq = Seq + 1}, + Temp = lists:concat([W#w.prefix, Seq]), + {NW, Temp}. + +%% Would use the temporary directory (TMP|TEMP|TMPDIR), were it +%% readily accessible. +tmp_prefix(F, TmpDirOpt) when is_function(F); F =:= undefined -> + {ok, CurDir} = file:get_cwd(), + tmp_prefix1(CurDir, TmpDirOpt); +tmp_prefix(OutFile, TmpDirOpt) -> + Dir = filename:dirname(OutFile), + tmp_prefix1(Dir, TmpDirOpt). + +tmp_prefix1(Dir, TmpDirOpt) -> + U = "_", + Node = node(), + Pid = os:getpid(), + {MSecs,Secs,MySecs} = now(), + F = lists:concat(["fs_",Node,U,Pid,U,MSecs,U,Secs,U,MySecs,"."]), + TmpDir = case TmpDirOpt of + default -> + Dir; + {dir, TDir} -> + TDir + end, + filename:join(filename:absname(TmpDir), F). + +%% -> {Fd, NewW} | throw(Error) +open_file(FileName, W) -> + case file:open(FileName, W#w.z ++ [raw, binary, write]) of + {ok, Fd} -> + {Fd, W#w{temp = [{Fd,FileName} | W#w.temp]}}; + Error -> + file_error(FileName, Error, W) + end. + +read(Fd, FileName, N, W) -> + case file:read(Fd, N) of + {ok, Bin} -> + {ok, Bin}; + eof -> + eof; + {error, enomem} -> + %% Bad N + error({error, {bad_object, FileName}}, W); + {error, einval} -> + %% Bad N + error({error, {bad_object, FileName}}, W); + Error -> + file_error(FileName, Error, W) + end. + +write(Fd, FileName, B, W) -> + case file:write(Fd, B) of + ok -> + ok; + Error -> + file_error(FileName, Error, W) + end. + +-spec file_error(_, {'error',atom()}, #w{}) -> no_return(). + +file_error(File, {error, Reason}, W) -> + error({error, {file_error, File, Reason}}, W). + +error(Error, W) -> + cleanup(W), + throw({W#w.ref, Error}). + +cleanup(W) -> + close_out(W), + W1 = close_input(W), + F = fun(IFun) when is_function(IFun) -> + IFun(close); + ({Fd,FileName}) -> + file:close(Fd), + file:delete(FileName); + (FileName) -> + file:delete(FileName) + end, + lists:foreach(F, W1#w.temp). + +close_input(W) when is_function(W#w.in) -> + catch (W#w.in)(close), + W#w{in = undefined}; +close_input(#w{in = undefined}=W) -> + W. + +close_out(W) when is_function(W#w.out) -> + catch (W#w.out)(close); +close_out(_) -> + ok. + +close_file(Fd, W) -> + {value, {Fd, FileName}} = lists:keysearch(Fd, 1, W#w.temp), + ?DEBUG("closing ~p~n", [FileName]), + file:close(Fd), + W#w{temp = [FileName | lists:keydelete(Fd, 1, W#w.temp)]}. + +%%% +%%% Format 'term'. +%%% + +file_rterms(no_file, Files) -> + fun(close) -> + ok; + (read) when Files =:= [] -> + end_of_input; + (read) -> + [F | Fs] = Files, + case file:open(F, [read, compressed]) of + {ok, Fd} -> + file_rterms2(Fd, [], 0, F, Fs); + {error, Reason} -> + {error, {file_error, F, Reason}} + end + end; +file_rterms({Fd, FileName}, Files) -> + fun(close) -> + file:close(Fd); + (read) -> + file_rterms2(Fd, [], 0, FileName, Files) + end. + +file_rterms2(Fd, L, LSz, FileName, Files) when LSz < ?CHUNKSIZE -> + case io:read(Fd, '') of + {ok, Term} -> + B = term_to_binary(Term), + file_rterms2(Fd, [B | L], LSz + byte_size(B), FileName, Files); + eof -> + file:close(Fd), + {lists:reverse(L), file_rterms(no_file, Files)}; + _Error -> + file:close(Fd), + {error, {bad_term, FileName}} + end; +file_rterms2(Fd, L, _LSz, FileName, Files) -> + {lists:reverse(L), file_rterms({Fd, FileName}, Files)}. + +file_wterms(W, F, Args) -> + fun(close) when W =:= name -> + ok; + (close) -> + {fd, Fd} = W, + file:close(Fd); + (L) when W =:= name -> + case file:open(F, Args) of + {ok, Fd} -> + write_terms(Fd, F, L, Args); + {error, Reason} -> + {error, {file_error, F, Reason}} + end; + (L) -> + {fd, Fd} = W, + write_terms(Fd, F, L, Args) + end. + +write_terms(Fd, F, [B | Bs], Args) -> + case io:request(Fd, {format, "~p.~n", [binary_to_term(B)]}) of + ok -> + write_terms(Fd, F, Bs, Args); + {error, Reason} -> + file:close(Fd), + {error, {file_error, F, Reason}} + end; +write_terms(Fd, F, [], Args) -> + file_wterms({fd, Fd}, F, Args). + +fun_rterms(InFun) -> + fun(close) -> + InFun(close); + (read) -> + case InFun(read) of + {Ts, NInFun} when is_list(Ts), + is_function(NInFun), + is_function(NInFun, 1) -> + {to_bin(Ts, []), fun_rterms(NInFun)}; + Else -> + Else + end + end. + +fun_wterms(OutFun) -> + fun(close) -> + OutFun(close); + (L) -> + case OutFun(wterms_arg(L)) of + NOutFun when is_function(NOutFun), is_function(NOutFun, 1) -> + fun_wterms(NOutFun); + Else -> + Else + end + end. + +to_bin([E | Es], L) -> + to_bin(Es, [term_to_binary(E) | L]); +to_bin([], L) -> + lists:reverse(L). + +wterms_arg(L) when is_list(L) -> + to_term(L, []); +wterms_arg(Value) -> + Value. + +to_term([B | Bs], L) -> + to_term(Bs, [binary_to_term(B) | L]); +to_term([], L) -> + lists:reverse(L). diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl new file mode 100644 index 0000000000..d65588f0d1 --- /dev/null +++ b/lib/stdlib/src/filelib.erl @@ -0,0 +1,443 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% + +-module(filelib). + +%% File utilities. + +-export([wildcard/1, wildcard/2, is_dir/1, is_file/1, is_regular/1, + compile_wildcard/1]). +-export([fold_files/5, last_modified/1, file_size/1, ensure_dir/1]). + +-export([wildcard/3, is_dir/2, is_file/2, is_regular/2]). +-export([fold_files/6, last_modified/2, file_size/2]). + +-include_lib("kernel/include/file.hrl"). + +-define(HANDLE_ERROR(Expr), + try + Expr + catch + error:{badpattern,_}=UnUsUalVaRiAbLeNaMe -> + %% Get the stack backtrace correct. + erlang:error(UnUsUalVaRiAbLeNaMe) + end). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec wildcard(name()) -> [file:filename()]. +wildcard(Pattern) when is_list(Pattern) -> + ?HANDLE_ERROR(do_wildcard(Pattern, file)). + +-spec wildcard(name(), name() | atom()) -> [file:filename()]. +wildcard(Pattern, Cwd) when is_list(Pattern), is_list(Cwd) -> + ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, file)); +wildcard(Pattern, Mod) when is_list(Pattern), is_atom(Mod) -> + ?HANDLE_ERROR(do_wildcard(Pattern, Mod)). + +-spec wildcard(name(), name(), atom()) -> [file:filename()]. +wildcard(Pattern, Cwd, Mod) + when is_list(Pattern), is_list(Cwd), is_atom(Mod) -> + ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, Mod)). + +-spec is_dir(name()) -> boolean(). +is_dir(Dir) -> + do_is_dir(Dir, file). + +-spec is_dir(name(), atom()) -> boolean(). +is_dir(Dir, Mod) when is_atom(Mod) -> + do_is_dir(Dir, Mod). + +-spec is_file(name()) -> boolean(). +is_file(File) -> + do_is_file(File, file). + +-spec is_file(name(), atom()) -> boolean(). +is_file(File, Mod) when is_atom(Mod) -> + do_is_file(File, Mod). + +-spec is_regular(name()) -> boolean(). +is_regular(File) -> + do_is_regular(File, file). + +-spec is_regular(name(), atom()) -> boolean(). +is_regular(File, Mod) when is_atom(Mod) -> + do_is_regular(File, Mod). + +-spec fold_files(name(), string(), boolean(), fun((_,_) -> _), _) -> _. +fold_files(Dir, RegExp, Recursive, Fun, Acc) -> + do_fold_files(Dir, RegExp, Recursive, Fun, Acc, file). + +-spec fold_files(name(), string(), boolean(), fun((_,_) -> _), _, atom()) -> _. +fold_files(Dir, RegExp, Recursive, Fun, Acc, Mod) when is_atom(Mod) -> + do_fold_files(Dir, RegExp, Recursive, Fun, Acc, Mod). + +-spec last_modified(name()) -> date_time() | 0. +last_modified(File) -> + do_last_modified(File, file). + +-spec last_modified(name(), atom()) -> date_time() | 0. +last_modified(File, Mod) when is_atom(Mod) -> + do_last_modified(File, Mod). + +-spec file_size(name()) -> non_neg_integer(). +file_size(File) -> + do_file_size(File, file). + +-spec file_size(name(), atom()) -> non_neg_integer(). +file_size(File, Mod) when is_atom(Mod) -> + do_file_size(File, Mod). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +do_wildcard(Pattern, Mod) when is_list(Pattern) -> + do_wildcard_comp(do_compile_wildcard(Pattern), Mod). + +do_wildcard_comp({compiled_wildcard,{exists,File}}, Mod) -> + case eval_read_file_info(File, Mod) of + {ok,_} -> [File]; + _ -> [] + end; +do_wildcard_comp({compiled_wildcard,[Base|Rest]}, Mod) -> + do_wildcard_1([Base], Rest, Mod). + +do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), is_list(Cwd) -> + do_wildcard_comp(do_compile_wildcard(Pattern), Cwd, Mod). + +do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> + case eval_read_file_info(filename:absname(File, Cwd), Mod) of + {ok,_} -> [File]; + _ -> [] + end; +do_wildcard_comp({compiled_wildcard,[current|Rest]}, Cwd0, Mod) -> + Cwd = filename:join([Cwd0]), %Slash away redundant slashes. + PrefixLen = length(Cwd)+1, + [lists:nthtail(PrefixLen, N) || N <- do_wildcard_1([Cwd], Rest, Mod)]; +do_wildcard_comp({compiled_wildcard,[Base|Rest]}, _Cwd, Mod) -> + do_wildcard_1([Base], Rest, Mod). + +do_is_dir(Dir, Mod) -> + case eval_read_file_info(Dir, Mod) of + {ok, #file_info{type=directory}} -> + true; + _ -> + false + end. + +do_is_file(File, Mod) -> + case eval_read_file_info(File, Mod) of + {ok, #file_info{type=regular}} -> + true; + {ok, #file_info{type=directory}} -> + true; + _ -> + false + end. + +do_is_regular(File, Mod) -> + case eval_read_file_info(File, Mod) of + {ok, #file_info{type=regular}} -> + true; + _ -> + false + end. + +%% fold_files(Dir, RegExp, Recursive, Fun, AccIn). + +%% folds the function Fun(F, Acc) -> Acc1 over +%% all files in that match the regular expression +%% If is true all sub-directories to are processed + +do_fold_files(Dir, RegExp, Recursive, Fun, Acc, Mod) -> + {ok, Re1} = re:compile(RegExp), + do_fold_files1(Dir, Re1, Recursive, Fun, Acc, Mod). + +do_fold_files1(Dir, RegExp, Recursive, Fun, Acc, Mod) -> + case eval_list_dir(Dir, Mod) of + {ok, Files} -> do_fold_files2(Files, Dir, RegExp, Recursive, Fun, Acc, Mod); + {error, _} -> Acc + end. + +do_fold_files2([], _Dir, _RegExp, _Recursive, _Fun, Acc, _Mod) -> + Acc; +do_fold_files2([File|T], Dir, RegExp, Recursive, Fun, Acc0, Mod) -> + FullName = filename:join(Dir, File), + case do_is_regular(FullName, Mod) of + true -> + case re:run(File, RegExp, [{capture,none}]) of + match -> + Acc = Fun(FullName, Acc0), + do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc, Mod); + nomatch -> + do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + end; + false -> + case Recursive andalso do_is_dir(FullName, Mod) of + true -> + Acc1 = do_fold_files1(FullName, RegExp, Recursive, + Fun, Acc0, Mod), + do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc1, Mod); + false -> + do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + end + end. + +do_last_modified(File, Mod) -> + case eval_read_file_info(File, Mod) of + {ok, Info} -> + Info#file_info.mtime; + _ -> + 0 + end. + +do_file_size(File, Mod) -> + case eval_read_file_info(File, Mod) of + {ok, Info} -> + Info#file_info.size; + _ -> + 0 + end. + +%%---------------------------------------------------------------------- +%% +type ensure_dir(X) -> ok | {error, Reason}. +%% +type X = filename() | dirname() +%% ensures that the directory name required to create D exists + +-spec ensure_dir(name()) -> 'ok' | {'error', posix()}. +ensure_dir("/") -> + ok; +ensure_dir(F) -> + Dir = filename:dirname(F), + case do_is_dir(Dir, file) of + true -> + ok; + false -> + ensure_dir(Dir), + file:make_dir(Dir) + end. + + +%%% +%%% Pattern matching using a compiled wildcard. +%%% + +do_wildcard_1(Files, Pattern, Mod) -> + do_wildcard_2(Files, Pattern, [], Mod). + +do_wildcard_2([File|Rest], Pattern, Result, Mod) -> + do_wildcard_2(Rest, Pattern, do_wildcard_3(File, Pattern, Result, Mod), Mod); +do_wildcard_2([], _, Result, _Mod) -> + Result. + +do_wildcard_3(Base, [Pattern|Rest], Result, Mod) -> + case do_list_dir(Base, Mod) of + {ok, Files0} -> + Files = lists:sort(Files0), + Matches = wildcard_4(Pattern, Files, Base, []), + do_wildcard_2(Matches, Rest, Result, Mod); + _ -> + Result + end; +do_wildcard_3(Base, [], Result, _Mod) -> + [Base|Result]. + +wildcard_4(Pattern, [File|Rest], Base, Result) -> + case wildcard_5(Pattern, File) of + true -> + wildcard_4(Pattern, Rest, Base, [join(Base, File)|Result]); + false -> + wildcard_4(Pattern, Rest, Base, Result) + end; +wildcard_4(_Patt, [], _Base, Result) -> + Result. + +wildcard_5([question|Rest1], [_|Rest2]) -> + wildcard_5(Rest1, Rest2); +wildcard_5([accept], _) -> + true; +wildcard_5([star|Rest], File) -> + do_star(Rest, File); +wildcard_5([{one_of, Ordset}|Rest], [C|File]) -> + case ordsets:is_element(C, Ordset) of + true -> wildcard_5(Rest, File); + false -> false + end; +wildcard_5([{alt, Alts}], File) -> + do_alt(Alts, File); +wildcard_5([C|Rest1], [C|Rest2]) when is_integer(C) -> + wildcard_5(Rest1, Rest2); +wildcard_5([X|_], [Y|_]) when is_integer(X), is_integer(Y) -> + false; +wildcard_5([], []) -> + true; +wildcard_5([], [_|_]) -> + false; +wildcard_5([_|_], []) -> + false. + +do_star(Pattern, [X|Rest]) -> + case wildcard_5(Pattern, [X|Rest]) of + true -> true; + false -> do_star(Pattern, Rest) + end; +do_star(Pattern, []) -> + wildcard_5(Pattern, []). + +do_alt([Alt|Rest], File) -> + case wildcard_5(Alt, File) of + true -> true; + false -> do_alt(Rest, File) + end; +do_alt([], _File) -> + false. + +do_list_dir(current, Mod) -> eval_list_dir(".", Mod); +do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod). + +join(current, File) -> File; +join(Base, File) -> filename:join(Base, File). + + +%%% Compiling a wildcard. + +compile_wildcard(Pattern) -> + ?HANDLE_ERROR(do_compile_wildcard(Pattern)). + +do_compile_wildcard(Pattern) -> + {compiled_wildcard,compile_wildcard_1(Pattern)}. + +compile_wildcard_1(Pattern) -> + [Root|Rest] = filename:split(Pattern), + case filename:pathtype(Root) of + relative -> + compile_wildcard_2([Root|Rest], current); + _ -> + compile_wildcard_2(Rest, [Root]) + end. + +compile_wildcard_2([Part|Rest], Root) -> + case compile_part(Part) of + Part -> + compile_wildcard_2(Rest, join(Root, Part)); + Pattern -> + compile_wildcard_3(Rest, [Pattern,Root]) + end; +compile_wildcard_2([], Root) -> {exists,Root}. + +compile_wildcard_3([Part|Rest], Result) -> + compile_wildcard_3(Rest, [compile_part(Part)|Result]); +compile_wildcard_3([], Result) -> + lists:reverse(Result). + +compile_part(Part) -> + compile_part(Part, false, []). + +compile_part_to_sep(Part) -> + compile_part(Part, true, []). + +compile_part([], true, _) -> + error(missing_delimiter); +compile_part([$,|Rest], true, Result) -> + {ok, $,, lists:reverse(Result), Rest}; +compile_part([$}|Rest], true, Result) -> + {ok, $}, lists:reverse(Result), Rest}; +compile_part([$?|Rest], Upto, Result) -> + compile_part(Rest, Upto, [question|Result]); +compile_part([$*], Upto, Result) -> + compile_part([], Upto, [accept|Result]); +compile_part([$*|Rest], Upto, Result) -> + compile_part(Rest, Upto, [star|Result]); +compile_part([$[|Rest], Upto, Result) -> + case compile_charset(Rest, ordsets:new()) of + {ok, Charset, Rest1} -> + compile_part(Rest1, Upto, [Charset|Result]); + error -> + compile_part(Rest, Upto, [$[|Result]) + end; +compile_part([${|Rest], Upto, Result) -> + case compile_alt(Rest) of + {ok, Alt} -> + lists:reverse(Result, [Alt]); + error -> + compile_part(Rest, Upto, [${|Result]) + end; +compile_part([X|Rest], Upto, Result) -> + compile_part(Rest, Upto, [X|Result]); +compile_part([], _Upto, Result) -> + lists:reverse(Result). + +compile_charset([$]|Rest], Ordset) -> + compile_charset1(Rest, ordsets:add_element($], Ordset)); +compile_charset([$-|Rest], Ordset) -> + compile_charset1(Rest, ordsets:add_element($-, Ordset)); +compile_charset([], _Ordset) -> + error; +compile_charset(List, Ordset) -> + compile_charset1(List, Ordset). + +compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper -> + compile_charset1(Rest, compile_range(Lower, Upper, Ordset)); +compile_charset1([$]|Rest], Ordset) -> + {ok, {one_of, Ordset}, Rest}; +compile_charset1([X|Rest], Ordset) -> + compile_charset1(Rest, ordsets:add_element(X, Ordset)); +compile_charset1([], _Ordset) -> + error. + +compile_range(Lower, Current, Ordset) when Lower =< Current -> + compile_range(Lower, Current-1, ordsets:add_element(Current, Ordset)); +compile_range(_, _, Ordset) -> + Ordset. + +compile_alt(Pattern) -> + compile_alt(Pattern, []). + +compile_alt(Pattern, Result) -> + case compile_part_to_sep(Pattern) of + {ok, $,, AltPattern, Rest} -> + compile_alt(Rest, [AltPattern|Result]); + {ok, $}, AltPattern, Rest} -> + NewResult = [AltPattern|Result], + RestPattern = compile_part(Rest), + {ok, {alt, [Alt++RestPattern || Alt <- NewResult]}}; + Pattern -> + error + end. + +error(Reason) -> + erlang:error({badpattern,Reason}). + +eval_read_file_info(File, file) -> + file:read_file_info(File); +eval_read_file_info(File, erl_prim_loader) -> + case erl_prim_loader:read_file_info(File) of + error -> {error, erl_prim_loader}; + Res-> Res + end; +eval_read_file_info(File, Mod) -> + Mod:read_file_info(File). + +eval_list_dir(Dir, file) -> + file:list_dir(Dir); +eval_list_dir(Dir, erl_prim_loader) -> + case erl_prim_loader:list_dir(Dir) of + error -> {error, erl_prim_loader}; + Res-> Res + end; +eval_list_dir(Dir, Mod) -> + Mod:list_dir(Dir). diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl new file mode 100644 index 0000000000..cd26b2e219 --- /dev/null +++ b/lib/stdlib/src/filename.erl @@ -0,0 +1,787 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(filename). + +%% Purpose: Provides generic manipulation of filenames. +%% +%% Generally, these functions accept filenames in the native format +%% for the current operating system (Unix or Windows). +%% Deep characters lists (as returned by io_lib:format()) are accepted; +%% resulting strings will always be flat. +%% +%% Implementation note: We used to only flatten if the list turned out +%% to be deep. Now that atoms are allowed in deep lists, in most cases +%% we flatten the arguments immediately on function entry as that makes +%% it easier to ensure that the code works. + +-export([absname/1, absname/2, absname_join/2, + basename/1, basename/2, dirname/1, + extension/1, join/1, join/2, pathtype/1, + rootname/1, rootname/2, split/1, nativename/1]). +-export([find_src/1, find_src/2, flatten/1]). + +%% Undocumented and unsupported exports. +-export([append/2]). + +-include_lib("kernel/include/file.hrl"). + +%% Converts a relative filename to an absolute filename +%% or the filename itself if it already is an absolute filename +%% Note that no attempt is made to create the most beatiful +%% absolute name since this can give incorrect results on +%% file systems which allows links. +%% Examples: +%% Assume (for UNIX) current directory "/usr/local" +%% Assume (for WIN32) current directory "D:/usr/local" +%% +%% (for Unix) : absname("foo") -> "/usr/local/foo" +%% (for WIN32): absname("foo") -> "D:/usr/local/foo" +%% (for Unix) : absname("../x") -> "/usr/local/../x" +%% (for WIN32): absname("../x") -> "D:/usr/local/../x" +%% (for Unix) : absname("/") -> "/" +%% (for WIN32): absname("/") -> "D:/" + +-spec absname(name()) -> string(). +absname(Name) -> + {ok, Cwd} = file:get_cwd(), + absname(Name, Cwd). + +-spec absname(name(), string()) -> string(). +absname(Name, AbsBase) -> + case pathtype(Name) of + relative -> + absname_join(AbsBase, Name); + absolute -> + %% We must flatten the filename before passing it into join/1, + %% or we will get slashes inserted into the wrong places. + join([flatten(Name)]); + volumerelative -> + absname_vr(split(Name), split(AbsBase), AbsBase) + end. + +%% Handles volumerelative names (on Windows only). + +absname_vr(["/"|Rest1], [Volume|_], _AbsBase) -> + %% Absolute path on current drive. + join([Volume|Rest1]); +absname_vr([[X, $:]|Rest1], [[X|_]|_], AbsBase) -> + %% Relative to current directory on current drive. + absname(join(Rest1), AbsBase); +absname_vr([[X, $:]|Name], _, _AbsBase) -> + %% Relative to current directory on another drive. + Dcwd = + case file:get_cwd([X, $:]) of + {ok, Dir} -> Dir; + {error, _} -> [X, $:, $/] + end, + absname(join(Name), Dcwd). + +%% Joins a relative filename to an absolute base. For VxWorks the +%% resulting name is fixed to minimize the length by collapsing +%% ".." directories. +%% For other systems this is just a join/2, but assumes that +%% AbsBase must be absolute and Name must be relative. + +-spec absname_join(string(), name()) -> string(). +absname_join(AbsBase, Name) -> + case major_os_type() of + vxworks -> + absname_pretty(AbsBase, split(Name), lists:reverse(split(AbsBase))); + _Else -> + join(AbsBase, flatten(Name)) + end. + +%% Handles absolute filenames for VxWorks - these are 'pretty-printed', +%% since a C function call chdir("/erlang/lib/../bin") really sets +%% cwd to '/erlang/lib/../bin' which also works, but the long term +%% effect is potentially not so good ... +%% +%% absname_pretty("../bin", "/erlang/lib") -> "/erlang/bin" +%% absname_pretty("../../../..", "/erlang") -> "/erlang" + +absname_pretty(Abspath, Relpath, []) -> + %% AbsBase _must_ begin with a vxworks device name + {device, _Rest, Dev} = vxworks_first(Abspath), + absname_pretty(Abspath, Relpath, [lists:reverse(Dev)]); +absname_pretty(_Abspath, [], AbsBase) -> + join(lists:reverse(AbsBase)); +absname_pretty(Abspath, [[$.]|Rest], AbsBase) -> + absname_pretty(Abspath, Rest, AbsBase); +absname_pretty(Abspath, [[$.,$.]|Rest], [_|AbsRest]) -> + absname_pretty(Abspath, Rest, AbsRest); +absname_pretty(Abspath, [First|Rest], AbsBase) -> + absname_pretty(Abspath, Rest, [First|AbsBase]). + +%% Returns the part of the filename after the last directory separator, +%% or the filename itself if it has no separators. +%% +%% Examples: basename("foo") -> "foo" +%% basename("/usr/foo") -> "foo" +%% basename("/usr/foo/") -> "foo" (trailing slashes ignored) +%% basename("/") -> [] + +-spec basename(name()) -> string(). +basename(Name0) -> + Name = flatten(Name0), + {DirSep2, DrvSep} = separators(), + basename1(skip_prefix(Name, DrvSep), [], DirSep2). + +basename1([$/|[]], Tail, DirSep2) -> + basename1([], Tail, DirSep2); +basename1([$/|Rest], _Tail, DirSep2) -> + basename1(Rest, [], DirSep2); +basename1([[_|_]=List|Rest], Tail, DirSep2) -> + basename1(List++Rest, Tail, DirSep2); +basename1([DirSep2|Rest], Tail, DirSep2) when is_integer(DirSep2) -> + basename1([$/|Rest], Tail, DirSep2); +basename1([Char|Rest], Tail, DirSep2) when is_integer(Char) -> + basename1(Rest, [Char|Tail], DirSep2); +basename1([], Tail, _DirSep2) -> + lists:reverse(Tail). + +skip_prefix(Name, false) -> % No prefix for unix, but for VxWorks. + case major_os_type() of + vxworks -> + case vxworks_first(Name) of + {device, Rest, _Device} -> + Rest; + {not_device, _Rest, _First} -> + Name + end; + _Else -> + Name + end; +skip_prefix(Name, DrvSep) -> + skip_prefix1(Name, DrvSep). + +skip_prefix1([L, DrvSep|Name], DrvSep) when is_integer(L) -> + Name; +skip_prefix1([L], _) when is_integer(L) -> + [L]; +skip_prefix1(Name, _) -> + Name. + +%% Returns the last component of the filename, with the given +%% extension stripped. Use this function if you want +%% to remove an extension that might or might not be there. +%% Use rootname(basename(File)) if you want to remove an extension +%% that you know exists, but you are not sure which one it is. +%% +%% Example: basename("~/src/kalle.erl", ".erl") -> "kalle" +%% basename("~/src/kalle.jam", ".erl") -> "kalle.jam" +%% basename("~/src/kalle.old.erl", ".erl") -> "kalle.old" +%% +%% rootname(basename("xxx.jam")) -> "xxx" +%% rootname(basename("xxx.erl")) -> "xxx" + +-spec basename(name(), name()) -> string(). +basename(Name0, Ext0) -> + Name = flatten(Name0), + Ext = flatten(Ext0), + {DirSep2,DrvSep} = separators(), + NoPrefix = skip_prefix(Name, DrvSep), + basename(NoPrefix, Ext, [], DirSep2). + +basename(Ext, Ext, Tail, _DrvSep2) -> + lists:reverse(Tail); +basename([$/|[]], Ext, Tail, DrvSep2) -> + basename([], Ext, Tail, DrvSep2); +basename([$/|Rest], Ext, _Tail, DrvSep2) -> + basename(Rest, Ext, [], DrvSep2); +basename([$\\|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) -> + basename([$/|Rest], Ext, Tail, DirSep2); +basename([Char|Rest], Ext, Tail, DrvSep2) when is_integer(Char) -> + basename(Rest, Ext, [Char|Tail], DrvSep2); +basename([], _Ext, Tail, _DrvSep2) -> + lists:reverse(Tail). + +%% Returns the directory part of a pathname. +%% +%% Example: dirname("/usr/src/kalle.erl") -> "/usr/src", +%% dirname("kalle.erl") -> "." + +-spec dirname(name()) -> string(). +dirname(Name0) -> + Name = flatten(Name0), + case os:type() of + vxworks -> + {Devicep, Restname, FirstComp} = vxworks_first(Name), + case Devicep of + device -> + dirname(Restname, FirstComp, [], separators()); + _ -> + dirname(Name, [], [], separators()) + end; + _ -> + dirname(Name, [], [], separators()) + end. + +dirname([[_|_]=List|Rest], Dir, File, Seps) -> + dirname(List++Rest, Dir, File, Seps); +dirname([$/|Rest], Dir, File, Seps) -> + dirname(Rest, File++Dir, [$/], Seps); +dirname([DirSep|Rest], Dir, File, {DirSep,_}=Seps) when is_integer(DirSep) -> + dirname(Rest, File++Dir, [$/], Seps); +dirname([Dl,DrvSep|Rest], [], [], {_,DrvSep}=Seps) + when is_integer(DrvSep), ((($a =< Dl) and (Dl =< $z)) or + (($A =< Dl) and (Dl =< $Z))) -> + dirname(Rest, [DrvSep,Dl], [], Seps); +dirname([Char|Rest], Dir, File, Seps) when is_integer(Char) -> + dirname(Rest, Dir, [Char|File], Seps); +dirname([], [], File, _Seps) -> + case lists:reverse(File) of + [$/|_] -> [$/]; + _ -> "." + end; +dirname([], [$/|Rest], File, Seps) -> + dirname([], Rest, File, Seps); +dirname([], [DrvSep,Dl], File, {_,DrvSep}) -> + case lists:reverse(File) of + [$/|_] -> [Dl,DrvSep,$/]; + _ -> [Dl,DrvSep] + end; +dirname([], Dir, _, _) -> + lists:reverse(Dir). + +%% Given a filename string, returns the file extension, +%% including the period. Returns an empty list if there +%% is no extension. +%% +%% Example: extension("foo.erl") -> ".erl" +%% extension("jam.src/kalle") -> "" +%% +%% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src" + +-spec extension(name()) -> string(). +extension(Name0) -> + Name = flatten(Name0), + extension(Name, [], major_os_type()). + +extension([$.|Rest], _Result, OsType) -> + extension(Rest, [$.], OsType); +extension([Char|Rest], [], OsType) when is_integer(Char) -> + extension(Rest, [], OsType); +extension([$/|Rest], _Result, OsType) -> + extension(Rest, [], OsType); +extension([$\\|Rest], _Result, win32) -> + extension(Rest, [], win32); +extension([$\\|Rest], _Result, vxworks) -> + extension(Rest, [], vxworks); +extension([Char|Rest], Result, OsType) when is_integer(Char) -> + extension(Rest, [Char|Result], OsType); +extension([], Result, _OsType) -> + lists:reverse(Result). + +%% Joins a list of filenames with directory separators. + +-spec join([string()]) -> string(). +join([Name1, Name2|Rest]) -> + join([join(Name1, Name2)|Rest]); +join([Name]) when is_list(Name) -> + join1(Name, [], [], major_os_type()); +join([Name]) when is_atom(Name) -> + join([atom_to_list(Name)]). + +%% Joins two filenames with directory separators. + +-spec join(string(), string()) -> string(). +join(Name1, Name2) when is_list(Name1), is_list(Name2) -> + OsType = major_os_type(), + case pathtype(Name2) of + relative -> join1(Name1, Name2, [], OsType); + _Other -> join1(Name2, [], [], OsType) + end; +join(Name1, Name2) when is_atom(Name1) -> + join(atom_to_list(Name1), Name2); +join(Name1, Name2) when is_atom(Name2) -> + join(Name1, atom_to_list(Name2)). + +%% Internal function to join an absolute name and a relative name. +%% It is the responsibility of the caller to ensure that RelativeName +%% is relative. + +join1([UcLetter, $:|Rest], RelativeName, [], win32) +when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> + join1(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1([$\\|Rest], RelativeName, Result, win32) -> + join1([$/|Rest], RelativeName, Result, win32); +join1([$\\|Rest], RelativeName, Result, vxworks) -> + join1([$/|Rest], RelativeName, Result, vxworks); +join1([$/|Rest], RelativeName, [$., $/|Result], OsType) -> + join1(Rest, RelativeName, [$/|Result], OsType); +join1([$/|Rest], RelativeName, [$/|Result], OsType) -> + join1(Rest, RelativeName, [$/|Result], OsType); +join1([], [], Result, OsType) -> + maybe_remove_dirsep(Result, OsType); +join1([], RelativeName, [$:|Rest], win32) -> + join1(RelativeName, [], [$:|Rest], win32); +join1([], RelativeName, [$/|Result], OsType) -> + join1(RelativeName, [], [$/|Result], OsType); +join1([], RelativeName, Result, OsType) -> + join1(RelativeName, [], [$/|Result], OsType); +join1([[_|_]=List|Rest], RelativeName, Result, OsType) -> + join1(List++Rest, RelativeName, Result, OsType); +join1([[]|Rest], RelativeName, Result, OsType) -> + join1(Rest, RelativeName, Result, OsType); +join1([Char|Rest], RelativeName, Result, OsType) when is_integer(Char) -> + join1(Rest, RelativeName, [Char|Result], OsType); +join1([Atom|Rest], RelativeName, Result, OsType) when is_atom(Atom) -> + join1(atom_to_list(Atom)++Rest, RelativeName, Result, OsType). + +maybe_remove_dirsep([$/, $:, Letter], win32) -> + [Letter, $:, $/]; +maybe_remove_dirsep([$/], _) -> + [$/]; +maybe_remove_dirsep([$/|Name], _) -> + lists:reverse(Name); +maybe_remove_dirsep(Name, _) -> + lists:reverse(Name). + +%% Appends a directory separator and a pathname component to +%% a given base directory, which is is assumed to be normalised +%% by a previous call to join/{1,2}. + +-spec append(string(), name()) -> string(). +append(Dir, Name) -> + Dir ++ [$/|Name]. + +%% Returns one of absolute, relative or volumerelative. +%% +%% absolute The pathname refers to a specific file on a specific +%% volume. Example: /usr/local/bin/ (on Unix), +%% h:/port_test (on Windows). +%% relative The pathname is relative to the current working directory +%% on the current volume. Example: foo/bar, ../src +%% volumerelative The pathname is relative to the current working directory +%% on the specified volume, or is a specific file on the +%% current working volume. (Windows only) +%% Example: a:bar.erl, /temp/foo.erl + +-spec pathtype(name()) -> 'absolute' | 'relative' | 'volumerelative'. +pathtype(Atom) when is_atom(Atom) -> + pathtype(atom_to_list(Atom)); +pathtype(Name) when is_list(Name) -> + case os:type() of + {unix, _} -> unix_pathtype(Name); + {win32, _} -> win32_pathtype(Name); + vxworks -> case vxworks_first(Name) of + {device, _Rest, _Dev} -> + absolute; + _ -> + relative + end; + {ose,_} -> unix_pathtype(Name) + end. + +unix_pathtype([$/|_]) -> + absolute; +unix_pathtype([List|Rest]) when is_list(List) -> + unix_pathtype(List++Rest); +unix_pathtype([Atom|Rest]) when is_atom(Atom) -> + unix_pathtype(atom_to_list(Atom)++Rest); +unix_pathtype(_) -> + relative. + +win32_pathtype([List|Rest]) when is_list(List) -> + win32_pathtype(List++Rest); +win32_pathtype([Atom|Rest]) when is_atom(Atom) -> + win32_pathtype(atom_to_list(Atom)++Rest); +win32_pathtype([Char, List|Rest]) when is_list(List) -> + win32_pathtype([Char|List++Rest]); +win32_pathtype([$/, $/|_]) -> absolute; +win32_pathtype([$\\, $/|_]) -> absolute; +win32_pathtype([$/, $\\|_]) -> absolute; +win32_pathtype([$\\, $\\|_]) -> absolute; +win32_pathtype([$/|_]) -> volumerelative; +win32_pathtype([$\\|_]) -> volumerelative; +win32_pathtype([C1, C2, List|Rest]) when is_list(List) -> + pathtype([C1, C2|List++Rest]); +win32_pathtype([_Letter, $:, $/|_]) -> absolute; +win32_pathtype([_Letter, $:, $\\|_]) -> absolute; +win32_pathtype([_Letter, $:|_]) -> volumerelative; +win32_pathtype(_) -> relative. + +%% Returns all characters in the filename, except the extension. +%% +%% Examples: rootname("/jam.src/kalle") -> "/jam.src/kalle" +%% rootname("/jam.src/foo.erl") -> "/jam.src/foo" + +-spec rootname(name()) -> string(). +rootname(Name0) -> + Name = flatten(Name0), + rootname(Name, [], [], major_os_type()). + +rootname([$/|Rest], Root, Ext, OsType) -> + rootname(Rest, [$/]++Ext++Root, [], OsType); +rootname([$\\|Rest], Root, Ext, win32) -> + rootname(Rest, [$/]++Ext++Root, [], win32); +rootname([$\\|Rest], Root, Ext, vxworks) -> + rootname(Rest, [$/]++Ext++Root, [], vxworks); +rootname([$.|Rest], Root, [], OsType) -> + rootname(Rest, Root, ".", OsType); +rootname([$.|Rest], Root, Ext, OsType) -> + rootname(Rest, Ext++Root, ".", OsType); +rootname([Char|Rest], Root, [], OsType) when is_integer(Char) -> + rootname(Rest, [Char|Root], [], OsType); +rootname([Char|Rest], Root, Ext, OsType) when is_integer(Char) -> + rootname(Rest, Root, [Char|Ext], OsType); +rootname([], Root, _Ext, _OsType) -> + lists:reverse(Root). + +%% Returns all characters in the filename, except the given extension. +%% If the filename has another extension, the complete filename is +%% returned. +%% +%% Examples: rootname("/jam.src/kalle.jam", ".erl") -> "/jam.src/kalle.jam" +%% rootname("/jam.src/foo.erl", ".erl") -> "/jam.src/foo" + +-spec rootname(name(), name()) -> string(). +rootname(Name0, Ext0) -> + Name = flatten(Name0), + Ext = flatten(Ext0), + rootname2(Name, Ext, []). + +rootname2(Ext, Ext, Result) -> + lists:reverse(Result); +rootname2([], _Ext, Result) -> + lists:reverse(Result); +rootname2([Char|Rest], Ext, Result) when is_integer(Char) -> + rootname2(Rest, Ext, [Char|Result]). + +%% Returns a list whose elements are the path components in the filename. +%% +%% Examples: +%% split("/usr/local/bin") -> ["/", "usr", "local", "bin"] +%% split("foo/bar") -> ["foo", "bar"] +%% split("a:\\msdev\\include") -> ["a:/", "msdev", "include"] + +-spec split(name()) -> [string()]. +split(Name0) -> + Name = flatten(Name0), + case os:type() of + {unix, _} -> unix_split(Name); + {win32, _} -> win32_split(Name); + vxworks -> vxworks_split(Name); + {ose,_} -> unix_split(Name) + end. + +%% If a VxWorks filename starts with '[/\].*[^/\]' '[/\].*:' or '.*:' +%% that part of the filename is considered a device. +%% The rest of the name is interpreted exactly as for win32. + +%% XXX - dirty solution to make filename:split([]) return the same thing on +%% VxWorks as on unix and win32. +vxworks_split([]) -> + []; +vxworks_split(L) -> + {_Devicep, Rest, FirstComp} = vxworks_first(L), + split(Rest, [], [lists:reverse(FirstComp)], win32). + +unix_split(Name) -> + split(Name, [], unix). + +win32_split([$\\|Rest]) -> + win32_split([$/|Rest]); +win32_split([X, $\\|Rest]) when is_integer(X) -> + win32_split([X, $/|Rest]); +win32_split([X, Y, $\\|Rest]) when is_integer(X), is_integer(Y) -> + win32_split([X, Y, $/|Rest]); +win32_split([$/, $/|Rest]) -> + split(Rest, [], [[$/, $/]]); +win32_split([UcLetter, $:|Rest]) when UcLetter >= $A, UcLetter =< $Z -> + win32_split([UcLetter+$a-$A, $:|Rest]); +win32_split([Letter, $:, $/|Rest]) -> + split(Rest, [], [[Letter, $:, $/]], win32); +win32_split([Letter, $:|Rest]) -> + split(Rest, [], [[Letter, $:]], win32); +win32_split(Name) -> + split(Name, [], win32). + +split([$/|Rest], Components, OsType) -> + split(Rest, [], [[$/]|Components], OsType); +split([$\\|Rest], Components, win32) -> + split(Rest, [], [[$/]|Components], win32); +split(RelativeName, Components, OsType) -> + split(RelativeName, [], Components, OsType). + +split([$\\|Rest], Comp, Components, win32) -> + split([$/|Rest], Comp, Components, win32); +split([$/|Rest], [], Components, OsType) -> + split(Rest, [], Components, OsType); +split([$/|Rest], Comp, Components, OsType) -> + split(Rest, [], [lists:reverse(Comp)|Components], OsType); +split([Char|Rest], Comp, Components, OsType) when is_integer(Char) -> + split(Rest, [Char|Comp], Components, OsType); +split([List|Rest], Comp, Components, OsType) when is_list(List) -> + split(List++Rest, Comp, Components, OsType); +split([], [], Components, _OsType) -> + lists:reverse(Components); +split([], Comp, Components, OsType) -> + split([], [], [lists:reverse(Comp)|Components], OsType). + +%% Converts a filename to a form accepedt by the command shell and native +%% applications on the current platform. On Windows, forward slashes +%% will be converted to backslashes. On all platforms, the +%% name will be normalized as done by join/1. + +-spec nativename(string()) -> string(). +nativename(Name0) -> + Name = join([Name0]), %Normalize. + case os:type() of + {win32, _} -> win32_nativename(Name); + _ -> Name + end. + +win32_nativename([$/|Rest]) -> + [$\\|win32_nativename(Rest)]; +win32_nativename([C|Rest]) -> + [C|win32_nativename(Rest)]; +win32_nativename([]) -> + []. + +separators() -> + case os:type() of + {unix, _} -> {false, false}; + {win32, _} -> {$\\, $:}; + vxworks -> {$\\, false}; + {ose,_} -> {false, false} + end. + + +%% find_src(Module) -- +%% find_src(Module, Rules) -- +%% +%% Finds the source file name and compilation options for a compiled +%% module. The result can be fed to compile:file/2 to compile the +%% file again. +%% +%% The Module argument (which can be a string or an atom) specifies +%% either the module name or the path to the source code, with or +%% without the ".erl" extension. In either case the module must be +%% known by the code manager, i.e. code:which/1 should succeed. +%% +%% Rules describes how the source directory should be found given +%% the directory for the object code. Each rule is on the form +%% {BinSuffix, SourceSuffix}, and is interpreted like this: +%% If the end of directory name where the object is located matches +%% BinSuffix, then the suffix will be replaced with SourceSuffix +%% in the directory name. If the source file in the resulting +%% directory, the next rule will be tried. +%% +%% Returns: {SourceFile, Options} +%% +%% SourceFile is the absolute path to the source file (but without the ".erl" +%% extension) and Options are the necessary options to compile the file +%% with compile:file/2, but doesn't include options like 'report' or +%% 'verbose' that doesn't change the way code is generated. +%% The paths in the {outdir, Path} and {i, Path} options are guaranteed +%% to be absolute. + +-type rule() :: {string(), string()}. +-type ecode() :: 'non_existing' | 'preloaded' | 'interpreted'. +-type option() :: {'i', string()} | {'outdir', string()} | {'d', atom()}. + +-spec find_src(atom() | string()) -> + {string(), [option()]} | {'error', {ecode(), atom()}}. +find_src(Mod) -> + Default = [{"", ""}, {"ebin", "src"}, {"ebin", "esrc"}], + Rules = + case application:get_env(kernel, source_search_rules) of + undefined -> Default; + {ok, []} -> Default; + {ok, R} when is_list(R) -> R + end, + find_src(Mod, Rules). + +-spec find_src(atom() | string(), [rule()]) -> + {string(), [option()]} | {'error', {ecode(), atom()}}. +find_src(Mod, Rules) when is_atom(Mod) -> + find_src(atom_to_list(Mod), Rules); +find_src(File0, Rules) when is_list(File0) -> + Mod = list_to_atom(basename(File0, ".erl")), + File = rootname(File0, ".erl"), + case readable_file(File++".erl") of + true -> + try_file(File, Mod, Rules); + false -> + try_file(undefined, Mod, Rules) + end. + +try_file(File, Mod, Rules) -> + case code:which(Mod) of + Possibly_Rel_Path when is_list(Possibly_Rel_Path) -> + {ok, Cwd} = file:get_cwd(), + Path = join(Cwd, Possibly_Rel_Path), + try_file(File, Path, Mod, Rules); + Ecode when is_atom(Ecode) -> % Ecode :: ecode() + {error, {Ecode, Mod}} + end. + +%% At this point, the Mod is known to be valid. +%% If the source name is not known, find it. +%% Then get the compilation options. +%% Returns: {SrcFile, Options} + +try_file(undefined, ObjFilename, Mod, Rules) -> + case get_source_file(ObjFilename, Mod, Rules) of + {ok, File} -> try_file(File, ObjFilename, Mod, Rules); + Error -> Error + end; +try_file(Src, _ObjFilename, Mod, _Rules) -> + List = Mod:module_info(compile), + {options, Options} = lists:keyfind(options, 1, List), + {ok, Cwd} = file:get_cwd(), + AbsPath = make_abs_path(Cwd, Src), + {AbsPath, filter_options(dirname(AbsPath), Options, [])}. + +%% Filters the options. +%% +%% 1) Remove options that have no effect on the generated code, +%% such as report and verbose. +%% +%% 2) The paths found in {i, Path} and {outdir, Path} are converted +%% to absolute paths. When doing this, it is assumed that relatives +%% paths are relative to directory where the source code is located. +%% This is not necessarily true. It would be safer if the compiler +%% would emit absolute paths in the first place. + +filter_options(Base, [{outdir, Path}|Rest], Result) -> + filter_options(Base, Rest, [{outdir, make_abs_path(Base, Path)}|Result]); +filter_options(Base, [{i, Path}|Rest], Result) -> + filter_options(Base, Rest, [{i, make_abs_path(Base, Path)}|Result]); +filter_options(Base, [Option|Rest], Result) when Option =:= trace -> + filter_options(Base, Rest, [Option|Result]); +filter_options(Base, [Option|Rest], Result) when Option =:= export_all -> + filter_options(Base, Rest, [Option|Result]); +filter_options(Base, [Option|Rest], Result) when Option =:= binary -> + filter_options(Base, Rest, [Option|Result]); +filter_options(Base, [Option|Rest], Result) when Option =:= fast -> + filter_options(Base, Rest, [Option|Result]); +filter_options(Base, [Tuple|Rest], Result) when element(1, Tuple) =:= d -> + filter_options(Base, Rest, [Tuple|Result]); +filter_options(Base, [Tuple|Rest], Result) +when element(1, Tuple) =:= parse_transform -> + filter_options(Base, Rest, [Tuple|Result]); +filter_options(Base, [_|Rest], Result) -> + filter_options(Base, Rest, Result); +filter_options(_Base, [], Result) -> + Result. + +%% Gets the source file given path of object code and module name. + +get_source_file(Obj, Mod, Rules) -> + case catch Mod:module_info(source_file) of + {'EXIT', _Reason} -> + source_by_rules(dirname(Obj), packages:last(Mod), Rules); + File -> + {ok, File} + end. + +source_by_rules(Dir, Base, [{From, To}|Rest]) -> + case try_rule(Dir, Base, From, To) of + {ok, File} -> {ok, File}; + error -> source_by_rules(Dir, Base, Rest) + end; +source_by_rules(_Dir, _Base, []) -> + {error, source_file_not_found}. + +try_rule(Dir, Base, From, To) -> + case lists:suffix(From, Dir) of + true -> + NewDir = lists:sublist(Dir, 1, length(Dir)-length(From))++To, + Src = join(NewDir, Base), + case readable_file(Src++".erl") of + true -> {ok, Src}; + false -> error + end; + false -> + error + end. + +readable_file(File) -> + case file:read_file_info(File) of + {ok, #file_info{type=regular, access=read}} -> + true; + {ok, #file_info{type=regular, access=read_write}} -> + true; + _Other -> + false + end. + +make_abs_path(BasePath, Path) -> + join(BasePath, Path). + +major_os_type() -> + case os:type() of + {OsT, _} -> OsT; + OsT -> OsT + end. + +%% Need to take care of the first pathname component separately +%% due to VxWorks less than good device naming rules. +%% (i.e. this is VxWorks specific ...) +%% The following four all starts with device names +%% elrond:/foo -> elrond: +%% elrond:\\foo.bar -> elrond: +%% /DISK1:foo -> /DISK1: +%% /usr/include -> /usr +%% This one doesn't: +%% foo/bar + +vxworks_first([]) -> + {not_device, [], []}; +vxworks_first([$/|T]) -> + vxworks_first2(device, T, [$/]); +vxworks_first([$\\|T]) -> + vxworks_first2(device, T, [$/]); +vxworks_first([H|T]) when is_list(H) -> + vxworks_first(H++T); +vxworks_first([H|T]) -> + vxworks_first2(not_device, T, [H]). + +vxworks_first2(Devicep, [], FirstComp) -> + {Devicep, [], FirstComp}; +vxworks_first2(Devicep, [$/|T], FirstComp) -> + {Devicep, [$/|T], FirstComp}; +vxworks_first2(Devicep, [$\\|T], FirstComp) -> + {Devicep, [$/|T], FirstComp}; +vxworks_first2(_Devicep, [$:|T], FirstComp)-> + {device, T, [$:|FirstComp]}; +vxworks_first2(Devicep, [H|T], FirstComp) when is_list(H) -> + vxworks_first2(Devicep, H++T, FirstComp); +vxworks_first2(Devicep, [H|T], FirstComp) -> + vxworks_first2(Devicep, T, [H|FirstComp]). + +%% flatten(List) +%% Flatten a list, also accepting atoms. + +-spec flatten(name()) -> string(). +flatten(List) -> + do_flatten(List, []). + +do_flatten([H|T], Tail) when is_list(H) -> + do_flatten(H, do_flatten(T, Tail)); +do_flatten([H|T], Tail) when is_atom(H) -> + atom_to_list(H) ++ do_flatten(T, Tail); +do_flatten([H|T], Tail) -> + [H|do_flatten(T, Tail)]; +do_flatten([], Tail) -> + Tail; +do_flatten(Atom, Tail) when is_atom(Atom) -> + atom_to_list(Atom) ++ flatten(Tail). diff --git a/lib/stdlib/src/gb_sets.erl b/lib/stdlib/src/gb_sets.erl new file mode 100644 index 0000000000..086dc79b46 --- /dev/null +++ b/lib/stdlib/src/gb_sets.erl @@ -0,0 +1,812 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ===================================================================== +%% Ordered Sets implemented as General Balanced Trees +%% +%% Copyright (C) 1999-2001 Richard Carlsson +%% +%% An implementation of ordered sets using Prof. Arne Andersson's +%% General Balanced Trees. This can be much more efficient than using +%% ordered lists, for larger sets, but depends on the application. See +%% notes below for details. +%% --------------------------------------------------------------------- +%% Notes: +%% +%% The complexity on set operations is bounded by either O(|S|) or O(|T| +%% * log(|S|)), where S is the largest given set, depending on which is +%% fastest for any particular function call. For operating on sets of +%% almost equal size, this implementation is about 3 times slower than +%% using ordered-list sets directly. For sets of very different sizes, +%% however, this solution can be arbitrarily much faster; in practical +%% cases, often between 10 and 100 times. This implementation is +%% particularly suited for ackumulating elements a few at a time, +%% building up a large set (more than 100-200 elements), and repeatedly +%% testing for membership in the current set. +%% +%% As with normal tree structures, lookup (membership testing), +%% insertion and deletion have logarithmic complexity. +%% +%% Operations: +%% +%% - empty(): returns empty set. +%% +%% Alias: new(), for compatibility with `sets'. +%% +%% - is_empty(S): returns 'true' if S is an empty set, and 'false' +%% otherwise. +%% +%% - size(S): returns the number of nodes in the set as an integer. +%% Returns 0 (zero) if the set is empty. +%% +%% - singleton(X): returns a set containing only the element X. +%% +%% - is_member(X, S): returns `true' if element X is a member of set S, +%% and `false' otherwise. +%% +%% Alias: is_element(), for compatibility with `sets'. +%% +%% - insert(X, S): inserts element X into set S; returns the new set. +%% *Assumes that the element is not present in S.* +%% +%% - add(X, S): adds element X to set S; returns the new set. If X is +%% already an element in S, nothing is changed. +%% +%% Alias: add_element(), for compatibility with `sets'. +%% +%% - delete(X, S): removes element X from set S; returns new set. +%% Assumes that the element exists in the set. +%% +%% - delete_any(X, S): removes key X from set S if the key is present +%% in the set, otherwise does nothing; returns new set. +%% +%% Alias: del_element(), for compatibility with `sets'. +%% +%% - balance(S): rebalances the tree representation of S. Note that this +%% is rarely necessary, but may be motivated when a large number of +%% elements have been deleted from the tree without further +%% insertions. Rebalancing could then be forced in order to minimise +%% lookup times, since deletion only does not rebalance the tree. +%% +%% - union(S1, S2): returns a new set that contains each element that is +%% in either S1 or S2 or both, and no other elements. +%% +%% - union(Ss): returns a new set that contains each element that is in +%% at least one of the sets in the list Ss, and no other elements. +%% +%% - intersection(S1, S2): returns a new set that contains each element +%% that is in both S1 and S2, and no other elements. +%% +%% - intersection(Ss): returns a new set that contains each element that +%% is in all of the sets in the list Ss, and no other elements. +%% +%% - is_disjoint(S1, S2): returns `true' if none of the elements in S1 +%% occurs in S2. +%% +%% - difference(S1, S2): returns a new set that contains each element in +%% S1 that is not also in S2, and no other elements. +%% +%% Alias: subtract(), for compatibility with `sets'. +%% +%% - is_subset(S1, S2): returns `true' if each element in S1 is also a +%% member of S2, and `false' otherwise. +%% +%% - to_list(S): returns an ordered list of all elements in set S. The +%% list never contains duplicates. +%% +%% - from_list(List): creates a set containing all elements in List, +%% where List may be unordered and contain duplicates. +%% +%% - from_ordset(L): turns an ordered-set list L into a set. The list +%% must not contain duplicates. +%% +%% - smallest(S): returns the smallest element in set S. Assumes that +%% the set S is nonempty. +%% +%% - largest(S): returns the largest element in set S. Assumes that the +%% set S is nonempty. +%% +%% - take_smallest(S): returns {X, S1}, where X is the smallest element +%% in set S, and S1 is the set S with element X deleted. Assumes that +%% the set S is nonempty. +%% +%% - take_largest(S): returns {X, S1}, where X is the largest element in +%% set S, and S1 is the set S with element X deleted. Assumes that the +%% set S is nonempty. +%% +%% - iterator(S): returns an iterator that can be used for traversing +%% the entries of set S; see `next'. The implementation of this is +%% very efficient; traversing the whole set using `next' is only +%% slightly slower than getting the list of all elements using +%% `to_list' and traversing that. The main advantage of the iterator +%% approach is that it does not require the complete list of all +%% elements to be built in memory at one time. +%% +%% - next(T): returns {X, T1} where X is the smallest element referred +%% to by the iterator T, and T1 is the new iterator to be used for +%% traversing the remaining elements, or the atom `none' if no +%% elements remain. +%% +%% - filter(P, S): Filters set S using predicate function P. Included +%% for compatibility with `sets'. +%% +%% - fold(F, A, S): Folds function F over set S with A as the initial +%% ackumulator. Included for compatibility with `sets'. +%% +%% - is_set(S): returns 'true' if S appears to be a set, and 'false' +%% otherwise. Not recommended; included for compatibility with `sets'. + +-module(gb_sets). + +-export([empty/0, is_empty/1, size/1, singleton/1, is_member/2, + insert/2, add/2, delete/2, delete_any/2, balance/1, union/2, + union/1, intersection/2, intersection/1, is_disjoint/2, difference/2, + is_subset/2, to_list/1, from_list/1, from_ordset/1, smallest/1, + largest/1, take_smallest/1, take_largest/1, iterator/1, next/1, + filter/2, fold/3, is_set/1]). + +%% `sets' compatibility aliases: + +-export([new/0, is_element/2, add_element/2, del_element/2, + subtract/2]). + +%% GB-trees adapted from Sven-Olof Nyström's implementation for +%% representation of sets. +%% +%% Data structures: +%% - {Size, Tree}, where `Tree' is composed of nodes of the form: +%% - {Key, Smaller, Bigger}, and the "empty tree" node: +%% - nil. +%% +%% No attempt is made to balance trees after deletions. Since deletions +%% don't increase the height of a tree, this should be OK. +%% +%% Original balance condition h(T) <= ceil(c * log(|T|)) has been +%% changed to the similar (but not quite equivalent) condition 2 ^ h(T) +%% <= |T| ^ c. This should also be OK. +%% +%% Behaviour is logarithmic (as it should be). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Some macros. + +-define(p, 2). % It seems that p = 2 is optimal for sorted keys + +-define(pow(A, _), A * A). % correct with exponent as defined above. + +-define(div2(X), X bsr 1). + +-define(mul2(X), X bsl 1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Some types. + +-type gb_set_node() :: 'nil' | {term(), _, _}. + +%% A declaration equivalent to the following is currently hard-coded +%% in erl_types.erl +%% +%% -opaque gb_set() :: {non_neg_integer(), gb_set_node()}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec empty() -> gb_set(). + +empty() -> + {0, nil}. + +-spec new() -> gb_set(). + +new() -> empty(). + +-spec is_empty(gb_set()) -> boolean(). + +is_empty({0, nil}) -> + true; +is_empty(_) -> + false. + +-spec size(gb_set()) -> non_neg_integer(). + +size({Size, _}) -> + Size. + +-spec singleton(term()) -> gb_set(). + +singleton(Key) -> + {1, {Key, nil, nil}}. + +-spec is_element(term(), gb_set()) -> boolean(). + +is_element(Key, S) -> + is_member(Key, S). + +-spec is_member(term(), gb_set()) -> boolean(). + +is_member(Key, {_, T}) -> + is_member_1(Key, T). + +is_member_1(Key, {Key1, Smaller, _}) when Key < Key1 -> + is_member_1(Key, Smaller); +is_member_1(Key, {Key1, _, Bigger}) when Key > Key1 -> + is_member_1(Key, Bigger); +is_member_1(_, {_, _, _}) -> + true; +is_member_1(_, nil) -> + false. + +-spec insert(term(), gb_set()) -> gb_set(). + +insert(Key, {S, T}) -> + S1 = S + 1, + {S1, insert_1(Key, T, ?pow(S1, ?p))}. + +insert_1(Key, {Key1, Smaller, Bigger}, S) when Key < Key1 -> + case insert_1(Key, Smaller, ?div2(S)) of + {T1, H1, S1} when is_integer(H1) -> + T = {Key1, T1, Bigger}, + {H2, S2} = count(Bigger), + H = ?mul2(erlang:max(H1, H2)), + SS = S1 + S2 + 1, + P = ?pow(SS, ?p), + if + H > P -> + balance(T, SS); + true -> + {T, H, SS} + end; + T1 -> + {Key1, T1, Bigger} + end; +insert_1(Key, {Key1, Smaller, Bigger}, S) when Key > Key1 -> + case insert_1(Key, Bigger, ?div2(S)) of + {T1, H1, S1} when is_integer(H1) -> + T = {Key1, Smaller, T1}, + {H2, S2} = count(Smaller), + H = ?mul2(erlang:max(H1, H2)), + SS = S1 + S2 + 1, + P = ?pow(SS, ?p), + if + H > P -> + balance(T, SS); + true -> + {T, H, SS} + end; + T1 -> + {Key1, Smaller, T1} + end; +insert_1(Key, nil, 0) -> + {{Key, nil, nil}, 1, 1}; +insert_1(Key, nil, _) -> + {Key, nil, nil}; +insert_1(Key, _, _) -> + erlang:error({key_exists, Key}). + +count({_, nil, nil}) -> + {1, 1}; +count({_, Sm, Bi}) -> + {H1, S1} = count(Sm), + {H2, S2} = count(Bi), + {?mul2(erlang:max(H1, H2)), S1 + S2 + 1}; +count(nil) -> + {1, 0}. + +-spec balance(gb_set()) -> gb_set(). + +balance({S, T}) -> + {S, balance(T, S)}. + +balance(T, S) -> + balance_list(to_list_1(T), S). + +balance_list(L, S) -> + {T, _} = balance_list_1(L, S), + T. + +balance_list_1(L, S) when S > 1 -> + Sm = S - 1, + S2 = Sm div 2, + S1 = Sm - S2, + {T1, [K | L1]} = balance_list_1(L, S1), + {T2, L2} = balance_list_1(L1, S2), + T = {K, T1, T2}, + {T, L2}; +balance_list_1([Key | L], 1) -> + {{Key, nil, nil}, L}; +balance_list_1(L, 0) -> + {nil, L}. + +-spec add_element(term(), gb_set()) -> gb_set(). + +add_element(X, S) -> + add(X, S). + +-spec add(term(), gb_set()) -> gb_set(). + +add(X, S) -> + case is_member(X, S) of + true -> + S; % we don't have to do anything here + false -> + insert(X, S) + end. + +-spec from_list([term()]) -> gb_set(). + +from_list(L) -> + from_ordset(ordsets:from_list(L)). + +-spec from_ordset([term()]) -> gb_set(). + +from_ordset(L) -> + S = length(L), + {S, balance_list(L, S)}. + +-spec del_element(term(), gb_set()) -> gb_set(). + +del_element(Key, S) -> + delete_any(Key, S). + +-spec delete_any(term(), gb_set()) -> gb_set(). + +delete_any(Key, S) -> + case is_member(Key, S) of + true -> + delete(Key, S); + false -> + S + end. + +-spec delete(term(), gb_set()) -> gb_set(). + +delete(Key, {S, T}) -> + {S - 1, delete_1(Key, T)}. + +delete_1(Key, {Key1, Smaller, Larger}) when Key < Key1 -> + Smaller1 = delete_1(Key, Smaller), + {Key1, Smaller1, Larger}; +delete_1(Key, {Key1, Smaller, Bigger}) when Key > Key1 -> + Bigger1 = delete_1(Key, Bigger), + {Key1, Smaller, Bigger1}; +delete_1(_, {_, Smaller, Larger}) -> + merge(Smaller, Larger). + +merge(Smaller, nil) -> + Smaller; +merge(nil, Larger) -> + Larger; +merge(Smaller, Larger) -> + {Key, Larger1} = take_smallest1(Larger), + {Key, Smaller, Larger1}. + +-spec take_smallest(gb_set()) -> {term(), gb_set()}. + +take_smallest({S, T}) -> + {Key, Larger} = take_smallest1(T), + {Key, {S - 1, Larger}}. + +take_smallest1({Key, nil, Larger}) -> + {Key, Larger}; +take_smallest1({Key, Smaller, Larger}) -> + {Key1, Smaller1} = take_smallest1(Smaller), + {Key1, {Key, Smaller1, Larger}}. + +-spec smallest(gb_set()) -> term(). + +smallest({_, T}) -> + smallest_1(T). + +smallest_1({Key, nil, _Larger}) -> + Key; +smallest_1({_Key, Smaller, _Larger}) -> + smallest_1(Smaller). + +-spec take_largest(gb_set()) -> {term(), gb_set()}. + +take_largest({S, T}) -> + {Key, Smaller} = take_largest1(T), + {Key, {S - 1, Smaller}}. + +take_largest1({Key, Smaller, nil}) -> + {Key, Smaller}; +take_largest1({Key, Smaller, Larger}) -> + {Key1, Larger1} = take_largest1(Larger), + {Key1, {Key, Smaller, Larger1}}. + +-spec largest(gb_set()) -> term(). + +largest({_, T}) -> + largest_1(T). + +largest_1({Key, _Smaller, nil}) -> + Key; +largest_1({_Key, _Smaller, Larger}) -> + largest_1(Larger). + +-spec to_list(gb_set()) -> [term()]. + +to_list({_, T}) -> + to_list(T, []). + +to_list_1(T) -> to_list(T, []). + +to_list({Key, Small, Big}, L) -> + to_list(Small, [Key | to_list(Big, L)]); +to_list(nil, L) -> L. + +-spec iterator(gb_set()) -> [term()]. + +iterator({_, T}) -> + iterator(T, []). + +%% The iterator structure is really just a list corresponding to the +%% call stack of an in-order traversal. This is quite fast. + +iterator({_, nil, _} = T, As) -> + [T | As]; +iterator({_, L, _} = T, As) -> + iterator(L, [T | As]); +iterator(nil, As) -> + As. + +-spec next([term()]) -> {term(), [term()]} | 'none'. + +next([{X, _, T} | As]) -> + {X, iterator(T, As)}; +next([]) -> + none. + + +%% Set operations: + + +%% If |X| < |Y|, then we traverse the elements of X. The cost for +%% testing a single random element for membership in a tree S is +%% proportional to log(|S|); thus, if |Y| / |X| < c * log(|Y|), for some +%% c, it is more efficient to scan the ordered sequence of elements of Y +%% while traversing X (under the same ordering) in order to test whether +%% elements of X are already in Y. Since the `math' module does not have +%% a `log2'-function, we rewrite the condition to |X| < |Y| * c1 * +%% ln(|X|), where c1 = c / ln 2. + +-define(c, 1.46). % 1 / ln 2; this appears to be best + +%% If the sets are not very different in size, i.e., if |Y| / |X| >= c * +%% log(|Y|), then the fastest way to do union (and the other similar set +%% operations) is to build the lists of elements, traverse these lists +%% in parallel while building a reversed ackumulator list, and finally +%% rebuild the tree directly from the ackumulator. Other methods of +%% traversing the elements can be devised, but they all have higher +%% overhead. + +-spec union(gb_set(), gb_set()) -> gb_set(). + +union({N1, T1}, {N2, T2}) when N2 < N1 -> + union(to_list_1(T2), N2, T1, N1); +union({N1, T1}, {N2, T2}) -> + union(to_list_1(T1), N1, T2, N2). + +%% We avoid the expensive mathematical computations if there is little +%% chance at saving at least the same amount of time by making the right +%% choice of strategy. Recall that N1 < N2 here. + +union(L, N1, T2, N2) when N2 < 10 -> + %% Break even is about 7 for N1 = 1 and 10 for N1 = 2 + union_2(L, to_list_1(T2), N1 + N2); +union(L, N1, T2, N2) -> + X = N1 * round(?c * math:log(N2)), + if N2 < X -> + union_2(L, to_list_1(T2), N1 + N2); + true -> + union_1(L, mk_set(N2, T2)) + end. + +-spec mk_set(non_neg_integer(), gb_set_node()) -> gb_set(). + +mk_set(N, T) -> + {N, T}. + +%% If the length of the list is in proportion with the size of the +%% target set, this version spends too much time doing lookups, compared +%% to the below version. + +union_1([X | Xs], S) -> + union_1(Xs, add(X, S)); +union_1([], S) -> + S. + + +%% If the length of the first list is too small in comparison with the +%% size of the target set, this version spends too much time scanning +%% the element list of the target set for possible membership, compared +%% with the above version. + +%% Some notes on sequential scanning of ordered lists +%% +%% 1) We want to put the equality case last, if we can assume that the +%% probability for overlapping elements is relatively low on average. +%% Doing this also allows us to completely skip the (arithmetic) +%% equality test, since the term order is arithmetically total. +%% +%% 2) We always test for `smaller than' first, i.e., whether the head of +%% the left list is smaller than the head of the right list, and if the +%% `greater than' test should instead turn out to be true, we switch +%% left and right arguments in the recursive call under the assumption +%% that the same is likely to apply to the next element also, +%% statistically reducing the number of failed tests and automatically +%% adapting to cases of lists having very different lengths. This saves +%% 10-40% of the traversation time compared to a "fixed" strategy, +%% depending on the sizes and contents of the lists. +%% +%% 3) A tail recursive version using `lists:reverse/2' is about 5-10% +%% faster than a plain recursive version using the stack, for lists of +%% more than about 20 elements and small stack frames. For very short +%% lists, however (length < 10), the stack version can be several times +%% faster. As stack frames grow larger, the advantages of using +%% `reverse' could get greater. + +union_2(Xs, Ys, S) -> + union_2(Xs, Ys, [], S). % S is the sum of the sizes here + +union_2([X | Xs1], [Y | _] = Ys, As, S) when X < Y -> + union_2(Xs1, Ys, [X | As], S); +union_2([X | _] = Xs, [Y | Ys1], As, S) when X > Y -> + union_2(Ys1, Xs, [Y | As], S); +union_2([X | Xs1], [_ | Ys1], As, S) -> + union_2(Xs1, Ys1, [X | As], S - 1); +union_2([], Ys, As, S) -> + {S, balance_revlist(push(Ys, As), S)}; +union_2(Xs, [], As, S) -> + {S, balance_revlist(push(Xs, As), S)}. + +push([X | Xs], As) -> + push(Xs, [X | As]); +push([], As) -> + As. + +balance_revlist(L, S) -> + {T, _} = balance_revlist_1(L, S), + T. + +balance_revlist_1(L, S) when S > 1 -> + Sm = S - 1, + S2 = Sm div 2, + S1 = Sm - S2, + {T2, [K | L1]} = balance_revlist_1(L, S1), + {T1, L2} = balance_revlist_1(L1, S2), + T = {K, T1, T2}, + {T, L2}; +balance_revlist_1([Key | L], 1) -> + {{Key, nil, nil}, L}; +balance_revlist_1(L, 0) -> + {nil, L}. + +-spec union([gb_set()]) -> gb_set(). + +union([S | Ss]) -> + union_list(S, Ss); +union([]) -> empty(). + +union_list(S, [S1 | Ss]) -> + union_list(union(S, S1), Ss); +union_list(S, []) -> S. + + +%% The rest is modelled on the above. + +-spec intersection(gb_set(), gb_set()) -> gb_set(). + +intersection({N1, T1}, {N2, T2}) when N2 < N1 -> + intersection(to_list_1(T2), N2, T1, N1); +intersection({N1, T1}, {N2, T2}) -> + intersection(to_list_1(T1), N1, T2, N2). + +intersection(L, _N1, T2, N2) when N2 < 10 -> + intersection_2(L, to_list_1(T2)); +intersection(L, N1, T2, N2) -> + X = N1 * round(?c * math:log(N2)), + if N2 < X -> + intersection_2(L, to_list_1(T2)); + true -> + intersection_1(L, T2) + end. + +%% We collect the intersecting elements in an accumulator list and count +%% them at the same time so we can balance the list afterwards. + +intersection_1(Xs, T) -> + intersection_1(Xs, T, [], 0). + +intersection_1([X | Xs], T, As, N) -> + case is_member_1(X, T) of + true -> + intersection_1(Xs, T, [X | As], N + 1); + false -> + intersection_1(Xs, T, As, N) + end; +intersection_1([], _, As, N) -> + {N, balance_revlist(As, N)}. + + +intersection_2(Xs, Ys) -> + intersection_2(Xs, Ys, [], 0). + +intersection_2([X | Xs1], [Y | _] = Ys, As, S) when X < Y -> + intersection_2(Xs1, Ys, As, S); +intersection_2([X | _] = Xs, [Y | Ys1], As, S) when X > Y -> + intersection_2(Ys1, Xs, As, S); +intersection_2([X | Xs1], [_ | Ys1], As, S) -> + intersection_2(Xs1, Ys1, [X | As], S + 1); +intersection_2([], _, As, S) -> + {S, balance_revlist(As, S)}; +intersection_2(_, [], As, S) -> + {S, balance_revlist(As, S)}. + +-spec intersection([gb_set()]) -> gb_set(). + +intersection([S | Ss]) -> + intersection_list(S, Ss). + +intersection_list(S, [S1 | Ss]) -> + intersection_list(intersection(S, S1), Ss); +intersection_list(S, []) -> S. + +-spec is_disjoint(gb_set(), gb_set()) -> boolean(). + +is_disjoint({N1, T1}, {N2, T2}) when N1 < N2 -> + is_disjoint_1(T1, T2); +is_disjoint({_, T1}, {_, T2}) -> + is_disjoint_1(T2, T1). + +is_disjoint_1({K1, Smaller1, Bigger}, {K2, Smaller2, _}=Tree) when K1 < K2 -> + not is_member_1(K1, Smaller2) andalso + is_disjoint_1(Smaller1, Smaller2) andalso + is_disjoint_1(Bigger, Tree); +is_disjoint_1({K1, Smaller, Bigger1}, {K2, _, Bigger2}=Tree) when K1 > K2 -> + not is_member_1(K1, Bigger2) andalso + is_disjoint_1(Bigger1, Bigger2) andalso + is_disjoint_1(Smaller, Tree); +is_disjoint_1({_K1, _, _}, {_K2, _, _}) -> %K1 == K2 + false; +is_disjoint_1(nil, _) -> + true; +is_disjoint_1(_, nil) -> + true. + +%% Note that difference is not symmetric. We don't use `delete' here, +%% since the GB-trees implementation does not rebalance after deletion +%% and so we could end up with very unbalanced trees indeed depending on +%% the sets. Therefore, we always build a new tree, and thus we need to +%% traverse the whole element list of the left operand. + +-spec subtract(gb_set(), gb_set()) -> gb_set(). + +subtract(S1, S2) -> + difference(S1, S2). + +-spec difference(gb_set(), gb_set()) -> gb_set(). + +difference({N1, T1}, {N2, T2}) -> + difference(to_list_1(T1), N1, T2, N2). + +difference(L, N1, T2, N2) when N2 < 10 -> + difference_2(L, to_list_1(T2), N1); +difference(L, N1, T2, N2) -> + X = N1 * round(?c * math:log(N2)), + if N2 < X -> + difference_2(L, to_list_1(T2), N1); + true -> + difference_1(L, T2) + end. + + +difference_1(Xs, T) -> + difference_1(Xs, T, [], 0). + +difference_1([X | Xs], T, As, N) -> + case is_member_1(X, T) of + true -> + difference_1(Xs, T, As, N); + false -> + difference_1(Xs, T, [X | As], N + 1) + end; +difference_1([], _, As, N) -> + {N, balance_revlist(As, N)}. + + +difference_2(Xs, Ys, S) -> + difference_2(Xs, Ys, [], S). % S is the size of the left set + +difference_2([X | Xs1], [Y | _] = Ys, As, S) when X < Y -> + difference_2(Xs1, Ys, [X | As], S); +difference_2([X | _] = Xs, [Y | Ys1], As, S) when X > Y -> + difference_2(Xs, Ys1, As, S); +difference_2([_X | Xs1], [_Y | Ys1], As, S) -> + difference_2(Xs1, Ys1, As, S - 1); +difference_2([], _Ys, As, S) -> + {S, balance_revlist(As, S)}; +difference_2(Xs, [], As, S) -> + {S, balance_revlist(push(Xs, As), S)}. + + +%% Subset testing is much the same thing as set difference, but +%% without the construction of a new set. + +-spec is_subset(gb_set(), gb_set()) -> boolean(). + +is_subset({N1, T1}, {N2, T2}) -> + is_subset(to_list_1(T1), N1, T2, N2). + +is_subset(L, _N1, T2, N2) when N2 < 10 -> + is_subset_2(L, to_list_1(T2)); +is_subset(L, N1, T2, N2) -> + X = N1 * round(?c * math:log(N2)), + if N2 < X -> + is_subset_2(L, to_list_1(T2)); + true -> + is_subset_1(L, T2) + end. + + +is_subset_1([X | Xs], T) -> + case is_member_1(X, T) of + true -> + is_subset_1(Xs, T); + false -> + false + end; +is_subset_1([], _) -> + true. + + +is_subset_2([X | _], [Y | _]) when X < Y -> + false; +is_subset_2([X | _] = Xs, [Y | Ys1]) when X > Y -> + is_subset_2(Xs, Ys1); +is_subset_2([_ | Xs1], [_ | Ys1]) -> + is_subset_2(Xs1, Ys1); +is_subset_2([], _) -> + true; +is_subset_2(_, []) -> + false. + + +%% For compatibility with `sets': + +-spec is_set(term()) -> boolean(). + +is_set({0, nil}) -> true; +is_set({N, {_, _, _}}) when is_integer(N), N >= 0 -> true; +is_set(_) -> false. + +-spec filter(fun((term()) -> boolean()), gb_set()) -> gb_set(). + +filter(F, S) -> + from_ordset([X || X <- to_list(S), F(X)]). + +-spec fold(fun((term(), term()) -> term()), term(), gb_set()) -> term(). + +fold(F, A, {_, T}) when is_function(F, 2) -> + fold_1(F, A, T). + +fold_1(F, Acc0, {Key, Small, Big}) -> + Acc1 = fold_1(F, Acc0, Small), + Acc = F(Key, Acc1), + fold_1(F, Acc, Big); +fold_1(_, Acc, _) -> + Acc. diff --git a/lib/stdlib/src/gb_trees.erl b/lib/stdlib/src/gb_trees.erl new file mode 100644 index 0000000000..d37786a100 --- /dev/null +++ b/lib/stdlib/src/gb_trees.erl @@ -0,0 +1,515 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ===================================================================== +%% General Balanced Trees - highly efficient dictionaries. +%% +%% Copyright (C) 1999-2001 Sven-Olof Nyström, Richard Carlsson +%% +%% An efficient implementation of Prof. Arne Andersson's General +%% Balanced Trees. These have no storage overhead compared to plain +%% unbalanced binary trees, and their performance is in general better +%% than AVL trees. +%% --------------------------------------------------------------------- +%% Operations: +%% +%% - empty(): returns empty tree. +%% +%% - is_empty(T): returns 'true' if T is an empty tree, and 'false' +%% otherwise. +%% +%% - size(T): returns the number of nodes in the tree as an integer. +%% Returns 0 (zero) if the tree is empty. +%% +%% - lookup(X, T): looks up key X in tree T; returns {value, V}, or +%% `none' if the key is not present. +%% +%% - get(X, T): retreives the value stored with key X in tree T. Assumes +%% that the key is present in the tree. +%% +%% - insert(X, V, T): inserts key X with value V into tree T; returns +%% the new tree. Assumes that the key is *not* present in the tree. +%% +%% - update(X, V, T): updates key X to value V in tree T; returns the +%% new tree. Assumes that the key is present in the tree. +%% +%% - enter(X, V, T): inserts key X with value V into tree T if the key +%% is not present in the tree, otherwise updates key X to value V in +%% T. Returns the new tree. +%% +%% - delete(X, T): removes key X from tree T; returns new tree. Assumes +%% that the key is present in the tree. +%% +%% - delete_any(X, T): removes key X from tree T if the key is present +%% in the tree, otherwise does nothing; returns new tree. +%% +%% - balance(T): rebalances tree T. Note that this is rarely necessary, +%% but may be motivated when a large number of entries have been +%% deleted from the tree without further insertions. Rebalancing could +%% then be forced in order to minimise lookup times, since deletion +%% only does not rebalance the tree. +%% +%% - is_defined(X, T): returns `true' if key X is present in tree T, and +%% `false' otherwise. +%% +%% - keys(T): returns an ordered list of all keys in tree T. +%% +%% - values(T): returns the list of values for all keys in tree T, +%% sorted by their corresponding keys. Duplicates are not removed. +%% +%% - to_list(T): returns an ordered list of {Key, Value} pairs for all +%% keys in tree T. +%% +%% - from_orddict(L): turns an ordered list L of {Key, Value} pairs into +%% a tree. The list must not contain duplicate keys. +%% +%% - smallest(T): returns {X, V}, where X is the smallest key in tree T, +%% and V is the value associated with X in T. Assumes that the tree T +%% is nonempty. +%% +%% - largest(T): returns {X, V}, where X is the largest key in tree T, +%% and V is the value associated with X in T. Assumes that the tree T +%% is nonempty. +%% +%% - take_smallest(T): returns {X, V, T1}, where X is the smallest key +%% in tree T, V is the value associated with X in T, and T1 is the +%% tree T with key X deleted. Assumes that the tree T is nonempty. +%% +%% - take_largest(T): returns {X, V, T1}, where X is the largest key +%% in tree T, V is the value associated with X in T, and T1 is the +%% tree T with key X deleted. Assumes that the tree T is nonempty. +%% +%% - iterator(T): returns an iterator that can be used for traversing +%% the entries of tree T; see `next'. The implementation of this is +%% very efficient; traversing the whole tree using `next' is only +%% slightly slower than getting the list of all elements using +%% `to_list' and traversing that. The main advantage of the iterator +%% approach is that it does not require the complete list of all +%% elements to be built in memory at one time. +%% +%% - next(S): returns {X, V, S1} where X is the smallest key referred to +%% by the iterator S, and S1 is the new iterator to be used for +%% traversing the remaining entries, or the atom `none' if no entries +%% remain. +%% +%% - map(F, T): maps the function F(K, V) -> V' to all key-value pairs +%% of the tree T and returns a new tree T' with the same set of keys +%% as T and the new set of values V'. + +-module(gb_trees). + +-export([empty/0, is_empty/1, size/1, lookup/2, get/2, insert/3, + update/3, enter/3, delete/2, delete_any/2, balance/1, + is_defined/2, keys/1, values/1, to_list/1, from_orddict/1, + smallest/1, largest/1, take_smallest/1, take_largest/1, + iterator/1, next/1, map/2]). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Data structure: +%% - {Size, Tree}, where `Tree' is composed of nodes of the form: +%% - {Key, Value, Smaller, Bigger}, and the "empty tree" node: +%% - nil. +%% +%% I make no attempt to balance trees after deletions. Since deletions +%% don't increase the height of a tree, I figure this is OK. +%% +%% Original balance condition h(T) <= ceil(c * log(|T|)) has been +%% changed to the similar (but not quite equivalent) condition 2 ^ h(T) +%% <= |T| ^ c. I figure this should also be OK. +%% +%% Performance is comparable to the AVL trees in the Erlang book (and +%% faster in general due to less overhead); the difference is that +%% deletion works for my trees, but not for the book's trees. Behaviour +%% is logaritmic (as it should be). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Some macros. + +-define(p, 2). % It seems that p = 2 is optimal for sorted keys + +-define(pow(A, _), A * A). % correct with exponent as defined above. + +-define(div2(X), X bsr 1). + +-define(mul2(X), X bsl 1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Some types. + +-type gb_tree_node() :: 'nil' | {_, _, _, _}. + +%% A declaration equivalent to the following is currently hard-coded +%% in erl_types.erl +%% +%% -opaque gb_tree() :: {non_neg_integer(), gb_tree_node()}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec empty() -> gb_tree(). + +empty() -> + {0, nil}. + +-spec is_empty(gb_tree()) -> boolean(). + +is_empty({0, nil}) -> + true; +is_empty(_) -> + false. + +-spec size(gb_tree()) -> non_neg_integer(). + +size({Size, _}) when is_integer(Size), Size >= 0 -> + Size. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec lookup(term(), gb_tree()) -> 'none' | {'value', term()}. + +lookup(Key, {_, T}) -> + lookup_1(Key, T). + +%% The term order is an arithmetic total order, so we should not +%% test exact equality for the keys. (If we do, then it becomes +%% possible that neither `>', `<', nor `=:=' matches.) Testing '<' +%% and '>' first is statistically better than testing for +%% equality, and also allows us to skip the test completely in the +%% remaining case. + +lookup_1(Key, {Key1, _, Smaller, _}) when Key < Key1 -> + lookup_1(Key, Smaller); +lookup_1(Key, {Key1, _, _, Bigger}) when Key > Key1 -> + lookup_1(Key, Bigger); +lookup_1(_, {_, Value, _, _}) -> + {value, Value}; +lookup_1(_, nil) -> + none. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% This is a specialized version of `lookup'. + +-spec is_defined(term(), gb_tree()) -> boolean(). + +is_defined(Key, {_, T}) -> + is_defined_1(Key, T). + +is_defined_1(Key, {Key1, _, Smaller, _}) when Key < Key1 -> + is_defined_1(Key, Smaller); +is_defined_1(Key, {Key1, _, _, Bigger}) when Key > Key1 -> + is_defined_1(Key, Bigger); +is_defined_1(_, {_, _, _, _}) -> + true; +is_defined_1(_, nil) -> + false. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% This is a specialized version of `lookup'. + +-spec get(term(), gb_tree()) -> term(). + +get(Key, {_, T}) -> + get_1(Key, T). + +get_1(Key, {Key1, _, Smaller, _}) when Key < Key1 -> + get_1(Key, Smaller); +get_1(Key, {Key1, _, _, Bigger}) when Key > Key1 -> + get_1(Key, Bigger); +get_1(_, {_, Value, _, _}) -> + Value. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec update(term(), term(), gb_tree()) -> gb_tree(). + +update(Key, Val, {S, T}) -> + T1 = update_1(Key, Val, T), + {S, T1}. + +%% See `lookup' for notes on the term comparison order. + +update_1(Key, Value, {Key1, V, Smaller, Bigger}) when Key < Key1 -> + {Key1, V, update_1(Key, Value, Smaller), Bigger}; +update_1(Key, Value, {Key1, V, Smaller, Bigger}) when Key > Key1 -> + {Key1, V, Smaller, update_1(Key, Value, Bigger)}; +update_1(Key, Value, {_, _, Smaller, Bigger}) -> + {Key, Value, Smaller, Bigger}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec insert(term(), term(), gb_tree()) -> gb_tree(). + +insert(Key, Val, {S, T}) when is_integer(S) -> + S1 = S+1, + {S1, insert_1(Key, Val, T, ?pow(S1, ?p))}. + +insert_1(Key, Value, {Key1, V, Smaller, Bigger}, S) when Key < Key1 -> + case insert_1(Key, Value, Smaller, ?div2(S)) of + {T1, H1, S1} -> + T = {Key1, V, T1, Bigger}, + {H2, S2} = count(Bigger), + H = ?mul2(erlang:max(H1, H2)), + SS = S1 + S2 + 1, + P = ?pow(SS, ?p), + if + H > P -> + balance(T, SS); + true -> + {T, H, SS} + end; + T1 -> + {Key1, V, T1, Bigger} + end; +insert_1(Key, Value, {Key1, V, Smaller, Bigger}, S) when Key > Key1 -> + case insert_1(Key, Value, Bigger, ?div2(S)) of + {T1, H1, S1} -> + T = {Key1, V, Smaller, T1}, + {H2, S2} = count(Smaller), + H = ?mul2(erlang:max(H1, H2)), + SS = S1 + S2 + 1, + P = ?pow(SS, ?p), + if + H > P -> + balance(T, SS); + true -> + {T, H, SS} + end; + T1 -> + {Key1, V, Smaller, T1} + end; +insert_1(Key, Value, nil, S) when S =:= 0 -> + {{Key, Value, nil, nil}, 1, 1}; +insert_1(Key, Value, nil, _S) -> + {Key, Value, nil, nil}; +insert_1(Key, _, _, _) -> + erlang:error({key_exists, Key}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec enter(term(), term(), gb_tree()) -> gb_tree(). + +enter(Key, Val, T) -> + case is_defined(Key, T) of + true -> + update(Key, Val, T); + false -> + insert(Key, Val, T) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +count({_, _, nil, nil}) -> + {1, 1}; +count({_, _, Sm, Bi}) -> + {H1, S1} = count(Sm), + {H2, S2} = count(Bi), + {?mul2(erlang:max(H1, H2)), S1 + S2 + 1}; +count(nil) -> + {1, 0}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec balance(gb_tree()) -> gb_tree(). + +balance({S, T}) -> + {S, balance(T, S)}. + +balance(T, S) -> + balance_list(to_list_1(T), S). + +balance_list(L, S) -> + {T, []} = balance_list_1(L, S), + T. + +balance_list_1(L, S) when S > 1 -> + Sm = S - 1, + S2 = Sm div 2, + S1 = Sm - S2, + {T1, [{K, V} | L1]} = balance_list_1(L, S1), + {T2, L2} = balance_list_1(L1, S2), + T = {K, V, T1, T2}, + {T, L2}; +balance_list_1([{Key, Val} | L], 1) -> + {{Key, Val, nil, nil}, L}; +balance_list_1(L, 0) -> + {nil, L}. + +-spec from_orddict([{_,_}]) -> gb_tree(). + +from_orddict(L) -> + S = length(L), + {S, balance_list(L, S)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec delete_any(term(), gb_tree()) -> gb_tree(). + +delete_any(Key, T) -> + case is_defined(Key, T) of + true -> + delete(Key, T); + false -> + T + end. + +%%% delete. Assumes that key is present. + +-spec delete(term(), gb_tree()) -> gb_tree(). + +delete(Key, {S, T}) when is_integer(S), S >= 0 -> + {S - 1, delete_1(Key, T)}. + +%% See `lookup' for notes on the term comparison order. + +delete_1(Key, {Key1, Value, Smaller, Larger}) when Key < Key1 -> + Smaller1 = delete_1(Key, Smaller), + {Key1, Value, Smaller1, Larger}; +delete_1(Key, {Key1, Value, Smaller, Bigger}) when Key > Key1 -> + Bigger1 = delete_1(Key, Bigger), + {Key1, Value, Smaller, Bigger1}; +delete_1(_, {_, _, Smaller, Larger}) -> + merge(Smaller, Larger). + +merge(Smaller, nil) -> + Smaller; +merge(nil, Larger) -> + Larger; +merge(Smaller, Larger) -> + {Key, Value, Larger1} = take_smallest1(Larger), + {Key, Value, Smaller, Larger1}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec take_smallest(gb_tree()) -> {term(), term(), gb_tree()}. + +take_smallest({Size, Tree}) when is_integer(Size), Size >= 0 -> + {Key, Value, Larger} = take_smallest1(Tree), + {Key, Value, {Size - 1, Larger}}. + +take_smallest1({Key, Value, nil, Larger}) -> + {Key, Value, Larger}; +take_smallest1({Key, Value, Smaller, Larger}) -> + {Key1, Value1, Smaller1} = take_smallest1(Smaller), + {Key1, Value1, {Key, Value, Smaller1, Larger}}. + +-spec smallest(gb_tree()) -> {term(), term()}. + +smallest({_, Tree}) -> + smallest_1(Tree). + +smallest_1({Key, Value, nil, _Larger}) -> + {Key, Value}; +smallest_1({_Key, _Value, Smaller, _Larger}) -> + smallest_1(Smaller). + +-spec take_largest(gb_tree()) -> {term(), term(), gb_tree()}. + +take_largest({Size, Tree}) when is_integer(Size), Size >= 0 -> + {Key, Value, Smaller} = take_largest1(Tree), + {Key, Value, {Size - 1, Smaller}}. + +take_largest1({Key, Value, Smaller, nil}) -> + {Key, Value, Smaller}; +take_largest1({Key, Value, Smaller, Larger}) -> + {Key1, Value1, Larger1} = take_largest1(Larger), + {Key1, Value1, {Key, Value, Smaller, Larger1}}. + +-spec largest(gb_tree()) -> {term(), term()}. + +largest({_, Tree}) -> + largest_1(Tree). + +largest_1({Key, Value, _Smaller, nil}) -> + {Key, Value}; +largest_1({_Key, _Value, _Smaller, Larger}) -> + largest_1(Larger). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec to_list(gb_tree()) -> [{term(), term()}]. + +to_list({_, T}) -> + to_list(T, []). + +to_list_1(T) -> to_list(T, []). + +to_list({Key, Value, Small, Big}, L) -> + to_list(Small, [{Key, Value} | to_list(Big, L)]); +to_list(nil, L) -> L. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec keys(gb_tree()) -> [term()]. + +keys({_, T}) -> + keys(T, []). + +keys({Key, _Value, Small, Big}, L) -> + keys(Small, [Key | keys(Big, L)]); +keys(nil, L) -> L. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec values(gb_tree()) -> [term()]. + +values({_, T}) -> + values(T, []). + +values({_Key, Value, Small, Big}, L) -> + values(Small, [Value | values(Big, L)]); +values(nil, L) -> L. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec iterator(gb_tree()) -> [gb_tree_node()]. + +iterator({_, T}) -> + iterator_1(T). + +iterator_1(T) -> + iterator(T, []). + +%% The iterator structure is really just a list corresponding to +%% the call stack of an in-order traversal. This is quite fast. + +iterator({_, _, nil, _} = T, As) -> + [T | As]; +iterator({_, _, L, _} = T, As) -> + iterator(L, [T | As]); +iterator(nil, As) -> + As. + +-spec next([gb_tree_node()]) -> 'none' | {term(), term(), [gb_tree_node()]}. + +next([{X, V, _, T} | As]) -> + {X, V, iterator(T, As)}; +next([]) -> + none. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec map(fun((term(), term()) -> term()), gb_tree()) -> gb_tree(). + +map(F, {Size, Tree}) when is_function(F, 2) -> + {Size, map_1(F, Tree)}. + +map_1(_, nil) -> nil; +map_1(F, {K, V, Smaller, Larger}) -> + {K, F(K, V), map_1(F, Smaller), map_1(F, Larger)}. diff --git a/lib/stdlib/src/gen.erl b/lib/stdlib/src/gen.erl new file mode 100644 index 0000000000..5aab547644 --- /dev/null +++ b/lib/stdlib/src/gen.erl @@ -0,0 +1,320 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(gen). + +%%%----------------------------------------------------------------- +%%% This module implements the really generic stuff of the generic +%%% standard behaviours (e.g. gen_server, gen_fsm). +%%% +%%% The standard behaviour should export init_it/6. +%%%----------------------------------------------------------------- +-export([start/5, start/6, debug_options/1, + call/3, call/4, reply/2]). + +-export([init_it/6, init_it/7]). + +-define(default_timeout, 5000). + +%%----------------------------------------------------------------- + +-type linkage() :: 'link' | 'nolink'. +-type emgr_name() :: {'local', atom()} | {'global', term()}. + +-type start_ret() :: {'ok', pid()} | 'ignore' | {'error', term()}. + +-type debug_flag() :: 'trace' | 'log' | 'statistics' | 'debug' + | {'logfile', string()}. +-type option() :: {'timeout', timeout()} + | {'debug', [debug_flag()]} + | {'spawn_opt', [proc_lib:spawn_option()]}. +-type options() :: [option()]. + +%%----------------------------------------------------------------- +%% Starts a generic process. +%% start(GenMod, LinkP, Mod, Args, Options) +%% start(GenMod, LinkP, Name, Mod, Args, Options) +%% GenMod = atom(), callback module implementing the 'real' fsm +%% LinkP = link | nolink +%% Name = {local, atom()} | {global, term()} +%% Args = term(), init arguments (to Mod:init/1) +%% Options = [{timeout, Timeout} | {debug, [Flag]} | {spawn_opt, OptionList}] +%% Flag = trace | log | {logfile, File} | statistics | debug +%% (debug == log && statistics) +%% Returns: {ok, Pid} | ignore |{error, Reason} | +%% {error, {already_started, Pid}} | +%% The 'already_started' is returned only if Name is given +%%----------------------------------------------------------------- + +-spec start(module(), linkage(), emgr_name(), module(), term(), options()) -> + start_ret(). + +start(GenMod, LinkP, Name, Mod, Args, Options) -> + case where(Name) of + undefined -> + do_spawn(GenMod, LinkP, Name, Mod, Args, Options); + Pid -> + {error, {already_started, Pid}} + end. + +-spec start(module(), linkage(), module(), term(), options()) -> start_ret(). + +start(GenMod, LinkP, Mod, Args, Options) -> + do_spawn(GenMod, LinkP, Mod, Args, Options). + +%%----------------------------------------------------------------- +%% Spawn the process (and link) maybe at another node. +%% If spawn without link, set parent to ourselves 'self'!!! +%%----------------------------------------------------------------- +do_spawn(GenMod, link, Mod, Args, Options) -> + Time = timeout(Options), + proc_lib:start_link(?MODULE, init_it, + [GenMod, self(), self(), Mod, Args, Options], + Time, + spawn_opts(Options)); +do_spawn(GenMod, _, Mod, Args, Options) -> + Time = timeout(Options), + proc_lib:start(?MODULE, init_it, + [GenMod, self(), self, Mod, Args, Options], + Time, + spawn_opts(Options)). + +do_spawn(GenMod, link, Name, Mod, Args, Options) -> + Time = timeout(Options), + proc_lib:start_link(?MODULE, init_it, + [GenMod, self(), self(), Name, Mod, Args, Options], + Time, + spawn_opts(Options)); +do_spawn(GenMod, _, Name, Mod, Args, Options) -> + Time = timeout(Options), + proc_lib:start(?MODULE, init_it, + [GenMod, self(), self, Name, Mod, Args, Options], + Time, + spawn_opts(Options)). + +%%----------------------------------------------------------------- +%% Initiate the new process. +%% Register the name using the Rfunc function +%% Calls the Mod:init/Args function. +%% Finally an acknowledge is sent to Parent and the main +%% loop is entered. +%%----------------------------------------------------------------- +init_it(GenMod, Starter, Parent, Mod, Args, Options) -> + init_it2(GenMod, Starter, Parent, self(), Mod, Args, Options). + +init_it(GenMod, Starter, Parent, Name, Mod, Args, Options) -> + case name_register(Name) of + true -> + init_it2(GenMod, Starter, Parent, Name, Mod, Args, Options); + {false, Pid} -> + proc_lib:init_ack(Starter, {error, {already_started, Pid}}) + end. + +init_it2(GenMod, Starter, Parent, Name, Mod, Args, Options) -> + GenMod:init_it(Starter, Parent, Name, Mod, Args, Options). + +%%----------------------------------------------------------------- +%% Makes a synchronous call to a generic process. +%% Request is sent to the Pid, and the response must be +%% {Tag, _, Reply}. +%%----------------------------------------------------------------- + +%%% New call function which uses the new monitor BIF +%%% call(ServerId, Label, Request) + +call(Process, Label, Request) -> + call(Process, Label, Request, ?default_timeout). + +%% Local or remote by pid +call(Pid, Label, Request, Timeout) + when is_pid(Pid), Timeout =:= infinity; + is_pid(Pid), is_integer(Timeout), Timeout >= 0 -> + do_call(Pid, Label, Request, Timeout); +%% Local by name +call(Name, Label, Request, Timeout) + when is_atom(Name), Timeout =:= infinity; + is_atom(Name), is_integer(Timeout), Timeout >= 0 -> + case whereis(Name) of + Pid when is_pid(Pid) -> + do_call(Pid, Label, Request, Timeout); + undefined -> + exit(noproc) + end; +%% Global by name +call({global, _Name}=Process, Label, Request, Timeout) + when Timeout =:= infinity; + is_integer(Timeout), Timeout >= 0 -> + case where(Process) of + Pid when is_pid(Pid) -> + Node = node(Pid), + try do_call(Pid, Label, Request, Timeout) + catch + exit:{nodedown, Node} -> + %% A nodedown not yet detected by global, + %% pretend that it was. + exit(noproc) + end; + undefined -> + exit(noproc) + end; +%% Local by name in disguise +call({Name, Node}, Label, Request, Timeout) + when Node =:= node(), Timeout =:= infinity; + Node =:= node(), is_integer(Timeout), Timeout >= 0 -> + call(Name, Label, Request, Timeout); +%% Remote by name +call({_Name, Node}=Process, Label, Request, Timeout) + when is_atom(Node), Timeout =:= infinity; + is_atom(Node), is_integer(Timeout), Timeout >= 0 -> + if + node() =:= nonode@nohost -> + exit({nodedown, Node}); + true -> + do_call(Process, Label, Request, Timeout) + end. + +do_call(Process, Label, Request, Timeout) -> + %% We trust the arguments to be correct, i.e + %% Process is either a local or remote pid, + %% or a {Name, Node} tuple (of atoms) and in this + %% case this node (node()) _is_ distributed and Node =/= node(). + Node = case Process of + {_S, N} when is_atom(N) -> + N; + _ when is_pid(Process) -> + node(Process) + end, + try erlang:monitor(process, Process) of + Mref -> + %% If the monitor/2 call failed to set up a connection to a + %% remote node, we don't want the '!' operator to attempt + %% to set up the connection again. (If the monitor/2 call + %% failed due to an expired timeout, '!' too would probably + %% have to wait for the timeout to expire.) Therefore, + %% use erlang:send/3 with the 'noconnect' option so that it + %% will fail immediately if there is no connection to the + %% remote node. + + catch erlang:send(Process, {Label, {self(), Mref}, Request}, + [noconnect]), + wait_resp_mon(Node, Mref, Timeout) + catch + error:_ -> + %% Node (C/Java?) is not supporting the monitor. + %% The other possible case -- this node is not distributed + %% -- should have been handled earlier. + %% Do the best possible with monitor_node/2. + %% This code may hang indefinitely if the Process + %% does not exist. It is only used for featureweak remote nodes. + monitor_node(Node, true), + receive + {nodedown, Node} -> + monitor_node(Node, false), + exit({nodedown, Node}) + after 0 -> + Tag = make_ref(), + Process ! {Label, {self(), Tag}, Request}, + wait_resp(Node, Tag, Timeout) + end + end. + +wait_resp_mon(Node, Mref, Timeout) -> + receive + {Mref, Reply} -> + erlang:demonitor(Mref, [flush]), + {ok, Reply}; + {'DOWN', Mref, _, _, noconnection} -> + exit({nodedown, Node}); + {'DOWN', Mref, _, _, Reason} -> + exit(Reason) + after Timeout -> + erlang:demonitor(Mref), + receive + {'DOWN', Mref, _, _, _} -> true + after 0 -> true + end, + exit(timeout) + end. + +wait_resp(Node, Tag, Timeout) -> + receive + {Tag, Reply} -> + monitor_node(Node, false), + {ok, Reply}; + {nodedown, Node} -> + monitor_node(Node, false), + exit({nodedown, Node}) + after Timeout -> + monitor_node(Node, false), + exit(timeout) + end. + +%% +%% Send a reply to the client. +%% +reply({To, Tag}, Reply) -> + Msg = {Tag, Reply}, + try To ! Msg catch _:_ -> Msg end. + +%%%----------------------------------------------------------------- +%%% Misc. functions. +%%%----------------------------------------------------------------- +where({global, Name}) -> global:safe_whereis_name(Name); +where({local, Name}) -> whereis(Name). + +name_register({local, Name} = LN) -> + try register(Name, self()) of + true -> true + catch + error:_ -> + {false, where(LN)} + end; +name_register({global, Name} = GN) -> + case global:register_name(Name, self()) of + yes -> true; + no -> {false, where(GN)} + end. + +timeout(Options) -> + case opt(timeout, Options) of + {ok, Time} -> + Time; + _ -> + infinity + end. + +spawn_opts(Options) -> + case opt(spawn_opt, Options) of + {ok, Opts} -> + Opts; + _ -> + [] + end. + +opt(Op, [{Op, Value}|_]) -> + {ok, Value}; +opt(Op, [_|Options]) -> + opt(Op, Options); +opt(_, []) -> + false. + +debug_options(Opts) -> + case opt(debug, Opts) of + {ok, Options} -> sys:debug_options(Options); + _ -> [] + end. diff --git a/lib/stdlib/src/gen_event.erl b/lib/stdlib/src/gen_event.erl new file mode 100644 index 0000000000..1b30aaf5eb --- /dev/null +++ b/lib/stdlib/src/gen_event.erl @@ -0,0 +1,721 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(gen_event). + +%%% +%%% A general event handler. +%%% Several handlers (functions) can be added. +%%% Each handler holds a state and will be called +%%% for every event received of the handler. +%%% + +%%% Modified by Magnus. +%%% Take care of fault situations and made notify asynchronous. +%%% Re-written by Joe with new functional interface ! +%%% Modified by Martin - uses proc_lib, sys and gen! + + +-export([start/0, start/1, start_link/0, start_link/1, stop/1, notify/2, + sync_notify/2, + add_handler/3, add_sup_handler/3, delete_handler/3, swap_handler/3, + swap_sup_handler/3, which_handlers/1, call/3, call/4, wake_hib/4]). + +-export([behaviour_info/1]). + +-export([init_it/6, + system_continue/3, + system_terminate/4, + system_code_change/4, + print_event/3, + format_status/2]). + +-import(error_logger, [error_msg/2]). + +-define(reply(X), From ! {element(2,Tag), X}). + +-record(handler, {module :: atom(), + id = false, + state, + supervised = false :: 'false' | pid()}). + +%%%========================================================================= +%%% API +%%%========================================================================= + +-spec behaviour_info(atom()) -> 'undefined' | [{atom(), arity()}]. + +behaviour_info(callbacks) -> + [{init,1},{handle_event,2},{handle_call,2},{handle_info,2}, + {terminate,2},{code_change,3}]; +behaviour_info(_Other) -> + undefined. + +%% gen_event:start(Handler) -> {ok, Pid} | {error, What} +%% gen_event:add_handler(Handler, Mod, Args) -> ok | Other +%% gen_event:notify(Handler, Event) -> ok +%% gen_event:call(Handler, Mod, Query) -> {ok, Val} | {error, Why} +%% gen_event:call(Handler, Mod, Query, Timeout) -> {ok, Val} | {error, Why} +%% gen_event:delete_handler(Handler, Mod, Args) -> Val +%% gen_event:swap_handler(Handler, {OldMod, Args1}, {NewMod, Args2}) -> ok +%% gen_event:which_handler(Handler) -> [Mod] +%% gen_event:stop(Handler) -> ok + + +%% handlers must export +%% Mod:init(Args) -> {ok, State} | Other +%% Mod:handle_event(Event, State) -> +%% {ok, State'} | remove_handler | {swap_handler,Args1,State1,Mod2,Args2} +%% Mod:handle_info(Info, State) -> +%% {ok, State'} | remove_handler | {swap_handler,Args1,State1,Mod2,Args2} +%% Mod:handle_call(Query, State) -> +%% {ok, Reply, State'} | {remove_handler, Reply} | +%% {swap_handler, Reply, Args1,State1,Mod2,Args2} +%% Mod:terminate(Args, State) -> Val + + +%% add_handler(H, Mod, Args) -> ok | Other +%% Mod:init(Args) -> {ok, State} | Other + +%% delete_handler(H, Mod, Args) -> Val +%% Mod:terminate(Args, State) -> Val + +%% notify(H, Event) +%% Mod:handle_event(Event, State) -> +%% {ok, State1} +%% remove_handler +%% Mod:terminate(remove_handler, State) is called +%% the return value is ignored +%% {swap_handler, Args1, State1, Mod2, Args2} +%% State2 = Mod:terminate(Args1, State1) is called +%% the return value is chained into the new module and +%% Mod2:init({Args2, State2}) is called +%% Other +%% Mod:terminate({error, Other}, State) is called +%% The return value is ignored +%% call(H, Mod, Query) -> Val +%% call(H, Mod, Query, Timeout) -> Val +%% Mod:handle_call(Query, State) -> as above + +%%--------------------------------------------------------------------------- + +-type handler() :: atom() | {atom(), term()}. +-type emgr_name() :: {'local', atom()} | {'global', atom()}. +-type emgr_ref() :: atom() | {atom(), atom()} | {'global', atom()} | pid(). +-type start_ret() :: {'ok', pid()} | {'error', term()}. + +%%--------------------------------------------------------------------------- + +-define(NO_CALLBACK, 'no callback module'). + +-spec start() -> start_ret(). +start() -> + gen:start(?MODULE, nolink, ?NO_CALLBACK, [], []). + +-spec start(emgr_name()) -> start_ret(). +start(Name) -> + gen:start(?MODULE, nolink, Name, ?NO_CALLBACK, [], []). + +-spec start_link() -> start_ret(). +start_link() -> + gen:start(?MODULE, link, ?NO_CALLBACK, [], []). + +-spec start_link(emgr_name()) -> start_ret(). +start_link(Name) -> + gen:start(?MODULE, link, Name, ?NO_CALLBACK, [], []). + +%% -spec init_it(pid(), 'self' | pid(), emgr_name(), module(), [term()], [_]) -> +init_it(Starter, self, Name, Mod, Args, Options) -> + init_it(Starter, self(), Name, Mod, Args, Options); +init_it(Starter, Parent, Name0, _, _, Options) -> + process_flag(trap_exit, true), + Debug = gen:debug_options(Options), + proc_lib:init_ack(Starter, {ok, self()}), + Name = name(Name0), + loop(Parent, Name, [], Debug, false). + +name({local,Name}) -> Name; +name({global,Name}) -> Name; +name(Pid) when is_pid(Pid) -> Pid. + +-spec add_handler(emgr_ref(), handler(), term()) -> term(). +add_handler(M, Handler, Args) -> rpc(M, {add_handler, Handler, Args}). + +-spec add_sup_handler(emgr_ref(), handler(), term()) -> term(). +add_sup_handler(M, Handler, Args) -> + rpc(M, {add_sup_handler, Handler, Args, self()}). + +-spec notify(emgr_ref(), term()) -> 'ok'. +notify(M, Event) -> send(M, {notify, Event}). + +-spec sync_notify(emgr_ref(), term()) -> 'ok'. +sync_notify(M, Event) -> rpc(M, {sync_notify, Event}). + +-spec call(emgr_ref(), handler(), term()) -> term(). +call(M, Handler, Query) -> call1(M, Handler, Query). + +-spec call(emgr_ref(), handler(), term(), timeout()) -> term(). +call(M, Handler, Query, Timeout) -> call1(M, Handler, Query, Timeout). + +-spec delete_handler(emgr_ref(), handler(), term()) -> term(). +delete_handler(M, Handler, Args) -> rpc(M, {delete_handler, Handler, Args}). + +-spec swap_handler(emgr_ref(), {handler(), term()}, {handler(), term()}) -> + 'ok' | {'error', term()}. +swap_handler(M, {H1, A1}, {H2, A2}) -> rpc(M, {swap_handler, H1, A1, H2, A2}). + +-spec swap_sup_handler(emgr_ref(), {handler(), term()}, {handler(), term()}) -> + 'ok' | {'error', term()}. +swap_sup_handler(M, {H1, A1}, {H2, A2}) -> + rpc(M, {swap_sup_handler, H1, A1, H2, A2, self()}). + +-spec which_handlers(emgr_ref()) -> [handler()]. +which_handlers(M) -> rpc(M, which_handlers). + +-spec stop(emgr_ref()) -> 'ok'. +stop(M) -> rpc(M, stop). + +rpc(M, Cmd) -> + {ok, Reply} = gen:call(M, self(), Cmd, infinity), + Reply. + +call1(M, Handler, Query) -> + Cmd = {call, Handler, Query}, + try gen:call(M, self(), Cmd) of + {ok, Res} -> + Res + catch + exit:Reason -> + exit({Reason, {?MODULE, call, [M, Handler, Query]}}) + end. + +call1(M, Handler, Query, Timeout) -> + Cmd = {call, Handler, Query}, + try gen:call(M, self(), Cmd, Timeout) of + {ok, Res} -> + Res + catch + exit:Reason -> + exit({Reason, {?MODULE, call, [M, Handler, Query, Timeout]}}) + end. + +send({global, Name}, Cmd) -> + catch global:send(Name, Cmd), + ok; +send(M, Cmd) -> + M ! Cmd, + ok. + +loop(Parent, ServerName, MSL, Debug, true) -> + proc_lib:hibernate(?MODULE, wake_hib, [Parent, ServerName, MSL, Debug]); +loop(Parent, ServerName, MSL, Debug, _) -> + fetch_msg(Parent, ServerName, MSL, Debug, false). + +wake_hib(Parent, ServerName, MSL, Debug) -> + fetch_msg(Parent, ServerName, MSL, Debug, true). + +fetch_msg(Parent, ServerName, MSL, Debug, Hib) -> + receive + {system, From, Req} -> + sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, + [ServerName, MSL, Hib],Hib); + {'EXIT', Parent, Reason} -> + terminate_server(Reason, Parent, MSL, ServerName); + Msg when Debug =:= [] -> + handle_msg(Msg, Parent, ServerName, MSL, []); + Msg -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, + ServerName, {in, Msg}), + handle_msg(Msg, Parent, ServerName, MSL, Debug1) + end. + +handle_msg(Msg, Parent, ServerName, MSL, Debug) -> + case Msg of + {notify, Event} -> + {Hib,MSL1} = server_notify(Event, handle_event, MSL, ServerName), + loop(Parent, ServerName, MSL1, Debug, Hib); + {From, Tag, {sync_notify, Event}} -> + {Hib, MSL1} = server_notify(Event, handle_event, MSL, ServerName), + ?reply(ok), + loop(Parent, ServerName, MSL1, Debug, Hib); + {'EXIT', From, Reason} -> + MSL1 = handle_exit(From, Reason, MSL, ServerName), + loop(Parent, ServerName, MSL1, Debug, false); + {From, Tag, {call, Handler, Query}} -> + {Hib, Reply, MSL1} = server_call(Handler, Query, MSL, ServerName), + ?reply(Reply), + loop(Parent, ServerName, MSL1, Debug, Hib); + {From, Tag, {add_handler, Handler, Args}} -> + {Hib, Reply, MSL1} = server_add_handler(Handler, Args, MSL), + ?reply(Reply), + loop(Parent, ServerName, MSL1, Debug, Hib); + {From, Tag, {add_sup_handler, Handler, Args, SupP}} -> + {Hib, Reply, MSL1} = server_add_sup_handler(Handler, Args, MSL, SupP), + ?reply(Reply), + loop(Parent, ServerName, MSL1, Debug, Hib); + {From, Tag, {delete_handler, Handler, Args}} -> + {Reply, MSL1} = server_delete_handler(Handler, Args, MSL, + ServerName), + ?reply(Reply), + loop(Parent, ServerName, MSL1, Debug, false); + {From, Tag, {swap_handler, Handler1, Args1, Handler2, Args2}} -> + {Hib, Reply, MSL1} = server_swap_handler(Handler1, Args1, Handler2, + Args2, MSL, ServerName), + ?reply(Reply), + loop(Parent, ServerName, MSL1, Debug, Hib); + {From, Tag, {swap_sup_handler, Handler1, Args1, Handler2, Args2, + Sup}} -> + {Hib, Reply, MSL1} = server_swap_handler(Handler1, Args1, Handler2, + Args2, MSL, Sup, ServerName), + ?reply(Reply), + loop(Parent, ServerName, MSL1, Debug, Hib); + {From, Tag, stop} -> + catch terminate_server(normal, Parent, MSL, ServerName), + ?reply(ok); + {From, Tag, which_handlers} -> + ?reply(the_handlers(MSL)), + loop(Parent, ServerName, MSL, Debug, false); + {From, Tag, get_modules} -> + ?reply(get_modules(MSL)), + loop(Parent, ServerName, MSL, Debug, false); + Other -> + {Hib, MSL1} = server_notify(Other, handle_info, MSL, ServerName), + loop(Parent, ServerName, MSL1, Debug, Hib) + end. + +terminate_server(Reason, Parent, MSL, ServerName) -> + stop_handlers(MSL, ServerName), + do_unlink(Parent, MSL), + exit(Reason). + +%% unlink the supervisor process of all supervised handlers. +%% We do not want a handler supervisor to EXIT due to the +%% termination of the event manager (server). +%% Do not unlink Parent ! +do_unlink(Parent, MSL) -> + lists:foreach(fun(Handler) when Handler#handler.supervised =:= Parent -> + true; + (Handler) when is_pid(Handler#handler.supervised) -> + unlink(Handler#handler.supervised), + true; + (_) -> + true + end, + MSL). + +%% First terminate the supervised (if exists) handlers and +%% then inform other handlers. +%% We do not know if any handler really is interested but it +%% may be so ! +handle_exit(From, Reason, MSL, SName) -> + MSL1 = terminate_supervised(From, Reason, MSL, SName), + {_,MSL2}=server_notify({'EXIT', From, Reason}, handle_info, MSL1, SName), + MSL2. + +terminate_supervised(Pid, Reason, MSL, SName) -> + F = fun(Ha) when Ha#handler.supervised =:= Pid -> + do_terminate(Ha#handler.module, + Ha, + {stop,Reason}, + Ha#handler.state, + {parent_terminated, {Pid,Reason}}, + SName, + shutdown), + false; + (_) -> + true + end, + lists:filter(F, MSL). + +%%----------------------------------------------------------------- +%% Callback functions for system messages handling. +%%----------------------------------------------------------------- +system_continue(Parent, Debug, [ServerName, MSL, Hib]) -> + loop(Parent, ServerName, MSL, Debug, Hib). + +-spec system_terminate(_, _, _, [_]) -> no_return(). +system_terminate(Reason, Parent, _Debug, [ServerName, MSL, _Hib]) -> + terminate_server(Reason, Parent, MSL, ServerName). + +%%----------------------------------------------------------------- +%% Module here is sent in the system msg change_code. It specifies +%% which module should be changed. +%%----------------------------------------------------------------- +system_code_change([ServerName, MSL, Hib], Module, OldVsn, Extra) -> + MSL1 = lists:zf(fun(H) when H#handler.module =:= Module -> + {ok, NewState} = + Module:code_change(OldVsn, + H#handler.state, Extra), + {true, H#handler{state = NewState}}; + (_) -> true + end, + MSL), + {ok, [ServerName, MSL1, Hib]}. + +%%----------------------------------------------------------------- +%% Format debug messages. Print them as the call-back module sees +%% them, not as the real erlang messages. Use trace for that. +%%----------------------------------------------------------------- +print_event(Dev, {in, Msg}, Name) -> + case Msg of + {notify, Event} -> + io:format(Dev, "*DBG* ~p got event ~p~n", [Name, Event]); + {_,_,{call, Handler, Query}} -> + io:format(Dev, "*DBG* ~p(~p) got call ~p~n", + [Name, Handler, Query]); + _ -> + io:format(Dev, "*DBG* ~p got ~p~n", [Name, Msg]) + end; +print_event(Dev, Dbg, Name) -> + io:format(Dev, "*DBG* ~p : ~p~n", [Name, Dbg]). + + +%% server_add_handler(Handler, Args, MSL) -> {Ret, MSL'}. +%% where MSL = [#handler{}] +%% Ret goes to the top level MSL' is the new internal state of the +%% event handler + +server_add_handler({Mod,Id}, Args, MSL) -> + Handler = #handler{module = Mod, + id = Id}, + server_add_handler(Mod, Handler, Args, MSL); +server_add_handler(Mod, Args, MSL) -> + Handler = #handler{module = Mod}, + server_add_handler(Mod, Handler, Args, MSL). + +server_add_handler(Mod, Handler, Args, MSL) -> + case catch Mod:init(Args) of + {ok, State} -> + {false, ok, [Handler#handler{state = State}|MSL]}; + {ok, State, hibernate} -> + {true, ok, [Handler#handler{state = State}|MSL]}; + Other -> + {false, Other, MSL} + end. + +%% Set up a link to the supervising process. +%% (Ought to be unidirected links here, Erl5.0 !!) +%% NOTE: This link will not be removed then the +%% handler is removed in case another handler has +%% own link to this process. +server_add_sup_handler({Mod,Id}, Args, MSL, Parent) -> + link(Parent), + Handler = #handler{module = Mod, + id = Id, + supervised = Parent}, + server_add_handler(Mod, Handler, Args, MSL); +server_add_sup_handler(Mod, Args, MSL, Parent) -> + link(Parent), + Handler = #handler{module = Mod, + supervised = Parent}, + server_add_handler(Mod, Handler, Args, MSL). + +%% server_delete_handler(HandlerId, Args, MSL) -> {Ret, MSL'} + +server_delete_handler(HandlerId, Args, MSL, SName) -> + case split(HandlerId, MSL) of + {Mod, Handler, MSL1} -> + {do_terminate(Mod, Handler, Args, + Handler#handler.state, delete, SName, normal), + MSL1}; + error -> + {{error, module_not_found}, MSL} + end. + +%% server_swap_handler(Handler1, Args1, Handler2, Args2, MSL, SN) -> MSL' +%% server_swap_handler(Handler1, Args1, Handler2, Args2, MSL, Sup, SN) -> MSL' + +server_swap_handler(Handler1, Args1, Handler2, Args2, MSL, SName) -> + {State2, Sup, MSL1} = split_and_terminate(Handler1, Args1, MSL, + SName, Handler2, false), + case s_s_h(Sup, Handler2, {Args2, State2}, MSL1) of + {Hib, ok, MSL2} -> + {Hib, ok, MSL2}; + {Hib, What, MSL2} -> + {Hib, {error, What}, MSL2} + end. + +server_swap_handler(Handler1, Args1, Handler2, Args2, MSL, Sup, SName) -> + {State2, _, MSL1} = split_and_terminate(Handler1, Args1, MSL, + SName, Handler2, Sup), + case s_s_h(Sup, Handler2, {Args2, State2}, MSL1) of + {Hib, ok, MSL2} -> + {Hib, ok, MSL2}; + {Hib, What, MSL2} -> + {Hib, {error, What}, MSL2} + end. + +s_s_h(false, Handler, Args, MSL) -> + server_add_handler(Handler, Args, MSL); +s_s_h(Pid, Handler, Args, MSL) -> + server_add_sup_handler(Handler, Args, MSL, Pid). + +split_and_terminate(HandlerId, Args, MSL, SName, Handler2, Sup) -> + case split(HandlerId, MSL) of + {Mod, Handler, MSL1} -> + OldSup = Handler#handler.supervised, + NewSup = if + not Sup -> OldSup; + true -> Sup + end, + {do_terminate(Mod, Handler, Args, + Handler#handler.state, swapped, SName, + {swapped, Handler2, NewSup}), + OldSup, + MSL1}; + error -> + {error, false, MSL} + end. + +%% server_notify(Event, Func, MSL, SName) -> MSL' + +server_notify(Event, Func, [Handler|T], SName) -> + case server_update(Handler, Func, Event, SName) of + {ok, Handler1} -> + {Hib, NewHandlers} = server_notify(Event, Func, T, SName), + {Hib, [Handler1|NewHandlers]}; + {hibernate, Handler1} -> + {_Hib, NewHandlers} = server_notify(Event, Func, T, SName), + {true, [Handler1|NewHandlers]}; + no -> + server_notify(Event, Func, T, SName) + end; +server_notify(_, _, [], _) -> + {false, []}. + +%% server_update(Handler, Func, Event, ServerName) -> Handler1 | no + +server_update(Handler1, Func, Event, SName) -> + Mod1 = Handler1#handler.module, + State = Handler1#handler.state, + case catch Mod1:Func(Event, State) of + {ok, State1} -> + {ok, Handler1#handler{state = State1}}; + {ok, State1, hibernate} -> + {hibernate, Handler1#handler{state = State1}}; + {swap_handler, Args1, State1, Handler2, Args2} -> + do_swap(Mod1, Handler1, Args1, State1, Handler2, Args2, SName); + remove_handler -> + do_terminate(Mod1, Handler1, remove_handler, State, + remove, SName, normal), + no; + Other -> + do_terminate(Mod1, Handler1, {error, Other}, State, + Event, SName, crash), + no + end. + +do_swap(Mod1, Handler1, Args1, State1, Handler2, Args2, SName) -> + %% finalise the existing handler + State2 = do_terminate(Mod1, Handler1, Args1, State1, + swapped, SName, + {swapped, Handler2, Handler1#handler.supervised}), + {Mod2, Handler} = new_handler(Handler2, Handler1), + case catch Mod2:init({Args2, State2}) of + {ok, State2a} -> + {ok, Handler#handler{state = State2a}}; + Other -> + report_terminate(Handler, crash, {error, Other}, SName, false), + no + end. + +new_handler({Mod,Id}, Handler1) -> + {Mod, #handler{module = Mod, + id = Id, + supervised = Handler1#handler.supervised}}; +new_handler(Mod, Handler1) -> + {Mod, #handler{module = Mod, + supervised = Handler1#handler.supervised}}. + + +-spec split(handler(), [#handler{}]) -> + {atom(), #handler{}, [#handler{}]} | 'error'. + +split(Ha, MSL) -> split(Ha, MSL, []). + +split({Mod,Id}, [Ha|T], L) when Ha#handler.module =:= Mod, + Ha#handler.id =:= Id -> + {Mod, Ha, lists:reverse(L, T)}; +split(Mod, [Ha|T], L) when Ha#handler.module =:= Mod, + not Ha#handler.id -> + {Mod, Ha, lists:reverse(L, T)}; +split(Ha, [H|T], L) -> + split(Ha, T, [H|L]); +split(_, [], _) -> + error. + +%% server_call(Handler, Query, MSL, ServerName) -> +%% {Reply, MSL1} + +server_call(Handler, Query, MSL, SName) -> + case search(Handler, MSL) of + {ok, Ha} -> + case server_call_update(Ha, Query, SName) of + {no, Reply} -> + {false, Reply, delete(Handler, MSL)}; + {{ok, Ha1}, Reply} -> + {false, Reply, replace(Handler, MSL, Ha1)}; + {{hibernate, Ha1}, Reply} -> + {true, Reply, replace(Handler, MSL, Ha1)} + end; + false -> + {false, {error, bad_module}, MSL} + end. + +search({Mod, Id}, [Ha|_MSL]) when Ha#handler.module =:= Mod, + Ha#handler.id =:= Id -> + {ok, Ha}; +search(Mod, [Ha|_MSL]) when Ha#handler.module =:= Mod, + not Ha#handler.id -> + {ok, Ha}; +search(Handler, [_|MSL]) -> + search(Handler, MSL); +search(_, []) -> + false. + +delete({Mod, Id}, [Ha|MSL]) when Ha#handler.module =:= Mod, + Ha#handler.id =:= Id -> + MSL; +delete(Mod, [Ha|MSL]) when Ha#handler.module =:= Mod, + not Ha#handler.id -> + MSL; +delete(Handler, [Ha|MSL]) -> + [Ha|delete(Handler, MSL)]; +delete(_, []) -> + []. + +replace({Mod, Id}, [Ha|MSL], NewHa) when Ha#handler.module =:= Mod, + Ha#handler.id =:= Id -> + [NewHa|MSL]; +replace(Mod, [Ha|MSL], NewHa) when Ha#handler.module =:= Mod, + not Ha#handler.id -> + [NewHa|MSL]; +replace(Handler, [Ha|MSL], NewHa) -> + [Ha|replace(Handler, MSL, NewHa)]; +replace(_, [], NewHa) -> + [NewHa]. + +%% server_call_update(Handler, Query, ServerName) -> +%% {{Handler1, State1} | 'no', Reply} + +server_call_update(Handler1, Query, SName) -> + Mod1 = Handler1#handler.module, + State = Handler1#handler.state, + case catch Mod1:handle_call(Query, State) of + {ok, Reply, State1} -> + {{ok, Handler1#handler{state = State1}}, Reply}; + {ok, Reply, State1, hibernate} -> + {{hibernate, Handler1#handler{state = State1}}, + Reply}; + {swap_handler, Reply, Args1, State1, Handler2, Args2} -> + {do_swap(Mod1,Handler1,Args1,State1,Handler2,Args2,SName), Reply}; + {remove_handler, Reply} -> + do_terminate(Mod1, Handler1, remove_handler, State, + remove, SName, normal), + {no, Reply}; + Other -> + do_terminate(Mod1, Handler1, {error, Other}, State, + Query, SName, crash), + {no, {error, Other}} + end. + +do_terminate(Mod, Handler, Args, State, LastIn, SName, Reason) -> + Res = (catch Mod:terminate(Args, State)), + report_terminate(Handler, Reason, Args, State, LastIn, SName, Res), + Res. + +report_terminate(Handler, crash, {error, Why}, State, LastIn, SName, _) -> + report_terminate(Handler, Why, State, LastIn, SName); +report_terminate(Handler, How, _, State, LastIn, SName, _) -> + %% How == normal | shutdown | {swapped, NewHandler, NewSupervisor} + report_terminate(Handler, How, State, LastIn, SName). + +report_terminate(Handler, Reason, State, LastIn, SName) -> + report_error(Handler, Reason, State, LastIn, SName), + case Handler#handler.supervised of + false -> + ok; + Pid -> + Pid ! {gen_event_EXIT,handler(Handler),Reason}, + ok + end. + +report_error(_Handler, normal, _, _, _) -> ok; +report_error(_Handler, shutdown, _, _, _) -> ok; +report_error(_Handler, {swapped,_,_}, _, _, _) -> ok; +report_error(Handler, Reason, State, LastIn, SName) -> + Reason1 = + case Reason of + {'EXIT',{undef,[{M,F,A}|MFAs]}} -> + case code:is_loaded(M) of + false -> + {'module could not be loaded',[{M,F,A}|MFAs]}; + _ -> + case erlang:function_exported(M, F, length(A)) of + true -> + {undef,[{M,F,A}|MFAs]}; + false -> + {'function not exported',[{M,F,A}|MFAs]} + end + end; + {'EXIT',Why} -> + Why; + _ -> + Reason + end, + error_msg("** gen_event handler ~p crashed.~n" + "** Was installed in ~p~n" + "** Last event was: ~p~n" + "** When handler state == ~p~n" + "** Reason == ~p~n", + [handler(Handler),SName,LastIn,State,Reason1]). + +handler(Handler) when not Handler#handler.id -> + Handler#handler.module; +handler(Handler) -> + {Handler#handler.module, Handler#handler.id}. + +the_handlers(MSL) -> + [handler(Handler) || Handler <- MSL]. + +%% stop_handlers(MSL, ServerName) -> [] + +stop_handlers([Handler|T], SName) -> + Mod = Handler#handler.module, + do_terminate(Mod, Handler, stop, Handler#handler.state, + stop, SName, shutdown), + stop_handlers(T, SName); +stop_handlers([], _) -> + []. + +%% Message from the release_handler. +%% The list of modules got to be a set ! +get_modules(MSL) -> + Mods = [Handler#handler.module || Handler <- MSL], + ordsets:to_list(ordsets:from_list(Mods)). + +%%----------------------------------------------------------------- +%% Status information +%%----------------------------------------------------------------- +format_status(_Opt, StatusData) -> + [_PDict, SysState, Parent, _Debug, [ServerName, MSL, _Hib]] = StatusData, + Header = lists:concat(["Status for event handler ", ServerName]), + [{header, Header}, + {data, [{"Status", SysState}, + {"Parent", Parent}]}, + {items, {"Installed handlers", MSL}}]. diff --git a/lib/stdlib/src/gen_fsm.erl b/lib/stdlib/src/gen_fsm.erl new file mode 100644 index 0000000000..f3775f967a --- /dev/null +++ b/lib/stdlib/src/gen_fsm.erl @@ -0,0 +1,623 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(gen_fsm). + +%%%----------------------------------------------------------------- +%%% +%%% This state machine is somewhat more pure than state_lib. It is +%%% still based on State dispatching (one function per state), but +%%% allows a function handle_event to take care of events in all states. +%%% It's not that pure anymore :( We also allow synchronized event sending. +%%% +%%% If the Parent process terminates the Module:terminate/2 +%%% function is called. +%%% +%%% The user module should export: +%%% +%%% init(Args) +%%% ==> {ok, StateName, StateData} +%%% {ok, StateName, StateData, Timeout} +%%% ignore +%%% {stop, Reason} +%%% +%%% StateName(Msg, StateData) +%%% +%%% ==> {next_state, NewStateName, NewStateData} +%%% {next_state, NewStateName, NewStateData, Timeout} +%%% {stop, Reason, NewStateData} +%%% Reason = normal | shutdown | Term terminate(State) is called +%%% +%%% StateName(Msg, From, StateData) +%%% +%%% ==> {next_state, NewStateName, NewStateData} +%%% {next_state, NewStateName, NewStateData, Timeout} +%%% {reply, Reply, NewStateName, NewStateData} +%%% {reply, Reply, NewStateName, NewStateData, Timeout} +%%% {stop, Reason, NewStateData} +%%% Reason = normal | shutdown | Term terminate(State) is called +%%% +%%% handle_event(Msg, StateName, StateData) +%%% +%%% ==> {next_state, NewStateName, NewStateData} +%%% {next_state, NewStateName, NewStateData, Timeout} +%%% {stop, Reason, Reply, NewStateData} +%%% {stop, Reason, NewStateData} +%%% Reason = normal | shutdown | Term terminate(State) is called +%%% +%%% handle_sync_event(Msg, From, StateName, StateData) +%%% +%%% ==> {next_state, NewStateName, NewStateData} +%%% {next_state, NewStateName, NewStateData, Timeout} +%%% {reply, Reply, NewStateName, NewStateData} +%%% {reply, Reply, NewStateName, NewStateData, Timeout} +%%% {stop, Reason, Reply, NewStateData} +%%% {stop, Reason, NewStateData} +%%% Reason = normal | shutdown | Term terminate(State) is called +%%% +%%% handle_info(Info, StateName) (e.g. {'EXIT', P, R}, {nodedown, N}, ... +%%% +%%% ==> {next_state, NewStateName, NewStateData} +%%% {next_state, NewStateName, NewStateData, Timeout} +%%% {stop, Reason, NewStateData} +%%% Reason = normal | shutdown | Term terminate(State) is called +%%% +%%% terminate(Reason, StateName, StateData) Let the user module clean up +%%% always called when server terminates +%%% +%%% ==> the return value is ignored +%%% +%%% +%%% The work flow (of the fsm) can be described as follows: +%%% +%%% User module fsm +%%% ----------- ------- +%%% start -----> start +%%% init <----- . +%%% +%%% loop +%%% StateName <----- . +%%% +%%% handle_event <----- . +%%% +%%% handle__sunc_event <----- . +%%% +%%% handle_info <----- . +%%% +%%% terminate <----- . +%%% +%%% +%%% --------------------------------------------------- + +-export([start/3, start/4, + start_link/3, start_link/4, + send_event/2, sync_send_event/2, sync_send_event/3, + send_all_state_event/2, + sync_send_all_state_event/2, sync_send_all_state_event/3, + reply/2, + start_timer/2,send_event_after/2,cancel_timer/1, + enter_loop/4, enter_loop/5, enter_loop/6, wake_hib/6]). + +-export([behaviour_info/1]). + +%% Internal exports +-export([init_it/6, print_event/3, + system_continue/3, + system_terminate/4, + system_code_change/4, + format_status/2]). + +-import(error_logger, [format/2]). + +%%% --------------------------------------------------- +%%% Interface functions. +%%% --------------------------------------------------- + +-spec behaviour_info(atom()) -> 'undefined' | [{atom(), arity()}]. + +behaviour_info(callbacks) -> + [{init,1},{handle_event,3},{handle_sync_event,4},{handle_info,3}, + {terminate,3},{code_change,4}]; +behaviour_info(_Other) -> + undefined. + +%%% --------------------------------------------------- +%%% Starts a generic state machine. +%%% start(Mod, Args, Options) +%%% start(Name, Mod, Args, Options) +%%% start_link(Mod, Args, Options) +%%% start_link(Name, Mod, Args, Options) where: +%%% Name ::= {local, atom()} | {global, atom()} +%%% Mod ::= atom(), callback module implementing the 'real' fsm +%%% Args ::= term(), init arguments (to Mod:init/1) +%%% Options ::= [{debug, [Flag]}] +%%% Flag ::= trace | log | {logfile, File} | statistics | debug +%%% (debug == log && statistics) +%%% Returns: {ok, Pid} | +%%% {error, {already_started, Pid}} | +%%% {error, Reason} +%%% --------------------------------------------------- +start(Mod, Args, Options) -> + gen:start(?MODULE, nolink, Mod, Args, Options). + +start(Name, Mod, Args, Options) -> + gen:start(?MODULE, nolink, Name, Mod, Args, Options). + +start_link(Mod, Args, Options) -> + gen:start(?MODULE, link, Mod, Args, Options). + +start_link(Name, Mod, Args, Options) -> + gen:start(?MODULE, link, Name, Mod, Args, Options). + + +send_event({global, Name}, Event) -> + catch global:send(Name, {'$gen_event', Event}), + ok; +send_event(Name, Event) -> + Name ! {'$gen_event', Event}, + ok. + +sync_send_event(Name, Event) -> + case catch gen:call(Name, '$gen_sync_event', Event) of + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, sync_send_event, [Name, Event]}}) + end. + +sync_send_event(Name, Event, Timeout) -> + case catch gen:call(Name, '$gen_sync_event', Event, Timeout) of + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, sync_send_event, [Name, Event, Timeout]}}) + end. + +send_all_state_event({global, Name}, Event) -> + catch global:send(Name, {'$gen_all_state_event', Event}), + ok; +send_all_state_event(Name, Event) -> + Name ! {'$gen_all_state_event', Event}, + ok. + +sync_send_all_state_event(Name, Event) -> + case catch gen:call(Name, '$gen_sync_all_state_event', Event) of + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, sync_send_all_state_event, [Name, Event]}}) + end. + +sync_send_all_state_event(Name, Event, Timeout) -> + case catch gen:call(Name, '$gen_sync_all_state_event', Event, Timeout) of + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, sync_send_all_state_event, + [Name, Event, Timeout]}}) + end. + +%% Designed to be only callable within one of the callbacks +%% hence using the self() of this instance of the process. +%% This is to ensure that timers don't go astray in global +%% e.g. when straddling a failover, or turn up in a restarted +%% instance of the process. + +%% Returns Ref, sends event {timeout,Ref,Msg} after Time +%% to the (then) current state. +start_timer(Time, Msg) -> + erlang:start_timer(Time, self(), {'$gen_timer', Msg}). + +%% Returns Ref, sends Event after Time to the (then) current state. +send_event_after(Time, Event) -> + erlang:start_timer(Time, self(), {'$gen_event', Event}). + +%% Returns the remaing time for the timer if Ref referred to +%% an active timer/send_event_after, false otherwise. +cancel_timer(Ref) -> + case erlang:cancel_timer(Ref) of + false -> + receive {timeout, Ref, _} -> 0 + after 0 -> false + end; + RemainingTime -> + RemainingTime + end. + +%% enter_loop/4,5,6 +%% Makes an existing process into a gen_fsm. +%% The calling process will enter the gen_fsm receive loop and become a +%% gen_fsm process. +%% The process *must* have been started using one of the start functions +%% in proc_lib, see proc_lib(3). +%% The user is responsible for any initialization of the process, +%% including registering a name for it. +enter_loop(Mod, Options, StateName, StateData) -> + enter_loop(Mod, Options, StateName, StateData, self(), infinity). + +enter_loop(Mod, Options, StateName, StateData, ServerName = {_,_}) -> + enter_loop(Mod, Options, StateName, StateData, ServerName,infinity); +enter_loop(Mod, Options, StateName, StateData, Timeout) -> + enter_loop(Mod, Options, StateName, StateData, self(), Timeout). + +enter_loop(Mod, Options, StateName, StateData, ServerName, Timeout) -> + Name = get_proc_name(ServerName), + Parent = get_parent(), + Debug = gen:debug_options(Options), + loop(Parent, Name, StateName, StateData, Mod, Timeout, Debug). + +get_proc_name(Pid) when is_pid(Pid) -> + Pid; +get_proc_name({local, Name}) -> + case process_info(self(), registered_name) of + {registered_name, Name} -> + Name; + {registered_name, _Name} -> + exit(process_not_registered); + [] -> + exit(process_not_registered) + end; +get_proc_name({global, Name}) -> + case global:safe_whereis_name(Name) of + undefined -> + exit(process_not_registered_globally); + Pid when Pid =:= self() -> + Name; + _Pid -> + exit(process_not_registered_globally) + end. + +get_parent() -> + case get('$ancestors') of + [Parent | _] when is_pid(Parent) -> + Parent; + [Parent | _] when is_atom(Parent) -> + name_to_pid(Parent); + _ -> + exit(process_was_not_started_by_proc_lib) + end. + +name_to_pid(Name) -> + case whereis(Name) of + undefined -> + case global:safe_whereis_name(Name) of + undefined -> + exit(could_not_find_registerd_name); + Pid -> + Pid + end; + Pid -> + Pid + end. + +%%% --------------------------------------------------- +%%% Initiate the new process. +%%% Register the name using the Rfunc function +%%% Calls the Mod:init/Args function. +%%% Finally an acknowledge is sent to Parent and the main +%%% loop is entered. +%%% --------------------------------------------------- +init_it(Starter, self, Name, Mod, Args, Options) -> + init_it(Starter, self(), Name, Mod, Args, Options); +init_it(Starter, Parent, Name0, Mod, Args, Options) -> + Name = name(Name0), + Debug = gen:debug_options(Options), + case catch Mod:init(Args) of + {ok, StateName, StateData} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, Name, StateName, StateData, Mod, infinity, Debug); + {ok, StateName, StateData, Timeout} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, Name, StateName, StateData, Mod, Timeout, Debug); + {stop, Reason} -> + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + ignore -> + proc_lib:init_ack(Starter, ignore), + exit(normal); + {'EXIT', Reason} -> + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + Else -> + Error = {bad_return_value, Else}, + proc_lib:init_ack(Starter, {error, Error}), + exit(Error) + end. + +name({local,Name}) -> Name; +name({global,Name}) -> Name; +name(Pid) when is_pid(Pid) -> Pid. + +%%----------------------------------------------------------------- +%% The MAIN loop +%%----------------------------------------------------------------- +loop(Parent, Name, StateName, StateData, Mod, hibernate, Debug) -> + proc_lib:hibernate(?MODULE,wake_hib, + [Parent, Name, StateName, StateData, Mod, + Debug]); +loop(Parent, Name, StateName, StateData, Mod, Time, Debug) -> + Msg = receive + Input -> + Input + after Time -> + {'$gen_event', timeout} + end, + decode_msg(Msg,Parent, Name, StateName, StateData, Mod, Time, Debug, false). + +wake_hib(Parent, Name, StateName, StateData, Mod, Debug) -> + Msg = receive + Input -> + Input + end, + decode_msg(Msg, Parent, Name, StateName, StateData, Mod, hibernate, Debug, true). + +decode_msg(Msg,Parent, Name, StateName, StateData, Mod, Time, Debug, Hib) -> + case Msg of + {system, From, Req} -> + sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, + [Name, StateName, StateData, Mod, Time], Hib); + {'EXIT', Parent, Reason} -> + terminate(Reason, Name, Msg, Mod, StateName, StateData, Debug); + _Msg when Debug =:= [] -> + handle_msg(Msg, Parent, Name, StateName, StateData, Mod, Time); + _Msg -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, + {Name, StateName}, {in, Msg}), + handle_msg(Msg, Parent, Name, StateName, StateData, + Mod, Time, Debug1) + end. + +%%----------------------------------------------------------------- +%% Callback functions for system messages handling. +%%----------------------------------------------------------------- +system_continue(Parent, Debug, [Name, StateName, StateData, Mod, Time]) -> + loop(Parent, Name, StateName, StateData, Mod, Time, Debug). + +-spec system_terminate(term(), _, _, [term(),...]) -> no_return(). + +system_terminate(Reason, _Parent, Debug, + [Name, StateName, StateData, Mod, _Time]) -> + terminate(Reason, Name, [], Mod, StateName, StateData, Debug). + +system_code_change([Name, StateName, StateData, Mod, Time], + _Module, OldVsn, Extra) -> + case catch Mod:code_change(OldVsn, StateName, StateData, Extra) of + {ok, NewStateName, NewStateData} -> + {ok, [Name, NewStateName, NewStateData, Mod, Time]}; + Else -> Else + end. + +%%----------------------------------------------------------------- +%% Format debug messages. Print them as the call-back module sees +%% them, not as the real erlang messages. Use trace for that. +%%----------------------------------------------------------------- +print_event(Dev, {in, Msg}, {Name, StateName}) -> + case Msg of + {'$gen_event', Event} -> + io:format(Dev, "*DBG* ~p got event ~p in state ~w~n", + [Name, Event, StateName]); + {'$gen_all_state_event', Event} -> + io:format(Dev, + "*DBG* ~p got all_state_event ~p in state ~w~n", + [Name, Event, StateName]); + {timeout, Ref, {'$gen_timer', Message}} -> + io:format(Dev, + "*DBG* ~p got timer ~p in state ~w~n", + [Name, {timeout, Ref, Message}, StateName]); + {timeout, _Ref, {'$gen_event', Event}} -> + io:format(Dev, + "*DBG* ~p got timer ~p in state ~w~n", + [Name, Event, StateName]); + _ -> + io:format(Dev, "*DBG* ~p got ~p in state ~w~n", + [Name, Msg, StateName]) + end; +print_event(Dev, {out, Msg, To, StateName}, Name) -> + io:format(Dev, "*DBG* ~p sent ~p to ~w~n" + " and switched to state ~w~n", + [Name, Msg, To, StateName]); +print_event(Dev, return, {Name, StateName}) -> + io:format(Dev, "*DBG* ~p switched to state ~w~n", + [Name, StateName]). + +handle_msg(Msg, Parent, Name, StateName, StateData, Mod, _Time) -> %No debug here + From = from(Msg), + case catch dispatch(Msg, Mod, StateName, StateData) of + {next_state, NStateName, NStateData} -> + loop(Parent, Name, NStateName, NStateData, Mod, infinity, []); + {next_state, NStateName, NStateData, Time1} -> + loop(Parent, Name, NStateName, NStateData, Mod, Time1, []); + {reply, Reply, NStateName, NStateData} when From =/= undefined -> + reply(From, Reply), + loop(Parent, Name, NStateName, NStateData, Mod, infinity, []); + {reply, Reply, NStateName, NStateData, Time1} when From =/= undefined -> + reply(From, Reply), + loop(Parent, Name, NStateName, NStateData, Mod, Time1, []); + {stop, Reason, NStateData} -> + terminate(Reason, Name, Msg, Mod, StateName, NStateData, []); + {stop, Reason, Reply, NStateData} when From =/= undefined -> + {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, + StateName, NStateData, [])), + reply(From, Reply), + exit(R); + {'EXIT', What} -> + terminate(What, Name, Msg, Mod, StateName, StateData, []); + Reply -> + terminate({bad_return_value, Reply}, + Name, Msg, Mod, StateName, StateData, []) + end. + +handle_msg(Msg, Parent, Name, StateName, StateData, Mod, _Time, Debug) -> + From = from(Msg), + case catch dispatch(Msg, Mod, StateName, StateData) of + {next_state, NStateName, NStateData} -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, + {Name, NStateName}, return), + loop(Parent, Name, NStateName, NStateData, Mod, infinity, Debug1); + {next_state, NStateName, NStateData, Time1} -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, + {Name, NStateName}, return), + loop(Parent, Name, NStateName, NStateData, Mod, Time1, Debug1); + {reply, Reply, NStateName, NStateData} when From =/= undefined -> + Debug1 = reply(Name, From, Reply, Debug, NStateName), + loop(Parent, Name, NStateName, NStateData, Mod, infinity, Debug1); + {reply, Reply, NStateName, NStateData, Time1} when From =/= undefined -> + Debug1 = reply(Name, From, Reply, Debug, NStateName), + loop(Parent, Name, NStateName, NStateData, Mod, Time1, Debug1); + {stop, Reason, NStateData} -> + terminate(Reason, Name, Msg, Mod, StateName, NStateData, Debug); + {stop, Reason, Reply, NStateData} when From =/= undefined -> + {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, + StateName, NStateData, Debug)), + reply(Name, From, Reply, Debug, StateName), + exit(R); + {'EXIT', What} -> + terminate(What, Name, Msg, Mod, StateName, StateData, Debug); + Reply -> + terminate({bad_return_value, Reply}, + Name, Msg, Mod, StateName, StateData, Debug) + end. + +dispatch({'$gen_event', Event}, Mod, StateName, StateData) -> + Mod:StateName(Event, StateData); +dispatch({'$gen_all_state_event', Event}, Mod, StateName, StateData) -> + Mod:handle_event(Event, StateName, StateData); +dispatch({'$gen_sync_event', From, Event}, Mod, StateName, StateData) -> + Mod:StateName(Event, From, StateData); +dispatch({'$gen_sync_all_state_event', From, Event}, + Mod, StateName, StateData) -> + Mod:handle_sync_event(Event, From, StateName, StateData); +dispatch({timeout, Ref, {'$gen_timer', Msg}}, Mod, StateName, StateData) -> + Mod:StateName({timeout, Ref, Msg}, StateData); +dispatch({timeout, _Ref, {'$gen_event', Event}}, Mod, StateName, StateData) -> + Mod:StateName(Event, StateData); +dispatch(Info, Mod, StateName, StateData) -> + Mod:handle_info(Info, StateName, StateData). + +from({'$gen_sync_event', From, _Event}) -> From; +from({'$gen_sync_all_state_event', From, _Event}) -> From; +from(_) -> undefined. + +%% Send a reply to the client. +reply({To, Tag}, Reply) -> + catch To ! {Tag, Reply}. + +reply(Name, {To, Tag}, Reply, Debug, StateName) -> + reply({To, Tag}, Reply), + sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {out, Reply, To, StateName}). + +%%% --------------------------------------------------- +%%% Terminate the server. +%%% --------------------------------------------------- + +-spec terminate(term(), _, _, atom(), _, _, _) -> no_return(). + +terminate(Reason, Name, Msg, Mod, StateName, StateData, Debug) -> + case catch Mod:terminate(Reason, StateName, StateData) of + {'EXIT', R} -> + error_info(R, Name, Msg, StateName, StateData, Debug), + exit(R); + _ -> + case Reason of + normal -> + exit(normal); + shutdown -> + exit(shutdown); + {shutdown,_}=Shutdown -> + exit(Shutdown); + _ -> + error_info(Reason, Name, Msg, StateName, StateData, Debug), + exit(Reason) + end + end. + +error_info(Reason, Name, Msg, StateName, StateData, Debug) -> + Reason1 = + case Reason of + {undef,[{M,F,A}|MFAs]} -> + case code:is_loaded(M) of + false -> + {'module could not be loaded',[{M,F,A}|MFAs]}; + _ -> + case erlang:function_exported(M, F, length(A)) of + true -> + Reason; + false -> + {'function not exported',[{M,F,A}|MFAs]} + end + end; + _ -> + Reason + end, + Str = "** State machine ~p terminating \n" ++ + get_msg_str(Msg) ++ + "** When State == ~p~n" + "** Data == ~p~n" + "** Reason for termination = ~n** ~p~n", + format(Str, [Name, get_msg(Msg), StateName, StateData, Reason1]), + sys:print_log(Debug), + ok. + +get_msg_str({'$gen_event', _Event}) -> + "** Last event in was ~p~n"; +get_msg_str({'$gen_sync_event', _Event}) -> + "** Last sync event in was ~p~n"; +get_msg_str({'$gen_all_state_event', _Event}) -> + "** Last event in was ~p (for all states)~n"; +get_msg_str({'$gen_sync_all_state_event', _Event}) -> + "** Last sync event in was ~p (for all states)~n"; +get_msg_str({timeout, _Ref, {'$gen_timer', _Msg}}) -> + "** Last timer event in was ~p~n"; +get_msg_str({timeout, _Ref, {'$gen_event', _Msg}}) -> + "** Last timer event in was ~p~n"; +get_msg_str(_Msg) -> + "** Last message in was ~p~n". + +get_msg({'$gen_event', Event}) -> Event; +get_msg({'$gen_sync_event', Event}) -> Event; +get_msg({'$gen_all_state_event', Event}) -> Event; +get_msg({'$gen_sync_all_state_event', Event}) -> Event; +get_msg({timeout, Ref, {'$gen_timer', Msg}}) -> {timeout, Ref, Msg}; +get_msg({timeout, _Ref, {'$gen_event', Event}}) -> Event; +get_msg(Msg) -> Msg. + +%%----------------------------------------------------------------- +%% Status information +%%----------------------------------------------------------------- +format_status(Opt, StatusData) -> + [PDict, SysState, Parent, Debug, [Name, StateName, StateData, Mod, _Time]] = + StatusData, + Header = lists:concat(["Status for state machine ", Name]), + Log = sys:get_debug(log, Debug, []), + Specfic = + case erlang:function_exported(Mod, format_status, 2) of + true -> + case catch Mod:format_status(Opt,[PDict,StateData]) of + {'EXIT', _} -> [{data, [{"StateData", StateData}]}]; + Else -> Else + end; + _ -> + [{data, [{"StateData", StateData}]}] + end, + [{header, Header}, + {data, [{"Status", SysState}, + {"Parent", Parent}, + {"Logged events", Log}, + {"StateName", StateName}]} | + Specfic]. diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl new file mode 100644 index 0000000000..f1a9a31c63 --- /dev/null +++ b/lib/stdlib/src/gen_server.erl @@ -0,0 +1,853 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(gen_server). + +%%% --------------------------------------------------- +%%% +%%% The idea behind THIS server is that the user module +%%% provides (different) functions to handle different +%%% kind of inputs. +%%% If the Parent process terminates the Module:terminate/2 +%%% function is called. +%%% +%%% The user module should export: +%%% +%%% init(Args) +%%% ==> {ok, State} +%%% {ok, State, Timeout} +%%% ignore +%%% {stop, Reason} +%%% +%%% handle_call(Msg, {From, Tag}, State) +%%% +%%% ==> {reply, Reply, State} +%%% {reply, Reply, State, Timeout} +%%% {noreply, State} +%%% {noreply, State, Timeout} +%%% {stop, Reason, Reply, State} +%%% Reason = normal | shutdown | Term terminate(State) is called +%%% +%%% handle_cast(Msg, State) +%%% +%%% ==> {noreply, State} +%%% {noreply, State, Timeout} +%%% {stop, Reason, State} +%%% Reason = normal | shutdown | Term terminate(State) is called +%%% +%%% handle_info(Info, State) Info is e.g. {'EXIT', P, R}, {nodedown, N}, ... +%%% +%%% ==> {noreply, State} +%%% {noreply, State, Timeout} +%%% {stop, Reason, State} +%%% Reason = normal | shutdown | Term, terminate(State) is called +%%% +%%% terminate(Reason, State) Let the user module clean up +%%% always called when server terminates +%%% +%%% ==> ok +%%% +%%% +%%% The work flow (of the server) can be described as follows: +%%% +%%% User module Generic +%%% ----------- ------- +%%% start -----> start +%%% init <----- . +%%% +%%% loop +%%% handle_call <----- . +%%% -----> reply +%%% +%%% handle_cast <----- . +%%% +%%% handle_info <----- . +%%% +%%% terminate <----- . +%%% +%%% -----> reply +%%% +%%% +%%% --------------------------------------------------- + +%% API +-export([start/3, start/4, + start_link/3, start_link/4, + call/2, call/3, + cast/2, reply/2, + abcast/2, abcast/3, + multi_call/2, multi_call/3, multi_call/4, + enter_loop/3, enter_loop/4, enter_loop/5, wake_hib/5]). + +-export([behaviour_info/1]). + +%% System exports +-export([system_continue/3, + system_terminate/4, + system_code_change/4, + format_status/2]). + +%% Internal exports +-export([init_it/6, print_event/3]). + +-import(error_logger, [format/2]). + +%%%========================================================================= +%%% API +%%%========================================================================= + +-spec behaviour_info(atom()) -> 'undefined' | [{atom(), arity()}]. + +behaviour_info(callbacks) -> + [{init,1},{handle_call,3},{handle_cast,2},{handle_info,2}, + {terminate,2},{code_change,3}]; +behaviour_info(_Other) -> + undefined. + +%%% ----------------------------------------------------------------- +%%% Starts a generic server. +%%% start(Mod, Args, Options) +%%% start(Name, Mod, Args, Options) +%%% start_link(Mod, Args, Options) +%%% start_link(Name, Mod, Args, Options) where: +%%% Name ::= {local, atom()} | {global, atom()} +%%% Mod ::= atom(), callback module implementing the 'real' server +%%% Args ::= term(), init arguments (to Mod:init/1) +%%% Options ::= [{timeout, Timeout} | {debug, [Flag]}] +%%% Flag ::= trace | log | {logfile, File} | statistics | debug +%%% (debug == log && statistics) +%%% Returns: {ok, Pid} | +%%% {error, {already_started, Pid}} | +%%% {error, Reason} +%%% ----------------------------------------------------------------- +start(Mod, Args, Options) -> + gen:start(?MODULE, nolink, Mod, Args, Options). + +start(Name, Mod, Args, Options) -> + gen:start(?MODULE, nolink, Name, Mod, Args, Options). + +start_link(Mod, Args, Options) -> + gen:start(?MODULE, link, Mod, Args, Options). + +start_link(Name, Mod, Args, Options) -> + gen:start(?MODULE, link, Name, Mod, Args, Options). + + +%% ----------------------------------------------------------------- +%% Make a call to a generic server. +%% If the server is located at another node, that node will +%% be monitored. +%% If the client is trapping exits and is linked server termination +%% is handled here (? Shall we do that here (or rely on timeouts) ?). +%% ----------------------------------------------------------------- +call(Name, Request) -> + case catch gen:call(Name, '$gen_call', Request) of + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, call, [Name, Request]}}) + end. + +call(Name, Request, Timeout) -> + case catch gen:call(Name, '$gen_call', Request, Timeout) of + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, call, [Name, Request, Timeout]}}) + end. + +%% ----------------------------------------------------------------- +%% Make a cast to a generic server. +%% ----------------------------------------------------------------- +cast({global,Name}, Request) -> + catch global:send(Name, cast_msg(Request)), + ok; +cast({Name,Node}=Dest, Request) when is_atom(Name), is_atom(Node) -> + do_cast(Dest, Request); +cast(Dest, Request) when is_atom(Dest) -> + do_cast(Dest, Request); +cast(Dest, Request) when is_pid(Dest) -> + do_cast(Dest, Request). + +do_cast(Dest, Request) -> + do_send(Dest, cast_msg(Request)), + ok. + +cast_msg(Request) -> {'$gen_cast',Request}. + +%% ----------------------------------------------------------------- +%% Send a reply to the client. +%% ----------------------------------------------------------------- +reply({To, Tag}, Reply) -> + catch To ! {Tag, Reply}. + +%% ----------------------------------------------------------------- +%% Asyncronous broadcast, returns nothing, it's just send'n prey +%%----------------------------------------------------------------- +abcast(Name, Request) when is_atom(Name) -> + do_abcast([node() | nodes()], Name, cast_msg(Request)). + +abcast(Nodes, Name, Request) when is_list(Nodes), is_atom(Name) -> + do_abcast(Nodes, Name, cast_msg(Request)). + +do_abcast([Node|Nodes], Name, Msg) when is_atom(Node) -> + do_send({Name,Node},Msg), + do_abcast(Nodes, Name, Msg); +do_abcast([], _,_) -> abcast. + +%%% ----------------------------------------------------------------- +%%% Make a call to servers at several nodes. +%%% Returns: {[Replies],[BadNodes]} +%%% A Timeout can be given +%%% +%%% A middleman process is used in case late answers arrives after +%%% the timeout. If they would be allowed to glog the callers message +%%% queue, it would probably become confused. Late answers will +%%% now arrive to the terminated middleman and so be discarded. +%%% ----------------------------------------------------------------- +multi_call(Name, Req) + when is_atom(Name) -> + do_multi_call([node() | nodes()], Name, Req, infinity). + +multi_call(Nodes, Name, Req) + when is_list(Nodes), is_atom(Name) -> + do_multi_call(Nodes, Name, Req, infinity). + +multi_call(Nodes, Name, Req, infinity) -> + do_multi_call(Nodes, Name, Req, infinity); +multi_call(Nodes, Name, Req, Timeout) + when is_list(Nodes), is_atom(Name), is_integer(Timeout), Timeout >= 0 -> + do_multi_call(Nodes, Name, Req, Timeout). + + +%%----------------------------------------------------------------- +%% enter_loop(Mod, Options, State, , ) ->_ +%% +%% Description: Makes an existing process into a gen_server. +%% The calling process will enter the gen_server receive +%% loop and become a gen_server process. +%% The process *must* have been started using one of the +%% start functions in proc_lib, see proc_lib(3). +%% The user is responsible for any initialization of the +%% process, including registering a name for it. +%%----------------------------------------------------------------- +enter_loop(Mod, Options, State) -> + enter_loop(Mod, Options, State, self(), infinity). + +enter_loop(Mod, Options, State, ServerName = {_, _}) -> + enter_loop(Mod, Options, State, ServerName, infinity); + +enter_loop(Mod, Options, State, Timeout) -> + enter_loop(Mod, Options, State, self(), Timeout). + +enter_loop(Mod, Options, State, ServerName, Timeout) -> + Name = get_proc_name(ServerName), + Parent = get_parent(), + Debug = debug_options(Name, Options), + loop(Parent, Name, State, Mod, Timeout, Debug). + +%%%======================================================================== +%%% Gen-callback functions +%%%======================================================================== + +%%% --------------------------------------------------- +%%% Initiate the new process. +%%% Register the name using the Rfunc function +%%% Calls the Mod:init/Args function. +%%% Finally an acknowledge is sent to Parent and the main +%%% loop is entered. +%%% --------------------------------------------------- +init_it(Starter, self, Name, Mod, Args, Options) -> + init_it(Starter, self(), Name, Mod, Args, Options); +init_it(Starter, Parent, Name0, Mod, Args, Options) -> + Name = name(Name0), + Debug = debug_options(Name, Options), + case catch Mod:init(Args) of + {ok, State} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, Name, State, Mod, infinity, Debug); + {ok, State, Timeout} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, Name, State, Mod, Timeout, Debug); + {stop, Reason} -> + %% For consistency, we must make sure that the + %% registered name (if any) is unregistered before + %% the parent process is notified about the failure. + %% (Otherwise, the parent process could get + %% an 'already_started' error if it immediately + %% tried starting the process again.) + unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + ignore -> + unregister_name(Name0), + proc_lib:init_ack(Starter, ignore), + exit(normal); + {'EXIT', Reason} -> + unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + Else -> + Error = {bad_return_value, Else}, + proc_lib:init_ack(Starter, {error, Error}), + exit(Error) + end. + +name({local,Name}) -> Name; +name({global,Name}) -> Name; +name(Pid) when is_pid(Pid) -> Pid. + +unregister_name({local,Name}) -> + _ = (catch unregister(Name)); +unregister_name({global,Name}) -> + _ = global:unregister_name(Name); +unregister_name(Pid) when is_pid(Pid) -> + Pid. + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== +%%% --------------------------------------------------- +%%% The MAIN loop. +%%% --------------------------------------------------- +loop(Parent, Name, State, Mod, hibernate, Debug) -> + proc_lib:hibernate(?MODULE,wake_hib,[Parent, Name, State, Mod, Debug]); +loop(Parent, Name, State, Mod, Time, Debug) -> + Msg = receive + Input -> + Input + after Time -> + timeout + end, + decode_msg(Msg, Parent, Name, State, Mod, Time, Debug, false). + +wake_hib(Parent, Name, State, Mod, Debug) -> + Msg = receive + Input -> + Input + end, + decode_msg(Msg, Parent, Name, State, Mod, hibernate, Debug, true). + +decode_msg(Msg, Parent, Name, State, Mod, Time, Debug, Hib) -> + case Msg of + {system, From, Req} -> + sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, + [Name, State, Mod, Time], Hib); + {'EXIT', Parent, Reason} -> + terminate(Reason, Name, Msg, Mod, State, Debug); + _Msg when Debug =:= [] -> + handle_msg(Msg, Parent, Name, State, Mod); + _Msg -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, + Name, {in, Msg}), + handle_msg(Msg, Parent, Name, State, Mod, Debug1) + end. + +%%% --------------------------------------------------- +%%% Send/recive functions +%%% --------------------------------------------------- +do_send(Dest, Msg) -> + case catch erlang:send(Dest, Msg, [noconnect]) of + noconnect -> + spawn(erlang, send, [Dest,Msg]); + Other -> + Other + end. + +do_multi_call(Nodes, Name, Req, infinity) -> + Tag = make_ref(), + Monitors = send_nodes(Nodes, Name, Tag, Req), + rec_nodes(Tag, Monitors, Name, undefined); +do_multi_call(Nodes, Name, Req, Timeout) -> + Tag = make_ref(), + Caller = self(), + Receiver = + spawn( + fun() -> + %% Middleman process. Should be unsensitive to regular + %% exit signals. The sychronization is needed in case + %% the receiver would exit before the caller started + %% the monitor. + process_flag(trap_exit, true), + Mref = erlang:monitor(process, Caller), + receive + {Caller,Tag} -> + Monitors = send_nodes(Nodes, Name, Tag, Req), + TimerId = erlang:start_timer(Timeout, self(), ok), + Result = rec_nodes(Tag, Monitors, Name, TimerId), + exit({self(),Tag,Result}); + {'DOWN',Mref,_,_,_} -> + %% Caller died before sending us the go-ahead. + %% Give up silently. + exit(normal) + end + end), + Mref = erlang:monitor(process, Receiver), + Receiver ! {self(),Tag}, + receive + {'DOWN',Mref,_,_,{Receiver,Tag,Result}} -> + Result; + {'DOWN',Mref,_,_,Reason} -> + %% The middleman code failed. Or someone did + %% exit(_, kill) on the middleman process => Reason==killed + exit(Reason) + end. + +send_nodes(Nodes, Name, Tag, Req) -> + send_nodes(Nodes, Name, Tag, Req, []). + +send_nodes([Node|Tail], Name, Tag, Req, Monitors) + when is_atom(Node) -> + Monitor = start_monitor(Node, Name), + %% Handle non-existing names in rec_nodes. + catch {Name, Node} ! {'$gen_call', {self(), {Tag, Node}}, Req}, + send_nodes(Tail, Name, Tag, Req, [Monitor | Monitors]); +send_nodes([_Node|Tail], Name, Tag, Req, Monitors) -> + %% Skip non-atom Node + send_nodes(Tail, Name, Tag, Req, Monitors); +send_nodes([], _Name, _Tag, _Req, Monitors) -> + Monitors. + +%% Against old nodes: +%% If no reply has been delivered within 2 secs. (per node) check that +%% the server really exists and wait for ever for the answer. +%% +%% Against contemporary nodes: +%% Wait for reply, server 'DOWN', or timeout from TimerId. + +rec_nodes(Tag, Nodes, Name, TimerId) -> + rec_nodes(Tag, Nodes, Name, [], [], 2000, TimerId). + +rec_nodes(Tag, [{N,R}|Tail], Name, Badnodes, Replies, Time, TimerId ) -> + receive + {'DOWN', R, _, _, _} -> + rec_nodes(Tag, Tail, Name, [N|Badnodes], Replies, Time, TimerId); + {{Tag, N}, Reply} -> %% Tag is bound !!! + unmonitor(R), + rec_nodes(Tag, Tail, Name, Badnodes, + [{N,Reply}|Replies], Time, TimerId); + {timeout, TimerId, _} -> + unmonitor(R), + %% Collect all replies that already have arrived + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) + end; +rec_nodes(Tag, [N|Tail], Name, Badnodes, Replies, Time, TimerId) -> + %% R6 node + receive + {nodedown, N} -> + monitor_node(N, false), + rec_nodes(Tag, Tail, Name, [N|Badnodes], Replies, 2000, TimerId); + {{Tag, N}, Reply} -> %% Tag is bound !!! + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes(Tag, Tail, Name, Badnodes, + [{N,Reply}|Replies], 2000, TimerId); + {timeout, TimerId, _} -> + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + %% Collect all replies that already have arrived + rec_nodes_rest(Tag, Tail, Name, [N | Badnodes], Replies) + after Time -> + case rpc:call(N, erlang, whereis, [Name]) of + Pid when is_pid(Pid) -> % It exists try again. + rec_nodes(Tag, [N|Tail], Name, Badnodes, + Replies, infinity, TimerId); + _ -> % badnode + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes(Tag, Tail, Name, [N|Badnodes], + Replies, 2000, TimerId) + end + end; +rec_nodes(_, [], _, Badnodes, Replies, _, TimerId) -> + case catch erlang:cancel_timer(TimerId) of + false -> % It has already sent it's message + receive + {timeout, TimerId, _} -> ok + after 0 -> + ok + end; + _ -> % Timer was cancelled, or TimerId was 'undefined' + ok + end, + {Replies, Badnodes}. + +%% Collect all replies that already have arrived +rec_nodes_rest(Tag, [{N,R}|Tail], Name, Badnodes, Replies) -> + receive + {'DOWN', R, _, _, _} -> + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies); + {{Tag, N}, Reply} -> %% Tag is bound !!! + unmonitor(R), + rec_nodes_rest(Tag, Tail, Name, Badnodes, [{N,Reply}|Replies]) + after 0 -> + unmonitor(R), + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) + end; +rec_nodes_rest(Tag, [N|Tail], Name, Badnodes, Replies) -> + %% R6 node + receive + {nodedown, N} -> + monitor_node(N, false), + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies); + {{Tag, N}, Reply} -> %% Tag is bound !!! + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes_rest(Tag, Tail, Name, Badnodes, [{N,Reply}|Replies]) + after 0 -> + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) + end; +rec_nodes_rest(_Tag, [], _Name, Badnodes, Replies) -> + {Replies, Badnodes}. + + +%%% --------------------------------------------------- +%%% Monitor functions +%%% --------------------------------------------------- + +start_monitor(Node, Name) when is_atom(Node), is_atom(Name) -> + if node() =:= nonode@nohost, Node =/= nonode@nohost -> + Ref = make_ref(), + self() ! {'DOWN', Ref, process, {Name, Node}, noconnection}, + {Node, Ref}; + true -> + case catch erlang:monitor(process, {Name, Node}) of + {'EXIT', _} -> + %% Remote node is R6 + monitor_node(Node, true), + Node; + Ref when is_reference(Ref) -> + {Node, Ref} + end + end. + +%% Cancels a monitor started with Ref=erlang:monitor(_, _). +unmonitor(Ref) when is_reference(Ref) -> + erlang:demonitor(Ref), + receive + {'DOWN', Ref, _, _, _} -> + true + after 0 -> + true + end. + +%%% --------------------------------------------------- +%%% Message handling functions +%%% --------------------------------------------------- + +dispatch({'$gen_cast', Msg}, Mod, State) -> + Mod:handle_cast(Msg, State); +dispatch(Info, Mod, State) -> + Mod:handle_info(Info, State). + +handle_msg({'$gen_call', From, Msg}, Parent, Name, State, Mod) -> + case catch Mod:handle_call(Msg, From, State) of + {reply, Reply, NState} -> + reply(From, Reply), + loop(Parent, Name, NState, Mod, infinity, []); + {reply, Reply, NState, Time1} -> + reply(From, Reply), + loop(Parent, Name, NState, Mod, Time1, []); + {noreply, NState} -> + loop(Parent, Name, NState, Mod, infinity, []); + {noreply, NState, Time1} -> + loop(Parent, Name, NState, Mod, Time1, []); + {stop, Reason, Reply, NState} -> + {'EXIT', R} = + (catch terminate(Reason, Name, Msg, Mod, NState, [])), + reply(From, Reply), + exit(R); + Other -> handle_common_reply(Other, Parent, Name, Msg, Mod, State) + end; +handle_msg(Msg, Parent, Name, State, Mod) -> + Reply = (catch dispatch(Msg, Mod, State)), + handle_common_reply(Reply, Parent, Name, Msg, Mod, State). + +handle_msg({'$gen_call', From, Msg}, Parent, Name, State, Mod, Debug) -> + case catch Mod:handle_call(Msg, From, State) of + {reply, Reply, NState} -> + Debug1 = reply(Name, From, Reply, NState, Debug), + loop(Parent, Name, NState, Mod, infinity, Debug1); + {reply, Reply, NState, Time1} -> + Debug1 = reply(Name, From, Reply, NState, Debug), + loop(Parent, Name, NState, Mod, Time1, Debug1); + {noreply, NState} -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(Parent, Name, NState, Mod, infinity, Debug1); + {noreply, NState, Time1} -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(Parent, Name, NState, Mod, Time1, Debug1); + {stop, Reason, Reply, NState} -> + {'EXIT', R} = + (catch terminate(Reason, Name, Msg, Mod, NState, Debug)), + reply(Name, From, Reply, NState, Debug), + exit(R); + Other -> + handle_common_reply(Other, Parent, Name, Msg, Mod, State, Debug) + end; +handle_msg(Msg, Parent, Name, State, Mod, Debug) -> + Reply = (catch dispatch(Msg, Mod, State)), + handle_common_reply(Reply, Parent, Name, Msg, Mod, State, Debug). + +handle_common_reply(Reply, Parent, Name, Msg, Mod, State) -> + case Reply of + {noreply, NState} -> + loop(Parent, Name, NState, Mod, infinity, []); + {noreply, NState, Time1} -> + loop(Parent, Name, NState, Mod, Time1, []); + {stop, Reason, NState} -> + terminate(Reason, Name, Msg, Mod, NState, []); + {'EXIT', What} -> + terminate(What, Name, Msg, Mod, State, []); + _ -> + terminate({bad_return_value, Reply}, Name, Msg, Mod, State, []) + end. + +handle_common_reply(Reply, Parent, Name, Msg, Mod, State, Debug) -> + case Reply of + {noreply, NState} -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(Parent, Name, NState, Mod, infinity, Debug1); + {noreply, NState, Time1} -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(Parent, Name, NState, Mod, Time1, Debug1); + {stop, Reason, NState} -> + terminate(Reason, Name, Msg, Mod, NState, Debug); + {'EXIT', What} -> + terminate(What, Name, Msg, Mod, State, Debug); + _ -> + terminate({bad_return_value, Reply}, Name, Msg, Mod, State, Debug) + end. + +reply(Name, {To, Tag}, Reply, State, Debug) -> + reply({To, Tag}, Reply), + sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {out, Reply, To, State} ). + + +%%----------------------------------------------------------------- +%% Callback functions for system messages handling. +%%----------------------------------------------------------------- +system_continue(Parent, Debug, [Name, State, Mod, Time]) -> + loop(Parent, Name, State, Mod, Time, Debug). + +-spec system_terminate(_, _, _, [_]) -> no_return(). + +system_terminate(Reason, _Parent, Debug, [Name, State, Mod, _Time]) -> + terminate(Reason, Name, [], Mod, State, Debug). + +system_code_change([Name, State, Mod, Time], _Module, OldVsn, Extra) -> + case catch Mod:code_change(OldVsn, State, Extra) of + {ok, NewState} -> {ok, [Name, NewState, Mod, Time]}; + Else -> Else + end. + +%%----------------------------------------------------------------- +%% Format debug messages. Print them as the call-back module sees +%% them, not as the real erlang messages. Use trace for that. +%%----------------------------------------------------------------- +print_event(Dev, {in, Msg}, Name) -> + case Msg of + {'$gen_call', {From, _Tag}, Call} -> + io:format(Dev, "*DBG* ~p got call ~p from ~w~n", + [Name, Call, From]); + {'$gen_cast', Cast} -> + io:format(Dev, "*DBG* ~p got cast ~p~n", + [Name, Cast]); + _ -> + io:format(Dev, "*DBG* ~p got ~p~n", [Name, Msg]) + end; +print_event(Dev, {out, Msg, To, State}, Name) -> + io:format(Dev, "*DBG* ~p sent ~p to ~w, new state ~w~n", + [Name, Msg, To, State]); +print_event(Dev, {noreply, State}, Name) -> + io:format(Dev, "*DBG* ~p new state ~w~n", [Name, State]); +print_event(Dev, Event, Name) -> + io:format(Dev, "*DBG* ~p dbg ~p~n", [Name, Event]). + + +%%% --------------------------------------------------- +%%% Terminate the server. +%%% --------------------------------------------------- + +terminate(Reason, Name, Msg, Mod, State, Debug) -> + case catch Mod:terminate(Reason, State) of + {'EXIT', R} -> + error_info(R, Name, Msg, State, Debug), + exit(R); + _ -> + case Reason of + normal -> + exit(normal); + shutdown -> + exit(shutdown); + {shutdown,_}=Shutdown -> + exit(Shutdown); + _ -> + error_info(Reason, Name, Msg, State, Debug), + exit(Reason) + end + end. + +error_info(_Reason, application_controller, _Msg, _State, _Debug) -> + %% OTP-5811 Don't send an error report if it's the system process + %% application_controller which is terminating - let init take care + %% of it instead + ok; +error_info(Reason, Name, Msg, State, Debug) -> + Reason1 = + case Reason of + {undef,[{M,F,A}|MFAs]} -> + case code:is_loaded(M) of + false -> + {'module could not be loaded',[{M,F,A}|MFAs]}; + _ -> + case erlang:function_exported(M, F, length(A)) of + true -> + Reason; + false -> + {'function not exported',[{M,F,A}|MFAs]} + end + end; + _ -> + Reason + end, + format("** Generic server ~p terminating \n" + "** Last message in was ~p~n" + "** When Server state == ~p~n" + "** Reason for termination == ~n** ~p~n", + [Name, Msg, State, Reason1]), + sys:print_log(Debug), + ok. + +%%% --------------------------------------------------- +%%% Misc. functions. +%%% --------------------------------------------------- + +opt(Op, [{Op, Value}|_]) -> + {ok, Value}; +opt(Op, [_|Options]) -> + opt(Op, Options); +opt(_, []) -> + false. + +debug_options(Name, Opts) -> + case opt(debug, Opts) of + {ok, Options} -> dbg_options(Name, Options); + _ -> dbg_options(Name, []) + end. + +dbg_options(Name, []) -> + Opts = + case init:get_argument(generic_debug) of + error -> + []; + _ -> + [log, statistics] + end, + dbg_opts(Name, Opts); +dbg_options(Name, Opts) -> + dbg_opts(Name, Opts). + +dbg_opts(Name, Opts) -> + case catch sys:debug_options(Opts) of + {'EXIT',_} -> + format("~p: ignoring erroneous debug options - ~p~n", + [Name, Opts]), + []; + Dbg -> + Dbg + end. + +get_proc_name(Pid) when is_pid(Pid) -> + Pid; +get_proc_name({local, Name}) -> + case process_info(self(), registered_name) of + {registered_name, Name} -> + Name; + {registered_name, _Name} -> + exit(process_not_registered); + [] -> + exit(process_not_registered) + end; +get_proc_name({global, Name}) -> + case global:safe_whereis_name(Name) of + undefined -> + exit(process_not_registered_globally); + Pid when Pid =:= self() -> + Name; + _Pid -> + exit(process_not_registered_globally) + end. + +get_parent() -> + case get('$ancestors') of + [Parent | _] when is_pid(Parent)-> + Parent; + [Parent | _] when is_atom(Parent)-> + name_to_pid(Parent); + _ -> + exit(process_was_not_started_by_proc_lib) + end. + +name_to_pid(Name) -> + case whereis(Name) of + undefined -> + case global:safe_whereis_name(Name) of + undefined -> + exit(could_not_find_registerd_name); + Pid -> + Pid + end; + Pid -> + Pid + end. + +%%----------------------------------------------------------------- +%% Status information +%%----------------------------------------------------------------- +format_status(Opt, StatusData) -> + [PDict, SysState, Parent, Debug, [Name, State, Mod, _Time]] = StatusData, + NameTag = if is_pid(Name) -> + pid_to_list(Name); + is_atom(Name) -> + Name + end, + Header = lists:concat(["Status for generic server ", NameTag]), + Log = sys:get_debug(log, Debug, []), + Specfic = + case erlang:function_exported(Mod, format_status, 2) of + true -> + case catch Mod:format_status(Opt, [PDict, State]) of + {'EXIT', _} -> [{data, [{"State", State}]}]; + Else -> Else + end; + _ -> + [{data, [{"State", State}]}] + end, + [{header, Header}, + {data, [{"Status", SysState}, + {"Parent", Parent}, + {"Logged events", Log}]} | + Specfic]. diff --git a/lib/stdlib/src/io.erl b/lib/stdlib/src/io.erl new file mode 100644 index 0000000000..1f8076e864 --- /dev/null +++ b/lib/stdlib/src/io.erl @@ -0,0 +1,578 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(io). + +-export([put_chars/1,put_chars/2,nl/0,nl/1, + get_chars/2,get_chars/3,get_line/1,get_line/2, + get_password/0, get_password/1, + setopts/1, setopts/2, getopts/0, getopts/1]). +-export([write/1,write/2,read/1,read/2,read/3]). +-export([columns/0,columns/1,rows/0,rows/1]). +-export([fwrite/1,fwrite/2,fwrite/3,fread/2,fread/3, + format/1,format/2,format/3]). +-export([scan_erl_exprs/1,scan_erl_exprs/2,scan_erl_exprs/3, + scan_erl_form/1,scan_erl_form/2,scan_erl_form/3, + parse_erl_exprs/1,parse_erl_exprs/2,parse_erl_exprs/3, + parse_erl_form/1,parse_erl_form/2,parse_erl_form/3]). +-export([request/1,request/2,requests/1,requests/2]). + + +%%------------------------------------------------------------------------- + +-type device() :: atom() | pid(). +-type prompt() :: atom() | string(). + +%% XXX: Some uses of line() in this file may need to read erl_scan:location() +-type line() :: pos_integer(). + +%%------------------------------------------------------------------------- + +%% +%% User interface. +%% + +%% Writing and reading characters. + +to_tuple(T) when is_tuple(T) -> T; +to_tuple(T) -> {T}. + +%% Problem: the variables Other, Name and Args may collide with surrounding +%% ones. +%% Give extra args to macro, being the variables to use. +-define(O_REQUEST(Io, Request), + case request(Io, Request) of + {error, Reason} -> + [Name | Args] = tuple_to_list(to_tuple(Request)), + erlang:error(conv_reason(Name, Reason), [Name, Io | Args]); + Other -> + Other + end). + +o_request(Io, Request, Func) -> + case request(Io, Request) of + {error, Reason} -> + [_Name | Args] = tuple_to_list(to_tuple(Request)), + {'EXIT',{undef,[_Current|Mfas]}} = (catch erlang:error(undef)), + MFA = {io, Func, [Io | Args]}, + exit({conv_reason(Func, Reason),[MFA|Mfas]}); +% erlang:error(conv_reason(Name, Reason), [Name, Io | Args]); + Other -> + Other + end. + +%% Put chars takes mixed *unicode* list from R13 onwards. +-spec put_chars(iodata()) -> 'ok'. + +put_chars(Chars) -> + put_chars(default_output(), Chars). + +-spec put_chars(device(), iodata()) -> 'ok'. + +put_chars(Io, Chars) -> + o_request(Io, {put_chars,unicode,Chars}, put_chars). + +-spec nl() -> 'ok'. + +nl() -> + nl(default_output()). + +-spec nl(device()) -> 'ok'. + +nl(Io) -> +% o_request(Io, {put_chars,io_lib:nl()}). + o_request(Io, nl, nl). + +-spec columns() -> {'ok', pos_integer()} | {'error', 'enotsup'}. + +columns() -> + columns(default_output()). + +-spec columns(device()) -> {'ok', pos_integer()} | {'error', 'enotsup'}. + +columns(Io) -> + case request(Io, {get_geometry,columns}) of + N when is_integer(N), N > 0 -> + {ok,N}; + _ -> + {error,enotsup} + end. + +-spec rows() -> {'ok', pos_integer()} | {'error', 'enotsup'}. + +rows() -> + rows(default_output()). + +-spec rows(device()) -> {'ok', pos_integer()} | {'error', 'enotsup'}. + +rows(Io) -> + case request(Io,{get_geometry,rows}) of + N when is_integer(N), N > 0 -> + {ok,N}; + _ -> + {error,enotsup} + end. + +-spec get_chars(prompt(), non_neg_integer()) -> iodata() | 'eof'. + +get_chars(Prompt, N) -> + get_chars(default_input(), Prompt, N). + +-spec get_chars(device(), prompt(), non_neg_integer()) -> iodata() | 'eof'. + +get_chars(Io, Prompt, N) when is_integer(N), N >= 0 -> + request(Io, {get_chars,unicode,Prompt,N}). + +-spec get_line(prompt()) -> iodata() | 'eof' | {'error', term()}. + +get_line(Prompt) -> + get_line(default_input(), Prompt). + +-spec get_line(device(), prompt()) -> iodata() | 'eof' | {'error', term()}. + +get_line(Io, Prompt) -> + request(Io, {get_line,unicode,Prompt}). + +get_password() -> + get_password(default_input()). + +get_password(Io) -> + request(Io, {get_password,unicode}). + +-type encoding() :: 'latin1' | 'unicode' | 'utf8' | 'utf16' | 'utf32' + | {'utf16', 'big' | 'little'} | {'utf32','big' | 'little'}. +-type expand_fun() :: fun((term()) -> {'yes'|'no', string(), [string(), ...]}). +-type opt_pair() :: {'binary', boolean()} + | {'echo', boolean()} + | {'expand_fun', expand_fun()} + | {'encoding', encoding()}. + +-spec getopts() -> [opt_pair()]. + +getopts() -> + getopts(default_input()). + +-spec getopts(device()) -> [opt_pair()]. + +getopts(Io) -> + request(Io, getopts). + +-type setopt() :: 'binary' | 'list' | opt_pair(). + +-spec setopts([setopt()]) -> 'ok' | {'error', term()}. + +setopts(Opts) -> + setopts(default_input(), Opts). + +-spec setopts(device(), [setopt()]) -> 'ok' | {'error', term()}. + +setopts(Io, Opts) -> + request(Io, {setopts, Opts}). + +%% Writing and reading Erlang terms. + +-spec write(term()) -> 'ok'. + +write(Term) -> + write(default_output(), Term). + +-spec write(device(), term()) -> 'ok'. + +write(Io, Term) -> + o_request(Io, {write,Term}, write). + + +-spec read(prompt()) -> + {'ok', term()} | 'eof' | {'error', erl_scan:error_info()}. + +% Read does not use get_until as erl_scan does not work with unicode +% XXX:PaN fixme? +read(Prompt) -> + read(default_input(), Prompt). + +-spec read(device(), prompt()) -> + {'ok', term()} | 'eof' | {'error', erl_scan:error_info()}. + +read(Io, Prompt) -> + case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[1]}) of + {ok,Toks,_EndLine} -> + erl_parse:parse_term(Toks); +% {error, Reason} when atom(Reason) -> +% erlang:error(conv_reason(read, Reason), [Io, Prompt]); + {error,E,_EndLine} -> + {error,E}; + {eof,_EndLine} -> + eof; + Other -> + Other + end. + +-spec read(device(), prompt(), line()) -> + {'ok', term(), line()} | {'eof', line()} | + {'error', erl_scan:error_info(), line()}. + +read(Io, Prompt, StartLine) when is_integer(StartLine) -> + case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[StartLine]}) of + {ok,Toks,EndLine} -> + case erl_parse:parse_term(Toks) of + {ok,Term} -> {ok,Term,EndLine}; + {error,ErrorInfo} -> {error,ErrorInfo,EndLine} + end; + {error,_E,_EndLine} = Error -> + Error; + {eof,_EndLine} = Eof -> + Eof; + Other -> + Other + end. + +%% Formatted writing and reading. + +conv_reason(_, arguments) -> badarg; +conv_reason(_, terminated) -> terminated; +conv_reason(_, {no_translation,_,_}) -> no_translation; +conv_reason(_, _Reason) -> badarg. + +-type format() :: atom() | string() | binary(). + +-spec fwrite(format()) -> 'ok'. + +fwrite(Format) -> + format(Format). + +-spec fwrite(format(), [term()]) -> 'ok'. + +fwrite(Format, Args) -> + format(Format, Args). + +-spec fwrite(device(), format(), [term()]) -> 'ok'. + +fwrite(Io, Format, Args) -> + format(Io, Format, Args). + +-spec fread(prompt(), format()) -> {'ok', [term()]} | 'eof' | {'error',term()}. + +fread(Prompt, Format) -> + fread(default_input(), Prompt, Format). + +-spec fread(device(), prompt(), format()) -> + {'ok', [term()]} | 'eof' | {'error',term()}. + +fread(Io, Prompt, Format) -> + case request(Io, {fread,Prompt,Format}) of +% {error, Reason} when atom(Reason) -> +% erlang:error(conv_reason(fread, Reason), [Io, Prompt, Format]); + Other -> + Other + end. + +-spec format(format()) -> 'ok'. + +format(Format) -> + format(Format, []). + +-spec format(format(), [term()]) -> 'ok'. + +format(Format, Args) -> + format(default_output(), Format, Args). + +-spec format(device(), format(), [term()]) -> 'ok'. + +format(Io, Format, Args) -> + o_request(Io, {format,Format,Args}, format). + +%% Scanning Erlang code. + +-spec scan_erl_exprs(prompt()) -> erl_scan:tokens_result(). + +scan_erl_exprs(Prompt) -> + scan_erl_exprs(default_input(), Prompt, 1). + +-spec scan_erl_exprs(device(), prompt()) -> erl_scan:tokens_result(). + +scan_erl_exprs(Io, Prompt) -> + scan_erl_exprs(Io, Prompt, 1). + +-spec scan_erl_exprs(device(), prompt(), line()) -> erl_scan:tokens_result(). + +scan_erl_exprs(Io, Prompt, Pos0) -> + request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}). + +-spec scan_erl_form(prompt()) -> erl_scan:tokens_result(). + +scan_erl_form(Prompt) -> + scan_erl_form(default_input(), Prompt, 1). + +-spec scan_erl_form(device(), prompt()) -> erl_scan:tokens_result(). + +scan_erl_form(Io, Prompt) -> + scan_erl_form(Io, Prompt, 1). + +-spec scan_erl_form(device(), prompt(), line()) -> erl_scan:tokens_result(). + +scan_erl_form(Io, Prompt, Pos0) -> + request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}). + +%% Parsing Erlang code. + +-type erl_parse_expr_list() :: [_]. %% XXX: should be imported from erl_parse + +-type parse_ret() :: {'ok', erl_parse_expr_list(), line()} + | {'eof', line()} + | {'error', erl_scan:error_info(), line()}. + +-spec parse_erl_exprs(prompt()) -> parse_ret(). + +parse_erl_exprs(Prompt) -> + parse_erl_exprs(default_input(), Prompt, 1). + +-spec parse_erl_exprs(device(), prompt()) -> parse_ret(). + +parse_erl_exprs(Io, Prompt) -> + parse_erl_exprs(Io, Prompt, 1). + +-spec parse_erl_exprs(device(), prompt(), line()) -> parse_ret(). + +parse_erl_exprs(Io, Prompt, Pos0) -> + case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}) of + {ok,Toks,EndPos} -> + case erl_parse:parse_exprs(Toks) of + {ok,Exprs} -> {ok,Exprs,EndPos}; + {error,E} -> {error,E,EndPos} + end; + Other -> + Other + end. + +-type erl_parse_absform() :: _. %% XXX: should be imported from erl_parse + +-type parse_form_ret() :: {'ok', erl_parse_absform(), line()} + | {'eof', line()} + | {'error', erl_scan:error_info(), line()}. + +-spec parse_erl_form(prompt()) -> parse_form_ret(). + +parse_erl_form(Prompt) -> + parse_erl_form(default_input(), Prompt, 1). + +-spec parse_erl_form(device(), prompt()) -> parse_form_ret(). + +parse_erl_form(Io, Prompt) -> + parse_erl_form(Io, Prompt, 1). + +-spec parse_erl_form(device(), prompt(), line()) -> parse_form_ret(). + +parse_erl_form(Io, Prompt, Pos0) -> + case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}) of + {ok,Toks,EndPos} -> + case erl_parse:parse_form(Toks) of + {ok,Exprs} -> {ok,Exprs,EndPos}; + {error,E} -> {error,E,EndPos} + end; + Other -> + Other + end. + +%% Miscellaneous functions. + +request(Request) -> + request(default_output(), Request). + +request(standard_io, Request) -> + request(group_leader(), Request); +request(Pid, Request) when is_pid(Pid) -> + execute_request(Pid, io_request(Pid, Request)); +request(Name, Request) when is_atom(Name) -> + case whereis(Name) of + undefined -> + {error, arguments}; + Pid -> + request(Pid, Request) + end. + +execute_request(Pid, {Convert,Converted}) -> + Mref = erlang:monitor(process, Pid), + Pid ! {io_request,self(),Pid,Converted}, + if + Convert -> + convert_binaries(wait_io_mon_reply(Pid, Mref)); + true -> + wait_io_mon_reply(Pid, Mref) + end. + +requests(Requests) -> %Requests as atomic action + requests(default_output(), Requests). + +requests(standard_io, Requests) -> %Requests as atomic action + requests(group_leader(), Requests); +requests(Pid, Requests) when is_pid(Pid) -> + {Convert, Converted} = io_requests(Pid, Requests), + execute_request(Pid,{Convert,{requests,Converted}}); +requests(Name, Requests) when is_atom(Name) -> + case whereis(Name) of + undefined -> + {error, arguments}; + Pid -> + requests(Pid, Requests) + end. + + +default_input() -> + group_leader(). + +default_output() -> + group_leader(). + +wait_io_mon_reply(From, Mref) -> + receive + {io_reply, From, Reply} -> + erlang:demonitor(Mref), + receive + {'DOWN', Mref, _, _, _} -> true + after 0 -> true + end, + Reply; + {'EXIT', From, _What} -> + receive + {'DOWN', Mref, _, _, _} -> true + after 0 -> true + end, + {error,terminated}; + {'DOWN', Mref, _, _, _} -> + receive + {'EXIT', From, _What} -> true + after 0 -> true + end, + {error,terminated} + end. + + +%% io_requests(Requests) +%% Transform requests into correct i/o server messages. Only handle the +%% one we KNOW must be changed, others, including incorrect ones, are +%% passed straight through. Perform a flatten on the request list. + +io_requests(Pid, Rs) -> + io_requests(Pid, Rs, [], []). + +io_requests(Pid, [{requests,Rs1}|Rs], Cont, Tail) -> + io_requests(Pid, Rs1, [Rs|Cont], Tail); +io_requests(Pid, [R], [], _Tail) -> + {Conv,Request} = io_request(Pid, R), + {Conv,[Request]}; +io_requests(Pid, [R|Rs], Cont, Tail) -> + {_,Request} = io_request(Pid, R), + {Conv,Requests} = io_requests(Pid, Rs, Cont, Tail), + {Conv,[Request|Requests]}; +io_requests(Pid, [], [Rs|Cont], Tail) -> + io_requests(Pid, Rs, Cont, Tail); +io_requests(_Pid, [], [], _Tail) -> + {false,[]}. + + +bc_req(Pid,{Op,Enc,Param},MaybeConvert) -> + case net_kernel:dflag_unicode_io(Pid) of + true -> + {false,{Op,Enc,Param}}; + false -> + {MaybeConvert,{Op,Param}} + end; +bc_req(Pid,{Op,Enc,P,F},MaybeConvert) -> + case net_kernel:dflag_unicode_io(Pid) of + true -> + {false,{Op,Enc,P,F}}; + false -> + {MaybeConvert,{Op,P,F}} + end; +bc_req(Pid, {Op,Enc,M,F,A},MaybeConvert) -> + case net_kernel:dflag_unicode_io(Pid) of + true -> + {false,{Op,Enc,M,F,A}}; + false -> + {MaybeConvert,{Op,M,F,A}} + end; +bc_req(Pid, {Op,Enc,P,M,F,A},MaybeConvert) -> + case net_kernel:dflag_unicode_io(Pid) of + true -> + {false,{Op,Enc,P,M,F,A}}; + false -> + {MaybeConvert,{Op,P,M,F,A}} + end; +bc_req(Pid,{Op,Enc},MaybeConvert) -> + case net_kernel:dflag_unicode_io(Pid) of + true -> + {false,{Op, Enc}}; + false -> + {MaybeConvert,Op} + end. + +io_request(Pid, {write,Term}) -> + bc_req(Pid,{put_chars,unicode,io_lib,write,[Term]},false); +io_request(Pid, {format,Format,Args}) -> + bc_req(Pid,{put_chars,unicode,io_lib,format,[Format,Args]},false); +io_request(Pid, {fwrite,Format,Args}) -> + bc_req(Pid,{put_chars,unicode,io_lib,fwrite,[Format,Args]},false); +io_request(Pid, nl) -> + bc_req(Pid,{put_chars,unicode,io_lib:nl()},false); +io_request(Pid, {put_chars,Enc,Chars}=Request0) + when is_list(Chars), node(Pid) =:= node() -> + %% Convert to binary data if the I/O server is guaranteed to be new + Request = + case catch unicode:characters_to_binary(Chars,Enc) of + Binary when is_binary(Binary) -> + {put_chars,Enc,Binary}; + _ -> + Request0 + end, + {false,Request}; +io_request(Pid, {put_chars,Enc,Chars}=Request0) + when is_list(Chars) -> + case net_kernel:dflag_unicode_io(Pid) of + true -> + case catch unicode:characters_to_binary(Chars,Enc,unicode) of + Binary when is_binary(Binary) -> + {false,{put_chars,unicode,Binary}}; + _ -> + {false,Request0} + end; + false -> + %% Convert back to old style put_chars message... + case catch unicode:characters_to_binary(Chars,Enc,latin1) of + Binary when is_binary(Binary) -> + {false,{put_chars,Binary}}; + _ -> + {false,{put_chars,Chars}} + end + end; +io_request(Pid, {fread,Prompt,Format}) -> + bc_req(Pid,{get_until,unicode,Prompt,io_lib,fread,[Format]},true); +io_request(Pid, {get_until,Enc,Prompt,M,F,A}) -> + bc_req(Pid,{get_until,Enc,Prompt,M,F,A},true); +io_request(Pid, {get_chars,Enc,Prompt,N}) -> + bc_req(Pid,{get_chars,Enc,Prompt,N},true); +io_request(Pid, {get_line,Enc,Prompt}) -> + bc_req(Pid,{get_line,Enc,Prompt},true); +io_request(Pid, {get_password,Enc}) -> + bc_req(Pid,{get_password, Enc},true); +io_request(_Pid, R) -> %Pass this straight through + {false,R}. + +convert_binaries(Bin) when is_binary(Bin) -> + unicode:characters_to_binary(Bin,latin1,unicode); +convert_binaries(Else) -> + Else. diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl new file mode 100644 index 0000000000..2d3c86e4ea --- /dev/null +++ b/lib/stdlib/src/io_lib.erl @@ -0,0 +1,688 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% This module is a library of useful i/o functions. It is hoped that the +%% functions defined in it are basic enough to be used without modification +%% as components of more complex utilities. +%% +%% It is completely self-contained and uses no other modules. Its own +%% utilities are exported. +%% +%% Most of the code here is derived from the original prolog versions and +%% from similar code written by Joe Armstrong and myself. +%% +%% This module has been split into seperate modules: +%% io_lib - basic write and utilities +%% io_lib_format - formatted output +%% io_lib_fread - formatted input +%% io_lib_pretty - term prettyprinter + +%% For handling ISO 8859-1 (Latin-1) we use the following type +%% information: +%% +%% 000 - 037 NUL - US control +%% 040 - 057 SPC - / punctuation +%% 060 - 071 0 - 9 digit +%% 072 - 100 : - @ punctuation +%% 101 - 132 A - Z uppercase +%% 133 - 140 [ - ` punctuation +%% 141 - 172 a - z lowercase +%% 173 - 176 { - ~ punctuation +%% 177 DEL control +%% 200 - 237 control +%% 240 - 277 NBSP - ¿ punctuation +%% 300 - 326 À - Ö uppercase +%% 327 × punctuation +%% 330 - 336 Ø - Þ uppercase +%% 337 - 366 ß - ö lowercase +%% 367 ÷ punctuation +%% 370 - 377 ø - ÿ lowercase +%% +%% Many punctuation characters region have special meaning. Must +%% watch using × \327, very close to x \170 + +-module(io_lib). + +-export([fwrite/2,fread/2,fread/3,format/2]). +-export([print/1,print/4,indentation/2]). + +-export([write/1,write/2,write/3,nl/0,format_prompt/1]). +-export([write_atom/1,write_string/1,write_string/2,write_unicode_string/1, + write_unicode_string/2, write_char/1, write_unicode_char/1]). + +-export([quote_atom/2, char_list/1, unicode_char_list/1, + deep_char_list/1, deep_unicode_char_list/1, + printable_list/1, printable_unicode_list/1]). + +%% Utilities for collecting characters. +-export([collect_chars/3, collect_chars/4, + collect_line/2, collect_line/3, collect_line/4, + get_until/3, get_until/4]). + +%%---------------------------------------------------------------------- + + %% XXX: overapproximates a deep list of (unicode) characters +-type chars() :: [_]. +-type depth() :: -1 | non_neg_integer(). + +%%---------------------------------------------------------------------- + +%% Interface calls to sub-modules. + +-spec fwrite(io:format(), [term()]) -> chars(). + +fwrite(Format, Args) -> + format(Format, Args). + +-spec fread(string(), string()) -> io_lib_fread:fread_2_ret(). + +fread(Chars, Format) -> + io_lib_fread:fread(Chars, Format). + +-spec fread(io_lib_fread:continuation(), string(), string()) -> + io_lib_fread:fread_3_ret(). + +fread(Cont, Chars, Format) -> + io_lib_fread:fread(Cont, Chars, Format). + +-spec format(io:format(), [term()]) -> chars(). + +format(Format, Args) -> + case catch io_lib_format:fwrite(Format, Args) of + {'EXIT',_} -> + erlang:error(badarg, [Format, Args]); + Other -> + Other + end. + +-spec print(term()) -> chars(). + +print(Term) -> + io_lib_pretty:print(Term). + +-spec print(term(), non_neg_integer(), non_neg_integer(), depth()) -> chars(). + +print(Term, Column, LineLength, Depth) -> + io_lib_pretty:print(Term, Column, LineLength, Depth). + +-spec indentation(string(), integer()) -> integer(). + +indentation(Chars, Current) -> + io_lib_format:indentation(Chars, Current). + + +%% Format an IO-request prompt (handles formatting errors safely). +%% Atoms, binaries, and iolists can be used as-is, and will be +%% printed without any additional quotes. +%% Note that the output is a deep string, and not an iolist (i.e., +%% it may be deep, but never contains binaries, due to the "~s"). + +-spec format_prompt(term()) -> chars(). + +format_prompt({format,Format,Args}) -> + format_prompt(Format,Args); +format_prompt(Prompt) + when is_list(Prompt); is_atom(Prompt); is_binary(Prompt) -> + format_prompt("~s", [Prompt]); +format_prompt(Prompt) -> + format_prompt("~p", [Prompt]). + +format_prompt(Format, Args) -> + case catch io_lib:format(Format, Args) of + {'EXIT',_} -> "???"; + List -> List + end. + + +%% write(Term) +%% write(Term, Depth) +%% write(Term, Depth, Pretty) +%% Return a (non-flattened) list of characters giving a printed +%% representation of the term. write/3 is for backward compatibility. + +-spec write(term()) -> chars(). + +write(Term) -> write(Term, -1). + +-spec write(term(), depth(), boolean()) -> chars(). + +write(Term, D, true) -> + io_lib_pretty:print(Term, 1, 80, D); +write(Term, D, false) -> + write(Term, D). + +-spec write(term(), depth()) -> chars(). + +write(_Term, 0) -> "..."; +write(Term, _D) when is_integer(Term) -> integer_to_list(Term); +write(Term, _D) when is_float(Term) -> io_lib_format:fwrite_g(Term); +write(Atom, _D) when is_atom(Atom) -> write_atom(Atom); +write(Term, _D) when is_port(Term) -> write_port(Term); +write(Term, _D) when is_pid(Term) -> pid_to_list(Term); +write(Term, _D) when is_reference(Term) -> write_ref(Term); +write(<<_/bitstring>>=Term, D) -> write_binary(Term, D); +write([], _D) -> "[]"; +write({}, _D) -> "{}"; +write([H|T], D) -> + if + D =:= 1 -> "[...]"; + true -> + [$[,[write(H, D-1)|write_tail(T, D-1, $|)],$]] + end; +write(F, _D) when is_function(F) -> + erlang:fun_to_list(F); +write(T, D) when is_tuple(T) -> + if + D =:= 1 -> "{...}"; + true -> + [${, + [write(element(1, T), D-1)| + write_tail(tl(tuple_to_list(T)), D-1, $,)], + $}] + end. + +%% write_tail(List, Depth, CharacterBeforeDots) +%% Test the terminating case first as this looks better with depth. + +write_tail([], _D, _S) -> ""; +write_tail(_, 1, S) -> [S | "..."]; +write_tail([H|T], D, S) -> + [$,,write(H, D-1)|write_tail(T, D-1, S)]; +write_tail(Other, D, S) -> + [S,write(Other, D-1)]. + +write_port(Port) -> + erlang:port_to_list(Port). + +write_ref(Ref) -> + erlang:ref_to_list(Ref). + +write_binary(B, D) when is_integer(D) -> + [$<,$<,write_binary_body(B, D),$>,$>]. + +write_binary_body(_B, 1) -> + "..."; +write_binary_body(<<>>, _D) -> + ""; +write_binary_body(<>, _D) -> + [integer_to_list(X)]; +write_binary_body(<>, D) -> + [integer_to_list(X),$,|write_binary_body(Rest, D-1)]; +write_binary_body(B, _D) -> + L = bit_size(B), + <> = B, + [integer_to_list(X),$:,integer_to_list(L)]. + +%% write_atom(Atom) -> [Char] +%% Generate the list of characters needed to print an atom. + +-spec write_atom(atom()) -> chars(). + +write_atom(Atom) -> + Chars = atom_to_list(Atom), + case quote_atom(Atom, Chars) of + true -> + write_string(Chars, $'); %' + false -> + Chars + end. + +%% quote_atom(Atom, CharList) +%% Return 'true' if atom with chars in CharList needs to be quoted, else +%% return 'false'. + +-spec quote_atom(atom(), chars()) -> boolean(). + +quote_atom(Atom, Cs0) -> + case erl_scan:reserved_word(Atom) of + true -> true; + false -> + case Cs0 of + [C|Cs] when C >= $a, C =< $z -> + not name_chars(Cs); + [C|Cs] when C >= $ß, C =< $ÿ, C =/= $÷ -> + not name_chars(Cs); + _ -> true + end + end. + +name_chars([C|Cs]) -> + case name_char(C) of + true -> name_chars(Cs); + false -> false + end; +name_chars([]) -> true. + +name_char(C) when C >= $a, C =< $z -> true; +name_char(C) when C >= $ß, C =< $ÿ, C =/= $÷ -> true; +name_char(C) when C >= $A, C =< $Z -> true; +name_char(C) when C >= $À, C =< $Þ, C =/= $× -> true; +name_char(C) when C >= $0, C =< $9 -> true; +name_char($_) -> true; +name_char($@) -> true; +name_char(_) -> false. + +%% write_string([Char]) -> [Char] +%% Generate the list of characters needed to print a string. + +-spec write_string(string()) -> chars(). + +write_string(S) -> + write_string(S, $"). %" + +-spec write_string(string(), char()) -> chars(). + +write_string(S, Q) -> + [Q|write_string1(latin1, S, Q)]. + +write_unicode_string(S) -> + write_unicode_string(S, $"). %" + +write_unicode_string(S, Q) -> + [Q|write_string1(unicode, S, Q)]. + +write_string1(_,[], Q) -> + [Q]; +write_string1(Enc,[C|Cs], Q) -> + string_char(Enc,C, Q, write_string1(Enc,Cs, Q)). + +string_char(_,Q, Q, Tail) -> [$\\,Q|Tail]; %Must check these first! +string_char(_,$\\, _, Tail) -> [$\\,$\\|Tail]; +string_char(_,C, _, Tail) when C >= $\s, C =< $~ -> + [C|Tail]; +string_char(latin1,C, _, Tail) when C >= $\240, C =< $\377 -> + [C|Tail]; +string_char(unicode,C, _, Tail) when C >= $\240 -> + "\\x{"++erlang:integer_to_list(C, 16)++"}"++Tail; +string_char(_,$\n, _, Tail) -> [$\\,$n|Tail]; %\n = LF +string_char(_,$\r, _, Tail) -> [$\\,$r|Tail]; %\r = CR +string_char(_,$\t, _, Tail) -> [$\\,$t|Tail]; %\t = TAB +string_char(_,$\v, _, Tail) -> [$\\,$v|Tail]; %\v = VT +string_char(_,$\b, _, Tail) -> [$\\,$b|Tail]; %\b = BS +string_char(_,$\f, _, Tail) -> [$\\,$f|Tail]; %\f = FF +string_char(_,$\e, _, Tail) -> [$\\,$e|Tail]; %\e = ESC +string_char(_,$\d, _, Tail) -> [$\\,$d|Tail]; %\d = DEL +string_char(_,C, _, Tail) when C < $\240-> %Other control characters. + C1 = (C bsr 6) + $0, + C2 = ((C bsr 3) band 7) + $0, + C3 = (C band 7) + $0, + [$\\,C1,C2,C3|Tail]. + +%% write_char(Char) -> [char()]. +%% Generate the list of characters needed to print a character constant. +%% Must special case SPACE, $\s, here. + +-spec write_char(char()) -> chars(). + +write_char($\s) -> "$\\s"; %Must special case this. +write_char(C) when is_integer(C), C >= $\000, C =< $\377 -> + [$$|string_char(latin1,C, -1, [])]. + +write_unicode_char(Ch) when Ch =< 255 -> + write_char(Ch); +write_unicode_char(Uni) -> + [$$|string_char(unicode,Uni, -1, [])]. + +%% char_list(CharList) +%% deep_char_list(CharList) +%% Return true if CharList is a (possibly deep) list of characters, else +%% false. + +-spec char_list(term()) -> boolean(). + +char_list([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 -> + char_list(Cs); +char_list([]) -> true; +char_list(_) -> false. %Everything else is false + +-spec unicode_char_list(term()) -> boolean(). + +unicode_char_list([C|Cs]) when is_integer(C), C >= 0, C < 16#D800; + is_integer(C), C > 16#DFFF, C < 16#FFFE; + is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> + unicode_char_list(Cs); +unicode_char_list([]) -> true; +unicode_char_list(_) -> false. %Everything else is false + +-spec deep_char_list(term()) -> boolean(). + +deep_char_list(Cs) -> + deep_char_list(Cs, []). + +deep_char_list([C|Cs], More) when is_list(C) -> + deep_char_list(C, [Cs|More]); +deep_char_list([C|Cs], More) when is_integer(C), C >= $\000, C =< $\377 -> + deep_char_list(Cs, More); +deep_char_list([], [Cs|More]) -> + deep_char_list(Cs, More); +deep_char_list([], []) -> true; +deep_char_list(_, _More) -> %Everything else is false + false. + +-spec deep_unicode_char_list(term()) -> boolean(). + +deep_unicode_char_list(Cs) -> + deep_unicode_char_list(Cs, []). + +deep_unicode_char_list([C|Cs], More) when is_list(C) -> + deep_unicode_char_list(C, [Cs|More]); +deep_unicode_char_list([C|Cs], More) + when is_integer(C), C >= 0, C < 16#D800; + is_integer(C), C > 16#DFFF, C < 16#FFFE; + is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> + deep_unicode_char_list(Cs, More); +deep_unicode_char_list([], [Cs|More]) -> + deep_unicode_char_list(Cs, More); +deep_unicode_char_list([], []) -> true; +deep_unicode_char_list(_, _More) -> %Everything else is false + false. + +%% printable_list([Char]) -> boolean() +%% Return true if CharList is a list of printable characters, else +%% false. + +-spec printable_list(term()) -> boolean(). + +printable_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> + printable_list(Cs); +printable_list([C|Cs]) when is_integer(C), C >= $\240, C =< $\377 -> + printable_list(Cs); +printable_list([$\n|Cs]) -> printable_list(Cs); +printable_list([$\r|Cs]) -> printable_list(Cs); +printable_list([$\t|Cs]) -> printable_list(Cs); +printable_list([$\v|Cs]) -> printable_list(Cs); +printable_list([$\b|Cs]) -> printable_list(Cs); +printable_list([$\f|Cs]) -> printable_list(Cs); +printable_list([$\e|Cs]) -> printable_list(Cs); +printable_list([]) -> true; +printable_list(_) -> false. %Everything else is false + +%% printable_unicode_list([Char]) -> boolean() +%% Return true if CharList is a list of printable characters, else +%% false. The notion of printable in Unicode terms is somewhat floating. +%% Everything that is not a control character and not invalid unicode +%% will be considered printable. + +-spec printable_unicode_list(term()) -> boolean(). + +printable_unicode_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> + printable_unicode_list(Cs); +printable_unicode_list([C|Cs]) + when is_integer(C), C >= 16#A0, C < 16#D800; + is_integer(C), C > 16#DFFF, C < 16#FFFE; + is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> + printable_unicode_list(Cs); +printable_unicode_list([$\n|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\r|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\t|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\v|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\b|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\f|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\e|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([]) -> true; +printable_unicode_list(_) -> false. %Everything else is false + +%% List = nl() +%% Return a list of characters to generate a newline. + +-spec nl() -> string(). + +nl() -> + "\n". + +%% +%% Utilities for collecting characters in input files +%% + +count_and_find_utf8(Bin,N) -> + cafu(Bin,N,0,0,none). + +cafu(<<>>,_N,Count,_ByteCount,SavePos) -> + {Count,SavePos}; +cafu(<<_/utf8,Rest/binary>>, 0, Count, ByteCount, _SavePos) -> + cafu(Rest,-1,Count+1,0,ByteCount); +cafu(<<_/utf8,Rest/binary>>, N, Count, _ByteCount, SavePos) when N < 0 -> + cafu(Rest,-1,Count+1,0,SavePos); +cafu(<<_/utf8,Rest/binary>> = Whole, N, Count, ByteCount, SavePos) -> + Delta = byte_size(Whole) - byte_size(Rest), + cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos); +cafu(_Other,_N,Count,_ByteCount,SavePos) -> % Non Utf8 character at end + {Count,SavePos}. + +%% collect_chars(State, Data, Count). New in R9C. +%% Returns: +%% {stop,Result,RestData} +%% NewState +%%% BC (with pre-R13). +collect_chars(Tag, Data, N) -> + collect_chars(Tag, Data, latin1, N). + +%% Now we are aware of encoding... +collect_chars(start, Data, unicode, N) when is_binary(Data) -> + {Size,Npos} = count_and_find_utf8(Data,N), + if Size > N -> + {B1,B2} = split_binary(Data, Npos), + {stop,B1,B2}; + Size < N -> + {binary,[Data],N-Size}; + true -> + {stop,Data,eof} + end; +collect_chars(start, Data, latin1, N) when is_binary(Data) -> + Size = byte_size(Data), + if Size > N -> + {B1,B2} = split_binary(Data, N), + {stop,B1,B2}; + Size < N -> + {binary,[Data],N-Size}; + true -> + {stop,Data,eof} + end; +collect_chars(start,Data,_,N) when is_list(Data) -> + collect_chars_list([], N, Data); +collect_chars(start, eof, _,_) -> + {stop,eof,eof}; +collect_chars({binary,Stack,_N}, eof, _,_) -> + {stop,binrev(Stack),eof}; +collect_chars({binary,Stack,N}, Data,unicode, _) -> + {Size,Npos} = count_and_find_utf8(Data,N), + if Size > N -> + {B1,B2} = split_binary(Data, Npos), + {stop,binrev(Stack, [B1]),B2}; + Size < N -> + {binary,[Data|Stack],N-Size}; + true -> + {stop,binrev(Stack, [Data]),eof} + end; +collect_chars({binary,Stack,N}, Data,latin1, _) -> + Size = byte_size(Data), + if Size > N -> + {B1,B2} = split_binary(Data, N), + {stop,binrev(Stack, [B1]),B2}; + Size < N -> + {binary,[Data|Stack],N-Size}; + true -> + {stop,binrev(Stack, [Data]),eof} + end; +collect_chars({list,Stack,N}, Data, _,_) -> + collect_chars_list(Stack, N, Data); +%% collect_chars(Continuation, MoreChars, Count) +%% Returns: +%% {done,Result,RestChars} +%% {more,Continuation} + +collect_chars([], Chars, _, N) -> + collect_chars1(N, Chars, []); +collect_chars({Left,Sofar}, Chars, _, _N) -> + collect_chars1(Left, Chars, Sofar). + +collect_chars1(N, Chars, Stack) when N =< 0 -> + {done,lists:reverse(Stack, []),Chars}; +collect_chars1(N, [C|Rest], Stack) -> + collect_chars1(N-1, Rest, [C|Stack]); +collect_chars1(_N, eof, []) -> + {done,eof,[]}; +collect_chars1(_N, eof, Stack) -> + {done,lists:reverse(Stack, []),[]}; +collect_chars1(N, [], Stack) -> + {more,{N,Stack}}. + +collect_chars_list(Stack, 0, Data) -> + {stop,lists:reverse(Stack, []),Data}; +collect_chars_list(Stack, _N, eof) -> + {stop,lists:reverse(Stack, []),eof}; +collect_chars_list(Stack, N, []) -> + {list,Stack,N}; +collect_chars_list(Stack,N, [H|T]) -> + collect_chars_list([H|Stack], N-1, T). + +%% collect_line(Continuation, MoreChars) +%% Returns: +%% {done,Result,RestChars} +%% {more,Continuation} +%% +%% XXX Can be removed when compatibility with pre-R12B-5 nodes +%% is no longer required. +%% +collect_line([], Chars) -> + collect_line1(Chars, []); +collect_line({SoFar}, More) -> + collect_line1(More, SoFar). + +collect_line1([$\r, $\n|Rest], Stack) -> + collect_line1([$\n|Rest], Stack); +collect_line1([$\n|Rest], Stack) -> + {done,lists:reverse([$\n|Stack], []),Rest}; +collect_line1([C|Rest], Stack) -> + collect_line1(Rest, [C|Stack]); +collect_line1(eof, []) -> + {done,eof,[]}; +collect_line1(eof, Stack) -> + {done,lists:reverse(Stack, []),[]}; +collect_line1([], Stack) -> + {more,{Stack}}. + +%% collect_line(State, Data, _). New in R9C. +%% Returns: +%% {stop,Result,RestData} +%% NewState +%%% BC (with pre-R13). +collect_line(Tag, Data, Any) -> + collect_line(Tag, Data, latin1, Any). + +%% Now we are aware of encoding... +collect_line(start, Data, Encoding, _) when is_binary(Data) -> + collect_line_bin(Data, Data, [], Encoding); +collect_line(start, Data, _, _) when is_list(Data) -> + collect_line_list(Data, []); +collect_line(start, eof, _, _) -> + {stop,eof,eof}; +collect_line(Stack, Data, Encoding, _) when is_binary(Data) -> + collect_line_bin(Data, Data, Stack, Encoding); +collect_line(Stack, Data, _, _) when is_list(Data) -> + collect_line_list(Data, Stack); +collect_line([B|_]=Stack, eof, _, _) when is_binary(B) -> + {stop,binrev(Stack),eof}; +collect_line(Stack, eof, _, _) -> + {stop,lists:reverse(Stack, []),eof}. + + +collect_line_bin(<<$\n,T/binary>>, Data, Stack0, _) -> + N = byte_size(Data) - byte_size(T), + <> = Data, + case Stack0 of + [] -> + {stop,Line,T}; + [<<$\r>>|Stack] when N =:= 1 -> + {stop,binrev(Stack, [$\n]),T}; + _ -> + {stop,binrev(Stack0, [Line]),T} + end; +collect_line_bin(<<$\r,$\n,T/binary>>, Data, Stack, _) -> + N = byte_size(Data) - byte_size(T) - 2, + <> = Data, + {stop,binrev(Stack, [Line,$\n]),T}; +collect_line_bin(<<$\r>>, Data0, Stack, _) -> + N = byte_size(Data0) - 1, + <> = Data0, + [<<$\r>>,Data|Stack]; +collect_line_bin(<<_,T/binary>>, Data, Stack, Enc) -> + collect_line_bin(T, Data, Stack, Enc); +collect_line_bin(<<>>, Data, Stack, _) -> + %% Need more data here. + [Data|Stack]. + +collect_line_list([$\n|T], [$\r|Stack]) -> + {stop,lists:reverse(Stack, [$\n]),T}; +collect_line_list([$\n|T], Stack) -> + {stop,lists:reverse(Stack, [$\n]),T}; +collect_line_list([H|T], Stack) -> + collect_line_list(T, [H|Stack]); +collect_line_list([], Stack) -> + Stack. + +%% Translator function to emulate a new (R9C and later) +%% I/O client when you have an old one. +%% +%% Implements a middleman that is get_until server and get_chars client. + +%%% BC (with pre-R13). +get_until(Any,Data,Arg) -> + get_until(Any,Data,latin1,Arg). + +%% Now we are aware of encoding... +get_until(start, Data, Encoding, XtraArg) -> + get_until([], Data, Encoding, XtraArg); +get_until(Cont, Data, Encoding, {Mod, Func, XtraArgs}) -> + Chars = if is_binary(Data), Encoding =:= unicode -> + unicode:characters_to_list(Data,utf8); + is_binary(Data) -> + binary_to_list(Data); + true -> + Data + end, + case apply(Mod, Func, [Cont,Chars|XtraArgs]) of + {done,Result,Buf} -> + {stop,if is_binary(Data), + is_list(Result), + Encoding =:= unicode -> + unicode:characters_to_binary(Result,unicode,unicode); + is_binary(Data), + is_list(Result) -> + erlang:iolist_to_binary(Result); +%% is_list(Data), +%% is_list(Result), +%% Encoding =:= latin1 -> +%% % Should check for only latin1, but skip that for +%% % efficiency reasons. +%% [ exit({cannot_convert, unicode, latin1}) || +%% X <- List, X > 255 ]; + true -> + Result + end, + Buf}; + {more,NewCont} -> + NewCont + end. + +binrev(L) -> + list_to_binary(lists:reverse(L, [])). + +binrev(L, T) -> + list_to_binary(lists:reverse(L, T)). diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl new file mode 100644 index 0000000000..eb1885021d --- /dev/null +++ b/lib/stdlib/src/io_lib_format.erl @@ -0,0 +1,678 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(io_lib_format). + +%% Formatting functions of io library. + +-export([fwrite/2,fwrite_g/1,indentation/2]). + +%% fwrite(Format, ArgList) -> [Char]. +%% Format the arguments in ArgList after string Format. Just generate +%% an error if there is an error in the arguments. +%% +%% To do the printing command correctly we need to calculate the +%% current indentation for everything before it. This may be very +%% expensive, especially when it is not needed, so we first determine +%% if, and for how long, we need to calculate the indentations. We do +%% this by first collecting all the control sequences and +%% corresponding arguments, then counting the print sequences and +%% then building the output. This method has some drawbacks, it does +%% two passes over the format string and creates more temporary data, +%% and it also splits the handling of the control characters into two +%% parts. + +fwrite(Format, Args) when is_atom(Format) -> + fwrite(atom_to_list(Format), Args); +fwrite(Format, Args) when is_binary(Format) -> + fwrite(binary_to_list(Format), Args); +fwrite(Format, Args) -> + Cs = collect(Format, Args), + Pc = pcount(Cs), + build(Cs, Pc, 0). + +collect([$~|Fmt0], Args0) -> + {C,Fmt1,Args1} = collect_cseq(Fmt0, Args0), + [C|collect(Fmt1, Args1)]; +collect([C|Fmt], Args) -> + [C|collect(Fmt, Args)]; +collect([], []) -> []. + +collect_cseq(Fmt0, Args0) -> + {F,Ad,Fmt1,Args1} = field_width(Fmt0, Args0), + {P,Fmt2,Args2} = precision(Fmt1, Args1), + {Pad,Fmt3,Args3} = pad_char(Fmt2, Args2), + {Encoding,Fmt4,Args4} = encoding(Fmt3, Args3), + {C,As,Fmt5,Args5} = collect_cc(Fmt4, Args4), + {{C,As,F,Ad,P,Pad,Encoding},Fmt5,Args5}. + +encoding([$t|Fmt],Args) -> + {unicode,Fmt,Args}; +encoding(Fmt,Args) -> + {latin1,Fmt,Args}. + +field_width([$-|Fmt0], Args0) -> + {F,Fmt,Args} = field_value(Fmt0, Args0), + field_width(-F, Fmt, Args); +field_width(Fmt0, Args0) -> + {F,Fmt,Args} = field_value(Fmt0, Args0), + field_width(F, Fmt, Args). + +field_width(F, Fmt, Args) when F < 0 -> + {-F,left,Fmt,Args}; +field_width(F, Fmt, Args) when F >= 0 -> + {F,right,Fmt,Args}. + +precision([$.|Fmt], Args) -> + field_value(Fmt, Args); +precision(Fmt, Args) -> + {none,Fmt,Args}. + +field_value([$*|Fmt], [A|Args]) when is_integer(A) -> + {A,Fmt,Args}; +field_value([C|Fmt], Args) when is_integer(C), C >= $0, C =< $9 -> + field_value([C|Fmt], Args, 0); +field_value(Fmt, Args) -> + {none,Fmt,Args}. + +field_value([C|Fmt], Args, F) when is_integer(C), C >= $0, C =< $9 -> + field_value(Fmt, Args, 10*F + (C - $0)); +field_value(Fmt, Args, F) -> %Default case + {F,Fmt,Args}. + +pad_char([$.,$*|Fmt], [Pad|Args]) -> {Pad,Fmt,Args}; +pad_char([$.,Pad|Fmt], Args) -> {Pad,Fmt,Args}; +pad_char(Fmt, Args) -> {$\s,Fmt,Args}. + +%% collect_cc([FormatChar], [Argument]) -> +%% {Control,[ControlArg],[FormatChar],[Arg]}. +%% Here we collect the argments for each control character. +%% Be explicit to cause failure early. + +collect_cc([$w|Fmt], [A|Args]) -> {$w,[A],Fmt,Args}; +collect_cc([$p|Fmt], [A|Args]) -> {$p,[A],Fmt,Args}; +collect_cc([$W|Fmt], [A,Depth|Args]) -> {$W,[A,Depth],Fmt,Args}; +collect_cc([$P|Fmt], [A,Depth|Args]) -> {$P,[A,Depth],Fmt,Args}; +collect_cc([$s|Fmt], [A|Args]) -> {$s,[A],Fmt,Args}; +collect_cc([$e|Fmt], [A|Args]) -> {$e,[A],Fmt,Args}; +collect_cc([$f|Fmt], [A|Args]) -> {$f,[A],Fmt,Args}; +collect_cc([$g|Fmt], [A|Args]) -> {$g,[A],Fmt,Args}; +collect_cc([$b|Fmt], [A|Args]) -> {$b,[A],Fmt,Args}; +collect_cc([$B|Fmt], [A|Args]) -> {$B,[A],Fmt,Args}; +collect_cc([$x|Fmt], [A,Prefix|Args]) -> {$x,[A,Prefix],Fmt,Args}; +collect_cc([$X|Fmt], [A,Prefix|Args]) -> {$X,[A,Prefix],Fmt,Args}; +collect_cc([$+|Fmt], [A|Args]) -> {$+,[A],Fmt,Args}; +collect_cc([$#|Fmt], [A|Args]) -> {$#,[A],Fmt,Args}; +collect_cc([$c|Fmt], [A|Args]) -> {$c,[A],Fmt,Args}; +collect_cc([$~|Fmt], Args) when is_list(Args) -> {$~,[],Fmt,Args}; +collect_cc([$n|Fmt], Args) when is_list(Args) -> {$n,[],Fmt,Args}; +collect_cc([$i|Fmt], [A|Args]) -> {$i,[A],Fmt,Args}. + +%% pcount([ControlC]) -> Count. +%% Count the number of print requests. + +pcount(Cs) -> pcount(Cs, 0). + +pcount([{$p,_As,_F,_Ad,_P,_Pad,_Enc}|Cs], Acc) -> pcount(Cs, Acc+1); +pcount([{$P,_As,_F,_Ad,_P,_Pad,_Enc}|Cs], Acc) -> pcount(Cs, Acc+1); +pcount([_|Cs], Acc) -> pcount(Cs, Acc); +pcount([], Acc) -> Acc. + +%% build([Control], Pc, Indentation) -> [Char]. +%% Interpret the control structures. Count the number of print +%% remaining and only calculate indentation when necessary. Must also +%% be smart when calculating indentation for characters in format. + +build([{C,As,F,Ad,P,Pad,Enc}|Cs], Pc0, I) -> + S = control(C, As, F, Ad, P, Pad, Enc, I), + Pc1 = decr_pc(C, Pc0), + if + Pc1 > 0 -> [S|build(Cs, Pc1, indentation(S, I))]; + true -> [S|build(Cs, Pc1, I)] + end; +build([$\n|Cs], Pc, _I) -> [$\n|build(Cs, Pc, 0)]; +build([$\t|Cs], Pc, I) -> [$\t|build(Cs, Pc, ((I + 8) div 8) * 8)]; +build([C|Cs], Pc, I) -> [C|build(Cs, Pc, I+1)]; +build([], _Pc, _I) -> []. + +decr_pc($p, Pc) -> Pc - 1; +decr_pc($P, Pc) -> Pc - 1; +decr_pc(_, Pc) -> Pc. + +%% indentation([Char], Indentation) -> Indentation. +%% Calculate the indentation of the end of a string given its start +%% indentation. We assume tabs at 8 cols. + +indentation([$\n|Cs], _I) -> indentation(Cs, 0); +indentation([$\t|Cs], I) -> indentation(Cs, ((I + 8) div 8) * 8); +indentation([C|Cs], I) when is_integer(C) -> + indentation(Cs, I+1); +indentation([C|Cs], I) -> + indentation(Cs, indentation(C, I)); +indentation([], I) -> I. + +%% control(FormatChar, [Argument], FieldWidth, Adjust, Precision, PadChar, +%% Indentation) -> +%% [Char] +%% This is the main dispatch function for the various formatting commands. +%% Field widths and precisions have already been calculated. + +control($w, [A], F, Adj, P, Pad, _Enc,_I) -> + term(io_lib:write(A, -1), F, Adj, P, Pad); +control($p, [A], F, Adj, P, Pad, _Enc, I) -> + print(A, -1, F, Adj, P, Pad, I); +control($W, [A,Depth], F, Adj, P, Pad, _Enc, _I) when is_integer(Depth) -> + term(io_lib:write(A, Depth), F, Adj, P, Pad); +control($P, [A,Depth], F, Adj, P, Pad, _Enc, I) when is_integer(Depth) -> + print(A, Depth, F, Adj, P, Pad, I); +control($s, [A], F, Adj, P, Pad, _Enc, _I) when is_atom(A) -> + string(atom_to_list(A), F, Adj, P, Pad); +control($s, [L0], F, Adj, P, Pad, latin1, _I) -> + L = iolist_to_chars(L0), + string(L, F, Adj, P, Pad); +control($s, [L0], F, Adj, P, Pad, unicode, _I) -> + L = unicode:characters_to_list(L0), + uniconv(string(L, F, Adj, P, Pad)); +control($e, [A], F, Adj, P, Pad, _Enc, _I) when is_float(A) -> + fwrite_e(A, F, Adj, P, Pad); +control($f, [A], F, Adj, P, Pad, _Enc, _I) when is_float(A) -> + fwrite_f(A, F, Adj, P, Pad); +control($g, [A], F, Adj, P, Pad, _Enc, _I) when is_float(A) -> + fwrite_g(A, F, Adj, P, Pad); +control($b, [A], F, Adj, P, Pad, _Enc, _I) when is_integer(A) -> + unprefixed_integer(A, F, Adj, base(P), Pad, true); +control($B, [A], F, Adj, P, Pad, _Enc, _I) when is_integer(A) -> + unprefixed_integer(A, F, Adj, base(P), Pad, false); +control($x, [A,Prefix], F, Adj, P, Pad, _Enc, _I) when is_integer(A), + is_atom(Prefix) -> + prefixed_integer(A, F, Adj, base(P), Pad, atom_to_list(Prefix), true); +control($x, [A,Prefix], F, Adj, P, Pad, _Enc, _I) when is_integer(A) -> + true = io_lib:deep_char_list(Prefix), %Check if Prefix a character list + prefixed_integer(A, F, Adj, base(P), Pad, Prefix, true); +control($X, [A,Prefix], F, Adj, P, Pad, _Enc, _I) when is_integer(A), + is_atom(Prefix) -> + prefixed_integer(A, F, Adj, base(P), Pad, atom_to_list(Prefix), false); +control($X, [A,Prefix], F, Adj, P, Pad, _Enc, _I) when is_integer(A) -> + true = io_lib:deep_char_list(Prefix), %Check if Prefix a character list + prefixed_integer(A, F, Adj, base(P), Pad, Prefix, false); +control($+, [A], F, Adj, P, Pad, _Enc, _I) when is_integer(A) -> + Base = base(P), + Prefix = [integer_to_list(Base), $#], + prefixed_integer(A, F, Adj, Base, Pad, Prefix, true); +control($#, [A], F, Adj, P, Pad, _Enc, _I) when is_integer(A) -> + Base = base(P), + Prefix = [integer_to_list(Base), $#], + prefixed_integer(A, F, Adj, Base, Pad, Prefix, false); +control($c, [A], F, Adj, P, Pad, unicode, _I) when is_integer(A) -> + char(A, F, Adj, P, Pad); +control($c, [A], F, Adj, P, Pad, _Enc, _I) when is_integer(A) -> + char(A band 255, F, Adj, P, Pad); +control($~, [], F, Adj, P, Pad, _Enc, _I) -> char($~, F, Adj, P, Pad); +control($n, [], F, Adj, P, Pad, _Enc, _I) -> newline(F, Adj, P, Pad); +control($i, [_A], _F, _Adj, _P, _Pad, _Enc, _I) -> []. + +-ifdef(UNICODE_AS_BINARIES). +uniconv(C) -> + unicode:characters_to_binary(C,unicode). +-else. +uniconv(C) -> + C. +-endif. +%% Default integer base +base(none) -> + 10; +base(B) when is_integer(B) -> + B. + +%% term(TermList, Field, Adjust, Precision, PadChar) +%% Output the characters in a term. +%% Adjust the characters within the field if length less than Max padding +%% with PadChar. + +term(T, none, _Adj, none, _Pad) -> T; +term(T, none, Adj, P, Pad) -> term(T, P, Adj, P, Pad); +term(T, F, Adj, P0, Pad) -> + L = lists:flatlength(T), + P = case P0 of none -> erlang:min(L, F); _ -> P0 end, + if + L > P -> + adjust(chars($*, P), chars(Pad, F-P), Adj); + F >= P -> + adjust(T, chars(Pad, F-L), Adj) + end. + +%% print(Term, Depth, Field, Adjust, Precision, PadChar, Indentation) +%% Print a term. + +print(T, D, none, Adj, P, Pad, I) -> print(T, D, 80, Adj, P, Pad, I); +print(T, D, F, Adj, none, Pad, I) -> print(T, D, F, Adj, I+1, Pad, I); +print(T, D, F, right, P, _Pad, _I) -> + io_lib_pretty:print(T, P, F, D). + +%% fwrite_e(Float, Field, Adjust, Precision, PadChar) + +fwrite_e(Fl, none, Adj, none, Pad) -> %Default values + fwrite_e(Fl, none, Adj, 6, Pad); +fwrite_e(Fl, none, _Adj, P, _Pad) when P >= 2 -> + float_e(Fl, float_data(Fl), P); +fwrite_e(Fl, F, Adj, none, Pad) -> + fwrite_e(Fl, F, Adj, 6, Pad); +fwrite_e(Fl, F, Adj, P, Pad) when P >= 2 -> + term(float_e(Fl, float_data(Fl), P), F, Adj, F, Pad). + +float_e(Fl, Fd, P) when Fl < 0.0 -> %Negative numbers + [$-|float_e(-Fl, Fd, P)]; +float_e(_Fl, {Ds,E}, P) -> + case float_man(Ds, 1, P-1) of + {[$0|Fs],true} -> [[$1|Fs]|float_exp(E)]; + {Fs,false} -> [Fs|float_exp(E-1)] + end. + +%% float_man([Digit], Icount, Dcount) -> {[Chars],CarryFlag}. +%% Generate the characters in the mantissa from the digits with Icount +%% characters before the '.' and Dcount decimals. Handle carry and let +%% caller decide what to do at top. + +float_man(Ds, 0, Dc) -> + {Cs,C} = float_man(Ds, Dc), + {[$.|Cs],C}; +float_man([D|Ds], I, Dc) -> + case float_man(Ds, I-1, Dc) of + {Cs,true} when D =:= $9 -> {[$0|Cs],true}; + {Cs,true} -> {[D+1|Cs],false}; + {Cs,false} -> {[D|Cs],false} + end; +float_man([], I, Dc) -> %Pad with 0's + {string:chars($0, I, [$.|string:chars($0, Dc)]),false}. + +float_man([D|_], 0) when D >= $5 -> {[],true}; +float_man([_|_], 0) -> {[],false}; +float_man([D|Ds], Dc) -> + case float_man(Ds, Dc-1) of + {Cs,true} when D =:= $9 -> {[$0|Cs],true}; + {Cs,true} -> {[D+1|Cs],false}; + {Cs,false} -> {[D|Cs],false} + end; +float_man([], Dc) -> {string:chars($0, Dc),false}. %Pad with 0's + +%% float_exp(Exponent) -> [Char]. +%% Generate the exponent of a floating point number. Always include sign. + +float_exp(E) when E >= 0 -> + [$e,$+|integer_to_list(E)]; +float_exp(E) -> + [$e|integer_to_list(E)]. + +%% fwrite_f(FloatData, Field, Adjust, Precision, PadChar) + +fwrite_f(Fl, none, Adj, none, Pad) -> %Default values + fwrite_f(Fl, none, Adj, 6, Pad); +fwrite_f(Fl, none, _Adj, P, _Pad) when P >= 1 -> + float_f(Fl, float_data(Fl), P); +fwrite_f(Fl, F, Adj, none, Pad) -> + fwrite_f(Fl, F, Adj, 6, Pad); +fwrite_f(Fl, F, Adj, P, Pad) when P >= 1 -> + term(float_f(Fl, float_data(Fl), P), F, Adj, F, Pad). + +float_f(Fl, Fd, P) when Fl < 0.0 -> + [$-|float_f(-Fl, Fd, P)]; +float_f(Fl, {Ds,E}, P) when E =< 0 -> + float_f(Fl, {string:chars($0, -E+1, Ds),1}, P); %Prepend enough 0's +float_f(_Fl, {Ds,E}, P) -> + case float_man(Ds, E, P) of + {Fs,true} -> "1" ++ Fs; %Handle carry + {Fs,false} -> Fs + end. + +%% float_data([FloatChar]) -> {[Digit],Exponent} + +float_data(Fl) -> + float_data(float_to_list(Fl), []). + +float_data([$e|E], Ds) -> + {lists:reverse(Ds),list_to_integer(E)+1}; +float_data([D|Cs], Ds) when D >= $0, D =< $9 -> + float_data(Cs, [D|Ds]); +float_data([_|Cs], Ds) -> + float_data(Cs, Ds). + +%% fwrite_g(Float) +%% Writes the shortest, correctly rounded string that converts +%% to Float when read back with list_to_float/1. +%% +%% See also "Printing Floating-Point Numbers Quickly and Accurately" +%% in Proceedings of the SIGPLAN '96 Conference on Programming +%% Language Design and Implementation. + +fwrite_g(0.0) -> + "0.0"; +fwrite_g(Float) when is_float(Float) -> + {Frac, Exp} = mantissa_exponent(Float), + {Place, Digits} = fwrite_g_1(Float, Exp, Frac), + R = insert_decimal(Place, [$0 + D || D <- Digits]), + [$- || true <- [Float < 0.0]] ++ R. + +-define(BIG_POW, (1 bsl 52)). +-define(MIN_EXP, (-1074)). + +mantissa_exponent(F) -> + case <> of + <<_S:1, 0:11, M:52>> -> % denormalized + E = log2floor(M), + {M bsl (53 - E), E - 52 - 1075}; + <<_S:1, BE:11, M:52>> when BE < 2047 -> + {M + ?BIG_POW, BE - 1075} + end. + +fwrite_g_1(Float, Exp, Frac) -> + Round = (Frac band 1) =:= 0, + if + Exp >= 0 -> + BExp = 1 bsl Exp, + if + Frac =:= ?BIG_POW -> + scale(Frac * BExp * 4, 4, BExp * 2, BExp, + Round, Round, Float); + true -> + scale(Frac * BExp * 2, 2, BExp, BExp, + Round, Round, Float) + end; + Exp < ?MIN_EXP -> + BExp = 1 bsl (?MIN_EXP - Exp), + scale(Frac * 2, 1 bsl (1 - Exp), BExp, BExp, + Round, Round, Float); + Exp > ?MIN_EXP, Frac =:= ?BIG_POW -> + scale(Frac * 4, 1 bsl (2 - Exp), 2, 1, + Round, Round, Float); + true -> + scale(Frac * 2, 1 bsl (1 - Exp), 1, 1, + Round, Round, Float) + end. + +scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) -> + Est = int_ceil(math:log10(abs(Float)) - 1.0e-10), + %% Note that the scheme implementation uses a 326 element look-up + %% table for int_pow(10, N) where we do not. + if + Est >= 0 -> + fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est, + LowOk, HighOk); + true -> + Scale = int_pow(10, -Est), + fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est, + LowOk, HighOk) + end. + +fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) -> + TooLow = if + HighOk -> R + MPlus >= S; + true -> R + MPlus > S + end, + case TooLow of + true -> + {K + 1, generate(R, S, MPlus, MMinus, LowOk, HighOk)}; + false -> + {K, generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)} + end. + +generate(R0, S, MPlus, MMinus, LowOk, HighOk) -> + D = R0 div S, + R = R0 rem S, + TC1 = if + LowOk -> R =< MMinus; + true -> R < MMinus + end, + TC2 = if + HighOk -> R + MPlus >= S; + true -> R + MPlus > S + end, + case {TC1, TC2} of + {false, false} -> + [D | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)]; + {false, true} -> + [D + 1]; + {true, false} -> + [D]; + {true, true} when R * 2 < S -> + [D]; + {true, true} -> + [D + 1] + end. + +insert_decimal(0, S) -> + "0." ++ S; +insert_decimal(Place, S) -> + L = length(S), + if + Place < 0; + Place >= L -> + ExpL = integer_to_list(Place - 1), + ExpDot = if L =:= 1 -> 2; true -> 1 end, + ExpCost = length(ExpL) + 1 + ExpDot, + if + Place < 0 -> + if + 2 - Place =< ExpCost -> + "0." ++ lists:duplicate(-Place, $0) ++ S; + true -> + insert_exp(ExpL, S) + end; + true -> + if + Place - L + 2 =< ExpCost -> + S ++ lists:duplicate(Place - L, $0) ++ ".0"; + true -> + insert_exp(ExpL, S) + end + end; + true -> + {S0, S1} = lists:split(Place, S), + S0 ++ "." ++ S1 + end. + +insert_exp(ExpL, [C]) -> + [C] ++ ".0e" ++ ExpL; +insert_exp(ExpL, [C | S]) -> + [C] ++ "." ++ S ++ "e" ++ ExpL. + +int_ceil(X) when is_float(X) -> + T = trunc(X), + case (X - T) of + Neg when Neg < 0 -> T; + Pos when Pos > 0 -> T + 1; + _ -> T + end. + +int_pow(X, 0) when is_integer(X) -> + 1; +int_pow(X, N) when is_integer(X), is_integer(N), N > 0 -> + int_pow(X, N, 1). + +int_pow(X, N, R) when N < 2 -> + R * X; +int_pow(X, N, R) -> + int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end). + +log2floor(Int) when is_integer(Int), Int > 0 -> + log2floor(Int, 0). + +log2floor(0, N) -> + N; +log2floor(Int, N) -> + log2floor(Int bsr 1, 1 + N). + +%% fwrite_g(Float, Field, Adjust, Precision, PadChar) +%% Use the f form if Float is >= 0.1 and < 1.0e4, +%% and the prints correctly in the f form, else the e form. +%% Precision always means the # of significant digits. + +fwrite_g(Fl, F, Adj, none, Pad) -> + fwrite_g(Fl, F, Adj, 6, Pad); +fwrite_g(Fl, F, Adj, P, Pad) when P >= 1 -> + A = abs(Fl), + E = if A < 1.0e-1 -> -2; + A < 1.0e0 -> -1; + A < 1.0e1 -> 0; + A < 1.0e2 -> 1; + A < 1.0e3 -> 2; + A < 1.0e4 -> 3; + true -> fwrite_f + end, + if P =< 1, E =:= -1; + P-1 > E, E >= -1 -> + fwrite_f(Fl, F, Adj, P-1-E, Pad); + P =< 1 -> + fwrite_e(Fl, F, Adj, 2, Pad); + true -> + fwrite_e(Fl, F, Adj, P, Pad) + end. + + +%% iolist_to_chars(iolist()) -> deep_char_list() + +iolist_to_chars([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 -> + [C | iolist_to_chars(Cs)]; +iolist_to_chars([I|Cs]) -> + [iolist_to_chars(I) | iolist_to_chars(Cs)]; +iolist_to_chars([]) -> + []; +iolist_to_chars(B) when is_binary(B) -> + binary_to_list(B). + +%% string(String, Field, Adjust, Precision, PadChar) + +string(S, none, _Adj, none, _Pad) -> S; +string(S, F, Adj, none, Pad) -> + N = lists:flatlength(S), + if N > F -> flat_trunc(S, F); + N =:= F -> S; + true -> adjust(S, chars(Pad, F-N), Adj) + end; +string(S, none, _Adj, P, Pad) -> + N = lists:flatlength(S), + if N > P -> flat_trunc(S, P); + N =:= P -> S; + true -> [S|chars(Pad, P-N)] + end; +string(S, F, Adj, F, Pad) -> + string(S, none, Adj, F, Pad); +string(S, F, Adj, P, Pad) when F > P -> + N = lists:flatlength(S), + if N > F -> flat_trunc(S, F); + N =:= F -> S; + N > P -> adjust(flat_trunc(S, P), chars(Pad, F-P), Adj); + N =:= P -> adjust(S, chars(Pad, F-P), Adj); + true -> adjust([S|chars(Pad, P-N)], chars(Pad, F-P), Adj) + end. + +%% unprefixed_integer(Int, Field, Adjust, Base, PadChar, Lowercase) +%% -> [Char]. + +unprefixed_integer(Int, F, Adj, Base, Pad, Lowercase) + when Base >= 2, Base =< 1+$Z-$A+10 -> + if Int < 0 -> + S = cond_lowercase(erlang:integer_to_list(-Int, Base), Lowercase), + term([$-|S], F, Adj, none, Pad); + true -> + S = cond_lowercase(erlang:integer_to_list(Int, Base), Lowercase), + term(S, F, Adj, none, Pad) + end. + +%% prefixed_integer(Int, Field, Adjust, Base, PadChar, Prefix, Lowercase) +%% -> [Char]. + +prefixed_integer(Int, F, Adj, Base, Pad, Prefix, Lowercase) + when Base >= 2, Base =< 1+$Z-$A+10 -> + if Int < 0 -> + S = cond_lowercase(erlang:integer_to_list(-Int, Base), Lowercase), + term([$-,Prefix|S], F, Adj, none, Pad); + true -> + S = cond_lowercase(erlang:integer_to_list(Int, Base), Lowercase), + term([Prefix|S], F, Adj, none, Pad) + end. + +%% char(Char, Field, Adjust, Precision, PadChar) -> [Char]. + +char(C, none, _Adj, none, _Pad) -> [C]; +char(C, F, _Adj, none, _Pad) -> chars(C, F); +char(C, none, _Adj, P, _Pad) -> chars(C, P); +char(C, F, Adj, P, Pad) when F >= P -> + adjust(chars(C, P), chars(Pad, F - P), Adj). + +%% newline(Field, Adjust, Precision, PadChar) -> [Char]. + +newline(none, _Adj, _P, _Pad) -> "\n"; +newline(F, right, _P, _Pad) -> chars($\n, F). + +%% +%% Utilities +%% + +adjust(Data, [], _) -> Data; +adjust(Data, Pad, left) -> [Data,Pad]; +adjust(Data, Pad, right) -> [Pad,Data]. + +%% Flatten and truncate a deep list to at most N elements. + +flat_trunc(List, N) when is_integer(N), N >= 0 -> + flat_trunc(List, N, [], []). + +flat_trunc(L, 0, _, R) when is_list(L) -> + lists:reverse(R); +flat_trunc([H|T], N, S, R) when is_list(H) -> + flat_trunc(H, N, [T|S], R); +flat_trunc([H|T], N, S, R) -> + flat_trunc(T, N-1, S, [H|R]); +flat_trunc([], N, [H|S], R) -> + flat_trunc(H, N, S, R); +flat_trunc([], _, [], R) -> + lists:reverse(R). + +%% A deep version of string:chars/2,3 + +chars(_C, 0) -> + []; +chars(C, 1) -> + [C]; +chars(C, 2) -> + [C,C]; +chars(C, 3) -> + [C,C,C]; +chars(C, N) when is_integer(N), (N band 1) =:= 0 -> + S = chars(C, N bsr 1), + [S|S]; +chars(C, N) when is_integer(N) -> + S = chars(C, N bsr 1), + [C,S|S]. + +%chars(C, N, Tail) -> +% [chars(C, N)|Tail]. + +%% Lowercase conversion + +cond_lowercase(String, true) -> + lowercase(String); +cond_lowercase(String,false) -> + String. + +lowercase([H|T]) when is_integer(H), H >= $A, H =< $Z -> + [(H-$A+$a)|lowercase(T)]; +lowercase([H|T]) -> + [H|lowercase(T)]; +lowercase([]) -> + []. diff --git a/lib/stdlib/src/io_lib_fread.erl b/lib/stdlib/src/io_lib_fread.erl new file mode 100644 index 0000000000..74316dc730 --- /dev/null +++ b/lib/stdlib/src/io_lib_fread.erl @@ -0,0 +1,466 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(io_lib_fread). + +%% Formatted input functions of io library. + +-export([fread/2,fread/3]). + +-import(lists, [reverse/1,reverse/2]). + +%%----------------------------------------------------------------------- +%% Local types +%%----------------------------------------------------------------------- + +-type done_arg2() :: {'ok', io_lib:chars()} | 'eof' | {'error', term()}. + +%%----------------------------------------------------------------------- +%% Types also used in other files +%%----------------------------------------------------------------------- + +-type continuation() :: [] | {_, _, _, _}. % XXX: refine + +-type fread_2_ret() :: {'ok', io_lib:chars(), string()} + | {'more', string(), non_neg_integer(), io_lib:chars()} + | {'error', term()}. +-type fread_3_ret() :: {'more', continuation()} + | {'done', done_arg2(), string()}. + +%%----------------------------------------------------------------------- + +%% fread(Continuation, CharList, FormatString) +%% This is the main function into the re-entrant formatted reader. It +%% repeatedly collects lines and calls fread/2 to format the input until +%% all the format string has been used. And it counts the characters. + +-spec fread(io_lib_fread:continuation(), string(), string()) -> fread_3_ret(). + +fread([], Chars, Format) -> + %%io:format("FREAD: ~w `~s'~n", [Format,Chars]), + fread_collect(Format, [], 0, [], Chars); +fread({Format,Stack,N,Results}=_Continuation, Chars, _) -> + %%io:format("FREAD: ~w `~s'~n", [_Continuation,Chars]), + fread_collect(Format, Stack, N, Results, Chars). + +fread_collect(Format, [$\r|Stack], N, Results, [$\n|Chars]) -> + fread_line(Format, reverse(Stack), N, Results, Chars, [$\r,$\n]); +fread_collect(Format, Stack, N, Results, [$\n|Chars]) -> + fread_line(Format, reverse(Stack), N, Results, Chars, [$\n]); +fread_collect(Format, Stack, N, Results, []) -> + Continuation = {Format,Stack,N,Results}, + {more,Continuation}; +fread_collect(Format, [$\r|Stack], N, Results, Chars) -> % Maybe eof + fread_line(Format, reverse(Stack), N, Results, Chars, [$\r]); +fread_collect(Format, Stack, N, Results, [C|Chars]) -> + fread_collect(Format, [C|Stack], N, Results, Chars); +fread_collect(Format, Stack, N, Results, Chars) -> % eof + fread_line(Format, reverse(Stack), N, Results, Chars, []). + +fread_line(Format0, Line, N0, Results0, More, Newline) -> + %%io:format("FREAD1: `~s' `~s'~n", [Format0,Line]), + Chars = if is_list(More) -> More; true -> [] end, + case fread(Format0, Line, N0, Results0) of + {ok,Results,[]} -> + {done,{ok,Results},Chars}; + {ok,Results,Rest} -> + %% Don't lose the whitespace + {done,{ok,Results},Rest++(Newline++Chars)}; + %% fread/4 should not return {more,...} on eof; guard just in case... + %% Count newline characters here since fread/4 does not get them. + {more,Format,N,Results} when is_list(Line), is_list(More) -> + fread_collect(Format, [], N+length(Newline), Results, More); + {more,Format,N,Results} when is_list(Line) -> % eof + fread_line(Format, eof, N+length(Newline), Results, More, []); + Other -> %An error has occurred + {done,Other,More} + end. + + +%% Conventions +%% ~s String White terminated +%% ~d Integer terminated by ~[0-9] +%% ~u Unsigned integer in base 2..36, no leading whitespace +%% ~- Optional sign character, no leading whitespace +%% ~f Float +%% ~a as ~s but converted to an atom +%% ~c characters without any stripping +%% ~n number of characters scanned +%% WHITE Skip white space +%% X Literal X + +-spec fread(string(), string()) -> fread_2_ret(). + +fread(Format, Line) -> + fread(Format, Line, 0, []). + +fread([$~|Format0], Line, N, Results) -> + {Format,F,Sup,Unicode} = fread_field(Format0), + fread1(Format, F, Sup, Unicode, Line, N, Results, Format0); +fread([$\s|Format], Line, N, Results) -> + fread_skip_white(Format, Line, N, Results); +fread([$\t|Format], Line, N, Results) -> + fread_skip_white(Format, Line, N, Results); +fread([$\r|Format], Line, N, Results) -> + fread_skip_white(Format, Line, N, Results); +fread([$\n|Format], Line, N, Results) -> + fread_skip_white(Format, Line, N, Results); +fread([C|Format], [C|Line], N, Results) -> + fread(Format, Line, N+1, Results); +fread([_F|_Format], [_C|_Line], _N, _Results) -> + fread_error(input); +fread([], Line, _N, Results) -> + {ok,reverse(Results),Line}. + +fread_skip_white(Format, [$\s|Line], N, Results) -> + fread_skip_white(Format, Line, N+1, Results); +fread_skip_white(Format, [$\t|Line], N, Results) -> + fread_skip_white(Format, Line, N+1, Results); +fread_skip_white(Format, [$\r|Line], N, Results) -> + fread_skip_white(Format, Line, N+1, Results); +fread_skip_white(Format, [$\n|Line], N, Results) -> + fread_skip_white(Format, Line, N+1, Results); +fread_skip_white(Format, Line, N, Results) -> + fread(Format, Line, N, Results). + +%% fread_field(Format) +%% Reads the field specification paramters. Returns: +%% +%% {RestFormat,FieldWidth,Suppress} + +fread_field([$*|Format]) -> fread_field(Format, true, false); +fread_field(Format) -> fread_field(Format, false, false). + +fread_field([C|Format], Sup, Unic) when C >= $0, C =< $9 -> + fread_field(Format, C - $0, Sup, Unic); +fread_field([$t|Format], Sup, _Unic) -> + {Format,none,Sup,true}; +fread_field(Format, Sup, Unic) -> + {Format,none,Sup,Unic}. + +fread_field([C|Format], F, Sup, Unic) when C >= $0, C =< $9 -> + fread_field(Format, 10*F + C - $0, Sup, Unic); +fread_field([$t|Format], F, Sup, _Unic) -> + {Format,F,Sup,true}; +fread_field(Format, F, Sup, Unic) -> + {Format,F,Sup,Unic}. + +%% fread1(Format, FieldWidth, Suppress, Line, N, Results, AllFormat) +%% fread1(Format, FieldWidth, Suppress, Line, N, Results) +%% The main dispatch function for the formatting commands. Done in two +%% stages so format commands that need no input can always be processed. + +fread1([$l|Format], _F, Sup, _U, Line, N, Res, _AllFormat) -> + fread(Format, Line, N, fread_result(Sup, N, Res)); +fread1(_Format, _F, _Sup, _U, [], N, Res, AllFormat) -> + %% Need more input here. + {more,[$~|AllFormat],N,Res}; +fread1(_Format, _F, _Sup, _U, eof, _N, [], _AllFormat) -> + %% This is at start of format string so no error. + eof; +fread1(_Format, _F, _Sup, _U, eof, _N, _Res, _AllFormat) -> + %% This is an error as there is no more input. + fread_error(input); +fread1(Format, F, Sup, U, Line, N, Res, _AllFormat) -> + fread1(Format, F, Sup, U, Line, N, Res). + +fread1([$f|Format], none, Sup, false, Line0, N0, Res) -> + {Line,N,Cs} = fread_float_cs(Line0, N0), + fread_float(Cs, Sup, Format, Line, N, Res); +fread1([$f|Format], F, Sup, false, Line0, N, Res) -> + {Line,Cs} = fread_chars(Line0, F, false), + fread_float(Cs, Sup, Format, Line, N+F, Res); +fread1([$d|Format], none, Sup, false, Line0, N0, Res) -> + {Line,N,Cs} = fread_int_cs(Line0, N0), + fread_integer(Cs, 10, Sup, Format, Line, N, Res); +fread1([$d|Format], F, Sup, false, Line0, N, Res) -> + {Line,Cs} = fread_chars(Line0, F, false), + fread_integer(Cs, 10, Sup, Format, Line, N+F, Res); +fread1([$u|Format], none, Sup, false, Line0, N0, Res) -> + {Line,N,Cs} = fread_digits(Line0, N0, 10, []), + fread_unsigned(Cs, 10, Sup, Format, Line, N, Res); +fread1([$u|Format], F, Sup, false, Line0, N0, Res) when F >= 2, F =< 1+$Z-$A+10 -> + {Line,N,Cs} = fread_digits(Line0, N0, F, []), + fread_unsigned(Cs, F, Sup, Format, Line, N, Res); +fread1([$-|Format], _F, Sup, false, Line, N, Res) -> + fread_sign_char(Sup, Format, Line, N, Res); +fread1([$#|Format], none, Sup, false, Line0, N0, Res) -> + case catch + begin + {Line1,N1,B1} = fread_base(Line0, N0), + B = abs(B1), + true = (B >= 2) and (B =< 1+$Z-$A+10), + {Line2,N2,Cs2} = fread_digits(Line1, N1, B, []), + fread_based(reverse(Cs2), B1, Sup, Format, Line2, N2, Res) + end of + {'EXIT',_} -> + fread_error(based); + Other -> + Other + end; +fread1([$#|Format], F, Sup, false, Line0, N, Res) -> + case catch + begin + {Line1,Cs1} = fread_chars(Line0, F, false), + {Line2,_,B2} = fread_base(reverse(Cs1), N), + true = ((B2 >= 2) and (B2 =< 1+$Z-$A+10)), + fread_based(Line2, B2, Sup, Format, Line1, N+F, Res) + end of + {'EXIT',_} -> + fread_error(based); + Other -> + Other + end; +fread1([$s|Format], none, Sup, U, Line0, N0, Res) -> + {Line,N,Cs} = fread_string_cs(Line0, N0, U), + fread_string(Cs, Sup, U, Format, Line, N, Res); +fread1([$s|Format], F, Sup, U, Line0, N, Res) -> + {Line,Cs} = fread_chars(Line0, F, U), + fread_string(Cs, Sup, U, Format, Line, N+F, Res); +%% XXX:PaN Atoms still only latin1... +fread1([$a|Format], none, Sup, false, Line0, N0, Res) -> + {Line,N,Cs} = fread_string_cs(Line0, N0, false), + fread_atom(Cs, Sup, Format, Line, N, Res); +fread1([$a|Format], F, Sup, false, Line0, N, Res) -> + {Line,Cs} = fread_chars(Line0, F, false), + fread_atom(Cs, Sup, Format, Line, N+F, Res); +fread1([$c|Format], none, Sup, U, Line0, N, Res) -> + {Line,Cs} = fread_chars(Line0, 1, U), + fread_chars(Cs, Sup, U, Format, Line, N+1, Res); +fread1([$c|Format], F, Sup, U, Line0, N, Res) -> + {Line,Cs} = fread_chars(Line0, F, U), + fread_chars(Cs, Sup, U, Format, Line, N+F, Res); +fread1([$~|Format], _F, _Sup, _U, [$~|Line], N, Res) -> + fread(Format, Line, N+1, Res); +fread1(_Format, _F, _Sup, _U, _Line, _N, _Res) -> + fread_error(format). + +%% fread_float(FloatChars, Suppress, Format, Line, N, Results) + +fread_float(Cs, Sup, Format, Line, N, Res) -> + case catch list_to_float(fread_skip_white(reverse(Cs))) of + {'EXIT',_} -> + fread_error(float); + Float -> + fread(Format, Line, N, fread_result(Sup, Float, Res)) + end. + +%% fread_integer(IntegerChars, Base, Suppress, Format, Line, N, Results) + +fread_integer(Cs, Base, Sup, Format, Line, N, Res) -> + case catch erlang:list_to_integer(fread_skip_white(reverse(Cs)), Base) of + {'EXIT',_} -> + fread_error(integer); + Integer -> + fread(Format, Line, N, fread_result(Sup, Integer, Res)) + end. + + +%% fread_unsigned(IntegerChars, Base, Suppress, Format, Line, N, Results) + +fread_unsigned(Cs, Base, Sup, Format, Line, N, Res) -> + case catch erlang:list_to_integer(fread_skip_white(reverse(Cs)), Base) of + {'EXIT',_} -> + fread_error(unsigned); + Integer -> + fread(Format, Line, N, fread_result(Sup, Integer, Res)) + end. + + +%% fread_based(IntegerChars, Base, Suppress, Format, Line, N, Results) + +fread_based(Cs0, B, Sup, Format, Line, N, Res) -> + {Cs,Base} = if B < 0 -> {[$-|Cs0],-B}; + true -> {Cs0,B} + end, + I = erlang:list_to_integer(Cs, Base), + fread(Format, Line, N, fread_result(Sup, I, Res)). + + +%% fread_sign_char(Suppress, Format, Line, N, Results) + +fread_sign_char(Sup, Format, [$-|Line], N, Res) -> + fread(Format, Line, N+1, fread_result(Sup, -1, Res)); +fread_sign_char(Sup, Format, [$+|Line], N, Res) -> + fread(Format, Line, N+1, fread_result(Sup, +1, Res)); +fread_sign_char(Sup, Format, Line, N, Res) -> + fread(Format, Line, N, fread_result(Sup, 1, Res)). + + +%% fread_string(StringChars, Suppress, Format, Line, N, Results) + +fread_string(error, _Sup, _U, _Format, _Line, _N, _Res) -> + fread_error(string); +fread_string(Cs0, Sup, U, Format, Line, N, Res) -> + Cs = fread_skip_white(reverse(fread_skip_white(Cs0))), + fread(Format, Line, N, fread_convert(fread_result(Sup, Cs, Res),U)). + +%% fread_atom(AtomChars, Suppress, Format, Line, N, Results) + +fread_atom(error, _Sup, _Format, _Line, _N, _Res) -> + fread_error(atom); +fread_atom(Cs0, Sup, Format, Line, N, Res) -> + Cs = fread_skip_white(reverse(fread_skip_white(Cs0))), + fread(Format, Line, N, fread_result(Sup, list_to_atom(Cs), Res)). + +%% fread_chars(Characters, Suppress, Format, Line, N, Results) + +fread_chars(error, _Sup, _U, _Format, _Line, _N, _Res) -> + fread_error(character); +fread_chars(Cs, Sup, U, Format, Line, N, Res) -> + fread(Format, Line, N, fread_convert(fread_result(Sup, reverse(Cs), Res),U)). + +%% fread_chars(Line, Count) + +fread_chars(Line, C, U) -> + fread_chars(C, Line, U, []). + +fread_chars(0, Line, _U, Cs) -> {Line,Cs}; +fread_chars(_N, [$\n|Line], _U, _Cs) -> {[$\n|Line],error}; +fread_chars(N, [C|Line], true, Cs) -> + fread_chars(N-1, Line, true, [C|Cs]); +fread_chars(N, [C|Line], false, Cs) when C >= 0, C =< 255 -> + fread_chars(N-1, Line, false, [C|Cs]); +fread_chars(_N, L, _U, _Cs) -> + {L,error}. +%%fread_chars(_N, [], _U,_Cs) -> +%% {[],error}. + +%% fread_int_cs(Line, N) + +fread_int_cs(Line0, N0) -> + {Line1,N1} = fread_skip_white(Line0, N0), + {Line,N,Cs} = fread_sign(Line1, N1, []), + fread_digits(Line, N, Cs). + +%% fread_float_cs(Line, N) +%% A float is "[+|-][0-9]+.[0-9]+[[E|e][+|-][09-]+] + +fread_float_cs(Line0, N0) -> + {Line1,N1} = fread_skip_white(Line0, N0), + {Line2,N2,Cs2} = fread_sign(Line1, N1, []), + {Line,N,Cs} = fread_digits(Line2, N2, Cs2), + fread_float_cs_1(Line, N, Cs). + +fread_float_cs_1([$.|Line0], N0, Cs0) -> + {Line,N,Cs} = fread_digits(Line0, N0+1, [$.|Cs0]), + fread_float_cs_2(Line, N, Cs); +fread_float_cs_1(Line, N, Cs) -> + {Line,N,Cs}. + +fread_float_cs_2([$e|Line0], N0, Cs0) -> + {Line,N,Cs} = fread_sign(Line0, N0+1, [$e|Cs0]), + fread_digits(Line, N, Cs); +fread_float_cs_2([$E|Line0], N0, Cs0) -> + {Line,N,Cs} = fread_sign(Line0, N0+1, [$E|Cs0]), + fread_digits(Line, N, Cs); +fread_float_cs_2(Line, N, Cs) -> + {Line,N,Cs}. + +%% fread_string_cs(Line, N, Unicode) + +fread_string_cs(Line0, N0, false) -> + {Line,N} = fread_skip_white(Line0, N0), + fread_skip_latin1_nonwhite(Line, N, []); +fread_string_cs(Line0, N0, true) -> + {Line,N} = fread_skip_white(Line0, N0), + fread_skip_nonwhite(Line, N, []). + +%% fread_skip_white(Line) +%% fread_skip_white(Line, N) +%% fread_skip_nonwhite(Line, N, Characters) +%% fread_sign(Line, N, Characters) +%% fread_digits(Line, N, Characters) +%% fread_digits(Line, N, Base, Characters) +%% Read segments of things, return "thing" characters in reverse order. + +fread_skip_white([$\s|Line]) -> fread_skip_white(Line); +fread_skip_white([$\t|Line]) -> fread_skip_white(Line); +fread_skip_white([$\r|Line]) -> fread_skip_white(Line); +fread_skip_white([$\n|Line]) -> fread_skip_white(Line); +fread_skip_white(Line) -> Line. + +fread_skip_white([$\s|Line], N) -> + fread_skip_white(Line, N+1); +fread_skip_white([$\t|Line], N) -> + fread_skip_white(Line, N+1); +fread_skip_white([$\r|Line], N) -> + fread_skip_white(Line, N+1); +fread_skip_white([$\n|Line], N) -> + fread_skip_white(Line, N+1); +fread_skip_white(Line, N) -> {Line,N}. + +fread_skip_latin1_nonwhite([$\s|Line], N, Cs) -> {[$\s|Line],N,Cs}; +fread_skip_latin1_nonwhite([$\t|Line], N, Cs) -> {[$\t|Line],N,Cs}; +fread_skip_latin1_nonwhite([$\r|Line], N, Cs) -> {[$\r|Line],N,Cs}; +fread_skip_latin1_nonwhite([$\n|Line], N, Cs) -> {[$\n|Line],N,Cs}; +fread_skip_latin1_nonwhite([C|Line], N, []) when C > 255 -> + {[C|Line],N,error}; +fread_skip_latin1_nonwhite([C|Line], N, Cs) when C > 255 -> + {[C|Line],N,Cs}; +fread_skip_latin1_nonwhite([C|Line], N, Cs) -> + fread_skip_latin1_nonwhite(Line, N+1, [C|Cs]); +fread_skip_latin1_nonwhite([], N, Cs) -> {[],N,Cs}. + +fread_skip_nonwhite([$\s|Line], N, Cs) -> {[$\s|Line],N,Cs}; +fread_skip_nonwhite([$\t|Line], N, Cs) -> {[$\t|Line],N,Cs}; +fread_skip_nonwhite([$\r|Line], N, Cs) -> {[$\r|Line],N,Cs}; +fread_skip_nonwhite([$\n|Line], N, Cs) -> {[$\n|Line],N,Cs}; +fread_skip_nonwhite([C|Line], N, Cs) -> + fread_skip_nonwhite(Line, N+1, [C|Cs]); +fread_skip_nonwhite([], N, Cs) -> {[],N,Cs}. + +fread_sign([$+|Line], N, Cs) -> {Line,N+1,[$+|Cs]}; +fread_sign([$-|Line], N, Cs) -> {Line,N+1,[$-|Cs]}; +fread_sign(Line, N, Cs) -> {Line,N,Cs}. + +fread_base(Line0, N0) -> + {[$#|Line1],N1,Cs1} = fread_int_cs(Line0, N0), + B = list_to_integer(reverse(Cs1)), + {Line1,N1+1,B}. + +fread_digits([C|Line], N, Cs) when C >= $0, C =< $9 -> + fread_digits(Line, N+1, [C|Cs]); +fread_digits(Line, N, Cs) -> {Line,N,Cs}. + +fread_digits([C|Line], N, Base, Cs) when C >= $0, C =< $9 -> + fread_digits(Line, N+1, Base, [C|Cs]); +fread_digits([C|Line], N, Base, Cs) when C >= $A, C < $A+Base-10 -> + fread_digits(Line, N+1, Base, [C|Cs]); +fread_digits([C|Line], N, Base, Cs) when C >= $a, C < $a+Base-10 -> + fread_digits(Line, N+1, Base, [C|Cs]); +fread_digits(Line, N, _Base, Cs) -> {Line,N,Cs}. + + + +%% fread_result(Suppress, Value, Results) + +fread_result(true, _V, Res) -> Res; +fread_result(false, V, Res) -> [V|Res]. + +-ifdef(UNICODE_AS_BINARIES). +fread_convert([L|R],true) when is_list(L) -> + [unicode:characters_to_binary(L) | R]; +fread_convert(Any,_) -> + Any. +-else. +fread_convert(Any,_) -> + Any. +-endif. +fread_error(In) -> + {error,{fread,In}}. diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl new file mode 100644 index 0000000000..169410796b --- /dev/null +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -0,0 +1,646 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(io_lib_pretty). + +%%% Pretty printing Erlang terms +%%% +%%% In this module "print" means the formatted printing while "write" +%%% means just writing out onto one line. + +-export([print/1,print/2,print/3,print/4,print/5,print/6]). + +%%% +%%% Exported functions +%%% + +%% print(Term) -> [Chars] +%% print(Term, Column, LineLength, Depth) -> [Chars] +%% Depth = -1 gives unlimited print depth. Use io_lib:write for atomic terms. + +print(Term) -> + print(Term, 1, 80, -1). + +%% print(Term, RecDefFun) -> [Chars] +%% print(Term, Depth, RecDefFun) -> [Chars] +%% RecDefFun = fun(Tag, NoFields) -> [FieldTag] | no +%% Used by the shell for printing records. +print(Term, RecDefFun) -> + print(Term, -1, RecDefFun). + +print(Term, Depth, RecDefFun) -> + print(Term, 1, 80, Depth, RecDefFun). + +print(Term, Col, Ll, D) -> + print(Term, Col, Ll, D, _M=-1, no_fun). + +print(Term, Col, Ll, D, RecDefFun) -> + print(Term, Col, Ll, D, _M=-1, RecDefFun). + +print(_, _, _, 0, _M, _RF) -> "..."; +print(Term, Col, Ll, D, M, RecDefFun) when Col =< 0 -> + print(Term, 1, Ll, D, M, RecDefFun); +print(Term, Col, Ll, D, M0, RecDefFun) when is_tuple(Term); + is_list(Term) -> + If = {_S, Len} = print_length(Term, D, RecDefFun), + M = max_cs(M0, Len), + if + Len < Ll - Col, Len =< M -> + write(If); + true -> + TInd = while_fail([-1, 4], + fun(I) -> cind(If, Col, Ll, M, I, 0, 0) end, + 1), + pp(If, Col, Ll, M, TInd, indent(Col), 0, 0) + end; +print(<<_/bitstring>>=Term, Col, Ll, D, M0, RecDefFun) -> + If = {_S, Len} = print_length(Term, D, RecDefFun), + M = max_cs(M0, Len), + if + Len < Ll - Col, Len =< M -> + write(If); + true -> + TInd = while_fail([-1, 4], + fun(I) -> cind(If, Col, Ll, M, I, 0, 0) end, + 1), + pp(If, Col, Ll, M, TInd, indent(Col), 0, 0) + end; +print(Term, _Col, _Ll, _D, _M, _RF) -> + io_lib:write(Term). + +%%% +%%% Local functions +%%% + +max_cs(M, Len) when M < 0 -> + Len; +max_cs(M, _Len) -> + M. + +-define(ATM(T), is_list(element(1, T))). +-define(ATM_FLD(Field), ?ATM(element(4, element(1, Field)))). + +pp({_S, Len} = If, Col, Ll, M, _TInd, _Ind, LD, W) + when Len < Ll - Col - LD, Len + W + LD =< M -> + write(If); +pp({{list,L}, _Len}, Col, Ll, M, TInd, Ind, LD, W) -> + [$[, pp_list(L, Col + 1, Ll, M, TInd, indent(1, Ind), LD, $|, W + 1), $]]; +pp({{tuple,true,L}, _Len}, Col, Ll, M, TInd, Ind, LD, W) -> + [${, pp_tag_tuple(L, Col, Ll, M, TInd, Ind, LD, W + 1), $}]; +pp({{tuple,false,L}, _Len}, Col, Ll, M, TInd, Ind, LD, W) -> + [${, pp_list(L, Col + 1, Ll, M, TInd, indent(1, Ind), LD, $,, W + 1), $}]; +pp({{record,[{Name,NLen} | L]}, _Len}, Col, Ll, M, TInd, Ind, LD, W) -> + [Name, ${, pp_record(L, NLen, Col, Ll, M, TInd, Ind, LD, W + NLen+1), $}]; +pp({{bin,S}, _Len}, Col, Ll, M, _TInd, Ind, LD, W) -> + pp_binary(S, Col + 2, Ll, M, indent(2, Ind), LD, W); +pp({S, _Len}, _Col, _Ll, _M, _TInd, _Ind, _LD, _W) -> + S. + +%% Print a tagged tuple by indenting the rest of the elements +%% differently to the tag. Tuple has size >= 2. +pp_tag_tuple([{Tag,Tlen} | L], Col, Ll, M, TInd, Ind, LD, W) -> + TagInd = Tlen + 2, + Tcol = Col + TagInd, + S = $,, + if + TInd > 0, TagInd > TInd -> + Col1 = Col + TInd, + Indent = indent(TInd, Ind), + [Tag|pp_tail(L, Col1, Tcol, Ll, M, TInd, Indent, LD, S, W+Tlen)]; + true -> + Indent = indent(TagInd, Ind), + [Tag, S | pp_list(L, Tcol, Ll, M, TInd, Indent, LD, S, W+Tlen+1)] + end. + +pp_record([], _Nlen, _Col, _Ll, _M, _TInd, _Ind, _LD, _W) -> + ""; +pp_record({dots, _}, _Nlen, _Col, _Ll, _M, _TInd, _Ind, _LD, _W) -> + "..."; +pp_record([F | Fs], Nlen, Col0, Ll, M, TInd, Ind0, LD, W0) -> + Nind = Nlen + 1, + {Col, Ind, S, W} = rec_indent(Nind, TInd, Col0, Ind0, W0), + {FS, FW} = pp_field(F, Col, Ll, M, TInd, Ind, last_depth(Fs, LD), W), + [S, FS | pp_fields_tail(Fs, Col, Col + FW, Ll, M, TInd, Ind, LD, W + FW)]. + +pp_fields_tail([], _Col0, _Col, _Ll, _M, _TInd, _Ind, _LD, _W) -> + ""; +pp_fields_tail({dots, _}, _Col0, _Col, _M, _Ll, _TInd, _Ind, _LD, _W) -> + ",..."; +pp_fields_tail([{_, Len}=F | Fs], Col0, Col, Ll, M, TInd, Ind, LD, W) -> + LD1 = last_depth(Fs, LD), + ELen = 1 + Len, + if + LD1 =:= 0, ELen + 1 < Ll - Col, W + ELen + 1 =< M, ?ATM_FLD(F); + LD1 > 0, ELen < Ll - Col - LD1, W + ELen + LD1 =< M, ?ATM_FLD(F) -> + [$,, write_field(F) | + pp_fields_tail(Fs, Col0, Col+ELen, Ll, M, TInd, Ind, LD, W+ELen)]; + true -> + {FS, FW} = pp_field(F, Col0, Ll, M, TInd, Ind, LD1, 0), + [$,, $\n, Ind, FS | + pp_fields_tail(Fs, Col0, Col0 + FW, Ll, M, TInd, Ind, LD, FW)] + end. + +pp_field({_, Len}=Fl, Col, Ll, M, _TInd, _Ind, LD, W) + when Len < Ll - Col - LD, Len + W + LD =< M -> + {write_field(Fl), if + ?ATM_FLD(Fl) -> + Len; + true -> + Ll % force nl + end}; +pp_field({{field, Name, NameL, F}, _Len}, Col0, Ll, M, TInd, Ind0, LD, W0) -> + {Col, Ind, S, W} = rec_indent(NameL, TInd, Col0, Ind0, W0 + NameL), + {[Name, " = ", S | pp(F, Col, Ll, M, TInd, Ind, LD, W)], Ll}. % force nl + +rec_indent(RInd, TInd, Col0, Ind0, W0) -> + Nl = (TInd > 0) and (RInd > TInd), + DCol = case Nl of + true -> TInd; + false -> RInd + end, + Col = Col0 + DCol, + Ind = indent(DCol, Ind0), + S = case Nl of + true -> [$\n | Ind]; + false -> "" + end, + W = case Nl of + true -> 0; + false -> W0 + end, + {Col, Ind, S, W}. + +pp_list({dots, _}, _Col0, _Ll, _M, _TInd, _Ind, _LD, _S, _W) -> + "..."; +pp_list([E | Es], Col0, Ll, M, TInd, Ind, LD, S, W) -> + {ES, WE} = pp_element(E, Col0, Ll, M, TInd, Ind, last_depth(Es, LD), W), + [ES | pp_tail(Es, Col0, Col0 + WE, Ll, M, TInd, Ind, LD, S, W + WE)]. + +pp_tail([], _Col0, _Col, _Ll, _M, _TInd, _Ind, _LD, _S, _W) -> + ""; +pp_tail([{_, Len}=E | Es], Col0, Col, Ll, M, TInd, Ind, LD, S, W) -> + LD1 = last_depth(Es, LD), + ELen = 1 + Len, + if + LD1 =:= 0, ELen + 1 < Ll - Col, W + ELen + 1 =< M, ?ATM(E); + LD1 > 0, ELen < Ll - Col - LD1, W + ELen + LD1 =< M, ?ATM(E) -> + [$,, write(E) | + pp_tail(Es, Col0, Col + ELen, Ll, M, TInd, Ind, LD, S, W+ELen)]; + true -> + {ES, WE} = pp_element(E, Col0, Ll, M, TInd, Ind, LD1, 0), + [$,, $\n, Ind, ES | + pp_tail(Es, Col0, Col0 + WE, Ll, M, TInd, Ind, LD, S, WE)] + end; +pp_tail({dots, _}, _Col0, _Col, _Ll, _M, _TInd, _Ind, _LD, S, _W) -> + [S | "..."]; +pp_tail({_, Len}=E, _Col0, Col, Ll, M, _TInd, _Ind, LD, S, W) + when Len + 1 < Ll - Col - (LD + 1), + Len + 1 + W + (LD + 1) =< M, + ?ATM(E) -> + [S | write(E)]; +pp_tail(E, Col0, _Col, Ll, M, TInd, Ind, LD, S, _W) -> + [S, $\n, Ind | pp(E, Col0, Ll, M, TInd, Ind, LD + 1, 0)]. + +pp_element({_, Len}=E, Col, Ll, M, _TInd, _Ind, LD, W) + when Len < Ll - Col - LD, Len + W + LD =< M, ?ATM(E) -> + {write(E), Len}; +pp_element(E, Col, Ll, M, TInd, Ind, LD, W) -> + {pp(E, Col, Ll, M, TInd, Ind, LD, W), Ll}. % force nl + +%% Reuse the list created by io_lib:write_binary()... +pp_binary([LT,LT,S,GT,GT], Col, Ll, M, Ind, LD, W) -> + N = erlang:max(8, erlang:min(Ll - Col, M - 4 - W) - LD), + [LT,LT,pp_binary(S, N, N, Ind),GT,GT]. + +pp_binary([BS, $, | S], N, N0, Ind) -> + Len = length(BS) + 1, + case N - Len of + N1 when N1 < 0 -> + [$\n, Ind, BS, $, | pp_binary(S, N0 - Len, N0, Ind)]; + N1 -> + [BS, $, | pp_binary(S, N1, N0, Ind)] + end; +pp_binary([BS1, $:, BS2]=S, N, _N0, Ind) + when length(BS1) + length(BS2) + 1 > N -> + [$\n, Ind, S]; +pp_binary(S, N, _N0, Ind) -> + case iolist_size(S) > N of + true -> + [$\n, Ind, S]; + false -> + S + end. + +write({{tuple, _IsTagged, L}, _}) -> + [${, write_list(L, $,), $}]; +write({{list, L}, _}) -> + [$[, write_list(L, $|), $]]; +write({{record, [{Name,_} | L]}, _}) -> + [Name, ${, write_fields(L), $}]; +write({{bin, S}, _}) -> + S; +write({S, _}) -> + S. + +write_fields([]) -> + ""; +write_fields({dots, _}) -> + "..."; +write_fields([F | Fs]) -> + [write_field(F) | write_fields_tail(Fs)]. + +write_fields_tail([]) -> + ""; +write_fields_tail({dots, _}) -> + ",..."; +write_fields_tail([F | Fs]) -> + [$,, write_field(F) | write_fields_tail(Fs)]. + +write_field({{field, Name, _NameL, F}, _}) -> + [Name, " = " | write(F)]. + +write_list({dots, _}, _S) -> + "..."; +write_list([E | Es], S) -> + [write(E) | write_tail(Es, S)]. + +write_tail([], _S) -> + []; +write_tail([E | Es], S) -> + [$,, write(E) | write_tail(Es, S)]; +write_tail({dots, _}, S) -> + [S | "..."]; +write_tail(E, S) -> + [S | write(E)]. + +%% The depth (D) is used for extracting and counting the characters to +%% print. The structure is kept so that the returned intermediate +%% format can be formatted. The separators (list, tuple, record) are +%% counted but need to be added later. + +%% D =/= 0 +print_length([], _D, _RF) -> + {"[]", 2}; +print_length({}, _D, _RF) -> + {"{}", 2}; +print_length(List, D, RF) when is_list(List) -> + case printable_list(List, D) of + true -> + S = io_lib:write_string(List, $"), %" + {S, length(S)}; + %% Truncated lists could break some existing code. + % {true, Prefix} -> + % S = io_lib:write_string(Prefix, $"), %" + % {[S | "..."], 3 + length(S)}; + false -> + print_length_list(List, D, RF) + end; +print_length(Fun, _D, _RF) when is_function(Fun) -> + S = io_lib:write(Fun), + {S, iolist_size(S)}; +print_length(R, D, RF) when is_atom(element(1, R)), + is_function(RF) -> + case RF(element(1, R), tuple_size(R) - 1) of + no -> + print_length_tuple(R, D, RF); + RDefs -> + print_length_record(R, D, RF, RDefs) + end; +print_length(Tuple, D, RF) when is_tuple(Tuple) -> + print_length_tuple(Tuple, D, RF); +print_length(<<>>, _D, _RF) -> + {"<<>>", 4}; +print_length(<<_/bitstring>>, 1, _RF) -> + {"<<...>>", 7}; +print_length(<<_/bitstring>>=Bin, D, _RF) -> + case bit_size(Bin) rem 8 of + 0 -> + D1 = D - 1, + case printable_bin(Bin, D1) of + List when is_list(List) -> + S = io_lib:write_string(List, $"), + {[$<,$<,S,$>,$>], 4 + length(S)}; + {true, Prefix} -> + S = io_lib:write_string(Prefix, $"), + {[$<,$<, S | "...>>"], 4 + length(S)}; + false -> + S = io_lib:write(Bin, D), + {{bin,S}, iolist_size(S)} + end; + _ -> + S = io_lib:write(Bin, D), + {{bin,S}, iolist_size(S)} + end; +print_length(Term, _D, _RF) -> + S = io_lib:write(Term), + {S, iolist_size(S)}. + +print_length_tuple(_Tuple, 1, _RF) -> + {"{...}", 5}; +print_length_tuple(Tuple, D, RF) -> + L = print_length_list1(tuple_to_list(Tuple), D, RF), + IsTagged = is_atom(element(1, Tuple)) and (tuple_size(Tuple) > 1), + {{tuple,IsTagged,L}, list_length(L, 2)}. + +print_length_record(_Tuple, 1, _RF, _RDefs) -> + {"{...}", 5}; +print_length_record(Tuple, D, RF, RDefs) -> + Name = [$# | io_lib:write_atom(element(1, Tuple))], + NameL = length(Name), + L = print_length_fields(RDefs, D - 1, tl(tuple_to_list(Tuple)), RF), + {{record, [{Name,NameL} | L]}, list_length(L, NameL + 2)}. + +print_length_fields([], _D, [], _RF) -> + []; +print_length_fields(_, 1, _, _RF) -> + {dots, 3}; +print_length_fields([Def | Defs], D, [E | Es], RF) -> + [print_length_field(Def, D - 1, E, RF) | + print_length_fields(Defs, D - 1, Es, RF)]. + +print_length_field(Def, D, E, RF) -> + Name = io_lib:write_atom(Def), + {S, L} = print_length(E, D, RF), + NameL = length(Name) + 3, + {{field, Name, NameL, {S, L}}, NameL + L}. + +print_length_list(List, D, RF) -> + L = print_length_list1(List, D, RF), + {{list, L}, list_length(L, 2)}. + +print_length_list1([], _D, _RF) -> + []; +print_length_list1(_, 1, _RF) -> + {dots, 3}; +print_length_list1([E | Es], D, RF) -> + [print_length(E, D - 1, RF) | print_length_list1(Es, D - 1, RF)]; +print_length_list1(E, D, RF) -> + print_length(E, D - 1, RF). + +list_length([], Acc) -> + Acc; +list_length([{_, Len} | Es], Acc) -> + list_length_tail(Es, Acc + Len); +list_length({_, Len}, Acc) -> + Acc + Len. + +list_length_tail([], Acc) -> + Acc; +list_length_tail([{_,Len} | Es], Acc) -> + list_length_tail(Es, Acc + 1 + Len); +list_length_tail({_, Len}, Acc) -> + Acc + 1 + Len. + +%% ?CHARS printable characters has depth 1. +-define(CHARS, 4). + +printable_list(L, D) when D < 0 -> + io_lib:printable_list(L); +printable_list(_L, 1) -> + false; +printable_list(L, _D) -> + io_lib:printable_list(L). +%% Truncated lists could break some existing code. +% printable_list(L, D) -> +% Len = ?CHARS * (D - 1), +% case printable_list1(L, Len) of +% all -> +% true; +% N when is_integer(N), Len - N >= D - 1 -> +% {L1, _} = lists:split(Len - N, L), +% {true, L1}; +% N when is_integer(N) -> +% false +% end. + +printable_bin(Bin, D) when D >= 0, ?CHARS * D =< byte_size(Bin) -> + printable_bin(Bin, erlang:min(?CHARS * D, byte_size(Bin)), D); +printable_bin(Bin, D) -> + printable_bin(Bin, byte_size(Bin), D). + +printable_bin(Bin, Len, D) -> + N = erlang:min(20, Len), + L = binary_to_list(Bin, 1, N), + case printable_list1(L, N) of + all when N =:= byte_size(Bin) -> + L; + all when N =:= Len -> % N < byte_size(Bin) + {true, L}; + all -> + case printable_bin1(Bin, 1 + N, Len - N) of + 0 when byte_size(Bin) =:= Len -> + binary_to_list(Bin); + NC when D > 0, Len - NC >= D -> + {true, binary_to_list(Bin, 1, Len - NC)}; + NC when is_integer(NC) -> + false + end; + NC when is_integer(NC), D > 0, N - NC >= D -> + {true, binary_to_list(Bin, 1, N - NC)}; + NC when is_integer(NC) -> + false + end. + +printable_bin1(_Bin, _Start, 0) -> + 0; +printable_bin1(Bin, Start, Len) -> + N = erlang:min(10000, Len), + L = binary_to_list(Bin, Start, Start + N - 1), + case printable_list1(L, N) of + all -> + printable_bin1(Bin, Start + N, Len - N); + NC when is_integer(NC) -> + Len - (N - NC) + end. + +%% -> all | integer() >=0. Adopted from io_lib.erl. +% printable_list1([_ | _], 0) -> 0; +printable_list1([C | Cs], N) when is_integer(C), C >= $\s, C =< $~ -> + printable_list1(Cs, N - 1); +printable_list1([C | Cs], N) when is_integer(C), C >= $\240, C =< $\377 -> + printable_list1(Cs, N - 1); +printable_list1([$\n | Cs], N) -> printable_list1(Cs, N - 1); +printable_list1([$\r | Cs], N) -> printable_list1(Cs, N - 1); +printable_list1([$\t | Cs], N) -> printable_list1(Cs, N - 1); +printable_list1([$\v | Cs], N) -> printable_list1(Cs, N - 1); +printable_list1([$\b | Cs], N) -> printable_list1(Cs, N - 1); +printable_list1([$\f | Cs], N) -> printable_list1(Cs, N - 1); +printable_list1([$\e | Cs], N) -> printable_list1(Cs, N - 1); +printable_list1([], _) -> all; +printable_list1(_, N) -> N. + +%% Throw 'no_good' if the indentation exceeds half the line length +%% unless there is room for M characters on the line. + +cind({_S, Len}, Col, Ll, M, Ind, LD, W) when Len < Ll - Col - LD, + Len + W + LD =< M -> + Ind; +cind({{list,L}, _Len}, Col, Ll, M, Ind, LD, W) -> + cind_list(L, Col + 1, Ll, M, Ind, LD, W + 1); +cind({{tuple,true,L}, _Len}, Col, Ll, M, Ind, LD, W) -> + cind_tag_tuple(L, Col, Ll, M, Ind, LD, W + 1); +cind({{tuple,false,L}, _Len}, Col, Ll, M, Ind, LD, W) -> + cind_list(L, Col + 1, Ll, M, Ind, LD, W + 1); +cind({{record,[{_Name,NLen} | L]}, _Len}, Col, Ll, M, Ind, LD, W) -> + cind_record(L, NLen, Col, Ll, M, Ind, LD, W + NLen + 1); +cind({{bin,_S}, _Len}, _Col, _Ll, _M, Ind, _LD, _W) -> + Ind; +cind({_S, _Len}, _Col, _Ll, _M, Ind, _LD, _W) -> + Ind. + +cind_tag_tuple([{_Tag,Tlen} | L], Col, Ll, M, Ind, LD, W) -> + TagInd = Tlen + 2, + Tcol = Col + TagInd, + if + Ind > 0, TagInd > Ind -> + Col1 = Col + Ind, + if + M + Col1 =< Ll; Col1 =< Ll div 2 -> + cind_tail(L, Col1, Tcol, Ll, M, Ind, LD, W + Tlen); + true -> + throw(no_good) + end; + M + Tcol < Ll; Tcol < Ll div 2 -> + cind_list(L, Tcol, Ll, M, Ind, LD, W + Tlen + 1); + true -> + throw(no_good) + end. + +cind_record([F | Fs], Nlen, Col0, Ll, M, Ind, LD, W0) -> + Nind = Nlen + 1, + {Col, W} = cind_rec(Nind, Col0, Ll, M, Ind, W0), + FW = cind_field(F, Col, Ll, M, Ind, last_depth(Fs, LD), W), + cind_fields_tail(Fs, Col, Col + FW, Ll, M, Ind, LD, W + FW); +cind_record(_, _Nlen, _Col, _Ll, _M, Ind, _LD, _W) -> + Ind. + +cind_fields_tail([{_, Len}=F | Fs], Col0, Col, Ll, M, Ind, LD, W) -> + LD1 = last_depth(Fs, LD), + ELen = 1 + Len, + if + LD1 =:= 0, ELen + 1 < Ll - Col, W + ELen + 1 =< M, ?ATM_FLD(F); + LD1 > 0, ELen < Ll - Col - LD1, W + ELen + LD1 =< M, ?ATM_FLD(F) -> + cind_fields_tail(Fs, Col0, Col + ELen, Ll, M, Ind, LD, W + ELen); + true -> + FW = cind_field(F, Col0, Ll, M, Ind, LD1, 0), + cind_fields_tail(Fs, Col0, Col + FW, Ll, M, Ind, LD, FW) + end; +cind_fields_tail(_, _Col0, _Col, _Ll, _M, Ind, _LD, _W) -> + Ind. + +cind_field({{field, _N, _NL, _F}, Len}=Fl, Col, Ll, M, _Ind, LD, W) + when Len < Ll - Col - LD, Len + W + LD =< M -> + if + ?ATM_FLD(Fl) -> + Len; + true -> + Ll + end; +cind_field({{field, _Name, NameL, F}, _Len}, Col0, Ll, M, Ind, LD, W0) -> + {Col, W} = cind_rec(NameL, Col0, Ll, M, Ind, W0 + NameL), + cind(F, Col, Ll, M, Ind, LD, W), + Ll. + +cind_rec(RInd, Col0, Ll, M, Ind, W0) -> + Nl = (Ind > 0) and (RInd > Ind), + DCol = case Nl of + true -> Ind; + false -> RInd + end, + Col = Col0 + DCol, + if + M + Col =< Ll; Col =< Ll div 2 -> + W = case Nl of + true -> 0; + false -> W0 + end, + {Col, W}; + true -> + throw(no_good) + end. + +cind_list({dots, _}, _Col0, _Ll, _M, Ind, _LD, _W) -> + Ind; +cind_list([E | Es], Col0, Ll, M, Ind, LD, W) -> + WE = cind_element(E, Col0, Ll, M, Ind, last_depth(Es, LD), W), + cind_tail(Es, Col0, Col0 + WE, Ll, M, Ind, LD, W + WE). + +cind_tail([], _Col0, _Col, _Ll, _M, Ind, _LD, _W) -> + Ind; +cind_tail([{_, Len}=E | Es], Col0, Col, Ll, M, Ind, LD, W) -> + LD1 = last_depth(Es, LD), + ELen = 1 + Len, + if + LD1 =:= 0, ELen + 1 < Ll - Col, W + ELen + 1 =< M, ?ATM(E); + LD1 > 0, ELen < Ll - Col - LD1, W + ELen + LD1 =< M, ?ATM(E) -> + cind_tail(Es, Col0, Col + ELen, Ll, M, Ind, LD, W + ELen); + true -> + WE = cind_element(E, Col0, Ll, M, Ind, LD1, 0), + cind_tail(Es, Col0, Col0 + WE, Ll, M, Ind, LD, WE) + end; +cind_tail({dots, _}, _Col0, _Col, _Ll, _M, Ind, _LD, _W) -> + Ind; +cind_tail({_, Len}=E, _Col0, Col, Ll, M, Ind, LD, W) + when Len + 1 < Ll - Col - (LD + 1), + Len + 1 + W + (LD + 1) =< M, + ?ATM(E) -> + Ind; +cind_tail(E, _Col0, Col, Ll, M, Ind, LD, _W) -> + cind(E, Col, Ll, M, Ind, LD + 1, 0). + +cind_element({_, Len}=E, Col, Ll, M, _Ind, LD, W) + when Len < Ll - Col - LD, Len + W + LD =< M, ?ATM(E) -> + Len; +cind_element(E, Col, Ll, M, Ind, LD, W) -> + cind(E, Col, Ll, M, Ind, LD, W), + Ll. + +last_depth([_ | _], _LD) -> + 0; +last_depth(_, LD) -> + LD + 1. + +while_fail([], _F, V) -> + V; +while_fail([A | As], F, V) -> + try F(A) catch _ -> while_fail(As, F, V) end. + +indent(N) when is_integer(N), N > 0 -> + chars($\s, N-1). + +indent(1, Ind) -> % Optimization of common case + [$\s | Ind]; +indent(4, Ind) -> % Optimization of common case + S2 = [$\s, $\s], + [S2, S2 | Ind]; +indent(N, Ind) when is_integer(N), N > 0 -> + [chars($\s, N) | Ind]. + +%% A deep version of string:chars/2 +chars(_C, 0) -> + []; +chars(C, 2) -> + [C, C]; +chars(C, 3) -> + [C, C, C]; +chars(C, N) when (N band 1) =:= 0 -> + S = chars(C, N bsr 1), + [S | S]; +chars(C, N) -> + S = chars(C, N bsr 1), + [C, S | S]. diff --git a/lib/stdlib/src/lib.erl b/lib/stdlib/src/lib.erl new file mode 100644 index 0000000000..b2cfb00de9 --- /dev/null +++ b/lib/stdlib/src/lib.erl @@ -0,0 +1,452 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(lib). + +-export([flush_receive/0, error_message/2, progname/0, nonl/1, send/2, + sendw/2, eval_str/1]). + +-export([format_exception/6, format_stacktrace/4, + format_call/4, format_fun/1]). + +-spec flush_receive() -> 'ok'. + +flush_receive() -> + receive + _Any -> + flush_receive() + after + 0 -> + ok + end. + +%% +%% Functions for doing standard system format i/o. +%% +-spec error_message(atom() | string() | binary(), [term()]) -> 'ok'. + +error_message(Format, Args) -> + io:format(<<"** ~s **\n">>, [io_lib:format(Format, Args)]). + +%% Return the name of the script that starts (this) erlang +%% +-spec progname() -> atom(). + +progname() -> + case init:get_argument(progname) of + {ok, [[Prog]]} -> + list_to_atom(Prog); + _Other -> + no_prog_name + end. + +-spec nonl(string()) -> string(). + +nonl([10]) -> []; +nonl([]) -> []; +nonl([H|T]) -> [H|nonl(T)]. + +-spec send(pid() | atom() | {atom(), node()}, term()) -> term(). + +send(To, Msg) -> To ! Msg. + +-spec sendw(pid() | atom() | {atom(), node()}, term()) -> term(). + +sendw(To, Msg) -> + To ! {self(), Msg}, + receive + Reply -> Reply + end. + +%% eval_str(InStr) -> {ok, OutStr} | {error, ErrStr'} +%% InStr must represent a body + +-define(result(F,D), lists:flatten(io_lib:format(F, D))). + +-spec eval_str(string() | binary()) -> {'ok', string()} | {'error', string()}. + +eval_str(Str) when is_list(Str) -> + case erl_scan:tokens([], Str, 0) of + {more, _} -> + {error, "Incomplete form (missing .)??"}; + {done, {ok, Toks, _}, Rest} -> + case all_white(Rest) of + true -> + case erl_parse:parse_exprs(Toks) of + {ok, Exprs} -> + case catch erl_eval:exprs(Exprs, []) of + {value, Val, _} -> + {ok, Val}; + Other -> + {error, ?result("*** eval: ~p", [Other])} + end; + {error, {_Line, Mod, Args}} -> + Msg = ?result("*** ~s",[Mod:format_error(Args)]), + {error, Msg} + end; + false -> + {error, ?result("Non-white space found after " + "end-of-form :~s", [Rest])} + end + end; +eval_str(Bin) when is_binary(Bin) -> + eval_str(binary_to_list(Bin)). + +all_white([$\s|T]) -> all_white(T); +all_white([$\n|T]) -> all_white(T); +all_white([$\t|T]) -> all_white(T); +all_white([]) -> true; +all_white(_) -> false. + +%%% Formatting of exceptions, mfa:s and funs. + +%% -> iolist() (no \n at end) +%% I is the current column, starting from 1 (it will be used +%% as indentation whenever newline has been inserted); +%% Class, Reason and StackTrace are the exception; +%% FormatFun = fun(Term, I) -> iolist() formats terms; +%% StackFun = fun(Mod, Fun, Arity) -> bool() is used for trimming the +%% end of the stack (typically calls to erl_eval are skipped). +format_exception(I, Class, Reason, StackTrace, StackFun, FormatFun) + when is_integer(I), I >= 1, is_function(StackFun, 3), + is_function(FormatFun, 2) -> + S = n_spaces(I-1), + {Term,Trace1,Trace} = analyze_exception(Class, Reason, StackTrace), + Expl0 = explain_reason(Term, Class, Trace1, FormatFun, S), + Expl = io_lib:fwrite(<<"~s~s">>, [exited(Class), Expl0]), + case format_stacktrace1(S, Trace, FormatFun, StackFun) of + [] -> Expl; + Stack -> [Expl, $\n, Stack] + end. + +%% -> iolist() (no \n at end) +format_stacktrace(I, StackTrace, StackFun, FormatFun) + when is_integer(I), I >= 1, is_function(StackFun, 3), + is_function(FormatFun, 2) -> + S = n_spaces(I-1), + format_stacktrace1(S, StackTrace, FormatFun, StackFun). + +%% -> iolist() (no \n at end) +format_call(I, ForMForFun, As, FormatFun) when is_integer(I), I >= 1, + is_list(As), + is_function(FormatFun, 2) -> + format_call("", n_spaces(I-1), ForMForFun, As, FormatFun). + +%% -> iolist() (no \n at end) +format_fun(Fun) when is_function(Fun) -> + {module, M} = erlang:fun_info(Fun, module), + {name, F} = erlang:fun_info(Fun, name), + {arity, A} = erlang:fun_info(Fun, arity), + case erlang:fun_info(Fun, type) of + {type, local} when F =:= "" -> + io_lib:fwrite(<<"~w">>, [Fun]); + {type, local} when M =:= erl_eval -> + io_lib:fwrite(<<"interpreted function with arity ~w">>, [A]); + {type, local} -> + mfa_to_string(M, F, A); + {type, external} -> + mfa_to_string(M, F, A) + end. + +analyze_exception(error, Term, Stack) -> + case {is_stacktrace(Stack), Stack, Term} of + {true, [{_M,_F,As}=MFA|MFAs], function_clause} when is_list(As) -> + {Term,[MFA],MFAs}; + {true, [{shell,F,A}], function_clause} when is_integer(A) -> + {Term, [{F,A}], []}; + {true, [{_M,_F,_AorAs}=MFA|MFAs], undef} -> + {Term,[MFA],MFAs}; + {true, _, _} -> + {Term,[],Stack}; + {false, _, _} -> + {{Term,Stack},[],[]} + end; +analyze_exception(_Class, Term, Stack) -> + case is_stacktrace(Stack) of + true -> + {Term,[],Stack}; + false -> + {{Term,Stack},[],[]} + end. + +is_stacktrace([]) -> + true; +is_stacktrace([{M,F,A}|Fs]) when is_atom(M), is_atom(F), is_integer(A) -> + is_stacktrace(Fs); +is_stacktrace([{M,F,As}|Fs]) when is_atom(M), is_atom(F), length(As) >= 0 -> + is_stacktrace(Fs); +is_stacktrace(_) -> + false. + +%% ERTS exit codes (some of them are also returned by erl_eval): +explain_reason(badarg, error, [], _PF, _S) -> + <<"bad argument">>; +explain_reason({badarg,V}, error=Cl, [], PF, S) -> % orelse, andalso + format_value(V, <<"bad argument: ">>, Cl, PF, S); +explain_reason(badarith, error, [], _PF, _S) -> + <<"bad argument in an arithmetic expression">>; +explain_reason({badarity,{Fun,As}}, error, [], _PF, _S) + when is_function(Fun) -> + %% Only the arity is displayed, not the arguments As. + io_lib:fwrite(<<"~s called with ~s">>, + [format_fun(Fun), argss(length(As))]); +explain_reason({badfun,Term}, error=Cl, [], PF, S) -> + format_value(Term, <<"bad function ">>, Cl, PF, S); +explain_reason({badmatch,Term}, error=Cl, [], PF, S) -> + format_value(Term, <<"no match of right hand side value ">>, Cl, PF, S); +explain_reason({case_clause,V}, error=Cl, [], PF, S) -> + %% "there is no case clause with a true guard sequence and a + %% pattern matching..." + format_value(V, <<"no case clause matching ">>, Cl, PF, S); +explain_reason(function_clause, error, [{F,A}], _PF, _S) -> + %% Shell commands + FAs = io_lib:fwrite(<<"~w/~w">>, [F, A]), + [<<"no function clause matching call to ">> | FAs]; +explain_reason(function_clause, error=Cl, [{M,F,As}], PF, S) -> + Str = <<"no function clause matching ">>, + format_errstr_call(Str, Cl, {M,F}, As, PF, S); +explain_reason(if_clause, error, [], _PF, _S) -> + <<"no true branch found when evaluating an if expression">>; +explain_reason(noproc, error, [], _PF, _S) -> + <<"no such process or port">>; +explain_reason(notalive, error, [], _PF, _S) -> + <<"the node cannot be part of a distributed system">>; +explain_reason(system_limit, error, [], _PF, _S) -> + <<"a system limit has been reached">>; +explain_reason(timeout_value, error, [], _PF, _S) -> + <<"bad receive timeout value">>; +explain_reason({try_clause,V}, error=Cl, [], PF, S) -> + %% "there is no try clause with a true guard sequence and a + %% pattern matching..." + format_value(V, <<"no try clause matching ">>, Cl, PF, S); +explain_reason(undef, error, [{M,F,A}], _PF, _S) -> + %% Only the arity is displayed, not the arguments, if there are any. + io_lib:fwrite(<<"undefined function ~s">>, + [mfa_to_string(M, F, n_args(A))]); +explain_reason({shell_undef,F,A}, error, [], _PF, _S) -> + %% Give nicer reports for undefined shell functions + %% (but not when the user actively calls shell_default:F(...)). + io_lib:fwrite(<<"undefined shell command ~s/~w">>, [F, n_args(A)]); +%% Exit codes returned by erl_eval only: +explain_reason({argument_limit,_Fun}, error, [], _PF, _S) -> + io_lib:fwrite(<<"limit of number of arguments to interpreted function" + " exceeded">>, []); +explain_reason({bad_filter,V}, error=Cl, [], PF, S) -> + format_value(V, <<"bad filter ">>, Cl, PF, S); +explain_reason({bad_generator,V}, error=Cl, [], PF, S) -> + format_value(V, <<"bad generator ">>, Cl, PF, S); +explain_reason({unbound,V}, error, [], _PF, _S) -> + io_lib:fwrite(<<"variable ~w is unbound">>, [V]); +%% Exit codes local to the shell module (restricted shell): +explain_reason({restricted_shell_bad_return, V}, exit=Cl, [], PF, S) -> + Str = <<"restricted shell module returned bad value ">>, + format_value(V, Str, Cl, PF, S); +explain_reason({restricted_shell_disallowed,{ForMF,As}}, + exit=Cl, [], PF, S) -> + %% ForMF can be a fun, but not a shell fun. + Str = <<"restricted shell does not allow ">>, + format_errstr_call(Str, Cl, ForMF, As, PF, S); +explain_reason(restricted_shell_started, exit, [], _PF, _S) -> + <<"restricted shell starts now">>; +explain_reason(restricted_shell_stopped, exit, [], _PF, _S) -> + <<"restricted shell stopped">>; +%% Other exit code: +explain_reason(Reason, Class, [], PF, S) -> + PF(Reason, (iolist_size(S)+1) + exited_size(Class)). + +n_args(A) when is_integer(A) -> + A; +n_args(As) when is_list(As) -> + length(As). + +argss(0) -> + <<"no arguments">>; +argss(1) -> + <<"one argument">>; +argss(2) -> + <<"two arguments">>; +argss(I) -> + io_lib:fwrite(<<"~w arguments">>, [I]). + +format_stacktrace1(S0, Stack0, PF, SF) -> + Stack1 = lists:dropwhile(fun({M,F,A}) -> SF(M, F, A) + end, lists:reverse(Stack0)), + S = [" " | S0], + Stack = lists:reverse(Stack1), + format_stacktrace2(S, Stack, 1, PF). + +format_stacktrace2(S, [{M,F,A}|Fs], N, PF) when is_integer(A) -> + [io_lib:fwrite(<<"~s~s ~s">>, + [sep(N, S), origin(N, M, F, A), mfa_to_string(M, F, A)]) + | format_stacktrace2(S, Fs, N + 1, PF)]; +format_stacktrace2(S, [{M,F,As}|Fs], N, PF) when is_list(As) -> + A = length(As), + CalledAs = [S,<<" called as ">>], + C = format_call("", CalledAs, {M,F}, As, PF), + [io_lib:fwrite(<<"~s~s ~s\n~s~s">>, + [sep(N, S), origin(N, M, F, A), mfa_to_string(M, F, A), + CalledAs, C]) + | format_stacktrace2(S, Fs, N + 1, PF)]; +format_stacktrace2(_S, [], _N, _PF) -> + "". + +sep(1, S) -> S; +sep(_, S) -> [$\n | S]. + +origin(1, M, F, A) -> + case is_op({M, F}, n_args(A)) of + {yes, F} -> <<"in operator ">>; + no -> <<"in function ">> + end; +origin(_N, _M, _F, _A) -> + <<"in call from">>. + +format_errstr_call(ErrStr, Class, ForMForFun, As, PF, Pre0) -> + Pre1 = [Pre0 | n_spaces(exited_size(Class))], + format_call(ErrStr, Pre1, ForMForFun, As, PF). + +format_call(ErrStr, Pre1, ForMForFun, As, PF) -> + Arity = length(As), + [ErrStr | + case is_op(ForMForFun, Arity) of + {yes,Op} -> + format_op(ErrStr, Pre1, Op, As, PF); + no -> + MFs = mf_to_string(ForMForFun, Arity), + I1 = iolist_size([Pre1,ErrStr|MFs]), + S1 = pp_arguments(PF, As, I1), + S2 = pp_arguments(PF, As, iolist_size([Pre1|MFs])), + Long = count_nl(pp_arguments(PF, [a2345,b2345], I1)) > 0, + case Long or (count_nl(S2) < count_nl(S1)) of + true -> + [$\n, Pre1, MFs, S2]; + false -> + [MFs, S1] + end + end]. + +format_op(ErrStr, Pre, Op, [A1], PF) -> + OpS = io_lib:fwrite(<<"~s ">>, [Op]), + I1 = iolist_size([ErrStr,Pre,OpS]), + [OpS | PF(A1, I1+1)]; +format_op(ErrStr, Pre, Op, [A1, A2], PF) -> + I1 = iolist_size([ErrStr,Pre]), + S1 = PF(A1, I1+1), + S2 = PF(A2, I1+1), + OpS = atom_to_list(Op), + Pre1 = [$\n | n_spaces(I1)], + case count_nl(S1) > 0 of + true -> + [S1,Pre1,OpS,Pre1|S2]; + false -> + OpS2 = io_lib:fwrite(<<" ~s ">>, [Op]), + S2_2 = PF(A2, iolist_size([ErrStr,Pre,S1|OpS2])+1), + case count_nl(S2) < count_nl(S2_2) of + true -> + [S1,Pre1,OpS,Pre1|S2]; + false -> + [S1,OpS2|S2_2] + end + end. + +pp_arguments(PF, As, I) -> + case {As, io_lib:printable_list(As)} of + {[Int | T], true} -> + L = integer_to_list(Int), + Ll = length(L), + A = list_to_atom(lists:duplicate(Ll, $a)), + S0 = binary_to_list(iolist_to_binary(PF([A | T], I+1))), + brackets_to_parens([$[,L,string:sub_string(S0, 2+Ll)]); + _ -> + brackets_to_parens(PF(As, I+1)) + end. + +brackets_to_parens(S) -> + B = iolist_to_binary(S), + Sz = byte_size(B) - 2, + <<$[,R:Sz/binary,$]>> = B, + [$(,R,$)]. + +mfa_to_string(M, F, A) -> + io_lib:fwrite(<<"~s/~w">>, [mf_to_string({M, F}, A), A]). + +mf_to_string({M, F}, A) -> + case erl_internal:bif(M, F, A) of + true -> + io_lib:fwrite(<<"~w">>, [F]); + false -> + case is_op({M, F}, A) of + {yes, '/'} -> + io_lib:fwrite(<<"~w">>, [F]); + {yes, F} -> + atom_to_list(F); + no -> + io_lib:fwrite(<<"~w:~w">>, [M, F]) + end + end; +mf_to_string(Fun, _A) when is_function(Fun) -> + format_fun(Fun); +mf_to_string(F, _A) -> + io_lib:fwrite(<<"~w">>, [F]). + +format_value(V, ErrStr, Class, PF, S) -> + Pre1Sz = exited_size(Class), + S1 = PF(V, Pre1Sz + iolist_size([S, ErrStr])+1), + [ErrStr | case count_nl(S1) of + N1 when N1 > 1 -> + S2 = PF(V, iolist_size(S) + 1 + Pre1Sz), + case count_nl(S2) < N1 of + true -> + [$\n, S, n_spaces(Pre1Sz) | S2]; + false -> + S1 + end; + _ -> + S1 + end]. + +%% Handles deep lists, but not all iolists. +count_nl([E | Es]) -> + count_nl(E) + count_nl(Es); +count_nl($\n) -> + 1; +count_nl(Bin) when is_binary(Bin) -> + count_nl(binary_to_list(Bin)); +count_nl(_) -> + 0. + +n_spaces(N) -> + lists:duplicate(N, $\s). + +is_op(ForMForFun, A) -> + try + {erlang,F} = ForMForFun, + _ = erl_internal:op_type(F, A), + {yes,F} + catch error:_ -> no + end. + +exited_size(Class) -> + iolist_size(exited(Class)). + +exited(error) -> + <<"exception error: ">>; +exited(exit) -> + <<"exception exit: ">>; +exited(throw) -> + <<"exception throw: ">>. diff --git a/lib/stdlib/src/lists.erl b/lib/stdlib/src/lists.erl new file mode 100644 index 0000000000..e1f8d1c200 --- /dev/null +++ b/lib/stdlib/src/lists.erl @@ -0,0 +1,2462 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(lists). + +-export([append/2, append/1, subtract/2, reverse/1, + nth/2, nthtail/2, prefix/2, suffix/2, last/1, + seq/2, seq/3, sum/1, duplicate/2, min/1, max/1, sublist/2, sublist/3, + delete/2, + unzip/1, unzip3/1, zip/2, zip3/3, zipwith/3, zipwith3/4, + sort/1, merge/1, merge/2, rmerge/2, merge3/3, rmerge3/3, + usort/1, umerge/1, umerge3/3, umerge/2, rumerge3/3, rumerge/2, + concat/1, flatten/1, flatten/2, flat_length/1, flatlength/1, + keydelete/3, keyreplace/4, keytake/3, keystore/4, + keysort/2, keymerge/3, rkeymerge/3, rukeymerge/3, + ukeysort/2, ukeymerge/3, keymap/3]). + +%% Bifs: member/2, reverse/2 +%% Bifs: keymember/3, keysearch/3, keyfind/3 + +-export([merge/3, rmerge/3, sort/2, umerge/3, rumerge/3, usort/2]). + +-export([all/2,any/2,map/2,flatmap/2,foldl/3,foldr/3,filter/2, + partition/2,zf/2, + mapfoldl/3,mapfoldr/3,foreach/2,takewhile/2,dropwhile/2,splitwith/2, + split/2]). + +-deprecated([flat_length/1]). + +%% member(X, L) -> (true | false) +%% test if X is a member of the list L +%% Now a BIF! + +%member(X, [X|_]) -> true; +%member(X, [_|Y]) -> +% member(X, Y); +%member(X, []) -> false. + +%% append(X, Y) appends lists X and Y + +-spec append([T], [T]) -> [T]. + +append(L1, L2) -> L1 ++ L2. + +%% append(L) appends the list of lists L + +-spec append([[T]]) -> [T]. + +append([E]) -> E; +append([H|T]) -> H ++ append(T); +append([]) -> []. + +%% subtract(List1, List2) subtract elements in List2 form List1. + +-spec subtract([T], [T]) -> [T]. + +subtract(L1, L2) -> L1 -- L2. + +%% reverse(L) reverse all elements in the list L. Is now a BIF! + +-spec reverse([T]) -> [T]. + +reverse([] = L) -> + L; +reverse([_] = L) -> + L; +reverse([A, B]) -> + [B, A]; +reverse([A, B | L]) -> + lists:reverse(L, [B, A]). + +%reverse([H|T], Y) -> +% reverse(T, [H|Y]); +%reverse([], X) -> X. + + +%% nth(N, L) returns the N`th element of the list L +%% nthtail(N, L) returns the N`th tail of the list L + +-spec nth(pos_integer(), [T,...]) -> T. + +nth(1, [H|_]) -> H; +nth(N, [_|T]) when N > 1 -> + nth(N - 1, T). + +-spec nthtail(non_neg_integer(), [T,...]) -> [T]. + +nthtail(1, [_|T]) -> T; +nthtail(N, [_|T]) when N > 1 -> + nthtail(N - 1, T); +nthtail(0, L) when is_list(L) -> L. + +%% prefix(Prefix, List) -> (true | false) + +-spec prefix([T], [T]) -> boolean(). + +prefix([X|PreTail], [X|Tail]) -> + prefix(PreTail, Tail); +prefix([], List) when is_list(List) -> true; +prefix([_|_], List) when is_list(List) -> false. + +%% suffix(Suffix, List) -> (true | false) + +-spec suffix([T], [T]) -> boolean(). + +suffix(Suffix, List) -> + Delta = length(List) - length(Suffix), + Delta >= 0 andalso nthtail(Delta, List) =:= Suffix. + +%% last(List) returns the last element in a list. + +-spec last([T,...]) -> T. + +last([E|Es]) -> last(E, Es). + +last(_, [E|Es]) -> last(E, Es); +last(E, []) -> E. + +%% seq(Min, Max) -> [Min,Min+1, ..., Max] +%% seq(Min, Max, Incr) -> [Min,Min+Incr, ..., Max] +%% returns the sequence Min..Max +%% Min <= Max and Min and Max must be integers + +-spec seq(integer(), integer()) -> [integer()]. + +seq(First, Last) + when is_integer(First), is_integer(Last), First-1 =< Last -> + seq_loop(Last-First+1, Last, []). + +seq_loop(N, X, L) when N >= 4 -> + seq_loop(N-4, X-4, [X-3,X-2,X-1,X|L]); +seq_loop(N, X, L) when N >= 2 -> + seq_loop(N-2, X-2, [X-1,X|L]); +seq_loop(1, X, L) -> + [X|L]; +seq_loop(0, _, L) -> + L. + +-spec seq(integer(), integer(), integer()) -> [integer()]. + +seq(First, Last, Inc) + when is_integer(First), is_integer(Last), is_integer(Inc) -> + if + Inc > 0, First - Inc =< Last; + Inc < 0, First - Inc >= Last -> + N = (Last - First + Inc) div Inc, + seq_loop(N, Inc*(N-1)+First, Inc, []); + Inc =:= 0, First =:= Last -> + seq_loop(1, First, Inc, []) + end. + +seq_loop(N, X, D, L) when N >= 4 -> + Y = X-D, Z = Y-D, W = Z-D, + seq_loop(N-4, W-D, D, [W,Z,Y,X|L]); +seq_loop(N, X, D, L) when N >= 2 -> + Y = X-D, + seq_loop(N-2, Y-D, D, [Y,X|L]); +seq_loop(1, X, _, L) -> + [X|L]; +seq_loop(0, _, _, L) -> + L. + +%% sum(L) returns the sum of the elements in L + +-spec sum([number()]) -> number(). + +sum(L) -> sum(L, 0). + +sum([H|T], Sum) -> sum(T, Sum + H); +sum([], Sum) -> Sum. + +%% duplicate(N, X) -> [X,X,X,.....,X] (N times) +%% return N copies of X + +-spec duplicate(non_neg_integer(), T) -> [T]. + +duplicate(N, X) when is_integer(N), N >= 0 -> duplicate(N, X, []). + +duplicate(0, _, L) -> L; +duplicate(N, X, L) -> duplicate(N-1, X, [X|L]). + +%% min(L) -> returns the minimum element of the list L + +-spec min([T,...]) -> T. + +min([H|T]) -> min(T, H). + +min([H|T], Min) when H < Min -> min(T, H); +min([_|T], Min) -> min(T, Min); +min([], Min) -> Min. + +%% max(L) -> returns the maximum element of the list L + +-spec max([T,...]) -> T. + +max([H|T]) -> max(T, H). + +max([H|T], Max) when H > Max -> max(T, H); +max([_|T], Max) -> max(T, Max); +max([], Max) -> Max. + +%% sublist(List, Start, Length) +%% Returns the sub-list starting at Start of length Length. + +-spec sublist([T], pos_integer(), non_neg_integer()) -> [T]. + +sublist(List, S, L) when is_integer(L), L >= 0 -> + sublist(nthtail(S-1, List), L). + +-spec sublist([T], non_neg_integer()) -> [T]. + +sublist(List, L) when is_integer(L), is_list(List) -> + sublist_2(List, L). + +sublist_2([H|T], L) when L > 0 -> + [H|sublist_2(T, L-1)]; +sublist_2(_, 0) -> + []; +sublist_2(List, L) when is_list(List), L > 0 -> + []. + +%% delete(Item, List) -> List' +%% Delete the first occurrence of Item from the list L. + +-spec delete(T, [T]) -> [T]. + +delete(Item, [Item|Rest]) -> Rest; +delete(Item, [H|Rest]) -> + [H|delete(Item, Rest)]; +delete(_, []) -> []. + +%% Return [{X0, Y0}, {X1, Y1}, ..., {Xn, Yn}] for lists [X0, X1, ..., +%% Xn] and [Y0, Y1, ..., Yn]. + +-spec zip([A], [B]) -> [{A, B}]. + +zip([X | Xs], [Y | Ys]) -> [{X, Y} | zip(Xs, Ys)]; +zip([], []) -> []. + +%% Return {[X0, X1, ..., Xn], [Y0, Y1, ..., Yn]}, for a list [{X0, Y0}, +%% {X1, Y1}, ..., {Xn, Yn}]. + +-spec unzip([{A, B}]) -> {[A], [B]}. + +unzip(Ts) -> unzip(Ts, [], []). + +unzip([{X, Y} | Ts], Xs, Ys) -> unzip(Ts, [X | Xs], [Y | Ys]); +unzip([], Xs, Ys) -> {reverse(Xs), reverse(Ys)}. + +%% Return [{X0, Y0, Z0}, {X1, Y1, Z1}, ..., {Xn, Yn, Zn}] for lists [X0, +%% X1, ..., Xn], [Y0, Y1, ..., Yn] and [Z0, Z1, ..., Zn]. + +-spec zip3([A], [B], [C]) -> [{A, B, C}]. + +zip3([X | Xs], [Y | Ys], [Z | Zs]) -> [{X, Y, Z} | zip3(Xs, Ys, Zs)]; +zip3([], [], []) -> []. + +%% Return {[X0, X1, ..., Xn], [Y0, Y1, ..., Yn], [Z0, Z1, ..., Zn]}, for +%% a list [{X0, Y0, Z0}, {X1, Y1, Z1}, ..., {Xn, Yn, Zn}]. + +-spec unzip3([{A, B, C}]) -> {[A], [B], [C]}. + +unzip3(Ts) -> unzip3(Ts, [], [], []). + +unzip3([{X, Y, Z} | Ts], Xs, Ys, Zs) -> + unzip3(Ts, [X | Xs], [Y | Ys], [Z | Zs]); +unzip3([], Xs, Ys, Zs) -> + {reverse(Xs), reverse(Ys), reverse(Zs)}. + +%% Return [F(X0, Y0), F(X1, Y1), ..., F(Xn, Yn)] for lists [X0, X1, ..., +%% Xn] and [Y0, Y1, ..., Yn]. + +-spec zipwith(fun((X, Y) -> R), [X], [Y]) -> [R]. + +zipwith(F, [X | Xs], [Y | Ys]) -> [F(X, Y) | zipwith(F, Xs, Ys)]; +zipwith(F, [], []) when is_function(F, 2) -> []. + +%% Return [F(X0, Y0, Z0), F(X1, Y1, Z1), ..., F(Xn, Yn, Zn)] for lists +%% [X0, X1, ..., Xn], [Y0, Y1, ..., Yn] and [Z0, Z1, ..., Zn]. + +-spec zipwith3(fun((X, Y, Z) -> R), [X], [Y], [Z]) -> [R]. + +zipwith3(F, [X | Xs], [Y | Ys], [Z | Zs]) -> + [F(X, Y, Z) | zipwith3(F, Xs, Ys, Zs)]; +zipwith3(F, [], [], []) when is_function(F, 3) -> []. + +%% sort(List) -> L +%% sorts the list L + +-spec sort([T]) -> [T]. + +sort([X, Y | L] = L0) when X =< Y -> + case L of + [] -> + L0; + [Z] when Y =< Z -> + L0; + [Z] when X =< Z -> + [X, Z, Y]; + [Z] -> + [Z, X, Y]; + _ when X == Y -> + sort_1(Y, L, [X]); + _ -> + split_1(X, Y, L, [], []) + end; +sort([X, Y | L]) -> + case L of + [] -> + [Y, X]; + [Z] when X =< Z -> + [Y, X | L]; + [Z] when Y =< Z -> + [Y, Z, X]; + [Z] -> + [Z, Y, X]; + _ -> + split_2(X, Y, L, [], []) + end; +sort([_] = L) -> + L; +sort([] = L) -> + L. + +sort_1(X, [Y | L], R) when X == Y -> + sort_1(Y, L, [X | R]); +sort_1(X, [Y | L], R) when X < Y -> + split_1(X, Y, L, R, []); +sort_1(X, [Y | L], R) -> + split_2(X, Y, L, R, []); +sort_1(X, [], R) -> + lists:reverse(R, [X]). + +%% merge(List) -> L +%% merges a list of sorted lists + +-spec merge([T]) -> [T]. + +merge(L) -> + mergel(L, []). + +%% merge3(X, Y, Z) -> L +%% merges three sorted lists X, Y and Z + +-spec merge3([_], [_], [_]) -> [_]. + +merge3(L1, [], L3) -> + merge(L1, L3); +merge3(L1, L2, []) -> + merge(L1, L2); +merge3(L1, [H2 | T2], [H3 | T3]) -> + lists:reverse(merge3_1(L1, [], H2, T2, H3, T3), []). + +%% rmerge3(X, Y, Z) -> L +%% merges three reversed sorted lists X, Y and Z + +-spec rmerge3([_], [_], [_]) -> [_]. + +rmerge3(L1, [], L3) -> + rmerge(L1, L3); +rmerge3(L1, L2, []) -> + rmerge(L1, L2); +rmerge3(L1, [H2 | T2], [H3 | T3]) -> + lists:reverse(rmerge3_1(L1, [], H2, T2, H3, T3), []). + +%% merge(X, Y) -> L +%% merges two sorted lists X and Y + +-spec merge([_], [_]) -> [_]. + +merge(T1, []) -> + T1; +merge(T1, [H2 | T2]) -> + lists:reverse(merge2_1(T1, H2, T2, []), []). + +%% rmerge(X, Y) -> L +%% merges two reversed sorted lists X and Y + +%% reverse(rmerge(reverse(A),reverse(B))) is equal to merge(I,A,B). + +-spec rmerge([_], [_]) -> [_]. + +rmerge(T1, []) -> + T1; +rmerge(T1, [H2 | T2]) -> + lists:reverse(rmerge2_1(T1, H2, T2, []), []). + +%% concat(L) concatenate the list representation of the elements +%% in L - the elements in L can be atoms, numbers of strings. +%% Returns a list of characters. + +-type concat_thing() :: atom() | integer() | float() | string(). +-spec concat([concat_thing()]) -> string(). + +concat(List) -> + flatmap(fun thing_to_list/1, List). + +thing_to_list(X) when is_integer(X) -> integer_to_list(X); +thing_to_list(X) when is_float(X) -> float_to_list(X); +thing_to_list(X) when is_atom(X) -> atom_to_list(X); +thing_to_list(X) when is_list(X) -> X. %Assumed to be a string + +%% flatten(List) +%% flatten(List, Tail) +%% Flatten a list, adding optional tail. + +-spec flatten([_]) -> [_]. + +flatten(List) when is_list(List) -> + do_flatten(List, []). + +-spec flatten([_], [_]) -> [_]. + +flatten(List, Tail) when is_list(List), is_list(Tail) -> + do_flatten(List, Tail). + +do_flatten([H|T], Tail) when is_list(H) -> + do_flatten(H, do_flatten(T, Tail)); +do_flatten([H|T], Tail) -> + [H|do_flatten(T, Tail)]; +do_flatten([], Tail) -> + Tail. + +%% flat_length(List) (undocumented can be removed later) +%% Calculate the length of a list of lists. + +-spec flat_length([_]) -> non_neg_integer(). + +flat_length(List) -> flatlength(List). + +%% flatlength(List) +%% Calculate the length of a list of lists. + +-spec flatlength([_]) -> non_neg_integer(). + +flatlength(List) -> + flatlength(List, 0). + +flatlength([H|T], L) when is_list(H) -> + flatlength(H, flatlength(T, L)); +flatlength([_|T], L) -> + flatlength(T, L + 1); +flatlength([], L) -> L. + +%% keymember(Key, Index, [Tuple]) Now a BIF! +%% keysearch(Key, Index, [Tuple]) Now a BIF! +%% keydelete(Key, Index, [Tuple]) +%% keyreplace(Key, Index, [Tuple], NewTuple) +%% keytake(Key, Index, [Tuple]) +%% keystore(Key, Index, [Tuple], NewTuple) +%% keysort(Index, [Tuple]) +%% keymerge(Index, [Tuple], [Tuple]) +%% ukeysort(Index, [Tuple]) +%% ukeymerge(Index, [Tuple], [Tuple]) +%% keymap(Function, Index, [Tuple]) +%% keymap(Function, ExtraArgs, Index, [Tuple]) + +%keymember(K,N,L) when is_integer(N), N > 0 -> +% keymember3(K,N,L). + +%keymember3(Key, N, [T|Ts]) when element(N, T) == Key -> true; +%keymember3(Key, N, [T|Ts]) -> +% keymember3(Key, N, Ts); +%keymember3(Key, N, []) -> false. + +%keysearch(K, N, L) when is_integer(N), N > 0 -> +% keysearch3(K, N, L). + +%keysearch3(Key, N, [H|T]) when element(N, H) == Key -> +% {value, H}; +%keysearch3(Key, N, [H|T]) -> +% keysearch3(Key, N, T); +%keysearch3(Key, N, []) -> false. + +-spec keydelete(_, pos_integer(), [T]) -> [T]. + +keydelete(K, N, L) when is_integer(N), N > 0 -> + keydelete3(K, N, L). + +keydelete3(Key, N, [H|T]) when element(N, H) == Key -> T; +keydelete3(Key, N, [H|T]) -> + [H|keydelete3(Key, N, T)]; +keydelete3(_, _, []) -> []. + +-spec keyreplace(_, pos_integer(), [_], tuple()) -> [_]. + +keyreplace(K, N, L, New) when is_integer(N), N > 0, is_tuple(New) -> + keyreplace3(K, N, L, New). + +keyreplace3(Key, Pos, [Tup|Tail], New) when element(Pos, Tup) == Key -> + [New|Tail]; +keyreplace3(Key, Pos, [H|T], New) -> + [H|keyreplace3(Key, Pos, T, New)]; +keyreplace3(_, _, [], _) -> []. + +-spec keytake(_, pos_integer(), [_]) -> {'value', tuple(), [_]} | 'false'. + +keytake(Key, N, L) when is_integer(N), N > 0 -> + keytake(Key, N, L, []). + +keytake(Key, N, [H|T], L) when element(N, H) == Key -> + {value, H, lists:reverse(L, T)}; +keytake(Key, N, [H|T], L) -> + keytake(Key, N, T, [H|L]); +keytake(_K, _N, [], _L) -> false. + +-spec keystore(_, pos_integer(), [_], tuple()) -> [_]. +keystore(K, N, L, New) when is_integer(N), N > 0, is_tuple(New) -> + keystore2(K, N, L, New). + +keystore2(Key, N, [H|T], New) when element(N, H) == Key -> + [New|T]; +keystore2(Key, N, [H|T], New) -> + [H|keystore2(Key, N, T, New)]; +keystore2(_Key, _N, [], New) -> + [New]. + +-spec keysort(pos_integer(), [T]) -> [T] when is_subtype(T, tuple()). + +keysort(I, L) when is_integer(I), I > 0 -> + case L of + [] -> L; + [_] -> L; + [X, Y | T] -> + case {element(I, X), element(I, Y)} of + {EX, EY} when EX =< EY -> + case T of + [] -> + L; + [Z] -> + case element(I, Z) of + EZ when EY =< EZ -> + L; + EZ when EX =< EZ -> + [X, Z, Y]; + _EZ -> + [Z, X, Y] + end; + _ when X == Y -> + keysort_1(I, Y, EY, T, [X]); + _ -> + keysplit_1(I, X, EX, Y, EY, T, [], []) + end; + {EX, EY} -> + case T of + [] -> + [Y, X]; + [Z] -> + case element(I, Z) of + EZ when EX =< EZ -> + [Y, X | T]; + EZ when EY =< EZ -> + [Y, Z, X]; + _EZ -> + [Z, Y, X] + end; + _ -> + keysplit_2(I, X, EX, Y, EY, T, [], []) + end + end + end. + +keysort_1(I, X, EX, [Y | L], R) when X == Y -> + keysort_1(I, Y, EX, L, [X | R]); +keysort_1(I, X, EX, [Y | L], R) -> + case element(I, Y) of + EY when EX =< EY -> + keysplit_1(I, X, EX, Y, EY, L, R, []); + EY -> + keysplit_2(I, X, EX, Y, EY, L, R, []) + end; +keysort_1(_I, X, _EX, [], R) -> + lists:reverse(R, [X]). + +-spec keymerge(pos_integer(), [X], [Y]) -> + [R] when is_subtype(X, tuple()), is_subtype(Y, tuple()), is_subtype(R, tuple()). + +keymerge(Index, T1, L2) when is_integer(Index), Index > 0 -> + case L2 of + [] -> + T1; + [H2 | T2] -> + E2 = element(Index, H2), + M = keymerge2_1(Index, T1, E2, H2, T2, []), + lists:reverse(M, []) + end. + +%% reverse(rkeymerge(I,reverse(A),reverse(B))) is equal to keymerge(I,A,B). + +-spec rkeymerge(pos_integer(), [X], [Y]) -> + [R] when is_subtype(X, tuple()), is_subtype(Y, tuple()), is_subtype(R, tuple()). + +rkeymerge(Index, T1, L2) when is_integer(Index), Index > 0 -> + case L2 of + [] -> + T1; + [H2 | T2] -> + E2 = element(Index, H2), + M = rkeymerge2_1(Index, T1, E2, H2, T2, []), + lists:reverse(M, []) + end. + +-spec ukeysort(pos_integer(), [T]) -> [T] when is_subtype(T, tuple()). + +ukeysort(I, L) when is_integer(I), I > 0 -> + case L of + [] -> L; + [_] -> L; + [X, Y | T] -> + case {element(I, X), element(I, Y)} of + {EX, EY} when EX == EY -> + ukeysort_1(I, X, EX, T); + {EX, EY} when EX < EY -> + case T of + [] -> + L; + [Z] -> + case element(I, Z) of + EZ when EY == EZ -> + [X, Y]; + EZ when EY < EZ -> + [X, Y, Z]; + EZ when EZ == EX -> + [X, Y]; + EZ when EX =< EZ -> + [X, Z, Y]; + _EZ -> + [Z, X, Y] + end; + _ -> + ukeysplit_1(I, X, EX, Y, EY, T, [], []) + end; + {EX, EY} -> + case T of + [] -> + [Y, X]; + [Z] -> + case element(I, Z) of + EZ when EX == EZ -> + [Y, X]; + EZ when EX < EZ -> + [Y, X, Z]; + EZ when EY == EZ -> + [Y, X]; + EZ when EY =< EZ -> + [Y, Z, X]; + _EZ -> + [Z, Y, X] + end; + _ -> + ukeysplit_2(I, Y, EY, T, [X]) + end + end + end. + +ukeysort_1(I, X, EX, [Y | L]) -> + case element(I, Y) of + EY when EX == EY -> + ukeysort_1(I, X, EX, L); + EY when EX < EY -> + ukeysplit_1(I, X, EX, Y, EY, L, [], []); + EY -> + ukeysplit_2(I, Y, EY, L, [X]) + end; +ukeysort_1(_I, X, _EX, []) -> + [X]. + +-spec ukeymerge(pos_integer(), [X], [Y]) -> + [(X | Y)] when is_subtype(X, tuple()), is_subtype(Y, tuple()). + +ukeymerge(Index, L1, T2) when is_integer(Index), Index > 0 -> + case L1 of + [] -> + T2; + [H1 | T1] -> + E1 = element(Index, H1), + M = ukeymerge2_2(Index, T1, E1, H1, T2, []), + lists:reverse(M, []) + end. + +%% reverse(rukeymerge(I,reverse(A),reverse(B))) is equal to ukeymerge(I,A,B). + +-spec rukeymerge(pos_integer(), [X], [Y]) -> + [(X | Y)] when is_subtype(X, tuple()), is_subtype(Y, tuple()). + +rukeymerge(Index, T1, L2) when is_integer(Index), Index > 0 -> + case L2 of + [] -> + T1; + [H2 | T2] -> + E2 = element(Index, H2), + M = rukeymerge2_1(Index, T1, E2, T2, [], H2), + lists:reverse(M, []) + end. + +-spec keymap(fun((_) -> _), pos_integer(), [tuple()]) -> [tuple()]. + +keymap(Fun, Index, [Tup|Tail]) -> + [setelement(Index, Tup, Fun(element(Index, Tup)))|keymap(Fun, Index, Tail)]; +keymap(Fun, Index, []) when is_integer(Index), Index >= 1, + is_function(Fun, 1) -> []. + +%%% Suggestion from OTP-2948: sort and merge with Fun. + +-spec sort(fun((T, T) -> boolean()), [T]) -> [T]. + +sort(Fun, []) when is_function(Fun, 2) -> + []; +sort(Fun, [_] = L) when is_function(Fun, 2) -> + L; +sort(Fun, [X, Y | T]) -> + case Fun(X, Y) of + true -> + fsplit_1(Y, X, Fun, T, [], []); + false -> + fsplit_2(Y, X, Fun, T, [], []) + end. + +-spec merge(fun((X, Y) -> boolean()), [X], [Y]) -> [_]. + +merge(Fun, T1, [H2 | T2]) when is_function(Fun, 2) -> + lists:reverse(fmerge2_1(T1, H2, Fun, T2, []), []); +merge(Fun, T1, []) when is_function(Fun, 2) -> + T1. + +%% reverse(rmerge(F,reverse(A),reverse(B))) is equal to merge(F,A,B). + +-spec rmerge(fun((X, Y) -> boolean()), [X], [Y]) -> [_]. + +rmerge(Fun, T1, [H2 | T2]) when is_function(Fun, 2) -> + lists:reverse(rfmerge2_1(T1, H2, Fun, T2, []), []); +rmerge(Fun, T1, []) when is_function(Fun, 2) -> + T1. + +-spec usort(fun((T, T) -> boolean()), [T]) -> [T]. + +usort(Fun, [_] = L) when is_function(Fun, 2) -> + L; +usort(Fun, [] = L) when is_function(Fun, 2) -> + L; +usort(Fun, [X | L]) when is_function(Fun, 2) -> + usort_1(Fun, X, L). + +usort_1(Fun, X, [Y | L]) -> + case Fun(X, Y) of + true -> + case Fun(Y, X) of + true -> % X equal to Y + case L of + [] -> + [X]; + _ -> + usort_1(Fun, X, L) + end; + false -> + ufsplit_1(Y, X, Fun, L, [], []) + end; + false -> + ufsplit_2(Y, L, Fun, [X]) + end. + +-spec umerge(fun((X, Y) -> boolean()), [X], [Y]) -> [_]. + +umerge(Fun, [], T2) when is_function(Fun, 2) -> + T2; +umerge(Fun, [H1 | T1], T2) when is_function(Fun, 2) -> + lists:reverse(ufmerge2_2(H1, T1, Fun, T2, []), []). + +%% reverse(rumerge(F,reverse(A),reverse(B))) is equal to umerge(F,A,B). + +-spec rumerge(fun((X, Y) -> boolean()), [X], [Y]) -> [_]. + +rumerge(Fun, T1, []) when is_function(Fun, 2) -> + T1; +rumerge(Fun, T1, [H2 | T2]) when is_function(Fun, 2) -> + lists:reverse(rufmerge2_1(T1, H2, Fun, T2, []), []). + +%% usort(List) -> L +%% sorts the list L, removes duplicates + +-spec usort([T]) -> [T]. + +usort([X, Y | L] = L0) when X < Y -> + case L of + [] -> + L0; + [Z] when Y < Z -> + L0; + [Z] when Y == Z -> + [X, Y]; + [Z] when Z < X -> + [Z, X, Y]; + [Z] when Z == X -> + [X, Y]; + [Z] -> + [X, Z, Y]; + _ -> + usplit_1(X, Y, L, [], []) + end; +usort([X, Y | L]) when X > Y -> + case L of + [] -> + [Y, X]; + [Z] when X < Z -> + [Y, X | L]; + [Z] when X == Z -> + [Y, X]; + [Z] when Z < Y -> + [Z, Y, X]; + [Z] when Z == Y -> + [Y, X]; + [Z] -> + [Y, Z, X]; + _ -> + usplit_2(X, Y, L, [], []) + end; +usort([X, _Y | L]) -> + usort_1(X, L); +usort([_] = L) -> + L; +usort([]) -> + []. + +usort_1(X, [Y | L]) when X == Y -> + usort_1(X, L); +usort_1(X, [Y | L]) when X < Y -> + usplit_1(X, Y, L, [], []); +usort_1(X, [Y | L]) -> + usplit_2(X, Y, L, [], []); +usort_1(X, []) -> + [X]. + +%% umerge(List) -> L +%% merges a list of sorted lists without duplicates, removes duplicates + +-spec umerge([T]) -> [T]. + +umerge(L) -> + umergel(L). + +%% umerge3(X, Y, Z) -> L +%% merges three sorted lists X, Y and Z without duplicates, +%% removes duplicates + +-spec umerge3([_], [_], [_]) -> [_]. + +umerge3(L1, [], L3) -> + umerge(L1, L3); +umerge3(L1, L2, []) -> + umerge(L1, L2); +umerge3(L1, [H2 | T2], [H3 | T3]) -> + lists:reverse(umerge3_1(L1, [H2 | H3], T2, H2, [], T3, H3), []). + +%% rumerge3(X, Y, Z) -> L +%% merges three reversed sorted lists X, Y and Z without duplicates, +%% removes duplicates + +-spec rumerge3([_], [_], [_]) -> [_]. + +rumerge3(L1, [], L3) -> + rumerge(L1, L3); +rumerge3(L1, L2, []) -> + rumerge(L1, L2); +rumerge3(L1, [H2 | T2], [H3 | T3]) -> + lists:reverse(rumerge3_1(L1, T2, H2, [], T3, H3),[]). + +%% umerge(X, Y) -> L +%% merges two sorted lists X and Y without duplicates, removes duplicates + +-spec umerge([_], [_]) -> [_]. + +umerge([], T2) -> + T2; +umerge([H1 | T1], T2) -> + lists:reverse(umerge2_2(T1, T2, [], H1), []). + +%% rumerge(X, Y) -> L +%% merges two reversed sorted lists X and Y without duplicates, +%% removes duplicates + +%% reverse(rumerge(reverse(A),reverse(B))) is equal to umerge(I,A,B). + +-spec rumerge([_], [_]) -> [_]. + +rumerge(T1, []) -> + T1; +rumerge(T1, [H2 | T2]) -> + lists:reverse(rumerge2_1(T1, T2, [], H2), []). + +%% all(Predicate, List) +%% any(Predicate, List) +%% map(Function, List) +%% flatmap(Function, List) +%% foldl(Function, First, List) +%% foldr(Function, Last, List) +%% filter(Predicate, List) +%% zf(Function, List) +%% mapfoldl(Function, First, List) +%% mapfoldr(Function, Last, List) +%% foreach(Function, List) +%% takewhile(Predicate, List) +%% dropwhile(Predicate, List) +%% splitwith(Predicate, List) +%% for list programming. Function here is a 'fun'. For backward compatibility, +%% {Module,Function} is still accepted. +%% +%% The name zf is a joke! +%% +%% N.B. Unless where the functions actually needs it only foreach/2/3, +%% which is meant to be used for its side effects, has a defined order +%% of evaluation. +%% +%% There are also versions with an extra argument, ExtraArgs, which is a +%% list of extra arguments to each call. + +-spec all(fun((T) -> boolean()), [T]) -> boolean(). + +all(Pred, [Hd|Tail]) -> + case Pred(Hd) of + true -> all(Pred, Tail); + false -> false + end; +all(Pred, []) when is_function(Pred, 1) -> true. + +-spec any(fun((T) -> boolean()), [T]) -> boolean(). + +any(Pred, [Hd|Tail]) -> + case Pred(Hd) of + true -> true; + false -> any(Pred, Tail) + end; +any(Pred, []) when is_function(Pred, 1) -> false. + +-spec map(fun((D) -> R), [D]) -> [R]. + +map(F, [H|T]) -> + [F(H)|map(F, T)]; +map(F, []) when is_function(F, 1) -> []. + +-spec flatmap(fun((D) -> [R]), [D]) -> [R]. + +flatmap(F, [Hd|Tail]) -> + F(Hd) ++ flatmap(F, Tail); +flatmap(F, []) when is_function(F, 1) -> []. + +-spec foldl(fun((T, _) -> _), _, [T]) -> _. + +foldl(F, Accu, [Hd|Tail]) -> + foldl(F, F(Hd, Accu), Tail); +foldl(F, Accu, []) when is_function(F, 2) -> Accu. + +-spec foldr(fun((T, _) -> _), _, [T]) -> _. + +foldr(F, Accu, [Hd|Tail]) -> + F(Hd, foldr(F, Accu, Tail)); +foldr(F, Accu, []) when is_function(F, 2) -> Accu. + +-spec filter(Pred :: fun((T) -> boolean()), List :: [T]) -> [T]. + +filter(Pred, List) when is_function(Pred, 1) -> + [ E || E <- List, Pred(E) ]. + +%% Equivalent to {filter(F, L), filter(NotF, L)}, if NotF = 'fun(X) -> +%% not F(X) end'. + +-spec partition(Pred :: fun((T) -> boolean()), List :: [T]) -> {[T], [T]}. + +partition(Pred, L) -> + partition(Pred, L, [], []). + +partition(Pred, [H | T], As, Bs) -> + case Pred(H) of + true -> partition(Pred, T, [H | As], Bs); + false -> partition(Pred, T, As, [H | Bs]) + end; +partition(Pred, [], As, Bs) when is_function(Pred, 1) -> + {reverse(As), reverse(Bs)}. + +-spec zf(fun((T) -> boolean() | {'true', X}), [T]) -> [(T | X)]. + +zf(F, [Hd|Tail]) -> + case F(Hd) of + true -> + [Hd|zf(F, Tail)]; + {true,Val} -> + [Val|zf(F, Tail)]; + false -> + zf(F, Tail) + end; +zf(F, []) when is_function(F, 1) -> []. + +-spec foreach(F :: fun((T) -> _), List :: [T]) -> 'ok'. + +foreach(F, [Hd|Tail]) -> + F(Hd), + foreach(F, Tail); +foreach(F, []) when is_function(F, 1) -> ok. + +-spec mapfoldl(fun((T, _) -> {_, _}), _, [T]) -> {[_], _}. + +mapfoldl(F, Accu0, [Hd|Tail]) -> + {R,Accu1} = F(Hd, Accu0), + {Rs,Accu2} = mapfoldl(F, Accu1, Tail), + {[R|Rs],Accu2}; +mapfoldl(F, Accu, []) when is_function(F, 2) -> {[],Accu}. + +-spec mapfoldr(fun((T, _) -> {_, _}), _, [T]) -> {[_], _}. + +mapfoldr(F, Accu0, [Hd|Tail]) -> + {Rs,Accu1} = mapfoldr(F, Accu0, Tail), + {R,Accu2} = F(Hd, Accu1), + {[R|Rs],Accu2}; +mapfoldr(F, Accu, []) when is_function(F, 2) -> {[],Accu}. + +-spec takewhile(fun((T) -> boolean()), [T]) -> [T]. + +takewhile(Pred, [Hd|Tail]) -> + case Pred(Hd) of + true -> [Hd|takewhile(Pred, Tail)]; + false -> [] + end; +takewhile(Pred, []) when is_function(Pred, 1) -> []. + +-spec dropwhile(fun((T) -> boolean()), [T]) -> [T]. + +dropwhile(Pred, [Hd|Tail]=Rest) -> + case Pred(Hd) of + true -> dropwhile(Pred, Tail); + false -> Rest + end; +dropwhile(Pred, []) when is_function(Pred, 1) -> []. + +-spec splitwith(fun((T) -> boolean()), [T]) -> {[T], [T]}. + +splitwith(Pred, List) when is_function(Pred, 1) -> + splitwith(Pred, List, []). + +splitwith(Pred, [Hd|Tail], Taken) -> + case Pred(Hd) of + true -> splitwith(Pred, Tail, [Hd|Taken]); + false -> {reverse(Taken), [Hd|Tail]} + end; +splitwith(Pred, [], Taken) when is_function(Pred, 1) -> + {reverse(Taken),[]}. + +-spec split(non_neg_integer(), [T]) -> {[T], [T]}. + +split(N, List) when is_integer(N), N >= 0, is_list(List) -> + case split(N, List, []) of + {_, _} = Result -> Result; + Fault when is_atom(Fault) -> + erlang:error(Fault, [N,List]) + end; +split(N, List) -> + erlang:error(badarg, [N,List]). + +split(0, L, R) -> + {lists:reverse(R, []), L}; +split(N, [H|T], R) -> + split(N-1, T, [H|R]); +split(_, [], _) -> + badarg. + +%%% ================================================================= +%%% Here follows the implementation of the sort functions. +%%% +%%% These functions used to be in their own module (lists_sort), +%%% but have now been placed here to allow Dialyzer to produce better +%%% type information. +%%% ================================================================= + +-compile({inline, + [{merge3_12,7}, {merge3_21,7}, {rmerge3_12,7}, {rmerge3_21,7}]}). + +-compile({inline, + [{umerge3_12,8}, {umerge3_21,8}, + {rumerge3_12a,7}, {rumerge3_12b,8}]}). + +-compile({inline, + [{keymerge3_12,12}, {keymerge3_21,12}, + {rkeymerge3_12,12}, {rkeymerge3_21,12}]}). + +-compile({inline, + [{ukeymerge3_12,13}, {ukeymerge3_21,13}, + {rukeymerge3_12a,11}, {rukeymerge3_21a,13}, + {rukeymerge3_12b,12}, {rukeymerge3_21b,12}]}). + +%% sort/1 + +%% Ascending. +split_1(X, Y, [Z | L], R, Rs) when Z >= Y -> + split_1(Y, Z, L, [X | R], Rs); +split_1(X, Y, [Z | L], R, Rs) when Z >= X -> + split_1(Z, Y, L, [X | R], Rs); +split_1(X, Y, [Z | L], [], Rs) -> + split_1(X, Y, L, [Z], Rs); +split_1(X, Y, [Z | L], R, Rs) -> + split_1_1(X, Y, L, R, Rs, Z); +split_1(X, Y, [], R, Rs) -> + rmergel([[Y, X | R] | Rs], []). + +split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= Y -> + split_1_1(Y, Z, L, [X | R], Rs, S); +split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= X -> + split_1_1(Z, Y, L, [X | R], Rs, S); +split_1_1(X, Y, [Z | L], R, Rs, S) when S =< Z -> + split_1(S, Z, L, [], [[Y, X | R] | Rs]); +split_1_1(X, Y, [Z | L], R, Rs, S) -> + split_1(Z, S, L, [], [[Y, X | R] | Rs]); +split_1_1(X, Y, [], R, Rs, S) -> + rmergel([[S], [Y, X | R] | Rs], []). + +%% Descending. +split_2(X, Y, [Z | L], R, Rs) when Z =< Y -> + split_2(Y, Z, L, [X | R], Rs); +split_2(X, Y, [Z | L], R, Rs) when Z =< X -> + split_2(Z, Y, L, [X | R], Rs); +split_2(X, Y, [Z | L], [], Rs) -> + split_2(X, Y, L, [Z], Rs); +split_2(X, Y, [Z | L], R, Rs) -> + split_2_1(X, Y, L, R, Rs, Z); +split_2(X, Y, [], R, Rs) -> + mergel([[Y, X | R] | Rs], []). + +split_2_1(X, Y, [Z | L], R, Rs, S) when Z =< Y -> + split_2_1(Y, Z, L, [X | R], Rs, S); +split_2_1(X, Y, [Z | L], R, Rs, S) when Z =< X -> + split_2_1(Z, Y, L, [X | R], Rs, S); +split_2_1(X, Y, [Z | L], R, Rs, S) when S > Z -> + split_2(S, Z, L, [], [[Y, X | R] | Rs]); +split_2_1(X, Y, [Z | L], R, Rs, S) -> + split_2(Z, S, L, [], [[Y, X | R] | Rs]); +split_2_1(X, Y, [], R, Rs, S) -> + mergel([[S], [Y, X | R] | Rs], []). + +%% merge/1 + +mergel([[] | L], Acc) -> + mergel(L, Acc); +mergel([T1, [H2 | T2], [H3 | T3] | L], Acc) -> + mergel(L, [merge3_1(T1, [], H2, T2, H3, T3) | Acc]); +mergel([T1, [H2 | T2]], Acc) -> + rmergel([merge2_1(T1, H2, T2, []) | Acc], []); +mergel([L], []) -> + L; +mergel([L], Acc) -> + rmergel([lists:reverse(L, []) | Acc], []); +mergel([], []) -> + []; +mergel([], Acc) -> + rmergel(Acc, []); +mergel([A, [] | L], Acc) -> + mergel([A | L], Acc); +mergel([A, B, [] | L], Acc) -> + mergel([A, B | L], Acc). + +rmergel([[H3 | T3], [H2 | T2], T1 | L], Acc) -> + rmergel(L, [rmerge3_1(T1, [], H2, T2, H3, T3) | Acc]); +rmergel([[H2 | T2], T1], Acc) -> + mergel([rmerge2_1(T1, H2, T2, []) | Acc], []); +rmergel([L], Acc) -> + mergel([lists:reverse(L, []) | Acc], []); +rmergel([], Acc) -> + mergel(Acc, []). + +%% merge3/3 + +%% Take L1 apart. +merge3_1([H1 | T1], M, H2, T2, H3, T3) when H1 =< H2 -> + merge3_12(T1, H1, H2, T2, H3, T3, M); +merge3_1([H1 | T1], M, H2, T2, H3, T3) -> + merge3_21(T1, H1, H2, T2, H3, T3, M); +merge3_1([], M, H2, T2, H3, T3) when H2 =< H3 -> + merge2_1(T2, H3, T3, [H2 | M]); +merge3_1([], M, H2, T2, H3, T3) -> + merge2_2(T2, H3, T3, M, H2). + +%% Take L2 apart. +merge3_2(T1, H1, M, [H2 | T2], H3, T3) when H1 =< H2 -> + merge3_12(T1, H1, H2, T2, H3, T3, M); +merge3_2(T1, H1, M, [H2 | T2], H3, T3) -> + merge3_21(T1, H1, H2, T2, H3, T3, M); +merge3_2(T1, H1, M, [], H3, T3) when H1 =< H3 -> + merge2_1(T1, H3, T3, [H1 | M]); +merge3_2(T1, H1, M, [], H3, T3) -> + merge2_2(T1, H3, T3, M, H1). + +% H1 =< H2. Inlined. +merge3_12(T1, H1, H2, T2, H3, T3, M) when H1 =< H3 -> + merge3_1(T1, [H1 | M], H2, T2, H3, T3); +merge3_12(T1, H1, H2, T2, H3, T3, M) -> + merge3_12_3(T1, H1, H2, T2, [H3 | M], T3). + +% H1 =< H2, take L3 apart. +merge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) when H1 =< H3 -> + merge3_1(T1, [H1 | M], H2, T2, H3, T3); +merge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) -> + merge3_12_3(T1, H1, H2, T2, [H3 | M], T3); +merge3_12_3(T1, H1, H2, T2, M, []) -> + merge2_1(T1, H2, T2, [H1 | M]). + +% H1 > H2. Inlined. +merge3_21(T1, H1, H2, T2, H3, T3, M) when H2 =< H3 -> + merge3_2(T1, H1, [H2 | M], T2, H3, T3); +merge3_21(T1, H1, H2, T2, H3, T3, M) -> + merge3_21_3(T1, H1, H2, T2, [H3 | M], T3). + +% H1 > H2, take L3 apart. +merge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) when H2 =< H3 -> + merge3_2(T1, H1, [H2 | M], T2, H3, T3); +merge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) -> + merge3_21_3(T1, H1, H2, T2, [H3 | M], T3); +merge3_21_3(T1, H1, H2, T2, M, []) -> + merge2_2(T1, H2, T2, M, H1). + +%% rmerge/3 + +%% Take L1 apart. +rmerge3_1([H1 | T1], M, H2, T2, H3, T3) when H1 =< H2 -> + rmerge3_12(T1, H1, H2, T2, H3, T3, M); +rmerge3_1([H1 | T1], M, H2, T2, H3, T3) -> + rmerge3_21(T1, H1, H2, T2, H3, T3, M); +rmerge3_1([], M, H2, T2, H3, T3) when H2 =< H3 -> + rmerge2_2(T2, H3, T3, M, H2); +rmerge3_1([], M, H2, T2, H3, T3) -> + rmerge2_1(T2, H3, T3, [H2 | M]). + +%% Take L2 apart. +rmerge3_2(T1, H1, M, [H2 | T2], H3, T3) when H1 =< H2 -> + rmerge3_12(T1, H1, H2, T2, H3, T3, M); +rmerge3_2(T1, H1, M, [H2 | T2], H3, T3) -> + rmerge3_21(T1, H1, H2, T2, H3, T3, M); +rmerge3_2(T1, H1, M, [], H3, T3) when H1 =< H3 -> + rmerge2_2(T1, H3, T3, M, H1); +rmerge3_2(T1, H1, M, [], H3, T3) -> + rmerge2_1(T1, H3, T3, [H1 | M]). + +% H1 =< H2. Inlined. +rmerge3_12(T1, H1, H2, T2, H3, T3, M) when H2 =< H3 -> + rmerge3_12_3(T1, H1, H2, T2, [H3 | M], T3); +rmerge3_12(T1, H1, H2, T2, H3, T3, M) -> + rmerge3_2(T1, H1, [H2 | M], T2, H3, T3). + +% H1 =< H2, take L3 apart. +rmerge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) when H2 =< H3 -> + rmerge3_12_3(T1, H1, H2, T2, [H3 | M], T3); +rmerge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) -> + rmerge3_2(T1, H1, [H2 | M], T2, H3, T3); +rmerge3_12_3(T1, H1, H2, T2, M, []) -> + rmerge2_2(T1, H2, T2, M, H1). + +% H1 > H2. Inlined. +rmerge3_21(T1, H1, H2, T2, H3, T3, M) when H1 =< H3 -> + rmerge3_21_3(T1, H1, H2, T2, [H3 | M], T3); +rmerge3_21(T1, H1, H2, T2, H3, T3, M) -> + rmerge3_1(T1, [H1 | M], H2, T2, H3, T3). + +% H1 > H2, take L3 apart. +rmerge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) when H1 =< H3 -> + rmerge3_21_3(T1, H1, H2, T2, [H3 | M], T3); +rmerge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) -> + rmerge3_1(T1, [H1 | M], H2, T2, H3, T3); +rmerge3_21_3(T1, H1, H2, T2, M, []) -> + rmerge2_1(T1, H2, T2, [H1 | M]). + +%% merge/2 + +merge2_1([H1 | T1], H2, T2, M) when H1 =< H2 -> + merge2_1(T1, H2, T2, [H1 | M]); +merge2_1([H1 | T1], H2, T2, M) -> + merge2_2(T1, H2, T2, M, H1); +merge2_1([], H2, T2, M) -> + lists:reverse(T2, [H2 | M]). + +merge2_2(T1, HdM, [H2 | T2], M, H1) when H1 =< H2 -> + merge2_1(T1, H2, T2, [H1, HdM | M]); +merge2_2(T1, HdM, [H2 | T2], M, H1) -> + merge2_2(T1, H2, T2, [HdM | M], H1); +merge2_2(T1, HdM, [], M, H1) -> + lists:reverse(T1, [H1, HdM | M]). + +%% rmerge/2 + +rmerge2_1([H1 | T1], H2, T2, M) when H1 =< H2 -> + rmerge2_2(T1, H2, T2, M, H1); +rmerge2_1([H1 | T1], H2, T2, M) -> + rmerge2_1(T1, H2, T2, [H1 | M]); +rmerge2_1([], H2, T2, M) -> + lists:reverse(T2, [H2 | M]). + +rmerge2_2(T1, HdM, [H2 | T2], M, H1) when H1 =< H2 -> + rmerge2_2(T1, H2, T2, [HdM | M], H1); +rmerge2_2(T1, HdM, [H2 | T2], M, H1) -> + rmerge2_1(T1, H2, T2, [H1, HdM | M]); +rmerge2_2(T1, HdM, [], M, H1) -> + lists:reverse(T1, [H1, HdM | M]). + +%% usort/1 + +%% Ascending. +usplit_1(X, Y, [Z | L], R, Rs) when Z > Y -> + usplit_1(Y, Z, L, [X | R], Rs); +usplit_1(X, Y, [Z | L], R, Rs) when Z == Y -> + usplit_1(X, Y, L, R, Rs); +usplit_1(X, Y, [Z | L], R, Rs) when Z > X -> + usplit_1(Z, Y, L, [X | R], Rs); +usplit_1(X, Y, [Z | L], R, Rs) when Z == X -> + usplit_1(X, Y, L, R, Rs); +usplit_1(X, Y, [Z | L], [], Rs) -> + usplit_1(X, Y, L, [Z], Rs); +usplit_1(X, Y, [Z | L], R, Rs) -> + usplit_1_1(X, Y, L, R, Rs, Z); +usplit_1(X, Y, [], R, Rs) -> + rumergel([[Y, X | R] | Rs], [], asc). + +usplit_1_1(X, Y, [Z | L], R, Rs, S) when Z > Y -> + usplit_1_1(Y, Z, L, [X | R], Rs, S); +usplit_1_1(X, Y, [Z | L], R, Rs, S) when Z == Y -> + usplit_1_1(X, Y, L, R, Rs, S); +usplit_1_1(X, Y, [Z | L], R, Rs, S) when Z > X -> + usplit_1_1(Z, Y, L, [X | R], Rs, S); +usplit_1_1(X, Y, [Z | L], R, Rs, S) when Z == X -> + usplit_1_1(X, Y, L, R, Rs, S); +usplit_1_1(X, Y, [Z | L], R, Rs, S) when Z > S -> + usplit_1(S, Z, L, [], [[Y, X | R] | Rs]); +usplit_1_1(X, Y, [Z | L], R, Rs, S) when Z == S -> + usplit_1_1(X, Y, L, R, Rs, S); +usplit_1_1(X, Y, [Z | L], R, Rs, S) -> + usplit_1(Z, S, L, [], [[Y, X | R] | Rs]); +usplit_1_1(X, Y, [], R, Rs, S) -> + rumergel([[S], [Y, X | R] | Rs], [], asc). + +%% Descending. +usplit_2(X, Y, [Z | L], R, Rs) when Z < Y -> + usplit_2(Y, Z, L, [X | R], Rs); +usplit_2(X, Y, [Z | L], R, Rs) when Z == Y -> + usplit_2(X, Y, L, R, Rs); +usplit_2(X, Y, [Z | L], R, Rs) when Z < X -> + usplit_2(Z, Y, L, [X | R], Rs); +usplit_2(X, Y, [Z | L], R, Rs) when Z == X -> + usplit_2(X, Y, L, R, Rs); +usplit_2(X, Y, [Z | L], [], Rs) -> + usplit_2(X, Y, L, [Z], Rs); +usplit_2(X, Y, [Z | L], R, Rs) -> + usplit_2_1(X, Y, L, R, Rs, Z); +usplit_2(X, Y, [], R, Rs) -> + umergel([[Y, X | R] | Rs], [], desc). + +usplit_2_1(X, Y, [Z | L], R, Rs, S) when Z < Y -> + usplit_2_1(Y, Z, L, [X | R], Rs, S); +usplit_2_1(X, Y, [Z | L], R, Rs, S) when Z == Y -> + usplit_2_1(X, Y, L, R, Rs, S); +usplit_2_1(X, Y, [Z | L], R, Rs, S) when Z < X -> + usplit_2_1(Z, Y, L, [X | R], Rs, S); +usplit_2_1(X, Y, [Z | L], R, Rs, S) when Z == X -> + usplit_2_1(X, Y, L, R, Rs, S); +usplit_2_1(X, Y, [Z | L], R, Rs, S) when Z < S -> + usplit_2(S, Z, L, [], [[Y, X | R] | Rs]); +usplit_2_1(X, Y, [Z | L], R, Rs, S) when Z == S -> + usplit_2_1(X, Y, L, R, Rs, S); +usplit_2_1(X, Y, [Z | L], R, Rs, S) -> + usplit_2(Z, S, L, [], [[Y, X | R] | Rs]); +usplit_2_1(X, Y, [], R, Rs, S) -> + umergel([[S], [Y, X | R] | Rs], [], desc). + +%% umerge/1 + +umergel(L) -> + umergel(L, [], asc). + +umergel([[] | L], Acc, O) -> + umergel(L, Acc, O); +umergel([T1, [H2 | T2], [H3 | T3] | L], Acc, asc) -> + umergel(L, [umerge3_1(T1, [H2 | H3], T2, H2, [], T3, H3) | Acc], asc); +umergel([[H3 | T3], [H2 | T2], T1 | L], Acc, desc) -> + umergel(L, [umerge3_1(T1, [H2 | H3], T2, H2, [], T3, H3) | Acc], desc); +umergel([A, [] | L], Acc, O) -> + umergel([A | L], Acc, O); +umergel([A, B, [] | L], Acc, O) -> + umergel([A, B | L], Acc, O); +umergel([[H1 | T1], T2 | L], Acc, asc) -> + umergel(L, [umerge2_2(T1, T2, [], H1) | Acc], asc); +umergel([T2, [H1 | T1] | L], Acc, desc) -> + umergel(L, [umerge2_2(T1, T2, [], H1) | Acc], desc); +umergel([L], [], _O) -> + L; +umergel([L], Acc, O) -> + rumergel([lists:reverse(L, []) | Acc], [], O); +umergel([], [], _O) -> + []; +umergel([], Acc, O) -> + rumergel(Acc, [], O). + +rumergel([[H3 | T3], [H2 | T2], T1 | L], Acc, asc) -> + rumergel(L, [rumerge3_1(T1, T2, H2, [], T3, H3) | Acc], asc); +rumergel([T1, [H2 | T2], [H3 | T3] | L], Acc, desc) -> + rumergel(L, [rumerge3_1(T1, T2, H2, [], T3, H3) | Acc], desc); +rumergel([[H2 | T2], T1 | L], Acc, asc) -> + rumergel(L, [rumerge2_1(T1, T2, [], H2) | Acc], asc); +rumergel([T1, [H2 | T2] | L], Acc, desc) -> + rumergel(L, [rumerge2_1(T1, T2, [], H2) | Acc], desc); +rumergel([L], Acc, O) -> + umergel([lists:reverse(L, []) | Acc], [], O); +rumergel([], Acc, O) -> + umergel(Acc, [], O). + +%% umerge3/3 + +%% Take L1 apart. +umerge3_1([H1 | T1], HdM, T2, H2, M, T3, H3) when H1 =< H2 -> + umerge3_12(T1, H1, T2, H2, M, T3, H3, HdM); +umerge3_1([H1 | T1], HdM, T2, H2, M, T3, H3) when H2 == HdM -> + umerge3_2(T1, H1, T2, H2, M, T3, H3); +umerge3_1([H1 | T1], HdM, T2, H2, M, T3, H3) -> + umerge3_21(T1, H1, T2, H2, M, T3, H3, HdM); +umerge3_1([], HdM, T2, H2, M, T3, H3) when H2 == HdM -> + umerge2_1(T2, T3, M, HdM, H3); +umerge3_1([], _HdM, T2, H2, M, T3, H3) when H2 =< H3 -> + umerge2_1(T2, T3, [H2 | M], H2, H3); +umerge3_1([], HdM, T2, H2, M, T3, H3) when H3 == HdM -> + umerge2_2(T2, T3, M, H2); +umerge3_1([], _HdM, T2, H2, M, T3, H3) -> + umerge2_2(T2, T3, [H3 | M], H2). + +%% Take L2 apart. +umerge3_2(T1, H1, [H2 | T2], HdM, M, T3, H3) when H1 =< H2 -> + umerge3_12(T1, H1, T2, H2, M, T3, H3, HdM); +umerge3_2(T1, H1, [H2 | T2], HdM, M, T3, H3) -> + umerge3_21(T1, H1, T2, H2, M, T3, H3, HdM); +umerge3_2(T1, H1, [], _HdM, M, T3, H3) when H1 =< H3 -> + umerge2_1(T1, T3, [H1 | M], H1, H3); +umerge3_2(T1, H1, [], HdM, M, T3, H3) when H3 == HdM -> + umerge2_2(T1, T3, M, H1); +umerge3_2(T1, H1, [], _HdM, M, T3, H3) -> + umerge2_2(T1, T3, [H3 | M], H1). + +% H1 =< H2. Inlined. +umerge3_12(T1, H1, T2, H2, M, T3, H3, _HdM) when H1 =< H3 -> + umerge3_1(T1, H1, T2, H2, [H1 | M], T3, H3); +umerge3_12(T1, H1, T2, H2, M, T3, H3, HdM) when H3 == HdM -> + umerge3_12_3(T1, H1, T2, H2, M, T3); +umerge3_12(T1, H1, T2, H2, M, T3, H3, _HdM) -> + umerge3_12_3(T1, H1, T2, H2, [H3 | M], T3). + +% H1 =< H2, take L3 apart. +umerge3_12_3(T1, H1, T2, H2, M, [H3 | T3]) when H1 =< H3 -> + umerge3_1(T1, H1, T2, H2, [H1 | M], T3, H3); +umerge3_12_3(T1, H1, T2, H2, M, [H3 | T3]) -> + umerge3_12_3(T1, H1, T2, H2, [H3 | M], T3); +umerge3_12_3(T1, H1, T2, H2, M, []) -> + umerge2_1(T1, T2, [H1 | M], H1, H2). + +% H1 > H2. Inlined. +umerge3_21(T1, H1, T2, H2, M, T3, H3, _HdM) when H2 =< H3 -> + umerge3_2(T1, H1, T2, H2, [H2 | M], T3, H3); +umerge3_21(T1, H1, T2, H2, M, T3, H3, HdM) when H3 == HdM -> + umerge3_21_3(T1, H1, T2, H2, M, T3); +umerge3_21(T1, H1, T2, H2, M, T3, H3, _HdM) -> + umerge3_21_3(T1, H1, T2, H2, [H3 | M], T3). + +% H1 > H2, take L3 apart. +umerge3_21_3(T1, H1, T2, H2, M, [H3 | T3]) when H2 =< H3 -> + umerge3_2(T1, H1, T2, H2, [H2 | M], T3, H3); +umerge3_21_3(T1, H1, T2, H2, M, [H3 | T3]) -> + umerge3_21_3(T1, H1, T2, H2, [H3 | M], T3); +umerge3_21_3(T1, H1, T2, H2, M, []) -> + umerge2_2(T1, T2, [H2 | M], H1). + +%% Take L1 apart. +rumerge3_1([H1 | T1], T2, H2, M, T3, H3) when H1 =< H2 -> + rumerge3_12a(T1, H1, T2, H2, M, T3, H3); +rumerge3_1([H1 | T1], T2, H2, M, T3, H3) when H1 =< H3 -> + rumerge3_21_3(T1, T2, H2, M, T3, H3, H1); +rumerge3_1([H1 | T1], T2, H2, M, T3, H3) -> + rumerge3_1(T1, T2, H2, [H1 | M], T3, H3); +rumerge3_1([], T2, H2, M, T3, H3) when H2 =< H3 -> + rumerge2_2(T2, T3, M, H3, H2); +rumerge3_1([], T2, H2, M, T3, H3) -> + rumerge2_1(T2, T3, [H2 | M], H3). + +% H1 =< H2. Inlined. +rumerge3_12a(T1, H1, T2, H2, M, T3, H3) when H2 =< H3 -> + rumerge3_12_3(T1, T2, H2, M, T3, H3, H1); +rumerge3_12a(T1, H1, T2, H2, M, T3, H3) -> + rumerge3_2(T1, T2, H2, M, T3, H3, H1). + +%% Take L2 apart. H2M > H3. H2M > H2. +rumerge3_2(T1, [H2 | T2], H2M, M, T3, H3, H1) when H1 =< H2 -> + % H2M > H1. + rumerge3_12b(T1, H1, T2, H2, M, T3, H3, H2M); +rumerge3_2(T1, [H2 | T2], H2M, M, T3, H3, H1) when H1 == H2M -> + rumerge3_1(T1, T2, H2, [H1 | M], T3, H3); +rumerge3_2(T1, [H2 | T2], H2M, M, T3, H3, H1) when H1 =< H3 -> + % H2M > H1. + rumerge3_21_3(T1, T2, H2, [H2M | M], T3, H3, H1); +rumerge3_2(T1, [H2 | T2], H2M, M, T3, H3, H1) -> + % H2M > H1. + rumerge3_1(T1, T2, H2, [H1, H2M | M], T3, H3); +rumerge3_2(T1, [], H2M, M, T3, H3, H1) when H1 == H2M -> + rumerge2_1(T1, T3, [H1 | M], H3); +rumerge3_2(T1, [], H2M, M, T3, H3, H1) when H1 =< H3 -> + rumerge2_2(T1, T3, [H2M | M], H3, H1); +rumerge3_2(T1, [], H2M, M, T3, H3, H1) -> + rumerge2_1(T1, T3, [H1, H2M | M], H3). + +% H1 =< H2. Inlined. +rumerge3_12b(T1, H1, T2, H2, M, T3, H3, H2M) when H2 =< H3 -> + rumerge3_12_3(T1, T2, H2, [H2M | M], T3, H3, H1); +rumerge3_12b(T1, H1, T2, H2, M, T3, H3, H2M) -> + rumerge3_2(T1, T2, H2, [H2M | M], T3, H3, H1). + +% H1 =< H2, take L3 apart. +rumerge3_12_3(T1, T2, H2, M, [H3 | T3], H3M, H1) when H2 =< H3 -> + rumerge3_12_3(T1, T2, H2, [H3M | M], T3, H3, H1); +rumerge3_12_3(T1, T2, H2, M, [H3 | T3], H3M, H1) when H2 == H3M -> + rumerge3_2(T1, T2, H2, M, T3, H3, H1); +rumerge3_12_3(T1, T2, H2, M, [H3 | T3], H3M, H1) -> + rumerge3_2(T1, T2, H2, [H3M | M], T3, H3, H1); +rumerge3_12_3(T1, T2, H2, M, [], H3M, H1) when H2 == H3M -> + rumerge2_2(T1, T2, M, H2, H1); +rumerge3_12_3(T1, T2, H2, M, [], H3M, H1) -> + rumerge2_2(T1, T2, [H3M | M], H2, H1). + +% H1 > H2, take L3 apart. +rumerge3_21_3(T1, T2, H2, M, [H3 | T3], H3M, H1) when H1 =< H3 -> + rumerge3_21_3(T1, T2, H2, [H3M | M], T3, H3, H1); +rumerge3_21_3(T1, T2, H2, M, [H3 | T3], H3M, H1) when H1 == H3M -> + rumerge3_1(T1, T2, H2, [H1 | M], T3, H3); +rumerge3_21_3(T1, T2, H2, M, [H3 | T3], H3M, H1) -> + rumerge3_1(T1, T2, H2, [H1, H3M | M], T3, H3); +rumerge3_21_3(T1, T2, H2, M, [], H3M, H1) when H1 == H3M -> + rumerge2_1(T1, T2, [H1 | M], H2); +rumerge3_21_3(T1, T2, H2, M, [], H3M, H1) -> + rumerge2_1(T1, T2, [H1, H3M | M], H2). + +%% umerge/2 + +%% Elements from the first list are kept and prioritized. +umerge2_1([H1 | T1], T2, M, _HdM, H2) when H1 =< H2 -> + umerge2_1(T1, T2, [H1 | M], H1, H2); +umerge2_1([H1 | T1], T2, M, HdM, H2) when H2 == HdM -> + umerge2_2(T1, T2, M, H1); +umerge2_1([H1 | T1], T2, M, _HdM, H2) -> + umerge2_2(T1, T2, [H2 | M], H1); +umerge2_1([], T2, M, HdM, H2) when H2 == HdM -> + lists:reverse(T2, M); +umerge2_1([], T2, M, _HdM, H2) -> + lists:reverse(T2, [H2 | M]). + +umerge2_2(T1, [H2 | T2], M, H1) when H1 =< H2 -> + umerge2_1(T1, T2, [H1 | M], H1, H2); +umerge2_2(T1, [H2 | T2], M, H1) -> + umerge2_2(T1, T2, [H2 | M], H1); +umerge2_2(T1, [], M, H1) -> + lists:reverse(T1, [H1 | M]). + +%% rumerge/2 + +%% Elements from the first list are kept and prioritized. +rumerge2_1([H1 | T1], T2, M, H2) when H1 =< H2 -> + rumerge2_2(T1, T2, M, H2, H1); +rumerge2_1([H1 | T1], T2, M, H2) -> + rumerge2_1(T1, T2, [H1 | M], H2); +rumerge2_1([], T2, M, H2) -> + lists:reverse(T2, [H2 | M]). + +% H1 =< H2M. +rumerge2_2(T1, [H2 | T2], M, H2M, H1) when H1 =< H2 -> + rumerge2_2(T1, T2, [H2M | M], H2, H1); +rumerge2_2(T1, [H2 | T2], M, H2M, H1) when H1 == H2M -> + rumerge2_1(T1, T2, [H1 | M], H2); +rumerge2_2(T1, [H2 | T2], M, H2M, H1) -> + rumerge2_1(T1, T2, [H1, H2M | M], H2); +rumerge2_2(T1, [], M, H2M, H1) when H1 == H2M -> + lists:reverse(T1, [H1 | M]); +rumerge2_2(T1, [], M, H2M, H1) -> + lists:reverse(T1, [H1, H2M | M]). + +%% keysort/2 + +%% Ascending. +keysplit_1(I, X, EX, Y, EY, [Z | L], R, Rs) -> + case element(I, Z) of + EZ when EY =< EZ -> + keysplit_1(I, Y, EY, Z, EZ, L, [X | R], Rs); + EZ when EX =< EZ -> + keysplit_1(I, Z, EZ, Y, EY, L, [X | R], Rs); + _EZ when R == [] -> + keysplit_1(I, X, EX, Y, EY, L, [Z], Rs); + EZ -> + keysplit_1_1(I, X, EX, Y, EY, EZ, R, Rs, Z, L) + end; +keysplit_1(I, X, _EX, Y, _EY, [], R, Rs) -> + rkeymergel(I, [[Y, X | R] | Rs], [], asc). + +keysplit_1_1(I, X, EX, Y, EY, ES, R, Rs, S, [Z | L]) -> + case element(I, Z) of + EZ when EY =< EZ -> + keysplit_1_1(I, Y, EY, Z, EZ, ES, [X | R], Rs, S, L); + EZ when EX =< EZ -> + keysplit_1_1(I, Z, EZ, Y, EY, ES, [X | R], Rs, S, L); + EZ when ES =< EZ -> + keysplit_1(I, S, ES, Z, EZ, L, [], [[Y, X | R] | Rs]); + EZ -> + keysplit_1(I, Z, EZ, S, ES, L, [], [[Y, X | R] | Rs]) + end; +keysplit_1_1(I, X, _EX, Y, _EY, _ES, R, Rs, S, []) -> + rkeymergel(I, [[S], [Y, X | R] | Rs], [], asc). + +%% Descending. +keysplit_2(I, X, EX, Y, EY, [Z | L], R, Rs) -> + case element(I, Z) of + EZ when EY > EZ -> + keysplit_2(I, Y, EY, Z, EZ, L, [X | R], Rs); + EZ when EX > EZ -> + keysplit_2(I, Z, EZ, Y, EY, L, [X | R], Rs); + _EZ when R == [] -> + keysplit_2(I, X, EX, Y, EY, L, [Z], Rs); + EZ -> + keysplit_2_1(I, X, EX, Y, EY, EZ, R, Rs, Z, L) + end; +keysplit_2(I, X, _EX, Y, _EY, [], R, Rs) -> + keymergel(I, [[Y, X | R] | Rs], [], desc). + +keysplit_2_1(I, X, EX, Y, EY, ES, R, Rs, S, [Z | L]) -> + case element(I, Z) of + EZ when EY > EZ -> + keysplit_2_1(I, Y, EY, Z, EZ, ES, [X | R], Rs, S, L); + EZ when EX > EZ -> + keysplit_2_1(I, Z, EZ, Y, EY, ES, [X | R], Rs, S, L); + EZ when ES > EZ -> + keysplit_2(I, S, ES, Z, EZ, L, [], [[Y, X | R] | Rs]); + EZ -> + keysplit_2(I, Z, EZ, S, ES, L, [], [[Y, X | R] | Rs]) + end; +keysplit_2_1(I, X, _EX, Y, _EY, _ES, R, Rs, S, []) -> + keymergel(I, [[S], [Y, X | R] | Rs], [], desc). + +keymergel(I, [T1, [H2 | T2], [H3 | T3] | L], Acc, O) when O == asc -> + M = keymerge3_1(I, T1, [],O,element(I,H2), H2, T2, element(I,H3), H3, T3), + keymergel(I, L, [M | Acc], O); +keymergel(I, [[H3 | T3], [H2 | T2], T1 | L], Acc, O) when O == desc -> + M = keymerge3_1(I, T1, [],O,element(I,H2), H2, T2, element(I,H3), H3, T3), + keymergel(I, L, [M | Acc], O); +keymergel(I, [T1, [H2 | T2] | L], Acc, asc) -> + keymergel(I, L, [keymerge2_1(I, T1, element(I,H2),H2,T2,[]) | Acc], asc); +keymergel(I, [[H2 | T2], T1 | L], Acc, desc) -> + keymergel(I, L, [keymerge2_1(I, T1, element(I,H2),H2,T2,[]) | Acc], desc); +keymergel(_I, [L], [], _O) -> + L; +keymergel(I, [L], Acc, O) -> + rkeymergel(I, [lists:reverse(L, []) | Acc], [], O); +keymergel(I, [], Acc, O) -> + rkeymergel(I, Acc, [], O). + +rkeymergel(I, [[H3 | T3], [H2 | T2], T1 | L], Acc, O) when O == asc -> + M = rkeymerge3_1(I, T1, [],O,element(I,H2), H2, T2, element(I,H3), H3,T3), + rkeymergel(I, L, [M | Acc], O); +rkeymergel(I, [T1, [H2 | T2], [H3 | T3] | L], Acc, O) when O == desc -> + M = rkeymerge3_1(I, T1, [],O,element(I,H2), H2, T2, element(I,H3), H3,T3), + rkeymergel(I, L, [M | Acc], O); +rkeymergel(I, [[H2 | T2], T1 | L], Acc, asc) -> + rkeymergel(I, L, [rkeymerge2_1(I, T1, element(I,H2),H2,T2,[]) | Acc],asc); +rkeymergel(I, [T1, [H2 | T2] | L], Acc, desc) -> + rkeymergel(I, L, [rkeymerge2_1(I,T1, element(I,H2),H2,T2,[]) | Acc],desc); +rkeymergel(I, [L], Acc, O) -> + keymergel(I, [lists:reverse(L, []) | Acc], [], O); +rkeymergel(I, [], Acc, O) -> + keymergel(I, Acc, [], O). + +%%% An extra argument, D, just to avoid some move instructions. + +%% Take L1 apart. +keymerge3_1(I, [H1 | T1], M, D, E2, H2, T2, E3, H3, T3) -> + case element(I, H1) of + E1 when E1 =< E2 -> + keymerge3_12(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, D); + E1 -> + keymerge3_21(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, T2) + end; +keymerge3_1(I, [], M, _D, E2, H2, T2, E3, H3, T3) when E2 =< E3 -> + keymerge2_1(I, T2, E3, H3, T3, [H2 | M]); +keymerge3_1(I, [], M, _D, E2, H2, T2, _E3, H3, T3) -> + keymerge2_2(I, T2, E2, H3, T3, M, H2). + +%% Take L2 apart. +keymerge3_2(I, E1, H1, T1, [H2 | T2], M, D, E3, H3, T3) -> + case element(I, H2) of + E2 when E1 =< E2 -> + keymerge3_12(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, T1); + E2 -> + keymerge3_21(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, D) + end; +keymerge3_2(I, E1, H1, T1, [], M, _D, E3, H3, T3) when E1 =< E3 -> + keymerge2_1(I, T1, E3, H3, T3, [H1 | M]); +keymerge3_2(I, E1, H1, T1, [], M, _D, _E3, H3, T3) -> + keymerge2_2(I, T1, E1, H3, T3, M, H1). + +% E1 =< E2. Inlined. +keymerge3_12(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, D) when E1 =< E3 -> + keymerge3_1(I, T1, [H1 | M], D, E2, H2, T2, E3, H3, T3); +keymerge3_12(I, E1, H1, T1, E2, H2, T2, _E3, H3, T3, M, _D) -> + keymerge3_12_3(I, E1, H1, T1, E2, H2, T2, T3, [H3 | M]). + +% E1 =< E2, take L3 apart. +keymerge3_12_3(I, E1, H1, T1, E2, H2, T2, [H3 | T3], M) -> + case element(I, H3) of + E3 when E1 =< E3 -> + keymerge3_1(I, T1, [H1 | M], T1, E2, H2, T2, E3, H3, T3); + _E3 -> + keymerge3_12_3(I, E1, H1, T1, E2, H2, T2, T3, [H3 | M]) + end; +keymerge3_12_3(I, _E1, H1, T1, E2, H2, T2, [], M) -> + keymerge2_1(I, T1, E2, H2, T2, [H1 | M]). + +% E1 > E2. Inlined. +keymerge3_21(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, D) when E2 =< E3 -> + keymerge3_2(I, E1, H1, T1, T2, [H2 | M], D, E3, H3, T3); +keymerge3_21(I, E1, H1, T1, E2, H2, T2, _E3, H3, T3, M, _D) -> + keymerge3_21_3(I, E1, H1, T1, E2, H2, T2, T3, [H3 | M]). + +% E1 > E2, take L3 apart. +keymerge3_21_3(I, E1, H1, T1, E2, H2, T2, [H3 | T3], M) -> + case element(I, H3) of + E3 when E2 =< E3 -> + keymerge3_2(I, E1, H1, T1, T2, [H2 | M], T2, E3, H3, T3); + _E3 -> + keymerge3_21_3(I, E1, H1, T1, E2, H2, T2, T3, [H3 | M]) + end; +keymerge3_21_3(I, E1, H1, T1, _E2, H2, T2, [], M) -> + keymerge2_2(I, T1, E1, H2, T2, M, H1). + +%% Take L1 apart. +rkeymerge3_1(I, [H1 | T1], M, D, E2, H2, T2, E3, H3, T3) -> + case element(I, H1) of + E1 when E1 =< E2 -> + rkeymerge3_12(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, T2); + E1 -> + rkeymerge3_21(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, D) + end; +rkeymerge3_1(I, [], M, _D, E2, H2, T2, E3, H3, T3) when E2 =< E3 -> + rkeymerge2_2(I, E2, T2, H3, T3, M, H2); +rkeymerge3_1(I, [], M, _D, _E2, H2, T2, E3, H3, T3) -> + rkeymerge2_1(I, T2, E3, H3, T3, [H2 | M]). + +%% Take L2 apart. +rkeymerge3_2(I, E1, H1, T1, [H2 | T2], M, D, E3, H3, T3) -> + case element(I, H2) of + E2 when E1 =< E2 -> + rkeymerge3_12(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, D); + E2 -> + rkeymerge3_21(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, T1) + end; +rkeymerge3_2(I, E1, H1, T1, [], M, _D, E3, H3, T3) when E1 =< E3 -> + rkeymerge2_2(I, E1, T1, H3, T3, M, H1); +rkeymerge3_2(I, _E1, H1, T1, [], M, _D, E3, H3, T3) -> + rkeymerge2_1(I, T1, E3, H3, T3, [H1 | M]). + +% E1 =< E2. Inlined. +rkeymerge3_12(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, _D) when E2 =< E3 -> + rkeymerge3_12_3(I, E1, H1, T1, E2, H2, T2, T3, [H3 | M]); +rkeymerge3_12(I, E1, H1, T1, _E2, H2, T2, E3, H3, T3, M, D) -> + rkeymerge3_2(I, E1, H1, T1, T2, [H2 | M], D, E3, H3, T3). + +% E1 =< E2, take L3 apart. +rkeymerge3_12_3(I, E1, H1, T1, E2, H2, T2, [H3 | T3], M) -> + case element(I, H3) of + E3 when E2 =< E3 -> + rkeymerge3_12_3(I, E1, H1, T1, E2, H2, T2, T3, [H3 | M]); + E3 -> + rkeymerge3_2(I, E1, H1, T1, T2, [H2 | M], T2, E3, H3, T3) + end; +rkeymerge3_12_3(I, E1, H1, T1, _E2, H2, T2, [], M) -> + rkeymerge2_2(I, E1, T1, H2, T2, M, H1). + +% E1 > E2. Inlined. +rkeymerge3_21(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, _D) when E1 =< E3 -> + rkeymerge3_21_3(I, E1, H1, T1, E2, H2, T2, T3, [H3 | M]); +rkeymerge3_21(I, _E1, H1, T1, E2, H2, T2, E3, H3, T3, M, D) -> + rkeymerge3_1(I, T1, [H1 | M], D, E2, H2, T2, E3, H3, T3). + +% E1 > E2, take L3 apart. +rkeymerge3_21_3(I, E1, H1, T1, E2, H2, T2, [H3 | T3], M) -> + case element(I, H3) of + E3 when E1 =< E3 -> + rkeymerge3_21_3(I, E1, H1, T1, E2, H2, T2, T3, [H3 | M]); + E3 -> + rkeymerge3_1(I, T1, [H1 | M], T1, E2, H2, T2, E3, H3, T3) + end; +rkeymerge3_21_3(I, _E1, H1, T1, E2, H2, T2, [], M) -> + rkeymerge2_1(I, T1, E2, H2, T2, [H1 | M]). + +%% keymerge/3 + +%% Elements from the first list are prioritized. +keymerge2_1(I, [H1 | T1], E2, H2, T2, M) -> + case element(I, H1) of + E1 when E1 =< E2 -> + keymerge2_1(I, T1, E2, H2, T2, [H1 | M]); + E1 -> + keymerge2_2(I, T1, E1, H2, T2, M, H1) + end; +keymerge2_1(_I, [], _E2, H2, T2, M) -> + lists:reverse(T2, [H2 | M]). + +keymerge2_2(I, T1, E1, HdM, [H2 | T2], M, H1) -> + case element(I, H2) of + E2 when E1 =< E2 -> + keymerge2_1(I, T1, E2, H2, T2, [H1, HdM | M]); + _E2 -> + keymerge2_2(I, T1, E1, H2, T2, [HdM | M], H1) + end; +keymerge2_2(_I, T1, _E1, HdM, [], M, H1) -> + lists:reverse(T1, [H1, HdM | M]). + +%% rkeymerge/3 + +rkeymerge2_1(I, [H1 | T1], E2, H2, T2, M) -> + case element(I, H1) of + E1 when E1 =< E2 -> + rkeymerge2_2(I, E1, T1, H2, T2, M, H1); + _E1 -> + rkeymerge2_1(I, T1, E2, H2, T2, [H1 | M]) + end; +rkeymerge2_1(_I, [], _E2, H2, T2, M) -> + lists:reverse(T2, [H2 | M]). + +rkeymerge2_2(I, E1, T1, HdM, [H2 | T2], M, H1) -> + case element(I, H2) of + E2 when E1 =< E2 -> + rkeymerge2_2(I, E1, T1, H2, T2, [HdM | M], H1); + E2 -> + rkeymerge2_1(I, T1, E2, H2, T2, [H1, HdM | M]) + end; +rkeymerge2_2(_I, _E1, T1, HdM, [], M, H1) -> + lists:reverse(T1, [H1, HdM | M]). + +%% ukeysort/2 + +%% Ascending. +ukeysplit_1(I, X, EX, Y, EY, [Z | L], R, Rs) -> + case element(I, Z) of + EZ when EY == EZ -> + ukeysplit_1(I, X, EX, Y, EY, L, R, Rs); + EZ when EY < EZ -> + ukeysplit_1(I, Y, EY, Z, EZ, L, [X | R], Rs); + EZ when EX == EZ -> + ukeysplit_1(I, X, EX, Y, EY, L, R, Rs); + EZ when EX < EZ -> + ukeysplit_1(I, Z, EZ, Y, EY, L, [X | R], Rs); + _EZ when R == [] -> + ukeysplit_1(I, X, EX, Y, EY, L, [Z], Rs); + EZ -> + ukeysplit_1_1(I, X, EX, Y, EY, L, R, Rs, Z, EZ) + end; +ukeysplit_1(I, X, _EX, Y, _EY, [], R, Rs) -> + rukeymergel(I, [[Y, X | R] | Rs], []). + +ukeysplit_1_1(I, X, EX, Y, EY, [Z | L], R, Rs, S, ES) -> + case element(I, Z) of + EZ when EY == EZ -> + ukeysplit_1_1(I, X, EX, Y, EY, L, R, Rs, S, ES); + EZ when EY < EZ -> + ukeysplit_1_1(I, Y, EY, Z, EZ, L, [X | R], Rs, S, ES); + EZ when EX == EZ -> + ukeysplit_1_1(I, X, EX, Y, EY, L, R, Rs, S, ES); + EZ when EX < EZ -> + ukeysplit_1_1(I, Z, EZ, Y, EY, L, [X | R], Rs, S, ES); + EZ when ES == EZ -> + ukeysplit_1_1(I, X, EX, Y, EY, L, R, Rs, S, ES); + EZ when ES < EZ -> + ukeysplit_1(I, S, ES, Z, EZ, L, [], [[Y, X | R] | Rs]); + EZ -> + ukeysplit_1(I, Z, EZ, S, ES, L, [], [[Y, X | R] | Rs]) + end; +ukeysplit_1_1(I, X, _EX, Y, _EY, [], R, Rs, S, _ES) -> + rukeymergel(I, [[S], [Y, X | R] | Rs], []). + +%% Descending. +ukeysplit_2(I, Y, EY, [Z | L], R) -> + case element(I, Z) of + EZ when EY == EZ -> + ukeysplit_2(I, Y, EY, L, R); + EZ when EY < EZ -> + ukeysplit_1(I, Y, EY, Z, EZ, L, [], [lists:reverse(R, [])]); + EZ -> + ukeysplit_2(I, Z, EZ, L, [Y | R]) + end; +ukeysplit_2(_I, Y, _EY, [], R) -> + [Y | R]. + +ukeymergel(I, [T1, [H2 | T2], [H3 | T3] | L], Acc) -> + %% The fourth argument, [H2 | H3] (=HdM), may confuse type + %% checkers. Its purpose is to ensure that the tests H2 == HdM + %% and H3 == HdM in ukeymerge3_1 will always fail as long as M == []. + M = ukeymerge3_1(I, T1, Acc, [H2 | H3], element(I, H2), H2, T2, [], + element(I, H3), H3, T3), + ukeymergel(I, L, [M | Acc]); +ukeymergel(I, [[H1 | T1], T2 | L], Acc) -> + ukeymergel(I, L, [ukeymerge2_2(I, T1, element(I, H1), H1, T2, []) | Acc]); +ukeymergel(_I, [L], []) -> + L; +ukeymergel(I, [L], Acc) -> + rukeymergel(I, [lists:reverse(L, []) | Acc], []); +ukeymergel(I, [], Acc) -> + rukeymergel(I, Acc, []). + +rukeymergel(I, [[H3 | T3], [H2 | T2], T1 | L], Acc) -> + M = rukeymerge3_1(I, T1, Acc, [], element(I, H2), H2, T2, [], + element(I, H3), H3, T3), + rukeymergel(I, L, [M | Acc]); +rukeymergel(I, [[H2 | T2], T1 | L], Acc) -> + rukeymergel(I, L, [rukeymerge2_1(I, T1, element(I,H2), T2, [], H2)|Acc]); +rukeymergel(I, [L], Acc) -> + ukeymergel(I, [lists:reverse(L, []) | Acc], []); +rukeymergel(I, [], Acc) -> + ukeymergel(I, Acc, []). + +%%% An extra argument, D, just to avoid some move instructions. + +%% Take L1 apart. +ukeymerge3_1(I, [H1 | T1], D, HdM, E2, H2, T2, M, E3, H3, T3) -> + case element(I, H1) of + E1 when E1 =< E2 -> + ukeymerge3_12(I, E1, T1, H1, E2, H2, T2, E3, H3, T3, M, HdM, D); + E1 when E2 == HdM -> + ukeymerge3_2(I, E1, T1, H1, T2, HdM, T2, M, E3, H3, T3); + E1 -> + ukeymerge3_21(I, E1, T1, H1, E2, H2, T2, E3, H3, T3, M, HdM, T2) + end; +ukeymerge3_1(I, [], _D, HdM, E2, _H2, T2, M, E3, H3, T3) when E2 == HdM -> + ukeymerge2_1(I, T2, E3, HdM, T3, M, H3); +ukeymerge3_1(I, [], _D, _HdM, E2, H2, T2, M, E3, H3, T3) when E2 =< E3 -> + ukeymerge2_1(I, T2, E3, E2, T3, [H2 | M], H3); +ukeymerge3_1(I, [], _D, HdM, E2, H2, T2, M, E3, _H3, T3) when E3 == HdM -> + ukeymerge2_2(I, T2, E2, H2, T3, M); +ukeymerge3_1(I, [], _D, _HdM, E2, H2, T2, M, _E3, H3, T3) -> + ukeymerge2_2(I, T2, E2, H2, T3, [H3 | M]). + +%% Take L2 apart. +ukeymerge3_2(I, E1, T1, H1, [H2 | T2], HdM, D, M, E3, H3, T3) -> + case element(I, H2) of + E2 when E1 =< E2 -> + ukeymerge3_12(I, E1, T1, H1, E2, H2, T2, E3, H3, T3, M, HdM, T1); + E2 -> + ukeymerge3_21(I, E1, T1, H1, E2, H2, T2, E3, H3, T3, M, HdM, D) + end; +ukeymerge3_2(I, E1, T1, H1, [], _HdM, _D, M, E3, H3, T3) when E1 =< E3 -> + ukeymerge2_1(I, T1, E3, E1, T3, [H1 | M], H3); +ukeymerge3_2(I, E1, T1, H1, [], HdM, _D, M, E3, _H3, T3) when E3 == HdM -> + ukeymerge2_2(I, T1, E1, H1, T3, M); +ukeymerge3_2(I, E1, T1, H1, [], _HdM, _D, M, _E3, H3, T3) -> + ukeymerge2_2(I, T1, E1, H1, T3, [H3 | M]). + +% E1 =< E2. Inlined. +ukeymerge3_12(I, E1, T1, H1, E2, H2, T2, E3, H3, T3, M, _HdM, D) + when E1 =< E3 -> + ukeymerge3_1(I, T1, D, E1, E2, H2, T2, [H1 | M], E3, H3, T3); +ukeymerge3_12(I, E1, T1, H1, E2, H2, T2, E3, _H3, T3, M, HdM, _D) + when E3 == HdM -> + ukeymerge3_12_3(I, E1, T1, H1, E2, H2, T2, M, T3); +ukeymerge3_12(I, E1, T1, H1, E2, H2, T2, _E3, H3, T3, M, _HdM, _D) -> + ukeymerge3_12_3(I, E1, T1, H1, E2, H2, T2, [H3 | M], T3). + +% E1 =< E2, take L3 apart. +ukeymerge3_12_3(I, E1, T1, H1, E2, H2, T2, M, [H3 | T3]) -> + case element(I, H3) of + E3 when E1 =< E3 -> + ukeymerge3_1(I, T1, T1, E1, E2, H2, T2, [H1 | M], E3, H3, T3); + _E3 -> + ukeymerge3_12_3(I, E1, T1, H1, E2, H2, T2, [H3 | M], T3) + end; +ukeymerge3_12_3(I, E1, T1, H1, E2, H2, T2, M, []) -> + ukeymerge2_1(I, T1, E2, E1, T2, [H1 | M], H2). + +% E1 > E2. Inlined. +ukeymerge3_21(I, E1, T1, H1, E2, H2, T2, E3, H3, T3, M, _HdM, D) + when E2 =< E3 -> + ukeymerge3_2(I, E1, T1, H1, T2, E2, D, [H2 | M], E3, H3, T3); +ukeymerge3_21(I, E1, T1, H1, E2, H2, T2, E3, _H3, T3, M, HdM, _D) + when E3 == HdM -> + ukeymerge3_21_3(I, E1, T1, H1, E2, H2, T2, M, T3); +ukeymerge3_21(I, E1, T1, H1, E2, H2, T2, _E3, H3, T3, M, _HdM, _D) -> + ukeymerge3_21_3(I, E1, T1, H1, E2, H2, T2, [H3 | M], T3). + +% E1 > E2, take L3 apart. +ukeymerge3_21_3(I, E1, T1, H1, E2, H2, T2, M, [H3 | T3]) -> + case element(I, H3) of + E3 when E2 =< E3 -> + ukeymerge3_2(I, E1, T1, H1, T2, E2, T2, [H2 | M], E3, H3, T3); + _E3 -> + ukeymerge3_21_3(I, E1, T1, H1, E2, H2, T2, [H3 | M], T3) + end; +ukeymerge3_21_3(I, E1, T1, H1, _E2, H2, T2, M, []) -> + ukeymerge2_2(I, T1, E1, H1, T2, [H2 | M]). + +%%% Two extra arguments, D1 and D2, just to avoid some move instructions. + +%% Take L1 apart. +rukeymerge3_1(I, [H1 | T1], D1, D2, E2, H2, T2, M, E3, H3, T3) -> + case element(I, H1) of + E1 when E1 =< E2 -> + rukeymerge3_12a(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M); + E1 -> + rukeymerge3_21a(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, D1, D2) + end; +rukeymerge3_1(I, [], _D1, _D2, E2, H2, T2, M, E3, H3, T3) when E2 =< E3 -> + rukeymerge2_2(I, T2, E2, T3, M, E3, H3, H2); +rukeymerge3_1(I, [], _D1, _D2, _E2, H2, T2, M, E3, H3, T3) -> + rukeymerge2_1(I, T2, E3, T3, [H2 | M], H3). + +% E1 =< E2. Inlined. +rukeymerge3_12a(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M) when E2 =< E3 -> + rukeymerge3_12_3(I, E1, H1, T1, E2, H2, T2, M, E3, H3, T3); +rukeymerge3_12a(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M) -> + rukeymerge3_2(I, E1, H1, T1, T2, H2, E2, M, E3, H3, T3). + +% E1 > E2. Inlined +rukeymerge3_21a(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, _D1, _D2) + when E1 =< E3 -> + rukeymerge3_21_3(I, E1, H1, T1, E2, H2, T2, M, E3, H3, T3); +rukeymerge3_21a(I, _E1, H1, T1, E2, H2, T2, E3, H3, T3, M, D1, D2) -> + rukeymerge3_1(I, T1, D1, D2, E2, H2, T2, [H1 | M], E3, H3, T3). + +%% Take L2 apart. E2M > E3. E2M > E2. +rukeymerge3_2(I, E1, H1, T1, [H2 | T2], H2M, E2M, M, E3, H3, T3) -> + case element(I, H2) of + E2 when E1 =< E2 -> + % E2M > E1. + rukeymerge3_12b(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, H2M); + E2 when E1 == E2M -> + rukeymerge3_1(I, T1, H1, T1, E2, H2, T2, [H1 | M], E3, H3, T3); + E2 -> + % E2M > E1. + rukeymerge3_21b(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, H2M) + end; +rukeymerge3_2(I, E1, H1, T1, [], _H2M, E2M, M, E3, H3, T3) when E1 == E2M -> + rukeymerge2_1(I, T1, E3, T3, [H1 | M], H3); +rukeymerge3_2(I, E1, H1, T1, [], H2M, _E2M, M, E3, H3, T3) when E1 =< E3 -> + rukeymerge2_2(I, T1, E1, T3, [H2M | M], E3, H3, H1); +rukeymerge3_2(I, _E1, H1, T1, [], H2M, _E2M, M, E3, H3, T3) -> + rukeymerge2_1(I, T1, E3, T3, [H1, H2M | M], H3). + +% E1 =< E2. Inlined. +rukeymerge3_12b(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, H2M) + when E2 =< E3 -> + rukeymerge3_12_3(I, E1, H1, T1, E2, H2, T2, [H2M | M], E3, H3, T3); +rukeymerge3_12b(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M, H2M) -> + rukeymerge3_2(I, E1, H1, T1, T2, H2, E2, [H2M | M], E3, H3, T3). + +% E1 > E2. Inlined +rukeymerge3_21b(I, E1, H1, T1, E2, H2, T2, E3, H3, T3, M,H2M) when E1 =< E3 -> + rukeymerge3_21_3(I, E1, H1, T1, E2, H2, T2, [H2M | M], E3, H3, T3); +rukeymerge3_21b(I, _E1, H1, T1, E2, H2, T2, E3, H3, T3, M, H2M) -> + rukeymerge3_1(I, T1, H1, T1, E2, H2, T2, [H1, H2M | M], E3, H3, T3). + +% E1 =< E2, take L3 apart. +rukeymerge3_12_3(I, E1, H1, T1, E2, H2, T2, M, E3M, H3M, [H3 | T3]) -> + case element(I, H3) of + E3 when E2 =< E3 -> + rukeymerge3_12_3(I, E1, H1, T1, E2, H2, T2, [H3M | M], E3, H3, T3); + E3 when E2 == E3M -> + rukeymerge3_2(I, E1, H1, T1, T2, H2, E2, M, E3, H3, T3); + E3 -> + rukeymerge3_2(I, E1, H1, T1, T2, H2, E2, [H3M | M], E3, H3, T3) + end; +rukeymerge3_12_3(I, E1, H1, T1, E2, H2, T2, M, E3M, _H3M, []) when E2 == E3M -> + rukeymerge2_2(I, T1, E1, T2, M, E2, H2, H1); +rukeymerge3_12_3(I, E1, H1, T1, E2, H2, T2, M, _E3M, H3M, []) -> + rukeymerge2_2(I, T1, E1, T2, [H3M | M], E2, H2, H1). + +% E1 > E2, take L3 apart. +rukeymerge3_21_3(I, E1, H1, T1, E2, H2, T2, M, E3M, H3M, [H3 | T3]) -> + case element(I, H3) of + E3 when E1 =< E3 -> + rukeymerge3_21_3(I, E1, H1, T1, E2, H2, T2, [H3M | M], E3, H3, T3); + E3 when E1 == E3M -> + rukeymerge3_1(I, T1, H1, T1, E2, H2, T2, [H1 | M], E3, H3, T3); + E3 -> + rukeymerge3_1(I, T1, H1, T1, E2, H2, T2, [H1,H3M | M], E3, H3, T3) + end; +rukeymerge3_21_3(I, E1, H1, T1, E2, H2, T2, M, E3M, _H3M, []) when E1 == E3M -> + rukeymerge2_1(I, T1, E2, T2, [H1 | M], H2); +rukeymerge3_21_3(I, _E1, H1, T1, E2, H2, T2, M, _E3M, H3M, []) -> + rukeymerge2_1(I, T1, E2, T2, [H1, H3M | M], H2). + +%% ukeymerge/3 + +%% Elements from the first list are kept and prioritized. +ukeymerge2_1(I, [H1 | T1], E2, HdM, T2, M, H2) -> + case element(I, H1) of + E1 when E1 =< E2 -> + ukeymerge2_1(I, T1, E2, E1, T2, [H1 | M], H2); + E1 when E2 == HdM -> + ukeymerge2_2(I, T1, E1, H1, T2, M); + E1 -> + ukeymerge2_2(I, T1, E1, H1, T2, [H2 | M]) + end; +ukeymerge2_1(_I, [], E2, HdM, T2, M, _H2) when E2 == HdM -> + lists:reverse(T2, M); +ukeymerge2_1(_I, [], _E2, _HdM, T2, M, H2) -> + lists:reverse(T2, [H2 | M]). + +ukeymerge2_2(I, T1, E1, H1, [H2 | T2], M) -> + case element(I, H2) of + E2 when E1 =< E2 -> + ukeymerge2_1(I, T1, E2, E1, T2, [H1 | M], H2); + _E2 -> + ukeymerge2_2(I, T1, E1, H1, T2, [H2 | M]) + end; +ukeymerge2_2(_I, T1, _E1, H1, [], M) -> + lists:reverse(T1, [H1 | M]). + +%% rukeymerge/3 + +rukeymerge2_1(I, [H1 | T1], E2, T2, M, H2) -> + case element(I, H1) of + E1 when E1 =< E2 -> + rukeymerge2_2(I, T1, E1, T2, M, E2, H2, H1); + _E1 -> + rukeymerge2_1(I, T1, E2, T2, [H1 | M], H2) + end; +rukeymerge2_1(_I, [], _E2, T2, M, H2) -> + lists:reverse(T2, [H2 | M]). + +rukeymerge2_2(I, T1, E1, [H2 | T2], M, E2M, H2M, H1) -> + case element(I, H2) of + E2 when E1 =< E2 -> + rukeymerge2_2(I, T1, E1, T2, [H2M | M], E2, H2, H1); + E2 when E1 == E2M -> + rukeymerge2_1(I, T1, E2, T2, [H1 | M], H2); + E2 -> + rukeymerge2_1(I, T1, E2, T2, [H1, H2M | M], H2) + end; +rukeymerge2_2(_I, T1, E1, [], M, E2M, _H2M, H1) when E1 == E2M -> + lists:reverse(T1, [H1 | M]); +rukeymerge2_2(_I, T1, _E1, [], M, _E2M, H2M, H1) -> + lists:reverse(T1, [H1, H2M | M]). + +%% sort/2 + +%% Ascending. +fsplit_1(Y, X, Fun, [Z | L], R, Rs) -> + case Fun(Y, Z) of + true -> + fsplit_1(Z, Y, Fun, L, [X | R], Rs); + false -> + case Fun(X, Z) of + true -> + fsplit_1(Y, Z, Fun, L, [X | R], Rs); + false when R == [] -> + fsplit_1(Y, X, Fun, L, [Z], Rs); + false -> + fsplit_1_1(Y, X, Fun, L, R, Rs, Z) + end + end; +fsplit_1(Y, X, Fun, [], R, Rs) -> + rfmergel([[Y, X | R] | Rs], [], Fun, asc). + +fsplit_1_1(Y, X, Fun, [Z | L], R, Rs, S) -> + case Fun(Y, Z) of + true -> + fsplit_1_1(Z, Y, Fun, L, [X | R], Rs, S); + false -> + case Fun(X, Z) of + true -> + fsplit_1_1(Y, Z, Fun, L, [X | R], Rs, S); + false -> + case Fun(S, Z) of + true -> + fsplit_1(Z, S, Fun, L, [], [[Y, X | R] | Rs]); + false -> + fsplit_1(S, Z, Fun, L, [], [[Y, X | R] | Rs]) + end + end + end; +fsplit_1_1(Y, X, Fun, [], R, Rs, S) -> + rfmergel([[S], [Y, X | R] | Rs], [], Fun, asc). + +%% Descending. +fsplit_2(Y, X, Fun, [Z | L], R, Rs) -> + case Fun(Y, Z) of + false -> + fsplit_2(Z, Y, Fun, L, [X | R], Rs); + true -> + case Fun(X, Z) of + false -> + fsplit_2(Y, Z, Fun, L, [X | R], Rs); + true when R == [] -> + fsplit_2(Y, X, Fun, L, [Z], Rs); + true -> + fsplit_2_1(Y, X, Fun, L, R, Rs, Z) + end + end; +fsplit_2(Y, X, Fun, [], R, Rs) -> + fmergel([[Y, X | R] | Rs], [], Fun, desc). + +fsplit_2_1(Y, X, Fun, [Z | L], R, Rs, S) -> + case Fun(Y, Z) of + false -> + fsplit_2_1(Z, Y, Fun, L, [X | R], Rs, S); + true -> + case Fun(X, Z) of + false -> + fsplit_2_1(Y, Z, Fun, L, [X | R], Rs, S); + true -> + case Fun(S, Z) of + false -> + fsplit_2(Z, S, Fun, L, [], [[Y, X | R] | Rs]); + true -> + fsplit_2(S, Z, Fun, L, [], [[Y, X | R] | Rs]) + end + end + end; +fsplit_2_1(Y, X, Fun, [], R, Rs, S) -> + fmergel([[S], [Y, X | R] | Rs], [], Fun, desc). + +fmergel([T1, [H2 | T2] | L], Acc, Fun, asc) -> + fmergel(L, [fmerge2_1(T1, H2, Fun, T2, []) | Acc], Fun, asc); +fmergel([[H2 | T2], T1 | L], Acc, Fun, desc) -> + fmergel(L, [fmerge2_1(T1, H2, Fun, T2, []) | Acc], Fun, desc); +fmergel([L], [], _Fun, _O) -> + L; +fmergel([L], Acc, Fun, O) -> + rfmergel([lists:reverse(L, []) | Acc], [], Fun, O); +fmergel([], Acc, Fun, O) -> + rfmergel(Acc, [], Fun, O). + +rfmergel([[H2 | T2], T1 | L], Acc, Fun, asc) -> + rfmergel(L, [rfmerge2_1(T1, H2, Fun, T2, []) | Acc], Fun, asc); +rfmergel([T1, [H2 | T2] | L], Acc, Fun, desc) -> + rfmergel(L, [rfmerge2_1(T1, H2, Fun, T2, []) | Acc], Fun, desc); +rfmergel([L], Acc, Fun, O) -> + fmergel([lists:reverse(L, []) | Acc], [], Fun, O); +rfmergel([], Acc, Fun, O) -> + fmergel(Acc, [], Fun, O). + +%% merge/3 + +%% Elements from the first list are prioritized. +fmerge2_1([H1 | T1], H2, Fun, T2, M) -> + case Fun(H1, H2) of + true -> + fmerge2_1(T1, H2, Fun, T2, [H1 | M]); + false -> + fmerge2_2(H1, T1, Fun, T2, [H2 | M]) + end; +fmerge2_1([], H2, _Fun, T2, M) -> + lists:reverse(T2, [H2 | M]). + +fmerge2_2(H1, T1, Fun, [H2 | T2], M) -> + case Fun(H1, H2) of + true -> + fmerge2_1(T1, H2, Fun, T2, [H1 | M]); + false -> + fmerge2_2(H1, T1, Fun, T2, [H2 | M]) + end; +fmerge2_2(H1, T1, _Fun, [], M) -> + lists:reverse(T1, [H1 | M]). + +%% rmerge/3 + +rfmerge2_1([H1 | T1], H2, Fun, T2, M) -> + case Fun(H1, H2) of + true -> + rfmerge2_2(H1, T1, Fun, T2, [H2 | M]); + false -> + rfmerge2_1(T1, H2, Fun, T2, [H1 | M]) + end; +rfmerge2_1([], H2, _Fun, T2, M) -> + lists:reverse(T2, [H2 | M]). + +rfmerge2_2(H1, T1, Fun, [H2 | T2], M) -> + case Fun(H1, H2) of + true -> + rfmerge2_2(H1, T1, Fun, T2, [H2 | M]); + false -> + rfmerge2_1(T1, H2, Fun, T2, [H1 | M]) + end; +rfmerge2_2(H1, T1, _Fun, [], M) -> + lists:reverse(T1, [H1 | M]). + +%% usort/2 + +%% Ascending. X < Y +ufsplit_1(Y, X, Fun, [Z | L], R, Rs) -> + case Fun(Y, Z) of + true -> + case Fun(Z, Y) of + true -> % Z equal to Y + ufsplit_1(Y, X, Fun, L, R, Rs); + false -> + ufsplit_1(Z, Y, Fun, L, [X | R], Rs) + end; + false -> + case Fun(X, Z) of + true -> + case Fun(Z, X) of + true -> % Z equal to X + ufsplit_1(Y, X, Fun, L, R, Rs); + false -> + ufsplit_1(Y, Z, Fun, L, [X | R], Rs) + end; + false when R == [] -> + ufsplit_1(Y, X, Fun, L, [Z], Rs); + false -> + ufsplit_1_1(Y, X, Fun, L, R, Rs, Z) + end + end; +ufsplit_1(Y, X, Fun, [], R, Rs) -> + rufmergel([[Y, X | R] | Rs], [], Fun). + +%% X < Y +ufsplit_1_1(Y, X, Fun, [Z | L], R, Rs, S) -> + case Fun(Y, Z) of + true -> + case Fun(Z, Y) of + true -> % Z equal to Y + ufsplit_1_1(Y, X, Fun, L, R, Rs, S); + false -> + ufsplit_1_1(Z, Y, Fun, L, [X | R], Rs, S) + end; + false -> + case Fun(X, Z) of + true -> + case Fun(Z, X) of + true -> % Z equal to X + ufsplit_1_1(Y, X, Fun, L, R, Rs, S); + false -> + ufsplit_1_1(Y, Z, Fun, L, [X | R], Rs, S) + end; + false -> + case Fun(S, Z) of + true -> + case Fun(Z, S) of + true -> % Z equal to S + ufsplit_1_1(Y, X, Fun, L, R, Rs, S); + false -> + ufsplit_1(Z, S, Fun, L, [], [[Y, X | R] | Rs]) + end; + false -> + ufsplit_1(S, Z, Fun, L, [], [[Y, X | R] | Rs]) + end + end + end; +ufsplit_1_1(Y, X, Fun, [], R, Rs, S) -> + rufmergel([[S], [Y, X | R] | Rs], [], Fun). + +%% Descending. +ufsplit_2(Y, [Z | L], Fun, R) -> + case Fun(Y, Z) of + true -> + case Fun(Z, Y) of + true -> % Z equal to Y + ufsplit_2(Y, L, Fun, R); + false -> + ufsplit_1(Z, Y, Fun, L, [], [lists:reverse(R, [])]) + end; + false -> + ufsplit_2(Z, L, Fun, [Y | R]) + end; +ufsplit_2(Y, [], _Fun, R) -> + [Y | R]. + +ufmergel([[H1 | T1], T2 | L], Acc, Fun) -> + ufmergel(L, [ufmerge2_2(H1, T1, Fun, T2, []) | Acc], Fun); +ufmergel([L], [], _Fun) -> + L; +ufmergel([L], Acc, Fun) -> + rufmergel([lists:reverse(L, []) | Acc], [], Fun); +ufmergel([], Acc, Fun) -> + rufmergel(Acc, [], Fun). + +rufmergel([[H2 | T2], T1 | L], Acc, Fun) -> + rufmergel(L, [rufmerge2_1(T1, H2, Fun, T2, []) | Acc], Fun); +rufmergel([L], Acc, Fun) -> + ufmergel([lists:reverse(L, []) | Acc], [], Fun); +rufmergel([], Acc, Fun) -> + ufmergel(Acc, [], Fun). + +%% umerge/3 + +%% Elements from the first list are kept and prioritized. +%% HdM before H2. +ufmerge2_1([H1 | T1], H2, Fun, T2, M, HdM) -> + case Fun(H1, H2) of + true -> + ufmerge2_1(T1, H2, Fun, T2, [H1 | M], H1); + false -> + case Fun(H2, HdM) of + true -> % HdM equal to H2 + ufmerge2_2(H1, T1, Fun, T2, M); + false -> + ufmerge2_2(H1, T1, Fun, T2, [H2 | M]) + end + end; +ufmerge2_1([], H2, Fun, T2, M, HdM) -> + case Fun(H2, HdM) of + true -> % HdM equal to H2 + lists:reverse(T2, M); + false -> + lists:reverse(T2, [H2 | M]) + end. + +ufmerge2_2(H1, T1, Fun, [H2 | T2], M) -> + case Fun(H1, H2) of + true -> + ufmerge2_1(T1, H2, Fun, T2, [H1 | M], H1); + false -> + ufmerge2_2(H1, T1, Fun, T2, [H2 | M]) + end; +ufmerge2_2(H1, T1, _Fun, [], M) -> + lists:reverse(T1, [H1 | M]). + +%% rumerge/3 + +rufmerge2_1([H1 | T1], H2, Fun, T2, M) -> + case Fun(H1, H2) of + true -> + rufmerge2_2(H1, T1, Fun, T2, M, H2); + false -> + rufmerge2_1(T1, H2, Fun, T2, [H1 | M]) + end; +rufmerge2_1([], H2, _Fun, T2, M) -> + lists:reverse(T2, [H2 | M]). + +%% H1 before H2M +rufmerge2_2(H1, T1, Fun, [H2 | T2], M, H2M) -> + case Fun(H1, H2) of + true -> + rufmerge2_2(H1, T1, Fun, T2, [H2M | M], H2); + false -> + case Fun(H2M, H1) of + true -> % H2M equal to H1 + rufmerge2_1(T1, H2, Fun, T2, [H1 | M]); + false -> + rufmerge2_1(T1, H2, Fun, T2, [H1, H2M | M]) + end + end; +rufmerge2_2(H1, T1, Fun, [], M, H2M) -> + case Fun(H2M, H1) of + true -> + lists:reverse(T1, [H1 | M]); + false -> + lists:reverse(T1, [H1, H2M | M]) + end. + diff --git a/lib/stdlib/src/log_mf_h.erl b/lib/stdlib/src/log_mf_h.erl new file mode 100644 index 0000000000..2729f27e51 --- /dev/null +++ b/lib/stdlib/src/log_mf_h.erl @@ -0,0 +1,202 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(log_mf_h). + +-behaviour(gen_event). + +-export([init/3, init/4]). + +-export([init/1, handle_event/2, handle_info/2, terminate/2]). +-export([handle_call/2, code_change/3]). + +%%----------------------------------------------------------------- + +-type dir() :: file:filename(). +-type b() :: non_neg_integer(). +-type f() :: 1..255. +-type pred() :: fun((term()) -> boolean()). + +%%----------------------------------------------------------------- + +-record(state, {dir :: dir(), + maxB :: b(), + maxF :: f(), + curB :: b(), + curF :: f(), + cur_fd :: file:fd(), + index = [], %% Seems unused - take out?? + pred :: pred()}). + +%%%----------------------------------------------------------------- +%%% This module implements an event handler that writes events +%%% to multiple files (configurable). +%%%----------------------------------------------------------------- +%% Func: init/3, init/4 +%% Args: Dir = string() +%% MaxB = integer() +%% MaxF = byte() +%% Pred = fun(Event) -> boolean() +%% Purpose: An event handler. Writes binary events +%% to files in the directory Dir. Each file is called +%% 1, 2, 3, ..., MaxF. Writes MaxB bytes on each file. +%% Creates a file called 'index' in the Dir. +%% This file contains the last written FileName. +%% On startup, this file is read, and the next available +%% filename is used as first logfile. +%% Each event is filtered with the predicate function Pred. +%% Reports can be browsed with Report Browser Tool (rb). +%% Returns: Args = term() +%% The Args term should be used in a call to +%% gen_event:add_handler(EventMgr, log_mf_h, Args) +%% EventMgr = pid() | atom(). +%%----------------------------------------------------------------- + +-spec init(dir(), b(), f()) -> {dir(), b(), f(), pred()}. + +init(Dir, MaxB, MaxF) -> init(Dir, MaxB, MaxF, fun(_) -> true end). + +-spec init(dir(), b(), f(), pred()) -> {dir(), b(), f(), pred()}. + +init(Dir, MaxB, MaxF, Pred) -> {Dir, MaxB, MaxF, Pred}. + +%%----------------------------------------------------------------- +%% Call-back functions from gen_event +%%----------------------------------------------------------------- + +-spec init({dir(), b(), f(), pred()}) -> {'ok', #state{}} | {'error', term()}. + +init({Dir, MaxB, MaxF, Pred}) when is_integer(MaxF), MaxF > 0, MaxF < 256 -> + First = + case read_index_file(Dir) of + {ok, LastWritten} -> inc(LastWritten, MaxF); + _ -> 1 + end, + case catch file_open(Dir, First) of + {ok, Fd} -> + {ok, #state{dir = Dir, maxB = MaxB, maxF = MaxF, pred = Pred, + curF = First, cur_fd = Fd, curB = 0}}; + Error -> Error + end. + +%%----------------------------------------------------------------- +%% The handle_event/2 function may crash! In this case, this +%% handler is removed by gen_event from the event handlers. +%% Fails: 'file_open' if file:open failed for a log file. +%% 'write_index_file' if file:write_file failed for the +%% index file. +%% {file_exit, Reason} if the current Fd crashes. +%%----------------------------------------------------------------- + +-spec handle_event(term(), #state{}) -> {'ok', #state{}}. + +handle_event(Event, State) -> + #state{curB = CurB, maxB = MaxB, curF = CurF, maxF = MaxF, + dir = Dir, cur_fd = CurFd, pred = Pred} = State, + case catch Pred(Event) of + true -> + Bin = term_to_binary(tag_event(Event)), + Size = byte_size(Bin), + NewState = + if + CurB + Size < MaxB -> State; + true -> + ok = file:close(CurFd), + NewF = inc(CurF, MaxF), + {ok, NewFd} = file_open(Dir, NewF), + State#state{cur_fd = NewFd, curF = NewF, curB = 0} + end, + [Hi,Lo] = put_int16(Size), + file:write(NewState#state.cur_fd, [Hi, Lo, Bin]), + {ok, NewState#state{curB = NewState#state.curB + Size + 2}}; + _ -> + {ok, State} + end. + +-spec handle_info(term(), #state{}) -> {'ok', #state{}}. + +handle_info({emulator, GL, Chars}, State) -> + handle_event({emulator, GL, Chars}, State); +handle_info(_, State) -> + {ok, State}. + +-spec terminate(term(), #state{}) -> #state{}. + +terminate(_, State) -> + ok = file:close(State#state.cur_fd), + State. + +-spec handle_call('null', #state{}) -> {'ok', 'null', #state{}}. + +handle_call(null, State) -> + {ok, null, State}. + +-spec code_change(term(), #state{}, term()) -> {'ok', #state{}}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%----------------------------------------------------------------- +%% Misc local functions +%%----------------------------------------------------------------- + +file_open(Dir, FileNo) -> + case file:open(Dir ++ [$/ | integer_to_list(FileNo)], [raw, write]) of + {ok, Fd} -> + write_index_file(Dir, FileNo), + {ok, Fd}; + _ -> + exit({file, open}) + end. + +put_int16(I) -> + [((I band 16#ff00) bsr 8),I band 16#ff]. + +tag_event(Event) -> + {erlang:localtime(), Event}. + +read_index_file(Dir) -> + case file:open(Dir ++ "/index", [raw, read]) of + {ok, Fd} -> + Res = case catch file:read(Fd, 1) of + {ok, [Index]} -> {ok, Index}; + _ -> error + end, + ok = file:close(Fd), + Res; + _ -> error + end. + +%%----------------------------------------------------------------- +%% Write the index file. This file contains one binary with +%% the last used filename (an integer). +%%----------------------------------------------------------------- + +write_index_file(Dir, Index) -> + case file:open(Dir ++ "/index", [raw, write]) of + {ok, Fd} -> + file:write(Fd, [Index]), + ok = file:close(Fd); + _ -> exit(open_index_file) + end. + +inc(N, Max) -> + if + N < Max -> N + 1; + true -> 1 + end. diff --git a/lib/stdlib/src/math.erl b/lib/stdlib/src/math.erl new file mode 100644 index 0000000000..b2ea6195c5 --- /dev/null +++ b/lib/stdlib/src/math.erl @@ -0,0 +1,25 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(math). + +-export([pi/0]). + +-spec pi() -> float(). + +pi() -> 3.1415926535897932. diff --git a/lib/stdlib/src/ms_transform.erl b/lib/stdlib/src/ms_transform.erl new file mode 100644 index 0000000000..78b1de6e16 --- /dev/null +++ b/lib/stdlib/src/ms_transform.erl @@ -0,0 +1,992 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(ms_transform). + +-export([format_error/1,transform_from_shell/3,parse_transform/2]). + +%% Error codes. +-define(ERROR_BASE_GUARD,0). +-define(ERROR_BASE_BODY,100). +-define(ERR_NOFUN,1). +-define(ERR_ETS_HEAD,2). +-define(ERR_DBG_HEAD,3). +-define(ERR_HEADMATCH,4). +-define(ERR_SEMI_GUARD,5). +-define(ERR_UNBOUND_VARIABLE,6). +-define(ERR_HEADBADREC,7). +-define(ERR_HEADBADFIELD,8). +-define(ERR_HEADMULTIFIELD,9). +-define(ERR_HEADDOLLARATOM,10). +-define(ERR_HEADBINMATCH,11). +-define(ERR_GENMATCH,16). +-define(ERR_GENLOCALCALL,17). +-define(ERR_GENELEMENT,18). +-define(ERR_GENBADFIELD,19). +-define(ERR_GENBADREC,20). +-define(ERR_GENMULTIFIELD,21). +-define(ERR_GENREMOTECALL,22). +-define(ERR_GENBINCONSTRUCT,23). +-define(ERR_GENDISALLOWEDOP,24). +-define(ERR_GUARDMATCH,?ERR_GENMATCH+?ERROR_BASE_GUARD). +-define(ERR_BODYMATCH,?ERR_GENMATCH+?ERROR_BASE_BODY). +-define(ERR_GUARDLOCALCALL,?ERR_GENLOCALCALL+?ERROR_BASE_GUARD). +-define(ERR_BODYLOCALCALL,?ERR_GENLOCALCALL+?ERROR_BASE_BODY). +-define(ERR_GUARDELEMENT,?ERR_GENELEMENT+?ERROR_BASE_GUARD). +-define(ERR_BODYELEMENT,?ERR_GENELEMENT+?ERROR_BASE_BODY). +-define(ERR_GUARDBADFIELD,?ERR_GENBADFIELD+?ERROR_BASE_GUARD). +-define(ERR_BODYBADFIELD,?ERR_GENBADFIELD+?ERROR_BASE_BODY). +-define(ERR_GUARDBADREC,?ERR_GENBADREC+?ERROR_BASE_GUARD). +-define(ERR_BODYBADREC,?ERR_GENBADREC+?ERROR_BASE_BODY). +-define(ERR_GUARDMULTIFIELD,?ERR_GENMULTIFIELD+?ERROR_BASE_GUARD). +-define(ERR_BODYMULTIFIELD,?ERR_GENMULTIFIELD+?ERROR_BASE_BODY). +-define(ERR_GUARDREMOTECALL,?ERR_GENREMOTECALL+?ERROR_BASE_GUARD). +-define(ERR_BODYREMOTECALL,?ERR_GENREMOTECALL+?ERROR_BASE_BODY). +-define(ERR_GUARDBINCONSTRUCT,?ERR_GENBINCONSTRUCT+?ERROR_BASE_GUARD). +-define(ERR_BODYBINCONSTRUCT,?ERR_GENBINCONSTRUCT+?ERROR_BASE_BODY). +-define(ERR_GUARDDISALLOWEDOP,?ERR_GENDISALLOWEDOP+?ERROR_BASE_GUARD). +-define(ERR_BODYDISALLOWEDOP,?ERR_GENDISALLOWEDOP+?ERROR_BASE_BODY). + +%% +%% Called by compiler or ets/dbg:fun2ms when errors occur +%% +format_error(?ERR_NOFUN) -> + "Parameter of ets/dbg:fun2ms/1 is not a literal fun"; +format_error(?ERR_ETS_HEAD) -> + "ets:fun2ms requires fun with single variable or tuple parameter"; +format_error(?ERR_DBG_HEAD) -> + "dbg:fun2ms requires fun with single variable or list parameter"; +format_error(?ERR_HEADMATCH) -> + "in fun head, only matching (=) on toplevel can be translated into match_spec"; +format_error(?ERR_SEMI_GUARD) -> + "fun with semicolon (;) in guard cannot be translated into match_spec"; +format_error(?ERR_GUARDMATCH) -> + "fun with guard matching ('=' in guard) is illegal as match_spec as well"; +format_error({?ERR_GUARDLOCALCALL, Name, Arithy}) -> + lists:flatten(io_lib:format("fun containing the local function call " + "'~w/~w' (called in guard) " + "cannot be translated into match_spec", + [Name, Arithy])); +format_error({?ERR_GUARDREMOTECALL, Module, Name, Arithy}) -> + lists:flatten(io_lib:format("fun containing the remote function call " + "'~w:~w/~w' (called in guard) " + "cannot be translated into match_spec", + [Module,Name,Arithy])); +format_error({?ERR_GUARDELEMENT, Str}) -> + lists:flatten( + io_lib:format("the language element ~s (in guard) cannot be translated " + "into match_spec", [Str])); +format_error({?ERR_GUARDBINCONSTRUCT, Var}) -> + lists:flatten( + io_lib:format("bit syntax construction with variable ~w (in guard) " + "cannot be translated " + "into match_spec", [Var])); +format_error({?ERR_GUARDDISALLOWEDOP, Operator}) -> + %% There is presently no operators that are allowed in bodies but + %% not in guards. + lists:flatten( + io_lib:format("the operator ~w is not allowed in guards", [Operator])); +format_error(?ERR_BODYMATCH) -> + "fun with body matching ('=' in body) is illegal as match_spec"; +format_error({?ERR_BODYLOCALCALL, Name, Arithy}) -> + lists:flatten(io_lib:format("fun containing the local function " + "call '~w/~w' (called in body) " + "cannot be translated into match_spec", + [Name,Arithy])); +format_error({?ERR_BODYREMOTECALL, Module, Name, Arithy}) -> + lists:flatten(io_lib:format("fun containing the remote function call " + "'~w:~w/~w' (called in body) " + "cannot be translated into match_spec", + [Module,Name,Arithy])); +format_error({?ERR_BODYELEMENT, Str}) -> + lists:flatten( + io_lib:format("the language element ~s (in body) cannot be translated " + "into match_spec", [Str])); +format_error({?ERR_BODYBINCONSTRUCT, Var}) -> + lists:flatten( + io_lib:format("bit syntax construction with variable ~w (in body) " + "cannot be translated " + "into match_spec", [Var])); +format_error({?ERR_BODYDISALLOWEDOP, Operator}) -> + %% This will probably never happen, Are there op's that are allowed in + %% guards but not in bodies? Not at time of writing anyway... + lists:flatten( + io_lib:format("the operator ~w is not allowed in function bodies", + [Operator])); + +format_error({?ERR_UNBOUND_VARIABLE, Str}) -> + lists:flatten( + io_lib:format("the variable ~s is unbound, cannot translate " + "into match_spec", [Str])); +format_error({?ERR_HEADBADREC,Name}) -> + lists:flatten( + io_lib:format("fun head contains unknown record type ~w",[Name])); +format_error({?ERR_HEADBADFIELD,RName,FName}) -> + lists:flatten( + io_lib:format("fun head contains reference to unknown field ~w in " + "record type ~w",[FName, RName])); +format_error({?ERR_HEADMULTIFIELD,RName,FName}) -> + lists:flatten( + io_lib:format("fun head contains already defined field ~w in " + "record type ~w",[FName, RName])); +format_error({?ERR_HEADDOLLARATOM,Atom}) -> + lists:flatten( + io_lib:format("fun head contains atom ~w, which conflics with reserved " + "atoms in match_spec heads",[Atom])); +format_error({?ERR_HEADBINMATCH,Atom}) -> + lists:flatten( + io_lib:format("fun head contains bit syntax matching of variable ~w, " + "which cannot be translated into match_spec", [Atom])); +format_error({?ERR_GUARDBADREC,Name}) -> + lists:flatten( + io_lib:format("fun guard contains unknown record type ~w",[Name])); +format_error({?ERR_GUARDBADFIELD,RName,FName}) -> + lists:flatten( + io_lib:format("fun guard contains reference to unknown field ~w in " + "record type ~w",[FName, RName])); +format_error({?ERR_GUARDMULTIFIELD,RName,FName}) -> + lists:flatten( + io_lib:format("fun guard contains already defined field ~w in " + "record type ~w",[FName, RName])); +format_error({?ERR_BODYBADREC,Name}) -> + lists:flatten( + io_lib:format("fun body contains unknown record type ~w",[Name])); +format_error({?ERR_BODYBADFIELD,RName,FName}) -> + lists:flatten( + io_lib:format("fun body contains reference to unknown field ~w in " + "record type ~w",[FName, RName])); +format_error({?ERR_BODYMULTIFIELD,RName,FName}) -> + lists:flatten( + io_lib:format("fun body contains already defined field ~w in " + "record type ~w",[FName, RName])); +format_error(Else) -> + lists:flatten(io_lib:format("Unknown error code ~w",[Else])). + +%% +%% Called when translating in shell +%% +transform_from_shell(Dialect, Clauses, BoundEnvironment) -> + SaveFilename = setup_filename(), + case catch ms_clause_list(1,Clauses,Dialect) of + {'EXIT',Reason} -> + cleanup_filename(SaveFilename), + exit(Reason); + {error,Line,R} -> + {error, [{cleanup_filename(SaveFilename), + [{Line, ?MODULE, R}]}], []}; + Else -> + case (catch fixup_environment(Else,BoundEnvironment)) of + {error,Line1,R1} -> + {error, [{cleanup_filename(SaveFilename), + [{Line1, ?MODULE, R1}]}], []}; + Else1 -> + Ret = normalise(Else1), + cleanup_filename(SaveFilename), + Ret + end + end. + + +%% +%% Called when translating during compiling +%% +parse_transform(Forms, _Options) -> + SaveFilename = setup_filename(), + case catch forms(Forms) of + {'EXIT',Reason} -> + cleanup_filename(SaveFilename), + exit(Reason); + {error,Line,R} -> + {error, [{cleanup_filename(SaveFilename), + [{Line, ?MODULE, R}]}], []}; + Else -> + cleanup_filename(SaveFilename), + Else + end. + +setup_filename() -> + {erase(filename),erase(records)}. + +put_filename(Name) -> + put(filename,Name). + +put_records(R) -> + put(records,R), + ok. +get_records() -> + case get(records) of + undefined -> + []; + Else -> + Else + end. +cleanup_filename({Old,OldRec}) -> + Ret = case erase(filename) of + undefined -> + "TOP_LEVEL"; + X -> + X + end, + case OldRec of + undefined -> + erase(records); + Rec -> + put(records,Rec) + end, + case Old of + undefined -> + Ret; + Y -> + put(filename,Y), + Ret + end. + +add_record_definition({Name,FieldList}) -> + {KeyList,_} = lists:foldl( + fun({record_field,_,{atom,Line0,FieldName}},{L,C}) -> + {[{FieldName,C,{atom,Line0,undefined}}|L],C+1}; + ({record_field,_,{atom,_,FieldName},Def},{L,C}) -> + {[{FieldName,C,Def}|L],C+1} + end, + {[],2}, + FieldList), + put_records([{Name,KeyList}|get_records()]). + +forms([F0|Fs0]) -> + F1 = form(F0), + Fs1 = forms(Fs0), + [F1|Fs1]; +forms([]) -> []. + +form({attribute,_,file,{Filename,_}}=Form) -> + put_filename(Filename), + Form; +form({attribute,_,record,Definition}=Form) -> + add_record_definition(Definition), + Form; +form({function,Line,Name0,Arity0,Clauses0}) -> + {Name,Arity,Clauses} = function(Name0, Arity0, Clauses0), + {function,Line,Name,Arity,Clauses}; +form(AnyOther) -> + AnyOther. +function(Name, Arity, Clauses0) -> + Clauses1 = clauses(Clauses0), + {Name,Arity,Clauses1}. +clauses([C0|Cs]) -> + C1 = clause(C0), + [C1|clauses(Cs)]; +clauses([]) -> []. +clause({clause,Line,H0,G0,B0}) -> + B1 = copy(B0), + {clause,Line,H0,G0,B1}. + +copy({call,Line,{remote,_Line2,{atom,_Line3,ets},{atom,_Line4,fun2ms}}, + As0}) -> + transform_call(ets,Line,As0); +copy({call,Line,{remote,_Line2,{record_field,_Line3, + {atom,_Line4,''},{atom,_Line5,ets}}, + {atom,_Line6,fun2ms}}, As0}) -> + %% Packages... + transform_call(ets,Line,As0); +copy({call,Line,{remote,_Line2,{atom,_Line3,dbg},{atom,_Line4,fun2ms}}, + As0}) -> + transform_call(dbg,Line,As0); +copy(T) when is_tuple(T) -> + list_to_tuple(copy_list(tuple_to_list(T))); +copy(L) when is_list(L) -> + copy_list(L); +copy(AnyOther) -> + AnyOther. + +copy_list([H|T]) -> + [copy(H)|copy_list(T)]; +copy_list([]) -> + []. + +transform_call(Type,_Line,[{'fun',Line2,{clauses, ClauseList}}]) -> + ms_clause_list(Line2, ClauseList,Type); +transform_call(_Type,Line,_NoAbstractFun) -> + throw({error,Line,?ERR_NOFUN}). + +% Fixup semicolons in guards +ms_clause_expand({clause, Line, Parameters, Guard = [_,_|_], Body}) -> + [ {clause, Line, Parameters, [X], Body} || X <- Guard ]; +ms_clause_expand(_Other) -> + false. + +ms_clause_list(Line,[H|T],Type) -> + case ms_clause_expand(H) of + NewHead when is_list(NewHead) -> + ms_clause_list(Line,NewHead ++ T, Type); + false -> + {cons, Line, ms_clause(H,Type), ms_clause_list(Line, T,Type)} + end; +ms_clause_list(Line,[],_) -> + {nil,Line}. +ms_clause({clause, Line, Parameters, Guards, Body},Type) -> + check_type(Line,Parameters,Type), + {MSHead,Bindings} = transform_head(Parameters), + MSGuards = transform_guards(Line, Guards, Bindings), + MSBody = transform_body(Line,Body,Bindings), + {tuple, Line, [MSHead,MSGuards,MSBody]}. + + +check_type(_,[{var,_,_}],_) -> + ok; +check_type(_,[{tuple,_,_}],ets) -> + ok; +check_type(_,[{record,_,_,_}],ets) -> + ok; +check_type(_,[{cons,_,_,_}],dbg) -> + ok; +check_type(Line0,[{match,_,{var,_,_},X}],Any) -> + check_type(Line0,[X],Any); +check_type(Line0,[{match,_,X,{var,_,_}}],Any) -> + check_type(Line0,[X],Any); +check_type(Line,_Type,ets) -> + throw({error,Line,?ERR_ETS_HEAD}); +check_type(Line,_,dbg) -> + throw({error,Line,?ERR_DBG_HEAD}). + +-record(tgd,{ b, %Bindings + p, %Part of spec + eb %Error code base, 0 for guards, 100 for bodies + }). + +transform_guards(Line,[],_Bindings) -> + {nil,Line}; +transform_guards(Line,[G],Bindings) -> + B = #tgd{b = Bindings, p = guard, eb = ?ERROR_BASE_GUARD}, + tg0(Line,G,B); +transform_guards(Line,_,_) -> + throw({error,Line,?ERR_SEMI_GUARD}). + +transform_body(Line,Body,Bindings) -> + B = #tgd{b = Bindings, p = body, eb = ?ERROR_BASE_BODY}, + tg0(Line,Body,B). + + +guard_top_trans({call,Line0,{atom,Line1,OldTest},Params}) -> + case old_bool_test(OldTest,length(Params)) of + undefined -> + {call,Line0,{atom,Line1,OldTest},Params}; + Trans -> + {call,Line0,{atom,Line1,Trans},Params} + end; +guard_top_trans(Else) -> + Else. + +tg0(Line,[],_) -> + {nil,Line}; +tg0(Line,[H0|T],B) when B#tgd.p =:= guard -> + H = guard_top_trans(H0), + {cons,Line, tg(H,B), tg0(Line,T,B)}; +tg0(Line,[H|T],B) -> + {cons,Line, tg(H,B), tg0(Line,T,B)}. + + +tg({match,Line,_,_},B) -> + throw({error,Line,?ERR_GENMATCH+B#tgd.eb}); +tg({op, Line, Operator, O1, O2}, B) -> + {tuple, Line, [{atom, Line, Operator}, tg(O1,B), tg(O2,B)]}; +tg({op, Line, Operator, O1}, B) -> + {tuple, Line, [{atom, Line, Operator}, tg(O1,B)]}; +tg({call, _Line, {atom, Line2, bindings},[]},_B) -> + {atom, Line2, '$*'}; +tg({call, _Line, {atom, Line2, object},[]},_B) -> + {atom, Line2, '$_'}; +tg({call, Line, {atom, _, is_record}=Call,[Object, {atom,Line3,RName}=R]},B) -> + MSObject = tg(Object,B), + RDefs = get_records(), + case lists:keysearch(RName,1,RDefs) of + {value, {RName, FieldList}} -> + RSize = length(FieldList)+1, + {tuple, Line, [Call, MSObject, R, {integer, Line3, RSize}]}; + _ -> + throw({error,Line3,{?ERR_GENBADREC+B#tgd.eb,RName}}) + end; +tg({call, Line, {atom, Line2, FunName},ParaList},B) -> + case is_ms_function(FunName,length(ParaList), B#tgd.p) of + true -> + {tuple, Line, [{atom, Line2, FunName} | + lists:map(fun(X) -> tg(X,B) end, ParaList)]}; + _ -> + throw({error,Line,{?ERR_GENLOCALCALL+B#tgd.eb, + FunName,length(ParaList)}}) + end; +tg({call, Line, {remote,_,{atom,_,erlang},{atom, Line2, FunName}},ParaList}, + B) -> + L = length(ParaList), + case is_imported_from_erlang(FunName,L,B#tgd.p) of + true -> + case is_operator(FunName,L,B#tgd.p) of + false -> + tg({call, Line, {atom, Line2, FunName},ParaList},B); + true -> + tg(list_to_tuple([op,Line2,FunName | ParaList]),B) + end; + _ -> + throw({error,Line,{?ERR_GENREMOTECALL+B#tgd.eb,erlang, + FunName,length(ParaList)}}) + end; +tg({call, Line, {remote,_,{atom,_,ModuleName}, + {atom, _, FunName}},_ParaList},B) -> + throw({error,Line,{?ERR_GENREMOTECALL+B#tgd.eb,ModuleName,FunName}}); +tg({cons,Line, H, T},B) -> + {cons, Line, tg(H,B), tg(T,B)}; +tg({nil, Line},_B) -> + {nil, Line}; +tg({tuple,Line,L},B) -> + {tuple,Line,[{tuple,Line,lists:map(fun(X) -> tg(X,B) end, L)}]}; +tg({integer,Line,I},_) -> + {integer,Line,I}; +tg({char,Line,C},_) -> + {char,Line,C}; +tg({float, Line,F},_) -> + {float,Line,F}; +tg({atom,Line,A},_) -> + case atom_to_list(A) of + [$$|_] -> + {tuple, Line,[{atom, Line, 'const'},{atom,Line,A}]}; + _ -> + {atom,Line,A} + end; +tg({string,Line,S},_) -> + {string,Line,S}; +tg({var,Line,VarName},B) -> + case lkup_bind(VarName, B#tgd.b) of + undefined -> + {tuple, Line,[{atom, Line, 'const'},{var,Line,VarName}]}; + AtomName -> + {atom, Line, AtomName} + end; +tg({record_field,Line,Object,RName,{atom,_Line1,KeyName}},B) -> + RDefs = get_records(), + case lists:keysearch(RName,1,RDefs) of + {value, {RName, FieldList}} -> + case lists:keysearch(KeyName,1, FieldList) of + {value, {KeyName,Position,_}} -> + NewObject = tg(Object,B), + {tuple, Line, [{atom, Line, 'element'}, + {integer, Line, Position}, NewObject]}; + _ -> + throw({error,Line,{?ERR_GENBADFIELD+B#tgd.eb, RName, + KeyName}}) + end; + _ -> + throw({error,Line,{?ERR_GENBADREC+B#tgd.eb,RName}}) + end; + +tg({record,Line,RName,RFields},B) -> + RDefs = get_records(), + KeyList0 = lists:foldl(fun({record_field,_,{atom,_,Key},Value}, + L) -> + NV = tg(Value,B), + [{Key,NV}|L]; + ({record_field,_,{var,_,'_'},Value}, + L) -> + NV = tg(Value,B), + [{{default},NV}|L]; + (_,_) -> + throw({error,Line, + {?ERR_GENBADREC+B#tgd.eb, + RName}}) + end, + [], + RFields), + DefValue = case lists:keysearch({default},1,KeyList0) of + {value,{{default},OverriddenDefValue}} -> + {true,OverriddenDefValue}; + _ -> + false + end, + KeyList = lists:keydelete({default},1,KeyList0), + case lists:keysearch({default},1,KeyList) of + {value,{{default},_}} -> + throw({error,Line,{?ERR_GENMULTIFIELD+B#tgd.eb,RName,'_'}}); + _ -> + ok + end, + case lists:keysearch(RName,1,RDefs) of + {value, {RName, FieldList0}} -> + FieldList1 = lists:foldl( + fun({FN,_,Def},Acc) -> + El = case lists:keysearch(FN,1,KeyList) of + {value, {FN, X0}} -> + X0; + _ -> + case DefValue of + {true,Overridden} -> + Overridden; + false -> + Def + end + end, + [El | Acc] + end, + [], + FieldList0), + check_multi_field(RName,Line,KeyList, + ?ERR_GENMULTIFIELD+B#tgd.eb), + check_undef_field(RName,Line,KeyList,FieldList0, + ?ERR_GENBADFIELD+B#tgd.eb), + {tuple,Line,[{tuple,Line,[{atom,Line,RName}|FieldList1]}]}; + _ -> + throw({error,Line,{?ERR_GENBADREC+B#tgd.eb,RName}}) + end; + +tg({record_index,Line,RName,{atom,Line2,KeyName}},B) -> + RDefs = get_records(), + case lists:keysearch(RName,1,RDefs) of + {value, {RName, FieldList}} -> + case lists:keysearch(KeyName,1, FieldList) of + {value, {KeyName,Position,_}} -> + {integer, Line2, Position}; + _ -> + throw({error,Line2,{?ERR_GENBADFIELD+B#tgd.eb, RName, + KeyName}}) + end; + _ -> + throw({error,Line,{?ERR_GENBADREC+B#tgd.eb,RName}}) + end; + +tg({record,Line,{var,Line2,_VName}=AVName, RName,RFields},B) -> + RDefs = get_records(), + MSVName = tg(AVName,B), + KeyList = lists:foldl(fun({record_field,_,{atom,_,Key},Value}, + L) -> + NV = tg(Value,B), + [{Key,NV}|L]; + (_,_) -> + throw({error,Line,?ERR_HEADBADREC}) + end, + [], + RFields), + case lists:keysearch(RName,1,RDefs) of + {value, {RName, FieldList0}} -> + FieldList1 = lists:foldl( + fun({FN,Pos,_},Acc) -> + El = case lists:keysearch(FN,1,KeyList) of + {value, {FN, X0}} -> + X0; + _ -> + {tuple, Line2, + [{atom, Line2, element}, + {integer, Line2, Pos}, + MSVName]} + end, + [El | Acc] + end, + [], + FieldList0), + check_multi_field(RName,Line,KeyList, + ?ERR_GENMULTIFIELD+B#tgd.eb), + check_undef_field(RName,Line,KeyList,FieldList0, + ?ERR_GENBADFIELD+B#tgd.eb), + {tuple,Line,[{tuple,Line,[{atom,Line,RName}|FieldList1]}]}; + _ -> + throw({error,Line,{?ERR_GENBADREC+B#tgd.eb,RName}}) + end; + +tg({bin_element,_Line0,{var, Line, A},_,_} = Whole,B) -> + case lkup_bind(A, B#tgd.b) of + undefined -> + Whole; % exists in environment hopefully + _AtomName -> + throw({error,Line,{?ERR_GENBINCONSTRUCT+B#tgd.eb,A}}) + end; +tg(default,_B) -> + default; +tg({bin_element,Line,X,Y,Z},B) -> + {bin_element, Line, tg(X,B), tg(Y,B), Z}; + +tg({bin,Line,List},B) -> + {bin,Line,[tg(X,B) || X <- List]}; + +tg(T,B) when is_tuple(T), tuple_size(T) >= 2 -> + Element = element(1,T), + Line = element(2,T), + throw({error,Line,{?ERR_GENELEMENT+B#tgd.eb, + translate_language_element(Element)}}); +tg(Other,B) -> + Element = io_lib:format("unknown element ~w", [Other]), + throw({error,unknown,{?ERR_GENELEMENT+B#tgd.eb,Element}}). + +transform_head([V]) -> + Bind = cre_bind(), + {NewV,NewBind} = toplevel_head_match(V,Bind), + th(NewV,NewBind). + + +toplevel_head_match({match,_,{var,_,VName},Expr},B) -> + {Expr,new_bind({VName,'$_'},B)}; +toplevel_head_match({match,_,Expr,{var,_,VName}},B) -> + {Expr,new_bind({VName,'$_'},B)}; +toplevel_head_match(Other,B) -> + {Other,B}. + +th({record,Line,RName,RFields},B) -> + % youch... + RDefs = get_records(), + {KeyList0,NewB} = lists:foldl(fun({record_field,_,{atom,_,Key},Value}, + {L,B0}) -> + {NV,B1} = th(Value,B0), + {[{Key,NV}|L],B1}; + ({record_field,_,{var,_,'_'},Value}, + {L,B0}) -> + {NV,B1} = th(Value,B0), + {[{{default},NV}|L],B1}; + (_,_) -> + throw({error,Line,{?ERR_HEADBADREC, + RName}}) + end, + {[],B}, + RFields), + DefValue = case lists:keysearch({default},1,KeyList0) of + {value,{{default},OverriddenDefValue}} -> + OverriddenDefValue; + _ -> + {atom,Line,'_'} + end, + KeyList = lists:keydelete({default},1,KeyList0), + case lists:keysearch({default},1,KeyList) of + {value,{{default},_}} -> + throw({error,Line,{?ERR_HEADMULTIFIELD,RName,'_'}}); + _ -> + ok + end, + case lists:keysearch(RName,1,RDefs) of + {value, {RName, FieldList0}} -> + FieldList1 = lists:foldl( + fun({FN,_,_},Acc) -> + El = case lists:keysearch(FN,1,KeyList) of + {value, {FN, X0}} -> + X0; + _ -> + DefValue + end, + [El | Acc] + end, + [], + FieldList0), + check_multi_field(RName,Line,KeyList, + ?ERR_HEADMULTIFIELD), + check_undef_field(RName,Line,KeyList,FieldList0, + ?ERR_HEADBADFIELD), + {{tuple,Line,[{atom,Line,RName}|FieldList1]},NewB}; + _ -> + throw({error,Line,{?ERR_HEADBADREC,RName}}) + end; +th({match,Line,_,_},_) -> + throw({error,Line,?ERR_HEADMATCH}); +th({atom,Line,A},B) -> + case atom_to_list(A) of + [$$|NL] -> + case (catch list_to_integer(NL)) of + N when is_integer(N) -> + throw({error,Line,{?ERR_HEADDOLLARATOM,A}}); + _ -> + {{atom,Line,A},B} + end; + _ -> + {{atom,Line,A},B} + end; +th({bin_element,_Line0,{var, Line, A},_,_},_) -> + throw({error,Line,{?ERR_HEADBINMATCH,A}}); + +th({var,Line,Name},B) -> + case lkup_bind(Name,B) of + undefined -> + NewB = new_bind(Name,B), + {{atom,Line,lkup_bind(Name,NewB)},NewB}; + Trans -> + {{atom,Line,Trans},B} + end; +th([H|T],B) -> + {NH,NB} = th(H,B), + {NT,NNB} = th(T,NB), + {[NH|NT],NNB}; +th(T,B) when is_tuple(T) -> + {L,NB} = th(tuple_to_list(T),B), + {list_to_tuple(L),NB}; +th(Nonstruct,B) -> + {Nonstruct,B}. + +%% Could be more efficient... +check_multi_field(_, _, [], _) -> + ok; +check_multi_field(RName, Line, [{Key,_}|T], ErrCode) -> + case lists:keymember(Key,1,T) of + true -> + throw({error,Line,{ErrCode,RName,Key}}); + false -> + check_multi_field(RName, Line, T, ErrCode) + end. +check_undef_field(_, _, [], _, _) -> + ok; +check_undef_field(RName, Line, [{Key,_}|T], FieldList, ErrCode) -> + case lists:keymember(Key, 1, FieldList) of + true -> + check_undef_field(RName, Line, T, FieldList, ErrCode); + false -> + throw({error,Line,{ErrCode,RName,Key}}) + end. + +cre_bind() -> + {1,[{'_','_'}]}. + +lkup_bind(Name,{_,List}) -> + case lists:keysearch(Name,1,List) of + {value, {Name, Trans}} -> + Trans; + _ -> + undefined + end. + +new_bind({Name,Trans},{Next,L}) -> + {Next,[{Name,Trans}|L]}; +new_bind(Name,{Next,L}) -> + Trans = list_to_atom([$$|integer_to_list(Next)]), + {Next+1,[{Name,Trans}|L]}. + +translate_language_element(Atom) -> + Transtab = [ + {lc,"list comprehension"}, + {bc,"binary comprehension"}, + {block, "begin/end block"}, + {'if', "if"}, + {'case', "case"}, + {'receive', "receive"}, + {'try', "try"}, + {'catch', "catch"}, + {'match', "match (=)"}, + {remote, "external function call"} + ], + case lists:keysearch(Atom,1,Transtab) of + {value,{Atom, String}} -> + String; + _ -> + atom_to_list(Atom) + end. + +old_bool_test(atom,1) -> is_atom; +old_bool_test(constant,1) -> is_constant; +old_bool_test(float,1) -> is_float; +old_bool_test(integer,1) -> is_integer; +old_bool_test(list,1) -> is_list; +old_bool_test(number,1) -> is_number; +old_bool_test(pid,1) -> is_pid; +old_bool_test(port,1) -> is_port; +old_bool_test(reference,1) -> is_reference; +old_bool_test(tuple,1) -> is_tuple; +old_bool_test(binary,1) -> is_binary; +old_bool_test(function,1) -> is_function; +old_bool_test(record,2) -> is_record; +old_bool_test(_,_) -> undefined. + +bool_test(is_atom,1) -> true; +bool_test(is_constant,1) -> true; +bool_test(is_float,1) -> true; +bool_test(is_integer,1) -> true; +bool_test(is_list,1) -> true; +bool_test(is_number,1) -> true; +bool_test(is_pid,1) -> true; +bool_test(is_port,1) -> true; +bool_test(is_reference,1) -> true; +bool_test(is_tuple,1) -> true; +bool_test(is_binary,1) -> true; +bool_test(is_function,1) -> true; +bool_test(is_record,2) -> true; +bool_test(is_seq_trace,0) -> true; +bool_test(_,_) -> false. + +real_guard_function(abs,1) -> true; +real_guard_function(element,2) -> true; +real_guard_function(hd,1) -> true; +real_guard_function(length,1) -> true; +real_guard_function(node,0) -> true; +real_guard_function(node,1) -> true; +real_guard_function(round,1) -> true; +real_guard_function(size,1) -> true; +real_guard_function(tl,1) -> true; +real_guard_function(trunc,1) -> true; +real_guard_function(self,0) -> true; +real_guard_function(float,1) -> true; +real_guard_function(_,_) -> false. + +pseudo_guard_function(get_tcw,0) -> true; +pseudo_guard_function(_,_) -> false. + +guard_function(X,A) -> + real_guard_function(X,A) or pseudo_guard_function(X,A). + +action_function(set_seq_token,2) -> true; +action_function(get_seq_token,0) -> true; +action_function(message,1) -> true; +action_function(return_trace,0) -> true; +action_function(exception_trace,0) -> true; +action_function(process_dump,0) -> true; +action_function(enable_trace,1) -> true; +action_function(enable_trace,2) -> true; +action_function(disable_trace,1) -> true; +action_function(disable_trace,2) -> true; +action_function(display,1) -> true; +action_function(caller,0) -> true; +action_function(set_tcw,1) -> true; +action_function(silent,1) -> true; +action_function(trace,2) -> true; +action_function(trace,3) -> true; +action_function(_,_) -> false. + +bool_operator('and',2) -> + true; +bool_operator('or',2) -> + true; +bool_operator('xor',2) -> + true; +bool_operator('not',1) -> + true; +bool_operator('andalso',2) -> + true; +bool_operator('orelse',2) -> + true; +bool_operator(_,_) -> + false. + +arith_operator('+',1) -> + true; +arith_operator('+',2) -> + true; +arith_operator('-',1) -> + true; +arith_operator('-',2) -> + true; +arith_operator('*',2) -> + true; +arith_operator('/',2) -> + true; +arith_operator('div',2) -> + true; +arith_operator('rem',2) -> + true; +arith_operator('band',2) -> + true; +arith_operator('bor',2) -> + true; +arith_operator('bxor',2) -> + true; +arith_operator('bnot',1) -> + true; +arith_operator('bsl',2) -> + true; +arith_operator('bsr',2) -> + true; +arith_operator(_,_) -> + false. + +cmp_operator('>',2) -> + true; +cmp_operator('>=',2) -> + true; +cmp_operator('<',2) -> + true; +cmp_operator('=<',2) -> + true; +cmp_operator('==',2) -> + true; +cmp_operator('=:=',2) -> + true; +cmp_operator('/=',2) -> + true; +cmp_operator('=/=',2) -> + true; +cmp_operator(_,_) -> + false. + +is_operator(X,A,_) -> + bool_operator(X,A) or arith_operator(X,A) or cmp_operator(X,A). + +is_imported_from_erlang(X,A,_) -> + real_guard_function(X,A) or bool_test(X,A) or bool_operator(X,A) or + arith_operator(X,A) or cmp_operator(X,A). + +is_ms_function(X,A,body) -> + action_function(X,A) or guard_function(X,A) or bool_test(X,A); + +is_ms_function(X,A,guard) -> + guard_function(X,A) or bool_test(X,A). + +fixup_environment(L,B) when is_list(L) -> + lists:map(fun(X) -> + fixup_environment(X,B) + end, + L); +fixup_environment({var,Line,Name},B) -> + case lists:keysearch(Name,1,B) of + {value,{Name,Value}} -> + freeze(Line,Value); + _ -> + throw({error,Line,{?ERR_UNBOUND_VARIABLE,atom_to_list(Name)}}) + end; +fixup_environment(T,B) when is_tuple(T) -> + list_to_tuple( + lists:map(fun(X) -> + fixup_environment(X,B) + end, + tuple_to_list(T))); +fixup_environment(Other,_B) -> + Other. + +freeze(Line,Term) -> + {frozen,Line,Term}. + +%% Most of this is bluntly stolen from erl_parse. + +normalise({frozen,_,Term}) -> + Term; +normalise({char,_,C}) -> C; +normalise({integer,_,I}) -> I; +normalise({float,_,F}) -> F; +normalise({atom,_,A}) -> A; +normalise({string,_,S}) -> S; +normalise({nil,_}) -> []; +normalise({bin,_,Fs}) -> + {value, B, _} = + eval_bits:expr_grp(Fs, [], + fun(E, _) -> + {value, normalise(E), []} + end, [], true), + B; +normalise({cons,_,Head,Tail}) -> + [normalise(Head)|normalise(Tail)]; +normalise({tuple,_,Args}) -> + list_to_tuple(normalise_list(Args)); +%% Special case for unary +/-. +normalise({op,_,'+',{char,_,I}}) -> I; +normalise({op,_,'+',{integer,_,I}}) -> I; +normalise({op,_,'+',{float,_,F}}) -> F; +normalise({op,_,'-',{char,_,I}}) -> -I; % Weird, but compatible! +normalise({op,_,'-',{integer,_,I}}) -> -I; +normalise({op,_,'-',{float,_,F}}) -> -F. + +normalise_list([H|T]) -> + [normalise(H)|normalise_list(T)]; +normalise_list([]) -> + []. + + diff --git a/lib/stdlib/src/orddict.erl b/lib/stdlib/src/orddict.erl new file mode 100644 index 0000000000..c7b52b933e --- /dev/null +++ b/lib/stdlib/src/orddict.erl @@ -0,0 +1,173 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(orddict). + +%% Standard interface. +-export([new/0,is_key/2,to_list/1,from_list/1,size/1]). +-export([fetch/2,find/2,fetch_keys/1,erase/2]). +-export([store/3,append/3,append_list/3,update/3,update/4,update_counter/3]). +-export([fold/3,map/2,filter/2,merge/3]). + +%%--------------------------------------------------------------------------- + +-type orddict() :: [{term(), term()}]. + +%%--------------------------------------------------------------------------- + +-spec new() -> orddict(). + +new() -> []. + +-spec is_key(Key::term(), Dictionary::orddict()) -> boolean(). + +is_key(Key, [{K,_}|_]) when Key < K -> false; +is_key(Key, [{K,_}|Dict]) when Key > K -> is_key(Key, Dict); +is_key(_Key, [{_K,_Val}|_]) -> true; %Key == K +is_key(_, []) -> false. + +-spec to_list(orddict()) -> [{term(), term()}]. + +to_list(Dict) -> Dict. + +-spec from_list([{term(), term()}]) -> orddict(). + +from_list(Pairs) -> + lists:foldl(fun ({K,V}, D) -> store(K, V, D) end, [], Pairs). + +-spec size(orddict()) -> non_neg_integer(). + +size(D) -> length(D). + +-spec fetch(Key::term(), Dictionary::orddict()) -> term(). + +fetch(Key, [{K,_}|D]) when Key > K -> fetch(Key, D); +fetch(Key, [{K,Value}|_]) when Key == K -> Value. + +-spec find(Key::term(), Dictionary::orddict()) -> {'ok', term()} | 'error'. + +find(Key, [{K,_}|_]) when Key < K -> error; +find(Key, [{K,_}|D]) when Key > K -> find(Key, D); +find(_Key, [{_K,Value}|_]) -> {ok,Value}; %Key == K +find(_, []) -> error. + +-spec fetch_keys(Dictionary::orddict()) -> [term()]. + +fetch_keys([{Key,_}|Dict]) -> + [Key|fetch_keys(Dict)]; +fetch_keys([]) -> []. + +-spec erase(Key::term(), Dictionary::orddict()) -> orddict(). + +erase(Key, [{K,_}=E|Dict]) when Key < K -> [E|Dict]; +erase(Key, [{K,_}=E|Dict]) when Key > K -> + [E|erase(Key, Dict)]; +erase(_Key, [{_K,_Val}|Dict]) -> Dict; %Key == K +erase(_, []) -> []. + +-spec store(Key::term(), Value::term(), Dictionary::orddict()) -> orddict(). + +store(Key, New, [{K,_}=E|Dict]) when Key < K -> + [{Key,New},E|Dict]; +store(Key, New, [{K,_}=E|Dict]) when Key > K -> + [E|store(Key, New, Dict)]; +store(Key, New, [{_K,_Old}|Dict]) -> %Key == K + [{Key,New}|Dict]; +store(Key, New, []) -> [{Key,New}]. + +-spec append(Key::term(), Value::term(), Dictionary::orddict()) -> orddict(). + +append(Key, New, [{K,_}=E|Dict]) when Key < K -> + [{Key,[New]},E|Dict]; +append(Key, New, [{K,_}=E|Dict]) when Key > K -> + [E|append(Key, New, Dict)]; +append(Key, New, [{_K,Old}|Dict]) -> %Key == K + [{Key,Old ++ [New]}|Dict]; +append(Key, New, []) -> [{Key,[New]}]. + +-spec append_list(Key::term(), ValueList::[term()], orddict()) -> orddict(). + +append_list(Key, NewList, [{K,_}=E|Dict]) when Key < K -> + [{Key,NewList},E|Dict]; +append_list(Key, NewList, [{K,_}=E|Dict]) when Key > K -> + [E|append_list(Key, NewList, Dict)]; +append_list(Key, NewList, [{_K,Old}|Dict]) -> %Key == K + [{Key,Old ++ NewList}|Dict]; +append_list(Key, NewList, []) -> + [{Key,NewList}]. + +-spec update(Key::term(), Fun::fun((term()) -> term()), orddict()) -> orddict(). + +update(Key, Fun, [{K,_}=E|Dict]) when Key > K -> + [E|update(Key, Fun, Dict)]; +update(Key, Fun, [{K,Val}|Dict]) when Key == K -> + [{Key,Fun(Val)}|Dict]. + +-spec update(term(), fun((term()) -> term()), term(), orddict()) -> orddict(). + +update(Key, _, Init, [{K,_}=E|Dict]) when Key < K -> + [{Key,Init},E|Dict]; +update(Key, Fun, Init, [{K,_}=E|Dict]) when Key > K -> + [E|update(Key, Fun, Init, Dict)]; +update(Key, Fun, _Init, [{_K,Val}|Dict]) -> %Key == K + [{Key,Fun(Val)}|Dict]; +update(Key, _, Init, []) -> [{Key,Init}]. + +-spec update_counter(Key::term(), Incr::number(), orddict()) -> orddict(). + +update_counter(Key, Incr, [{K,_}=E|Dict]) when Key < K -> + [{Key,Incr},E|Dict]; +update_counter(Key, Incr, [{K,_}=E|Dict]) when Key > K -> + [E|update_counter(Key, Incr, Dict)]; +update_counter(Key, Incr, [{_K,Val}|Dict]) -> %Key == K + [{Key,Val+Incr}|Dict]; +update_counter(Key, Incr, []) -> [{Key,Incr}]. + +-spec fold(fun((term(),term(),term()) -> term()), term(), orddict()) -> term(). + +fold(F, Acc, [{Key,Val}|D]) -> + fold(F, F(Key, Val, Acc), D); +fold(F, Acc, []) when is_function(F, 3) -> Acc. + +-spec map(fun((term(), term()) -> term()), orddict()) -> orddict(). + +map(F, [{Key,Val}|D]) -> + [{Key,F(Key, Val)}|map(F, D)]; +map(F, []) when is_function(F, 2) -> []. + +-spec filter(fun((term(), term()) -> term()), orddict()) -> orddict(). + +filter(F, [{Key,Val}=E|D]) -> + case F(Key, Val) of + true -> [E|filter(F, D)]; + false -> filter(F, D) + end; +filter(F, []) when is_function(F, 2) -> []. + +-spec merge(fun((term(), term(), term()) -> term()), orddict(), orddict()) -> + orddict(). + +merge(F, [{K1,_}=E1|D1], [{K2,_}=E2|D2]) when K1 < K2 -> + [E1|merge(F, D1, [E2|D2])]; +merge(F, [{K1,_}=E1|D1], [{K2,_}=E2|D2]) when K1 > K2 -> + [E2|merge(F, [E1|D1], D2)]; +merge(F, [{K1,V1}|D1], [{_K2,V2}|D2]) -> %K1 == K2 + [{K1,F(K1, V1, V2)}|merge(F, D1, D2)]; +merge(F, [], D2) when is_function(F, 3) -> D2; +merge(F, D1, []) when is_function(F, 3) -> D1. diff --git a/lib/stdlib/src/ordsets.erl b/lib/stdlib/src/ordsets.erl new file mode 100644 index 0000000000..05041c15f1 --- /dev/null +++ b/lib/stdlib/src/ordsets.erl @@ -0,0 +1,220 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(ordsets). + +-export([new/0,is_set/1,size/1,to_list/1,from_list/1]). +-export([is_element/2,add_element/2,del_element/2]). +-export([union/2,union/1,intersection/2,intersection/1]). +-export([is_disjoint/2]). +-export([subtract/2,is_subset/2]). +-export([fold/3,filter/2]). + +-type ordset(T) :: [T]. + +%% new() -> Set. +%% Return a new empty ordered set. + +-spec new() -> ordset(term()). + +new() -> []. + +%% is_set(Term) -> boolean(). +%% Return 'true' if Set is an ordered set of elements, else 'false'. + +-spec is_set(term()) -> boolean(). + +is_set([E|Es]) -> is_set(Es, E); +is_set([]) -> true; +is_set(_) -> false. + +is_set([E2|Es], E1) when E1 < E2 -> + is_set(Es, E2); +is_set([_|_], _) -> false; +is_set([], _) -> true. + +%% size(OrdSet) -> int(). +%% Return the number of elements in OrdSet. + +-spec size(ordset(_)) -> non_neg_integer(). + +size(S) -> length(S). + +%% to_list(OrdSet) -> [Elem]. +%% Return the elements in OrdSet as a list. + +-spec to_list(ordset(T)) -> [T]. + +to_list(S) -> S. + +%% from_list([Elem]) -> Set. +%% Build an ordered set from the elements in List. + +-spec from_list([T]) -> ordset(T). + +from_list(L) -> + lists:usort(L). + +%% is_element(Element, OrdSet) -> boolean(). +%% Return 'true' if Element is an element of OrdSet, else 'false'. + +-spec is_element(term(), ordset(_)) -> boolean(). + +is_element(E, [H|Es]) when E > H -> is_element(E, Es); +is_element(E, [H|_]) when E < H -> false; +is_element(_E, [_H|_]) -> true; %E == H +is_element(_, []) -> false. + +%% add_element(Element, OrdSet) -> OrdSet. +%% Return OrdSet with Element inserted in it. + +-spec add_element(term(), ordset(_)) -> ordset(_). + +add_element(E, [H|Es]) when E > H -> [H|add_element(E, Es)]; +add_element(E, [H|_]=Set) when E < H -> [E|Set]; +add_element(_E, [_H|_]=Set) -> Set; %E == H +add_element(E, []) -> [E]. + +%% del_element(Element, OrdSet) -> OrdSet. +%% Return OrdSet but with Element removed. + +-spec del_element(term(), ordset(_)) -> ordset(_). + +del_element(E, [H|Es]) when E > H -> [H|del_element(E, Es)]; +del_element(E, [H|_]=Set) when E < H -> Set; +del_element(_E, [_H|Es]) -> Es; %E == H +del_element(_, []) -> []. + +%% union(OrdSet1, OrdSet2) -> OrdSet +%% Return the union of OrdSet1 and OrdSet2. + +-spec union(ordset(_), ordset(_)) -> ordset(_). + +union([E1|Es1], [E2|_]=Set2) when E1 < E2 -> + [E1|union(Es1, Set2)]; +union([E1|_]=Set1, [E2|Es2]) when E1 > E2 -> + [E2|union(Es2, Set1)]; % switch arguments! +union([E1|Es1], [_E2|Es2]) -> %E1 == E2 + [E1|union(Es1, Es2)]; +union([], Es2) -> Es2; +union(Es1, []) -> Es1. + +%% union([OrdSet]) -> OrdSet +%% Return the union of the list of ordered sets. + +-spec union([ordset(_)]) -> ordset(_). + +union([S1,S2|Ss]) -> + union1(union(S1, S2), Ss); +union([S]) -> S; +union([]) -> []. + +union1(S1, [S2|Ss]) -> union1(union(S1, S2), Ss); +union1(S1, []) -> S1. + +%% intersection(OrdSet1, OrdSet2) -> OrdSet. +%% Return the intersection of OrdSet1 and OrdSet2. + +-spec intersection(ordset(_), ordset(_)) -> ordset(_). + +intersection([E1|Es1], [E2|_]=Set2) when E1 < E2 -> + intersection(Es1, Set2); +intersection([E1|_]=Set1, [E2|Es2]) when E1 > E2 -> + intersection(Es2, Set1); % switch arguments! +intersection([E1|Es1], [_E2|Es2]) -> %E1 == E2 + [E1|intersection(Es1, Es2)]; +intersection([], _) -> + []; +intersection(_, []) -> + []. + +%% intersection([OrdSet]) -> OrdSet. +%% Return the intersection of the list of ordered sets. + +-spec intersection([ordset(_)]) -> ordset(_). + +intersection([S1,S2|Ss]) -> + intersection1(intersection(S1, S2), Ss); +intersection([S]) -> S. + +intersection1(S1, [S2|Ss]) -> + intersection1(intersection(S1, S2), Ss); +intersection1(S1, []) -> S1. + +%% is_disjoint(OrdSet1, OrdSet2) -> boolean(). +%% Check whether OrdSet1 and OrdSet2 are disjoint. + +-spec is_disjoint(ordset(_), ordset(_)) -> boolean(). + +is_disjoint([E1|Es1], [E2|_]=Set2) when E1 < E2 -> + is_disjoint(Es1, Set2); +is_disjoint([E1|_]=Set1, [E2|Es2]) when E1 > E2 -> + is_disjoint(Es2, Set1); % switch arguments! +is_disjoint([_E1|_Es1], [_E2|_Es2]) -> %E1 == E2 + false; +is_disjoint([], _) -> + true; +is_disjoint(_, []) -> + true. + +%% subtract(OrdSet1, OrdSet2) -> OrdSet. +%% Return all and only the elements of OrdSet1 which are not also in +%% OrdSet2. + +-spec subtract(ordset(_), ordset(_)) -> ordset(_). + +subtract([E1|Es1], [E2|_]=Set2) when E1 < E2 -> + [E1|subtract(Es1, Set2)]; +subtract([E1|_]=Set1, [E2|Es2]) when E1 > E2 -> + subtract(Set1, Es2); +subtract([_E1|Es1], [_E2|Es2]) -> %E1 == E2 + subtract(Es1, Es2); +subtract([], _) -> []; +subtract(Es1, []) -> Es1. + +%% is_subset(OrdSet1, OrdSet2) -> boolean(). +%% Return 'true' when every element of OrdSet1 is also a member of +%% OrdSet2, else 'false'. + +-spec is_subset(ordset(_), ordset(_)) -> boolean(). + +is_subset([E1|_], [E2|_]) when E1 < E2 -> %E1 not in Set2 + false; +is_subset([E1|_]=Set1, [E2|Es2]) when E1 > E2 -> + is_subset(Set1, Es2); +is_subset([_E1|Es1], [_E2|Es2]) -> %E1 == E2 + is_subset(Es1, Es2); +is_subset([], _) -> true; +is_subset(_, []) -> false. + +%% fold(Fun, Accumulator, OrdSet) -> Accumulator. +%% Fold function Fun over all elements in OrdSet and return Accumulator. + +-spec fold(fun((_, _) -> _), _, ordset(_)) -> _. + +fold(F, Acc, Set) -> + lists:foldl(F, Acc, Set). + +%% filter(Fun, OrdSet) -> OrdSet. +%% Filter OrdSet with Fun. + +-spec filter(fun((_) -> boolean()), ordset(_)) -> ordset(_). + +filter(F, Set) -> + lists:filter(F, Set). diff --git a/lib/stdlib/src/otp_internal.erl b/lib/stdlib/src/otp_internal.erl new file mode 100644 index 0000000000..3df6f4bb90 --- /dev/null +++ b/lib/stdlib/src/otp_internal.erl @@ -0,0 +1,384 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1999-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(otp_internal). + +-export([obsolete/3]). + +%%---------------------------------------------------------------------- + +-type tag() :: 'deprecated' | 'removed'. %% | 'experimental'. +-type mfas() :: mfa() | {atom(), atom(), [byte()]}. +-type release() :: string(). + +-spec obsolete(atom(), atom(), byte()) -> + 'no' | {tag(), string()} | {tag(), mfas(), release()}. + +obsolete(Module, Name, Arity) -> + case obsolete_1(Module, Name, Arity) of + {deprecated=Tag,{_,_,_}=Replacement} -> + {Tag,Replacement,"in a future release"}; + {_,String}=Ret when is_list(String) -> + Ret; + {_,_,_}=Ret -> + Ret; + no -> + no + end. + +obsolete_1(init, get_flag, 1) -> + {removed, {init, get_argument, 1}, "R12B"}; +obsolete_1(init, get_flags, 0) -> + {removed, {init, get_arguments, 0}, "R12B"}; +obsolete_1(init, get_args, 0) -> + {removed, {init, get_plain_arguments, 0}, "R12B"}; +obsolete_1(unix, cmd, 1) -> + {removed, {os,cmd,1}, "R9B"}; + +obsolete_1(net, _, _) -> + {deprecated, "module 'net' obsolete; use 'net_adm'"}; + +obsolete_1(erl_internal, builtins, 0) -> + {deprecated, {erl_internal, bif, 2}}; + +obsolete_1(string, re_sh_to_awk, 1) -> + {removed, {regexp, sh_to_awk, 1}, "R12B"}; +obsolete_1(string, re_parse, 1) -> + {removed, {regexp, parse, 1}, "R12B"}; +obsolete_1(string, re_match, 2) -> + {removed, {regexp, match, 2}, "R12B"}; +obsolete_1(string, re_sub, 3) -> + {removed, {regexp, sub, 3}, "R12B"}; +obsolete_1(string, re_gsub, 3) -> + {removed, {regexp, gsub, 3}, "R12B"}; +obsolete_1(string, re_split, 2) -> + {removed, {regexp, split, 2}, "R12B"}; + +obsolete_1(string, index, 2) -> + {removed, {string, str, 2}, "R12B"}; + +obsolete_1(erl_eval, seq, 2) -> + {deprecated, {erl_eval, exprs, 2}}; +obsolete_1(erl_eval, seq, 3) -> + {deprecated, {erl_eval, exprs, 3}}; +obsolete_1(erl_eval, arg_list, 2) -> + {deprecated, {erl_eval, expr_list, 2}}; +obsolete_1(erl_eval, arg_list, 3) -> + {deprecated, {erl_eval, expr_list, 3}}; + +obsolete_1(erl_pp, seq, 1) -> + {removed, {erl_pp, exprs, 1}, "R12B"}; +obsolete_1(erl_pp, seq, 2) -> + {removed, {erl_pp, exprs, 2}, "R12B"}; + +obsolete_1(io, scan_erl_seq, 1) -> + {removed, {io, scan_erl_exprs, 1}, "R12B"}; +obsolete_1(io, scan_erl_seq, 2) -> + {removed, {io, scan_erl_exprs, 2}, "R12B"}; +obsolete_1(io, scan_erl_seq, 3) -> + {removed, {io, scan_erl_exprs, 3}, "R12B"}; +obsolete_1(io, parse_erl_seq, 1) -> + {removed, {io, parse_erl_exprs, 1}, "R12B"}; +obsolete_1(io, parse_erl_seq, 2) -> + {removed, {io, parse_erl_exprs, 2}, "R12B"}; +obsolete_1(io, parse_erl_seq, 3) -> + {removed, {io, parse_erl_exprs, 3}, "R12B"}; +obsolete_1(io, parse_exprs, 2) -> + {removed, {io, parse_erl_exprs, 2}, "R12B"}; + +obsolete_1(io_lib, scan, 1) -> + {removed, {erl_scan, string, 1}, "R12B"}; +obsolete_1(io_lib, scan, 2) -> + {removed, {erl_scan, string, 2}, "R12B"}; +obsolete_1(io_lib, scan, 3) -> + {removed, {erl_scan, tokens, 3}, "R12B"}; +obsolete_1(io_lib, reserved_word, 1) -> + {removed, {erl_scan, reserved_word, 1}, "R12B"}; + +obsolete_1(lists, keymap, 4) -> + {removed, {lists, keymap, 3}, "R12B"}; +obsolete_1(lists, all, 3) -> + {removed, {lists, all, 2}, "R12B"}; +obsolete_1(lists, any, 3) -> + {removed, {lists, any, 2}, "R12B"}; +obsolete_1(lists, map, 3) -> + {removed, {lists, map, 2}, "R12B"}; +obsolete_1(lists, flatmap, 3) -> + {removed, {lists, flatmap, 2}, "R12B"}; +obsolete_1(lists, foldl, 4) -> + {removed, {lists, foldl, 3}, "R12B"}; +obsolete_1(lists, foldr, 4) -> + {removed, {lists, foldr, 3}, "R12B"}; +obsolete_1(lists, mapfoldl, 4) -> + {removed, {lists, mapfoldl, 3}, "R12B"}; +obsolete_1(lists, mapfoldr, 4) -> + {removed, {lists, mapfoldr, 3}, "R12B"}; +obsolete_1(lists, filter, 3) -> + {removed, {lists, filter, 2}, "R12B"}; +obsolete_1(lists, foreach, 3) -> + {removed, {lists, foreach, 2}, "R12B"}; +obsolete_1(lists, zf, 3) -> + {removed, {lists, zf, 2}, "R12B"}; + +obsolete_1(ets, fixtable, 2) -> + {removed, {ets, safe_fixtable, 2}, "R12B"}; + +obsolete_1(erlang, old_binary_to_term, 1) -> + {removed, {erlang, binary_to_term, 1}, "R12B"}; +obsolete_1(erlang, info, 1) -> + {removed, {erlang, system_info, 1}, "R12B"}; +obsolete_1(erlang, hash, 2) -> + {deprecated, {erlang, phash2, 2}}; + +obsolete_1(file, file_info, 1) -> + {removed, {file, read_file_info, 1}, "R12B"}; + +obsolete_1(dict, dict_to_list, 1) -> + {removed, {dict,to_list,1}, "R12B"}; +obsolete_1(dict, list_to_dict, 1) -> + {removed, {dict,from_list,1}, "R12B"}; +obsolete_1(orddict, dict_to_list, 1) -> + {removed, {orddict,to_list,1}, "R12B"}; +obsolete_1(orddict, list_to_dict, 1) -> + {removed, {orddict,from_list,1}, "R12B"}; + +obsolete_1(sets, new_set, 0) -> + {removed, {sets, new, 0}, "R12B"}; +obsolete_1(sets, set_to_list, 1) -> + {removed, {sets, to_list, 1}, "R12B"}; +obsolete_1(sets, list_to_set, 1) -> + {removed, {sets, from_list, 1}, "R12B"}; +obsolete_1(sets, subset, 2) -> + {removed, {sets, is_subset, 2}, "R12B"}; +obsolete_1(ordsets, new_set, 0) -> + {removed, {ordsets, new, 0}, "R12B"}; +obsolete_1(ordsets, set_to_list, 1) -> + {removed, {ordsets, to_list, 1}, "R12B"}; +obsolete_1(ordsets, list_to_set, 1) -> + {removed, {ordsets, from_list, 1}, "R12B"}; +obsolete_1(ordsets, subset, 2) -> + {removed, {ordsets, is_subset, 2}, "R12B"}; + +obsolete_1(calendar, local_time_to_universal_time, 1) -> + {deprecated, {calendar, local_time_to_universal_time_dst, 1}}; + +obsolete_1(rpc, safe_multi_server_call, A) when A =:= 2; A =:= 3 -> + {deprecated, {rpc, multi_server_call, A}}; + +obsolete_1(snmp, N, A) -> + case is_snmp_agent_function(N, A) of + false -> + no; + true -> + {deprecated,"Deprecated; use snmpa:"++atom_to_list(N)++"/"++ + integer_to_list(A)++" instead"} + end; + +obsolete_1(megaco, format_versions, 1) -> + {deprecated, "Deprecated; use megaco:print_version_info/0,1 instead"}; + +obsolete_1(os_mon_mib, init, 1) -> + {deprecated, {os_mon_mib, load, 1}}; +obsolete_1(os_mon_mib, stop, 1) -> + {deprecated, {os_mon_mib, unload, 1}}; + +obsolete_1(auth, is_auth, 1) -> + {deprecated, {net_adm, ping, 1}}; +obsolete_1(auth, cookie, 0) -> + {deprecated, {erlang, get_cookie, 0}}; +obsolete_1(auth, cookie, 1) -> + {deprecated, {erlang, set_cookie, 2}}; +obsolete_1(auth, node_cookie, 1) -> + {deprecated, "Deprecated; use erlang:set_cookie/2 and net_adm:ping/1 instead"}; +obsolete_1(auth, node_cookie, 2) -> + {deprecated, "Deprecated; use erlang:set_cookie/2 and net_adm:ping/1 instead"}; + +%% Added in R11B-5. +obsolete_1(http_base_64, _, _) -> + {removed, "The http_base_64 module was removed in R12B; use the base64 module instead"}; +obsolete_1(httpd_util, encode_base64, 1) -> + {removed, "Removed in R12B; use one of the encode functions in the base64 module instead"}; +obsolete_1(httpd_util, decode_base64, 1) -> + {removed, "Removed in R12B; use one of the decode functions in the base64 module instead"}; +obsolete_1(httpd_util, to_upper, 1) -> + {removed, {string, to_upper, 1}, "R12B"}; +obsolete_1(httpd_util, to_lower, 1) -> + {removed, {string, to_lower, 1}, "R12B"}; +obsolete_1(erlang, is_constant, 1) -> + {removed, "Removed in R13B"}; + +%% Added in R12B-0. +obsolete_1(ssl, port, 1) -> + {removed, {ssl, sockname, 1}, "R13B"}; +obsolete_1(ssl, accept, A) when A =:= 1; A =:= 2 -> + {removed, "deprecated; use ssl:transport_accept/1,2 and ssl:ssl_accept/1,2"}; +obsolete_1(erlang, fault, 1) -> + {removed, {erlang,error,1}, "R13B"}; +obsolete_1(erlang, fault, 2) -> + {removed, {erlang,error,2}, "R13B"}; + +%% Added in R12B-2. +obsolete_1(file, rawopen, 2) -> + {removed, "deprecated (will be removed in R13B); use file:open/2 with the raw option"}; + +obsolete_1(httpd, start, 0) -> {deprecated,{inets,start,[2,3]},"R14B"}; +obsolete_1(httpd, start, 1) -> {deprecated,{inets,start,[2,3]},"R14B"}; +obsolete_1(httpd, start_link, 1) -> {deprecated,{inets,start,[2,3]},"R14B"}; +obsolete_1(httpd, start_child, 0) -> {deprecated,{inets,start,[2,3]},"R14B"}; +obsolete_1(httpd, start_child, 1) -> {deprecated,{inets,start,[2,3]},"R14B"}; +obsolete_1(httpd, stop, 0) -> {deprecated,{inets,stop,2},"R14B"}; +obsolete_1(httpd, stop, 1) -> {deprecated,{inets,stop,2},"R14B"}; +obsolete_1(httpd, stop, 2) -> {deprecated,{inets,stop,2},"R14B"}; +obsolete_1(httpd, stop_child, 0) -> {deprecated,{inets,stop,2},"R14B"}; +obsolete_1(httpd, stop_child, 1) -> {deprecated,{inets,stop,2},"R14B"}; +obsolete_1(httpd, stop_child, 2) -> {deprecated,{inets,stop,2},"R14B"}; +obsolete_1(httpd, restart, 0) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, restart, 1) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, restart, 2) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, block, 0) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, block, 1) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, block, 2) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, block, 3) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, block, 4) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, unblock, 0) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, unblock, 1) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd, unblock, 2) -> {deprecated,{httpd,reload_config,2},"R14B"}; +obsolete_1(httpd_util, key1search, 2) -> {removed,{proplists,get_value,2},"R13B"}; +obsolete_1(httpd_util, key1search, 3) -> {removed,{proplists,get_value,3},"R13B"}; +obsolete_1(ftp, open, 3) -> {deprecated,{inets,start,[2,3]},"R14B"}; +obsolete_1(ftp, force_active, 1) -> {deprecated,{inets,start,[2,3]},"R14B"}; + +%% Added in R12B-4. +obsolete_1(ssh_cm, connect, A) when 1 =< A, A =< 3 -> + {deprecated,{ssh,connect,A},"R14B"}; +obsolete_1(ssh_cm, listen, A) when 2 =< A, A =< 4 -> + {deprecated,{ssh,daemon,A},"R14B"}; +obsolete_1(ssh_cm, stop_listener, 1) -> + {deprecated,{ssh,stop_listener,[1,2]},"R14B"}; +obsolete_1(ssh_cm, session_open, A) when A =:= 2; A =:= 4 -> + {deprecated,{ssh_connection,session_channel,A},"R14B"}; +obsolete_1(ssh_cm, direct_tcpip, A) when A =:= 6; A =:= 8 -> + {deprecated,{ssh_connection,direct_tcpip,A}}; +obsolete_1(ssh_cm, tcpip_forward, 3) -> + {deprecated,{ssh_connection,tcpip_forward,3},"R14B"}; +obsolete_1(ssh_cm, cancel_tcpip_forward, 3) -> + {deprecated,{ssh_connection,cancel_tcpip_forward,3},"R14B"}; +obsolete_1(ssh_cm, open_pty, A) when A =:= 3; A =:= 7; A =:= 9 -> + {deprecated,{ssh_connection,open_pty,A},"R14"}; +obsolete_1(ssh_cm, setenv, 5) -> + {deprecated,{ssh_connection,setenv,5},"R14B"}; +obsolete_1(ssh_cm, shell, 2) -> + {deprecated,{ssh_connection,shell,2},"R14B"}; +obsolete_1(ssh_cm, exec, 4) -> + {deprecated,{ssh_connection,exec,4},"R14B"}; +obsolete_1(ssh_cm, subsystem, 4) -> + {deprecated,{ssh_connection,subsystem,4},"R14B"}; +obsolete_1(ssh_cm, winch, A) when A =:= 4; A =:= 6 -> + {deprecated,{ssh_connection,window_change,A},"R14B"}; +obsolete_1(ssh_cm, signal, 3) -> + {deprecated,{ssh_connection,signal,3},"R14B"}; +obsolete_1(ssh_cm, attach, A) when A =:= 2; A =:= 3 -> + {deprecated,{ssh,attach,A}}; +obsolete_1(ssh_cm, detach, 2) -> + {deprecated,"no longer useful; will be removed in R14B"}; +obsolete_1(ssh_cm, set_user_ack, 4) -> + {deprecated,"no longer useful; will be removed in R14B"}; +obsolete_1(ssh_cm, adjust_window, 3) -> + {deprecated,{ssh_connection,adjust_window,3},"R14B"}; +obsolete_1(ssh_cm, close, 2) -> + {deprecated,{ssh_connection,close,2},"R14B"}; +obsolete_1(ssh_cm, stop, 1) -> + {deprecated,{ssh,close,1},"R14B"}; +obsolete_1(ssh_cm, send_eof, 2) -> + {deprecated,{ssh_connection,send_eof,2},"R14B"}; +obsolete_1(ssh_cm, send, A) when A =:= 3; A =:= 4 -> + {deprecated,{ssh_connection,send,A},"R14B"}; +obsolete_1(ssh_cm, send_ack, A) when 3 =< A, A =< 5 -> + {deprecated,{ssh_connection,send,[3,4]},"R14B"}; +obsolete_1(ssh_ssh, connect, A) when 1 =< A, A =< 3 -> + {deprecated,{ssh,shell,A},"R14B"}; +obsolete_1(ssh_sshd, listen, A) when 0 =< A, A =< 3 -> + {deprecated,{ssh,daemon,[1,2,3]},"R14"}; +obsolete_1(ssh_sshd, stop, 1) -> + {deprecated,{ssh,stop_listener,1}}; + +%% Added in R13A. +obsolete_1(regexp, _, _) -> + {deprecated, "the regexp module is deprecated (will be removed in R15A); use the re module instead"}; + +obsolete_1(lists, flat_length, 1) -> + {deprecated,{lists,flatlength,1},"R14"}; + +obsolete_1(ssh_sftp, connect, A) when 1 =< A, A =< 3 -> + {deprecated,{ssh_sftp,start_channel,A},"R14B"}; +obsolete_1(ssh_sftp, stop, 1) -> + {deprecated,{ssh_sftp,stop_channel,1},"R14B"}; + +%% Added in R13B01. +obsolete_1(ssl_pkix, decode_cert_file, A) when A =:= 1; A =:= 2 -> + {deprecated,"deprecated (will be removed in R14B); use public_key:pem_to_der/1 and public_key:pkix_decode_cert/2 instead"}; +obsolete_1(ssl_pkix, decode_cert, A) when A =:= 1; A =:= 2 -> + {deprecated,{public_key,pkix_decode_cert,2},"R14B"}; + +obsolete_1(_, _, _) -> + no. + + +-spec is_snmp_agent_function(atom(), byte()) -> boolean(). + +is_snmp_agent_function(c, 1) -> true; +is_snmp_agent_function(c, 2) -> true; +is_snmp_agent_function(compile, 3) -> true; +is_snmp_agent_function(is_consistent, 1) -> true; +is_snmp_agent_function(mib_to_hrl, 1) -> true; +is_snmp_agent_function(change_log_size, 1) -> true; +is_snmp_agent_function(log_to_txt, 2) -> true; +is_snmp_agent_function(log_to_txt, 3) -> true; +is_snmp_agent_function(log_to_txt, 4) -> true; +is_snmp_agent_function(current_request_id, 0) -> true; +is_snmp_agent_function(current_community, 0) -> true; +is_snmp_agent_function(current_address, 0) -> true; +is_snmp_agent_function(current_context, 0) -> true; +is_snmp_agent_function(current_net_if_data, 0) -> true; +is_snmp_agent_function(get_symbolic_store_db, 0) -> true; +is_snmp_agent_function(name_to_oid, 1) -> true; +is_snmp_agent_function(name_to_oid, 2) -> true; +is_snmp_agent_function(oid_to_name, 1) -> true; +is_snmp_agent_function(oid_to_name, 2) -> true; +is_snmp_agent_function(int_to_enum, 2) -> true; +is_snmp_agent_function(int_to_enum, 3) -> true; +is_snmp_agent_function(enum_to_int, 2) -> true; +is_snmp_agent_function(enum_to_int, 3) -> true; +is_snmp_agent_function(get, 2) -> true; +is_snmp_agent_function(info, 1) -> true; +is_snmp_agent_function(load_mibs, 2) -> true; +is_snmp_agent_function(unload_mibs, 2) -> true; +is_snmp_agent_function(dump_mibs, 0) -> true; +is_snmp_agent_function(dump_mibs, 1) -> true; +is_snmp_agent_function(register_subagent, 3) -> true; +is_snmp_agent_function(unregister_subagent, 2) -> true; +is_snmp_agent_function(send_notification, 3) -> true; +is_snmp_agent_function(send_notification, 4) -> true; +is_snmp_agent_function(send_notification, 5) -> true; +is_snmp_agent_function(send_notification, 6) -> true; +is_snmp_agent_function(send_trap, 3) -> true; +is_snmp_agent_function(send_trap, 4) -> true; +is_snmp_agent_function(add_agent_caps, 2) -> true; +is_snmp_agent_function(del_agent_caps, 1) -> true; +is_snmp_agent_function(get_agent_caps, 0) -> true; +is_snmp_agent_function(_, _) -> false. diff --git a/lib/stdlib/src/pg.erl b/lib/stdlib/src/pg.erl new file mode 100644 index 0000000000..503654e706 --- /dev/null +++ b/lib/stdlib/src/pg.erl @@ -0,0 +1,172 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(pg). + +%% pg provides a process group facility. Messages +%% can be multicasted to all members in the group + +-export([create/1, + create/2, + standby/2, + join/2, + send/2, + esend/2, + members/1, + name_to_pid/1, + master/1]). + + +%% Create a brand new empty process group with the master residing +%% at the local node + +-spec create(term()) -> 'ok' | {'error', term()}. + +create(PgName) -> + catch begin check(PgName), + Pid = spawn(pg,master,[PgName]), + global:register_name(PgName,Pid), + ok end. + +%% Create a brand new empty process group with the master +%% residing at Node + +-spec create(term(), node()) -> 'ok' | {'error', term()}. + +create(PgName, Node) -> + catch begin check(PgName), + Pid = spawn(Node,pg,master,[PgName]), + global:register_name(PgName,Pid), + ok end. + +%% Have a process on Node that will act as a standby for the process +%% group manager. So if the node where the manager runs fails, the +%% process group will continue to function. + +-spec standby(term(), node()) -> 'ok'. + +standby(_PgName, _Node) -> + ok. + +%% Tell process group PgName that Pid is a new member of the group +%% synchronously return a list of all old members in the group + +-spec join(atom(), pid()) -> [pid()]. + +join(PgName, Pid) when is_atom(PgName) -> + global:send(PgName, {join,self(),Pid}), + receive + {_P,{members,Members}} -> + Members + end. + +%% Multi cast Mess to all members in the group + +-spec send(atom() | pid(), term()) -> 'ok'. + +send(PgName, Mess) when is_atom(PgName) -> + global:send(PgName, {send, self(), Mess}), + ok; +send(Pg, Mess) when is_pid(Pg) -> + Pg ! {send,self(),Mess}, + ok. + +%% multi cast a message to all members in the group but ourselves +%% If we are a member + +-spec esend(atom() | pid(), term()) -> 'ok'. + +esend(PgName, Mess) when is_atom(PgName) -> + global:send(PgName, {esend,self(),Mess}), + ok; +esend(Pg, Mess) when is_pid(Pg) -> + Pg ! {esend,self(),Mess}, + ok. + +%% Return the members of the group + +-spec members(atom() | pid()) -> [pid()]. + +members(PgName) when is_atom(PgName) -> + global:send(PgName, {self() ,members}), + receive + {_P,{members,Members}} -> + Members + end; +members(Pg) when is_pid(Pg) -> + Pg ! {self,members}, + receive + {_P,{members,Members}} -> + Members + end. + +-spec name_to_pid(atom()) -> pid() | 'undefined'. + +name_to_pid(PgName) when is_atom(PgName) -> + global:whereis_name(PgName). + +-spec master(term()) -> no_return(). + +master(PgName) -> + process_flag(trap_exit, true), + master_loop(PgName, []). + +master_loop(PgName,Members) -> + receive + {send,From,Message} -> + send_all(Members,{pg_message,From,PgName,Message}), + master_loop(PgName,Members); + {esend,From,Message} -> + send_all(lists:delete(From,Members), + {pg_message,From,PgName,Message}), + master_loop(PgName,Members); + {join,From,Pid} -> + link(Pid), + send_all(Members,{new_member,PgName,Pid}), + From ! {self(),{members,Members}}, + master_loop(PgName,[Pid|Members]); + {From,members} -> + From ! {self(),{members,Members}}, + master_loop(PgName,Members); + {'EXIT',From,_} -> + L = + case lists:member(From,Members) of + true -> + NewMembers = lists:delete(From,Members), + send_all(NewMembers, {crashed_member,PgName,From}), + NewMembers; + false -> + Members + end, + master_loop(PgName,L) + end. + +send_all([], _) -> ok; +send_all([P|Ps], M) -> + P ! M, + send_all(Ps, M). + +%% Check if the process group already exists + +check(PgName) -> + case global:whereis_name(PgName) of + Pid when is_pid(Pid) -> + throw({error,already_created}); + undefined -> + ok + end. diff --git a/lib/stdlib/src/pool.erl b/lib/stdlib/src/pool.erl new file mode 100644 index 0000000000..7f5f23e26d --- /dev/null +++ b/lib/stdlib/src/pool.erl @@ -0,0 +1,212 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(pool). + +%% Supplies a computational pool of processors. +%% The chief user interface function here is get_node() +%% Which returns the name of the nodes in the pool +%% with the least load !!!! +%% This function is callable from any node including the master +%% That is part of the pool +%% nodes are scheduled on a per usgae basis and per load basis, +%% Whenever we use a node, we put at the end of the queue, and whenever +%% a node report a change in load, we insert it accordingly + +% User interface Exports ... +-export([start/1, + start/2, + stop/0, + get_nodes/0, + get_nodes_and_load/0, + get_node/0, + pspawn/3, + attach/1, + pspawn_link/3]). + +%% Internal Exports +-export([statistic_collector/0, + do_spawn/4, + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2]). + +%% User interface + +%% Start up using the .hosts.erlang file + +-spec start(atom()) -> [node()]. +start(Name) -> + start(Name,[]). + +-spec start(atom(), string()) -> [node()]. +start(Name, Args) when is_atom(Name) -> + gen_server:start({global, pool_master}, pool, [], []), + Hosts = net_adm:host_file(), + Nodes = start_nodes(Hosts, Name, Args), + lists:foreach(fun attach/1, Nodes), + Nodes. + +%% +%% Interface functions ... +%% +-spec get_nodes() -> [node()]. +get_nodes() -> + get_elements(2, get_nodes_and_load()). + +-spec attach(node()) -> 'already_attached' | 'attached'. +attach(Node) -> + gen_server:call({global, pool_master}, {attach, Node}). + +get_nodes_and_load() -> + gen_server:call({global, pool_master}, get_nodes). + +-spec get_node() -> node(). +get_node() -> + gen_server:call({global, pool_master}, get_node). + +-spec pspawn(module(), atom(), [term()]) -> pid(). +pspawn(M, F, A) -> + gen_server:call({global, pool_master}, {spawn, group_leader(), M, F, A}). + +-spec pspawn_link(module(), atom(), [term()]) -> pid(). +pspawn_link(M, F, A) -> + P = pspawn(M, F, A), + link(P), + P. + +start_nodes([], _, _) -> []; +start_nodes([Host|Tail], Name, Args) -> + case slave:start(Host, Name, Args) of + {error, R} -> + io:format("Can't start node on host ~w due to ~w~n",[Host, R]), + start_nodes(Tail, Name, Args); + {ok, Node} -> + [Node | start_nodes(Tail, Name, Args)] + end. + +-spec stop() -> 'stopped'. +stop() -> + gen_server:call({global, pool_master}, stop). + +get_elements(_Pos,[]) -> []; +get_elements(Pos,[E|T]) -> [element(Pos,E) | get_elements(Pos,T)]. + +stop_em([]) -> stopped; +stop_em([N|Tail]) -> + rpc:cast(N, erlang, halt, []), + stop_em(Tail). + +init([]) -> + process_flag(trap_exit, true), + spawn_link(pool, statistic_collector, []), + {ok,[{0,node()}]}. + +handle_call(get_nodes, _From, Nodes)-> + {reply, Nodes, Nodes}; +handle_call(get_node, _From, [{Load,N}|Tail]) -> + {reply, N, Tail++[{Load+1, N}]}; +handle_call({attach, Node}, _From, Nodes) -> + case lists:keymember(Node, 2, Nodes) of + true -> + {reply, already_attached, Nodes}; + false -> + erlang:monitor_node(Node, true), + spawn_link(Node, pool, statistic_collector, []), + {reply, attached, Nodes++[{999999,Node}]} + end; +handle_call({spawn, Gl, M, F, A}, _From, Nodes) -> + [{Load,N}|Tail] = Nodes, + Pid = spawn(N, pool, do_spawn, [Gl, M, F, A]), + {reply, Pid, Tail++[{Load+1, N}]}; +handle_call(stop, _From, Nodes) -> + %% clean up in terminate/2 + {stop, normal, stopped, Nodes}. + +handle_cast(_, Nodes) -> + {noreply, Nodes}. + +handle_info({Node,load,Load}, Nodes) -> + Nodes2 = insert_node({Load,Node}, Nodes), + {noreply, Nodes2}; +handle_info({nodedown, Node}, Nodes) -> + {noreply, lists:keydelete(Node, 2, Nodes)}; +handle_info(_, Nodes) -> %% The EXIT signals etc.etc + {noreply, Nodes}. + +terminate(_Reason, Nodes) -> + N = lists:delete(node(), get_elements(2, Nodes)), + stop_em(N), + ok. + +-spec do_spawn(pid(), module(), atom(), [term()]) -> term(). +do_spawn(Gl, M, F, A) -> + group_leader(Gl, self()), + apply(M, F, A). + +insert_node({Load,Node},[{L,Node}|Tail]) when Load > L -> + %% We have a raised load here + pure_insert({Load,Node},Tail); +insert_node({Load,Node},[{L,N}|Tail]) when Load =< L -> + %% Move forward in the list + T = lists:keydelete(Node,2,[{L,N}|Tail]), + [{Load,Node} | T]; +insert_node(Ln,[H|T]) -> + [H | insert_node(Ln,T)]; +insert_node(X,[]) -> % Can't happen + error_logger:error_msg("Pool_master: Bad node list X=~w\n", [X]), + exit(crash). + +pure_insert({Load,Node},[]) -> + [{Load,Node}]; +pure_insert({Load,Node},[{L,N}|Tail]) when Load < L -> + [{Load,Node}, {L,N} | Tail]; +pure_insert(L,[H|T]) -> [H|pure_insert(L,T)]. + +%% Really should not measure the contributions from +%% the back ground processes here .... which we do :-( +%% We don't have to monitor the master, since we're slaves anyway + +statistic_collector() -> + statistic_collector(5). + +statistic_collector(0) -> exit(normal); +statistic_collector(I) -> + sleep(300), + case global:whereis_name(pool_master) of + undefined -> + statistic_collector(I-1); + M -> + stat_loop(M, 999999) + end. + +%% Do not tell the master about our load if it has not changed + +stat_loop(M, Old) -> + sleep(2000), + case statistics(run_queue) of + Old -> + stat_loop(M, Old); + NewLoad -> + M ! {node(), load, NewLoad}, %% async + stat_loop(M, NewLoad) + end. + +sleep(I) -> receive after I -> ok end. diff --git a/lib/stdlib/src/proc_lib.erl b/lib/stdlib/src/proc_lib.erl new file mode 100644 index 0000000000..9aa5e0a71e --- /dev/null +++ b/lib/stdlib/src/proc_lib.erl @@ -0,0 +1,624 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(proc_lib). + +%% This module is used to set some initial information +%% in each created process. +%% Then a process terminates the Reason is checked and +%% a crash report is generated if the Reason was not expected. + +-export([spawn/1, spawn_link/1, spawn/2, spawn_link/2, + spawn/3, spawn_link/3, spawn/4, spawn_link/4, + spawn_opt/2, spawn_opt/3, spawn_opt/4, spawn_opt/5, + start/3, start/4, start/5, start_link/3, start_link/4, start_link/5, + hibernate/3, + init_ack/1, init_ack/2, + init_p/3,init_p/5,format/1,initial_call/1,translate_initial_call/1]). + +%% Internal exports. +-export([wake_up/3]). + +%%----------------------------------------------------------------------------- + +-type priority_level() :: 'high' | 'low' | 'max' | 'normal'. +-type spawn_option() :: 'link' + | {'priority', priority_level()} + | {'min_heap_size', non_neg_integer()} + | {'fullsweep_after', non_neg_integer()}. + +-type dict_or_pid() :: pid() | [_] | {integer(), integer(), integer()}. + +%%----------------------------------------------------------------------------- + +-spec spawn(function()) -> pid(). + +spawn(F) when is_function(F) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + erlang:spawn(?MODULE, init_p, [Parent,Ancestors,F]). + +-spec spawn(atom(), atom(), [term()]) -> pid(). + +spawn(M,F,A) when is_atom(M), is_atom(F), is_list(A) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + erlang:spawn(?MODULE, init_p, [Parent,Ancestors,M,F,A]). + +-spec spawn_link(function()) -> pid(). + +spawn_link(F) when is_function(F) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + erlang:spawn_link(?MODULE, init_p, [Parent,Ancestors,F]). + +-spec spawn_link(atom(), atom(), [term()]) -> pid(). + +spawn_link(M,F,A) when is_atom(M), is_atom(F), is_list(A) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + erlang:spawn_link(?MODULE, init_p, [Parent,Ancestors,M,F,A]). + +-spec spawn(node(), function()) -> pid(). + +spawn(Node, F) when is_function(F) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + erlang:spawn(Node, ?MODULE, init_p, [Parent,Ancestors,F]). + +-spec spawn(node(), atom(), atom(), [term()]) -> pid(). +spawn(Node, M, F, A) when is_atom(M), is_atom(F), is_list(A) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + erlang:spawn(Node, ?MODULE, init_p, [Parent,Ancestors,M,F,A]). + +-spec spawn_link(node(), function()) -> pid(). + +spawn_link(Node, F) when is_function(F) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + erlang:spawn_link(Node, ?MODULE, init_p, [Parent,Ancestors,F]). + +-spec spawn_link(node(), atom(), atom(), [term()]) -> pid(). + +spawn_link(Node, M, F, A) when is_atom(M), is_atom(F), is_list(A) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + erlang:spawn_link(Node, ?MODULE, init_p, [Parent,Ancestors,M,F,A]). + +-spec spawn_opt(function(), [spawn_option()]) -> pid(). +spawn_opt(F, Opts) when is_function(F) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + check_for_monitor(Opts), + erlang:spawn_opt(?MODULE, init_p, [Parent,Ancestors,F],Opts). + +-spec spawn_opt(node(), function(), [spawn_option()]) -> pid(). + +spawn_opt(Node, F, Opts) when is_function(F) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + check_for_monitor(Opts), + erlang:spawn_opt(Node, ?MODULE, init_p, [Parent,Ancestors,F], Opts). + +-spec spawn_opt(atom(), atom(), [term()], [spawn_option()]) -> pid(). + +spawn_opt(M, F, A, Opts) when is_atom(M), is_atom(F), is_list(A) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + check_for_monitor(Opts), + erlang:spawn_opt(?MODULE, init_p, [Parent,Ancestors,M,F,A], Opts). + +-spec spawn_opt(node(), atom(), atom(), [term()], [spawn_option()]) -> pid(). + +spawn_opt(Node, M, F, A, Opts) when is_atom(M), is_atom(F), is_list(A) -> + Parent = get_my_name(), + Ancestors = get_ancestors(), + check_for_monitor(Opts), + erlang:spawn_opt(Node, ?MODULE, init_p, [Parent,Ancestors,M,F,A], Opts). + +%% OTP-6345 +%% monitor spawn_opt option is currently not possible to use +check_for_monitor(SpawnOpts) -> + case lists:member(monitor, SpawnOpts) of + true -> + erlang:error(badarg); + false -> + false + end. + +-spec hibernate(module(), atom(), [term()]) -> no_return(). + +hibernate(M, F, A) when is_atom(M), is_atom(F), is_list(A) -> + erlang:hibernate(?MODULE, wake_up, [M, F, A]). + +ensure_link(SpawnOpts) -> + case lists:member(link, SpawnOpts) of + true -> + SpawnOpts; + false -> + [link|SpawnOpts] + end. + +-spec init_p(pid(), [pid()], function()) -> term(). + +init_p(Parent, Ancestors, Fun) when is_function(Fun) -> + put('$ancestors', [Parent|Ancestors]), + {module,Mod} = erlang:fun_info(Fun, module), + {name,Name} = erlang:fun_info(Fun, name), + {arity,Arity} = erlang:fun_info(Fun, arity), + put('$initial_call', {Mod,Name,Arity}), + try + Fun() + catch + Class:Reason -> + exit_p(Class, Reason) + end. + +-spec init_p(pid(), [pid()], atom(), atom(), [term()]) -> term(). + +init_p(Parent, Ancestors, M, F, A) when is_atom(M), is_atom(F), is_list(A) -> + put('$ancestors', [Parent|Ancestors]), + put('$initial_call', trans_init(M, F, A)), + init_p_do_apply(M, F, A). + +init_p_do_apply(M, F, A) -> + try + apply(M, F, A) + catch + Class:Reason -> + exit_p(Class, Reason) + end. + +-spec wake_up(atom(), atom(), [term()]) -> term(). + +wake_up(M, F, A) when is_atom(M), is_atom(F), is_list(A) -> + try + apply(M, F, A) + catch + Class:Reason -> + exit_p(Class, Reason) + end. + +exit_p(Class, Reason) -> + case get('$initial_call') of + {M,F,A} when is_atom(M), is_atom(F), is_integer(A) -> + MFA = {M,F,make_dummy_args(A, [])}, + crash_report(Class, Reason, MFA), + exit(Reason); + _ -> + %% The process dictionary has been cleared or + %% possibly modified. + crash_report(Class, Reason, []), + exit(Reason) + end. + +-spec start(atom(), atom(), [term()]) -> term(). + +start(M, F, A) when is_atom(M), is_atom(F), is_list(A) -> + start(M, F, A, infinity). + +-spec start(atom(), atom(), [term()], timeout()) -> term(). + +start(M, F, A, Timeout) when is_atom(M), is_atom(F), is_list(A) -> + Pid = ?MODULE:spawn(M, F, A), + sync_wait(Pid, Timeout). + +-spec start(atom(), atom(), [term()], timeout(), [spawn_option()]) -> term(). + +start(M, F, A, Timeout, SpawnOpts) when is_atom(M), is_atom(F), is_list(A) -> + Pid = ?MODULE:spawn_opt(M, F, A, SpawnOpts), + sync_wait(Pid, Timeout). + +-spec start_link(atom(), atom(), [term()]) -> term(). + +start_link(M, F, A) when is_atom(M), is_atom(F), is_list(A) -> + start_link(M, F, A, infinity). + +-spec start_link(atom(), atom(), [term()], timeout()) -> term(). + +start_link(M, F, A, Timeout) when is_atom(M), is_atom(F), is_list(A) -> + Pid = ?MODULE:spawn_link(M, F, A), + sync_wait(Pid, Timeout). + +-spec start_link(atom(),atom(),[term()],timeout(),[spawn_option()]) -> term(). + +start_link(M,F,A,Timeout,SpawnOpts) when is_atom(M), is_atom(F), is_list(A) -> + Pid = ?MODULE:spawn_opt(M, F, A, ensure_link(SpawnOpts)), + sync_wait(Pid, Timeout). + +sync_wait(Pid, Timeout) -> + receive + {ack, Pid, Return} -> + Return; + {'EXIT', Pid, Reason} -> + {error, Reason} + after Timeout -> + unlink(Pid), + exit(Pid, kill), + flush(Pid), + {error, timeout} + end. + +-spec flush(pid()) -> 'true'. + +flush(Pid) -> + receive + {'EXIT', Pid, _} -> + true + after 0 -> + true + end. + +-spec init_ack(pid(), term()) -> 'ok'. + +init_ack(Parent, Return) -> + Parent ! {ack, self(), Return}, + ok. + +-spec init_ack(term()) -> 'ok'. +init_ack(Return) -> + [Parent|_] = get('$ancestors'), + init_ack(Parent, Return). + +%% ----------------------------------------------------- +%% Fetch the initial call of a proc_lib spawned process. +%% ----------------------------------------------------- + +-spec initial_call(dict_or_pid()) -> {atom(), atom(), [atom()]} | 'false'. + +initial_call(DictOrPid) -> + case raw_initial_call(DictOrPid) of + {M,F,A} -> + {M,F,make_dummy_args(A, [])}; + false -> + false + end. + +make_dummy_args(0, Acc) -> + Acc; +make_dummy_args(N, Acc) -> + Arg = list_to_atom("Argument__" ++ integer_to_list(N)), + make_dummy_args(N-1, [Arg|Acc]). + +%% ----------------------------------------------------- +%% Translate the '$initial_call' to some useful information. +%% However, the arguments are not returned here; only the +%% arity of the initial function. +%% This function is typically called from c:i() and c:regs(). +%% ----------------------------------------------------- + +-spec translate_initial_call(dict_or_pid()) -> mfa(). + +translate_initial_call(DictOrPid) -> + case raw_initial_call(DictOrPid) of + {_,_,_}=MFA -> + MFA; + false -> + {?MODULE,init_p,5} + end. + +%% ----------------------------------------------------- +%% Fetch the initial call information exactly as stored +%% in the process dictionary. +%% ----------------------------------------------------- + +raw_initial_call({X,Y,Z}) when is_integer(X), is_integer(Y), is_integer(Z) -> + raw_initial_call(c:pid(X,Y,Z)); +raw_initial_call(Pid) when is_pid(Pid) -> + case get_process_info(Pid, dictionary) of + {dictionary,Dict} -> + raw_init_call(Dict); + _ -> + false + end; +raw_initial_call(ProcInfo) when is_list(ProcInfo) -> + case lists:keyfind(dictionary, 1, ProcInfo) of + {dictionary,Dict} -> + raw_init_call(Dict); + _ -> + false + end. + +raw_init_call(Dict) -> + case lists:keyfind('$initial_call', 1, Dict) of + {_,{_,_,_}=MFA} -> + MFA; + _ -> + false + end. + +%% ----------------------------------------------------- +%% Translate the initial call to some useful information. +%% ----------------------------------------------------- + +trans_init(gen,init_it,[gen_server,_,_,supervisor,{_,Module,_},_]) -> + {supervisor,Module,1}; +trans_init(gen,init_it,[gen_server,_,_,_,supervisor,{_,Module,_},_]) -> + {supervisor,Module,1}; +trans_init(gen,init_it,[gen_server,_,_,supervisor_bridge,[Module|_],_]) -> + {supervisor_bridge,Module,1}; +trans_init(gen,init_it,[gen_server,_,_,_,supervisor_bridge,[Module|_],_]) -> + {supervisor_bridge,Module,1}; +trans_init(gen,init_it,[gen_server,_,_,Module,_,_]) -> + {Module,init,1}; +trans_init(gen,init_it,[gen_server,_,_,_,Module|_]) -> + {Module,init,1}; +trans_init(gen,init_it,[gen_fsm,_,_,Module,_,_]) -> + {Module,init,1}; +trans_init(gen,init_it,[gen_fsm,_,_,_,Module|_]) -> + {Module,init,1}; +trans_init(gen,init_it,[gen_event|_]) -> + {gen_event,init_it,6}; +trans_init(M, F, A) when is_atom(M), is_atom(F) -> + {M,F,length(A)}. + +%% ----------------------------------------------------- +%% Generate a crash report. +%% ----------------------------------------------------- + +crash_report(exit, normal, _) -> ok; +crash_report(exit, shutdown, _) -> ok; +crash_report(exit, {shutdown,_}, _) -> ok; +crash_report(Class, Reason, StartF) -> + OwnReport = my_info(Class, Reason, StartF), + LinkReport = linked_info(self()), + Rep = [OwnReport,LinkReport], + error_logger:error_report(crash_report, Rep). + +my_info(Class, Reason, []) -> + my_info_1(Class, Reason); +my_info(Class, Reason, StartF) -> + [{initial_call, StartF}|my_info_1(Class, Reason)]. + +my_info_1(Class, Reason) -> + [{pid, self()}, + get_process_info(self(), registered_name), + {error_info, {Class,Reason,erlang:get_stacktrace()}}, + get_ancestors(self()), + get_process_info(self(), messages), + get_process_info(self(), links), + get_cleaned_dictionary(self()), + get_process_info(self(), trap_exit), + get_process_info(self(), status), + get_process_info(self(), heap_size), + get_process_info(self(), stack_size), + get_process_info(self(), reductions) + ]. + +-spec get_ancestors(pid()) -> {'ancestors', [pid()]}. + +get_ancestors(Pid) -> + case get_dictionary(Pid,'$ancestors') of + {'$ancestors',Ancestors} -> + {ancestors,Ancestors}; + _ -> + {ancestors,[]} + end. + +get_cleaned_dictionary(Pid) -> + case get_process_info(Pid,dictionary) of + {dictionary,Dict} -> {dictionary,clean_dict(Dict)}; + _ -> {dictionary,[]} + end. + +clean_dict([{'$ancestors',_}|Dict]) -> + clean_dict(Dict); +clean_dict([{'$initial_call',_}|Dict]) -> + clean_dict(Dict); +clean_dict([E|Dict]) -> + [E|clean_dict(Dict)]; +clean_dict([]) -> + []. + +get_dictionary(Pid,Tag) -> + case get_process_info(Pid,dictionary) of + {dictionary,Dict} -> + case lists:keysearch(Tag,1,Dict) of + {value,Value} -> Value; + _ -> undefined + end; + _ -> + undefined + end. + +linked_info(Pid) -> + make_neighbour_reports1(neighbours(Pid)). + +make_neighbour_reports1([P|Ps]) -> + ReportBody = make_neighbour_report(P), + %% + %% Process P might have been deleted. + %% + case lists:member(undefined, ReportBody) of + true -> + make_neighbour_reports1(Ps); + false -> + [{neighbour, ReportBody}|make_neighbour_reports1(Ps)] + end; +make_neighbour_reports1([]) -> + []. + +make_neighbour_report(Pid) -> + [{pid, Pid}, + get_process_info(Pid, registered_name), + get_initial_call(Pid), + get_process_info(Pid, current_function), + get_ancestors(Pid), + get_process_info(Pid, messages), + get_process_info(Pid, links), + get_cleaned_dictionary(Pid), + get_process_info(Pid, trap_exit), + get_process_info(Pid, status), + get_process_info(Pid, heap_size), + get_process_info(Pid, stack_size), + get_process_info(Pid, reductions) + ]. + +get_initial_call(Pid) -> + case get_dictionary(Pid, '$initial_call') of + {'$initial_call', {M, F, A}} -> + {initial_call, {M, F, make_dummy_args(A, [])}}; + _ -> + get_process_info(Pid, initial_call) + end. + +%% neighbours(Pid) = list of Pids +%% +%% Get the neighbours of Pid. A neighbour is a process which is +%% linked to Pid and does not trap exit; or a neigbour of a +%% neighbour etc. +%% +%% A breadth-first search is performed. + +-spec neighbours(pid()) -> [pid()]. + +neighbours(Pid) -> + {_, Visited} = visit(adjacents(Pid), {max_neighbours(), [Pid]}), + lists:delete(Pid, Visited). + +max_neighbours() -> 15. + +%% +%% visit(Ps, {N, Vs}) = {N0, V0s} +%% +%% A breadth-first search of neighbours. +%% Ps processes, +%% Vs visited processes, +%% N max number to visit. +%% +visit([P|Ps], {N, Vs} = NVs) when N > 0 -> + case lists:member(P, Vs) of + false -> visit(adjacents(P), visit(Ps, {N-1, [P|Vs]})); + true -> visit(Ps, NVs) + end; +visit(_, {_N, _Vs} = NVs) -> + NVs. + +%% +%% adjacents(Pid) = AdjacencyList +%% +-spec adjacents(pid()) -> [pid()]. + +adjacents(Pid) -> + case catch proc_info(Pid, links) of + {links, Links} -> no_trap(Links); + _ -> [] + end. + +no_trap([P|Ps]) -> + case catch proc_info(P, trap_exit) of + {trap_exit, false} -> [P|no_trap(Ps)]; + _ -> no_trap(Ps) + end; +no_trap([]) -> + []. + +get_process_info(Pid, Tag) -> + translate_process_info(Tag, catch proc_info(Pid, Tag)). + +translate_process_info(registered_name, []) -> + {registered_name, []}; +translate_process_info(_ , {'EXIT', _}) -> + undefined; +translate_process_info(_, Result) -> + Result. + +%%% ----------------------------------------------------------- +%%% Misc. functions +%%% ----------------------------------------------------------- + +get_my_name() -> + case proc_info(self(),registered_name) of + {registered_name,Name} -> Name; + _ -> self() + end. + +-spec get_ancestors() -> [pid()]. + +get_ancestors() -> + case get('$ancestors') of + A when is_list(A) -> A; + _ -> [] + end. + +proc_info(Pid,Item) when node(Pid) =:= node() -> + process_info(Pid,Item); +proc_info(Pid,Item) -> + case lists:member(node(Pid),nodes()) of + true -> + check(rpc:call(node(Pid), erlang, process_info, [Pid, Item])); + _ -> + hidden + end. + +check({badrpc,nodedown}) -> undefined; +check({badrpc,Error}) -> Error; +check(Res) -> Res. + +%%% ----------------------------------------------------------- +%%% Format (and write) a generated crash info structure. +%%% ----------------------------------------------------------- + +-spec format([term()]) -> string(). + +format([OwnReport,LinkReport]) -> + OwnFormat = format_report(OwnReport), + LinkFormat = format_report(LinkReport), + S = io_lib:format(" crasher:~n~s neighbours:~n~s",[OwnFormat,LinkFormat]), + lists:flatten(S). + +format_report(Rep) when is_list(Rep) -> + format_rep(Rep); +format_report(Rep) -> + io_lib:format("~p~n", [Rep]). + +format_rep([{initial_call,InitialCall}|Rep]) -> + [format_mfa(InitialCall)|format_rep(Rep)]; +format_rep([{error_info,{Class,Reason,StackTrace}}|Rep]) -> + [format_exception(Class, Reason, StackTrace)|format_rep(Rep)]; +format_rep([{Tag,Data}|Rep]) -> + [format_tag(Tag, Data)|format_rep(Rep)]; +format_rep(_) -> + []. + +format_exception(Class, Reason, StackTrace) -> + PF = pp_fun(), + StackFun = fun(M, _F, _A) -> (M =:= erl_eval) or (M =:= ?MODULE) end, + %% EI = " exception: ", + EI = " ", + [EI, lib:format_exception(1+length(EI), Class, Reason, + StackTrace, StackFun, PF), "\n"]. + +format_mfa({M,F,Args}=StartF) -> + try + A = length(Args), + [" initial call: ",atom_to_list(M),$:,atom_to_list(F),$/, + integer_to_list(A),"\n"] + catch + error:_ -> + format_tag(initial_call, StartF) + end. + +pp_fun() -> + fun(Term, I) -> + io_lib:format("~." ++ integer_to_list(I) ++ "p", [Term]) + end. + +format_tag(Tag, Data) -> + io_lib:format(" ~p: ~80.18p~n", [Tag, Data]). diff --git a/lib/stdlib/src/proplists.erl b/lib/stdlib/src/proplists.erl new file mode 100644 index 0000000000..35d14891f1 --- /dev/null +++ b/lib/stdlib/src/proplists.erl @@ -0,0 +1,686 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ===================================================================== +%% Support functions for property lists +%% +%% Copyright (C) 2000-2003 Richard Carlsson +%% --------------------------------------------------------------------- +%% +%% @doc Support functions for property lists. +%% +%%

Property lists are ordinary lists containing entries in the form +%% of either tuples, whose first elements are keys used for lookup and +%% insertion, or atoms, which work as shorthand for tuples {Atom, +%% true}. (Other terms are allowed in the lists, but are ignored +%% by this module.) If there is more than one entry in a list for a +%% certain key, the first occurrence normally overrides any later +%% (irrespective of the arity of the tuples).

+%% +%%

Property lists are useful for representing inherited properties, +%% such as options passed to a function where a user may specify options +%% overriding the default settings, object properties, annotations, +%% etc.

+%% +%% @type property() = atom() | tuple() + +-module(proplists). + +-export([property/1, property/2, unfold/1, compact/1, lookup/2, + lookup_all/2, is_defined/2, get_value/2, get_value/3, + get_all_values/2, append_values/2, get_bool/2, get_keys/1, + delete/2, substitute_aliases/2, substitute_negations/2, + expand/2, normalize/2, split/2]). + +%% --------------------------------------------------------------------- + +-type property() :: atom() | tuple(). + +-type aliases() :: [{any(), any()}]. +-type negations() :: [{any(), any()}]. +-type expansions() :: [{property(), [any()]}]. + +%% --------------------------------------------------------------------- + +%% @spec property(P::property()) -> property() +%% +%% @doc Creates a normal form (minimal) representation of a property. If +%% P is {Key, true} where Key is +%% an atom, this returns Key, otherwise the whole term +%% P is returned. +%% +%% @see property/2 + +-spec property(property()) -> property(). + +property({Key, true}) when is_atom(Key) -> + Key; +property(Property) -> + Property. + + +%% @spec property(Key::term(), Value::term()) -> property() +%% +%% @doc Creates a normal form (minimal) representation of a simple +%% key/value property. Returns Key if Value is +%% true and Key is an atom, otherwise a tuple +%% {Key, Value} is returned. +%% +%% @see property/1 + +-spec property(Key::term(), Value::term()) -> atom() | {term(), term()}. + +property(Key, true) when is_atom(Key) -> + Key; +property(Key, Value) -> + {Key, Value}. + + +%% --------------------------------------------------------------------- + +%% @spec unfold(List::[term()]) -> [term()] +%% +%% @doc Unfolds all occurences of atoms in List to tuples +%% {Atom, true}. +%% +%% @see compact/1 + +-spec unfold(List::[term()]) -> [term()]. + +unfold([P | Ps]) -> + if is_atom(P) -> + [{P, true} | unfold(Ps)]; + true -> + [P | unfold(Ps)] + end; +unfold([]) -> + []. + +%% @spec compact(List::[term()]) -> [term()] +%% +%% @doc Minimizes the representation of all entries in the list. This is +%% equivalent to [property(P) || P <- List]. +%% +%% @see unfold/1 +%% @see property/1 + +-spec compact(List::[property()]) -> [property()]. + +compact(List) -> + [property(P) || P <- List]. + + +%% --------------------------------------------------------------------- + +%% @spec lookup(Key::term(), List::[term()]) -> none | tuple() +%% +%% @doc Returns the first entry associated with Key in +%% List, if one exists, otherwise returns +%% none. For an atom A in the list, the tuple +%% {A, true} is the entry associated with A. +%% +%% @see lookup_all/2 +%% @see get_value/2 +%% @see get_bool/2 + +-spec lookup(Key::term(), List::[term()]) -> 'none' | tuple(). + +lookup(Key, [P | Ps]) -> + if is_atom(P), P =:= Key -> + {Key, true}; + tuple_size(P) >= 1, element(1, P) =:= Key -> + %% Note that Key does not have to be an atom in this case. + P; + true -> + lookup(Key, Ps) + end; +lookup(_Key, []) -> + none. + +%% @spec lookup_all(Key::term(), List::[term()]) -> [tuple()] +%% +%% @doc Returns the list of all entries associated with Key +%% in List. If no such entry exists, the result is the +%% empty list. +%% +%% @see lookup/2 + +-spec lookup_all(Key::term(), List::[term()]) -> [tuple()]. + +lookup_all(Key, [P | Ps]) -> + if is_atom(P), P =:= Key -> + [{Key, true} | lookup_all(Key, Ps)]; + tuple_size(P) >= 1, element(1, P) =:= Key -> + [P | lookup_all(Key, Ps)]; + true -> + lookup_all(Key, Ps) + end; +lookup_all(_Key, []) -> + []. + + +%% --------------------------------------------------------------------- + +%% @spec is_defined(Key::term(), List::[term()]) -> boolean() +%% +%% @doc Returns true if List contains at least +%% one entry associated with Key, otherwise +%% false is returned. + +-spec is_defined(Key::term(), List::[term()]) -> boolean(). + +is_defined(Key, [P | Ps]) -> + if is_atom(P), P =:= Key -> + true; + tuple_size(P) >= 1, element(1, P) =:= Key -> + true; + true -> + is_defined(Key, Ps) + end; +is_defined(_Key, []) -> + false. + + +%% --------------------------------------------------------------------- + +%% @spec get_value(Key::term(), List::[term()]) -> term() +%% @equiv get_value(Key, List, undefined) + +-spec get_value(Key::term(), List::[term()]) -> term(). + +get_value(Key, List) -> + get_value(Key, List, undefined). + +%% @spec get_value(Key::term(), List::[term()], Default::term()) -> +%% term() +%% +%% @doc Returns the value of a simple key/value property in +%% List. If lookup(Key, List) would yield +%% {Key, Value}, this function returns the corresponding +%% Value, otherwise Default is returned. +%% +%% @see lookup/2 +%% @see get_value/2 +%% @see get_all_values/2 +%% @see get_bool/2 + +-spec get_value(Key::term(), List::[term()], Default::term()) -> term(). + +get_value(Key, [P | Ps], Default) -> + if is_atom(P), P =:= Key -> + true; + tuple_size(P) >= 1, element(1, P) =:= Key -> + case P of + {_, Value} -> + Value; + _ -> + %% Dont continue the search! + Default + end; + true -> + get_value(Key, Ps, Default) + end; +get_value(_Key, [], Default) -> + Default. + +%% @spec get_all_values(Key, List) -> [term()] +%% +%% @doc Similar to get_value/2, but returns the list of +%% values for all entries {Key, Value} in +%% List. If no such entry exists, the result is the empty +%% list. +%% +%% @see get_value/2 + +-spec get_all_values(Key::term(), List::[term()]) -> [term()]. + +get_all_values(Key, [P | Ps]) -> + if is_atom(P), P =:= Key -> + [true | get_all_values(Key, Ps)]; + tuple_size(P) >= 1, element(1, P) =:= Key -> + case P of + {_, Value} -> + [Value | get_all_values(Key, Ps)]; + _ -> + get_all_values(Key, Ps) + end; + true -> + get_all_values(Key, Ps) + end; +get_all_values(_Key, []) -> + []. + +%% @spec append_values(Key::term(), List::[term()]) -> [term()] +%% +%% @doc Similar to get_all_values/2, but each value is +%% wrapped in a list unless it is already itself a list, and the +%% resulting list of lists is concatenated. This is often useful for +%% "incremental" options; e.g., append_values(a, [{a, [1,2]}, {b, +%% 0}, {a, 3}, {c, -1}, {a, [4]}]) will return the list +%% [1,2,3,4]. +%% +%% @see get_all_values/2 + +-spec append_values(Key::term(), List::[term()]) -> [term()]. + +append_values(Key, [P | Ps]) -> + if is_atom(P), P =:= Key -> + [true | append_values(Key, Ps)]; + tuple_size(P) >= 1, element(1, P) =:= Key -> + case P of + {_, Value} when is_list(Value) -> + Value ++ append_values(Key, Ps); + {_, Value} -> + [Value | append_values(Key, Ps)]; + _ -> + append_values(Key, Ps) + end; + true -> + append_values(Key, Ps) + end; +append_values(_Key, []) -> + []. + + +%% --------------------------------------------------------------------- + +%% @spec get_bool(Key::term(), List::[term()]) -> boolean() +%% +%% @doc Returns the value of a boolean key/value option. If +%% lookup(Key, List) would yield {Key, true}, +%% this function returns true; otherwise false +%% is returned. +%% +%% @see lookup/2 +%% @see get_value/2 + +-spec get_bool(Key::term(), List::[term()]) -> boolean(). + +get_bool(Key, [P | Ps]) -> + if is_atom(P), P =:= Key -> + true; + tuple_size(P) >= 1, element(1, P) =:= Key -> + case P of + {_, true} -> + true; + _ -> + %% Don't continue the search! + false + end; + true -> + get_bool(Key, Ps) + end; +get_bool(_Key, []) -> + false. + + +%% --------------------------------------------------------------------- + +%% @spec get_keys(List::[term()]) -> [term()] +%% +%% @doc Returns an unordered list of the keys used in List, +%% not containing duplicates. + +-spec get_keys(List::[term()]) -> [term()]. + +get_keys(Ps) -> + sets:to_list(get_keys(Ps, sets:new())). + +get_keys([P | Ps], Keys) -> + if is_atom(P) -> + get_keys(Ps, sets:add_element(P, Keys)); + tuple_size(P) >= 1 -> + get_keys(Ps, sets:add_element(element(1, P), Keys)); + true -> + get_keys(Ps, Keys) + end; +get_keys([], Keys) -> + Keys. + + +%% --------------------------------------------------------------------- + +%% @spec delete(Key::term(), List::[term()]) -> [term()] +%% +%% @doc Deletes all entries associated with Key from +%% List. + +-spec delete(Key::term(), List::[term()]) -> [term()]. + +delete(Key, [P | Ps]) -> + if is_atom(P), P =:= Key -> + delete(Key, Ps); + tuple_size(P) >= 1, element(1, P) =:= Key -> + delete(Key, Ps); + true -> + [P | delete(Key, Ps)] + end; +delete(_, []) -> + []. + + +%% --------------------------------------------------------------------- + +%% @spec substitute_aliases(Aliases, List::[term()]) -> [term()] +%% +%% Aliases = [{Key, Key}] +%% Key = term() +%% +%% @doc Substitutes keys of properties. For each entry in +%% List, if it is associated with some key K1 +%% such that {K1, K2} occurs in Aliases, the +%% key of the entry is changed to Key2. If the same +%% K1 occurs more than once in Aliases, only +%% the first occurrence is used. +%% +%%

Example: substitute_aliases([{color, colour}], L) +%% will replace all tuples {color, ...} in L +%% with {colour, ...}, and all atoms color +%% with colour.

+%% +%% @see substitute_negations/2 +%% @see normalize/2 + +-spec substitute_aliases(aliases(), List::[term()]) -> [term()]. + +substitute_aliases(As, Props) -> + [substitute_aliases_1(As, P) || P <- Props]. + +substitute_aliases_1([{Key, Key1} | As], P) -> + if is_atom(P), P =:= Key -> + property(Key1, true); + tuple_size(P) >= 1, element(1, P) =:= Key -> + property(setelement(1, P, Key1)); + true -> + substitute_aliases_1(As, P) + end; +substitute_aliases_1([], P) -> + P. + + +%% --------------------------------------------------------------------- + +%% @spec substitute_negations(Negations, List::[term()]) -> [term()] +%% +%% Negations = [{Key, Key}] +%% Key = term() +%% +%% @doc Substitutes keys of boolean-valued properties and simultaneously +%% negates their values. For each entry in List, if it is +%% associated with some key K1 such that {K1, +%% K2} occurs in Negations, then if the entry was +%% {K1, true} it will be replaced with {K2, +%% false}, otherwise it will be replaced with {K2, +%% true}, thus changing the name of the option and simultaneously +%% negating the value given by get_bool(List). If the same +%% K1 occurs more than once in Negations, only +%% the first occurrence is used. +%% +%%

Example: substitute_negations([{no_foo, foo}], L) +%% will replace any atom no_foo or tuple {no_foo, +%% true} in L with {foo, false}, and +%% any other tuple {no_foo, ...} with {foo, +%% true}.

+%% +%% @see get_bool/2 +%% @see substitute_aliases/2 +%% @see normalize/2 + +-spec substitute_negations(negations(), List::[term()]) -> [term()]. + +substitute_negations(As, Props) -> + [substitute_negations_1(As, P) || P <- Props]. + +substitute_negations_1([{Key, Key1} | As], P) -> + if is_atom(P), P =:= Key -> + property(Key1, false); + tuple_size(P) >= 1, element(1, P) =:= Key -> + case P of + {_, true} -> + property(Key1, false); + {_, false} -> + property(Key1, true); + _ -> + %% The property is supposed to be a boolean, so any + %% other tuple is interpreted as `false', as done in + %% `get_bool'. + property(Key1, true) + end; + true -> + substitute_negations_1(As, P) + end; +substitute_negations_1([], P) -> + P. + + +%% --------------------------------------------------------------------- + +%% @spec expand(Expansions, List::[term()]) -> [term()] +%% +%% Expansions = [{property(), [term()]}] +%% +%% @doc Expands particular properties to corresponding sets of +%% properties (or other terms). For each pair {Property, +%% Expansion} in Expansions, if E is +%% the first entry in List with the same key as +%% Property, and E and Property +%% have equivalent normal forms, then E is replaced with +%% the terms in Expansion, and any following entries with +%% the same key are deleted from List. +%% +%%

For example, the following expressions all return [fie, bar, +%% baz, fum]: +%%

    +%%
  • expand([{foo, [bar, baz]}], +%% [fie, foo, fum])
  • +%%
  • expand([{{foo, true}, [bar, baz]}], +%% [fie, foo, fum])
  • +%%
  • expand([{{foo, false}, [bar, baz]}], +%% [fie, {foo, false}, fum])
  • +%%
+%% However, no expansion is done in the following call: +%%
    +%%
  • expand([{{foo, true}, [bar, baz]}], +%% [{foo, false}, fie, foo, fum])
  • +%%
+%% because {foo, false} shadows foo.

+%% +%%

Note that if the original property term is to be preserved in the +%% result when expanded, it must be included in the expansion list. The +%% inserted terms are not expanded recursively. If +%% Expansions contains more than one property with the same +%% key, only the first occurrance is used.

+%% +%% @see normalize/2 + +-spec expand(Expansions::expansions(), [term()]) -> [term()]. + +expand(Es, Ps) when is_list(Ps) -> + Es1 = [{property(P), V} || {P, V} <- Es], + flatten(expand_0(key_uniq(Es1), Ps)). + +%% Here, all key properties are normalized and there are no multiple +%% entries in the list of expansions for any specific key property. We +%% insert the expansions one at a time - this is quadratic, but gives +%% the desired behaviour in a simple way. + +expand_0([{P, L} | Es], Ps) -> + expand_0(Es, expand_1(P, L, Ps)); +expand_0([], Ps) -> + Ps. + +expand_1(P, L, Ps) -> + %% First, we must find out what key to look for. + %% P has a minimal representation here. + if is_atom(P) -> + expand_2(P, P, L, Ps); + tuple_size(P) >= 1 -> + expand_2(element(1, P), P, L, Ps); + true -> + Ps % refuse to expand non-property + end. + +expand_2(Key, P1, L, [P | Ps]) -> + if is_atom(P), P =:= Key -> + expand_3(Key, P1, P, L, Ps); + tuple_size(P) >= 1, element(1, P) =:= Key -> + expand_3(Key, P1, property(P), L, Ps); + true -> + %% This case handles non-property entries, and thus + %% any already inserted expansions (lists), by simply + %% ignoring them. + [P | expand_2(Key, P1, L, Ps)] + end; +expand_2(_, _, _, []) -> + []. + +expand_3(Key, P1, P, L, Ps) -> + %% Here, we have found the first entry with a matching key. Both P + %% and P1 have minimal representations here. The inserted list will + %% be flattened afterwards. If the expansion is done, we drop the + %% found entry and alao delete any later entries with the same key. + if P1 =:= P -> + [L | delete(Key, Ps)]; + true -> + %% The existing entry does not match - keep it. + [P | Ps] + end. + +key_uniq([{K, V} | Ps]) -> + [{K, V} | key_uniq_1(K, Ps)]; +key_uniq([]) -> + []. + +key_uniq_1(K, [{K1, V} | Ps]) -> + if K =:= K1 -> + key_uniq_1(K, Ps); + true -> + [{K1, V} | key_uniq_1(K1, Ps)] + end; +key_uniq_1(_, []) -> + []. + +%% This does top-level flattening only. + +flatten([E | Es]) when is_list(E) -> + E ++ flatten(Es); +flatten([E | Es]) -> + [E | flatten(Es)]; +flatten([]) -> + []. + + +%% --------------------------------------------------------------------- + +%% @spec normalize(List::[term()], Stages::[Operation]) -> [term()] +%% +%% Operation = {aliases, Aliases} | {negations, Negations} +%% | {expand, Expansions} +%% Aliases = [{Key, Key}] +%% Negations = [{Key, Key}] +%% Key = term() +%% Expansions = [{property(), [term()]}] +%% +%% @doc Passes List through a sequence of +%% substitution/expansion stages. For an aliases operation, +%% the function substitute_aliases/2 is applied using the +%% given list of aliases; for a negations operation, +%% substitute_negations/2 is applied using the given +%% negation list; for an expand operation, the function +%% expand/2 is applied using the given list of expansions. +%% The final result is automatically compacted (cf. +%% compact/1). +%% +%%

Typically you want to substitute negations first, then aliases, +%% then perform one or more expansions (sometimes you want to pre-expand +%% particular entries before doing the main expansion). You might want +%% to substitute negations and/or aliases repeatedly, to allow such +%% forms in the right-hand side of aliases and expansion lists.

+%% +%% @see substitute_aliases/2 +%% @see substitute_negations/2 +%% @see expand/2 +%% @see compact/1 + +-type operation() :: {'aliases', aliases()} + | {'negations', negations()} + | {'expand', expansions()}. + +-spec normalize(List::[term()], Stages::[operation()]) -> [term()]. + +normalize(L, [{aliases, As} | Xs]) -> + normalize(substitute_aliases(As, L), Xs); +normalize(L, [{expand, Es} | Xs]) -> + normalize(expand(Es, L), Xs); +normalize(L, [{negations, Ns} | Xs]) -> + normalize(substitute_negations(Ns, L), Xs); +normalize(L, []) -> + compact(L). + +%% --------------------------------------------------------------------- + +%% @spec split(List::[term()], Keys::[term()]) -> {Lists, Rest} +%% Lists = [[term()]] +%% Rest = [term()] +%% +%% @doc Partitions List into a list of sublists and a +%% remainder. Lists contains one sublist for each key in +%% Keys, in the corresponding order. The relative order of +%% the elements in each sublist is preserved from the original +%% List. Rest contains the elements in +%% List that are not associated with any of the given keys, +%% also with their original relative order preserved. +%% +%%

Example:

+%% split([{c, 2}, {e, 1}, a, {c, 3, 4}, d, {b, 5}, b], [a, b, c])
+%% returns
+%% {[[a], [{b, 5}, b],[{c, 2}, {c, 3, 4}]], [{e, 1}, d]}
+%%

+ +-spec split(List::[term()], Keys::[term()]) -> {[[term()]], [term()]}. + +split(List, Keys) -> + {Store, Rest} = split(List, dict:from_list([{K, []} || K <- Keys]), []), + {[lists:reverse(dict:fetch(K, Store)) || K <- Keys], + lists:reverse(Rest)}. + +split([P | Ps], Store, Rest) -> + if is_atom(P) -> + case dict:is_key(P, Store) of + true -> + split(Ps, dict_prepend(P, P, Store), Rest); + false -> + split(Ps, Store, [P | Rest]) + end; + tuple_size(P) >= 1 -> + %% Note that Key does not have to be an atom in this case. + Key = element(1, P), + case dict:is_key(Key, Store) of + true -> + split(Ps, dict_prepend(Key, P, Store), Rest); + false -> + split(Ps, Store, [P | Rest]) + end; + true -> + split(Ps, Store, [P | Rest]) + end; +split([], Store, Rest) -> + {Store, Rest}. + +dict_prepend(Key, Val, Dict) -> + dict:store(Key, [Val | dict:fetch(Key, Dict)], Dict). diff --git a/lib/stdlib/src/qlc.erl b/lib/stdlib/src/qlc.erl new file mode 100644 index 0000000000..ef142e1c8a --- /dev/null +++ b/lib/stdlib/src/qlc.erl @@ -0,0 +1,3540 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(qlc). + +%%% Purpose: Main API module qlc. Functions for evaluation. +%%% Other files: +%%% qlc_pt. Implements the parse transform. + +%% External exports + +-export([parse_transform/2, transform_from_evaluator/2]). + +-export([q/1, q/2]). + +-export([eval/1, e/1, eval/2, e/2, fold/3, fold/4]). +-export([cursor/1, cursor/2, + next_answers/1, next_answers/2, + delete_cursor/1]). +-export([append/1, append/2]). + +-export([sort/1, sort/2, keysort/2, keysort/3]). + +-export([table/2]). + +-export([info/1, info/2]). + +-export([string_to_handle/1, string_to_handle/2, string_to_handle/3]). + +-export([format_error/1]). + +%% Exported to qlc_pt.erl only: +-export([template_state/0, aux_name/3, name_suffix/2, vars/1, + var_ufold/2, var_fold/3, all_selections/1]). + +%% When cache=list lists bigger than ?MAX_LIST_SIZE bytes are put on +%% file. Also used when merge join finds big equivalence classes. +-define(MAX_LIST_SIZE, 512*1024). + +-record(qlc_append, % qlc:append/1,2 + {hl + }). + +-record(qlc_table, % qlc:table/2 + {trav_fun, % traverse fun + trav_MS, % bool(); true iff traverse fun takes a match spec + pre_fun, + post_fun, + info_fun, + format_fun, + lookup_fun, + parent_fun, + key_equality, % '==' | '=:=' | undefined (--R12B-5) + lu_vals, % undefined | {Position,Values}; values to be looked up + ms = no_match_spec + % match specification; [T || P <- Tab, Fs] + }). + +-record(qlc_sort, % qlc:sort/1,2 and qlc:keysort/2,3 + {h, + keypos, % sort | {keysort, KeyPos} + unique, + compressed, % [] | [compressed] + order, + fs_opts, % file_sorter options + tmpdir_usage = allowed, % allowed | not_allowed + % | warning_msg | error_msg | info_msg + tmpdir + }). + +%% Also in qlc_pt.erl. +-record(qlc_lc, % qlc:q/1,2 + {lc, + opt % #qlc_opt + }). + +-record(qlc_list, % a prepared list + {l, + ms = no_match_spec + }). + +-record(qlc_join, % a prepared join + {kind, % {merge, KeyEquality} | + % {lookup, KeyEquality, LookupFun} + opt, % #qlc_opt from q/2. + h1, q1, c1, % to be traversed by "lookup join" + h2, q2, c2 % to be looked up by "lookup join" + }). + +%%% A query cursor is a tuple {qlc_cursor, Cursor} where Cursor is a pair +%%% {CursorPid, OwnerPid}. + +-record(qlc_cursor, {c}). + +-record(qlc_opt, + {unique = false, % bool() + cache = false, % bool() | list (true~ets, false~no) + max_lookup = -1, % int() >= 0 | -1 (represents infinity) + join = any, % any | nested_loop | merge | lookup + tmpdir = "", % global tmpdir + lookup = any, % any | bool() + max_list = ?MAX_LIST_SIZE, % int() >= 0 + tmpdir_usage = allowed % allowed | not_allowed + % | warning_msg | error_msg | info_msg + }). + +-record(setup, {parent}). + +-define(THROWN_ERROR, {?MODULE, throw_error, _}). + +%%% A query handle is a tuple {qlc_handle, Handle} where Handle is one +%%% of #qlc_append, #qlc_table, #qlc_sort, and #qlc_lc. + +-record(qlc_handle, {h}). + +get_handle(#qlc_handle{h = #qlc_lc{opt = {qlc_opt, U, C, M}}=H}) -> + %% R11B-0. + H#qlc_lc{opt = #qlc_opt{unique = U, cache = C, max_lookup = M}}; +get_handle(#qlc_handle{h = H}) -> + H; +get_handle(L) when is_list(L) -> + L; +get_handle(_) -> + badarg. + +%%% +%%% Exported functions +%%% + +append(QHs) -> + Hs = [case get_handle(QH) of + badarg -> erlang:error(badarg, [QHs]); + H -> H + end || QH <- QHs], + #qlc_handle{h = #qlc_append{hl = Hs}}. + +append(QH1, QH2) -> + Hs = [case get_handle(QH) of + badarg -> erlang:error(badarg, [QH1, QH2]); + H -> H + end || QH <- [QH1, QH2]], + #qlc_handle{h = #qlc_append{hl = Hs}}. + +cursor(QH) -> + cursor(QH, []). + +cursor(QH, Options) -> + case {options(Options, [unique_all, cache_all, tmpdir, + spawn_options, max_list_size, + tmpdir_usage]), + get_handle(QH)} of + {B1, B2} when B1 =:= badarg; B2 =:= badarg -> + erlang:error(badarg, [QH, Options]); + {[GUnique, GCache, TmpDir, SpawnOptions0, MaxList, TmpUsage], H} -> + SpawnOptions = spawn_options(SpawnOptions0), + case cursor_process(H, GUnique, GCache, TmpDir, + SpawnOptions, MaxList, TmpUsage) of + Pid when is_pid(Pid) -> + #qlc_cursor{c = {Pid, self()}}; + Error -> + Error + end + end. + +delete_cursor(#qlc_cursor{c = {_, Owner}}=C) when Owner =/= self() -> + erlang:error(not_cursor_owner, [C]); +delete_cursor(#qlc_cursor{c = {Pid, _}}) -> + stop_cursor(Pid); +delete_cursor(T) -> + erlang:error(badarg, [T]). + +e(QH) -> + eval(QH, []). + +e(QH, Options) -> + eval(QH, Options). + +eval(QH) -> + eval(QH, []). + +eval(QH, Options) -> + case {options(Options, [unique_all, cache_all, tmpdir, max_list_size, + tmpdir_usage]), + get_handle(QH)} of + {B1, B2} when B1 =:= badarg; B2 =:= badarg -> + erlang:error(badarg, [QH, Options]); + {[GUnique, GCache, TmpDir, MaxList, TmpUsage], Handle} -> + try + Prep = prepare_qlc(Handle, [], GUnique, GCache, + TmpDir, MaxList, TmpUsage), + case setup_qlc(Prep, #setup{parent = self()}) of + {L, Post, _LocalPost} when is_list(L) -> + post_funs(Post), + L; + {Objs, Post, _LocalPost} when is_function(Objs) -> + try + collect(Objs) + after + post_funs(Post) + end + end + catch Term -> + case erlang:get_stacktrace() of + [?THROWN_ERROR | _] -> + Term; + Stacktrace -> + erlang:raise(throw, Term, Stacktrace) + end + end + end. + +fold(Fun, Acc0, QH) -> + fold(Fun, Acc0, QH, []). + +fold(Fun, Acc0, QH, Options) -> + case {options(Options, [unique_all, cache_all, tmpdir, max_list_size, + tmpdir_usage]), + get_handle(QH)} of + {B1, B2} when B1 =:= badarg; B2 =:= badarg -> + erlang:error(badarg, [Fun, Acc0, QH, Options]); + {[GUnique, GCache, TmpDir, MaxList, TmpUsage], Handle} -> + try + Prep = prepare_qlc(Handle, not_a_list, GUnique, GCache, + TmpDir, MaxList, TmpUsage), + case setup_qlc(Prep, #setup{parent = self()}) of + {Objs, Post, _LocalPost} when is_function(Objs); + is_list(Objs) -> + try + fold_loop(Fun, Objs, Acc0) + after + post_funs(Post) + end + end + catch Term -> + case erlang:get_stacktrace() of + [?THROWN_ERROR | _] -> + Term; + Stacktrace -> + erlang:raise(throw, Term, Stacktrace) + end + end + end. + +format_error(not_a_query_list_comprehension) -> + io_lib:format("argument is not a query list comprehension", []); +format_error({used_generator_variable, V}) -> + io_lib:format("generated variable ~w must not be used in list expression", + [V]); +format_error(binary_generator) -> + io_lib:format("cannot handle binary generators", []); +format_error(too_complex_join) -> + io_lib:format("cannot handle join of three or more generators efficiently", + []); +format_error(too_many_joins) -> + io_lib:format("cannot handle more than one join efficiently", []); +format_error(nomatch_pattern) -> + io_lib:format("pattern cannot possibly match", []); +format_error(nomatch_filter) -> + io_lib:format("filter evaluates to 'false'", []); +format_error({Line, Mod, Reason}) when is_integer(Line) -> + io_lib:format("~p: ~s~n", + [Line, lists:flatten(Mod:format_error(Reason))]); +%% file_sorter errors +format_error({bad_object, FileName}) -> + io_lib:format("the temporary file \"~s\" holding answers is corrupt", + [FileName]); +format_error(bad_object) -> + io_lib:format("the keys could not be extracted from some term", []); +format_error({file_error, FileName, Reason}) -> + io_lib:format("\"~s\": ~p~n",[FileName, file:format_error(Reason)]); +format_error({premature_eof, FileName}) -> + io_lib:format("\"~s\": end-of-file was encountered inside some binary term", + [FileName]); +format_error({tmpdir_usage, Why}) -> + io_lib:format("temporary file was needed for ~w~n", [Why]); +format_error({error, Module, Reason}) -> + Module:format_error(Reason); +format_error(E) -> + io_lib:format("~p~n", [E]). + +info(QH) -> + info(QH, []). + +info(QH, Options) -> + case {options(Options, [unique_all, cache_all, flat, format, n_elements, + depth, tmpdir, max_list_size, tmpdir_usage]), + get_handle(QH)} of + {B1, B2} when B1 =:= badarg; B2 =:= badarg -> + erlang:error(badarg, [QH, Options]); + {[GUnique, GCache, Flat, Format, NElements, + Depth, TmpDir, MaxList, TmpUsage], + H} -> + try + Prep = prepare_qlc(H, [], GUnique, GCache, + TmpDir, MaxList, TmpUsage), + Info = le_info(Prep, {NElements,Depth}), + AbstractCode = abstract(Info, Flat, NElements, Depth), + case Format of + abstract_code -> + abstract_code(AbstractCode); + string -> + Hook = fun({special, _Line, String}, _I, _P, _F) -> + String + end, + lists:flatten(erl_pp:expr(AbstractCode, 0, Hook)); + debug -> % Not documented. Intended for testing only. + Info + end + catch Term -> + case erlang:get_stacktrace() of + [?THROWN_ERROR | _] -> + Term; + Stacktrace -> + erlang:raise(throw, Term, Stacktrace) + end + end + end. + +keysort(KeyPos, QH) -> + keysort(KeyPos, QH, []). + +keysort(KeyPos, QH, Options) -> + case {is_keypos(KeyPos), + options(Options, [tmpdir, order, unique, compressed, + size, no_files]), + get_handle(QH)} of + {true, [TmpDir, Order, Unique,Compressed | _], H} when H =/= badarg -> + #qlc_handle{h = #qlc_sort{h = H, keypos = {keysort,KeyPos}, + unique = Unique, + compressed = Compressed, + order = Order, + fs_opts = listify(Options), + tmpdir = TmpDir}}; + _ -> + erlang:error(badarg, [KeyPos, QH, Options]) + end. + +-define(DEFAULT_NUM_OF_ANSWERS, 10). + +next_answers(C) -> + next_answers(C, ?DEFAULT_NUM_OF_ANSWERS). + +next_answers(#qlc_cursor{c = {_, Owner}}=C, + NumOfAnswers) when Owner =/= self() -> + erlang:error(not_cursor_owner, [C, NumOfAnswers]); +next_answers(#qlc_cursor{c = {Pid, _}}=C, NumOfAnswers) -> + N = case NumOfAnswers of + all_remaining -> -1; + _ when is_integer(NumOfAnswers), NumOfAnswers > 0 -> NumOfAnswers; + _ -> erlang:error(badarg, [C, NumOfAnswers]) + end, + next_loop(Pid, [], N); +next_answers(T1, T2) -> + erlang:error(badarg, [T1, T2]). + +parse_transform(Forms, Options) -> + qlc_pt:parse_transform(Forms, Options). + +%% The funcspecs qlc:q/1 and qlc:q/2 are known by erl_eval.erl and +%% erl_lint.erl. +q(QLC_lc) -> + q(QLC_lc, []). + +q(#qlc_lc{}=QLC_lc, Options) -> + case options(Options, [unique, cache, max_lookup, join, lookup]) of + [Unique, Cache, Max, Join, Lookup] -> + Opt = #qlc_opt{unique = Unique, cache = Cache, + max_lookup = Max, join = Join, lookup = Lookup}, + #qlc_handle{h = QLC_lc#qlc_lc{opt = Opt}}; + _ -> + erlang:error(badarg, [QLC_lc, Options]) + end; +q(T1, T2) -> + erlang:error(badarg, [T1, T2]). + +sort(QH) -> + sort(QH, []). + +sort(QH, Options) -> + case {options(Options, [tmpdir, order, unique, compressed, + size, no_files]), get_handle(QH)} of + {B1, B2} when B1 =:= badarg; B2 =:= badarg -> + erlang:error(badarg, [QH, Options]); + {[TD, Order, Unique, Compressed | _], H} -> + #qlc_handle{h = #qlc_sort{h = H, keypos = sort, unique = Unique, + compressed = Compressed, order = Order, + fs_opts = listify(Options), + tmpdir = TD}} + end. + +%% Note that the generated code is evaluated by (the slow) erl_eval. +string_to_handle(Str) -> + string_to_handle(Str, []). + +string_to_handle(Str, Options) -> + string_to_handle(Str, Options, []). + +string_to_handle(Str, Options, Bindings) when is_list(Str), + is_list(Bindings) -> + case options(Options, [unique, cache, max_lookup, join, lookup]) of + badarg -> + erlang:error(badarg, [Str, Options, Bindings]); + [Unique, Cache, MaxLookup, Join, Lookup] -> + case erl_scan:string(Str) of + {ok, Tokens, _} -> + case erl_parse:parse_exprs(Tokens) of + {ok, [Expr]} -> + case qlc_pt:transform_expression(Expr, Bindings) of + {ok, {call, _, _QlcQ, Handle}} -> + {value, QLC_lc, _} = + erl_eval:exprs(Handle, Bindings), + O = #qlc_opt{unique = Unique, + cache = Cache, + max_lookup = MaxLookup, + join = Join, + lookup = Lookup}, + #qlc_handle{h = QLC_lc#qlc_lc{opt = O}}; + {not_ok, [{error, Error} | _]} -> + error(Error) + end; + {ok, _ExprList} -> + erlang:error(badarg, [Str, Options, Bindings]); + {error, ErrorInfo} -> + error(ErrorInfo) + end; + {error, ErrorInfo, _EndLine} -> + error(ErrorInfo) + end + end; +string_to_handle(T1, T2, T3) -> + erlang:error(badarg, [T1, T2, T3]). + +table(TraverseFun, Options) when is_function(TraverseFun) -> + case {is_function(TraverseFun, 0), + IsFun1 = is_function(TraverseFun, 1)} of + {false, false} -> + erlang:error(badarg, [TraverseFun, Options]); + _ -> + case options(Options, [pre_fun, post_fun, info_fun, format_fun, + lookup_fun, parent_fun, key_equality]) of + [PreFun, PostFun, InfoFun, FormatFun, LookupFun, ParentFun, + KeyEquality] -> + T = #qlc_table{trav_fun = TraverseFun, pre_fun = PreFun, + post_fun = PostFun, info_fun = InfoFun, + parent_fun = ParentFun, + trav_MS = IsFun1, + format_fun = FormatFun, + lookup_fun = LookupFun, + key_equality = KeyEquality}, + #qlc_handle{h = T}; + badarg -> + erlang:error(badarg, [TraverseFun, Options]) + end + end; +table(T1, T2) -> + erlang:error(badarg, [T1, T2]). + +transform_from_evaluator(LC, Bs0) -> + qlc_pt:transform_from_evaluator(LC, Bs0). + +-define(TEMPLATE_STATE, 1). + +template_state() -> + ?TEMPLATE_STATE. + +aux_name(Name, N, AllNames) -> + {VN, _} = aux_name1(Name, N, AllNames), + VN. + +name_suffix(A, Suff) -> + list_to_atom(lists:concat([A, Suff])). + +vars(E) -> + var_ufold(fun({var,_L,V}) -> V end, E). + +var_ufold(F, E) -> + ordsets:from_list(var_fold(F, [], E)). + +all_selections([]) -> + [[]]; +all_selections([{I,Cs} | ICs]) -> + [[{I,C} | L] || C <- Cs, L <- all_selections(ICs)]. + +%%% +%%% Local functions +%%% + +aux_name1(Name, N, AllNames) -> + SN = name_suffix(Name, N), + case sets:is_element(SN, AllNames) of + true -> aux_name1(Name, N + 1, AllNames); + false -> {SN, N} + end. + +var_fold(F, A, {var,_,V}=Var) when V =/= '_' -> + [F(Var) | A]; +var_fold(F, A, T) when is_tuple(T) -> + var_fold(F, A, tuple_to_list(T)); +var_fold(F, A, [E | Es]) -> + var_fold(F, var_fold(F, A, E), Es); +var_fold(_F, A, _T) -> + A. + +options(Options, Keys) when is_list(Options) -> + options(Options, Keys, []); +options(Option, Keys) -> + options([Option], Keys, []). + +options(Options0, [Key | Keys], L) when is_list(Options0) -> + Options = case lists:member(Key, Options0) of + true -> + [atom_option(Key) | lists:delete(Key, Options0)]; + false -> + Options0 + end, + V = case lists:keysearch(Key, 1, Options) of + {value, {format_fun, U=undefined}} -> + {ok, U}; + {value, {info_fun, U=undefined}} -> + {ok, U}; + {value, {lookup_fun, U=undefined}} -> + {ok, U}; + {value, {parent_fun, U=undefined}} -> + {ok, U}; + {value, {post_fun, U=undefined}} -> + {ok, U}; + {value, {pre_fun, U=undefined}} -> + {ok, U}; + {value, {info_fun, Fun}} when is_function(Fun), + is_function(Fun, 1) -> + {ok, Fun}; + {value, {pre_fun, Fun}} when is_function(Fun), + is_function(Fun, 1) -> + {ok, Fun}; + {value, {post_fun, Fun}} when is_function(Fun), + is_function(Fun, 0) -> + {ok, Fun}; + {value, {lookup_fun, Fun}} when is_function(Fun), + is_function(Fun, 2) -> + {ok, Fun}; + {value, {max_lookup, Max}} when is_integer(Max), Max >= 0 -> + {ok, Max}; + {value, {max_lookup, infinity}} -> + {ok, -1}; + {value, {format_fun, Fun}} when is_function(Fun), + is_function(Fun, 1) -> + {ok, Fun}; + {value, {parent_fun, Fun}} when is_function(Fun), + is_function(Fun, 0) -> + {ok, Fun}; + {value, {key_equality, KE='=='}}-> + {ok, KE}; + {value, {key_equality, KE='=:='}}-> + {ok, KE}; + {value, {join, J=any}} -> + {ok, J}; + {value, {join, J=nested_loop}} -> + {ok, J}; + {value, {join, J=merge}} -> + {ok, J}; + {value, {join, J=lookup}} -> + {ok, J}; + {value, {lookup, LookUp}} when LookUp; + not LookUp; + LookUp =:= any -> + {ok, LookUp}; + {value, {max_list_size, Max}} when is_integer(Max), Max >= 0 -> + {ok, Max}; + {value, {tmpdir_usage, TmpUsage}} when TmpUsage =:= allowed; + TmpUsage =:= not_allowed; + TmpUsage =:= info_msg; + TmpUsage =:= warning_msg; + TmpUsage =:= error_msg -> + {ok, TmpUsage}; + {value, {unique, Unique}} when Unique; not Unique -> + {ok, Unique}; + {value, {cache, Cache}} when Cache; not Cache; Cache =:= list -> + {ok, Cache}; + {value, {cache, ets}} -> + {ok, true}; + {value, {cache, no}} -> + {ok, false}; + {value, {unique_all, UniqueAll}} when UniqueAll; not UniqueAll -> + {ok, UniqueAll}; + {value, {cache_all, CacheAll}} when CacheAll; + not CacheAll; + CacheAll =:= list -> + {ok, CacheAll}; + {value, {cache_all, ets}} -> + {ok, true}; + {value, {cache_all, no}} -> + {ok, false}; + {value, {spawn_options, default}} -> + {ok, default}; + {value, {spawn_options, SpawnOptions}} -> + case is_proper_list(SpawnOptions) of + true -> + {ok, SpawnOptions}; + false -> + badarg + end; + {value, {flat, Flat}} when Flat; not Flat -> + {ok, Flat}; + {value, {format, Format}} when Format =:= string; + Format =:= abstract_code; + Format =:= debug -> + {ok, Format}; + {value, {n_elements, NElements}} when NElements =:= infinity; + is_integer(NElements), + NElements > 0 -> + {ok, NElements}; + {value, {depth, Depth}} when Depth =:= infinity; + is_integer(Depth), Depth >= 0 -> + {ok, Depth}; + {value, {order, Order}} when is_function(Order), + is_function(Order, 2); + (Order =:= ascending); + (Order =:= descending) -> + {ok, Order}; + {value, {compressed, Comp}} when Comp -> + {ok, [compressed]}; + {value, {compressed, Comp}} when not Comp -> + {ok, []}; + {value, {tmpdir, T}} -> + {ok, T}; + {value, {size, Size}} when is_integer(Size), Size > 0 -> + {ok, Size}; + {value, {no_files, NoFiles}} when is_integer(NoFiles), + NoFiles > 1 -> + {ok, NoFiles}; + {value, {Key, _}} -> + badarg; + false -> + Default = default_option(Key), + {ok, Default} + end, + case V of + badarg -> + badarg; + {ok, Value} -> + NewOptions = lists:keydelete(Key, 1, Options), + options(NewOptions, Keys, [Value | L]) + end; +options([], [], L) -> + lists:reverse(L); +options(_Options, _, _L) -> + badarg. + +default_option(pre_fun) -> undefined; +default_option(post_fun) -> undefined; +default_option(info_fun) -> undefined; +default_option(format_fun) -> undefined; +default_option(lookup_fun) -> undefined; +default_option(max_lookup) -> -1; +default_option(join) -> any; +default_option(lookup) -> any; +default_option(parent_fun) -> undefined; +default_option(key_equality) -> '=:='; +default_option(spawn_options) -> default; +default_option(flat) -> true; +default_option(format) -> string; +default_option(n_elements) -> infinity; +default_option(depth) -> infinity; +default_option(max_list_size) -> ?MAX_LIST_SIZE; +default_option(tmpdir_usage) -> allowed; +default_option(cache) -> false; +default_option(cache_all) -> false; +default_option(unique) -> false; +default_option(unique_all) -> false; +default_option(order) -> ascending; % default values from file_sorter.erl +default_option(compressed) -> []; +default_option(tmpdir) -> ""; +default_option(size) -> 524288; +default_option(no_files) -> 16. + +atom_option(cache) -> {cache, true}; +atom_option(unique) -> {unique, true}; +atom_option(cache_all) -> {cache_all, true}; +atom_option(unique_all) -> {unique_all, true}; +atom_option(lookup) -> {lookup, true}; +atom_option(flat) -> {flat, true}; +atom_option(Key) -> Key. + +is_proper_list([_ | L]) -> + is_proper_list(L); +is_proper_list(L) -> + L =:= []. + +spawn_options(default) -> + [link]; +spawn_options(SpawnOptions) -> + lists:delete(monitor, + case lists:member(link, SpawnOptions) of + true -> + SpawnOptions; + false -> + [link | SpawnOptions] + end). + +is_keypos(Keypos) when is_integer(Keypos), Keypos > 0 -> + true; +is_keypos([]) -> + false; +is_keypos(L) -> + is_keyposs(L). + +is_keyposs([Kp | Kps]) when is_integer(Kp), Kp > 0 -> + is_keyposs(Kps); +is_keyposs(Kps) -> + Kps =:= []. + +listify(L) when is_list(L) -> + L; +listify(T) -> + [T]. + +%% Optimizations to be carried out. +-record(optz, + {unique = false, % bool() + cache = false, % bool() | list + join_option = any, % constraint set by the 'join' option + fast_join = no, % no | #qlc_join. 'no' means nested loop. + opt % #qlc_opt + }). + +%% Prepared #qlc_lc. +-record(qlc, + {lcf, % fun() -> Val + codef, + qdata, % with evaluated list expressions + init_value, + optz % #optz + }). + +%% Prepared simple #qlc_lc. +-record(simple_qlc, + {p, % atom(), pattern variable + le, + line, + init_value, + optz % #optz + }). + +-record(prepared, + {qh, % #qlc_append | #qlc_table | #qlc | #simple_qlc | + % #qlc_sort | list() + sorted = no, % yes | no | ascending | descending + sort_info = [], % + sort_info2 = [], % 'sort_info' updated with pattern info; qh is LE + lu_skip_quals = [], % qualifiers to skip due to lookup + join = {[],[]}, % {Lookup, Merge} + n_objs = undefined, % for join (not used yet) + is_unique_objects = false, % bool() + is_cached = false % bool() (true means 'ets' or 'list') + }). + +%%% Cursor process functions. + +cursor_process(H, GUnique, GCache, TmpDir, SpawnOptions, MaxList, TmpUsage) -> + Parent = self(), + Setup = #setup{parent = Parent}, + CF = fun() -> + %% Unless exit/2 is trapped no cleanup can be done. + %% The user is assumed not to set the flag to false. + process_flag(trap_exit, true), + MonRef = erlang:monitor(process, Parent), + {Objs, Post, _LocalPost} = + try + Prep = prepare_qlc(H, not_a_list, GUnique, GCache, + TmpDir, MaxList, TmpUsage), + setup_qlc(Prep, Setup) + catch Class:Reason -> + Parent ! {self(), {caught, Class, Reason, + erlang:get_stacktrace()}}, + exit(normal) + end, + Parent ! {self(), ok}, + wait_for_request(Parent, MonRef, Post), + reply(Parent, MonRef, Post, Objs) + end, + Pid = spawn_opt(CF, SpawnOptions), + parent_fun(Pid, Parent). + +%% Expect calls from tables calling the parent_fun and finally an 'ok'. +parent_fun(Pid, Parent) -> + receive + {Pid, ok} -> Pid; + {TPid, {parent_fun, Fun}} -> + V = try + {value, Fun()} + catch Class:Reason -> + {parent_fun_caught, Class, Reason, erlang:get_stacktrace()} + end, + TPid ! {Parent, V}, + parent_fun(Pid, Parent); + {Pid, {caught, throw, Error, [?THROWN_ERROR | _]}} -> + Error; + {Pid, {caught, Class, Reason, Stacktrace}} -> + erlang:raise(Class, Reason, Stacktrace) + end. + +reply(Parent, MonRef, Post, []) -> + no_more(Parent, MonRef, Post); +reply(Parent, MonRef, Post, [Answer | Cont]) -> + Parent ! {self(), {answer, Answer}}, + wait_for_request(Parent, MonRef, Post), + reply(Parent, MonRef, Post, Cont); +reply(Parent, MonRef, Post, Cont) -> + Reply = try + if + is_function(Cont) -> + Cont(); + true -> + throw_error(Cont) + end + catch + Class:Reason -> + post_funs(Post), + Message = {caught, Class, Reason, erlang:get_stacktrace()}, + Parent ! {self(), Message}, + exit(normal) + end, + reply(Parent, MonRef, Post, Reply). + +no_more(Parent, MonRef, Post) -> + Parent ! {self(), no_more}, + wait_for_request(Parent, MonRef, Post), + no_more(Parent, MonRef, Post). + +wait_for_request(Parent, MonRef, Post) -> + receive + {Parent, stop} -> + post_funs(Post), + exit(normal); + {Parent, more} -> + ok; + {'EXIT', Parent, _Reason} -> + post_funs(Post), + exit(normal); + {'DOWN', MonRef, process, Parent, _Info} -> + post_funs(Post), + exit(normal); + {'EXIT', Pid, _Reason} when Pid =:= self() -> + %% Trapped signal. The cursor ignores it... + wait_for_request(Parent, MonRef, Post); + Other -> + error_logger:error_msg( + "The qlc cursor ~w received an unexpected message:\n~p\n", + [self(), Other]), + wait_for_request(Parent, MonRef, Post) + end. + +%%% End of cursor process functions. + +abstract_code({special, Line, String}) -> + {string, Line, String}; +abstract_code(Tuple) when is_tuple(Tuple) -> + list_to_tuple(abstract_code(tuple_to_list(Tuple))); +abstract_code([H | T]) -> + [abstract_code(H) | abstract_code(T)]; +abstract_code(Term) -> + Term. + +%% Also in qlc_pt.erl. +-define(Q, q). +-define(QLC_Q(L1, L2, L3, L4, LC, Os), + {call,L1,{remote,L2,{atom,L3,?MODULE},{atom,L4,?Q}},[LC | Os]}). + +abstract(Info, false=_Flat, NElements, Depth) -> + abstract(Info, NElements, Depth); +abstract(Info, true=_Flat, NElements, Depth) -> + Abstract = abstract(Info, NElements, Depth), + Vars = abstract_vars(Abstract), + {_, Body0, Expr} = flatten_abstr(Abstract, 1, Vars, []), + case Body0 of + [] -> + Expr; + [{match,_,Expr,Q}] -> + Q; + [{match,_,Expr,Q} | Body] -> + {block, 0, lists:reverse(Body, [Q])}; + _ -> + {block, 0, lists:reverse(Body0, [Expr])} + end. + +abstract({qlc, E0, Qs0, Opt}, NElements, Depth) -> + Qs = lists:map(fun({generate, P, LE}) -> + {generate, 1, binary_to_term(P), + abstract(LE, NElements, Depth)}; + (F) -> + binary_to_term(F) + end, Qs0), + E = binary_to_term(E0), + Os = case Opt of + [] -> []; + _ -> [abstract_term(Opt, 1)] + end, + ?QLC_Q(1, 1, 1, 1, {lc,1,E,Qs}, Os); +abstract({table, {M, F, As0}}, _NElements, _Depth) + when is_atom(M), is_atom(F), is_list(As0) -> + As = [abstract_term(A, 1) || A <- As0], + {call, 1, {remote, 1, {atom, 1, M}, {atom, 1, F}}, As}; +abstract({table, TableDesc}, _NElements, _Depth) -> + case io_lib:deep_char_list(TableDesc) of + true -> + {ok, Tokens, _} = erl_scan:string(lists:flatten(TableDesc++".")), + {ok, [Expr]} = erl_parse:parse_exprs(Tokens), + Expr; + false -> % abstract expression + TableDesc + end; +abstract({append, Infos}, NElements, Depth) -> + As = lists:foldr(fun(Info, As0) -> + {cons,1,abstract(Info, NElements, Depth),As0} + end, {nil, 1}, Infos), + {call, 1, {remote, 1, {atom, 1, ?MODULE}, {atom, 1, append}}, [As]}; +abstract({sort, Info, SortOptions}, NElements, Depth) -> + {call, 1, {remote, 1, {atom, 1, ?MODULE}, {atom, 1, sort}}, + [abstract(Info, NElements, Depth), abstract_term(SortOptions, 1)]}; +abstract({keysort, Info, Kp, SortOptions}, NElements, Depth) -> + {call, 1, {remote, 1, {atom, 1, ?MODULE}, {atom, 1, keysort}}, + [abstract_term(Kp, 1), abstract(Info, NElements, Depth), + abstract_term(SortOptions, 1)]}; +abstract({list,L,MS}, NElements, Depth) -> + {call, 1, {remote, 1, {atom, 1, ets}, {atom, 1, match_spec_run}}, + [abstract(L, NElements, Depth), + {call, 1, {remote, 1, {atom, 1, ets}, {atom, 1, match_spec_compile}}, + [abstract_term(depth(MS, Depth), 1)]}]}; +abstract({list, L}, NElements, Depth) when NElements =:= infinity; + NElements >= length(L) -> + abstract_term(depth(L, Depth), 1); +abstract({list, L}, NElements, Depth) -> + abstract_term(depth(lists:sublist(L, NElements), Depth) ++ '...', 1). + +depth(List, infinity) -> + List; +depth(List, Depth) -> + [depth1(E, Depth) || E <- List]. + +depth_fun(infinity = _Depth) -> + fun(E) -> E end; +depth_fun(Depth) -> + fun(E) -> depth1(E, Depth) end. + +depth1([]=L, _D) -> + L; +depth1(_Term, 0) -> + '...'; +depth1(Tuple, D) when is_tuple(Tuple) -> + depth_tuple(Tuple, tuple_size(Tuple), 1, D - 1, []); +depth1(List, D) when is_list(List) -> + if + D =:= 1 -> + ['...']; + true -> + depth_list(List, D - 1) + end; +depth1(Binary, D) when byte_size(Binary) > D - 1 -> + D1 = D - 1, + <> = Binary, + <>; +depth1(T, _Depth) -> + T. + +depth_list([]=L, _D) -> + L; +depth_list(_L, 0) -> + '...'; +depth_list([E | Es], D) -> + [depth1(E, D) | depth_list(Es, D - 1)]. + +depth_tuple(_Tuple, Sz, I, _D, L) when I > Sz -> + list_to_tuple(lists:reverse(L)); +depth_tuple(_L, _Sz, _I, 0, L) -> + list_to_tuple(lists:reverse(L, ['...'])); +depth_tuple(Tuple, Sz, I, D, L) -> + E = depth1(element(I, Tuple), D), + depth_tuple(Tuple, Sz, I + 1, D - 1, [E | L]). + +abstract_term(Term) -> + abstract_term(Term, 0). + +abstract_term(Term, Line) -> + abstr_term(Term, Line). + +abstr_term(Tuple, Line) when is_tuple(Tuple) -> + {tuple,Line,[abstr_term(E, Line) || E <- tuple_to_list(Tuple)]}; +abstr_term([_ | _]=L, Line) -> + case io_lib:char_list(L) of + true -> + erl_parse:abstract(L, Line); + false -> + abstr_list(L, Line) + end; +abstr_term(Fun, Line) when is_function(Fun) -> + case erl_eval:fun_data(Fun) of + {fun_data, _Bs, Cs} -> + {'fun', Line, {clauses, Cs}}; + false -> + {name, Name} = erlang:fun_info(Fun, name), + {arity, Arity} = erlang:fun_info(Fun, arity), + case erlang:fun_info(Fun, type) of + {type, external} -> + {module, Module} = erlang:fun_info(Fun, module), + {'fun', Line, {function,Module,Name,Arity}}; + {type, local} -> + {'fun', Line, {function,Name,Arity}} + end + end; +abstr_term(PPR, Line) when is_pid(PPR); is_port(PPR); is_reference(PPR) -> + {special, Line, lists:flatten(io_lib:write(PPR))}; +abstr_term(Simple, Line) -> + erl_parse:abstract(Simple, Line). + +abstr_list([H | T], Line) -> + {cons, Line, abstr_term(H, Line), abstr_list(T, Line)}; +abstr_list(T, Line) -> + abstr_term(T, Line). + +%% Since generator pattern variables cannot be used in list +%% expressions, it is OK to flatten out QLCs using temporary +%% variables. +flatten_abstr(?QLC_Q(L1, L2, L3, L4, LC0, Os), VN0, Vars, Body0) -> + {lc,L,E,Qs0} = LC0, + F = fun({generate,Ln,P,LE0}, {VN1,Body1}) -> + {VN2,Body2,LE} = flatten_abstr(LE0, VN1, Vars, Body1), + {{generate,Ln,P,LE}, {VN2,Body2}}; + (Fil, VN_Body) -> + {Fil, VN_Body} + end, + {Qs, {VN3,Body}} = lists:mapfoldl(F, {VN0,Body0}, Qs0), + LC = {lc,L,E,Qs}, + {V, VN} = aux_name1('V', VN3, Vars), + Var = {var, L1, V}, + QLC = ?QLC_Q(L1, L2, L3, L4, LC, Os), + {VN + 1, [{match, L1, Var, QLC} | Body], Var}; +flatten_abstr(T0, VN0, Vars, Body0) when is_tuple(T0) -> + {VN, Body, L} = flatten_abstr(tuple_to_list(T0), VN0, Vars, Body0), + {VN, Body, list_to_tuple(L)}; +flatten_abstr([E0 | Es0], VN0, Vars, Body0) -> + {VN1, Body1, E} = flatten_abstr(E0, VN0, Vars, Body0), + {VN, Body, Es} = flatten_abstr(Es0, VN1, Vars, Body1), + {VN, Body, [E | Es]}; +flatten_abstr(E, VN, _Vars, Body) -> + {VN, Body, E}. + +abstract_vars(Abstract) -> + sets:from_list(ordsets:to_list(vars(Abstract))). + +collect([]=L) -> + L; +collect([Answer | Cont]) -> + [Answer | collect(Cont)]; +collect(Cont) -> + case Cont() of + Answers when is_list(Answers) -> + collect(Answers); + Term -> + throw_error(Term) + end. + +fold_loop(Fun, [Obj | Cont], Acc) -> + fold_loop(Fun, Cont, Fun(Obj, Acc)); +fold_loop(_Fun, [], Acc) -> + Acc; +fold_loop(Fun, Cont, Acc) -> + case Cont() of + Objects when is_list(Objects) -> + fold_loop(Fun, Objects, Acc); + Term -> + Term + end. + +next_loop(Pid, L, N) when N =/= 0 -> + case monitor_request(Pid, more) of + no_more -> + lists:reverse(L); + {answer, Answer} -> + next_loop(Pid, [Answer | L], N - 1); + {caught, throw, Error, [?THROWN_ERROR | _]} -> + Error; + {caught, Class, Reason, Stacktrace} -> + _ = (catch erlang:error(foo)), + erlang:raise(Class, Reason, Stacktrace ++ erlang:get_stacktrace()); + error -> + erlang:error({qlc_cursor_pid_no_longer_exists, Pid}) + end; +next_loop(_Pid, L, _N) -> + lists:reverse(L). + +stop_cursor(Pid) -> + erlang:monitor(process, Pid), + unlink(Pid), + receive + {'EXIT',Pid,_Reason} -> % Simply ignore the error. + receive + {'DOWN',_,process,Pid,_} -> ok + end + after 0 -> + Pid ! {self(),stop}, + receive + {'DOWN',_,process,Pid,_} -> ok + end + end. + +monitor_request(Pid, Req) -> + Ref = erlang:monitor(process, Pid), + Pid ! {self(), Req}, + receive + {'DOWN', Ref, process, Pid, _Info} -> + receive + {'EXIT', Pid, _Reason} -> ok + after 1 -> ok end, + error; + {'EXIT', Pid, _Reason} -> + receive + {'DOWN', _, process, Pid, _} -> error + end; + {Pid, Reply} -> + erlang:demonitor(Ref, [flush]), + Reply + end. + +%% Marker for skipped filter or unused generator. +-define(SKIP, (-1)). + +%% Qual = {gen, LE} | fil +-define(qual_data(QNum, GoToIndex, State, Qual), + {QNum, GoToIndex, State, Qual}). + +-record(join, % generated by qlc_pt + {op, q1, q2, wh1, wh2, cs_fun}). % op is unused + +%% le_info/1 returns an intermediate information format only used for +%% testing purposes. Changes will happen without notice. +%% +%% QueryDesc = {qlc, TemplateDesc, [QualDesc], [QueryOpt]} +%% | {table, TableDesc} +%% | {append, [QueryDesc]} +%% | {sort, QueryDesc, [SortOption]} +%% | {keysort, KeyPos, QueryDesc, [SortOption]} +%% | {list, list()} +%% | {list, QueryDesc, MatchExpression} +%% TableDesc = {Mod, Fun, Args} +%% | AbstractExpression +%% | character_list() +%% Mod = module() +%% Fun = atom() +%% Args = [term()] +%% QualDesc = FilterDesc +%% | {generate, PatternDesc, QueryDesc} +%% QueryOpt = {cache, bool()} | cache +%% | {unique, bool()} | unique +%% FilterDesc = PatternDesc = TemplateDesc = binary() + +le_info(#prepared{qh = #simple_qlc{le = LE, p = P, line = L, optz = Optz}}, + InfOpt) -> + QVar = term_to_binary({var, L, P}), + {qlc, QVar, [{generate, QVar, le_info(LE, InfOpt)}], opt_info(Optz)}; +le_info(#prepared{qh = #qlc{codef = CodeF, qdata = Qdata, optz = Optz}}, + InfOpt) -> + Code = CodeF(), + TemplateState = template_state(), + E = element(TemplateState, Code), + QualInfo0 = qual_info(Qdata, Code, InfOpt), + QualInfo1 = case Optz#optz.fast_join of + #qlc_join{} = Join -> + join_info(Join, QualInfo0, Qdata, Code); + no -> + QualInfo0 + end, + QualInfo = [I || I <- QualInfo1, I =/= skip], + {qlc, E, QualInfo, opt_info(Optz)}; +le_info(#prepared{qh = #qlc_table{format_fun = FormatFun, trav_MS = TravMS, + ms = MS, lu_vals = LuVals}}, InfOpt) -> + {NElements, Depth} = InfOpt, + %% The 'depth' option applies to match specifications as well. + %% This is for limiting imported variables (parameters). + DepthFun = depth_fun(Depth), + case LuVals of + _ when FormatFun =:= undefined -> + {table, {'$MOD', '$FUN', []}}; + {Pos, Vals} -> + Formated = try FormatFun({lookup, Pos, Vals, NElements, DepthFun}) + catch _:_ -> FormatFun({lookup, Pos, Vals}) + end, + if + MS =:= no_match_spec -> + {table, Formated}; + true -> + {list, {table, Formated}, depth(MS, Depth)} + end; + _ when TravMS, is_list(MS) -> + {table, FormatFun({match_spec, depth(MS, Depth)})}; + _ when MS =:= no_match_spec -> + try {table, FormatFun({all, NElements, DepthFun})} + catch _:_ -> {table, FormatFun(all)} + end + end; +le_info(#prepared{qh = #qlc_append{hl = HL}}, InfOpt) -> + {append, [le_info(H, InfOpt) || H <- HL]}; +le_info(#prepared{qh = #qlc_sort{h = H, keypos = sort, + fs_opts = SortOptions0, tmpdir = TmpDir}}, + InfOpt) -> + SortOptions = sort_options_global_tmp(SortOptions0, TmpDir), + {sort, le_info(H, InfOpt), SortOptions}; +le_info(#prepared{qh = #qlc_sort{h = H, keypos = {keysort, Kp}, + fs_opts = SortOptions0, tmpdir = TmpDir}}, + InfOpt) -> + SortOptions = sort_options_global_tmp(SortOptions0, TmpDir), + {keysort, le_info(H, InfOpt), Kp, SortOptions}; +le_info(#prepared{qh = #qlc_list{l = L, ms = no_match_spec}}, _InfOpt) -> + {list, L}; +le_info(#prepared{qh = #qlc_list{l = L, ms = MS}},_InfOpt) when is_list(L) -> + {list, {list, L}, MS}; +le_info(#prepared{qh = #qlc_list{l = L, ms = MS}}, InfOpt) -> + {list, le_info(L, InfOpt), MS}. + +qual_info([?qual_data(_QNum, _GoI, ?SKIP, fil) | Qdata], Code, InfOpt) -> + %% see skip_lookup_filters() + [skip | qual_info(Qdata, Code, InfOpt)]; +qual_info([?qual_data(QNum, _GoI, _SI, fil) | Qdata], Code, InfOpt) -> + [element(QNum + 1, Code) | qual_info(Qdata, Code, InfOpt)]; +qual_info([?qual_data(_QNum, _GoI, _SI, {gen,#join{}}) | Qdata], + Code, InfOpt) -> + [skip | qual_info(Qdata, Code, InfOpt)]; +qual_info([?qual_data(QNum, _GoI, _SI, {gen,LE}) | Qdata], Code, InfOpt) -> + [{generate,element(QNum + 1, Code),le_info(LE, InfOpt)} | + qual_info(Qdata, Code, InfOpt)]; +qual_info([], _Code, _InfOpt) -> + []. + +join_info(Join, QInfo, Qdata, Code) -> + #qlc_join{kind = Kind, q1 = QNum1a, c1 = C1, q2 = QNum2a, c2 = C2, + opt = Opt} = Join, + {?qual_data(JQNum,_,_,_), Rev, QNum1, QNum2, _WH1, _WH2, CsFun} = + find_join_data(Qdata, QNum1a, QNum2a), + {Cs1_0, Cs2_0, Compat} = CsFun(), + [Cs1, Cs2] = case Compat of + [] -> % --R12B-5 + [[{C,[{V,'=:='} || V <- Vs]} || {C,Vs} <- CVs] || + CVs <- [Cs1_0, Cs2_0]]; + _ -> % 'v1', R13A -- + %% Only compared constants (==). + [Cs1_0, Cs2_0] + end, + L = 0, + G1_0 = {var,L,'G1'}, G2_0 = {var,L,'G2'}, + JP = element(JQNum + 1, Code), + %% Create code for wh1 and wh2 in #join{}: + {{I1,G1}, {I2,G2}, QInfoL} = + case Kind of + {merge, _} -> + {JG1,QInfo1} = join_merge_info(QNum1, QInfo, Code, G1_0, Cs1), + {JG2,QInfo2} = join_merge_info(QNum2, QInfo, Code, G2_0, Cs2), + {JG1, JG2, QInfo1 ++ QInfo2}; + _ when Rev -> + {JG2,QInfo2} = join_merge_info(QNum2, QInfo, Code, G2_0, Cs2), + {J1, QInfo1} = join_lookup_info(QNum1, QInfo, G1_0), + {{J1,G1_0}, JG2, QInfo2 ++ [QInfo1]}; + _ -> + {JG1,QInfo1} = join_merge_info(QNum1, QInfo, Code, G1_0, Cs1), + {J2, QInfo2} = join_lookup_info(QNum2, QInfo, G2_0), + {JG1, {J2,G2_0}, QInfo1 ++ [QInfo2]} + end, + {JOptVal, JOp} = kind2op(Kind), + JOpt = [{join, JOptVal}] ++ opt_info(join_unique_cache(Opt)), + JFil = term_to_binary({op,L,JOp, + {call,L,{atom,L,element},[{integer,L,C1},G1]}, + {call,L,{atom,L,element},[{integer,L,C2},G2]}}), + P = term_to_binary({cons, L, G1, G2}), + JInfo = {generate, JP, {qlc, P, QInfoL ++ [JFil], JOpt}}, + {Before, [I1 | After]} = lists:split(QNum1 - 1, QInfo), + Before ++ [JInfo] ++ lists:delete(I2, After). + +kind2op({merge, _KE}) -> {merge, '=='}; +kind2op({lookup, KE, _LU_fun}) -> {lookup, KE}. + +%% qlc:q(P0 || P0 = Pattern <- H1, ConstFilters), +%% where "P0" is a fresh variable and ConstFilters are filters that +%% test constant values of pattern columns. +join_merge_info(QNum, QInfo, Code, G, ExtraConstants) -> + {generate, _, LEInfo}=I = lists:nth(QNum, QInfo), + P = binary_to_term(element(QNum + 1, Code)), + case {P, ExtraConstants} of + {{var, _, _}, []} -> + %% No need to introduce a QLC. + TP = term_to_binary(G), + I2 = {generate, TP, LEInfo}, + {{I,G}, [I2]}; + _ -> + {EPV, M} = + case P of + {var, _, _} -> + %% No need to introduce a pattern variable. + {P, P}; + _ -> + {PV, _} = aux_name1('P', 0, abstract_vars(P)), + L = 0, + V = {var, L, PV}, + {V, {match, L, V, P}} + end, + DQP = term_to_binary(EPV), + LEI = {generate, term_to_binary(M), LEInfo}, + TP = term_to_binary(G), + CFs = [begin + Call = {call,0,{atom,0,element},[{integer,0,Col},EPV]}, + F = list2op([{op,0,Op,abstract_term(Con),Call} + || {Con,Op} <- ConstOps], 'or'), + term_to_binary(F) + end || + {Col,ConstOps} <- ExtraConstants], + {{I,G}, [{generate, TP, {qlc, DQP, [LEI | CFs], []}}]} + end. + +list2op([E], _Op) -> + E; +list2op([E | Es], Op) -> + {op,0,Op,E,list2op(Es, Op)}. + +join_lookup_info(QNum, QInfo, G) -> + {generate, _, LEInfo}=I = lists:nth(QNum, QInfo), + TP = term_to_binary(G), + {I, {generate, TP, LEInfo}}. + +opt_info(#optz{unique = Unique, cache = Cache0, join_option = JoinOption}) -> + %% No 'nested_loop' options are added here, even if there are + %% nested loops to carry out, unless a 'nested_loop' was given as + %% option. The reason is that the qlc module does not know about + %% all instances of nested loops. + Cache = if + Cache0 -> ets; + true -> Cache0 + end, + [{T,V} || {T,V} <- [{cache,Cache},{unique,Unique}], + V =/= default_option(T)] ++ + [{T,V} || {T,V} <- [{join,JoinOption}], V =:= nested_loop]. + +prepare_qlc(H, InitialValue, GUnique, GCache, TmpDir, MaxList, TmpUsage) -> + GOpt = #qlc_opt{unique = GUnique, cache = GCache, + tmpdir = TmpDir, max_list = MaxList, + tmpdir_usage = TmpUsage}, + case opt_le(prep_le(H, GOpt), 1) of + #prepared{qh = #qlc{} = QLC}=Prep -> + Prep#prepared{qh = QLC#qlc{init_value = InitialValue}}; + #prepared{qh = #simple_qlc{}=SimpleQLC}=Prep -> + Prep#prepared{qh = SimpleQLC#simple_qlc{init_value = InitialValue}}; + Prep -> + Prep + end. + +%%% The options given to append, q and table (unique and cache) as well +%%% as the type of expression (list, table, append, qlc...) are +%%% analyzed by prep_le. The results are is_unique_objects and +%%% is_cached. It is checked that the evaluation (in the Erlang sense) +%%% of list expressions yields qlc handles. + +prep_le(#qlc_lc{lc = LC_fun, opt = #qlc_opt{} = Opt0}=H, GOpt) -> + #qlc_opt{unique = GUnique, cache = GCache, + tmpdir = TmpDir, max_list = MaxList, + tmpdir_usage = TmpUsage} = GOpt, + Unique = Opt0#qlc_opt.unique or GUnique, + Cache = if + not GCache -> Opt0#qlc_opt.cache; + true -> GCache + end, + Opt = Opt0#qlc_opt{unique = Unique, cache = Cache, + tmpdir = TmpDir, max_list = MaxList, + tmpdir_usage = TmpUsage}, + prep_qlc_lc(LC_fun(), Opt, GOpt, H); +prep_le(#qlc_table{info_fun = IF}=T, GOpt) -> + {SortInfo, Sorted} = table_sort_info(T), + IsUnique = grd(IF, is_unique_objects), + Prep = #prepared{qh = T, sort_info = SortInfo, sorted = Sorted, + is_unique_objects = IsUnique}, + Opt = if + IsUnique or not GOpt#qlc_opt.unique, + T#qlc_table.ms =:= no_match_spec -> + GOpt#qlc_opt{cache = false}; + true -> + GOpt + end, + may_create_simple(Opt, Prep); +prep_le(#qlc_append{hl = HL}, GOpt) -> + case lists:flatmap(fun(#prepared{qh = #qlc_list{l = []}}) -> []; + (#prepared{qh = #qlc_append{hl = HL1}}) -> HL1; + (H) -> [H] end, + [prep_le(H, GOpt) || H <- HL]) of + []=Nil -> + short_list(Nil); + [Prep] -> + Prep; + PrepL -> + Cache = lists:all(fun(#prepared{is_cached = IsC}) -> IsC =/= false + end, PrepL), + %% The handles in hl are replaced by prepared handles: + Prep = #prepared{qh = #qlc_append{hl = PrepL}, is_cached = Cache}, + may_create_simple(GOpt, Prep) + end; +prep_le(#qlc_sort{h = H0}=Q0, GOpt) -> + %% The handle h is replaced by a prepared handle: + Q = Q0#qlc_sort{h = prep_le(H0, GOpt)}, + prep_sort(Q, GOpt); +prep_le([_, _ | _]=L, GOpt) -> + Prep = #prepared{qh = #qlc_list{l = L}, is_cached = true}, + Opt = if + not GOpt#qlc_opt.unique -> + GOpt#qlc_opt{cache = false}; + true -> GOpt + end, + may_create_simple(Opt, Prep); +prep_le(L, _GOpt) when is_list(L) -> + short_list(L); +prep_le(T, _GOpt) -> + erlang:error({unsupported_qlc_handle, #qlc_handle{h = T}}). + +eval_le(LE_fun, GOpt) -> + case LE_fun() of + {error, ?MODULE, _} = Error -> + throw_error(Error); + R -> + case get_handle(R) of + badarg -> + erlang:error(badarg, [R]); + H -> + prep_le(H, GOpt) + end + end. + +prep_qlc_lc({simple_v1, PVar, LE_fun, L}, Opt, GOpt, _H) -> + check_lookup_option(Opt, false), + prep_simple_qlc(PVar, L, eval_le(LE_fun, GOpt), Opt); +prep_qlc_lc({qlc_v1, QFun, CodeF, Qdata0, QOpt}, Opt, GOpt, _H) -> + F = fun(?qual_data(_QNum, _GoI, _SI, fil)=QualData, ModGens) -> + {QualData, ModGens}; + (?qual_data(_QNum, _GoI, _SI, {gen, #join{}})=QualData, ModGens) -> + {QualData, ModGens}; + (?qual_data(QNum, GoI, SI, {gen, LE_fun}), ModGens0) -> + Prep1 = eval_le(LE_fun, GOpt), + {Prep, ModGens} = + prep_generator(QNum, Prep1, QOpt, Opt, ModGens0), + {?qual_data(QNum, GoI, SI, {gen, Prep}), ModGens} + end, + {Qdata, ModGens} = lists:mapfoldl(F, [], Qdata0), + SomeLookUp = lists:keymember(true, 2, ModGens) =/= false, + check_lookup_option(Opt, SomeLookUp), + case ModGens of + [{_QNum, _LookUp, all, OnePrep}] -> + check_join_option(Opt), + OnePrep; + _ -> + Prep0 = prep_qlc(QFun, CodeF, Qdata, QOpt, Opt), + LU_SkipQuals = + lists:flatmap(fun({QNum,_LookUp,Fs,_Prep}) -> [{QNum,Fs}] + end, ModGens), + Prep1 = Prep0#prepared{lu_skip_quals = LU_SkipQuals}, + prep_join(Prep1, QOpt, Opt) + end; +prep_qlc_lc(_, _Opt, _GOpt, H) -> + erlang:error({unsupported_qlc_handle, #qlc_handle{h = H}}). + +prep_generator(QNum, Prep0, QOpt, Opt, ModGens) -> + PosFun = fun(KeyEquality) -> pos_fun(KeyEquality, QOpt, QNum) end, + MSFs = case match_specs(QOpt, QNum) of + undefined -> + {no_match_spec, []}; + {_, _}=MSFs0 -> + MSFs0 + end, + #prepared{qh = LE} = Prep0, + case prep_gen(LE, Prep0, PosFun, MSFs, Opt) of + {replace, Fs, LookUp, Prep} -> + {Prep, [{QNum,LookUp,Fs,Prep} | ModGens]}; + {skip, SkipFils, LookUp, Prep} -> + {Prep, [{QNum,LookUp,SkipFils,Prep} | ModGens]}; + {no, _Fs, _LookUp, Prep} -> + {Prep, ModGens} + end. + +pos_fun(undefined, QOpt, QNum) -> + {'=:=', constants(QOpt, QNum)}; %% --R12B-5 +pos_fun('=:=', QOpt, QNum) -> + {'=:=', constants(QOpt, QNum)}; +pos_fun('==', QOpt, QNum) -> + try {'==', equal_constants(QOpt, QNum)} % R13A-- + catch _:_ -> {'=:=', constants(QOpt, QNum)} + end. + +prep_gen(#qlc_table{lu_vals = LuV0, ms = MS0, trav_MS = TravMS, + info_fun = IF, lookup_fun = LU_fun, + key_equality = KeyEquality}=LE0, + Prep0, PosFun0, {MS, Fs}, Opt) -> + PosFun = PosFun0(KeyEquality), + {LuV, {STag,SkipFils}} = find_const_positions(IF, LU_fun, PosFun, Opt), + LU = LuV =/= false, + if + LuV0 =/= undefined; MS0 =/= no_match_spec -> + {no, [], false, Prep0}; + MS =/= no_match_spec, LU -> + MS1 = if + Fs =:= SkipFils; STag =:= Fs -> + %% The guard of the match specification + %% is covered by the lookup. + case MS of + [{'$1',_Guard,['$1']}] -> % no transformation + no_match_spec; + [{Head,_Guard,Body}] -> + [{Head,[],Body}] % true guard + end; + true -> + MS + end, + Prep = Prep0#prepared{qh = LE0#qlc_table{lu_vals = LuV,ms = MS1}}, + {replace, Fs, LU, Prep}; + LU -> + Prep = Prep0#prepared{qh = LE0#qlc_table{lu_vals = LuV}}, + {skip, SkipFils, LU, Prep}; + TravMS, MS =/= no_match_spec -> + Prep = Prep0#prepared{qh = LE0#qlc_table{ms = MS}, + is_unique_objects = false}, + {replace, Fs, false, may_create_simple(Opt, Prep)}; + true -> + {no, [], false, Prep0} + end; +prep_gen(#qlc_list{l = []}, Prep0, _PosFun, {_MS, Fs}, _Opt) -> + %% unique and cached + {replace, Fs, false, Prep0}; +prep_gen(#qlc_list{ms = no_match_spec}=LE0, Prep0, _PosFun, {MS, Fs}, Opt) + when MS =/= no_match_spec -> + Prep = Prep0#prepared{qh = LE0#qlc_list{ms = MS}, + is_cached = false}, + {replace, Fs, false, may_create_simple(Opt, Prep)}; +prep_gen(#qlc_list{}, Prep0, _PosFun, {MS, Fs}, Opt) + when MS =/= no_match_spec -> + ListMS = #qlc_list{l = Prep0, ms = MS}, + LE = #prepared{qh = ListMS, is_cached = false}, + {replace, Fs, false, may_create_simple(Opt, LE)}; +prep_gen(_LE0, Prep0, _PosFun, _MSFs, _Opt) -> + {no, [], false, Prep0}. + +-define(SIMPLE_QVAR, 'SQV'). + +may_create_simple(#qlc_opt{unique = Unique, cache = Cache} = Opt, + #prepared{is_cached = IsCached, + is_unique_objects = IsUnique} = Prep) -> + if + Unique and not IsUnique; + (Cache =/= false) and not IsCached -> + prep_simple_qlc(?SIMPLE_QVAR, 1, Prep, Opt); + true -> + Prep + end. + +prep_simple_qlc(PVar, Line, LE, Opt) -> + check_join_option(Opt), + #prepared{is_cached = IsCached, + sort_info = SortInfo, sorted = Sorted, + is_unique_objects = IsUnique} = LE, + #qlc_opt{unique = Unique, cache = Cache} = Opt, + Cachez = if + Unique -> Cache; + not IsCached -> Cache; + true -> false + end, + Optz = #optz{unique = Unique and not IsUnique, + cache = Cachez, opt = Opt}, + QLC = #simple_qlc{p = PVar, le = LE, line = Line, + init_value = not_a_list, optz = Optz}, + %% LE#prepared.join is not copied + #prepared{qh = QLC, is_unique_objects = IsUnique or Unique, + sort_info = SortInfo, sorted = Sorted, + is_cached = IsCached or (Cachez =/= false)}. + +prep_sort(#qlc_sort{h = #prepared{sorted = yes}=Prep}, _GOpt) -> + Prep; +prep_sort(#qlc_sort{h = #prepared{is_unique_objects = IsUniqueObjs}}=Q, + GOpt) -> + S1 = sort_unique(IsUniqueObjs, Q), + S2 = sort_tmpdir(S1, GOpt), + S = S2#qlc_sort{tmpdir_usage = GOpt#qlc_opt.tmpdir_usage}, + {SortInfo, Sorted} = sort_sort_info(S), + #prepared{qh = S, is_cached = true, sort_info = SortInfo, + sorted = Sorted, + is_unique_objects = S#qlc_sort.unique or IsUniqueObjs}. + +prep_qlc(QFun, CodeF, Qdata0, QOpt, Opt) -> + #qlc_opt{unique = Unique, cache = Cache, join = Join} = Opt, + Optz = #optz{unique = Unique, cache = Cache, + join_option = Join, opt = Opt}, + {Qdata, SortInfo} = qlc_sort_info(Qdata0, QOpt), + QLC = #qlc{lcf = QFun, codef = CodeF, qdata = Qdata, + init_value = not_a_list, optz = Optz}, + #prepared{qh = QLC, sort_info = SortInfo, + is_unique_objects = Unique, + is_cached = Cache =/= false}. + +%% 'sorted', 'sorted_info', and 'sorted_info2' are used to avoid +%% sorting on a key when there is no need to sort on the key. 'sorted' +%% is set by qlc:sort() only; its purpose is to assure that if columns +%% 1 to i are constant, then column i+1 is key-sorted (always true if +%% the tuples are sorted). Note: the implementation is (too?) simple. +%% For instance, each column is annotated with 'ascending' or +%% 'descending' (not yet). More exact would be, as examples, 'always +%% ascending' and 'ascending if all preceding columns are constant'. +%% +%% The 'size' of the template is not used (size_of_qualifier(QOpt, 0)). + +qlc_sort_info(Qdata0, QOpt) -> + F = fun(?qual_data(_QNum, _GoI, _SI, fil)=Qd, Info) -> + {Qd, Info}; + (?qual_data(_QNum, _GoI, _SI, {gen, #join{}})=Qd, Info) -> + {Qd, Info}; + (?qual_data(QNum, GoI, SI, {gen, PrepLE0}), Info) -> + PrepLE = sort_info(PrepLE0, QNum, QOpt), + Qd = ?qual_data(QNum, GoI, SI, {gen, PrepLE}), + I = [{{Column,Order}, [{traverse,QNum,C}]} || + {{C,Order},What} <- PrepLE#prepared.sort_info2, + What =:= [], % Something else later... + Column <- equal_template_columns(QOpt, {QNum,C})], + {Qd, [I | Info]} + end, + {Qdata, SortInfoL} = lists:mapfoldl(F, [], Qdata0), + SortInfo0 = [{{Pos,Ord}, [template]} || + Pos <- constant_columns(QOpt, 0), + Ord <- orders(yes)] + ++ lists:append(SortInfoL), + SortInfo = family_union(SortInfo0), + {Qdata, SortInfo}. + +sort_info(#prepared{sort_info = SI, sorted = S} = Prep, QNum, QOpt) -> + SI1 = [{{C,Ord},[]} || + S =/= no, + is_integer(Sz = size_of_qualifier(QOpt, QNum)), + Sz > 0, % the size of the pattern + (NConstCols = size_of_constant_prefix(QOpt, QNum)) < Sz, + C <- [NConstCols+1], + Ord <- orders(S)] + ++ [{{Pos,Ord},[]} || Pos <- constant_columns(QOpt, QNum), + Ord <- orders(yes)] + ++ [{PosOrd,[]} || {PosOrd,_} <- SI], + SI2 = lists:usort(SI1), + Prep#prepared{sort_info2 = SI2}. + +%orders(descending=O) -> +% [O]; +orders(ascending=O) -> + [O]; +orders(yes) -> + [ascending +% ,descending + ]. + +sort_unique(true, #qlc_sort{fs_opts = SortOptions, keypos = sort}=Sort) -> + Sort#qlc_sort{unique = false, + fs_opts = + lists:keydelete(unique, 1, + lists:delete(unique, SortOptions))}; +sort_unique(_, Sort) -> + Sort. + +sort_tmpdir(S, #qlc_opt{tmpdir = ""}) -> + S; +sort_tmpdir(S, Opt) -> + S#qlc_sort{tmpdir = Opt#qlc_opt.tmpdir}. + +short_list(L) -> + %% length(L) < 2: all elements are known be equal + #prepared{qh = #qlc_list{l = L}, sorted = yes, is_unique_objects = true, + is_cached = true}. + +find_const_positions(IF, LU_fun, {KeyEquality, PosFun}, + #qlc_opt{max_lookup = Max, lookup = Lookup}) + when is_function(LU_fun), is_function(PosFun), is_function(IF), + Lookup =/= false -> + case call(IF, keypos, undefined, []) of + undefined -> + Indices = call(IF, indices, undefined, []), + find_const_position_idx(Indices, KeyEquality, PosFun, Max, []); + KeyPos -> + case pos_vals(KeyPos, KeyEquality, PosFun(KeyPos), Max) of + false -> + find_const_position_idx(IF(indices), KeyEquality, + PosFun, Max, []); + PosValuesSkip -> + PosValuesSkip + end + end; +find_const_positions(_IF, _LU_fun, _KE_PosFun, _Opt0) -> + {false, {some,[]}}. + +find_const_position_idx([I | Is], KeyEquality, PosFun, Max, L0) -> + case pos_vals(I, KeyEquality, PosFun(I), Max) of + false -> + find_const_position_idx(Is, KeyEquality, PosFun, Max, L0); + {{_Pos, Values}, _SkipFils}=PosValuesFils -> + L = [{length(Values), PosValuesFils} | L0], + find_const_position_idx(Is, KeyEquality, PosFun, Max, L) + end; +find_const_position_idx(_, _KeyEquality, _PosFun, _Max, []) -> + {false, {some,[]}}; +find_const_position_idx(_, _KeyEquality, _PosFun, _Max, L) -> + [{_,PVF} | _] = lists:sort(L), + PVF. + +pos_vals(Pos, '==', {usort_needed, Values, SkipFils}, Max) -> + pos_vals_max(Pos, lists:usort(Values), SkipFils, Max); +pos_vals(Pos, '=:=', {usort_needed, Values, SkipFils}, Max) -> + pos_vals_max(Pos, lists:sort(nub(Values)), SkipFils, Max); +pos_vals(Pos, _KeyEquality, {values, Values, SkipFils}, Max) -> + pos_vals_max(Pos, Values, SkipFils, Max); +pos_vals(_Pos, _KeyEquality, _T, _Max) -> + false. + +nub([]) -> + []; +nub([E | L]) -> + case lists:member(E, Es=nub(L)) of + true -> + Es; + false -> + [E | Es] + end. + +%% length(Values) >= 1 +pos_vals_max(Pos, Values, Skip, Max) when Max =:= -1; Max >= length(Values) -> + {{Pos, Values}, Skip}; +pos_vals_max(_Pos, _Value, _Skip, _Max) -> + false. + +prep_join(Prep, QOpt, Opt) -> + case join_opt(QOpt) of + undefined -> + check_join_option(Opt), + Prep; + EqualMatch -> + {Ix, M} = case EqualMatch of + {NEqual, NMatch} -> + pref_join(NEqual, NMatch, Prep, QOpt, Opt); + EM -> + pref_join(EM, EM, Prep, QOpt, Opt) + end, + SI = family_union(Prep#prepared.sort_info ++ M), + Prep#prepared{join = {Ix, M}, sort_info = SI} + end. + +%% The parse transform ensures that only two tables are involved. +pref_join(Equal, Match, Prep, QOpt, #qlc_opt{join = JoinOpt}) -> + JQs = [{KeyEquality, QCs} || + {KeyEquality, QCsL} <- [{'==',Equal}, {'=:=',Match}], + QCs <- QCsL], + IxL = [pref_lookup_join(KE, QCs, Prep, QOpt) || + JoinOpt =:= any orelse JoinOpt =:= lookup, + {KE, QCs} <- JQs], + ML = [pref_merge_join(KE, QCs, Prep, QOpt) || + JoinOpt =:= any orelse JoinOpt =:= merge, + {KE, QCs} <- JQs], + {lists:usort(lists:append(IxL)), lists:usort(lists:append(ML))}. + +pref_lookup_join(KeyEquality, {[{Q1,C1},{Q2,C2}],Skip}, Prep, QOpt) + when is_integer(C1), is_integer(C2) -> + #prepared{qh = #qlc{qdata = QData}} = Prep, + Is1 = lookup_qual_data(QData, Q1, KeyEquality), + Lu2 = [pref_lookup_join2(Q2, C2, Q1, C1, Skip, QOpt, KeyEquality) || + IC1 <- Is1, IC1 =:= C1], + Is2 = lookup_qual_data(QData, Q2, KeyEquality), + Lu1 = [pref_lookup_join2(Q1, C1, Q2, C2, Skip, QOpt, KeyEquality) || + IC2 <- Is2, IC2 =:= C2], + family(Lu1 ++ Lu2); +pref_lookup_join(KE, [{_,Cs1},{_,Cs2}]=L, Prep, QOpt) when is_list(Cs1), + is_list(Cs2) -> + %% --R12B-5 + lists:append([pref_lookup_join(KE, QC,Prep,QOpt) || + QC <- selections_no_skip(L)]). + +lookup_qual_data(QData, QNum, KeyEquality) -> + case lists:keysearch(QNum, 1, QData) of + {value, ?qual_data(QNum, _, _, {gen, PrepLE})} -> + join_indices(PrepLE, KeyEquality) + end. + +%% If the table has a match specification (ms =/= no_match_spec) that +%% has _not_ been derived from a filter but from a query handle then +%% the lookup join cannot be done. This particular case has not been +%% excluded here but is taken care of in opt_join(). +join_indices(#prepared{qh = #qlc_table{info_fun = IF, + lookup_fun = LU_fun, + key_equality = KeyEquality, + lu_vals = undefined}}, + KE) when is_function(LU_fun), + KE =:= KeyEquality orelse + KE =:= '=:=' andalso + KeyEquality =:= undefined -> % --R12B-5 + KpL = case call(IF, keypos, undefined, []) of + undefined -> []; + Kp -> [Kp] + end, + case call(IF, indices, undefined, []) of + undefined -> KpL; + Is0 -> lists:usort(KpL ++ Is0) + end; +join_indices(_Prep, _KeyEquality) -> + []. + +pref_lookup_join2(Q1, C1, Q2, C2, Skip, QOpt, KeyEquality) -> + TemplCols = compared_template_columns(QOpt, {Q1,C1}, KeyEquality), + {{Q1,C1,Q2,C2},{lookup_join,TemplCols,KeyEquality,Skip}}. + +pref_merge_join(KE, {[{Q1,C1},{Q2,C2}],Skip}, Prep, QOpt) + when is_integer(C1), is_integer(C2) -> + #prepared{qh = #qlc{qdata = QData}} = Prep, + Sort1 = merge_qual_data(QData, Q1), + Sort2 = merge_qual_data(QData, Q2), + Merge = pref_merge(KE, Q1, C1, Q2, C2, Skip, Sort1, Sort2, QOpt), + family_union(Merge); +pref_merge_join(KE, [{_,Cs1},{_,Cs2}]=L, Prep, QOpt) when is_list(Cs1), + is_list(Cs2) -> + %% --R12B-5 + lists:append([pref_merge_join(KE, QC, Prep, QOpt) || + QC <- selections_no_skip(L)]). + +selections_no_skip(L) -> + [{C,{some,[]}} || C <- all_selections(L)]. + +merge_qual_data(QData, QNum) -> + case lists:keysearch(QNum, 1, QData) of + {value, ?qual_data(QNum, _, _, {gen, PrepLE})} -> + #prepared{sort_info2 = SortInfo} = PrepLE, + SortInfo + end. + +pref_merge(KE, Q1, C1, Q2, C2, Skip, Sort1, Sort2, QOpt) -> + Col1 = {Q1,C1}, + Col2 = {Q2,C2}, + DoSort = [QC || {{_QNum,Col}=QC,SortL} <- [{Col1,Sort1}, {Col2,Sort2}], + lists:keymember({Col, ascending}, 1, SortL) =:= false], + J = [{{Q1,C1,Q2,C2}, {merge_join,DoSort,KE,Skip}}], + %% true = (QOpt(template))(Col1, '==') =:= (QOpt(template))(Col2, '==') + [{{Column, ascending}, J} || + Column <- equal_template_columns(QOpt, Col1)] ++ [{other, J}]. + +table_sort_info(#qlc_table{info_fun = IF}) -> + case call(IF, is_sorted_key, undefined, []) of + undefined -> + {[], no}; + false -> + {[], no}; + true -> + case call(IF, keypos, undefined, []) of + undefined -> % strange + {[], no}; + KeyPos -> + {[{{KeyPos,ascending},[]}], no} + end + end. + +sort_sort_info(#qlc_sort{keypos = sort, order = Ord0}) -> + {[], sort_order(Ord0)}; +sort_sort_info(#qlc_sort{keypos = {keysort,Kp0}, order = Ord0}) -> + Kp = case Kp0 of + [Pos | _] -> Pos; + _ -> Kp0 + end, + {[{{Kp,sort_order(Ord0)},[]}], no}. + +sort_order(F) when is_function(F) -> + no; +sort_order(Order) -> + Order. + +check_join_option(#qlc_opt{join = any}) -> + ok; +check_join_option(#qlc_opt{join = Join}) -> + erlang:error(no_join_to_carry_out, [{join,Join}]). + +check_lookup_option(#qlc_opt{lookup = true}, false) -> + erlang:error(no_lookup_to_carry_out, [{lookup,true}]); +check_lookup_option(_QOpt, _LuV) -> + ok. + +compared_template_columns(QOpt, QNumColumn, KeyEquality) -> + (QOpt(template))(QNumColumn, KeyEquality). + +equal_template_columns(QOpt, QNumColumn) -> + (QOpt(template))(QNumColumn, '=='). + +%eq_template_columns(QOpt, QNumColumn) -> +% (QOpt(template))(QNumColumn, '=:='). + +size_of_constant_prefix(QOpt, QNum) -> + (QOpt(n_leading_constant_columns))(QNum). + +constants(QOpt, QNum) -> + (QOpt(constants))(QNum). + +equal_constants(QOpt, QNum) -> + (QOpt(equal_constants))(QNum). + +join_opt(QOpt) -> + QOpt(join). + +match_specs(QOpt, QNum) -> + (QOpt(match_specs))(QNum). + +constant_columns(QOpt, QNum) -> + (QOpt(constant_columns))(QNum). + +size_of_qualifier(QOpt, QNum) -> + (QOpt(size))(QNum). + +%% Two optimizations are carried out: +%% 1. The first generator is never cached if the QLC itself is cached. +%% Since the answers do not need to be cached, the top-most QLC is +%% never cached either. Simple QLCs not holding any options are +%% removed. Simple QLCs are coalesced when possible. +%% 2. Merge join and lookup join is done if possible. + +opt_le(#prepared{qh = #simple_qlc{le = LE0, optz = Optz0}=QLC}=Prep0, + GenNum) -> + case LE0 of + #prepared{qh = #simple_qlc{p = LE_Pvar, le = LE2, optz = Optz2}} -> + %% Coalesce two simple QLCs. + Cachez = case Optz2#optz.cache of + false -> Optz0#optz.cache; + Cache2 -> Cache2 + end, + Optz = Optz0#optz{cache = Cachez, + unique = Optz0#optz.unique or Optz2#optz.unique}, + PVar = if + LE_Pvar =:= ?SIMPLE_QVAR -> QLC#simple_qlc.p; + true -> LE_Pvar + end, + Prep = Prep0#prepared{qh = QLC#simple_qlc{p = PVar, le = LE2, + optz = Optz}}, + opt_le(Prep, GenNum); + _ -> + Optz1 = no_cache_of_first_generator(Optz0, GenNum), + case {opt_le(LE0, 1), Optz1} of + {LE, #optz{unique = false, cache = false}} -> + LE; + {LE, _} -> + Prep0#prepared{qh = QLC#simple_qlc{le = LE, optz = Optz1}} + end + end; +opt_le(#prepared{qh = #qlc{}, lu_skip_quals = LU_SkipQuals0}=Prep0, GenNum) -> + #prepared{qh = #qlc{qdata = Qdata0, optz = Optz0}=QLC} = Prep0, + #optz{join_option = JoinOption, opt = Opt} = Optz0, + JoinOption = Optz0#optz.join_option, + {LU_QNum, Join, JoinSkipFs, DoSort} = + opt_join(Prep0#prepared.join, JoinOption, Qdata0, Opt, LU_SkipQuals0), + {LU_Skip, LU_SkipQuals} = + lists:partition(fun({QNum,_Fs}) -> QNum =:= LU_QNum end, + LU_SkipQuals0), + LU_SkipFs = lists:flatmap(fun({_QNum,Fs}) -> Fs end, LU_SkipQuals), + %% If LU_QNum has a match spec it must be applied _after_ the + %% lookup join (the filter must not be skipped!). + Qdata1 = if + LU_Skip =:= [] -> Qdata0; + true -> activate_join_lookup_filter(LU_QNum, Qdata0) + end, + Qdata2 = skip_lookup_filters(Qdata1, LU_SkipFs ++ JoinSkipFs), + F = fun(?qual_data(QNum, GoI, SI, {gen, #prepared{}=PrepLE}), GenNum1) -> + NewPrepLE = maybe_sort(PrepLE, QNum, DoSort, Opt), + {?qual_data(QNum, GoI, SI, {gen, opt_le(NewPrepLE, GenNum1)}), + GenNum1 + 1}; + (Qd, GenNum1) -> + {Qd, GenNum1} + end, + {Qdata, _} = lists:mapfoldl(F, 1, Qdata2), + Optz1 = no_cache_of_first_generator(Optz0, GenNum), + Optz = Optz1#optz{fast_join = Join}, + Prep0#prepared{qh = QLC#qlc{qdata = Qdata, optz = Optz}}; +opt_le(#prepared{qh = #qlc_append{hl = HL}}=Prep, GenNum) -> + Hs = [opt_le(H, GenNum) || H <- HL], + Prep#prepared{qh = #qlc_append{hl = Hs}}; +opt_le(#prepared{qh = #qlc_sort{h = H}=Sort}=Prep, GenNum) -> + Prep#prepared{qh = Sort#qlc_sort{h = opt_le(H, GenNum)}}; +opt_le(Prep, _GenNum) -> + Prep. + +no_cache_of_first_generator(Optz, GenNum) when GenNum > 1 -> + Optz; +no_cache_of_first_generator(Optz, 1) -> + Optz#optz{cache = false}. + +maybe_sort(LE, QNum, DoSort, Opt) -> + case lists:keysearch(QNum, 1, DoSort) of + {value, {QNum, Col}} -> + #qlc_opt{tmpdir = TmpDir, tmpdir_usage = TmpUsage} = Opt, + SortOpts = [{tmpdir,Dir} || Dir <- [TmpDir], Dir =/= ""], + Sort = #qlc_sort{h = LE, keypos = {keysort, Col}, unique = false, + compressed = [], order = ascending, + fs_opts = SortOpts, tmpdir_usage = TmpUsage, + tmpdir = TmpDir}, + #prepared{qh = Sort, sorted = no, join = no}; + false -> + LE + end. + +skip_lookup_filters(Qdata, []) -> + Qdata; +skip_lookup_filters(Qdata0, LU_SkipFs) -> + [case lists:member(QNum, LU_SkipFs) of + true -> + ?qual_data(QNum, GoI, ?SKIP, fil); + false -> + Qd + end || ?qual_data(QNum, GoI, _, _)=Qd <- Qdata0]. + +%% If the qualifier used for lookup by the join (QNum) has a match +%% specification it must be applied _after_ the lookup join (the +%% filter must not be skipped!). +activate_join_lookup_filter(QNum, Qdata) -> + {value, {_,GoI2,SI2,{gen,Prep2}}} = lists:keysearch(QNum, 1, Qdata), + Table2 = Prep2#prepared.qh, + NPrep2 = Prep2#prepared{qh = Table2#qlc_table{ms = no_match_spec}}, + %% Table2#qlc_table.ms has been reset; the filter will be run. + lists:keyreplace(QNum, 1, Qdata, ?qual_data(QNum,GoI2,SI2,{gen,NPrep2})). + +opt_join(Join, JoinOption, Qdata, Opt, LU_SkipQuals) -> + %% prep_qlc_lc() assures that no unwanted join is carried out + {Ix0, M0} = Join, + Ix1 = opt_join_lu(Ix0, Qdata, LU_SkipQuals), + Ix = lists:reverse(lists:keysort(2, Ix1)), % prefer to skip + case Ix of + [{{Q1,C1,Q2,C2},Skip,KE,LU_fun} | _] -> + J = #qlc_join{kind = {lookup, KE, LU_fun}, q1 = Q1, + c1 = C1, q2 = Q2, c2 = C2, opt = Opt}, + {Q2, J, Skip, []}; + [] -> + M = opt_join_merge(M0), + case M of + [{{Q1,C1,Q2,C2},{merge_join,DoSort,KE,Skip}}|_] -> + J = #qlc_join{kind = {merge, KE}, opt = Opt, + q1 = Q1, c1 = C1, q2 = Q2, c2 = C2}, + {not_a_qnum, J, Skip, DoSort}; + [] when JoinOption =:= nested_loop -> + {not_a_qnum, no, [], []}; + _ when JoinOption =/= any -> + erlang:error(cannot_carry_out_join, [JoinOption]); + _ -> + {not_a_qnum, no, [], []} + end + end. + +opt_join_lu([{{_Q1,_C1,Q2,_C2}=J,[{lookup_join,_KEols,JKE,Skip0} | _]} | LJ], + Qdata, LU_SkipQuals) -> + {value, {Q2,_,_,{gen,Prep2}}} = lists:keysearch(Q2, 1, Qdata), + #qlc_table{ms = MS, key_equality = KE, + lookup_fun = LU_fun} = Prep2#prepared.qh, + %% If there is no filter to skip (the match spec was derived + %% from a query handle) then the lookup join cannot be done. + case + MS =/= no_match_spec andalso + lists:keymember(Q2, 1, LU_SkipQuals) =:= false + of + true -> + opt_join_lu(LJ, Qdata, LU_SkipQuals); + false -> + %% The join is preferred before evaluating the match spec + %% (if there is one). + Skip = skip_if_possible(JKE, KE, Skip0), + [{J,Skip,KE,LU_fun} | opt_join_lu(LJ, Qdata, LU_SkipQuals)] + end; +opt_join_lu([], _Qdata, _LU_SkipQuals) -> + []. + +opt_join_merge(M) -> + %% Prefer not to sort arguments. Prefer to skip join filter. + L = [{-length(DoSort),length(Skip), + {QCs,{merge_join,DoSort,KE,Skip}}} || + {_KpOrder_or_other,MJ} <- M, + {QCs,{merge_join,DoSort,KE,Skip0}} <- MJ, + Skip <- [skip_if_possible(KE, '==', Skip0)]], + lists:reverse([J || {_,_,J} <- lists:sort(L)]). + +%% Cannot skip the join filter the join operator is '=:=' and the join +%% is performed using '=='. Note: the tag 'some'/'all' is not used. +skip_if_possible('=:=', '==', _) -> + []; +skip_if_possible(_, _, {_SkipTag, Skip}) -> + Skip. + +%% -> {Objects, Post, LocalPost} | throw() +%% Post is a list of funs (closures) to run afterwards. +%% LocalPost should be run when all objects have been found (optimization). +%% LocalPost will always be a subset of Post. +%% List expressions are evaluated, resulting in lists of objects kept in +%% RAM or on disk. +%% An error term is thrown as soon as cleanup according Post has been +%% done. (This is opposed to errors during evaluation; such errors are +%% returned as terms.) +setup_qlc(Prep, Setup) -> + Post0 = [], + setup_le(Prep, Post0, Setup). + +setup_le(#prepared{qh = #simple_qlc{le = LE, optz = Optz}}, Post0, Setup) -> + {Objs, Post, LocalPost} = setup_le(LE, Post0, Setup), + unique_cache(Objs, Post, LocalPost, Optz); +setup_le(#prepared{qh = #qlc{lcf = QFun, qdata = Qdata, init_value = V, + optz = Optz}}, Post0, Setup) -> + {GoTo, FirstState, Post, LocalPost} = + setup_quals(Qdata, Post0, Setup, Optz), + Objs = fun() -> QFun(FirstState, V, GoTo) end, + unique_cache(Objs, Post, LocalPost, Optz); +setup_le(#prepared{qh = #qlc_table{post_fun = PostFun}=Table}, Post, Setup) -> + H = table_handle(Table, Post, Setup), + %% The pre fun has been called from table_handle(): + {H, [PostFun | Post], []}; +setup_le(#prepared{qh = #qlc_append{hl = PrepL}}, Post0, Setup) -> + F = fun(Prep, {Post1, LPost1}) -> + {Objs, Post2, LPost2} = setup_le(Prep, Post1, Setup), + {Objs, {Post2, LPost1++LPost2}} + end, + {ObjsL, {Post, LocalPost}} = lists:mapfoldl(F, {Post0,[]}, PrepL), + {fun() -> append_loop(ObjsL, 0) end, Post, LocalPost}; +setup_le(#prepared{qh = #qlc_sort{h = Prep, keypos = Kp, + unique = Unique, compressed = Compressed, + order = Order, fs_opts = SortOptions0, + tmpdir_usage = TmpUsage,tmpdir = TmpDir}}, + Post0, Setup) -> + SortOptions = sort_options_global_tmp(SortOptions0, TmpDir), + LF = fun(Objs) -> + sort_list(Objs, Order, Unique, Kp, SortOptions, Post0) + end, + case setup_le(Prep, Post0, Setup) of + {L, Post, LocalPost} when is_list(L) -> + {LF(L), Post, LocalPost}; + {Objs, Post, LocalPost} -> + FF = fun(Objs1) -> + file_sort_handle(Objs1, Kp, SortOptions, TmpDir, + Compressed, Post, LocalPost) + end, + sort_handle(Objs, LF, FF, SortOptions, Post, LocalPost, + {TmpUsage, sorting}) + end; +setup_le(#prepared{qh = #qlc_list{l = L, ms = MS}}, Post, _Setup) + when (no_match_spec =:= MS); L =:= [] -> + {L, Post, []}; +setup_le(#prepared{qh = #qlc_list{l = L, ms = MS}}, Post, _Setup) + when is_list(L) -> + {ets:match_spec_run(L, ets:match_spec_compile(MS)), Post, []}; +setup_le(#prepared{qh = #qlc_list{l = H0, ms = MS}}, Post0, Setup) -> + {Objs0, Post, LocalPost} = setup_le(H0, Post0, Setup), + Objs = ets:match_spec_run(Objs0, ets:match_spec_compile(MS)), + {Objs, Post, LocalPost}. + +%% The goto table (a tuple) is created at runtime. It is accessed by +%% the generated code in order to find next clause to execute. For +%% generators there is also a fun; calling the fun runs the list +%% expression of the generator. There are two elements for a filter: +%% the first one is the state to go when the filter is false; the +%% other the state when the filter is true. There are three elements +%% for a generator G: the first one is the state of the generator +%% before G (or the stop state if there is no generator); the second +%% one is the state of the qualifier following the generator (or the +%% template if there is no next generator); the third one is the list +%% expression fun. +%% There are also join generators which are "activated" when it is +%% possbible to do a join. + +setup_quals(Qdata, Post0, Setup, Optz) -> + {GoTo0, Post1, LocalPost0} = + setup_quals(0, Qdata, [], Post0, [], Setup), + GoTo1 = lists:keysort(1, GoTo0), + FirstState0 = next_state(Qdata), + {GoTo2, FirstState, Post, LocalPost1} = + case Optz#optz.fast_join of + #qlc_join{kind = {merge,_KE}, c1 = C1, c2 = C2, opt = Opt} = MJ -> + MF = fun(_Rev, {H1, WH1}, {H2, WH2}) -> + fun() -> + merge_join(WH1(H1), C1, WH2(H2), C2, Opt) + end + end, + setup_join(MJ, Qdata, GoTo1, FirstState0, MF, Post1); + #qlc_join{kind = {lookup,_KE,LuF}, c1 = C1, c2 = C2} = LJ -> + LF = fun(Rev, {H1, WH1}, {H2, WH2}) -> + {H, W} = if + Rev -> {H2, WH2}; + true -> {H1, WH1} + end, + fun() -> + lookup_join(W(H), C1, LuF, C2, Rev) + end + end, + setup_join(LJ, Qdata, GoTo1, FirstState0, LF, Post1); + no -> + {flat_goto(GoTo1), FirstState0, Post1, []} + end, + GoTo = list_to_tuple(GoTo2), + {GoTo, FirstState, Post, LocalPost0 ++ LocalPost1}. + +setup_quals(GenLoopS, [?qual_data(_QNum,GoI,?SKIP,fil) | Qdata], + Gs, P, LP, Setup) -> + %% ?SKIP causes runtime error. See also skip_lookup_filters(). + setup_quals(GenLoopS, Qdata, [{GoI,[?SKIP,?SKIP]} | Gs], P, LP, Setup); +setup_quals(GenLoopS, [?qual_data(_QNum,GoI,_SI,fil) | Qdata], + Gs, P, LP, Setup) -> + setup_quals(GenLoopS, Qdata, [{GoI,[GenLoopS,next_state(Qdata)]} | Gs], + P, LP, Setup); +setup_quals(GenLoopS, [?qual_data(_QNum,GoI,_SI, {gen,#join{}}) | Qdata], + Gs, P, LP, Setup) -> + setup_quals(GenLoopS, Qdata, [{GoI,[?SKIP,?SKIP,?SKIP]} | Gs],P,LP,Setup); +setup_quals(GenLoopS, [?qual_data(_QNum,GoI,SI,{gen,LE}) | Qdata], + Gs, P, LP, Setup) -> + {V, NP, LP1} = setup_le(LE, P, Setup), + setup_quals(SI + 1, Qdata, [{GoI, [GenLoopS,next_state(Qdata),V]} | Gs], + NP, LP ++ LP1, Setup); +setup_quals(GenLoopS, [], Gs, P, LP, _Setup) -> + {[{1,[GenLoopS]} | Gs], P, LP}. + +%% Finds the qualifier in Qdata that performs the join between Q1 and +%% Q2, and sets it up using the handles already set up for Q1 and Q2. +%% Removes Q1 and Q2 from GoTo0 and updates the join qualifier in GoTo0. +%% Note: the parse transform has given each generator three slots +%% in the GoTo table. The position of these slots within the GoTo table +%% is fixed (at runtime). +%% (Assumes there is only one join-generator in Qdata.) +setup_join(J, Qdata, GoTo0, FirstState0, JoinFun, Post0) -> + #qlc_join{q1 = QNum1a, q2 = QNum2a, opt = Opt} = J, + {?qual_data(_QN,JGoI,JSI,_), Rev, QNum1, QNum2, WH1, WH2, _CsFun} = + find_join_data(Qdata, QNum1a, QNum2a), + [{GoI1,SI1}] = [{GoI,SI} || + ?qual_data(QNum,GoI,SI,_) <- Qdata, QNum =:= QNum1], + [{GoI2,SI2}] = [{GoI,SI} || + ?qual_data(QNum,GoI,SI,_) <- Qdata, QNum =:= QNum2], + + [H1] = [H || {GoI,[_Back,_Forth,H]} <- GoTo0, GoI =:= GoI1], + [{BackH2,H2}] = + [{Back,H} || {GoI,[Back,_Forth,H]} <- GoTo0, GoI =:= GoI2], + H0 = JoinFun(Rev, {H1,WH1}, {H2,WH2}), + %% The qlc expression options apply to the introduced qlc expr as well. + {H, Post, LocalPost} = + unique_cache(H0, Post0, [], join_unique_cache(Opt)), + [JBack] = [Back || {GoI,[Back,_,_]} <- GoTo0, GoI =:= GoI1], + JForth = next_after(Qdata, SI1, QNum2), + GoTo1 = lists:map(fun({GoI,_}) when GoI =:= JGoI -> + {JGoI, [JBack, JForth, H]}; + ({GoI,_}) when GoI =:= GoI1; GoI =:= GoI2 -> + {GoI, [?SKIP,?SKIP,?SKIP]}; % not necessary + (Go) -> + Go + end, GoTo0), + GoTo = lists:map(fun(S) when S =:= SI1 -> + JSI; + (S) when S =:= SI2 -> + next_after(Qdata, S, QNum2); + (S) when S =:= SI1+1 -> + JSI+1; + (S) when S =:= SI2+1, SI1 + 1 =:= BackH2 -> + JSI+1; + (S) when S =:= SI2+1 -> + BackH2; + (S) -> S + end, flat_goto(GoTo1)), + FirstState = if + SI1 =:= FirstState0 -> JSI; + true -> FirstState0 + end, + {GoTo, FirstState, Post, LocalPost}. + +join_unique_cache(#qlc_opt{cache = Cache, unique = Unique}=Opt) -> + #optz{cache = Cache, unique = Unique, opt = Opt}. + +flat_goto(GoTo) -> + lists:flatmap(fun({_,L}) -> L end, GoTo). + +next_after([?qual_data(_, _, S, _) | Qdata], S, QNum2) -> + case Qdata of + [?qual_data(QNum2, _, _, _) | Qdata1] -> + next_state(Qdata1); + _ -> + next_state(Qdata) + end; +next_after([_ | Qdata], S, QNum2) -> + next_after(Qdata, S, QNum2). + +next_state([?qual_data(_,_,_,{gen,#join{}}) | Qdata]) -> + next_state(Qdata); +next_state([?qual_data(_,_,?SKIP,fil) | Qdata]) -> + %% see skip_lookup_filters() + next_state(Qdata); +next_state([?qual_data(_,_,S,_) | _]) -> + S; +next_state([]) -> + template_state(). + +find_join_data(Qdata, QNum1, QNum2) -> + [QRev] = [{Q,Rev,QN1,QN2,H1,H2,CsF} || + ?qual_data(_QN,_GoI,_SI, + {gen,#join{q1 = QN1,q2 = QN2, + wh1 = H1, wh2 = H2, + cs_fun = CsF}})= Q <- Qdata, + if + QN1 =:= QNum1, QN2 =:= QNum2 -> + not (Rev = false); + QN1 =:= QNum2, QN2 =:= QNum1 -> + Rev = true; + true -> + Rev = false + end], + QRev. + +table_handle(#qlc_table{trav_fun = TraverseFun, trav_MS = TravMS, + pre_fun = PreFun, lookup_fun = LuF, + parent_fun = ParentFun, lu_vals = LuVals, ms = MS}, + Post, Setup) -> + #setup{parent = Parent} = Setup, + ParentValue = + if + ParentFun =:= undefined -> + undefined; + Parent =:= self() -> + try + ParentFun() + catch Class:Reason -> + post_funs(Post), + erlang:raise(Class, Reason, erlang:get_stacktrace()) + end; + true -> + case monitor_request(Parent, {parent_fun, ParentFun}) of + error -> % parent has died + post_funs(Post), + exit(normal); + {value, Value} -> + Value; + {parent_fun_caught, Class, Reason, Stacktrace} -> + %% No use augmenting Stacktrace here. + post_funs(Post), + erlang:raise(Class, Reason, Stacktrace) + end + end, + StopFun = + if + Parent =:= self() -> + undefined; + true -> + Cursor = #qlc_cursor{c = {self(), Parent}}, + fun() -> delete_cursor(Cursor) end + end, + PreFunArgs = [{parent_value, ParentValue}, {stop_fun, StopFun}], + _ = call(PreFun, PreFunArgs, ok, Post), + case LuVals of + {Pos, Vals} when MS =:= no_match_spec -> + LuF(Pos, Vals); + {Pos, Vals} -> + case LuF(Pos, Vals) of + [] -> + []; + Objs when is_list(Objs) -> + ets:match_spec_run(Objs, + ets:match_spec_compile(MS)); + Error -> + post_funs(Post), + throw_error(Error) + end; + _ when not TravMS -> + MS = no_match_spec, % assertion + TraverseFun; + _ when MS =:= no_match_spec -> + fun() -> TraverseFun([{'$1',[],['$1']}]) end; + _ -> + fun() -> TraverseFun(MS) end + end. + +-define(CHUNK_SIZE, 64*1024). + +open_file(FileName, Extra, Post) -> + case file:open(FileName, [read, raw, binary | Extra]) of + {ok, Fd} -> + {fun() -> + case file:position(Fd, bof) of + {ok, 0} -> + TF = fun([], _) -> + []; + (Ts, C) when is_list(Ts) -> + lists:reverse(Ts, C) + end, + file_loop_read(<<>>, ?CHUNK_SIZE, {Fd,FileName}, TF); + Error -> + file_error(FileName, Error) + end + end, Fd}; + Error -> + post_funs(Post), + throw_file_error(FileName, Error) + end. + +file_loop(Bin0, Fd_FName, Ts0, TF) -> + case + try file_loop2(Bin0, Ts0) + catch _:_ -> + {_Fd, FileName} = Fd_FName, + error({bad_object, FileName}) + end + of + {terms, <>=Bin, []} -> + file_loop_read(Bin, Size - byte_size(B) + 4, Fd_FName, TF); + {terms, <>=Bin, Ts} -> + C = fun() -> file_loop_read(Bin, Size+4, Fd_FName, TF) end, + TF(Ts, C); + {terms, B, Ts} -> + C = fun() -> file_loop_read(B, ?CHUNK_SIZE, Fd_FName, TF) end, + TF(Ts, C); + Error -> + Error + end. + +file_loop2(<>, Ts) -> + file_loop2(Bin, [binary_to_term(B) | Ts]); +file_loop2(Bin, Ts) -> + {terms, Bin, Ts}. + +%% After power failures (and only then) files with corrupted Size +%% fields have been observed in a disk_log file. If file:read/2 is +%% asked to read a huge amount of data the emulator may crash. Nothing +%% has been done here to prevent such crashes (by inspecting +%% BytesToRead in some way) since temporary files will never be read +%% after a power failure. +file_loop_read(B, MinBytesToRead, {Fd, FileName}=Fd_FName, TF) -> + BytesToRead = erlang:max(?CHUNK_SIZE, MinBytesToRead), + case file:read(Fd, BytesToRead) of + {ok, Bin} when byte_size(B) =:= 0 -> + file_loop(Bin, Fd_FName, [], TF); + {ok, Bin} -> + case B of + <> + when byte_size(Bin) + byte_size(Tl) >= Size -> + {B1, B2} = split_binary(Bin, Size - byte_size(Tl)), + Foo = fun([T], Fun) -> [T | Fun] end, + %% TF should be applied exactly once. + case + file_loop(list_to_binary([B, B1]), Fd_FName, [], Foo) + of + [T | Fun] -> + true = is_function(Fun), + file_loop(B2, Fd_FName, [T], TF); + Error -> + Error + end; + _ -> + file_loop(list_to_binary([B, Bin]), Fd_FName, [], TF) + end; + eof when byte_size(B) =:= 0 -> + TF([], foo); + eof -> + error({bad_object, FileName}); + Error -> + file_error(FileName, Error) + end. + +sort_cursor_input(H, NoObjects) -> + fun(close) -> + ok; + (read) -> + sort_cursor_input_read(H, NoObjects) + end. + +sort_cursor_list_output(TmpDir, Z, Unique) -> + fun(close) -> + {terms, []}; + ({value, NoObjects}) -> + fun(BTerms) when Unique; length(BTerms) =:= NoObjects -> + fun(close) -> + {terms, BTerms}; + (BTerms1) -> + sort_cursor_file(BTerms ++ BTerms1, TmpDir, Z) + end; + (BTerms) -> + sort_cursor_file(BTerms, TmpDir, Z) + end + end. + +sort_cursor_file(BTerms, TmpDir, Z) -> + FName = tmp_filename(TmpDir), + case file:open(FName, [write, raw, binary | Z]) of + {ok, Fd} -> + WFun = write_terms(FName, Fd), + WFun(BTerms); + Error -> + throw_file_error(FName, Error) + end. + +sort_options_global_tmp(S, "") -> + S; +sort_options_global_tmp(S, TmpDir) -> + [{tmpdir,TmpDir} | lists:keydelete(tmpdir, 1, S)]. + +tmp_filename(TmpDirOpt) -> + U = "_", + Node = node(), + Pid = os:getpid(), + {MSecs,Secs,MySecs} = erlang:now(), + F = lists:concat([?MODULE,U,Node,U,Pid,U,MSecs,U,Secs,U,MySecs]), + TmpDir = case TmpDirOpt of + "" -> + {ok, CurDir} = file:get_cwd(), + CurDir; + TDir -> + TDir + end, + filename:join(filename:absname(TmpDir), F). + +write_terms(FileName, Fd) -> + fun(close) -> + _ = file:close(Fd), + {file, FileName}; + (BTerms) -> + case file:write(Fd, size_bin(BTerms, [])) of + ok -> + write_terms(FileName, Fd); + Error -> + _ = file:close(Fd), + throw_file_error(FileName, Error) + end + end. + +size_bin([], L) -> + L; +size_bin([BinTerm | BinTerms], L) -> + size_bin(BinTerms, [L, <<(byte_size(BinTerm)):4/unit:8>> | BinTerm]). + +sort_cursor_input_read([], NoObjects) -> + {end_of_input, NoObjects}; +sort_cursor_input_read([Object | Cont], NoObjects) -> + {[term_to_binary(Object)], sort_cursor_input(Cont, NoObjects + 1)}; +sort_cursor_input_read(F, NoObjects) -> + case F() of + Objects when is_list(Objects) -> + sort_cursor_input_read(Objects, NoObjects); + Term -> + throw_error(Term) + end. + +unique_cache(L, Post, LocalPost, Optz) when is_list(L) -> + case Optz#optz.unique of + true -> + {unique_sort_list(L), Post, LocalPost}; + false -> + %% If Optz#optz.cache then an ETS table could be used. + {L, Post, LocalPost} + end; +unique_cache(H, Post, LocalPost, #optz{unique = false, cache = false}) -> + {H, Post, LocalPost}; +unique_cache(H, Post, LocalPost, #optz{unique = true, cache = false}) -> + E = ets:new(qlc, [set, private]), + {fun() -> no_dups(H, E) end, [del_table(E) | Post], LocalPost}; +unique_cache(H, Post, LocalPost, #optz{unique = false, cache = true}) -> + E = ets:new(qlc, [set, private]), + {L, P} = unique_cache_post(E), + {fun() -> cache(H, E, LocalPost) end, [P | Post], [L]}; +unique_cache(H, Post, LocalPost, #optz{unique = true, cache = true}) -> + UT = ets:new(qlc, [bag, private]), + MT = ets:new(qlc, [set, private]), + {L1, P1} = unique_cache_post(UT), + {L2, P2} = unique_cache_post(MT), + {fun() -> ucache(H, UT, MT, LocalPost) end, [P1, P2 | Post], [L1, L2]}; +unique_cache(H, Post, LocalPost, #optz{unique = false, cache = list}=Optz) -> + Ref = make_ref(), + F = del_lcache(Ref), + #qlc_opt{tmpdir = TmpDir, max_list = MaxList, tmpdir_usage = TmpUsage} = + Optz#optz.opt, + {fun() -> lcache(H, Ref, LocalPost, TmpDir, MaxList, TmpUsage) end, + [F | Post], [F]}; +unique_cache(H, Post0, LocalPost0, #optz{unique = true, cache = list}=Optz) -> + #qlc_opt{tmpdir = TmpDir, max_list = MaxList, tmpdir_usage = TmpUsage} = + Optz#optz.opt, + Size = if + MaxList >= 1 bsl 31 -> (1 bsl 31) - 1; + MaxList =:= 0 -> 1; + true -> MaxList + end, + SortOptions = [{size, Size}, {tmpdir, TmpDir}], + USortOptions = [{unique, true} | SortOptions], + TmpUsageM = {TmpUsage, caching}, + LF1 = fun(Objs) -> lists:ukeysort(1, Objs) end, + FF1 = fun(Objs) -> + file_sort_handle(Objs, {keysort, 1}, USortOptions, + TmpDir, [], Post0, LocalPost0) + end, + {UH, Post1, LocalPost1} = sort_handle(tag_objects(H, 1), LF1, FF1, + USortOptions, Post0, LocalPost0, + TmpUsageM), + LF2 = fun(Objs) -> lists:keysort(2, Objs) end, + FF2 = fun(Objs) -> + file_sort_handle(Objs, {keysort, 2}, SortOptions, TmpDir, + [], Post1, LocalPost1) + end, + {SH, Post, LocalPost} = + sort_handle(UH, LF2, FF2, SortOptions, Post1, LocalPost1, TmpUsageM), + if + is_list(SH) -> + %% Remove the tag once and for all. + {untag_objects2(SH), Post, LocalPost}; + true -> + %% Every traversal untags the objects... + {fun() -> untag_objects(SH) end, Post, LocalPost} + end. + +unique_cache_post(E) -> + {empty_table(E), del_table(E)}. + +unique_sort_list(L) -> + E = ets:new(qlc, [set, private]), + unique_list(L, E). + +unique_list([], E) -> + true = ets:delete(E), + []; +unique_list([Object | Objects], E) -> + case ets:member(E, Object) of + false -> + true = ets:insert(E, {Object}), + [Object | unique_list(Objects, E)]; + true -> + unique_list(Objects, E) + end. + +sort_list(L, CFun, true, sort, _SortOptions, _Post) when is_function(CFun) -> + lists:usort(CFun, L); +sort_list(L, CFun, false, sort, _SortOptions, _Post) when is_function(CFun) -> + lists:sort(CFun, L); +sort_list(L, ascending, true, sort, _SortOptions, _Post) -> + lists:usort(L); +sort_list(L, descending, true, sort, _SortOptions, _Post) -> + lists:reverse(lists:usort(L)); +sort_list(L, ascending, false, sort, _SortOptions, _Post) -> + lists:sort(L); +sort_list(L, descending, false, sort, _SortOptions, _Post) -> + lists:reverse(lists:sort(L)); +sort_list(L, Order, Unique, {keysort, Kp}, _SortOptions, _Post) + when is_integer(Kp), is_atom(Order) -> + case {Order, Unique} of + {ascending, true} -> + lists:ukeysort(Kp, L); + {ascending, false} -> + lists:keysort(Kp, L); + {descending, true} -> + lists:reverse(lists:ukeysort(Kp, L)); + {descending, false} -> + lists:reverse(lists:keysort(Kp, L)) + end; +sort_list(L, _Order, _Unique, Sort, SortOptions, Post) -> + In = fun(_) -> {L, fun(_) -> end_of_input end} end, + Out = sort_list_output([]), + TSortOptions = [{format,term} | SortOptions], + do_sort(In, Out, Sort, TSortOptions, Post). + +sort_list_output(L) -> + fun(close) -> + lists:append(lists:reverse(L)); + (Terms) when is_list(Terms) -> + sort_list_output([Terms | L]) + end. + +%% Don't use the file_sorter unless it is known that objects will be +%% put on a temporary file (optimization). +sort_handle(H, ListFun, FileFun, SortOptions, Post, LocalPost, TmpUsageM) -> + Size = case lists:keysearch(size, 1, SortOptions) of + {value, {size, Size0}} -> Size0; + false -> default_option(size) + end, + sort_cache(H, [], Size, {ListFun, FileFun, Post, LocalPost, TmpUsageM}). + +sort_cache([], CL, _Sz, {LF, _FF, Post, LocalPost, _TmpUsageM}) -> + {LF(lists:reverse(CL)), Post, LocalPost}; +sort_cache(Objs, CL, Sz, C) when Sz < 0 -> + sort_cache2(Objs, CL, false, C); +sort_cache([Object | Cont], CL, Sz0, C) -> + Sz = decr_list_size(Sz0, Object), + sort_cache(Cont, [Object | CL], Sz, C); +sort_cache(F, CL, Sz, C) -> + case F() of + Objects when is_list(Objects) -> + sort_cache(Objects, CL, Sz, C); + Term -> + {_LF, _FF, Post, _LocalPost, _TmpUsageM} = C, + post_funs(Post), + throw_error(Term) + end. + +sort_cache2([], CL, _X, {LF, _FF, Post, LocalPost, _TmpUsageM}) -> + {LF(lists:reverse(CL)), Post, LocalPost}; +sort_cache2([Object | Cont], CL, _, C) -> + sort_cache2(Cont, [Object | CL], true, C); +sort_cache2(F, CL, false, C) -> + %% Find one extra object to be sure that temporary file(s) will be + %% used when calling the file_sorter. This works even if + %% duplicates are removed. + case F() of + Objects when is_list(Objects) -> + sort_cache2(Objects, CL, true, C); + Term -> + {_LF, _FF, Post, _LocalPost, _TmpUsageM} = C, + post_funs(Post), + throw_error(Term) + end; +sort_cache2(_Cont, _CL, true, {_LF,_FF,Post,_LocalPost, {not_allowed,M}}) -> + post_funs(Post), + throw_reason({tmpdir_usage, M}); +sort_cache2(Cont, CL, true, {_LF, FF, _Post, _LocalPost, {TmpUsage, M}}) -> + maybe_error_logger(TmpUsage, M), + FF(lists:reverse(CL, Cont)). + +file_sort_handle(H, Kp, SortOptions, TmpDir, Compressed, Post, LocalPost) -> + In = sort_cursor_input(H, 0), + Unique = lists:member(unique, SortOptions) + orelse + lists:keymember(unique, 1, SortOptions), + Out = sort_cursor_list_output(TmpDir, Compressed, Unique), + Reply = do_sort(In, Out, Kp, SortOptions, Post), + case Reply of + {file, FileName} -> + {F, Fd} = open_file(FileName, Compressed, Post), + P = fun() -> _ = file:close(Fd), + _ = file:delete(FileName) + end, + {F, [P | Post], LocalPost}; + {terms, BTerms} -> + try + {[binary_to_term(B) || B <- BTerms], Post, LocalPost} + catch Class:Reason -> + post_funs(Post), + erlang:raise(Class, Reason, erlang:get_stacktrace()) + end + end. + +do_sort(In, Out, Sort, SortOptions, Post) -> + try + case do_sort(In, Out, Sort, SortOptions) of + {error, Reason} -> throw_reason(Reason); + Reply -> Reply + end + catch Class:Term -> + post_funs(Post), + erlang:raise(Class, Term, erlang:get_stacktrace()) + end. + +do_sort(In, Out, sort, SortOptions) -> + file_sorter:sort(In, Out, SortOptions); +do_sort(In, Out, {keysort, KeyPos}, SortOptions) -> + file_sorter:keysort(KeyPos, In, Out, SortOptions). + +del_table(Ets) -> + fun() -> true = ets:delete(Ets) end. + +empty_table(Ets) -> + fun() -> true = ets:delete_all_objects(Ets) end. + +append_loop([[_ | _]=L], _N) -> + L; +append_loop([F], _N) -> + F(); +append_loop([L | Hs], N) -> + append_loop(L, N, Hs). + +append_loop([], N, Hs) -> + append_loop(Hs, N); +append_loop([Object | Cont], N, Hs) -> + [Object | append_loop(Cont, N + 1, Hs)]; +append_loop(F, 0, Hs) -> + case F() of + [] -> + append_loop(Hs, 0); + [Object | Cont] -> + [Object | append_loop(Cont, 1, Hs)]; + Term -> + Term + end; +append_loop(F, _N, Hs) -> % when _N > 0 + fun() -> append_loop(F, 0, Hs) end. + +no_dups([]=Cont, UTab) -> + true = ets:delete_all_objects(UTab), + Cont; +no_dups([Object | Cont], UTab) -> + case ets:member(UTab, Object) of + false -> + true = ets:insert(UTab, {Object}), + %% A fun is created here, even if Cont is a list; objects + %% will not be copied to the ETS table unless requested. + [Object | fun() -> no_dups(Cont, UTab) end]; + true -> + no_dups(Cont, UTab) + end; +no_dups(F, UTab) -> + case F() of + Objects when is_list(Objects) -> + no_dups(Objects, UTab); + Term -> + Term + end. + +%% When all objects have been returned from a cached QLC, the +%% generators of the expression will never be called again, and so the +%% tables used by the generators (LocalPost) can be emptied. + +cache(H, MTab, LocalPost) -> + case ets:member(MTab, 0) of + false -> + true = ets:insert(MTab, {0}), + cache(H, MTab, 1, LocalPost); + true -> + cache_recall(MTab, 1) + end. + +cache([]=Cont, _MTab, _SeqNo, LocalPost) -> + local_post(LocalPost), + Cont; +cache([Object | Cont], MTab, SeqNo, LocalPost) -> + true = ets:insert(MTab, {SeqNo, Object}), + %% A fun is created here, even if Cont is a list; objects + %% will not be copied to the ETS table unless requested. + [Object | fun() -> cache(Cont, MTab, SeqNo + 1, LocalPost) end]; +cache(F, MTab, SeqNo, LocalPost) -> + case F() of + Objects when is_list(Objects) -> + cache(Objects, MTab, SeqNo, LocalPost); + Term -> + Term + end. + +cache_recall(MTab, SeqNo) -> + case ets:lookup(MTab, SeqNo) of + []=Cont -> + Cont; + [{SeqNo, Object}] -> + [Object | fun() -> cache_recall(MTab, SeqNo + 1) end] + end. + +ucache(H, UTab, MTab, LocalPost) -> + case ets:member(MTab, 0) of + false -> + true = ets:insert(MTab, {0}), + ucache(H, UTab, MTab, 1, LocalPost); + true -> + ucache_recall(UTab, MTab, 1) + end. + +ucache([]=Cont, _UTab, _MTab, _SeqNo, LocalPost) -> + local_post(LocalPost), + Cont; +ucache([Object | Cont], UTab, MTab, SeqNo, LocalPost) -> + %% Always using 28 bits hash value... + Hash = erlang:phash2(Object), + case ets:lookup(UTab, Hash) of + [] -> + ucache3(Object, Cont, Hash, UTab, MTab, SeqNo, LocalPost); + HashSeqObjects -> + case lists:keymember(Object, 3, HashSeqObjects) of + true -> + ucache(Cont, UTab, MTab, SeqNo, LocalPost); + false -> + ucache3(Object, Cont, Hash, UTab, MTab, SeqNo, LocalPost) + end + end; +ucache(F, UTab, MTab, SeqNo, LocalPost) -> + case F() of + Objects when is_list(Objects) -> + ucache(Objects, UTab, MTab, SeqNo, LocalPost); + Term -> + Term + end. + +ucache3(Object, Cont, Hash, UTab, MTab, SeqNo, LocalPost) -> + true = ets:insert(UTab, {Hash, SeqNo, Object}), + true = ets:insert(MTab, {SeqNo, Hash}), + %% A fun is created here, even if Cont is a list; objects + %% will not be copied to the ETS table unless requested. + [Object | fun() -> ucache(Cont, UTab, MTab, SeqNo+1, LocalPost) end]. + +ucache_recall(UTab, MTab, SeqNo) -> + case ets:lookup(MTab, SeqNo) of + []=Cont -> + Cont; + [{SeqNo, Hash}] -> + Object = case ets:lookup(UTab, Hash) of + [{Hash, SeqNo, Object0}] -> Object0; + HashSeqObjects -> + {value, {Hash, SeqNo, Object0}} = + lists:keysearch(SeqNo, 2, HashSeqObjects), + Object0 + end, + [Object | fun() -> ucache_recall(UTab, MTab, SeqNo + 1) end] + end. + +-define(LCACHE_FILE(Ref), {Ref, '$_qlc_cache_tmpfiles_'}). + +lcache(H, Ref, LocalPost, TmpDir, MaxList, TmpUsage) -> + Key = ?LCACHE_FILE(Ref), + case get(Key) of + undefined -> + lcache1(H, {Key, LocalPost, TmpDir, MaxList, TmpUsage}, + MaxList, []); + {file, _Fd, _TmpFile, F} -> + F(); + L when is_list(L) -> + L + end. + +lcache1([]=Cont, {Key, LocalPost, _TmpDir, _MaxList, _TmpUsage}, _Sz, Acc) -> + local_post(LocalPost), + case get(Key) of + undefined -> + put(Key, lists:reverse(Acc)), + Cont; + {file, Fd, TmpFile, _F} -> + case lcache_write(Fd, TmpFile, Acc) of + ok -> + Cont; + Error -> + Error + end + end; +lcache1(H, State, Sz, Acc) when Sz < 0 -> + {Key, LocalPost, TmpDir, MaxList, TmpUsage} = State, + GetFile = + case get(Key) of + {file, Fd0, TmpFile, _F} -> + {TmpFile, Fd0}; + undefined when TmpUsage =:= not_allowed -> + error({tmpdir_usage, caching}); + undefined -> + maybe_error_logger(TmpUsage, caching), + FName = tmp_filename(TmpDir), + {F, Fd0} = open_file(FName, [write], LocalPost), + put(Key, {file, Fd0, FName, F}), + {FName, Fd0} + end, + case GetFile of + {FileName, Fd} -> + case lcache_write(Fd, FileName, Acc) of + ok -> + lcache1(H, State, MaxList, []); + Error -> + Error + end; + Error -> + Error + end; +lcache1([Object | Cont], State, Sz0, Acc) -> + Sz = decr_list_size(Sz0, Object), + [Object | lcache2(Cont, State, Sz, [Object | Acc])]; +lcache1(F, State, Sz, Acc) -> + case F() of + Objects when is_list(Objects) -> + lcache1(Objects, State, Sz, Acc); + Term -> + Term + end. + +lcache2([Object | Cont], State, Sz0, Acc) when Sz0 >= 0 -> + Sz = decr_list_size(Sz0, Object), + [Object | lcache2(Cont, State, Sz, [Object | Acc])]; +lcache2(Cont, State, Sz, Acc) -> + fun() -> lcache1(Cont, State, Sz, Acc) end. + +lcache_write(Fd, FileName, L) -> + write_binary_terms(t2b(L, []), Fd, FileName). + +t2b([], Bs) -> + Bs; +t2b([T | Ts], Bs) -> + t2b(Ts, [term_to_binary(T) | Bs]). + +del_lcache(Ref) -> + fun() -> + Key = ?LCACHE_FILE(Ref), + case get(Key) of + undefined -> + ok; + {file, Fd, TmpFile, _F} -> + _ = file:close(Fd), + _ = file:delete(TmpFile), + erase(Key); + _L -> + erase(Key) + end + end. + +tag_objects([Object | Cont], T) -> + [{Object, T} | tag_objects2(Cont, T + 1)]; +tag_objects([]=Cont, _T) -> + Cont; +tag_objects(F, T) -> + case F() of + Objects when is_list(Objects) -> + tag_objects(Objects, T); + Term -> + Term + end. + +tag_objects2([Object | Cont], T) -> + [{Object, T} | tag_objects2(Cont, T + 1)]; +tag_objects2(Objects, T) -> + fun() -> tag_objects(Objects, T) end. + +untag_objects([]=Objs) -> + Objs; +untag_objects([{Object, _N} | Cont]) -> + [Object | untag_objects2(Cont)]; +untag_objects(F) -> + case F() of + Objects when is_list(Objects) -> + untag_objects(Objects); + Term -> % Cannot happen + Term + end. + +untag_objects2([{Object, _N} | Cont]) -> + [Object | untag_objects2(Cont)]; +untag_objects2([]=Cont) -> + Cont; +untag_objects2(Objects) -> + fun() -> untag_objects(Objects) end. + +%%% Merge join. +%%% Temporary files are used when many objects have the same key. + +-define(JWRAP(E1, E2), [E1 | E2]). + +-record(m, {id, tmpdir, max_list, tmp_usage}). + +merge_join([]=Cont, _C1, _T2, _C2, _Opt) -> + Cont; +merge_join([E1 | L1], C1, L2, C2, Opt) -> + #qlc_opt{tmpdir = TmpDir, max_list = MaxList, + tmpdir_usage = TmpUsage} = Opt, + M = #m{id = merge_join_id(), tmpdir = TmpDir, max_list = MaxList, + tmp_usage = TmpUsage}, + merge_join2(E1, element(C1, E1), L1, C1, L2, C2, M); +merge_join(F1, C1, L2, C2, Opt) -> + case F1() of + L1 when is_list(L1) -> + merge_join(L1, C1, L2, C2, Opt); + T1 -> + T1 + end. + +merge_join1(_E2, _K2, []=Cont, _C1, _L2, _C2, M) -> + end_merge_join(Cont, M); +merge_join1(E2, K2, [E1 | L1], C1, L2, C2, M) -> + K1 = element(C1, E1), + if + K1 == K2 -> + same_keys2(E1, K1, L1, C1, L2, C2, E2, M); + K1 > K2 -> + merge_join2(E1, K1, L1, C1, L2, C2, M); + true -> % K1 < K2 + merge_join1(E2, K2, L1, C1, L2, C2, M) + end; +merge_join1(E2, K2, F1, C1, L2, C2, M) -> + case F1() of + L1 when is_list(L1) -> + merge_join1(E2, K2, L1, C1, L2, C2, M); + T1 -> + T1 + end. + +merge_join2(_E1, _K1, _L1, _C1, []=Cont, _C2, M) -> + end_merge_join(Cont, M); +merge_join2(E1, K1, L1, C1, [E2 | L2], C2, M) -> + K2 = element(C2, E2), + if + K1 == K2 -> + same_keys2(E1, K1, L1, C1, L2, C2, E2, M); + K1 > K2 -> + merge_join2(E1, K1, L1, C1, L2, C2, M); + true -> % K1 < K2 + merge_join1(E2, K2, L1, C1, L2, C2, M) + end; +merge_join2(E1, K1, L1, C1, F2, C2, M) -> + case F2() of + L2 when is_list(L2) -> + merge_join2(E1, K1, L1, C1, L2, C2, M); + T2 -> + T2 + end. + +%% element(C2, E2_0) == K1 +same_keys2(E1, K1, L1, C1, [], _C2, E2_0, M) -> + Cont = fun(_L1b) -> end_merge_join([], M) end, + loop_same_keys(E1, K1, L1, C1, [E2_0], Cont, M); +same_keys2(E1, K1, L1, C1, [E2 | L2]=L2_0, C2, E2_0, M) -> + K2 = element(C2, E2), + if + K1 == K2 -> + same_keys1(E1, K1, L1, C1, E2, C2, E2_0, L2, M); + K1 < K2 -> + [?JWRAP(E1, E2_0) | + fun() -> same_loop1(L1, K1, C1, E2_0, L2_0, C2, M) end] + end; +same_keys2(E1, K1, L1, C1, F2, C2, E2_0, M) -> + case F2() of + L2 when is_list(L2) -> + same_keys2(E1, K1, L1, C1, L2, C2, E2_0, M); + T2 -> + Cont = fun(_L1b) -> T2 end, + loop_same_keys(E1, K1, L1, C1, [E2_0], Cont, M) + end. + +same_loop1([], _K1_0, _C1, _E2_0, _L2, _C2, M) -> + end_merge_join([], M); +same_loop1([E1 | L1], K1_0, C1, E2_0, L2, C2, M) -> + K1 = element(C1, E1), + if + K1 == K1_0 -> + [?JWRAP(E1, E2_0) | + fun() -> same_loop1(L1, K1_0, C1, E2_0, L2, C2, M) end]; + K1_0 < K1 -> + merge_join2(E1, K1, L1, C1, L2, C2, M) + end; +same_loop1(F1, K1_0, C1, E2_0, L2, C2, M) -> + case F1() of + L1 when is_list(L1) -> + same_loop1(L1, K1_0, C1, E2_0, L2, C2, M); + T1 -> + T1 + end. + +%% element(C2, E2_0) == K1, element(C2, E2) == K1_0 +same_keys1(E1_0, K1_0, []=L1, C1, E2, C2, E2_0, L2, M) -> + [?JWRAP(E1_0, E2_0), ?JWRAP(E1_0, E2) | + fun() -> same_keys(K1_0, E1_0, L1, C1, L2, C2, M) end]; +same_keys1(E1_0, K1_0, [E1 | _]=L1, C1, E2, C2, E2_0, L2, M) -> + K1 = element(C1, E1), + if + K1_0 == K1 -> + E2s = [E2, E2_0], + Sz0 = decr_list_size(M#m.max_list, E2s), + same_keys_cache(E1_0, K1_0, L1, C1, L2, C2, E2s, Sz0, M); + K1_0 < K1 -> + [?JWRAP(E1_0, E2_0), ?JWRAP(E1_0, E2) | + fun() -> same_keys(K1_0, E1_0, L1, C1, L2, C2, M) end] + end; +same_keys1(E1_0, K1_0, F1, C1, E2, C2, E2_0, L2, M) -> + case F1() of + L1 when is_list(L1) -> + same_keys1(E1_0, K1_0, L1, C1, E2, C2, E2_0, L2, M); + T1 -> + Cont = fun() -> T1 end, + loop_same(E1_0, [E2, E2_0], Cont) + end. + +%% There is no such element E in L1 such that element(C1, E) == K1. +same_keys(_K1, _E1, _L1, _C1, []=Cont, _C2, M) -> + end_merge_join(Cont, M); +same_keys(K1, E1, L1, C1, [E2 | L2], C2, M) -> + K2 = element(C2, E2), + if + K1 == K2 -> + [?JWRAP(E1, E2) | + fun() -> same_keys(K1, E1, L1, C1, L2, C2, M) end]; + K1 < K2 -> + merge_join1(E2, K2, L1, C1, L2, C2, M) + end; +same_keys(K1, E1, L1, C1, F2, C2, M) -> + case F2() of + L2 when is_list(L2) -> + same_keys(K1, E1, L1, C1, L2, C2, M); + T2 -> + T2 + end. + +%% There are at least two elements in [E1 | L1] that are to be combined +%% with the elements in E2s (length(E2s) > 1). This loop covers the case +%% when all elements in E2 with key K1 can be kept in RAM. +same_keys_cache(E1, K1, L1, C1, [], _C2, E2s, _Sz, M) -> + Cont = fun(_L1b) -> end_merge_join([], M) end, + loop_same_keys(E1, K1, L1, C1, E2s, Cont, M); +same_keys_cache(E1, K1, L1, C1, L2, C2, E2s, Sz0, M) when Sz0 < 0 -> + case init_merge_join(M) of + ok -> + Sz = M#m.max_list, + C = fun() -> + same_keys_file(E1, K1, L1, C1, L2, C2, [], Sz, M) + end, + write_same_keys(E1, E2s, M, C); + Error -> + Error + end; +same_keys_cache(E1, K1, L1, C1, [E2 | L2], C2, E2s, Sz0, M) -> + K2 = element(C2, E2), + if + K1 == K2 -> + Sz = decr_list_size(Sz0, E2), + same_keys_cache(E1, K1, L1, C1, L2, C2, [E2 | E2s], Sz, M); + K1 < K2 -> + Cont = fun(L1b) -> merge_join1(E2, K2, L1b, C1, L2, C2, M) end, + loop_same_keys(E1, K1, L1, C1, E2s, Cont, M) + end; +same_keys_cache(E1, K1, L1, C1, F2, C2, E2s, Sz, M) -> + case F2() of + L2 when is_list(L2) -> + same_keys_cache(E1, K1, L1, C1, L2, C2, E2s, Sz, M); + T2 -> + Cont = fun(_L1b) -> T2 end, + loop_same_keys(E1, K1, L1, C1, E2s, Cont, M) + end. + +%% E2s holds all elements E2 in L2 such that element(E2, C2) == K1. +loop_same_keys(E1, _K1, [], _C1, E2s, _Cont, M) -> + end_merge_join(loop_same(E1, E2s, []), M); +loop_same_keys(E1, K1, L1, C1, E2s, Cont, M) -> + loop_same(E1, E2s, fun() -> loop_keys(K1, L1, C1, E2s, Cont, M) end). + +loop_same(_E1, [], L) -> + L; +loop_same(E1, [E2 | E2s], L) -> + loop_same(E1, E2s, [?JWRAP(E1, E2) | L]). + +loop_keys(K, [E1 | L1]=L1_0, C1, E2s, Cont, M) -> + K1 = element(C1, E1), + if + K1 == K -> + loop_same_keys(E1, K1, L1, C1, E2s, Cont, M); + K1 > K -> + Cont(L1_0) + end; +loop_keys(_K, []=L1, _C1, _Es2, Cont, _M) -> + Cont(L1); +loop_keys(K, F1, C1, E2s, Cont, M) -> + case F1() of + L1 when is_list(L1) -> + loop_keys(K, L1, C1, E2s, Cont, M); + T1 -> + T1 + end. + +%% This is for the case when a temporary file has to be used. +same_keys_file(E1, K1, L1, C1, [], _C2, E2s, _Sz, M) -> + Cont = fun(_L1b) -> end_merge_join([], M) end, + same_keys_file_write(E1, K1, L1, C1, E2s, M, Cont); +same_keys_file(E1, K1, L1, C1, L2, C2, E2s, Sz0, M) when Sz0 < 0 -> + Sz = M#m.max_list, + C = fun() -> same_keys_file(E1, K1, L1, C1, L2, C2, [], Sz, M) end, + write_same_keys(E1, E2s, M, C); +same_keys_file(E1, K1, L1, C1, [E2 | L2], C2, E2s, Sz0, M) -> + K2 = element(C2, E2), + if + K1 == K2 -> + Sz = decr_list_size(Sz0, E2), + same_keys_file(E1, K1, L1, C1, L2, C2, [E2 | E2s], Sz, M); + K1 < K2 -> + Cont = fun(L1b) -> + %% The temporary file could be truncated here. + merge_join1(E2, K2, L1b, C1, L2, C2, M) + end, + same_keys_file_write(E1, K1, L1, C1, E2s, M, Cont) + end; +same_keys_file(E1, K1, L1, C1, F2, C2, E2s, Sz, M) -> + case F2() of + L2 when is_list(L2) -> + same_keys_file(E1, K1, L1, C1, L2, C2, E2s, Sz, M); + T2 -> + Cont = fun(_L1b) -> T2 end, + same_keys_file_write(E1, K1, L1, C1, E2s, M, Cont) + end. + +same_keys_file_write(E1, K1, L1, C1, E2s, M, Cont) -> + C = fun() -> loop_keys_file(K1, L1, C1, Cont, M) end, + write_same_keys(E1, E2s, M, C). + +write_same_keys(_E1, [], _M, Cont) -> + Cont(); +write_same_keys(E1, Es2, M, Cont) -> + write_same_keys(E1, Es2, M, [], Cont). + +%% Avoids one (the first) traversal of the temporary file. +write_same_keys(_E1, [], M, E2s, Objs) -> + case write_merge_join(M, E2s) of + ok -> Objs; + Error -> Error + end; +write_same_keys(E1, [E2 | E2s0], M, E2s, Objs) -> + BE2 = term_to_binary(E2), + write_same_keys(E1, E2s0, M, [BE2 | E2s], [?JWRAP(E1, E2) | Objs]). + +loop_keys_file(K, [E1 | L1]=L1_0, C1, Cont, M) -> + K1 = element(C1, E1), + if + K1 == K -> + C = fun() -> loop_keys_file(K1, L1, C1, Cont, M) end, + read_merge_join(M, E1, C); + K1 > K -> + Cont(L1_0) + end; +loop_keys_file(_K, []=L1, _C1, Cont, _M) -> + Cont(L1); +loop_keys_file(K, F1, C1, Cont, M) -> + case F1() of + L1 when is_list(L1) -> + loop_keys_file(K, L1, C1, Cont, M); + T1 -> + T1 + end. + +end_merge_join(Reply, M) -> + end_merge_join(M), + Reply. + +%% Normally post_funs() cleans up temporary files by calling funs in +%% Post. It seems impossible to do that with the temporary file(s) +%% used when many objects have the same key--such a file is created +%% after the setup when Post is prepared. There seems to be no real +%% alternative to using the process dictionary, at least as things +%% have been implemented so far. Probably all of Post could have been +%% put in the process dictionary... + +-define(MERGE_JOIN_FILE, '$_qlc_merge_join_tmpfiles_'). + +init_merge_join(#m{id = MergeId, tmpdir = TmpDir, tmp_usage = TmpUsage}) -> + case tmp_merge_file(MergeId) of + {Fd, FileName} -> + case file:position(Fd, bof) of + {ok, 0} -> + case file:truncate(Fd) of + ok -> + ok; + Error -> + file_error(FileName, Error) + end; + Error -> + file_error(FileName, Error) + end; + none when TmpUsage =:= not_allowed -> + error({tmpdir_usage, joining}); + none -> + maybe_error_logger(TmpUsage, joining), + FName = tmp_filename(TmpDir), + case file:open(FName, [raw, binary, read, write]) of + {ok, Fd} -> + TmpFiles = get(?MERGE_JOIN_FILE), + put(?MERGE_JOIN_FILE, [{MergeId, Fd, FName} | TmpFiles]), + ok; + Error -> + file_error(FName, Error) + end + end. + +write_merge_join(#m{id = MergeId}, BTerms) -> + {Fd, FileName} = tmp_merge_file(MergeId), + write_binary_terms(BTerms, Fd, FileName). + +read_merge_join(#m{id = MergeId}, E1, Cont) -> + {Fd, FileName} = tmp_merge_file(MergeId), + case file:position(Fd, bof) of + {ok, 0} -> + Fun = fun([], _) -> + Cont(); + (Ts, C) when is_list(Ts) -> + join_read_terms(E1, Ts, C) + end, + file_loop_read(<<>>, ?CHUNK_SIZE, {Fd, FileName}, Fun); + Error -> + file_error(FileName, Error) + end. + +join_read_terms(_E1, [], Objs) -> + Objs; +join_read_terms(E1, [E2 | E2s], Objs) -> + join_read_terms(E1, E2s, [?JWRAP(E1, E2) | Objs]). + +end_merge_join(#m{id = MergeId}) -> + case tmp_merge_file(MergeId) of + none -> + ok; + {Fd, FileName} -> + _ = file:close(Fd), + _ = file:delete(FileName), + put(?MERGE_JOIN_FILE, + lists:keydelete(MergeId, 1, get(?MERGE_JOIN_FILE))) + end. + +end_all_merge_joins() -> + lists:foreach( + fun(Id) -> end_merge_join(#m{id = Id}) end, + [Id || {Id, _Fd, _FileName} <- lists:flatten([get(?MERGE_JOIN_FILE)])]), + erase(?MERGE_JOIN_FILE). + +merge_join_id() -> + case get(?MERGE_JOIN_FILE) of + undefined -> + put(?MERGE_JOIN_FILE, []); + _ -> + ok + end, + make_ref(). + +tmp_merge_file(MergeId) -> + TmpFiles = get(?MERGE_JOIN_FILE), + case lists:keysearch(MergeId, 1, TmpFiles) of + {value, {MergeId, Fd, FileName}} -> + {Fd, FileName}; + false -> + none + end. + +decr_list_size(Sz0, E) when is_integer(Sz0) -> + Sz0 - erlang:external_size(E). + +%%% End of merge join. + +lookup_join([E1 | L1], C1, LuF, C2, Rev) -> + K1 = element(C1, E1), + case LuF(C2, [K1]) of + [] -> + lookup_join(L1, C1, LuF, C2, Rev); + [E2] when Rev -> + [?JWRAP(E2, E1) | fun() -> lookup_join(L1, C1, LuF, C2, Rev) end]; + [E2] -> + [?JWRAP(E1, E2) | fun() -> lookup_join(L1, C1, LuF, C2, Rev) end]; + E2s when is_list(E2s), Rev -> + [?JWRAP(E2, E1) || E2 <- E2s] ++ + fun() -> lookup_join(L1, C1, LuF, C2, Rev) end; + E2s when is_list(E2s) -> + [?JWRAP(E1, E2) || E2 <- E2s] ++ + fun() -> lookup_join(L1, C1, LuF, C2, Rev) end; + Term -> + Term + end; +lookup_join([]=Cont, _C1, _LuF, _C2, _Rev) -> + Cont; +lookup_join(F1, C1, LuF, C2, Rev) -> + case F1() of + L1 when is_list(L1) -> + lookup_join(L1, C1, LuF, C2, Rev); + T1 -> + T1 + end. + +maybe_error_logger(allowed, _) -> + ok; +maybe_error_logger(Name, Why) -> + [_, _, {?MODULE,maybe_error_logger,_} | Stacktrace] = expand_stacktrace(), + Trimmer = fun(M, _F, _A) -> M =:= erl_eval end, + Formater = fun(Term, I) -> io_lib:print(Term, I, 80, -1) end, + X = lib:format_stacktrace(1, Stacktrace, Trimmer, Formater), + error_logger:Name("qlc: temporary file was needed for ~w\n~s\n", + [Why, lists:flatten(X)]). + +expand_stacktrace() -> + D = erlang:system_flag(backtrace_depth, 8), + try + %% Compensate for a bug in erlang:system_flag/2: + expand_stacktrace(erlang:max(1, D)) + after + erlang:system_flag(backtrace_depth, D) + end. + +expand_stacktrace(D) -> + _ = erlang:system_flag(backtrace_depth, D), + {'EXIT', {foo, Stacktrace}} = (catch erlang:error(foo)), + L = lists:takewhile(fun({M,_,_}) -> M =/= ?MODULE + end, lists:reverse(Stacktrace)), + if + length(L) < 3 andalso length(Stacktrace) =:= D -> + expand_stacktrace(D + 5); + true -> + Stacktrace + end. + +write_binary_terms(BTerms, Fd, FileName) -> + case file:write(Fd, size_bin(BTerms, [])) of + ok -> + ok; + Error -> + file_error(FileName, Error) + end. + +post_funs(L) -> + end_all_merge_joins(), + local_post(L). + +local_post(L) -> + lists:foreach(fun(undefined) -> ok; + (F) -> catch (F)() + end, L). + +call(undefined, _Arg, Default, _Post) -> + Default; +call(Fun, Arg, _Default, Post) -> + try + Fun(Arg) + catch Class:Reason -> + post_funs(Post), + erlang:raise(Class, Reason, erlang:get_stacktrace()) + end. + +grd(undefined, _Arg) -> + false; +grd(Fun, Arg) -> + case Fun(Arg) of + true -> + true; + _ -> + false + end. + +family(L) -> + sofs:to_external(sofs:relation_to_family(sofs:relation(L))). + +family_union(L) -> + R = sofs:relation(L,[{atom,[atom]}]), + sofs:to_external(sofs:family_union(sofs:relation_to_family(R))). + +file_error(File, {error, Reason}) -> + error({file_error, File, Reason}). + +-spec throw_file_error(string(), {'error',atom()}) -> no_return(). + +throw_file_error(File, {error, Reason}) -> + throw_reason({file_error, File, Reason}). + +-spec throw_reason(term()) -> no_return(). + +throw_reason(Reason) -> + throw_error(error(Reason)). + +-spec throw_error(term()) -> no_return(). + +throw_error(Error) -> + throw(Error). + +error(Reason) -> + {error, ?MODULE, Reason}. diff --git a/lib/stdlib/src/qlc_pt.erl b/lib/stdlib/src/qlc_pt.erl new file mode 100644 index 0000000000..2d7874d99f --- /dev/null +++ b/lib/stdlib/src/qlc_pt.erl @@ -0,0 +1,2746 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(qlc_pt). + +%%% Purpose: Implements the qlc Parse Transform. + +-export([parse_transform/2, transform_from_evaluator/2, + transform_expression/2]). + +-include_lib("stdlib/include/ms_transform.hrl"). + +-define(APIMOD, qlc). +-define(Q, q). + +%% Also in qlc.erl. +-define(QLC_Q(L1, L2, L3, L4, LC, Os), + {call,L1,{remote,L2,{atom,L3,?APIMOD},{atom,L4,?Q}},[LC | Os]}). +-define(QLC_QQ(L1, L2, L3, L4, L5, L6, LC, Os), % packages... + {call,L1,{remote,L2,{record_field,L3,{atom,L4,''}, + {atom,L5,?APIMOD}},{atom,L6,?Q}},[LC | Os]}). +-define(IMP_Q(L1, L2, LC, Os), {call,L,{atom,L2,?Q},[LC | Os]}). + +%% Also in qlc.erl. +-record(qlc_lc, % qlc:q/1,2, a query handle + {lc, + opt % #qlc_opt + }). + +-record(state, {imp, maxargs, records, xwarnings = []}). + +%-define(debug, true). + +-ifdef(debug). +-define(DEBUG(S, A), io:format(S, A)). +-else. +-define(DEBUG(S, A), ok). +-endif. + +%% erl_eval cannot interpret funs with more than 20 arguments: +-define(EVAL_MAX_NUM_OF_ARGS, 20). +%% Currently the compiler can handle at most 255 arguments. +-define(COMPILE_MAX_NUM_OF_ARGS, 250). + +-define(QLC_FILE, qlc_current_file). + +%%% +%%% Exported functions +%%% + +parse_transform(Forms, Options) -> + ?DEBUG("qlc Parse Transform~n", []), + State = #state{imp = is_qlc_q_imported(Forms), + maxargs = ?COMPILE_MAX_NUM_OF_ARGS, + records = record_attributes(Forms)}, + case called_from_type_checker(Options) of + true -> + %% The returned value should conform to the types, but + %% need not evaluate to anything meaningful. + L = 0, + {tuple,_,Fs0} = abstr(#qlc_lc{}, L), + F = fun(_Id, LC, A) -> + Init = simple(L, 'V', LC, L), + {{tuple,L,set_field(#qlc_lc.lc, Fs0, Init)}, A} + end, + {Forms1,ok} = qlc_mapfold(F, ok, Forms, State), + Forms1; + false -> + FormsNoShadows = no_shadows(Forms, State), + case compile_messages(Forms, FormsNoShadows, Options, State) of + {[],[],Warnings} -> + {NewForms, State1} = transform(FormsNoShadows, State), + ExtraWs = State1#state.xwarnings, + {[],WForms} = no_duplicates(NewForms, [], Warnings, + ExtraWs, Options), + WForms ++ NewForms; + {E0,Errors,Warnings} -> + {EForms,WForms} = no_duplicates(Forms, E0++Errors, + Warnings, [], Options), + EForms ++ WForms ++ Forms + end + end. + +transform_from_evaluator(LC, Bindings) -> + ?DEBUG("qlc Parse Transform (Evaluator Version)~n", []), + transform_expression(LC, Bindings, false). + +transform_expression(LC, Bindings) -> + transform_expression(LC, Bindings, true). + +%%% +%%% Local functions +%%% + +called_from_type_checker(Options) -> + lists:member(type_checker, Options). + +transform_expression(LC, Bs0, WithLintErrors) -> + L = 1, + As = [{var,L,V} || {V,_Val} <- Bs0], + Ar = length(As), + F = {function,L,bar,Ar,[{clause,L,As,[],[?QLC_Q(L, L, L, L, LC, [])]}]}, + Forms = [{attribute,L,file,{"foo",L}}, + {attribute,L,module,foo}, F], + State = #state{imp = false, + maxargs = ?EVAL_MAX_NUM_OF_ARGS, + records = record_attributes(Forms)}, + Options = [], + FormsNoShadows = no_shadows(Forms, State), + case compile_messages(Forms, FormsNoShadows, Options, State) of + {[],[],_Warnings} -> + {NewForms,_State1} = transform(FormsNoShadows, State), + {function,L,bar,Ar,[{clause,L,As,[],[NF]}]} = + lists:last(NewForms), + {ok,NF}; + {E0,Errors,_Warnings} when WithLintErrors -> + {not_ok,mforms(error, E0 ++ Errors)}; + {E0,Errors0,_Warnings} -> + [{error,Reason} | _] = mforms(error, E0++Errors0), + {not_ok, {error, ?APIMOD, Reason}} + end. + +-define(I(I), {integer, L, I}). +-define(A(A), {atom, L, A}). +-define(V(V), {var, L, V}). +-define(ABST_NO_MORE, {nil, L}). +-define(ABST_MORE(Obj, Cont), {cons, L, Obj, Cont}). + +%% Qualifier identifier. +%% The first one encountered in a QLC has no=1. +-record(qid, {lcid,no}). + +mforms(Tag, L) -> + lists:sort([{Tag,M} || {_File,Ms} <- L, M <- Ms]). + +%% Avoid duplicated lint warnings and lint errors. Care has been taken +%% not to introduce unused variables in the transformed code. +%% +no_duplicates(Forms, Errors, Warnings0, ExtraWarnings, Options) -> + %% Some mistakes such as "{X} =:= {}" are found by strong + %% validation as well as by qlc. Prefer the warnings from qlc: + Warnings1 = mforms(Warnings0) -- + ([{File,[{L,v3_core,nomatch}]} || + {File,[{L,qlc,M}]} <- mforms(ExtraWarnings), + lists:member(M, [nomatch_pattern,nomatch_filter])] + ++ + [{File,[{L,sys_core_fold,nomatch_guard}]} || + {File,[{L,qlc,M}]} <- mforms(ExtraWarnings), + M =:= nomatch_filter]), + Warnings = Warnings1 ++ ExtraWarnings, + {Es1,Ws1} = compile_forms(Forms, Options), + Es = mforms(Errors) -- mforms(Es1), + Ws = mforms(Warnings) -- mforms(Ws1), + {mforms2(error, Es),mforms2(warning, Ws)}. + +mforms(L) -> + lists:sort([{File,[M]} || {File,Ms} <- L, M <- Ms]). + +mforms2(Tag, L) -> + Line = 0, + ML = lists:flatmap(fun({File,Ms}) -> + [[{attribute,Line,file,{File,Line}}, {Tag,M}] || + M <- Ms] + end, lists:sort(L)), + lists:flatten(lists:sort(ML)). + +is_qlc_q_imported(Forms) -> + [[] || {attribute,_,import,{?APIMOD,FAs}} <- Forms, {?Q,1} <- FAs] =/= []. + +record_attributes(Forms) -> + [A || A = {attribute, _, record, _D} <- Forms]. + +%% Get the compile errors and warnings for the QLC as well as messages +%% for introduced variables used in list expressions and messages for +%% badargs. Since the QLCs will be replaced by some terms, the +%% compiler cannot find the errors and warnings after the parse +%% transformation. +%% +compile_messages(Forms, FormsNoShadows, Options, State) -> + %% The qlc module cannot handle binary generators. + BGenF = fun(_QId,{b_generate,Line,_P,_LE}=BGen, GA, A) -> + M = {loc(Line),?APIMOD,binary_generator}, + {BGen,[{get(?QLC_FILE),[M]}|GA],A}; + (_QId, Q, GA, A) -> + {Q,GA,A} + end, + {_,BGens} = qual_fold(BGenF, [], [], FormsNoShadows, State), + GenForm = used_genvar_check(FormsNoShadows, State), + ?DEBUG("GenForm = ~s~n", [catch erl_pp:form(GenForm)]), + WarnFun = fun(Id, LC, A) -> {tag_lines(LC, get_lcid_no(Id)), A} end, + {WForms,ok} = qlc_mapfold(WarnFun, ok, Forms, State), + {Es,Ws} = compile_forms(WForms ++ [GenForm], Options), + {badarg(Forms, State),tagged_messages(Es)++BGens,tagged_messages(Ws)}. + +badarg(Forms, State) -> + F = fun(_Id, {lc,_L,_E,_Qs}=LC, Es) -> + {LC,Es}; + (Id, A, Es) -> + E = {get_lcid_line(Id),?APIMOD,not_a_query_list_comprehension}, + {A,[{get(?QLC_FILE), [E]} | Es]} + end, + {_,E0} = qlc_mapfold(F, [], Forms, State), + E0. + +tag_lines(E, No) -> + map_lines(fun(Id) -> + case is_lcid(Id) of + true -> Id; + false -> make_lcid(Id, No) + end + end, E). + +map_lines(F, E) -> + erl_lint:modify_line(E, F). + +tagged_messages(MsL) -> + [{File, + [{Loc,Mod,untag(T)} || {Loc0,Mod,T} <- Ms, + {true,Loc} <- [tloc(Loc0)]]} + || {File,Ms} <- MsL] + ++ + [{File,[{Loc,?APIMOD,{used_generator_variable,V}}]} + || {_, Ms} <- MsL, + {XLoc,erl_lint,{unbound_var,_}} <- Ms, + {Loc,File,V} <- [extra(XLoc)]]. + +tloc({Id,Column}) -> + {IsLcid,T} = tloc(Id), + {IsLcid,{T,Column}}; +tloc(Id) -> + IsLcid = is_lcid(Id), + {IsLcid,case IsLcid of + true -> get_lcid_line(Id); + false -> any + end}. + +extra({extra,Line,File,V}) -> + {Line,File,V}; +extra({Line,Column}) -> + case extra(Line) of + {L,File,V} -> {{L,Column},File,V}; + Else -> Else + end; +extra(Else) -> + Else. + +untag([E | Es]) -> [untag(E) | untag(Es)]; +untag(T) when is_tuple(T) -> list_to_tuple(untag(tuple_to_list(T))); +untag(E) -> + case is_lcid(E) of + true -> get_lcid_line(E); + false -> E + end. + +%% -> [{Qid,[variable()]}]. +%% +%% For each qualifier the introduced variables are found. The +%% variables introduced in filters are very much like the variables +%% introduced in generator patterns. If no variables are introduced in +%% a qualifier, [variable()] is empty. +%% +%% Generator: all variables occurring in the pattern are introduced +%% variables. +%% Filter: all variables bound inside the filter are introduced +%% variables (unless they are unsafe). +%% +intro_variables(FormsNoShadows, State) -> + Fun = fun(QId, {T,_L,P0,_E0}=Q, {GVs,QIds}, Foo) when T =:= b_generate; + T =:= generate -> + PVs = qlc:var_ufold(fun({var,_,V}) -> {QId,V} end, P0), + {Q,{ordsets:to_list(PVs) ++ GVs,[{QId,[]} | QIds]},Foo}; + (QId, Filter0, {GVs,QIds}, Foo) -> + %% The filter F is replaced by begin E, F, E end, + %% where E is an LC expression consisting of a + %% template mentioning all variables occurring in F. + Vs = ordsets:to_list(qlc:vars(Filter0)), + Id = QId#qid.lcid, + LC1 = embed_vars(intro_set_line({QId,f1}, Vs), Id), + LC2 = embed_vars(intro_set_line({QId,f2}, Vs), Id), + AnyLine = -1, + Filter = {block,AnyLine,[LC1,Filter0,LC2]}, + {Filter,{GVs,[{QId,[]} | QIds]},Foo} + end, + Acc0 = {[],[]}, + {FForms,{GenVars,QIds}} = + qual_fold(Fun, Acc0, [], FormsNoShadows, State), + %% Note: the linter messages are the ones we are looking for. + %% If there are no linter messages, the compiler will crash (ignored). + Es0 = compile_errors(FForms), + %% A variable is bound inside the filter if it is not bound before + %% the filter, but it is bound after the filter (obviously). + Before = [{QId,V} || {{QId,f1},erl_lint,{unbound_var,V}} <- Es0], + After = [{QId,V} || {{QId,f2},erl_lint,{unbound_var,V}} <- Es0], + Unsafe = [{QId,V} || {{QId,f2},erl_lint,{unsafe_var,V,_Where}} <- Es0], + ?DEBUG("Before = ~p~n", [Before]), + ?DEBUG("After = ~p~n", [After]), + ?DEBUG("Unsafe = ~p~n", [Unsafe]), + ?DEBUG("Filter ~p~n", [(Before -- After) -- Unsafe]), + IV = (Before -- After) -- Unsafe, + I1 = family(IV ++ GenVars), + sofs:to_external(sofs:family_union(sofs:family(QIds), I1)). + +intro_set_line(Tag, Vars) -> + L = erl_parse:set_line(1, fun(_) -> Tag end), + [{var,L,V} || V <- Vars]. + +compile_errors(FormsNoShadows) -> + case compile_forms(FormsNoShadows, []) of + {[], _Warnings} -> + []; + {Errors, _Warnings} -> + ?DEBUG("got errors ~p~n", [Errors]), + lists:flatmap(fun({_File,Es}) -> Es end, Errors) + end. + +-define(MAX_NUM_OF_LINES, 23). % assume max 1^23 lines (> 8 millions) + +compile_forms(Forms0, Options) -> + Forms = [F || F <- Forms0, element(1, F) =/= eof] ++ + [{eof,1 bsl ?MAX_NUM_OF_LINES}], + try + case compile:noenv_forms(Forms, compile_options(Options)) of + {ok, _ModName, Ws0} -> + {[], Ws0}; + {error, Es0, Ws0} -> + {Es0, Ws0} + end + catch _:_ -> + %% The compiler is not available. Use the linter instead. + case erl_lint:module(Forms, lint_options(Options)) of + {ok, Warnings} -> + {[], Warnings}; + {error, Errors, Warnings} -> + {Errors, Warnings} + end + end. + +compile_options(Options) -> + No = [report,report_errors,report_warnings,'P','E' | bitstr_options()], + [strong_validation,return | skip_options(No, Options)]. + +lint_options(Options) -> + skip_options(bitstr_options(), Options). + +skip_options(Skip, Options) -> + [O || O <- Options, not lists:member(O, Skip)]. + +bitstr_options() -> + [binary_comprehension,bitlevel_binaries]. + +%% In LCs it is possible to use variables introduced in filters and +%% generator patterns in the right hand side of generators (ListExpr), +%% but in QLCs this is not allowed. +%% +%% A brand new function is returned such that there is one expression +%% for each ListExpr. The expression mentions all introduced variables +%% occurring in ListExpr. Running the function through the compiler +%% yields error messages for erroneous use of introduced variables. +%% The messages have the form +%% {{extra,LineNo,File,Var},Module,{unbound_var,V}}, where Var is the +%% original variable name and V is the name invented by no_shadows/2. +%% +used_genvar_check(FormsNoShadows, State) -> + F = fun(QId, {T, Ln, _P, LE}=Q, {QsIVs0, Exprs0}, IVsSoFar0) + when T =:= b_generate; T =:= generate -> + F = fun({var, _, V}=Var) -> + {var, L, OrigVar} = undo_no_shadows(Var), + AF = fun(Line) -> + {extra, Line, get(?QLC_FILE), OrigVar} + end, + L2 = erl_parse:set_line(L, AF), + {var, L2, V} + end, + Vs = [Var || {var, _, V}=Var <- qlc:var_fold(F, [], LE), + lists:member(V, IVsSoFar0)], + Exprs = case Vs of + [] -> Exprs0; + _ -> [embed_vars(Vs, Ln) | Exprs0] + end, + {QsIVs,IVsSoFar} = q_intro_vars(QId, QsIVs0, IVsSoFar0), + {Q, {QsIVs, Exprs}, IVsSoFar}; + (QId, Filter, {QsIVs0, Exprs}, IVsSoFar0) -> + {QsIVs, IVsSoFar} = q_intro_vars(QId, QsIVs0, IVsSoFar0), + {Filter, {QsIVs, Exprs}, IVsSoFar} + end, + IntroVars = intro_variables(FormsNoShadows, State), + Acc0 = {IntroVars, [{atom, 0, true}]}, + {_, {[], Exprs}} = qual_fold(F, Acc0, [], FormsNoShadows, State), + FunctionNames = [Name || {function, _, Name, _, _} <- FormsNoShadows], + UniqueFName = qlc:aux_name(used_genvar, 1, sets:from_list(FunctionNames)), + {function,0,UniqueFName,0,[{clause,0,[],[],lists:reverse(Exprs)}]}. + +q_intro_vars(QId, [{QId, IVs} | QsIVs], IVsSoFar) -> {QsIVs, IVs ++ IVsSoFar}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% The transformed code has two major parts: a fun where each +%% qualifier is represented by one or more clauses, and a table where +%% list expressions (the right hand side of generators, LE) are +%% represented by funs (the table is further processed at runtime). +%% The separation into a fun and a table makes it possible to +%% rearrange qualifiers while keeping the speed offered by compiled +%% code, and to run the LEs before evaluation of the QLC (and possibly +%% modify the LEs should that be necessary). Only when doing a fast +%% join are qualifiers rearranged. +%% +%% Extra generators (and clauses) are inserted for possible fast join +%% operations. The list expression for such a generator has the form +%% {join, Op, QualifierNumber1, QualifierNumber2, PatternFilter1, +%% PatternFilter2, PatternConstants1, PatternConstants2} (it is not a +%% fun). Join generators are ignored at runtime unless a fast join is +%% possible, in which case they replace other generators. See also +%% qlc.erl. +%% +%% For each QLC, every filter is given a state number and every +%% generator two state numbers (one for initialization, one for +%% looping over values). State 1 is reserved for the template and +%% state 0 is entered when there are no more values to try, so +%% assuming no rearrangement of the qualifiers has taken place, the +%% first qualifier is given state number 2. For every state except 0, +%% the table tells which state to go to next. By modifying the table, +%% the order of the qualifiers can be altered at runtime. +%% +%% The syntax of the value Val returned from the fun is: +%% Val = [] | [term() | Val] | fun() -> Val +%% Note: the fun must not return a fun if it is to be called by +%% the function outlined below. +%% +%% An outline of the generated fun: +%% +%% fun(0, RL, ...) when is_list(RL) -> % the final state +%% lists:reverse(RL); % eval, all answers collected in a list +%% (0, ...) -> []; % cursor (or fold) +%% (1, RL, ...) when is_list(RL) -> % the template state +%% Fun(, [Template | RL], ...); +%% (1, ....) -> % return the object and a continuation +%% [Template | fun() -> Fun(, ...)]; +%% (2, ...) -> % an sample generator, initialization state +%% Fun(3, ..., , ...); +%% (3, ..., [Pattern | Val], ...) -> % looping over values (a list) +%% Fun(, ..., Val, ...); % arguments are bound +%% (3, ..., [_ | Val], ...) -> % pattern does not match +%% Fun(3, ..., Val, ...); +%% (3, ..., [], ...) -> +%% Fun(, ...); +%% (3, ...., F, ...) -> % looping over values (using continuations) +%% case F() of % get the next value by calling a continuation +%% [Pattern | Val] -> +%% Fun(..., Val, ...); +%% [_ | Val] -> +%% Fun(3, ..., Val, ...); +%% [] -> +%% Fun(, ...); +%% T -> % returned immediately, typically an error tuple +%% T +%% end; +%% (4, ...) -> % a sample filter +%% case Filter of +%% true -> Fun(, ...); +%% false -> Fun(, ...) +%% end; +%% (5, ...) -> % a filter so simple that it could be used as a guard +%% if +%% Guard -> Fun(, ...); +%% true -> Fun(, ...) +%% end +%% +%% means state 0 if there is no last +%% generator. is the evaluated list expression +%% (evaluated once only). Among the arguments indicated by ellipses +%% are all variables introduced in patterns and filters. +%% +%% transform/2 replaces each QLC (call to qlc:q/1) with a qlc_lc +%% record. The general case is that calling the fun stored in the 'lc' +%% field returns {qlc_v1, QFun, CodeF, Qdata, QOpt} such that: QFun is +%% the above mentioned fun; CodeF is a fun returning the original code +%% for the template, every pattern, and every filter; Qdata is the +%% above mentioned table; QOpt is a property list implemented as a fun +%% of one argument - an atom - which returns information about such +%% things as constant columns, match specifications, &c. +%% There is one special case when calling the fun stored in the 'lc' +%% field returns something else: +%% - If the QLC has the form [Var || Var <- LE] and there are no +%% options to qlc:q/2, a tuple {simple_v1, P, LEf, Line} is returned. +%% The objects returned are the objects returned by the generator +%% (calling LEf returns the objects generated by LE). + +transform(FormsNoShadows, State) -> + IntroVars = intro_variables(FormsNoShadows, State), + AllVars = sets:from_list(ordsets:to_list(qlc:vars(FormsNoShadows))), + ?DEBUG("AllVars = ~p~n", [sets:to_list(AllVars)]), + F1 = fun(QId, {generate,_,P,LE}, Foo, {GoI,SI}) -> + {{QId,GoI,SI,{gen,P,LE}},Foo,{GoI + 3, SI + 2}}; + (QId, F, Foo, {GoI,SI}) -> + {{QId,GoI,SI,{fil,F}},Foo,{GoI + 2,SI + 1}} + end, + TemplS = qlc:template_state(), + GoState = {TemplS + 1, TemplS + 1}, + {ModifiedForms1,_} = + qual_fold(F1, [], GoState, FormsNoShadows, State), + + %% This is for info/2. QLCs in filters and the template are + %% translated before the expression itself is translated. info/2 + %% must not display the result of the translation, but the source + %% code. + {_,Source0} = qual_fold(fun(_QId, {generate,_,_P,_E}=Q, Dict, Foo) -> + {Q,Dict,Foo}; + (QId, F, Dict, Foo) -> + {F,dict:store(QId, F, Dict),Foo} + end, dict:new(), [], FormsNoShadows, State), + {_,Source} = qlc_mapfold(fun(Id, {lc,_L,E,_Qs}=LC, Dict) -> + {LC,dict:store(Id, E, Dict)} + end, Source0, FormsNoShadows, State), + + + %% Unused variables introduced in filters are not optimized away. + F2 = fun(Id, {lc,_L,E,Qs}, {IntroVs0,XWarn0}) -> + LcNo = get_lcid_no(Id), + LcL = get_lcid_line(Id), + [RL,Fun,Go,NGV,S0,RL0,Go0,AT,Err] = + aux_vars(['RL','Fun','Go','C','S0','RL0','Go0','AT','E'], + LcNo, AllVars), + ?DEBUG("RL = ~p, Fun = ~p, Go = ~p~n", [RL, Fun, Go]), + {IntroVs, RestIntroVs} = lists:split(length(Qs), IntroVs0), + IntroVs_Qs = lists:zip(IntroVs, Qs), + F = fun({{QId,IVs}, {QId,GoI,SI,{gen,P,LE}}}, AllIVs0) -> + GV = aux_var('C', LcNo, QId#qid.no, 1, AllVars), + GenIVs = [GV | IVs], + {{QId,{GenIVs,{{gen,P,LE,GV},GoI,SI}}}, + GenIVs ++ AllIVs0}; + ({{QId,IVs}, {QId,GoI,SI,{fil,F}}}, AllIVs0) -> + {{QId,{IVs,{{fil,F},GoI,SI}}}, + IVs++AllIVs0} + end, + {QCs, AllIVs} = lists:mapfoldl(F, [], IntroVs_Qs), + + Dependencies = qualifier_dependencies(Qs, IntroVs), + L = no_compiler_warning(LcL), + {EqColumnConstants, EqualColumnConstants, + ExtraConsts, SizeInfo} = + constants_and_sizes(Qs, E, Dependencies, AllIVs, State), + {JoinInfo, XWarn} = + join_kind(Qs, LcL, AllIVs, Dependencies, State), + %% Not at all sure it is a good idea to try and find + %% failing qualifiers; Dialyzer does it so much better. + %% But there are a few cases where qlc finds more... (r12b). + FWarn = warn_failing_qualifiers(Qs, AllIVs, Dependencies, + State), + JQs = join_quals(JoinInfo, QCs, L, LcNo, ExtraConsts, AllVars), + XQCs = QCs ++ JQs, + Cs0 = clauses(XQCs, RL, Fun, Go, NGV, Err, AllIVs, State), + Template = template(E, RL, Fun, Go, AT, L, AllIVs, State), + Fin = final(RL, AllIVs, L, State), + FunC = {'fun',L,{clauses,Fin ++ Template ++ Cs0}}, + As0 = pack_args(abst_vars([S0, RL0, Fun, Go0 + | replace(AllIVs, AllIVs, nil)], + L), L, State), + AsW = abst_vars([S0, RL0, Go0], L), + FunW = {'fun',L,{clauses,[{clause,L,AsW,[], + [{match,L,{var,L,Fun},FunC}, + {call,L,{var,L,Fun},As0}]}]}}, + {ok, OrigE0} = dict:find(Id, Source), + OrigE = undo_no_shadows(OrigE0), + QCode = qcode(OrigE, XQCs, Source, L), + Qdata = qdata(XQCs, L), + TemplateInfo = + template_columns(Qs, E, AllIVs, Dependencies, State), + %% ExtraConsts should be used by match_spec_quals. + MSQs = match_spec_quals(E, Dependencies, Qs, State), + Opt = opt_info(TemplateInfo, SizeInfo, JoinInfo, MSQs, L, + EqColumnConstants, EqualColumnConstants), + LCTuple = + case qlc_kind(OrigE, Qs) of + qlc -> + {tuple,L,[?A(qlc_v1),FunW,QCode,Qdata,Opt]}; + {simple, PL, LE, V} -> + Init = closure(LE, L), + simple(L, V, Init, PL) + end, + LCFun = {'fun',L,{clauses,[{clause,L,[],[],[LCTuple]}]}}, + {tuple,_,Fs0} = abstr(#qlc_lc{}, L), + Fs = set_field(#qlc_lc.lc, Fs0, LCFun), + {{tuple,L,Fs},{RestIntroVs,FWarn++XWarn++XWarn0}} + end, + {NForms,{[],XW}} = qlc_mapfold(F2, {IntroVars,[]}, ModifiedForms1, State), + display_forms(NForms), + {restore_line_numbers(NForms), State#state{xwarnings = XW}}. + +join_kind(Qs, LcL, AllIVs, Dependencies, State) -> + {EqualCols2, EqualColsN} = equal_columns(Qs, AllIVs, Dependencies, State), + {MatchCols2, MatchColsN} = eq_columns(Qs, AllIVs, Dependencies, State), + Tables = lists:usort + ([T || {C,_Skip} <- EqualCols2, {T,_} <- C] + ++ [T || {C,_Skip} <- EqualCols2, T <- C, is_integer(T)]), + if + EqualColsN =/= []; MatchColsN =/= [] -> + {[], + [{get(?QLC_FILE),[{abs(LcL),?APIMOD,too_complex_join}]}]}; + EqualCols2 =:= [], MatchCols2 =:= [] -> + {[], []}; + length(Tables) > 2 -> + {[], + [{get(?QLC_FILE),[{abs(LcL),?APIMOD,too_many_joins}]}]}; + EqualCols2 =:= MatchCols2 -> + {EqualCols2, []}; + true -> + {{EqualCols2, MatchCols2}, []} + end. + +qlc_kind(OrigE, Qs) -> + {OrigFilterData, OrigGeneratorData} = qual_data(undo_no_shadows(Qs)), + OrigAllFilters = filters_as_one(OrigFilterData), + {_FilterData, GeneratorData} = qual_data(Qs), + case {OrigE, OrigAllFilters, OrigGeneratorData} of + {{var,_,V}, {atom,_,true}, [{_,{gen,{var,PatternL,V},_LE}}]} -> + [{_,{gen,_,LE}}] = GeneratorData, + {simple, PatternL, LE, V}; % V is for info() + _ -> + qlc + end. + +%% Finds filters and patterns that cannot match any values at all. +%% Nothing but the patterns and the filters themselves is analyzed. +%% A much weaker analysis than the one of Dialyzer's. +warn_failing_qualifiers(Qualifiers, AllIVs, Dependencies, State) -> + {FilterData, GeneratorData} = qual_data(Qualifiers), + Anon = 1, + BindFun = fun(_Op, Value) -> is_bindable(Value) end, + {PFrame, _PatternVars} = + pattern_frame(GeneratorData, BindFun, Anon, State), + {_, _, Imported} = + filter_info(FilterData, AllIVs, Dependencies, State), + PFrames = frame2frames(PFrame), + {_, Warnings} = + lists:foldl(fun({_QId,{fil,_Filter}}, {[]=Frames,Warnings}) -> + {Frames,Warnings}; + ({_QId,{fil,Filter}}, {Frames,Warnings}) -> + case filter(set_line(Filter, 0), Frames, BindFun, + State, Imported) of + [] -> + {[], + [{get(?QLC_FILE), + [{abs_loc(element(2, Filter)),?APIMOD, + nomatch_filter}]} | Warnings]}; + Frames1 -> + {Frames1,Warnings} + end; + ({_QId,{gen,Pattern,_}}, {Frames,Warnings}) -> + case pattern(Pattern, Anon, [], BindFun, State) of + {failed, _, _} -> + {Frames, + [{get(?QLC_FILE), + [{abs_loc(element(2, Pattern)),?APIMOD, + nomatch_pattern}]} | Warnings]}; + _ -> + {Frames,Warnings} + end + end, {PFrames,[]}, FilterData++GeneratorData), + Warnings. + +-define(TNO, 0). +-define(TID, #qid{lcid = template, no = ?TNO}). + +opt_info(TemplateInfo, Sizes, JoinInfo, MSQs, L, + EqColumnConstants0, EqualColumnConstants0) -> + SzCls = [{clause,L,[?I(C)],[],[?I(Sz)]} || {C,Sz} <- lists:sort(Sizes)] + ++ [{clause,L,[?V('_')],[],[?A(undefined)]}], + S = [{size, {'fun', L, {clauses, SzCls}}}], + J = case JoinInfo of [] -> []; _ -> [{join, abstr(JoinInfo, L)}] end, + %% Superfluous clauses may be emitted: + TCls0 = lists:append( + [[{clause,L,[abstr(Col, L),EqType],[], + [abstr(TemplCols, L)]} || + {Col,TemplCols} <- TemplateColumns] + || {EqType, TemplateColumns} <- TemplateInfo]), + TCls = lists:sort(TCls0) ++ [{clause,L,[?V('_'),?V('_')],[],[{nil,L}]}], + T = [{template, {'fun', L, {clauses, TCls}}}], + + %% The template may also have a constant function (IdNo = 0). + %% Only constant template columns are interesting. + EqColumnConstants = opt_column_constants(EqColumnConstants0), + CCs = opt_constants(L, EqColumnConstants), + EqC = {constants,{'fun',L,{clauses,CCs}}}, + + EqualColumnConstants = opt_column_constants(EqualColumnConstants0), + ECCs = opt_constants(L, EqualColumnConstants), + EqualC = {equal_constants,{'fun',L,{clauses,ECCs}}}, + C = [EqC | [EqualC || true <- [CCs =/= ECCs]]], + + %% Comparisons yield more constant columns than matchings. + ConstCols = [{IdNo,Col} || + {{IdNo,Col},[_],_FilNs} <- EqualColumnConstants], + ConstColsFamily = family_list(ConstCols), + NSortedCols0 = [{IdNo,hd(lists:seq(1, length(Cols)+1)--Cols)} || + {IdNo,Cols} <- ConstColsFamily], + NCls = [{clause,L,[?I(IdNo)],[],[?I(N-1)]} || + {IdNo,N} <- NSortedCols0, N > 0] + ++ [{clause,L,[?V('_')],[],[?I(0)]}], + N = [{n_leading_constant_columns,{'fun',L,{clauses,NCls}}}], + + ConstCls = [{clause,L,[?I(IdNo)],[],[abstr(Cols,L)]} || + {IdNo,Cols} <- ConstColsFamily] + ++ [{clause,L,[?V('_')],[],[{nil,L}]}], + CC = [{constant_columns,{'fun',L,{clauses,ConstCls}}}], + + MSCls = [{clause,L,[?I(G)],[],[{tuple,L,[MS,abstr(Fs,L)]}]} || + {G,MS,Fs} <- MSQs] + ++ [{clause,L,[?V('_')],[],[?A(undefined)]}], + MS = [{match_specs, {'fun',L,{clauses,MSCls}}}], + + Cls = [{clause,L,[?A(Tag)],[],[V]} || + {Tag,V} <- lists:append([J, S, T, C, N, CC, MS])] + ++ [{clause,L,[?V('_')],[],[?A(undefined)]}], + {'fun', L, {clauses, Cls}}. + +opt_column_constants(ColumnConstants0) -> + [CC || {{IdNo,_Col},Const,_FilNs}=CC <- ColumnConstants0, + (IdNo =/= ?TNO) or (length(Const) =:= 1)]. + +opt_constants(L, ColumnConstants) -> + Ns = lists:usort([IdNo || {{IdNo,_Col},_Const,_FilNs} <- ColumnConstants]), + [{clause,L,[?I(IdNo)],[],[column_fun(ColumnConstants, IdNo, L)]} + || IdNo <- Ns] + ++ [{clause,L,[?V('_')],[],[?A(no_column_fun)]}]. + +abstr(Term, Line) -> + erl_parse:abstract(Term, Line). + +%% Extra generators are introduced for join. +join_quals(JoinInfo, QCs, L, LcNo, ExtraConstants, AllVars) -> + {LastGoI, LastSI} = + lists:foldl(fun({_QId,{_QIVs,{{fil,_},GoI,SI}}}, + {GoI0, _SI0}) when GoI >= GoI0 -> + {GoI + 2, SI + 1}; + ({_QId,{_QIVs,{{gen,_,_,_},GoI,SI}}}, + {GoI0, _SI0}) when GoI >= GoI0 -> + {GoI + 3, SI + 2}; + (_, A) -> + A + end, {0, 0}, QCs), + LastQId = lists:max([QId || {QId,{_QIVs,{_Q,_GoI,_SI}}} <- QCs]), + %% Only two tables for the time being. + %% The join generator re-uses the generator variable assigned to + %% the first of the two joined generators. Its introduced variables + %% are the variables introduced by any of the two joined generators. + %% Its abstract code is a pair of the joined generators' patterns. + QNums = case JoinInfo of + {EqualCols, MatchCols} -> + EQs = join_qnums(EqualCols), + MQs = join_qnums(MatchCols), + [{Q1,Q2,'=:='} || {Q1,Q2} <- MQs] ++ + [{Q1,Q2,'=='} || {Q1,Q2} <- EQs -- MQs]; + EqualCols -> + [{Q1,Q2,'=='} || {Q1,Q2} <- join_qnums(EqualCols)] + end, + LD = [begin + [{QId1,P1,GV1,QIVs1}] = + [{QId,P,GV,QIVs} || + {QId,{QIVs,{{gen,P,_,GV},_GoI,_SI}}} <- QCs, + QId#qid.no =:= Q1], + [{QId2,P2,QIVs2}] = + [{QId,P,QIVs--[GV]} || + {QId,{QIVs,{{gen,P,_,GV},_,_}}} <- QCs, + QId#qid.no =:= Q2], + {QId1,Op,P1,GV1,QIVs1++QIVs2,QId2,P2} + end || {Q1, Q2, Op} <- lists:usort(QNums)], + Aux = abst_vars(aux_vars(['F','H','O','C'], LcNo, AllVars), L), + F = fun({QId1,Op,P1,GV1,QIVs,QId2,P2}, {QId,GoI,SI}) -> + AP1 = anon_pattern(P1), + AP2 = anon_pattern(P2), + Cs1 = join_handle_constants(QId1, ExtraConstants), + Cs2 = join_handle_constants(QId2, ExtraConstants), + H1 = join_handle(AP1, L, Aux, Cs1), + H2 = join_handle(AP2, L, Aux, Cs2), + %% Op is not used. + Join = {join,Op,QId1#qid.no,QId2#qid.no,H1,H2,Cs1,Cs2}, + G = {NQId=QId#qid{no = QId#qid.no + 1}, + {QIVs,{{gen,{cons,L,P1,P2},Join,GV1},GoI,SI}}}, + A = {NQId, GoI + 3, SI + 2}, + {G, A} + end, + {Qs, _} = lists:mapfoldl(F, {LastQId, LastGoI, LastSI}, LD), + Qs. + +join_qnums(Cols) -> + lists:usort([{Q1, Q2} || {[{Q1,_C1}, {Q2,_C2}], _Skip} <- Cols]). + +%% Variables occurring only once are replaced by '_'. +anon_pattern(P) -> + MoreThanOnce = lists:usort(occ_vars(P) -- qlc:vars(P)), + {AP, foo} = var_mapfold(fun({var, L, V}, A) -> + case lists:member(V, MoreThanOnce) of + true -> + {{var, L, V}, A}; + false -> + {{var, L, '_'}, A} + end + end, foo, P), + AP. + +%% Creates a handle that filters the operands of merge join using the +%% pattern. It is important that objects that do not pass the pattern +%% are filtered out because the columns of the pattern are inspected +%% in order to determine if key-sorting the operands can be avoided. +%% +%% No objects will be filtered out if the pattern is just a variable. +join_handle(AP, L, [F, H, O, C], Constants) -> + case {AP, Constants} of + {{var, _, _}, []} -> + {'fun',L,{clauses,[{clause,L,[H],[],[H]}]}}; + _ -> + G0 = [begin + Call = {call,0,{atom,0,element},[{integer,0,Col},O]}, + list2op([{op,0,Op,Con,Call} || {Con,Op} <- Cs], 'or') + end || {Col,Cs} <- Constants], + G = if G0 =:= [] -> G0; true -> [G0] end, + CC1 = {clause,L,[AP],G,[{cons,L,O,closure({call,L,F,[F,C]},L)}]}, + CC2 = {clause,L,[?V('_')],[],[{call,L,F,[F,C]}]}, + Case = {'case',L,O,[CC1,CC2]}, + Cls = [{clause,L,[?V('_'),{nil,L}],[],[{nil,L}]}, + {clause,L,[F,{cons,L,O,C}],[],[Case]}, + {clause,L,[F,C],[[{call,L,?A(is_function),[C]}]], + [{call,L,F,[F,{call,L,C,[]}]}]}, + {clause,L,[?V('_'),C],[],[C]}], + Fun = {'fun', L, {clauses, Cls}}, + {'fun',L,{clauses,[{clause,L,[H],[],[{match,L,F,Fun}, + closure({call,L,F,[F,H]}, + L)]}]}} + end. + +join_handle_constants(QId, ExtraConstants) -> + IdNo = QId#qid.no, + case lists:keysearch(IdNo, 1, ExtraConstants) of + {value, {IdNo, ConstOps}} -> + ConstOps; + false -> + [] + end. + +%%% By the term "imported variable" is meant a variable that is bound +%%% outside (before) the QLC. Perhaps "parameter" would be a more +%%% suitable name. + +%% The column fun is to be used when there is a known key column or +%% indices. The argument is a column number and the return value is a +%% list of the values to look up to get all objects needed to evaluate +%% the filter. The order of the objects need not be the same as the +%% order the traverse fun would return them. + +column_fun(Columns, QualifierNumber, LcL) -> + ColCls0 = + [begin + true = Vs0 =/= [], % at least one value to look up + Vs1 = list2cons(Vs0), + Fils1 = {tuple,0,[{atom,0,FTag}, + lists:foldr + (fun(F, A) -> {cons,0,{integer,0,F},A} + end, {nil,0}, Fils)]}, + Tag = case ordsets:to_list(qlc:vars(Vs1)) of + Imp when length(Imp) > 0, % imported vars + length(Vs0) > 1 -> + usort_needed; + _ -> + values + end, + Vs = {tuple,0,[{atom,0,Tag},Vs1,Fils1]}, + {clause,0,[erl_parse:abstract(Col)],[],[Vs]} + end || + {{CIdNo,Col}, Vs0, {FTag,Fils}} <- Columns, + CIdNo =:= QualifierNumber] + ++ [{clause,0,[{var,0,'_'}],[],[{atom,0,false}]}], + ColCls = set_line(ColCls0, LcL), + {'fun', LcL, {clauses, ColCls}}. + +%% Tries to find columns of the template that (1) are equal to (or +%% match) or (2) match columns of the patterns of the generators. The +%% results are to be used only for determining which columns are +%% sorted. The template can be handled very much like a generator +%% pattern (the variables are not fresh, though). As in filters calls +%% like element(I, T) are recognized. +%% -> [{EqType,Equal | Match}] +%% Equal = Match = TemplateColumns +%% EqType = abstract code for {_ | '==' | '=:='} +%% TemplateColumns = [{Column,Integers}] % integer is position in template +%% Column = {QualifierNumber,ColumnNumber}} % column is position in pattern + +template_columns(Qs0, E0, AllIVs, Dependencies, State) -> + E = expand_expr_records(pre_expand(E0), State), + TemplateAsPattern = template_as_pattern(E), + Qs = [TemplateAsPattern | Qs0], + EqualColumns = equal_columns2(Qs, AllIVs, Dependencies, State), + MatchColumns = eq_columns2(Qs, AllIVs, Dependencies, State), + Equal = template_cols(EqualColumns), + Match = template_cols(MatchColumns), + L = 0, + if + Match =:= Equal -> + [{?V('_'), Match}]; + true -> + [{?A('=='), Equal}, {?A('=:='), Match}] + end. + +equal_columns2(Qualifiers, AllIVs, Dependencies, State) -> + {JI, _Skip} = + join_info(Qualifiers, AllIVs, Dependencies, State,_JoinOp = '=='), + JI. + +eq_columns2(Qualifiers, AllIVs, Dependencies, State) -> + {JI, _SKip} = + join_info(Qualifiers, AllIVs, Dependencies, State, _JoinOp = '=:='), + JI. + +template_cols(ColumnClasses) -> + lists:sort([{{IdNo,Col}, lists:usort(Cs)} || + Class <- ColumnClasses, + {IdNo,Col} <- Class, + IdNo =/= ?TNO, + [] =/= (Cs = [C || {?TNO,C} <- Class])]). + +template_as_pattern(E) -> + P = simple_template(E), + {?TID,foo,foo,{gen,P,{nil,0}}}. + +simple_template({call,L,{remote,_,{atom,_,erlang},{atom,_,element}}=Call, + [{integer,_,I}=A1,A2]}) when I > 0 -> + %% This kludge is known by pattern/5 below. + {call, L, Call, [A1, simple_template(A2)]}; +simple_template({var, _, _}=E) -> + E; +simple_template({tuple, L, Es}) -> + {tuple, L, [simple_template(E) || E <- Es]}; +simple_template({cons, L, H, T}) -> + {cons, L, simple_template(H), simple_template(T)}; +simple_template(E) -> + case catch erl_parse:normalise(E) of + {'EXIT', _} -> unique_var(); + _ -> E + end. + +%% -> [{QId,[QId']}]. +%% Qualifier QId (a filter) uses variables introduced in QId'. +qualifier_dependencies(Qualifiers, IntroVs) -> + Intro = sofs:relation([{IV,QId} || {QId,IVs} <- IntroVs, IV <- IVs]), + {FilterData, _} = qual_data(Qualifiers), + Used = sofs:relation([{QId,UV} || + {QId,{fil,F}} <- FilterData, + UV <- qlc:vars(F)]), + Depend = sofs:strict_relation(sofs:relative_product(Used, Intro)), + G = sofs:family_to_digraph(sofs:relation_to_family(Depend)), + Dep0 = [{V,digraph_utils:reachable_neighbours([V], G)} || + V <- digraph:vertices(G)], + true = digraph:delete(G), + FilterIds = sofs:set(filter_ids(Qualifiers)), + Dep1 = sofs:restriction(sofs:family(Dep0), FilterIds), + NoDep = sofs:constant_function(FilterIds, sofs:empty_set()), + sofs:to_external(sofs:family_union(Dep1, NoDep)). + +filter_ids(Qualifiers) -> + {FilterData, _} = qual_data(Qualifiers), + [QId || {QId,_} <- FilterData]. + +%% -> [{QualifierNumber,MatchSpec,[QualifierNumber']} +%% The qualifiers [QualifierNumber'] are filters (F1, ..., Fn) that +%% depend on QualifierNumber (a generator Pattern <- LE) only. +%% MatchSpec is the match specification for [Pattern' || Pattern <- LE, +%% F1, ..., Fn], where Pattern' is Template if all qualifiers can be +%% replaced by one match specification, otherwise a modified Pattern. +match_spec_quals(Template, Dependencies, Qualifiers, State) -> + {FilterData, GeneratorData} = qual_data(Qualifiers), + NoFilterGIds = [GId || {GId,_} <- GeneratorData] + -- lists:flatmap(fun({_,GIds}) -> GIds end, Dependencies), + Filters = filter_list(FilterData, Dependencies, State), + Candidates = [{QId2#qid.no,Pattern,[Filter],F} || + {QId,[QId2]} <- Dependencies, + {GQId,{gen,Pattern,_}} <- GeneratorData, + GQId =:= QId2, + {FQId,{fil,F}}=Filter <- Filters, % guard filters only + FQId =:= QId] + ++ [{GId#qid.no,Pattern,[],{atom,0,true}} || + {GId,{gen,Pattern,_}} <- GeneratorData, + lists:member(GId, NoFilterGIds)], + E = {nil, 0}, + GF = [{{GNum,Pattern},Filter} || + {GNum,Pattern,Filter,F} <- Candidates, + no =/= try_ms(E, Pattern, F, State)], + GFF = sofs:relation_to_family(sofs:relation(GF, + [{gnum_pattern,[filter]}])), + GFFL = sofs:to_external(sofs:family_union(GFF)), + try + [{{GNum,Pattern}, GFilterData}] = GFFL, + true = length(GFilterData) =:= length(FilterData), + [_] = GeneratorData, + AbstrMS = gen_ms(Template, Pattern, GFilterData, State), + %% There is one generator and every filter uses some of the + %% variables introduced by the generator. The whole qlc + %% expressione can be replaced by a match specification. + [{GNum, AbstrMS, all}] + catch _:_ -> + {TemplVar, _} = anon_var({var,0,'_'}, 0), + [one_gen_match_spec(GNum, Pattern, GFilterData, State, TemplVar) || + {{GNum,Pattern},GFilterData} <- GFFL] + end. + +one_gen_match_spec(GNum, Pattern0, GFilterData, State, TemplVar) -> + {E, Pattern} = pattern_as_template(Pattern0, TemplVar), + AbstrMS = gen_ms(E, Pattern, GFilterData, State), + {GNum, AbstrMS, [FId#qid.no || {FId,_} <- GFilterData]}. + +gen_ms(E, Pattern, GFilterData, State) -> + {ok, MS, AMS} = try_ms(E, Pattern, filters_as_one(GFilterData), State), + case MS of + [{'$1',[true],['$1']}] -> + {atom, 0, no_match_spec}; + _ -> + AMS + end. + +%% -> {Template, Pattern'} +%% The pattern is accepted by ets:fun2ms/1, that is, =/2 can only +%% occur at top level. Introduce or reuse a top-level variable as +%% template +pattern_as_template({var,_,'_'}, TemplVar) -> + {TemplVar, TemplVar}; +pattern_as_template({var,_,_}=V, _TemplVar) -> + {V, V}; +pattern_as_template({match,L,E,{var,_,'_'}}, TemplVar) -> + {TemplVar, {match,L,E,TemplVar}}; +pattern_as_template({match,L,{var,_,'_'},E}, TemplVar) -> + {TemplVar, {match,L,E,TemplVar}}; +pattern_as_template({match,_,_E,{var,_,_}=V}=P, _TemplVar) -> + {V, P}; +pattern_as_template({match,_,{var,_,_}=V,_E}=P, _TemplVar) -> + {V, P}; +pattern_as_template(E, TemplVar) -> + L = 0, + {TemplVar, {match, L, E, TemplVar}}. + +%% Tries to find columns which are compared or matched against +%% constant values or other columns. To that end unification is used. +%% A frame is a list of bindings created by unification. +%% Also tries to find the number of columns of patterns. +%% Note that the template is handled more or less as a pattern. +%% -> {ColumnConstants, SizeInfo, ExtraConstants} +%% ColumnConstants = [{Column,[Constant],[FilterNo]}] +%% SizeInfo = [{QualifierNumber,NumberOfColumns}] +%% Column = {QualifierNumber,ColumnNumber}} +%% FilterNo is a filter that can be skipped at runtime provided constants +%% are looked up. +%% ExtraConstants = +%% [{GeneratorNumber,[{ColumnNumber, +%% [{AbstractConstant,AbstractOperator}]}]}] +%% For every generator such that the unification binds value(s) to +%% some column(s), extra constants are returned. These constants are +%% the results of the unification, and do not occur in the pattern of +%% the generator. +constants_and_sizes(Qualifiers0, E, Dependencies, AllIVs, State) -> + TemplateAsPattern = template_as_pattern(E), + Qualifiers = [TemplateAsPattern | Qualifiers0], + {FilterData, GeneratorData} = qual_data(Qualifiers), + {Filter, Anon1, Imported} = + filter_info(FilterData, AllIVs, Dependencies, State), + PatBindFun = fun(_Op, Value) -> is_bindable(Value) end, + {PatternFrame, PatternVars} = + pattern_frame(GeneratorData, PatBindFun, Anon1, State), + PatternFrames = frame2frames(PatternFrame), + FilterFun = + fun(BindFun) -> + filter(Filter, PatternFrames, BindFun, State, Imported) + end, + SzFs = FilterFun(PatBindFun), + + SizeInfo = pattern_sizes(PatternVars, SzFs), + SelectorFun = const_selector(Imported), + PatternConstants = + lists:flatten(frames_to_columns(PatternFrames, PatternVars, + deref_pattern(Imported), + SelectorFun, Imported, + '=:=')), + + {EqColumnConstants, _EqExtraConsts} = + constants(FilterFun, PatternVars, PatternConstants, PatternFrame, + FilterData, Dependencies, _LookupOp1 = '=:=', + Imported, State), + {EqualColumnConstants, EqualExtraConsts} = + constants(FilterFun, PatternVars, PatternConstants, PatternFrame, + FilterData, Dependencies, _LookupOp2 = '==', + Imported, State), + + %% Use compared extra constants only because: + %% - merge join compares terms; + %% - the constants from the matching unification is a subset of the + %% constants from the comparing unification. + %% Using constants from the matching unification would make it + %% possible to skip some (more) objects when joining. + ExtraCon1 = + [{{GId,Col},{Val,Op}} || + {Consts,Op} <- [{EqualExtraConsts,'=='}], + {{GId,Col},Val} <- Consts], + ExtraConstants = + family_list([{GId, {Col,ValOps}} || + {{GId,Col},ValOps} <- family_list(ExtraCon1)]), + {EqColumnConstants, EqualColumnConstants, ExtraConstants, SizeInfo}. + +constants(FilterFun, PatternVars, PatternConstants, PatternFrame, + FilterData, Dependencies, LookupOp, Imported, State) -> + BindFun = fun(_Op, Value) -> is_bindable(Value) end, + Fs = FilterFun(BindFun), + SelectorFun = const_selector(Imported), + ColumnConstants0 = frames_to_columns(Fs, PatternVars, + deref_lookup(Imported, LookupOp), + SelectorFun, Imported, LookupOp), + ColumnConstants1 = lists:flatten(ColumnConstants0), + ExtraConstants = + [{{GId,Col},Val} || + {{GId,Col},Vals} <- ColumnConstants1 -- PatternConstants, + GId =/= ?TNO, + Val <- Vals], + ColumnConstants = lu_skip(ColumnConstants1, FilterData, PatternFrame, + PatternVars, Dependencies, State, + Imported, LookupOp), + {ColumnConstants, ExtraConstants}. + +%%% ** Comparing Terms ** +%%% When comparing the key against a term where some integer (or float +%%% comparing equal to an integer) occurs, one has to be careful if the +%%% table matches keys. One way would be to look up the term both with +%%% the integer and with the float comparing equal to the integer--then +%%% all objects that could possibly be answers are filtered (with +%%% reasonable assumptions). But if integers occur several times in the +%%% term all combinations have to be looked up, and that could be just +%%% too many. +%%% If imported variables occur in the term one could assume at compile +%%% time that they are not integers and check that assumption at +%%% runtime. However, this would probably be bad design since some keys +%%% can be looked up, but others cannot. +%%% However, the current implementation is simple: do not bind a +%%% variable to a term if imported variables or integers occur in the +%%% term. + +deref_lookup(Imported, '==') -> + %% Comparing table. Every value can be looked up. + fun(PV, F) -> deref_values(PV, F, Imported) end; +deref_lookup(Imported, '=:=') -> + %% Matching table. Ignore comparisons unless the value is free of + %% integers. See also Comparing Terms. + BFun = fun(DV, Op) -> + Op =:= '=:=' orelse free_of_integers(DV, Imported) + end, + fun(PV, F) -> deref_values(PV, F, BFun, Imported) end. + +%% Augment ColConstants with filters that do not need to be run +%% provided that constants are looked up. +%% Does not find all filters that can be removed. +lu_skip(ColConstants, FilterData, PatternFrame, PatternVars, + Dependencies, State, Imported, LookupOp) -> + %% If there is a test that does not compare or match, then the + %% filter cannot be skipped. + FailSelector = fun(_Frame) -> fun(Value) -> {yes, Value} end end, + %% In runtime, constants are looked up and matched against a pattern + %% (the pattern acts like a filter), then the filters are run. + PatternFrames = frame2frames(PatternFrame), + PatternColumns = + lists:flatten(frames_to_columns(PatternFrames, PatternVars, + deref_pattern(Imported), FailSelector, + Imported, LookupOp)), + + %% Note: ColFil can contain filters for columns that cannot be + %% looked up. Such (possibly bogus) elements are however not used. + %% Note: one filter at a time is tested; only the pattern is + %% assumed to have been run when the filter is run. Sometimes it + %% would be advantageously to assume some filter(s) occurring + %% before the filter had been run as well + %% (an example: {{X,Y}} <- LE, X =:= 1, Y =:= a). + BindFun = fun(_Op, Value) -> is_bindable(Value) end, + ColFil = [{Column, FId#qid.no} || + {FId,{fil,Fil}} <- + filter_list(FilterData, Dependencies, State), + [] =/= (SFs = safe_filter(set_line(Fil, 0), PatternFrames, + BindFun, State, Imported)), + {GId,PV} <- PatternVars, + [] =/= + (Cols = hd(frames_to_columns(SFs, [{GId, PV}], + deref_lu_skip(LookupOp, + Imported), + const_selector(Imported), + Imported, LookupOp))), + %% The filter must not test more than one column (unless the + %% pattern has already done the test): + %% Note: if the pattern and the filter test the same + %% column, the filter will not be skipped. + %% (an example: {X=1} <- ..., X =:= 1). + length(D = Cols -- PatternColumns) =:= 1, + Frame <- SFs, + begin + %% The column is compared/matched against a constant. + %% If there are no more comparisons/matches then + %% the filter can be replaced by the lookup of + %% the constant. + [{{_,Col} = Column, Constants}] = D, + {VarI, FrameI} = unify_column(Frame, PV, Col, BindFun, + Imported), + VarValues = deref_skip(VarI, FrameI, LookupOp, Imported), + + {NV, F1} = unify_column(PatternFrame, PV, Col, BindFun, + Imported), + F2 = unify_var_bindings(VarValues, '=:=', NV, F1, + BindFun, Imported, false), + %% F2: the pattern has been matched and the + %% constant has been looked up. If Frame has no + %% more bindings than F2 (modulo unique + %% variables), then the filter can be skipped. + %% + %% Under rare circumstances (for instance: + %% "X =:= 1, X =:= U", U imported; only 1 is looked up), + %% not all constants mentioned in a filter are looked up. + %% The filter can only be skipped if all constants + %% are looked up. + LookedUpConstants = + case lists:keysearch(Column, 1, ColConstants) of + false -> []; + {value, {Column,LUCs}} -> LUCs + end, + %% Don't try to handle filters that compare several + %% values equal. See also frames_to_columns(). + length(VarValues) =< 1 andalso + (Constants -- LookedUpConstants =:= []) andalso + bindings_is_subset(Frame, F2, Imported) + end], + ColFils = family_list(ColFil), + %% The skip tag 'all' means that all filters are covered by the lookup. + %% It does not imply that there is only one generator as is the case + %% for match specifications (see match_spec_quals above). + [{Col, Constants, skip_tag(Col, ColFils, FilterData)} || + {Col,Constants} <- ColConstants]. + +deref_skip(E, F, _LookupOp, Imported) -> + deref(E, F, Imported). + +deref_lu_skip('==', Imported) -> + %% Comparing table. Cannot skip filters that match integers. + BFun = fun(DV, Op) -> + Op =:= '==' orelse free_of_integers(DV, Imported) + end, + fun(PV, F) -> deref_values(PV, F, BFun, Imported) end; +deref_lu_skip('=:=', Imported) -> + %% Matching table. Skip filters regardless of operator. + fun(PV, F) -> deref_values(PV, F, Imported) end. + +equal_columns(Qualifiers, AllIVs, Dependencies, State) -> + {Cs, Skip} = + join_info(Qualifiers, AllIVs, Dependencies, State, _JoinOp = '=='), + join_gens(Cs, Qualifiers, Skip). + +eq_columns(Qualifiers, AllIVs, Dependencies, State) -> + {Cs, Skip} = + join_info(Qualifiers, AllIVs, Dependencies, State, _JoinOp = '=:='), + join_gens(Cs, Qualifiers, Skip). + +%% -> {TwoGens, ManyGens} +join_gens(Cs0, Qs, Skip) -> + Cs = [family_list(C) || C <- Cs0], + {FD, _GeneratorData} = qual_data(Qs), + {join_gens2(lists:filter(fun(C) -> length(C) =:= 2 end, Cs), FD, Skip), + join_gens2(lists:filter(fun(C) -> length(C) > 2 end, Cs), FD, Skip)}. + +join_gens2(Cs0, FilterData, Skip) -> + [{J, skip_tag(case lists:keysearch(J, 1, Skip) of + {value, {J,FilL}} -> + FilL; + false -> + [] + end, FilterData)} || + J <- lists:append([qlc:all_selections(C) || C <- Cs0])]. + +skip_tag(FilList, FilterData) -> + {if + length(FilterData) =:= length(FilList) -> + all; + true -> + some + end, FilList}. + +skip_tag(Col, ColFils, FilterData) -> + case lists:keysearch(Col, 1, ColFils) of + {value, {Col, FilL}} -> + Tag = if + length(FilterData) =:= length(FilL) -> + all; + true -> + some + end, + {Tag, FilL}; + false -> + {some,[]} + end. + +%% Tries to find columns (possibly in the same table) that are equal. +%% If LookupOp is '=:=' then "equal" means that the columns are matched; +%% if LookupOp is '==' then "equal" means that the columns are matched or +%% compared. +%% -> [[{QualifierNumber,ColumnNumber}]] % Eq.classes. +join_info(Qualifiers, AllIVs, Dependencies, State, JoinOp) -> + {FilterData, GeneratorData} = qual_data(Qualifiers), + {Filter, Anon1, Imported} = + filter_info(FilterData, AllIVs, Dependencies, State), + BindFun = fun(_Op, V) -> bind_no_const(V, Imported) end, + {PatternFrame, PatternVars} = + pattern_frame(GeneratorData, BindFun, Anon1, State), + PatternFrames = frame2frames(PatternFrame), + Fs = filter(Filter, PatternFrames, BindFun, State, Imported), + SelectorFun = no_const_selector(Imported), + Cols = frames_to_columns(Fs, PatternVars, + fun(PV1, F) -> deref_join(PV1, F, JoinOp) end, + SelectorFun, Imported, '=:='), + JC = join_classes(Cols), + Skip = join_skip(JC, FilterData, PatternFrame, + PatternVars, Dependencies, State, Imported, JoinOp), + {JC, Skip}. + +deref_join(E, Frame, '==') -> + deref_values(E, Frame, _Imp = []); +deref_join(E, Frame, '=:=') -> + %% Matching table. It is possible that some objects read from the + %% other table (the one with the objects to look up) contain + %% integers. By making all variables imported it is ensured that + %% comparisons are kept. See also Comparing Terms. + deref_values(E, Frame, fun(_DV, Op) -> Op =:= '=:=' end, all). + +join_classes(Cols0) -> + ColVar = sofs:relation(lists:append(Cols0)), + Cols = sofs:partition(2, ColVar), + [[C || {C,_} <- Cs] || Cs <- sofs:to_external(Cols), length(Cs) > 1]. + +join_skip(JoinClasses, FilterData, PatternFrame, PatternVars, Dependencies, + State, Imported, JoinOp) -> + PatternFrames = frame2frames(PatternFrame), + ColFil = [{JoinClass,FId#qid.no} || + [{Q1,C1}, {Q2,C2}]=JoinClass <- JoinClasses, + {GId1, PV1} <- PatternVars, + GId1#qid.no =:= Q1, + {GId2, PV2} <- PatternVars, + GId2#qid.no =:= Q2, + + %% Select a filter that depends on the two generators: + {FId,{fil,Fil}} <- + filter_list(FilterData, Dependencies, State), + {value,{_,GIds}} <- + [lists:keysearch(FId, 1, Dependencies)], + GIds =:= lists:sort([GId1,GId2]), + + begin + %% Do what the join does: + %% element(C1, G1) JoinOp element(C2, G2). + %% As for lu_skip: sometimes it would be + %% advantageously to assume some filter(s) + %% occurring before the join filter had been run + %% as well. + BindFun = fun(_Op, V) -> is_bindable(V) end, + {V1, JF1} = + unify_column(PatternFrame, PV1, C1, BindFun, Imported), + {V2, JF2} = + unify_column(JF1, PV2, C2, BindFun, Imported), + JF = unify(JoinOp, V1, V2, JF2, BindFun, Imported), + + %% "Run" the filter: + SFs = safe_filter(set_line(Fil, 0), PatternFrames, + BindFun, State, Imported), + JImp = qlc:vars([SFs, JF]), % kludge + lists:all(fun(Frame) -> + bindings_is_subset(Frame, JF, JImp) + end, SFs) andalso SFs =/= [] + end], + family_list(ColFil). + +filter_info(FilterData, AllIVs, Dependencies, State) -> + FilterList = filter_list(FilterData, Dependencies, State), + Filter0 = set_line(filters_as_one(FilterList), 0), + Anon0 = 0, + {Filter, Anon1} = anon_var(Filter0, Anon0), + Imported = ordsets:subtract(qlc:vars(Filter), % anonymous too + ordsets:from_list(AllIVs)), + {Filter, Anon1, Imported}. + +%% Selects the guard filters. Other filters than guard filters are +%% ignored when trying to find constants and join columns. Note: there +%% must not occur any non-guard filter between a guard filter and the +%% generator(s) the guard filter depends on. The reason is that such a +%% filter could fail for some object(s) excluded by lookup or join. If +%% the failing filter is placed _after_ the guard filter, the failing +%% objects have already been filtered out by the guard filter. +%% Note: guard filters using variables from one generator are allowed +%% to be placed after further generators (the docs states otherwise, but +%% this seems to be common practice). +filter_list(FilterData, Dependencies, State) -> + RDs = State#state.records, + sel_gf(FilterData, 1, Dependencies, RDs, [], []). + +sel_gf([], _N, _Deps, _RDs, _Gens, _Gens1) -> + []; +sel_gf([{#qid{no = N}=Id,{fil,F}}=Fil | FData], N, Deps, RDs, Gens, Gens1) -> + case erl_lint:is_guard_test(F, RDs) of + true -> + {value, {Id,GIds}} = lists:keysearch(Id, 1, Deps), + case length(GIds) =< 1 of + true -> + case generators_in_scope(GIds, Gens1) of + true -> + [Fil|sel_gf(FData, N+1, Deps, RDs, Gens, Gens1)]; + false -> + sel_gf(FData, N + 1, Deps, RDs, [], []) + end; + false -> + case generators_in_scope(GIds, Gens) of + true -> + [Fil | sel_gf(FData, N + 1, Deps, RDs, Gens, [])]; + false -> + sel_gf(FData, N + 1, Deps, RDs, [], []) + end + end; + false -> + sel_gf(FData, N + 1, Deps, RDs, [], []) + end; +sel_gf(FData, N, Deps, RDs, Gens, Gens1) -> + sel_gf(FData, N + 1, Deps, RDs, [N | Gens], [N | Gens1]). + +generators_in_scope(GenIds, GenNumbers) -> + lists:all(fun(#qid{no=N}) -> lists:member(N, GenNumbers) end, GenIds). + +pattern_frame(GeneratorData, BindFun, Anon1, State) -> + Frame0 = [], + {PatternFrame, _Anon2, PatternVars} = + lists:foldl(fun({QId,{gen,Pattern,_}}, {F0,An0,PVs}) -> + {F1, An1, PV} = + pattern(Pattern, An0, F0, BindFun, State), + {F1, An1, [{QId,PV} | PVs]} + end, {Frame0, Anon1, []}, GeneratorData), + {PatternFrame, PatternVars}. + +const_selector(Imported) -> + selector(Imported, fun is_const/2). + +no_const_selector(Imported) -> + selector(Imported, fun(V, I) -> not is_const(V, I) end). + +selector(Imported, TestFun) -> + fun(_Frame) -> + fun(Value) -> + case TestFun(Value, Imported) of + true -> + {yes, Value}; + false -> + no + end + end + end. + +bind_no_const(Value, Imported) -> + case is_const(Value, Imported) of + true -> + false; + false -> + is_bindable(Value) + end. + +%% Tuple tails are variables, never constants. +is_const(Value, Imported) -> + %% is_bindable() has checked that E is normalisable. + [] =:= ordsets:to_list(ordsets:subtract(qlc:vars(Value), Imported)). + +is_bindable(Value) -> + case normalise(Value) of + {ok, _C} -> + true; + not_ok -> + false + end. + +pattern(P0, AnonI, Frame0, BindFun, State) -> + P1 = try + expand_pattern_records(P0, State) + catch _:_ -> P0 % template, records already expanded + end, + %% Makes test for equality simple: + P2 = set_line(P1, 0), + {P3, AnonN} = anon_var(P2, AnonI), + {P4, F1} = match_in_pattern(tuple2cons(P3), Frame0, BindFun), + {P, F2} = element_calls(P4, F1, BindFun, _Imp=[]), % kludge for templates + {var, _, PatternVar} = UniqueVar = unique_var(), + F = unify('=:=', UniqueVar, P, F2, BindFun, _Imported = []), + {F, AnonN, PatternVar}. + +frame2frames(failed) -> + []; +frame2frames(F) -> + [F]. + +match_in_pattern({match, _, E10, E20}, F0, BF) -> + {E1, F1} = match_in_pattern(E10, F0, BF), + {E2, F} = match_in_pattern(E20, F1, BF), + %% This is for join: chosing a constant could "hide" a variable. + E = case BF('=:=', E1) of + true -> E1; + false -> E2 + end, + {E, unify('=:=', E1, E2, F, BF, _Imported = [])}; +match_in_pattern(T, F0, BF) when is_tuple(T) -> + {L, F} = match_in_pattern(tuple_to_list(T), F0, BF), + {list_to_tuple(L), F}; +match_in_pattern([E0 | Es0], F0, BF) -> + {E, F1} = match_in_pattern(E0, F0, BF), + {Es, F} = match_in_pattern(Es0, F1, BF), + {[E | Es], F}; +match_in_pattern(E, F, _BF) -> + {E, F}. + +-define(ANON_VAR(N), N). + +anon_var(E, AnonI) -> + var_mapfold(fun({var, L, '_'}, N) -> + {{var, L, ?ANON_VAR(N)}, N+1}; + (Var, N) -> {Var, N} + end, AnonI, E). + +set_line(T, L) -> + map_lines(fun(_L) -> L end, T). + +-record(fstate, {state, bind_fun, imported}). + +filter(_E, []=Frames0, _BF, _State, _Imported) -> + Frames0; +filter(E0, Frames0, BF, State, Imported) -> + E = pre_expand(E0), + FState = #fstate{state = State, bind_fun = BF, imported = Imported}, + filter1(E, Frames0, FState). + +%% One frame for each path through the and/or expression. +%% +%% "A xor B" is equal to "(A and not B) or (not A and B)". +%% Ignoring "not B" and "not A" this is the same as "A or B"; +%% "xor" can be handled just as "or". +%% +%% One must handle filters with care, both when joining and when +%% looking up values. The reference is a nested loop: if the filter +%% fails for some combination of values, it must fail also when +%% looking up values or joining. In other words, the excluded +%% combinations of values must not evaluate to anything but 'false'. +%% Filters looking like guards are allowed to fail since for such +%% filter the so called guard semantics ensures that the value is +%% 'false' if it is not 'true'. This behavior was inherited from the +%% ordinary list comprehension, where it has been considered a bug +%% kept for backward compatibility. Now it has become part of the QLC +%% semantics, and hard to change (at least in the qlc module). +%% +%% A special case is =/2. If there is a chance that the =/2 fails +%% (badmatch) for some combination of values, that combination cannot +%% be excluded. If the variable is bound only once, it is OK, but not +%% twice (or more). The current implementation does not handle =/2 at +%% all (except in generator patterns). + +filter1({op, _, Op, L0, R0}, Fs, FS) when Op =:= '=:='; Op =:= '==' -> + #fstate{state = S, bind_fun = BF, imported = Imported} = FS, + %% In the transformed code there are no records in lookup values + %% because records are expanded away in prep_expr. + lists:flatmap(fun(F0) -> + {L, F1} = prep_expr(L0, F0, S, BF, Imported), + {R, F2} = prep_expr(R0, F1, S, BF, Imported), + case unify(Op, L, R, F2, BF, Imported) of + failed -> []; + F -> [F] + end + end, Fs); +filter1({op, _, Op, L, R}, Fs, FS) when Op =:= 'and'; Op =:= 'andalso' -> + filter1(R, filter1(L, Fs, FS), FS); +filter1({op, _, Op, L, R}, Fs, FS) when Op =:= 'or'; + Op =:= 'orelse'; + Op =:= 'xor' -> + filter1(L, Fs, FS) ++ filter1(R, Fs, FS); +filter1({atom,_,Atom}, _Fs, _FS) when Atom =/= true -> + []; +filter1({call,L,{remote,_,{atom,_,erlang},{atom,_,is_record}},[T,R]}, + Fs, FS) -> + filter1({op,L,'=:=',{call,L,{remote,L,{atom,L,erlang},{atom,L,element}}, + [{integer,L,1},T]},R}, + Fs, FS); +%% erlang:is_record/3 (the size information is ignored): +filter1({call,L,{remote,L1,{atom,_,erlang}=M,{atom,L2,is_record}},[T,R,_Sz]}, + Fs, FS) -> + filter1({call,L,{remote,L1,M,{atom,L2,is_record}},[T,R]}, Fs, FS); +filter1(_E, Fs, _FS) -> + Fs. + +%% filter() tries to extract as much information about constant +%% columns as possible. It ignores those parts of the filter that are +%% uninteresting. safe_filter() on the other hand ensures that the +%% bindings returned capture _all_ aspects of the filter (wrt BF). +safe_filter(_E, []=Frames0, _BF, _State, _Imported) -> + Frames0; +safe_filter(E0, Frames0, BF, State, Imported) -> + E = pre_expand(E0), + FState = #fstate{state = State, bind_fun = BF, imported = Imported}, + safe_filter1(E, Frames0, FState). + +safe_filter1({op, _, Op, L0, R0}, Fs, FS) when Op =:= '=:='; Op =:= '==' -> + #fstate{state = S, bind_fun = BF, imported = Imported} = FS, + lists:flatmap(fun(F0) -> + {L, F1} = prep_expr(L0, F0, S, BF, Imported), + {R, F2} = prep_expr(R0, F1, S, BF, Imported), + case safe_unify(Op, L, R, F2, BF, Imported) of + failed -> []; + F -> [F] + end + end, Fs); +safe_filter1({op, _, Op, L, R}, Fs, FS) when Op =:= 'and'; Op =:= 'andalso' -> + safe_filter1(R, safe_filter1(L, Fs, FS), FS); +safe_filter1({op, _, Op, L, R}, Fs, FS) when Op =:= 'or'; Op =:= 'orelse' -> + safe_filter1(L, Fs, FS) ++ safe_filter1(R, Fs, FS); +safe_filter1({atom,_,true}, Fs, _FS) -> + Fs; +safe_filter1(_E, _Fs, _FS) -> + []. + +%% Substitutions: +%% M:F() for {M,F}(); erlang:F() for F(); is_record() for record(). +pre_expand({call,L1,{atom,L2,record},As}) -> + pre_expand({call,L1,{atom,L2,is_record},As}); +pre_expand({call,L,{atom,_,_}=F,As}) -> + pre_expand({call,L,{remote,L,{atom,L,erlang},F},As}); +pre_expand({call,L,{tuple,_,[M,F]},As}) -> + pre_expand({call,L,{remote,L,M,F},As}); +pre_expand(T) when is_tuple(T) -> + list_to_tuple(pre_expand(tuple_to_list(T))); +pre_expand([E | Es]) -> + [pre_expand(E) | pre_expand(Es)]; +pre_expand(T) -> + T. + +%% -> [ [{{QualifierNumber,ColumnNumber}, [Value]}] ] +frames_to_columns([], _PatternVars, _DerefFun, _SelectorFun, _Imp, _CompOp) -> + []; +frames_to_columns(Fs, PatternVars, DerefFun, SelectorFun, Imp, CompOp) -> + %% It is important that *the same* variables are introduced for + %% columns in every frame. (When trying to find constant columns + %% it doesn't matter, but when trying to find joined columns, the + %% same variables have to be the representatives in every frame.) + SizesVarsL = + [begin + PatVar = {var,0,PV}, + PatternSizes = [pattern_size([F], PatVar, false) || + F <- Fs], + MaxPZ = lists:max([0 | PatternSizes -- [undefined]]), + Vars = pat_vars(MaxPZ), + {PatternId#qid.no, PatVar, PatternSizes, Vars} + end || {PatternId, PV} <- PatternVars], + BF = fun(_Op, Value) -> is_bindable(Value) end, + Fun = fun({_PatN, PatVar, PatSizes, Vars}, Frames) -> + [unify('=:=', pat_tuple(Sz, Vars), PatVar, Frame, BF, Imp) || + {Sz, Frame} <- lists:zip(PatSizes, Frames)] + end, + NFs = lists:foldl(Fun, Fs, SizesVarsL), + [frames2cols(NFs, PatN, PatSizes, Vars, DerefFun, SelectorFun, CompOp) || + {PatN, _PatVar, PatSizes, Vars} <- SizesVarsL]. + +frames2cols(Fs, PatN, PatSizes, Vars, DerefFun, SelectorFun, CompOp) -> + Rs = [ begin + RL = [{{PatN,Col},cons2tuple(element(2, Const))} || + {V, Col} <- lists:zip(sublist(Vars, PatSz), + seq(1, PatSz)), + %% Do not handle the case where several + %% values compare equal, e.g. "X =:= 1 + %% andalso X == 1.0". Looking up both + %% values or one of them won't always do + %% because it is more or less undefined + %% whether the table returns the given key + %% or the one stored in the table. Or + %% rather, it would be strange if the table + %% did not return the stored key upon + %% request, but the 'lookup_fun' function + %% may have to add the given key (see also + %% gb_table in qlc(3)). (Not a very strong + %% argument. "X =:= 1" could (should?) be + %% seen as a bug.) Note: matching tables + %% cannot skip the filter, but looking up + %% one of the values should be OK. + tl(Consts = DerefFun(V, F)) =:= [], + (Const = (SelectorFun(F))(hd(Consts))) =/= no], + sofs:relation(RL) % possibly empty + end || {F,PatSz} <- lists:zip(Fs, PatSizes)], + Ss = sofs:from_sets(Rs), + %% D: columns occurring in every frame (path). + D = sofs:intersection(sofs:projection(fun(S) -> sofs:projection(1, S) end, + Ss)), + Cs = sofs:restriction(sofs:relation_to_family(sofs:union(Ss)), D), + [C || {_,Vs}=C <- sofs:to_external(Cs), not col_ignore(Vs, CompOp)]. + +pat_vars(N) -> + [unique_var() || _ <- seq(1, N)]. + +pat_tuple(Sz, Vars) when is_integer(Sz), Sz > 0 -> + TupleTail = unique_var(), + {cons_tuple, list2cons(sublist(Vars, Sz) ++ TupleTail)}; +pat_tuple(_, _Vars) -> + unique_var(). + +%% Do not handle tests as "X =:= 1.0 orelse X == 1" either. +%% Similar problems as described above. +col_ignore(_Vs, '=:=') -> + false; +col_ignore(Vs, '==') -> + length(Vs) =/= length(lists:usort([element(2, normalise(V)) || V <- Vs])). + +pattern_sizes(PatternVars, Fs) -> + [{QId#qid.no, Size} || + {QId,PV} <- PatternVars, + undefined =/= (Size = pattern_size(Fs, {var,0,PV}, true))]. + +pattern_size(Fs, PatternVar, Exact) -> + Fun = fun(F) -> (deref_pattern(_Imported = []))(PatternVar, F) end, + Derefs = lists:flatmap(Fun, Fs), + Szs = [pattern_sz(Cs, 0, Exact) || {cons_tuple, Cs} <- Derefs], + case lists:usort(Szs) of + [Sz] when is_integer(Sz), Sz >= 0 -> Sz; + [] when not Exact -> 0; + _ -> undefined + end. + +pattern_sz({cons,_,_C,E}, Col, Exact) -> + pattern_sz(E, Col+1, Exact); +pattern_sz({nil,_}, Sz, _Exact) -> + Sz; +pattern_sz(_, _Sz, true) -> + undefined; +pattern_sz(_, Sz, false) -> + Sz. + +deref_pattern(Imported) -> + fun(PV, F) -> deref_values(PV, F, Imported) end. + +prep_expr(E, F, S, BF, Imported) -> + element_calls(tuple2cons(expand_expr_records(E, S)), F, BF, Imported). + +unify_column(Frame, Var, Col, BindFun, Imported) -> + Call = {call,0,{atom,0,element},[{integer,0,Col}, {var,0,Var}]}, + element_calls(Call, Frame, BindFun, Imported). + +%% cons_tuple is used for representing {V1, ..., Vi | TupleTail}. +%% +%% Tests like "element(2, X) =:= a" are represented by "tuple tails": +%% {_, a | _}. The tail may be unified later, when more information +%% about the size of the tuple is known. +element_calls({call,_,{remote,_,{atom,_,erlang},{atom,_,element}}, + [{integer,_,I},Term0]}, F0, BF, Imported) when I > 0 -> + TupleTail = unique_var(), + VarsL = [unique_var() || _ <- lists:seq(1, I)], + Vars = VarsL ++ TupleTail, + Tuple = {cons_tuple, list2cons(Vars)}, + VarI = lists:nth(I, VarsL), + {Term, F} = element_calls(Term0, F0, BF, Imported), + {VarI, unify('=:=', Tuple, Term, F, BF, Imported)}; +element_calls({call,L1,{atom,_,element}=E,As}, F0, BF, Imported) -> + %% erl_expand_records should add "erlang:"... + element_calls({call,L1,{remote,L1,{atom,L1,erlang},E}, As}, F0, BF, + Imported); +element_calls(T, F0, BF, Imported) when is_tuple(T) -> + {L, F} = element_calls(tuple_to_list(T), F0, BF, Imported), + {list_to_tuple(L), F}; +element_calls([E0 | Es0], F0, BF, Imported) -> + {E, F1} = element_calls(E0, F0, BF, Imported), + {Es, F} = element_calls(Es0, F1, BF, Imported), + {[E | Es], F}; +element_calls(E, F, _BF, _Imported) -> + {E, F}. + +unique_var() -> + {var, 0, make_ref()}. + +is_unique_var({var, _L, V}) -> + is_reference(V). + +expand_pattern_records(P, State) -> + E = {'case',0,{atom,0,true},[{clause,0,[P],[],[{atom,0,true}]}]}, + {'case',_,_,[{clause,0,[NP],_,_}]} = expand_expr_records(E, State), + NP. + +expand_expr_records(E, State) -> + RecordDefs = State#state.records, + Forms = RecordDefs ++ [{function,1,foo,0,[{clause,1,[],[],[pe(E)]}]}], + [{function,_,foo,0,[{clause,_,[],[],[NE]}]}] = + erl_expand_records:module(Forms, [no_strict_record_tests]), + NE. + +%% Partial evaluation. +pe({op,Line,Op,A}) -> + erl_eval:partial_eval({op,Line,Op,pe(A)}); +pe({op,Line,Op,L,R}) -> + erl_eval:partial_eval({op,Line,Op,pe(L),pe(R)}); +pe(T) when is_tuple(T) -> + list_to_tuple(pe(tuple_to_list(T))); +pe([E | Es]) -> + [pe(E) | pe(Es)]; +pe(E) -> + E. + +unify(Op, E1, E2, F, BF, Imported) -> + unify(Op, E1, E2, F, BF, Imported, false). + +safe_unify(Op, E1, E2, F, BF, Imported) -> + unify(Op, E1, E2, F, BF, Imported, true). + +unify(_Op, _E1, _E2, failed, _BF, _Imported, _Safe) -> % contradiction + failed; +unify(_Op, E, E, F, _BF, _Imported, _Safe) -> + F; +unify(Op, {var, _, _}=Var, E2, F, BF, Imported, Safe) -> + extend_frame(Op, Var, E2, F, BF, Imported, Safe); +unify(Op, E1, {var, _, _}=Var, F, BF, Imported, Safe) -> + extend_frame(Op, Var, E1, F, BF, Imported, Safe); +unify(Op, {cons_tuple, Es1}, {cons_tuple, Es2}, F, BF, Imported, Safe) -> + unify(Op, Es1, Es2, F, BF, Imported, Safe); +unify(Op, {cons, _, L1, R1}, {cons, _, L2, R2}, F, BF, Imported, Safe) -> + E = unify(Op, L1, L2, F, BF, Imported, Safe), + unify(Op, R1, R2, E, BF, Imported, Safe); +unify(Op, E1, E2, F, _BF, _Imported, Safe) -> + try + {ok, C1} = normalise(E1), + {ok, C2} = normalise(E2), + if + Op =:= '=:=', C1 =:= C2 -> + F; + Op =:= '==', C1 == C2 -> + F; + true -> + failed + end + catch error:_ when Safe -> failed; + error:_ when not Safe -> F % ignored + end. +%% Binaries are not handled at all (by unify). + +%% Note that a variable can be bound to several values, for instance: +%% X =:= 3, X == 3.0. As a consequence, deref() returns a list of +%% values. + +%% Binding a variable to several values makes the unification and +%% dereferencing more complicated. An alternative would be not to try +%% to find lookup values for such QLCs at all. That might have been a +%% better design decision. + +-record(bind, {var, value, op}). + +extend_frame(Op, Var, Value, F, BF, Imported, Safe) -> + case var_values(Var, F) of + [] -> + case Value of + {var, _, _} -> + case var_values(Value, F) of + [] -> + add_binding(Op, Value, Var, F, BF, Imported, Safe); + ValsOps -> + maybe_add_binding(ValsOps, Op, Value, Var, F, + BF, Imported, Safe) + end; + _ -> + add_binding(Op, Var, Value, F, BF, Imported, Safe) + end; + ValsOps -> + maybe_add_binding(ValsOps, Op, Var, Value, F, BF, Imported, Safe) + end. + +maybe_add_binding(ValsOps, Op, Var, Value, F0, BF, Imported, Safe) -> + case unify_var_bindings(ValsOps, Op, Value, F0, BF, Imported, Safe) of + failed -> + failed; + F -> + case already_bound(Op, Var, Value, F) of + true -> + F; + false -> + add_binding(Op, Var, Value, F, BF, Imported, Safe) + end + end. + +already_bound(Op, Var, Value, F) -> + %% Note: all variables are treated as imported. The dereferenced + %% values must not depend on Imported. + BFun = fun(_DV, BOp) -> Op =:= BOp end, + DerefValue = deref_value(Value, Op, F, BFun, all), + DerefVar = deref_var(Var, F, BFun, all), + DerefValue -- DerefVar =:= []. + +unify_var_bindings([], _Op, _Value, F, _BF, _Imported, _Safe) -> + F; +unify_var_bindings([{VarValue, Op2} | Bindings], + Op1, Value, F0, BF, Imported, Safe) -> + Op = deref_op(Op1, Op2), + case unify(Op, VarValue, Value, F0, BF, Imported, Safe) of + failed -> + failed; + F -> + unify_var_bindings(Bindings, Op1, Value, F, BF, Imported, Safe) + end. + +deref_op('=:=', '=:=') -> + '=:='; +deref_op(_, _) -> + '=='. + +%%% Note: usort works; {integer,L,3} does not match {float,L,3.0}. + +var_values(Var, Frame) -> + [{Value, Op} || + #bind{value = Value, op = Op} <- var_bindings(Var, Frame)]. + +deref_var(Var, Frame, Imported) -> + deref_var(Var, Frame, fun(_DV, _Op) -> true end, Imported). + +deref_var(Var, Frame, BFun, Imported) -> + lists:usort([ValOp || + #bind{value = Value, op = Op} <- var_bindings(Var, Frame), + ValOp <- deref_value(Value, Op, Frame, BFun, Imported)]). + +deref_value(Value, Op, Frame, BFun, Imported) -> + lists:usort([{Val,value_op(ValOp, Op, Imported)} || + {Val,_Op}=ValOp <- deref(Value, Frame, BFun, Imported)]). + +add_binding(Op, Var0, Value0, F, BF, Imported, Safe) -> + {Var, Value} = maybe_swap_var_value(Var0, Value0, F, Imported), + case BF(Op, Value) of + true -> + add_binding2(Var, Value, Op, F); + false when Safe -> + failed; + false when not Safe -> + F + end. + +add_binding2(Var, Value, Op, F) -> + case occurs(Var, Value, F) of + true -> + failed; + false -> + [#bind{var = Var, value = Value, op = Op} | F] + end. + +%% Ensure that imported variables are visible in the dereferenced +%% value by pushing them to the end of the binding chain. Be careful +%% not to introduce loops. +maybe_swap_var_value(Var, Value, Frame, Imported) -> + case do_swap_var_value(Var, Value, Frame, Imported) of + true -> + {Value, Var}; + false -> + {Var, Value} + end. + +do_swap_var_value({var, _, V1}=Var1, {var, _, V2}=Var2, F, Imported) -> + case swap_vv(Var1, Var2, F) of + [] -> + case swap_vv(Var2, Var1, F) of + [] -> + ordsets:is_element(V1, Imported) andalso + not ordsets:is_element(V2, Imported); + _Bs -> + true + end; + _Bs -> + false + end; +do_swap_var_value(_, _, _F, _Imp) -> + false. + +swap_vv(V1, V2, F) -> + [V || #bind{value = V} <- var_bindings(V1, F), V =:= V2]. + +normalise(E) -> + %% Tuple tails are OK. + case catch erl_parse:normalise(var2const(cons2tuple(E))) of + {'EXIT', _} -> + not_ok; + C -> + {ok, C} + end. + +occurs(V, V, _F) -> + true; +occurs(V, {var, _, _} = Var, F) -> + lists:any(fun(B) -> occurs(V, B#bind.value, F) end, var_bindings(Var, F)); +occurs(V, T, F) when is_tuple(T) -> + lists:any(fun(E) -> occurs(V, E, F) end, tuple_to_list(T)); +occurs(V, [E | Es], F) -> + occurs(V, E, F) orelse occurs(V, Es, F); +occurs(_V, _E, _F) -> + false. + +deref_values(E, Frame, Imported) -> + deref_values(E, Frame, fun(_DV, _Op) -> true end, Imported). + +deref_values(E, Frame, BFun, Imported) -> + lists:usort([V || + {V, Op} <- deref(E, Frame, BFun, Imported), + BFun(V, Op)]). + +deref(E, F, Imp) -> + BFun = fun(_DV, _Op) -> true end, + deref(E, F, BFun, Imp). + +deref({var, _, _}=V, F, BFun, Imp) -> + DBs = lists:flatmap(fun(B) -> deref_binding(B, F, BFun, Imp) + end, var_bindings(V, F)), + case DBs of + [] -> + [{V, '=:='}]; + _ -> + lists:usort(DBs) + end; +deref(T, F, BFun, Imp) when is_tuple(T) -> + [{list_to_tuple(DL), Op} || + {DL, Op} <- deref(tuple_to_list(T), F, BFun, Imp)]; +deref(Es, F, BFun, Imp) when is_list(Es) -> + L = [deref(C, F, BFun, Imp) || C <- Es], + lists:usort([deref_list(S) || S <- all_comb(L)]); +deref(E, _F, _BFun, _Imp) -> + [{E, '=:='}]. + +var_bindings(Var, F) -> + [B || #bind{var = V}=B <- F, V =:= Var]. + +deref_binding(Bind, Frame, BFun, Imp) -> + #bind{value = Value, op = Op0} = Bind, + [{Val, Op} || + {Val, _Op}=ValOp <- deref(Value, Frame, BFun, Imp), + BFun(Val, Op = value_op(ValOp, Op0, Imp))]. + +deref_list(L) -> + Op = case lists:usort([Op || {_Val, Op} <- L]) of + ['=:='] -> + '=:='; + _ -> + '==' + end, + {[V || {V, _Op} <- L], Op}. + +value_op({_V, '=='}, _BindOp, _Imp) -> + '=='; +value_op({_V, '=:='}, _BindOp='=:=', _Imp) -> + '=:='; +value_op({V, '=:='}, _BindOp='==', Imp) -> + case free_of_integers(V, Imp) of + true -> + '=:='; + false -> + '==' + end. + +all_comb([]) -> + [[]]; +all_comb([Cs | ICs]) -> + [[C | L] || C <- Cs, L <- all_comb(ICs)]. + +%% "Free of integers" here means that there are not imported variables +%% in V (which could take on integer values), but there may be other +%% variables in V. +free_of_integers(V, Imported) -> + not has_integer(V) andalso not has_imported_vars(V, Imported). + +%% Assumes that imported variables are representatives, if Value is a +%% dereferenced value. +has_imported_vars(Value, all) -> + qlc:vars(Value) =/= []; +has_imported_vars(Value, Imported) -> + [Var || Var <- qlc:vars(Value), lists:member(Var, Imported)] =/= []. + +has_integer(Abstr) -> + try + has_int(Abstr) + catch throw:true -> true + end. + +has_int({integer,_,I}) when float(I) == I -> + throw(true); +has_int({float,_,F}) when round(F) == F -> + throw(true); +has_int(T) when is_tuple(T) -> + has_int(tuple_to_list(T)); +has_int([E | Es]) -> + has_int(E), + has_int(Es); +has_int(_) -> + false. + +tuple2cons({tuple, _, Es}) -> + {cons_tuple, list2cons(tuple2cons(Es))}; +tuple2cons(T) when is_tuple(T) -> + list_to_tuple(tuple2cons(tuple_to_list(T))); +tuple2cons([E | Es]) -> + [tuple2cons(E) | tuple2cons(Es)]; +tuple2cons(E) -> + E. + +list2cons([E | Es]) -> + {cons, 0, E, list2cons(Es)}; +list2cons([]) -> + {nil, 0}; +list2cons(E) -> + E. + +%% Returns {..., Variable} if Variable is a tuple tail. +cons2tuple({cons_tuple, Es}) -> + {tuple, 0, cons2list(Es)}; +cons2tuple(T) when is_tuple(T) -> + list_to_tuple(cons2tuple(tuple_to_list(T))); +cons2tuple([E | Es]) -> + [cons2tuple(E) | cons2tuple(Es)]; +cons2tuple(E) -> + E. + +cons2list({cons, _, L, R}) -> + [cons2tuple(L) | cons2list(R)]; +cons2list({nil, _}) -> + []; +cons2list(E) -> % tuple tail (always a variable) + [cons2tuple(E)]. + +%% Returns true if all bindings in F1 also occur in F2. +%% Viewing F1 and F2 as sets, the fact that F1 is a subset of F2 iff +%% F1 union F2 is equal to F2 is used. (This should take care of +%% issues with anonymous variables.) +bindings_is_subset(F1, F2, Imported) -> + BF = fun(_Op, _Value) -> true end, % don't need any test here + %% Extend F2 with the bindings in F1: + F = lists:foldl(fun(#bind{var = V, value = Value, op = Op}, Frame) -> + unify(Op, V, Value, Frame, BF, Imported) + end, F2, F1), + bindings_subset(F, F2, Imported) andalso bindings_subset(F2, F, Imported). + +bindings_subset(F1, F2, Imp) -> + Vars = lists:usort([V || #bind{var = V} <- F1, not is_unique_var(V)]), + lists:all(fun(V) -> + deref_var(V, F1, Imp) =:= deref_var(V, F2, Imp) + end, Vars). + +%% Recognizes all QLCs on the form [T || P <- LE, F] such that +%% ets:fun2ms(fun(P) when F -> T end) is a match spec. This is OK with +%% the guard semantics implemented in filter/_ below. If one chooses +%% not to have guard semantics, affected filters will have to be +%% recognized and excluded here as well. +try_ms(E, P, Fltr, State) -> + L = 1, + Fun = {'fun',L,{clauses,[{clause,L,[P],[[Fltr]],[E]}]}}, + Expr = {call,L,{remote,L,{atom,L,ets},{atom,L,fun2ms}},[Fun]}, + Form0 = {function,L,foo,0,[{clause,L,[],[],[Expr]}]}, + Form = restore_line_numbers(Form0), + X = ms_transform:parse_transform(State#state.records ++ [Form], []), + case catch + begin + {function,L,foo,0,[{clause,L,[],[],[MS0]}]} = lists:last(X), + MS = erl_parse:normalise(var2const(MS0)), + XMS = ets:match_spec_compile(MS), + true = is_binary(XMS), + {ok, MS, MS0} + end of + {'EXIT', _Reason} -> + no; + Reply -> + Reply + end. + +filters_as_one([]) -> + {atom, 0, true}; +filters_as_one(FilterData) -> + [{_,{fil,Filter1}} | Filters] = lists:reverse(FilterData), + lists:foldr(fun({_QId,{fil,Filter}}, AbstF) -> + {op,0,'andalso',Filter,AbstF} + end, Filter1, Filters). + +qual_data(Qualifiers) -> + F = fun(T) -> + [{QId,Q} || {QId,_,_,Q} <- Qualifiers, element(1,Q) =:= T] + end, + {F(fil), F(gen)}. + +set_field(Pos, Fs, Data) -> + lists:sublist(Fs, Pos-1) ++ [Data] ++ lists:nthtail(Pos, Fs). + +qdata([{#qid{no = QIdNo},{_QIVs,{{gen,_P,LE,_GV},GoI,SI}}} | QCs], L) -> + Init = case LE of + {join, Op, Q1, Q2, H1, H2, Cs1_0, Cs2_0} -> + Cs1 = qcon(Cs1_0), + Cs2 = qcon(Cs2_0), + %% -- R12B-3: {nil,L} + %% R12B-4 --: {atom,L,v1} + Compat = {atom,L,v1}, % meant for redundant match spec + CF = closure({tuple,L,[Cs1,Cs2,Compat]}, L), + {tuple,L,[?A(join),?A(Op),?I(Q1),?I(Q2),H1,H2,CF]}; + _ -> + closure(LE, L) + end, + %% Create qual_data (see qlc.erl): + {cons,L,{tuple,L,[?I(QIdNo),?I(GoI),?I(SI),{tuple,L,[?A(gen),Init]}]}, + qdata(QCs, L)}; +qdata([{#qid{no = QIdNo},{_QIVs,{{fil,_F},GoI,SI}}} | QCs], L) -> + %% Create qual_data (see qlc.erl): + {cons,L,{tuple,L,[?I(QIdNo),?I(GoI),?I(SI),?A(fil)]},qdata(QCs, L)}; +qdata([], L) -> + {nil,L}. + +qcon(Cs) -> + list2cons([{tuple,0,[{integer,0,Col},list2cons(qcon1(ConstOps))]} || + {Col,ConstOps} <- Cs]). + +qcon1(ConstOps) -> + [{tuple,0,[Const,abstr(Op, 0)]} || {Const,Op} <- ConstOps]. + +%% The original code (in Source) is used for filters and the template +%% since the translated code can have QLCs and we don't want them to +%% be visible. +qcode(E, QCs, Source, L) -> + CL = [begin + Bin = term_to_binary(C, [compressed]), + {bin, L, [{bin_element, L, + {string, L, binary_to_list(Bin)}, + default, default}]} + end || {_,C} <- lists:keysort(1, [{qlc:template_state(),E} | + qcode(QCs, Source)])], + {'fun', L, {clauses, [{clause, L, [], [], [{tuple, L, CL}]}]}}. + +qcode([{_QId, {_QIvs, {{gen,P,_LE,_GV}, GoI, _SI}}} | QCs], Source) -> + [{GoI,undo_no_shadows(P)} | qcode(QCs, Source)]; +qcode([{QId, {_QIVs, {{fil,_F}, GoI, _SI}}} | QCs], Source) -> + {ok,OrigF} = dict:find(QId, Source), + [{GoI,undo_no_shadows(OrigF)} | qcode(QCs, Source)]; +qcode([], _Source) -> + []. + +closure(Code, L) -> + {'fun',L,{clauses,[{clause,L,[],[],[Code]}]}}. + +simple(L, Var, Init, Line) -> + {tuple,L,[?A(simple_v1),?A(Var),Init,?I(Line)]}. + +clauses([{QId,{QIVs,{QualData,GoI,S}}} | QCs], RL, Fun, Go, NGV, E, IVs,St) -> + ?DEBUG("QIVs = ~p~n", [QIVs]), + ?DEBUG("IVs = ~p~n", [IVs]), + ?DEBUG("GoI = ~p, S = ~p~n", [GoI, S]), + L = no_compiler_warning(get_lcid_line(QId#qid.lcid)), + Cs = case QualData of + {gen,P,_LE,GV} -> + generator(S, QIVs, P, GV, NGV, E, IVs, RL, Fun, Go,GoI,L,St); + {fil,F} -> + filter(F, L, QIVs, S, RL, Fun, Go, GoI, IVs, St) + end, + Cs ++ clauses(QCs, RL, Fun, Go, NGV, E, IVs, St); +clauses([], _RL, _Fun, _Go, _NGV, _IVs, _E, _St) -> + []. + +final(RL, IVs, L, State) -> + IAs = replace(IVs, IVs, '_'), + AsL = pack_args([?I(0) | abst_vars([RL, '_', '_'] ++ IAs, L)], L, State), + Grd = [is_list_c(RL, L)], + Rev = {call,L,{remote,L,?A(lists),?A(reverse)},[?V(RL)]}, + CL = {clause,L,AsL,[Grd],[Rev]}, + AsF = pack_args([?I(0) | abst_vars(['_', '_', '_'] ++ IAs, L)], L, State), + CF = {clause,L,AsF,[],[?ABST_NO_MORE]}, + [CL, CF]. + +template(E, RL, Fun, Go, AT, L, IVs, State) -> + I = qlc:template_state(), GoI = qlc:template_state(), + ARL = {cons,L,E,abst_vars(RL, L)}, + Next = next(Go, GoI, L), + As0 = abst_vars([RL, Fun, Go] ++ IVs, L), + As = pack_args([?I(I) | As0], L, State), + NAs = pack_args([Next, ARL] ++ abst_vars([Fun, Go] ++ IVs, L), L, State), + Grd = [is_list_c(RL, L)], + CL = {clause,L,As,[Grd],[{call,L,?V(Fun),NAs}]}, + + %% Extra careful here or arguments will be lifted into a wide fun. + F = case split_args([Next | As0], L, State) of + {ArgsL, ArgsT} -> + Call = {call,L,?V(Fun),ArgsL++[{var,L,AT}]}, + {block,L, + [{match,L,{var,L,AT},ArgsT}, + {'fun',L,{clauses,[{clause,L,[],[],[Call]}]}}]}; + FNAs -> + {'fun',L,{clauses,[{clause,L,[],[],[{call,L,?V(Fun),FNAs}]}]}} + end, + CF = {clause,L,As,[],[?ABST_MORE(E, F)]}, + [CL,CF]. + +generator(S, QIVs, P, GV, NGV, E, IVs, RL, Fun, Go, GoI, L, State) -> + ComAs = abst_vars([RL, Fun, Go], L), + InitC = generator_init(S, L, GV, RL, Fun, Go, GoI, IVs, State), + As = [?I(S + 1)| ComAs ++ abst_vars(replace(QIVs -- [GV], IVs, '_'), L)], + + MatchS = next(Go, GoI + 1, L), + AsM0 = [MatchS | ComAs ++ abst_vars(replace([GV], IVs, NGV), L)], + AsM = pack_args(AsM0, L, State), + + ContS = ?I(S + 1), + QIVs__GV = QIVs -- [GV], + Tmp = replace([GV], replace(QIVs__GV, IVs, nil), NGV), + AsC = pack_args([ContS | ComAs ++ abst_vars(Tmp, L)], L, State), + + DoneS = next(Go, GoI, L), + AsD0 = [DoneS | ComAs ++ abst_vars(replace(QIVs, IVs, nil), L)], + AsD = pack_args(AsD0, L, State), + + CsL = generator_list(P, GV, NGV, As, AsM, AsC, AsD, Fun, L, State), + CsF = generator_cont(P, GV, NGV, E, As, AsM, AsC, AsD, Fun, L, State), + [InitC | CsL ++ CsF]. + +generator_init(S, L, GV, RL, Fun, Go, GoI, IVs, State) -> + As0 = abst_vars([RL, Fun, Go] ++ replace([GV], IVs, '_'), L), + As = pack_args([?I(S) | As0], L, State), + Next = next(Go, GoI + 2, L), + NAs = pack_args([?I(S + 1) | replace([?V('_')], As0, Next)], L, State), + {clause,L,As,[],[{call,L,?V(Fun),NAs}]}. + +generator_list(P, GV, NGV, As, AsM, AsC, AsD, Fun, L, State) -> + As1 = pack_args(replace([?V(GV)], As, {cons,L,P,?V(NGV)}), L, State), + As2 = pack_args(replace([?V(GV)], As, {cons,L,?V('_'),?V(NGV)}), L,State), + As3 = pack_args(replace([?V(GV)], As, {nil,L}), L, State), + CM = {clause,L,As1,[],[{call,L,?V(Fun),AsM}]}, + CC = {clause,L,As2,[],[{call,L,?V(Fun),AsC}]}, + CD = {clause,L,As3,[],[{call,L,?V(Fun),AsD}]}, + [CM, CC, CD]. + +%% The clause 'CE' was added in R11B. The version of the generated was +%% however not incremented. +generator_cont(P, GV, NGV, E, As0, AsM, AsC, AsD, Fun, L, State) -> + As = pack_args(As0, L, State), + CF1 = ?ABST_MORE(P, ?V(NGV)), + CF2 = ?ABST_MORE(?V('_'), ?V(NGV)), + CF3 = ?ABST_NO_MORE, + CF4 = ?V(E), + CM = {clause,L,[CF1],[],[{call,L,?V(Fun),AsM}]}, + CC = {clause,L,[CF2],[],[{call,L,?V(Fun),AsC}]}, + CD = {clause,L,[CF3],[],[{call,L,?V(Fun),AsD}]}, + CE = {clause,L,[CF4],[],[CF4]}, + Cls = [CM, CC, CD, CE], + B = {'case',L,{call,L,?V(GV),[]},Cls}, + [{clause,L,As,[],[B]}]. + +filter(E, L, QIVs, S, RL, Fun, Go, GoI, IVs, State) -> + IAs = replace(QIVs, IVs, '_'), + As = pack_args([?I(S) | abst_vars([RL, Fun, Go] ++ IAs, L)], L, State), + NAs = abst_vars([RL, Fun, Go] ++ IVs, L), + TNext = next(Go, GoI + 1, L), + FNext = next(Go, GoI, L), + NAsT = pack_args([TNext | NAs], L, State), + NAsF = pack_args([FNext | NAs], L, State), + %% This is the "guard semantics" used in ordinary list + %% comprehension: if a filter looks like a guard test, it returns + %% 'false' rather than fails. + Body = case erl_lint:is_guard_test(E, State#state.records) of + true -> + CT = {clause,L,[],[[E]],[{call,L,?V(Fun),NAsT}]}, + CF = {clause,L,[],[[?A(true)]],[{call,L,?V(Fun),NAsF}]}, + [{'if',L,[CT,CF]}]; + false -> + CT = {clause,L,[?A(true)],[],[{call,L,?V(Fun),NAsT}]}, + CF = {clause,L,[?A(false)],[],[{call,L,?V(Fun),NAsF}]}, + [{'case',L,E,[CT,CF]}] + end, + [{clause,L,As,[],Body}]. + +pack_args(Args, L, State) -> + case split_args(Args, L, State) of + {ArgsL, ArgsT} -> + ArgsL ++ [ArgsT]; + _ -> + Args + end. + +split_args(Args, L, State) when length(Args) > State#state.maxargs -> + {lists:sublist(Args, State#state.maxargs-1), + {tuple,L,lists:nthtail(State#state.maxargs-1, Args)}}; +split_args(Args, _L, _State) -> + Args. + +%% Replace every element in IEs that is a member of Es by R, keep all +%% other elements as they are. +replace(Es, IEs, R) -> + [case lists:member(E, Es) of + true -> R; + false -> E + end || E <- IEs]. + +is_list_c(V, L) -> + {call,L,?A(is_list),[?V(V)]}. + +next(Go, GoI, L) -> + {call,L,?A(element),[?I(GoI),?V(Go)]}. + +aux_vars(Vars, LcN, AllVars) -> + [aux_var(Name, LcN, 0, 1, AllVars) || Name <- Vars]. + +aux_var(Name, LcN, QN, N, AllVars) -> + qlc:aux_name(lists:concat([Name, LcN, '_', QN, '_']), N, AllVars). + +no_compiler_warning(L) -> + erl_parse:set_line(L, fun(Line) -> -abs(Line) end). + +abs_loc(L) -> + loc(erl_parse:set_line(L, fun(Line) -> abs(Line) end)). + +loc(L) -> + {location,Location} = erl_parse:get_attribute(L, location), + Location. + +list2op([E], _Op) -> + E; +list2op([E | Es], Op) -> + {op,0,Op,E,list2op(Es, Op)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +qual_fold(Fun, GlobAcc0, Acc0, Forms, State) -> + F = fun(Id, {lc,L,E,Qs0}, GA0) -> + {Qs,GA,_NA} = qual_fold(Qs0, Fun, GA0, Acc0, Id, 1, []), + {{lc,L,E,Qs},GA}; + (_Id, Expr, GA) -> + {Expr,GA} + end, + qlc_mapfold(F, GlobAcc0, Forms, State). + +qual_fold([Q0 | Qs], F, GA0, A0, Id, No, NQs) -> + QId = qid(Id, No), + {Q,GA,A} = F(QId, Q0, GA0, A0), + qual_fold(Qs, F, GA, A, Id, No + 1, [Q | NQs]); +qual_fold([], _F, GA, A, _Id, _No, NQs) -> + {lists:reverse(NQs),GA,A}. + +qlc_mapfold(Fun, Acc0, Forms0, State) -> + {Forms, A, _NNo} = qlcmf(Forms0, Fun, State#state.imp, Acc0, 1), + erase(?QLC_FILE), + {Forms, A}. + +qlcmf([E0 | Es0], F, Imp, A0, No0) -> + {E, A1, No1} = qlcmf(E0, F, Imp, A0, No0), + {Es, A, No} = qlcmf(Es0, F, Imp, A1, No1), + {[E | Es], A, No}; +qlcmf(?QLC_Q(L1, L2, L3, L4, LC0, Os0), F, Imp, A0, No0) when length(Os0) < 2 -> + {Os, A1, No1} = qlcmf(Os0, F, Imp, A0, No0), + {LC, A2, No} = qlcmf(LC0, F, Imp, A1, No1), % nested... + NL = make_lcid(L1, No), + {T, A} = F(NL, LC, A2), + {?QLC_Q(L1, L2, L3, L4, T, Os), A, No + 1}; +qlcmf(?QLC_QQ(L1, L2, L3, L4, L5, L6, LC0, Os0), + F, Imp, A0, No0) when length(Os0) < 2 -> + {Os, A1, No1} = qlcmf(Os0, F, Imp, A0, No0), + {LC, A2, No} = qlcmf(LC0, F, Imp, A1, No1), % nested... + NL = make_lcid(L1, No), + {T, A} = F(NL, LC, A2), + {?QLC_QQ(L1, L2, L3, L4, L5, L6, T, Os), A, No + 1}; +qlcmf(?IMP_Q(L1, L2, LC0, Os0), F, Imp=true, A0, No0) when length(Os0) < 2 -> + {Os, A1, No1} = qlcmf(Os0, F, Imp, A0, No0), + {LC, A2, No} = qlcmf(LC0, F, Imp, A1, No1), % nested... + NL = make_lcid(L, No), + {T, A} = F(NL, LC, A2), + {?IMP_Q(L1, L2, T, Os), A, No + 1}; +qlcmf({attribute,_L,file,{File,_Line}}=Attr, _F, _Imp, A, No) -> + put(?QLC_FILE, File), + {Attr, A, No}; +qlcmf(T, F, Imp, A0, No0) when is_tuple(T) -> + {TL, A, No} = qlcmf(tuple_to_list(T), F, Imp, A0, No0), + {list_to_tuple(TL), A, No}; +qlcmf(T, _F, _Imp, A, No) -> + {T, A, No}. + +occ_vars(E) -> + qlc:var_fold(fun({var,_L,V}) -> V end, [], E). + +no_shadows(Forms0, State) -> + %% Variables that may shadow other variables are introduced in + %% LCs and Funs. Such variables (call them SV, Shadowing + %% Variables) are now renamed. Each (new) occurrence in a pattern + %% is assigned an index (integer), unique in the file. + %% + %% The state {LastIndex,ActiveVars,UsedVars,AllVars,Singletons} + %% holds the last index used for each SV (LastIndex), the SVs in + %% the current scope (ActiveVars), used SVs (UsedVars, the indexed + %% name is the key), all variables occurring in the file + %% (AllVars), and all singletons. If an SV is not used (that is, + %% is a member of Singletons), it is replaced by '_' (otherwise a + %% warning for unused variable would erroneously be emitted). If + %% the indexed name of an SV occurs in the file, next index is + %% tried (to avoid mixing up introduced names with existing ones). + %% + %% The original names of variables are kept in the line number + %% position of the abstract code: {var, {nos, OriginalName, L}, + %% NewName}. undo_no_shadows/1 re-creates the original code. + AllVars = sets:from_list(ordsets:to_list(qlc:vars(Forms0))), + ?DEBUG("nos AllVars = ~p~n", [sets:to_list(AllVars)]), + VFun = fun(_Id, LC, Vs) -> nos(LC, Vs) end, + LI = ets:new(?APIMOD,[]), + UV = ets:new(?APIMOD,[]), + D0 = dict:new(), + S1 = {LI, D0, UV, AllVars, []}, + _ = qlc_mapfold(VFun, S1, Forms0, State), + ?DEBUG("UsedIntroVars = ~p~n", [ets:match_object(UV, '_')]), + Singletons = ets:select(UV, ets:fun2ms(fun({K,0}) -> K end)), + ?DEBUG("Singletons: ~p~n", [Singletons]), + true = ets:delete_all_objects(LI), + true = ets:delete_all_objects(UV), + %% Do it again, this time we know which variables are singletons. + S2 = {LI, D0, UV, AllVars, Singletons}, + {Forms,_} = qlc_mapfold(VFun, S2, Forms0, State), + true = ets:delete(LI), + true = ets:delete(UV), + Forms. + +nos([E0 | Es0], S0) -> + {E, S1} = nos(E0, S0), + {Es, S} = nos(Es0, S1), + {[E | Es], S}; +nos({'fun',L,{clauses,Cs}}, S) -> + NCs = [begin + {H, S1} = nos_pattern(H0, S), + {[G, B], _} = nos([G0, B0], S1), + {clause,Ln,H,G,B} + end || {clause,Ln,H0,G0,B0} <- Cs], + {{'fun',L,{clauses,NCs}}, S}; +nos({lc,L,E0,Qs0}, S) -> + %% QLCs as well as LCs. It is OK to modify LCs as long as they + %% occur within QLCs--the warning messages have already been found + %% by compile_errors. + F = fun({T,Ln,P0,LE0}, QS0) when T =:= b_generate; T =:= generate -> + {LE, _} = nos(LE0, QS0), + {P, QS} = nos_pattern(P0, QS0), + {{T,Ln,P,LE}, QS}; + (Filter, QS) -> + nos(Filter, QS) + end, + {Qs, S1} = lists:mapfoldl(F, S, Qs0), + {E, _} = nos(E0, S1), + {{lc,L,E,Qs}, S}; +nos({var,L,V}=Var, {_LI,Vs,UV,_A,_Sg}=S) when V =/= '_' -> + case used_var(V, Vs, UV) of + {true, VN} -> + NL = nos_var(L, V), + {{var,NL,VN}, S}; + false -> + {Var, S} + end; +nos(T, S0) when is_tuple(T) -> + {TL, S} = nos(tuple_to_list(T), S0), + {list_to_tuple(TL), S}; +nos(T, S) -> + {T, S}. + +nos_pattern(P, S) -> + {T, NS, _} = nos_pattern(P, S, []), + {T, NS}. + +nos_pattern([P0 | Ps0], S0, PVs0) -> + {P, S1, PVs1} = nos_pattern(P0, S0, PVs0), + {Ps, S, PVs} = nos_pattern(Ps0, S1, PVs1), + {[P | Ps], S, PVs}; +nos_pattern({var,L,V}, {LI,Vs0,UV,A,Sg}, PVs0) when V =/= '_' -> + {Name, Vs, PVs} = + case lists:keysearch(V, 1, PVs0) of + {value, {V,VN}} -> + _ = used_var(V, Vs0, UV), + {VN, Vs0, PVs0}; + false -> + {VN, Vs1} = next_var(V, Vs0, A, LI, UV), + N = case lists:member(VN, Sg) of + true -> '_'; + false -> VN + end, + {N, Vs1, [{V,VN} | PVs0]} + end, + NL = nos_var(L, V), + {{var,NL,Name}, {LI,Vs,UV,A,Sg}, PVs}; +nos_pattern(T, S0, PVs0) when is_tuple(T) -> + {TL, S, PVs} = nos_pattern(tuple_to_list(T), S0, PVs0), + {list_to_tuple(TL), S, PVs}; +nos_pattern(T, S, PVs) -> + {T, S, PVs}. + +nos_var(L, Name) -> + erl_parse:set_line(L, fun(Line) -> {nos,Name,Line} end). + +used_var(V, Vs, UV) -> + case dict:find(V, Vs) of + {ok,Value} -> + VN = qlc:name_suffix(V, Value), + _ = ets:update_counter(UV, VN, 1), + {true, VN}; + error -> false + end. + +next_var(V, Vs, AllVars, LI, UV) -> + NValue = case ets:lookup(LI, V) of + [{V, Value}] -> Value + 1; + [] -> 1 + end, + true = ets:insert(LI, {V, NValue}), + VN = qlc:name_suffix(V, NValue), + case sets:is_element(VN, AllVars) of + true -> next_var(V, Vs, AllVars, LI, UV); + false -> true = ets:insert(UV, {VN, 0}), + NVs = dict:store(V, NValue, Vs), + {VN, NVs} + end. + +undo_no_shadows(E) -> + var_map(fun undo_no_shadows1/1, E). + +undo_no_shadows1({var, L, _}=Var) -> + case erl_parse:get_attribute(L, line) of + {line,{nos,V,_VL}} -> + NL = erl_parse:set_line(L, fun({nos,_V,VL}) -> VL end), + undo_no_shadows1({var, NL, V}); + _Else -> + Var + end. + +restore_line_numbers(E) -> + var_map(fun restore_line_numbers1/1, E). + +restore_line_numbers1({var, L, V}=Var) -> + case erl_parse:get_attribute(L, line) of + {line,{nos,_,_}} -> + NL = erl_parse:set_line(L, fun({nos,_V,VL}) -> VL end), + restore_line_numbers1({var, NL, V}); + _Else -> + Var + end. + +%% QLC identifier. +%% The first one encountered in the file has No=1. + +make_lcid(Attrs, No) when is_integer(No), No > 0 -> + F = fun(Line) when is_integer(Line), Line < (1 bsl ?MAX_NUM_OF_LINES) -> + sgn(Line) * ((No bsl ?MAX_NUM_OF_LINES) + sgn(Line) * Line) + end, + erl_parse:set_line(Attrs, F). + +is_lcid(Attrs) -> + try + {line,Id} = erl_parse:get_attribute(Attrs, line), + is_integer(Id) andalso (abs(Id) > (1 bsl ?MAX_NUM_OF_LINES)) + catch _:_ -> + false + end. + +get_lcid_no(IdAttrs) -> + {line,Id} = erl_parse:get_attribute(IdAttrs, line), + abs(Id) bsr ?MAX_NUM_OF_LINES. + +get_lcid_line(IdAttrs) -> + {line,Id} = erl_parse:get_attribute(IdAttrs, line), + sgn(Id) * (abs(Id) band ((1 bsl ?MAX_NUM_OF_LINES) - 1)). + +sgn(X) when X >= 0 -> + 1; +sgn(X) when X < 0 -> + -1. + +seq(S, E) when S - E =:= 1 -> + []; +seq(S, E) -> + lists:seq(S, E). + +sublist(_, 0) -> + []; +sublist(L, N) -> + lists:sublist(L, N). + +qid(LCId, No) -> + #qid{no = No, lcid = LCId}. + +abst_vars([V | Vs], L) -> + [abst_vars(V, L) | abst_vars(Vs, L)]; +abst_vars([], _L) -> + []; +abst_vars(nil, L) -> + {nil,L}; +abst_vars(V, L) -> + {var,L,V}. + +embed_vars(Vars, L) -> + embed_expr({tuple,L,Vars}, L). + +%% -> [Expr || _ <- []] on abstract format. +embed_expr(Expr, L) -> + {lc,L,Expr,[{generate,L,{var,L,'_'},{nil,L}}]}. + +%% Doesn't handle binaries very well, but don't bother for now. +var2const(E) -> + var_map(fun({var, L, V}) -> {atom, L, V} end, E). + +var_map(F, {var, _, _}=V) -> + F(V); +var_map(F, T) when is_tuple(T) -> + list_to_tuple(var_map(F, tuple_to_list(T))); +var_map(F, [E | Es]) -> + [var_map(F, E) | var_map(F, Es)]; +var_map(_F, E) -> + E. + +var_mapfold(F, A, {var, _, _}=V) -> + F(V, A); +var_mapfold(F, A0, T) when is_tuple(T) -> + {L, A} = var_mapfold(F, A0, tuple_to_list(T)), + {list_to_tuple(L), A}; +var_mapfold(F, A0, [E0 | Es0]) -> + {E, A1} = var_mapfold(F, A0, E0), + {Es, A} = var_mapfold(F, A1, Es0), + {[E | Es], A}; +var_mapfold(_F, A, E) -> + {E, A}. + +family_list(L) -> + sofs:to_external(family(L)). + +family(L) -> + sofs:relation_to_family(sofs:relation(L)). + +-ifdef(debug). +display_forms(Forms) -> + io:format("Forms ***~n"), + lists:foreach(fun(Form) -> + io:format("~s~n", [catch erl_pp:form(Form)]) + end, Forms), + io:format("End Forms ***~n"). +-else. +display_forms(_) -> + ok. +-endif. + diff --git a/lib/stdlib/src/queue.erl b/lib/stdlib/src/queue.erl new file mode 100644 index 0000000000..c09079e8d2 --- /dev/null +++ b/lib/stdlib/src/queue.erl @@ -0,0 +1,487 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(queue). + +%% Creation, inspection and conversion +-export([new/0,is_queue/1,is_empty/1,len/1,to_list/1,from_list/1,member/2]). +%% Original style API +-export([in/2,in_r/2,out/1,out_r/1]). +%% Less garbage style API +-export([get/1,get_r/1,peek/1,peek_r/1,drop/1,drop_r/1]). + +%% Higher level API +-export([reverse/1,join/2,split/2,filter/2]). + +%% Okasaki API from klacke +-export([cons/2,head/1,tail/1, + snoc/2,last/1,daeh/1,init/1,liat/1,lait/1]). + +%%-------------------------------------------------------------------------- +%% Efficient implementation of double ended fifo queues +%% +%% Queue representation +%% +%% {RearList,FrontList} +%% +%% The first element in the queue is at the head of the FrontList +%% The last element in the queue is at the head of the RearList, +%% that is; the RearList is reversed. +%% + +%% A declaration equivalent to the following is currently hard-coded +%% in erl_types.erl +%% +%% -opaque queue() :: {list(), list()}. + +%% Creation, inspection and conversion + +%% O(1) +-spec new() -> queue(). +new() -> {[],[]}. %{RearList,FrontList} + +%% O(1) +-spec is_queue(term()) -> boolean(). +is_queue({R,F}) when is_list(R), is_list(F) -> + true; +is_queue(_) -> + false. + +%% O(1) +-spec is_empty(queue()) -> boolean(). +is_empty({[],[]}) -> + true; +is_empty({In,Out}) when is_list(In), is_list(Out) -> + false; +is_empty(Q) -> + erlang:error(badarg, [Q]). + +%% O(len(Q)) +-spec len(queue()) -> non_neg_integer(). +len({R,F}) when is_list(R), is_list(F) -> + length(R)+length(F); +len(Q) -> + erlang:error(badarg, [Q]). + +%% O(len(Q)) +-spec to_list(queue()) -> list(). +to_list({In,Out}) when is_list(In), is_list(Out) -> + Out++lists:reverse(In, []); +to_list(Q) -> + erlang:error(badarg, [Q]). + +%% Create queue from list +%% +%% O(length(L)) +-spec from_list(list()) -> queue(). +from_list(L) when is_list(L) -> + f2r(L); +from_list(L) -> + erlang:error(badarg, [L]). + +%% Return true or false depending on if element is in queue +%% +%% O(length(Q)) worst case +-spec member(term(), queue()) -> boolean(). +member(X, {R,F}) when is_list(R), is_list(F) -> + lists:member(X, R) orelse lists:member(X, F); +member(X, Q) -> + erlang:error(badarg, [X,Q]). + +%%-------------------------------------------------------------------------- +%% Original style API + +%% Append to tail/rear +%% Put at least one element in each list, if it is cheap +%% +%% O(1) +-spec in(term(), queue()) -> queue(). +in(X, {[_]=In,[]}) -> + {[X], In}; +in(X, {In,Out}) when is_list(In), is_list(Out) -> + {[X|In],Out}; +in(X, Q) -> + erlang:error(badarg, [X,Q]). + +%% Prepend to head/front +%% Put at least one element in each list, if it is cheap +%% +%% O(1) +-spec in_r(term(), queue()) -> queue(). +in_r(X, {[],[_]=F}) -> + {F,[X]}; +in_r(X, {R,F}) when is_list(R), is_list(F) -> + {R,[X|F]}; +in_r(X, Q) -> + erlang:error(badarg, [X,Q]). + +%% Take from head/front +%% +%% O(1) amortized, O(len(Q)) worst case +-spec out(queue()) -> {'empty' | {'value',term()}, queue()}. +out({[],[]}=Q) -> + {empty,Q}; +out({[V],[]}) -> + {{value,V},{[],[]}}; +out({[Y|In],[]}) -> + [V|Out] = lists:reverse(In, []), + {{value,V},{[Y],Out}}; +out({In,[V]}) when is_list(In) -> + {{value,V},r2f(In)}; +out({In,[V|Out]}) when is_list(In) -> + {{value,V},{In,Out}}; +out(Q) -> + erlang:error(badarg, [Q]). + +%% Take from tail/rear +%% +%% O(1) amortized, O(len(Q)) worst case +-spec out_r(queue()) -> {'empty' | {'value',term()}, queue()}. +out_r({[],[]}=Q) -> + {empty,Q}; +out_r({[],[V]}) -> + {{value,V},{[],[]}}; +out_r({[],[Y|Out]}) -> + [V|In] = lists:reverse(Out, []), + {{value,V},{In,[Y]}}; +out_r({[V],Out}) when is_list(Out) -> + {{value,V},f2r(Out)}; +out_r({[V|In],Out}) when is_list(Out) -> + {{value,V},{In,Out}}; +out_r(Q) -> + erlang:error(badarg, [Q]). + +%%-------------------------------------------------------------------------- +%% Less garbage style API. + +%% Return the first element in the queue +%% +%% O(1) since the queue is supposed to be well formed +-spec get(queue()) -> term(). +get({[],[]}=Q) -> + erlang:error(empty, [Q]); +get({R,F}) when is_list(R), is_list(F) -> + get(R, F); +get(Q) -> + erlang:error(badarg, [Q]). + +-spec get(list(), list()) -> term(). +get(R, [H|_]) when is_list(R) -> + H; +get([H], []) -> + H; +get([_|R], []) -> % malformed queue -> O(len(Q)) + lists:last(R). + +%% Return the last element in the queue +%% +%% O(1) since the queue is supposed to be well formed +-spec get_r(queue()) -> term(). +get_r({[],[]}=Q) -> + erlang:error(empty, [Q]); +get_r({[H|_],F}) when is_list(F) -> + H; +get_r({[],[H]}) -> + H; +get_r({[],[_|F]}) -> % malformed queue -> O(len(Q)) + lists:last(F); +get_r(Q) -> + erlang:error(badarg, [Q]). + +%% Return the first element in the queue +%% +%% O(1) since the queue is supposed to be well formed +-spec peek(queue()) -> 'empty' | {'value',term()}. +peek({[],[]}) -> + empty; +peek({R,[H|_]}) when is_list(R) -> + {value,H}; +peek({[H],[]}) -> + {value,H}; +peek({[_|R],[]}) -> % malformed queue -> O(len(Q)) + {value,lists:last(R)}; +peek(Q) -> + erlang:error(badarg, [Q]). + +%% Return the last element in the queue +%% +%% O(1) since the queue is supposed to be well formed +-spec peek_r(queue()) -> 'empty' | {'value',term()}. +peek_r({[],[]}) -> + empty; +peek_r({[H|_],F}) when is_list(F) -> + {value,H}; +peek_r({[],[H]}) -> + {value,H}; +peek_r({[],[_|R]}) -> % malformed queue -> O(len(Q)) + {value,lists:last(R)}; +peek_r(Q) -> + erlang:error(badarg, [Q]). + +%% Remove the first element and return resulting queue +%% +%% O(1) amortized +-spec drop(queue()) -> queue(). +drop({[],[]}=Q) -> + erlang:error(empty, [Q]); +drop({[_],[]}) -> + {[],[]}; +drop({[Y|R],[]}) -> + [_|F] = lists:reverse(R, []), + {[Y],F}; +drop({R, [_]}) when is_list(R) -> + r2f(R); +drop({R, [_|F]}) when is_list(R) -> + {R,F}; +drop(Q) -> + erlang:error(badarg, [Q]). + +%% Remove the last element and return resulting queue +%% +%% O(1) amortized +-spec drop_r(queue()) -> queue(). +drop_r({[],[]}=Q) -> + erlang:error(empty, [Q]); +drop_r({[],[_]}) -> + {[],[]}; +drop_r({[],[Y|F]}) -> + [_|R] = lists:reverse(F, []), + {R,[Y]}; +drop_r({[_], F}) when is_list(F) -> + f2r(F); +drop_r({[_|R], F}) when is_list(F) -> + {R,F}; +drop_r(Q) -> + erlang:error(badarg, [Q]). + +%%-------------------------------------------------------------------------- +%% Higher level API + +%% Return reversed queue +%% +%% O(1) +-spec reverse(queue()) -> queue(). +reverse({R,F}) when is_list(R), is_list(F) -> + {F,R}; +reverse(Q) -> + erlang:error(badarg, [Q]). + +%% Join two queues +%% +%% Q2 empty: O(1) +%% else: O(len(Q1)) +-spec join(queue(), queue()) -> queue(). +join({R,F}=Q, {[],[]}) when is_list(R), is_list(F) -> + Q; +join({[],[]}, {R,F}=Q) when is_list(R), is_list(F) -> + Q; +join({R1,F1}, {R2,F2}) when is_list(R1), is_list(F1), is_list(R2), is_list(F2) -> + {R2,F1++lists:reverse(R1,F2)}; +join(Q1, Q2) -> + erlang:error(badarg, [Q1,Q2]). + +%% Split a queue in two +%% +%% N = 0..len(Q) +%% O(max(N, len(Q))) +-spec split(non_neg_integer(), queue()) -> {queue(),queue()}. +split(0, {R,F}=Q) when is_list(R), is_list(F) -> + {{[],[]},Q}; +split(N, {R,F}=Q) when is_integer(N), N >= 1, is_list(R), is_list(F) -> + Lf = erlang:length(F), + if N < Lf -> % Lf >= 2 + [X|F1] = F, + split_f1_to_r2(N-1, R, F1, [], [X]); + N > Lf -> + Lr = length(R), + M = Lr - (N-Lf), + if M < 0 -> + erlang:error(badarg, [N,Q]); + M > 0 -> + [X|R1] = R, + split_r1_to_f2(M-1, R1, F, [X], []); + true -> % M == 0 + {Q,{[],[]}} + end; + true -> % N == Lf + {f2r(F),r2f(R)} + end; +split(N, Q) -> + erlang:error(badarg, [N,Q]). + +%% Move N elements from F1 to R2 +split_f1_to_r2(0, R1, F1, R2, F2) -> + {{R2,F2},{R1,F1}}; +split_f1_to_r2(N, R1, [X|F1], R2, F2) -> + split_f1_to_r2(N-1, R1, F1, [X|R2], F2). + +%% Move N elements from R1 to F2 +split_r1_to_f2(0, R1, F1, R2, F2) -> + {{R1,F1},{R2,F2}}; +split_r1_to_f2(N, [X|R1], F1, R2, F2) -> + split_r1_to_f2(N-1, R1, F1, R2, [X|F2]). + +%% filter, or rather filtermap with insert, traverses in queue order +%% +%% Fun(_) -> List: O(length(List) * len(Q)) +%% else: O(len(Q) +-spec filter(fun((term()) -> boolean() | list()), queue()) -> queue(). +filter(Fun, {R0,F0}) when is_function(Fun, 1), is_list(R0), is_list(F0) -> + F = filter_f(Fun, F0), + R = filter_r(Fun, R0), + if R =:= [] -> + f2r(F); + F =:= [] -> + r2f(R); + true -> + {R,F} + end; +filter(Fun, Q) -> + erlang:error(badarg, [Fun,Q]). + +%% Call Fun in head to tail order +filter_f(_, []) -> + []; +filter_f(Fun, [X|F]) -> + case Fun(X) of + true -> + [X|filter_f(Fun, F)]; + false -> + filter_f(Fun, F); + L when is_list(L) -> + L++filter_f(Fun, F) + end. + +%% Call Fun in reverse order, i.e tail to head +%% and reverse list result from fun to match queue order +filter_r(_, []) -> + []; +filter_r(Fun, [X|R0]) -> + R = filter_r(Fun, R0), + case Fun(X) of + true -> + [X|R]; + false -> + R; + L when is_list(L) -> + lists:reverse(L, R) + end. + +%%-------------------------------------------------------------------------- +%% Okasaki API inspired by an Erlang user contribution "deque.erl" +%% by Claes Wikstrom 1999. +%% +%% This implementation does not use the internal data format from Klacke's +%% doubly ended queues that was "shamelessly stolen" from +%% "Purely Functional Data structures" by Chris Okasaki, since the data +%% format of this module must remain the same in case some application +%% has saved a queue in external format or sends it to an old node. +%% +%% This implementation tries to do the best of the situation and should +%% be almost as efficient as Okasaki's queues, except for len/1 that +%% is O(n) in this implementation instead of O(1). +%% +%% The new representation in this module again adds length field and +%% fixes this, but it is not yet default. +%% +%% The implementation keeps at least one element in both the forward +%% and the reversed lists to ensure that i.e head/1 or last/1 will +%% not have to reverse a list to find the element. +%% +%% To be compatible with the old version of this module, as much data as +%% possible is moved to the receiving side using lists:reverse/2 when data +%% is needed, except for two elements (when possible). These two elements +%% are kept to prevent alternating tail/1 and init/1 operations from +%% moving data back and forth between the sides. +%% +%% An alternative would be to balance for equal list length when one side +%% is exhausted. Although this could be better for a general double +%% ended queue, it would more han double the amortized cost for +%% the normal case (one way queue). + +%% Cons to head +%% +-spec cons(term(), queue()) -> queue(). +cons(X, Q) -> + in_r(X, Q). + +%% Return head element +%% +%% Return the first element in the queue +%% +%% O(1) since the queue is supposed to be well formed +-spec head(queue()) -> term(). +head({[],[]}=Q) -> + erlang:error(empty, [Q]); +head({R,F}) when is_list(R), is_list(F) -> + get(R, F); +head(Q) -> + erlang:error(badarg, [Q]). + +%% Remove head element and return resulting queue +%% +-spec tail(queue()) -> queue(). +tail(Q) -> + drop(Q). + +%% Functions operating on the other end of the queue + +%% Cons to tail +%% +-spec snoc(queue(), term()) -> queue(). +snoc(Q, X) -> + in(X, Q). + +%% Return last element +-spec daeh(queue()) -> term(). +daeh(Q) -> get_r(Q). +-spec last(queue()) -> term(). +last(Q) -> get_r(Q). + +%% Remove last element and return resulting queue +-spec liat(queue()) -> queue(). +liat(Q) -> drop_r(Q). +-spec lait(queue()) -> queue(). +lait(Q) -> drop_r(Q). %% Oops, mis-spelled 'tail' reversed. Forget this one. +-spec init(queue()) -> queue(). +init(Q) -> drop_r(Q). + +%%-------------------------------------------------------------------------- +%% Internal workers + +-compile({inline, [{r2f,1},{f2r,1}]}). + +%% Move all but two from R to F, if there are at least three +r2f([]) -> + {[],[]}; +r2f([_]=R) -> + {[],R}; +r2f([X,Y]) -> + {[X],[Y]}; +r2f([X,Y|R]) -> + {[X,Y],lists:reverse(R, [])}. + +%% Move all but two from F to R, if there are enough +f2r([]) -> + {[],[]}; +f2r([_]=F) -> + {F,[]}; +f2r([X,Y]) -> + {[Y],[X]}; +f2r([X,Y|F]) -> + {lists:reverse(F, []),[X,Y]}. diff --git a/lib/stdlib/src/random.erl b/lib/stdlib/src/random.erl new file mode 100644 index 0000000000..01227c29b4 --- /dev/null +++ b/lib/stdlib/src/random.erl @@ -0,0 +1,124 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(random). + +%% Reasonable random number generator. +%% The method is attributed to B. A. Wichmann and I. D. Hill +%% See "An efficient and portable pseudo-random number generator", +%% Journal of Applied Statistics. AS183. 1982. Also Byte March 1987. + +-export([seed/0, seed/1, seed/3, uniform/0, uniform/1, + uniform_s/1, uniform_s/2, seed0/0]). + +%%----------------------------------------------------------------------- +%% The type of the state + +-type ran() :: {integer(), integer(), integer()}. + +%%----------------------------------------------------------------------- + +-spec seed0() -> ran(). + +seed0() -> + {3172, 9814, 20125}. + +%% seed() +%% Seed random number generation with default values + +-spec seed() -> ran(). + +seed() -> + reseed(seed0()). + +%% seed({A1, A2, A3}) +%% Seed random number generation + +-spec seed({integer(), integer(), integer()}) -> 'undefined' | ran(). + +seed({A1, A2, A3}) -> + seed(A1, A2, A3). + +%% seed(A1, A2, A3) +%% Seed random number generation + +-spec seed(integer(), integer(), integer()) -> 'undefined' | ran(). + +seed(A1, A2, A3) -> + put(random_seed, + {abs(A1) rem 30269, abs(A2) rem 30307, abs(A3) rem 30323}). + + +-spec reseed(ran()) -> ran(). + +reseed({A1, A2, A3}) -> + case seed(A1, A2, A3) of + undefined -> seed0(); + {_,_,_} = Tuple -> Tuple + end. + +%% uniform() +%% Returns a random float between 0 and 1. + +-spec uniform() -> float(). + +uniform() -> + {A1, A2, A3} = case get(random_seed) of + undefined -> seed0(); + Tuple -> Tuple + end, + B1 = (A1*171) rem 30269, + B2 = (A2*172) rem 30307, + B3 = (A3*170) rem 30323, + put(random_seed, {B1,B2,B3}), + R = A1/30269 + A2/30307 + A3/30323, + R - trunc(R). + +%% uniform(N) -> I +%% Given an integer N >= 1, uniform(N) returns a random integer +%% between 1 and N. + +-spec uniform(pos_integer()) -> pos_integer(). + +uniform(N) when is_integer(N), N >= 1 -> + trunc(uniform() * N) + 1. + + +%%% Functional versions + +%% uniform_s(State) -> {F, NewState} +%% Returns a random float between 0 and 1. + +-spec uniform_s(ran()) -> {float(), ran()}. + +uniform_s({A1, A2, A3}) -> + B1 = (A1*171) rem 30269, + B2 = (A2*172) rem 30307, + B3 = (A3*170) rem 30323, + R = A1/30269 + A2/30307 + A3/30323, + {R - trunc(R), {B1,B2,B3}}. + +%% uniform_s(N, State) -> {I, NewState} +%% Given an integer N >= 1, uniform(N) returns a random integer +%% between 1 and N. + +-spec uniform_s(pos_integer(), ran()) -> {integer(), ran()}. + +uniform_s(N, State0) when is_integer(N), N >= 1 -> + {F, State1} = uniform_s(State0), + {trunc(F * N) + 1, State1}. diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl new file mode 100644 index 0000000000..5417ac02e5 --- /dev/null +++ b/lib/stdlib/src/re.erl @@ -0,0 +1,751 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(re). +-export([grun/3,urun/3,ucompile/2,replace/3,replace/4,split/2,split/3]). + +%% Emulator builtins in this module: +%% re:compile/1 +%% re:compile/2 +%% re:run/2 +%% re:run/3 + +split(Subject,RE) -> + split(Subject,RE,[]). + +split(Subject,RE,Options) -> + try + {NewOpt,Convert,Unicode,Limit,Strip,Group} = + process_split_params(Options,iodata,false,-1,false,false), + FlatSubject = + case is_binary(Subject) of + true -> + Subject; + false -> + case Unicode of + true -> + unicode:characters_to_binary(Subject,unicode); + false -> + iolist_to_binary(Subject) + end + end, + case compile_split(RE,NewOpt) of + {error,_Err} -> + throw(badre); + {PreCompiled, NumSub, RunOpt} -> + % OK, lets run + case re:run(FlatSubject,PreCompiled,RunOpt ++ [global]) of + nomatch -> + case Group of + true -> + convert_any_split_result([[FlatSubject]], + Convert, Unicode,true); + false -> + convert_any_split_result([FlatSubject], + Convert, Unicode,false) + end; + {match, Matches} -> + Res = do_split(FlatSubject, 0, Matches, NumSub, + Limit, Group), + Stripped = case Strip of + true -> + backstrip_empty(Res,Group); + false -> + Res + end, + convert_any_split_result(Stripped, Convert, Unicode, Group) + end + end + catch + throw:badopt -> + erlang:error(badarg,[Subject,RE,Options]); + throw:badre -> + erlang:error(badarg,[Subject,RE,Options]); + error:badarg -> + erlang:error(badarg,[Subject,RE,Options]) + end. + +backstrip_empty(List,false) -> + do_backstrip_empty(List); +backstrip_empty(List, true) -> + do_backstrip_empty_g(List). + +do_backstrip_empty_g([]) -> + []; +do_backstrip_empty_g([H]) -> + case do_backstrip_empty(H) of + [] -> + []; + _ -> + [H] + end; +do_backstrip_empty_g([H|T]) -> + case do_backstrip_empty_g(T) of + [] -> + case do_backstrip_empty(H) of + [] -> + []; + _ -> + [H] + end; + Other -> + [H|Other] + end. + +do_backstrip_empty([]) -> + []; +do_backstrip_empty([<<>>]) -> + []; +do_backstrip_empty([<<>>|T]) -> + case do_backstrip_empty(T) of + [] -> + []; + Other -> + [<<>>|Other] + end; +do_backstrip_empty([H|T]) -> + [H|do_backstrip_empty(T)]. + +convert_any_split_result(List,Type,Uni,true) -> + [ convert_split_result(Part,Type,Uni) || Part <- List ]; +convert_any_split_result(List,Type,Uni, false) -> + convert_split_result(List,Type,Uni). + +convert_split_result(List, iodata, _Unicode) -> + List; +convert_split_result(List, binary, _Unicode) -> + %% As it happens, the iodata is actually binaries + List; +convert_split_result(List, list, true) -> + [unicode:characters_to_list(Element,unicode) || Element <- List]; +convert_split_result(List, list, false) -> + [binary_to_list(Element) || Element <- List]. + +do_split(Subj, Off, _, _, 0, false) -> + <<_:Off/binary,Rest/binary>> = Subj, + [Rest]; +do_split(Subj, Off, [], _, _, false) -> + <<_:Off/binary,Rest/binary>> = Subj, + [Rest]; +do_split(Subj, Off, _, _, _,false) when byte_size(Subj) =< Off -> + [<<>>]; +do_split(Subj, Off, _, _, 0, true) -> + <<_:Off/binary,Rest/binary>> = Subj, + [[Rest]]; +do_split(Subj, Off, [], _, _, true) -> + <<_:Off/binary,Rest/binary>> = Subj, + [[Rest]]; +do_split(Subj, Off, _, _, _,true) when byte_size(Subj) =< Off -> + [[<<>>]]; +do_split(Subj, Offset, [[{MainI,MainL}|Sub]|T], NumSub, Limit, Group) -> + NewOffset = MainI+MainL, + KeptLen = MainI - Offset, + case {KeptLen,empty_sub(Sub),MainL} of + {0,true,0} -> + do_split(Subj,NewOffset,T,NumSub,Limit,Group); + _ -> + <<_:Offset/binary,Keep:KeptLen/binary,_/binary>> = Subj, + ESub = extend_subpatterns(Sub,NumSub), + Tail = do_split(Subj, NewOffset, T, NumSub, Limit - 1,Group), + case Group of + false -> + [Keep | dig_subpatterns(Subj,lists:reverse(ESub),Tail)]; + true -> + [[Keep | dig_subpatterns(Subj,lists:reverse(ESub),[])]| + Tail] + end + end. +empty_sub([]) -> + true; +empty_sub([{_,0}|T]) -> + empty_sub(T); +empty_sub(_) -> + false. + +dig_subpatterns(_,[],Acc) -> + Acc; +dig_subpatterns(Subj,[{-1,0}|T],Acc) -> + dig_subpatterns(Subj,T,[<<>>|Acc]); +dig_subpatterns(Subj,[{I,L}|T],Acc) -> + <<_:I/binary,Part:L/binary,_/binary>> = Subj, + dig_subpatterns(Subj,T,[Part|Acc]). + +extend_subpatterns(_,0) -> + []; +extend_subpatterns([],N) -> + [{0,0} | extend_subpatterns([],N-1)]; +extend_subpatterns([H|T],N) -> + [H | extend_subpatterns(T,N-1)]. + +compile_split({re_pattern,N,_,_} = Comp, Options) -> + {Comp,N,Options}; +compile_split(Pat,Options0) when not is_tuple(Pat) -> + Options = lists:filter(fun(O) -> + (not runopt(O)) + end, Options0), + case re:compile(Pat,Options) of + {error,Err} -> + {error,Err}; + {ok, {re_pattern,N,_,_} = Comp} -> + NewOpt = lists:filter(fun(OO) -> (not copt(OO)) end, Options0), + {Comp,N,NewOpt} + end; +compile_split(_,_) -> + throw(badre). + + + + +replace(Subject,RE,Replacement) -> + replace(Subject,RE,Replacement,[]). +replace(Subject,RE,Replacement,Options) -> + try + {NewOpt,Convert,Unicode} = + process_repl_params(Options,iodata,false), + FlatSubject = + case is_binary(Subject) of + true -> + Subject; + false -> + case Unicode of + true -> + unicode:characters_to_binary(Subject,unicode); + false -> + iolist_to_binary(Subject) + end + end, + case do_replace(FlatSubject,Subject,RE,Replacement,NewOpt) of + {error,_Err} -> + throw(badre); + IoList -> + case Convert of + iodata -> + IoList; + binary -> + iolist_to_binary(IoList); + list -> + case Unicode of + false -> + binary_to_list(iolist_to_binary(IoList)); + true -> + unicode:characters_to_list(IoList,unicode) + end + end + end + catch + throw:badopt -> + erlang:error(badarg,[Subject,RE,Replacement,Options]); + throw:badre -> + erlang:error(badarg,[Subject,RE,Replacement,Options]); + error:badarg -> + erlang:error(badarg,[Subject,RE,Replacement,Options]) + end. + + +do_replace(FlatSubject,Subject,RE,Replacement,Options) -> + case re:run(FlatSubject,RE,Options) of + nomatch -> + Subject; + {match,[Mlist|T]} when is_list(Mlist) -> + apply_mlist(FlatSubject,Replacement,[Mlist|T]); + {match,Slist} -> + apply_mlist(FlatSubject,Replacement,[Slist]) + end. + +process_repl_params([],Convert,Unicode) -> + {[],Convert,Unicode}; +process_repl_params([unicode|T],C,_U) -> + {NT,NC,NU} = process_repl_params(T,C,true), + {[unicode|NT],NC,NU}; +process_repl_params([{capture,_,_}|_],_,_) -> + throw(badopt); +process_repl_params([{capture,_}|_],_,_) -> + throw(badopt); +process_repl_params([{return,iodata}|T],_C,U) -> + process_repl_params(T,iodata,U); +process_repl_params([{return,list}|T],_C,U) -> + process_repl_params(T,list,U); +process_repl_params([{return,binary}|T],_C,U) -> + process_repl_params(T,binary,U); +process_repl_params([{return,_}|_],_,_) -> + throw(badopt); +process_repl_params([H|T],C,U) -> + {NT,NC,NU} = process_repl_params(T,C,U), + {[H|NT],NC,NU}. + +process_split_params([],Convert,Unicode,Limit,Strip,Group) -> + {[],Convert,Unicode,Limit,Strip,Group}; +process_split_params([unicode|T],C,_U,L,S,G) -> + {NT,NC,NU,NL,NS,NG} = process_split_params(T,C,true,L,S,G), + {[unicode|NT],NC,NU,NL,NS,NG}; +process_split_params([trim|T],C,U,_L,_S,G) -> + process_split_params(T,C,U,-1,true,G); +process_split_params([{parts,0}|T],C,U,_L,_S,G) -> + process_split_params(T,C,U,-1,true,G); +process_split_params([{parts,N}|T],C,U,_L,_S,G) when is_integer(N), N >= 1 -> + process_split_params(T,C,U,N-1,false,G); +process_split_params([{parts,infinity}|T],C,U,_L,_S,G) -> + process_split_params(T,C,U,-1,false,G); +process_split_params([{parts,_}|_],_,_,_,_,_) -> + throw(badopt); +process_split_params([group|T],C,U,L,S,_G) -> + process_split_params(T,C,U,L,S,true); +process_split_params([global|_],_,_,_,_,_) -> + throw(badopt); +process_split_params([{capture,_,_}|_],_,_,_,_,_) -> + throw(badopt); +process_split_params([{capture,_}|_],_,_,_,_,_) -> + throw(badopt); +process_split_params([{return,iodata}|T],_C,U,L,S,G) -> + process_split_params(T,iodata,U,L,S,G); +process_split_params([{return,list}|T],_C,U,L,S,G) -> + process_split_params(T,list,U,L,S,G); +process_split_params([{return,binary}|T],_C,U,L,S,G) -> + process_split_params(T,binary,U,L,S,G); +process_split_params([{return,_}|_],_,_,_,_,_) -> + throw(badopt); +process_split_params([H|T],C,U,L,S,G) -> + {NT,NC,NU,NL,NS,NG} = process_split_params(T,C,U,L,S,G), + {[H|NT],NC,NU,NL,NS,NG}. + +apply_mlist(Subject,Replacement,Mlist) -> + do_mlist(Subject,Subject,0,precomp_repl(iolist_to_binary(Replacement)), + Mlist). + + +precomp_repl(<<>>) -> + []; +precomp_repl(<<$\\,X,Rest/binary>>) when X < $1 ; X > $9 -> + % Escaped character + case precomp_repl(Rest) of + [BHead | T0] when is_binary(BHead) -> + [<> | T0]; + Other -> + [<> | Other] + end; +precomp_repl(<<$\\,Rest/binary>>) when byte_size(Rest) > 0-> + {NS,NRest} = pick_int(Rest), + [list_to_integer(NS) | precomp_repl(NRest)]; +precomp_repl(<<$&,Rest/binary>>) -> + [0 | precomp_repl(Rest)]; +precomp_repl(<>) -> + case precomp_repl(Rest) of + [BHead | T0] when is_binary(BHead) -> + [<> | T0]; + Other -> + [<> | Other] + end. + + + +pick_int(<>) when X >= $0, X =< $9 -> + {Found,Rest} = pick_int(R), + {[X|Found],Rest}; +pick_int(Bin) -> + {[],Bin}. + +do_mlist(_,<<>>,_,_,[]) -> + []; %Avoid empty binary tail +do_mlist(_,Subject,_,_,[]) -> + Subject; +do_mlist(Whole,Subject,Pos,Repl,[[{MPos,Count} | Sub] | Tail]) + when MPos > Pos -> + EatLength = MPos - Pos, + <> = Subject, + [Untouched | do_mlist(Whole,Rest, MPos, Repl, + [[{MPos,Count} | Sub] | Tail])]; +do_mlist(Whole,Subject,Pos,Repl,[[{MPos,Count} | Sub] | Tail]) + when MPos =:= Pos -> + EatLength = Count, + <<_:EatLength/binary,Rest/binary>> = Subject, + NewData = do_replace(Whole,Repl,[{MPos,Count} | Sub]), + [NewData | do_mlist(Whole,Rest,Pos+EatLength,Repl,Tail)]. + + +do_replace(_,[Bin],_) when is_binary(Bin) -> + Bin; +do_replace(Subject,Repl,SubExprs0) -> + SubExprs = list_to_tuple(SubExprs0), + [ case Part of + N when is_integer(N) -> + if + tuple_size(SubExprs) =< N -> + <<>>; + true -> + {SPos,SLen} = element(N+1,SubExprs), + if + SPos < 0 -> + <<>>; + true -> + <<_:SPos/binary,Res:SLen/binary,_/binary>> = + Subject, + Res + end + end; + Other -> + Other + end || Part <- Repl ]. + + +check_for_unicode({re_pattern,_,1,_},_) -> + true; +check_for_unicode({re_pattern,_,0,_},_) -> + false; +check_for_unicode(_,L) -> + lists:member(unicode,L). + +% SelectReturn = false | all | stirpfirst | none +% ConvertReturn = index | list | binary +% {capture, all} -> all (untouchded) +% {capture, first} -> kept in argumentt list and Select all +% {capture, all_but_first} -> removed from argument list and selects stripfirst +% {capture, none} -> removed from argument list and selects none +% {capture, []} -> removed from argument list and selects none +% {capture,[...]} -> 0 added to selection list and selects stripfirst +% SelectReturn false is same as all in the end. + +% Call as process_parameters([],0,false,index,NeedClean) + +process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_) -> + {[], InitialOffset, SelectReturn, ConvertReturn}; +process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC) -> + process_parameters(T,N,Select0,Return0,CC); +process_parameters([global | T],Init0,Select0,Return0,CC) -> + process_parameters(T,Init0,Select0,Return0,CC); +process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC) -> + process_parameters([{capture,Values}|T],Init0,Select0,Type,CC); +process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC) -> + % First process the rest to see if capture was already present + {NewTail, Init1, Select1, Return1} = + process_parameters(T,Init0,Select0,Return0,CC), + case Select1 of + false -> + case Values of + all -> + {[{capture,all} | NewTail], Init1, all, Return0}; + first -> + {[{capture,first} | NewTail], Init1, all, Return0}; + all_but_first -> + {[{capture,all} | NewTail], Init1, stripfirst, Return0}; + none -> + {[{capture,first} | NewTail], Init1, none, Return0}; + [] -> + {[{capture,first} | NewTail], Init1, none, Return0}; + List when is_list(List) -> + {[{capture,[0|List]} | NewTail], + Init1, stripfirst, Return0}; + _ -> + throw(badlist) + end; + _ -> + % Found overriding further down list, ignore this one + {NewTail, Init1, Select1, Return1} + end; +process_parameters([H|T],Init0,Select0,Return0,true) -> + case copt(H) of + true -> + process_parameters(T,Init0,Select0,Return0,true); + false -> + {NewT,Init,Select,Return} = + process_parameters(T,Init0,Select0,Return0,true), + {[H|NewT],Init,Select,Return} + end; +process_parameters([H|T],Init0,Select0,Return0,false) -> + {NewT,Init,Select,Return} = + process_parameters(T,Init0,Select0,Return0,false), + {[H|NewT],Init,Select,Return}; +process_parameters(_,_,_,_,_) -> + throw(badlist). + +postprocess({match,[]},_,_,_,_) -> + nomatch; +postprocess({match,_},none,_,_,_) -> + match; +postprocess({match,M},Any,binary,Flat,Uni) -> + binarify(postprocess({match,M},Any,index,Flat,Uni),Flat); +postprocess({match,M},Any,list,Flat,Uni) -> + listify(postprocess({match,M},Any,index,Flat,Uni),Flat,Uni); +postprocess({match,M},all,index,_,_) -> + {match,M}; +postprocess({match,M},false,index,_,_) -> + {match,M}; +postprocess({match,M},stripfirst,index,_,_) -> + {match, [ T || [_|T] <- M ]}. + +binarify({match,M},Flat) -> + {match, [ [ case {I,L} of + {-1,0} -> + <<>>; + {SPos,SLen} -> + <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat, + Res + end || {I,L} <- One ] || One <- M ]}. +listify({match,M},Flat,Uni) -> + {match, [ [ case {I,L} of + {_,0} -> + []; + {SPos,SLen} -> + case Uni of + true -> + <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat, + unicode:characters_to_list(Res,unicode); + false -> + Start = SPos + 1, + End = SPos + SLen, + binary_to_list(Flat,Start,End) + end + end || {I,L} <- One ] || One <- M ]}. + +ubinarify({match,M},Flat) -> + {match, [ case {I,L} of + {-1,0} -> + <<>>; + {SPos,SLen} -> + <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat, + Res + end || {I,L} <- M ]}; +ubinarify(Else,_) -> + Else. +ulistify({match,M},Flat) -> + {match, [ case {I,L} of + {_,0} -> + []; + {SPos,SLen} -> + <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat, + unicode:characters_to_list(Res,unicode) + end || {I,L} <- M ]}; +ulistify(Else,_) -> + Else. + +process_uparams([global|_T],_RetType) -> + throw(false); +process_uparams([{capture,Values,Type}|T],_OldType) -> + process_uparams([{capture,Values}|T],Type); +process_uparams([H|T],Type) -> + {NL,NType} = process_uparams(T,Type), + {[H|NL],NType}; +process_uparams([],Type) -> + {[],Type}. + + +ucompile(RE,Options) -> + try + re:compile(unicode:characters_to_binary(RE,unicode)) + catch + error:AnyError -> + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [RE,Options])), + erlang:raise(error,AnyError,[{Mod,compile,L}|Rest]) + end. + + +urun(Subject,RE,Options) -> + try + urun2(Subject,RE,Options) + catch + error:AnyError -> + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [Subject,RE,Options])), + erlang:raise(error,AnyError,[{Mod,run,L}|Rest]) + end. +urun2(Subject0,RE0,Options0) -> + {Options,RetType} = case (catch process_uparams(Options0,index)) of + {A,B} -> + {A,B}; + _ -> + {Options0,false} + end, + Subject = unicode:characters_to_binary(Subject0,unicode), + RE = case RE0 of + BinRE when is_binary(BinRE) -> + BinRE; + {re_pattern,_,_,_} = ReCompiled -> + ReCompiled; + ListRE -> + unicode:characters_to_binary(ListRE,unicode) + end, + Ret = re:run(Subject,RE,Options), + case RetType of + binary -> + ubinarify(Ret,Subject); + list -> + ulistify(Ret,Subject); + _ -> + Ret + end. + + + +%% Might be called either with two-tuple (if regexp was already compiled) +%% or with 3-tuple (saving original RE for exceptions +grun(Subject,RE,{Options,NeedClean}) -> + try + grun2(Subject,RE,{Options,NeedClean}) + catch + error:AnyError -> + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [Subject,RE,Options])), + erlang:raise(error,AnyError,[{Mod,run,L}|Rest]) + end; +grun(Subject,RE,{Options,NeedClean,OrigRE}) -> + try + grun2(Subject,RE,{Options,NeedClean}) + catch + error:AnyError -> + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [Subject,OrigRE,Options])), + erlang:raise(error,AnyError,[{Mod,run,L}|Rest]) + end. + +grun2(Subject,RE,{Options,NeedClean}) -> + Unicode = check_for_unicode(RE,Options), + FlatSubject = + case is_binary(Subject) of + true -> + Subject; + false -> + case Unicode of + true -> + unicode:characters_to_binary(Subject,unicode); + false -> + iolist_to_binary(Subject) + end + end, + do_grun(FlatSubject,Subject,Unicode,RE,{Options,NeedClean}). + +do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) -> + {StrippedOptions, InitialOffset, + SelectReturn, ConvertReturn} = + case (catch + process_parameters(Options0, 0, false, index, NeedClean)) of + badlist -> + erlang:error(badarg,[Subject,RE,Options0]); + CorrectReturn -> + CorrectReturn + end, + postprocess(loopexec(FlatSubject,RE,InitialOffset, + byte_size(FlatSubject), + Unicode,StrippedOptions), + SelectReturn,ConvertReturn,FlatSubject,Unicode). + +loopexec(_,_,X,Y,_,_) when X > Y -> + {match,[]}; +loopexec(Subject,RE,X,Y,Unicode,Options) -> + case re:run(Subject,RE,[{offset,X}]++Options) of + nomatch -> + {match,[]}; + {match,[{A,B}|More]} -> + {match,Rest} = + case B>0 of + true -> + loopexec(Subject,RE,A+B,Y,Unicode,Options); + false -> + {match,M} = + case re:run(Subject,RE,[{offset,X},notempty, + anchored]++Options) of + nomatch -> + {match,[]}; + {match,Other} -> + {match,Other} + end, + NewA = case M of + [{_,NStep}|_] when NStep > 0 -> + A+NStep; + _ -> + forward(Subject,A,1,Unicode) + end, + {match,MM} = loopexec(Subject,RE,NewA,Y, + Unicode,Options), + case M of + [] -> + {match,MM}; + _ -> + {match,[M | MM]} + end + end, + {match,[[{A,B}|More] | Rest]} + end. + +forward(_Chal,A,0,_) -> + A; +forward(_Chal,A,N,false) -> + A+N; +forward(Chal,A,N,true) -> + <<_:A/binary,Tl/binary>> = Chal, + Forw = case Tl of + <<1:1,1:1,0:1,_:5,_/binary>> -> + 2; + <<1:1,1:1,1:1,0:1,_:4,_/binary>> -> + 3; + <<1:1,1:1,1:1,1:1,0:1,_:3,_/binary>> -> + 4; + _ -> + 1 + end, + forward(Chal,A+Forw,N-1,true). + +copt(caseless) -> + true; +copt(dollar_endonly) -> + true; +copt(dotall) -> + true; +copt(extended) -> + true; +copt(firstline) -> + true; +copt(multiline) -> + true; +copt(no_auto_capture) -> + true; +copt(dupnames) -> + true; +copt(ungreedy) -> + true; +copt(unicode) -> + true; +copt(_) -> + false. + +%bothopt({newline,_}) -> +% true; +%bothopt(anchored) -> +% true; +%bothopt(_) -> +% false. + +runopt(notempty) -> + true; +runopt(notbol) -> + true; +runopt(noteol) -> + true; +runopt({offset,_}) -> + true; +runopt({capture,_,_}) -> + true; +runopt({capture,_}) -> + true; +runopt(global) -> + true; +runopt(_) -> + false. diff --git a/lib/stdlib/src/regexp.erl b/lib/stdlib/src/regexp.erl new file mode 100644 index 0000000000..8f5994bbee --- /dev/null +++ b/lib/stdlib/src/regexp.erl @@ -0,0 +1,490 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(regexp). + +%% This entire module is deprecated and will be removed in a future +%% release. Use the 're' module instead. +%% +%% This module provides a basic set of regular expression functions +%% for strings. The functions provided are taken from AWK. +%% +%% Note that we interpret the syntax tree of a regular expression +%% directly instead of converting it to an NFA and then interpreting +%% that. This method seems to go significantly faster. + +-export([sh_to_awk/1,parse/1,format_error/1,match/2,first_match/2,matches/2]). +-export([sub/3,gsub/3,split/2]). + +-deprecated([sh_to_awk/1,parse/1,format_error/1,match/2,first_match/2,matches/2]). +-deprecated([sub/3,gsub/3,split/2]). + +-import(string, [substr/2,substr/3]). +-import(lists, [reverse/1]). + +%% -type matchres() = {match,Start,Length} | nomatch | {error,E}. +%% -type subres() = {ok,RepString,RepCount} | {error,E}. +%% -type splitres() = {ok,[SubString]} | {error,E}. + +%%-compile([export_all]). + +%% This is the regular expression grammar used. It is equivalent to the +%% one used in AWK, except that we allow ^ $ to be used anywhere and fail +%% in the matching. +%% +%% reg -> reg1 : '$1'. +%% reg1 -> reg1 "|" reg2 : {'or','$1','$2'}. +%% reg1 -> reg2 : '$1'. +%% reg2 -> reg2 reg3 : {concat,'$1','$2'}. +%% reg2 -> reg3 : '$1'. +%% reg3 -> reg3 "*" : {kclosure,'$1'}. +%% reg3 -> reg3 "+" : {pclosure,'$1'}. +%% reg3 -> reg3 "?" : {optional,'$1'}. +%% reg3 -> reg4 : '$1'. +%% reg4 -> "(" reg ")" : '$2'. +%% reg4 -> "\\" char : '$2'. +%% reg4 -> "^" : bos. +%% reg4 -> "$" : eos. +%% reg4 -> "." : char. +%% reg4 -> "[" class "]" : {char_class,char_class('$2')} +%% reg4 -> "[" "^" class "]" : {comp_class,char_class('$3')} +%% reg4 -> "\"" chars "\"" : char_string('$2') +%% reg4 -> char : '$1'. +%% reg4 -> empty : epsilon. +%% The grammar of the current regular expressions. The actual parser +%% is a recursive descent implementation of the grammar. + +reg(S) -> reg1(S). + +%% reg1 -> reg2 reg1' +%% reg1' -> "|" reg2 +%% reg1' -> empty + +reg1(S0) -> + {L,S1} = reg2(S0), + reg1p(S1, L). + +reg1p([$||S0], L) -> + {R,S1} = reg2(S0), + reg1p(S1, {'or',L,R}); +reg1p(S, L) -> {L,S}. + +%% reg2 -> reg3 reg2' +%% reg2' -> reg3 +%% reg2' -> empty + +reg2(S0) -> + {L,S1} = reg3(S0), + reg2p(S1, L). + +reg2p([C|S0], L) when C =/= $|, C =/= $) -> + {R,S1} = reg3([C|S0]), + reg2p(S1, {concat,L,R}); +reg2p(S, L) -> {L,S}. + +%% reg3 -> reg4 reg3' +%% reg3' -> "*" reg3' +%% reg3' -> "+" reg3' +%% reg3' -> "?" reg3' +%% reg3' -> empty + +reg3(S0) -> + {L,S1} = reg4(S0), + reg3p(S1, L). + +reg3p([$*|S], L) -> reg3p(S, {kclosure,L}); +reg3p([$+|S], L) -> reg3p(S, {pclosure,L}); +reg3p([$?|S], L) -> reg3p(S, {optional,L}); +reg3p(S, L) -> {L,S}. + +-define(HEX(C), C >= $0 andalso C =< $9 orelse + C >= $A andalso C =< $F orelse + C >= $a andalso C =< $f). + +reg4([$(|S0]) -> + case reg(S0) of + {R,[$)|S1]} -> {R,S1}; + {_R,_S} -> throw({error,{unterminated,"("}}) + end; +reg4([$\\,O1,O2,O3|S]) when + O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 -> + {(O1*8 + O2)*8 + O3 - 73*$0,S}; +reg4([$\\,$x,H1,H2|S]) when ?HEX(H1), ?HEX(H2) -> + {erlang:list_to_integer([H1,H2], 16),S}; +reg4([$\\,$x,${|S]) -> + hex(S, []); +reg4([$\\,$x|_]) -> + throw({error,{illegal,[$x]}}); +reg4([$\\,C|S]) -> {escape_char(C),S}; +reg4([$\\]) -> throw({error,{unterminated,"\\"}}); +reg4([$^|S]) -> {bos,S}; +reg4([$$|S]) -> {eos,S}; +reg4([$.|S]) -> {{comp_class,"\n"},S}; +reg4("[^" ++ S0) -> + case char_class(S0) of + {Cc,[$]|S1]} -> {{comp_class,Cc},S1}; + {_Cc,_S} -> throw({error,{unterminated,"["}}) + end; +reg4([$[|S0]) -> + case char_class(S0) of + {Cc,[$]|S1]} -> {{char_class,Cc},S1}; + {_Cc,_S1} -> throw({error,{unterminated,"["}}) + end; +%reg4([$"|S0]) -> +% case char_string(S0) of +% {St,[$"|S1]} -> {St,S1}; +% {St,S1} -> throw({error,{unterminated,"\""}}) +% end; +reg4([C|S]) when C =/= $*, C =/= $+, C =/= $?, C =/= $] -> {C,S}; +reg4([C|_S]) -> throw({error,{illegal,[C]}}); +reg4([]) -> {epsilon,[]}. + +hex([C|Cs], L) when ?HEX(C) -> + hex(Cs, [C|L]); +hex([$}|S], L) -> + case catch erlang:list_to_integer(lists:reverse(L), 16) of + V when V =< 16#FF -> + {V,S}; + _ -> + throw({error,{illegal,[$}]}}) + end; +hex(_S, _) -> + throw({error,{unterminated,"\\x{"}}). + +escape_char($n) -> $\n; %\n = LF +escape_char($r) -> $\r; %\r = CR +escape_char($t) -> $\t; %\t = TAB +escape_char($v) -> $\v; %\v = VT +escape_char($b) -> $\b; %\b = BS +escape_char($f) -> $\f; %\f = FF +escape_char($e) -> $\e; %\e = ESC +escape_char($s) -> $\s; %\s = SPACE +escape_char($d) -> $\d; %\d = DEL +escape_char(C) -> C. + +char_class([$]|S]) -> char_class(S, [$]]); +char_class(S) -> char_class(S, []). + +char($\\, [O1,O2,O3|S]) when + O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 -> + {(O1*8 + O2)*8 + O3 - 73*$0,S}; +char($\\, [$x,H1,H2|S]) when ?HEX(H1), ?HEX(H2) -> + {erlang:list_to_integer([H1,H2], 16),S}; +char($\\,[$x,${|S]) -> + hex(S, []); +char($\\,[$x|_]) -> + throw({error,{illegal,[$x]}}); +char($\\, [C|S]) -> {escape_char(C),S}; +char(C, S) -> {C,S}. + +char_class([C1|S0], Cc) when C1 =/= $] -> + case char(C1, S0) of + {Cf,[$-,C2|S1]} when C2 =/= $] -> + case char(C2, S1) of + {Cl,S2} when Cf < Cl -> char_class(S2, [{Cf,Cl}|Cc]); + {Cl,_S2} -> throw({error,{char_class,[Cf,$-,Cl]}}) + end; + {C,S1} -> char_class(S1, [C|Cc]) + end; +char_class(S, Cc) -> {Cc,S}. + +%char_string([C|S]) when C =/= $" -> char_string(S, C); +%char_string(S) -> {epsilon,S}. + +%char_string([C|S0], L) when C =/= $" -> +% char_string(S0, {concat,L,C}); +%char_string(S, L) -> {L,S}. + +%% -deftype re_app_res() = {match,RestPos,Rest} | nomatch. + +%% re_apply(String, StartPos, RegExp) -> re_app_res(). +%% +%% Apply the (parse of the) regular expression RegExp to String. If +%% there is a match return the position of the remaining string and +%% the string if else return 'nomatch'. BestMatch specifies if we want +%% the longest match, or just a match. +%% +%% StartPos should be the real start position as it is used to decide +%% if we ae at the beginning of the string. +%% +%% Pass two functions to re_apply_or so it can decide, on the basis +%% of BestMatch, whether to just any take any match or try both to +%% find the longest. This is slower but saves duplicatng code. + +re_apply(S, St, RE) -> re_apply(RE, [], S, St). + +re_apply(epsilon, More, S, P) -> %This always matches + re_apply_more(More, S, P); +re_apply({'or',RE1,RE2}, More, S, P) -> + re_apply_or(re_apply(RE1, More, S, P), + re_apply(RE2, More, S, P)); +re_apply({concat,RE1,RE2}, More, S0, P) -> + re_apply(RE1, [RE2|More], S0, P); +re_apply({kclosure,CE}, More, S, P) -> + %% Be careful with the recursion, explicitly do one call before + %% looping. + re_apply_or(re_apply_more(More, S, P), + re_apply(CE, [{kclosure,CE}|More], S, P)); +re_apply({pclosure,CE}, More, S, P) -> + re_apply(CE, [{kclosure,CE}|More], S, P); +re_apply({optional,CE}, More, S, P) -> + re_apply_or(re_apply_more(More, S, P), + re_apply(CE, More, S, P)); +re_apply(bos, More, S, 1) -> re_apply_more(More, S, 1); +re_apply(eos, More, [$\n|S], P) -> re_apply_more(More, S, P); +re_apply(eos, More, [], P) -> re_apply_more(More, [], P); +re_apply({char_class,Cc}, More, [C|S], P) -> + case in_char_class(C, Cc) of + true -> re_apply_more(More, S, P+1); + false -> nomatch + end; +re_apply({comp_class,Cc}, More, [C|S], P) -> + case in_char_class(C, Cc) of + true -> nomatch; + false -> re_apply_more(More, S, P+1) + end; +re_apply(C, More, [C|S], P) when is_integer(C) -> + re_apply_more(More, S, P+1); +re_apply(_RE, _More, _S, _P) -> nomatch. + +%% re_apply_more([RegExp], String, Length) -> re_app_res(). + +re_apply_more([RE|More], S, P) -> re_apply(RE, More, S, P); +re_apply_more([], S, P) -> {match,P,S}. + +%% in_char_class(Char, Class) -> bool(). + +in_char_class(C, [{C1,C2}|_Cc]) when C >= C1, C =< C2 -> true; +in_char_class(C, [C|_Cc]) -> true; +in_char_class(C, [_|Cc]) -> in_char_class(C, Cc); +in_char_class(_C, []) -> false. + +%% re_apply_or(Match1, Match2) -> re_app_res(). +%% If we want the best match then choose the longest match, else just +%% choose one by trying sequentially. + +re_apply_or({match,P1,S1}, {match,P2,_S2}) when P1 >= P2 -> {match,P1,S1}; +re_apply_or({match,_P1,_S1}, {match,P2,S2}) -> {match,P2,S2}; +re_apply_or(nomatch, R2) -> R2; +re_apply_or(R1, nomatch) -> R1. + +%% sh_to_awk(ShellRegExp) +%% Convert a sh style regexp into a full AWK one. The main difficulty is +%% getting character sets right as the conventions are different. + +sh_to_awk(Sh) -> "^(" ++ sh_to_awk_1(Sh). %Fix the beginning + +sh_to_awk_1([$*|Sh]) -> %This matches any string + ".*" ++ sh_to_awk_1(Sh); +sh_to_awk_1([$?|Sh]) -> %This matches any character + [$.|sh_to_awk_1(Sh)]; +sh_to_awk_1([$[,$^,$]|Sh]) -> %This takes careful handling + "\\^" ++ sh_to_awk_1(Sh); +sh_to_awk_1("[^" ++ Sh) -> [$[|sh_to_awk_2(Sh, true)]; +sh_to_awk_1("[!" ++ Sh) -> "[^" ++ sh_to_awk_2(Sh, false); +sh_to_awk_1([$[|Sh]) -> [$[|sh_to_awk_2(Sh, false)]; +sh_to_awk_1([C|Sh]) -> + %% Unspecialise everything else which is not an escape character. + case special_char(C) of + true -> [$\\,C|sh_to_awk_1(Sh)]; + false -> [C|sh_to_awk_1(Sh)] + end; +sh_to_awk_1([]) -> ")$". %Fix the end + +sh_to_awk_2([$]|Sh], UpArrow) -> [$]|sh_to_awk_3(Sh, UpArrow)]; +sh_to_awk_2(Sh, UpArrow) -> sh_to_awk_3(Sh, UpArrow). + +sh_to_awk_3([$]|Sh], true) -> "^]" ++ sh_to_awk_1(Sh); +sh_to_awk_3([$]|Sh], false) -> [$]|sh_to_awk_1(Sh)]; +sh_to_awk_3([C|Sh], UpArrow) -> [C|sh_to_awk_3(Sh, UpArrow)]; +sh_to_awk_3([], true) -> [$^|sh_to_awk_1([])]; +sh_to_awk_3([], false) -> sh_to_awk_1([]). + +%% -type special_char(char()) -> bool(). +%% Test if a character is a special character. + +special_char($|) -> true; +special_char($*) -> true; +special_char($+) -> true; +special_char($?) -> true; +special_char($() -> true; +special_char($)) -> true; +special_char($\\) -> true; +special_char($^) -> true; +special_char($$) -> true; +special_char($.) -> true; +special_char($[) -> true; +special_char($]) -> true; +special_char($") -> true; +special_char(_C) -> false. + +%% parse(RegExp) -> {ok,RE} | {error,E}. +%% Parse the regexp described in the string RegExp. + +parse(S) -> + case catch reg(S) of + {R,[]} -> {ok,R}; + {_R,[C|_]} -> {error,{illegal,[C]}}; + {error,E} -> {error,E} + end. + +%% format_error(Error) -> String. + +format_error({illegal,What}) -> ["illegal character `",What,"'"]; +format_error({unterminated,What}) -> ["unterminated `",What,"'"]; +format_error({char_class,What}) -> + ["illegal character class ",io_lib:write_string(What)]. + +%% -type match(String, RegExp) -> matchres(). +%% Find the longest match of RegExp in String. + +match(S, RegExp) when is_list(RegExp) -> + case parse(RegExp) of + {ok,RE} -> match(S, RE); + {error,E} -> {error,E} + end; +match(S, RE) -> + case match(RE, S, 1, 0, -1) of + {Start,Len} when Len >= 0 -> + {match,Start,Len}; + {_Start,_Len} -> nomatch + end. + +match(RE, S, St, Pos, L) -> + case first_match(RE, S, St) of + {St1,L1} -> + Nst = St1 + 1, + if L1 > L -> match(RE, lists:nthtail(Nst-St, S), Nst, St1, L1); + true -> match(RE, lists:nthtail(Nst-St, S), Nst, Pos, L) + end; + nomatch -> {Pos,L} + end. + +%% -type first_match(String, RegExp) -> matchres(). +%% Find the first match of RegExp in String. + +first_match(S, RegExp) when is_list(RegExp) -> + case parse(RegExp) of + {ok,RE} -> first_match(S, RE); + {error,E} -> {error,E} + end; +first_match(S, RE) -> + case first_match(RE, S, 1) of + {Start,Len} when Len >= 0 -> + {match,Start,Len}; + nomatch -> nomatch + end. + +first_match(RE, S, St) when S =/= [] -> + case re_apply(S, St, RE) of + {match,P,_Rest} -> {St,P-St}; + nomatch -> first_match(RE, tl(S), St+1) + end; +first_match(_RE, [], _St) -> nomatch. + +%% -type matches(String, RegExp) -> {match,[{Start,Length}]} | {error,E}. +%% Return the all the non-overlapping matches of RegExp in String. + +matches(S, RegExp) when is_list(RegExp) -> + case parse(RegExp) of + {ok,RE} -> matches(S, RE); + {error,E} -> {error,E} + end; +matches(S, RE) -> + {match,matches(S, RE, 1)}. + +matches(S, RE, St) -> + case first_match(RE, S, St) of + {St1,0} -> [{St1,0}|matches(substr(S, St1+2-St), RE, St1+1)]; + {St1,L1} -> [{St1,L1}|matches(substr(S, St1+L1+1-St), RE, St1+L1)]; + nomatch -> [] + end. + +%% -type sub(String, RegExp, Replace) -> subsres(). +%% Substitute the first match of the regular expression RegExp with +%% the string Replace in String. Accept pre-parsed regular +%% expressions. + +sub(String, RegExp, Rep) when is_list(RegExp) -> + case parse(RegExp) of + {ok,RE} -> sub(String, RE, Rep); + {error,E} -> {error,E} + end; +sub(String, RE, Rep) -> + Ss = sub_match(String, RE, 1), + {ok,sub_repl(Ss, Rep, String, 1),length(Ss)}. + +sub_match(S, RE, St) -> + case first_match(RE, S, St) of + {St1,L1} -> [{St1,L1}]; + nomatch -> [] + end. + +sub_repl([{St,L}|Ss], Rep, S, Pos) -> + Rs = sub_repl(Ss, Rep, S, St+L), + substr(S, Pos, St-Pos) ++ sub_repl(Rep, substr(S, St, L), Rs); +sub_repl([], _Rep, S, Pos) -> substr(S, Pos). + +sub_repl([$&|Rep], M, Rest) -> M ++ sub_repl(Rep, M, Rest); +sub_repl("\\&" ++ Rep, M, Rest) -> [$&|sub_repl(Rep, M, Rest)]; +sub_repl([C|Rep], M, Rest) -> [C|sub_repl(Rep, M, Rest)]; +sub_repl([], _M, Rest) -> Rest. + +%% -type gsub(String, RegExp, Replace) -> subres(). +%% Substitute every match of the regular expression RegExp with the +%% string New in String. Accept pre-parsed regular expressions. + +gsub(String, RegExp, Rep) when is_list(RegExp) -> + case parse(RegExp) of + {ok,RE} -> gsub(String, RE, Rep); + {error,E} -> {error,E} + end; +gsub(String, RE, Rep) -> + Ss = matches(String, RE, 1), + {ok,sub_repl(Ss, Rep, String, 1),length(Ss)}. + +%% -type split(String, RegExp) -> splitres(). +%% Split a string into substrings where the RegExp describes the +%% field seperator. The RegExp " " is specially treated. + +split(String, " ") -> %This is really special + {ok,RE} = parse("[ \t]+"), + case split_apply(String, RE, true) of + [[]|Ss] -> {ok,Ss}; + Ss -> {ok,Ss} + end; +split(String, RegExp) when is_list(RegExp) -> + case parse(RegExp) of + {ok,RE} -> {ok,split_apply(String, RE, false)}; + {error,E} -> {error,E} + end; +split(String, RE) -> {ok,split_apply(String, RE, false)}. + +split_apply(S, RE, Trim) -> split_apply(S, 1, RE, Trim, []). + +split_apply([], _P, _RE, true, []) -> []; +split_apply([], _P, _RE, _T, Sub) -> [reverse(Sub)]; +split_apply(S, P, RE, T, Sub) -> + case re_apply(S, P, RE) of + {match,P,_Rest} -> + split_apply(tl(S), P+1, RE, T, [hd(S)|Sub]); + {match,P1,Rest} -> + [reverse(Sub)|split_apply(Rest, P1, RE, T, [])]; + nomatch -> + split_apply(tl(S), P+1, RE, T, [hd(S)|Sub]) + end. diff --git a/lib/stdlib/src/sets.erl b/lib/stdlib/src/sets.erl new file mode 100644 index 0000000000..bcddca2567 --- /dev/null +++ b/lib/stdlib/src/sets.erl @@ -0,0 +1,417 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2000-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% We use the dynamic hashing techniques by Per-Åke Larsson as +%% described in "The Design and Implementation of Dynamic Hashing for +%% Sets and Tables in Icon" by Griswold and Townsend. Much of the +%% terminology comes from that paper as well. + +%% The segments are all of the same fixed size and we just keep +%% increasing the size of the top tuple as the table grows. At the +%% end of the segments tuple we keep an empty segment which we use +%% when we expand the segments. The segments are expanded by doubling +%% every time n reaches maxn instead of increasing the tuple one +%% element at a time. It is easier and does not seem detrimental to +%% speed. The same applies when contracting the segments. +%% +%% Note that as the order of the keys is undefined we may freely +%% reorder keys within in a bucket. + +-module(sets). + +%% Standard interface. +-export([new/0,is_set/1,size/1,to_list/1,from_list/1]). +-export([is_element/2,add_element/2,del_element/2]). +-export([union/2,union/1,intersection/2,intersection/1]). +-export([is_disjoint/2]). +-export([subtract/2,is_subset/2]). +-export([fold/3,filter/2]). + +%% Note: mk_seg/1 must be changed too if seg_size is changed. +-define(seg_size, 16). +-define(max_seg, 32). +-define(expand_load, 5). +-define(contract_load, 3). +-define(exp_size, ?seg_size * ?expand_load). +-define(con_size, ?seg_size * ?contract_load). + +%%------------------------------------------------------------------------------ + +-type seg() :: tuple(). +-type segs() :: tuple(). + +%% Define a hash set. The default values are the standard ones. +-record(set, + {size=0 :: non_neg_integer(), % Number of elements + n=?seg_size :: non_neg_integer(), % Number of active slots + maxn=?seg_size :: pos_integer(), % Maximum slots + bso=?seg_size div 2 :: non_neg_integer(), % Buddy slot offset + exp_size=?exp_size :: non_neg_integer(), % Size to expand at + con_size=?con_size :: non_neg_integer(), % Size to contract at + empty :: seg(), % Empty segment + segs :: segs() % Segments + }). +%% A declaration equivalent to the following one is hard-coded in erl_types. +%% That declaration contains hard-coded information about the #set{} +%% record and the types of its fields. So, please make sure that any +%% changes to its structure are also propagated to erl_types.erl. +%% +%% -opaque set() :: #set{}. + +%%------------------------------------------------------------------------------ + +%% new() -> Set +-spec new() -> set(). +new() -> + Empty = mk_seg(?seg_size), + #set{empty = Empty, segs = {Empty}}. + +%% is_set(Set) -> boolean(). +%% Return 'true' if Set is a set of elements, else 'false'. +-spec is_set(term()) -> boolean(). +is_set(#set{}) -> true; +is_set(_) -> false. + +%% size(Set) -> int(). +%% Return the number of elements in Set. +-spec size(set()) -> non_neg_integer(). +size(S) -> S#set.size. + +%% to_list(Set) -> [Elem]. +%% Return the elements in Set as a list. +-spec to_list(set()) -> [term()]. +to_list(S) -> + fold(fun (Elem, List) -> [Elem|List] end, [], S). + +%% from_list([Elem]) -> Set. +%% Build a set from the elements in List. +-spec from_list([term()]) -> set(). +from_list(L) -> + lists:foldl(fun (E, S) -> add_element(E, S) end, new(), L). + +%% is_element(Element, Set) -> boolean(). +%% Return 'true' if Element is an element of Set, else 'false'. +-spec is_element(term(), set()) -> boolean(). +is_element(E, S) -> + Slot = get_slot(S, E), + Bkt = get_bucket(S, Slot), + lists:member(E, Bkt). + +%% add_element(Element, Set) -> Set. +%% Return Set with Element inserted in it. +-spec add_element(term(), set()) -> set(). +add_element(E, S0) -> + Slot = get_slot(S0, E), + {S1,Ic} = on_bucket(fun (B0) -> add_bkt_el(E, B0, B0) end, S0, Slot), + maybe_expand(S1, Ic). + +-spec add_bkt_el(T, [T], [T]) -> {[T], 0 | 1}. +add_bkt_el(E, [E|_], Bkt) -> {Bkt,0}; +add_bkt_el(E, [_|B], Bkt) -> + add_bkt_el(E, B, Bkt); +add_bkt_el(E, [], Bkt) -> {[E|Bkt],1}. + +%% del_element(Element, Set) -> Set. +%% Return Set but with Element removed. +-spec del_element(term(), set()) -> set(). +del_element(E, S0) -> + Slot = get_slot(S0, E), + {S1,Dc} = on_bucket(fun (B0) -> del_bkt_el(E, B0) end, S0, Slot), + maybe_contract(S1, Dc). + +-spec del_bkt_el(T, [T]) -> {[T], 0 | 1}. +del_bkt_el(E, [E|Bkt]) -> {Bkt,1}; +del_bkt_el(E, [Other|Bkt0]) -> + {Bkt1,Dc} = del_bkt_el(E, Bkt0), + {[Other|Bkt1],Dc}; +del_bkt_el(_, []) -> {[],0}. + +%% union(Set1, Set2) -> Set +%% Return the union of Set1 and Set2. +-spec union(set(), set()) -> set(). +union(S1, S2) when S1#set.size < S2#set.size -> + fold(fun (E, S) -> add_element(E, S) end, S2, S1); +union(S1, S2) -> + fold(fun (E, S) -> add_element(E, S) end, S1, S2). + +%% union([Set]) -> Set +%% Return the union of the list of sets. +-spec union([set()]) -> set(). +union([S1,S2|Ss]) -> + union1(union(S1, S2), Ss); +union([S]) -> S; +union([]) -> new(). + +-spec union1(set(), [set()]) -> set(). +union1(S1, [S2|Ss]) -> + union1(union(S1, S2), Ss); +union1(S1, []) -> S1. + +%% intersection(Set1, Set2) -> Set. +%% Return the intersection of Set1 and Set2. +-spec intersection(set(), set()) -> set(). +intersection(S1, S2) when S1#set.size < S2#set.size -> + filter(fun (E) -> is_element(E, S2) end, S1); +intersection(S1, S2) -> + filter(fun (E) -> is_element(E, S1) end, S2). + +%% intersection([Set]) -> Set. +%% Return the intersection of the list of sets. +-spec intersection([set(),...]) -> set(). +intersection([S1,S2|Ss]) -> + intersection1(intersection(S1, S2), Ss); +intersection([S]) -> S. + +-spec intersection1(set(), [set()]) -> set(). +intersection1(S1, [S2|Ss]) -> + intersection1(intersection(S1, S2), Ss); +intersection1(S1, []) -> S1. + +%% is_disjoint(Set1, Set2) -> boolean(). +%% Check whether Set1 and Set2 are disjoint. +-spec is_disjoint(set(), set()) -> boolean(). +is_disjoint(S1, S2) when S1#set.size < S2#set.size -> + fold(fun (_, false) -> false; + (E, true) -> not is_element(E, S2) + end, true, S1); +is_disjoint(S1, S2) -> + fold(fun (_, false) -> false; + (E, true) -> not is_element(E, S1) + end, true, S2). + +%% subtract(Set1, Set2) -> Set. +%% Return all and only the elements of Set1 which are not also in +%% Set2. +-spec subtract(set(), set()) -> set(). +subtract(S1, S2) -> + filter(fun (E) -> not is_element(E, S2) end, S1). + +%% is_subset(Set1, Set2) -> boolean(). +%% Return 'true' when every element of Set1 is also a member of +%% Set2, else 'false'. +-spec is_subset(set(), set()) -> boolean(). +is_subset(S1, S2) -> + fold(fun (E, Sub) -> Sub andalso is_element(E, S2) end, true, S1). + +%% fold(Fun, Accumulator, Set) -> Accumulator. +%% Fold function Fun over all elements in Set and return Accumulator. +-spec fold(fun((_,_) -> _), T, set()) -> T. +fold(F, Acc, D) -> fold_set(F, Acc, D). + +%% filter(Fun, Set) -> Set. +%% Filter Set with Fun. +-spec filter(fun((_) -> boolean()), set()) -> set(). +filter(F, D) -> filter_set(F, D). + +%% get_slot(Hashdb, Key) -> Slot. +%% Get the slot. First hash on the new range, if we hit a bucket +%% which has not been split use the unsplit buddy bucket. +-spec get_slot(set(), term()) -> non_neg_integer(). +get_slot(T, Key) -> + H = erlang:phash(Key, T#set.maxn), + if + H > T#set.n -> H - T#set.bso; + true -> H + end. + +%% get_bucket(Hashdb, Slot) -> Bucket. +-spec get_bucket(set(), non_neg_integer()) -> term(). +get_bucket(T, Slot) -> get_bucket_s(T#set.segs, Slot). + +%% on_bucket(Fun, Hashdb, Slot) -> {NewHashDb,Result}. +%% Apply Fun to the bucket in Slot and replace the returned bucket. +-spec on_bucket(fun((_) -> {[_], 0 | 1}), set(), non_neg_integer()) -> + {set(), 0 | 1}. +on_bucket(F, T, Slot) -> + SegI = ((Slot-1) div ?seg_size) + 1, + BktI = ((Slot-1) rem ?seg_size) + 1, + Segs = T#set.segs, + Seg = element(SegI, Segs), + B0 = element(BktI, Seg), + {B1, Res} = F(B0), %Op on the bucket. + {T#set{segs = setelement(SegI, Segs, setelement(BktI, Seg, B1))},Res}. + +%% fold_set(Fun, Acc, Dictionary) -> Dictionary. +%% filter_set(Fun, Dictionary) -> Dictionary. + +%% Work functions for fold and filter operations. These traverse the +%% hash structure rebuilding as necessary. Note we could have +%% implemented map and hash using fold but these should be faster. +%% We hope! + +fold_set(F, Acc, D) when is_function(F, 2) -> + Segs = D#set.segs, + fold_segs(F, Acc, Segs, tuple_size(Segs)). + +fold_segs(F, Acc, Segs, I) when I >= 1 -> + Seg = element(I, Segs), + fold_segs(F, fold_seg(F, Acc, Seg, tuple_size(Seg)), Segs, I-1); +fold_segs(_, Acc, _, _) -> Acc. + +fold_seg(F, Acc, Seg, I) when I >= 1 -> + fold_seg(F, fold_bucket(F, Acc, element(I, Seg)), Seg, I-1); +fold_seg(_, Acc, _, _) -> Acc. + +fold_bucket(F, Acc, [E|Bkt]) -> + fold_bucket(F, F(E, Acc), Bkt); +fold_bucket(_, Acc, []) -> Acc. + +filter_set(F, D) when is_function(F, 1) -> + Segs0 = tuple_to_list(D#set.segs), + {Segs1,Fc} = filter_seg_list(F, Segs0, [], 0), + maybe_contract(D#set{segs = list_to_tuple(Segs1)}, Fc). + +filter_seg_list(F, [Seg|Segs], Fss, Fc0) -> + Bkts0 = tuple_to_list(Seg), + {Bkts1,Fc1} = filter_bkt_list(F, Bkts0, [], Fc0), + filter_seg_list(F, Segs, [list_to_tuple(Bkts1)|Fss], Fc1); +filter_seg_list(_, [], Fss, Fc) -> + {lists:reverse(Fss, []),Fc}. + +filter_bkt_list(F, [Bkt0|Bkts], Fbs, Fc0) -> + {Bkt1,Fc1} = filter_bucket(F, Bkt0, [], Fc0), + filter_bkt_list(F, Bkts, [Bkt1|Fbs], Fc1); +filter_bkt_list(_, [], Fbs, Fc) -> + {lists:reverse(Fbs),Fc}. + +filter_bucket(F, [E|Bkt], Fb, Fc) -> + case F(E) of + true -> filter_bucket(F, Bkt, [E|Fb], Fc); + false -> filter_bucket(F, Bkt, Fb, Fc+1) + end; +filter_bucket(_, [], Fb, Fc) -> {Fb,Fc}. + +%% get_bucket_s(Segments, Slot) -> Bucket. +%% put_bucket_s(Segments, Slot, Bucket) -> NewSegments. + +get_bucket_s(Segs, Slot) -> + SegI = ((Slot-1) div ?seg_size) + 1, + BktI = ((Slot-1) rem ?seg_size) + 1, + element(BktI, element(SegI, Segs)). + +put_bucket_s(Segs, Slot, Bkt) -> + SegI = ((Slot-1) div ?seg_size) + 1, + BktI = ((Slot-1) rem ?seg_size) + 1, + Seg = setelement(BktI, element(SegI, Segs), Bkt), + setelement(SegI, Segs, Seg). + +-spec maybe_expand(set(), 0 | 1) -> set(). +maybe_expand(T0, Ic) when T0#set.size + Ic > T0#set.exp_size -> + T = maybe_expand_segs(T0), %Do we need more segments. + N = T#set.n + 1, %Next slot to expand into + Segs0 = T#set.segs, + Slot1 = N - T#set.bso, + B = get_bucket_s(Segs0, Slot1), + Slot2 = N, + {B1,B2} = rehash(B, Slot1, Slot2, T#set.maxn), + Segs1 = put_bucket_s(Segs0, Slot1, B1), + Segs2 = put_bucket_s(Segs1, Slot2, B2), + T#set{size = T#set.size + Ic, + n = N, + exp_size = N * ?expand_load, + con_size = N * ?contract_load, + segs = Segs2}; +maybe_expand(T, Ic) -> T#set{size = T#set.size + Ic}. + +-spec maybe_expand_segs(set()) -> set(). +maybe_expand_segs(T) when T#set.n =:= T#set.maxn -> + T#set{maxn = 2 * T#set.maxn, + bso = 2 * T#set.bso, + segs = expand_segs(T#set.segs, T#set.empty)}; +maybe_expand_segs(T) -> T. + +-spec maybe_contract(set(), non_neg_integer()) -> set(). +maybe_contract(T, Dc) when T#set.size - Dc < T#set.con_size, + T#set.n > ?seg_size -> + N = T#set.n, + Slot1 = N - T#set.bso, + Segs0 = T#set.segs, + B1 = get_bucket_s(Segs0, Slot1), + Slot2 = N, + B2 = get_bucket_s(Segs0, Slot2), + Segs1 = put_bucket_s(Segs0, Slot1, B1 ++ B2), + Segs2 = put_bucket_s(Segs1, Slot2, []), %Clear the upper bucket + N1 = N - 1, + maybe_contract_segs(T#set{size = T#set.size - Dc, + n = N1, + exp_size = N1 * ?expand_load, + con_size = N1 * ?contract_load, + segs = Segs2}); +maybe_contract(T, Dc) -> T#set{size = T#set.size - Dc}. + +-spec maybe_contract_segs(set()) -> set(). +maybe_contract_segs(T) when T#set.n =:= T#set.bso -> + T#set{maxn = T#set.maxn div 2, + bso = T#set.bso div 2, + segs = contract_segs(T#set.segs)}; +maybe_contract_segs(T) -> T. + +%% rehash(Bucket, Slot1, Slot2, MaxN) -> {Bucket1,Bucket2}. +-spec rehash([T], integer(), pos_integer(), pos_integer()) -> {[T],[T]}. +rehash([E|T], Slot1, Slot2, MaxN) -> + {L1,L2} = rehash(T, Slot1, Slot2, MaxN), + case erlang:phash(E, MaxN) of + Slot1 -> {[E|L1],L2}; + Slot2 -> {L1,[E|L2]} + end; +rehash([], _, _, _) -> {[],[]}. + +%% mk_seg(Size) -> Segment. +-spec mk_seg(16) -> seg(). +mk_seg(16) -> {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}. + +%% expand_segs(Segs, EmptySeg) -> NewSegs. +%% contract_segs(Segs) -> NewSegs. +%% Expand/contract the segment tuple by doubling/halving the number +%% of segments. We special case the powers of 2 upto 32, this should +%% catch most case. N.B. the last element in the segments tuple is +%% an extra element containing a default empty segment. +-spec expand_segs(segs(), seg()) -> segs(). +expand_segs({B1}, Empty) -> + {B1,Empty}; +expand_segs({B1,B2}, Empty) -> + {B1,B2,Empty,Empty}; +expand_segs({B1,B2,B3,B4}, Empty) -> + {B1,B2,B3,B4,Empty,Empty,Empty,Empty}; +expand_segs({B1,B2,B3,B4,B5,B6,B7,B8}, Empty) -> + {B1,B2,B3,B4,B5,B6,B7,B8, + Empty,Empty,Empty,Empty,Empty,Empty,Empty,Empty}; +expand_segs({B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16}, Empty) -> + {B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16, + Empty,Empty,Empty,Empty,Empty,Empty,Empty,Empty, + Empty,Empty,Empty,Empty,Empty,Empty,Empty,Empty}; +expand_segs(Segs, Empty) -> + list_to_tuple(tuple_to_list(Segs) + ++ lists:duplicate(tuple_size(Segs), Empty)). + +-spec contract_segs(segs()) -> segs(). +contract_segs({B1,_}) -> + {B1}; +contract_segs({B1,B2,_,_}) -> + {B1,B2}; +contract_segs({B1,B2,B3,B4,_,_,_,_}) -> + {B1,B2,B3,B4}; +contract_segs({B1,B2,B3,B4,B5,B6,B7,B8,_,_,_,_,_,_,_,_}) -> + {B1,B2,B3,B4,B5,B6,B7,B8}; +contract_segs({B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16, + _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_}) -> + {B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16}; +contract_segs(Segs) -> + Ss = tuple_size(Segs) div 2, + list_to_tuple(lists:sublist(tuple_to_list(Segs), 1, Ss)). diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl new file mode 100644 index 0000000000..a8d31b4e6b --- /dev/null +++ b/lib/stdlib/src/shell.erl @@ -0,0 +1,1440 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(shell). + +-export([start/0, start/1, start/2, server/1, server/2, history/1, results/1]). +-export([whereis_evaluator/0, whereis_evaluator/1]). +-export([start_restricted/1, stop_restricted/0]). +-export([local_allowed/3, non_local_allowed/3]). + +-define(LINEMAX, 30). +-define(CHAR_MAX, 60). +-define(DEF_HISTORY, 20). +-define(DEF_RESULTS, 20). +-define(DEF_CATCH_EXCEPTION, false). + +-define(RECORDS, shell_records). + +-define(MAXSIZE_HEAPBINARY, 64). + +%% When used as the fallback restricted shell callback module... +local_allowed(q,[],State) -> + {true,State}; +local_allowed(_,_,State) -> + {false,State}. + +non_local_allowed({init,stop},[],State) -> + {true,State}; +non_local_allowed(_,_,State) -> + {false,State}. + +-spec start() -> pid(). + +start() -> + start(false, false). + +start(init) -> + start(false, true); +start(NoCtrlG) -> + start(NoCtrlG, false). + +start(NoCtrlG, StartSync) -> + code:ensure_loaded(user_default), + spawn(fun() -> server(NoCtrlG, StartSync) end). + +%% Find the pid of the current evaluator process. +-spec whereis_evaluator() -> 'undefined' | pid(). + +whereis_evaluator() -> + %% locate top group leader, always registered as user + %% can be implemented by group (normally) or user + %% (if oldshell or noshell) + case whereis(user) of + undefined -> + undefined; + User -> + %% get user_drv pid from group, or shell pid from user + case group:interfaces(User) of + [] -> % old- or noshell + case user:interfaces(User) of + [] -> + undefined; + [{shell,Shell}] -> + whereis_evaluator(Shell) + end; + [{user_drv,UserDrv}] -> + %% get current group pid from user_drv + case user_drv:interfaces(UserDrv) of + [] -> + undefined; + [{current_group,Group}] -> + %% get shell pid from group + GrIfs = group:interfaces(Group), + case lists:keyfind(shell, 1, GrIfs) of + {shell, Shell} -> + whereis_evaluator(Shell); + false -> + undefined + end + end + end + end. + +-spec whereis_evaluator(pid()) -> 'undefined' | pid(). + +whereis_evaluator(Shell) -> + case process_info(Shell, dictionary) of + {dictionary,Dict} -> + case lists:keyfind(evaluator, 1, Dict) of + {_, Eval} when is_pid(Eval) -> + Eval; + _ -> + undefined + end; + _ -> + undefined + end. + +%% Call this function to start a user restricted shell +%% from a normal shell session. +-spec start_restricted(module()) -> {'error', code:load_error_rsn()}. + +start_restricted(RShMod) when is_atom(RShMod) -> + case code:ensure_loaded(RShMod) of + {module,RShMod} -> + application:set_env(stdlib, restricted_shell, RShMod), + exit(restricted_shell_started); + {error,What} = Error -> + error_logger:error_report( + lists:flatten( + io_lib:fwrite( + <<"Restricted shell module ~w not found: ~p\n">>, + [RShMod,What]))), + Error + end. + +-spec stop_restricted() -> no_return(). + +stop_restricted() -> + application:unset_env(stdlib, restricted_shell), + exit(restricted_shell_stopped). + +default_packages() -> + []. +%%% ['erl','erl.lang']. + +default_modules() -> + []. +%%% [{pdict, 'erl.lang.proc.pdict'}, +%%% {keylist, 'erl.lang.list.keylist'}, +%%% {debug, 'erl.system.debug'}]. + +-spec server(boolean(), boolean()) -> 'terminated'. + +server(NoCtrlG, StartSync) -> + put(no_control_g, NoCtrlG), + server(StartSync). + + +%%% The shell should not start until the system is up and running. +%%% We subscribe with init to get a notification of when. + +%%% In older releases we didn't syncronize the shell with init, but let it +%%% start in parallell with other system processes. This was bad since +%%% accessing the shell too early could interfere with the boot procedure. +%%% Still, by means of a flag, we make it possible to start the shell the +%%% old way (for backwards compatibility reasons). This should however not +%%% be used unless for very special reasons necessary. + +-spec server(boolean()) -> 'terminated'. + +server(StartSync) -> + case init:get_argument(async_shell_start) of + {ok,_} -> + ok; % no sync with init + _ when not StartSync -> + ok; + _ -> + case init:notify_when_started(self()) of + started -> + ok; + _ -> + init:wait_until_started() + end + end, + %% Our spawner has fixed the process groups. + Bs0 = erl_eval:new_bindings(), + Bs = lists:foldl(fun ({K, V}, D) -> + erl_eval:add_binding({module,K}, V, D) + end, + lists:foldl(fun (P, D) -> + import_all(P, D) + end, + Bs0, default_packages()), + default_modules()), + %% io:fwrite("Imported modules: ~p.\n", [erl_eval:bindings(Bs)]), + + %% Use an Ets table for record definitions. It takes too long to + %% send a huge term to and from the evaluator. Ets makes it + %% possible to have thousands of record definitions. + RT = ets:new(?RECORDS, [public,ordered_set]), + _ = initiate_records(Bs, RT), + process_flag(trap_exit, true), + + %% Check if we're in user restricted mode. + RShErr = + case application:get_env(stdlib, restricted_shell) of + {ok,RShMod} -> + io:fwrite(<<"Restricted ">>, []), + case code:ensure_loaded(RShMod) of + {module,RShMod} -> + undefined; + {error,What} -> + {RShMod,What} + end; + undefined -> + undefined + end, + + case get(no_control_g) of + true -> + io:fwrite(<<"Eshell V~s\n">>, [erlang:system_info(version)]); + _undefined_or_false -> + io:fwrite(<<"Eshell V~s (abort with ^G)\n">>, + [erlang:system_info(version)]) + end, + erase(no_control_g), + + case RShErr of + undefined -> + ok; + {RShMod2,What2} -> + io:fwrite( + <<"Warning! Restricted shell module ~w not found: ~p.\n" + "Only the commands q() and init:stop() will be allowed!\n">>, + [RShMod2,What2]), + application:set_env(stdlib, restricted_shell, ?MODULE) + end, + + {History,Results} = check_and_get_history_and_results(), + server_loop(0, start_eval(Bs, RT, []), Bs, RT, [], History, Results). + +server_loop(N0, Eval_0, Bs0, RT, Ds0, History0, Results0) -> + N = N0 + 1, + {Res, Eval0} = get_command(prompt(N), Eval_0, Bs0, RT, Ds0), + case Res of + {ok,Es0,_EndLine} -> + case expand_hist(Es0, N) of + {ok,Es} -> + {V,Eval,Bs,Ds} = shell_cmd(Es, Eval0, Bs0, RT, Ds0), + {History,Results} = check_and_get_history_and_results(), + add_cmd(N, Es, V), + HB1 = del_cmd(command, N - History, N - History0, false), + HB = del_cmd(result, N - Results, N - Results0, HB1), + %% The following test makes sure that large binaries + %% (outside of the heap) are garbage collected as soon + %% as possible. + if + HB -> + garb(self()); + true -> + ok + end, + server_loop(N, Eval, Bs, RT, Ds, History, Results); + {error,E} -> + fwrite_severity(benign, <<"~s">>, [E]), + server_loop(N0, Eval0, Bs0, RT, Ds0, History0, Results0) + end; + {error,{Line,Mod,What},_EndLine} -> + fwrite_severity(benign, <<"~w: ~s">>, + [Line, Mod:format_error(What)]), + server_loop(N0, Eval0, Bs0, RT, Ds0, History0, Results0); + {error,terminated} -> %Io process terminated + exit(Eval0, kill), + terminated; + {error,interrupted} -> %Io process interrupted us + exit(Eval0, kill), + {_,Eval,_,_} = shell_rep(Eval0, Bs0, RT, Ds0), + server_loop(N0, Eval, Bs0, RT, Ds0, History0, Results0); + {error,tokens} -> %Most probably unicode > 255 + fwrite_severity(benign, <<"~w: Invalid tokens.">>, + [N]), + server_loop(N0, Eval0, Bs0, RT, Ds0, History0, Results0); + {eof,_EndLine} -> + fwrite_severity(fatal, <<"Terminating erlang (~w)">>, [node()]), + halt(); + eof -> + fwrite_severity(fatal, <<"Terminating erlang (~w)">>, [node()]), + halt() + end. + +get_command(Prompt, Eval, Bs, RT, Ds) -> + Parse = fun() -> exit(io:parse_erl_exprs(Prompt)) end, + Pid = spawn_link(Parse), + get_command1(Pid, Eval, Bs, RT, Ds). + +get_command1(Pid, Eval, Bs, RT, Ds) -> + receive + {'EXIT', Pid, Res} -> + {Res, Eval}; + {'EXIT', Eval, {Reason,Stacktrace}} -> + report_exception(error, {Reason,Stacktrace}, RT), + get_command1(Pid, start_eval(Bs, RT, Ds), Bs, RT, Ds); + {'EXIT', Eval, Reason} -> + report_exception(error, {Reason,[]}, RT), + get_command1(Pid, start_eval(Bs, RT, Ds), Bs, RT, Ds) + end. + +prompt(N) -> + case is_alive() of + true -> io_lib:format(<<"(~s)~w> ">>, [node(), N]); + false -> io_lib:format(<<"~w> ">>, [N]) + end. + +%% expand_hist(Expressions, CommandNumber) +%% Preprocess the expression list replacing all history list commands +%% with their expansions. + +expand_hist(Es, C) -> + catch {ok,expand_exprs(Es, C)}. + +expand_exprs([E|Es], C) -> + [expand_expr(E, C)|expand_exprs(Es, C)]; +expand_exprs([], _C) -> + []. + +expand_expr({cons,L,H,T}, C) -> + {cons,L,expand_expr(H, C),expand_expr(T, C)}; +expand_expr({lc,L,E,Qs}, C) -> + {lc,L,expand_expr(E, C),expand_quals(Qs, C)}; +expand_expr({bc,L,E,Qs}, C) -> + {bc,L,expand_expr(E, C),expand_quals(Qs, C)}; +expand_expr({tuple,L,Elts}, C) -> + {tuple,L,expand_exprs(Elts, C)}; +expand_expr({record_index,L,Name,F}, C) -> + {record_index,L,Name,expand_expr(F, C)}; +expand_expr({record,L,Name,Is}, C) -> + {record,L,Name,expand_fields(Is, C)}; +expand_expr({record_field,L,R,Name,F}, C) -> + {record_field,L,expand_expr(R, C),Name,expand_expr(F, C)}; +expand_expr({record,L,R,Name,Ups}, C) -> + {record,L,expand_expr(R, C),Name,expand_fields(Ups, C)}; +expand_expr({record_field,L,R,F}, C) -> %This is really illegal! + {record_field,L,expand_expr(R, C),expand_expr(F, C)}; +expand_expr({block,L,Es}, C) -> + {block,L,expand_exprs(Es, C)}; +expand_expr({'if',L,Cs}, C) -> + {'if',L,expand_cs(Cs, C)}; +expand_expr({'case',L,E,Cs}, C) -> + {'case',L,expand_expr(E, C),expand_cs(Cs, C)}; +expand_expr({'try',L,Es,Scs,Ccs,As}, C) -> + {'try',L,expand_exprs(Es, C),expand_cs(Scs, C), + expand_cs(Ccs, C),expand_exprs(As, C)}; +expand_expr({'receive',L,Cs}, C) -> + {'receive',L,expand_cs(Cs, C)}; +expand_expr({'receive',L,Cs,To,ToEs}, C) -> + {'receive',L,expand_cs(Cs, C), expand_expr(To, C), expand_exprs(ToEs, C)}; +expand_expr({call,L,{atom,_,e},[N]}, C) -> + case get_cmd(N, C) of + {undefined,_,_} -> + no_command(N); + {[Ce],_V,_CommandN} -> + Ce; + {Ces,_V,_CommandN} when is_list(Ces) -> + {block,L,Ces} + end; +expand_expr({call,_L,{atom,_,v},[N]}, C) -> + case get_cmd(N, C) of + {_,undefined,_} -> + no_command(N); + {Ces,V,CommandN} when is_list(Ces) -> + {value,CommandN,V} + end; +expand_expr({call,L,F,Args}, C) -> + {call,L,expand_expr(F, C),expand_exprs(Args, C)}; +expand_expr({'catch',L,E}, C) -> + {'catch',L,expand_expr(E, C)}; +expand_expr({match,L,Lhs,Rhs}, C) -> + {match,L,Lhs,expand_expr(Rhs, C)}; +expand_expr({op,L,Op,Arg}, C) -> + {op,L,Op,expand_expr(Arg, C)}; +expand_expr({op,L,Op,Larg,Rarg}, C) -> + {op,L,Op,expand_expr(Larg, C),expand_expr(Rarg, C)}; +expand_expr({remote,L,M,F}, C) -> + {remote,L,expand_expr(M, C),expand_expr(F, C)}; +expand_expr({'fun',L,{clauses,Cs}}, C) -> + {'fun',L,{clauses,expand_exprs(Cs, C)}}; +expand_expr({clause,L,H,G,B}, C) -> + %% Could expand H and G, but then erl_eval has to be changed as well. + {clause,L,H, G, expand_exprs(B, C)}; +expand_expr({bin,L,Fs}, C) -> + {bin,L,expand_bin_elements(Fs, C)}; +expand_expr(E, _C) -> % Constants. + E. + +expand_cs([{clause,L,P,G,B}|Cs], C) -> + [{clause,L,P,G,expand_exprs(B, C)}|expand_cs(Cs, C)]; +expand_cs([], _C) -> + []. + +expand_fields([{record_field,L,F,V}|Fs], C) -> + [{record_field,L,expand_expr(F, C),expand_expr(V, C)}| + expand_fields(Fs, C)]; +expand_fields([], _C) -> []. + +expand_quals([{generate,L,P,E}|Qs], C) -> + [{generate,L,P,expand_expr(E, C)}|expand_quals(Qs, C)]; +expand_quals([{b_generate,L,P,E}|Qs], C) -> + [{b_generate,L,P,expand_expr(E, C)}|expand_quals(Qs, C)]; +expand_quals([E|Qs], C) -> + [expand_expr(E, C)|expand_quals(Qs, C)]; +expand_quals([], _C) -> []. + +expand_bin_elements([], _C) -> + []; +expand_bin_elements([{bin_element,L,E,Sz,Ts}|Fs], C) -> + [{bin_element,L,expand_expr(E, C),Sz,Ts}|expand_bin_elements(Fs, C)]. + +no_command(N) -> + throw({error, + io_lib:fwrite(<<"~s: command not found">>, [erl_pp:expr(N)])}). + +%% add_cmd(Number, Expressions, Value) +%% get_cmd(Number, CurrentCommand) +%% del_cmd(Number, NewN, OldN, HasBin0) -> bool() + +add_cmd(N, Es, V) -> + put({command,N}, Es), + put({result,N}, V). + +getc(N) -> + {get({command,N}), get({result,N}), N}. + +get_cmd(Num, C) -> + case catch erl_eval:expr(Num, []) of + {value,N,_} when N < 0 -> getc(C+N); + {value,N,_} -> getc(N); + _Other -> {undefined,undefined,undefined} + end. + +del_cmd(_Type, N, N0, HasBin) when N < N0 -> + HasBin; +del_cmd(Type, N, N0, HasBin0) -> + T = erase({Type,N}), + HasBin = HasBin0 orelse has_binary(T), + del_cmd(Type, N-1, N0, HasBin). + +has_binary(T) -> + try has_bin(T), false + catch true=Thrown -> Thrown + end. + +has_bin(T) when is_tuple(T) -> + has_bin(T, tuple_size(T)); +has_bin([E | Es]) -> + has_bin(E), + has_bin(Es); +has_bin(B) when byte_size(B) > ?MAXSIZE_HEAPBINARY -> + throw(true); +has_bin(T) -> + T. + +has_bin(T, 0) -> + T; +has_bin(T, I) -> + has_bin(element(I, T)), + has_bin(T, I - 1). + +%% shell_cmd(Sequence, Evaluator, Bindings, RecordTable, Dictionary) +%% shell_rep(Evaluator, Bindings, RecordTable, Dictionary) -> +%% {Value,Evaluator,Bindings,Dictionary} +%% Send a command to the evaluator and wait for the reply. Start a new +%% evaluator if necessary. + +shell_cmd(Es, Eval, Bs, RT, Ds) -> + Eval ! {shell_cmd,self(),{eval,Es}}, + shell_rep(Eval, Bs, RT, Ds). + +shell_rep(Ev, Bs0, RT, Ds0) -> + receive + {shell_rep,Ev,{value,V,Bs,Ds}} -> + {V,Ev,Bs,Ds}; + {shell_rep,Ev,{command_error,{Line,M,Error}}} -> + fwrite_severity(benign, <<"~w: ~s">>, + [Line, M:format_error(Error)]), + {{'EXIT',Error},Ev,Bs0,Ds0}; + {shell_req,Ev,get_cmd} -> + Ev ! {shell_rep,self(),get()}, + shell_rep(Ev, Bs0, RT, Ds0); + {shell_req,Ev,exit} -> + Ev ! {shell_rep,self(),exit}, + exit(normal); + {shell_req,Ev,{update_dict,Ds}} -> % Update dictionary + Ev ! {shell_rep,self(),ok}, + shell_rep(Ev, Bs0, RT, Ds); + {ev_exit,{Ev,Class,Reason0}} -> % It has exited unnaturally + receive {'EXIT',Ev,normal} -> ok end, + report_exception(Class, Reason0, RT), + Reason = nocatch(Class, Reason0), + {{'EXIT',Reason},start_eval(Bs0, RT, Ds0), Bs0, Ds0}; + {ev_caught,{Ev,Class,Reason0}} -> % catch_exception is in effect + report_exception(Class, benign, Reason0, RT), + Reason = nocatch(Class, Reason0), + {{'EXIT',Reason},Ev,Bs0,Ds0}; + {'EXIT',_Id,interrupt} -> % Someone interrupted us + exit(Ev, kill), + shell_rep(Ev, Bs0, RT, Ds0); + {'EXIT',Ev,{Reason,Stacktrace}} -> + report_exception(exit, {Reason,Stacktrace}, RT), + {{'EXIT',Reason},start_eval(Bs0, RT, Ds0), Bs0, Ds0}; + {'EXIT',Ev,Reason} -> + report_exception(exit, {Reason,[]}, RT), + {{'EXIT',Reason},start_eval(Bs0, RT, Ds0), Bs0, Ds0}; + {'EXIT',_Id,R} -> + exit(Ev, R), + exit(R); + _Other -> % Ignore everything else + shell_rep(Ev, Bs0, RT, Ds0) + end. + +nocatch(throw, {Term,Stack}) -> + {{nocatch,Term},Stack}; +nocatch(error, Reason) -> + Reason; +nocatch(exit, Reason) -> + Reason. + +report_exception(Class, Reason, RT) -> + report_exception(Class, serious, Reason, RT). + +report_exception(Class, Severity, {Reason,Stacktrace}, RT) -> + Tag = severity_tag(Severity), + I = iolist_size(Tag) + 1, + PF = fun(Term, I1) -> pp(Term, I1, RT) end, + SF = fun(M, _F, _A) -> (M =:= erl_eval) or (M =:= ?MODULE) end, + io:requests([{put_chars, Tag}, + {put_chars, + lib:format_exception(I, Class, Reason, Stacktrace, SF, PF)}, + nl]). + +start_eval(Bs, RT, Ds) -> + Self = self(), + Eval = spawn_link(fun() -> evaluator(Self, Bs, RT, Ds) end), + put(evaluator, Eval), + Eval. + +%% evaluator(Shell, Bindings, RecordTable, ProcessDictionary) +%% Evaluate expressions from the shell. Use the "old" variable bindings +%% and dictionary. + +evaluator(Shell, Bs, RT, Ds) -> + init_dict(Ds), + case application:get_env(stdlib, restricted_shell) of + undefined -> + eval_loop(Shell, Bs, RT); + {ok,RShMod} -> + case get(restricted_shell_state) of + undefined -> put(restricted_shell_state, []); + _ -> ok + end, + put(restricted_expr_state, []), + restricted_eval_loop(Shell, Bs, RT, RShMod) + end. + +eval_loop(Shell, Bs0, RT) -> + receive + {shell_cmd,Shell,{eval,Es}} -> + Ef = {value, + fun(MForFun, As) -> apply_fun(MForFun, As, Shell) end}, + Lf = local_func_handler(Shell, RT, Ef), + Bs = eval_exprs(Es, Shell, Bs0, RT, Lf, Ef), + eval_loop(Shell, Bs, RT) + end. + +restricted_eval_loop(Shell, Bs0, RT, RShMod) -> + receive + {shell_cmd,Shell,{eval,Es}} -> + {LFH,NLFH} = restrict_handlers(RShMod, Shell, RT), + put(restricted_expr_state, []), + Bs = eval_exprs(Es, Shell, Bs0, RT, {eval,LFH}, {value,NLFH}), + restricted_eval_loop(Shell, Bs, RT, RShMod) + end. + +eval_exprs(Es, Shell, Bs0, RT, Lf, Ef) -> + try + {R,Bs2} = exprs(Es, Bs0, RT, Lf, Ef), + Shell ! {shell_rep,self(),R}, + Bs2 + catch + exit:normal -> + exit(normal); + Class:Reason -> + Stacktrace = erlang:get_stacktrace(), + M = {self(),Class,{Reason,Stacktrace}}, + case do_catch(Class, Reason) of + true -> + Shell ! {ev_caught,M}, + Bs0; + false -> + %% We don't want the ERROR REPORT generated by the + %% emulator. Note: exit(kill) needs nothing special. + {links,LPs} = process_info(self(), links), + ER = nocatch(Class, {Reason,Stacktrace}), + lists:foreach(fun(P) -> exit(P, ER) end, LPs--[Shell]), + Shell ! {ev_exit,M}, + exit(normal) + end + end. + +do_catch(exit, restricted_shell_stopped) -> + false; +do_catch(exit, restricted_shell_started) -> + false; +do_catch(_Class, _Reason) -> + case application:get_env(stdlib, shell_catch_exception) of + {ok, true} -> + true; + _ -> + false + end. + +exprs(Es, Bs0, RT, Lf, Ef) -> + exprs(Es, Bs0, RT, Lf, Ef, Bs0). + +exprs([E0|Es], Bs1, RT, Lf, Ef, Bs0) -> + UsedRecords = used_record_defs(E0, RT), + RBs = record_bindings(UsedRecords, Bs1), + case check_command(prep_check([E0]), RBs) of + ok -> + E1 = expand_records(UsedRecords, E0), + {value,V0,Bs2} = expr(E1, Bs1, Lf, Ef), + Bs = orddict:from_list([VV || {X,_}=VV <- erl_eval:bindings(Bs2), + not is_expand_variable(X)]), + if + Es =:= [] -> + VS = pp(V0, 1, RT), + io:requests([{put_chars, VS}, nl]), + %% Don't send the result back if it will be + %% discarded anyway. + V = case result_will_be_saved() of + true -> V0; + false -> ignored + end, + {{value,V,Bs,get()},Bs}; + true -> + exprs(Es, Bs, RT, Lf, Ef, Bs0) + end; + {error,Error} -> + {{command_error,Error},Bs0} + end. + +is_expand_variable(V) -> + case catch atom_to_list(V) of + "rec" ++ _Integer -> true; + _ -> false + end. + +result_will_be_saved() -> + case get_history_and_results() of + {_, 0} -> false; + _ -> true + end. + +used_record_defs(E, RT) -> + %% Be careful to return a list where used records come before + %% records that use them. The linter wants them ordered that way. + UR = case used_records(E, [], RT) of + [] -> + []; + L0 -> + L1 = lists:zip(L0, lists:seq(1, length(L0))), + L2 = lists:keysort(2, lists:ukeysort(1, L1)), + [R || {R, _} <- L2] + end, + record_defs(RT, UR). + +used_records(E, U0, RT) -> + case used_records(E) of + {name,Name,E1} -> + U = used_records(ets:lookup(RT, Name), [Name | U0], RT), + used_records(E1, U, RT); + {expr,[E1 | Es]} -> + used_records(Es, used_records(E1, U0, RT), RT); + _ -> + U0 + end. + +used_records({record_index,_,Name,F}) -> + {name, Name, F}; +used_records({record,_,Name,Is}) -> + {name, Name, Is}; +used_records({record_field,_,R,Name,F}) -> + {name, Name, [R | F]}; +used_records({record,_,R,Name,Ups}) -> + {name, Name, [R | Ups]}; +used_records({record_field,_,R,F}) -> % illegal + {expr, [R | F]}; +used_records({call,_,{atom,_,record},[A,{atom,_,Name}]}) -> + {name, Name, A}; +used_records({call,_,{atom,_,is_record},[A,{atom,_,Name}]}) -> + {name, Name, A}; +used_records({call,_,{remote,_,{atom,_,erlang},{atom,_,is_record}}, + [A,{atom,_,Name}]}) -> + {name, Name, A}; +used_records({call,_,{atom,_,record_info},[A,{atom,_,Name}]}) -> + {name, Name, A}; +used_records({call,Line,{tuple,_,[M,F]},As}) -> + used_records({call,Line,{remote,Line,M,F},As}); +used_records(T) when is_tuple(T) -> + {expr, tuple_to_list(T)}; +used_records(E) -> + {expr, E}. + +fwrite_severity(Severity, S, As) -> + io:fwrite(<<"~s\n">>, [format_severity(Severity, S, As)]). + +format_severity(Severity, S, As) -> + add_severity(Severity, io_lib:fwrite(S, As)). + +add_severity(Severity, S) -> + [severity_tag(Severity), S]. + +severity_tag(fatal) -> <<"*** ">>; +severity_tag(serious) -> <<"** ">>; +severity_tag(benign) -> <<"* ">>. + +restrict_handlers(RShMod, Shell, RT) -> + { fun(F,As,Binds) -> + local_allowed(F, As, RShMod, Binds, Shell, RT) + end, + fun(MF,As) -> + non_local_allowed(MF, As, RShMod, Shell) + end }. + +-define(BAD_RETURN(M, F, V), + try erlang:error(reason) + catch _:_ -> erlang:raise(exit, {restricted_shell_bad_return,V}, + [{M,F,3} | erlang:get_stacktrace()]) + end). + +local_allowed(F, As, RShMod, Bs, Shell, RT) when is_atom(F) -> + {LFH,NLFH} = restrict_handlers(RShMod, Shell, RT), + case not_restricted(F, As) of % Not restricted is the same as builtin. + % variable and record manipulations local + % to the shell process. Those are never + % restricted. + true -> + local_func(F, As, Bs, Shell, RT, {eval,LFH}, {value,NLFH}); + false -> + {AsEv,Bs1} = expr_list(As, Bs, {eval,LFH}, {value,NLFH}), + case RShMod:local_allowed(F, AsEv, {get(restricted_shell_state), + get(restricted_expr_state)}) of + {Result,{RShShSt,RShExprSt}} -> + put(restricted_shell_state, RShShSt), + put(restricted_expr_state, RShExprSt), + if not Result -> + shell_req(Shell, {update_dict,get()}), + exit({restricted_shell_disallowed,{F,AsEv}}); + true -> % This is never a builtin, + % those are handled above. + non_builtin_local_func(F,AsEv,Bs1) + end; + Unexpected -> % The user supplied non conforming module + ?BAD_RETURN(RShMod, local_allowed, Unexpected) + end + end. + +non_local_allowed(MForFun, As, RShMod, Shell) -> + case RShMod:non_local_allowed(MForFun, As, {get(restricted_shell_state), + get(restricted_expr_state)}) of + {Result,{RShShSt,RShExprSt}} -> + put(restricted_shell_state, RShShSt), + put(restricted_expr_state, RShExprSt), + case Result of + false -> + shell_req(Shell, {update_dict,get()}), + exit({restricted_shell_disallowed,{MForFun,As}}); + {redirect, NewMForFun, NewAs} -> + apply_fun(NewMForFun, NewAs, Shell); + _ -> + apply_fun(MForFun, As, Shell) + end; + Unexpected -> % The user supplied non conforming module + ?BAD_RETURN(RShMod, non_local_allowed, Unexpected) + end. + +%% The commands implemented in shell should not be checked if allowed +%% This *has* to correspond to the function local_func/7! +%% (especially true for f/1, the argument must not be evaluated). +not_restricted(f, []) -> + true; +not_restricted(f, [_]) -> + true; +not_restricted(h, []) -> + true; +not_restricted(b, []) -> + true; +not_restricted(which, [_]) -> + true; +not_restricted(import, [_]) -> + true; +not_restricted(import_all, [_]) -> + true; +not_restricted(use, [_]) -> + true; +not_restricted(use_all, [_]) -> + true; +not_restricted(history, [_]) -> + true; +not_restricted(results, [_]) -> + true; +not_restricted(catch_exception, [_]) -> + true; +not_restricted(exit, []) -> + true; +not_restricted(rd, [_,_]) -> + true; +not_restricted(rf, []) -> + true; +not_restricted(rf, [_]) -> + true; +not_restricted(rl, []) -> + true; +not_restricted(rl, [_]) -> + true; +not_restricted(rp, [_]) -> + true; +not_restricted(rr, [_]) -> + true; +not_restricted(rr, [_,_]) -> + true; +not_restricted(rr, [_,_,_]) -> + true; +not_restricted(_, _) -> + false. + +%% When erlang:garbage_collect() is called from the shell, +%% the shell process process that spawned the evaluating +%% process is garbage collected as well. +%% To garbage collect the evaluating process only the command +%% garbage_collect(self()). can be used. +apply_fun({erlang,garbage_collect}, [], Shell) -> + garb(Shell); +apply_fun({M,F}, As, _Shell) -> + apply(M, F, As); +apply_fun(MForFun, As, _Shell) -> + apply(MForFun, As). + +prep_check({call,Line,{atom,_,f},[{var,_,_Name}]}) -> + %% Do not emit a warning for f(V) when V is unbound. + {atom,Line,ok}; +prep_check({value,_CommandN,_Val}) -> + %% erl_lint cannot handle the history expansion {value,_,_}. + {atom,0,ok}; +prep_check(T) when is_tuple(T) -> + list_to_tuple(prep_check(tuple_to_list(T))); +prep_check([E | Es]) -> + [prep_check(E) | prep_check(Es)]; +prep_check(E) -> + E. + +expand_records([], E0) -> + E0; +expand_records(UsedRecords, E0) -> + RecordDefs = [Def || {_Name,Def} <- UsedRecords], + L = 1, + E = prep_rec(E0), + Forms = RecordDefs ++ [{function,L,foo,0,[{clause,L,[],[],[E]}]}], + [{function,L,foo,0,[{clause,L,[],[],[NE]}]}] = + erl_expand_records:module(Forms, [strict_record_tests]), + prep_rec(NE). + +prep_rec({value,_CommandN,_V}=Value) -> + %% erl_expand_records cannot handle the history expansion {value,_,_}. + {atom,Value,ok}; +prep_rec({atom,{value,_CommandN,_V}=Value,ok}) -> + %% Undo the effect of the previous clause... + Value; +prep_rec(T) when is_tuple(T) -> list_to_tuple(prep_rec(tuple_to_list(T))); +prep_rec([E | Es]) -> [prep_rec(E) | prep_rec(Es)]; +prep_rec(E) -> E. + +init_dict([{K,V}|Ds]) -> + put(K, V), + init_dict(Ds); +init_dict([]) -> true. + +%% local_func(Function, Args, Bindings, Shell, RecordTable, +%% LocalFuncHandler, ExternalFuncHandler) -> {value,Val,Bs} +%% Evaluate local functions, including shell commands. +%% +%% Note that the predicate not_restricted/2 has to correspond to what's +%% handled internally - it should return 'true' for all local functions +%% handled in this module (i.e. those that are not eventually handled by +%% non_builtin_local_func/3 (user_default/shell_default). + +local_func(h, [], Bs, Shell, RT, _Lf, _Ef) -> + Cs = shell_req(Shell, get_cmd), + Cs1 = lists:filter(fun({{command, _},_}) -> true; + ({{result, _},_}) -> true; + (_) -> false + end, + Cs), + Cs2 = lists:map(fun({{T, N}, V}) -> {{N, T}, V} end, + Cs1), + Cs3 = lists:keysort(1, Cs2), + {value,list_commands(Cs3, RT),Bs}; +local_func(b, [], Bs, _Shell, RT, _Lf, _Ef) -> + {value,list_bindings(erl_eval:bindings(Bs), RT),Bs}; +local_func(f, [], _Bs, _Shell, _RT, _Lf, _Ef) -> + {value,ok,erl_eval:new_bindings()}; +local_func(f, [{var,_,Name}], Bs, _Shell, _RT, _Lf, _Ef) -> + {value,ok,erl_eval:del_binding(Name, Bs)}; +local_func(f, [_Other], _Bs, _Shell, _RT, _Lf, _Ef) -> + erlang:raise(error, function_clause, [{shell,f,1}]); +local_func(rd, [{atom,_,RecName},RecDef0], Bs, _Shell, RT, _Lf, _Ef) -> + RecDef = expand_value(RecDef0), + RDs = lists:flatten(erl_pp:expr(RecDef)), + Attr = lists:concat(["-record('", RecName, "',", RDs, ")."]), + {ok, Tokens, _} = erl_scan:string(Attr), + case erl_parse:parse_form(Tokens) of + {ok,AttrForm} -> + [RN] = add_records([AttrForm], Bs, RT), + {value,RN,Bs}; + {error,{_Line,M,ErrDesc}} -> + ErrStr = io_lib:fwrite(<<"~s">>, [M:format_error(ErrDesc)]), + exit(lists:flatten(ErrStr)) + end; +local_func(rd, [_,_], _Bs, _Shell, _RT, _Lf, _Ef) -> + erlang:raise(error, function_clause, [{shell,rd,2}]); +local_func(rf, [], Bs, _Shell, RT, _Lf, _Ef) -> + true = ets:delete_all_objects(RT), + {value,initiate_records(Bs, RT),Bs}; +local_func(rf, [A], Bs0, _Shell, RT, Lf, Ef) -> + {[Recs],Bs} = expr_list([A], Bs0, Lf, Ef), + if '_' =:= Recs -> + true = ets:delete_all_objects(RT); + true -> + lists:foreach(fun(Name) -> true = ets:delete(RT, Name) + end, listify(Recs)) + end, + {value,ok,Bs}; +local_func(rl, [], Bs, _Shell, RT, _Lf, _Ef) -> + {value,list_records(ets:tab2list(RT)),Bs}; +local_func(rl, [A], Bs0, _Shell, RT, Lf, Ef) -> + {[Recs],Bs} = expr_list([A], Bs0, Lf, Ef), + {value,list_records(record_defs(RT, listify(Recs))),Bs}; +local_func(rp, [A], Bs0, _Shell, RT, Lf, Ef) -> + {[V],Bs} = expr_list([A], Bs0, Lf, Ef), + W = columns(), + io:requests([{put_chars, + io_lib_pretty:print(V, 1, W, -1, ?CHAR_MAX, + record_print_fun(RT))}, + nl]), + {value,ok,Bs}; +local_func(rr, [A], Bs0, _Shell, RT, Lf, Ef) -> + {[File],Bs} = expr_list([A], Bs0, Lf, Ef), + {value,read_and_add_records(File, '_', [], Bs, RT),Bs}; +local_func(rr, [_,_]=As0, Bs0, _Shell, RT, Lf, Ef) -> + {[File,Sel],Bs} = expr_list(As0, Bs0, Lf, Ef), + {value,read_and_add_records(File, Sel, [], Bs, RT),Bs}; +local_func(rr, [_,_,_]=As0, Bs0, _Shell, RT, Lf, Ef) -> + {[File,Sel,Options],Bs} = expr_list(As0, Bs0, Lf, Ef), + {value,read_and_add_records(File, Sel, Options, Bs, RT),Bs}; +local_func(which, [{atom,_,M}], Bs, _Shell, _RT, _Lf, _Ef) -> + case erl_eval:binding({module,M}, Bs) of + {value, M1} -> + {value,M1,Bs}; + unbound -> + {value,M,Bs} + end; +local_func(which, [_Other], _Bs, _Shell, _RT, _Lf, _Ef) -> + erlang:raise(error, function_clause, [{shell,which,1}]); +local_func(import, [M], Bs, _Shell, _RT, _Lf, _Ef) -> + case erl_parse:package_segments(M) of + error -> erlang:raise(error, function_clause, [{shell,import,1}]); + M1 -> + Mod = packages:concat(M1), + case packages:is_valid(Mod) of + true -> + Key = list_to_atom(packages:last(Mod)), + Mod1 = list_to_atom(Mod), + {value,ok,erl_eval:add_binding({module,Key}, Mod1, Bs)}; + false -> + exit({{bad_module_name, Mod}, [{shell,import,1}]}) + end + end; +local_func(import_all, [P], Bs0, _Shell, _RT, _Lf, _Ef) -> + case erl_parse:package_segments(P) of + error -> erlang:raise(error, function_clause, [{shell,import_all,1}]); + P1 -> + Name = packages:concat(P1), + case packages:is_valid(Name) of + true -> + Bs1 = import_all(Name, Bs0), + {value,ok,Bs1}; + false -> + exit({{bad_package_name, Name}, + [{shell,import_all,1}]}) + end + end; +local_func(use, [M], Bs, Shell, RT, Lf, Ef) -> + local_func(import, [M], Bs, Shell, RT, Lf, Ef); +local_func(use_all, [M], Bs, Shell, RT, Lf, Ef) -> + local_func(import_all, [M], Bs, Shell, RT, Lf, Ef); +local_func(history, [{integer,_,N}], Bs, _Shell, _RT, _Lf, _Ef) -> + {value,history(N),Bs}; +local_func(history, [_Other], _Bs, _Shell, _RT, _Lf, _Ef) -> + erlang:raise(error, function_clause, [{shell,history,1}]); +local_func(results, [{integer,_,N}], Bs, _Shell, _RT, _Lf, _Ef) -> + {value,results(N),Bs}; +local_func(results, [_Other], _Bs, _Shell, _RT, _Lf, _Ef) -> + erlang:raise(error, function_clause, [{shell,results,1}]); +local_func(catch_exception, [{atom,_,Bool}], Bs, _Shell, _RT, _Lf, _Ef) + when Bool; not Bool -> + {value,catch_exception(Bool),Bs}; +local_func(catch_exception, [_Other], _Bs, _Shell, _RT, _Lf, _Ef) -> + erlang:raise(error, function_clause, [{shell,catch_exception,1}]); +local_func(exit, [], _Bs, Shell, _RT, _Lf, _Ef) -> + shell_req(Shell, exit), %This terminates us + exit(normal); +local_func(F, As0, Bs0, _Shell, _RT, Lf, Ef) when is_atom(F) -> + {As,Bs} = expr_list(As0, Bs0, Lf, Ef), + non_builtin_local_func(F,As,Bs). + +non_builtin_local_func(F,As,Bs) -> + case erlang:function_exported(user_default, F, length(As)) of + true -> + {eval,{user_default,F},As,Bs}; + false -> + shell_default(F,As,Bs) + end. + +shell_default(F,As,Bs) -> + M = shell_default, + A = length(As), + case code:ensure_loaded(M) of + {module, _} -> + case erlang:function_exported(M,F,A) of + true -> + {eval,{M,F},As,Bs}; + false -> + shell_undef(F,A) + end; + {error, _} -> + shell_undef(F,A) + end. + +shell_undef(F,A) -> + erlang:error({shell_undef,F,A}). + +local_func_handler(Shell, RT, Ef) -> + H = fun(Lf) -> + fun(F, As, Bs) -> + local_func(F, As, Bs, Shell, RT, {eval,Lf(Lf)}, Ef) + end + end, + {eval,H(H)}. + +record_print_fun(RT) -> + fun(Tag, NoFields) -> + case ets:lookup(RT, Tag) of + [{_,{attribute,_,record,{Tag,Fields}}}] + when length(Fields) =:= NoFields -> + record_fields(Fields); + _ -> + no + end + end. + +record_fields([{record_field,_,{atom,_,Field}} | Fs]) -> + [Field | record_fields(Fs)]; +record_fields([{record_field,_,{atom,_,Field},_} | Fs]) -> + [Field | record_fields(Fs)]; +record_fields([]) -> + []. + +initiate_records(Bs, RT) -> + RNs1 = init_rec(shell_default, Bs, RT), + RNs2 = case code:is_loaded(user_default) of + {file,_File} -> + init_rec(user_default, Bs, RT); + false -> + [] + end, + lists:usort(RNs1 ++ RNs2). + +init_rec(Module, Bs, RT) -> + case read_records(Module, []) of + RAs when is_list(RAs) -> + case catch add_records(RAs, Bs, RT) of + {'EXIT',_} -> + []; + RNs -> + RNs + end; + _Error -> + [] + end. + +read_and_add_records(File, Selected, Options, Bs, RT) -> + case read_records(File, Selected, Options) of + RAs when is_list(RAs) -> + add_records(RAs, Bs, RT); + Error -> + Error + end. + +read_records(File, Selected, Options) -> + case read_records(File, listify(Options)) of + Error when is_tuple(Error) -> + Error; + RAs when Selected =:= '_' -> + RAs; + RAs -> + Sel = listify(Selected), + [RA || {attribute,_,_,{Name,_}}=RA <- RAs, + lists:member(Name, Sel)] + end. + +add_records(RAs, Bs0, RT) -> + Recs = [{Name,D} || {attribute,_,_,{Name,_}}=D <- RAs], + Bs1 = record_bindings(Recs, Bs0), + case check_command([], Bs1) of + {error,{_Line,M,ErrDesc}} -> + %% A source file that has not been compiled. + ErrStr = io_lib:fwrite(<<"~s">>, [M:format_error(ErrDesc)]), + exit(lists:flatten(ErrStr)); + ok -> + true = ets:insert(RT, Recs), + lists:usort([Name || {Name,_} <- Recs]) + end. + +listify(L) when is_list(L) -> + L; +listify(E) -> + [E]. + +check_command(Es, Bs) -> + erl_eval:check_command(Es, strip_bindings(Bs)). + +expr(E, Bs, Lf, Ef) -> + erl_eval:expr(E, strip_bindings(Bs), Lf, Ef). + +expr_list(Es, Bs, Lf, Ef) -> + erl_eval:expr_list(Es, strip_bindings(Bs), Lf, Ef). + +strip_bindings(Bs) -> + Bs -- [B || {{module,_},_}=B <- Bs]. + +%% Note that a sequence number is used here to make sure that if a +%% record is used by another record, then the first record is parsed +%% before the second record. (erl_eval:check_command() calls the +%% linter which needs the records in a proper order.) +record_bindings([], Bs) -> + Bs; +record_bindings(Recs0, Bs0) -> + {Recs1, _} = lists:mapfoldl(fun ({Name,Def}, I) -> {{Name,I,Def},I+1} + end, 0, Recs0), + Recs2 = lists:keysort(2, lists:ukeysort(1, Recs1)), + lists:foldl(fun ({Name,I,Def}, Bs) -> + erl_eval:add_binding({record,I,Name}, Def, Bs) + end, Bs0, Recs2). + +%%% Read record information from file(s) + +read_records(FileOrModule, Opts0) -> + Opts = lists:delete(report_warnings, Opts0), + case find_file(FileOrModule) of + {files,[File]} -> + read_file_records(File, Opts); + {files,Files} -> + lists:flatmap(fun(File) -> + case read_file_records(File, Opts) of + RAs when is_list(RAs) -> RAs; + _ -> [] + end + end, Files); + Error -> + Error + end. + +-include_lib("kernel/include/file.hrl"). + +find_file(Mod) when is_atom(Mod) -> + case code:which(Mod) of + File when is_list(File) -> + {files,[File]}; + preloaded -> + {_M,_Bin,File} = code:get_object_code(Mod), + {files,[File]}; + _Else -> % non_existing, interpreted, cover_compiled + {error,nofile} + end; +find_file(File) -> + case catch filelib:wildcard(File) of + {'EXIT',_} -> + {error,invalid_filename}; + Files -> + {files,Files} + end. + +read_file_records(File, Opts) -> + case filename:extension(File) of + ".beam" -> + case beam_lib:chunks(File, [abstract_code,"CInf"]) of + {ok,{_Mod,[{abstract_code,{Version,Forms}},{"CInf",CB}]}} -> + case record_attrs(Forms) of + [] when Version =:= raw_abstract_v1 -> + []; + [] -> + %% If the version is raw_X, then this test + %% is unnecessary. + try_source(File, CB); + Records -> + Records + end; + {ok,{_Mod,[{abstract_code,no_abstract_code},{"CInf",CB}]}} -> + try_source(File, CB); + Error -> + %% Could be that the "Abst" chunk is missing (pre R6). + Error + end; + _ -> + parse_file(File, Opts) + end. + +%% This is how the debugger searches for source files. See int.erl. +try_source(Beam, CB) -> + Os = case lists:keyfind(options, 1, binary_to_term(CB)) of + false -> []; + {_, Os0} -> Os0 + end, + Src0 = filename:rootname(Beam) ++ ".erl", + case is_file(Src0) of + true -> parse_file(Src0, Os); + false -> + EbinDir = filename:dirname(Beam), + Src = filename:join([filename:dirname(EbinDir), "src", + filename:basename(Src0)]), + case is_file(Src) of + true -> parse_file(Src, Os); + false -> {error, nofile} + end + end. + +is_file(Name) -> + case filelib:is_file(Name) of + true -> + not filelib:is_dir(Name); + false -> + false + end. + +parse_file(File, Opts) -> + Cwd = ".", + Dir = filename:dirname(File), + IncludePath = [Cwd,Dir|inc_paths(Opts)], + case epp:parse_file(File, IncludePath, pre_defs(Opts)) of + {ok,Forms} -> + record_attrs(Forms); + Error -> + Error + end. + +pre_defs([{d,M,V}|Opts]) -> + [{M,V}|pre_defs(Opts)]; +pre_defs([{d,M}|Opts]) -> + [M|pre_defs(Opts)]; +pre_defs([_|Opts]) -> + pre_defs(Opts); +pre_defs([]) -> []. + +inc_paths(Opts) -> + [P || {i,P} <- Opts, is_list(P)]. + +record_attrs(Forms) -> + [A || A = {attribute,_,record,_D} <- Forms]. + +%%% End of reading record information from file(s) + +import_all(P, Bs0) -> + Ms = packages:find_modules(P), + lists:foldl(fun (M, Bs) -> + Key = list_to_atom(M), + M1 = list_to_atom(packages:concat(P, M)), + erl_eval:add_binding({module,Key}, M1, Bs) + end, + Bs0, Ms). + +shell_req(Shell, Req) -> + Shell ! {shell_req,self(),Req}, + receive + {shell_rep,Shell,Rep} -> Rep + end. + +list_commands([{{N,command},Es0}, {{N,result}, V} |Ds], RT) -> + Es = prep_list_commands(Es0), + VS = pp(V, 4, RT), + Ns = io_lib:fwrite(<<"~w: ">>, [N]), + I = iolist_size(Ns), + io:requests([{put_chars, Ns}, + {format,<<"~s\n">>,[erl_pp:exprs(Es, I, none)]}, + {format,<<"-> ">>,[]}, + {put_chars, VS}, + nl]), + list_commands(Ds, RT); +list_commands([{{N,command},Es0} |Ds], RT) -> + Es = prep_list_commands(Es0), + Ns = io_lib:fwrite(<<"~w: ">>, [N]), + I = iolist_size(Ns), + io:requests([{put_chars, Ns}, + {format,<<"~s\n">>,[erl_pp:exprs(Es, I, none)]}]), + list_commands(Ds, RT); +list_commands([_D|Ds], RT) -> + list_commands(Ds, RT); +list_commands([], _RT) -> ok. + +list_bindings([{{module,M},Val}|Bs], RT) -> + io:fwrite(<<"~p is ~p\n">>, [M,Val]), + list_bindings(Bs, RT); +list_bindings([{Name,Val}|Bs], RT) -> + case erl_eval:fun_data(Val) of + {fun_data,_FBs,FCs0} -> + FCs = expand_value(FCs0), % looks nicer + F = {'fun',0,{clauses,FCs}}, + M = {match,0,{var,0,Name},F}, + io:fwrite(<<"~s\n">>, [erl_pp:expr(M)]); + false -> + Namel = io_lib:fwrite(<<"~s = ">>, [Name]), + Nl = iolist_size(Namel)+1, + ValS = pp(Val, Nl, RT), + io:requests([{put_chars, Namel}, + {put_chars, ValS}, + nl]) + end, + list_bindings(Bs, RT); +list_bindings([], _RT) -> + ok. + +list_records(Records) -> + lists:foreach(fun({_Name,Attr}) -> + io:fwrite(<<"~s">>, [erl_pp:attribute(Attr)]) + end, Records). + +record_defs(RT, Names) -> + lists:flatmap(fun(Name) -> ets:lookup(RT, Name) + end, Names). + +expand_value(E) -> + substitute_v1(fun({value,CommandN,V}) -> try_abstract(V, CommandN) + end, E). + +%% There is no abstract representation of funs. +try_abstract(V, CommandN) -> + try erl_parse:abstract(V) + catch _:_ -> {call,0,{atom,0,v},[{integer,0,CommandN}]} + end. + +%% Rather than listing possibly huge results the calls to v/1 are shown. +prep_list_commands(E) -> + substitute_v1(fun({value,CommandN,_V}) -> + {call,0,{atom,0,v},[{integer,0,CommandN}]} + end, E). + +substitute_v1(F, {value,_,_}=Value) -> + F(Value); +substitute_v1(F, T) when is_tuple(T) -> + list_to_tuple(substitute_v1(F, tuple_to_list(T))); +substitute_v1(F, [E | Es]) -> + [substitute_v1(F, E) | substitute_v1(F, Es)]; +substitute_v1(_F, E) -> + E. + +check_and_get_history_and_results() -> + check_env(shell_history_length), + check_env(shell_saved_results), + get_history_and_results(). + +get_history_and_results() -> + History = get_env(shell_history_length, ?DEF_HISTORY), + Results = get_env(shell_saved_results, ?DEF_RESULTS), + {History, erlang:min(Results, History)}. + +pp(V, I, RT) -> + io_lib_pretty:print(V, I, columns(), ?LINEMAX, ?CHAR_MAX, + record_print_fun(RT)). + +columns() -> + case io:columns() of + {ok,N} -> N; + _ -> 80 + end. + +garb(Shell) -> + erlang:garbage_collect(Shell), + catch erlang:garbage_collect(whereis(user)), + catch erlang:garbage_collect(group_leader()), + erlang:garbage_collect(). + +get_env(V, Def) -> + case application:get_env(stdlib, V) of + {ok, Val} when is_integer(Val), Val >= 0 -> + Val; + _ -> + Def + end. + +check_env(V) -> + case application:get_env(stdlib, V) of + undefined -> + ok; + {ok, Val} when is_integer(Val), Val >= 0 -> + ok; + {ok, Val} -> + Txt = io_lib:fwrite( + <<"Invalid value of STDLIB configuration parameter ~p: ~p\n">>, + [V, Val]), + error_logger:info_report(lists:flatten(Txt)) + end. + +set_env(App, Name, Val, Default) -> + Prev = case application:get_env(App, Name) of + undefined -> + Default; + {ok, Old} -> + Old + end, + application_controller:set_env(App, Name, Val), + Prev. + +-spec history(non_neg_integer()) -> non_neg_integer(). + +history(L) when is_integer(L), L >= 0 -> + set_env(stdlib, shell_history_length, L, ?DEF_HISTORY). + +-spec results(non_neg_integer()) -> non_neg_integer(). + +results(L) when is_integer(L), L >= 0 -> + set_env(stdlib, shell_saved_results, L, ?DEF_RESULTS). + +-spec catch_exception(boolean()) -> boolean(). + +catch_exception(Bool) -> + set_env(stdlib, shell_catch_exception, Bool, ?DEF_CATCH_EXCEPTION). diff --git a/lib/stdlib/src/shell_default.erl b/lib/stdlib/src/shell_default.erl new file mode 100644 index 0000000000..670f8cdb44 --- /dev/null +++ b/lib/stdlib/src/shell_default.erl @@ -0,0 +1,131 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% This is just a empty template which calls routines in the module c +%% to do all the work! + +-module(shell_default). + +-export([help/0,lc/1,c/1,c/2,nc/1,nl/1,l/1,i/0,pid/3,i/3,m/0,m/1, + memory/0,memory/1, + erlangrc/1,bi/1, regs/0, flush/0,pwd/0,ls/0,ls/1,cd/1, + y/1, y/2, + xm/1, bt/1, q/0, + ni/0, nregs/0]). + +-export([ih/0,iv/0,im/0,ii/1,ii/2,iq/1,ini/1,ini/2,inq/1,ib/2,ib/3, + ir/2,ir/3,ibd/2,ibe/2,iba/3,ibc/3, + ic/0,ir/1,ir/0,il/0,ipb/0,ipb/1,iaa/1,iaa/2,ist/1,ia/1,ia/2,ia/3, + ia/4,ip/0]). + +-import(io, [format/1]). + +help() -> + format("** shell internal commands **~n"), + format("b() -- display all variable bindings\n"), + format("e(N) -- repeat the expression in query \n"), + format("f() -- forget all variable bindings\n"), + format("f(X) -- forget the binding of variable X\n"), + format("h() -- history\n"), + format("history(N) -- set how many previous commands to keep\n"), + format("results(N) -- set how many previous command results to keep\n"), + format("v(N) -- use the value of query \n"), + format("rd(R,D) -- define a record\n"), + format("rf() -- remove all record information\n"), + format("rf(R) -- remove record information about R\n"), + format("rl() -- display all record information\n"), + format("rl(R) -- display record information about R\n"), + format("rp(Term) -- display Term using the shell's record information\n"), + format("rr(File) -- read record information from File (wildcards allowed)\n"), + format("rr(F,R) -- read selected record information from file(s)\n"), + format("rr(F,R,O) -- read selected record information with options\n"), + format("** commands in module c **\n"), + c:help(), + format("** commands in module i (interpreter interface) **\n"), + format("ih() -- print help for the i module\n"), + %% format("** private commands ** \n"), + %% format("myfunc() -- does my operation ...\n"), + true. + +%% these are in alphabetic order it would be nice if they +%% were to *stay* so! + +bi(I) -> c:bi(I). +bt(Pid) -> c:bt(Pid). +c(File) -> c:c(File). +c(File, Opt) -> c:c(File, Opt). +cd(D) -> c:cd(D). +erlangrc(X) -> c:erlangrc(X). +flush() -> c:flush(). +i() -> c:i(). +i(X,Y,Z) -> c:i(X,Y,Z). +l(Mod) -> c:l(Mod). +lc(X) -> c:lc(X). +ls() -> c:ls(). +ls(S) -> c:ls(S). +m() -> c:m(). +m(Mod) -> c:m(Mod). +memory() -> c:memory(). +memory(Type) -> c:memory(Type). +nc(X) -> c:nc(X). +ni() -> c:ni(). +nl(Mod) -> c:nl(Mod). +nregs() -> c:nregs(). +pid(X,Y,Z) -> c:pid(X,Y,Z). +pwd() -> c:pwd(). +q() -> c:q(). +regs() -> c:regs(). +xm(Mod) -> c:xm(Mod). +y(File) -> c:y(File). +y(File, Opts) -> c:y(File, Opts). + +iaa(Flag) -> calli(iaa, [Flag]). +iaa(Flag,Fnk) -> calli(iaa, [Flag,Fnk]). +ist(Flag) -> calli(ist, [Flag]). +ia(Pid) -> calli(ia, [Pid]). +ia(X,Y,Z) -> calli(ia, [X,Y,Z]). +ia(Pid,Fnk) -> calli(ia, [Pid,Fnk]). +ia(X,Y,Z,Fnk) -> calli(ia, [X,Y,Z,Fnk]). +ib(Mod,Line) -> calli(ib, [Mod,Line]). +ib(Mod,Fnk,Arity) -> calli(ib, [Mod,Fnk,Arity]). +ibd(Mod,Line) -> calli(ibd, [Mod,Line]). +ibe(Mod,Line) -> calli(ibe, [Mod,Line]). +iba(M,L,Action) -> calli(iba, [M,L,Action]). +ibc(M,L,Cond) -> calli(ibc, [M,L,Cond]). +ic() -> calli(ic, []). +ih() -> calli(help, []). +ii(Mod) -> calli(ii, [Mod]). +ii(Mod,Op) -> calli(ii, [Mod,Op]). +il() -> calli(il, []). +im() -> calli(im, []). +ini(Mod) -> calli(ini, [Mod]). +ini(Mod,Op) -> calli(ini, [Mod,Op]). +inq(Mod) -> calli(inq, [Mod]). +ip() -> calli(ip, []). +ipb() -> calli(ipb, []). +ipb(Mod) -> calli(ipb, [Mod]). +iq(Mod) -> calli(iq, [Mod]). +ir(Mod,Line) -> calli(ir, [Mod,Line]). +ir(Mod,Fnk,Arity) -> calli(ir, [Mod,Fnk,Arity]). +ir(Mod) -> calli(ir, [Mod]). +ir() -> calli(ir, []). +iv() -> calli(iv, []). + +calli(F, Args) -> + c:appcall(debugger, i, F, Args). diff --git a/lib/stdlib/src/slave.erl b/lib/stdlib/src/slave.erl new file mode 100644 index 0000000000..196b659938 --- /dev/null +++ b/lib/stdlib/src/slave.erl @@ -0,0 +1,332 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(slave). + +%% If the macro DEBUG is defined during compilation, +%% debug printouts are done through erlang:display/1. +%% Activate this feature by starting the compiler +%% with> erlc -DDEBUG ... +%% or by> setenv ERL_COMPILER_FLAGS DEBUG +%% before running make (in the OTP make system) +%% (the example is for tcsh) + + +-export([pseudo/1, + pseudo/2, + start/1, start/2, start/3, + start/5, + start_link/1, start_link/2, start_link/3, + stop/1, + relay/1]). + +%% Internal exports +-export([wait_for_slave/7, slave_start/1, wait_for_master_to_die/2]). + +-import(error_logger, [error_msg/2]). + + +-ifdef(DEBUG). +-define(dbg(Tag,Data), erlang:display({Tag,Data})). +-else. +-define(dbg(Tag,Data), true). +-endif. + + +%% Start a list of pseudo servers on the local node +pseudo([Master | ServerList]) -> + pseudo(Master , ServerList); +pseudo(_) -> + error_msg("No master node given to slave:pseudo/1~n",[]). + +pseudo(_, []) -> ok; +pseudo(Master, [S|Tail]) -> + start_pseudo(S, whereis(S), Master), + pseudo(Master, Tail). + +start_pseudo(Name, undefined, Master) -> + X = rpc:call(Master,erlang, whereis,[Name]), + register(Name, spawn(slave, relay, [X])); + +start_pseudo(_,_,_) -> ok. %% It's already there + + +%% This relay can be used to relay all messages directed to a process. + +relay({badrpc,Reason}) -> + error_msg(" ** exiting relay server ~w :~w **~n", [self(),Reason]), + exit(Reason); +relay(undefined) -> + error_msg(" ** exiting relay server ~w **~n", [self()]), + exit(undefined); +relay(Pid) when is_pid(Pid) -> + relay1(Pid). + +relay1(Pid) -> + receive + X -> + Pid ! X + end, + relay1(Pid). + +%% start/1,2,3 -- +%% start_link/1,2,3 -- +%% +%% The start/1,2,3 functions are used to start a slave Erlang node. +%% The node on which the start/N functions are used is called the +%% master in the description below. +%% +%% If hostname is the same for the master and the slave, +%% the Erlang node will simply be spawned. The only requirment for +%% this to work is that the 'erl' program can be found in PATH. +%% +%% If the master and slave are on different hosts, start/N uses +%% the 'rsh' program to spawn an Erlang node on the other host. +%% Alternative, if the master was started as +%% 'erl -sname xxx -rsh my_rsh...', then 'my_rsh' will be used instead +%% of 'rsh' (this is useful for systems where the rsh program is named +%% 'remsh'). +%% +%% For this to work, the following conditions must be fulfilled: +%% +%% 1. There must be an Rsh program on computer; if not an error +%% is returned. +%% +%% 2. The hosts must be configured to allowed 'rsh' access without +%% prompts for password. +%% +%% The slave node will have its filer and user server redirected +%% to the master. When the master node dies, the slave node will +%% terminate. For the start_link functions, the slave node will +%% terminate also if the process which called start_link terminates. +%% +%% Returns: {ok, Name@Host} | +%% {error, timeout} | +%% {error, no_rsh} | +%% {error, {already_running, Name@Host}} + +start(Host) -> + L = atom_to_list(node()), + Name = upto($@, L), + start(Host, Name). + +start(Host, Name) -> + start(Host, Name, []). + +start(Host, Name, Args) -> + start(Host, Name, Args, no_link). + +start_link(Host) -> + L = atom_to_list(node()), + Name = upto($@, L), + start_link(Host, Name). + +start_link(Host, Name) -> + start_link(Host, Name, []). + +start_link(Host, Name, Args) -> + start(Host, Name, Args, self()). + +start(Host0, Name, Args, LinkTo) -> + Prog = lib:progname(), + start(Host0, Name, Args, LinkTo, Prog). + +start(Host0, Name, Args, LinkTo, Prog) -> + Host = + case net_kernel:longnames() of + true -> dns(Host0); + false -> strip_host_name(to_list(Host0)); + ignored -> exit(not_alive) + end, + Node = list_to_atom(lists:concat([Name, "@", Host])), + case net_adm:ping(Node) of + pang -> + start_it(Host, Name, Node, Args, LinkTo, Prog); + pong -> + {error, {already_running, Node}} + end. + +%% Stops a running node. + +stop(Node) -> +% io:format("stop(~p)~n", [Node]), + rpc:call(Node, erlang, halt, []), + ok. + +%% Starts a new slave node. + +start_it(Host, Name, Node, Args, LinkTo, Prog) -> + spawn(?MODULE, wait_for_slave, [self(), Host, Name, Node, Args, LinkTo, + Prog]), + receive + {result, Result} -> Result + end. + +%% Waits for the slave to start. + +wait_for_slave(Parent, Host, Name, Node, Args, LinkTo, Prog) -> + Waiter = register_unique_name(0), + case mk_cmd(Host, Name, Args, Waiter, Prog) of + {ok, Cmd} -> +%% io:format("Command: ~s~n", [Cmd]), + open_port({spawn, Cmd}, [stream]), + receive + {SlavePid, slave_started} -> + unregister(Waiter), + slave_started(Parent, LinkTo, SlavePid) + after 32000 -> + %% If it seems that the node was partially started, + %% try to kill it. + Node = list_to_atom(lists:concat([Name, "@", Host])), + case net_adm:ping(Node) of + pong -> + spawn(Node, erlang, halt, []), + ok; + _ -> + ok + end, + Parent ! {result, {error, timeout}} + end; + Other -> + Parent ! {result, Other} + end. + +slave_started(ReplyTo, no_link, Slave) when is_pid(Slave) -> + ReplyTo ! {result, {ok, node(Slave)}}; +slave_started(ReplyTo, Master, Slave) when is_pid(Master), is_pid(Slave) -> + process_flag(trap_exit, true), + link(Master), + link(Slave), + ReplyTo ! {result, {ok, node(Slave)}}, + one_way_link(Master, Slave). + +%% This function simulates a one-way link, so that the slave node +%% will be killed if the master process terminates, but the master +%% process will not be killed if the slave node terminates. + +one_way_link(Master, Slave) -> + receive + {'EXIT', Master, _Reason} -> + unlink(Slave), + Slave ! {nodedown, node()}; + {'EXIT', Slave, _Reason} -> + unlink(Master); + _Other -> + one_way_link(Master, Slave) + end. + +register_unique_name(Number) -> + Name = list_to_atom(lists:concat(["slave_waiter_", Number])), + case catch register(Name, self()) of + true -> + Name; + {'EXIT', {badarg, _}} -> + register_unique_name(Number+1) + end. + +%% Makes up the command to start the nodes. +%% If the node should run on the local host, there is +%% no need to use rsh. + +mk_cmd(Host, Name, Args, Waiter, Prog) -> + BasicCmd = lists:concat([Prog, + " -detached -noinput -master ", node(), + " ", long_or_short(), Name, "@", Host, + " -s slave slave_start ", node(), + " ", Waiter, + " ", Args]), + + case after_char($@, atom_to_list(node())) of + Host -> + {ok, BasicCmd}; + _ -> + case rsh() of + {ok, Rsh} -> + {ok, lists:concat([Rsh, " ", Host, " ", BasicCmd])}; + Other -> + Other + end + end. + +%% Give the user an opportunity to run another program, +%% than the "rsh". On HP-UX rsh is called remsh; thus HP users +%% must start erlang as erl -rsh remsh. +%% +%% Also checks that the given program exists. +%% +%% Returns: {ok, RshPath} | {error, Reason} + +rsh() -> + Rsh = + case init:get_argument(rsh) of + {ok, [[Prog]]} -> Prog; + _ -> "rsh" + end, + case os:find_executable(Rsh) of + false -> {error, no_rsh}; + Path -> {ok, Path} + end. + +long_or_short() -> + case net_kernel:longnames() of + true -> " -name "; + false -> " -sname " + end. + +%% This function will be invoked on the slave, using the -s option of erl. +%% It will wait for the master node to terminate. + +slave_start([Master, Waiter]) -> + ?dbg({?MODULE, slave_start}, [[Master, Waiter]]), + spawn(?MODULE, wait_for_master_to_die, [Master, Waiter]). + +wait_for_master_to_die(Master, Waiter) -> + ?dbg({?MODULE, wait_for_master_to_die}, [Master, Waiter]), + process_flag(trap_exit, true), + monitor_node(Master, true), + {Waiter, Master} ! {self(), slave_started}, + wloop(Master). + +wloop(Master) -> + receive + {nodedown, Master} -> + ?dbg({?MODULE, wloop}, + [[Master], {received, {nodedown, Master}}, halting_node] ), + halt(); + _Other -> + wloop(Master) + end. + +%% Just the short hostname, not the qualified, for convenience. + +strip_host_name([]) -> []; +strip_host_name([$.|_]) -> []; +strip_host_name([H|T]) -> [H|strip_host_name(T)]. + +dns(H) -> {ok, Host} = net_adm:dns_hostname(H), Host. + +to_list(X) when is_list(X) -> X; +to_list(X) when is_atom(X) -> atom_to_list(X). + +upto(_, []) -> []; +upto(Char, [Char|_]) -> []; +upto(Char, [H|T]) -> [H|upto(Char, T)]. + +after_char(_, []) -> []; +after_char(Char, [Char|Rest]) -> Rest; +after_char(Char, [_|Rest]) -> after_char(Char, Rest). diff --git a/lib/stdlib/src/sofs.erl b/lib/stdlib/src/sofs.erl new file mode 100644 index 0000000000..a83f803330 --- /dev/null +++ b/lib/stdlib/src/sofs.erl @@ -0,0 +1,2502 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(sofs). + +-export([from_term/1, from_term/2, from_external/2, empty_set/0, + is_type/1, set/1, set/2, from_sets/1, relation/1, relation/2, + a_function/1, a_function/2, family/1, family/2, + to_external/1, type/1, to_sets/1, no_elements/1, + specification/2, union/2, intersection/2, difference/2, + symdiff/2, symmetric_partition/2, product/1, product/2, + constant_function/2, is_equal/2, is_subset/2, is_sofs_set/1, + is_set/1, is_empty_set/1, is_disjoint/2]). + +-export([union/1, intersection/1, canonical_relation/1]). + +-export([relation_to_family/1, domain/1, range/1, field/1, + relative_product/1, relative_product/2, relative_product1/2, + converse/1, image/2, inverse_image/2, strict_relation/1, + weak_relation/1, extension/3, is_a_function/1]). + +-export([composite/2, inverse/1]). + +-export([restriction/2, restriction/3, drestriction/2, drestriction/3, + substitution/2, projection/2, partition/1, partition/2, + partition/3, multiple_relative_product/2, join/4]). + +-export([family_to_relation/1, family_specification/2, + union_of_family/1, intersection_of_family/1, + family_union/1, family_intersection/1, + family_domain/1, family_range/1, family_field/1, + family_union/2, family_intersection/2, family_difference/2, + partition_family/2, family_projection/2]). + +-export([family_to_digraph/1, family_to_digraph/2, + digraph_to_family/1, digraph_to_family/2]). + +%% Shorter names of some functions. +-export([fam2rel/1, rel2fam/1]). + +-import(lists, + [any/2, append/1, flatten/1, foreach/2, + keysort/2, last/1, map/2, mapfoldl/3, member/2, merge/2, + reverse/1, reverse/2, sort/1, umerge/1, umerge/2, usort/1]). + +-compile({inline, [{family_to_relation,1}, {relation_to_family,1}]}). + +-compile({inline, [{rel,2},{a_func,2},{fam,2},{term2set,2}]}). + +-compile({inline, [{external_fun,1},{element_type,1}]}). + +-compile({inline, + [{unify_types,2}, {match_types,2}, + {test_rel,3}, {symdiff,3}, + {subst,3}]}). + +-compile({inline, [{fam_binop,3}]}). + +%% Nope, no is_member, del_member or add_member. +%% +%% See also "Naive Set Theory" by Paul R. Halmos. +%% +%% By convention, erlang:error/2 is called from exported functions. + +-define(TAG, 'Set'). +-define(ORDTAG, 'OrdSet'). + +-record(?TAG, {data = [], type = type}). +-record(?ORDTAG, {orddata = {}, ordtype = type}). + +-define(LIST(S), (S)#?TAG.data). +-define(TYPE(S), (S)#?TAG.type). +%%-define(SET(L, T), +%% case is_type(T) of +%% true -> #?TAG{data = L, type = T}; +%% false -> erlang:error(badtype, [T]) +%% end +%% ). +-define(SET(L, T), #?TAG{data = L, type = T}). +-define(IS_SET(S), is_record(S, ?TAG)). +-define(IS_UNTYPED_SET(S), ?TYPE(S) =:= ?ANYTYPE). + +%% Ordered sets and atoms: +-define(ORDDATA(S), (S)#?ORDTAG.orddata). +-define(ORDTYPE(S), (S)#?ORDTAG.ordtype). +-define(ORDSET(L, T), #?ORDTAG{orddata = L, ordtype = T}). +-define(IS_ORDSET(S), is_record(S, ?ORDTAG)). +-define(ATOM_TYPE, atom). +-define(IS_ATOM_TYPE(T), is_atom(T)). % true for ?ANYTYPE... + +%% When IS_SET is true: +-define(ANYTYPE, '_'). +-define(BINREL(X, Y), {X, Y}). +-define(IS_RELATION(R), is_tuple(R)). +-define(REL_ARITY(R), tuple_size(R)). +-define(REL_TYPE(I, R), element(I, R)). +-define(SET_OF(X), [X]). +-define(IS_SET_OF(X), is_list(X)). +-define(FAMILY(X, Y), ?BINREL(X, ?SET_OF(Y))). + +%% +%% Exported functions +%% + +%%% +%%% Create sets +%%% + +from_term(T) -> + Type = case T of + _ when is_list(T) -> [?ANYTYPE]; + _ -> ?ANYTYPE + end, + case catch setify(T, Type) of + {'EXIT', _} -> + erlang:error(badarg, [T]); + Set -> + Set + end. + +from_term(L, T) -> + case is_type(T) of + true -> + case catch setify(L, T) of + {'EXIT', _} -> + erlang:error(badarg, [L, T]); + Set -> + Set + end; + false -> + erlang:error(badarg, [L, T]) + end. + +from_external(L, ?SET_OF(Type)) -> + ?SET(L, Type); +from_external(T, Type) -> + ?ORDSET(T, Type). + +empty_set() -> + ?SET([], ?ANYTYPE). + +is_type(Atom) when ?IS_ATOM_TYPE(Atom), Atom =/= ?ANYTYPE -> + true; +is_type(?SET_OF(T)) -> + is_element_type(T); +is_type(T) when tuple_size(T) > 0 -> + is_types(tuple_size(T), T); +is_type(_T) -> + false. + +set(L) -> + case catch usort(L) of + {'EXIT', _} -> + erlang:error(badarg, [L]); + SL -> + ?SET(SL, ?ATOM_TYPE) + end. + +set(L, ?SET_OF(Type) = T) when ?IS_ATOM_TYPE(Type), Type =/= ?ANYTYPE -> + case catch usort(L) of + {'EXIT', _} -> + erlang:error(badarg, [L, T]); + SL -> + ?SET(SL, Type) + end; +set(L, ?SET_OF(_) = T) -> + case catch setify(L, T) of + {'EXIT', _} -> + erlang:error(badarg, [L, T]); + Set -> + Set + end; +set(L, T) -> + erlang:error(badarg, [L, T]). + +from_sets(Ss) when is_list(Ss) -> + case set_of_sets(Ss, [], ?ANYTYPE) of + {error, Error} -> + erlang:error(Error, [Ss]); + Set -> + Set + end; +from_sets(Tuple) when is_tuple(Tuple) -> + case ordset_of_sets(tuple_to_list(Tuple), [], []) of + error -> + erlang:error(badarg, [Tuple]); + Set -> + Set + end; +from_sets(T) -> + erlang:error(badarg, [T]). + +relation([]) -> + ?SET([], ?BINREL(?ATOM_TYPE, ?ATOM_TYPE)); +relation(Ts = [T | _]) when is_tuple(T) -> + case catch rel(Ts, tuple_size(T)) of + {'EXIT', _} -> + erlang:error(badarg, [Ts]); + Set -> + Set + end; +relation(E) -> + erlang:error(badarg, [E]). + +relation(Ts, TS) -> + case catch rel(Ts, TS) of + {'EXIT', _} -> + erlang:error(badarg, [Ts, TS]); + Set -> + Set + end. + +a_function(Ts) -> + case catch func(Ts, ?BINREL(?ATOM_TYPE, ?ATOM_TYPE)) of + {'EXIT', _} -> + erlang:error(badarg, [Ts]); + Bad when is_atom(Bad) -> + erlang:error(Bad, [Ts]); + Set -> + Set + end. + +a_function(Ts, T) -> + case catch a_func(Ts, T) of + {'EXIT', _} -> + erlang:error(badarg, [Ts, T]); + Bad when is_atom(Bad) -> + erlang:error(Bad, [Ts, T]); + Set -> + Set + end. + +family(Ts) -> + case catch fam2(Ts, ?FAMILY(?ATOM_TYPE, ?ATOM_TYPE)) of + {'EXIT', _} -> + erlang:error(badarg, [Ts]); + Bad when is_atom(Bad) -> + erlang:error(Bad, [Ts]); + Set -> + Set + end. + +family(Ts, T) -> + case catch fam(Ts, T) of + {'EXIT', _} -> + erlang:error(badarg, [Ts, T]); + Bad when is_atom(Bad) -> + erlang:error(Bad, [Ts, T]); + Set -> + Set + end. + +%%% +%%% Functions on sets. +%%% + +to_external(S) when ?IS_SET(S) -> + ?LIST(S); +to_external(S) when ?IS_ORDSET(S) -> + ?ORDDATA(S). + +type(S) when ?IS_SET(S) -> + ?SET_OF(?TYPE(S)); +type(S) when ?IS_ORDSET(S) -> + ?ORDTYPE(S). + +to_sets(S) when ?IS_SET(S) -> + case ?TYPE(S) of + ?SET_OF(Type) -> list_of_sets(?LIST(S), Type, []); + Type -> list_of_ordsets(?LIST(S), Type, []) + end; +to_sets(S) when ?IS_ORDSET(S), is_tuple(?ORDTYPE(S)) -> + tuple_of_sets(tuple_to_list(?ORDDATA(S)), tuple_to_list(?ORDTYPE(S)), []); +to_sets(S) when ?IS_ORDSET(S) -> + erlang:error(badarg, [S]). + +no_elements(S) when ?IS_SET(S) -> + length(?LIST(S)); +no_elements(S) when ?IS_ORDSET(S), is_tuple(?ORDTYPE(S)) -> + tuple_size(?ORDDATA(S)); +no_elements(S) when ?IS_ORDSET(S) -> + erlang:error(badarg, [S]). + +specification(Fun, S) when ?IS_SET(S) -> + Type = ?TYPE(S), + R = case external_fun(Fun) of + false -> + spec(?LIST(S), Fun, element_type(Type), []); + XFun -> + specification(?LIST(S), XFun, []) + end, + case R of + SL when is_list(SL) -> + ?SET(SL, Type); + Bad -> + erlang:error(Bad, [Fun, S]) + end. + +union(S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + case unify_types(?TYPE(S1), ?TYPE(S2)) of + [] -> erlang:error(type_mismatch, [S1, S2]); + Type -> ?SET(umerge(?LIST(S1), ?LIST(S2)), Type) + end. + +intersection(S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + case unify_types(?TYPE(S1), ?TYPE(S2)) of + [] -> erlang:error(type_mismatch, [S1, S2]); + Type -> ?SET(intersection(?LIST(S1), ?LIST(S2), []), Type) + end. + +difference(S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + case unify_types(?TYPE(S1), ?TYPE(S2)) of + [] -> erlang:error(type_mismatch, [S1, S2]); + Type -> ?SET(difference(?LIST(S1), ?LIST(S2), []), Type) + end. + +symdiff(S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + case unify_types(?TYPE(S1), ?TYPE(S2)) of + [] -> erlang:error(type_mismatch, [S1, S2]); + Type -> ?SET(symdiff(?LIST(S1), ?LIST(S2), []), Type) + end. + +symmetric_partition(S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + case unify_types(?TYPE(S1), ?TYPE(S2)) of + [] -> erlang:error(type_mismatch, [S1, S2]); + Type -> sympart(?LIST(S1), ?LIST(S2), [], [], [], Type) + end. + +product(S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + if + ?TYPE(S1) =:= ?ANYTYPE -> S1; + ?TYPE(S2) =:= ?ANYTYPE -> S2; + true -> + F = fun(E) -> {0, E} end, + T = ?BINREL(?TYPE(S1), ?TYPE(S2)), + ?SET(relprod(map(F, ?LIST(S1)), map(F, ?LIST(S2))), T) + end. + +product({S1, S2}) -> + product(S1, S2); +product(T) when is_tuple(T) -> + Ss = tuple_to_list(T), + case catch sets_to_list(Ss) of + {'EXIT', _} -> + erlang:error(badarg, [T]); + [] -> + erlang:error(badarg, [T]); + L -> + Type = types(Ss, []), + case member([], L) of + true -> + empty_set(); + false -> + ?SET(reverse(prod(L, [], [])), Type) + end + end. + +constant_function(S, E) when ?IS_SET(S) -> + case {?TYPE(S), is_sofs_set(E)} of + {?ANYTYPE, true} -> S; + {Type, true} -> + NType = ?BINREL(Type, type(E)), + ?SET(constant_function(?LIST(S), to_external(E), []), NType); + _ -> erlang:error(badarg, [S, E]) + end; +constant_function(S, E) when ?IS_ORDSET(S) -> + erlang:error(badarg, [S, E]). + +is_equal(S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + case match_types(?TYPE(S1), ?TYPE(S2)) of + true -> ?LIST(S1) == ?LIST(S2); + false -> erlang:error(type_mismatch, [S1, S2]) + end; +is_equal(S1, S2) when ?IS_ORDSET(S1), ?IS_ORDSET(S2) -> + case match_types(?ORDTYPE(S1), ?ORDTYPE(S2)) of + true -> ?ORDDATA(S1) == ?ORDDATA(S2); + false -> erlang:error(type_mismatch, [S1, S2]) + end; +is_equal(S1, S2) when ?IS_SET(S1), ?IS_ORDSET(S2) -> + erlang:error(type_mismatch, [S1, S2]); +is_equal(S1, S2) when ?IS_ORDSET(S1), ?IS_SET(S2) -> + erlang:error(type_mismatch, [S1, S2]). + +is_subset(S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + case match_types(?TYPE(S1), ?TYPE(S2)) of + true -> subset(?LIST(S1), ?LIST(S2)); + false -> erlang:error(type_mismatch, [S1, S2]) + end. + +is_sofs_set(S) when ?IS_SET(S) -> + true; +is_sofs_set(S) when ?IS_ORDSET(S) -> + true; +is_sofs_set(_S) -> + false. + +is_set(S) when ?IS_SET(S) -> + true; +is_set(S) when ?IS_ORDSET(S) -> + false. + +is_empty_set(S) when ?IS_SET(S) -> + ?LIST(S) =:= []; +is_empty_set(S) when ?IS_ORDSET(S) -> + false. + +is_disjoint(S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + case match_types(?TYPE(S1), ?TYPE(S2)) of + true -> + case ?LIST(S1) of + [] -> true; + [A | As] -> disjoint(?LIST(S2), A, As) + end; + false -> erlang:error(type_mismatch, [S1, S2]) + end. + +%%% +%%% Functions on set-of-sets. +%%% + +union(Sets) when ?IS_SET(Sets) -> + case ?TYPE(Sets) of + ?SET_OF(Type) -> ?SET(lunion(?LIST(Sets)), Type); + ?ANYTYPE -> Sets; + _ -> erlang:error(badarg, [Sets]) + end. + +intersection(Sets) when ?IS_SET(Sets) -> + case ?LIST(Sets) of + [] -> erlang:error(badarg, [Sets]); + [L | Ls] -> + case ?TYPE(Sets) of + ?SET_OF(Type) -> + ?SET(lintersection(Ls, L), Type); + _ -> erlang:error(badarg, [Sets]) + end + end. + +canonical_relation(Sets) when ?IS_SET(Sets) -> + ST = ?TYPE(Sets), + case ST of + ?SET_OF(?ANYTYPE) -> empty_set(); + ?SET_OF(Type) -> + ?SET(can_rel(?LIST(Sets), []), ?BINREL(Type, ST)); + ?ANYTYPE -> Sets; + _ -> erlang:error(badarg, [Sets]) + end. + +%%% +%%% Functions on binary relations only. +%%% + +rel2fam(R) -> + relation_to_family(R). + +%% Inlined. +relation_to_family(R) when ?IS_SET(R) -> + case ?TYPE(R) of + ?BINREL(DT, RT) -> + ?SET(rel2family(?LIST(R)), ?FAMILY(DT, RT)); + ?ANYTYPE -> R; + _Else -> erlang:error(badarg, [R]) + end. + +domain(R) when ?IS_SET(R) -> + case ?TYPE(R) of + ?BINREL(DT, _) -> ?SET(dom(?LIST(R)), DT); + ?ANYTYPE -> R; + _Else -> erlang:error(badarg, [R]) + end. + +range(R) when ?IS_SET(R) -> + case ?TYPE(R) of + ?BINREL(_, RT) -> ?SET(ran(?LIST(R), []), RT); + ?ANYTYPE -> R; + _ -> erlang:error(badarg, [R]) + end. + +%% In "Introduction to LOGIC", Suppes defines the field of a binary +%% relation to be the union of the domain and the range (or +%% counterdomain). +field(R) -> + union(domain(R), range(R)). + +relative_product(RT) when is_tuple(RT) -> + case relprod_n(RT, foo, false, false) of + {error, Reason} -> + erlang:error(Reason, [RT]); + Reply -> + Reply + end. + +relative_product(R1, R2) when ?IS_SET(R1), ?IS_SET(R2) -> + relative_product1(converse(R1), R2); +relative_product(RT, R) when is_tuple(RT), ?IS_SET(R) -> + EmptyR = case ?TYPE(R) of + ?BINREL(_, _) -> ?LIST(R) =:= []; + ?ANYTYPE -> true; + _ -> erlang:error(badarg, [RT, R]) + end, + case relprod_n(RT, R, EmptyR, true) of + {error, Reason} -> + erlang:error(Reason, [RT, R]); + Reply -> + Reply + end. + +relative_product1(R1, R2) when ?IS_SET(R1), ?IS_SET(R2) -> + {DTR1, RTR1} = case ?TYPE(R1) of + ?BINREL(_, _) = R1T -> R1T; + ?ANYTYPE -> {?ANYTYPE, ?ANYTYPE}; + _ -> erlang:error(badarg, [R1, R2]) + end, + {DTR2, RTR2} = case ?TYPE(R2) of + ?BINREL(_, _) = R2T -> R2T; + ?ANYTYPE -> {?ANYTYPE, ?ANYTYPE}; + _ -> erlang:error(badarg, [R1, R2]) + end, + case match_types(DTR1, DTR2) of + true when DTR1 =:= ?ANYTYPE -> R1; + true when DTR2 =:= ?ANYTYPE -> R2; + true -> ?SET(relprod(?LIST(R1), ?LIST(R2)), ?BINREL(RTR1, RTR2)); + false -> erlang:error(type_mismatch, [R1, R2]) + end. + +converse(R) when ?IS_SET(R) -> + case ?TYPE(R) of + ?BINREL(DT, RT) -> ?SET(converse(?LIST(R), []), ?BINREL(RT, DT)); + ?ANYTYPE -> R; + _ -> erlang:error(badarg, [R]) + end. + +image(R, S) when ?IS_SET(R), ?IS_SET(S) -> + case ?TYPE(R) of + ?BINREL(DT, RT) -> + case match_types(DT, ?TYPE(S)) of + true -> + ?SET(usort(restrict(?LIST(S), ?LIST(R))), RT); + false -> + erlang:error(type_mismatch, [R, S]) + end; + ?ANYTYPE -> R; + _ -> erlang:error(badarg, [R, S]) + end. + +inverse_image(R, S) when ?IS_SET(R), ?IS_SET(S) -> + case ?TYPE(R) of + ?BINREL(DT, RT) -> + case match_types(RT, ?TYPE(S)) of + true -> + NL = restrict(?LIST(S), converse(?LIST(R), [])), + ?SET(usort(NL), DT); + false -> + erlang:error(type_mismatch, [R, S]) + end; + ?ANYTYPE -> R; + _ -> erlang:error(badarg, [R, S]) + end. + +strict_relation(R) when ?IS_SET(R) -> + case ?TYPE(R) of + Type = ?BINREL(_, _) -> + ?SET(strict(?LIST(R), []), Type); + ?ANYTYPE -> R; + _ -> erlang:error(badarg, [R]) + end. + +weak_relation(R) when ?IS_SET(R) -> + case ?TYPE(R) of + ?BINREL(DT, RT) -> + case unify_types(DT, RT) of + [] -> + erlang:error(badarg, [R]); + Type -> + ?SET(weak(?LIST(R)), ?BINREL(Type, Type)) + end; + ?ANYTYPE -> R; + _ -> erlang:error(badarg, [R]) + end. + +extension(R, S, E) when ?IS_SET(R), ?IS_SET(S) -> + case {?TYPE(R), ?TYPE(S), is_sofs_set(E)} of + {T=?BINREL(DT, RT), ST, true} -> + case match_types(DT, ST) and match_types(RT, type(E)) of + false -> + erlang:error(type_mismatch, [R, S, E]); + true -> + RL = ?LIST(R), + case extc([], ?LIST(S), to_external(E), RL) of + [] -> + R; + L -> + ?SET(merge(RL, reverse(L)), T) + end + end; + {?ANYTYPE, ?ANYTYPE, true} -> + R; + {?ANYTYPE, ST, true} -> + case type(E) of + ?SET_OF(?ANYTYPE) -> + R; + ET -> + ?SET([], ?BINREL(ST, ET)) + end; + {_, _, true} -> + erlang:error(badarg, [R, S, E]) + end. + +is_a_function(R) when ?IS_SET(R) -> + case ?TYPE(R) of + ?BINREL(_, _) -> + case ?LIST(R) of + [] -> true; + [{V,_} | Es] -> is_a_func(Es, V) + end; + ?ANYTYPE -> true; + _ -> erlang:error(badarg, [R]) + end. + +restriction(Relation, Set) -> + restriction(1, Relation, Set). + +drestriction(Relation, Set) -> + drestriction(1, Relation, Set). + +%%% +%%% Functions on functions only. +%%% + +composite(Fn1, Fn2) when ?IS_SET(Fn1), ?IS_SET(Fn2) -> + ?BINREL(DTF1, RTF1) = case ?TYPE(Fn1)of + ?BINREL(_, _) = F1T -> F1T; + ?ANYTYPE -> {?ANYTYPE, ?ANYTYPE}; + _ -> erlang:error(badarg, [Fn1, Fn2]) + end, + ?BINREL(DTF2, RTF2) = case ?TYPE(Fn2) of + ?BINREL(_, _) = F2T -> F2T; + ?ANYTYPE -> {?ANYTYPE, ?ANYTYPE}; + _ -> erlang:error(badarg, [Fn1, Fn2]) + end, + case match_types(RTF1, DTF2) of + true when DTF1 =:= ?ANYTYPE -> Fn1; + true when DTF2 =:= ?ANYTYPE -> Fn2; + true -> + case comp(?LIST(Fn1), ?LIST(Fn2)) of + SL when is_list(SL) -> + ?SET(sort(SL), ?BINREL(DTF1, RTF2)); + Bad -> + erlang:error(Bad, [Fn1, Fn2]) + end; + false -> erlang:error(type_mismatch, [Fn1, Fn2]) + end. + +inverse(Fn) when ?IS_SET(Fn) -> + case ?TYPE(Fn) of + ?BINREL(DT, RT) -> + case inverse1(?LIST(Fn)) of + SL when is_list(SL) -> + ?SET(SL, ?BINREL(RT, DT)); + Bad -> + erlang:error(Bad, [Fn]) + end; + ?ANYTYPE -> Fn; + _ -> erlang:error(badarg, [Fn]) + end. + +%%% +%%% Functions on relations (binary or other). +%%% + +%% Equivalent to range(restriction(inverse(substitution(Fun, S1)), S2)). +restriction(I, R, S) when is_integer(I), ?IS_SET(R), ?IS_SET(S) -> + RT = ?TYPE(R), + ST = ?TYPE(S), + case check_for_sort(RT, I) of + empty -> + R; + error -> + erlang:error(badarg, [I, R, S]); + Sort -> + RL = ?LIST(R), + case {match_types(?REL_TYPE(I, RT), ST), ?LIST(S)} of + {true, _SL} when RL =:= [] -> + R; + {true, []} -> + ?SET([], RT); + {true, [E | Es]} when Sort =:= false -> % I =:= 1 + ?SET(reverse(restrict_n(I, RL, E, Es, [])), RT); + {true, [E | Es]} -> + ?SET(sort(restrict_n(I, keysort(I, RL), E, Es, [])), RT); + {false, _SL} -> + erlang:error(type_mismatch, [I, R, S]) + end + end; +restriction(SetFun, S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + Type1 = ?TYPE(S1), + Type2 = ?TYPE(S2), + SL1 = ?LIST(S1), + case external_fun(SetFun) of + false when Type2 =:= ?ANYTYPE -> + S2; + false -> + case subst(SL1, SetFun, element_type(Type1)) of + {NSL, NewType} -> % NewType can be ?ANYTYPE + case match_types(NewType, Type2) of + true -> + NL = sort(restrict(?LIST(S2), converse(NSL, []))), + ?SET(NL, Type1); + false -> + erlang:error(type_mismatch, [SetFun, S1, S2]) + end; + Bad -> + erlang:error(Bad, [SetFun, S1, S2]) + end; + _ when Type1 =:= ?ANYTYPE -> + S1; + _XFun when ?IS_SET_OF(Type1) -> + erlang:error(badarg, [SetFun, S1, S2]); + XFun -> + FunT = XFun(Type1), + case catch check_fun(Type1, XFun, FunT) of + {'EXIT', _} -> + erlang:error(badarg, [SetFun, S1, S2]); + Sort -> + case match_types(FunT, Type2) of + true -> + R1 = inverse_substitution(SL1, XFun, Sort), + ?SET(sort(Sort, restrict(?LIST(S2), R1)), Type1); + false -> + erlang:error(type_mismatch, [SetFun, S1, S2]) + end + end + end. + +drestriction(I, R, S) when is_integer(I), ?IS_SET(R), ?IS_SET(S) -> + RT = ?TYPE(R), + ST = ?TYPE(S), + case check_for_sort(RT, I) of + empty -> + R; + error -> + erlang:error(badarg, [I, R, S]); + Sort -> + RL = ?LIST(R), + case {match_types(?REL_TYPE(I, RT), ST), ?LIST(S)} of + {true, []} -> + R; + {true, _SL} when RL =:= [] -> + R; + {true, [E | Es]} when Sort =:= false -> % I =:= 1 + ?SET(diff_restrict_n(I, RL, E, Es, []), RT); + {true, [E | Es]} -> + ?SET(diff_restrict_n(I, keysort(I, RL), E, Es, []), RT); + {false, _SL} -> + erlang:error(type_mismatch, [I, R, S]) + end + end; +drestriction(SetFun, S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + Type1 = ?TYPE(S1), + Type2 = ?TYPE(S2), + SL1 = ?LIST(S1), + case external_fun(SetFun) of + false when Type2 =:= ?ANYTYPE -> + S1; + false -> + case subst(SL1, SetFun, element_type(Type1)) of + {NSL, NewType} -> % NewType can be ?ANYTYPE + case match_types(NewType, Type2) of + true -> + SL2 = ?LIST(S2), + NL = sort(diff_restrict(SL2, converse(NSL, []))), + ?SET(NL, Type1); + false -> + erlang:error(type_mismatch, [SetFun, S1, S2]) + end; + Bad -> + erlang:error(Bad, [SetFun, S1, S2]) + end; + _ when Type1 =:= ?ANYTYPE -> + S1; + _XFun when ?IS_SET_OF(Type1) -> + erlang:error(badarg, [SetFun, S1, S2]); + XFun -> + FunT = XFun(Type1), + case catch check_fun(Type1, XFun, FunT) of + {'EXIT', _} -> + erlang:error(badarg, [SetFun, S1, S2]); + Sort -> + case match_types(FunT, Type2) of + true -> + R1 = inverse_substitution(SL1, XFun, Sort), + SL2 = ?LIST(S2), + ?SET(sort(Sort, diff_restrict(SL2, R1)), Type1); + false -> + erlang:error(type_mismatch, [SetFun, S1, S2]) + end + end + end. + +projection(I, Set) when is_integer(I), ?IS_SET(Set) -> + Type = ?TYPE(Set), + case check_for_sort(Type, I) of + empty -> + Set; + error -> + erlang:error(badarg, [I, Set]); + _ when I =:= 1 -> + ?SET(projection1(?LIST(Set)), ?REL_TYPE(I, Type)); + _ -> + ?SET(projection_n(?LIST(Set), I, []), ?REL_TYPE(I, Type)) + end; +projection(Fun, Set) -> + range(substitution(Fun, Set)). + +substitution(I, Set) when is_integer(I), ?IS_SET(Set) -> + Type = ?TYPE(Set), + case check_for_sort(Type, I) of + empty -> + Set; + error -> + erlang:error(badarg, [I, Set]); + _Sort -> + NType = ?REL_TYPE(I, Type), + NSL = substitute_element(?LIST(Set), I, []), + ?SET(NSL, ?BINREL(Type, NType)) + end; +substitution(SetFun, Set) when ?IS_SET(Set) -> + Type = ?TYPE(Set), + L = ?LIST(Set), + case external_fun(SetFun) of + false when L =/= [] -> + case subst(L, SetFun, element_type(Type)) of + {SL, NewType} -> + ?SET(reverse(SL), ?BINREL(Type, NewType)); + Bad -> + erlang:error(Bad, [SetFun, Set]) + end; + false -> + empty_set(); + _ when Type =:= ?ANYTYPE -> + empty_set(); + _XFun when ?IS_SET_OF(Type) -> + erlang:error(badarg, [SetFun, Set]); + XFun -> + FunT = XFun(Type), + case catch check_fun(Type, XFun, FunT) of + {'EXIT', _} -> + erlang:error(badarg, [SetFun, Set]); + _Sort -> + SL = substitute(L, XFun, []), + ?SET(SL, ?BINREL(Type, FunT)) + end + end. + +partition(Sets) -> + F1 = relation_to_family(canonical_relation(Sets)), + F2 = relation_to_family(converse(F1)), + range(F2). + +partition(I, Set) when is_integer(I), ?IS_SET(Set) -> + Type = ?TYPE(Set), + case check_for_sort(Type, I) of + empty -> + Set; + error -> + erlang:error(badarg, [I, Set]); + false -> % I =:= 1 + ?SET(partition_n(I, ?LIST(Set)), ?SET_OF(Type)); + true -> + ?SET(partition_n(I, keysort(I, ?LIST(Set))), ?SET_OF(Type)) + end; +partition(Fun, Set) -> + range(partition_family(Fun, Set)). + +partition(I, R, S) when is_integer(I), ?IS_SET(R), ?IS_SET(S) -> + RT = ?TYPE(R), + ST = ?TYPE(S), + case check_for_sort(RT, I) of + empty -> + {R, R}; + error -> + erlang:error(badarg, [I, R, S]); + Sort -> + RL = ?LIST(R), + case {match_types(?REL_TYPE(I, RT), ST), ?LIST(S)} of + {true, _SL} when RL =:= [] -> + {R, R}; + {true, []} -> + {?SET([], RT), R}; + {true, [E | Es]} when Sort =:= false -> % I =:= 1 + [L1 | L2] = partition3_n(I, RL, E, Es, [], []), + {?SET(L1, RT), ?SET(L2, RT)}; + {true, [E | Es]} -> + [L1 | L2] = partition3_n(I, keysort(I,RL), E, Es, [], []), + {?SET(L1, RT), ?SET(L2, RT)}; + {false, _SL} -> + erlang:error(type_mismatch, [I, R, S]) + end + end; +partition(SetFun, S1, S2) when ?IS_SET(S1), ?IS_SET(S2) -> + Type1 = ?TYPE(S1), + Type2 = ?TYPE(S2), + SL1 = ?LIST(S1), + case external_fun(SetFun) of + false when Type2 =:= ?ANYTYPE -> + {S2, S1}; + false -> + case subst(SL1, SetFun, element_type(Type1)) of + {NSL, NewType} -> % NewType can be ?ANYTYPE + case match_types(NewType, Type2) of + true -> + R1 = converse(NSL, []), + [L1 | L2] = partition3(?LIST(S2), R1), + {?SET(sort(L1), Type1), ?SET(sort(L2), Type1)}; + false -> + erlang:error(type_mismatch, [SetFun, S1, S2]) + end; + Bad -> + erlang:error(Bad, [SetFun, S1, S2]) + end; + _ when Type1 =:= ?ANYTYPE -> + {S1, S1}; + _XFun when ?IS_SET_OF(Type1) -> + erlang:error(badarg, [SetFun, S1, S2]); + XFun -> + FunT = XFun(Type1), + case catch check_fun(Type1, XFun, FunT) of + {'EXIT', _} -> + erlang:error(badarg, [SetFun, S1, S2]); + Sort -> + case match_types(FunT, Type2) of + true -> + R1 = inverse_substitution(SL1, XFun, Sort), + [L1 | L2] = partition3(?LIST(S2), R1), + {?SET(sort(L1), Type1), ?SET(sort(L2), Type1)}; + false -> + erlang:error(type_mismatch, [SetFun, S1, S2]) + end + end + end. + +multiple_relative_product(T, R) when is_tuple(T), ?IS_SET(R) -> + case test_rel(R, tuple_size(T), eq) of + true when ?TYPE(R) =:= ?ANYTYPE -> + empty_set(); + true -> + MProd = mul_relprod(tuple_to_list(T), 1, R), + relative_product(list_to_tuple(MProd)); + false -> + erlang:error(badarg, [T, R]) + end. + +join(R1, I1, R2, I2) + when ?IS_SET(R1), ?IS_SET(R2), is_integer(I1), is_integer(I2) -> + case test_rel(R1, I1, lte) and test_rel(R2, I2, lte) of + false -> + erlang:error(badarg, [R1, I1, R2, I2]); + true when ?TYPE(R1) =:= ?ANYTYPE -> R1; + true when ?TYPE(R2) =:= ?ANYTYPE -> R2; + true -> + L1 = ?LIST(raise_element(R1, I1)), + L2 = ?LIST(raise_element(R2, I2)), + T = relprod1(L1, L2), + F = case (I1 =:= 1) and (I2 =:= 1) of + true -> + fun({X,Y}) -> join_element(X, Y) end; + false -> + fun({X,Y}) -> + list_to_tuple(join_element(X, Y, I2)) + end + end, + ?SET(replace(T, F, []), F({?TYPE(R1), ?TYPE(R2)})) + end. + +%% Inlined. +test_rel(R, I, C) -> + case ?TYPE(R) of + Rel when ?IS_RELATION(Rel), C =:= eq, I =:= ?REL_ARITY(Rel) -> true; + Rel when ?IS_RELATION(Rel), C =:= lte, I>=1, I =< ?REL_ARITY(Rel) -> + true; + ?ANYTYPE -> true; + _ -> false + end. + +%%% +%%% Family functions +%%% + +fam2rel(F) -> + family_to_relation(F). + +%% Inlined. +family_to_relation(F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(DT, RT) -> + ?SET(family2rel(?LIST(F), []), ?BINREL(DT, RT)); + ?ANYTYPE -> F; + _ -> erlang:error(badarg, [F]) + end. + +family_specification(Fun, F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(_DT, Type) = FType -> + R = case external_fun(Fun) of + false -> + fam_spec(?LIST(F), Fun, Type, []); + XFun -> + fam_specification(?LIST(F), XFun, []) + end, + case R of + SL when is_list(SL) -> + ?SET(SL, FType); + Bad -> + erlang:error(Bad, [Fun, F]) + end; + ?ANYTYPE -> F; + _ -> erlang:error(badarg, [Fun, F]) + end. + +union_of_family(F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(_DT, Type) -> + ?SET(un_of_fam(?LIST(F), []), Type); + ?ANYTYPE -> F; + _ -> erlang:error(badarg, [F]) + end. + +intersection_of_family(F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(_DT, Type) -> + case int_of_fam(?LIST(F)) of + FU when is_list(FU) -> + ?SET(FU, Type); + Bad -> + erlang:error(Bad, [F]) + end; + _ -> erlang:error(badarg, [F]) + end. + +family_union(F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(DT, ?SET_OF(Type)) -> + ?SET(fam_un(?LIST(F), []), ?FAMILY(DT, Type)); + ?ANYTYPE -> F; + _ -> erlang:error(badarg, [F]) + end. + +family_intersection(F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(DT, ?SET_OF(Type)) -> + case fam_int(?LIST(F), []) of + FU when is_list(FU) -> + ?SET(FU, ?FAMILY(DT, Type)); + Bad -> + erlang:error(Bad, [F]) + end; + ?ANYTYPE -> F; + _ -> erlang:error(badarg, [F]) + end. + +family_domain(F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(FDT, ?BINREL(DT, _)) -> + ?SET(fam_dom(?LIST(F), []), ?FAMILY(FDT, DT)); + ?ANYTYPE -> F; + ?FAMILY(_, ?ANYTYPE) -> F; + _ -> erlang:error(badarg, [F]) + end. + +family_range(F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(DT, ?BINREL(_, RT)) -> + ?SET(fam_ran(?LIST(F), []), ?FAMILY(DT, RT)); + ?ANYTYPE -> F; + ?FAMILY(_, ?ANYTYPE) -> F; + _ -> erlang:error(badarg, [F]) + end. + +family_field(F) -> + family_union(family_domain(F), family_range(F)). + +family_union(F1, F2) -> + fam_binop(F1, F2, fun fam_union/3). + +family_intersection(F1, F2) -> + fam_binop(F1, F2, fun fam_intersect/3). + +family_difference(F1, F2) -> + fam_binop(F1, F2, fun fam_difference/3). + +%% Inlined. +fam_binop(F1, F2, FF) when ?IS_SET(F1), ?IS_SET(F2) -> + case unify_types(?TYPE(F1), ?TYPE(F2)) of + [] -> + erlang:error(type_mismatch, [F1, F2]); + ?ANYTYPE -> + F1; + Type = ?FAMILY(_, _) -> + ?SET(FF(?LIST(F1), ?LIST(F2), []), Type); + _ -> erlang:error(badarg, [F1, F2]) + end. + +partition_family(I, Set) when is_integer(I), ?IS_SET(Set) -> + Type = ?TYPE(Set), + case check_for_sort(Type, I) of + empty -> + Set; + error -> + erlang:error(badarg, [I, Set]); + false -> % when I =:= 1 + ?SET(fam_partition_n(I, ?LIST(Set)), + ?BINREL(?REL_TYPE(I, Type), ?SET_OF(Type))); + true -> + ?SET(fam_partition_n(I, keysort(I, ?LIST(Set))), + ?BINREL(?REL_TYPE(I, Type), ?SET_OF(Type))) + end; +partition_family(SetFun, Set) when ?IS_SET(Set) -> + Type = ?TYPE(Set), + SL = ?LIST(Set), + case external_fun(SetFun) of + false when SL =/= [] -> + case subst(SL, SetFun, element_type(Type)) of + {NSL, NewType} -> + P = fam_partition(converse(NSL, []), true), + ?SET(reverse(P), ?BINREL(NewType, ?SET_OF(Type))); + Bad -> + erlang:error(Bad, [SetFun, Set]) + end; + false -> + empty_set(); + _ when Type =:= ?ANYTYPE -> + empty_set(); + _XFun when ?IS_SET_OF(Type) -> + erlang:error(badarg, [SetFun, Set]); + XFun -> + DType = XFun(Type), + case catch check_fun(Type, XFun, DType) of + {'EXIT', _} -> + erlang:error(badarg, [SetFun, Set]); + Sort -> + Ts = inverse_substitution(?LIST(Set), XFun, Sort), + P = fam_partition(Ts, Sort), + ?SET(reverse(P), ?BINREL(DType, ?SET_OF(Type))) + end + end. + +family_projection(SetFun, F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(_, _) when [] =:= ?LIST(F) -> + empty_set(); + ?FAMILY(DT, Type) -> + case external_fun(SetFun) of + false -> + case fam_proj(?LIST(F), SetFun, Type, ?ANYTYPE, []) of + {SL, NewType} -> + ?SET(SL, ?BINREL(DT, NewType)); + Bad -> + erlang:error(Bad, [SetFun, F]) + end; + _ -> + erlang:error(badarg, [SetFun, F]) + end; + ?ANYTYPE -> F; + _ -> erlang:error(badarg, [SetFun, F]) + end. + +%%% +%%% Digraph functions +%%% + +family_to_digraph(F) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(_, _) -> fam2digraph(F, digraph:new()); + ?ANYTYPE -> digraph:new(); + _Else -> erlang:error(badarg, [F]) + end. + +family_to_digraph(F, Type) when ?IS_SET(F) -> + case ?TYPE(F) of + ?FAMILY(_, _) -> ok; + ?ANYTYPE -> ok; + _Else -> erlang:error(badarg, [F, Type]) + end, + try digraph:new(Type) of + G -> case catch fam2digraph(F, G) of + {error, Reason} -> + true = digraph:delete(G), + erlang:error(Reason, [F, Type]); + _ -> + G + end + catch + error:badarg -> erlang:error(badarg, [F, Type]) + end. + +digraph_to_family(G) -> + case catch digraph_family(G) of + {'EXIT', _} -> erlang:error(badarg, [G]); + L -> ?SET(L, ?FAMILY(?ATOM_TYPE, ?ATOM_TYPE)) + end. + +digraph_to_family(G, T) -> + case {is_type(T), T} of + {true, ?SET_OF(?FAMILY(_,_) = Type)} -> + case catch digraph_family(G) of + {'EXIT', _} -> erlang:error(badarg, [G, T]); + L -> ?SET(L, Type) + end; + _ -> + erlang:error(badarg, [G, T]) + end. + +%% +%% Local functions +%% + +%% Type = OrderedSetType +%% | SetType +%% | atom() except '_' +%% OrderedSetType = {Type, ..., Type} +%% SetType = [ElementType] % list of exactly one element +%% ElementType = '_' % any type (implies empty set) +%% | Type + +is_types(0, _T) -> + true; +is_types(I, T) -> + case is_type(?REL_TYPE(I, T)) of + true -> is_types(I-1, T); + false -> false + end. + +is_element_type(?ANYTYPE) -> + true; +is_element_type(T) -> + is_type(T). + +set_of_sets([S | Ss], L, T0) when ?IS_SET(S) -> + case unify_types([?TYPE(S)], T0) of + [] -> {error, type_mismatch}; + Type -> set_of_sets(Ss, [?LIST(S) | L], Type) + end; +set_of_sets([S | Ss], L, T0) when ?IS_ORDSET(S) -> + case unify_types(?ORDTYPE(S), T0) of + [] -> {error, type_mismatch}; + Type -> set_of_sets(Ss, [?ORDDATA(S) | L], Type) + end; +set_of_sets([], L, T) -> + ?SET(usort(L), T); +set_of_sets(_, _L, _T) -> + {error, badarg}. + +ordset_of_sets([S | Ss], L, T) when ?IS_SET(S) -> + ordset_of_sets(Ss, [?LIST(S) | L], [[?TYPE(S)] | T]); +ordset_of_sets([S | Ss], L, T) when ?IS_ORDSET(S) -> + ordset_of_sets(Ss, [?ORDDATA(S) | L], [?ORDTYPE(S) | T]); +ordset_of_sets([], L, T) -> + ?ORDSET(list_to_tuple(reverse(L)), list_to_tuple(reverse(T))); +ordset_of_sets(_, _L, _T) -> + error. + +%% Inlined. +rel(Ts, [Type]) -> + case is_type(Type) and atoms_only(Type, 1) of + true -> + rel(Ts, tuple_size(Type), Type); + false -> + rel_type(Ts, [], Type) + end; +rel(Ts, Sz) -> + rel(Ts, Sz, erlang:make_tuple(Sz, ?ATOM_TYPE)). + +atoms_only(Type, I) when ?IS_ATOM_TYPE(?REL_TYPE(I, Type)) -> + atoms_only(Type, I+1); +atoms_only(Type, I) when I > tuple_size(Type), ?IS_RELATION(Type) -> + true; +atoms_only(_Type, _I) -> + false. + +rel(Ts, Sz, Type) when Sz >= 1 -> + SL = usort(Ts), + rel(SL, SL, Sz, Type). + +rel([T | Ts], L, Sz, Type) when tuple_size(T) =:= Sz -> + rel(Ts, L, Sz, Type); +rel([], L, _Sz, Type) -> + ?SET(L, Type). + +rel_type([E | Ts], L, Type) -> + {NType, NE} = make_element(E, Type, Type), + rel_type(Ts, [NE | L], NType); +rel_type([], [], ?ANYTYPE) -> + empty_set(); +rel_type([], SL, Type) when ?IS_RELATION(Type) -> + ?SET(usort(SL), Type). + +%% Inlined. +a_func(Ts, T) -> + case {T, is_type(T)} of + {[?BINREL(DT, RT) = Type], true} when ?IS_ATOM_TYPE(DT), + ?IS_ATOM_TYPE(RT) -> + func(Ts, Type); + {[Type], true} -> + func_type(Ts, [], Type, fun(?BINREL(_,_)) -> true end) + end. + +func(L0, Type) -> + L = usort(L0), + func(L, L, L, Type). + +func([{X,_} | Ts], X0, L, Type) when X /= X0 -> + func(Ts, X, L, Type); +func([{X,_} | _Ts], X0, _L, _Type) when X == X0 -> + bad_function; +func([], _X0, L, Type) -> + ?SET(L, Type). + +%% Inlined. +fam(Ts, T) -> + case {T, is_type(T)} of + {[?FAMILY(DT, RT) = Type], true} when ?IS_ATOM_TYPE(DT), + ?IS_ATOM_TYPE(RT) -> + fam2(Ts, Type); + {[Type], true} -> + func_type(Ts, [], Type, fun(?FAMILY(_,_)) -> true end) + end. + +fam2([], Type) -> + ?SET([], Type); +fam2(Ts, Type) -> + fam2(sort(Ts), Ts, [], Type). + +fam2([{I,L} | T], I0, SL, Type) when I /= I0 -> + fam2(T, I, [{I,usort(L)} | SL], Type); +fam2([{I,L} | T], I0, SL, Type) when I == I0 -> + case {usort(L), SL} of + {NL, [{_I,NL1} | _]} when NL == NL1 -> + fam2(T, I0, SL, Type); + _ -> + bad_function + end; +fam2([], _I0, SL, Type) -> + ?SET(reverse(SL), Type). + +func_type([E | T], SL, Type, F) -> + {NType, NE} = make_element(E, Type, Type), + func_type(T, [NE | SL], NType, F); +func_type([], [], ?ANYTYPE, _F) -> + empty_set(); +func_type([], SL, Type, F) -> + true = F(Type), + NL = usort(SL), + check_function(NL, ?SET(NL, Type)). + +setify(L, ?SET_OF(Atom)) when ?IS_ATOM_TYPE(Atom), Atom =/= ?ANYTYPE -> + ?SET(usort(L), Atom); +setify(L, ?SET_OF(Type0)) -> + case catch is_no_lists(Type0) of + {'EXIT', _} -> + {?SET_OF(Type), Set} = create(L, Type0, Type0, []), + ?SET(Set, Type); + N when is_integer(N) -> + rel(L, N, Type0); + Sizes -> + make_oset(L, Sizes, L, Type0) + end; +setify(E, Type0) -> + {Type, OrdSet} = make_element(E, Type0, Type0), + ?ORDSET(OrdSet, Type). + +is_no_lists(T) when is_tuple(T) -> + Sz = tuple_size(T), + is_no_lists(T, Sz, Sz, []). + +is_no_lists(_T, 0, Sz, []) -> + Sz; +is_no_lists(_T, 0, Sz, L) -> + {Sz, L}; +is_no_lists(T, I, Sz, L) when ?IS_ATOM_TYPE(?REL_TYPE(I, T)) -> + is_no_lists(T, I-1, Sz, L); +is_no_lists(T, I, Sz, L) -> + is_no_lists(T, I-1, Sz, [{I,is_no_lists(?REL_TYPE(I, T))} | L]). + +create([E | Es], T, T0, L) -> + {NT, S} = make_element(E, T, T0), + create(Es, NT, T0, [S | L]); +create([], T, _T0, L) -> + {?SET_OF(T), usort(L)}. + +make_element(C, ?ANYTYPE, _T0) -> + make_element(C); +make_element(C, Atom, ?ANYTYPE) when ?IS_ATOM_TYPE(Atom), + not is_list(C), not is_tuple(C) -> + {Atom, C}; +make_element(C, Atom, Atom) when ?IS_ATOM_TYPE(Atom) -> + {Atom, C}; +make_element(T, TT, ?ANYTYPE) when tuple_size(T) =:= tuple_size(TT) -> + make_tuple(tuple_to_list(T), tuple_to_list(TT), [], [], ?ANYTYPE); +make_element(T, TT, T0) when tuple_size(T) =:= tuple_size(TT) -> + make_tuple(tuple_to_list(T), tuple_to_list(TT), [], [], tuple_to_list(T0)); +make_element(L, [LT], ?ANYTYPE) when is_list(L) -> + create(L, LT, ?ANYTYPE, []); +make_element(L, [LT], [T0]) when is_list(L) -> + create(L, LT, T0, []). + +make_tuple([E | Es], [T | Ts], NT, L, T0) when T0 =:= ?ANYTYPE -> + {ET, ES} = make_element(E, T, T0), + make_tuple(Es, Ts, [ET | NT], [ES | L], T0); +make_tuple([E | Es], [T | Ts], NT, L, [T0 | T0s]) -> + {ET, ES} = make_element(E, T, T0), + make_tuple(Es, Ts, [ET | NT], [ES | L], T0s); +make_tuple([], [], NT, L, _T0s) when NT =/= [] -> + {list_to_tuple(reverse(NT)), list_to_tuple(reverse(L))}. + +%% Derive type. +make_element(C) when not is_list(C), not is_tuple(C) -> + {?ATOM_TYPE, C}; +make_element(T) when is_tuple(T) -> + make_tuple(tuple_to_list(T), [], []); +make_element(L) when is_list(L) -> + create(L, ?ANYTYPE, ?ANYTYPE, []). + +make_tuple([E | Es], T, L) -> + {ET, ES} = make_element(E), + make_tuple(Es, [ET | T], [ES | L]); +make_tuple([], T, L) when T =/= [] -> + {list_to_tuple(reverse(T)), list_to_tuple(reverse(L))}. + +make_oset([T | Ts], Szs, L, Type) -> + true = test_oset(Szs, T, T), + make_oset(Ts, Szs, L, Type); +make_oset([], _Szs, L, Type) -> + ?SET(usort(L), Type). + +%% Optimization. Avoid re-building (nested) tuples. +test_oset({Sz,Args}, T, T0) when tuple_size(T) =:= Sz -> + test_oset_args(Args, T, T0); +test_oset(Sz, T, _T0) when tuple_size(T) =:= Sz -> + true. + +test_oset_args([{Arg,Szs} | Ss], T, T0) -> + true = test_oset(Szs, ?REL_TYPE(Arg, T), T0), + test_oset_args(Ss, T, T0); +test_oset_args([], _T, _T0) -> + true. + +list_of_sets([S | Ss], Type, L) -> + list_of_sets(Ss, Type, [?SET(S, Type) | L]); +list_of_sets([], _Type, L) -> + reverse(L). + +list_of_ordsets([S | Ss], Type, L) -> + list_of_ordsets(Ss, Type, [?ORDSET(S, Type) | L]); +list_of_ordsets([], _Type, L) -> + reverse(L). + +tuple_of_sets([S | Ss], [?SET_OF(Type) | Types], L) -> + tuple_of_sets(Ss, Types, [?SET(S, Type) | L]); +tuple_of_sets([S | Ss], [Type | Types], L) -> + tuple_of_sets(Ss, Types, [?ORDSET(S, Type) | L]); +tuple_of_sets([], [], L) -> + list_to_tuple(reverse(L)). + +spec([E | Es], Fun, Type, L) -> + case Fun(term2set(E, Type)) of + true -> + spec(Es, Fun, Type, [E | L]); + false -> + spec(Es, Fun, Type, L); + _ -> + badarg + end; +spec([], _Fun, _Type, L) -> + reverse(L). + +specification([E | Es], Fun, L) -> + case Fun(E) of + true -> + specification(Es, Fun, [E | L]); + false -> + specification(Es, Fun, L); + _ -> + badarg + end; +specification([], _Fun, L) -> + reverse(L). + +%% Elements from the first list are kept. +intersection([H1 | T1], [H2 | T2], L) when H1 < H2 -> + intersection1(T1, T2, L, H2); +intersection([H1 | T1], [H2 | T2], L) when H1 == H2 -> + intersection(T1, T2, [H1 | L]); +intersection([H1 | T1], [_H2 | T2], L) -> + intersection2(T1, T2, L, H1); +intersection(_, _, L) -> + reverse(L). + +intersection1([H1 | T1], T2, L, H2) when H1 < H2 -> + intersection1(T1, T2, L, H2); +intersection1([H1 | T1], T2, L, H2) when H1 == H2 -> + intersection(T1, T2, [H1 | L]); +intersection1([H1 | T1], T2, L, _H2) -> + intersection2(T1, T2, L, H1); +intersection1(_, _, L, _) -> + reverse(L). + +intersection2(T1, [H2 | T2], L, H1) when H1 > H2 -> + intersection2(T1, T2, L, H1); +intersection2(T1, [H2 | T2], L, H1) when H1 == H2 -> + intersection(T1, T2, [H1 | L]); +intersection2(T1, [H2 | T2], L, _H1) -> + intersection1(T1, T2, L, H2); +intersection2(_, _, L, _) -> + reverse(L). + +difference([H1 | T1], [H2 | T2], L) when H1 < H2 -> + diff(T1, T2, [H1 | L], H2); +difference([H1 | T1], [H2 | T2], L) when H1 == H2 -> + difference(T1, T2, L); +difference([H1 | T1], [_H2 | T2], L) -> + diff2(T1, T2, L, H1); +difference(L1, _, L) -> + reverse(L, L1). + +diff([H1 | T1], T2, L, H2) when H1 < H2 -> + diff(T1, T2, [H1 | L], H2); +diff([H1 | T1], T2, L, H2) when H1 == H2 -> + difference(T1, T2, L); +diff([H1 | T1], T2, L, _H2) -> + diff2(T1, T2, L, H1); +diff(_, _, L, _) -> + reverse(L). + +diff2(T1, [H2 | T2], L, H1) when H1 > H2 -> + diff2(T1, T2, L, H1); +diff2(T1, [H2 | T2], L, H1) when H1 == H2 -> + difference(T1, T2, L); +diff2(T1, [H2 | T2], L, H1) -> + diff(T1, T2, [H1 | L], H2); +diff2(T1, _, L, H1) -> + reverse(L, [H1 | T1]). + +symdiff([H1 | T1], T2, L) -> + symdiff2(T1, T2, L, H1); +symdiff(_, T2, L) -> + reverse(L, T2). + +symdiff1([H1 | T1], T2, L, H2) when H1 < H2 -> + symdiff1(T1, T2, [H1 | L], H2); +symdiff1([H1 | T1], T2, L, H2) when H1 == H2 -> + symdiff(T1, T2, L); +symdiff1([H1 | T1], T2, L, H2) -> + symdiff2(T1, T2, [H2 | L], H1); +symdiff1(_, T2, L, H2) -> + reverse(L, [H2 | T2]). + +symdiff2(T1, [H2 | T2], L, H1) when H1 > H2 -> + symdiff2(T1, T2, [H2 | L], H1); +symdiff2(T1, [H2 | T2], L, H1) when H1 == H2 -> + symdiff(T1, T2, L); +symdiff2(T1, [H2 | T2], L, H1) -> + symdiff1(T1, T2, [H1 | L], H2); +symdiff2(T1, _, L, H1) -> + reverse(L, [H1 | T1]). + +sympart([H1 | T1], [H2 | T2], L1, L12, L2, T) when H1 < H2 -> + sympart1(T1, T2, [H1 | L1], L12, L2, T, H2); +sympart([H1 | T1], [H2 | T2], L1, L12, L2, T) when H1 == H2 -> + sympart(T1, T2, L1, [H1 | L12], L2, T); +sympart([H1 | T1], [H2 | T2], L1, L12, L2, T) -> + sympart2(T1, T2, L1, L12, [H2 | L2], T, H1); +sympart(S1, [], L1, L12, L2, T) -> + {?SET(reverse(L1, S1), T), + ?SET(reverse(L12), T), + ?SET(reverse(L2), T)}; +sympart(_, S2, L1, L12, L2, T) -> + {?SET(reverse(L1), T), + ?SET(reverse(L12), T), + ?SET(reverse(L2, S2), T)}. + +sympart1([H1 | T1], T2, L1, L12, L2, T, H2) when H1 < H2 -> + sympart1(T1, T2, [H1 | L1], L12, L2, T, H2); +sympart1([H1 | T1], T2, L1, L12, L2, T, H2) when H1 == H2 -> + sympart(T1, T2, L1, [H1 | L12], L2, T); +sympart1([H1 | T1], T2, L1, L12, L2, T, H2) -> + sympart2(T1, T2, L1, L12, [H2 | L2], T, H1); +sympart1(_, T2, L1, L12, L2, T, H2) -> + {?SET(reverse(L1), T), + ?SET(reverse(L12), T), + ?SET(reverse(L2, [H2 | T2]), T)}. + +sympart2(T1, [H2 | T2], L1, L12, L2, T, H1) when H1 > H2 -> + sympart2(T1, T2, L1, L12, [H2 | L2], T, H1); +sympart2(T1, [H2 | T2], L1, L12, L2, T, H1) when H1 == H2 -> + sympart(T1, T2, L1, [H1 | L12], L2, T); +sympart2(T1, [H2 | T2], L1, L12, L2, T, H1) -> + sympart1(T1, T2, [H1 | L1], L12, L2, T, H2); +sympart2(T1, _, L1, L12, L2, T, H1) -> + {?SET(reverse(L1, [H1 | T1]), T), + ?SET(reverse(L12), T), + ?SET(reverse(L2), T)}. + +prod([[E | Es] | Xs], T, L) -> + prod(Es, Xs, T, prod(Xs, [E | T], L)); +prod([], T, L) -> + [list_to_tuple(reverse(T)) | L]. + +prod([E | Es], Xs, T, L) -> + prod(Es, Xs, T, prod(Xs, [E | T], L)); +prod([], _Xs, _E, L) -> + L. + +constant_function([E | Es], X, L) -> + constant_function(Es, X, [{E,X} | L]); +constant_function([], _X, L) -> + reverse(L). + +subset([H1 | T1], [H2 | T2]) when H1 > H2 -> + subset(T1, T2, H1); +subset([H1 | T1], [H2 | T2]) when H1 == H2 -> + subset(T1, T2); +subset(L1, _) -> + L1 =:= []. + +subset(T1, [H2 | T2], H1) when H1 > H2 -> + subset(T1, T2, H1); +subset(T1, [H2 | T2], H1) when H1 == H2 -> + subset(T1, T2); +subset(_, _, _) -> + false. + +disjoint([B | Bs], A, As) when A < B -> + disjoint(As, B, Bs); +disjoint([B | _Bs], A, _As) when A == B -> + false; +disjoint([_B | Bs], A, As) -> + disjoint(Bs, A, As); +disjoint(_Bs, _A, _As) -> + true. + +%% Append sets that come in order, then "merge". +lunion([[_] = S]) -> % optimization + S; +lunion([[] | Ls]) -> + lunion(Ls); +lunion([S | Ss]) -> + umerge(lunion(Ss, last(S), [S], [])); +lunion([]) -> + []. + +lunion([[E] = S | Ss], Last, SL, Ls) when E > Last -> % optimization + lunion(Ss, E, [S | SL], Ls); +lunion([S | Ss], Last, SL, Ls) when hd(S) > Last -> + lunion(Ss, last(S), [S | SL], Ls); +lunion([S | Ss], _Last, SL, Ls) -> + lunion(Ss, last(S), [S], [append(reverse(SL)) | Ls]); +lunion([], _Last, SL, Ls) -> + [append(reverse(SL)) | Ls]. + +%% The empty list is always the first list, if present. +lintersection(_, []) -> + []; +lintersection([S | Ss], S0) -> + lintersection(Ss, intersection(S, S0, [])); +lintersection([], S) -> + S. + +can_rel([S | Ss], L) -> + can_rel(Ss, L, S, S); +can_rel([], L) -> + sort(L). + +can_rel(Ss, L, [E | Es], S) -> + can_rel(Ss, [{E, S} | L], Es, S); +can_rel(Ss, L, _, _S) -> + can_rel(Ss, L). + +rel2family([{X,Y} | S]) -> + rel2fam(S, X, [Y], []); +rel2family([]) -> + []. + +rel2fam([{X,Y} | S], X0, YL, L) when X0 == X -> + rel2fam(S, X0, [Y | YL], L); +rel2fam([{X,Y} | S], X0, [A,B | YL], L) -> % optimization + rel2fam(S, X, [Y], [{X0,reverse(YL,[B,A])} | L]); +rel2fam([{X,Y} | S], X0, YL, L) -> + rel2fam(S, X, [Y], [{X0,YL} | L]); +rel2fam([], X, YL, L) -> + reverse([{X,reverse(YL)} | L]). + +dom([{X,_} | Es]) -> + dom([], X, Es); +dom([] = L) -> + L. + +dom(L, X, [{X1,_} | Es]) when X == X1 -> + dom(L, X, Es); +dom(L, X, [{Y,_} | Es]) -> + dom([X | L], Y, Es); +dom(L, X, []) -> + reverse(L, [X]). + +ran([{_,Y} | Es], L) -> + ran(Es, [Y | L]); +ran([], L) -> + usort(L). + +relprod(A, B) -> + usort(relprod1(A, B)). + +relprod1([{Ay,Ax} | A], B) -> + relprod1(B, Ay, Ax, A, []); +relprod1(_A, _B) -> + []. + +relprod1([{Bx,_By} | B], Ay, Ax, A, L) when Ay > Bx -> + relprod1(B, Ay, Ax, A, L); +relprod1([{Bx,By} | B], Ay, Ax, A, L) when Ay == Bx -> + relprod(B, Bx, By, A, [{Ax,By} | L], Ax, B, Ay); +relprod1([{Bx,By} | B], _Ay, _Ax, A, L) -> + relprod2(B, Bx, By, A, L); +relprod1(_B, _Ay, _Ax, _A, L) -> + L. + +relprod2(B, Bx, By, [{Ay, _Ax} | A], L) when Ay < Bx -> + relprod2(B, Bx, By, A, L); +relprod2(B, Bx, By, [{Ay, Ax} | A], L) when Ay == Bx -> + relprod(B, Bx, By, A, [{Ax,By} | L], Ax, B, Ay); +relprod2(B, _Bx, _By, [{Ay, Ax} | A], L) -> + relprod1(B, Ay, Ax, A, L); +relprod2(_, _, _, _, L) -> + L. + +relprod(B0, Bx0, By0, A0, L, Ax, [{Bx,By} | B], Ay) when Ay == Bx -> + relprod(B0, Bx0, By0, A0, [{Ax,By} | L], Ax, B, Ay); +relprod(B0, Bx0, By0, A0, L, _Ax, _B, _Ay) -> + relprod2(B0, Bx0, By0, A0, L). + +relprod_n({}, _R, _EmptyG, _IsR) -> + {error, badarg}; +relprod_n(RT, R, EmptyR, IsR) -> + RL = tuple_to_list(RT), + case domain_type(RL, ?ANYTYPE) of + Error = {error, _Reason} -> + Error; + DType -> + Empty = any(fun is_empty_set/1, RL) or EmptyR, + RType = range_type(RL, []), + Type = ?BINREL(DType, RType), + Prod = + case Empty of + true when DType =:= ?ANYTYPE; RType =:= ?ANYTYPE -> + empty_set(); + true -> + ?SET([], Type); + false -> + TL = ?LIST((relprod_n(RL))), + Sz = tuple_size(RT), + Fun = fun({X,A}) -> {X, flat(Sz, A, [])} end, + ?SET(map(Fun, TL), Type) + end, + case IsR of + true -> relative_product(Prod, R); + false -> Prod + end + end. + +relprod_n([R | Rs]) -> + relprod_n(Rs, R). + +relprod_n([], R) -> + R; +relprod_n([R | Rs], R0) -> + T = raise_element(R0, 1), + R1 = relative_product1(T, R), + NR = projection({external, fun({{X,A},AS}) -> {X,{A,AS}} end}, R1), + relprod_n(Rs, NR). + +flat(1, A, L) -> + list_to_tuple([A | L]); +flat(N, {T,A}, L) -> + flat(N-1, T, [A | L]). + +domain_type([T | Ts], T0) when ?IS_SET(T) -> + case ?TYPE(T) of + ?BINREL(DT, _RT) -> + case unify_types(DT, T0) of + [] -> {error, type_mismatch}; + T1 -> domain_type(Ts, T1) + end; + ?ANYTYPE -> + domain_type(Ts, T0); + _ -> {error, badarg} + end; +domain_type([], T0) -> + T0. + +range_type([T | Ts], L) -> + case ?TYPE(T) of + ?BINREL(_DT, RT) -> + range_type(Ts, [RT | L]); + ?ANYTYPE -> + ?ANYTYPE + end; +range_type([], L) -> + list_to_tuple(reverse(L)). + +converse([{A,B} | X], L) -> + converse(X, [{B,A} | L]); +converse([], L) -> + sort(L). + +strict([{E1,E2} | Es], L) when E1 == E2 -> + strict(Es, L); +strict([E | Es], L) -> + strict(Es, [E | L]); +strict([], L) -> + reverse(L). + +weak(Es) -> + %% Not very efficient... + weak(Es, ran(Es, []), []). + +weak(Es=[{X,_} | _], [Y | Ys], L) when X > Y -> + weak(Es, Ys, [{Y,Y} | L]); +weak(Es=[{X,_} | _], [Y | Ys], L) when X == Y -> + weak(Es, Ys, L); +weak([E={X,Y} | Es], Ys, L) when X > Y -> + weak1(Es, Ys, [E | L], X); +weak([E={X,Y} | Es], Ys, L) when X == Y -> + weak2(Es, Ys, [E | L], X); +weak([E={X,_Y} | Es], Ys, L) -> % when X < _Y + weak2(Es, Ys, [E, {X,X} | L], X); +weak([], [Y | Ys], L) -> + weak([], Ys, [{Y,Y} | L]); +weak([], [], L) -> + reverse(L). + +weak1([E={X,Y} | Es], Ys, L, X0) when X > Y, X == X0 -> + weak1(Es, Ys, [E | L], X); +weak1([E={X,Y} | Es], Ys, L, X0) when X == Y, X == X0 -> + weak2(Es, Ys, [E | L], X); +weak1([E={X,_Y} | Es], Ys, L, X0) when X == X0 -> % when X < Y + weak2(Es, Ys, [E, {X,X} | L], X); +weak1(Es, Ys, L, X) -> + weak(Es, Ys, [{X,X} | L]). + +weak2([E={X,_Y} | Es], Ys, L, X0) when X == X0 -> % when X < _Y + weak2(Es, Ys, [E | L], X); +weak2(Es, Ys, L, _X) -> + weak(Es, Ys, L). + +extc(L, [D | Ds], C, Ts) -> + extc(L, Ds, C, Ts, D); +extc(L, [], _C, _Ts) -> + L. + +extc(L, Ds, C, [{X,_Y} | Ts], D) when X < D -> + extc(L, Ds, C, Ts, D); +extc(L, Ds, C, [{X,_Y} | Ts], D) when X == D -> + extc(L, Ds, C, Ts); +extc(L, Ds, C, [{X,_Y} | Ts], D) -> + extc2([{D,C} | L], Ds, C, Ts, X); +extc(L, Ds, C, [], D) -> + extc_tail([{D,C} | L], Ds, C). + +extc2(L, [D | Ds], C, Ts, X) when X > D -> + extc2([{D,C} | L], Ds, C, Ts, X); +extc2(L, [D | Ds], C, Ts, X) when X == D -> + extc(L, Ds, C, Ts); +extc2(L, [D | Ds], C, Ts, _X) -> + extc(L, Ds, C, Ts, D); +extc2(L, [], _C, _Ts, _X) -> + L. + +extc_tail(L, [D | Ds], C) -> + extc_tail([{D,C} | L], Ds, C); +extc_tail(L, [], _C) -> + L. + +is_a_func([{E,_} | Es], E0) when E /= E0 -> + is_a_func(Es, E); +is_a_func(L, _E) -> + L =:= []. + +restrict_n(I, [T | Ts], Key, Keys, L) -> + case element(I, T) of + K when K < Key -> + restrict_n(I, Ts, Key, Keys, L); + K when K == Key -> + restrict_n(I, Ts, Key, Keys, [T | L]); + K -> + restrict_n(I, K, Ts, Keys, L, T) + end; +restrict_n(_I, _Ts, _Key, _Keys, L) -> + L. + +restrict_n(I, K, Ts, [Key | Keys], L, E) when K > Key -> + restrict_n(I, K, Ts, Keys, L, E); +restrict_n(I, K, Ts, [Key | Keys], L, E) when K == Key -> + restrict_n(I, Ts, Key, Keys, [E | L]); +restrict_n(I, _K, Ts, [Key | Keys], L, _E) -> + restrict_n(I, Ts, Key, Keys, L); +restrict_n(_I, _K, _Ts, _Keys, L, _E) -> + L. + +restrict([Key | Keys], Tuples) -> + restrict(Tuples, Key, Keys, []); +restrict(_Keys, _Tuples) -> + []. + +restrict([{K,_E} | Ts], Key, Keys, L) when K < Key -> + restrict(Ts, Key, Keys, L); +restrict([{K,E} | Ts], Key, Keys, L) when K == Key -> + restrict(Ts, Key, Keys, [E | L]); +restrict([{K,E} | Ts], _Key, Keys, L) -> + restrict(Ts, K, Keys, L, E); +restrict(_Ts, _Key, _Keys, L) -> + L. + +restrict(Ts, K, [Key | Keys], L, E) when K > Key -> + restrict(Ts, K, Keys, L, E); +restrict(Ts, K, [Key | Keys], L, E) when K == Key -> + restrict(Ts, Key, Keys, [E | L]); +restrict(Ts, _K, [Key | Keys], L, _E) -> + restrict(Ts, Key, Keys, L); +restrict(_Ts, _K, _Keys, L, _E) -> + L. + +diff_restrict_n(I, [T | Ts], Key, Keys, L) -> + case element(I, T) of + K when K < Key -> + diff_restrict_n(I, Ts, Key, Keys, [T | L]); + K when K == Key -> + diff_restrict_n(I, Ts, Key, Keys, L); + K -> + diff_restrict_n(I, K, Ts, Keys, L, T) + end; +diff_restrict_n(I, _Ts, _Key, _Keys, L) when I =:= 1 -> + reverse(L); +diff_restrict_n(_I, _Ts, _Key, _Keys, L) -> + sort(L). + +diff_restrict_n(I, K, Ts, [Key | Keys], L, T) when K > Key -> + diff_restrict_n(I, K, Ts, Keys, L, T); +diff_restrict_n(I, K, Ts, [Key | Keys], L, _T) when K == Key -> + diff_restrict_n(I, Ts, Key, Keys, L); +diff_restrict_n(I, _K, Ts, [Key | Keys], L, T) -> + diff_restrict_n(I, Ts, Key, Keys, [T | L]); +diff_restrict_n(I, _K, Ts, _Keys, L, T) when I =:= 1 -> + reverse(L, [T | Ts]); +diff_restrict_n(_I, _K, Ts, _Keys, L, T) -> + sort([T | Ts ++ L]). + +diff_restrict([Key | Keys], Tuples) -> + diff_restrict(Tuples, Key, Keys, []); +diff_restrict(_Keys, Tuples) -> + diff_restrict_tail(Tuples, []). + +diff_restrict([{K,E} | Ts], Key, Keys, L) when K < Key -> + diff_restrict(Ts, Key, Keys, [E | L]); +diff_restrict([{K,_E} | Ts], Key, Keys, L) when K == Key -> + diff_restrict(Ts, Key, Keys, L); +diff_restrict([{K,E} | Ts], _Key, Keys, L) -> + diff_restrict(Ts, K, Keys, L, E); +diff_restrict(_Ts, _Key, _Keys, L) -> + L. + +diff_restrict(Ts, K, [Key | Keys], L, E) when K > Key -> + diff_restrict(Ts, K, Keys, L, E); +diff_restrict(Ts, K, [Key | Keys], L, _E) when K == Key -> + diff_restrict(Ts, Key, Keys, L); +diff_restrict(Ts, _K, [Key | Keys], L, E) -> + diff_restrict(Ts, Key, Keys, [E | L]); +diff_restrict(Ts, _K, _Keys, L, E) -> + diff_restrict_tail(Ts, [E | L]). + +diff_restrict_tail([{_K,E} | Ts], L) -> + diff_restrict_tail(Ts, [E | L]); +diff_restrict_tail(_Ts, L) -> + L. + +comp([], B) -> + check_function(B, []); +comp(_A, []) -> + bad_function; +comp(A0, [{Bx,By} | B]) -> + A = converse(A0, []), + check_function(A0, comp1(A, B, [], Bx, By)). + +comp1([{Ay,Ax} | A], B, L, Bx, By) when Ay == Bx -> + comp1(A, B, [{Ax,By} | L], Bx, By); +comp1([{Ay,Ax} | A], B, L, Bx, _By) when Ay > Bx -> + comp2(A, B, L, Bx, Ay, Ax); +comp1([{Ay,_Ax} | _A], _B, _L, Bx, _By) when Ay < Bx -> + bad_function; +comp1([], B, L, Bx, _By) -> + check_function(Bx, B, L). + +comp2(A, [{Bx,_By} | B], L, Bx0, Ay, Ax) when Ay > Bx, Bx /= Bx0 -> + comp2(A, B, L, Bx, Ay, Ax); +comp2(A, [{Bx,By} | B], L, _Bx0, Ay, Ax) when Ay == Bx -> + comp1(A, B, [{Ax,By} | L], Bx, By); +comp2(_A, _B, _L, _Bx0, _Ay, _Ax) -> + bad_function. + +inverse1([{A,B} | X]) -> + inverse(X, A, [{B,A}]); +inverse1([]) -> + []. + +inverse([{A,B} | X], A0, L) when A0 /= A -> + inverse(X, A, [{B,A} | L]); +inverse([{A,_B} | _X], A0, _L) when A0 == A -> + bad_function; +inverse([], _A0, L) -> + SL = [{V,_} | Es] = sort(L), + case is_a_func(Es, V) of + true -> SL; + false -> bad_function + end. + +%% Inlined. +external_fun({external, Function}) when is_atom(Function) -> + false; +external_fun({external, Fun}) -> + Fun; +external_fun(_) -> + false. + +%% Inlined. +element_type(?SET_OF(Type)) -> Type; +element_type(Type) -> Type. + +subst(Ts, Fun, Type) -> + subst(Ts, Fun, Type, ?ANYTYPE, []). + +subst([T | Ts], Fun, Type, NType, L) -> + case setfun(T, Fun, Type, NType) of + {SD, ST} -> subst(Ts, Fun, Type, ST, [{T, SD} | L]); + Bad -> Bad + end; +subst([], _Fun, _Type, NType, L) -> + {L, NType}. + +projection1([E | Es]) -> + projection1([], element(1, E), Es); +projection1([] = L) -> + L. + +projection1(L, X, [E | Es]) -> + case element(1, E) of + X1 when X == X1 -> projection1(L, X, Es); + X1 -> projection1([X | L], X1, Es) + end; +projection1(L, X, []) -> + reverse(L, [X]). + +projection_n([E | Es], I, L) -> + projection_n(Es, I, [element(I, E) | L]); +projection_n([], _I, L) -> + usort(L). + +substitute_element([T | Ts], I, L) -> + substitute_element(Ts, I, [{T, element(I, T)} | L]); +substitute_element(_, _I, L) -> + reverse(L). + +substitute([T | Ts], Fun, L) -> + substitute(Ts, Fun, [{T, Fun(T)} | L]); +substitute(_, _Fun, L) -> + reverse(L). + +partition_n(I, [E | Ts]) -> + partition_n(I, Ts, element(I, E), [E], []); +partition_n(_I, []) -> + []. + +partition_n(I, [E | Ts], K, Es, P) -> + case {element(I, E), Es} of + {K1, _} when K == K1 -> + partition_n(I, Ts, K, [E | Es], P); + {K1, [_]} -> % optimization + partition_n(I, Ts, K1, [E], [Es | P]); + {K1, _} -> + partition_n(I, Ts, K1, [E], [reverse(Es) | P]) + end; +partition_n(I, [], _K, Es, P) when I > 1 -> + sort([reverse(Es) | P]); +partition_n(_I, [], _K, [_] = Es, P) -> % optimization + reverse(P, [Es]); +partition_n(_I, [], _K, Es, P) -> + reverse(P, [reverse(Es)]). + +partition3_n(I, [T | Ts], Key, Keys, L1, L2) -> + case element(I, T) of + K when K < Key -> + partition3_n(I, Ts, Key, Keys, L1, [T | L2]); + K when K == Key -> + partition3_n(I, Ts, Key, Keys, [T | L1], L2); + K -> + partition3_n(I, K, Ts, Keys, L1, L2, T) + end; +partition3_n(I, _Ts, _Key, _Keys, L1, L2) when I =:= 1 -> + [reverse(L1) | reverse(L2)]; +partition3_n(_I, _Ts, _Key, _Keys, L1, L2) -> + [sort(L1) | sort(L2)]. + +partition3_n(I, K, Ts, [Key | Keys], L1, L2, T) when K > Key -> + partition3_n(I, K, Ts, Keys, L1, L2, T); +partition3_n(I, K, Ts, [Key | Keys], L1, L2, T) when K == Key -> + partition3_n(I, Ts, Key, Keys, [T | L1], L2); +partition3_n(I, _K, Ts, [Key | Keys], L1, L2, T) -> + partition3_n(I, Ts, Key, Keys, L1, [T | L2]); +partition3_n(I, _K, Ts, _Keys, L1, L2, T) when I =:= 1 -> + [reverse(L1) | reverse(L2, [T | Ts])]; +partition3_n(_I, _K, Ts, _Keys, L1, L2, T) -> + [sort(L1) | sort([T | Ts ++ L2])]. + +partition3([Key | Keys], Tuples) -> + partition3(Tuples, Key, Keys, [], []); +partition3(_Keys, Tuples) -> + partition3_tail(Tuples, [], []). + +partition3([{K,E} | Ts], Key, Keys, L1, L2) when K < Key -> + partition3(Ts, Key, Keys, L1, [E | L2]); +partition3([{K,E} | Ts], Key, Keys, L1, L2) when K == Key -> + partition3(Ts, Key, Keys, [E | L1], L2); +partition3([{K,E} | Ts], _Key, Keys, L1, L2) -> + partition3(Ts, K, Keys, L1, L2, E); +partition3(_Ts, _Key, _Keys, L1, L2) -> + [L1 | L2]. + +partition3(Ts, K, [Key | Keys], L1, L2, E) when K > Key -> + partition3(Ts, K, Keys, L1, L2, E); +partition3(Ts, K, [Key | Keys], L1, L2, E) when K == Key -> + partition3(Ts, Key, Keys, [E | L1], L2); +partition3(Ts, _K, [Key | Keys], L1, L2, E) -> + partition3(Ts, Key, Keys, L1, [E | L2]); +partition3(Ts, _K, _Keys, L1, L2, E) -> + partition3_tail(Ts, L1, [E | L2]). + +partition3_tail([{_K,E} | Ts], L1, L2) -> + partition3_tail(Ts, L1, [E | L2]); +partition3_tail(_Ts, L1, L2) -> + [L1 | L2]. + +replace([E | Es], F, L) -> + replace(Es, F, [F(E) | L]); +replace(_, _F, L) -> + sort(L). + +mul_relprod([T | Ts], I, R) when ?IS_SET(T) -> + P = raise_element(R, I), + F = relative_product1(P, T), + [F | mul_relprod(Ts, I+1, R)]; +mul_relprod([], _I, _R) -> + []. + +raise_element(R, I) -> + L = sort(I =/= 1, rearr(?LIST(R), I, [])), + Type = ?TYPE(R), + ?SET(L, ?BINREL(?REL_TYPE(I, Type), Type)). + +rearr([E | Es], I, L) -> + rearr(Es, I, [{element(I, E), E} | L]); +rearr([], _I, L) -> + L. + +join_element(E1, E2) -> + [_ | L2] = tuple_to_list(E2), + list_to_tuple(tuple_to_list(E1) ++ L2). + +join_element(E1, E2, I2) -> + tuple_to_list(E1) ++ join_element2(tuple_to_list(E2), 1, I2). + +join_element2([B | Bs], C, I2) when C =/= I2 -> + [B | join_element2(Bs, C+1, I2)]; +join_element2([_ | Bs], _C, _I2) -> + Bs. + +family2rel([{X,S} | F], L) -> + fam2rel(F, L, X, S); +family2rel([], L) -> + reverse(L). + +fam2rel(F, L, X, [Y | Ys]) -> + fam2rel(F, [{X,Y} | L], X, Ys); +fam2rel(F, L, _X, _) -> + family2rel(F, L). + +fam_spec([{_,S}=E | F], Fun, Type, L) -> + case Fun(?SET(S, Type)) of + true -> + fam_spec(F, Fun, Type, [E | L]); + false -> + fam_spec(F, Fun, Type, L); + _ -> + badarg + end; +fam_spec([], _Fun, _Type, L) -> + reverse(L). + +fam_specification([{_,S}=E | F], Fun, L) -> + case Fun(S) of + true -> + fam_specification(F, Fun, [E | L]); + false -> + fam_specification(F, Fun, L); + _ -> + badarg + end; +fam_specification([], _Fun, L) -> + reverse(L). + +un_of_fam([{_X,S} | F], L) -> + un_of_fam(F, [S | L]); +un_of_fam([], L) -> + lunion(sort(L)). + +int_of_fam([{_,S} | F]) -> + int_of_fam(F, [S]); +int_of_fam([]) -> + badarg. + +int_of_fam([{_,S} | F], L) -> + int_of_fam(F, [S | L]); +int_of_fam([], [L | Ls]) -> + lintersection(Ls, L). + +fam_un([{X,S} | F], L) -> + fam_un(F, [{X, lunion(S)} | L]); +fam_un([], L) -> + reverse(L). + +fam_int([{X, [S | Ss]} | F], L) -> + fam_int(F, [{X, lintersection(Ss, S)} | L]); +fam_int([{_X,[]} | _F], _L) -> + badarg; +fam_int([], L) -> + reverse(L). + +fam_dom([{X,S} | F], L) -> + fam_dom(F, [{X, dom(S)} | L]); +fam_dom([], L) -> + reverse(L). + +fam_ran([{X,S} | F], L) -> + fam_ran(F, [{X, ran(S, [])} | L]); +fam_ran([], L) -> + reverse(L). + +fam_union(F1 = [{A,_AS} | _AL], [B1={B,_BS} | BL], L) when A > B -> + fam_union(F1, BL, [B1 | L]); +fam_union([{A,AS} | AL], [{B,BS} | BL], L) when A == B -> + fam_union(AL, BL, [{A, umerge(AS, BS)} | L]); +fam_union([A1 | AL], F2, L) -> + fam_union(AL, F2, [A1 | L]); +fam_union(_, F2, L) -> + reverse(L, F2). + +fam_intersect(F1 = [{A,_AS} | _AL], [{B,_BS} | BL], L) when A > B -> + fam_intersect(F1, BL, L); +fam_intersect([{A,AS} | AL], [{B,BS} | BL], L) when A == B -> + fam_intersect(AL, BL, [{A, intersection(AS, BS, [])} | L]); +fam_intersect([_A1 | AL], F2, L) -> + fam_intersect(AL, F2, L); +fam_intersect(_, _, L) -> + reverse(L). + +fam_difference(F1 = [{A,_AS} | _AL], [{B,_BS} | BL], L) when A > B -> + fam_difference(F1, BL, L); +fam_difference([{A,AS} | AL], [{B,BS} | BL], L) when A == B -> + fam_difference(AL, BL, [{A, difference(AS, BS, [])} | L]); +fam_difference([A1 | AL], F2, L) -> + fam_difference(AL, F2, [A1 | L]); +fam_difference(F1, _, L) -> + reverse(L, F1). + +check_function([{X,_} | XL], R) -> + check_function(X, XL, R); +check_function([], R) -> + R. + +check_function(X0, [{X,_} | XL], R) when X0 /= X -> + check_function(X, XL, R); +check_function(X0, [{X,_} | _XL], _R) when X0 == X -> + bad_function; +check_function(_X0, [], R) -> + R. + +fam_partition_n(I, [E | Ts]) -> + fam_partition_n(I, Ts, element(I, E), [E], []); +fam_partition_n(_I, []) -> + []. + +fam_partition_n(I, [E | Ts], K, Es, P) -> + case {element(I, E), Es} of + {K1, _} when K == K1 -> + fam_partition_n(I, Ts, K, [E | Es], P); + {K1, [_]} -> % optimization + fam_partition_n(I, Ts, K1, [E], [{K,Es} | P]); + {K1, _} -> + fam_partition_n(I, Ts, K1, [E], [{K,reverse(Es)} | P]) + end; +fam_partition_n(_I, [], K, [_] = Es, P) -> % optimization + reverse(P, [{K,Es}]); +fam_partition_n(_I, [], K, Es, P) -> + reverse(P, [{K,reverse(Es)}]). + +fam_partition([{K,Vs} | Ts], Sort) -> + fam_partition(Ts, K, [Vs], [], Sort); +fam_partition([], _Sort) -> + []. + +fam_partition([{K1,V} | Ts], K, Vs, P, S) when K1 == K -> + fam_partition(Ts, K, [V | Vs], P, S); +fam_partition([{K1,V} | Ts], K, [_] = Vs, P, S) -> % optimization + fam_partition(Ts, K1, [V], [{K, Vs} | P], S); +fam_partition([{K1,V} | Ts], K, Vs, P, S) -> + fam_partition(Ts, K1, [V], [{K, sort(S, Vs)} | P], S); +fam_partition([], K, [_] = Vs, P, _S) -> % optimization + [{K, Vs} | P]; +fam_partition([], K, Vs, P, S) -> + [{K, sort(S, Vs)} | P]. + +fam_proj([{X,S} | F], Fun, Type, NType, L) -> + case setfun(S, Fun, Type, NType) of + {SD, ST} -> fam_proj(F, Fun, Type, ST, [{X, SD} | L]); + Bad -> Bad + end; +fam_proj([], _Fun, _Type, NType, L) -> + {reverse(L), NType}. + +setfun(T, Fun, Type, NType) -> + case Fun(term2set(T, Type)) of + NS when ?IS_SET(NS) -> + case unify_types(NType, ?SET_OF(?TYPE(NS))) of + [] -> type_mismatch; + NT -> {?LIST(NS), NT} + end; + NS when ?IS_ORDSET(NS) -> + case unify_types(NType, NT = ?ORDTYPE(NS)) of + [] -> type_mismatch; + NT -> {?ORDDATA(NS), NT} + end; + _ -> + badarg + end. + +%% Inlined. +term2set(L, Type) when is_list(L) -> + ?SET(L, Type); +term2set(T, Type) -> + ?ORDSET(T, Type). + +fam2digraph(F, G) -> + Fun = fun({From, ToL}) -> + digraph:add_vertex(G, From), + Fun2 = fun(To) -> + digraph:add_vertex(G, To), + case digraph:add_edge(G, From, To) of + {error, {bad_edge, _}} -> + throw({error, cyclic}); + _ -> + true + end + end, + foreach(Fun2, ToL) + end, + foreach(Fun, to_external(F)), + G. + +digraph_family(G) -> + Vs = sort(digraph:vertices(G)), + digraph_fam(Vs, Vs, G, []). + +digraph_fam([V | Vs], V0, G, L) when V /= V0 -> + Ns = sort(digraph:out_neighbours(G, V)), + digraph_fam(Vs, V, G, [{V,Ns} | L]); +digraph_fam([], _V0, _G, L) -> + reverse(L). + +%% -> bool() +check_fun(T, F, FunT) -> + true = is_type(FunT), + {NT, _MaxI} = number_tuples(T, 1), + L = flatten(tuple2list(F(NT))), + has_hole(L, 1). + +number_tuples(T, N) when is_tuple(T) -> + {L, NN} = mapfoldl(fun number_tuples/2, N, tuple_to_list(T)), + {list_to_tuple(L), NN}; +number_tuples(_, N) -> + {N, N+1}. + +tuple2list(T) when is_tuple(T) -> + map(fun tuple2list/1, tuple_to_list(T)); +tuple2list(C) -> + [C]. + +has_hole([I | Is], I0) when I =< I0 -> has_hole(Is, erlang:max(I+1, I0)); +has_hole(Is, _I) -> Is =/= []. + +%% Optimization. Same as check_fun/3, but for integers. +check_for_sort(T, _I) when T =:= ?ANYTYPE -> + empty; +check_for_sort(T, I) when ?IS_RELATION(T), I =< ?REL_ARITY(T), I >= 1 -> + I > 1; +check_for_sort(_T, _I) -> + error. + +inverse_substitution(L, Fun, Sort) -> + %% One easily sees that the inverse of the tuples created by + %% applying Fun need to be sorted iff the tuples created by Fun + %% need to be sorted. + sort(Sort, fun_rearr(L, Fun, [])). + +fun_rearr([E | Es], Fun, L) -> + fun_rearr(Es, Fun, [{Fun(E), E} | L]); +fun_rearr([], _Fun, L) -> + L. + +sets_to_list(Ss) -> + map(fun(S) when ?IS_SET(S) -> ?LIST(S) end, Ss). + +types([], L) -> + list_to_tuple(reverse(L)); +types([S | _Ss], _L) when ?TYPE(S) =:= ?ANYTYPE -> + ?ANYTYPE; +types([S | Ss], L) -> + types(Ss, [?TYPE(S) | L]). + +%% Inlined. +unify_types(T, T) -> T; +unify_types(Type1, Type2) -> + catch unify_types1(Type1, Type2). + +unify_types1(Atom, Atom) when ?IS_ATOM_TYPE(Atom) -> + Atom; +unify_types1(?ANYTYPE, Type) -> + Type; +unify_types1(Type, ?ANYTYPE) -> + Type; +unify_types1(?SET_OF(Type1), ?SET_OF(Type2)) -> + [unify_types1(Type1, Type2)]; +unify_types1(T1, T2) when tuple_size(T1) =:= tuple_size(T2) -> + unify_typesl(tuple_size(T1), T1, T2, []); +unify_types1(_T1, _T2) -> + throw([]). + +unify_typesl(0, _T1, _T2, L) -> + list_to_tuple(L); +unify_typesl(N, T1, T2, L) -> + T = unify_types1(?REL_TYPE(N, T1), ?REL_TYPE(N, T2)), + unify_typesl(N-1, T1, T2, [T | L]). + +%% inlined. +match_types(T, T) -> true; +match_types(Type1, Type2) -> match_types1(Type1, Type2). + +match_types1(Atom, Atom) when ?IS_ATOM_TYPE(Atom) -> + true; +match_types1(?ANYTYPE, _) -> + true; +match_types1(_, ?ANYTYPE) -> + true; +match_types1(?SET_OF(Type1), ?SET_OF(Type2)) -> + match_types1(Type1, Type2); +match_types1(T1, T2) when tuple_size(T1) =:= tuple_size(T2) -> + match_typesl(tuple_size(T1), T1, T2); +match_types1(_T1, _T2) -> + false. + +match_typesl(0, _T1, _T2) -> + true; +match_typesl(N, T1, T2) -> + case match_types1(?REL_TYPE(N, T1), ?REL_TYPE(N, T2)) of + true -> match_typesl(N-1, T1, T2); + false -> false + end. + +sort(true, L) -> + sort(L); +sort(false, L) -> + reverse(L). diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src new file mode 100644 index 0000000000..3e52c48e42 --- /dev/null +++ b/lib/stdlib/src/stdlib.app.src @@ -0,0 +1,105 @@ +%% This is an -*- erlang -*- file. +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +{application, stdlib, + [{description, "ERTS CXC 138 10"}, + {vsn, "%VSN%"}, + {modules, [array, + base64, + beam_lib, + c, + calendar, + dets, + dets_server, + dets_sup, + dets_utils, + dets_v8, + dets_v9, + dict, + digraph, + digraph_utils, + edlin, + edlin_expand, + epp, + eval_bits, + erl_bits, + erl_compile, + erl_eval, + erl_expand_records, + erl_internal, + erl_lint, + erl_parse, + erl_posix_msg, + erl_pp, + erl_scan, + erl_tar, + error_logger_file_h, + error_logger_tty_h, + escript, + ets, + file_sorter, + filelib, + filename, + gb_trees, + gb_sets, + gen, + gen_event, + gen_fsm, + gen_server, + io, + io_lib, + io_lib_format, + io_lib_fread, + io_lib_pretty, + lib, + lists, + log_mf_h, + math, + ms_transform, + orddict, + ordsets, + otp_internal, + pg, + pool, + proc_lib, + proplists, + qlc, + qlc_pt, + queue, + random, + re, + regexp, + sets, + shell, + shell_default, + slave, + sofs, + string, + supervisor, + supervisor_bridge, + sys, + timer, + unicode, + win32reg, + zip]}, + {registered,[timer_server,rsh_starter,take_over_monitor,pool_master, + dets]}, + {applications, [kernel]}, + {env, []}]}. + diff --git a/lib/stdlib/src/stdlib.appup.src b/lib/stdlib/src/stdlib.appup.src new file mode 100644 index 0000000000..54a63833e6 --- /dev/null +++ b/lib/stdlib/src/stdlib.appup.src @@ -0,0 +1 @@ +{"%VSN%",[],[]}. diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl new file mode 100644 index 0000000000..6636a03f06 --- /dev/null +++ b/lib/stdlib/src/string.erl @@ -0,0 +1,394 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(string). + +-export([len/1,equal/2,concat/2,chr/2,rchr/2,str/2,rstr/2, + span/2,cspan/2,substr/2,substr/3,tokens/2,chars/2,chars/3]). +-export([copies/2,words/1,words/2,strip/1,strip/2,strip/3, + sub_word/2,sub_word/3,left/2,left/3,right/2,right/3, + sub_string/2,sub_string/3,centre/2,centre/3, join/2]). +-export([to_upper/1, to_lower/1]). + +-import(lists,[reverse/1,member/2]). + +%%--------------------------------------------------------------------------- + +-type direction() :: 'left' | 'right' | 'both'. + +%%--------------------------------------------------------------------------- + +%% Robert's bit + +%% len(String) +%% Return the length of a string. + +-spec len(string()) -> non_neg_integer(). + +len(S) -> length(S). + +%% equal(String1, String2) +%% Test if 2 strings are equal. + +-spec equal(string(), string()) -> boolean(). + +equal(S, S) -> true; +equal(_, _) -> false. + +%% concat(String1, String2) +%% Concatenate 2 strings. + +-spec concat(string(), string()) -> string(). + +concat(S1, S2) -> S1 ++ S2. + +%% chr(String, Char) +%% rchr(String, Char) +%% Return the first/last index of the character in a string. + +-spec chr(string(), char()) -> non_neg_integer(). + +chr(S, C) when is_integer(C) -> chr(S, C, 1). + +chr([C|_Cs], C, I) -> I; +chr([_|Cs], C, I) -> chr(Cs, C, I+1); +chr([], _C, _I) -> 0. + +-spec rchr(string(), char()) -> non_neg_integer(). + +rchr(S, C) when is_integer(C) -> rchr(S, C, 1, 0). + +rchr([C|Cs], C, I, _L) -> %Found one, now find next! + rchr(Cs, C, I+1, I); +rchr([_|Cs], C, I, L) -> + rchr(Cs, C, I+1, L); +rchr([], _C, _I, L) -> L. + +%% str(String, SubString) +%% rstr(String, SubString) +%% index(String, SubString) +%% Return the first/last index of the sub-string in a string. +%% index/2 is kept for backwards compatibility. + +-spec str(string(), string()) -> non_neg_integer(). + +str(S, Sub) when is_list(Sub) -> str(S, Sub, 1). + +str([C|S], [C|Sub], I) -> + case prefix(Sub, S) of + true -> I; + false -> str(S, [C|Sub], I+1) + end; +str([_|S], Sub, I) -> str(S, Sub, I+1); +str([], _Sub, _I) -> 0. + +-spec rstr(string(), string()) -> non_neg_integer(). + +rstr(S, Sub) when is_list(Sub) -> rstr(S, Sub, 1, 0). + +rstr([C|S], [C|Sub], I, L) -> + case prefix(Sub, S) of + true -> rstr(S, [C|Sub], I+1, I); + false -> rstr(S, [C|Sub], I+1, L) + end; +rstr([_|S], Sub, I, L) -> rstr(S, Sub, I+1, L); +rstr([], _Sub, _I, L) -> L. + +prefix([C|Pre], [C|String]) -> prefix(Pre, String); +prefix([], String) when is_list(String) -> true; +prefix(Pre, String) when is_list(Pre), is_list(String) -> false. + +%% span(String, Chars) -> Length. +%% cspan(String, Chars) -> Length. + +-spec span(string(), string()) -> non_neg_integer(). + +span(S, Cs) when is_list(Cs) -> span(S, Cs, 0). + +span([C|S], Cs, I) -> + case member(C, Cs) of + true -> span(S, Cs, I+1); + false -> I + end; +span([], _Cs, I) -> I. + +-spec cspan(string(), string()) -> non_neg_integer(). + +cspan(S, Cs) when is_list(Cs) -> cspan(S, Cs, 0). + +cspan([C|S], Cs, I) -> + case member(C, Cs) of + true -> I; + false -> cspan(S, Cs, I+1) + end; +cspan([], _Cs, I) -> I. + +%% substr(String, Start) +%% substr(String, Start, Length) +%% Extract a sub-string from String. + +-spec substr(string(), pos_integer()) -> string(). + +substr(String, 1) when is_list(String) -> + String; +substr(String, S) when is_integer(S), S > 1 -> + substr2(String, S). + +-spec substr(string(), pos_integer(), non_neg_integer()) -> string(). + +substr(String, S, L) when is_integer(S), S >= 1, is_integer(L), L >= 0 -> + substr1(substr2(String, S), L). + +substr1([C|String], L) when L > 0 -> [C|substr1(String, L-1)]; +substr1(String, _L) when is_list(String) -> []. %Be nice! + +substr2(String, 1) when is_list(String) -> String; +substr2([_|String], S) -> substr2(String, S-1). + +%% tokens(String, Seperators). +%% Return a list of tokens seperated by characters in Seperators. + +-spec tokens(string(), string()) -> [[char(),...]]. + +tokens(S, Seps) -> + tokens1(S, Seps, []). + +tokens1([C|S], Seps, Toks) -> + case member(C, Seps) of + true -> tokens1(S, Seps, Toks); + false -> tokens2(S, Seps, Toks, [C]) + end; +tokens1([], _Seps, Toks) -> + reverse(Toks). + +tokens2([C|S], Seps, Toks, Cs) -> + case member(C, Seps) of + true -> tokens1(S, Seps, [reverse(Cs)|Toks]); + false -> tokens2(S, Seps, Toks, [C|Cs]) + end; +tokens2([], _Seps, Toks, Cs) -> + reverse([reverse(Cs)|Toks]). + +-spec chars(char(), non_neg_integer()) -> string(). + +chars(C, N) -> chars(C, N, []). + +-spec chars(char(), non_neg_integer(), string()) -> string(). + +chars(C, N, Tail) when N > 0 -> + chars(C, N-1, [C|Tail]); +chars(C, 0, Tail) when is_integer(C) -> + Tail. + +%% Torbjörn's bit. + +%%% COPIES %%% + +-spec copies(string(), non_neg_integer()) -> string(). + +copies(CharList, Num) when is_list(CharList), Num >= 0 -> + copies(CharList, Num, []). + +copies(_CharList, 0, R) -> + R; +copies(CharList, Num, R) -> + copies(CharList, Num-1, CharList++R). + +%%% WORDS %%% + +-spec words(string()) -> pos_integer(). + +words(String) -> words(String, $\s). + +-spec words(string(), char()) -> pos_integer(). + +words(String, Char) when is_integer(Char) -> + w_count(strip(String, both, Char), Char, 0). + +w_count([], _, Num) -> Num+1; +w_count([H|T], H, Num) -> w_count(strip(T, left, H), H, Num+1); +w_count([_H|T], Char, Num) -> w_count(T, Char, Num). + +%%% SUB_WORDS %%% + +-spec sub_word(string(), integer()) -> string(). + +sub_word(String, Index) -> sub_word(String, Index, $\s). + +-spec sub_word(string(), integer(), char()) -> string(). + +sub_word(String, Index, Char) when is_integer(Index), is_integer(Char) -> + case words(String, Char) of + Num when Num < Index -> + []; + _Num -> + s_word(strip(String, left, Char), Index, Char, 1, []) + end. + +s_word([], _, _, _,Res) -> reverse(Res); +s_word([Char|_],Index,Char,Index,Res) -> reverse(Res); +s_word([H|T],Index,Char,Index,Res) -> s_word(T,Index,Char,Index,[H|Res]); +s_word([Char|T],Stop,Char,Index,Res) when Index < Stop -> + s_word(strip(T,left,Char),Stop,Char,Index+1,Res); +s_word([_|T],Stop,Char,Index,Res) when Index < Stop -> + s_word(T,Stop,Char,Index,Res). + +%%% STRIP %%% + +-spec strip(string()) -> string(). + +strip(String) -> strip(String, both). + +-spec strip(string(), direction()) -> string(). + +strip(String, left) -> strip_left(String, $\s); +strip(String, right) -> strip_right(String, $\s); +strip(String, both) -> + strip_right(strip_left(String, $\s), $\s). + +-spec strip(string(), direction(), char()) -> string(). + +strip(String, right, Char) -> strip_right(String, Char); +strip(String, left, Char) -> strip_left(String, Char); +strip(String, both, Char) -> + strip_right(strip_left(String, Char), Char). + +strip_left([Sc|S], Sc) -> + strip_left(S, Sc); +strip_left([_|_]=S, Sc) when is_integer(Sc) -> S; +strip_left([], Sc) when is_integer(Sc) -> []. + +strip_right([Sc|S], Sc) -> + case strip_right(S, Sc) of + [] -> []; + T -> [Sc|T] + end; +strip_right([C|S], Sc) -> + [C|strip_right(S, Sc)]; +strip_right([], Sc) when is_integer(Sc) -> + []. + +%%% LEFT %%% + +-spec left(string(), non_neg_integer()) -> string(). + +left(String, Len) when is_integer(Len) -> left(String, Len, $\s). + +-spec left(string(), non_neg_integer(), char()) -> string(). + +left(String, Len, Char) when is_integer(Char) -> + Slen = length(String), + if + Slen > Len -> substr(String, 1, Len); + Slen < Len -> l_pad(String, Len-Slen, Char); + Slen =:= Len -> String + end. + +l_pad(String, Num, Char) -> String ++ chars(Char, Num). + +%%% RIGHT %%% + +-spec right(string(), non_neg_integer()) -> string(). + +right(String, Len) when is_integer(Len) -> right(String, Len, $\s). + +-spec right(string(), non_neg_integer(), char()) -> string(). + +right(String, Len, Char) when is_integer(Char) -> + Slen = length(String), + if + Slen > Len -> substr(String, Slen-Len+1); + Slen < Len -> r_pad(String, Len-Slen, Char); + Slen =:= Len -> String + end. + +r_pad(String, Num, Char) -> chars(Char, Num, String). + +%%% CENTRE %%% + +-spec centre(string(), non_neg_integer()) -> string(). + +centre(String, Len) when is_integer(Len) -> centre(String, Len, $\s). + +-spec centre(string(), non_neg_integer(), char()) -> string(). + +centre(String, 0, Char) when is_list(String), is_integer(Char) -> + []; % Strange cases to centre string +centre(String, Len, Char) when is_integer(Char) -> + Slen = length(String), + if + Slen > Len -> substr(String, (Slen-Len) div 2 + 1, Len); + Slen < Len -> + N = (Len-Slen) div 2, + r_pad(l_pad(String, Len-(Slen+N), Char), N, Char); + Slen =:= Len -> String + end. + +%%% SUB_STRING %%% + +-spec sub_string(string(), pos_integer()) -> string(). + +sub_string(String, Start) -> substr(String, Start). + +-spec sub_string(string(), pos_integer(), pos_integer()) -> string(). + +sub_string(String, Start, Stop) -> substr(String, Start, Stop - Start + 1). + +%% ISO/IEC 8859-1 (latin1) letters are converted, others are ignored +%% + +to_lower_char(C) when is_integer(C), $A =< C, C =< $Z -> + C + 32; +to_lower_char(C) when is_integer(C), 16#C0 =< C, C =< 16#D6 -> + C + 32; +to_lower_char(C) when is_integer(C), 16#D8 =< C, C =< 16#DE -> + C + 32; +to_lower_char(C) -> + C. + +to_upper_char(C) when is_integer(C), $a =< C, C =< $z -> + C - 32; +to_upper_char(C) when is_integer(C), 16#E0 =< C, C =< 16#F6 -> + C - 32; +to_upper_char(C) when is_integer(C), 16#F8 =< C, C =< 16#FE -> + C - 32; +to_upper_char(C) -> + C. + +-spec to_lower(string()) -> string() + ; (char()) -> char(). + +to_lower(S) when is_list(S) -> + [to_lower_char(C) || C <- S]; +to_lower(C) when is_integer(C) -> + to_lower_char(C). + +-spec to_upper(string()) -> string() + ; (char()) -> char(). + +to_upper(S) when is_list(S) -> + [to_upper_char(C) || C <- S]; +to_upper(C) when is_integer(C) -> + to_upper_char(C). + +-spec join([string()], string()) -> string(). + +join([], Sep) when is_list(Sep) -> + []; +join([H|T], Sep) -> + H ++ lists:append([Sep ++ X || X <- T]). diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl new file mode 100644 index 0000000000..fb1303d1eb --- /dev/null +++ b/lib/stdlib/src/supervisor.erl @@ -0,0 +1,889 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(supervisor). + +-behaviour(gen_server). + +%% External exports +-export([start_link/2,start_link/3, + start_child/2, restart_child/2, + delete_child/2, terminate_child/2, + which_children/1, + check_childspecs/1]). + +-export([behaviour_info/1]). + +%% Internal exports +-export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). +-export([handle_cast/2]). + +-define(DICT, dict). + +-record(state, {name, + strategy, + children = [], + dynamics = ?DICT:new(), + intensity, + period, + restarts = [], + module, + args}). + +-record(child, {pid = undefined, % pid is undefined when child is not running + name, + mfa, + restart_type, + shutdown, + child_type, + modules = []}). + +-define(is_simple(State), State#state.strategy =:= simple_one_for_one). + +behaviour_info(callbacks) -> + [{init,1}]; +behaviour_info(_Other) -> + undefined. + +%%% --------------------------------------------------- +%%% This is a general process supervisor built upon gen_server.erl. +%%% Servers/processes should/could also be built using gen_server.erl. +%%% SupName = {local, atom()} | {global, atom()}. +%%% --------------------------------------------------- +start_link(Mod, Args) -> + gen_server:start_link(supervisor, {self, Mod, Args}, []). + +start_link(SupName, Mod, Args) -> + gen_server:start_link(SupName, supervisor, {SupName, Mod, Args}, []). + +%%% --------------------------------------------------- +%%% Interface functions. +%%% --------------------------------------------------- +start_child(Supervisor, ChildSpec) -> + call(Supervisor, {start_child, ChildSpec}). + +restart_child(Supervisor, Name) -> + call(Supervisor, {restart_child, Name}). + +delete_child(Supervisor, Name) -> + call(Supervisor, {delete_child, Name}). + +%%----------------------------------------------------------------- +%% Func: terminate_child/2 +%% Returns: ok | {error, Reason} +%% Note that the child is *always* terminated in some +%% way (maybe killed). +%%----------------------------------------------------------------- +terminate_child(Supervisor, Name) -> + call(Supervisor, {terminate_child, Name}). + +which_children(Supervisor) -> + call(Supervisor, which_children). + +call(Supervisor, Req) -> + gen_server:call(Supervisor, Req, infinity). + +check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> + case check_startspec(ChildSpecs) of + {ok, _} -> ok; + Error -> {error, Error} + end; +check_childspecs(X) -> {error, {badarg, X}}. + +%%% --------------------------------------------------- +%%% +%%% Initialize the supervisor. +%%% +%%% --------------------------------------------------- +init({SupName, Mod, Args}) -> + process_flag(trap_exit, true), + case Mod:init(Args) of + {ok, {SupFlags, StartSpec}} -> + case init_state(SupName, SupFlags, Mod, Args) of + {ok, State} when ?is_simple(State) -> + init_dynamic(State, StartSpec); + {ok, State} -> + init_children(State, StartSpec); + Error -> + {stop, {supervisor_data, Error}} + end; + ignore -> + ignore; + Error -> + {stop, {bad_return, {Mod, init, Error}}} + end. + +init_children(State, StartSpec) -> + SupName = State#state.name, + case check_startspec(StartSpec) of + {ok, Children} -> + case start_children(Children, SupName) of + {ok, NChildren} -> + {ok, State#state{children = NChildren}}; + {error, NChildren} -> + terminate_children(NChildren, SupName), + {stop, shutdown} + end; + Error -> + {stop, {start_spec, Error}} + end. + +init_dynamic(State, [StartSpec]) -> + case check_startspec([StartSpec]) of + {ok, Children} -> + {ok, State#state{children = Children}}; + Error -> + {stop, {start_spec, Error}} + end; +init_dynamic(_State, StartSpec) -> + {stop, {bad_start_spec, StartSpec}}. + +%%----------------------------------------------------------------- +%% Func: start_children/2 +%% Args: Children = [#child] in start order +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Purpose: Start all children. The new list contains #child's +%% with pids. +%% Returns: {ok, NChildren} | {error, NChildren} +%% NChildren = [#child] in termination order (reversed +%% start order) +%%----------------------------------------------------------------- +start_children(Children, SupName) -> start_children(Children, [], SupName). + +start_children([Child|Chs], NChildren, SupName) -> + case do_start_child(SupName, Child) of + {ok, Pid} -> + start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); + {ok, Pid, _Extra} -> + start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); + {error, Reason} -> + report_error(start_error, Reason, Child, SupName), + {error, lists:reverse(Chs) ++ [Child | NChildren]} + end; +start_children([], NChildren, _SupName) -> + {ok, NChildren}. + +do_start_child(SupName, Child) -> + #child{mfa = {M, F, A}} = Child, + case catch apply(M, F, A) of + {ok, Pid} when is_pid(Pid) -> + NChild = Child#child{pid = Pid}, + report_progress(NChild, SupName), + {ok, Pid}; + {ok, Pid, Extra} when is_pid(Pid) -> + NChild = Child#child{pid = Pid}, + report_progress(NChild, SupName), + {ok, Pid, Extra}; + ignore -> + {ok, undefined}; + {error, What} -> {error, What}; + What -> {error, What} + end. + +do_start_child_i(M, F, A) -> + case catch apply(M, F, A) of + {ok, Pid} when is_pid(Pid) -> + {ok, Pid}; + {ok, Pid, Extra} when is_pid(Pid) -> + {ok, Pid, Extra}; + ignore -> + {ok, undefined}; + {error, Error} -> + {error, Error}; + What -> + {error, What} + end. + + +%%% --------------------------------------------------- +%%% +%%% Callback functions. +%%% +%%% --------------------------------------------------- +handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> + #child{mfa = {M, F, A}} = hd(State#state.children), + Args = A ++ EArgs, + case do_start_child_i(M, F, Args) of + {ok, Pid} -> + NState = State#state{dynamics = + ?DICT:store(Pid, Args, State#state.dynamics)}, + {reply, {ok, Pid}, NState}; + {ok, Pid, Extra} -> + NState = State#state{dynamics = + ?DICT:store(Pid, Args, State#state.dynamics)}, + {reply, {ok, Pid, Extra}, NState}; + What -> + {reply, What, State} + end; + +%%% The requests terminate_child, delete_child and restart_child are +%%% invalid for simple_one_for_one supervisors. +handle_call({_Req, _Data}, _From, State) when ?is_simple(State) -> + {reply, {error, simple_one_for_one}, State}; + +handle_call({start_child, ChildSpec}, _From, State) -> + case check_childspec(ChildSpec) of + {ok, Child} -> + {Resp, NState} = handle_start_child(Child, State), + {reply, Resp, NState}; + What -> + {reply, {error, What}, State} + end; + +handle_call({restart_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} when Child#child.pid =:= undefined -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + NState = replace_child(Child#child{pid = Pid}, State), + {reply, {ok, Pid}, NState}; + {ok, Pid, Extra} -> + NState = replace_child(Child#child{pid = Pid}, State), + {reply, {ok, Pid, Extra}, NState}; + Error -> + {reply, Error, State} + end; + {value, _} -> + {reply, {error, running}, State}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call({delete_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} when Child#child.pid =:= undefined -> + NState = remove_child(Child, State), + {reply, ok, NState}; + {value, _} -> + {reply, {error, running}, State}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call({terminate_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} -> + NChild = do_terminate(Child, State#state.name), + {reply, ok, replace_child(NChild, State)}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call(which_children, _From, State) when ?is_simple(State) -> + [#child{child_type = CT, modules = Mods}] = State#state.children, + Reply = lists:map(fun({Pid, _}) -> {undefined, Pid, CT, Mods} end, + ?DICT:to_list(State#state.dynamics)), + {reply, Reply, State}; + +handle_call(which_children, _From, State) -> + Resp = + lists:map(fun(#child{pid = Pid, name = Name, + child_type = ChildType, modules = Mods}) -> + {Name, Pid, ChildType, Mods} + end, + State#state.children), + {reply, Resp, State}. + + +%%% Hopefully cause a function-clause as there is no API function +%%% that utilizes cast. +handle_cast(null, State) -> + error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", + []), + + {noreply, State}. + +%% +%% Take care of terminated children. +%% +handle_info({'EXIT', Pid, Reason}, State) -> + case restart_child(Pid, Reason, State) of + {ok, State1} -> + {noreply, State1}; + {shutdown, State1} -> + {stop, shutdown, State1} + end; + +handle_info(Msg, State) -> + error_logger:error_msg("Supervisor received unexpected message: ~p~n", + [Msg]), + {noreply, State}. +%% +%% Terminate this server. +%% +terminate(_Reason, State) -> + terminate_children(State#state.children, State#state.name), + ok. + +%% +%% Change code for the supervisor. +%% Call the new call-back module and fetch the new start specification. +%% Combine the new spec. with the old. If the new start spec. is +%% not valid the code change will not succeed. +%% Use the old Args as argument to Module:init/1. +%% NOTE: This requires that the init function of the call-back module +%% does not have any side effects. +%% +code_change(_, State, _) -> + case (State#state.module):init(State#state.args) of + {ok, {SupFlags, StartSpec}} -> + case catch check_flags(SupFlags) of + ok -> + {Strategy, MaxIntensity, Period} = SupFlags, + update_childspec(State#state{strategy = Strategy, + intensity = MaxIntensity, + period = Period}, + StartSpec); + Error -> + {error, Error} + end; + ignore -> + {ok, State}; + Error -> + Error + end. + +check_flags({Strategy, MaxIntensity, Period}) -> + validStrategy(Strategy), + validIntensity(MaxIntensity), + validPeriod(Period), + ok; +check_flags(What) -> + {bad_flags, What}. + +update_childspec(State, StartSpec) when ?is_simple(State) -> + case check_startspec(StartSpec) of + {ok, [Child]} -> + {ok, State#state{children = [Child]}}; + Error -> + {error, Error} + end; + +update_childspec(State, StartSpec) -> + case check_startspec(StartSpec) of + {ok, Children} -> + OldC = State#state.children, % In reverse start order ! + NewC = update_childspec1(OldC, Children, []), + {ok, State#state{children = NewC}}; + Error -> + {error, Error} + end. + +update_childspec1([Child|OldC], Children, KeepOld) -> + case update_chsp(Child, Children) of + {ok,NewChildren} -> + update_childspec1(OldC, NewChildren, KeepOld); + false -> + update_childspec1(OldC, Children, [Child|KeepOld]) + end; +update_childspec1([], Children, KeepOld) -> + % Return them in (keeped) reverse start order. + lists:reverse(Children ++ KeepOld). + +update_chsp(OldCh, Children) -> + case lists:map(fun(Ch) when OldCh#child.name =:= Ch#child.name -> + Ch#child{pid = OldCh#child.pid}; + (Ch) -> + Ch + end, + Children) of + Children -> + false; % OldCh not found in new spec. + NewC -> + {ok, NewC} + end. + +%%% --------------------------------------------------- +%%% Start a new child. +%%% --------------------------------------------------- + +handle_start_child(Child, State) -> + case get_child(Child#child.name, State) of + false -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + Children = State#state.children, + {{ok, Pid}, + State#state{children = + [Child#child{pid = Pid}|Children]}}; + {ok, Pid, Extra} -> + Children = State#state.children, + {{ok, Pid, Extra}, + State#state{children = + [Child#child{pid = Pid}|Children]}}; + {error, What} -> + {{error, {What, Child}}, State} + end; + {value, OldChild} when OldChild#child.pid =/= undefined -> + {{error, {already_started, OldChild#child.pid}}, State}; + {value, _OldChild} -> + {{error, already_present}, State} + end. + +%%% --------------------------------------------------- +%%% Restart. A process has terminated. +%%% Returns: {ok, #state} | {shutdown, #state} +%%% --------------------------------------------------- + +restart_child(Pid, Reason, State) when ?is_simple(State) -> + case ?DICT:find(Pid, State#state.dynamics) of + {ok, Args} -> + [Child] = State#state.children, + RestartType = Child#child.restart_type, + {M, F, _} = Child#child.mfa, + NChild = Child#child{pid = Pid, mfa = {M, F, Args}}, + do_restart(RestartType, Reason, NChild, State); + error -> + {ok, State} + end; +restart_child(Pid, Reason, State) -> + Children = State#state.children, + case lists:keysearch(Pid, #child.pid, Children) of + {value, Child} -> + RestartType = Child#child.restart_type, + do_restart(RestartType, Reason, Child, State); + _ -> + {ok, State} + end. + +do_restart(permanent, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + restart(Child, State); +do_restart(_, normal, Child, State) -> + NState = state_del_child(Child, State), + {ok, NState}; +do_restart(_, shutdown, Child, State) -> + NState = state_del_child(Child, State), + {ok, NState}; +do_restart(transient, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + restart(Child, State); +do_restart(temporary, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + NState = state_del_child(Child, State), + {ok, NState}. + +restart(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState); + {terminate, NState} -> + report_error(shutdown, reached_max_restart_intensity, + Child, State#state.name), + {shutdown, remove_child(Child, NState)} + end. + +restart(simple_one_for_one, Child, State) -> + #child{mfa = {M, F, A}} = Child, + Dynamics = ?DICT:erase(Child#child.pid, State#state.dynamics), + case do_start_child_i(M, F, A) of + {ok, Pid} -> + NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, + {ok, NState}; + {ok, Pid, _Extra} -> + NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, + {ok, NState}; + {error, Error} -> + report_error(start_error, Error, Child, State#state.name), + restart(Child, State) + end; +restart(one_for_one, Child, State) -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + NState = replace_child(Child#child{pid = Pid}, State), + {ok, NState}; + {ok, Pid, _Extra} -> + NState = replace_child(Child#child{pid = Pid}, State), + {ok, NState}; + {error, Reason} -> + report_error(start_error, Reason, Child, State#state.name), + restart(Child, State) + end; +restart(rest_for_one, Child, State) -> + {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), + ChAfter2 = terminate_children(ChAfter, State#state.name), + case start_children(ChAfter2, State#state.name) of + {ok, ChAfter3} -> + {ok, State#state{children = ChAfter3 ++ ChBefore}}; + {error, ChAfter3} -> + restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + end; +restart(one_for_all, Child, State) -> + Children1 = del_child(Child#child.pid, State#state.children), + Children2 = terminate_children(Children1, State#state.name), + case start_children(Children2, State#state.name) of + {ok, NChs} -> + {ok, State#state{children = NChs}}; + {error, NChs} -> + restart(Child, State#state{children = NChs}) + end. + +%%----------------------------------------------------------------- +%% Func: terminate_children/2 +%% Args: Children = [#child] in termination order +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Returns: NChildren = [#child] in +%% startup order (reversed termination order) +%%----------------------------------------------------------------- +terminate_children(Children, SupName) -> + terminate_children(Children, SupName, []). + +terminate_children([Child | Children], SupName, Res) -> + NChild = do_terminate(Child, SupName), + terminate_children(Children, SupName, [NChild | Res]); +terminate_children([], _SupName, Res) -> + Res. + +do_terminate(Child, SupName) when Child#child.pid =/= undefined -> + case shutdown(Child#child.pid, + Child#child.shutdown) of + ok -> + Child#child{pid = undefined}; + {error, OtherReason} -> + report_error(shutdown_error, OtherReason, Child, SupName), + Child#child{pid = undefined} + end; +do_terminate(Child, _SupName) -> + Child. + +%%----------------------------------------------------------------- +%% Shutdowns a child. We must check the EXIT value +%% of the child, because it might have died with another reason than +%% the wanted. In that case we want to report the error. We put a +%% monitor on the child an check for the 'DOWN' message instead of +%% checking for the 'EXIT' message, because if we check the 'EXIT' +%% message a "naughty" child, who does unlink(Sup), could hang the +%% supervisor. +%% Returns: ok | {error, OtherReason} (this should be reported) +%%----------------------------------------------------------------- +shutdown(Pid, brutal_kill) -> + + case monitor_child(Pid) of + ok -> + exit(Pid, kill), + receive + {'DOWN', _MRef, process, Pid, killed} -> + ok; + {'DOWN', _MRef, process, Pid, OtherReason} -> + {error, OtherReason} + end; + {error, Reason} -> + {error, Reason} + end; + +shutdown(Pid, Time) -> + + case monitor_child(Pid) of + ok -> + exit(Pid, shutdown), %% Try to shutdown gracefully + receive + {'DOWN', _MRef, process, Pid, shutdown} -> + ok; + {'DOWN', _MRef, process, Pid, OtherReason} -> + {error, OtherReason} + after Time -> + exit(Pid, kill), %% Force termination. + receive + {'DOWN', _MRef, process, Pid, OtherReason} -> + {error, OtherReason} + end + end; + {error, Reason} -> + {error, Reason} + end. + +%% Help function to shutdown/2 switches from link to monitor approach +monitor_child(Pid) -> + + %% Do the monitor operation first so that if the child dies + %% before the monitoring is done causing a 'DOWN'-message with + %% reason noproc, we will get the real reason in the 'EXIT'-message + %% unless a naughty child has already done unlink... + erlang:monitor(process, Pid), + unlink(Pid), + + receive + %% If the child dies before the unlik we must empty + %% the mail-box of the 'EXIT'-message and the 'DOWN'-message. + {'EXIT', Pid, Reason} -> + receive + {'DOWN', _, process, Pid, _} -> + {error, Reason} + end + after 0 -> + %% If a naughty child did unlink and the child dies before + %% monitor the result will be that shutdown/2 receives a + %% 'DOWN'-message with reason noproc. + %% If the child should die after the unlink there + %% will be a 'DOWN'-message with a correct reason + %% that will be handled in shutdown/2. + ok + end. + + +%%----------------------------------------------------------------- +%% Child/State manipulating functions. +%%----------------------------------------------------------------- +state_del_child(#child{pid = Pid}, State) when ?is_simple(State) -> + NDynamics = ?DICT:erase(Pid, State#state.dynamics), + State#state{dynamics = NDynamics}; +state_del_child(Child, State) -> + NChildren = del_child(Child#child.name, State#state.children), + State#state{children = NChildren}. + +del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name -> + [Ch#child{pid = undefined} | Chs]; +del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid -> + [Ch#child{pid = undefined} | Chs]; +del_child(Name, [Ch|Chs]) -> + [Ch|del_child(Name, Chs)]; +del_child(_, []) -> + []. + +%% Chs = [S4, S3, Ch, S1, S0] +%% Ret: {[S4, S3, Ch], [S1, S0]} +split_child(Name, Chs) -> + split_child(Name, Chs, []). + +split_child(Name, [Ch|Chs], After) when Ch#child.name =:= Name -> + {lists:reverse([Ch#child{pid = undefined} | After]), Chs}; +split_child(Pid, [Ch|Chs], After) when Ch#child.pid =:= Pid -> + {lists:reverse([Ch#child{pid = undefined} | After]), Chs}; +split_child(Name, [Ch|Chs], After) -> + split_child(Name, Chs, [Ch | After]); +split_child(_, [], After) -> + {lists:reverse(After), []}. + +get_child(Name, State) -> + lists:keysearch(Name, #child.name, State#state.children). +replace_child(Child, State) -> + Chs = do_replace_child(Child, State#state.children), + State#state{children = Chs}. + +do_replace_child(Child, [Ch|Chs]) when Ch#child.name =:= Child#child.name -> + [Child | Chs]; +do_replace_child(Child, [Ch|Chs]) -> + [Ch|do_replace_child(Child, Chs)]. + +remove_child(Child, State) -> + Chs = lists:keydelete(Child#child.name, #child.name, State#state.children), + State#state{children = Chs}. + +%%----------------------------------------------------------------- +%% Func: init_state/4 +%% Args: SupName = {local, atom()} | {global, atom()} | self +%% Type = {Strategy, MaxIntensity, Period} +%% Strategy = one_for_one | one_for_all | simple_one_for_one | +%% rest_for_one +%% MaxIntensity = integer() +%% Period = integer() +%% Mod :== atom() +%% Arsg :== term() +%% Purpose: Check that Type is of correct type (!) +%% Returns: {ok, #state} | Error +%%----------------------------------------------------------------- +init_state(SupName, Type, Mod, Args) -> + case catch init_state1(SupName, Type, Mod, Args) of + {ok, State} -> + {ok, State}; + Error -> + Error + end. + +init_state1(SupName, {Strategy, MaxIntensity, Period}, Mod, Args) -> + validStrategy(Strategy), + validIntensity(MaxIntensity), + validPeriod(Period), + {ok, #state{name = supname(SupName,Mod), + strategy = Strategy, + intensity = MaxIntensity, + period = Period, + module = Mod, + args = Args}}; +init_state1(_SupName, Type, _, _) -> + {invalid_type, Type}. + +validStrategy(simple_one_for_one) -> true; +validStrategy(one_for_one) -> true; +validStrategy(one_for_all) -> true; +validStrategy(rest_for_one) -> true; +validStrategy(What) -> throw({invalid_strategy, What}). + +validIntensity(Max) when is_integer(Max), + Max >= 0 -> true; +validIntensity(What) -> throw({invalid_intensity, What}). + +validPeriod(Period) when is_integer(Period), + Period > 0 -> true; +validPeriod(What) -> throw({invalid_period, What}). + +supname(self,Mod) -> {self(),Mod}; +supname(N,_) -> N. + +%%% ------------------------------------------------------ +%%% Check that the children start specification is valid. +%%% Shall be a six (6) tuple +%%% {Name, Func, RestartType, Shutdown, ChildType, Modules} +%%% where Name is an atom +%%% Func is {Mod, Fun, Args} == {atom, atom, list} +%%% RestartType is permanent | temporary | transient +%%% Shutdown = integer() | infinity | brutal_kill +%%% ChildType = supervisor | worker +%%% Modules = [atom()] | dynamic +%%% Returns: {ok, [#child]} | Error +%%% ------------------------------------------------------ + +check_startspec(Children) -> check_startspec(Children, []). + +check_startspec([ChildSpec|T], Res) -> + case check_childspec(ChildSpec) of + {ok, Child} -> + case lists:keymember(Child#child.name, #child.name, Res) of + true -> {duplicate_child_name, Child#child.name}; + false -> check_startspec(T, [Child | Res]) + end; + Error -> Error + end; +check_startspec([], Res) -> + {ok, lists:reverse(Res)}. + +check_childspec({Name, Func, RestartType, Shutdown, ChildType, Mods}) -> + catch check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods); +check_childspec(X) -> {invalid_child_spec, X}. + +check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods) -> + validName(Name), + validFunc(Func), + validRestartType(RestartType), + validChildType(ChildType), + validShutdown(Shutdown, ChildType), + validMods(Mods), + {ok, #child{name = Name, mfa = Func, restart_type = RestartType, + shutdown = Shutdown, child_type = ChildType, modules = Mods}}. + +validChildType(supervisor) -> true; +validChildType(worker) -> true; +validChildType(What) -> throw({invalid_child_type, What}). + +validName(_Name) -> true. + +validFunc({M, F, A}) when is_atom(M), + is_atom(F), + is_list(A) -> true; +validFunc(Func) -> throw({invalid_mfa, Func}). + +validRestartType(permanent) -> true; +validRestartType(temporary) -> true; +validRestartType(transient) -> true; +validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}). + +validShutdown(Shutdown, _) + when is_integer(Shutdown), Shutdown > 0 -> true; +validShutdown(infinity, supervisor) -> true; +validShutdown(brutal_kill, _) -> true; +validShutdown(Shutdown, _) -> throw({invalid_shutdown, Shutdown}). + +validMods(dynamic) -> true; +validMods(Mods) when is_list(Mods) -> + lists:foreach(fun(Mod) -> + if + is_atom(Mod) -> ok; + true -> throw({invalid_module, Mod}) + end + end, + Mods); +validMods(Mods) -> throw({invalid_modules, Mods}). + +%%% ------------------------------------------------------ +%%% Add a new restart and calculate if the max restart +%%% intensity has been reached (in that case the supervisor +%%% shall terminate). +%%% All restarts accured inside the period amount of seconds +%%% are kept in the #state.restarts list. +%%% Returns: {ok, State'} | {terminate, State'} +%%% ------------------------------------------------------ + +add_restart(State) -> + I = State#state.intensity, + P = State#state.period, + R = State#state.restarts, + Now = erlang:now(), + R1 = add_restart([Now|R], Now, P), + State1 = State#state{restarts = R1}, + case length(R1) of + CurI when CurI =< I -> + {ok, State1}; + _ -> + {terminate, State1} + end. + +add_restart([R|Restarts], Now, Period) -> + case inPeriod(R, Now, Period) of + true -> + [R|add_restart(Restarts, Now, Period)]; + _ -> + [] + end; +add_restart([], _, _) -> + []. + +inPeriod(Time, Now, Period) -> + case difference(Time, Now) of + T when T > Period -> + false; + _ -> + true + end. + +%% +%% Time = {MegaSecs, Secs, MicroSecs} (NOTE: MicroSecs is ignored) +%% Calculate the time elapsed in seconds between two timestamps. +%% If MegaSecs is equal just subtract Secs. +%% Else calculate the Mega difference and add the Secs difference, +%% note that Secs difference can be negative, e.g. +%% {827, 999999, 676} diff {828, 1, 653753} == > 2 secs. +%% +difference({TimeM, TimeS, _}, {CurM, CurS, _}) when CurM > TimeM -> + ((CurM - TimeM) * 1000000) + (CurS - TimeS); +difference({_, TimeS, _}, {_, CurS, _}) -> + CurS - TimeS. + +%%% ------------------------------------------------------ +%%% Error and progress reporting. +%%% ------------------------------------------------------ + +report_error(Error, Reason, Child, SupName) -> + ErrorMsg = [{supervisor, SupName}, + {errorContext, Error}, + {reason, Reason}, + {offender, extract_child(Child)}], + error_logger:error_report(supervisor_report, ErrorMsg). + + +extract_child(Child) -> + [{pid, Child#child.pid}, + {name, Child#child.name}, + {mfa, Child#child.mfa}, + {restart_type, Child#child.restart_type}, + {shutdown, Child#child.shutdown}, + {child_type, Child#child.child_type}]. + +report_progress(Child, SupName) -> + Progress = [{supervisor, SupName}, + {started, extract_child(Child)}], + error_logger:info_report(progress, Progress). diff --git a/lib/stdlib/src/supervisor_bridge.erl b/lib/stdlib/src/supervisor_bridge.erl new file mode 100644 index 0000000000..3d2bd2c9a5 --- /dev/null +++ b/lib/stdlib/src/supervisor_bridge.erl @@ -0,0 +1,116 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(supervisor_bridge). + +-behaviour(gen_server). + +%% External exports +-export([start_link/2, start_link/3]). +-export([behaviour_info/1]). +%% Internal exports +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). +-export([code_change/3]). + +behaviour_info(callbacks) -> + [{init,1},{terminate,2}]; +behaviour_info(_Other) -> + undefined. + +%%%----------------------------------------------------------------- +%%% This is a rewrite of supervisor_bridge from BS.3. +%%% +%%% This module is built to function as process code +%%% for a process sitting inbetween a real supervisor +%%% and a not start&recovery complient server/system +%%% The process inbetween simulates start&recovery +%%% behaviour of the server/system below. +%%% +%%% The supervisor_bridge behaviour must export the following +%%% functions: +%%% init(Args) -> {ok, Pid, State} | {error, Reason} | ignore +%%% where Pid is the child process +%%% terminate(Reason, State) -> ok +%%%----------------------------------------------------------------- +-record(state, {mod, pid, child_state, name}). + +start_link(Mod, StartArgs) -> + gen_server:start_link(supervisor_bridge, [Mod, StartArgs, self], []). + +start_link(Name, Mod, StartArgs) -> + gen_server:start_link(Name, supervisor_bridge, [Mod, StartArgs, Name], []). + +%%----------------------------------------------------------------- +%% Callback functions from gen_server +%%----------------------------------------------------------------- +init([Mod, StartArgs, Name0]) -> + process_flag(trap_exit, true), + Name = supname(Name0, Mod), + case Mod:init(StartArgs) of + {ok, Pid, ChildState} when is_pid(Pid) -> + link(Pid), + report_progress(Pid, Mod, StartArgs, Name), + {ok, #state{mod = Mod, pid = Pid, + child_state = ChildState, name = Name}}; + ignore -> + ignore; + {error, Reason} -> + {stop, Reason} + end. + +supname(self, Mod) -> {self(),Mod}; +supname(N, _) -> N. + +%% A supervisor *must* answer the supervisor:which_children call. +handle_call(which_children, _From, State) -> + {reply, [], State}; +handle_call(_Req, _From, State) -> + {reply, {error, badcall}, State}. + +handle_cast(_, State) -> + {noreply, State}. + +handle_info({'EXIT', Pid, Reason}, State) when State#state.pid =:= Pid -> + report_error(child_terminated, Reason, State), + {stop, Reason, State#state{pid = undefined}}; +handle_info(_, State) -> + {noreply, State}. + +terminate(_Reason, #state{pid = undefined}) -> + ok; +terminate(Reason, State) -> + terminate_pid(Reason, State). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% This function is supposed to terminate the 'real' server. +terminate_pid(Reason, #state{mod = Mod, child_state = ChildState}) -> + Mod:terminate(Reason, ChildState). + +report_progress(Pid, Mod, StartArgs, SupName) -> + Progress = [{supervisor, SupName}, + {started, [{pid, Pid}, {mfa, {Mod, init, [StartArgs]}}]}], + error_logger:info_report(progress, Progress). + +report_error(Error, Reason, #state{name = Name, pid = Pid, mod = Mod}) -> + ErrorMsg = [{supervisor, Name}, + {errorContext, Error}, + {reason, Reason}, + {offender, [{pid, Pid}, {mod, Mod}]}], + error_logger:error_report(supervisor_report, ErrorMsg). diff --git a/lib/stdlib/src/sys.erl b/lib/stdlib/src/sys.erl new file mode 100644 index 0000000000..e0f2dbcd3c --- /dev/null +++ b/lib/stdlib/src/sys.erl @@ -0,0 +1,391 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(sys). + +%% External exports +-export([suspend/1, suspend/2, resume/1, resume/2, + get_status/1, get_status/2, + change_code/4, change_code/5, + log/2, log/3, trace/2, trace/3, statistics/2, statistics/3, + log_to_file/2, log_to_file/3, no_debug/1, no_debug/2, + install/2, install/3, remove/2, remove/3]). +-export([handle_system_msg/6, handle_system_msg/7, handle_debug/4, + print_log/1, get_debug/3, debug_options/1, suspend_loop_hib/6]). + +%%----------------------------------------------------------------- +%% Types +%%----------------------------------------------------------------- + +-type name() :: pid() | atom() | {'global', atom()}. +-type system_event() :: {'in', _Msg} | {'in', _Msg, _From} | {'out', _Msg, _To}. + +%%----------------------------------------------------------------- +%% System messages +%%----------------------------------------------------------------- +suspend(Name) -> send_system_msg(Name, suspend). +suspend(Name, Timeout) -> send_system_msg(Name, suspend, Timeout). + +resume(Name) -> send_system_msg(Name, resume). +resume(Name, Timeout) -> send_system_msg(Name, resume, Timeout). + +get_status(Name) -> send_system_msg(Name, get_status). +get_status(Name, Timeout) -> send_system_msg(Name, get_status, Timeout). + +change_code(Name, Mod, Vsn, Extra) -> + send_system_msg(Name, {change_code, Mod, Vsn, Extra}). +change_code(Name, Mod, Vsn, Extra, Timeout) -> + send_system_msg(Name, {change_code, Mod, Vsn, Extra}, Timeout). + +%%----------------------------------------------------------------- +%% Debug commands +%%----------------------------------------------------------------- + +-type log_flag() :: 'true' | {'true',pos_integer()} | 'false' | 'get' | 'print'. + +-spec log(name(), log_flag()) -> 'ok' | {'ok', [system_event()]}. +log(Name, Flag) -> + send_system_msg(Name, {debug, {log, Flag}}). + +-spec log(name(), log_flag(), timeout()) -> 'ok' | {'ok', [system_event()]}. +log(Name, Flag, Timeout) -> + send_system_msg(Name, {debug, {log, Flag}}, Timeout). + +-spec trace(name(), boolean()) -> 'ok'. +trace(Name, Flag) -> + send_system_msg(Name, {debug, {trace, Flag}}). + +-spec trace(name(), boolean(), timeout()) -> 'ok'. +trace(Name, Flag, Timeout) -> + send_system_msg(Name, {debug, {trace, Flag}}, Timeout). + +-type l2f_fname() :: string() | 'false'. + +-spec log_to_file(name(), l2f_fname()) -> 'ok' | {'error','open_file'}. +log_to_file(Name, FileName) -> + send_system_msg(Name, {debug, {log_to_file, FileName}}). + +-spec log_to_file(name(), l2f_fname(), timeout()) -> 'ok' | {'error','open_file'}. +log_to_file(Name, FileName, Timeout) -> + send_system_msg(Name, {debug, {log_to_file, FileName}}, Timeout). + +statistics(Name, Flag) -> + send_system_msg(Name, {debug, {statistics, Flag}}). +statistics(Name, Flag, Timeout) -> + send_system_msg(Name, {debug, {statistics, Flag}}, Timeout). + +-spec no_debug(name()) -> 'ok'. +no_debug(Name) -> send_system_msg(Name, {debug, no_debug}). + +-spec no_debug(name(), timeout()) -> 'ok'. +no_debug(Name, Timeout) -> send_system_msg(Name, {debug, no_debug}, Timeout). + +install(Name, {Func, FuncState}) -> + send_system_msg(Name, {debug, {install, {Func, FuncState}}}). +install(Name, {Func, FuncState}, Timeout) -> + send_system_msg(Name, {debug, {install, {Func, FuncState}}}, Timeout). + +remove(Name, Func) -> + send_system_msg(Name, {debug, {remove, Func}}). +remove(Name, Func, Timeout) -> + send_system_msg(Name, {debug, {remove, Func}}, Timeout). + +%%----------------------------------------------------------------- +%% All system messages sent are on the form {system, From, Msg} +%% The receiving side should send Msg to handle_system_msg/5. +%%----------------------------------------------------------------- +send_system_msg(Name, Request) -> + case catch gen:call(Name, system, Request) of + {ok,Res} -> Res; + {'EXIT', Reason} -> exit({Reason, mfa(Name, Request)}) + end. + +send_system_msg(Name, Request, Timeout) -> + case catch gen:call(Name, system, Request, Timeout) of + {ok,Res} -> Res; + {'EXIT', Reason} -> exit({Reason, mfa(Name, Request, Timeout)}) + end. + +mfa(Name, {debug, {Func, Arg2}}) -> + {sys, Func, [Name, Arg2]}; +mfa(Name, {change_code, Mod, Vsn, Extra}) -> + {sys, change_code, [Name, Mod, Vsn, Extra]}; +mfa(Name, Atom) -> + {sys, Atom, [Name]}. +mfa(Name, Req, Timeout) -> + {M, F, A} = mfa(Name, Req), + {M, F, A ++ [Timeout]}. + +%%----------------------------------------------------------------- +%% Func: handle_system_msg/6 +%% Args: Msg ::= term() +%% From ::= {pid(),Ref} but don't count on that +%% Parent ::= pid() +%% Module ::= atom() +%% Debug ::= [debug_opts()] +%% Misc ::= term() +%% Purpose: Used by a process module that wishes to take care of +%% system messages. The process receives a {system, From, +%% Msg} message, and passes the Msg to this function. +%% Returns: This function *never* returns! It calls the function +%% Module:system_continue(Parent, NDebug, Misc) +%% there the process continues the execution or +%% Module:system_terminate(Raeson, Parent, Debug, Misc) if +%% the process should terminate. +%% The Module must export system_continue/3, system_terminate/4 +%% and format_status/2 for status information. +%%----------------------------------------------------------------- +handle_system_msg(Msg, From, Parent, Module, Debug, Misc) -> + handle_system_msg(running, Msg, From, Parent, Module, Debug, Misc, false). + +handle_system_msg(Msg, From, Parent, Mod, Debug, Misc, Hib) -> + handle_system_msg(running, Msg, From, Parent, Mod, Debug, Misc, Hib). + +handle_system_msg(SysState, Msg, From, Parent, Mod, Debug, Misc, Hib) -> + case do_cmd(SysState, Msg, Parent, Mod, Debug, Misc) of + {suspended, Reply, NDebug, NMisc} -> + gen:reply(From, Reply), + suspend_loop(suspended, Parent, Mod, NDebug, NMisc, Hib); + {running, Reply, NDebug, NMisc} -> + gen:reply(From, Reply), + Mod:system_continue(Parent, NDebug, NMisc) + end. + +%%----------------------------------------------------------------- +%% Func: handle_debug/4 +%% Args: Debug ::= [debug_opts()] +%% Func ::= {M,F} | fun() arity 3 +%% State ::= term() +%% Event ::= {in, Msg} | {in, Msg, From} | {out, Msg, To} | term() +%% Purpose: Called by a process that wishes to debug an event. +%% Func is a formatting function, called as Func(Device, Event). +%% Returns: [debug_opts()] +%%----------------------------------------------------------------- +handle_debug([{trace, true} | T], FormFunc, State, Event) -> + print_event({Event, State, FormFunc}), + [{trace, true} | handle_debug(T, FormFunc, State, Event)]; +handle_debug([{log, {N, LogData}} | T], FormFunc, State, Event) -> + NLogData = [{Event, State, FormFunc} | trim(N, LogData)], + [{log, {N, NLogData}} | handle_debug(T, FormFunc, State, Event)]; +handle_debug([{log_to_file, Fd} | T], FormFunc, State, Event) -> + print_event(Fd, {Event, State, FormFunc}), + [{log_to_file, Fd} | handle_debug(T, FormFunc, State, Event)]; +handle_debug([{statistics, StatData} | T], FormFunc, State, Event) -> + NStatData = stat(Event, StatData), + [{statistics, NStatData} | handle_debug(T, FormFunc, State, Event)]; +handle_debug([{Func, FuncState} | T], FormFunc, State, Event) -> + case catch Func(FuncState, Event, State) of + done -> handle_debug(T, FormFunc, State, Event); + {'EXIT', _} -> handle_debug(T, FormFunc, State, Event); + NFuncState -> + [{Func, NFuncState} | handle_debug(T, FormFunc, State, Event)] + end; +handle_debug([], _FormFunc, _State, _Event) -> + []. + +%%----------------------------------------------------------------- +%% When a process is suspended, it can only respond to system +%% messages. +%%----------------------------------------------------------------- +suspend_loop(SysState, Parent, Mod, Debug, Misc, Hib) -> + case Hib of + true -> + suspend_loop_hib(SysState, Parent, Mod, Debug, Misc, Hib); + _ -> + receive + {system, From, Msg} -> + handle_system_msg(SysState, Msg, From, Parent, Mod, Debug, Misc, Hib); + {'EXIT', Parent, Reason} -> + Mod:system_terminate(Reason, Parent, Debug, Misc) + end + end. + +suspend_loop_hib(SysState, Parent, Mod, Debug, Misc, Hib) -> + receive + {system, From, Msg} -> + handle_system_msg(SysState, Msg, From, Parent, Mod, Debug, Misc, Hib); + {'EXIT', Parent, Reason} -> + Mod:system_terminate(Reason, Parent, Debug, Misc) + after 0 -> % Not a system message, go back into hibernation + proc_lib:hibernate(?MODULE, suspend_loop_hib, [SysState, Parent, Mod, + Debug, Misc, Hib]) + end. + + +do_cmd(_, suspend, _Parent, _Mod, Debug, Misc) -> + {suspended, ok, Debug, Misc}; +do_cmd(_, resume, _Parent, _Mod, Debug, Misc) -> + {running, ok, Debug, Misc}; +do_cmd(SysState, get_status, Parent, Mod, Debug, Misc) -> + Res = get_status(SysState, Parent, Mod, Debug, Misc), + {SysState, Res, Debug, Misc}; +do_cmd(SysState, {debug, What}, _Parent, _Mod, Debug, Misc) -> + {Res, NDebug} = debug_cmd(What, Debug), + {SysState, Res, NDebug, Misc}; +do_cmd(suspended, {change_code, Module, Vsn, Extra}, _Parent, + Mod, Debug, Misc) -> + {Res, NMisc} = do_change_code(Mod, Module, Vsn, Extra, Misc), + {suspended, Res, Debug, NMisc}; +do_cmd(SysState, Other, _Parent, _Mod, Debug, Misc) -> + {SysState, {error, {unknown_system_msg, Other}}, Debug, Misc}. + +get_status(SysState, Parent, Mod, Debug, Misc) -> + {status, self(), {module, Mod}, + [get(), SysState, Parent, Debug, Misc]}. + +%%----------------------------------------------------------------- +%% These are the system debug commands. +%% {trace, true|false} -> io:format +%% {log, true|false|get|print} -> keeps the 10 last debug messages +%% {log_to_file, FileName | false} -> io:format to file. +%% {statistics, true|false|get} -> keeps track of messages in/out + reds. +%%----------------------------------------------------------------- +debug_cmd({trace, true}, Debug) -> + {ok, install_debug(trace, true, Debug)}; +debug_cmd({trace, false}, Debug) -> + {ok, remove_debug(trace, Debug)}; +debug_cmd({log, true}, Debug) -> + {_N, Logs} = get_debug(log, Debug, {0, []}), + {ok, install_debug(log, {10, trim(10, Logs)}, Debug)}; +debug_cmd({log, {true, N}}, Debug) when is_integer(N), N > 0 -> + {_N, Logs} = get_debug(log, Debug, {0, []}), + {ok, install_debug(log, {N, trim(N, Logs)}, Debug)}; +debug_cmd({log, false}, Debug) -> + {ok, remove_debug(log, Debug)}; +debug_cmd({log, print}, Debug) -> + print_log(Debug), + {ok, Debug}; +debug_cmd({log, get}, Debug) -> + {_N, Logs} = get_debug(log, Debug, {0, []}), + {{ok, lists:reverse(Logs)}, Debug}; +debug_cmd({log_to_file, false}, Debug) -> + NDebug = close_log_file(Debug), + {ok, NDebug}; +debug_cmd({log_to_file, FileName}, Debug) -> + NDebug = close_log_file(Debug), + case file:open(FileName, [write]) of + {ok, Fd} -> + {ok, install_debug(log_to_file, Fd, NDebug)}; + _Error -> + {{error, open_file}, NDebug} + end; +debug_cmd({statistics, true}, Debug) -> + {ok, install_debug(statistics, init_stat(), Debug)}; +debug_cmd({statistics, false}, Debug) -> + {ok, remove_debug(statistics, Debug)}; +debug_cmd({statistics, get}, Debug) -> + {{ok, get_stat(get_debug(statistics, Debug, []))}, Debug}; +debug_cmd(no_debug, Debug) -> + close_log_file(Debug), + {ok, []}; +debug_cmd({install, {Func, FuncState}}, Debug) -> + {ok, install_debug(Func, FuncState, Debug)}; +debug_cmd({remove, Func}, Debug) -> + {ok, remove_debug(Func, Debug)}; +debug_cmd(_Unknown, Debug) -> + {unknown_debug, Debug}. + + +do_change_code(Mod, Module, Vsn, Extra, Misc) -> + case catch Mod:system_code_change(Misc, Module, Vsn, Extra) of + {ok, NMisc} -> {ok, NMisc}; + Else -> {{error, Else}, Misc} + end. + +print_event(X) -> print_event(standard_io, X). + +print_event(Dev, {Event, State, FormFunc}) -> + FormFunc(Dev, Event, State). + +init_stat() -> {erlang:localtime(), process_info(self(), reductions), 0, 0}. +get_stat({Time, {reductions, Reds}, In, Out}) -> + {reductions, Reds2} = process_info(self(), reductions), + [{start_time, Time}, {current_time, erlang:localtime()}, + {reductions, Reds2 - Reds}, {messages_in, In}, {messages_out, Out}]; +get_stat(_) -> + no_statistics. + +stat({in, _Msg}, {Time, Reds, In, Out}) -> {Time, Reds, In+1, Out}; +stat({in, _Msg, _From}, {Time, Reds, In, Out}) -> {Time, Reds, In+1, Out}; +stat({out, _Msg, _To}, {Time, Reds, In, Out}) -> {Time, Reds, In, Out+1}; +stat(_, StatData) -> StatData. + +trim(N, LogData) -> + lists:sublist(LogData, 1, N-1). + +%%----------------------------------------------------------------- +%% Debug structure manipulating functions +%%----------------------------------------------------------------- +install_debug(Item, Data, Debug) -> + case get_debug(Item, Debug, undefined) of + undefined -> [{Item, Data} | Debug]; + _ -> Debug + end. +remove_debug(Item, Debug) -> lists:keydelete(Item, 1, Debug). +get_debug(Item, Debug, Default) -> + case lists:keysearch(Item, 1, Debug) of + {value, {Item, Data}} -> Data; + _ -> Default + end. + +print_log(Debug) -> + {_N, Logs} = get_debug(log, Debug, {0, []}), + lists:foreach(fun print_event/1, + lists:reverse(Logs)). + +close_log_file(Debug) -> + case get_debug(log_to_file, Debug, []) of + [] -> + Debug; + Fd -> + ok = file:close(Fd), + remove_debug(log_to_file, Debug) + end. + +%%----------------------------------------------------------------- +%% Func: debug_options/1 +%% Args: [trace|log|{log,N}|statistics|{log_to_file, FileName}| +%% {install, {Func, FuncState}}] +%% Purpose: Initiate a debug structure. Called by a process that +%% wishes to initiate the debug structure without the +%% system messages. +%% Returns: [debug_opts()] +%%----------------------------------------------------------------- +debug_options(Options) -> + debug_options(Options, []). +debug_options([trace | T], Debug) -> + debug_options(T, install_debug(trace, true, Debug)); +debug_options([log | T], Debug) -> + debug_options(T, install_debug(log, {10, []}, Debug)); +debug_options([{log, N} | T], Debug) when is_integer(N), N > 0 -> + debug_options(T, install_debug(log, {N, []}, Debug)); +debug_options([statistics | T], Debug) -> + debug_options(T, install_debug(statistics, init_stat(), Debug)); +debug_options([{log_to_file, FileName} | T], Debug) -> + case file:open(FileName, [write]) of + {ok, Fd} -> + debug_options(T, install_debug(log_to_file, Fd, Debug)); + _Error -> + debug_options(T, Debug) + end; +debug_options([{install, {Func, FuncState}} | T], Debug) -> + debug_options(T, install_debug(Func, FuncState, Debug)); +debug_options([_ | T], Debug) -> + debug_options(T, Debug); +debug_options([], Debug) -> + Debug. diff --git a/lib/stdlib/src/timer.erl b/lib/stdlib/src/timer.erl new file mode 100644 index 0000000000..36fdb48c75 --- /dev/null +++ b/lib/stdlib/src/timer.erl @@ -0,0 +1,364 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(timer). + +-export([apply_after/4, + send_after/3, send_after/2, + exit_after/3, exit_after/2, kill_after/2, kill_after/1, + apply_interval/4, send_interval/3, send_interval/2, + cancel/1, sleep/1, tc/3, now_diff/2, + seconds/1, minutes/1, hours/1, hms/3]). + +-export([start_link/0, start/0, + handle_call/3, handle_info/2, + init/1, + code_change/3, handle_cast/2, terminate/2]). + +%% internal exports for test purposes only +-export([get_status/0]). + +%% Max +-define(MAX_TIMEOUT, 16#0800000). +-define(TIMER_TAB, timer_tab). +-define(INTERVAL_TAB, timer_interval_tab). + +%% +%% Time is in milliseconds. +%% +-opaque tref() :: any(). +-type time() :: non_neg_integer(). +-type timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}. + +%% +%% Interface functions +%% +-spec apply_after(time(), atom(), atom(), [_]) -> {'ok', tref()} | {'error', _}. +apply_after(Time, M, F, A) -> + req(apply_after, {Time, {M, F, A}}). + +-spec send_after(time(), pid() | atom(), term()) -> {'ok', tref()} | {'error', _}. +send_after(Time, Pid, Message) -> + req(apply_after, {Time, {?MODULE, send, [Pid, Message]}}). + +-spec send_after(time(), _) -> {'ok', tref()} | {'error', _}. +send_after(Time, Message) -> + send_after(Time, self(), Message). + +-spec exit_after(time(), pid() | atom(), _) -> {'ok', tref()} | {'error', _}. +exit_after(Time, Pid, Reason) -> + req(apply_after, {Time, {erlang, exit, [Pid, Reason]}}). + +-spec exit_after(time(), term()) -> {'ok', tref()} | {'error', _}. +exit_after(Time, Reason) -> + exit_after(Time, self(), Reason). + +-spec kill_after(time(), pid() | atom()) -> {'ok', tref()} | {'error', _}. +kill_after(Time, Pid) -> + exit_after(Time, Pid, kill). + +-spec kill_after(time()) -> {'ok', tref()} | {'error', _}. +kill_after(Time) -> + exit_after(Time, self(), kill). + +-spec apply_interval(time(), atom(), atom(), [_]) -> {'ok', tref()} | {'error', _}. +apply_interval(Time, M, F, A) -> + req(apply_interval, {Time, self(), {M, F, A}}). + +-spec send_interval(time(), pid() | atom(), term()) -> {'ok', tref()} | {'error', _}. +send_interval(Time, Pid, Message) -> + req(apply_interval, {Time, Pid, {?MODULE, send, [Pid, Message]}}). + +-spec send_interval(time(), term()) -> {'ok', tref()} | {'error', _}. +send_interval(Time, Message) -> + send_interval(Time, self(), Message). + +-spec cancel(tref()) -> {'ok', 'cancel'} | {'error', _}. +cancel(BRef) -> + req(cancel, BRef). + +-spec sleep(timeout()) -> 'ok'. +sleep(T) -> + receive + after T -> ok + end. + +%% +%% Measure the execution time (in microseconds) for an MFA. +%% +-spec tc(atom(), atom(), [_]) -> {time(), term()}. +tc(M, F, A) -> + Before = erlang:now(), + Val = (catch apply(M, F, A)), + After = erlang:now(), + {now_diff(After, Before), Val}. + +%% +%% Calculate the time difference (in microseconds) of two +%% erlang:now() timestamps, T2-T1. +%% +-spec now_diff(timestamp(), timestamp()) -> integer(). +now_diff({A2, B2, C2}, {A1, B1, C1}) -> + ((A2-A1)*1000000 + B2-B1)*1000000 + C2-C1. + +%% +%% Convert seconds, minutes etc. to milliseconds. +%% +-spec seconds(non_neg_integer()) -> non_neg_integer(). +seconds(Seconds) -> + 1000*Seconds. +-spec minutes(non_neg_integer()) -> non_neg_integer(). +minutes(Minutes) -> + 1000*60*Minutes. +-spec hours(non_neg_integer()) -> non_neg_integer(). +hours(Hours) -> + 1000*60*60*Hours. +-spec hms(non_neg_integer(), non_neg_integer(), non_neg_integer()) -> non_neg_integer(). +hms(H, M, S) -> + hours(H) + minutes(M) + seconds(S). + +%% +%% Start/init functions +%% + +%% Start is only included because of backward compatibility! +-spec start() -> 'ok'. +start() -> + ensure_started(). + +-spec start_link() -> {'ok', pid()} | {'error', _}. +start_link() -> + gen_server:start_link({local, timer_server}, ?MODULE, [], []). + +-spec init([]) -> {'ok', [], 'infinity'}. +init([]) -> + process_flag(trap_exit, true), + ?TIMER_TAB = ets:new(?TIMER_TAB, [named_table,ordered_set,protected]), + ?INTERVAL_TAB = ets:new(?INTERVAL_TAB, [named_table,protected]), + {ok, [], infinity}. + +ensure_started() -> + case whereis(timer_server) of + undefined -> + C = {timer_server, {?MODULE, start_link, []}, permanent, 1000, + worker, [?MODULE]}, + supervisor:start_child(kernel_safe_sup, C), % kernel_safe_sup + ok; + _ -> ok + end. + +%% server calls + +req(Req, Arg) -> + SysTime = system_time(), + ensure_started(), + gen_server:call(timer_server, {Req, Arg, SysTime}, infinity). + +%% +%% handle_call(Request, From, Timers) -> +%% {reply, Response, Timers, Timeout} +%% +%% Time and Timeout is in milliseconds. Started is in microseconds. +%% +handle_call({apply_after, {Time, Op}, Started}, _From, _Ts) + when is_integer(Time), Time >= 0 -> + BRef = {Started + 1000*Time, make_ref()}, + Timer = {BRef, timeout, Op}, + ets:insert(?TIMER_TAB, Timer), + Timeout = timer_timeout(system_time()), + {reply, {ok, BRef}, [], Timeout}; +handle_call({apply_interval, {Time, To, MFA}, Started}, _From, _Ts) + when is_integer(Time), Time >= 0 -> + %% To must be a pid or a registered name + case get_pid(To) of + Pid when is_pid(Pid) -> + catch link(Pid), + SysTime = system_time(), + Ref = make_ref(), + BRef1 = {interval, Ref}, + Interval = Time*1000, + BRef2 = {Started + Interval, Ref}, + Timer = {BRef2, {repeat, Interval, Pid}, MFA}, + ets:insert(?INTERVAL_TAB,{BRef1,BRef2,Pid}), + ets:insert(?TIMER_TAB, Timer), + Timeout = timer_timeout(SysTime), + {reply, {ok, BRef1}, [], Timeout}; + _ -> + {reply, {error, badarg}, [], next_timeout()} + end; +handle_call({cancel, BRef = {_Time, Ref}, _}, _From, Ts) + when is_reference(Ref) -> + delete_ref(BRef), + {reply, {ok, cancel}, Ts, next_timeout()}; +handle_call({cancel, _BRef, _}, _From, Ts) -> + {reply, {error, badarg}, Ts, next_timeout()}; +handle_call({apply_after, _, _}, _From, Ts) -> + {reply, {error, badarg}, Ts, next_timeout()}; +handle_call({apply_interval, _, _}, _From, Ts) -> + {reply, {error, badarg}, Ts, next_timeout()}; +handle_call(_Else, _From, Ts) -> % Catch anything else + {noreply, Ts, next_timeout()}. + +handle_info(timeout, Ts) -> % Handle timeouts + Timeout = timer_timeout(system_time()), + {noreply, Ts, Timeout}; +handle_info({'EXIT', Pid, _Reason}, Ts) -> % Oops, someone died + pid_delete(Pid), + {noreply, Ts, next_timeout()}; +handle_info(_OtherMsg, Ts) -> % Other Msg's + {noreply, Ts, next_timeout()}. + +handle_cast(_Req, Ts) -> % Not predicted but handled + {noreply, Ts, next_timeout()}. + +-spec terminate(_, _) -> 'ok'. +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + %% According to the man for gen server no timer can be set here. + {ok, State}. + +%% +%% timer_timeout(Timers, SysTime) +%% +%% Apply and remove already timed-out timers. A timer is a tuple +%% {Time, BRef, Op, MFA}, where Time is in microseconds. +%% Returns {Timeout, Timers}, where Timeout is in milliseconds. +%% +timer_timeout(SysTime) -> + case ets:first(?TIMER_TAB) of + '$end_of_table' -> + infinity; + {Time, _Ref} when Time > SysTime -> + Timeout = (Time - SysTime) div 1000, + %% Returned timeout must fit in a small int + erlang:min(Timeout, ?MAX_TIMEOUT); + Key -> + case ets:lookup(?TIMER_TAB, Key) of + [{Key, timeout, MFA}] -> + ets:delete(?TIMER_TAB,Key), + do_apply(MFA), + timer_timeout(SysTime); + [{{Time, Ref}, Repeat = {repeat, Interv, To}, MFA}] -> + ets:delete(?TIMER_TAB,Key), + NewTime = Time + Interv, + %% Update the interval entry (last in table) + ets:insert(?INTERVAL_TAB,{{interval,Ref},{NewTime,Ref},To}), + do_apply(MFA), + ets:insert(?TIMER_TAB, {{NewTime, Ref}, Repeat, MFA}), + timer_timeout(SysTime) + end + end. + +%% +%% delete_ref +%% + +delete_ref(BRef = {interval, _}) -> + case ets:lookup(?INTERVAL_TAB, BRef) of + [{_, BRef2, _Pid}] -> + ets:delete(?INTERVAL_TAB, BRef), + ets:delete(?TIMER_TAB, BRef2); + _ -> % TimerReference does not exist, do nothing + ok + end; +delete_ref(BRef) -> + ets:delete(?TIMER_TAB,BRef). + +%% +%% pid_delete +%% + +pid_delete(Pid) -> + IntervalTimerList = + ets:select(?INTERVAL_TAB, + [{{'_', '_','$1'}, + [{'==','$1',Pid}], + ['$_']}]), + lists:foreach(fun({IntKey, TimerKey, _ }) -> + ets:delete(?INTERVAL_TAB,IntKey), + ets:delete(?TIMER_TAB,TimerKey) + end, IntervalTimerList). + +%% Calculate time to the next timeout. Returned timeout must fit in a +%% small int. + +next_timeout() -> + case ets:first(?TIMER_TAB) of + '$end_of_table' -> + infinity; + {Time, _} -> + erlang:min(positive((Time - system_time()) div 1000), ?MAX_TIMEOUT) + end. + +%% Help functions +do_apply({M,F,A}) -> + case {M, F, A} of + {?MODULE, send, A} -> + %% If send op. send directly, (faster than spawn) + catch send(A); + {erlang, exit, [Name, Reason]} -> + catch exit(get_pid(Name), Reason); + _ -> + %% else spawn process with the operation + catch spawn(M,F,A) + end. + +positive(X) -> + erlang:max(X, 0). + + +%% +%% system_time() -> time in microseconds +%% +system_time() -> + {M,S,U} = erlang:now(), + 1000000 * (M*1000000 + S) + U. + + +send([Pid, Msg]) -> + Pid ! Msg. + +get_pid(Name) when is_pid(Name) -> + Name; +get_pid(undefined) -> + undefined; +get_pid(Name) when is_atom(Name) -> + get_pid(whereis(Name)); +get_pid(_) -> + undefined. + +%% +%% get_status() -> +%% {{TimerTabName,TotalNumTimers},{IntervalTabName,NumIntervalTimers}} +%% +%% This function is for test purposes only; it is used by the test suite. +%% There is a small possibility that there is a mismatch of one entry +%% between the 2 tables if this call is made when the timer server is +%% in the middle of a transaction + +-spec get_status() -> + {{?TIMER_TAB,non_neg_integer()},{?INTERVAL_TAB,non_neg_integer()}}. + +get_status() -> + Info1 = ets:info(?TIMER_TAB), + {value,{size,TotalNumTimers}} = lists:keysearch(size, 1, Info1), + Info2 = ets:info(?INTERVAL_TAB), + {value,{size,NumIntervalTimers}} = lists:keysearch(size, 1, Info2), + {{?TIMER_TAB,TotalNumTimers},{?INTERVAL_TAB,NumIntervalTimers}}. diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl new file mode 100644 index 0000000000..09b1deff9c --- /dev/null +++ b/lib/stdlib/src/unicode.erl @@ -0,0 +1,677 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(unicode). + +%% Implemented in the emulator: +%% characters_to_binary/2 (will trap to characters_to_binary_int/2 +%% if InEncoding is not {latin1 | unicode | utf8}) +%% characters_to_list/2 (will trap to characters_to_list_int/2 if +%% InEncoding is not {latin1 | unicode | utf8}) +%% + +-export([characters_to_list/1, characters_to_list_int/2, characters_to_binary/1,characters_to_binary_int/2, characters_to_binary/3,bom_to_encoding/1, encoding_to_bom/1]). + + +characters_to_list(ML) -> + unicode:characters_to_list(ML,unicode). + +characters_to_list_int(ML, Encoding) -> + try + do_characters_to_list(ML,Encoding) + catch + error:AnyError -> + TheError = case AnyError of + system_limit -> + system_limit; + _ -> + badarg + end, + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [ML,Encoding])), + erlang:raise(error,TheError,[{Mod,characters_to_list,L}|Rest]) + end. + +% XXX: Optimize me! +do_characters_to_list(ML, Encoding) -> + case unicode:characters_to_binary(ML,Encoding) of + Bin when is_binary(Bin) -> + unicode:characters_to_list(Bin,utf8); + {error,Encoded,Rest} -> + {error,unicode:characters_to_list(Encoded,utf8),Rest}; + {incomplete, Encoded2, Rest2} -> + {incomplete,unicode:characters_to_list(Encoded2,utf8),Rest2} + end. + + +characters_to_binary(ML) -> + try + unicode:characters_to_binary(ML,unicode) + catch + error:AnyError -> + TheError = case AnyError of + system_limit -> + system_limit; + _ -> + badarg + end, + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [ML])), + erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest]) + end. + + +characters_to_binary_int(ML,InEncoding) -> + try + characters_to_binary_int(ML,InEncoding,unicode) + catch + error:AnyError -> + TheError = case AnyError of + system_limit -> + system_limit; + _ -> + badarg + end, + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [ML,InEncoding])), + erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest]) + end. + +characters_to_binary(ML, latin1, latin1) when is_binary(ML) -> + ML; +characters_to_binary(ML, latin1, Uni) when is_binary(ML) and ((Uni =:= utf8) or (Uni =:= unicode)) -> + case unicode:bin_is_7bit(ML) of + true -> + ML; + false -> + try + characters_to_binary_int(ML,latin1,utf8) + catch + error:AnyError -> + TheError = case AnyError of + system_limit -> + system_limit; + _ -> + badarg + end, + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [ML,latin1,Uni])), + erlang:raise(error,TheError, + [{Mod,characters_to_binary,L}|Rest]) + end + end; +characters_to_binary(ML,Uni,latin1) when is_binary(ML) and ((Uni =:= utf8) or (Uni =:= unicode)) -> + case unicode:bin_is_7bit(ML) of + true -> + ML; + false -> + try + characters_to_binary_int(ML,utf8,latin1) + catch + error:AnyError -> + TheError = case AnyError of + system_limit -> + system_limit; + _ -> + badarg + end, + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [ML,Uni,latin1])), + erlang:raise(error,TheError, + [{Mod,characters_to_binary,L}|Rest]) + end + end; + +characters_to_binary(ML, InEncoding, OutEncoding) -> + try + characters_to_binary_int(ML,InEncoding,OutEncoding) + catch + error:AnyError -> + TheError = case AnyError of + system_limit -> + system_limit; + _ -> + badarg + end, + {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + (catch erlang:error(new_stacktrace, + [ML,InEncoding,OutEncoding])), + erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest]) + end. + +characters_to_binary_int(ML, InEncoding, OutEncoding) when + InEncoding =:= latin1, OutEncoding =:= unicode; + InEncoding =:= latin1, OutEncoding =:= utf8; + InEncoding =:= unicode, OutEncoding =:= unicode; + InEncoding =:= unicode, OutEncoding =:= utf8; + InEncoding =:= utf8, OutEncoding =:= unicode; + InEncoding =:= utf8, OutEncoding =:= utf8 -> + unicode:characters_to_binary(ML,InEncoding); + +characters_to_binary_int(ML, InEncoding, OutEncoding) -> + {InTrans,Limit} = case OutEncoding of + latin1 -> {i_trans_chk(InEncoding),255}; + _ -> {i_trans(InEncoding),case InEncoding of latin1 -> 255; _ -> 16#10FFFF end} + end, + OutTrans = o_trans(OutEncoding), + Res = + ml_map(ML, + fun(Part,Accum) when is_binary(Part) -> + case InTrans(Part) of + List when is_list(List) -> + Tail = OutTrans(List), + <>; + {error, Translated, Rest} -> + Tail = OutTrans(Translated), + {error, <>, Rest}; + {incomplete, Translated, Rest, Missing} -> + Tail = OutTrans(Translated), + {incomplete, <>, Rest, + Missing} + end; + (Part, Accum) when is_integer(Part), Part =< Limit -> + case OutTrans([Part]) of + Binary when is_binary(Binary) -> + <>; + {error, _, [Part]} -> + {error,Accum,[Part]} + end; + (Part, Accum) -> + {error, Accum, [Part]} + end,<<>>), + case Res of + {incomplete,A,B,_} -> + {incomplete,A,B}; + _ -> + Res + end. + +bom_to_encoding(<<239,187,191,_/binary>>) -> + {utf8,3}; +bom_to_encoding(<<0,0,254,255,_/binary>>) -> + {{utf32,big},4}; +bom_to_encoding(<<255,254,0,0,_/binary>>) -> + {{utf32,little},4}; +bom_to_encoding(<<254,255,_/binary>>) -> + {{utf16,big},2}; +bom_to_encoding(<<255,254,_/binary>>) -> + {{utf16,little},2}; +bom_to_encoding(Bin) when is_binary(Bin) -> + {latin1,0}. + +encoding_to_bom(unicode) -> + <<239,187,191>>; +encoding_to_bom(utf8) -> + <<239,187,191>>; +encoding_to_bom(utf16) -> + <<254,255>>; +encoding_to_bom({utf16,big}) -> + <<254,255>>; +encoding_to_bom({utf16,little}) -> + <<255,254>>; +encoding_to_bom(utf32) -> + <<0,0,254,255>>; +encoding_to_bom({utf32,big}) -> + <<0,0,254,255>>; +encoding_to_bom({utf32,little}) -> + <<255,254,0,0>>; +encoding_to_bom(latin1) -> + <<>>. + + +cbv(utf8,<<1:1,1:1,0:1,_:5>>) -> + 1; +cbv(utf8,<<1:1,1:1,1:1,0:1,_:4,R/binary>>) -> + case R of + <<>> -> + 2; + <<1:1,0:1,_:6>> -> + 1; + _ -> + false + end; +cbv(utf8,<<1:1,1:1,1:1,1:1,0:1,_:3,R/binary>>) -> + case R of + <<>> -> + 3; + <<1:1,0:1,_:6>> -> + 2; + <<1:1,0:1,_:6,1:1,0:1,_:6>> -> + 1; + _ -> + false + end; +cbv(utf8,_) -> + false; + +cbv({utf16,big},<>) when A =< 215; A >= 224 -> + 1; +cbv({utf16,big},<<54:6,_:2>>) -> + 3; +cbv({utf16,big},<<54:6,_:10>>) -> + 2; +cbv({utf16,big},<<54:6,_:10,55:6,_:2>>) -> + 1; +cbv({utf16,big},_) -> + false; +cbv({utf16,little},<<_:8>>) -> + 1; % or 3, we'll see +cbv({utf16,little},<<_:8,54:6,_:2>>) -> + 2; +cbv({utf16,little},<<_:8,54:6,_:2,_:8>>) -> + 1; +cbv({utf16,little},_) -> + false; + + +cbv({utf32,big}, <<0:8>>) -> + 3; +cbv({utf32,big}, <<0:8,X:8>>) when X =< 16 -> + 2; +cbv({utf32,big}, <<0:8,X:8,Y:8>>) + when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) -> + 1; +cbv({utf32,big},_) -> + false; +cbv({utf32,little},<<_:8>>) -> + 3; +cbv({utf32,little},<<_:8,_:8>>) -> + 2; +cbv({utf32,little},<>) when X =:= 254; X =:= 255 -> + false; +cbv({utf32,little},<<_:8,Y:8,X:8>>) + when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) -> + 1; +cbv({utf32,little},_) -> + false. + + +ml_map([],_,{{Inc,X},Accum}) -> + {incomplete, Accum, Inc, X}; +ml_map([],_Fun,Accum) -> + Accum; +ml_map([Part|_] = Whole,_,{{Incomplete, _}, Accum}) when is_integer(Part) -> + {error, Accum, [Incomplete | Whole]}; +ml_map([Part|T],Fun,Accum) when is_integer(Part) -> + case Fun(Part,Accum) of + Bin when is_binary(Bin) -> + case ml_map(T,Fun,Bin) of + Bin2 when is_binary(Bin2) -> + Bin2; + {error, Converted, Rest} -> + {error, Converted, Rest}; + {incomplete, Converted, Rest,X} -> + {incomplete, Converted, Rest,X} + end; + % Can not be incomplete - it's an integer + {error, Converted, Rest} -> + {error, Converted, [Rest|T]} + end; +ml_map([Part|T],Fun,{{Incomplete,Missing}, Accum}) when is_binary(Part) -> + % Ok, how much is needed to fill in the incomplete part? + case byte_size(Part) of + N when N >= Missing -> + <> = Part, + NewPart = <>, + ml_map([NewPart,Trailing|T], Fun, Accum); + M -> + NewIncomplete = <>, + NewMissing = Missing - M, + ml_map(T,Fun,{{NewIncomplete, NewMissing}, Accum}) + end; +ml_map([Part|T],Fun,Accum) when is_binary(Part), byte_size(Part) > 8192 -> + <> = Part, + ml_map([Part1,Part2|T],Fun,Accum); +ml_map([Part|T],Fun,Accum) when is_binary(Part) -> + case Fun(Part,Accum) of + Bin when is_binary(Bin) -> + ml_map(T,Fun,Bin); + {incomplete, Converted, Rest, Missing} -> + ml_map(T,Fun,{{Rest, Missing},Converted}); + {error, Converted, Rest} -> + {error, Converted, [Rest|T]} + end; +ml_map([List|T],Fun,Accum) when is_list(List) -> + case ml_map(List,Fun,Accum) of + Bin when is_binary(Bin) -> + ml_map(T,Fun,Bin); + {error, Converted,Rest} -> + {error, Converted, [Rest | T]}; + {incomplete, Converted,Rest,N} -> + ml_map(T,Fun,{{Rest,N},Converted}) + end; +ml_map(Bin,Fun,{{Incomplete,Missing},Accum}) when is_binary(Bin) -> + case byte_size(Bin) of + N when N >= Missing -> + ml_map([Incomplete,Bin],Fun,Accum); + M -> + {incomplete, Accum, <>, Missing - M} + end; +ml_map(Part,Fun,Accum) when is_binary(Part), byte_size(Part) > 8192 -> + <> = Part, + ml_map([Part1,Part2],Fun,Accum); +ml_map(Bin,Fun,Accum) when is_binary(Bin) -> + Fun(Bin,Accum). + + + + + +i_trans(latin1) -> + fun(Bin) -> binary_to_list(Bin) end; +i_trans(unicode) -> + i_trans(utf8); +i_trans(utf8) -> + fun do_i_utf8/1; +i_trans(utf16) -> + fun do_i_utf16_big/1; +i_trans({utf16,big}) -> + fun do_i_utf16_big/1; +i_trans({utf16,little}) -> + fun do_i_utf16_little/1; +i_trans(utf32) -> + fun do_i_utf32_big/1; +i_trans({utf32,big}) -> + fun do_i_utf32_big/1; +i_trans({utf32,little}) -> + fun do_i_utf32_little/1. + +i_trans_chk(latin1) -> + fun(Bin) -> binary_to_list(Bin) end; +i_trans_chk(unicode) -> + i_trans_chk(utf8); +i_trans_chk(utf8) -> + fun do_i_utf8_chk/1; +i_trans_chk(utf16) -> + fun do_i_utf16_big_chk/1; +i_trans_chk({utf16,big}) -> + fun do_i_utf16_big_chk/1; +i_trans_chk({utf16,little}) -> + fun do_i_utf16_little_chk/1; +i_trans_chk(utf32) -> + fun do_i_utf32_big_chk/1; +i_trans_chk({utf32,big}) -> + fun do_i_utf32_big_chk/1; +i_trans_chk({utf32,little}) -> + fun do_i_utf32_little_chk/1. + +o_trans(latin1) -> + fun(L) -> list_to_binary(L) end; +o_trans(unicode) -> + o_trans(utf8); +o_trans(utf8) -> + fun(L) -> + do_o_binary(fun(One) -> + <> + end, L) + end; + +o_trans(utf16) -> + fun(L) -> + do_o_binary(fun(One) -> + <> + end, L) + end; +o_trans({utf16,big}) -> + o_trans(utf16); +o_trans({utf16,little}) -> + fun(L) -> + do_o_binary(fun(One) -> + <> + end, L) + end; +o_trans(utf32) -> + fun(L) -> + do_o_binary(fun(One) -> + <> + end, L) + end; +o_trans({utf32,big}) -> + o_trans(utf32); +o_trans({utf32,little}) -> + fun(L) -> + do_o_binary(fun(One) -> + <> + end, L) + end. + +do_o_binary(F,L) -> + case do_o_binary2(F,L) of + {Tag,List,R} -> + {Tag,erlang:iolist_to_binary(List),R}; + List -> + erlang:iolist_to_binary(List) + end. + +do_o_binary2(_F,[]) -> + <<>>; +do_o_binary2(F,[H|T]) -> + case (catch F(H)) of + {'EXIT',_} -> + {error,<<>>,[H|T]}; + Bin when is_binary(Bin) -> + case do_o_binary2(F,T) of + {error,Bin2,Rest} -> + {error,[Bin|Bin2],Rest}; + Bin3 -> + [Bin|Bin3] + end + end. + +%% Specific functions only allowing codepoints in latin1 range + +do_i_utf8_chk(<<>>) -> + []; +do_i_utf8_chk(<>) when U =< 255 -> + case do_i_utf8_chk(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + {incomplete,Trans,Rest,N} -> + {incomplete, [U|Trans], Rest, N}; + L when is_list(L) -> + [U|L] + end; +do_i_utf8_chk(<<_/utf8,_/binary>> = Bin) -> + {error, [], Bin}; +do_i_utf8_chk(Bin) when is_binary(Bin) -> + case cbv(utf8,Bin) of + N when is_integer(N) -> + {incomplete, [], Bin,N}; + false -> + {error, [], Bin} + end. +do_i_utf16_big_chk(<<>>) -> + []; +do_i_utf16_big_chk(<>) when U =< 255 -> + case do_i_utf16_big_chk(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + {incomplete,Trans,Rest,N} -> + {incomplete, [U|Trans], Rest, N}; + L when is_list(L) -> + [U|L] + end; +do_i_utf16_big_chk(<<_/utf16,_/binary>> = Bin) -> + {error, [], Bin}; +do_i_utf16_big_chk(Bin) when is_binary(Bin) -> + case cbv({utf16,big},Bin) of + N when is_integer(N) -> + {incomplete, [], Bin, N}; + false -> + {error, [], Bin} + end. +do_i_utf16_little_chk(<<>>) -> + []; +do_i_utf16_little_chk(<>) when U =< 255 -> + case do_i_utf16_little_chk(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + {incomplete,Trans,Rest,N} -> + {incomplete, [U|Trans], Rest, N}; + L when is_list(L) -> + [U|L] + end; +do_i_utf16_little_chk(<<_/utf16-little,_/binary>> = Bin) -> + {error, [], Bin}; +do_i_utf16_little_chk(Bin) when is_binary(Bin) -> + case cbv({utf16,little},Bin) of + N when is_integer(N) -> + {incomplete, [], Bin, N}; + false -> + {error, [], Bin} + end. + + +do_i_utf32_big_chk(<<>>) -> + []; +do_i_utf32_big_chk(<>) when U =< 255 -> + case do_i_utf32_big_chk(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + L when is_list(L) -> + [U|L] + end; +do_i_utf32_big_chk(<<_/utf32,_/binary>> = Bin) -> + {error, [], Bin}; +do_i_utf32_big_chk(Bin) when is_binary(Bin) -> + case cbv({utf32,big},Bin) of + N when is_integer(N) -> + {incomplete, [], Bin, N}; + false -> + {error, [], Bin} + end. +do_i_utf32_little_chk(<<>>) -> + []; +do_i_utf32_little_chk(<>) when U =< 255 -> + case do_i_utf32_little_chk(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + L when is_list(L) -> + [U|L] + end; +do_i_utf32_little_chk(<<_/utf32-little,_/binary>> = Bin) -> + {error, [], Bin}; +do_i_utf32_little_chk(Bin) when is_binary(Bin) -> + case cbv({utf32,little},Bin) of + N when is_integer(N) -> + {incomplete, [], Bin, N}; + false -> + {error, [], Bin} + end. + + +%% General versions + +do_i_utf8(<<>>) -> + []; +do_i_utf8(<>) -> + case do_i_utf8(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + {incomplete,Trans,Rest,N} -> + {incomplete, [U|Trans], Rest, N}; + L when is_list(L) -> + [U|L] + end; +do_i_utf8(Bin) when is_binary(Bin) -> + case cbv(utf8,Bin) of + N when is_integer(N) -> + {incomplete, [], Bin,N}; + false -> + {error, [], Bin} + end. + +do_i_utf16_big(<<>>) -> + []; +do_i_utf16_big(<>) -> + case do_i_utf16_big(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + {incomplete,Trans,Rest,N} -> + {incomplete, [U|Trans], Rest, N}; + L when is_list(L) -> + [U|L] + end; +do_i_utf16_big(Bin) when is_binary(Bin) -> + case cbv({utf16,big},Bin) of + N when is_integer(N) -> + {incomplete, [], Bin, N}; + false -> + {error, [], Bin} + end. +do_i_utf16_little(<<>>) -> + []; +do_i_utf16_little(<>) -> + case do_i_utf16_little(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + {incomplete,Trans,Rest,N} -> + {incomplete, [U|Trans], Rest, N}; + L when is_list(L) -> + [U|L] + end; +do_i_utf16_little(Bin) when is_binary(Bin) -> + case cbv({utf16,little},Bin) of + N when is_integer(N) -> + {incomplete, [], Bin, N}; + false -> + {error, [], Bin} + end. + + +do_i_utf32_big(<<>>) -> + []; +do_i_utf32_big(<>) -> + case do_i_utf32_big(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + {incomplete,Trans,Rest,N} -> + {incomplete, [U|Trans], Rest, N}; + L when is_list(L) -> + [U|L] + end; +do_i_utf32_big(Bin) when is_binary(Bin) -> + case cbv({utf32,big},Bin) of + N when is_integer(N) -> + {incomplete, [], Bin, N}; + false -> + {error, [], Bin} + end. +do_i_utf32_little(<<>>) -> + []; +do_i_utf32_little(<>) -> + case do_i_utf32_little(R) of + {error,Trans,Rest} -> + {error, [U|Trans], Rest}; + {incomplete,Trans,Rest,N} -> + {incomplete, [U|Trans], Rest, N}; + L when is_list(L) -> + [U|L] + end; +do_i_utf32_little(Bin) when is_binary(Bin) -> + case cbv({utf32,little},Bin) of + N when is_integer(N) -> + {incomplete, [], Bin, N}; + false -> + {error, [], Bin} + end. diff --git a/lib/stdlib/src/win32reg.erl b/lib/stdlib/src/win32reg.erl new file mode 100644 index 0000000000..ee0d17bc94 --- /dev/null +++ b/lib/stdlib/src/win32reg.erl @@ -0,0 +1,386 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(win32reg). + +-export([open/1, close/1, + current_key/1, change_key/2, change_key_create/2, + sub_keys/1, delete_key/1, + value/2, values/1, set_value/3, delete_value/2, + expand/1, + format_error/1]). + +%% Key handles (always open). +-define(hkey_classes_root, 16#80000000). +-define(hkey_current_user, 16#80000001). +-define(hkey_local_machine, 16#80000002). +-define(hkey_users, 16#80000003). +-define(hkey_performance_data, 16#80000004). +-define(hkey_current_config, 16#80000005). +-define(hkey_dyn_data, 16#80000006). + +%% Driver commands. +-define(cmd_get_current, 0). +-define(cmd_open_key, 1). +-define(cmd_create_key, 2). +-define(cmd_get_all_subkeys, 3). +-define(cmd_get_value, 4). +-define(cmd_get_all_values, 5). +-define(cmd_set_value, 6). +-define(cmd_delete_key, 7). +-define(cmd_delete_value, 8). + +%% Data types. +-define(reg_sc, 1). +-define(reg_expand_sc, 2). +-define(reg_binary, 3). +-define(reg_dword, 4). + +%% Basic types internal to this file. +-type open_mode() :: 'read' | 'write'. +-type reg_handle() :: {'win32reg',port()}. +-type name() :: string() | 'default'. +-type value() :: string() | integer() | binary(). + +%%% Exported functions. + +-spec open([open_mode()]) -> {'ok', reg_handle()} | {'error', 'enotsup'}. + +open(Modes) -> + case os:type() of + {win32, _} -> + case open_mode(Modes, []) of + {error, Reason} -> + {error, Reason}; + ModeStr -> + P = open_port({spawn, "registry__drv__ " ++ ModeStr}, []), + {ok, {win32reg, P}} + end; + _ -> + {error, enotsup} + end. + +-spec close(reg_handle()) -> 'ok'. + +close({win32reg, Reg}) when is_port(Reg) -> + unlink(Reg), + exit(Reg, die), + ok. + +-spec current_key(reg_handle()) -> {'ok', string()}. + +current_key({win32reg, Reg}) when is_port(Reg) -> + Cmd = [?cmd_get_current], + Reg ! {self(), {command, Cmd}}, + {state, Hkey, Name} = get_result(Reg), + Root = hkey_to_string(Hkey), + {ok, case Name of + [] -> Root; + _ -> Root ++ [$\\|Name] + end}. + +-spec change_key(reg_handle(), string()) -> 'ok' | {'error', atom()}. + +change_key({win32reg, Reg}, Key) when is_port(Reg) -> + change_key(Reg, ?cmd_open_key, Key). + +-spec change_key_create(reg_handle(), string()) -> 'ok' | {'error', atom()}. + +change_key_create({win32reg, Reg}, Key) when is_port(Reg) -> + change_key(Reg, ?cmd_create_key, Key). + +change_key(Reg, Cmd, Key) -> + case parse_key(Key, Reg) of + {ok, Hkey, Path} -> + Reg ! {self(), {command, [Cmd, i32(Hkey), Path, 0]}}, + get_result(Reg); + {error, Reason} -> + {error, Reason} + end. + +-spec sub_keys(reg_handle()) -> {'ok', [string()]} | {'error', atom()}. + +sub_keys({win32reg, Reg}) when is_port(Reg) -> + Cmd = [?cmd_get_all_subkeys], + Reg ! {self(), {command, Cmd}}, + collect_keys(Reg, []). + +-spec delete_key(reg_handle()) -> 'ok' | {'error', atom()}. + +delete_key({win32reg, Reg}) when is_port(Reg) -> + Cmd = [?cmd_delete_key], + Reg ! {self(), {command, Cmd}}, + get_result(Reg). + +-spec set_value(reg_handle(), name(), value()) -> 'ok' | {'error', atom()}. + +set_value({win32reg, Reg}, Name0, Value) when is_port(Reg) -> + Name = + case Name0 of + default -> []; + _ -> Name0 + end, + {Type, V} = term_to_value(Value), + Cmd = [?cmd_set_value, Type, Name, 0, V], + Reg ! {self(), {command, Cmd}}, + get_result(Reg). + +-spec value(reg_handle(), name()) -> {'ok', value()} | {'error', atom()}. + +value({win32reg, Reg}, Name) when is_port(Reg) -> + Cmd = [?cmd_get_value, Name, 0], + Reg ! {self(), {command, Cmd}}, + case get_result(Reg) of + {value, {Name, Value}} -> + {ok, Value}; + {error, Reason} -> + {error, Reason} + end. + +-spec values(reg_handle()) -> {'ok', [{name(), value()}]} | {'error', atom()}. + +values({win32reg, Reg}) when is_port(Reg) -> + Cmd = [?cmd_get_all_values], + Reg ! {self(), {command, Cmd}}, + collect_values(Reg, []). + +-spec delete_value(reg_handle(), name()) -> 'ok' | {'error', atom()}. + +delete_value({win32reg, Reg}, Name0) when is_port(Reg) -> + Name = + case Name0 of + default -> []; + _ -> Name0 + end, + Cmd = [?cmd_delete_value, Name, 0], + Reg ! {self(), {command, Cmd}}, + get_result(Reg). + +-spec expand(string()) -> string(). + +expand(Value) -> + expand(Value, [], []). + +expand([$%, $%|Rest], [], Result) -> + expand(Rest, [], [$%|Result]); +expand([$%, C|Rest], [], Result) -> + expand(Rest, [C], Result); +expand([C|Rest], [], Result) -> + expand(Rest, [], [C|Result]); +expand([$%|Rest], Env0, Result) -> + Env = lists:reverse(Env0), + case os:getenv(Env) of + false -> + expand(Rest, [], Result); + Value -> + expand(Rest, [], lists:reverse(Value)++Result) + end; +expand([C|Rest], Env, Result) -> + expand(Rest, [C|Env], Result); +expand([], [], Result) -> + lists:reverse(Result). + +-spec format_error(atom()) -> string(). + +format_error(ErrorId) -> + erl_posix_msg:message(ErrorId). + +%%% Implementation. + +-spec collect_values(port(), [{name(), value()}]) -> + {'ok', [{name(), value()}]} | {'error', atom()}. + +collect_values(P, Result) -> + case get_result(P) of + ok -> + {ok, lists:reverse(Result)}; + {value, ValueData} -> + collect_values(P, [ValueData|Result]); + {error, Reason} -> + {error, Reason} + end. + +-spec collect_keys(port(), string()) -> {'ok', [string()]} | {'error', atom()}. + +collect_keys(P, Result) -> + case get_result(P) of + ok -> + {ok, lists:reverse(Result)}; + {key, KeyData} -> + collect_keys(P, [KeyData|Result]); + {error, Reason} -> + {error, Reason} + end. + +get_result(P) -> + receive + {P, {data, Data}} -> + get_result1(Data) + end. + +get_result1([$e|Reason]) -> + {error, list_to_atom(Reason)}; +get_result1([$o]) -> + ok; +get_result1([$k|Name]) -> + {key, Name}; +get_result1([$v|Rest0]) -> + {ok, Type, Rest1} = i32_on_head(Rest0), + {ok, Name0, Value} = get_cstring(Rest1), + Name = + case Name0 of + [] -> default; + _ -> Name0 + end, + {value, {Name, encode_value(Type, Value)}}; +get_result1([$s|Rest0]) -> + {ok, Hkey, Name} = i32_on_head(Rest0), + {state, Hkey, Name}. + +encode_value(?reg_sc, Value) -> + Value; +encode_value(?reg_expand_sc, Value) -> + Value; +encode_value(?reg_dword, Value) -> + i32(Value); +encode_value(_, Value) -> + list_to_binary(Value). + +term_to_value(Int) when is_integer(Int) -> + {i32(?reg_dword), i32(Int)}; +term_to_value(String) when is_list(String) -> + {i32(?reg_sc), [String, 0]}; +term_to_value(Bin) when is_binary(Bin) -> + {i32(?reg_binary), Bin}; +term_to_value(_) -> + exit(badarg). + +get_cstring(List) -> + get_cstring(List, []). + +get_cstring([0|Rest], Result) -> + {ok, lists:reverse(Result), Rest}; +get_cstring([C|Rest], Result) -> + get_cstring(Rest, [C|Result]); +get_cstring([], Result) -> + {ok, lists:reverse(Result), []}. + +i32(Int) when is_integer(Int) -> + [(Int bsr 24) band 255, + (Int bsr 16) band 255, + (Int bsr 8) band 255, + Int band 255]; +i32([X1, X2, X3, X4]) -> + (X1 bsl 24) bor (X2 bsl 16) bor (X3 bsl 8) bor X4. + +i32_on_head([X1, X2, X3, X4 | Rest]) -> + {ok, (X1 bsl 24) bor (X2 bsl 16) bor (X3 bsl 8) bor X4, Rest}. + +parse_key([$\\|Rest], _) -> + parse_root(Rest, []); +parse_key(Key, Reg) -> + parse_relative(Key, Reg). + +parse_relative(Path, Reg) -> + Cmd = [?cmd_get_current], + Reg ! {self(), {command, Cmd}}, + {state, RootHandle, Name} = get_result(Reg), + Original = split_key(Name), + Relative = lists:reverse(split_key(Path)), + case parse_relative1(Relative, Original) of + NewPath -> + {ok, RootHandle, NewPath} + %% XXX Error handling. + end. + +parse_relative1([".."|T1], [_|T2]) -> + parse_relative1(T1, T2); +parse_relative1([Comp|Rest], Result) -> + parse_relative1(Rest, [Comp|Result]); +parse_relative1([], Result) -> + reverse_and_join(Result, []). + +reverse_and_join([X|Rest], []) -> + reverse_and_join(Rest, [X]); +reverse_and_join([X|Rest], Result) -> + reverse_and_join(Rest, [X, "\\" | Result]); +reverse_and_join([], Result) -> + Result. + +split_key(Key) -> + split_key(Key, [], []). + +split_key([$\\|Rest], Current, Result) -> + split_key(Rest, [], [lists:reverse(Current)|Result]); +split_key([C|Rest], Current, Result) -> + split_key(Rest, [C|Current], Result); +split_key([], [], Result) -> + Result; +split_key([], Current, Result) -> + [lists:reverse(Current)|Result]. + +parse_root([$\\|Rest], Result) -> + Root = + case lists:reverse(Result) of + [$h, $k, $e, $y, $_|Root0] -> + Root0; + Root0 -> + Root0 + end, + case root_to_handle(list_to_atom(Root)) of + false -> + {error, enoent}; + Handle -> + {ok, Handle, Rest} + end; +parse_root([C|Rest], Result) -> + parse_root(Rest, [C|Result]); +parse_root([], Result) -> + parse_root([$\\], Result). + +root_to_handle(classes_root) -> ?hkey_classes_root; +root_to_handle(hkcr) -> ?hkey_classes_root; +root_to_handle(current_user) -> ?hkey_current_user; +root_to_handle(hkcu) -> ?hkey_current_user; +root_to_handle(local_machine) -> ?hkey_local_machine; +root_to_handle(hklm) -> ?hkey_local_machine; +root_to_handle(users) -> ?hkey_users; +root_to_handle(hku) -> ?hkey_users; +root_to_handle(current_config) -> ?hkey_current_config; +root_to_handle(hkcc) -> ?hkey_current_config; +root_to_handle(dyn_data) -> ?hkey_dyn_data; +root_to_handle(hkdd) -> ?hkey_dyn_data; +root_to_handle(performance_data) -> ?hkey_performance_data; +root_to_handle(_) -> false. + +hkey_to_string(?hkey_classes_root) -> "\\hkey_classes_root"; +hkey_to_string(?hkey_current_user) -> "\\hkey_current_user"; +hkey_to_string(?hkey_local_machine) -> "\\hkey_local_machine"; +hkey_to_string(?hkey_users) -> "\\hkey_users"; +hkey_to_string(?hkey_performance_data) -> "\\hkey_performance_data"; +hkey_to_string(?hkey_current_config) -> "\\hkey_current_config"; +hkey_to_string(?hkey_dyn_data) -> "\\hkey_dyn_data". + +open_mode([read|Rest], Result) -> + open_mode(Rest, [$r|Result]); +open_mode([write|Rest], Result) -> + open_mode(Rest, [$w|Result]); +open_mode([], Result) -> + Result; +open_mode(_, _) -> + {error, einval}. diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl new file mode 100644 index 0000000000..f44d97c227 --- /dev/null +++ b/lib/stdlib/src/zip.erl @@ -0,0 +1,1600 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(zip). + +%% Basic api +-export([unzip/1, unzip/2, extract/1, extract/2, + zip/2, zip/3, create/2, create/3, + list_dir/1, list_dir/2, table/1, table/2, + t/1, tt/1]). + +%% unzipping peicemeal +-export([openzip_open/1, openzip_open/2, + openzip_get/1, openzip_get/2, + openzip_t/1, openzip_tt/1, + openzip_list_dir/1, openzip_list_dir/2, + openzip_close/1]). +%% openzip_add/2]). + +%% zip server +-export([zip_open/1, zip_open/2, + zip_get/1, zip_get/2, + zip_t/1, zip_tt/1, + zip_list_dir/1, zip_list_dir/2, + zip_close/1]). + +%% just for debugging zip server, not documented, not tested, not to be used +-export([zip_get_state/1]). + +%% includes +-include("file.hrl"). % #file_info +-include("zip.hrl"). % #zip_file, #zip_comment + +%% max bytes fed to zlib +-define(WRITE_BLOCK_SIZE, 8*1024). + +%% for debugging, to turn off catch +-define(CATCH, catch). + +%% option sets +-record(unzip_opts, { + output, % output object (fun) + input, % input object (fun) + file_filter, % file filter (boolean fun) + open_opts, % options passed to file:open + feedback, % feeback (fun) + cwd % directory to relate paths to + }). + +-record(zip_opts, { + output, % output object (fun) + input, % input object (fun) + comment, % zip-file comment + open_opts, % options passed to file:open + feedback, % feeback (fun) + cwd, % directory to relate paths to + compress, % compress files with these suffixes + uncompress % uncompress files with these suffixes + }). + +-record(list_dir_opts, { + input, % input object (fun) + raw_iterator, % applied to each dir entry + open_opts % options passed to file:open + }). + +-record(openzip_opts, { + output, % output object (fun) + open_opts, % file:open options + cwd % directory to relate paths to + }). + +% openzip record, state for an open zip-file +-record(openzip, { + zip_comment, % zip archive comment + files, % filenames, infos, comments and offsets + in, % archive handle + input, % archive io object (fun) + output, % output io object (fun) + zlib, % handle to open zlib + cwd % directory to relate paths to + }). + +% Things that I would like to add to the public record #zip_file, +% but can't as it would make things fail at upgrade. +% Instead we use {#zip_file,#zip_file_extra} internally. +-record(zip_file_extra, { + crc32 % checksum + }). + +%% max bytes read from files and archives (and fed to zlib) +-define(READ_BLOCK_SIZE, 16*1024). + +%% -record(primzip_file, { +%% name, +%% offset, +%% chunk_size +%% }). + +%% -record(primzip, { +%% zlib, % handle to the zlib port from zlib:open +%% input, % fun/2 for file/memory input +%% in, % input (file handle or binary) +%% files % [#primzip_file] +%% }). + +%% ZIP-file format records and defines + +%% compression methods +-define(STORED, 0). +-define(UNCOMPRESSED, 0). +-define(SHRUNK, 1). +-define(REDUCED_1, 2). +-define(REDUCED_2, 3). +-define(REDUCED_3, 4). +-define(REDUCED_4, 5). +-define(IMPLODED, 6). +-define(TOKENIZED, 7). +-define(DEFLATED, 8). +-define(DEFLATED_64, 9). +-define(PKWARE_IMPLODED, 10). +-define(PKWARE_RESERVED, 11). +-define(BZIP2_COMPRESSED, 12). + +%% zip-file records +-define(LOCAL_FILE_MAGIC,16#04034b50). +-define(LOCAL_FILE_HEADER_SZ,(4+2+2+2+2+2+4+4+4+2+2)). +-define(LOCAL_FILE_HEADER_CRC32_OFFSET, 4+2+2+2+2+2). +-record(local_file_header, {version_needed, + gp_flag, + comp_method, + last_mod_time, + last_mod_date, + crc32, + comp_size, + uncomp_size, + file_name_length, + extra_field_length}). + +-define(CENTRAL_FILE_HEADER_SZ,(4+2+2+2+2+2+2+4+4+4+2+2+2+2+2+4+4)). + +-define(CENTRAL_DIR_MAGIC, 16#06054b50). +-define(CENTRAL_DIR_SZ, (4+2+2+2+2+4+4+2)). +-define(CENTRAL_DIR_DIGITAL_SIG_MAGIC, 16#05054b50). +-define(CENTRAL_DIR_DIGITAL_SIG_SZ, (4+2)). + +-define(CENTRAL_FILE_MAGIC, 16#02014b50). + +-record(cd_file_header, {version_made_by, + version_needed, + gp_flag, + comp_method, + last_mod_time, + last_mod_date, + crc32, + comp_size, + uncomp_size, + file_name_length, + extra_field_length, + file_comment_length, + disk_num_start, + internal_attr, + external_attr, + local_header_offset}). + +%% Unix extra fields (not yet supported) +-define(UNIX_EXTRA_FIELD_TAG, 16#000d). +-record(unix_extra_field, {atime, + mtime, + uid, + gid}). + +%% extended timestamps (not yet supported) +-define(EXTENDED_TIMESTAMP_TAG, 16#5455). +%% -record(extended_timestamp, {mtime, +%% atime, +%% ctime}). + +-define(END_OF_CENTRAL_DIR_MAGIC, 16#06054b50). +-define(END_OF_CENTRAL_DIR_SZ, (4+2+2+2+2+4+4+2)). + +-record(eocd, {disk_num, + start_disk_num, + entries_on_disk, + entries, + size, + offset, + zip_comment_length}). + + +%% Open a zip archive with options +%% + +openzip_open(F) -> + openzip_open(F, []). + +openzip_open(F, Options) -> + case ?CATCH do_openzip_open(F, Options) of + {ok, OpenZip} -> + {ok, OpenZip}; + Error -> + {error, Error} + end. + +do_openzip_open(F, Options) -> + Opts = get_openzip_options(Options), + #openzip_opts{output = Output, open_opts = OpO, cwd = CWD} = Opts, + Input = get_zip_input(F), + In0 = Input({open, F, OpO -- [write]}, []), + {[#zip_comment{comment = C} | Files], In1} = + get_central_dir(In0, fun raw_file_info_etc/5, Input), + Z = zlib:open(), + {ok, #openzip{zip_comment = C, + files = Files, + in = In1, + input = Input, + output = Output, + zlib = Z, + cwd = CWD}}. + +%% retrieve all files from an open archive +openzip_get(OpenZip) -> + case ?CATCH do_openzip_get(OpenZip) of + {ok, Result} -> {ok, Result}; + Error -> {error, Error} + end. + +do_openzip_get(#openzip{files = Files, in = In0, input = Input, + output = Output, zlib = Z, cwd = CWD}) -> + ZipOpts = #unzip_opts{output = Output, input = Input, + file_filter = fun all/1, open_opts = [], + feedback = fun silent/1, cwd = CWD}, + R = get_z_files(Files, Z, In0, ZipOpts, []), + {ok, R}; +do_openzip_get(_) -> + throw(einval). + +%% retrieve a file from an open archive +openzip_get(FileName, OpenZip) -> + case ?CATCH do_openzip_get(FileName, OpenZip) of + {ok, Result} -> {ok, Result}; + Error -> {error, Error} + end. + +do_openzip_get(F, #openzip{files = Files, in = In0, input = Input, + output = Output, zlib = Z, cwd = CWD}) -> + %%case lists:keysearch(F, #zip_file.name, Files) of + case file_name_search(F, Files) of + {#zip_file{offset = Offset},_}=ZFile -> + In1 = Input({seek, bof, Offset}, In0), + case get_z_file(In1, Z, Input, Output, [], fun silent/1, CWD, ZFile) of + {file, R, _In2} -> {ok, R}; + _ -> throw(file_not_found) + end; + _ -> throw(file_not_found) + end; +do_openzip_get(_, _) -> + throw(einval). + +file_name_search(Name,Files) -> + case lists:dropwhile(fun({ZipFile,_}) -> ZipFile#zip_file.name =/= Name end, + Files) of + [ZFile|_] -> ZFile; + [] -> false + end. + +%% %% add a file to an open archive +%% openzip_add(File, OpenZip) -> +%% case ?CATCH do_openzip_add(File, OpenZip) of +%% {ok, Result} -> {ok, Result}; +%% Error -> {error, Error} +%% end. + +%% do_openzip_add(File, #open_zip{files = Files, in = In0, +%% opts = Opts} = OpenZip0) -> +%% throw(nyi), +%% Z = zlib:open(), +%% R = get_z_files(Files, In0, Z, Opts, []), +%% zlib:close(Z), +%% {ok, R}; +%% do_openzip_add(_, _) -> +%% throw(einval). + +%% get file list from open archive +openzip_list_dir(#openzip{zip_comment = Comment, + files = Files}) -> + {ZipFiles,_Extras} = lists:unzip(Files), + {ok, [#zip_comment{comment = Comment} | ZipFiles]}; +openzip_list_dir(_) -> + {error, einval}. + +openzip_list_dir(#openzip{files = Files}, [names_only]) -> + {ZipFiles,_Extras} = lists:unzip(Files), + Names = [Name || {#zip_file{name=Name},_} <- ZipFiles], + {ok, Names}; +openzip_list_dir(_, _) -> + {error, einval}. + +%% close an open archive +openzip_close(#openzip{in = In0, input = Input, zlib = Z}) -> + Input(close, In0), + zlib:close(Z); +openzip_close(_) -> + {error, einval}. + +%% Extract from a zip archive with options +%% +%% Accepted options: +%% verbose, cooked, file_list, keep_old_files, file_filter, memory + +unzip(F) -> unzip(F, []). + +unzip(F, Options) -> + case ?CATCH do_unzip(F, Options) of + {ok, R} -> {ok, R}; + Error -> {error, Error} + end. + +do_unzip(F, Options) -> + Opts = get_unzip_options(F, Options), + #unzip_opts{input = Input, open_opts = OpO} = Opts, + In0 = Input({open, F, OpO -- [write]}, []), + RawIterator = fun raw_file_info_etc/5, + {Info, In1} = get_central_dir(In0, RawIterator, Input), + %% get rid of zip-comment + Z = zlib:open(), + Files = get_z_files(Info, Z, In1, Opts, []), + zlib:close(Z), + Input(close, In1), + {ok, Files}. + +%% Create zip archive name F from Files or binaries +%% +%% Accepted options: +%% verbose, cooked, memory, comment + +zip(F, Files) -> zip(F, Files, []). + +zip(F, Files, Options) -> + case ?CATCH do_zip(F, Files, Options) of + {ok, R} -> {ok, R}; + Error -> {error, Error} + end. + +do_zip(F, Files, Options) -> + Opts = get_zip_options(Files, Options), + #zip_opts{output = Output, open_opts = OpO} = Opts, + Out0 = Output({open, F, OpO}, []), + Z = zlib:open(), + {Out1, LHS, Pos} = put_z_files(Files, Z, Out0, 0, Opts, []), + zlib:close(Z), + Out2 = put_central_dir(LHS, Pos, Out1, Opts), + Out3 = Output({close, F}, Out2), + {ok, Out3}. + +%% List zip directory contents +%% +%% Accepted options: +%% cooked, file_filter, file_output (latter 2 undocumented) + +list_dir(F) -> list_dir(F, []). + +list_dir(F, Options) -> + case ?CATCH do_list_dir(F, Options) of + {ok, R} -> {ok, R}; + Error -> {error, Error} + end. + +do_list_dir(F, Options) -> + Opts = get_list_dir_options(F, Options), + #list_dir_opts{input = Input, open_opts = OpO, + raw_iterator = RawIterator} = Opts, + In0 = Input({open, F, OpO}, []), + {Info, In1} = get_central_dir(In0, RawIterator, Input), + Input(close, In1), + {ok, Info}. + +%% Print zip directory in short form + +t(F) when is_pid(F) -> zip_t(F); +t(F) when is_record(F, openzip) -> openzip_t(F); +t(F) -> t(F, fun raw_short_print_info_etc/5). + +t(F, RawPrint) -> + case ?CATCH do_t(F, RawPrint) of + ok -> ok; + Error -> {error, Error} + end. + +do_t(F, RawPrint) -> + Input = get_input(F), + OpO = [raw], + In0 = Input({open, F, OpO}, []), + {_Info, In1} = get_central_dir(In0, RawPrint, Input), + Input(close, In1), + ok. + +%% Print zip directory in long form (like ls -l) + +tt(F) when is_pid(F) -> zip_tt(F); +tt(F) when is_record(F, openzip) -> openzip_tt(F); +tt(F) -> t(F, fun raw_long_print_info_etc/5). + + +%% option utils +get_unzip_opt([], Opts) -> + Opts; +get_unzip_opt([verbose | Rest], Opts) -> + get_unzip_opt(Rest, Opts#unzip_opts{feedback = fun verbose_unzip/1}); +get_unzip_opt([cooked | Rest], #unzip_opts{open_opts = OpO} = Opts) -> + get_unzip_opt(Rest, Opts#unzip_opts{open_opts = OpO -- [raw]}); +get_unzip_opt([memory | Rest], Opts) -> + get_unzip_opt(Rest, Opts#unzip_opts{output = fun binary_io/2}); +get_unzip_opt([{cwd, CWD} | Rest], Opts) -> + get_unzip_opt(Rest, Opts#unzip_opts{cwd = CWD}); +get_unzip_opt([{file_filter, F} | Rest], Opts) -> + Filter1 = fun({ZipFile,_Extra}) -> F(ZipFile) end, + Filter2 = fun_and_1(Filter1, Opts#unzip_opts.file_filter), + get_unzip_opt(Rest, Opts#unzip_opts{file_filter = Filter2}); +get_unzip_opt([{file_list, L} | Rest], Opts) -> + FileInList = fun(F) -> file_in_list(F, L) end, + Filter = fun_and_1(FileInList, Opts#unzip_opts.file_filter), + get_unzip_opt(Rest, Opts#unzip_opts{file_filter = Filter}); +get_unzip_opt([keep_old_files | Rest], Opts) -> + Keep = fun keep_old_file/1, + Filter = fun_and_1(Keep, Opts#unzip_opts.file_filter), + get_unzip_opt(Rest, Opts#unzip_opts{file_filter = Filter}); +get_unzip_opt([Unknown | _Rest], _Opts) -> + throw({bad_option, Unknown}). + +get_list_dir_opt([], Opts) -> + Opts; +get_list_dir_opt([cooked | Rest], #list_dir_opts{open_opts = OpO} = Opts) -> + get_list_dir_opt(Rest, Opts#list_dir_opts{open_opts = OpO -- [raw]}); +get_list_dir_opt([names_only | Rest], Opts) -> + get_list_dir_opt(Rest, Opts#list_dir_opts{ + raw_iterator = fun(A, B, C, D, E) -> raw_name_only(A, B, C, D, E) end}); +%% get_list_dir_opt([{file_output, F} | Rest], Opts) -> +%% get_list_dir_opt(Rest, Opts#list_dir_opts{file_output = F}); +%% get_list_dir_opt([{file_filter, F} | Rest], Opts) -> +%% get_list_dir_opt(Rest, Opts#list_dir_opts{file_filter = F}); +get_list_dir_opt([Unknown | _Rest], _Opts) -> + throw({bad_option, Unknown}). + +get_zip_opt([], Opts) -> + Opts; +get_zip_opt([verbose | Rest], Opts) -> + get_zip_opt(Rest, Opts#zip_opts{feedback = fun verbose_zip/1}); +get_zip_opt([cooked | Rest], #zip_opts{open_opts = OpO} = Opts) -> + get_zip_opt(Rest, Opts#zip_opts{open_opts = OpO -- [raw]}); +get_zip_opt([memory | Rest], Opts) -> + get_zip_opt(Rest, Opts#zip_opts{output = fun binary_io/2}); +get_zip_opt([{cwd, CWD} | Rest], Opts) -> + get_zip_opt(Rest, Opts#zip_opts{cwd = CWD}); +get_zip_opt([{comment, C} | Rest], Opts) -> + get_zip_opt(Rest, Opts#zip_opts{comment = C}); +get_zip_opt([{compress, Which} = O| Rest], Opts) -> + Which2 = + case Which of + all -> + all; + Suffixes when is_list(Suffixes) -> + lists:usort(Suffixes); + {add, Suffixes} when is_list(Suffixes) -> + lists:usort(Opts#zip_opts.compress ++ Suffixes); + {del, Suffixes} when is_list(Suffixes) -> + lists:usort(Opts#zip_opts.compress -- Suffixes); + _ -> + throw({bad_option, O}) + end, + get_zip_opt(Rest, Opts#zip_opts{compress = Which2}); +get_zip_opt([{uncompress, Which} = O| Rest], Opts) -> + Which2 = + case Which of + all -> + all; + Suffixes when is_list(Suffixes) -> + lists:usort(Suffixes); + {add, Suffixes} when is_list(Suffixes) -> + lists:usort(Opts#zip_opts.uncompress ++ Suffixes); + {del, Suffixes} when is_list(Suffixes) -> + lists:usort(Opts#zip_opts.uncompress -- Suffixes); + _ -> + throw({bad_option, O}) + end, + get_zip_opt(Rest, Opts#zip_opts{uncompress = Which2}); +get_zip_opt([Unknown | _Rest], _Opts) -> + throw({bad_option, Unknown}). + + +%% feedback funs +silent(_) -> ok. + +verbose_unzip(FN) -> io:format("extracting: ~p\n", [FN]). + +verbose_zip(FN) -> io:format("adding: ~p\n", [FN]). + +%% file filter funs +all(_) -> true. + +file_in_list({#zip_file{name = FileName},_}, List) -> + lists:member(FileName, List); +file_in_list(_, _) -> + false. + +keep_old_file({#zip_file{name = FileName},_}) -> + not (filelib:is_file(FileName) orelse filelib:is_dir(FileName)); +keep_old_file(_) -> + false. + +%% fun combiner +fun_and_1(Fun1, Fun2) -> + fun(A) -> Fun1(A) andalso Fun2(A) end. + +%% getting options +get_zip_options(Files, Options) -> + Suffixes = [".Z", ".zip", ".zoo", ".arc", ".lzh", ".arj"], + Opts = #zip_opts{output = fun file_io/2, + input = get_zip_input({files, Files}), + open_opts = [raw, write], + comment = "", + feedback = fun silent/1, + cwd = "", + compress = all, + uncompress = Suffixes + }, + get_zip_opt(Options, Opts). + +get_unzip_options(F, Options) -> + Opts = #unzip_opts{file_filter = fun all/1, + output = fun file_io/2, + input = get_input(F), + open_opts = [raw], + feedback = fun silent/1, + cwd = "" + }, + get_unzip_opt(Options, Opts). + +get_openzip_options(Options) -> + Opts = #openzip_opts{open_opts = [raw, read], + output = fun file_io/2, + cwd = ""}, + get_openzip_opt(Options, Opts). + +get_input(F) when is_binary(F) -> + fun binary_io/2; +get_input(F) when is_list(F) -> + fun file_io/2. + +get_zip_input({F, B}) when is_binary(B), is_list(F) -> + fun binary_io/2; +get_zip_input(F) when is_list(F) -> + fun file_io/2; +get_zip_input({files, []}) -> + fun binary_io/2; +get_zip_input({files, [File | _]}) -> + get_zip_input(File). + +get_list_dir_options(F, Options) -> + Opts = #list_dir_opts{raw_iterator = fun raw_file_info_public/5, + input = get_input(F), + open_opts = [raw]}, + get_list_dir_opt(Options, Opts). + +%% aliases for erl_tar compatibility +table(F) -> list_dir(F). +table(F, O) -> list_dir(F, O). +create(F, Fs) -> zip(F, Fs). +create(F, Fs, O) -> zip(F, Fs, O). +extract(F) -> unzip(F). +extract(F, O) -> unzip(F, O). + + +%% put the central directory, at the end of the zip archive +put_central_dir(LHS, Pos, Out0, + #zip_opts{output = Output, comment = Comment}) -> + {Out1, Sz} = put_cd_files_loop(LHS, Output, Out0, 0), + put_eocd(length(LHS), Pos, Sz, Comment, Output, Out1). + +put_cd_files_loop([], _Output, Out, Sz) -> + {Out, Sz}; +put_cd_files_loop([{LH, Name, Pos} | LHRest], Output, Out0, Sz0) -> + CDFH = cd_file_header_from_lh_and_pos(LH, Pos), + BCDFH = cd_file_header_to_bin(CDFH), + B = [<>, BCDFH, Name], + Out1 = Output({write, B}, Out0), + Sz1 = Sz0 + ?CENTRAL_FILE_HEADER_SZ + + LH#local_file_header.file_name_length, + put_cd_files_loop(LHRest, Output, Out1, Sz1). + +%% put end marker of central directory, the last record in the archive +put_eocd(N, Pos, Sz, Comment, Output, Out0) -> + %% BComment = list_to_binary(Comment), + CommentSz = length(Comment), % size(BComment), + EOCD = #eocd{disk_num = 0, + start_disk_num = 0, + entries_on_disk = N, + entries = N, + size = Sz, + offset = Pos, + zip_comment_length = CommentSz}, + BEOCD = eocd_to_bin(EOCD), + B = [<>, BEOCD, Comment], % BComment], + Output({write, B}, Out0). + +get_filename({Name, _}, Type) -> + get_filename(Name, Type); +get_filename(Name, regular) -> + Name; +get_filename(Name, directory) -> + %% Ensure trailing slash + case lists:reverse(Name) of + [$/ | _Rev] -> Name; + Rev -> lists:reverse([$/ | Rev]) + end. + +add_cwd(_CWD, {_Name, _} = F) -> F; +add_cwd("", F) -> F; +add_cwd(CWD, F) -> filename:join(CWD, F). + +%% already compressed data should be stored as is in archive, +%% a simple name-match is used to check for this +%% files smaller than 10 bytes are also stored, not compressed +get_comp_method(_, N, _, _) when is_integer(N), N < 10 -> + ?STORED; +get_comp_method(_, _, _, directory) -> + ?STORED; +get_comp_method(F, _, #zip_opts{compress = Compress, uncompress = Uncompress}, _) -> + Ext = filename:extension(F), + Test = fun(Which) -> (Which =:= all) orelse lists:member(Ext, Which) end, + case Test(Compress) andalso not Test(Uncompress) of + true -> ?DEFLATED; + false -> ?STORED + end. + +put_z_files([], _Z, Out, Pos, _Opts, Acc) -> + {Out, lists:reverse(Acc, []), Pos}; +put_z_files([F | Rest], Z, Out0, Pos0, + #zip_opts{input = Input, output = Output, open_opts = OpO, + feedback = FB, cwd = CWD} = Opts, Acc) -> + In0 = [], + F1 = add_cwd(CWD, F), + FileInfo = Input({file_info, F1}, In0), + Type = FileInfo#file_info.type, + UncompSize = + case Type of + regular -> FileInfo#file_info.size; + directory -> 0 + end, + FileName = get_filename(F, Type), + CompMethod = get_comp_method(FileName, UncompSize, Opts, Type), + LH = local_file_header_from_info_method_name(FileInfo, UncompSize, CompMethod, FileName), + BLH = local_file_header_to_bin(LH), + B = [<>, BLH], + Out1 = Output({write, B}, Out0), + Out2 = Output({write, FileName}, Out1), + {Out3, CompSize, CRC} = put_z_file(CompMethod, UncompSize, Out2, F1, + 0, Input, Output, OpO, Z, Type), + FB(FileName), + Patch = <>, + Out4 = Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET, Patch}, Out3), + Out5 = Output({seek, eof, 0}, Out4), + Pos1 = Pos0 + ?LOCAL_FILE_HEADER_SZ + LH#local_file_header.file_name_length, + Pos2 = Pos1 + CompSize, + LH2 = LH#local_file_header{comp_size = CompSize, crc32 = CRC}, + ThisAcc = [{LH2, FileName, Pos0}], + {Out6, SubAcc, Pos3} = + case Type of + regular -> + {Out5, ThisAcc, Pos2}; + directory -> + Files = Input({list_dir, F1}, []), + RevFiles = reverse_join_files(F, Files, []), + put_z_files(RevFiles, Z, Out5, Pos2, Opts, ThisAcc) + end, + Acc2 = lists:reverse(SubAcc) ++ Acc, + put_z_files(Rest, Z, Out6, Pos3, Opts, Acc2). + +reverse_join_files(Dir, [File | Files], Acc) -> + reverse_join_files(Dir, Files, [filename:join([Dir, File]) | Acc]); +reverse_join_files(_Dir, [], Acc) -> + Acc. + +%% flag for zlib +-define(MAX_WBITS, 15). + +%% compress a file +put_z_file(_Method, Sz, Out, _F, Pos, _Input, _Output, _OpO, _Z, directory) -> + {Out, Pos + Sz, 0}; +put_z_file(_Method, 0, Out, _F, Pos, _Input, _Output, _OpO, _Z, regular) -> + {Out, Pos, 0}; +put_z_file(?STORED, UncompSize, Out0, F, Pos0, Input, Output, OpO, Z, regular) -> + In0 = [], + In1 = Input({open, F, OpO -- [write]}, In0), + CRC0 = zlib:crc32(Z, <<>>), + {Data, In2} = Input({read, UncompSize}, In1), + Out1 = Output({write, Data}, Out0), + CRC = zlib:crc32(Z, CRC0, Data), + Input(close, In2), + {Out1, Pos0+erlang:iolist_size(Data), CRC}; +put_z_file(?DEFLATED, UncompSize, Out0, F, Pos0, Input, Output, OpO, Z, regular) -> + In0 = [], + In1 = Input({open, F, OpO -- [write]}, In0), + ok = zlib:deflateInit(Z, default, deflated, -?MAX_WBITS, 8, default), + {Out1, Pos1} = + put_z_data_loop(UncompSize, In1, Out0, Pos0, Input, Output, Z), + CRC = zlib:crc32(Z), + ok = zlib:deflateEnd(Z), + Input(close, In1), + {Out1, Pos1, CRC}. + +%% zlib is finished with the last chunk compressed +get_sync(N, N) -> finish; +get_sync(_, _) -> full. + +%% compress data +put_z_data_loop(0, _In, Out, Pos, _Input, _Output, _Z) -> + {Out, Pos}; +put_z_data_loop(UncompSize, In0, Out0, Pos0, Input, Output, Z) -> + N = erlang:min(?WRITE_BLOCK_SIZE, UncompSize), + case Input({read, N}, In0) of + {eof, _In1} -> + {Out0, Pos0}; + {Uncompressed, In1} -> + Compressed = zlib:deflate(Z, Uncompressed, get_sync(N, UncompSize)), + Sz = erlang:iolist_size(Compressed), + Out1 = Output({write, Compressed}, Out0), + put_z_data_loop(UncompSize - N, In1, Out1, Pos0 + Sz, + Input, Output, Z) + end. + +%% raw iterators over central dir + +%% name only +raw_name_only(CD, FileName, _FileComment, _BExtraField, Acc) + when is_record(CD, cd_file_header) -> + [FileName | Acc]; +raw_name_only(EOCD, _, _Comment, _, Acc) when is_record(EOCD, eocd) -> + Acc. + +%% for printing directory (t/1) +raw_short_print_info_etc(CD, FileName, _FileComment, _BExtraField, Acc) + when is_record(CD, cd_file_header) -> + print_file_name(FileName), + Acc; +raw_short_print_info_etc(EOCD, X, Comment, Y, Acc) when is_record(EOCD, eocd) -> + raw_long_print_info_etc(EOCD, X, Comment, Y, Acc). + +print_file_name(FileName) -> + io:format("~s\n", [FileName]). + + +%% for printing directory (tt/1) +raw_long_print_info_etc(#cd_file_header{comp_size = CompSize, + uncomp_size = UncompSize, + last_mod_date = LMDate, + last_mod_time = LMTime}, + FileName, FileComment, _BExtraField, Acc) -> + MTime = dos_date_time_to_datetime(LMDate, LMTime), + print_header(CompSize, MTime, UncompSize, FileName, FileComment), + Acc; +raw_long_print_info_etc(EOCD, _, Comment, _, Acc) when is_record(EOCD, eocd) -> + print_comment(Comment), + Acc. + +print_header(CompSize, MTime, UncompSize, FileName, FileComment) -> + io:format("~8w ~s ~8w ~2w% ~s ~s\n", + [CompSize, time_to_string(MTime), UncompSize, + get_percent(CompSize, UncompSize), FileName, FileComment]). + +print_comment("") -> + ok; +print_comment(Comment) -> + io:format("Archive comment: ~s\n", [Comment]). + +get_percent(_, 0) -> 100; +get_percent(CompSize, Size) -> round(CompSize * 100 / Size). + +%% time formatting ("borrowed" from erl_tar.erl) +time_to_string({{Y, Mon, Day}, {H, Min, _}}) -> + io_lib:format("~s ~2w ~s:~s ~w", + [month(Mon), Day, two_d(H), two_d(Min), Y]). + +two_d(N) -> + tl(integer_to_list(N + 100)). + +month(1) -> "Jan"; +month(2) -> "Feb"; +month(3) -> "Mar"; +month(4) -> "Apr"; +month(5) -> "May"; +month(6) -> "Jun"; +month(7) -> "Jul"; +month(8) -> "Aug"; +month(9) -> "Sep"; +month(10) -> "Oct"; +month(11) -> "Nov"; +month(12) -> "Dec". + +%% zip header functions +cd_file_header_from_lh_and_pos(LH, Pos) -> + #local_file_header{version_needed = VersionNeeded, + gp_flag = GPFlag, + comp_method = CompMethod, + last_mod_time = LastModTime, + last_mod_date = LastModDate, + crc32 = CRC32, + comp_size = CompSize, + uncomp_size = UncompSize, + file_name_length = FileNameLength, + extra_field_length = ExtraFieldLength} = LH, + #cd_file_header{version_made_by = 20, + version_needed = VersionNeeded, + gp_flag = GPFlag, + comp_method = CompMethod, + last_mod_time = LastModTime, + last_mod_date = LastModDate, + crc32 = CRC32, + comp_size = CompSize, + uncomp_size = UncompSize, + file_name_length = FileNameLength, + extra_field_length = ExtraFieldLength, + file_comment_length = 0, % FileCommentLength, + disk_num_start = 1, % DiskNumStart, + internal_attr = 0, % InternalAttr, + external_attr = 0, % ExternalAttr, + local_header_offset = Pos}. + +cd_file_header_to_bin( + #cd_file_header{version_made_by = VersionMadeBy, + version_needed = VersionNeeded, + gp_flag = GPFlag, + comp_method = CompMethod, + last_mod_time = LastModTime, + last_mod_date = LastModDate, + crc32 = CRC32, + comp_size = CompSize, + uncomp_size = UncompSize, + file_name_length = FileNameLength, + extra_field_length = ExtraFieldLength, + file_comment_length = FileCommentLength, + disk_num_start = DiskNumStart, + internal_attr = InternalAttr, + external_attr = ExternalAttr, + local_header_offset = LocalHeaderOffset}) -> + <>. + +local_file_header_to_bin( + #local_file_header{version_needed = VersionNeeded, + gp_flag = GPFlag, + comp_method = CompMethod, + last_mod_time = LastModTime, + last_mod_date = LastModDate, + crc32 = CRC32, + comp_size = CompSize, + uncomp_size = UncompSize, + file_name_length = FileNameLength, + extra_field_length = ExtraFieldLength}) -> + <>. + +eocd_to_bin(#eocd{disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + zip_comment_length = ZipCommentLength}) -> + <>. + +%% put together a local file header +local_file_header_from_info_method_name(#file_info{mtime = MTime}, + UncompSize, + CompMethod, Name) -> + {ModDate, ModTime} = dos_date_time_from_datetime(MTime), + #local_file_header{version_needed = 20, + gp_flag = 0, + comp_method = CompMethod, + last_mod_time = ModTime, + last_mod_date = ModDate, + crc32 = -1, + comp_size = -1, + uncomp_size = UncompSize, + file_name_length = length(Name), + extra_field_length = 0}. + + +%% small, simple, stupid zip-archive server +server_loop(OpenZip) -> + receive + {From, {open, Archive, Options}} -> + case openzip_open(Archive, Options) of + {ok, NewOpenZip} -> + From ! {self(), {ok, self()}}, + server_loop(NewOpenZip); + Error -> + From ! {self(), Error} + end; + {From, close} -> + From ! {self(), openzip_close(OpenZip)}; + {From, get} -> + From ! {self(), openzip_get(OpenZip)}, + server_loop(OpenZip); + {From, {get, FileName}} -> + From ! {self(), openzip_get(FileName, OpenZip)}, + server_loop(OpenZip); + {From, list_dir} -> + From ! {self(), openzip_list_dir(OpenZip)}, + server_loop(OpenZip); + {From, {list_dir, Opts}} -> + From ! {self(), openzip_list_dir(OpenZip, Opts)}, + server_loop(OpenZip); + {From, get_state} -> + From ! {self(), OpenZip}, + server_loop(OpenZip); + _ -> + {error, bad_msg} + end. + +zip_open(Archive) -> zip_open(Archive, []). + +zip_open(Archive, Options) -> + Pid = spawn(fun() -> server_loop(not_open) end), + request(self(), Pid, {open, Archive, Options}). + +zip_get(Pid) when is_pid(Pid) -> + request(self(), Pid, get). + +zip_close(Pid) when is_pid(Pid) -> + request(self(), Pid, close). + +zip_get(FileName, Pid) when is_pid(Pid) -> + request(self(), Pid, {get, FileName}). + +zip_list_dir(Pid) when is_pid(Pid) -> + request(self(), Pid, list_dir). + +zip_list_dir(Pid, Opts) when is_pid(Pid) -> + request(self(), Pid, {list_dir, Opts}). + +zip_get_state(Pid) when is_pid(Pid) -> + request(self(), Pid, get_state). + +request(Self, Pid, Req) -> + Pid ! {Self, Req}, + receive + {Pid, R} -> R + end. + +zip_t(Pid) when is_pid(Pid) -> + Openzip = request(self(), Pid, get_state), + openzip_t(Openzip). + +zip_tt(Pid) when is_pid(Pid) -> + Openzip = request(self(), Pid, get_state), + openzip_tt(Openzip). + +openzip_tt(#openzip{zip_comment = ZipComment, files = Files}) -> + print_comment(ZipComment), + lists_foreach(fun({#zip_file{comp_size = CompSize, + name = FileName, + comment = FileComment, + info = FI},_}) -> + #file_info{size = UncompSize, mtime = MTime} = FI, + print_header(CompSize, MTime, UncompSize, + FileName, FileComment) + end, Files), + ok. + +openzip_t(#openzip{zip_comment = ZipComment, files = Files}) -> + print_comment(ZipComment), + lists_foreach(fun({#zip_file{name = FileName},_}) -> + print_file_name(FileName) + end, Files), + ok. + +lists_foreach(_, []) -> + ok; +lists_foreach(F, [Hd|Tl]) -> + F(Hd), + lists_foreach(F, Tl). + +%% option utils +get_openzip_opt([], Opts) -> + Opts; +get_openzip_opt([cooked | Rest], #openzip_opts{open_opts = OO} = Opts) -> + get_openzip_opt(Rest, Opts#openzip_opts{open_opts = OO -- [raw]}); +get_openzip_opt([memory | Rest], Opts) -> + get_openzip_opt(Rest, Opts#openzip_opts{output = fun binary_io/2}); +get_openzip_opt([{cwd, CWD} | Rest], Opts) -> + get_openzip_opt(Rest, Opts#openzip_opts{cwd = CWD}); +get_openzip_opt([Unknown | _Rest], _Opts) -> + throw({bad_option, Unknown}). + +%% get the central directory from the archive +get_central_dir(In0, RawIterator, Input) -> + {B, In1} = get_end_of_central_dir(In0, ?END_OF_CENTRAL_DIR_SZ, Input), + {EOCD, BComment} = eocd_and_comment_from_bin(B), + In2 = Input({seek, bof, EOCD#eocd.offset}, In1), + N = EOCD#eocd.entries, + Acc0 = [], + Out0 = RawIterator(EOCD, "", binary_to_list(BComment), <<>>, Acc0), + get_cd_loop(N, In2, RawIterator, Input, Out0). + +get_cd_loop(0, In, _RawIterator, _Input, Acc) -> + {lists:reverse(Acc), In}; +get_cd_loop(N, In0, RawIterator, Input, Acc0) -> + {B, In1} = Input({read, ?CENTRAL_FILE_HEADER_SZ}, In0), + BCD = case B of + <> -> XBCD; + _ -> throw(bad_central_directory) + end, + CD = cd_file_header_from_bin(BCD), + FileNameLen = CD#cd_file_header.file_name_length, + ExtraLen = CD#cd_file_header.extra_field_length, + CommentLen = CD#cd_file_header.file_comment_length, + ToRead = FileNameLen + ExtraLen + CommentLen, + {B2, In2} = Input({read, ToRead}, In1), + {FileName, Comment, BExtra} = + get_name_extra_comment(B2, FileNameLen, ExtraLen, CommentLen), + Acc1 = RawIterator(CD, FileName, Comment, BExtra, Acc0), + get_cd_loop(N-1, In2, RawIterator, Input, Acc1). + +get_name_extra_comment(B, FileNameLen, ExtraLen, CommentLen) -> + case B of + <> -> + {binary_to_list(BFileName), binary_to_list(BComment), BExtra}; + _ -> + throw(bad_central_directory) + end. + +%% get end record, containing the offset to the central directory +%% the end record is always at the end of the file BUT alas it is +%% of variable size (yes that's dumb!) +get_end_of_central_dir(_In, Sz, _Input) when Sz > 16#ffff -> + throw(bad_eocd); +get_end_of_central_dir(In0, Sz, Input) -> + In1 = Input({seek, eof, -Sz}, In0), + {B, In2} = Input({read, Sz}, In1), + case find_eocd_header(B) of + none -> + get_end_of_central_dir(In2, Sz+Sz, Input); + Header -> + {Header, In2} + end. + +%% find the end record by matching for it +find_eocd_header(<>) -> + Rest; +find_eocd_header(<<_:8, Rest/binary>>) + when byte_size(Rest) > ?END_OF_CENTRAL_DIR_SZ-4 -> + find_eocd_header(Rest); +find_eocd_header(_) -> + none. + +%% from a central directory record, filter and accumulate what we need + +%% with zip_file_extra +raw_file_info_etc(CD, FileName, FileComment, BExtraField, Acc) + when is_record(CD, cd_file_header) -> + #cd_file_header{comp_size = CompSize, + local_header_offset = Offset, + crc32 = CRC} = CD, + FileInfo = cd_file_header_to_file_info(FileName, CD, BExtraField), + [{#zip_file{name = FileName, info = FileInfo, comment = FileComment, + offset = Offset, comp_size = CompSize}, #zip_file_extra{crc32 = CRC}} | Acc]; +raw_file_info_etc(EOCD, _, Comment, _, Acc) when is_record(EOCD, eocd) -> + [#zip_comment{comment = Comment} | Acc]. + +%% without zip_file_extra +raw_file_info_public(CD, FileName, FileComment, BExtraField, Acc0) -> + [H1|T] = raw_file_info_etc(CD,FileName,FileComment,BExtraField,Acc0), + H2 = case H1 of + {ZF,Extra} when is_record(Extra,zip_file_extra) -> ZF; + Other -> Other + end, + [H2|T]. + + +%% make a file_info from a central directory header +cd_file_header_to_file_info(FileName, + #cd_file_header{uncomp_size = UncompSize, + last_mod_time = ModTime, + last_mod_date = ModDate}, + ExtraField) -> + T = dos_date_time_to_datetime(ModDate, ModTime), + Type = + case lists:last(FileName) of + $/ -> directory; + _ -> regular + end, + FI = #file_info{size = UncompSize, + type = Type, + access = read_write, + atime = T, + mtime = T, + ctime = T, + mode = 8#066, + links = 1, + major_device = 0, + minor_device = 0, + inode = 0, + uid = 0, + gid = 0}, + add_extra_info(FI, ExtraField). + +%% add extra info to file (some day when we implement it) +add_extra_info(FI, <>) -> + FI; % not yet supported, some other day... +add_extra_info(FI, <>) -> + _UnixExtra = unix_extra_field_and_var_from_bin(Rest), + FI; % not yet supported, and not widely used +add_extra_info(FI, _) -> + FI. + + + +%% get all files using file list +%% (the offset list is already filtered on which file to get... isn't it?) +get_z_files([], _Z, _In, _Opts, Acc) -> + lists:reverse(Acc); +get_z_files([#zip_comment{comment = _} | Rest], Z, In, Opts, Acc) -> + get_z_files(Rest, Z, In, Opts, Acc); +get_z_files([{#zip_file{offset = Offset},_} = ZFile | Rest], Z, In0, + #unzip_opts{input = Input, output = Output, open_opts = OpO, + file_filter = Filter, feedback = FB, + cwd = CWD} = Opts, Acc0) -> + case Filter(ZFile) of + true -> + In1 = Input({seek, bof, Offset}, In0), + {In2, Acc1} = + case get_z_file(In1, Z, Input, Output, OpO, FB, CWD, ZFile) of + {file, GZD, Inx} -> {Inx, [GZD | Acc0]}; + {dir, Inx} -> {Inx, Acc0} + end, + get_z_files(Rest, Z, In2, Opts, Acc1); + _ -> + get_z_files(Rest, Z, In0, Opts, Acc0) + end. + +%% get a file from the archive, reading chunks +get_z_file(In0, Z, Input, Output, OpO, FB, CWD, {ZipFile,Extra}) -> + case Input({read, ?LOCAL_FILE_HEADER_SZ}, In0) of + {eof, In1} -> + {eof, In1}; + %% Local File Header + {<>, In1} -> + LH = local_file_header_from_bin(B), + #local_file_header{gp_flag = GPFlag, + comp_method = CompMethod, + file_name_length = FileNameLen, + extra_field_length = ExtraLen} = LH, + + {CompSize,CRC32} = case GPFlag band 8 =:= 8 of + true -> {ZipFile#zip_file.comp_size, + Extra#zip_file_extra.crc32}; + false -> {LH#local_file_header.comp_size, + LH#local_file_header.crc32} + end, + {BFileN, In3} = Input({read, FileNameLen + ExtraLen}, In1), + {FileName, _} = get_file_name_extra(FileNameLen, ExtraLen, BFileN), + FileName1 = add_cwd(CWD, FileName), + case lists:last(FileName) of + $/ -> + %% perhaps this should always be done? + Output({ensure_dir,FileName1},[]), + {dir, In3}; + _ -> + %% FileInfo = local_file_header_to_file_info(LH) + %%{Out, In4, CRC, UncompSize} = + {Out, In4, CRC, _UncompSize} = + get_z_data(CompMethod, In3, FileName1, + CompSize, Input, Output, OpO, Z), + In5 = skip_z_data_descriptor(GPFlag, Input, In4), + %% TODO This should be fixed some day: + %% In5 = Input({set_file_info, FileName, FileInfo#file_info{size=UncompSize}}, In4), + FB(FileName), + CRC =:= CRC32 orelse throw({bad_crc, FileName}), + {file, Out, In5} + end; + _ -> + throw(bad_local_file_header) + end. + + +get_file_name_extra(FileNameLen, ExtraLen, B) -> + case B of + <> -> + {binary_to_list(BFileName), BExtra}; + _ -> + throw(bad_file_header) + end. + +%% get compressed or stored data +get_z_data(?DEFLATED, In0, FileName, CompSize, Input, Output, OpO, Z) -> + ok = zlib:inflateInit(Z, -?MAX_WBITS), + Out0 = Output({open, FileName, [write | OpO]}, []), + {In1, Out1, UncompSize} = get_z_data_loop(CompSize, 0, In0, Out0, Input, Output, Z), + CRC = zlib:crc32(Z), + ?CATCH zlib:inflateEnd(Z), + Out2 = Output({close, FileName}, Out1), + {Out2, In1, CRC, UncompSize}; +get_z_data(?STORED, In0, FileName, CompSize, Input, Output, OpO, Z) -> + Out0 = Output({open, FileName, [write | OpO]}, []), + CRC0 = zlib:crc32(Z, <<>>), + {In1, Out1, CRC} = copy_data_loop(CompSize, In0, Out0, Input, Output, + CRC0, Z), + Out2 = Output({close, FileName}, Out1), + {Out2, In1, CRC, CompSize}; +get_z_data(_, _, _, _, _, _, _, _) -> + throw(bad_file_header). + +copy_data_loop(0, In, Out, _Input, _Output, CRC, _Z) -> + {In, Out, CRC}; +copy_data_loop(CompSize, In0, Out0, Input, Output, CRC0, Z) -> + N = erlang:min(?READ_BLOCK_SIZE, CompSize), + case Input({read, N}, In0) of + {eof, In1} -> {Out0, In1}; + {Uncompressed, In1} -> + CRC1 = zlib:crc32(Z, CRC0, Uncompressed), + Out1 = Output({write, Uncompressed}, Out0), + copy_data_loop(CompSize-N, In1, Out1, Input, Output, CRC1, Z) + end. + +get_z_data_loop(0, UncompSize, In, Out, _Input, _Output, _Z) -> + {In, Out, UncompSize}; +get_z_data_loop(CompSize, UncompSize, In0, Out0, Input, Output, Z) -> + N = erlang:min(?READ_BLOCK_SIZE, CompSize), + case Input({read, N}, In0) of + {eof, In1} -> + {Out0, In1}; + {Compressed, In1} -> + Uncompressed = zlib:inflate(Z, Compressed), + Out1 = Output({write, Uncompressed}, Out0), + get_z_data_loop(CompSize-N, UncompSize + iolist_size(Uncompressed), + In1, Out1, Input, Output, Z) + end. + + +%% skip data descriptor if any +skip_z_data_descriptor(GPFlag, Input, In0) when GPFlag band 8 =:= 8 -> + Input({seek, cur, 12}, In0); +skip_z_data_descriptor(_GPFlag, _Input, In0) -> + In0. + +%% convert between erlang datetime and the MSDOS date and time +%% that's stored in the zip archive +%% MSDOS Time MSDOS Date +%% bit 0 - 4 5 - 10 11 - 15 16 - 20 21 - 24 25 - 31 +%% value second minute hour day (1 - 31) month (1 - 12) years from 1980 +dos_date_time_to_datetime(DosDate, DosTime) -> + <> = <>, + <> = <>, + {{YearFrom1980+1980, Month, Day}, + {Hour, Min, Sec}}. + +dos_date_time_from_datetime({{Year, Month, Day}, {Hour, Min, Sec}}) -> + YearFrom1980 = Year-1980, + <> = <>, + <> = <>, + {DosDate, DosTime}. + +unix_extra_field_and_var_from_bin(<>) -> + {#unix_extra_field{atime = ATime, + mtime = MTime, + uid = UID, + gid = GID}, + Var}; +unix_extra_field_and_var_from_bin(_) -> + throw(bad_unix_extra_field). + + +%% A pwrite-like function for iolists (used by memory-option) + +split_iolist(B, Pos) when is_binary(B) -> + split_binary(B, Pos); +split_iolist(L, Pos) when is_list(L) -> + splitter([], L, Pos). + +splitter(Left, Right, 0) -> + {Left, Right}; +splitter(Left, [A | Right], RelPos) when is_list(A) or is_binary(A) -> + Sz = erlang:iolist_size(A), + case Sz > RelPos of + true -> + {Leftx, Rightx} = split_iolist(A, RelPos), + {[Left | Leftx], [Rightx, Right]}; + _ -> + splitter([Left | A], Right, RelPos - Sz) + end; +splitter(Left, [A | Right], RelPos) when is_integer(A) -> + splitter([Left, A], Right, RelPos - 1); +splitter(Left, Right, RelPos) when is_binary(Right) -> + splitter(Left, [Right], RelPos). + +skip_iolist(B, Pos) when is_binary(B) -> + case B of + <<_:Pos/binary, Bin/binary>> -> Bin; + _ -> <<>> + end; +skip_iolist(L, Pos) when is_list(L) -> + skipper(L, Pos). + +skipper(Right, 0) -> + Right; +skipper([A | Right], RelPos) when is_list(A) or is_binary(A) -> + Sz = erlang:iolist_size(A), + case Sz > RelPos of + true -> + Rightx = skip_iolist(A, RelPos), + [Rightx, Right]; + _ -> + skip_iolist(Right, RelPos - Sz) + end; +skipper([A | Right], RelPos) when is_integer(A) -> + skip_iolist(Right, RelPos - 1). + +pwrite_iolist(Iolist, Pos, Bin) -> + {Left, Right} = split_iolist(Iolist, Pos), + Sz = erlang:iolist_size(Bin), + R = skip_iolist(Right, Sz), + [Left, Bin | R]. + +pwrite_binary(B, Pos, Bin) -> + erlang:iolist_to_binary(pwrite_iolist(B, Pos, Bin)). + + +%% ZIP header manipulations +eocd_and_comment_from_bin(<>) -> + {#eocd{disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + zip_comment_length = ZipCommentLength}, + Comment}; +eocd_and_comment_from_bin(_) -> + throw(bad_eocd). + +cd_file_header_from_bin(<>) -> + #cd_file_header{version_made_by = VersionMadeBy, + version_needed = VersionNeeded, + gp_flag = GPFlag, + comp_method = CompMethod, + last_mod_time = LastModTime, + last_mod_date = LastModDate, + crc32 = CRC32, + comp_size = CompSize, + uncomp_size = UncompSize, + file_name_length = FileNameLength, + extra_field_length = ExtraFieldLength, + file_comment_length = FileCommentLength, + disk_num_start = DiskNumStart, + internal_attr = InternalAttr, + external_attr = ExternalAttr, + local_header_offset = LocalHeaderOffset}; +cd_file_header_from_bin(_) -> + throw(bad_cd_file_header). + +local_file_header_from_bin(<>) -> + #local_file_header{version_needed = VersionNeeded, + gp_flag = GPFlag, + comp_method = CompMethod, + last_mod_time = LastModTime, + last_mod_date = LastModDate, + crc32 = CRC32, + comp_size = CompSize, + uncomp_size = UncompSize, + file_name_length = FileNameLength, + extra_field_length = ExtraFieldLength}; +local_file_header_from_bin(_) -> + throw(bad_local_file_header). + +%% make a file_info from a local directory header +%% local_file_header_to_file_info( +%% #local_file_header{last_mod_time = ModTime, +%% last_mod_date = ModDate, +%% uncomp_size = UncompSize}) -> +%% T = dos_date_time_to_datetime(ModDate, ModTime), +%% FI = #file_info{size = UncompSize, +%% type = regular, +%% access = read_write, +%% atime = T, +%% mtime = T, +%% ctime = T, +%% mode = 8#066, +%% links = 1, +%% major_device = 0, +%% minor_device = 0, +%% inode = 0, +%% uid = 0, +%% gid = 0}, +%% FI. + +%% io functions +binary_io({file_info, {_Filename, _B, #file_info{} = FI}}, _A) -> + FI; +binary_io({file_info, {_Filename, B}}, A) -> + binary_io({file_info, B}, A); +binary_io({file_info, B}, _) -> + {Type, Size} = + if + is_binary(B) -> {regular, byte_size(B)}; + B =:= directory -> {directory, 0} + end, + Now = calendar:local_time(), + #file_info{size = Size, type = Type, + access = read_write, atime = Now, + mtime = Now, ctime = Now, mode = 0, + links = 1, major_device = 0, + minor_device = 0, inode = 0, + uid = 0, gid = 0}; +binary_io({open, {_Filename, B, _FI}, _Opts}, _) -> + {0, B}; +binary_io({open, {_Filename, B}, _Opts}, _) -> + {0, B}; +binary_io({open, B, _Opts}, _) when is_binary(B) -> + {0, B}; +binary_io({open, Filename, _Opts}, _) when is_list(Filename) -> + {0, <<>>}; +binary_io({read, N}, {Pos, B}) when Pos >= byte_size(B) -> + {eof, {Pos+N, B}}; +binary_io({read, N}, {Pos, B}) when Pos + N > byte_size(B) -> + <<_:Pos/binary, Read/binary>> = B, + {Read, {byte_size(B), B}}; +binary_io({pread, Pos, N}, {OldPos, B}) -> + case B of + <<_:Pos/binary, Read:N/binary, _Rest/binary>> -> + {Read, {Pos+N, B}}; + _ -> + {eof, {OldPos, B}} + end; +binary_io({read, N}, {Pos, B}) -> + <<_:Pos/binary, Read:N/binary, _/binary>> = B, + {Read, {Pos+N, B}}; +binary_io({seek, bof, Pos}, {_OldPos, B}) -> + {Pos, B}; +binary_io({seek, cur, Pos}, {OldPos, B}) -> + {OldPos + Pos, B}; +binary_io({seek, eof, Pos}, {_OldPos, B}) -> + {byte_size(B) + Pos, B}; +binary_io({pwrite, Pos, Data}, {OldPos, B}) -> + {OldPos, pwrite_binary(B, Pos, Data)}; +binary_io({write, Data}, {Pos, B}) -> + {Pos + erlang:iolist_size(Data), pwrite_binary(B, Pos, Data)}; +binary_io(close, {_Pos, B}) -> + B; +binary_io({close, FN}, {_Pos, B}) -> + {FN, B}; +binary_io({list_dir, _F}, _B) -> + []; +binary_io({set_file_info, _F, _FI}, B) -> + B; +binary_io({ensure_dir, _Dir}, B) -> + B. + +file_io({file_info, F}, _) -> + case file:read_file_info(F) of + {ok, Info} -> Info; + {error, E} -> throw(E) + end; +file_io({open, FN, Opts}, _) -> + case lists:member(write, Opts) of + true -> ok = filelib:ensure_dir(FN); + _ -> ok + end, + case file:open(FN, Opts++[binary]) of + {ok, H} -> H; + {error, E} -> throw(E) + end; +file_io({read, N}, H) -> + case file:read(H, N) of + {ok, B} -> {B, H}; + eof -> {eof, H}; + {error, E} -> throw(E) + end; +file_io({pread, Pos, N}, H) -> + case file:pread(H, Pos, N) of + {ok, B} -> {B, H}; + eof -> {eof, H}; + {error, E} -> throw(E) + end; +file_io({seek, S, Pos}, H) -> + case file:position(H, {S, Pos}) of + {ok, _NewPos} -> H; + {error, Error} -> throw(Error) + end; +file_io({write, Data}, H) -> + case file:write(H, Data) of + ok -> H; + {error, Error} -> throw(Error) + end; +file_io({pwrite, Pos, Data}, H) -> + case file:pwrite(H, Pos, Data) of + ok -> H; + {error, Error} -> throw(Error) + end; +file_io({close, FN}, H) -> + case file:close(H) of + ok -> FN; + {error, Error} -> throw(Error) + end; +file_io(close, H) -> + file_io({close, ok}, H); +file_io({list_dir, F}, _H) -> + case file:list_dir(F) of + {ok, Files} -> Files; + {error, Error} -> throw(Error) + end; +file_io({set_file_info, F, FI}, H) -> + case file:write_file_info(F, FI) of + ok -> H; + {error, Error} -> throw(Error) + end; +file_io({ensure_dir, Dir}, H) -> + ok = filelib:ensure_dir(Dir), + H. -- cgit v1.2.3