%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2001-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. %% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. %% %% %CopyrightEnd% %% %% @author Richard Carlsson <richardc@it.uu.se> %% @copyright 1999-2004 Richard Carlsson %% @doc Abstract environments, supporting self-referential bindings and %% automatic new-key generation. %% The current implementation is based on Erlang standard library %% dictionaries. %%% -define(DEBUG, true). -module(rec_env). -export([bind/3, bind_list/3, bind_recursive/4, delete/2, empty/0, get/2, is_defined/2, is_empty/1, keys/1, lookup/2, new_key/1, new_key/2, new_keys/2, new_keys/3, size/1, to_list/1]). -export_type([environment/0]). -ifdef(DEBUG). -export([test/1, test_custom/1, test_custom/2]). -endif. -ifdef(DEBUG). %% Code for testing: %%@hidden test(N) -> test_0(integer, N). %%@hidden test_custom(N) -> F = fun (X) -> list_to_atom("X"++integer_to_list(X)) end, test_custom(F, N). %%@hidden test_custom(F, N) -> test_0({custom, F}, N). test_0(Type, N) -> put(new_key_calls, 0), put(new_key_retries, 0), put(new_key_max, 0), Env = test_1(Type, N, empty()), io:fwrite("\ncalls: ~w.\n", [get(new_key_calls)]), io:fwrite("\nretries: ~w.\n", [get(new_key_retries)]), io:fwrite("\nmax: ~w.\n", [get(new_key_max)]), dict:to_list(element(1,Env)). test_1(integer = Type, N, Env) when is_integer(N), N > 0 -> Key = new_key(Env), test_1(Type, N - 1, bind(Key, value, Env)); test_1({custom, F} = Type, N, Env) when is_integer(N), N > 0 -> Key = new_key(F, Env), test_1(Type, N - 1, bind(Key, value, Env)); test_1(_,0, Env) -> Env. -endif. %%@clear %% Representation: %% %% environment() = [Mapping] %% %% Mapping = {map, Dict} | {rec, Dict, Dict} %% Dict = dict:dictionary() %% %% An empty environment is a list containing a single `{map, Dict}' %% element - empty lists are not valid environments. To find a key in an %% environment, it is searched for in each mapping in the list, in %% order, until it the key is found in some mapping, or the end of the %% list is reached. In a 'rec' mapping, we keep the original dictionary %% together with a version where entries may have been deleted - this %% makes it possible to garbage collect the entire 'rec' mapping when %% all its entries are unused (for example, by being shadowed by later %% definitions). %% ===================================================================== %% @type environment(). An abstract environment. -type mapping() :: {'map', dict:dict()} | {'rec', dict:dict(), dict:dict()}. -type environment() :: [mapping(),...]. %% ===================================================================== %% @spec empty() -> environment() %% %% @doc Returns an empty environment. -spec empty() -> environment(). empty() -> [{map, dict:new()}]. %% ===================================================================== %% @spec is_empty(Env::environment()) -> boolean() %% %% @doc Returns <code>true</code> if the environment is empty, otherwise %% <code>false</code>. -spec is_empty(environment()) -> boolean(). is_empty([{map, Dict} | Es]) -> N = dict:size(Dict), if N =/= 0 -> false; Es =:= [] -> true; true -> is_empty(Es) end; is_empty([{rec, Dict, _} | Es]) -> N = dict:size(Dict), if N =/= 0 -> false; Es =:= [] -> true; true -> is_empty(Es) end. %% ===================================================================== %% @spec size(Env::environment()) -> integer() %% %% @doc Returns the number of entries in an environment. %% (The name 'size' cannot be used in local calls, since there exists a %% built-in function with the same name.) -spec size(environment()) -> non_neg_integer(). size(Env) -> env_size(Env). env_size([{map, Dict}]) -> dict:size(Dict); env_size([{map, Dict} | Env]) -> dict:size(Dict) + env_size(Env); env_size([{rec, Dict, _Dict0} | Env]) -> dict:size(Dict) + env_size(Env). %% ===================================================================== %% @spec is_defined(Key, Env) -> boolean() %% %% Key = term() %% Env = environment() %% %% @doc Returns <code>true</code> if <code>Key</code> is bound in the %% environment, otherwise <code>false</code>. -spec is_defined(term(), environment()) -> boolean(). is_defined(Key, [{map, Dict} | Env]) -> case dict:is_key(Key, Dict) of true -> true; false when Env =:= [] -> false; false -> is_defined(Key, Env) end; is_defined(Key, [{rec, Dict, _Dict0} | Env]) -> dict:is_key(Key, Dict) orelse is_defined(Key, Env). %% ===================================================================== %% @spec keys(Env::environment()) -> [term()] %% %% @doc Returns the ordered list of all keys in the environment. -spec keys(environment()) -> [term()]. keys(Env) -> lists:sort(keys(Env, [])). keys([{map, Dict}], S) -> dict:fetch_keys(Dict) ++ S; keys([{map, Dict} | Env], S) -> keys(Env, dict:fetch_keys(Dict) ++ S); keys([{rec, Dict, _Dict0} | Env], S) -> keys(Env, dict:fetch_keys(Dict) ++ S). %% ===================================================================== %% @spec to_list(Env) -> [{Key, Value}] %% %% Env = environment() %% Key = term() %% Value = term() %% %% @doc Returns an ordered list of <code>{Key, Value}</code> pairs for %% all keys in <code>Env</code>. <code>Value</code> is the same as that %% returned by {@link get/2}. -spec to_list(environment()) -> [{term(), term()}]. to_list(Env) -> lists:sort(to_list(Env, [])). to_list([{map, Dict}], S) -> dict:to_list(Dict) ++ S; to_list([{map, Dict} | Env], S) -> to_list(Env, dict:to_list(Dict) ++ S); to_list([{rec, Dict, _Dict0} | Env], S) -> to_list(Env, dict:to_list(Dict) ++ S). %% ===================================================================== %% @spec bind(Key, Value, Env) -> environment() %% %% Key = term() %% Value = term() %% Env = environment() %% %% @doc Make a nonrecursive entry. This binds <code>Key</code> to %% <code>Value</code>. If the key already existed in the environment, %% the old entry is replaced. %% Note that deletion is done to free old bindings so they can be %% garbage collected. -spec bind(term(), term(), environment()) -> environment(). bind(Key, Value, [{map, Dict}]) -> [{map, dict:store(Key, Value, Dict)}]; bind(Key, Value, [{map, Dict} | Env]) -> [{map, dict:store(Key, Value, Dict)} | delete_any(Key, Env)]; bind(Key, Value, Env) -> [{map, dict:store(Key, Value, dict:new())} | delete_any(Key, Env)]. %% ===================================================================== %% @spec bind_list(Keys, Values, Env) -> environment() %% %% Keys = [term()] %% Values = [term()] %% Env = environment() %% %% @doc Make N nonrecursive entries. This binds each key in %% <code>Keys</code> to the corresponding value in %% <code>Values</code>. If some key already existed in the environment, %% the previous entry is replaced. If <code>Keys</code> does not have %% the same length as <code>Values</code>, an exception is generated. -spec bind_list([term()], [term()], environment()) -> environment(). bind_list(Ks, Vs, [{map, Dict}]) -> [{map, store_list(Ks, Vs, Dict)}]; bind_list(Ks, Vs, [{map, Dict} | Env]) -> [{map, store_list(Ks, Vs, Dict)} | delete_list(Ks, Env)]; bind_list(Ks, Vs, Env) -> [{map, store_list(Ks, Vs, dict:new())} | delete_list(Ks, Env)]. store_list([K | Ks], [V | Vs], Dict) -> store_list(Ks, Vs, dict:store(K, V, Dict)); store_list([], _, Dict) -> Dict. delete_list([K | Ks], Env) -> delete_list(Ks, delete_any(K, Env)); delete_list([], Env) -> Env. %% By not calling `delete' unless we have to, we avoid unnecessary %% rewriting of the data. delete_any(Key, Env) -> case is_defined(Key, Env) of true -> delete(Key, Env); false -> Env end. %% ===================================================================== %% @spec delete(Key, Env) -> environment() %% %% Key = term() %% Env = environment() %% %% @doc Delete an entry. This removes <code>Key</code> from the %% environment. -spec delete(term(), environment()) -> environment(). delete(Key, [{map, Dict} = E | Env]) -> case dict:is_key(Key, Dict) of true -> [{map, dict:erase(Key, Dict)} | Env]; false -> delete_1(Key, Env, E) end; delete(Key, [{rec, Dict, Dict0} = E | Env]) -> case dict:is_key(Key, Dict) of true -> %% The Dict0 component must be preserved as it is until all %% keys in Dict have been deleted. Dict1 = dict:erase(Key, Dict), case dict:size(Dict1) of 0 -> Env; % the whole {rec,...} is now garbage _ -> [{rec, Dict1, Dict0} | Env] end; false -> [E | delete(Key, Env)] end. %% This is just like above, except we pass on the preceding 'map' %% mapping in the list to enable merging when removing 'rec' mappings. delete_1(Key, [{rec, Dict, Dict0} = E | Env], E1) -> case dict:is_key(Key, Dict) of true -> Dict1 = dict:erase(Key, Dict), case dict:size(Dict1) of 0 -> concat(E1, Env); _ -> [E1, {rec, Dict1, Dict0} | Env] end; false -> [E1, E | delete(Key, Env)] end. concat({map, D1}, [{map, D2} | Env]) -> [dict:merge(fun (_K, V1, _V2) -> V1 end, D1, D2) | Env]; concat(E1, Env) -> [E1 | Env]. %% ===================================================================== %% @spec bind_recursive(Keys, Values, Fun, Env) -> NewEnv %% %% Keys = [term()] %% Values = [term()] %% Fun = (Value, Env) -> term() %% Env = environment() %% NewEnv = environment() %% %% @doc Make N recursive entries. This binds each key in %% <code>Keys</code> to the value of <code>Fun(Value, NewEnv)</code> for %% the corresponding <code>Value</code>. If <code>Keys</code> does not %% have the same length as <code>Values</code>, an exception is %% generated. If some key already existed in the environment, the old %% entry is replaced. %% %% <p>Note: the function <code>Fun</code> is evaluated each time one of %% the stored keys is looked up, but only then.</p> %% %% <p>Examples: %%<pre> %% NewEnv = bind_recursive([foo, bar], [1, 2], %% fun (V, E) -> V end, %% Env)</pre> %% %% This does nothing interesting; <code>get(foo, NewEnv)</code> yields %% <code>1</code> and <code>get(bar, NewEnv)</code> yields %% <code>2</code>, but there is more overhead than if the {@link %% bind_list/3} function had been used. %% %% <pre> %% NewEnv = bind_recursive([foo, bar], [1, 2], %% fun (V, E) -> {V, E} end, %% Env)</pre> %% %% Here, however, <code>get(foo, NewEnv)</code> will yield <code>{1, %% NewEnv}</code> and <code>get(bar, NewEnv)</code> will yield <code>{2, %% NewEnv}</code>, i.e., the environment <code>NewEnv</code> contains %% recursive bindings.</p> -spec bind_recursive([term()], [term()], fun((term(), environment()) -> term()), environment()) -> environment(). bind_recursive([], [], _, Env) -> Env; bind_recursive(Ks, Vs, F, Env) -> F1 = fun (V) -> fun (Dict) -> F(V, [{rec, Dict, Dict} | Env]) end end, Dict = bind_recursive_1(Ks, Vs, F1, dict:new()), [{rec, Dict, Dict} | Env]. bind_recursive_1([K | Ks], [V | Vs], F, Dict) -> bind_recursive_1(Ks, Vs, F, dict:store(K, F(V), Dict)); bind_recursive_1([], [], _, Dict) -> Dict. %% ===================================================================== %% @spec lookup(Key, Env) -> error | {ok, Value} %% %% Key = term() %% Env = environment() %% Value = term() %% %% @doc Returns <code>{ok, Value}</code> if <code>Key</code> is bound to %% <code>Value</code> in <code>Env</code>, and <code>error</code> %% otherwise. -spec lookup(term(), environment()) -> 'error' | {'ok', term()}. lookup(Key, [{map, Dict} | Env]) -> case dict:find(Key, Dict) of {ok, _}=Value -> Value; error when Env =:= [] -> error; error -> lookup(Key, Env) end; lookup(Key, [{rec, Dict, Dict0} | Env]) -> case dict:find(Key, Dict) of {ok, F} -> {ok, F(Dict0)}; error -> lookup(Key, Env) end. %% ===================================================================== %% @spec get(Key, Env) -> Value %% %% Key = term() %% Env = environment() %% Value = term() %% %% @doc Returns the value that <code>Key</code> is bound to in %% <code>Env</code>. Throws <code>{undefined, Key}</code> if the key %% does not exist in <code>Env</code>. -spec get(term(), environment()) -> term(). get(Key, Env) -> case lookup(Key, Env) of {ok, Value} -> Value; error -> throw({undefined, Key}) end. %% ===================================================================== %% The key-generating algorithm could possibly be further improved. The %% important thing to keep in mind is, that when we need a new key, we %% are generally in mid-traversal of a syntax tree, and existing names %% in the tree may be closely grouped and evenly distributed or even %% forming a compact range (often having been generated by a "gensym", %% or by this very algorithm itself). This means that if we generate an %% identifier whose value is too close to those already seen (i.e., %% which are in the environment), it is very probable that we will %% shadow a not-yet-seen identifier further down in the tree, the result %% being that we induce another later renaming, and end up renaming most %% of the identifiers, completely contrary to our intention. We need to %% generate new identifiers in a way that avoids such systematic %% collisions. %% %% One way of getting a new key to try when the previous attempt failed %% is of course to e.g. add one to the last tried value. However, in %% general it's a bad idea to try adjacent identifiers: the percentage %% of retries will typically increase a lot, so you may lose big on the %% extra lookups while gaining only a little from the quicker %% computation. %% %% We want an initial range that is large enough for most typical cases. %% If we start with, say, a range of 10, we might quickly use up most of %% the values in the range 1-10 (or 1-100) for new top-level variables - %% but as we start traversing the syntax tree, it is quite likely that %% exactly those variables will be encountered again (this depends on %% how the names in the tree were created), and will then need to be %% renamed. If we instead begin with a larger range, it is less likely %% that any top-level names that we introduce will shadow names that we %% will find in the tree. Of course we cannot know how large is large %% enough: for any initial range, there is some syntax tree that uses %% all the values in that range, and thus any top-level names introduced %% will shadow names in the tree. The point is to avoid this happening %% all the time - a range of about 1000 seems enough for most programs. %% %% The following values have been shown to work well: -define(MINIMUM_RANGE, 1000). -define(START_RANGE_FACTOR, 50). -define(MAX_RETRIES, 2). % retries before enlarging range -define(ENLARGE_FACTOR, 10). % range enlargment factor -ifdef(DEBUG). %% If you want to use these process dictionary counters, make sure to %% initialise them to zero before you call any of the key-generating %% functions. %% %% new_key_calls total number of calls %% new_key_retries failed key generation attempts %% new_key_max maximum generated integer value %% -define(measure_calls(), put(new_key_calls, 1 + get(new_key_calls))). -define(measure_max_key(N), case N > get(new_key_max) of true -> put(new_key_max, N); false -> ok end). -define(measure_retries(N), put(new_key_retries, get(new_key_retries) + N)). -else. -define(measure_calls(), ok). -define(measure_max_key(N), ok). -define(measure_retries(N), ok). -endif. %% ===================================================================== %% @spec new_key(Env::environment()) -> integer() %% %% @doc Returns an integer which is not already used as key in the %% environment. New integers are generated using an algorithm which %% tries to keep the values randomly distributed within a reasonably %% small range relative to the number of entries in the environment. %% %% <p>This function uses the Erlang standard library module %% <code>random</code> to generate new keys.</p> %% %% <p>Note that only the new key is returned; the environment itself is %% not updated by this function.</p> -spec new_key(environment()) -> integer(). new_key(Env) -> new_key(fun (X) -> X end, Env). %% ===================================================================== %% @spec new_key(Function, Env) -> term() %% %% Function = (integer()) -> term() %% Env = environment() %% %% @doc Returns a term which is not already used as key in the %% environment. The term is generated by applying <code>Function</code> %% to an integer generated as in {@link new_key/1}. %% %% <p>Note that only the generated term is returned; the environment %% itself is not updated by this function.</p> -spec new_key(fun((integer()) -> term()), environment()) -> term(). new_key(F, Env) -> ?measure_calls(), R = start_range(Env), %% io:fwrite("Start range: ~w.\n", [R]), new_key(R, F, Env). new_key(R, F, Env) -> new_key(generate(R, R), R, 0, F, Env). new_key(N, R, T, F, Env) when T < ?MAX_RETRIES -> A = F(N), case is_defined(A, Env) of true -> %% io:fwrite("CLASH: ~w.\n", [A]), new_key(generate(N, R), R, T + 1, F, Env); false -> ?measure_max_key(N), ?measure_retries(T), %% io:fwrite("New: ~w.\n", [N]), A end; new_key(N, R, _T, F, Env) -> %% Too many retries - enlarge the range and start over. ?measure_retries((_T + 1)), R1 = trunc(R * ?ENLARGE_FACTOR), %% io:fwrite("**NEW RANGE**: ~w.\n", [R1]), new_key(generate(N, R1), R1, 0, F, Env). start_range(Env) -> erlang:max(env_size(Env) * ?START_RANGE_FACTOR, ?MINIMUM_RANGE). %% The previous key might or might not be used to compute the next key %% to be tried. It is currently not used. %% %% In order to avoid causing cascading renamings, it is important that %% this function does not generate values in order, but %% (pseudo-)randomly distributed over the range. generate(_N, Range) -> random:uniform(Range). % works well %% ===================================================================== %% @spec new_keys(N, Env) -> [integer()] %% %% N = integer() %% Env = environment() %% %% @doc Returns a list of <code>N</code> distinct integers that are not %% already used as keys in the environment. See {@link new_key/1} for %% details. -spec new_keys(integer(), environment()) -> [integer()]. new_keys(N, Env) when is_integer(N) -> new_keys(N, fun (X) -> X end, Env). %% ===================================================================== %% @spec new_keys(N, Function, Env) -> [term()] %% %% N = integer() %% Function = (integer()) -> term() %% Env = environment() %% %% @doc Returns a list of <code>N</code> distinct terms that are not %% already used as keys in the environment. See {@link new_key/3} for %% details. -spec new_keys(integer(), fun((integer()) -> term()), environment()) -> [term()]. new_keys(N, F, Env) when is_integer(N) -> R = start_range(Env), new_keys(N, [], R, F, Env). new_keys(N, Ks, R, F, Env) when N > 0 -> Key = new_key(R, F, Env), Env1 = bind(Key, true, Env), % dummy binding new_keys(N - 1, [Key | Ks], R, F, Env1); new_keys(0, Ks, _, _, _) -> Ks.