diff options
Diffstat (limited to 'lib/stdlib/src/rand.erl')
-rw-r--r-- | lib/stdlib/src/rand.erl | 1385 |
1 files changed, 1264 insertions, 121 deletions
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl index 93409d95df..3a9a1e007b 100644 --- a/lib/stdlib/src/rand.erl +++ b/lib/stdlib/src/rand.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2015-2016. All Rights Reserved. +%% Copyright Ericsson AB 2015-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -19,7 +19,10 @@ %% %% ===================================================================== %% Multiple PRNG module for Erlang/OTP -%% Copyright (c) 2015 Kenji Rikitake +%% Copyright (c) 2015-2016 Kenji Rikitake +%% +%% exrop (xoroshiro116+) added, statistical distribution +%% improvements and uniform_real added by the Erlang/OTP team 2017 %% ===================================================================== -module(rand). @@ -27,34 +30,200 @@ -export([seed_s/1, seed_s/2, seed/1, seed/2, export_seed/0, export_seed_s/1, uniform/0, uniform/1, uniform_s/1, uniform_s/2, - normal/0, normal_s/1 + uniform_real/0, uniform_real_s/1, + jump/0, jump/1, + normal/0, normal/2, normal_s/1, normal_s/3 ]). --compile({inline, [exs64_next/1, exsplus_next/1, +%% Test, dev and internal +-export([exro928_jump_2pow512/1, exro928_jump_2pow20/1, + exro928_seed/1, exro928_next/1, exro928_next_state/1, + format_jumpconst58/1, seed58/2]). + +%% Debug +-export([make_float/3, float2str/1, bc64/1]). + +-compile({inline, [exs64_next/1, exsplus_next/1, exsss_next/1, exs1024_next/1, exs1024_calc/2, + exro928_next_state/4, + exrop_next/1, exrop_next_s/2, get_52/1, normal_kiwi/1]}). --define(DEFAULT_ALG_HANDLER, exsplus). +-define(DEFAULT_ALG_HANDLER, exsss). -define(SEED_DICT, rand_seed). %% ===================================================================== +%% Bit fiddling macros +%% ===================================================================== + +-define(BIT(Bits), (1 bsl (Bits))). +-define(MASK(Bits), (?BIT(Bits) - 1)). +-define(MASK(Bits, X), ((X) band ?MASK(Bits))). +-define( + BSL(Bits, X, N), + %% N is evaluated 2 times + (?MASK((Bits)-(N), (X)) bsl (N))). +-define( + ROTL(Bits, X, N), + %% Bits is evaluated 2 times + %% X is evaluated 2 times + %% N i evaluated 3 times + (?BSL((Bits), (X), (N)) bor ((X) bsr ((Bits)-(N))))). + +-define( + BC(V, N), + bc((V), ?BIT((N) - 1), N)). + +%%-define(TWO_POW_MINUS53, (math:pow(2, -53))). +-define(TWO_POW_MINUS53, 1.11022302462515657e-16). + +%% ===================================================================== %% Types %% ===================================================================== +-type uint64() :: 0..?MASK(64). +-type uint58() :: 0..?MASK(58). + %% This depends on the algorithm handler function --type alg_seed() :: exs64_state() | exsplus_state() | exs1024_state(). -%% This is the algorithm handler function within this module --type alg_handler() :: #{type := alg(), - max := integer(), - next := fun(), - uniform := fun(), - uniform_n := fun()}. - -%% Internal state --opaque state() :: {alg_handler(), alg_seed()}. --type alg() :: exs64 | exsplus | exs1024. --opaque export_state() :: {alg(), alg_seed()}. --export_type([alg/0, state/0, export_state/0]). +-type alg_state() :: + exsplus_state() | exro928_state() | exrop_state() | exs1024_state() | + exs64_state() | term(). + +%% This is the algorithm handling definition within this module, +%% and the type to use for plugins. +%% +%% The 'type' field must be recognized by the module that implements +%% the algorithm, to interpret an exported state. +%% +%% The 'bits' field indicates how many bits the integer +%% returned from 'next' has got, i.e 'next' shall return +%% an random integer in the range 0..(2^Bits - 1). +%% At least 55 bits is required for the floating point +%% producing fallbacks, but 56 bits would be more future proof. +%% +%% The fields 'next', 'uniform' and 'uniform_n' +%% implement the algorithm. If 'uniform' or 'uniform_n' +%% is not present there is a fallback using 'next' and either +%% 'bits' or the deprecated 'max'. The 'next' function +%% must generate a word with at least 56 good random bits. +%% +%% The 'weak_low_bits' field indicate how many bits are of +%% lesser quality and they will not be used by the floating point +%% producing functions, nor by the range producing functions +%% when more bits are needed, to avoid weak bits in the middle +%% of the generated bits. The lowest bits from the range +%% functions still have the generator's quality. +%% +-type alg_handler() :: + #{type := alg(), + bits => non_neg_integer(), + weak_low_bits => non_neg_integer(), + max => non_neg_integer(), % Deprecated + next := + fun ((alg_state()) -> {non_neg_integer(), alg_state()}), + uniform => + fun ((state()) -> {float(), state()}), + uniform_n => + fun ((pos_integer(), state()) -> {pos_integer(), state()}), + jump => + fun ((state()) -> state())}. + +%% Algorithm state +-type state() :: {alg_handler(), alg_state()}. +-type builtin_alg() :: + exsss | exro928ss | exrop | exs1024s | exsp | exs64 | exsplus | exs1024. +-type alg() :: builtin_alg() | atom(). +-type export_state() :: {alg(), alg_state()}. +-type seed() :: [integer()] | integer() | {integer(), integer(), integer()}. +-export_type( + [builtin_alg/0, alg/0, alg_handler/0, alg_state/0, + state/0, export_state/0, seed/0]). +-export_type( + [exsplus_state/0, exro928_state/0, exrop_state/0, exs1024_state/0, + exs64_state/0]). + +%% ===================================================================== +%% Range macro and helper +%% ===================================================================== + +-define( + uniform_range(Range, Alg, R, V, MaxMinusRange, I), + if + 0 =< (MaxMinusRange) -> + if + %% Really work saving in odd cases; + %% large ranges in particular + (V) < (Range) -> + {(V) + 1, {(Alg), (R)}}; + true -> + (I) = (V) rem (Range), + if + (V) - (I) =< (MaxMinusRange) -> + {(I) + 1, {(Alg), (R)}}; + true -> + %% V in the truncated top range + %% - try again + ?FUNCTION_NAME((Range), {(Alg), (R)}) + end + end; + true -> + uniform_range((Range), (Alg), (R), (V)) + end). + +%% For ranges larger than the algorithm bit size +uniform_range(Range, #{next:=Next, bits:=Bits} = Alg, R, V) -> + WeakLowBits = maps:get(weak_low_bits, Alg, 0), + %% Maybe waste the lowest bit(s) when shifting in new bits + Shift = Bits - WeakLowBits, + ShiftMask = bnot ?MASK(WeakLowBits), + RangeMinus1 = Range - 1, + if + (Range band RangeMinus1) =:= 0 -> % Power of 2 + %% Generate at least the number of bits for the range + {V1, R1, _} = + uniform_range( + Range bsr Bits, Next, R, V, ShiftMask, Shift, Bits), + {(V1 band RangeMinus1) + 1, {Alg, R1}}; + true -> + %% Generate a value with at least two bits more than the range + %% and try that for a fit, otherwise recurse + %% + %% Just one bit more should ensure that the generated + %% number range is at least twice the size of the requested + %% range, which would make the probability to draw a good + %% number better than 0.5. And repeating that until + %% success i guess would take 2 times statistically amortized. + %% But since the probability for fairly many attemtpts + %% is not that low, use two bits more than the range which + %% should make the probability to draw a bad number under 0.25, + %% which decreases the bad case probability a lot. + {V1, R1, B} = + uniform_range( + Range bsr (Bits - 2), Next, R, V, ShiftMask, Shift, Bits), + I = V1 rem Range, + if + (V1 - I) =< (1 bsl B) - Range -> + {I + 1, {Alg, R1}}; + true -> + %% V1 drawn from the truncated top range + %% - try again + {V2, R2} = Next(R1), + uniform_range(Range, Alg, R2, V2) + end + end. +%% +uniform_range(Range, Next, R, V, ShiftMask, Shift, B) -> + if + Range =< 1 -> + {V, R, B}; + true -> + {V1, R1} = Next(R), + %% Waste the lowest bit(s) when shifting in new bits + uniform_range( + Range bsr Shift, Next, R1, + ((V band ShiftMask) bsl Shift) bor V1, + ShiftMask, Shift, B + Shift) + end. %% ===================================================================== %% API @@ -68,87 +237,316 @@ export_seed() -> _ -> undefined end. --spec export_seed_s(state()) -> export_state(). -export_seed_s({#{type:=Alg}, Seed}) -> {Alg, Seed}. +-spec export_seed_s(State :: state()) -> export_state(). +export_seed_s({#{type:=Alg}, AlgState}) -> {Alg, AlgState}. %% seed(Alg) seeds RNG with runtime dependent values %% and return the NEW state -%% seed({Alg,Seed}) setup RNG with a previously exported seed +%% seed({Alg,AlgState}) setup RNG with a previously exported seed %% and return the NEW state --spec seed(AlgOrExpState::alg() | export_state()) -> state(). +-spec seed( + AlgOrStateOrExpState :: builtin_alg() | state() | export_state()) -> + state(). seed(Alg) -> - R = seed_s(Alg), - _ = seed_put(R), - R. + seed_put(seed_s(Alg)). --spec seed_s(AlgOrExpState::alg() | export_state()) -> state(). -seed_s(Alg) when is_atom(Alg) -> +-spec seed_s( + AlgOrStateOrExpState :: builtin_alg() | state() | export_state()) -> + state(). +seed_s({AlgHandler, _AlgState} = State) when is_map(AlgHandler) -> + State; +seed_s({Alg, AlgState}) when is_atom(Alg) -> + {AlgHandler,_SeedFun} = mk_alg(Alg), + {AlgHandler,AlgState}; +seed_s(Alg) -> seed_s(Alg, {erlang:phash2([{node(),self()}]), erlang:system_time(), - erlang:unique_integer()}); -seed_s({Alg0, Seed}) -> - {Alg,_SeedFun} = mk_alg(Alg0), - {Alg, Seed}. + erlang:unique_integer()}). %% seed/2: seeds RNG with the algorithm and given values %% and returns the NEW state. --spec seed(Alg :: alg(), {integer(), integer(), integer()}) -> state(). -seed(Alg0, S0) -> - State = seed_s(Alg0, S0), - _ = seed_put(State), - State. +-spec seed(Alg :: builtin_alg(), Seed :: seed()) -> state(). +seed(Alg, Seed) -> + seed_put(seed_s(Alg, Seed)). --spec seed_s(Alg :: alg(), {integer(), integer(), integer()}) -> state(). -seed_s(Alg0, S0 = {_, _, _}) -> - {Alg, Seed} = mk_alg(Alg0), - AS = Seed(S0), - {Alg, AS}. +-spec seed_s(Alg :: builtin_alg(), Seed :: seed()) -> state(). +seed_s(Alg, Seed) -> + {AlgHandler,SeedFun} = mk_alg(Alg), + AlgState = SeedFun(Seed), + {AlgHandler,AlgState}. %%% uniform/0, uniform/1, uniform_s/1, uniform_s/2 are all %%% uniformly distributed random numbers. -%% uniform/0: returns a random float X where 0.0 < X < 1.0, +%% uniform/0: returns a random float X where 0.0 =< X < 1.0, %% updating the state in the process dictionary. --spec uniform() -> X::float(). +-spec uniform() -> X :: float(). uniform() -> - {X, Seed} = uniform_s(seed_get()), - _ = seed_put(Seed), + {X, State} = uniform_s(seed_get()), + _ = seed_put(State), X. %% uniform/1: given an integer N >= 1, %% uniform/1 returns a random integer X where 1 =< X =< N, %% updating the state in the process dictionary. --spec uniform(N :: pos_integer()) -> X::pos_integer(). +-spec uniform(N :: pos_integer()) -> X :: pos_integer(). uniform(N) -> - {X, Seed} = uniform_s(N, seed_get()), - _ = seed_put(Seed), + {X, State} = uniform_s(N, seed_get()), + _ = seed_put(State), X. %% uniform_s/1: given a state, uniform_s/1 -%% returns a random float X where 0.0 < X < 1.0, +%% returns a random float X where 0.0 =< X < 1.0, %% and a new state. --spec uniform_s(state()) -> {X::float(), NewS :: state()}. +-spec uniform_s(State :: state()) -> {X :: float(), NewState :: state()}. uniform_s(State = {#{uniform:=Uniform}, _}) -> - Uniform(State). + Uniform(State); +uniform_s({#{bits:=Bits, next:=Next} = Alg, R0}) -> + {V, R1} = Next(R0), + %% Produce floats on the form N * 2^(-53) + {(V bsr (Bits - 53)) * ?TWO_POW_MINUS53, {Alg, R1}}; +uniform_s({#{max:=Max, next:=Next} = Alg, R0}) -> + {V, R1} = Next(R0), + %% Old algorithm with non-uniform density + {V / (Max + 1), {Alg, R1}}. + %% uniform_s/2: given an integer N >= 1 and a state, uniform_s/2 %% uniform_s/2 returns a random integer X where 1 =< X =< N, %% and a new state. --spec uniform_s(N::pos_integer(), state()) -> {X::pos_integer(), NewS::state()}. -uniform_s(N, State = {#{uniform_n:=Uniform, max:=Max}, _}) - when 0 < N, N =< Max -> - Uniform(N, State); -uniform_s(N, State0 = {#{uniform:=Uniform}, _}) - when is_integer(N), 0 < N -> - {F, State} = Uniform(State0), - {trunc(F * N) + 1, State}. +-spec uniform_s(N :: pos_integer(), State :: state()) -> + {X :: pos_integer(), NewState :: state()}. +uniform_s(N, State = {#{uniform_n:=UniformN}, _}) + when is_integer(N), 1 =< N -> + UniformN(N, State); +uniform_s(N, {#{bits:=Bits, next:=Next} = Alg, R0}) + when is_integer(N), 1 =< N -> + {V, R1} = Next(R0), + MaxMinusN = ?BIT(Bits) - N, + ?uniform_range(N, Alg, R1, V, MaxMinusN, I); +uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0}) + when is_integer(N), 1 =< N -> + %% Old algorithm with skewed probability + %% and gap in ranges > Max + {V, R1} = Next(R0), + if + N =< Max -> + {(V rem N) + 1, {Alg, R1}}; + true -> + F = V / (Max + 1), + {trunc(F * N) + 1, {Alg, R1}} + end. + +%% uniform_real/0: returns a random float X where 0.0 < X =< 1.0, +%% updating the state in the process dictionary. + +-spec uniform_real() -> X :: float(). +uniform_real() -> + {X, Seed} = uniform_real_s(seed_get()), + _ = seed_put(Seed), + X. + +%% uniform_real_s/1: given a state, uniform_s/1 +%% returns a random float X where 0.0 < X =< 1.0, +%% and a new state. +%% +%% This function does not use the same form of uniformity +%% as the uniform_s/1 function. +%% +%% Instead, this function does not generate numbers with equal +%% distance in the interval, but rather tries to keep all mantissa +%% bits random also for small numbers, meaning that the distance +%% between possible numbers decreases when the numbers +%% approaches 0.0, as does the possibility for a particular +%% number. Hence uniformity is preserved. +%% +%% To generate 56 bits at the time instead of 53 is actually +%% a speed optimization since the probability to have to +%% generate a second word decreases by 1/2 for every extra bit. +%% +%% This function generates normalized numbers, so the smallest number +%% that can be generated is 2^-1022 with the distance 2^-1074 +%% to the next to smallest number, compared to 2^-53 for uniform_s/1. +%% +%% This concept of uniformity should work better for applications +%% where you need to calculate 1.0/X or math:log(X) since those +%% operations benefits from larger precision approaching 0.0, +%% and that this function does not return 0.0 nor denormalized +%% numbers very close to 0.0. The log() operation in The Box-Muller +%% transformation for normal distribution is an example of this. +%% +%%-define(TWO_POW_MINUS55, (math:pow(2, -55))). +%%-define(TWO_POW_MINUS110, (math:pow(2, -110))). +%%-define(TWO_POW_MINUS55, 2.7755575615628914e-17). +%%-define(TWO_POW_MINUS110, 7.7037197775489436e-34). +%% +-spec uniform_real_s(State :: state()) -> {X :: float(), NewState :: state()}. +uniform_real_s({#{bits:=Bits, next:=Next} = Alg, R0}) -> + %% Generate a 56 bit number without using the weak low bits. + %% + %% Be sure to use only 53 bits when multiplying with + %% math:pow(2.0, -N) to avoid rounding which would make + %% "even" floats more probable than "odd". + %% + {V1, R1} = Next(R0), + M1 = V1 bsr (Bits - 56), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, -56), {Alg, R1}}; + true -> + %% Need more bits + {V2, R2} = Next(R1), + uniform_real_s(Alg, Next, M1, -56, R2, V2, Bits) + end; +uniform_real_s({#{max:=_, next:=Next} = Alg, R0}) -> + %% Generate a 56 bit number. + %% Ignore the weak low bits for these old algorithms, + %% just produce something reasonable. + %% + %% Be sure to use only 53 bits when multiplying with + %% math:pow(2.0, -N) to avoid rounding which would make + %% "even" floats more probable than "odd". + %% + {V1, R1} = Next(R0), + M1 = ?MASK(56, V1), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, -56), {Alg, R1}}; + true -> + %% Need more bits + {V2, R2} = Next(R1), + uniform_real_s(Alg, Next, M1, -56, R2, V2, 56) + end. + +uniform_real_s(Alg, _Next, M0, -1064, R1, V1, Bits) -> % 19*56 + %% This is a very theoretical bottom case. + %% The odds of getting here is about 2^-1008, + %% through a white box test case, or thanks to + %% a malfunctioning PRNG producing 18 56-bit zeros in a row. + %% + %% Fill up to 53 bits, we have at most 52 + B0 = (53 - ?BC(M0, 52)), % Missing bits + {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) * math:pow(2.0, -1064 - B0)), + {Alg, R1}}; +uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits) -> + if + %% Optimize the most probable. + %% Fill up to 53 bits. + ?BIT(51) =< M0 -> + %% We have 52 bits in M0 - need 1 + {(((M0 bsl 1) bor (V1 bsr (Bits - 1))) + * math:pow(2.0, BitNo - 1)), + {Alg, R1}}; + ?BIT(50) =< M0 -> + %% We have 51 bits in M0 - need 2 + {(((M0 bsl 2) bor (V1 bsr (Bits - 2))) + * math:pow(2.0, BitNo - 2)), + {Alg, R1}}; + ?BIT(49) =< M0 -> + %% We have 50 bits in M0 - need 3 + {(((M0 bsl 3) bor (V1 bsr (Bits - 3))) + * math:pow(2.0, BitNo - 3)), + {Alg, R1}}; + M0 == 0 -> + M1 = V1 bsr (Bits - 56), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, BitNo - 53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, BitNo - 54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, BitNo - 55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, BitNo - 56), {Alg, R1}}; + BitNo =:= -1008 -> + %% Endgame + %% For the last round we cannot have 14 zeros or more + %% at the top of M1 because then we will underflow, + %% so we need at least 43 bits + if + ?BIT(42) =< M1 -> + %% We have 43 bits - get the last bits + uniform_real_s(Alg, Next, M1, BitNo - 56, R1); + true -> + %% Would underflow 2^-1022 - start all over + %% + %% We could just crash here since the odds for + %% the PRNG being broken is much higher than + %% for a good PRNG generating this many zeros + %% in a row. Maybe we should write an error + %% report or call this a system limit...? + uniform_real_s({Alg, R1}) + end; + true -> + %% Need more bits + uniform_real_s(Alg, Next, M1, BitNo - 56, R1) + end; + true -> + %% Fill up to 53 bits + B0 = 53 - ?BC(M0, 49), % Number of bits we need to append + {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) + * math:pow(2.0, BitNo - B0)), + {Alg, R1}} + end. +%% +uniform_real_s(#{bits:=Bits} = Alg, Next, M0, BitNo, R0) -> + {V1, R1} = Next(R0), + uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits); +uniform_real_s(#{max:=_} = Alg, Next, M0, BitNo, R0) -> + {V1, R1} = Next(R0), + uniform_real_s(Alg, Next, M0, BitNo, R1, ?MASK(56, V1), 56). + +%% jump/1: given a state, jump/1 +%% returns a new state which is equivalent to that +%% after a large number of call defined for each algorithm. +%% The large number is algorithm dependent. + +-spec jump(state()) -> NewState :: state(). +jump(State = {#{jump:=Jump}, _}) -> + Jump(State); +jump({#{}, _}) -> + erlang:error(not_implemented). + + +%% jump/0: read the internal state and +%% apply the jump function for the state as in jump/1 +%% and write back the new value to the internal state, +%% then returns the new value. + +-spec jump() -> NewState :: state(). +jump() -> + seed_put(jump(seed_get())). %% normal/0: returns a random float with standard normal distribution %% updating the state in the process dictionary. @@ -159,14 +557,21 @@ normal() -> _ = seed_put(Seed), X. +%% normal/2: returns a random float with N(μ, σ²) normal distribution +%% updating the state in the process dictionary. + +-spec normal(Mean :: number(), Variance :: number()) -> float(). +normal(Mean, Variance) -> + Mean + (math:sqrt(Variance) * normal()). + %% normal_s/1: returns a random float with standard normal distribution %% The Ziggurat Method for generating random variables - Marsaglia and Tsang %% Paper and reference code: http://www.jstatsoft.org/v05/i08/ --spec normal_s(state()) -> {float(), NewS :: state()}. +-spec normal_s(State :: state()) -> {float(), NewState :: state()}. normal_s(State0) -> {Sign, R, State} = get_52(State0), - Idx = R band 16#FF, + Idx = ?MASK(8, R), Idx1 = Idx+1, {Ki, Wi} = normal_kiwi(Idx1), X = R * Wi, @@ -179,22 +584,20 @@ normal_s(State0) -> false -> normal_s(Idx, Sign, -X, State) end. -%% ===================================================================== -%% Internal functions +%% normal_s/3: returns a random float with normal N(μ, σ²) distribution --define(UINT21MASK, 16#00000000001fffff). --define(UINT32MASK, 16#00000000ffffffff). --define(UINT33MASK, 16#00000001ffffffff). --define(UINT39MASK, 16#0000007fffffffff). --define(UINT58MASK, 16#03ffffffffffffff). --define(UINT64MASK, 16#ffffffffffffffff). +-spec normal_s(Mean :: number(), Variance :: number(), state()) -> {float(), NewS :: state()}. +normal_s(Mean, Variance, State0) when Variance > 0 -> + {X, State} = normal_s(State0), + {Mean + (math:sqrt(Variance) * X), State}. --type uint64() :: 0..16#ffffffffffffffff. --type uint58() :: 0..16#03ffffffffffffff. +%% ===================================================================== +%% Internal functions --spec seed_put(state()) -> undefined | state(). +-spec seed_put(state()) -> state(). seed_put(Seed) -> - put(?SEED_DICT, Seed). + put(?SEED_DICT, Seed), + Seed. seed_get() -> case get(?SEED_DICT) of @@ -204,17 +607,41 @@ seed_get() -> %% Setup alg record mk_alg(exs64) -> - {#{type=>exs64, max=>?UINT64MASK, next=>fun exs64_next/1, - uniform=>fun exs64_uniform/1, uniform_n=>fun exs64_uniform/2}, + {#{type=>exs64, max=>?MASK(64), next=>fun exs64_next/1}, fun exs64_seed/1}; mk_alg(exsplus) -> - {#{type=>exsplus, max=>?UINT58MASK, next=>fun exsplus_next/1, - uniform=>fun exsplus_uniform/1, uniform_n=>fun exsplus_uniform/2}, + {#{type=>exsplus, max=>?MASK(58), next=>fun exsplus_next/1, + jump=>fun exsplus_jump/1}, + fun exsplus_seed/1}; +mk_alg(exsp) -> + {#{type=>exsp, bits=>58, weak_low_bits=>1, next=>fun exsplus_next/1, + uniform=>fun exsp_uniform/1, uniform_n=>fun exsp_uniform/2, + jump=>fun exsplus_jump/1}, fun exsplus_seed/1}; +mk_alg(exsss) -> + {#{type=>exsss, bits=>58, next=>fun exsss_next/1, + uniform=>fun exsss_uniform/1, uniform_n=>fun exsss_uniform/2, + jump=>fun exsplus_jump/1}, + fun exsss_seed/1}; mk_alg(exs1024) -> - {#{type=>exs1024, max=>?UINT64MASK, next=>fun exs1024_next/1, - uniform=>fun exs1024_uniform/1, uniform_n=>fun exs1024_uniform/2}, - fun exs1024_seed/1}. + {#{type=>exs1024, max=>?MASK(64), next=>fun exs1024_next/1, + jump=>fun exs1024_jump/1}, + fun exs1024_seed/1}; +mk_alg(exs1024s) -> + {#{type=>exs1024s, bits=>64, weak_low_bits=>3, next=>fun exs1024_next/1, + jump=>fun exs1024_jump/1}, + fun exs1024_seed/1}; +mk_alg(exrop) -> + {#{type=>exrop, bits=>58, weak_low_bits=>1, next=>fun exrop_next/1, + uniform=>fun exrop_uniform/1, uniform_n=>fun exrop_uniform/2, + jump=>fun exrop_jump/1}, + fun exrop_seed/1}; +mk_alg(exro928ss) -> + {#{type=>exro928ss, bits=>58, next=>fun exro928ss_next/1, + uniform=>fun exro928ss_uniform/1, + uniform_n=>fun exro928ss_uniform/2, + jump=>fun exro928_jump/1}, + fun exro928_seed/1}. %% ===================================================================== %% exs64 PRNG: Xorshift64* @@ -222,29 +649,29 @@ mk_alg(exs1024) -> %% Reference URL: http://xorshift.di.unimi.it/ %% ===================================================================== --type exs64_state() :: uint64(). +-opaque exs64_state() :: uint64(). +exs64_seed(L) when is_list(L) -> + [R] = seed64_nz(1, L), + R; +exs64_seed(A) when is_integer(A) -> + [R] = seed64(1, ?MASK(64, A)), + R; +%% +%% Traditional integer triplet seed exs64_seed({A1, A2, A3}) -> - {V1, _} = exs64_next(((A1 band ?UINT32MASK) * 4294967197 + 1)), - {V2, _} = exs64_next(((A2 band ?UINT32MASK) * 4294967231 + 1)), - {V3, _} = exs64_next(((A3 band ?UINT32MASK) * 4294967279 + 1)), - ((V1 * V2 * V3) rem (?UINT64MASK - 1)) + 1. + {V1, _} = exs64_next((?MASK(32, A1) * 4294967197 + 1)), + {V2, _} = exs64_next((?MASK(32, A2) * 4294967231 + 1)), + {V3, _} = exs64_next((?MASK(32, A3) * 4294967279 + 1)), + ((V1 * V2 * V3) rem (?MASK(64) - 1)) + 1. %% Advance xorshift64* state for one step and generate 64bit unsigned integer -spec exs64_next(exs64_state()) -> {uint64(), exs64_state()}. exs64_next(R) -> R1 = R bxor (R bsr 12), - R2 = R1 bxor ((R1 band ?UINT39MASK) bsl 25), + R2 = R1 bxor ?BSL(64, R1, 25), R3 = R2 bxor (R2 bsr 27), - {(R3 * 2685821657736338717) band ?UINT64MASK, R3}. - -exs64_uniform({Alg, R0}) -> - {V, R1} = exs64_next(R0), - {V / 18446744073709551616, {Alg, R1}}. - -exs64_uniform(Max, {Alg, R}) -> - {V, R1} = exs64_next(R), - {(V rem Max) + 1, {Alg, R1}}. + {?MASK(64, R3 * 2685821657736338717), R3}. %% ===================================================================== %% exsplus PRNG: Xorshift116+ @@ -253,35 +680,191 @@ exs64_uniform(Max, {Alg, R}) -> %% 58 bits fits into an immediate on 64bits erlang and is thus much faster. %% Modification of the original Xorshift128+ algorithm to 116 %% by Sebastiano Vigna, a lot of thanks for his help and work. +%% +%% Reference C code for Xorshift116+ and Xorshift116** +%% +%% #include <stdint.h> +%% +%% #define MASK(b, v) (((UINT64_C(1) << (b)) - 1) & (v)) +%% #define BSL(b, v, n) (MASK((b)-(n), (v)) << (n)) +%% #define ROTL(b, v, n) (BSL((b), (v), (n)) | ((v) >> ((b)-(n)))) +%% +%% uint64_t s[2]; +%% +%% uint64_t next(void) { +%% uint64_t s1 = s[0]; +%% const uint64_t s0 = s[1]; +%% +%% s1 ^= BSL(58, s1, 24); // a +%% s1 ^= s0 ^ (s1 >> 11) ^ (s0 >> 41); // b, c +%% s[0] = s0; +%% s[1] = s1; +%% +%% const uint64_t result_plus = MASK(58, s0 + s1); +%% uint64_t result_starstar = s0; +%% result_starstar = MASK(58, result_starstar * 5); +%% result_starstar = ROTL(58, result_starstar, 7); +%% result_starstar = MASK(58, result_starstar * 9); +%% +%% return result_plus; +%% return result_starstar; +%% } +%% %% ===================================================================== --type exsplus_state() :: nonempty_improper_list(uint58(), uint58()). +-opaque exsplus_state() :: nonempty_improper_list(uint58(), uint58()). -dialyzer({no_improper_lists, exsplus_seed/1}). +exsplus_seed(L) when is_list(L) -> + [S0,S1] = seed58_nz(2, L), + [S0|S1]; +exsplus_seed(X) when is_integer(X) -> + [S0,S1] = seed58(2, ?MASK(64, X)), + [S0|S1]; +%% +%% Traditional integer triplet seed exsplus_seed({A1, A2, A3}) -> - {_, R1} = exsplus_next([(((A1 * 4294967197) + 1) band ?UINT58MASK)| - (((A2 * 4294967231) + 1) band ?UINT58MASK)]), - {_, R2} = exsplus_next([(((A3 * 4294967279) + 1) band ?UINT58MASK)| - tl(R1)]), + {_, R1} = exsplus_next( + [?MASK(58, (A1 * 4294967197) + 1)| + ?MASK(58, (A2 * 4294967231) + 1)]), + {_, R2} = exsplus_next( + [?MASK(58, (A3 * 4294967279) + 1)| + tl(R1)]), R2. +-dialyzer({no_improper_lists, exsss_seed/1}). + +exsss_seed(L) when is_list(L) -> + [S0,S1] = seed58_nz(2, L), + [S0|S1]; +exsss_seed(X) when is_integer(X) -> + [S0,S1] = seed58(2, ?MASK(64, X)), + [S0|S1]; +%% +%% Seed from traditional integer triple - mix into splitmix +exsss_seed({A1, A2, A3}) -> + {_, X0} = seed58(?MASK(64, A1)), + {S0, X1} = seed58(?MASK(64, A2) bxor X0), + {S1, _} = seed58(?MASK(64, A3) bxor X1), + [S0|S1]. + +%% Advance Xorshift116 state one step +-define( + exs_next(S0, S1, S1_b), + begin + S1_b = S1 bxor ?BSL(58, S1, 24), + S1_b bxor S0 bxor (S1_b bsr 11) bxor (S0 bsr 41) + end). + +-define( + scramble_starstar(S, V_a, V_b), + begin + %% The multiply by add shifted trick avoids creating bignums + %% which improves performance significantly + %% + V_a = ?MASK(58, S + ?BSL(58, S, 2)), % V_a = S * 5 + V_b = ?ROTL(58, V_a, 7), + ?MASK(58, V_b + ?BSL(58, V_b, 3)) % V_b * 9 + end). + -dialyzer({no_improper_lists, exsplus_next/1}). -%% Advance xorshift116+ state for one step and generate 58bit unsigned integer +%% Advance state and generate 58bit unsigned integer -spec exsplus_next(exsplus_state()) -> {uint58(), exsplus_state()}. exsplus_next([S1|S0]) -> %% Note: members s0 and s1 are swapped here - S11 = (S1 bxor (S1 bsl 24)) band ?UINT58MASK, - S12 = S11 bxor S0 bxor (S11 bsr 11) bxor (S0 bsr 41), - {(S0 + S12) band ?UINT58MASK, [S0|S12]}. + NewS1 = ?exs_next(S0, S1, S1_1), + {?MASK(58, S0 + NewS1), [S0|NewS1]}. +%% %% Note: members s0 and s1 are swapped here +%% S11 = S1 bxor ?BSL(58, S1, 24), +%% S12 = S11 bxor S0 bxor (S11 bsr 11) bxor (S0 bsr 41), +%% {?MASK(58, S0 + S12), [S0|S12]}. + +-dialyzer({no_improper_lists, exsss_next/1}). + +-spec exsss_next(exsplus_state()) -> {uint58(), exsplus_state()}. +exsss_next([S1|S0]) -> + %% Note: members s0 and s1 are swapped here + NewS1 = ?exs_next(S0, S1, S1_1), + {?scramble_starstar(S0, V_0, V_1), [S0|NewS1]}. +%% {?MASK(58, S0 + NewS1), [S0|NewS1]}. -exsplus_uniform({Alg, R0}) -> +exsp_uniform({Alg, R0}) -> {I, R1} = exsplus_next(R0), - {I / (?UINT58MASK+1), {Alg, R1}}. + %% Waste the lowest bit since it is of lower + %% randomness quality than the others + {(I bsr (58-53)) * ?TWO_POW_MINUS53, {Alg, R1}}. + +exsss_uniform({Alg, R0}) -> + {I, R1} = exsss_next(R0), + {(I bsr (58-53)) * ?TWO_POW_MINUS53, {Alg, R1}}. -exsplus_uniform(Max, {Alg, R}) -> +exsp_uniform(Range, {Alg, R}) -> {V, R1} = exsplus_next(R), - {(V rem Max) + 1, {Alg, R1}}. + MaxMinusRange = ?BIT(58) - Range, + ?uniform_range(Range, Alg, R1, V, MaxMinusRange, I). + +exsss_uniform(Range, {Alg, R}) -> + {V, R1} = exsss_next(R), + MaxMinusRange = ?BIT(58) - Range, + ?uniform_range(Range, Alg, R1, V, MaxMinusRange, I). + + +%% This is the jump function for the exs* generators, +%% i.e the Xorshift116 generators, equivalent +%% to 2^64 calls to next/1; it can be used to generate 2^52 +%% non-overlapping subsequences for parallel computations. +%% Note: the jump function takes 116 times of the execution time of +%% next/1. +%% +%% #include <stdint.h> +%% +%% void jump(void) { +%% static const uint64_t JUMP[] = { 0x02f8ea6bc32c797, +%% 0x345d2a0f85f788c }; +%% int i, b; +%% uint64_t s0 = 0; +%% uint64_t s1 = 0; +%% for(i = 0; i < sizeof JUMP / sizeof *JUMP; i++) +%% for(b = 0; b < 58; b++) { +%% if (JUMP[i] & 1ULL << b) { +%% s0 ^= s[0]; +%% s1 ^= s[1]; +%% } +%% next(); +%% } +%% s[0] = s0; +%% s[1] = s1; +%% } +%% +%% -define(JUMPCONST, 16#000d174a83e17de2302f8ea6bc32c797). +%% split into 58-bit chunks +%% and two iterative executions + +-define(JUMPCONST1, 16#02f8ea6bc32c797). +-define(JUMPCONST2, 16#345d2a0f85f788c). +-define(JUMPELEMLEN, 58). + +-dialyzer({no_improper_lists, exsplus_jump/1}). +-spec exsplus_jump({alg_handler(), exsplus_state()}) -> + {alg_handler(), exsplus_state()}. +exsplus_jump({Alg, S}) -> + {S1, AS1} = exsplus_jump(S, [0|0], ?JUMPCONST1, ?JUMPELEMLEN), + {_, AS2} = exsplus_jump(S1, AS1, ?JUMPCONST2, ?JUMPELEMLEN), + {Alg, AS2}. + +-dialyzer({no_improper_lists, exsplus_jump/4}). +exsplus_jump(S, AS, _, 0) -> + {S, AS}; +exsplus_jump(S, [AS0|AS1], J, N) -> + {_, NS} = exsplus_next(S), + case ?MASK(1, J) of + 1 -> + [S0|S1] = S, + exsplus_jump(NS, [(AS0 bxor S0)|(AS1 bxor S1)], J bsr 1, N-1); + 0 -> + exsplus_jump(NS, [AS0|AS1], J bsr 1, N-1) + end. %% ===================================================================== %% exs1024 PRNG: Xorshift1024* @@ -289,12 +872,18 @@ exsplus_uniform(Max, {Alg, R}) -> %% Reference URL: http://xorshift.di.unimi.it/ %% ===================================================================== --type exs1024_state() :: {list(uint64()), list(uint64())}. +-opaque exs1024_state() :: {list(uint64()), list(uint64())}. +exs1024_seed(L) when is_list(L) -> + {seed64_nz(16, L), []}; +exs1024_seed(X) when is_integer(X) -> + {seed64(16, ?MASK(64, X)), []}; +%% +%% Seed from traditional triple, remain backwards compatible exs1024_seed({A1, A2, A3}) -> - B1 = (((A1 band ?UINT21MASK) + 1) * 2097131) band ?UINT21MASK, - B2 = (((A2 band ?UINT21MASK) + 1) * 2097133) band ?UINT21MASK, - B3 = (((A3 band ?UINT21MASK) + 1) * 2097143) band ?UINT21MASK, + B1 = ?MASK(21, (?MASK(21, A1) + 1) * 2097131), + B2 = ?MASK(21, (?MASK(21, A2) + 1) * 2097133), + B3 = ?MASK(21, (?MASK(21, A3) + 1) * 2097143), {exs1024_gen1024((B1 bsl 43) bor (B2 bsl 22) bor (B3 bsl 1) bor 1), []}. @@ -317,11 +906,11 @@ exs1024_gen1024(N, R, L) -> %% X: random number output -spec exs1024_calc(uint64(), uint64()) -> {uint64(), uint64()}. exs1024_calc(S0, S1) -> - S11 = S1 bxor ((S1 band ?UINT33MASK) bsl 31), + S11 = S1 bxor ?BSL(64, S1, 31), S12 = S11 bxor (S11 bsr 11), S01 = S0 bxor (S0 bsr 30), NS1 = S01 bxor S12, - {(NS1 * 1181783497276652981) band ?UINT64MASK, NS1}. + {?MASK(64, NS1 * 1181783497276652981), NS1}. %% Advance xorshift1024* state for one step and generate 64bit unsigned integer -spec exs1024_next(exs1024_state()) -> {uint64(), exs1024_state()}. @@ -332,13 +921,524 @@ exs1024_next({[H], RL}) -> NL = [H|lists:reverse(RL)], exs1024_next({NL, []}). -exs1024_uniform({Alg, R0}) -> - {V, R1} = exs1024_next(R0), - {V / 18446744073709551616, {Alg, R1}}. -exs1024_uniform(Max, {Alg, R}) -> - {V, R1} = exs1024_next(R), - {(V rem Max) + 1, {Alg, R1}}. +%% This is the jump function for the exs1024 generator, equivalent +%% to 2^512 calls to next(); it can be used to generate 2^512 +%% non-overlapping subsequences for parallel computations. +%% Note: the jump function takes ~2000 times of the execution time of +%% next/1. + +%% Jump constant here split into 58 bits for speed +-define(JUMPCONSTHEAD, 16#00242f96eca9c41d). +-define(JUMPCONSTTAIL, + [16#0196e1ddbe5a1561, + 16#0239f070b5837a3c, + 16#03f393cc68796cd2, + 16#0248316f404489af, + 16#039a30088bffbac2, + 16#02fea70dc2d9891f, + 16#032ae0d9644caec4, + 16#0313aac17d8efa43, + 16#02f132e055642626, + 16#01ee975283d71c93, + 16#00552321b06f5501, + 16#00c41d10a1e6a569, + 16#019158ecf8aa1e44, + 16#004e9fc949d0b5fc, + 16#0363da172811fdda, + 16#030e38c3b99181f2, + 16#0000000a118038fc]). +-define(JUMPTOTALLEN, 1024). +-define(RINGLEN, 16). + +-spec exs1024_jump({alg_handler(), exs1024_state()}) -> + {alg_handler(), exs1024_state()}. +exs1024_jump({Alg, {L, RL}}) -> + P = length(RL), + AS = exs1024_jump({L, RL}, + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ?JUMPCONSTTAIL, ?JUMPCONSTHEAD, ?JUMPELEMLEN, ?JUMPTOTALLEN), + {ASL, ASR} = lists:split(?RINGLEN - P, AS), + {Alg, {ASL, lists:reverse(ASR)}}. + +exs1024_jump(_, AS, _, _, _, 0) -> + AS; +exs1024_jump(S, AS, [H|T], _, 0, TN) -> + exs1024_jump(S, AS, T, H, ?JUMPELEMLEN, TN); +exs1024_jump({L, RL}, AS, JL, J, N, TN) -> + {_, NS} = exs1024_next({L, RL}), + case ?MASK(1, J) of + 1 -> + AS2 = lists:zipwith(fun(X, Y) -> X bxor Y end, + AS, L ++ lists:reverse(RL)), + exs1024_jump(NS, AS2, JL, J bsr 1, N-1, TN-1); + 0 -> + exs1024_jump(NS, AS, JL, J bsr 1, N-1, TN-1) + end. + +%% ===================================================================== +%% exro928ss PRNG: Xoroshiro928** +%% +%% Reference URL: http://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf +%% i.e the Xoroshiro1024 generator with ** scrambler +%% with {S, R, T} = {5, 7, 9} as recommended in the paper. +%% +%% {A, B, C} were tried out and selected as {44, 9, 45} +%% and the jump coefficients calculated. +%% +%% Standard jump function pseudocode: +%% +%% Jump constant j = 0xb10773cb...44085302f77130ca +%% Generator state: s +%% New generator state: t = 0 +%% foreach bit in j, low to high: +%% if the bit is one: +%% t ^= s +%% next s +%% s = t +%% +%% Generator used for reference value calculation: +%% +%% #include <stdint.h> +%% #include <stdio.h> +%% +%% int p = 0; +%% uint64_t s[16]; +%% +%% #define MASK(x) ((x) & ((UINT64_C(1) << 58) - 1)) +%% static __inline uint64_t rotl(uint64_t x, int n) { +%% return MASK(x << n) | (x >> (58 - n)); +%% } +%% +%% uint64_t next() { +%% const int q = p; +%% const uint64_t s0 = s[p = (p + 1) & 15]; +%% uint64_t s15 = s[q]; +%% +%% const uint64_t result_starstar = MASK(rotl(MASK(s0 * 5), 7) * 9); +%% +%% s15 ^= s0; +%% s[q] = rotl(s0, 44) ^ s15 ^ MASK(s15 << 9); +%% s[p] = rotl(s15, 45); +%% +%% return result_starstar; +%% } +%% +%% static const uint64_t jump_2pow512[15] = +%% { 0x44085302f77130ca, 0xba05381fdfd14902, 0x10a1de1d7d6813d2, +%% 0xb83fe51a1eb3be19, 0xa81b0090567fd9f0, 0x5ac26d5d20f9b49f, +%% 0x4ddd98ee4be41e01, 0x0657e19f00d4b358, 0xf02f778573cf0f0a, +%% 0xb45a3a8a3cef3cc0, 0x6e62a33cc2323831, 0xbcb3b7c4cc049c53, +%% 0x83f240c6007e76ce, 0xe19f5fc1a1504acd, 0x00000000b10773cb }; +%% +%% static const uint64_t jump_2pow20[15] = +%% { 0xbdb966a3daf905e6, 0x644807a56270cf78, 0xda90f4a806c17e9e, +%% 0x4a426866bfad3c77, 0xaf699c306d8e7566, 0x8ebc73c700b8b091, +%% 0xc081a7bf148531fb, 0xdc4d3af15f8a4dfd, 0x90627c014098f4b6, +%% 0x06df2eb1feaf0fb6, 0x5bdeb1a5a90f2e6b, 0xa480c5878c3549bd, +%% 0xff45ef33c82f3d48, 0xa30bebc15fefcc78, 0x00000000cb3d181c }; +%% +%% void jump(const uint64_t *jump) { +%% uint64_t j, t[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; +%% int m, n, k; +%% for (m = 0; m < 15; m++, jump++) { +%% for (n = 0, j = *jump; n < 64; n++, j >>= 1) { +%% if ((j & 1) != 0) { +%% for (k = 0; k < 16; k++) { +%% t[k] ^= s[(p + k) & 15]; +%% } +%% } +%% next(); +%% } +%% } +%% for (k = 0; k < 16; k++) { +%% s[(p + k) & 15] = t[k]; +%% } +%% } +%% +%% ===================================================================== + +-opaque exro928_state() :: {list(uint58()), list(uint58())}. + +-spec exro928_seed( + list(uint58()) | integer() | {integer(), integer(), integer()}) -> + exro928_state(). +exro928_seed(L) when is_list(L) -> + {seed58_nz(16, L), []}; +exro928_seed(X) when is_integer(X) -> + {seed58(16, ?MASK(64, X)), []}; +%% +%% Seed from traditional integer triple - mix into splitmix +exro928_seed({A1, A2, A3}) -> + {S0, X0} = seed58(?MASK(64, A1)), + {S1, X1} = seed58(?MASK(64, A2) bxor X0), + {S2, X2} = seed58(?MASK(64, A3) bxor X1), + {[S0,S1,S2|seed58(13, X2)], []}. + + +%% Update the state and calculate output word +-spec exro928ss_next(exro928_state()) -> {uint58(), exro928_state()}. +exro928ss_next({[S15,S0|Ss], Rs}) -> + SR = exro928_next_state(Ss, Rs, S15, S0), + %% + %% {S, R, T} = {5, 7, 9} + %% const uint64_t result_starstar = rotl(s0 * S, R) * T; + %% + {?scramble_starstar(S0, V_0, V_1), SR}; +%% %% The multiply by add shifted trick avoids creating bignums +%% %% which improves performance significantly +%% %% +%% V0 = ?MASK(58, S0 + ?BSL(58, S0, 2)), % V0 = S0 * 5 +%% V1 = ?ROTL(58, V0, 7), +%% V = ?MASK(58, V1 + ?BSL(58, V1, 3)), % V = V1 * 9 +%% {V, SR}; +exro928ss_next({[S15], Rs}) -> + exro928ss_next({[S15|lists:reverse(Rs)], []}). + +-spec exro928_next(exro928_state()) -> {{uint58(),uint58()}, exro928_state()}. +exro928_next({[S15,S0|Ss], Rs}) -> + SR = exro928_next_state(Ss, Rs, S15, S0), + {{S15,S0}, SR}; +exro928_next({[S15], Rs}) -> + exro928_next({[S15|lists:reverse(Rs)], []}). + +%% Just update the state +-spec exro928_next_state(exro928_state()) -> exro928_state(). +exro928_next_state({[S15,S0|Ss], Rs}) -> + exro928_next_state(Ss, Rs, S15, S0); +exro928_next_state({[S15], Rs}) -> + [S0|Ss] = lists:reverse(Rs), + exro928_next_state(Ss, [], S15, S0). + +exro928_next_state(Ss, Rs, S15, S0) -> + %% {A, B, C} = {44, 9, 45}, + %% s15 ^= s0; + %% NewS15: s[q] = rotl(s0, A) ^ s15 ^ (s15 << B); + %% NewS0: s[p] = rotl(s15, C); + %% + Q = S15 bxor S0, + NewS15 = ?ROTL(58, S0, 44) bxor Q bxor ?BSL(58, Q, 9), + NewS0 = ?ROTL(58, Q, 45), + {[NewS0|Ss], [NewS15|Rs]}. + + +exro928ss_uniform({Alg, SR}) -> + {V, NewSR} = exro928ss_next(SR), + {(V bsr (58-53)) * ?TWO_POW_MINUS53, {Alg, NewSR}}. + +exro928ss_uniform(Range, {Alg, SR}) -> + {V, NewSR} = exro928ss_next(SR), + MaxMinusRange = ?BIT(58) - Range, + ?uniform_range(Range, Alg, NewSR, V, MaxMinusRange, I). + + +-spec exro928_jump({alg_handler(), exro928_state()}) -> + {alg_handler(), exro928_state()}. +exro928_jump({Alg, SR}) -> + {Alg,exro928_jump_2pow512(SR)}. + +-spec exro928_jump_2pow512(exro928_state()) -> exro928_state(). +exro928_jump_2pow512(SR) -> + polyjump( + SR, fun exro928_next_state/1, + %% 2^512 + [16#4085302F77130CA, 16#54E07F7F4524091, + 16#5E1D7D6813D2BA0, 16#4687ACEF8644287, + 16#4567FD9F0B83FE5, 16#43E6D27EA06C024, + 16#641E015AC26D5D2, 16#6CD61377663B92F, + 16#70A0657E19F00D4, 16#43C0BDDE15CF3C3, + 16#745A3A8A3CEF3CC, 16#58A8CF308C8E0C6, + 16#7B7C4CC049C536E, 16#431801F9DB3AF2C, + 16#41A1504ACD83F24, 16#6C41DCF2F867D7F]). + +-spec exro928_jump_2pow20(exro928_state()) -> exro928_state(). +exro928_jump_2pow20(SR) -> + polyjump( + SR, fun exro928_next_state/1, + %% 2^20 + [16#5B966A3DAF905E6, 16#601E9589C33DE2F, + 16#74A806C17E9E644, 16#59AFEB4F1DF6A43, + 16#46D8E75664A4268, 16#42E2C246BDA670C, + 16#4531FB8EBC73C70, 16#537F702069EFC52, + 16#4B6DC4D3AF15F8A, 16#5A4189F0050263D, + 16#46DF2EB1FEAF0FB, 16#77AC696A43CB9AC, + 16#4C5878C3549BD5B, 16#7CCF20BCF522920, + 16#415FEFCC78FF45E, 16#72CF460728C2FAF]). + +%% ===================================================================== +%% exrop PRNG: Xoroshiro116+ +%% +%% Reference URL: http://xorshift.di.unimi.it/ +%% +%% 58 bits fits into an immediate on 64bits Erlang and is thus much faster. +%% In fact, an immediate number is 60 bits signed in Erlang so you can +%% add two positive 58 bit numbers and get a 59 bit number that still is +%% a positive immediate, which is a property we utilize here... +%% +%% Modification of the original Xororhiro128+ algorithm to 116 bits +%% by Sebastiano Vigna. A lot of thanks for his help and work. +%% ===================================================================== +%% (a, b, c) = (24, 2, 35) +%% JUMP Polynomial = 0x9863200f83fcd4a11293241fcb12a (116 bit) +%% +%% From http://xoroshiro.di.unimi.it/xoroshiro116plus.c: +%% --------------------------------------------------------------------- +%% /* Written in 2017 by Sebastiano Vigna ([email protected]). +%% +%% To the extent possible under law, the author has dedicated all copyright +%% and related and neighboring rights to this software to the public domain +%% worldwide. This software is distributed without any warranty. +%% +%% See <http://creativecommons.org/publicdomain/zero/1.0/>. */ +%% +%% #include <stdint.h> +%% +%% #define UINT58MASK (uint64_t)((UINT64_C(1) << 58) - 1) +%% +%% uint64_t s[2]; +%% +%% static inline uint64_t rotl58(const uint64_t x, int k) { +%% return (x << k) & UINT58MASK | (x >> (58 - k)); +%% } +%% +%% uint64_t next(void) { +%% uint64_t s1 = s[1]; +%% const uint64_t s0 = s[0]; +%% const uint64_t result = (s0 + s1) & UINT58MASK; +%% +%% s1 ^= s0; +%% s[0] = rotl58(s0, 24) ^ s1 ^ ((s1 << 2) & UINT58MASK); // a, b +%% s[1] = rotl58(s1, 35); // c +%% return result; +%% } +%% +%% void jump(void) { +%% static const uint64_t JUMP[] = +%% { 0x4a11293241fcb12a, 0x0009863200f83fcd }; +%% +%% uint64_t s0 = 0; +%% uint64_t s1 = 0; +%% for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) +%% for(int b = 0; b < 64; b++) { +%% if (JUMP[i] & UINT64_C(1) << b) { +%% s0 ^= s[0]; +%% s1 ^= s[1]; +%% } +%% next(); +%% } +%% s[0] = s0; +%% s[1] = s1; +%% } + +-opaque exrop_state() :: nonempty_improper_list(uint58(), uint58()). + +-dialyzer({no_improper_lists, exrop_seed/1}). + +exrop_seed(L) when is_list(L) -> + [S0,S1] = seed58_nz(2, L), + [S0|S1]; +exrop_seed(X) when is_integer(X) -> + [S0,S1] = seed58(2, ?MASK(64, X)), + [S0|S1]; +%% +%% Traditional integer triplet seed +exrop_seed({A1, A2, A3}) -> + [_|S1] = + exrop_next_s( + ?MASK(58, (A1 * 4294967197) + 1), + ?MASK(58, (A2 * 4294967231) + 1)), + exrop_next_s(?MASK(58, (A3 * 4294967279) + 1), S1). + +-dialyzer({no_improper_lists, exrop_next_s/2}). +%% Advance xoroshiro116+ state one step +%% [a, b, c] = [24, 2, 35] +-define( + exrop_next_s(S0, S1, S1_a), + begin + S1_a = S1 bxor S0, + [?ROTL(58, S0, 24) bxor S1_a bxor ?BSL(58, S1_a, 2)| % a, b + ?ROTL(58, S1_a, 35)] % c + end). +exrop_next_s(S0, S1) -> + ?exrop_next_s(S0, S1, S1_a). + +-dialyzer({no_improper_lists, exrop_next/1}). +%% Advance xoroshiro116+ state one step, generate 58 bit unsigned integer, +%% and waste the lowest bit since it is of lower randomness quality +exrop_next([S0|S1]) -> + {?MASK(58, S0 + S1), ?exrop_next_s(S0, S1, S1_a)}. + +exrop_uniform({Alg, R}) -> + {V, R1} = exrop_next(R), + %% Waste the lowest bit since it is of lower + %% randomness quality than the others + {(V bsr (58-53)) * ?TWO_POW_MINUS53, {Alg, R1}}. + +exrop_uniform(Range, {Alg, R}) -> + {V, R1} = exrop_next(R), + MaxMinusRange = ?BIT(58) - Range, + ?uniform_range(Range, Alg, R1, V, MaxMinusRange, I). + +%% Split a 116 bit constant into two 58 bit words, +%% a top '1' marks the end of the low word. +-define( + JUMP_116(Jump), + [?BIT(58) bor ?MASK(58, (Jump)),(Jump) bsr 58]). +%% +exrop_jump({Alg,S}) -> + [J|Js] = ?JUMP_116(16#9863200f83fcd4a11293241fcb12a), + {Alg, exrop_jump(S, 0, 0, J, Js)}. +%% +-dialyzer({no_improper_lists, exrop_jump/5}). +exrop_jump(_S, S0, S1, 0, []) -> % End of jump constant + [S0|S1]; +exrop_jump(S, S0, S1, 1, [J|Js]) -> % End of word + exrop_jump(S, S0, S1, J, Js); +exrop_jump([S__0|S__1] = _S, S0, S1, J, Js) -> + case ?MASK(1, J) of + 1 -> + NewS = exrop_next_s(S__0, S__1), + exrop_jump(NewS, S0 bxor S__0, S1 bxor S__1, J bsr 1, Js); + 0 -> + NewS = exrop_next_s(S__0, S__1), + exrop_jump(NewS, S0, S1, J bsr 1, Js) + end. + +%% ===================================================================== +%% Mask and fill state list, ensure not all zeros +%% ===================================================================== + +seed58_nz(N, Ss) -> + seed_nz(N, Ss, 58, false). + +seed64_nz(N, Ss) -> + seed_nz(N, Ss, 64, false). + +seed_nz(_N, [], _M, false) -> + erlang:error(zero_seed); +seed_nz(0, [_|_], _M, _NZ) -> + erlang:error(too_many_seed_integers); +seed_nz(0, [], _M, _NZ) -> + []; +seed_nz(N, [], M, true) -> + [0|seed_nz(N - 1, [], M, true)]; +seed_nz(N, [S|Ss], M, NZ) -> + if + is_integer(S) -> + R = ?MASK(M, S), + [R|seed_nz(N - 1, Ss, M, NZ orelse R =/= 0)]; + true -> + erlang:error(non_integer_seed) + end. + +%% ===================================================================== +%% Splitmix seeders, lowest bits of SplitMix64, zeros skipped +%% ===================================================================== + +-spec seed58(non_neg_integer(), uint64()) -> list(uint58()). +seed58(0, _X) -> + []; +seed58(N, X) -> + {Z,NewX} = seed58(X), + [Z|seed58(N - 1, NewX)]. +%% +seed58(X_0) -> + {Z0,X} = splitmix64_next(X_0), + case ?MASK(58, Z0) of + 0 -> + seed58(X); + Z -> + {Z,X} + end. + +-spec seed64(non_neg_integer(), uint64()) -> list(uint64()). +seed64(0, _X) -> + []; +seed64(N, X) -> + {Z,NewX} = seed64(X), + [Z|seed64(N - 1, NewX)]. +%% +seed64(X_0) -> + {Z,X} = ZX = splitmix64_next(X_0), + if + Z =:= 0 -> + seed64(X); + true -> + ZX + end. + +%% The SplitMix64 generator: +%% +%% uint64_t splitmix64_next() { +%% uint64_t z = (x += 0x9e3779b97f4a7c15); +%% z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; +%% z = (z ^ (z >> 27)) * 0x94d049bb133111eb; +%% return z ^ (z >> 31); +%% } +%% +splitmix64_next(X_0) -> + X = ?MASK(64, X_0 + 16#9e3779b97f4a7c15), + Z_0 = ?MASK(64, (X bxor (X bsr 30)) * 16#bf58476d1ce4e5b9), + Z_1 = ?MASK(64, (Z_0 bxor (Z_0 bsr 27)) * 16#94d049bb133111eb), + {?MASK(64, Z_1 bxor (Z_1 bsr 31)),X}. + +%% ===================================================================== +%% Polynomial jump with a jump constant word list, +%% high bit in each word marking top of word, +%% SR is a {Forward, Reverse} queue tuple with Forward never empty +%% ===================================================================== + +polyjump({Ss, Rs} = SR, NextState, JumpConst) -> + %% Create new state accumulator T + Ts = lists:duplicate(length(Ss) + length(Rs), 0), + polyjump(SR, NextState, JumpConst, Ts). +%% +%% Foreach jump word +polyjump(_SR, _NextState, [], Ts) -> + %% Return new calculated state + {Ts, []}; +polyjump(SR, NextState, [J|Js], Ts) -> + polyjump(SR, NextState, Js, Ts, J). +%% +%% Foreach bit in jump word until top bit +polyjump(SR, NextState, Js, Ts, 1) -> + polyjump(SR, NextState, Js, Ts); +polyjump({Ss, Rs} = SR, NextState, Js, Ts, J) when J =/= 0 -> + NewSR = NextState(SR), + NewJ = J bsr 1, + case ?MASK(1, J) of + 0 -> + polyjump(NewSR, NextState, Js, Ts, NewJ); + 1 -> + %% Xor this state onto T + polyjump(NewSR, NextState, Js, xorzip_sr(Ts, Ss, Rs), NewJ) + end. + +xorzip_sr([], [], undefined) -> + []; +xorzip_sr(Ts, [], Rs) -> + xorzip_sr(Ts, lists:reverse(Rs), undefined); +xorzip_sr([T|Ts], [S|Ss], Rs) -> + [T bxor S|xorzip_sr(Ts, Ss, Rs)]. + +%% ===================================================================== + +format_jumpconst58(String) -> + ReOpts = [{newline,any},{capture,all_but_first,binary},global], + {match,Matches} = re:run(String, "0x([a-zA-Z0-9]+)", ReOpts), + format_jumcons58_matches(lists:reverse(Matches), 0). + +format_jumcons58_matches([], J) -> + format_jumpconst58_value(J); +format_jumcons58_matches([[Bin]|Matches], J) -> + NewJ = (J bsl 64) bor binary_to_integer(Bin, 16), + format_jumcons58_matches(Matches, NewJ). + +format_jumpconst58_value(0) -> + ok; +format_jumpconst58_value(J) -> + io:format("16#~s,~n", [integer_to_list(?MASK(58, J) bor ?BIT(58), 16)]), + format_jumpconst58_value(J bsr 58). %% ===================================================================== %% Ziggurat cont @@ -347,9 +1447,13 @@ exs1024_uniform(Max, {Alg, R}) -> -define(NOR_INV_R, 1/?NOR_R). %% return a {sign, Random51bits, State} +get_52({Alg=#{bits:=Bits, next:=Next}, S0}) -> + %% Use the high bits + {Int,S1} = Next(S0), + {?BIT(Bits - 51 - 1) band Int, Int bsr (Bits - 51), {Alg, S1}}; get_52({Alg=#{next:=Next}, S0}) -> {Int,S1} = Next(S0), - {((1 bsl 51) band Int), Int band ((1 bsl 51)-1), {Alg, S1}}. + {?BIT(51) band Int, ?MASK(51, Int), {Alg, S1}}. %% Slow path normal_s(0, Sign, X0, State0) -> @@ -594,3 +1698,42 @@ normal_fi(Indx) -> 1.0214971439701471e-02,8.6165827693987316e-03,7.0508754713732268e-03, 5.5224032992509968e-03,4.0379725933630305e-03,2.6090727461021627e-03, 1.2602859304985975e-03}). + +%%%bitcount64(0) -> 0; +%%%bitcount64(V) -> 1 + bitcount(V, 64). +%%% +%%%-define( +%%% BITCOUNT(V, N), +%%% bitcount(V, N) -> +%%% if +%%% (1 bsl ((N) bsr 1)) =< (V) -> +%%% ((N) bsr 1) + bitcount((V) bsr ((N) bsr 1), ((N) bsr 1)); +%%% true -> +%%% bitcount((V), ((N) bsr 1)) +%%% end). +%%%?BITCOUNT(V, 64); +%%%?BITCOUNT(V, 32); +%%%?BITCOUNT(V, 16); +%%%?BITCOUNT(V, 8); +%%%?BITCOUNT(V, 4); +%%%?BITCOUNT(V, 2); +%%%bitcount(_, 1) -> 0. + +bc64(V) -> ?BC(V, 64). + +%% Linear from high bit - higher probability first gives faster execution +bc(V, B, N) when B =< V -> N; +bc(V, B, N) -> bc(V, B bsr 1, N - 1). + +make_float(S, E, M) -> + <<F/float>> = <<S:1, E:11, M:52>>, + F. + +float2str(N) -> + <<S:1, E:11, M:52>> = <<(float(N))/float>>, + lists:flatten( + io_lib:format( + "~c~c.~13.16.0bE~b", + [case S of 1 -> $-; 0 -> $+ end, + case E of 0 -> $0; _ -> $1 end, + M, E - 16#3ff])). |