1 files changed, 305 insertions, 12 deletions
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index 4951dc727b..3a32f40fcd 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -32,14 +32,19 @@
          uniform/0, uniform/1, uniform_s/1, uniform_s/2,
          uniform_real/0, uniform_real_s/1,
          jump/0, jump/1,
-	     normal/0, normal/2, normal_s/1, normal_s/3
+         normal/0, normal/2, normal_s/1, normal_s/3
 	]).
 
+%% Test, dev and internal
+-export([exro928_jump_2pow512/1, exro928_jump_2pow20/1,
+	 format_jumpconst58/1, seed58/2]).
+
 %% Debug
 -export([make_float/3, float2str/1, bc64/1]).
 
 -compile({inline, [exs64_next/1, exsplus_next/1,
 		   exs1024_next/1, exs1024_calc/2,
+                   exro928_next_state/4,
                    exrop_next/1, exrop_next_s/2,
 		   get_52/1, normal_kiwi/1]}).
 
@@ -80,8 +85,8 @@
 
 %% This depends on the algorithm handler function
 -type alg_state() ::
-        exs64_state() | exsplus_state() | exs1024_state() |
-        exrop_state() | term().
+	exrop_state() | exs1024_state() |  exro928_state() |  exsplus_state() |
+	exs64_state() | term().
 
 %% This is the algorithm handling definition within this module,
 %% and the type to use for plugins.
@@ -124,14 +129,16 @@
 
 %% Algorithm state
 -type state() :: {alg_handler(), alg_state()}.
--type builtin_alg() :: exs64 | exsplus | exsp | exs1024 | exs1024s | exrop.
+-type builtin_alg() ::
+	exrop | exs1024s | exro928ss | exsp | exs64 | exsplus | exs1024.
 -type alg() :: builtin_alg() | atom().
 -type export_state() :: {alg(), alg_state()}.
 -export_type(
    [builtin_alg/0, alg/0, alg_handler/0, alg_state/0,
     state/0, export_state/0]).
 -export_type(
-   [exs64_state/0, exsplus_state/0, exs1024_state/0, exrop_state/0]).
+   [exrop_state/0, exs1024_state/0, exro928_state/0, exsplus_state/0,
+    exs64_state/0]).
 
 %% =====================================================================
 %% Range macro and helper
@@ -260,15 +267,17 @@ seed_s(Alg) ->
 %% and returns the NEW state.
 
 -spec seed(
-        Alg :: builtin_alg(), Seed :: {integer(), integer(), integer()}) ->
+        Alg :: builtin_alg(),
+	Seed :: {integer(), integer(), integer()}) ->
                   state().
 seed(Alg0, S0) ->
     seed_put(seed_s(Alg0, S0)).
 
 -spec seed_s(
-        Alg :: builtin_alg(), Seed :: {integer(), integer(), integer()}) ->
+        Alg :: builtin_alg(),
+	Seed :: {integer(), integer(), integer()}) ->
                     state().
-seed_s(Alg0, S0 = {_, _, _}) ->
+seed_s(Alg0, S0) ->
     {Alg, Seed} = mk_alg(Alg0),
     AS = Seed(S0),
     {Alg, AS}.
@@ -625,7 +634,13 @@ mk_alg(exrop) ->
     {#{type=>exrop, bits=>58, weak_low_bits=>1, next=>fun exrop_next/1,
        uniform=>fun exrop_uniform/1, uniform_n=>fun exrop_uniform/2,
        jump=>fun exrop_jump/1},
-     fun exrop_seed/1}.
+     fun exrop_seed/1};
+mk_alg(exro928ss) ->
+    {#{type=>exro928ss, bits=>58, next=>fun exro928ss_next/1,
+       uniform=>fun exro928ss_uniform/1,
+       uniform_n=>fun exro928ss_uniform/2,
+       jump=>fun exro928_jump/1},
+     fun exro928_seed/1}.
 
 %% =====================================================================
 %% exs64 PRNG: Xorshift64*
@@ -708,7 +723,8 @@ exsp_uniform(Range, {Alg, R}) ->
 -define(JUMPELEMLEN, 58).
 
 -dialyzer({no_improper_lists, exsplus_jump/1}).
--spec exsplus_jump(state()) -> state().
+-spec exsplus_jump({alg_handler(), exsplus_state()}) ->
+                          {alg_handler(), exsplus_state()}.
 exsplus_jump({Alg, S}) ->
     {S1, AS1} = exsplus_jump(S, [0|0], ?JUMPCONST1, ?JUMPELEMLEN),
     {_,  AS2} = exsplus_jump(S1, AS1,  ?JUMPCONST2, ?JUMPELEMLEN),
@@ -806,8 +822,8 @@ exs1024_next({[H], RL}) ->
 -define(JUMPTOTALLEN, 1024).
 -define(RINGLEN, 16).
 
--spec exs1024_jump(state()) -> state().
-
+-spec exs1024_jump({alg_handler(), exs1024_state()}) ->
+                          {alg_handler(), exs1024_state()}.
 exs1024_jump({Alg, {L, RL}}) ->
     P = length(RL),
     AS = exs1024_jump({L, RL},
@@ -832,6 +848,255 @@ exs1024_jump({L, RL}, AS, JL, J, N, TN) ->
     end.
 
 %% =====================================================================
+%% exro928ss PRNG: Xoroshiro928**
+%%
+%% Reference URL: http://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf
+%% i.e the Xoroshiro1024 generator with ** scrambler
+%% with {S, R, T} = {5, 7, 9} as recommended in the paper.
+%%
+%% {A, B, C} were tried out and selected as {44, 9, 45}
+%% and the jump coefficients calculated.
+%%
+%% Standard jump function pseudocode:
+%% 
+%%     Jump constant j = 0xb10773cb...44085302f77130ca
+%%     Generator state: s
+%%     New generator state: t = 0
+%%     foreach bit in j, low to high:
+%%         if the bit is one:
+%%             t ^= s
+%%         next s
+%%     s = t
+%%
+%% Generator used for reference value calculation:
+%%
+%%     #include <stdint.h>
+%%     #include <stdio.h>
+%%     
+%%     int p = 0;
+%%     uint64_t s[16];
+%%     
+%%     #define MASK(x) ((x) & ((UINT64_C(1) << 58) - 1))
+%%     static __inline uint64_t rotl(uint64_t x, int n) {
+%%         return MASK(x << n) | (x >> (58 - n));
+%%     }
+%%     
+%%     uint64_t next() {
+%%         const int q = p;
+%%         const uint64_t s0 = s[p = (p + 1) & 15];
+%%         uint64_t s15 = s[q];
+%%     
+%%         const uint64_t result_starstar = MASK(rotl(MASK(s0 * 5), 7) * 9);
+%%     
+%%         s15 ^= s0;
+%%         s[q] = rotl(s0, 44) ^ s15 ^ MASK(s15 << 9);
+%%         s[p] = rotl(s15, 45);
+%%     
+%%         return result_starstar;
+%%     }
+%%
+%%     static const uint64_t jump_2pow512[15] =
+%%         { 0x44085302f77130ca, 0xba05381fdfd14902, 0x10a1de1d7d6813d2,
+%%           0xb83fe51a1eb3be19, 0xa81b0090567fd9f0, 0x5ac26d5d20f9b49f,
+%%           0x4ddd98ee4be41e01, 0x0657e19f00d4b358, 0xf02f778573cf0f0a,
+%%           0xb45a3a8a3cef3cc0, 0x6e62a33cc2323831, 0xbcb3b7c4cc049c53,
+%%           0x83f240c6007e76ce, 0xe19f5fc1a1504acd, 0x00000000b10773cb };
+%%
+%%     static const uint64_t jump_2pow20[15] =
+%%         { 0xbdb966a3daf905e6, 0x644807a56270cf78, 0xda90f4a806c17e9e,
+%%           0x4a426866bfad3c77, 0xaf699c306d8e7566, 0x8ebc73c700b8b091,
+%%           0xc081a7bf148531fb, 0xdc4d3af15f8a4dfd, 0x90627c014098f4b6,
+%%           0x06df2eb1feaf0fb6, 0x5bdeb1a5a90f2e6b, 0xa480c5878c3549bd,
+%%           0xff45ef33c82f3d48, 0xa30bebc15fefcc78, 0x00000000cb3d181c };
+%%
+%%     void jump(const uint64_t *jump) {
+%%         uint64_t j, t[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+%%         int m, n, k;
+%%         for (m = 0;  m < 15;  m++, jump++) {
+%%             for (n = 0, j = *jump;  n < 64;  n++, j >>= 1) {
+%%                 if ((j & 1) != 0) {
+%%                     for (k = 0;  k < 16;  k++) {
+%%                         t[k] ^= s[(p + k) & 15];
+%%                     }
+%%                 }
+%%                 next();
+%%             }
+%%         }
+%%         for (k = 0;  k < 16;  k++) {
+%%             s[(p + k) & 15] = t[k];
+%%         }
+%%     }
+%%
+%% =====================================================================
+
+-opaque exro928_state() :: {list(uint58()), list(uint58())}.
+
+
+%%% %% Seed raw words
+%%% exro928_seed({S0,S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15})
+%%%   when S0 bor S1 bor S2 bor S3 bor S4 bor S5 bor S6 bor S7 bor
+%%%        S8 bor S9 bor S10 bor S11 bor S12 bor S13 bor S14 bor S15 > 0,
+%%%        S0 bor S1 bor S2 bor S3 bor S4 bor S5 bor S6 bor S7 bor
+%%%        S8 bor S9 bor S10 bor S11 bor S12 bor S13 bor S14 bor S15 < 1 bsl 58 ->
+%%%     {[S0,S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15], []};
+%%% %%
+%%% %% Seed from one 64-bit integer through splitmix
+%%% exro928_seed(X) when is_integer(X), 0 =< X, X =< 1 bsl 64 ->
+%%%     {exro928_seed(X, 16),[]};
+%%
+%% Seed from traditional triple - splitmix mixed with the 3 integers
+exro928_seed({A1, A2, A3}) ->
+    {S0, X0} = seed58(?MASK(64, A1)),
+    {S1, X1} = seed58(?MASK(64, A2 bxor X0)),
+    {S2, X2} = seed58(?MASK(64, A3 bxor X1)),
+    {[S0,S1,S2|exro928_seed(X2, 13)],[]}.
+%%
+%% Splitmix seed the rest of the state words
+exro928_seed(_X, 0) ->
+    [];
+exro928_seed(X, N) ->
+    {S, NewX} = seed58(X),
+    [S|exro928_seed(NewX, N-1)].
+
+
+%% Update the state and calculate output word
+-spec exro928ss_next(exro928_state()) -> {uint58(), exro928_state()}.
+exro928ss_next({[S15,S0|Ss], Rs}) ->
+    SR = exro928_next_state(Ss, Rs, S15, S0),
+    %%
+    %% {S, R, T} = {5, 7, 9}
+    %% const uint64_t result_starstar = rotl(s0 * S, R) * T;
+    %%
+    %% The multiply by add shifted trick avoids creating bignums
+    %% which improves performance significantly
+    %%
+    V0 = ?MASK(58, S0 + ?BSL(58, S0, 2)), % V0 = S0 * 5
+    V1 = ?ROTL(58, V0, 7),
+    V = ?MASK(58, V1 + ?BSL(58, V1, 3)), % V = V1 * 9
+    {V, SR};
+exro928ss_next({[S15], Rs}) ->
+    exro928ss_next({[S15|lists:reverse(Rs)], []}).
+
+%% Just update the state
+exro928_next_state({[S15,S0|Ss], Rs}) ->
+    exro928_next_state(Ss, Rs, S15, S0);
+exro928_next_state({[S15], Rs}) ->
+    [S0|Ss] = lists:reverse(Rs),
+    exro928_next_state(Ss, [], S15, S0).
+
+exro928_next_state(Ss, Rs, S15, S0) ->
+    %% {A, B, C} = {44, 9, 45},
+    %% s15 ^= s0;
+    %% NewS15: s[q] = rotl(s0, A) ^ s15 ^ (s15 << B);
+    %% NewS0: s[p] = rotl(s15, C);
+    %%
+    Q = S15 bxor S0,
+    NewS15 = ?ROTL(58, S0, 44) bxor Q bxor ?BSL(58, Q, 9),
+    NewS0 = ?ROTL(58, Q, 45),
+    {[NewS0|Ss], [NewS15|Rs]}.
+
+
+exro928ss_uniform({Alg, SR}) ->
+    {V, NewSR} = exro928ss_next(SR),
+    {(V bsr (58-53)) * ?TWO_POW_MINUS53, {Alg, NewSR}}.
+
+exro928ss_uniform(Range, {Alg, SR}) ->
+    {V, NewSR} = exro928ss_next(SR),
+    MaxMinusRange = ?BIT(58) - Range,
+    ?uniform_range(Range, Alg, NewSR, V, MaxMinusRange, I).
+
+
+-spec exro928_jump({alg_handler(), exro928_state()}) ->
+                          {alg_handler(), exro928_state()}.
+exro928_jump({Alg, SR}) ->
+    {Alg,exro928_jump_2pow512(SR)}.
+
+exro928_jump_2pow512(SR) ->
+    polyjump(
+      SR, fun exro928_next_state/1,
+      %% 2^512
+      [16#4085302F77130CA, 16#54E07F7F4524091,
+       16#5E1D7D6813D2BA0, 16#4687ACEF8644287,
+       16#4567FD9F0B83FE5, 16#43E6D27EA06C024,
+       16#641E015AC26D5D2, 16#6CD61377663B92F,
+       16#70A0657E19F00D4, 16#43C0BDDE15CF3C3,
+       16#745A3A8A3CEF3CC, 16#58A8CF308C8E0C6,
+       16#7B7C4CC049C536E, 16#431801F9DB3AF2C,
+       16#41A1504ACD83F24, 16#6C41DCF2F867D7F]).
+
+exro928_jump_2pow20(SR) ->
+    polyjump(
+      SR, fun exro928_next_state/1,
+      %% 2^20
+      [16#5B966A3DAF905E6, 16#601E9589C33DE2F,
+       16#74A806C17E9E644, 16#59AFEB4F1DF6A43,
+       16#46D8E75664A4268, 16#42E2C246BDA670C,
+       16#4531FB8EBC73C70, 16#537F702069EFC52,
+       16#4B6DC4D3AF15F8A, 16#5A4189F0050263D,
+       16#46DF2EB1FEAF0FB, 16#77AC696A43CB9AC,
+       16#4C5878C3549BD5B, 16#7CCF20BCF522920,
+       16#415FEFCC78FF45E, 16#72CF460728C2FAF]).
+
+%% =====================================================================
+%% Polynomial jump with a jump constant word list,
+%% high bit in each word marking top of word,
+%% SR is a {Forward, Reverse} queue tuple with Forward never empty
+%% =====================================================================
+
+polyjump({Ss, Rs} = SR, NextState, JumpConst) ->
+    %% Create new state accumulator T
+    Ts = lists:duplicate(length(Ss) + length(Rs), 0),
+    polyjump(SR, NextState, JumpConst, Ts).
+%%
+%% Foreach jump word
+polyjump(_SR, _NextState, [], Ts) ->
+    %% Return new calculated state
+    {Ts, []};
+polyjump(SR, NextState, [J|Js], Ts) ->
+    polyjump(SR, NextState, Js, Ts, J).
+%%
+%% Foreach bit in jump word until top bit
+polyjump(SR, NextState, Js, Ts, 1) ->
+
+    polyjump(SR, NextState, Js, Ts);
+polyjump({Ss, Rs} = SR, NextState, Js, Ts, J) when J =/= 0 ->
+    NewSR = NextState(SR),
+    NewJ = J bsr 1,
+    case ?MASK(1, J) of
+        0 ->
+            polyjump(NewSR, NextState, Js, Ts, NewJ);
+        1 ->
+            %% Xor this state onto T
+            polyjump(NewSR, NextState, Js, xorzip_sr(Ts, Ss, Rs), NewJ)
+    end.
+
+xorzip_sr([], [], undefined) ->
+    [];
+xorzip_sr(Ts, [], Rs) ->
+    xorzip_sr(Ts, lists:reverse(Rs), undefined);
+xorzip_sr([T|Ts], [S|Ss], Rs) ->
+    [T bxor S|xorzip_sr(Ts, Ss, Rs)].
+
+%% =====================================================================
+
+format_jumpconst58(String) ->
+    ReOpts = [{newline,any},{capture,all_but_first,binary},global],
+    {match,Matches} = re:run(String, "0x([a-zA-Z0-9]+)", ReOpts),
+    format_jumcons58_matches(lists:reverse(Matches), 0).
+
+format_jumcons58_matches([], J) ->
+    format_jumpconst58_value(J);
+format_jumcons58_matches([[Bin]|Matches], J) ->
+    NewJ = (J bsl 64) bor binary_to_integer(Bin, 16),
+    format_jumcons58_matches(Matches, NewJ).
+
+format_jumpconst58_value(0) ->
+    ok;
+format_jumpconst58_value(J) ->
+    io:format("16#~s,~n", [integer_to_list(?MASK(58, J) bor ?BIT(58), 16)]),
+    format_jumpconst58_value(J bsr 58).
+
+%% =====================================================================
 %% exrop PRNG: Xoroshiro116+
 %%
 %% Reference URL: http://xorshift.di.unimi.it/
@@ -962,6 +1227,34 @@ exrop_jump([S__0|S__1] = _S, S0, S1, J, Js) ->
     end.
 
 %% =====================================================================
+%% 58-bit seeder; lowest 58 bits of SplitMix64, zeros skipped
+%%
+%% uint64_t splitmix64_next() {
+%% 	uint64_t z = (x += 0x9e3779b97f4a7c15);
+%% 	z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
+%% 	z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
+%% 	return z ^ (z >> 31);
+%% }
+%% =====================================================================
+
+seed58(0, _X) ->
+    [];
+seed58(N, X) ->
+    {Z,NewX} = seed58(X),
+    [Z|seed58(N - 1, NewX)].
+%%
+seed58(X_0) ->
+    X = ?MASK(64, X_0 + 16#9e3779b97f4a7c15),
+    Z_0 = ?MASK(64, (X bxor (X bsr 30)) * 16#bf58476d1ce4e5b9),
+    Z_1 = ?MASK(64, (Z_0 bxor (Z_0 bsr 27)) * 16#94d049bb133111eb),
+    case ?MASK(58, Z_1 bxor (Z_1 bsr 31)) of
+	0 ->
+	    seed58(X);
+	Z ->
+	    {Z, X}
+    end.
+
+%% =====================================================================
 %% Ziggurat cont
 %% =====================================================================
 -define(NOR_R, 3.6541528853610087963519472518).