From 0248865dea315253618e733d77177b5a80679e20 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Thu, 14 Sep 2017 15:57:16 +0200 Subject: Implement uniform_real/0 and uniform_real_s/1 --- lib/stdlib/doc/src/rand.xml | 118 ++++++++++++++++++- lib/stdlib/src/rand.erl | 261 ++++++++++++++++++++++++++++++++++++++--- lib/stdlib/test/rand_SUITE.erl | 210 +++++++++++++++++++++++++++++++-- 3 files changed, 563 insertions(+), 26 deletions(-) diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml index 89fb858823..21f680a0ee 100644 --- a/lib/stdlib/doc/src/rand.xml +++ b/lib/stdlib/doc/src/rand.xml @@ -133,8 +133,9 @@ variable rand_seed to remember the current state.

If a process calls - uniform/0 or - uniform/1 without + uniform/0, + uniform/1 or + uniform_real/0 without setting a seed first, seed/1 is called automatically with the default algorithm and creates a non-constant seed.
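For illustration (not part of the patch), a minimal sketch assuming the default algorithm of this OTP version: the first call seeds the process automatically, or the algorithm can be chosen and seeded explicitly first.

X = rand:uniform_real(),   %% seeds the process automatically on first use
_ = rand:seed(exrop),      %% or choose the algorithm and seed explicitly
Y = rand:uniform_real()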

@@ -168,10 +169,17 @@ R3 = rand:uniform(), S0 = rand:seed_s(exrop), {R4, S1} = rand:uniform_s(S0), +

Textbook basic form of the Box-Muller standard normal deviate

+ +
+R5 = rand:uniform_real(),
+R6 = rand:uniform(),
+SND0 = math:sqrt(-2 * math:log(R5)) * math:cos(math:pi() * R6)
+
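A stateful, paired variant of the same transformation, sketched along the lines of the Box-Muller benchmark added to rand_SUITE in this patch (variable names are illustrative; using 2*pi yields two independent deviates):

BMS0 = rand:seed_s(exrop),
{U1, BMS1} = rand:uniform_real_s(BMS0),  %% never 0.0, so math:log/1 is safe
{U2, BMS2} = rand:uniform_s(BMS1),
BMR = math:sqrt(-2.0 * math:log(U1)),
BMT = 2 * math:pi() * U2,
{Z0, Z1} = {BMR * math:cos(BMT), BMR * math:sin(BMT)}  %% two independent standard normal deviates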

Create a standard normal deviate:

-{SND0, S2} = rand:normal_s(S1),
+{SND1, S2} = rand:normal_s(S1),

Create a normal deviate with mean -3 and variance 0.5:

@@ -414,7 +422,8 @@ tests. We suggest to use a sign test to extract a random Boolean value. This function may return exactly 0.0 which can be fatal for certain applications. If that is undesired you can use (1.0 - rand:uniform()) to get the - interval 0.0 < X =< 1.0. + interval 0.0 < X =< 1.0, or instead use + uniform_real/0.

If neither endpoint is desired you can test and re-try @@ -431,6 +440,42 @@ end. + + + Return a random float. + +

+ Returns a random float + uniformly distributed in the value range + DBL_MIN =< X < 1.0 + and updates the state in the process dictionary. +

+

+ Conceptually, a random real number R is generated + from the interval 0 =< R < 1 and then the + closest rounded down normalized number + in the IEEE 754 Double precision format + is returned. +
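To inspect what rounding down to a normalized double produces bit by bit, the debug helper exported by this patch can be used (a sketch; float2str/1 is a debug export, not a documented API):

rand:float2str(rand:uniform_real())
%% returns a string with the sign, the leading mantissa bit, 13 hexadecimal
%% mantissa digits, and the unbiased binary exponent of the generated float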

+ +

+ The numbers generated by this function have better + granularity for small numbers than the regular + uniform/0 + because all bits in the mantissa are random. + This property, in combination with the fact that exactly zero + is never returned, is useful for algorithms doing for example + 1.0 / X or math:log(X). +
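A minimal sketch (not from the patch) of why that matters for 1.0 / X and math:log(X):

Small = rand:uniform_real(),  %% DBL_MIN =< Small < 1.0, never exactly 0.0
Inv = 1.0 / Small,            %% cannot raise badarith by dividing by zero
Log = math:log(Small)         %% cannot fail either, since Small > 0.0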

+
+

+ See + uniform_real_s/1 + for more explanation. +

+ + + Return a random integer. @@ -460,7 +505,8 @@ end. This function may return exactly 0.0 which can be fatal for certain applications. If that is undesired you can use (1.0 - rand:uniform(State)) to get the - interval 0.0 < X =< 1.0. + interval 0.0 < X =< 1.0, or instead use + uniform_real_s/1.

If neither endpoint is desired you can test and re-try @@ -477,6 +523,68 @@ end. + + + Return a random float. + +

+ Returns, for a specified state, a random float + uniformly distributed in the value range + DBL_MIN =< X < 1.0 + and a new state. +

+

+ Conceptually, a random real number R is generated + from the interval 0 =< R < 1 and then the + closest rounded down normalized number + in the IEEE 754 Double precision format + is returned. +

+ +

+ The numbers generated by this function have better + granularity for small numbers than the regular + uniform_s/1 + because all bits in the mantissa are random. + This property, in combination with the fact that exactly zero + is never returned, is useful for algorithms doing for example + 1.0 / X or math:log(X). +

+
+

+ The concept implies that the probability of getting + exactly zero is extremely low; so low that this function + is in fact guaranteed never to return zero. The smallest + number that it can return is DBL_MIN, which is + 2.0^(-1022). +
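To make the lower bound concrete (a sketch, not part of the patch):

DblMin = math:pow(2, -1022),  %% 2.2250738585072014e-308
{X, _} = rand:uniform_real_s(rand:seed_s(exrop)),
true = (DblMin =< X andalso X < 1.0)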

+

+ The value range stated at the top of this function + description is technically correct, but + 0.0 =< X < 1.0 + is a better description of the statistical distribution of + the generated numbers, except that exactly 0.0 + is never returned, which is not possible to observe + statistically. +

+

+ For example, for all sub-ranges + N*2.0^(-53) =< X < (N+1)*2.0^(-53) + where + 0 =< integer(N) < 2.0^53 + the probability is the same. + Compare that with the form of the numbers generated by + uniform_s/1. +
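A rough empirical check of that claim, using a hypothetical helper that is not part of rand_SUITE: it counts hits in two equally wide sub-ranges, one next to 0.0 and one next to 0.5, and over enough samples the two counters come out about equal.

count_hits(0, _S, Lo, Mid) ->
    {Lo, Mid};
count_hits(N, S0, Lo, Mid) ->
    W = math:pow(2, -16),                        %% width of both sub-ranges
    {X, S1} = rand:uniform_real_s(S0),
    if
        X < W                 -> count_hits(N - 1, S1, Lo + 1, Mid);
        0.5 =< X, X < 0.5 + W -> count_hits(N - 1, S1, Lo, Mid + 1);
        true                  -> count_hits(N - 1, S1, Lo, Mid)
    end.

For example, count_hits(10000000, rand:seed_s(exrop), 0, 0) should return two counts in the neighbourhood of 150 each.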

+

+ Having to generate extra random bits for + small numbers costs a little performance. + This function is about 20% slower than the regular + uniform_s/1. +
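An informal way to reproduce a figure in that ballpark (a sketch; absolute numbers vary with hardware, and the measure/1 test case in rand_SUITE is the patch's own benchmark):

N = 1000000,
Run = fun Loop(0, _F, S) -> S;
          Loop(I, F, S0) -> {_, S1} = F(S0), Loop(I - 1, F, S1)
      end,
Seed = rand:seed_s(exrop),
{T1, _} = timer:tc(fun () -> Run(N, fun rand:uniform_s/1, Seed) end),
{T2, _} = timer:tc(fun () -> Run(N, fun rand:uniform_real_s/1, Seed) end),
T2 / T1  %% roughly 1.2 according to the figure above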

+ +
+ Return a random integer. diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl index 7a8a5e6d4a..362e98006e 100644 --- a/lib/stdlib/src/rand.erl +++ b/lib/stdlib/src/rand.erl @@ -21,8 +21,8 @@ %% Multiple PRNG module for Erlang/OTP %% Copyright (c) 2015-2016 Kenji Rikitake %% -%% exrop (xoroshiro116+) added and statistical distribution -%% improvements by the Erlang/OTP team 2017 +%% exrop (xoroshiro116+) added, statistical distribution +%% improvements and uniform_real added by the Erlang/OTP team 2017 %% ===================================================================== -module(rand). @@ -30,10 +30,14 @@ -export([seed_s/1, seed_s/2, seed/1, seed/2, export_seed/0, export_seed_s/1, uniform/0, uniform/1, uniform_s/1, uniform_s/2, + uniform_real/0, uniform_real_s/1, jump/0, jump/1, normal/0, normal/2, normal_s/1, normal_s/3 ]). +%% Debug +-export([make_float/3, float2str/1, bc64/1]). + -compile({inline, [exs64_next/1, exsplus_next/1, exs1024_next/1, exs1024_calc/2, exrop_next/1, exrop_next_s/2, @@ -60,6 +64,10 @@ %% N i evaluated 3 times (?BSL((Bits), (X), (N)) bor ((X) bsr ((Bits)-(N))))). +-define( + BC(V, N), + bc((V), ?BIT((N) - 1), N)). + %%-define(TWO_POW_MINUS53, (math:pow(2, -53))). -define(TWO_POW_MINUS53, 1.11022302462515657e-16). @@ -84,14 +92,21 @@ %% The 'bits' field indicates how many bits the integer %% returned from 'next' has got, i.e 'next' shall return %% an random integer in the range 0..(2^Bits - 1). -%% At least 53 bits is required for the floating point -%% producing fallbacks. This field is only used when -%% the 'uniform' or 'uniform_n' fields are not defined. +%% At least 55 bits is required for the floating point +%% producing fallbacks, but 56 bits would be more future proof. %% %% The fields 'next', 'uniform' and 'uniform_n' -%% implement the algorithm. If 'uniform' or 'uinform_n' +%% implement the algorithm. If 'uniform' or 'uniform_n' %% is not present there is a fallback using 'next' and either -%% 'bits' or the deprecated 'max'. +%% 'bits' or the deprecated 'max'. The 'next' function +%% must generate a word with at least 56 good random bits. +%% +%% The 'weak_low_bits' field indicate how many bits are of +%% lesser quality and they will not be used by the floating point +%% producing functions, nor by the range producing functions +%% when more bits are needed, to avoid weak bits in the middle +%% of the generated bits. The lowest bits from the range +%% functions still have the generator's quality. %% -type alg_handler() :: #{type := alg(), @@ -148,11 +163,7 @@ %% For ranges larger than the algorithm bit size uniform_range(Range, #{next:=Next, bits:=Bits} = Alg, R, V) -> - WeakLowBits = - case Alg of - #{weak_low_bits:=WLB} -> WLB; - #{} -> 0 - end, + WeakLowBits = maps:get(weak_low_bits, Alg, 0), %% Maybe waste the lowest bit(s) when shifting in new bits Shift = Bits - WeakLowBits, ShiftMask = bnot ?MASK(WeakLowBits), @@ -297,7 +308,7 @@ uniform_s({#{bits:=Bits, next:=Next} = Alg, R0}) -> {(V bsr (Bits - 53)) * ?TWO_POW_MINUS53, {Alg, R1}}; uniform_s({#{max:=Max, next:=Next} = Alg, R0}) -> {V, R1} = Next(R0), - %% Old broken algorithm with non-uniform density + %% Old algorithm with non-uniform density {V / (Max + 1), {Alg, R1}}. 
@@ -317,7 +328,7 @@ uniform_s(N, {#{bits:=Bits, next:=Next} = Alg, R0}) ?uniform_range(N, Alg, R1, V, MaxMinusN, I); uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0}) when is_integer(N), 1 =< N -> - %% Old broken algorithm with skewed probability + %% Old algorithm with skewed probability %% and gap in ranges > Max {V, R1} = Next(R0), if @@ -328,6 +339,189 @@ uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0}) {trunc(F * N) + 1, {Alg, R1}} end. +%% uniform_real/0: returns a random float X where 0.0 < X =< 1.0, +%% updating the state in the process dictionary. + +-spec uniform_real() -> X :: float(). +uniform_real() -> + {X, Seed} = uniform_real_s(seed_get()), + _ = seed_put(Seed), + X. + +%% uniform_real_s/1: given a state, uniform_s/1 +%% returns a random float X where 0.0 < X =< 1.0, +%% and a new state. +%% +%% This function does not use the same form of uniformity +%% as the uniform_s/1 function. +%% +%% Instead, this function does not generate numbers with equal +%% distance in the interval, but rather tries to keep all mantissa +%% bits random also for small numbers, meaning that the distance +%% between possible numbers decreases when the numbers +%% approaches 0.0, as does the possibility for a particular +%% number. Hence uniformity is preserved. +%% +%% To generate 56 bits at the time instead of 53 is actually +%% a speed optimization since the probability to have to +%% generate a second word decreases by 1/2 for every extra bit. +%% +%% This function generates normalized numbers, so the smallest number +%% that can be generated is 2^-1022 with the distance 2^-1074 +%% to the next to smallest number, compared to 2^-53 for uniform_s/1. +%% +%% This concept of uniformity should work better for applications +%% where you need to calculate 1.0/X or math:log(X) since those +%% operations benefits from larger precision approaching 0.0, +%% and that this function does not return 0.0 nor denormalized +%% numbers very close to 0.0. The log() operation in The Box-Muller +%% transformation for normal distribution is an example of this. +%% +%%-define(TWO_POW_MINUS55, (math:pow(2, -55))). +%%-define(TWO_POW_MINUS110, (math:pow(2, -110))). +%%-define(TWO_POW_MINUS55, 2.7755575615628914e-17). +%%-define(TWO_POW_MINUS110, 7.7037197775489436e-34). +%% +-spec uniform_real_s(State :: state()) -> {X :: float(), NewState :: state()}. +uniform_real_s({#{bits:=Bits, next:=Next} = Alg, R0}) -> + %% Generate a 56 bit number without using the weak low bits. + %% + %% Be sure to use only 53 bits when multiplying with + %% math:pow(2.0, -N) to avoid rounding which would make + %% "even" floats more probable than "odd". + %% + {V1, R1} = Next(R0), + M1 = V1 bsr (Bits - 56), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, -56), {Alg, R1}}; + true -> + %% Need more bits + {V2, R2} = Next(R1), + uniform_real_s(Alg, Next, M1, -56, R2, V2, Bits) + end; +uniform_real_s({#{max:=_, next:=Next} = Alg, R0}) -> + %% Generate a 56 bit number. + %% Ignore the weak low bits for these old algorithms, + %% just produce something reasonable. + %% + %% Be sure to use only 53 bits when multiplying with + %% math:pow(2.0, -N) to avoid rounding which would make + %% "even" floats more probable than "odd". 
+ %% + {V1, R1} = Next(R0), + M1 = ?MASK(56, V1), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, -56), {Alg, R1}}; + true -> + %% Need more bits + {V2, R2} = Next(R1), + uniform_real_s(Alg, Next, M1, -56, R2, V2, 56) + end. + +uniform_real_s(Alg, _Next, M0, -1064, R1, V1, Bits) -> % 19*56 + %% This is a very theoretical bottom case. + %% The odds of getting here is about 2^-1008, + %% through a white box test case, or thanks to + %% a malfunctioning PRNG producing 18 56-bit zeros in a row. + %% + %% Fill up to 53 bits, we have at most 52 + B0 = (53 - ?BC(M0, 52)), % Missing bits + {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) * math:pow(2.0, -1064 - B0)), + {Alg, R1}}; +uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits) -> + if + %% Optimize the most probable. + %% Fill up to 53 bits. + ?BIT(51) =< M0 -> + %% We have 52 bits in M0 - need 1 + {(((M0 bsl 1) bor (V1 bsr (Bits - 1))) + * math:pow(2.0, BitNo - 1)), + {Alg, R1}}; + ?BIT(50) =< M0 -> + %% We have 51 bits in M0 - need 2 + {(((M0 bsl 2) bor (V1 bsr (Bits - 2))) + * math:pow(2.0, BitNo - 2)), + {Alg, R1}}; + ?BIT(49) =< M0 -> + %% We have 50 bits in M0 - need 3 + {(((M0 bsl 3) bor (V1 bsr (Bits - 3))) + * math:pow(2.0, BitNo - 3)), + {Alg, R1}}; + M0 == 0 -> + M1 = V1 bsr (Bits - 56), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, BitNo - 53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, BitNo - 54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, BitNo - 55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, BitNo - 56), {Alg, R1}}; + BitNo =:= -1008 -> + %% Endgame + %% For the last round we can not have 14 zeros or more + %% at the top of M1 because then we will underflow, + %% so we need at least 43 bits + if + ?BIT(42) =< M1 -> + %% We have 43 bits - get the last bits + uniform_real_s(Alg, Next, M1, BitNo - 56, R1); + true -> + %% Would underflow 2^-1022 - start all over + %% + %% We could just crash here since the odds for + %% the PRNG being broken is much higher than + %% for a good PRNG generating this many zeros + %% in a row. Maybe we should write an error + %% report or call this a system limit...? + uniform_real_s({Alg, R1}) + end; + true -> + %% Need more bits + uniform_real_s(Alg, Next, M1, BitNo - 56, R1) + end; + true -> + %% Fill up to 53 bits + B0 = 53 - ?BC(M0, 49), % Number of bits we need to append + {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) + * math:pow(2.0, BitNo - B0)), + {Alg, R1}} + end. +%% +uniform_real_s(#{bits:=Bits} = Alg, Next, M0, BitNo, R0) -> + {V1, R1} = Next(R0), + uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits); +uniform_real_s(#{max:=_} = Alg, Next, M0, BitNo, R0) -> + {V1, R1} = Next(R0), + uniform_real_s(Alg, Next, M0, BitNo, R1, ?MASK(56, V1), 56). + %% jump/1: given a state, jump/1 %% returns a new state which is equivalent to that %% after a large number of call defined for each algorithm. @@ -1025,3 +1219,42 @@ normal_fi(Indx) -> 1.0214971439701471e-02,8.6165827693987316e-03,7.0508754713732268e-03, 5.5224032992509968e-03,4.0379725933630305e-03,2.6090727461021627e-03, 1.2602859304985975e-03}). 
+ +%%%bitcount64(0) -> 0; +%%%bitcount64(V) -> 1 + bitcount(V, 64). +%%% +%%%-define( +%%% BITCOUNT(V, N), +%%% bitcount(V, N) -> +%%% if +%%% (1 bsl ((N) bsr 1)) =< (V) -> +%%% ((N) bsr 1) + bitcount((V) bsr ((N) bsr 1), ((N) bsr 1)); +%%% true -> +%%% bitcount((V), ((N) bsr 1)) +%%% end). +%%%?BITCOUNT(V, 64); +%%%?BITCOUNT(V, 32); +%%%?BITCOUNT(V, 16); +%%%?BITCOUNT(V, 8); +%%%?BITCOUNT(V, 4); +%%%?BITCOUNT(V, 2); +%%%bitcount(_, 1) -> 0. + +bc64(V) -> ?BC(V, 64). + +%% Linear from high bit - higher probability first gives faster execution +bc(V, B, N) when B =< V -> N; +bc(V, B, N) -> bc(V, B bsr 1, N - 1). + +make_float(S, E, M) -> + <> = <>, + F. + +float2str(N) -> + <> = <<(float(N))/float>>, + lists:flatten( + io_lib:format( + "~c~c.~13.16.0bE~b", + [case S of 1 -> $-; 0 -> $+ end, + case E of 0 -> $0; _ -> $1 end, + M, E - 16#3ff])). diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl index f69d42551e..0c7d8d842e 100644 --- a/lib/stdlib/test/rand_SUITE.erl +++ b/lib/stdlib/test/rand_SUITE.erl @@ -29,11 +29,14 @@ basic_stats_uniform_1/1, basic_stats_uniform_2/1, basic_stats_standard_normal/1, basic_stats_normal/1, + uniform_real_conv/1, plugin/1, measure/1, reference_jump_state/1, reference_jump_procdict/1]). -export([test/0, gen/1]). +-export([uniform_real_gen/1, uniform_gen/2]). + -include_lib("common_test/include/ct.hrl"). -define(LOOP, 1000000). @@ -46,7 +49,7 @@ all() -> [seed, interval_int, interval_float, api_eq, reference, - {group, basic_stats}, + {group, basic_stats}, uniform_real_conv, plugin, measure, {group, reference_jump} ]. @@ -101,7 +104,7 @@ seed_1(Alg) -> _ = rand:uniform(), S00 = get(rand_seed), erase(), - _ = rand:uniform(), + _ = rand:uniform_real(), false = S00 =:= get(rand_seed), %% hopefully %% Choosing algo and seed @@ -228,11 +231,13 @@ interval_float(Config) when is_list(Config) -> interval_float_1(0) -> ok; interval_float_1(N) -> X = rand:uniform(), + Y = rand:uniform_real(), if - 0.0 =< X, X < 1.0 -> + 0.0 =< X, X < 1.0, 0.0 < Y, Y < 1.0 -> ok; true -> - io:format("X=~p 0=<~p<1.0~n", [X,X]), + io:format("X=~p 0.0=<~p<1.0~n", [X,X]), + io:format("Y=~p 0.0<~p<1.0~n", [Y,Y]), exit({X, rand:export_seed()}) end, interval_float_1(N-1). @@ -334,7 +339,13 @@ basic_stats_normal(Config) when is_list(Config) -> IntendedMeanVariancePairs). basic_uniform_1(N, S0, Sum, A0) when N > 0 -> - {X,S} = rand:uniform_s(S0), + {X,S} = + case N band 1 of + 0 -> + rand:uniform_s(S0); + 1 -> + rand:uniform_real_s(S0) + end, I = trunc(X*100), A = array:set(I, 1+array:get(I,A0), A0), basic_uniform_1(N-1, S, Sum+X, A); @@ -399,6 +410,137 @@ normal_s(Mean, Variance, State0) when Mean == 0, Variance == 1 -> normal_s(Mean, Variance, State0) -> rand:normal_s(Mean, Variance, State0). +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% White box test of the conversion to float + +uniform_real_conv(Config) when is_list(Config) -> + [begin +%% ct:pal("~13.16.0bx~3.16.0b: ~p~n", [M,E,Gen]), + uniform_real_conv_check(M, E, Gen) + end || {M, E, Gen} <- uniform_real_conv_data()], + uniform_real_scan(0), + uniform_real_scan(3). 
+ +uniform_real_conv_data() -> + [{16#fffffffffffff, -1, [16#3ffffffffffffff]}, + {16#fffffffffffff, -1, [16#3ffffffffffffe0]}, + {16#ffffffffffffe, -1, [16#3ffffffffffffdf]}, + %% + {16#0000000000000, -1, [16#200000000000000]}, + {16#fffffffffffff, -2, [16#1ffffffffffffff]}, + {16#fffffffffffff, -2, [16#1fffffffffffff0]}, + {16#ffffffffffffe, -2, [16#1ffffffffffffef]}, + %% + {16#0000000000000, -2, [16#100000000000000]}, + {16#fffffffffffff, -3, [16#0ffffffffffffff]}, + {16#fffffffffffff, -3, [16#0fffffffffffff8]}, + {16#ffffffffffffe, -3, [16#0fffffffffffff7]}, + %% + {16#0000000000000, -3, [16#080000000000000]}, + {16#fffffffffffff, -4, [16#07fffffffffffff]}, + {16#fffffffffffff, -4, [16#07ffffffffffffc]}, + {16#ffffffffffffe, -4, [16#07ffffffffffffb]}, + %% + {16#0000000000000, -4, [16#040000000000000]}, + {16#fffffffffffff, -5, [16#03fffffffffffff,16#3ffffffffffffff]}, + {16#fffffffffffff, -5, [16#03ffffffffffffe,16#200000000000000]}, + {16#ffffffffffffe, -5, [16#03fffffffffffff,16#1ffffffffffffff]}, + {16#ffffffffffffe, -5, [16#03fffffffffffff,16#100000000000000]}, + %% + {16#0000000000001, -56, [16#000000000000007,16#00000000000007f]}, + {16#0000000000001, -56, [16#000000000000004,16#000000000000040]}, + {16#0000000000000, -57, [16#000000000000003,16#20000000000001f]}, + {16#0000000000000, -57, [16#000000000000000,16#200000000000000]}, + {16#fffffffffffff, -58, [16#000000000000003,16#1ffffffffffffff]}, + {16#fffffffffffff, -58, [16#000000000000000,16#1fffffffffffff0]}, + {16#ffffffffffffe, -58, [16#000000000000000,16#1ffffffffffffef]}, + {16#ffffffffffffe, -58, [16#000000000000000,16#1ffffffffffffe0]}, + %% + {16#0000000000000, -58, [16#000000000000000,16#10000000000000f]}, + {16#0000000000000, -58, [16#000000000000000,16#100000000000000]}, + {2#11001100000000000000000000000000000000000011000000011, % 53 bits + -1022, + [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, % 18 zeros + 2#1100110000000000000000000000000000000000001 bsl 2, % 43 bits + 2#1000000011 bsl (56-10+2)]}, % 10 bits + {0, -1, % 0.5 after retry + [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, % 18 zeros + 2#111111111111111111111111111111111111111111 bsl 2, % 42 bits - retry + 16#200000000000003]}]. % 0.5 + +-define(UNIFORM_REAL_SCAN_PATTERN, (16#19000000000009)). % 53 bits +-define(UNIFORM_REAL_SCAN_NUMBER, (1021)). + +uniform_real_scan_template(K) -> + <<0:?UNIFORM_REAL_SCAN_NUMBER, + ?UNIFORM_REAL_SCAN_PATTERN:53,K:2,0:1>>. + +uniform_real_scan(K) -> + Templ = uniform_real_scan_template(K), + N = ?UNIFORM_REAL_SCAN_NUMBER, + uniform_real_scan(Templ, N, K). + +uniform_real_scan(Templ, N, K) when 0 =< N -> + <<_:N/bits,T/bits>> = Templ, + Data = uniform_real_scan_data(T, K), + uniform_real_conv_check( + ?UNIFORM_REAL_SCAN_PATTERN, N - 1 - ?UNIFORM_REAL_SCAN_NUMBER, Data), + uniform_real_scan(Templ, N - 1, K); +uniform_real_scan(_, _, _) -> + ok. + +uniform_real_scan_data(Templ, K) -> + case Templ of + <> -> + B = rand:bc64(X), + [(X bsl 2) bor K | + if + 53 =< B -> + []; + true -> + uniform_real_scan_data(T, K) + end]; + _ -> + <> = <>, + [(X bsl 2) bor K] + end. + +uniform_real_conv_check(M, E, Gen) -> + <> = <<0:1, (E + 16#3ff):11, M:52>>, + try uniform_real_gen(Gen) of + F -> F; + FF -> + ct:pal( + "~s =/= ~s: ~s~n", + [rand:float2str(FF), rand:float2str(F), + [["16#",integer_to_list(G,16),$\s]||G<-Gen]]), + ct:fail({neq, FF, F}) + catch + Error:Reason -> + ct:pal( + "~w:~p ~s: ~s~n", + [Error, Reason, rand:float2str(F), + [["16#",integer_to_list(G,16),$\s]||G<-Gen]]), + ct:fail({Error, Reason, F, erlang:get_stacktrace()}) + end. 
+ + +uniform_real_gen(Gen) -> + State = rand_state(Gen), + {F, {#{type := rand_SUITE_list},[]}} = rand:uniform_real_s(State), + F. + +uniform_gen(Range, Gen) -> + State = rand_state(Gen), + {N, {#{type := rand_SUITE_list},[]}} = rand:uniform_s(Range, State), + N. + +%% Loaded dice for white box tests +rand_state(Gen) -> + {#{type => rand_SUITE_list, bits => 58, weak_low_bits => 1, + next => fun ([H|T]) -> {H, T} end}, + Gen}. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Test that the user can write algorithms. @@ -520,6 +662,21 @@ do_measure(_Config) -> end, Algs), %% + ct:pal("~nRNG uniform integer half range performance~n",[]), + _ = + measure_1( + fun (State) -> half_range(State) end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), + %% ct:pal("~nRNG uniform integer half range + 1 performance~n",[]), _ = measure_1( @@ -630,7 +787,8 @@ do_measure(_Config) -> Algs), %% ct:pal("~nRNG uniform float performance~n",[]), - _ = measure_1( + _ = + measure_1( fun (_) -> 0 end, fun (State, _, Mod) -> measure_loop( @@ -641,8 +799,22 @@ do_measure(_Config) -> end, Algs), %% + ct:pal("~nRNG uniform_real float performance~n",[]), + _ = + measure_1( + fun (_) -> 0 end, + fun (State, _, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM(Mod:uniform_real_s(St0), X, St) + end, + State) + end, + Algs), + %% ct:pal("~nRNG normal float performance~n",[]), - _ = measure_1( + [TMarkNormalFloat|_] = + measure_1( fun (_) -> 0 end, fun (State, _, Mod) -> measure_loop( @@ -652,6 +824,30 @@ do_measure(_Config) -> State) end, Algs), + %% Just for fun try an implementation of the Box-Muller + %% transformation for creating normal distribution floats + %% to compare with our Ziggurat implementation. + %% Generates two numbers per call that we add so they + %% will not be optimized away. Hence the benchmark time + %% is twice what it should be. + TwoPi = 2 * math:pi(), + _ = + measure_1( + fun (_) -> 0 end, + fun (State, _, Mod) -> + measure_loop( + fun (State0) -> + {U1, State1} = Mod:uniform_real_s(State0), + {U2, State2} = Mod:uniform_s(State1), + R = math:sqrt(-2.0 * math:log(U1)), + T = TwoPi * U2, + Z0 = R * math:cos(T), + Z1 = R * math:sin(T), + ?CHECK_NORMAL({Z0 + Z1, State2}, X, State3) + end, + State) + end, + exrop, TMarkNormalFloat), ok. measure_loop(Fun, State) -> -- cgit v1.2.3