From 15a942b0f2c68ce8a0398db37adb020fd4063b58 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Sun, 22 Oct 2017 21:01:06 +0200 Subject: Test normal distribution --- lib/stdlib/test/rand_SUITE.erl | 205 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 204 insertions(+), 1 deletion(-) (limited to 'lib/stdlib/test') diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl index ef4f9faad9..dbf1857a25 100644 --- a/lib/stdlib/test/rand_SUITE.erl +++ b/lib/stdlib/test/rand_SUITE.erl @@ -29,6 +29,9 @@ basic_stats_uniform_1/1, basic_stats_uniform_2/1, basic_stats_standard_normal/1, basic_stats_normal/1, + stats_standard_normal_box_muller/1, + stats_standard_normal_box_muller_2/1, + stats_standard_normal/1, uniform_real_conv/1, plugin/1, measure/1, reference_jump_state/1, reference_jump_procdict/1]). @@ -57,7 +60,10 @@ all() -> groups() -> [{basic_stats, [parallel], [basic_stats_uniform_1, basic_stats_uniform_2, - basic_stats_standard_normal]}, + basic_stats_standard_normal, + stats_standard_normal_box_muller, + stats_standard_normal_box_muller_2, + stats_standard_normal]}, {reference_jump, [parallel], [reference_jump_state, reference_jump_procdict]}]. @@ -410,6 +416,203 @@ normal_s(Mean, Variance, State0) when Mean == 0, Variance == 1 -> normal_s(Mean, Variance, State0) -> rand:normal_s(Mean, Variance, State0). + + +-dialyzer({no_improper_lists, stats_standard_normal_box_muller/1}). +stats_standard_normal_box_muller(Config) when is_list(Config) -> + try math:erfc(1.0) of + _ -> + TwoPi = 2.0 * math:pi(), + NormalS = + fun + ([S0]) -> + {U1, S1} = rand:uniform_real_s(S0), + R = math:sqrt(-2.0 * math:log(U1)), + {U2, S2} = rand:uniform_s(S1), + T = TwoPi * U2, + Z0 = R * math:cos(T), + Z1 = R * math:sin(T), + {Z0, [S2|Z1]}; + ([S|Z]) -> + {Z, [S]} + end, + State = [rand:seed(exrop)], + stats_standard_normal(NormalS, State) + catch error:_ -> + {skip, "math:erfc/1 not supported"} + end. + +-dialyzer({no_improper_lists, stats_standard_normal_box_muller_2/1}). +stats_standard_normal_box_muller_2(Config) when is_list(Config) -> + try math:erfc(1.0) of + _ -> + TwoPi = 2.0 * math:pi(), + NormalS = + fun + ([S0]) -> + {U0, S1} = rand:uniform_s(S0), + U1 = 1.0 - U0, + R = math:sqrt(-2.0 * math:log(U1)), + {U2, S2} = rand:uniform_s(S1), + T = TwoPi * U2, + Z0 = R * math:cos(T), + Z1 = R * math:sin(T), + {Z0, [S2|Z1]}; + ([S|Z]) -> + {Z, [S]} + end, + State = [rand:seed(exrop)], + stats_standard_normal(NormalS, State) + catch error:_ -> + {skip, "math:erfc/1 not supported"} + end. + + +stats_standard_normal(Config) when is_list(Config) -> + try math:erfc(1.0) of + _ -> + stats_standard_normal( + fun rand:normal_s/1, rand:seed_s(exrop)) + catch error:_ -> + {skip, "math:erfc/1 not supported"} + end. +%% +stats_standard_normal(Fun, S) -> +%%% +%%% ct config: +%%% {rand_SUITE, [{stats_standard_normal,[{seconds, 8}, {std_devs, 4.0}]}]}. +%%% + Seconds = ct:get_config({?MODULE, ?FUNCTION_NAME, seconds}, 8), + StdDevs = + ct:get_config( + {?MODULE, ?FUNCTION_NAME, std_devs}, + 4.0), % probability erfc(4/sqrt(2)) (1/15787) to fail a bucket +%%% + ct:timetrap({seconds, Seconds + 120}), + %% InvDelta and Buckets are chosen so that the probability to land + %% in the central bucket (closest to 0) is about the same as + %% the probability to land in the top catch-all-outside bucket. + %% + %% Rounds is calculated so the expected value for the central + %% bucket will be at least TargetHits. + %% + InvDelta = 512, + W = InvDelta * math:sqrt(2), + Buckets = 1619, % erf-1(erfc(1 / W)) * W + TargetHits = 1024, + InvP0 = floor(math:ceil(1 / (0.5 * math:erf(1 / W)))), + Rounds = 2 * TargetHits * InvP0, + Histogram = array:new({default, 0}), + StopTime = erlang:monotonic_time(second) + Seconds, + ct:pal( + "Running standard normal test against ~w std devs for ~w seconds...", + [StdDevs, Seconds]), + {PositiveHistogram, NegativeHistogram, Outlier, TotalRounds} = + stats_standard_normal( + InvDelta, Buckets, Histogram, Histogram, 0.0, + Fun, S, Rounds, StopTime, Rounds, 0), + Precision = math:sqrt(TotalRounds / InvP0) / StdDevs, + ct:pal( + "Total rounds: ~w, tolerance: 1/~.2f, outlier: ~.2f.", + [TotalRounds, Precision, Outlier]), + ResultingTargetHits = floor(TotalRounds / 2 / InvP0), + {TotalRounds, [], []} = + {TotalRounds, + check_histogram( + W, Buckets, ResultingTargetHits, TotalRounds, + StdDevs, PositiveHistogram), + check_histogram( + W, Buckets, ResultingTargetHits, TotalRounds, + StdDevs, NegativeHistogram)}, + ok. +%% +stats_standard_normal( + InvDelta, Buckets, PositiveHistogram, NegativeHistogram, Outlier, + Fun, S, 0, StopTime, Rounds, TotalRounds) -> + case erlang:monotonic_time(second) of + Now when Now < StopTime -> + stats_standard_normal( + InvDelta, Buckets, + PositiveHistogram, NegativeHistogram, Outlier, + Fun, S, Rounds, StopTime, Rounds, TotalRounds + Rounds); + _ -> + {PositiveHistogram, NegativeHistogram, + Outlier, TotalRounds + Rounds} + end; +stats_standard_normal( + InvDelta, Buckets, PositiveHistogram, NegativeHistogram, Outlier, + Fun, S, Count, StopTime, Rounds, TotalRounds) -> + case Fun(S) of + {X, NewS} when 0.0 =< X -> + Bucket = min(Buckets, floor(X * InvDelta)), + stats_standard_normal( + InvDelta, Buckets, + increment_bucket(Bucket, PositiveHistogram), + NegativeHistogram, max(Outlier, X), + Fun, NewS, Count - 1, StopTime, Rounds, TotalRounds); + {MinusX, NewS} -> + X = -MinusX, + Bucket = min(Buckets, floor(X * InvDelta)), + stats_standard_normal( + InvDelta, Buckets, + PositiveHistogram, + increment_bucket(Bucket, NegativeHistogram), max(Outlier, X), + Fun, NewS, Count - 1, StopTime, Rounds, TotalRounds) + end. + +increment_bucket(Bucket, Array) -> + array:set(Bucket, array:get(Bucket, Array) + 1, Array). + +check_histogram(W, Buckets, TargetHits, Rounds, StdDevs, Histogram) -> + %%PrevBucket = 512, + %%Bucket = PrevBucket - 1, + %%P = 0.5 * math:erfc(PrevBucket / W), + PrevBucket = Buckets, + Bucket = PrevBucket, + P = 0.0, + N = 0, + check_histogram( + W, Bucket, TargetHits, Rounds, StdDevs, Histogram, + PrevBucket, P, N). +%% +check_histogram( + _W, _Bucket, _TargetHits, _Rounds, _StdDevs, _Histogram, + 0, _PrevP, _PrevN) -> + []; +check_histogram( + W, Bucket, TargetHits, Rounds, StdDevs, Histogram, + PrevBucket, PrevP, PrevN) -> + N = PrevN + array:get(Bucket, Histogram), + P = 0.5 * math:erfc(Bucket / W), + DP = P - PrevP, + Exp = DP * Rounds, + if + TargetHits =< Exp -> + %% Check N + Err = N - Exp, + AbsErr = abs(Err), + Var = Rounds * DP*(1.0 - DP), + Limit = StdDevs * math:sqrt(Var), + if + Limit =< AbsErr -> + [#{bucket => {Bucket, PrevBucket}, + n => N, exp => Exp, err => Err, limit => Limit} | + check_histogram( + W, Bucket - 1, TargetHits, Rounds, StdDevs, Histogram, + Bucket, P, 0)]; + %% ct:fail( + %% {bucket, Bucket, PrevBucket, N, Exp, Err, Limit}); + true -> + check_histogram( + W, Bucket - 1, TargetHits, Rounds, StdDevs, Histogram, + Bucket, P, 0) + end; + true -> + check_histogram( + W, Bucket - 1, TargetHits, Rounds, StdDevs, Histogram, + PrevBucket, PrevP, N) + end. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% White box test of the conversion to float -- cgit v1.2.3 From 84e3ce67e27ea9da607402a030f9e757854f3b76 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Fri, 27 Oct 2017 15:42:28 +0200 Subject: Improve check on normal distribution tail --- lib/stdlib/test/rand_SUITE.erl | 99 ++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 48 deletions(-) (limited to 'lib/stdlib/test') diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl index dbf1857a25..e256d31112 100644 --- a/lib/stdlib/test/rand_SUITE.erl +++ b/lib/stdlib/test/rand_SUITE.erl @@ -489,19 +489,21 @@ stats_standard_normal(Fun, S) -> 4.0), % probability erfc(4/sqrt(2)) (1/15787) to fail a bucket %%% ct:timetrap({seconds, Seconds + 120}), - %% InvDelta and Buckets are chosen so that the probability to land - %% in the central bucket (closest to 0) is about the same as - %% the probability to land in the top catch-all-outside bucket. + %% Buckets is chosen to get a range where the the probability to land + %% in the top catch-all bucket is not vanishingly low, but with + %% these values it is about 1/25 of the probability for the low bucket + %% (closest to 0). %% - %% Rounds is calculated so the expected value for the central + %% Rounds is calculated so the expected value for the low %% bucket will be at least TargetHits. %% InvDelta = 512, - W = InvDelta * math:sqrt(2), - Buckets = 1619, % erf-1(erfc(1 / W)) * W + Buckets = 4 * InvDelta, % 4 std devs range TargetHits = 1024, - InvP0 = floor(math:ceil(1 / (0.5 * math:erf(1 / W)))), - Rounds = 2 * TargetHits * InvP0, + Sqrt2 = math:sqrt(2.0), + W = InvDelta * Sqrt2, + P0 = math:erf(1 / W), + Rounds = TargetHits * ceil(1.0 / P0), Histogram = array:new({default, 0}), StopTime = erlang:monotonic_time(second) + Seconds, ct:pal( @@ -511,20 +513,25 @@ stats_standard_normal(Fun, S) -> stats_standard_normal( InvDelta, Buckets, Histogram, Histogram, 0.0, Fun, S, Rounds, StopTime, Rounds, 0), - Precision = math:sqrt(TotalRounds / InvP0) / StdDevs, + Precision = math:sqrt(TotalRounds * P0) / StdDevs, + TopP = math:erfc(Buckets / W), + TopPrecision = math:sqrt(TotalRounds * TopP) / StdDevs, + OutlierProbability = math:erfc(Outlier / Sqrt2) * TotalRounds, + InvOP = 1.0 / OutlierProbability, ct:pal( - "Total rounds: ~w, tolerance: 1/~.2f, outlier: ~.2f.", - [TotalRounds, Precision, Outlier]), - ResultingTargetHits = floor(TotalRounds / 2 / InvP0), + "Total rounds: ~w, tolerance: 1/~.2f..1/~.2f, " + "outlier: ~.2f, probability 1/~.2f.", + [TotalRounds, Precision, TopPrecision, Outlier, InvOP]), {TotalRounds, [], []} = {TotalRounds, check_histogram( - W, Buckets, ResultingTargetHits, TotalRounds, - StdDevs, PositiveHistogram), + W, TotalRounds, StdDevs, PositiveHistogram, Buckets), check_histogram( - W, Buckets, ResultingTargetHits, TotalRounds, - StdDevs, NegativeHistogram)}, - ok. + W, TotalRounds, StdDevs, NegativeHistogram, Buckets)}, + %% If the probability for getting this Outlier is lower than 1/20, + %% then this is fishy! + true = (1/20 =< OutlierProbability), + {comment, {tp, TopPrecision, op, InvOP}}. %% stats_standard_normal( InvDelta, Buckets, PositiveHistogram, NegativeHistogram, Outlier, @@ -563,54 +570,50 @@ stats_standard_normal( increment_bucket(Bucket, Array) -> array:set(Bucket, array:get(Bucket, Array) + 1, Array). -check_histogram(W, Buckets, TargetHits, Rounds, StdDevs, Histogram) -> +check_histogram(W, Rounds, StdDevs, Histogram, Buckets) -> %%PrevBucket = 512, %%Bucket = PrevBucket - 1, %%P = 0.5 * math:erfc(PrevBucket / W), - PrevBucket = Buckets, - Bucket = PrevBucket, + TargetP = 0.5 * math:erfc(Buckets / W), P = 0.0, N = 0, check_histogram( - W, Bucket, TargetHits, Rounds, StdDevs, Histogram, - PrevBucket, P, N). + W, Rounds, StdDevs, Histogram, TargetP, + Buckets, Buckets, P, N). %% check_histogram( - _W, _Bucket, _TargetHits, _Rounds, _StdDevs, _Histogram, - 0, _PrevP, _PrevN) -> + _W, _Rounds, _StdDevs, _Histogram, _TargetP, + 0, _PrevBucket, _PrevP, _PrevN) -> []; check_histogram( - W, Bucket, TargetHits, Rounds, StdDevs, Histogram, - PrevBucket, PrevP, PrevN) -> + W, Rounds, StdDevs, Histogram, TargetP, + Bucket, PrevBucket, PrevP, PrevN) -> N = PrevN + array:get(Bucket, Histogram), P = 0.5 * math:erfc(Bucket / W), - DP = P - PrevP, - Exp = DP * Rounds, + BucketP = P - PrevP, if - TargetHits =< Exp -> - %% Check N - Err = N - Exp, - AbsErr = abs(Err), - Var = Rounds * DP*(1.0 - DP), - Limit = StdDevs * math:sqrt(Var), + TargetP =< BucketP -> + check_histogram( + W, Rounds, StdDevs, Histogram, TargetP, + Bucket - 1, PrevBucket, PrevP, N); + true -> + Exp = BucketP * Rounds, + Var = Rounds * BucketP*(1.0 - BucketP), + Threshold = StdDevs * math:sqrt(Var), + LowerLimit = floor(Exp - Threshold), + UpperLimit = ceil(Exp + Threshold), if - Limit =< AbsErr -> - [#{bucket => {Bucket, PrevBucket}, - n => N, exp => Exp, err => Err, limit => Limit} | + N < LowerLimit; UpperLimit < N -> + [#{bucket => {Bucket, PrevBucket}, n => N, exp => Exp, + lower => LowerLimit, upper => UpperLimit} | check_histogram( - W, Bucket - 1, TargetHits, Rounds, StdDevs, Histogram, - Bucket, P, 0)]; - %% ct:fail( - %% {bucket, Bucket, PrevBucket, N, Exp, Err, Limit}); + W, Rounds, StdDevs, Histogram, TargetP, + Bucket - 1, Bucket, P, 0)]; true -> check_histogram( - W, Bucket - 1, TargetHits, Rounds, StdDevs, Histogram, - Bucket, P, 0) - end; - true -> - check_histogram( - W, Bucket - 1, TargetHits, Rounds, StdDevs, Histogram, - PrevBucket, PrevP, N) + W, Rounds, StdDevs, Histogram, TargetP, + Bucket - 1, Bucket, P, 0) + end end. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -- cgit v1.2.3 From 4604c73f8f5192b1eb33ca2b6eda7200c43a8a8d Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Wed, 1 Nov 2017 12:30:15 +0100 Subject: Tweak statistics limits --- lib/stdlib/test/rand_SUITE.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/stdlib/test') diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl index e256d31112..3d3241b33d 100644 --- a/lib/stdlib/test/rand_SUITE.erl +++ b/lib/stdlib/test/rand_SUITE.erl @@ -480,13 +480,13 @@ stats_standard_normal(Config) when is_list(Config) -> stats_standard_normal(Fun, S) -> %%% %%% ct config: -%%% {rand_SUITE, [{stats_standard_normal,[{seconds, 8}, {std_devs, 4.0}]}]}. +%%% {rand_SUITE, [{stats_standard_normal,[{seconds, 8}, {std_devs, 4.2}]}]}. %%% Seconds = ct:get_config({?MODULE, ?FUNCTION_NAME, seconds}, 8), StdDevs = ct:get_config( {?MODULE, ?FUNCTION_NAME, std_devs}, - 4.0), % probability erfc(4/sqrt(2)) (1/15787) to fail a bucket + 4.2), % probability erfc(4.2/sqrt(2)) (1/37465) to fail a bucket %%% ct:timetrap({seconds, Seconds + 120}), %% Buckets is chosen to get a range where the the probability to land @@ -528,9 +528,9 @@ stats_standard_normal(Fun, S) -> W, TotalRounds, StdDevs, PositiveHistogram, Buckets), check_histogram( W, TotalRounds, StdDevs, NegativeHistogram, Buckets)}, - %% If the probability for getting this Outlier is lower than 1/20, + %% If the probability for getting this Outlier is lower than 1/50, %% then this is fishy! - true = (1/20 =< OutlierProbability), + true = (1/50 =< OutlierProbability), {comment, {tp, TopPrecision, op, InvOP}}. %% stats_standard_normal( -- cgit v1.2.3