From 08403bac9f9d4f56f4f9047be572de7958383519 Mon Sep 17 00:00:00 2001 From: octachron Date: Wed, 11 Nov 2015 23:36:55 +0200 Subject: [PATCH 1/7] CCRandom: Add sample_without_replacement --- src/core/CCRandom.ml | 14 ++++++++++++++ src/core/CCRandom.mli | 8 ++++++++ 2 files changed, 22 insertions(+) diff --git a/src/core/CCRandom.ml b/src/core/CCRandom.ml index cc387065..cf53c8eb 100644 --- a/src/core/CCRandom.ml +++ b/src/core/CCRandom.ml @@ -78,6 +78,20 @@ let replicate n g st = if n = 0 then acc else aux (g st :: acc) (n-1) in aux [] n +(* Sample without replacement using rejection sampling. *) +let sample_without_replacement (type elt) ?(compare=compare) k (rng:elt t) st= + let module S = Set.Make(struct type t=elt let compare = compare end) in + let rec aux s k = + if k <= 0 then + S.elements s + else + let x = rng st in + if S.mem x s then + aux s k + else + aux (S.add x s) (k-1) in + aux S.empty k + let list_seq l st = List.map (fun f -> f st) l exception SplitFail diff --git a/src/core/CCRandom.mli b/src/core/CCRandom.mli index e42e1f01..2c1ebcc7 100644 --- a/src/core/CCRandom.mli +++ b/src/core/CCRandom.mli @@ -76,6 +76,14 @@ val replicate : int -> 'a t -> 'a list t (** [replicate n g] makes a list of [n] elements which are all generated randomly using [g] *) +val sample_without_replacement: + ?compare:('a -> 'a -> int) -> int -> 'a t -> 'a list t +(** [sample_without_replacement n g] makes a list of [n] elements which are all + generated randomly using [g] with the added constraint that none of the generated + random values are equal + @since 0.15 + *) + val list_seq : 'a t list -> 'a list t (** Build random lists from lists of random generators @since 0.4 *) From 06f9ca3eee084125c96b15a0cc25c9cdb8ae6695 Mon Sep 17 00:00:00 2001 From: octachron Date: Wed, 11 Nov 2015 23:37:20 +0200 Subject: [PATCH 2/7] CCRandom: Make `split_list` uniform --- src/core/CCRandom.ml | 47 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/src/core/CCRandom.ml b/src/core/CCRandom.ml index cf53c8eb..709a3e2e 100644 --- a/src/core/CCRandom.ml +++ b/src/core/CCRandom.ml @@ -94,33 +94,34 @@ let sample_without_replacement (type elt) ?(compare=compare) k (rng:elt t) st= let list_seq l st = List.map (fun f -> f st) l -exception SplitFail - -let _split i st = - if i < 2 then raise SplitFail +let split i st = + if i < 2 then None else let j = 1 + Random.State.int st (i-1) in - (j, i-j) + Some (j, i-j) -let split i st = try Some (_split i st) with SplitFail -> None - -(* Partition of an int into [len] integers. We divide-and-conquer on - the expected length, until it reaches 1. *) -let split_list i ~len st = - let rec aux i ~len acc = - if i < len then raise SplitFail - else if len = 1 then i::acc - else - (* split somewhere in the middle *) - let len1, len2 = _split len st in - assert (len = len1+len2); - if i = len - then aux len1 ~len:len1 (aux len2 ~len:len2 acc) - else - let i1, i2 = _split (i-len) st in - aux (i1+len1) ~len:len1 (aux (i2+len2) ~len:len2 acc) +let _diff_list l = + let rec diff_list acc = function + | [a;b] -> Some ( (b - a)::acc ) + | a::( b::_ as r ) -> diff_list ( (b-a)::acc ) r + | [_] | [] -> None in - try Some (aux i ~len []) with SplitFail -> None + diff_list [] l + + +(* Partition of an int into [len] integers uniformly. + We first sample (len-1) points from the set {1,..i-1} without replacement. + We sort these points and add back 0 and i, we have thus + x_0 = 0 < x_1 < x_2 < ... < x_{len-1} < i = x_{len}. + If we define, y_k = x_{k+1} - x_{k} for k in 0..(len-1), then by construction + ∑_k y_k = ∑_k (x_{k+1} - x_k ) = x_{len} - x_0 = i. *) +let split_list i ~len st = + if i >= len then + let xs = sample_without_replacement (len-1) (int_range 1 @@ i-1) st in + let ordered_xs = List.sort compare (i::0::xs) in + _diff_list ordered_xs + else + None let retry ?(max=10) g st = let rec aux n = From 2bb64231098d61e8153321ef9ff8e89964ddd4d8 Mon Sep 17 00:00:00 2001 From: octachron Date: Wed, 11 Nov 2015 23:01:57 +0200 Subject: [PATCH 3/7] CCRandom: add an uniformity test --- src/core/CCRandom.ml | 28 ++++++++++++++++++++++++++++ src/core/CCRandom.mli | 8 ++++++++ 2 files changed, 36 insertions(+) diff --git a/src/core/CCRandom.ml b/src/core/CCRandom.ml index 709a3e2e..a5a37f5c 100644 --- a/src/core/CCRandom.ml +++ b/src/core/CCRandom.ml @@ -192,3 +192,31 @@ let (<*>) f g st = f st (g st) let __default_state = Random.State.make_self_init () let run ?(st=__default_state) g = g st + +let uniformity_test ?(size_hint=10) k rng st = + let histogram = Hashtbl.create size_hint in + let add x = let n = try Hashtbl.find histogram x with Not_found -> 0 in + Hashtbl.replace histogram x (n + 1) in + let () = + for _i = 0 to ( k - 1 ) do + add @@ rng st + done in + let cardinal = float_of_int @@ Hashtbl.length histogram in + let kf = float_of_int k in + (* average number of points assuming an uniform distribution *) + let average = kf /. cardinal in + (* The number of points is a sum of random variables with binomial distribution *) + let p = 1. /. cardinal in + (* The variance of a binomial distribution with average p is *) + let variance = p *. (1. -. p ) in + (* Central limit theorem: a confidence interval of 4σ provides a false positive rate + of 0.00634% *) + let confidence = 4. in + let std = confidence *. ( sqrt @@ kf *. variance ) in + let predicate _key n acc = + acc && abs_float (average -. float_of_int n) < std in + Hashtbl.fold predicate histogram true + +(*$T split_list + run ( uniformity_test 50_000 (split_list 10 ~len:3) ) +*) diff --git a/src/core/CCRandom.mli b/src/core/CCRandom.mli index 2c1ebcc7..be3dcc0a 100644 --- a/src/core/CCRandom.mli +++ b/src/core/CCRandom.mli @@ -153,3 +153,11 @@ val (<*>) : ('a -> 'b) t -> 'a t -> 'b t val run : ?st:state -> 'a t -> 'a (** Using a random state (possibly the one in argument) run a generator *) + +(** {6 Random generator testing } *) + +val uniformity_test : ?size_hint:int -> int -> 'a t -> bool t +(** [uniformity_test k rng] tests the uniformity of the random generator [rng] using + [k] samples. + @since 0.15 +*) From de2a51d62e51c255471d62707cf52fb88e32e8ec Mon Sep 17 00:00:00 2001 From: octachron Date: Thu, 12 Nov 2015 13:41:38 +0200 Subject: [PATCH 4/7] Fix a wasteful sort in sample_without_replacement --- src/core/CCRandom.ml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/core/CCRandom.ml b/src/core/CCRandom.ml index a5a37f5c..1a54e544 100644 --- a/src/core/CCRandom.ml +++ b/src/core/CCRandom.ml @@ -100,11 +100,11 @@ let split i st = let j = 1 + Random.State.int st (i-1) in Some (j, i-j) -let _diff_list l = +let _diff_list ~last l = let rec diff_list acc = function - | [a;b] -> Some ( (b - a)::acc ) + | [a] -> Some ( (last - a)::acc ) | a::( b::_ as r ) -> diff_list ( (b-a)::acc ) r - | [_] | [] -> None + | [] -> None in diff_list [] l @@ -118,8 +118,7 @@ let _diff_list l = let split_list i ~len st = if i >= len then let xs = sample_without_replacement (len-1) (int_range 1 @@ i-1) st in - let ordered_xs = List.sort compare (i::0::xs) in - _diff_list ordered_xs + _diff_list ( 0::xs ) ~last:i else None From 0a662ef112b2408e0f73fffff2d62d76057cd065 Mon Sep 17 00:00:00 2001 From: octachron Date: Thu, 12 Nov 2015 13:45:57 +0200 Subject: [PATCH 5/7] Exclude uniformity test from the documentation --- src/core/CCRandom.mli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/CCRandom.mli b/src/core/CCRandom.mli index be3dcc0a..c0b8c604 100644 --- a/src/core/CCRandom.mli +++ b/src/core/CCRandom.mli @@ -154,7 +154,7 @@ val (<*>) : ('a -> 'b) t -> 'a t -> 'b t val run : ?st:state -> 'a t -> 'a (** Using a random state (possibly the one in argument) run a generator *) -(** {6 Random generator testing } *) +(**/**) val uniformity_test : ?size_hint:int -> int -> 'a t -> bool t (** [uniformity_test k rng] tests the uniformity of the random generator [rng] using From cffbc66e6da7a2b33154c4a35c782b98f991f786 Mon Sep 17 00:00:00 2001 From: octachron Date: Fri, 13 Nov 2015 16:35:08 +0200 Subject: [PATCH 6/7] Add octachron as author --- AUTHORS.adoc | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.adoc b/AUTHORS.adoc index c4772003..b1ee4699 100644 --- a/AUTHORS.adoc +++ b/AUTHORS.adoc @@ -12,3 +12,4 @@ - Emmanuel Surleau (emm) - Guillaume Bury (guigui) - JP Rodi +- octachron (Florian Angeletti) From 3b94aa8a2cc2232a31e692167daade3c728def93 Mon Sep 17 00:00:00 2001 From: octachron Date: Fri, 13 Nov 2015 16:38:05 +0200 Subject: [PATCH 7/7] Fix @since tags in CCRandom.mli --- src/core/CCRandom.mli | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/CCRandom.mli b/src/core/CCRandom.mli index c0b8c604..1909d483 100644 --- a/src/core/CCRandom.mli +++ b/src/core/CCRandom.mli @@ -81,7 +81,7 @@ val sample_without_replacement: (** [sample_without_replacement n g] makes a list of [n] elements which are all generated randomly using [g] with the added constraint that none of the generated random values are equal - @since 0.15 + @since NEXT_RELEASE *) val list_seq : 'a t list -> 'a list t @@ -159,5 +159,5 @@ val run : ?st:state -> 'a t -> 'a val uniformity_test : ?size_hint:int -> int -> 'a t -> bool t (** [uniformity_test k rng] tests the uniformity of the random generator [rng] using [k] samples. - @since 0.15 + @since NEXT_RELEASE *)