Fix the reservoir sampling algorithm.

This commit is contained in:
Drup 2018-03-28 21:22:57 +02:00 committed by Simon Cruanes
parent 8ba5e09f8f
commit daa34efd62

View file

@ -1302,23 +1302,23 @@ let shuffle_buffer n seq k =
(** {2 Sampling} *)
(** [sample k seq] returns an array of at most [k] elements drawn uniformly
    at random, without replacement, from [seq], using reservoir sampling.
    See https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R

    - If [seq] is empty the result is [[||]].
    - If [seq] yields fewer than [k] elements, all of them are returned,
      in iteration order.
    - Otherwise the result has exactly [k] elements.

    [seq] is iterated twice: once through [head], to obtain a value used to
    initialise the reservoir, and once in full to do the sampling. It should
    therefore be safe to iterate more than once.

    Randomness comes from the global [Random] state.

    @raise Invalid_argument if [k] is negative and [seq] is not empty
    (raised by [Array.make]), or if [seq] is so long that the bound given to
    [Random.int] reaches 2^30. *)
let sample k seq =
  match head seq with
  | None -> [||]
  | Some x ->
    (* [x] is only a filler: every slot that ends up in the result is
       overwritten by [f] below before it is returned. *)
    let a = Array.make k x in
    (* 0-based index of the element currently being visited. *)
    let i = ref (-1) in
    let f x =
      incr i;
      if !i < k then
        (* The first [k] elements fill the reservoir directly. *)
        a.(!i) <- x
      else begin
        (* Algorithm R: draw [j] uniformly in 0..i (inclusive) and keep the
           new element iff [j] lands inside the reservoir. The bound has to
           be [!i + 1]: with [!i] the element at index [k] would always be
           inserted, and [k = 0] would call [Random.int 0], which raises. *)
        let j = Random.int (!i + 1) in
        if j < k then a.(j) <- x
      end
    in
    seq f;
    (* Fewer than [k] elements seen: drop the unused tail of the buffer. *)
    if !i < k then Array.sub a 0 (!i + 1) else a
(** {2 Infix functions} *)