diff --git a/sequence.ml b/sequence.ml
index 547a4e5..91512f6 100644
--- a/sequence.ml
+++ b/sequence.ml
@@ -698,6 +698,34 @@ let random_array a k =
 let random_list l =
   random_array (Array.of_list l)
 
+(** {2 Sampling} *)
+
+(** [sample n seq] picks at most [n] elements of [seq] uniformly at random,
+    in a single pass, using reservoir sampling (Algorithm R). See
+    https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R
+    @raise Invalid_argument if [n] is negative. *)
+let sample n seq =
+  if n < 0 then invalid_arg "Sequence.sample";
+  (* The reservoir is allocated on the first element, so [seq] is traversed
+     exactly once; this matters when iterating [seq] has side effects. *)
+  let reservoir = ref [||] in
+  let seen = ref 0 in
+  seq
+    (fun x ->
+      let i = !seen in
+      incr seen;
+      if i = 0 then reservoir := Array.make n x;
+      if i < n then (!reservoir).(i) <- x
+      else begin
+        (* The element at 0-based position [i] replaces a random slot
+           with probability [n / (i + 1)]. *)
+        let j = Random.int (i + 1) in
+        if j < n then (!reservoir).(j) <- x
+      end);
+  (* Fewer than [n] elements were seen: drop the unused tail. *)
+  if !seen < n then Array.sub !reservoir 0 !seen
+  else !reservoir
+
 (** {2 Infix functions} *)
 
 module Infix = struct
diff --git a/sequence.mli b/sequence.mli
index 4acfdd4..9ca0f7c 100644
--- a/sequence.mli
+++ b/sequence.mli
@@ -498,6 +498,17 @@ val random_list : 'a list -> 'a t
   (** Infinite sequence of random elements of the list. Basically the
       same as {!random_array}. *)
 
+(** {2 Sampling} *)
+
+val sample : int -> 'a t -> 'a array
+  (** [sample n seq] returns at most [n] elements of [seq], chosen with
+      uniform probability. It consumes the whole sequence and uses
+      O(n) memory.
+
+      It returns an array of size [min (length seq) n].
+      @raise Invalid_argument if [n] is negative.
+      @since NEXT_RELEASE *)
+
 (** {2 Infix functions} *)
 
 module Infix : sig
diff --git a/sequenceLabels.mli b/sequenceLabels.mli
index 62894f4..6f5b25e 100644
--- a/sequenceLabels.mli
+++ b/sequenceLabels.mli
@@ -446,6 +446,17 @@ val random_list : 'a list -> 'a t
   (** Infinite sequence of random elements of the list. Basically the
       same as {!random_array}. *)
 
+(** {2 Sampling} *)
+
+val sample : n:int -> 'a t -> 'a array
+  (** [sample ~n seq] returns at most [n] elements of [seq], chosen with
+      uniform probability. It consumes the whole sequence and uses
+      O(n) memory.
+
+      It returns an array of size [min (length seq) n].
+      @raise Invalid_argument if [n] is negative.
+      @since NEXT_RELEASE *)
+
 (** {2 Infix functions} *)
 
 module Infix : sig