diff --git a/sequence.ml b/sequence.ml index c118fb9..b1d9ad4 100644 --- a/sequence.ml +++ b/sequence.ml @@ -70,6 +70,16 @@ let mapi f seq k = let i = ref 0 in seq (fun x -> k (f !i x); incr i) +let map_by_2 f seq k = + let r = ref None in + let f y = match !r with + | None -> r := Some y + | Some x -> k (f x y) + in + seq f ; + match !r with + | None -> () | Some x -> k x + let filter p seq k = seq (fun x -> if p x then k x) let append s1 s2 k = s1 k; s2 k @@ -688,6 +698,61 @@ let random_array a k = let random_list l = random_array (Array.of_list l) +(* See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *) +let shuffle_array a = + for k = Array.length a - 1 downto 0+1 do + let l = Random.int (k+1) in + let tmp = a.(l) in + a.(l) <- a.(k); + a.(k) <- tmp; + done + +let shuffle seq = + let a = to_array seq in + shuffle_array a ; + of_array a + +let shuffle_buffer n seq k = + let seq_front = take n seq in + let a = to_array seq_front in + let l = Array.length a in + if l < n then begin + shuffle_array a ; + of_array a k + end + else begin + let seq = drop n seq in + let f x = + let i = Random.int n in + let y = a.(i) in + a.(i) <- x ; + k y + in + seq f + end + +(** {2 Sampling} *) + +(** See https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R *) +let sample n seq = + match head seq with + | None -> [||] + | Some x -> + let a = Array.make n x in + let i = ref (-1) in + let f x = + incr i ; + if !i < n then + a.(!i) <- x + else + let j = Random.int n in + if j <= n then a.(!i) <- x + else () + in + seq f ; + if !i < n then Array.sub a 0 !i + else a + (** {2 Infix functions} *) module Infix = struct diff --git a/sequence.mli b/sequence.mli index 593f477..83f4753 100644 --- a/sequence.mli +++ b/sequence.mli @@ -109,6 +109,11 @@ val map : ('a -> 'b) -> 'a t -> 'b t val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t (** Map objects, along with their index in the sequence *) +val map_by_2 : ('a -> 'a -> 'a) -> 'a t -> 'a t + (** Map objects two by two. lazily. + The last element is kept in the sequence if the count is odd. + @since NEXT_RELEASE *) + val for_all : ('a -> bool) -> 'a t -> bool (** Do all elements satisfy the predicate? *) @@ -493,6 +498,28 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +val shuffle : 'a t -> 'a t +(** [shuffle seq] returns a perfect shuffle of [seq]. + Uses O(length seq) memory and time. Eager. + @since NEXT_RELEASE *) + +val shuffle_buffer : int -> 'a t -> 'a t +(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random + order. The shuffling is *not* uniform. Uses O(n) memory. + + The first [n] elements of the sequence are consumed immediately. The + rest is consumed lazily. + @since NEXT_RELEASE *) + +(** {2 Sampling} *) + +val sample : int -> 'a t -> 'a array + (** [sample n seq] returns k samples of [seq], with uniform probability. + It will consume the sequence and use O(n) memory. + + It returns an array of size [min (length seq) n]. + @since NEXT_RELEASE *) + (** {2 Infix functions} *) module Infix : sig diff --git a/sequenceLabels.mli b/sequenceLabels.mli index 08f9a71..b52ac74 100644 --- a/sequenceLabels.mli +++ b/sequenceLabels.mli @@ -87,6 +87,11 @@ val map : f:('a -> 'b) -> 'a t -> 'b t val mapi : f:(int -> 'a -> 'b) -> 'a t -> 'b t (** Map objects, along with their index in the sequence *) +val map_by_2 : f:('a -> 'a -> 'a) -> 'a t -> 'a t + (** Map objects two by two. lazily. + The last element is kept in the sequence if the count is odd. + @since NEXT_RELEASE *) + val for_all : f:('a -> bool) -> 'a t -> bool (** Do all elements satisfy the predicate? *) @@ -441,6 +446,28 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +val shuffle : 'a t -> 'a t +(** [shuffle seq] returns a perfect shuffle of [seq]. + Uses O(length seq) memory and time. Eager. + @since NEXT_RELEASE *) + +val shuffle_buffer : n:int -> 'a t -> 'a t +(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random + order. The shuffling is not uniform. Uses O(n) memory. + + The first [n] elements of the sequence are consumed immediately. The + rest is consumed lazily. + @since NEXT_RELEASE *) + +(** {2 Sampling} *) + +val sample : n:int -> 'a t -> 'a array + (** [sample n seq] returns k samples of [seq], with uniform probability. + It will consume the sequence and use O(n) memory. + + It returns an array of size [min (length seq) n]. + @since NEXT_RELEASE *) + (** {2 Infix functions} *) module Infix : sig