From cd32539fea17fc39b69dd9ce024c8744b7ac128f Mon Sep 17 00:00:00 2001 From: Drup Date: Sat, 16 Jan 2016 14:45:55 +0100 Subject: [PATCH 1/4] Add map_by_2. --- sequence.ml | 10 ++++++++++ sequence.mli | 5 +++++ sequenceLabels.mli | 5 +++++ 3 files changed, 20 insertions(+) diff --git a/sequence.ml b/sequence.ml index c118fb9..547a4e5 100644 --- a/sequence.ml +++ b/sequence.ml @@ -70,6 +70,16 @@ let mapi f seq k = let i = ref 0 in seq (fun x -> k (f !i x); incr i) +let map_by_2 f seq k = + let r = ref None in + let f y = match !r with + | None -> r := Some y + | Some x -> k (f x y) + in + seq f ; + match !r with + | None -> () | Some x -> k x + let filter p seq k = seq (fun x -> if p x then k x) let append s1 s2 k = s1 k; s2 k diff --git a/sequence.mli b/sequence.mli index 593f477..4acfdd4 100644 --- a/sequence.mli +++ b/sequence.mli @@ -109,6 +109,11 @@ val map : ('a -> 'b) -> 'a t -> 'b t val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t (** Map objects, along with their index in the sequence *) +val map_by_2 : ('a -> 'a -> 'a) -> 'a t -> 'a t + (** Map objects two by two. lazily. + The last element is kept in the sequence if the count is odd. + @since NEXT_RELEASE *) + val for_all : ('a -> bool) -> 'a t -> bool (** Do all elements satisfy the predicate? *) diff --git a/sequenceLabels.mli b/sequenceLabels.mli index 08f9a71..62894f4 100644 --- a/sequenceLabels.mli +++ b/sequenceLabels.mli @@ -87,6 +87,11 @@ val map : f:('a -> 'b) -> 'a t -> 'b t val mapi : f:(int -> 'a -> 'b) -> 'a t -> 'b t (** Map objects, along with their index in the sequence *) +val map_by_2 : f:('a -> 'a -> 'a) -> 'a t -> 'a t + (** Map objects two by two. lazily. + The last element is kept in the sequence if the count is odd. + @since NEXT_RELEASE *) + val for_all : f:('a -> bool) -> 'a t -> bool (** Do all elements satisfy the predicate? *) From be57b29f8425dd4817d66f88bb2297414b1943f2 Mon Sep 17 00:00:00 2001 From: Drup Date: Sat, 16 Jan 2016 15:43:43 +0100 Subject: [PATCH 2/4] Add sample. --- sequence.ml | 22 ++++++++++++++++++++++ sequence.mli | 9 +++++++++ sequenceLabels.mli | 9 +++++++++ 3 files changed, 40 insertions(+) diff --git a/sequence.ml b/sequence.ml index 547a4e5..91512f6 100644 --- a/sequence.ml +++ b/sequence.ml @@ -698,6 +698,28 @@ let random_array a k = let random_list l = random_array (Array.of_list l) +(** {2 Sampling} *) + +(** See https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R *) +let sample n seq = + match head seq with + | None -> [||] + | Some x -> + let a = Array.make n x in + let i = ref (-1) in + let f x = + incr i ; + if !i < n then + a.(!i) <- x + else + let j = Random.int n in + if j <= n then a.(!i) <- x + else () + in + seq f ; + if !i < n then Array.sub a 0 !i + else a + (** {2 Infix functions} *) module Infix = struct diff --git a/sequence.mli b/sequence.mli index 4acfdd4..9ca0f7c 100644 --- a/sequence.mli +++ b/sequence.mli @@ -498,6 +498,15 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +(** {2 Sampling} *) + +val sample : int -> 'a t -> 'a array + (** [sample n seq] returns k samples of [seq], with uniform probability. + It will consume the sequence and use O(n) memory. + + It returns an array of size [min (length seq) n]. + @since NEXT_RELEASE *) + (** {2 Infix functions} *) module Infix : sig diff --git a/sequenceLabels.mli b/sequenceLabels.mli index 62894f4..6f5b25e 100644 --- a/sequenceLabels.mli +++ b/sequenceLabels.mli @@ -446,6 +446,15 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +(** {2 Sampling} *) + +val sample : n:int -> 'a t -> 'a array + (** [sample n seq] returns k samples of [seq], with uniform probability. + It will consume the sequence and use O(n) memory. + + It returns an array of size [min (length seq) n]. + @since NEXT_RELEASE *) + (** {2 Infix functions} *) module Infix : sig From 964b5c61bd7e0f03e4ae9f73e00e4fca2cdad292 Mon Sep 17 00:00:00 2001 From: Drup Date: Sat, 16 Jan 2016 16:30:22 +0100 Subject: [PATCH 3/4] Add shuffle_buffer. --- sequence.ml | 28 ++++++++++++++++++++++++++++ sequence.mli | 8 ++++++++ sequenceLabels.mli | 8 ++++++++ 3 files changed, 44 insertions(+) diff --git a/sequence.ml b/sequence.ml index 91512f6..b68b474 100644 --- a/sequence.ml +++ b/sequence.ml @@ -698,6 +698,34 @@ let random_array a k = let random_list l = random_array (Array.of_list l) +(* See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *) +let shuffle_array a = + for k = Array.length a - 1 downto 0+1 do + let l = Random.int (k+1) in + let tmp = a.(l) in + a.(l) <- a.(k); + a.(k) <- tmp; + done + +let shuffle_buffer n seq k = + let seq_front = take n seq in + let a = to_array seq_front in + let l = Array.length a in + if l < n then begin + shuffle_array a ; + of_array a k + end + else begin + let seq = drop n seq in + let f x = + let i = Random.int n in + let y = a.(i) in + a.(i) <- x ; + k y + in + seq f + end + (** {2 Sampling} *) (** See https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R *) diff --git a/sequence.mli b/sequence.mli index 9ca0f7c..1393490 100644 --- a/sequence.mli +++ b/sequence.mli @@ -498,6 +498,14 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +val shuffle_buffer : int -> 'a t -> 'a t +(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random + order. The shuffling is *not* uniform. Uses O(n) memory. + + The first [n] elements of the sequence are consumed immediately. The + rest is consumed lazily. + @since NEXT_RELEASE *) + (** {2 Sampling} *) val sample : int -> 'a t -> 'a array diff --git a/sequenceLabels.mli b/sequenceLabels.mli index 6f5b25e..8ab7a59 100644 --- a/sequenceLabels.mli +++ b/sequenceLabels.mli @@ -446,6 +446,14 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +val shuffle_buffer : n:int -> 'a t -> 'a t +(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random + order. The shuffling is not uniform. Uses O(n) memory. + + The first [n] elements of the sequence are consumed immediately. The + rest is consumed lazily. + @since NEXT_RELEASE *) + (** {2 Sampling} *) val sample : n:int -> 'a t -> 'a array From 9ae652b004d7f51cf15c13e41b99d453c85bc794 Mon Sep 17 00:00:00 2001 From: Drup Date: Sat, 16 Jan 2016 16:35:33 +0100 Subject: [PATCH 4/4] Add shuffle. --- sequence.ml | 5 +++++ sequence.mli | 5 +++++ sequenceLabels.mli | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/sequence.ml b/sequence.ml index b68b474..b1d9ad4 100644 --- a/sequence.ml +++ b/sequence.ml @@ -707,6 +707,11 @@ let shuffle_array a = a.(k) <- tmp; done +let shuffle seq = + let a = to_array seq in + shuffle_array a ; + of_array a + let shuffle_buffer n seq k = let seq_front = take n seq in let a = to_array seq_front in diff --git a/sequence.mli b/sequence.mli index 1393490..83f4753 100644 --- a/sequence.mli +++ b/sequence.mli @@ -498,6 +498,11 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +val shuffle : 'a t -> 'a t +(** [shuffle seq] returns a perfect shuffle of [seq]. + Uses O(length seq) memory and time. Eager. + @since NEXT_RELEASE *) + val shuffle_buffer : int -> 'a t -> 'a t (** [shuffle_buffer n seq] returns a sequence of element of [seq] in random order. The shuffling is *not* uniform. Uses O(n) memory. diff --git a/sequenceLabels.mli b/sequenceLabels.mli index 8ab7a59..b52ac74 100644 --- a/sequenceLabels.mli +++ b/sequenceLabels.mli @@ -446,6 +446,11 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +val shuffle : 'a t -> 'a t +(** [shuffle seq] returns a perfect shuffle of [seq]. + Uses O(length seq) memory and time. Eager. + @since NEXT_RELEASE *) + val shuffle_buffer : n:int -> 'a t -> 'a t (** [shuffle_buffer n seq] returns a sequence of element of [seq] in random order. The shuffling is not uniform. Uses O(n) memory.