From cd32539fea17fc39b69dd9ce024c8744b7ac128f Mon Sep 17 00:00:00 2001
From: Drup
Date: Sat, 16 Jan 2016 14:45:55 +0100
Subject: [PATCH 1/9] Add map_by_2.

---
 sequence.ml        | 17 +++++++++++++++++
 sequence.mli       |  5 +++++
 sequenceLabels.mli |  5 +++++
 3 files changed, 27 insertions(+)

diff --git a/sequence.ml b/sequence.ml
index c118fb9..547a4e5 100644
--- a/sequence.ml
+++ b/sequence.ml
@@ -70,6 +70,23 @@ let mapi f seq k =
   let i = ref 0 in
   seq (fun x -> k (f !i x); incr i)
 
+let map_by_2 f seq k =
+  (* [pending] holds the first element of a pair until its partner arrives. *)
+  let pending = ref None in
+  let step y = match !pending with
+    | None -> pending := Some y
+    | Some x ->
+      (* Clear the slot before emitting, so the next element starts a new
+         pair instead of being combined with [x] again. *)
+      pending := None ;
+      k (f x y)
+  in
+  seq step ;
+  (* Odd count: the unpaired last element is emitted unchanged. *)
+  match !pending with
+    | None -> ()
+    | Some x -> k x
+
 let filter p seq k = seq (fun x -> if p x then k x)
 
 let append s1 s2 k = s1 k; s2 k
diff --git a/sequence.mli b/sequence.mli
index 593f477..4acfdd4 100644
--- a/sequence.mli
+++ b/sequence.mli
@@ -109,6 +109,11 @@ val map : ('a -> 'b) -> 'a t -> 'b t
 val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t
   (** Map objects, along with their index in the sequence *)
 
+val map_by_2 : ('a -> 'a -> 'a) -> 'a t -> 'a t
+  (** Map objects two by two. lazily.
+      The last element is kept in the sequence if the count is odd.
+      @since NEXT_RELEASE *)
+
 val for_all : ('a -> bool) -> 'a t -> bool
   (** Do all elements satisfy the predicate? *)
 
diff --git a/sequenceLabels.mli b/sequenceLabels.mli
index 08f9a71..62894f4 100644
--- a/sequenceLabels.mli
+++ b/sequenceLabels.mli
@@ -87,6 +87,11 @@ val map : f:('a -> 'b) -> 'a t -> 'b t
 val mapi : f:(int -> 'a -> 'b) -> 'a t -> 'b t
   (** Map objects, along with their index in the sequence *)
 
+val map_by_2 : f:('a -> 'a -> 'a) -> 'a t -> 'a t
+  (** Map objects two by two. lazily.
+      The last element is kept in the sequence if the count is odd.
+      @since NEXT_RELEASE *)
+
 val for_all : f:('a -> bool) -> 'a t -> bool
   (** Do all elements satisfy the predicate? *)
 
From be57b29f8425dd4817d66f88bb2297414b1943f2 Mon Sep 17 00:00:00 2001
From: Drup
Date: Sat, 16 Jan 2016 15:43:43 +0100
Subject: [PATCH 2/9] Add sample.

---
 sequence.ml        | 28 ++++++++++++++++++++++++++++
 sequence.mli       |  9 +++++++++
 sequenceLabels.mli |  9 +++++++++
 3 files changed, 46 insertions(+)

diff --git a/sequence.ml b/sequence.ml
index 547a4e5..91512f6 100644
--- a/sequence.ml
+++ b/sequence.ml
@@ -698,6 +698,34 @@ let random_array a k =
 
 let random_list l = random_array (Array.of_list l)
 
+(** {2 Sampling} *)
+
+(** See https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R *)
+let sample n seq =
+  (* [n <= 0]: nothing can be kept, and [Random.int] rejects a zero bound. *)
+  if n <= 0 then [||]
+  else begin
+    (* The reservoir is allocated on the first element (which also serves as
+       filler), so [seq] is traversed exactly once. *)
+    let reservoir = ref [||] in
+    let seen = ref 0 in
+    let f x =
+      if !seen = 0 then reservoir := Array.make n x
+      else if !seen < n then (!reservoir).(!seen) <- x
+      else begin
+        (* [x] is element number [!seen + 1]: keep it with probability
+           [n / (!seen + 1)] by drawing a slot in [0, !seen]. *)
+        let j = Random.int (!seen + 1) in
+        if j < n then (!reservoir).(j) <- x
+      end ;
+      incr seen
+    in
+    seq f ;
+    (* Fewer than [n] elements: return only the filled prefix. *)
+    if !seen < n then Array.sub !reservoir 0 !seen
+    else !reservoir
+  end
+
 (** {2 Infix functions} *)
 
 module Infix = struct
diff --git a/sequence.mli b/sequence.mli
index 4acfdd4..9ca0f7c 100644
--- a/sequence.mli
+++ b/sequence.mli
@@ -498,6 +498,15 @@ val random_list : 'a list -> 'a t
 (** Infinite sequence of random elements of the list. Basically the
     same as {!random_array}. *)
 
+(** {2 Sampling} *)
+
+val sample : int -> 'a t -> 'a array
+  (** [sample n seq] returns [n] samples of [seq], with uniform probability.
+      It will consume the sequence and use O(n) memory.
+
+      It returns an array of size [min (length seq) n].
+      @since NEXT_RELEASE *)
+
 (** {2 Infix functions} *)
 
 module Infix : sig
diff --git a/sequenceLabels.mli b/sequenceLabels.mli
index 62894f4..6f5b25e 100644
--- a/sequenceLabels.mli
+++ b/sequenceLabels.mli
@@ -446,6 +446,15 @@ val random_list : 'a list -> 'a t
 (** Infinite sequence of random elements of the list. Basically the
     same as {!random_array}. *)
 
+(** {2 Sampling} *)
+
+val sample : n:int -> 'a t -> 'a array
+  (** [sample n seq] returns [n] samples of [seq], with uniform probability.
+      It will consume the sequence and use O(n) memory.
+
+      It returns an array of size [min (length seq) n].
+      @since NEXT_RELEASE *)
+
 (** {2 Infix functions} *)
 
 module Infix : sig
From 964b5c61bd7e0f03e4ae9f73e00e4fca2cdad292 Mon Sep 17 00:00:00 2001
From: Drup
Date: Sat, 16 Jan 2016 16:30:22 +0100
Subject: [PATCH 3/9] Add shuffle_buffer.

---
 sequence.ml        | 37 +++++++++++++++++++++++++++++++++++++
 sequence.mli       |  8 ++++++++
 sequenceLabels.mli |  8 ++++++++
 3 files changed, 53 insertions(+)

diff --git a/sequence.ml b/sequence.ml
index 91512f6..b68b474 100644
--- a/sequence.ml
+++ b/sequence.ml
@@ -698,6 +698,43 @@ let random_array a k =
 
 let random_list l = random_array (Array.of_list l)
 
+(* See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *)
+let shuffle_array a =
+  (* In-place shuffle: slot [k] is swapped with a random slot in [0, k]. *)
+  for k = Array.length a - 1 downto 0+1 do
+    let l = Random.int (k+1) in
+    let tmp = a.(l) in
+    a.(l) <- a.(k);
+    a.(k) <- tmp;
+  done
+
+let shuffle_buffer n seq k =
+  let seq_front = take n seq in
+  let a = to_array seq_front in
+  let l = Array.length a in
+  if l < n then begin
+    (* Fewer than [n] elements: everything is buffered; shuffle and emit. *)
+    shuffle_array a ;
+    of_array a k
+  end
+  else begin
+    (* NOTE(review): [seq] is run again here, after [take] above; this
+       presumably assumes [seq] can be iterated more than once. *)
+    let seq = drop n seq in
+    (* Each incoming element evicts a random buffered one, which is emitted. *)
+    let f x =
+      let i = Random.int n in
+      let y = a.(i) in
+      a.(i) <- x ;
+      k y
+    in
+    seq f ;
+    (* The buffer still holds [n] elements that were never emitted: flush
+       them, shuffled, so that no element of [seq] is dropped. *)
+    shuffle_array a ;
+    of_array a k
+  end
+
 (** {2 Sampling} *)
 
 (** See https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R *)
diff --git a/sequence.mli b/sequence.mli
index 9ca0f7c..1393490 100644
--- a/sequence.mli
+++ b/sequence.mli
@@ -498,6 +498,14 @@ val random_list : 'a list -> 'a t
 (** Infinite sequence of random elements of the list. Basically the
     same as {!random_array}. *)
 
+val shuffle_buffer : int -> 'a t -> 'a t
+(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random
+    order. The shuffling is *not* uniform. Uses O(n) memory.
+
+    The first [n] elements of the sequence are consumed immediately. The
+    rest is consumed lazily.
+    @since NEXT_RELEASE *)
+
 (** {2 Sampling} *)
 
 val sample : int -> 'a t -> 'a array
diff --git a/sequenceLabels.mli b/sequenceLabels.mli
index 6f5b25e..8ab7a59 100644
--- a/sequenceLabels.mli
+++ b/sequenceLabels.mli
@@ -446,6 +446,14 @@ val random_list : 'a list -> 'a t
 (** Infinite sequence of random elements of the list. Basically the
     same as {!random_array}. *)
 
+val shuffle_buffer : n:int -> 'a t -> 'a t
+(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random
+    order. The shuffling is not uniform. Uses O(n) memory.
+
+    The first [n] elements of the sequence are consumed immediately. The
+    rest is consumed lazily.
+    @since NEXT_RELEASE *)
+
 (** {2 Sampling} *)
 
 val sample : n:int -> 'a t -> 'a array
From 9ae652b004d7f51cf15c13e41b99d453c85bc794 Mon Sep 17 00:00:00 2001
From: Drup
Date: Sat, 16 Jan 2016 16:35:33 +0100
Subject: [PATCH 4/9] Add shuffle.

---
 sequence.ml        | 6 ++++++
 sequence.mli       | 5 +++++
 sequenceLabels.mli | 5 +++++
 3 files changed, 16 insertions(+)

diff --git a/sequence.ml b/sequence.ml
index b68b474..b1d9ad4 100644
--- a/sequence.ml
+++ b/sequence.ml
@@ -707,6 +707,12 @@ let shuffle_array a =
     a.(k) <- tmp;
   done
 
+let shuffle seq =
+  (* Copy [seq] into an array, permute the copy in place, expose it again. *)
+  let shuffled = to_array seq in
+  shuffle_array shuffled ;
+  of_array shuffled
+
 let shuffle_buffer n seq k =
   let seq_front = take n seq in
   let a = to_array seq_front in
diff --git a/sequence.mli b/sequence.mli
index 1393490..83f4753 100644
--- a/sequence.mli
+++ b/sequence.mli
@@ -498,6 +498,11 @@ val random_list : 'a list -> 'a t
 (** Infinite sequence of random elements of the list. Basically the
     same as {!random_array}. *)
 
+val shuffle : 'a t -> 'a t
+(** [shuffle seq] returns a perfect shuffle of [seq].
+    Uses O(length seq) memory and time. Eager.
+    @since NEXT_RELEASE *)
+
 val shuffle_buffer : int -> 'a t -> 'a t
 (** [shuffle_buffer n seq] returns a sequence of element of [seq] in random
     order. The shuffling is *not* uniform. Uses O(n) memory.
diff --git a/sequenceLabels.mli b/sequenceLabels.mli index 8ab7a59..b52ac74 100644 --- a/sequenceLabels.mli +++ b/sequenceLabels.mli @@ -446,6 +446,11 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +val shuffle : 'a t -> 'a t +(** [shuffle seq] returns a perfect shuffle of [seq]. + Uses O(length seq) memory and time. Eager. + @since NEXT_RELEASE *) + val shuffle_buffer : n:int -> 'a t -> 'a t (** [shuffle_buffer n seq] returns a sequence of element of [seq] in random order. The shuffling is not uniform. Uses O(n) memory. From 243f81ae886c96d9f687a6a205d23d9aeb60f4bf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 7 Mar 2016 14:18:25 +0100 Subject: [PATCH 5/9] update @since tags --- sequence.mli | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sequence.mli b/sequence.mli index 83f4753..0cf036e 100644 --- a/sequence.mli +++ b/sequence.mli @@ -204,13 +204,13 @@ val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t val group_succ_by : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t (** Group equal consecutive elements. Synonym to {!group}. - @since NEXT_RELEASE *) + @since 0.6 *) val group_by : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t (** Group equal elements, disregarding their order of appearance. The result sequence is traversable as many times as required. - @since NEXT_RELEASE *) + @since 0.6 *) val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t (** Remove consecutive duplicate elements. 
Basically this is From 3cac9170457e5138b0249a1ec507e59d737ec416 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 7 Mar 2016 14:19:13 +0100 Subject: [PATCH 6/9] fix test --- tests/test_sequence.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_sequence.ml b/tests/test_sequence.ml index de1460b..0c0a23b 100644 --- a/tests/test_sequence.ml +++ b/tests/test_sequence.ml @@ -68,9 +68,9 @@ let test_concat2 () = |> S.length |> OUnit.assert_equal 2000 -let test_flatMap () = +let test_flat_map () = S.(1 -- 1000) - |> S.flatMap (fun i -> S.(i -- (i+1))) + |> S.flat_map (fun i -> S.(i -- (i+1))) |> S.length |> OUnit.assert_equal 2000 @@ -237,7 +237,7 @@ let suite = "test_exists" >:: test_exists; "test_length" >:: test_length; "test_concat" >:: test_concat; - "test_flatMap" >:: test_flatMap; + "test_flatMap" >:: test_flat_map; "test_intersperse" >:: test_intersperse; "test_not_persistent" >:: test_not_persistent; "test_persistent" >:: test_persistent; From 8a2c32d729f9c91d5c853746aacdc8c5ce949f62 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 7 Mar 2016 14:22:29 +0100 Subject: [PATCH 7/9] add missing entry in changelog --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a51e448..38bc087 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## 0.6 + +- deprecate `flatMap` and `fmap` +- in opam file, depend on ocamlbuild +- add `group_by` (ignores the ordering) +- alias `group_succ_by`, deprecated `group` +- iterate on booleans +- open Bigarray (preparing for 4.03) + ## 0.5.5 - new module `SequenceLabels` From c08dd142700b0021105ebf05cbd84eec72ced13c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 7 Mar 2016 14:26:40 +0100 Subject: [PATCH 8/9] missing since tags --- sequence.mli | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sequence.mli b/sequence.mli index 0cf036e..d20505a 100644 --- a/sequence.mli +++ b/sequence.mli 
@@ -151,7 +151,7 @@ val flatten : 'a t t -> 'a t (** Alias for {!concat} *) val flatMap : ('a -> 'b t) -> 'a t -> 'b t -(** @deprecated use {!flat_map} since NEXT_RELEASE *) +(** @deprecated use {!flat_map} since 0.6 *) val flat_map : ('a -> 'b t) -> 'a t -> 'b t (** Monadic bind. Intuitively, it applies the function to every @@ -159,7 +159,7 @@ val flat_map : ('a -> 'b t) -> 'a t -> 'b t @since 0.5 *) val fmap : ('a -> 'b option) -> 'a t -> 'b t -(** @deprecated use {!filter_map} since NEXT_RELEASE *) +(** @deprecated use {!filter_map} since 0.6 *) val filter_map : ('a -> 'b option) -> 'a t -> 'b t (** Map and only keep non-[None] elements @@ -199,7 +199,7 @@ val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t (** Group equal consecutive elements. - @deprecated use {!group_succ_by} *) + @deprecated since 0.6 use {!group_succ_by} *) val group_succ_by : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t (** Group equal consecutive elements. From 3abaae60667a2b04bd6d9e0adf2cd14a2e4b67d5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 7 Mar 2016 14:28:32 +0100 Subject: [PATCH 9/9] prepare for 0.7 --- CHANGELOG.md | 8 ++++++++ _oasis | 2 +- opam | 2 ++ sequence.mli | 10 +++++----- sequenceLabels.mli | 8 ++++---- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38bc087..3915aef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 0.7 + +- add missing entry in changelog and missing since annotations +- Add `shuffle`. +- Add `shuffle_buffer`. +- Add `sample`. +- Add `map_by_2`. 
+ ## 0.6 - deprecate `flatMap` and `fmap` diff --git a/_oasis b/_oasis index c5f184f..10d80fa 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: sequence -Version: 0.6 +Version: 0.7 Homepage: https://github.com/c-cube/sequence Authors: Simon Cruanes License: BSD-2-clause diff --git a/opam b/opam index 15aba0c..397ca53 100644 --- a/opam +++ b/opam @@ -1,4 +1,6 @@ opam-version: "1.2" +name: "sequence" +version: "0.7" author: "Simon Cruanes" maintainer: "simon.cruanes@inria.fr" build: [ diff --git a/sequence.mli b/sequence.mli index d20505a..c4e3a90 100644 --- a/sequence.mli +++ b/sequence.mli @@ -112,7 +112,7 @@ val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t val map_by_2 : ('a -> 'a -> 'a) -> 'a t -> 'a t (** Map objects two by two. lazily. The last element is kept in the sequence if the count is odd. - @since NEXT_RELEASE *) + @since 0.7 *) val for_all : ('a -> bool) -> 'a t -> bool (** Do all elements satisfy the predicate? *) @@ -419,7 +419,7 @@ val int_range_dec : start:int -> stop:int -> int t val bools : bool t (** Iterates on [true] and [false] - @since NEXT_RELEASE *) + @since 0.6 *) val of_set : (module Set.S with type elt = 'a and type t = 'b) -> 'b -> 'a t (** Convert the given set to a sequence. The set module must be provided. *) @@ -501,7 +501,7 @@ val random_list : 'a list -> 'a t val shuffle : 'a t -> 'a t (** [shuffle seq] returns a perfect shuffle of [seq]. Uses O(length seq) memory and time. Eager. - @since NEXT_RELEASE *) + @since 0.7 *) val shuffle_buffer : int -> 'a t -> 'a t (** [shuffle_buffer n seq] returns a sequence of element of [seq] in random @@ -509,7 +509,7 @@ val shuffle_buffer : int -> 'a t -> 'a t The first [n] elements of the sequence are consumed immediately. The rest is consumed lazily. - @since NEXT_RELEASE *) + @since 0.7 *) (** {2 Sampling} *) @@ -518,7 +518,7 @@ val sample : int -> 'a t -> 'a array It will consume the sequence and use O(n) memory. It returns an array of size [min (length seq) n]. 
- @since NEXT_RELEASE *) + @since 0.7 *) (** {2 Infix functions} *) diff --git a/sequenceLabels.mli b/sequenceLabels.mli index b52ac74..5db0707 100644 --- a/sequenceLabels.mli +++ b/sequenceLabels.mli @@ -90,7 +90,7 @@ val mapi : f:(int -> 'a -> 'b) -> 'a t -> 'b t val map_by_2 : f:('a -> 'a -> 'a) -> 'a t -> 'a t (** Map objects two by two. lazily. The last element is kept in the sequence if the count is odd. - @since NEXT_RELEASE *) + @since 0.7 *) val for_all : f:('a -> bool) -> 'a t -> bool (** Do all elements satisfy the predicate? *) @@ -449,7 +449,7 @@ val random_list : 'a list -> 'a t val shuffle : 'a t -> 'a t (** [shuffle seq] returns a perfect shuffle of [seq]. Uses O(length seq) memory and time. Eager. - @since NEXT_RELEASE *) + @since 0.7 *) val shuffle_buffer : n:int -> 'a t -> 'a t (** [shuffle_buffer n seq] returns a sequence of element of [seq] in random @@ -457,7 +457,7 @@ val shuffle_buffer : n:int -> 'a t -> 'a t The first [n] elements of the sequence are consumed immediately. The rest is consumed lazily. - @since NEXT_RELEASE *) + @since 0.7 *) (** {2 Sampling} *) @@ -466,7 +466,7 @@ val sample : n:int -> 'a t -> 'a array It will consume the sequence and use O(n) memory. It returns an array of size [min (length seq) n]. - @since NEXT_RELEASE *) + @since 0.7 *) (** {2 Infix functions} *)