diff --git a/CHANGELOG.md b/CHANGELOG.md index a51e448..3915aef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## 0.7 + +- add missing entry in changelog and missing since annotations +- Add `shuffle`. +- Add `shuffle_buffer`. +- Add `sample`. +- Add `map_by_2`. + +## 0.6 + +- deprecate `flatMap` and `fmap` +- in opam file, depend on ocamlbuild +- add `group_by` (ignores the ordering) +- alias `group_succ_by`, deprecated `group` +- iterate on booleans +- open Bigarray (preparing for 4.03) + ## 0.5.5 - new module `SequenceLabels` diff --git a/META b/META index 27ff4f1..fb45f2c 100644 --- a/META +++ b/META @@ -1,6 +1,6 @@ # OASIS_START -# DO NOT EDIT (digest: e23210adac78822adc0cf3b47bd47a95) -version = "0.6" +# DO NOT EDIT (digest: 8041ba3970fcecd2b690dc7b78ccae41) +version = "0.7" description = "Simple sequence (iterator) datatype and combinators" requires = "bytes" archive(byte) = "sequence.cma" @@ -9,7 +9,7 @@ archive(native) = "sequence.cmxa" archive(native, plugin) = "sequence.cmxs" exists_if = "sequence.cma" package "invert" ( - version = "0.6" + version = "0.7" description = "Simple sequence (iterator) datatype and combinators" requires = "sequence delimcc" archive(byte) = "invert.cma" @@ -20,7 +20,7 @@ package "invert" ( ) package "bigarray" ( - version = "0.6" + version = "0.7" description = "Simple sequence (iterator) datatype and combinators" requires = "sequence bigarray" archive(byte) = "bigarray.cma" diff --git a/_oasis b/_oasis index 2fc3d2b..68bb3cf 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: sequence -Version: 0.6 +Version: 0.7 Homepage: https://github.com/c-cube/sequence Authors: Simon Cruanes License: BSD-2-clause diff --git a/opam b/opam index 15aba0c..397ca53 100644 --- a/opam +++ b/opam @@ -1,4 +1,6 @@ opam-version: "1.2" +name: "sequence" +version: "0.7" author: "Simon Cruanes" maintainer: "simon.cruanes@inria.fr" build: [ diff --git a/sequence.ml b/sequence.ml index c118fb9..b1d9ad4 
100644 --- a/sequence.ml +++ b/sequence.ml @@ -70,6 +70,16 @@ let mapi f seq k = let i = ref 0 in seq (fun x -> k (f !i x); incr i) +let map_by_2 f seq k = + let r = ref None in (* first element of the pair being assembled, if any *) + let f y = match !r with + | None -> r := Some y + | Some x -> r := None; k (f x y) (* clear the slot so the next element starts a new pair *) + in + seq f ; + match !r with + | None -> () | Some x -> k x (* odd count: emit the unpaired last element as is *) + let filter p seq k = seq (fun x -> if p x then k x) let append s1 s2 k = s1 k; s2 k @@ -688,6 +698,64 @@ let random_array a k = let random_list l = random_array (Array.of_list l) +(* See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *) +let shuffle_array a = + for k = Array.length a - 1 downto 0+1 do + let l = Random.int (k+1) in + let tmp = a.(l) in + a.(l) <- a.(k); + a.(k) <- tmp; + done + +let shuffle seq = + let a = to_array seq in + shuffle_array a ; + of_array a + +let shuffle_buffer n seq k = + let seq_front = take n seq in + let a = to_array seq_front in + let l = Array.length a in + if l < n then begin + shuffle_array a ; + of_array a k + end + else begin + let seq = drop n seq in + let f x = + let i = Random.int n in + let y = a.(i) in + a.(i) <- x ; + k y + in + seq f ; + (* flush the [n] elements still held in the buffer, otherwise they are lost *) + shuffle_array a ; + of_array a k + end + +(** {2 Sampling} *) + +(** See https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R *) +let sample n seq = + match head seq with + | None -> [||] + | Some x -> + let a = Array.make n x in + let i = ref (-1) in + let f x = + incr i ; + if !i < n then + a.(!i) <- x + else + let j = Random.int (!i+1) in (* Algorithm R: uniform index among the elements seen so far *) + if j < n then a.(j) <- x + else () + in + seq f ; + if !i < n then Array.sub a 0 (!i+1) (* !i is the index of the last element seen, hence the count is !i+1 *) + else a + (** {2 Infix functions} *) module Infix = struct diff --git a/sequence.mli b/sequence.mli index 593f477..c4e3a90 100644 --- a/sequence.mli +++ b/sequence.mli @@ -109,6 +109,11 @@ val map : ('a -> 'b) -> 'a t -> 'b t val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t (** Map objects, along with their index in the sequence *) +val map_by_2 : ('a -> 'a -> 'a) -> 'a t -> 'a t + (** Map objects two by two, lazily. + The last element is kept in the sequence if the count is odd. 
+ @since 0.7 *) + val for_all : ('a -> bool) -> 'a t -> bool (** Do all elements satisfy the predicate? *) @@ -146,7 +151,7 @@ val flatten : 'a t t -> 'a t (** Alias for {!concat} *) val flatMap : ('a -> 'b t) -> 'a t -> 'b t -(** @deprecated use {!flat_map} since NEXT_RELEASE *) +(** @deprecated use {!flat_map} since 0.6 *) val flat_map : ('a -> 'b t) -> 'a t -> 'b t (** Monadic bind. Intuitively, it applies the function to every @@ -154,7 +159,7 @@ val flat_map : ('a -> 'b t) -> 'a t -> 'b t @since 0.5 *) val fmap : ('a -> 'b option) -> 'a t -> 'b t -(** @deprecated use {!filter_map} since NEXT_RELEASE *) +(** @deprecated use {!filter_map} since 0.6 *) val filter_map : ('a -> 'b option) -> 'a t -> 'b t (** Map and only keep non-[None] elements @@ -194,18 +199,18 @@ val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t (** Group equal consecutive elements. - @deprecated use {!group_succ_by} *) + @deprecated since 0.6 use {!group_succ_by} *) val group_succ_by : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t (** Group equal consecutive elements. Synonym to {!group}. - @since NEXT_RELEASE *) + @since 0.6 *) val group_by : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t (** Group equal elements, disregarding their order of appearance. The result sequence is traversable as many times as required. - @since NEXT_RELEASE *) + @since 0.6 *) val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t (** Remove consecutive duplicate elements. Basically this is @@ -414,7 +419,7 @@ val int_range_dec : start:int -> stop:int -> int t val bools : bool t (** Iterates on [true] and [false] - @since NEXT_RELEASE *) + @since 0.7 *) val of_set : (module Set.S with type elt = 'a and type t = 'b) -> 'b -> 'a t (** Convert the given set to a sequence. The set module must be provided. *) @@ -493,6 +498,28 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. 
Basically the same as {!random_array}. *) +val shuffle : 'a t -> 'a t +(** [shuffle seq] returns a perfect shuffle of [seq]. + Uses O(length seq) memory and time. Eager. + @since 0.7 *) + +val shuffle_buffer : int -> 'a t -> 'a t +(** [shuffle_buffer n seq] returns a sequence of elements of [seq] in random + order. The shuffling is {b not} uniform. Uses O(n) memory. + + The first [n] elements of the sequence are consumed immediately. The + rest is consumed lazily. + @since 0.7 *) + +(** {2 Sampling} *) + +val sample : int -> 'a t -> 'a array + (** [sample n seq] returns up to [n] samples of [seq], with uniform probability. + It will consume the sequence and use O(n) memory. + + It returns an array of size [min (length seq) n]. + @since 0.7 *) + (** {2 Infix functions} *) module Infix : sig diff --git a/sequenceLabels.mli b/sequenceLabels.mli index 08f9a71..5db0707 100644 --- a/sequenceLabels.mli +++ b/sequenceLabels.mli @@ -87,6 +87,11 @@ val map : f:('a -> 'b) -> 'a t -> 'b t val mapi : f:(int -> 'a -> 'b) -> 'a t -> 'b t (** Map objects, along with their index in the sequence *) +val map_by_2 : f:('a -> 'a -> 'a) -> 'a t -> 'a t + (** Map objects two by two, lazily. + The last element is kept in the sequence if the count is odd. + @since 0.7 *) + val for_all : f:('a -> bool) -> 'a t -> bool (** Do all elements satisfy the predicate? *) @@ -441,6 +446,28 @@ val random_list : 'a list -> 'a t (** Infinite sequence of random elements of the list. Basically the same as {!random_array}. *) +val shuffle : 'a t -> 'a t +(** [shuffle seq] returns a perfect shuffle of [seq]. + Uses O(length seq) memory and time. Eager. + @since 0.7 *) + +val shuffle_buffer : n:int -> 'a t -> 'a t +(** [shuffle_buffer ~n seq] returns a sequence of elements of [seq] in random + order. The shuffling is not uniform. Uses O(n) memory. + + The first [n] elements of the sequence are consumed immediately. The + rest is consumed lazily. 
+ @since 0.7 *) + +(** {2 Sampling} *) + +val sample : n:int -> 'a t -> 'a array + (** [sample ~n seq] returns up to [n] samples of [seq], with uniform probability. + It will consume the sequence and use O(n) memory. + + It returns an array of size [min (length seq) n]. + @since 0.7 *) + (** {2 Infix functions} *) module Infix : sig diff --git a/setup.ml b/setup.ml index 5625abd..559a871 100644 --- a/setup.ml +++ b/setup.ml @@ -1,7 +1,7 @@ (* setup.ml generated for the first time by OASIS v0.4.4 *) (* OASIS_START *) -(* DO NOT EDIT (digest: ba3f6d5b3b5e6f77424dfec99217b2f6) *) +(* DO NOT EDIT (digest: e28e259a63b26395383267decca6401e) *) (* Regenerated by OASIS v0.4.5 Visit http://oasis.forge.ocamlcore.org for more information and @@ -6861,7 +6861,7 @@ let setup_t = alpha_features = []; beta_features = []; name = "sequence"; - version = "0.6"; + version = "0.7"; license = OASISLicense.DEP5License (OASISLicense.DEP5Unit @@ -7280,7 +7280,7 @@ let setup_t = }; oasis_fn = Some "_oasis"; oasis_version = "0.4.5"; - oasis_digest = Some "\224\020Z\173\146\131E\193\nn\225\205\167u\192\196"; + oasis_digest = Some "\247\213|\155\007DgsAe\210\221|\198\232\237"; oasis_exec = None; oasis_setup_args = []; setup_update = false diff --git a/tests/test_sequence.ml b/tests/test_sequence.ml index de1460b..0c0a23b 100644 --- a/tests/test_sequence.ml +++ b/tests/test_sequence.ml @@ -68,9 +68,9 @@ let test_concat2 () = |> S.length |> OUnit.assert_equal 2000 -let test_flatMap () = +let test_flat_map () = S.(1 -- 1000) - |> S.flatMap (fun i -> S.(i -- (i+1))) + |> S.flat_map (fun i -> S.(i -- (i+1))) |> S.length |> OUnit.assert_equal 2000 @@ -237,7 +237,7 @@ let suite = "test_exists" >:: test_exists; "test_length" >:: test_length; "test_concat" >:: test_concat; - "test_flatMap" >:: test_flatMap; + "test_flatMap" >:: test_flat_map; "test_intersperse" >:: test_intersperse; "test_not_persistent" >:: test_not_persistent; "test_persistent" >:: test_persistent;