heavy modification of Gen's API. Now the default 'a Gen.t is a transient, consumable generator

of 'a, and 'a Gen.Restart.t can be used for restartable generators.
This commit is contained in:
Simon Cruanes 2013-11-11 23:04:10 +01:00
parent e36fc5275a
commit 11611894e9
5 changed files with 1336 additions and 990 deletions

1480
gen.ml

File diff suppressed because it is too large Load diff

315
gen.mli
View file

@ -23,201 +23,168 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Restartable generators} *)
(** {1 Generators}
(** This structure is inspired from Ocaml Batteries' BatEnum.t. It features
restartable generators. A value of type ['a Gen.t] represents a finite or
infinite lazy enumeration of values of type ['a]. It can be instantiated
as many times as needed into a ['a generator], which is a consumable
enumeration. The next element of a ['a generator] is obtained by
invoking it as a function; an exception, [EOG], is raised when no
more elements are available. *)
Values of type ['a Gen.t] represent a possibly infinite sequence of values
of type 'a. One can only iterate once on the sequence, as it is consumed
by iteration/deconstruction/access. The exception {!EOG} (end of generator)
is raised when the generator is empty.
The submodule {!Restart} provides utilities to work with
{b restartable generators}, that is, functions [unit -> 'a Gen.t] that
make it possible to build as many generators from the same source as needed.
*)
(** {2 Global type declarations} *)
exception EOG
(** End of Generation *)
type 'a t = unit -> 'a generator
(** An enum is a generator of generators *)
and 'a generator = unit -> 'a
type 'a t = unit -> 'a
(** A generator may be called several times, yielding the next value
each time. It raises EOG when it reaches the end. *)
(** {2 Generator functions} *)
type 'a gen = 'a t
val start : 'a t -> 'a generator
(** Create a new generator on the given restartable generator. *)
(** {2 Common signature for transient and restartable generators} *)
(** {2 Transient generators} *)
module type S = sig
type 'a t
module Gen : sig
val empty : 'a generator
val empty : 'a t
(** Empty generator, with no elements *)
val next : 'a generator -> 'a
(** Get next element, or raise EOG *)
val singleton : 'a -> 'a t
(** One-element generator *)
val junk : 'a generator -> unit
(** Drop element *)
val fold : ('b -> 'a -> 'b) -> 'b -> 'a generator -> 'b
(** Fold over the generator *)
val iter : ('a -> unit) -> 'a generator -> unit
(** Iterate on the generator *)
val length : 'a generator -> int
(** Consume generator to compute its length *)
val of_list : 'a list -> 'a generator
val to_list : 'a generator -> 'a list (* not tailrec *)
val to_rev_list : 'a generator -> 'a list
val int_range : int -> int -> int generator
end
(** {2 Basic constructors} *)
val empty : 'a t
(** Empty enum, with no elements *)
val singleton : 'a -> 'a t
(** One-element enum *)
val repeat : 'a -> 'a t
val repeat : 'a -> 'a t
(** Repeat same element endlessly *)
val repeatedly : (unit -> 'a) -> 'a t
(** Call the same function an infinite number of times (useful for instance
if the function is a random generator). *)
val iterate : 'a -> ('a -> 'a) -> 'a t
val iterate : 'a -> ('a -> 'a) -> 'a t
(** [iterate x f] is [[x; f x; f (f x); f (f (f x)); ...]] *)
val unfold : ('b -> ('a * 'b) option) -> 'b -> 'a t
val unfold : ('b -> ('a * 'b) option) -> 'b -> 'a t
(** Dual of {!fold}, with a deconstructing operation. It keeps on
unfolding the ['b] value into a new ['b], and a ['a] which is yielded,
until [None] is returned. *)
(** {2 Basic combinators} *)
(** {2 Basic combinators} *)
val is_empty : _ t -> bool
val is_empty : _ t -> bool
(** Check whether the enum is empty. *)
val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b
val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b
(** Fold on the generator, tail-recursively *)
val fold2 : ('c -> 'a -> 'b -> 'c) -> 'c -> 'a t -> 'b t -> 'c
val fold2 : ('c -> 'a -> 'b -> 'c) -> 'c -> 'a t -> 'b t -> 'c
(** Fold on the two enums in parallel. Stops once one of the enums
is exhausted. *)
val reduce : ('a -> 'a -> 'a) -> 'a t -> 'a
val reduce : ('a -> 'a -> 'a) -> 'a t -> 'a
(** Fold on non-empty sequences (otherwise raise Invalid_argument) *)
val scan : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b t
val scan : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b t
(** Like {!fold}, but keeping successive values of the accumulator *)
val iter : ('a -> unit) -> 'a t -> unit
val iter : ('a -> unit) -> 'a t -> unit
(** Iterate on the enum *)
val iteri : (int -> 'a -> unit) -> 'a t -> unit
val iteri : (int -> 'a -> unit) -> 'a t -> unit
(** Iterate on elements with their index in the enum, from 0 *)
val iter2 : ('a -> 'b -> unit) -> 'a t -> 'b t -> unit
val iter2 : ('a -> 'b -> unit) -> 'a t -> 'b t -> unit
(** Iterate on the two sequences. Stops once one of them is exhausted.*)
val length : _ t -> int
val length : _ t -> int
(** Length of an enum (linear time) *)
val map : ('a -> 'b) -> 'a t -> 'b t
val map : ('a -> 'b) -> 'a t -> 'b t
(** Lazy map. No iteration is performed now, the function will be called
when the result is traversed. *)
val append : 'a t -> 'a t -> 'a t
val append : 'a t -> 'a t -> 'a t
(** Append the two enums; the result contains the elements of the first,
then the elements of the second enum. *)
val cycle : 'a t -> 'a t
(** Cycle through the enum, endlessly. The enum must not be empty. *)
val flatten : 'a gen t -> 'a t
(** Flatten the enumeration of generators *)
val flatten : 'a t t -> 'a t
(** Flatten the enum of enum. *)
val flatMap : ('a -> 'b t) -> 'a t -> 'b t
val flatMap : ('a -> 'b gen) -> 'a t -> 'b t
(** Monadic bind; each element is transformed to a sub-enum
which is then iterated on, before the next element is processed,
and so on. *)
val mem : ?eq:('a -> 'a -> bool) -> 'a -> 'a t -> bool
val mem : ?eq:('a -> 'a -> bool) -> 'a -> 'a t -> bool
(** Is the given element a member of the enum? *)
val take : int -> 'a t -> 'a t
val take : int -> 'a t -> 'a t
(** Take at most n elements *)
val drop : int -> 'a t -> 'a t
val drop : int -> 'a t -> 'a t
(** Drop n elements *)
val nth : int -> 'a t -> 'a
(** n-th element, or Not_found *)
val nth : int -> 'a t -> 'a
(** n-th element, or Not_found
@raise Not_found if the generator contains fewer than [n] elements *)
val filter : ('a -> bool) -> 'a t -> 'a t
val filter : ('a -> bool) -> 'a t -> 'a t
(** Filter out elements that do not satisfy the predicate. *)
val takeWhile : ('a -> bool) -> 'a t -> 'a t
val takeWhile : ('a -> bool) -> 'a t -> 'a t
(** Take elements while they satisfy the predicate *)
val dropWhile : ('a -> bool) -> 'a t -> 'a t
val dropWhile : ('a -> bool) -> 'a t -> 'a t
(** Drop elements while they satisfy the predicate *)
val filterMap : ('a -> 'b option) -> 'a t -> 'b t
val filterMap : ('a -> 'b option) -> 'a t -> 'b t
(** Maps some elements to 'b, drop the other ones *)
val zipWith : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t
val zipWith : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t
(** Combine common part of the enums (stops when one is exhausted) *)
val zip : 'a t -> 'b t -> ('a * 'b) t
val zip : 'a t -> 'b t -> ('a * 'b) t
(** Zip together the common part of the enums *)
val zipIndex : 'a t -> (int * 'a) t
val zipIndex : 'a t -> (int * 'a) t
(** Zip elements with their index in the enum *)
val unzip : ('a * 'b) t -> 'a t * 'b t
val unzip : ('a * 'b) t -> 'a t * 'b t
(** Unzip into two sequences, splitting each pair *)
val partition : ('a -> bool) -> 'a t -> 'a t * 'a t
val partition : ('a -> bool) -> 'a t -> 'a t * 'a t
(** [partition p l] returns the elements that satisfy [p],
and the elements that do not satisfy [p] *)
val for_all : ('a -> bool) -> 'a t -> bool
val for_all : ('a -> bool) -> 'a t -> bool
(** Is the predicate true for all elements? *)
val exists : ('a -> bool) -> 'a t -> bool
val exists : ('a -> bool) -> 'a t -> bool
(** Is the predicate true for at least one element? *)
val for_all2 : ('a -> 'b -> bool) -> 'a t -> 'b t -> bool
val for_all2 : ('a -> 'b -> bool) -> 'a t -> 'b t -> bool
val exists2 : ('a -> 'b -> bool) -> 'a t -> 'b t -> bool
val exists2 : ('a -> 'b -> bool) -> 'a t -> 'b t -> bool
val min : ?lt:('a -> 'a -> bool) -> 'a t -> 'a
val min : ?lt:('a -> 'a -> bool) -> 'a t -> 'a
(** Minimum element, according to the given comparison function *)
val max : ?lt:('a -> 'a -> bool) -> 'a t -> 'a
val max : ?lt:('a -> 'a -> bool) -> 'a t -> 'a
(** Maximum element, see {!min} *)
val eq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t -> bool
val eq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t -> bool
(** Equality of generators. *)
val lexico : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> int
val lexico : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> int
(** Lexicographic comparison of generators. If the common prefix is
the same, the shortest one is considered as smaller than the other. *)
val compare : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> int
val compare : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> int
(** Synonym for {! lexico} *)
(** {2 Complex combinators} *)
(** {2 Complex combinators} *)
val merge : 'a t t -> 'a t
(** Pick elements fairly in each sub-enum. The given enum
val merge : 'a gen t -> 'a t
(** Pick elements fairly in each sub-generator. The given enum
must be finite (not its elements, though). The merge of enums
[e1, e2, ... en] picks one element in [e1], then one element in [e2],
then in [e3], ..., then in [en], and then starts again at [e1]. Once
@ -225,125 +192,149 @@ val merge : 'a t t -> 'a t
their merge is also empty.
For instance, [merge [1;3;5] [2;4;6]] will be [1;2;3;4;5;6]. *)
(** {3 Mutable heap (taken from heap.ml to avoid dependencies)} *)
module Heap : sig
type 'a t (** A heap containing values of type 'a *)
val empty : cmp:('a -> 'a -> int) -> 'a t
val insert : 'a t -> 'a -> unit
val is_empty : 'a t -> bool
val pop : 'a t -> 'a
end
val intersection : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
val intersection : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
(** Intersection of two sorted sequences. Only elements that occur in both
inputs appear in the output *)
val sorted_merge : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
val sorted_merge : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
(** Merge two sorted sequences into a sorted sequence *)
val sorted_merge_n : ?cmp:('a -> 'a -> int) -> 'a t t -> 'a t
val sorted_merge_n : ?cmp:('a -> 'a -> int) -> 'a gen t -> 'a t
(** Sorted merge of multiple sorted sequences *)
val persistent : 'a generator -> 'a t
(** Store content of the generator in memory, to be able to iterate on it
several times later *)
val round_robin : ?n:int -> 'a t -> 'a generator t
(** Split the enum into [n] generators in a fair way. Elements with
[index = k mod n] will go to the k-th enum. [n] default value
is 2. *)
val tee : ?n:int -> 'a t -> 'a generator t
val tee : ?n:int -> 'a t -> 'a gen list
(** Duplicate the enum into [n] generators (default 2). The generators
share the same underlying instance of the enum, so the optimal case is
when they are consumed evenly *)
val interleave : 'a t -> 'a t -> 'a t
val round_robin : ?n:int -> 'a t -> 'a gen list
(** Split the enum into [n] generators in a fair way. Elements with
[index = k mod n] will go to the k-th enum. [n] default value
is 2. *)
val interleave : 'a t -> 'a t -> 'a t
(** [interleave a b] yields an element of [a], then an element of [b],
and so on until the end of [a] or [b] is reached. *)
val intersperse : 'a -> 'a t -> 'a t
val intersperse : 'a -> 'a t -> 'a t
(** Put the separator element between all elements of the given enum *)
val product : 'a t -> 'b t -> ('a * 'b) t
(** Cartesian product. If the first sequence is infinite, some pairs
will never be generated. *)
val product : 'a t -> 'b t -> ('a * 'b) t
(** Cartesian product, in no predictable order. Works even if some of the
arguments are infinite. *)
val fair_product : 'a t -> 'b t -> ('a * 'b) t
(** Cartesian product, in no predictable order. Contrary to {!product} this
function does eventually yield every pair *)
val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t
val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t
(** Group equal consecutive elements together. *)
val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t
val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t
(** Remove consecutive duplicate elements. Basically this is
like [fun e -> map List.hd (group e)]. *)
val sort : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t
val sort : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t
(** Sort according to the given comparison function. The enum must be finite. *)
val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t
val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t
(** Sort and remove duplicates. The enum must be finite. *)
(* TODO later
val permutations : 'a t -> 'a t t
(* TODO later
val permutations : 'a t -> 'a gen t
(** Permutations of the enum. Each permutation becomes unavailable once
the next one is produced. *)
val combinations : int -> 'a t -> 'a t t
val combinations : int -> 'a t -> 'a t t
(** Combinations of given length. *)
val powerSet : 'a t -> 'a t t
val powerSet : 'a t -> 'a t t
(** All subsets of the enum (in no particular order) *)
*)
*)
(** {2 Basic conversion functions} *)
(** {2 Basic conversion functions} *)
val of_list : 'a list -> 'a t
val of_list : 'a list -> 'a t
(** Enumerate elements of the list *)
val to_list : 'a t -> 'a list
val to_list : 'a t -> 'a list
(** Non tail-call transformation to list, in the same order *)
val to_rev_list : 'a t -> 'a list
val to_rev_list : 'a t -> 'a list
(** Tail call conversion to list, in reverse order (more efficient) *)
val to_array : 'a t -> 'a array
val to_array : 'a t -> 'a array
(** Convert the enum to an array (not very efficient) *)
val of_array : ?start:int -> ?len:int -> 'a array -> 'a t
val of_array : ?start:int -> ?len:int -> 'a array -> 'a t
(** Iterate on (a slice of) the given array *)
val rand_int : int -> int t
val rand_int : int -> int t
(** Random ints in the given range. *)
val int_range : int -> int -> int t
val int_range : int -> int -> int t
(** [int_range a b] enumerates integers between [a] and [b], included. [a]
is assumed to be smaller than [b]. *)
val pp : ?start:string -> ?stop:string -> ?sep:string -> ?horizontal:bool ->
(Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit
(** Pretty print an enum on a formatter. *)
module Infix : sig
val (--) : int -> int -> int t
(** Synonym for {! int_range} *)
module Infix : sig
val (@@) : 'a t -> 'a t -> 'a t
(** Synonym for {! append} *)
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
(** Synonym for {! flatMap}, with arguments reversed *)
val (>>=) : 'a t -> ('a -> 'b gen) -> 'b t
(** Monadic bind operator *)
end
val (--) : int -> int -> int t
(** Synonym for {! int_range} *)
val (|>) : 'a -> ('a -> 'b) -> 'b
(** Function application, reversed *)
val (>>=) : 'a t -> ('a -> 'b gen) -> 'b t
(** Monadic bind operator *)
val pp : ?start:string -> ?stop:string -> ?sep:string -> ?horizontal:bool ->
(Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit
(** Pretty print the content of the generator on a formatter. *)
end
val (@@) : 'a t -> 'a t -> 'a t
(** {2 Transient generators} *)
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
val get : 'a t -> 'a
(** Get the next value
@raise EOG if there is no next value *)
val (--) : int -> int -> int t
val next : 'a t -> 'a
(** Synonym for {!get} *)
val (|>) : 'a -> ('a -> 'b) -> 'b
val get_safe : 'a t -> 'a option
(** Get the next value, or return None *)
val junk : 'a t -> unit
(** Drop the next value, discarding it.
@raise EOG if there is no next value *)
val repeatedly : (unit -> 'a) -> 'a t
(** Call the same function an infinite number of times (useful for instance
if the function is a random generator). *)
include S with type 'a t := 'a gen
(** {2 Restartable generators} *)
module Restart : sig
(* Interface for restartable generators: a value of type ['a t] is a
   function that returns a transient ['a gen] each time it is applied
   to [()]. *)
type 'a t = unit -> 'a gen
(** A restartable generator: applying it to [()] yields a transient,
    consumable generator of ['a]. *)

type 'a restartable = 'a t
(** Alias for ['a t]; it is the name substituted for [S]'s own ['a t] in
    the [include] below. *)

include S with type 'a t := 'a restartable
(* Brings in every operation of the common signature [S], with [S]'s
   ['a t] replaced by the restartable generator type. *)
val cycle : 'a t -> 'a t
(** Cycle through the enum, endlessly. The enum must not be empty. *)

val lift : ('a gen -> 'b) -> 'a t -> 'b
(** NOTE(review): presumably [lift f e] applies [f] to a transient
    generator obtained from [e] — confirm against the implementation
    in gen.ml. *)

val lift2 : ('a gen -> 'b gen -> 'c) -> 'a t -> 'b t -> 'c
(** NOTE(review): presumably the two-argument analogue of {!lift},
    applying the function to one transient generator obtained from each
    restartable argument — confirm against the implementation in gen.ml. *)
end
(** {2 Utils} *)
val persistent : 'a t -> 'a Restart.t
(** Store content of the generator in memory, to be able to iterate on it
several times later *)
val start : 'a Restart.t -> 'a t
(** Create a new transient generator *)

View file

@ -178,7 +178,6 @@ let length l = l.size
(** Iterator on the skip list *)
let gen l =
fun () ->
let x = ref (next l.data 0) in
fun () ->
match !x with

View file

@ -7,3 +7,11 @@ let print_int_list l =
(Sequence.pp_seq ~sep:", " Format.pp_print_int)
(Sequence.of_list l);
Buffer.contents b
(** [print_int_int_list l] renders the list of integer pairs [l] as a string:
    each pair is printed as ["i, j"], pairs are separated by [", "], and the
    whole is wrapped in square brackets inside a horizontal box. *)
let print_int_int_list l =
  let buf = Buffer.create 20 in
  (* Formatter for a single pair of integers. *)
  let pp_pair out (a, b) = Format.fprintf out "%d, %d" a b in
  let pairs = Sequence.of_list l in
  Format.bprintf buf "@[<h>[%a]@]" (Sequence.pp_seq ~sep:", " pp_pair) pairs;
  Buffer.contents buf

View file

@ -2,6 +2,8 @@
open OUnit
open Gen.Infix
module GR = Gen.Restart
let pint i = string_of_int i
let plist l = Utils.sprintf "%a"
(Sequence.pp_seq Format.pp_print_int) (Sequence.of_list l)
@ -9,18 +11,18 @@ let pstrlist l = Utils.sprintf "%a"
(Sequence.pp_seq Format.pp_print_string) (Sequence.of_list l)
let test_singleton () =
let e = Gen.singleton 42 in
let gen = Gen.start e in
OUnit.assert_equal 42 (Gen.Gen.next gen);
OUnit.assert_raises Gen.EOG (fun () -> Gen.Gen.next gen);
OUnit.assert_equal 1 (Gen.length e);
let gen = Gen.singleton 42 in
OUnit.assert_equal 42 (Gen.get gen);
OUnit.assert_raises Gen.EOG (fun () -> Gen.get gen);
let gen = Gen.singleton 42 in
OUnit.assert_equal 1 (Gen.length gen);
()
let test_iter () =
let e = 1 -- 10 in
OUnit.assert_equal ~printer:pint 10 (Gen.length e);
OUnit.assert_equal [1;2] (Gen.to_list (1 -- 2));
OUnit.assert_equal [1;2;3;4;5] (Gen.to_list (Gen.take 5 e));
let e = GR.(1 -- 10) in
OUnit.assert_equal ~printer:pint 10 (GR.length e);
OUnit.assert_equal [1;2] GR.(to_list (1 -- 2));
OUnit.assert_equal [1;2;3;4;5] (GR.to_list (GR.take 5 e));
()
let test_map () =
@ -30,7 +32,7 @@ let test_map () =
()
let test_append () =
let e = (1 -- 5) @@ (6 -- 10) in
let e = Gen.append (1 -- 5) (6 -- 10) in
OUnit.assert_equal [10;9;8;7;6;5;4;3;2;1] (Gen.to_rev_list e);
()
@ -64,26 +66,23 @@ let test_persistent () =
if j > 5 then raise Gen.EOG else (incr i; j)
in
let e = Gen.persistent gen in
OUnit.assert_equal [0;1;2;3;4;5] (Gen.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (Gen.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (Gen.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (GR.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (GR.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (GR.to_list e);
()
let test_round_robin () =
let e = Gen.round_robin ~n:2 (1--10) in
let e = Gen.map Gen.persistent e in
let l = Gen.to_list e in
match l with
let e = GR.round_robin ~n:2 GR.(1--10) in
match e with
| [a;b] ->
OUnit.assert_equal [1;3;5;7;9] (Gen.to_list a);
OUnit.assert_equal [2;4;6;8;10] (Gen.to_list b)
| _ -> OUnit.assert_failure "wrong list lenght"
let test_big_rr () =
let e = Gen.round_robin ~n:3 (1 -- 999) in
let l = Gen.to_list e in
let l' = List.map Gen.Gen.length l in
OUnit.assert_equal [333;333;333] l';
let e = GR.round_robin ~n:3 GR.(1 -- 999) in
let l = List.map Gen.length e in
OUnit.assert_equal [333;333;333] l;
()
let test_merge_sorted () =
@ -106,13 +105,9 @@ let test_intersperse () =
()
let test_product () =
let printer = Helpers.print_int_int_list in
let e = Gen.product (1--3) (4--5) in
OUnit.assert_equal [1,4; 1,5; 2,4; 2,5; 3,4; 3,5] (Gen.to_list e);
()
let test_fair_product () =
let e = Gen.fair_product (Gen.repeat ()) (1--3) in
let _ = Gen.take 10 e in (* succeeds -> ok *)
OUnit.assert_equal ~printer [1,4; 1,5; 2,4; 2,5; 3,4; 3,5] (List.sort compare (Gen.to_list e));
()
let suite =
@ -132,5 +127,4 @@ let suite =
"test_interleave" >:: test_interleave;
"test_intersperse" >:: test_intersperse;
"test_product" >:: test_product;
"test_fair_product" >:: test_fair_product;
]