Heavy modification of Gen's API. Now the default 'a Gen.t is a transient, consumable generator of 'a, and 'a Gen.Restart.t can be used for restartable generators.
This commit is contained in:
Simon Cruanes 2013-11-11 23:04:10 +01:00
parent e36fc5275a
commit 11611894e9
5 changed files with 1336 additions and 990 deletions

1480
gen.ml

File diff suppressed because it is too large Load diff

191
gen.mli
View file

@ -23,74 +23,43 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*) *)
(** {1 Restartable generators} *) (** {1 Generators}
(** This structure is inspired from Ocaml Batteries' BatEnum.t. It features Values of type ['a Gen.t] represent a possibly infinite sequence of values
restartable generators. A value of type ['a Gen.t] represents a finite or of type 'a. One can only iterate once on the sequence, as it is consumed
infinite lazy enumeration of values of type ['a]. It can be instantiated by iteration/deconstruction/access. The exception {!EOG} (end of generator)
as many times as needed into a ['a generator], which is a consumable is raised when the generator is empty.
enumeration. The next element of a ['a generator] is obtained by
invoking it as a function; an exception, [EOG], is raised when no The submodule {!Restart} provides utilities to work with
more elements are available. *) {b restartable generators}, that is, functions [unit -> 'a Gen.t] that
allow to build as many generators from the same source as needed.
*)
(** {2 Global type declarations} *)
exception EOG exception EOG
(** End of Generation *) (** End of Generation *)
type 'a t = unit -> 'a generator type 'a t = unit -> 'a
(** An enum is a generator of generators *)
and 'a generator = unit -> 'a
(** A generator may be called several times, yielding the next value (** A generator may be called several times, yielding the next value
each time. It raises EOG when it reaches the end. *) each time. It raises EOG when it reaches the end. *)
(** {2 Generator functions} *) type 'a gen = 'a t
val start : 'a t -> 'a generator (** {2 Common signature for transient and restartable generators} *)
(** Create a new generator on the given restartable generator. *)
(** {2 Transient generators} *) module type S = sig
type 'a t
module Gen : sig
val empty : 'a generator
val next : 'a generator -> 'a
(** Get next element, or raise EOG *)
val junk : 'a generator -> unit
(** Drop element *)
val fold : ('b -> 'a -> 'b) -> 'b -> 'a generator -> 'b
(** Fold over the generator *)
val iter : ('a -> unit) -> 'a generator -> unit
(** Iterate on the generator *)
val length : 'a generator -> int
(** Consume generator to compute its length *)
val of_list : 'a list -> 'a generator
val to_list : 'a generator -> 'a list (* not tailrec *)
val to_rev_list : 'a generator -> 'a list
val int_range : int -> int -> int generator
end
(** {2 Basic constructors} *)
val empty : 'a t val empty : 'a t
(** Empty enum, with no elements *) (** Empty generator, with no elements *)
val singleton : 'a -> 'a t val singleton : 'a -> 'a t
(** One-element enum *) (** One-element generator *)
val repeat : 'a -> 'a t val repeat : 'a -> 'a t
(** Repeat same element endlessly *) (** Repeat same element endlessly *)
val repeatedly : (unit -> 'a) -> 'a t
(** Call the same function an infinite number of times (useful for instance
if the function is a random generator). *)
val iterate : 'a -> ('a -> 'a) -> 'a t val iterate : 'a -> ('a -> 'a) -> 'a t
(** [iterate x f] is [[x; f x; f (f x); f (f (f x)); ...]] *) (** [iterate x f] is [[x; f x; f (f x); f (f (f x)); ...]] *)
@ -137,13 +106,10 @@ val append : 'a t -> 'a t -> 'a t
(** Append the two enums; the result contains the elements of the first, (** Append the two enums; the result contains the elements of the first,
then the elements of the second enum. *) then the elements of the second enum. *)
val cycle : 'a t -> 'a t val flatten : 'a gen t -> 'a t
(** Cycle through the enum, endlessly. The enum must not be empty. *) (** Flatten the enumeration of generators *)
val flatten : 'a t t -> 'a t val flatMap : ('a -> 'b gen) -> 'a t -> 'b t
(** Flatten the enum of enum. *)
val flatMap : ('a -> 'b t) -> 'a t -> 'b t
(** Monadic bind; each element is transformed to a sub-enum (** Monadic bind; each element is transformed to a sub-enum
which is then iterated on, before the next element is processed, which is then iterated on, before the next element is processed,
and so on. *) and so on. *)
@ -158,7 +124,8 @@ val drop : int -> 'a t -> 'a t
(** Drop n elements *) (** Drop n elements *)
val nth : int -> 'a t -> 'a val nth : int -> 'a t -> 'a
(** n-th element, or Not_found *) (** n-th element, or Not_found
@raise Not_found if the generator contains less than [n] arguments *)
val filter : ('a -> bool) -> 'a t -> 'a t val filter : ('a -> bool) -> 'a t -> 'a t
(** Filter out elements that do not satisfy the predicate. *) (** Filter out elements that do not satisfy the predicate. *)
@ -216,8 +183,8 @@ val compare : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> int
(** {2 Complex combinators} *) (** {2 Complex combinators} *)
val merge : 'a t t -> 'a t val merge : 'a gen t -> 'a t
(** Pick elements fairly in each sub-enum. The given enum (** Pick elements fairly in each sub-generator. The given enum
must be finite (not its elements, though). The merge of enums must be finite (not its elements, though). The merge of enums
[e1, e2, ... en] picks one element in [e1], then one element in [e2], [e1, e2, ... en] picks one element in [e1], then one element in [e2],
then in [e3], ..., then in [en], and then starts again at [e1]. Once then in [e3], ..., then in [en], and then starts again at [e1]. Once
@ -225,15 +192,6 @@ val merge : 'a t t -> 'a t
their merge is also empty. their merge is also empty.
For instance, [merge [1;3;5] [2;4;6]] will be [1;2;3;4;5;6]. *) For instance, [merge [1;3;5] [2;4;6]] will be [1;2;3;4;5;6]. *)
(** {3 Mutable heap (taken from heap.ml to avoid dependencies)} *)
module Heap : sig
type 'a t (** A heap containing values of type 'a *)
val empty : cmp:('a -> 'a -> int) -> 'a t
val insert : 'a t -> 'a -> unit
val is_empty : 'a t -> bool
val pop : 'a t -> 'a
end
val intersection : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t val intersection : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
(** Intersection of two sorted sequences. Only elements that occur in both (** Intersection of two sorted sequences. Only elements that occur in both
inputs appear in the output *) inputs appear in the output *)
@ -241,23 +199,19 @@ val intersection : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
val sorted_merge : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t val sorted_merge : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
(** Merge two sorted sequences into a sorted sequence *) (** Merge two sorted sequences into a sorted sequence *)
val sorted_merge_n : ?cmp:('a -> 'a -> int) -> 'a t t -> 'a t val sorted_merge_n : ?cmp:('a -> 'a -> int) -> 'a gen t -> 'a t
(** Sorted merge of multiple sorted sequences *) (** Sorted merge of multiple sorted sequences *)
val persistent : 'a generator -> 'a t val tee : ?n:int -> 'a t -> 'a gen list
(** Store content of the generator in memory, to be able to iterate on it
several times later *)
val round_robin : ?n:int -> 'a t -> 'a generator t
(** Split the enum into [n] generators in a fair way. Elements with
[index = k mod n] with go to the k-th enum. [n] default value
is 2. *)
val tee : ?n:int -> 'a t -> 'a generator t
(** Duplicate the enum into [n] generators (default 2). The generators (** Duplicate the enum into [n] generators (default 2). The generators
share the same underlying instance of the enum, so the optimal case is share the same underlying instance of the enum, so the optimal case is
when they are consumed evenly *) when they are consumed evenly *)
val round_robin : ?n:int -> 'a t -> 'a gen list
(** Split the enum into [n] generators in a fair way. Elements with
[index = k mod n] with go to the k-th enum. [n] default value
is 2. *)
val interleave : 'a t -> 'a t -> 'a t val interleave : 'a t -> 'a t -> 'a t
(** [interleave a b] yields an element of [a], then an element of [b], (** [interleave a b] yields an element of [a], then an element of [b],
and so on until the end of [a] or [b] is reached. *) and so on until the end of [a] or [b] is reached. *)
@ -266,12 +220,8 @@ val intersperse : 'a -> 'a t -> 'a t
(** Put the separator element between all elements of the given enum *) (** Put the separator element between all elements of the given enum *)
val product : 'a t -> 'b t -> ('a * 'b) t val product : 'a t -> 'b t -> ('a * 'b) t
(** Cartesian product. If the first sequence is infinite, some pairs (** Cartesian product, in no predictable order. Works even if some of the
will never be generated. *) arguments are infinite. *)
val fair_product : 'a t -> 'b t -> ('a * 'b) t
(** Cartesian product, in no predictable order. Contrary to {!product} this
function does eventually yield every pair *)
val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t
(** Group equal consecutive elements together. *) (** Group equal consecutive elements together. *)
@ -287,7 +237,7 @@ val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t
(** Sort and remove duplicates. The enum must be finite. *) (** Sort and remove duplicates. The enum must be finite. *)
(* TODO later (* TODO later
val permutations : 'a t -> 'a t t val permutations : 'a t -> 'a gen t
(** Permutations of the enum. Each permutation becomes unavailable once (** Permutations of the enum. Each permutation becomes unavailable once
the next one is produced. *) the next one is produced. *)
@ -322,28 +272,69 @@ val int_range : int -> int -> int t
(** [int_range a b] enumerates integers between [a] and [b], included. [a] (** [int_range a b] enumerates integers between [a] and [b], included. [a]
is assumed to be smaller than [b]. *) is assumed to be smaller than [b]. *)
val pp : ?start:string -> ?stop:string -> ?sep:string -> ?horizontal:bool ->
(Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit
(** Pretty print an enum on a formatter. *)
module Infix : sig module Infix : sig
val (@@) : 'a t -> 'a t -> 'a t val (--) : int -> int -> int t
(** Synonym for {! append} *) (** Synonym for {! int_range} *)
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t val (>>=) : 'a t -> ('a -> 'b gen) -> 'b t
(** Synonym for {! flatMap}, with arguments reversed *) (** Monadic bind operator *)
end
val (--) : int -> int -> int t val (--) : int -> int -> int t
(** Synonym for {! int_range} *) (** Synonym for {! int_range} *)
val (|>) : 'a -> ('a -> 'b) -> 'b val (>>=) : 'a t -> ('a -> 'b gen) -> 'b t
(** Function application, reversed *) (** Monadic bind operator *)
val pp : ?start:string -> ?stop:string -> ?sep:string -> ?horizontal:bool ->
(Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit
(** Pretty print the content of the generator on a formatter. *)
end end
val (@@) : 'a t -> 'a t -> 'a t (** {2 Transient generators} *)
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t val get : 'a t -> 'a
(** Get the next value
@raise EOG if there is no next value *)
val (--) : int -> int -> int t val next : 'a t -> 'a
(** Synonym for {!get} *)
val (|>) : 'a -> ('a -> 'b) -> 'b val get_safe : 'a t -> 'a option
(** Get the next value, or return None *)
val junk : 'a t -> unit
(** Drop the next value, discarding it.
@raise EOG if there is no next value *)
val repeatedly : (unit -> 'a) -> 'a t
(** Call the same function an infinite number of times (useful for instance
if the function is a random generator). *)
include S with type 'a t := 'a gen
(** {2 Restartable generators} *)
module Restart : sig
(** Restartable generators: a value of this module's type [t] is a function
that returns a transient generator each time it is applied to [()]. *)
type 'a t = unit -> 'a gen
(** A restartable generator is a function producing a transient ['a gen]. *)
type 'a restartable = 'a t
(** Alias of {!t}; it is the name used in the signature substitution below. *)
include S with type 'a t := 'a restartable
(** All operations of the common signature [S], with the type [t] of [S]
replaced by ['a restartable]. *)
val cycle : 'a t -> 'a t
(** Cycle through the enum, endlessly. The enum must not be empty. *)
val lift : ('a gen -> 'b) -> 'a t -> 'b
(** NOTE(review): presumably [lift f e] applies [f] to a transient generator
obtained from [e]; the implementation is not visible here, confirm against
gen.ml. *)
val lift2 : ('a gen -> 'b gen -> 'c) -> 'a t -> 'b t -> 'c
(** NOTE(review): presumably the two-argument counterpart of {!lift}, applying
the function to one transient generator per restartable argument; confirm
against gen.ml. *)
end
(** {2 Utils} *)
val persistent : 'a t -> 'a Restart.t
(** Store content of the generator in memory, to be able to iterate on it
several times later *)
val start : 'a Restart.t -> 'a t
(** Create a new transient generator *)

View file

@ -178,7 +178,6 @@ let length l = l.size
(** Iterator on the skip list *) (** Iterator on the skip list *)
let gen l = let gen l =
fun () ->
let x = ref (next l.data 0) in let x = ref (next l.data 0) in
fun () -> fun () ->
match !x with match !x with

View file

@ -7,3 +7,11 @@ let print_int_list l =
(Sequence.pp_seq ~sep:", " Format.pp_print_int) (Sequence.pp_seq ~sep:", " Format.pp_print_int)
(Sequence.of_list l); (Sequence.of_list l);
Buffer.contents b Buffer.contents b
(** [print_int_int_list l] renders the list of integer pairs [l] as a string.
    Each pair is printed with the format ["%d, %d"]; the pairs are handed to
    [Sequence.pp_seq] with [", "] as separator, and the result is enclosed in
    square brackets inside a horizontal box. *)
let print_int_int_list l =
  let pp_pair out (i, j) = Format.fprintf out "%d, %d" i j in
  let buf = Buffer.create 20 in
  (* [Format.bprintf] is deprecated (and no longer available in OCaml 5), so
     write through an explicit formatter bound to the buffer instead. *)
  let out = Format.formatter_of_buffer buf in
  Format.fprintf out "@[<h>[%a]@]"
    (Sequence.pp_seq ~sep:", " pp_pair)
    (Sequence.of_list l);
  (* Flush the formatter so that all pending output reaches [buf] before it
     is read back. *)
  Format.pp_print_flush out ();
  Buffer.contents buf

View file

@ -2,6 +2,8 @@
open OUnit open OUnit
open Gen.Infix open Gen.Infix
module GR = Gen.Restart
let pint i = string_of_int i let pint i = string_of_int i
let plist l = Utils.sprintf "%a" let plist l = Utils.sprintf "%a"
(Sequence.pp_seq Format.pp_print_int) (Sequence.of_list l) (Sequence.pp_seq Format.pp_print_int) (Sequence.of_list l)
@ -9,18 +11,18 @@ let pstrlist l = Utils.sprintf "%a"
(Sequence.pp_seq Format.pp_print_string) (Sequence.of_list l) (Sequence.pp_seq Format.pp_print_string) (Sequence.of_list l)
let test_singleton () = let test_singleton () =
let e = Gen.singleton 42 in let gen = Gen.singleton 42 in
let gen = Gen.start e in OUnit.assert_equal 42 (Gen.get gen);
OUnit.assert_equal 42 (Gen.Gen.next gen); OUnit.assert_raises Gen.EOG (fun () -> Gen.get gen);
OUnit.assert_raises Gen.EOG (fun () -> Gen.Gen.next gen); let gen = Gen.singleton 42 in
OUnit.assert_equal 1 (Gen.length e); OUnit.assert_equal 1 (Gen.length gen);
() ()
let test_iter () = let test_iter () =
let e = 1 -- 10 in let e = GR.(1 -- 10) in
OUnit.assert_equal ~printer:pint 10 (Gen.length e); OUnit.assert_equal ~printer:pint 10 (GR.length e);
OUnit.assert_equal [1;2] (Gen.to_list (1 -- 2)); OUnit.assert_equal [1;2] GR.(to_list (1 -- 2));
OUnit.assert_equal [1;2;3;4;5] (Gen.to_list (Gen.take 5 e)); OUnit.assert_equal [1;2;3;4;5] (GR.to_list (GR.take 5 e));
() ()
let test_map () = let test_map () =
@ -30,7 +32,7 @@ let test_map () =
() ()
let test_append () = let test_append () =
let e = (1 -- 5) @@ (6 -- 10) in let e = Gen.append (1 -- 5) (6 -- 10) in
OUnit.assert_equal [10;9;8;7;6;5;4;3;2;1] (Gen.to_rev_list e); OUnit.assert_equal [10;9;8;7;6;5;4;3;2;1] (Gen.to_rev_list e);
() ()
@ -64,26 +66,23 @@ let test_persistent () =
if j > 5 then raise Gen.EOG else (incr i; j) if j > 5 then raise Gen.EOG else (incr i; j)
in in
let e = Gen.persistent gen in let e = Gen.persistent gen in
OUnit.assert_equal [0;1;2;3;4;5] (Gen.to_list e); OUnit.assert_equal [0;1;2;3;4;5] (GR.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (Gen.to_list e); OUnit.assert_equal [0;1;2;3;4;5] (GR.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (Gen.to_list e); OUnit.assert_equal [0;1;2;3;4;5] (GR.to_list e);
() ()
let test_round_robin () = let test_round_robin () =
let e = Gen.round_robin ~n:2 (1--10) in let e = GR.round_robin ~n:2 GR.(1--10) in
let e = Gen.map Gen.persistent e in match e with
let l = Gen.to_list e in
match l with
| [a;b] -> | [a;b] ->
OUnit.assert_equal [1;3;5;7;9] (Gen.to_list a); OUnit.assert_equal [1;3;5;7;9] (Gen.to_list a);
OUnit.assert_equal [2;4;6;8;10] (Gen.to_list b) OUnit.assert_equal [2;4;6;8;10] (Gen.to_list b)
| _ -> OUnit.assert_failure "wrong list lenght" | _ -> OUnit.assert_failure "wrong list lenght"
let test_big_rr () = let test_big_rr () =
let e = Gen.round_robin ~n:3 (1 -- 999) in let e = GR.round_robin ~n:3 GR.(1 -- 999) in
let l = Gen.to_list e in let l = List.map Gen.length e in
let l' = List.map Gen.Gen.length l in OUnit.assert_equal [333;333;333] l;
OUnit.assert_equal [333;333;333] l';
() ()
let test_merge_sorted () = let test_merge_sorted () =
@ -106,13 +105,9 @@ let test_intersperse () =
() ()
let test_product () = let test_product () =
let printer = Helpers.print_int_int_list in
let e = Gen.product (1--3) (4--5) in let e = Gen.product (1--3) (4--5) in
OUnit.assert_equal [1,4; 1,5; 2,4; 2,5; 3,4; 3,5] (Gen.to_list e); OUnit.assert_equal ~printer [1,4; 1,5; 2,4; 2,5; 3,4; 3,5] (List.sort compare (Gen.to_list e));
()
let test_fair_product () =
let e = Gen.fair_product (Gen.repeat ()) (1--3) in
let _ = Gen.take 10 e in (* succeeds -> ok *)
() ()
let suite = let suite =
@ -132,5 +127,4 @@ let suite =
"test_interleave" >:: test_interleave; "test_interleave" >:: test_interleave;
"test_intersperse" >:: test_intersperse; "test_intersperse" >:: test_intersperse;
"test_product" >:: test_product; "test_product" >:: test_product;
"test_fair_product" >:: test_fair_product;
] ]