heavy modification of Gen's API. Now the default 'a Gen.t is a transient, consumable generator

of 'a, and 'a Gen.Restart.t can be used for restartable generators.
This commit is contained in:
Simon Cruanes 2013-11-11 23:04:10 +01:00
parent e36fc5275a
commit 11611894e9
5 changed files with 1336 additions and 990 deletions

1480
gen.ml

File diff suppressed because it is too large Load diff

191
gen.mli
View file

@ -23,74 +23,43 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Restartable generators} *)
(** {1 Generators}
(** This structure is inspired from Ocaml Batteries' BatEnum.t. It features
restartable generators. A value of type ['a Gen.t] represents a finite or
infinite lazy enumeration of values of type ['a]. It can be instantiated
as many times as needed into a ['a generator], which is a consumable
enumeration. The next element of a ['a generator] is obtained by
invoking it as a function; an exception, [EOG], is raised when no
more elements are available. *)
Values of type ['a Gen.t] represent a possibly infinite sequence of values
of type 'a. One can only iterate once on the sequence, as it is consumed
by iteration/deconstruction/access. The exception {!EOG} (end of generator)
is raised when the generator is empty.
The submodule {!Restart} provides utilities to work with
{b restartable generators}, that is, functions [unit -> 'a Gen.t] that
allow building as many generators from the same source as needed.
*)
(** {2 Global type declarations} *)
exception EOG
(** End of Generation *)
type 'a t = unit -> 'a generator
(** An enum is a generator of generators *)
and 'a generator = unit -> 'a
type 'a t = unit -> 'a
(** A generator may be called several times, yielding the next value
each time. It raises EOG when it reaches the end. *)
(** {2 Generator functions} *)
type 'a gen = 'a t
val start : 'a t -> 'a generator
(** Create a new generator on the given restartable generator. *)
(** {2 Common signature for transient and restartable generators} *)
(** {2 Transient generators} *)
module Gen : sig
val empty : 'a generator
val next : 'a generator -> 'a
(** Get next element, or raise EOG *)
val junk : 'a generator -> unit
(** Drop element *)
val fold : ('b -> 'a -> 'b) -> 'b -> 'a generator -> 'b
(** Fold over the generator *)
val iter : ('a -> unit) -> 'a generator -> unit
(** Iterate on the generator *)
val length : 'a generator -> int
(** Consume generator to compute its length *)
val of_list : 'a list -> 'a generator
val to_list : 'a generator -> 'a list (* not tailrec *)
val to_rev_list : 'a generator -> 'a list
val int_range : int -> int -> int generator
end
(** {2 Basic constructors} *)
module type S = sig
type 'a t
val empty : 'a t
(** Empty enum, with no elements *)
(** Empty generator, with no elements *)
val singleton : 'a -> 'a t
(** One-element enum *)
(** One-element generator *)
val repeat : 'a -> 'a t
(** Repeat same element endlessly *)
val repeatedly : (unit -> 'a) -> 'a t
(** Call the same function an infinite number of times (useful for instance
if the function is a random generator). *)
val iterate : 'a -> ('a -> 'a) -> 'a t
(** [iterate x f] is [[x; f x; f (f x); f (f (f x)); ...]] *)
@ -137,13 +106,10 @@ val append : 'a t -> 'a t -> 'a t
(** Append the two enums; the result contains the elements of the first,
then the elements of the second enum. *)
val cycle : 'a t -> 'a t
(** Cycle through the enum, endlessly. The enum must not be empty. *)
val flatten : 'a gen t -> 'a t
(** Flatten the enumeration of generators *)
val flatten : 'a t t -> 'a t
(** Flatten the enum of enum. *)
val flatMap : ('a -> 'b t) -> 'a t -> 'b t
val flatMap : ('a -> 'b gen) -> 'a t -> 'b t
(** Monadic bind; each element is transformed to a sub-enum
which is then iterated on, before the next element is processed,
and so on. *)
@ -158,7 +124,8 @@ val drop : int -> 'a t -> 'a t
(** Drop n elements *)
val nth : int -> 'a t -> 'a
(** n-th element, or Not_found *)
(** n-th element, or Not_found
@raise Not_found if the generator contains fewer than [n] elements *)
val filter : ('a -> bool) -> 'a t -> 'a t
(** Filter out elements that do not satisfy the predicate. *)
@ -216,8 +183,8 @@ val compare : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> int
(** {2 Complex combinators} *)
val merge : 'a t t -> 'a t
(** Pick elements fairly in each sub-enum. The given enum
val merge : 'a gen t -> 'a t
(** Pick elements fairly in each sub-generator. The given enum
must be finite (not its elements, though). The merge of enums
[e1, e2, ... en] picks one element in [e1], then one element in [e2],
then in [e3], ..., then in [en], and then starts again at [e1]. Once
@ -225,15 +192,6 @@ val merge : 'a t t -> 'a t
their merge is also empty.
For instance, [merge [1;3;5] [2;4;6]] will be [1;2;3;4;5;6]. *)
(** {3 Mutable heap (taken from heap.ml to avoid dependencies)} *)
module Heap : sig
type 'a t (** A heap containing values of type 'a *)
val empty : cmp:('a -> 'a -> int) -> 'a t
val insert : 'a t -> 'a -> unit
val is_empty : 'a t -> bool
val pop : 'a t -> 'a
end
val intersection : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
(** Intersection of two sorted sequences. Only elements that occur in both
inputs appear in the output *)
@ -241,23 +199,19 @@ val intersection : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
val sorted_merge : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t
(** Merge two sorted sequences into a sorted sequence *)
val sorted_merge_n : ?cmp:('a -> 'a -> int) -> 'a t t -> 'a t
val sorted_merge_n : ?cmp:('a -> 'a -> int) -> 'a gen t -> 'a t
(** Sorted merge of multiple sorted sequences *)
val persistent : 'a generator -> 'a t
(** Store content of the generator in memory, to be able to iterate on it
several times later *)
val round_robin : ?n:int -> 'a t -> 'a generator t
(** Split the enum into [n] generators in a fair way. Elements with
[index = k mod n] with go to the k-th enum. [n] default value
is 2. *)
val tee : ?n:int -> 'a t -> 'a generator t
val tee : ?n:int -> 'a t -> 'a gen list
(** Duplicate the enum into [n] generators (default 2). The generators
share the same underlying instance of the enum, so the optimal case is
when they are consumed evenly *)
val round_robin : ?n:int -> 'a t -> 'a gen list
(** Split the enum into [n] generators in a fair way. Elements with
[index = k mod n] will go to the k-th enum. The default value of [n]
is 2. *)
val interleave : 'a t -> 'a t -> 'a t
(** [interleave a b] yields an element of [a], then an element of [b],
and so on until the end of [a] or [b] is reached. *)
@ -266,12 +220,8 @@ val intersperse : 'a -> 'a t -> 'a t
(** Put the separator element between all elements of the given enum *)
val product : 'a t -> 'b t -> ('a * 'b) t
(** Cartesian product. If the first sequence is infinite, some pairs
will never be generated. *)
val fair_product : 'a t -> 'b t -> ('a * 'b) t
(** Cartesian product, in no predictable order. Contrary to {!product} this
function does eventually yield every pair *)
(** Cartesian product, in no predictable order. Works even if some of the
arguments are infinite. *)
val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t
(** Group equal consecutive elements together. *)
@ -287,7 +237,7 @@ val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t
(** Sort and remove duplicates. The enum must be finite. *)
(* TODO later
val permutations : 'a t -> 'a t t
val permutations : 'a t -> 'a gen t
(** Permutations of the enum. Each permutation becomes unavailable once
the next one is produced. *)
@ -322,28 +272,69 @@ val int_range : int -> int -> int t
(** [int_range a b] enumerates integers between [a] and [b], included. [a]
is assumed to be smaller than [b]. *)
val pp : ?start:string -> ?stop:string -> ?sep:string -> ?horizontal:bool ->
(Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit
(** Pretty print an enum on a formatter. *)
module Infix : sig
val (@@) : 'a t -> 'a t -> 'a t
(** Synonym for {! append} *)
val (--) : int -> int -> int t
(** Synonym for {! int_range} *)
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
(** Synonym for {! flatMap}, with arguments reversed *)
val (>>=) : 'a t -> ('a -> 'b gen) -> 'b t
(** Monadic bind operator *)
end
val (--) : int -> int -> int t
(** Synonym for {! int_range} *)
val (|>) : 'a -> ('a -> 'b) -> 'b
(** Function application, reversed *)
val (>>=) : 'a t -> ('a -> 'b gen) -> 'b t
(** Monadic bind operator *)
val pp : ?start:string -> ?stop:string -> ?sep:string -> ?horizontal:bool ->
(Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit
(** Pretty print the content of the generator on a formatter. *)
end
val (@@) : 'a t -> 'a t -> 'a t
(** {2 Transient generators} *)
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
val get : 'a t -> 'a
(** Get the next value
@raise EOG if there is no next value *)
val (--) : int -> int -> int t
val next : 'a t -> 'a
(** Synonym for {!get} *)
val (|>) : 'a -> ('a -> 'b) -> 'b
val get_safe : 'a t -> 'a option
(** Get the next value, or return None *)
val junk : 'a t -> unit
(** Drop the next value, discarding it.
@raise EOG if there is no next value *)
val repeatedly : (unit -> 'a) -> 'a t
(** Call the same function an infinite number of times (useful for instance
if the function is a random generator). *)
include S with type 'a t := 'a gen
(** {2 Restartable generators} *)
module Restart : sig
type 'a t = unit -> 'a gen
type 'a restartable = 'a t
include S with type 'a t := 'a restartable
val cycle : 'a t -> 'a t
(** Cycle through the enum, endlessly. The enum must not be empty. *)
val lift : ('a gen -> 'b) -> 'a t -> 'b
val lift2 : ('a gen -> 'b gen -> 'c) -> 'a t -> 'b t -> 'c
end
(** {2 Utils} *)
val persistent : 'a t -> 'a Restart.t
(** Store content of the generator in memory, to be able to iterate on it
several times later *)
val start : 'a Restart.t -> 'a t
(** Create a new transient generator *)

View file

@ -178,7 +178,6 @@ let length l = l.size
(** Iterator on the skip list *)
let gen l =
fun () ->
let x = ref (next l.data 0) in
fun () ->
match !x with

View file

@ -7,3 +7,11 @@ let print_int_list l =
(Sequence.pp_seq ~sep:", " Format.pp_print_int)
(Sequence.of_list l);
Buffer.contents b
(** Render a list of integer pairs as a string. Each pair [(i, j)] is
    printed as ["i, j"], consecutive pairs are separated by [", "], and the
    whole output is enclosed in square brackets inside a horizontal box. *)
let print_int_int_list l =
  let buf = Buffer.create 20 in
  (* Printer for a single pair; note that no parentheses are emitted. *)
  let pp_pair out (a, b) = Format.fprintf out "%d, %d" a b in
  let pp_pairs = Sequence.pp_seq ~sep:", " pp_pair in
  Format.bprintf buf "@[<h>[%a]@]" pp_pairs (Sequence.of_list l);
  Buffer.contents buf

View file

@ -2,6 +2,8 @@
open OUnit
open Gen.Infix
module GR = Gen.Restart
let pint i = string_of_int i
let plist l = Utils.sprintf "%a"
(Sequence.pp_seq Format.pp_print_int) (Sequence.of_list l)
@ -9,18 +11,18 @@ let pstrlist l = Utils.sprintf "%a"
(Sequence.pp_seq Format.pp_print_string) (Sequence.of_list l)
let test_singleton () =
let e = Gen.singleton 42 in
let gen = Gen.start e in
OUnit.assert_equal 42 (Gen.Gen.next gen);
OUnit.assert_raises Gen.EOG (fun () -> Gen.Gen.next gen);
OUnit.assert_equal 1 (Gen.length e);
let gen = Gen.singleton 42 in
OUnit.assert_equal 42 (Gen.get gen);
OUnit.assert_raises Gen.EOG (fun () -> Gen.get gen);
let gen = Gen.singleton 42 in
OUnit.assert_equal 1 (Gen.length gen);
()
let test_iter () =
let e = 1 -- 10 in
OUnit.assert_equal ~printer:pint 10 (Gen.length e);
OUnit.assert_equal [1;2] (Gen.to_list (1 -- 2));
OUnit.assert_equal [1;2;3;4;5] (Gen.to_list (Gen.take 5 e));
let e = GR.(1 -- 10) in
OUnit.assert_equal ~printer:pint 10 (GR.length e);
OUnit.assert_equal [1;2] GR.(to_list (1 -- 2));
OUnit.assert_equal [1;2;3;4;5] (GR.to_list (GR.take 5 e));
()
let test_map () =
@ -30,7 +32,7 @@ let test_map () =
()
let test_append () =
let e = (1 -- 5) @@ (6 -- 10) in
let e = Gen.append (1 -- 5) (6 -- 10) in
OUnit.assert_equal [10;9;8;7;6;5;4;3;2;1] (Gen.to_rev_list e);
()
@ -64,26 +66,23 @@ let test_persistent () =
if j > 5 then raise Gen.EOG else (incr i; j)
in
let e = Gen.persistent gen in
OUnit.assert_equal [0;1;2;3;4;5] (Gen.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (Gen.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (Gen.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (GR.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (GR.to_list e);
OUnit.assert_equal [0;1;2;3;4;5] (GR.to_list e);
()
let test_round_robin () =
let e = Gen.round_robin ~n:2 (1--10) in
let e = Gen.map Gen.persistent e in
let l = Gen.to_list e in
match l with
let e = GR.round_robin ~n:2 GR.(1--10) in
match e with
| [a;b] ->
OUnit.assert_equal [1;3;5;7;9] (Gen.to_list a);
OUnit.assert_equal [2;4;6;8;10] (Gen.to_list b)
| _ -> OUnit.assert_failure "wrong list lenght"
let test_big_rr () =
let e = Gen.round_robin ~n:3 (1 -- 999) in
let l = Gen.to_list e in
let l' = List.map Gen.Gen.length l in
OUnit.assert_equal [333;333;333] l';
let e = GR.round_robin ~n:3 GR.(1 -- 999) in
let l = List.map Gen.length e in
OUnit.assert_equal [333;333;333] l;
()
let test_merge_sorted () =
@ -106,13 +105,9 @@ let test_intersperse () =
()
let test_product () =
let printer = Helpers.print_int_int_list in
let e = Gen.product (1--3) (4--5) in
OUnit.assert_equal [1,4; 1,5; 2,4; 2,5; 3,4; 3,5] (Gen.to_list e);
()
let test_fair_product () =
let e = Gen.fair_product (Gen.repeat ()) (1--3) in
let _ = Gen.take 10 e in (* succeeds -> ok *)
OUnit.assert_equal ~printer [1,4; 1,5; 2,4; 2,5; 3,4; 3,5] (List.sort compare (Gen.to_list e));
()
let suite =
@ -132,5 +127,4 @@ let suite =
"test_interleave" >:: test_interleave;
"test_intersperse" >:: test_intersperse;
"test_product" >:: test_product;
"test_fair_product" >:: test_fair_product;
]