From 732cc90ea6b49cece78d9750d7eb36b84bbe2bcf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 8 Mar 2013 14:18:54 +0100 Subject: [PATCH] updated sequence files --- sequence.ml | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++ sequence.mli | 65 +++++++++++++++++++++++++++- 2 files changed, 182 insertions(+), 2 deletions(-) diff --git a/sequence.ml b/sequence.ml index fe487909..0da84f19 100644 --- a/sequence.ml +++ b/sequence.ml @@ -28,6 +28,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** Sequence abstract iterator type *) type 'a t = ('a -> unit) -> unit +type (+'a, +'b) t2 = ('a -> 'b -> unit) -> unit + (** Sequence of pairs of values of type ['a] and ['b]. *) + (** Build a sequence from a iter function *) let from_iter f = f @@ -208,6 +211,48 @@ let persistent (seq : 'a t) : 'a t = let l = MList.of_seq seq in from_iter (fun k -> MList.iter k l) +(** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time. *) +let sort ?(cmp=Pervasives.compare) seq = + (* use an intermediate list, then sort the list *) + let l = fold (fun l x -> x::l) [] seq in + let l = List.fast_sort cmp l in + fun k -> List.iter k l + +(** Group equal consecutive elements. *) +let group ?(eq=fun x y -> x = y) seq = + fun k -> + let cur = ref [] in + seq (fun x -> + match !cur with + | [] -> cur := [x] + | (y::_) as l when eq x y -> + cur := x::l (* [x] belongs to the group *) + | (_::_) as l -> + k l; (* yield group, and start another one *) + cur := [x]); + (* last list *) + if !cur <> [] then k !cur + +(** Remove consecutive duplicate elements. Basically this is + like [fun seq -> map List.hd (group seq)]. *) +let uniq ?(eq=fun x y -> x = y) seq = + fun k -> + let has_prev = ref false + and prev = ref (Obj.magic 0) in (* avoid option type, costly *) + seq (fun x -> + if !has_prev && eq !prev x + then () (* duplicate *) + else begin + has_prev := true; + prev := x; + k x + end) + +(** Sort the sequence and remove duplicates. Eager, same as [sort] *) +let sort_uniq ?(cmp=Pervasives.compare) seq = + let seq' = sort ~cmp seq in + uniq ~eq:(fun x y -> cmp x y = 0) seq' + (** Cartesian product of the sequences. *) let product outer inner = let outer = persistent outer in @@ -226,6 +271,14 @@ let unfoldr f b = in from_iter (fun k -> unfold k b) +(** Sequence of intermediate results *) +let scan f acc seq = + from_iter + (fun k -> + k acc; + let acc = ref acc in + seq (fun elt -> let acc' = f !acc elt in k acc'; acc := acc')) + (** Max element of the sequence, using the given comparison function. A default element has to be provided. *) let max ?(lt=fun x y -> x < y) seq m = @@ -283,6 +336,49 @@ let is_empty seq = try seq (fun _ -> raise ExitSequence); true with ExitSequence -> false +(** {2 Transform a sequence} *) + +let empty2 = + fun k -> () + +let is_empty2 seq2 = + try ignore (seq2 (fun _ _ -> raise ExitSequence)); true + with ExitSequence -> false + +let length2 seq2 = + let r = ref 0 in + seq2 (fun _ _ -> incr r); + !r + +let zip seq2 = + fun k -> seq2 (fun x y -> k (x,y)) + +let unzip seq = + fun k -> seq (fun (x,y) -> k x y) + +(** Zip elements of the sequence with their index in the sequence *) +let zip_i seq = + fun k -> + let r = ref 0 in + seq (fun x -> let n = !r in incr r; k n x) + +let fold2 f acc seq2 = + let acc = ref acc in + seq2 (fun x y -> acc := f !acc x y); + !acc + +let iter2 f seq2 = + seq2 f + +let map2 f seq2 = + fun k -> seq2 (fun x y -> k (f x y)) + +(** [map2_2 f g seq2] maps each [x, y] of seq2 into [f x y, g x y] *) +let map2_2 f g seq2 = + fun k -> seq2 (fun x y -> k (f x y) (g x y)) + +(** {2 Basic data structures converters} *) + let to_list seq = List.rev (fold (fun y x -> x::y) [] seq) let to_rev_list seq = fold (fun y x -> x :: y) [] seq @@ -308,6 +404,10 @@ let of_array_i a = for i = 0 to Array.length a - 1 do k (i, a.(i)) done in from_iter seq +let of_array2 a = + fun k -> + for i = 0 to Array.length a - 1 do k i a.(i) done + (** [array_slice a i j] Sequence of elements whose indexes range from [i] to [j] *) let array_slice a i j = @@ -356,9 +456,17 @@ let to_hashtbl seq = hashtbl_replace h seq; h +let to_hashtbl2 seq2 = + let h = Hashtbl.create 3 in + seq2 (fun k v -> Hashtbl.replace h k v); + h + let of_hashtbl h = from_iter (fun k -> Hashtbl.iter (fun a b -> k (a, b)) h) +let of_hashtbl2 h = + fun k -> Hashtbl.iter k h + let hashtbl_keys h = from_iter (fun k -> Hashtbl.iter (fun a b -> k a) h) @@ -528,6 +636,17 @@ module TypeClass = struct fold addable.add addable.empty seq end +(** {2 Infix functions} *) + +module Infix = struct + let (--) i j = int_range ~start:i ~stop:j + + let (|>) x f = f x + + let (@@) a b = append a b + + let (>>=) x f = flatMap f x +end (** {2 Pretty printing of sequences} *) diff --git a/sequence.mli b/sequence.mli index f6ef94b7..646262fc 100644 --- a/sequence.mli +++ b/sequence.mli @@ -27,10 +27,13 @@ for any direct, indirect, incidental, special, exemplary, or consequential are designed to allow easy transfer (mappings) between data structures, without defining n^2 conversions between the n types. *) -type +'a t +type +'a t = ('a -> unit) -> unit (** Sequence abstract iterator type, representing a finite sequence of values of type ['a]. *) +type (+'a, +'b) t2 = ('a -> 'b -> unit) -> unit + (** Sequence of pairs of values of type ['a] and ['b]. *) + (** {2 Build a sequence} *) val from_iter : (('a -> unit) -> unit) -> 'a t @@ -109,6 +112,19 @@ val persistent : 'a t -> 'a t (** Iterate on the sequence, storing elements in a data structure. The resulting sequence can be iterated on as many times as needed. *) +val sort : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t + (** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time. *) + +val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t + (** Sort the sequence and remove duplicates. Eager, same as [sort] *) + +val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t + (** Group equal consecutive elements. *) + +val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t + (** Remove consecutive duplicate elements. Basically this is + like [fun seq -> map List.hd (group seq)]. *) + val product : 'a t -> 'b t -> ('a * 'b) t (** Cartesian product of the sequences. The first one is transformed by calling [persistent] on it, so that it can be traversed @@ -119,6 +135,9 @@ val unfoldr : ('b -> ('a * 'b) option) -> 'b -> 'a t yields [Some (x,b')] then [x] is returned and unfoldr recurses with [b']. *) +val scan : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b t + (** Sequence of intermediate results *) + val max : ?lt:('a -> 'a -> bool) -> 'a t -> 'a -> 'a (** Max element of the sequence, using the given comparison function. A default element has to be provided. *) @@ -135,6 +154,30 @@ val drop : int -> 'a t -> 'a t val rev : 'a t -> 'a t (** Reverse the sequence. O(n) memory and time. *) +(** {2 Binary sequences} *) + +val empty2 : ('a, 'b) t2 + +val is_empty2 : (_, _) t2 -> bool + +val length2 : (_, _) t2 -> int + +val zip : ('a, 'b) t2 -> ('a * 'b) t + +val unzip : ('a * 'b) t -> ('a, 'b) t2 + +val zip_i : 'a t -> (int, 'a) t2 + (** Zip elements of the sequence with their index in the sequence *) + +val fold2 : ('c -> 'a -> 'b -> 'c) -> 'c -> ('a, 'b) t2 -> 'c + +val iter2 : ('a -> 'b -> unit) -> ('a, 'b) t2 -> unit + +val map2 : ('a -> 'b -> 'c) -> ('a, 'b) t2 -> 'c t + +val map2_2 : ('a -> 'b -> 'c) -> ('a -> 'b -> 'd) -> ('a, 'b) t2 -> ('c, 'd) t2 + (** [map2_2 f g seq2] maps each [x, y] of seq2 into [f x y, g x y] *) + (** {2 Basic data structures converters} *) val to_list : 'a t -> 'a list @@ -153,6 +196,8 @@ val of_array : 'a array -> 'a t val of_array_i : 'a array -> (int * 'a) t (** Elements of the array, with their index *) +val of_array2 : 'a array -> (int, 'a) t2 + val array_slice : 'a array -> int -> int -> 'a t (** [array_slice a i j] Sequence of elements whose indexes range from [i] to [j] *) @@ -183,12 +228,18 @@ val hashtbl_replace : ('a, 'b) Hashtbl.t -> ('a * 'b) t -> unit (** Add elements of the sequence to the hashtable, with Hashtbl.replace (erases conflicting bindings) *) -val to_hashtbl :('a * 'b) t -> ('a, 'b) Hashtbl.t +val to_hashtbl : ('a * 'b) t -> ('a, 'b) Hashtbl.t + (** Build a hashtable from a sequence of key/value pairs *) + +val to_hashtbl2 : ('a, 'b) t2 -> ('a, 'b) Hashtbl.t (** Build a hashtable from a sequence of key/value pairs *) val of_hashtbl : ('a, 'b) Hashtbl.t -> ('a * 'b) t (** Sequence of key/value pairs from the hashtable *) +val of_hashtbl2 : ('a, 'b) Hashtbl.t -> ('a, 'b) t2 + (** Sequence of key/value pairs from the hashtable *) + val hashtbl_keys : ('a, 'b) Hashtbl.t -> 'a t val hashtbl_values : ('a, 'b) Hashtbl.t -> 'b t @@ -289,6 +340,16 @@ module TypeClass : sig val to_addable : ('a,'b) addable -> 'a t -> 'b end +(** {2 Infix functions} *) + +module Infix : sig + val (--) : int -> int -> int t + + val (|>) : 'a -> ('a -> 'b) -> 'b + + val (@@) : 'a t -> 'a t -> 'a t +end + (** {2 Pretty printing of sequences} *) val pp_seq : ?sep:string -> (Format.formatter -> 'a -> unit) ->