From 2aef4afa6add320a8f0010bcdec45b0b8e42b4e3 Mon Sep 17 00:00:00 2001
From: Simon Cruanes
Date: Fri, 8 Mar 2013 15:11:26 +0100
Subject: [PATCH] sort, uniq, group and sort_uniq combinators implemented

---
 sequence.ml  | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 sequence.mli | 15 +++++++++++++++
 2 files changed, 64 insertions(+)

diff --git a/sequence.ml b/sequence.ml
index f46558e..0da84f1 100644
--- a/sequence.ml
+++ b/sequence.ml
@@ -211,6 +211,55 @@ let persistent (seq : 'a t) : 'a t =
   let l = MList.of_seq seq in
   from_iter (fun k -> MList.iter k l)
 
+(** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time.
+    The input is folded into a list when [sort] is applied; the
+    result only iterates on that stored, sorted list. *)
+let sort ?(cmp=Pervasives.compare) seq =
+  (* use an intermediate list, then sort the list *)
+  let l = fold (fun l x -> x::l) [] seq in
+  let l = List.fast_sort cmp l in
+  fun k -> List.iter k l
+
+(** Group equal consecutive elements. An element joins the current
+    group when it is [eq] to the previous element; each group lists
+    its elements in reverse order of arrival. *)
+let group ?(eq=fun x y -> x = y) seq =
+  fun k ->
+    let cur = ref [] in
+    seq (fun x ->
+      match !cur with
+      | [] -> cur := [x]
+      | (y::_) as l when eq x y ->
+        cur := x::l  (* [x] belongs to the group *)
+      | (_::_) as l ->
+        k l;  (* yield group, and start another one *)
+        cur := [x]);
+    (* last group, if the sequence was not empty *)
+    match !cur with
+    | [] -> ()
+    | l -> k l
+
+(** Remove consecutive duplicate elements: an element is dropped when
+    it is [eq] to the last element kept. Close to
+    [fun seq -> map List.hd (group seq)], except that the first element
+    of each run is the one kept. *)
+let uniq ?(eq=fun x y -> x = y) seq =
+  fun k ->
+    (* last element passed to [k], if any *)
+    let prev = ref None in
+    seq (fun x ->
+      match !prev with
+      | Some y when eq y x -> ()  (* duplicate *)
+      | Some _ | None ->
+        prev := Some x;
+        k x)
+
+(** Sort the sequence and remove duplicates. Eager, same as [sort].
+    Two elements are duplicates when [cmp] returns [0] on them. *)
+let sort_uniq ?(cmp=Pervasives.compare) seq =
+  let seq' = sort ~cmp seq in
+  uniq ~eq:(fun x y -> cmp x y = 0) seq'
+
 (** Cartesian product of the sequences. *)
 let product outer inner =
   let outer = persistent outer in
diff --git a/sequence.mli b/sequence.mli
index a1da7bc..646262f 100644
--- a/sequence.mli
+++ b/sequence.mli
@@ -112,6 +112,21 @@ val persistent : 'a t -> 'a t
   (** Iterate on the sequence, storing elements in a data structure.
       The resulting sequence can be iterated on as many times as needed. *)
 
+val sort : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t
+  (** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time. *)
+
+val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t
+  (** Sort the sequence and remove duplicates. Eager, same as [sort] *)
+
+val group : ?eq:('a -> 'a -> bool) -> 'a t -> 'a list t
+  (** Group equal consecutive elements. Each group lists its elements
+      in reverse order of arrival. *)
+
+val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t
+  (** Remove consecutive duplicate elements. Close to
+      [fun seq -> map List.hd (group seq)], except that the first
+      element of each run is the one kept. *)
+
 val product : 'a t -> 'b t -> ('a * 'b) t
   (** Cartesian product of the sequences. The first one is transformed
       by calling [persistent] on it, so that it can be traversed