From 12743ab24f1d5389a09adabfb0513c7c4b164c10 Mon Sep 17 00:00:00 2001
From: Simon Cruanes
Date: Thu, 2 Feb 2017 21:27:46 +0100
Subject: [PATCH] add `count`

---
 src/Sequence.ml        | 22 ++++++++++++++++++++++
 src/Sequence.mli       |  6 ++++++
 src/sequenceLabels.mli |  6 ++++++
 3 files changed, 34 insertions(+)

diff --git a/src/Sequence.ml b/src/Sequence.ml
index 425bf8d..d36221d 100644
--- a/src/Sequence.ml
+++ b/src/Sequence.ml
@@ -411,6 +411,28 @@ let group_by (type k) ?(hash=Hashtbl.hash) ?(eq=(=)) seq =
   |> OUnit.assert_equal [[1];[2;2;2];[3;3;3];[4]]
 *)
 
+let count (type k) ?(hash=Hashtbl.hash) ?(eq=(=)) seq =
+  let module Tbl = Hashtbl.Make(struct
+      type t = k
+      let equal = eq
+      let hash = hash
+    end) in
+  (* count table (element -> occurrences), filled eagerly right here *)
+  let tbl = Tbl.create 32 in
+  seq
+    (fun x ->
+       let n = try Tbl.find tbl x with Not_found -> 0 in
+       Tbl.replace tbl x (n+1)
+    );
+  fun yield ->
+    Tbl.iter (fun x n -> yield (x,n)) tbl
+
+(*$R
+  [1;2;3;3;2;2;3;4]
+  |> of_list |> count ?eq:None ?hash:None |> sort ?cmp:None |> to_list
+  |> OUnit.assert_equal [1,1;2,3;3,3;4,1]
+*)
+
 let uniq ?(eq=fun x y -> x = y) seq k =
   let has_prev = ref false
   and prev = ref (Obj.magic 0) in (* avoid option type, costly *)
diff --git a/src/Sequence.mli b/src/Sequence.mli
index e7c5e3d..872289d 100644
--- a/src/Sequence.mli
+++ b/src/Sequence.mli
@@ -241,6 +241,12 @@ val group_by : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) ->
     The result sequence is traversable as many times as required.
     @since 0.6 *)
 
+val count : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) ->
+  'a t -> ('a * int) t
+(** Map each distinct element (up to [eq]) to its number of occurrences in the
+    input. Similar to [group_by seq |> map (fun l->List.hd l, List.length l)].
+    @since NEXT_RELEASE *)
+
 val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t
 (** Remove consecutive duplicate elements. Basically this is
     like [fun seq -> map List.hd (group seq)]. *)
diff --git a/src/sequenceLabels.mli b/src/sequenceLabels.mli
index 06bd60f..2829be4 100644
--- a/src/sequenceLabels.mli
+++ b/src/sequenceLabels.mli
@@ -212,6 +212,12 @@ val group_by : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) ->
     The result sequence is traversable as many times as required.
     @since 0.6 *)
 
+val count : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) ->
+  'a t -> ('a * int) t
+(** Map each distinct element (up to [eq]) to its number of occurrences in the
+    input. Similar to [group_by seq |> map (fun l->List.hd l, List.length l)].
+    @since NEXT_RELEASE *)
+
 val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t
 (** Remove consecutive duplicate elements. Basically this is
     like [fun seq -> map List.hd (group seq)]. *)