diff --git a/CHANGELOG.md b/CHANGELOG.md index 3915aef..cf96d4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,29 @@ # Changelog +## 0.10 + +- add `{union,inter,diff,subset}` +- add `{join_by,join_all_by,group_join_by}` +- add `find_map{,i}` as better alias to existing functions +- add `{max_exn,min_exn}` +- add `count` +- add `doc` and `test` to opam + +## 0.9 + +- distinction between `diagonal,diagonal_l` +- add `init,fold_map,fold_filter_map,sorted,diagonal,findi,…` +- fix a few typos +- update readme: convert into asciidoc, add tutorial +- remove deprecated functions, add missing links to `SequenceLabels` + +## 0.8 + +- loop based implementation for `int_range_by` +- move files to 'src/', use qtest for tests +- add `int_range_by` +- add `Sequence.flat_map_l` + ## 0.7 - add missing entry in changelog and missing since annotations diff --git a/README.adoc b/README.adoc index ab9fc56..719d098 100644 --- a/README.adoc +++ b/README.adoc @@ -13,6 +13,8 @@ way of iterating on a finite number of values, only allocating (most of the time one intermediate closure to do so. For instance, iterating on keys, or values, of a `Hashtbl.t`, without creating a list. +toc::[] + == Documentation There is only one important type, `'a Sequence.t`, and lots of functions built @@ -93,7 +95,6 @@ underlying hash function): - : int = 11 (* now to get the values *) -# Sequence.of_t # Sequence.of_hashtbl h |> Sequence.map snd |> Sequence.to_list;; - : string list = ["6"; "2"; "8"; "7"; "3"; "5"; "4"; "9"; "0"; "10"; "1"] ---- @@ -120,7 +121,7 @@ use `Sequence.(--) : int -> int -> int Sequence.t`. NOTE: with **flambda** under sufficiently strong optimization flags, such compositions of operators -will be compiled to an actual loop with no overhead! +should be compiled to an actual loop with no overhead! === Iterating on sub-trees @@ -168,6 +169,7 @@ enumerating the ways we can insert an element in a list. 
[source,OCaml] ---- +# open Sequence.Infix;; # module S = Sequence ;; # let rec insert x l = match l with | [] -> S.return [x] diff --git a/_oasis b/_oasis index eb4824c..ce827cf 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: sequence -Version: 0.9 +Version: 0.10 Homepage: https://github.com/c-cube/sequence Authors: Simon Cruanes License: BSD-2-clause diff --git a/opam b/opam index 4b4684e..9f3cdf6 100644 --- a/opam +++ b/opam @@ -1,6 +1,6 @@ opam-version: "1.2" name: "sequence" -version: "0.9" +version: "0.10" author: "Simon Cruanes" maintainer: "simon.cruanes@inria.fr" license: "BSD-2-clauses" @@ -11,6 +11,14 @@ build: [ ] [make "build"] ] +build-doc: [ + ["./configure" "--enable-docs"] + [make "doc"] +] +build-test: [ + ["./configure" "--enable-tests"] + [make "test"] +] install: [make "install"] remove: [ ["ocamlfind" "remove" "sequence"] diff --git a/setup.ml b/setup.ml index 7c29a5c..58904ac 100644 --- a/setup.ml +++ b/setup.ml @@ -1,7 +1,7 @@ (* setup.ml generated for the first time by OASIS v0.4.4 *) (* OASIS_START *) -(* DO NOT EDIT (digest: fc3602a8b67872256edf65d37d0266b7) *) +(* DO NOT EDIT (digest: b6facd5b08b6b1360edc26bd90d50fa3) *) (* Regenerated by OASIS v0.4.8 Visit http://oasis.forge.ocamlcore.org for more information and @@ -7037,7 +7037,7 @@ let setup_t = { oasis_version = "0.4"; ocaml_version = None; - version = "0.9"; + version = "0.10"; license = OASISLicense.DEP5License (OASISLicense.DEP5Unit @@ -8243,7 +8243,7 @@ let setup_t = }; oasis_fn = Some "_oasis"; oasis_version = "0.4.8"; - oasis_digest = Some "\198\255:M\202\255\011Hi\149\143\207P\190g\219"; + oasis_digest = Some "o\181u\130\246\134[37Z-Cy\216\208\151"; oasis_exec = None; oasis_setup_args = []; setup_update = false diff --git a/src/META b/src/META index f3bbb7d..13249a4 100644 --- a/src/META +++ b/src/META @@ -1,6 +1,6 @@ # OASIS_START -# DO NOT EDIT (digest: b247543864e9cc39f327533c7e23c440) -version = "0.9" +# DO NOT EDIT (digest: 
47d925b722c4289a923085abbf97bba8) +version = "0.10" description = "Simple sequence (iterator) datatype and combinators" requires = "bytes" archive(byte) = "sequence.cma" @@ -9,7 +9,7 @@ archive(native) = "sequence.cmxa" archive(native, plugin) = "sequence.cmxs" exists_if = "sequence.cma" package "invert" ( - version = "0.9" + version = "0.10" description = "Simple sequence (iterator) datatype and combinators" requires = "sequence delimcc" archive(byte) = "invert.cma" @@ -20,7 +20,7 @@ package "invert" ( ) package "bigarray" ( - version = "0.9" + version = "0.10" description = "Simple sequence (iterator) datatype and combinators" requires = "sequence bigarray" archive(byte) = "bigarray.cma" diff --git a/src/sequence.ml b/src/Sequence.ml similarity index 84% rename from src/sequence.ml rename to src/Sequence.ml index 425bf8d..3bdb9fc 100644 --- a/src/sequence.ml +++ b/src/Sequence.ml @@ -15,6 +15,9 @@ type (+'a, +'b) t2 = ('a -> 'b -> unit) -> unit let pp_ilist = Q.Print.(list int) *) +type 'a equal = 'a -> 'a -> bool +type 'a hash = 'a -> int + (** Build a sequence from a iter function *) let from_iter f = f @@ -411,6 +414,28 @@ let group_by (type k) ?(hash=Hashtbl.hash) ?(eq=(=)) seq = |> OUnit.assert_equal [[1];[2;2;2];[3;3;3];[4]] *) +let count (type k) ?(hash=Hashtbl.hash) ?(eq=(=)) seq = + let module Tbl = Hashtbl.Make(struct + type t = k + let equal = eq + let hash = hash + end) in + (* compute group table *) + let tbl = Tbl.create 32 in + seq + (fun x -> + let n = try Tbl.find tbl x with Not_found -> 0 in + Tbl.replace tbl x (n+1) + ); + fun yield -> + Tbl.iter (fun x n -> yield (x,n)) tbl + +(*$R + [1;2;3;3;2;2;3;4] + |> of_list |> count ?eq:None ?hash:None |> sort ?cmp:None |> to_list + |> OUnit.assert_equal [1,1;2,3;3,3;4,1] +*) + let uniq ?(eq=fun x y -> x = y) seq k = let has_prev = ref false and prev = ref (Obj.magic 0) in (* avoid option type, costly *) @@ -499,6 +524,142 @@ let join ~join_row s1 s2 k = OUnit.assert_equal ["1 = 1"; "2 = 2"] (to_list 
s); *) +let join_by (type a) ?(eq=(=)) ?(hash=Hashtbl.hash) f1 f2 ~merge c1 c2 = + let module Tbl = Hashtbl.Make(struct + type t = a + let equal = eq + let hash = hash + end) in + let tbl = Tbl.create 32 in + c1 + (fun x -> + let key = f1 x in + Tbl.add tbl key x); + let res = ref [] in + c2 + (fun y -> + let key = f2 y in + let xs = Tbl.find_all tbl key in + List.iter + (fun x -> match merge key x y with + | None -> () + | Some z -> res := z :: !res) + xs); + fun yield -> List.iter yield !res + +type ('a, 'b) join_all_cell = { + mutable ja_left: 'a list; + mutable ja_right: 'b list; +} + +let join_all_by (type a) ?(eq=(=)) ?(hash=Hashtbl.hash) f1 f2 ~merge c1 c2 = + let module Tbl = Hashtbl.Make(struct + type t = a + let equal = eq + let hash = hash + end) in + let tbl = Tbl.create 32 in + (* build the map [key -> cell] *) + c1 + (fun x -> + let key = f1 x in + try + let c = Tbl.find tbl key in + c.ja_left <- x :: c.ja_left + with Not_found -> + Tbl.add tbl key {ja_left=[x]; ja_right=[]}); + c2 + (fun y -> + let key = f2 y in + try + let c = Tbl.find tbl key in + c.ja_right <- y :: c.ja_right + with Not_found -> + Tbl.add tbl key {ja_left=[]; ja_right=[y]}); + let res = ref [] in + Tbl.iter + (fun key cell -> match merge key cell.ja_left cell.ja_right with + | None -> () + | Some z -> res := z :: !res) + tbl; + fun yield -> List.iter yield !res + +let group_join_by (type a) ?(eq=(=)) ?(hash=Hashtbl.hash) f c1 c2 = + let module Tbl = Hashtbl.Make(struct + type t = a + let equal = eq + let hash = hash + end) in + let tbl = Tbl.create 32 in + c1 (fun x -> Tbl.replace tbl x []); + c2 + (fun y -> + (* project [y] into some element of [c1] *) + let key = f y in + try + let l = Tbl.find tbl key in + Tbl.replace tbl key (y :: l) + with Not_found -> ()); + fun yield -> Tbl.iter (fun k l -> yield (k,l)) tbl + +(*$= + ['a', ["abc"; "attic"]; \ + 'b', ["barbary"; "boom"; "bop"]; \ + 'c', []] \ + (group_join_by (fun s->s.[0]) \ + (of_str "abc") \ + (of_list ["abc"; "boom"; 
"attic"; "deleted"; "barbary"; "bop"]) \ + |> map (fun (c,l)->c,List.sort Pervasives.compare l) \ + |> sort |> to_list) +*) + +let union (type a) ?(eq=(=)) ?(hash=Hashtbl.hash) c1 c2 = + let module Tbl = Hashtbl.Make(struct + type t = a let equal = eq let hash = hash end) in + let tbl = Tbl.create 32 in + c1 (fun x -> Tbl.replace tbl x ()); + c2 (fun x -> Tbl.replace tbl x ()); + fun yield -> Tbl.iter (fun x _ -> yield x) tbl + +type inter_status = + | Inter_left + | Inter_both + +let inter (type a) ?(eq=(=)) ?(hash=Hashtbl.hash) c1 c2 = + let module Tbl = Hashtbl.Make(struct + type t = a let equal = eq let hash = hash end) in + let tbl = Tbl.create 32 in + c1 (fun x -> Tbl.replace tbl x Inter_left); + c2 + (fun x -> + try + match Tbl.find tbl x with + | Inter_left -> + Tbl.replace tbl x Inter_both; (* save *) + | Inter_both -> () + with Not_found -> ()); + fun yield -> Tbl.iter (fun x res -> if res=Inter_both then yield x) tbl + +let diff (type a) ?(eq=(=)) ?(hash=Hashtbl.hash) c1 c2 = + let module Tbl = Hashtbl.Make(struct + type t = a let equal = eq let hash = hash end) in + let tbl = Tbl.create 32 in + c2 (fun x -> Tbl.replace tbl x ()); + fun yield -> + c1 (fun x -> if not (Tbl.mem tbl x) then yield x) + +exception Subset_exit + +let subset (type a) ?(eq=(=)) ?(hash=Hashtbl.hash) c1 c2 = + let module Tbl = Hashtbl.Make(struct + type t = a let equal = eq let hash = hash end) in + let tbl = Tbl.create 32 in + c2 (fun x -> Tbl.replace tbl x ()); + try + c1 (fun x -> if not (Tbl.mem tbl x) then raise Subset_exit); + true + with Subset_exit -> false + let rec unfoldr f b k = match f b with | None -> () | Some (x, b') -> @@ -532,6 +693,10 @@ let max ?(lt=fun x y -> x < y) seq = | Some y -> if lt y x then ret := Some x); !ret +let max_exn ?lt seq = match max ?lt seq with + | Some x -> x + | None -> raise Not_found + let min ?(lt=fun x y -> x < y) seq = let ret = ref None in seq @@ -540,6 +705,15 @@ let min ?(lt=fun x y -> x < y) seq = | Some y -> if lt x y then ret 
:= Some x); !ret +let min_exn ?lt seq = match min ?lt seq with + | Some x -> x + | None -> raise Not_found + +(*$= & ~printer:string_of_int + 100 (0 -- 100 |> max_exn ?lt:None) + 0 (0 -- 100 |> min_exn ?lt:None) +*) + exception ExitHead let head seq = @@ -664,7 +838,7 @@ let mem ?(eq=(=)) x seq = exists (eq x) seq exception ExitFind -let find f seq = +let find_map f seq = let r = ref None in begin try @@ -676,7 +850,9 @@ let find f seq = end; !r -let findi f seq = +let find = find_map + +let find_mapi f seq = let i = ref 0 in let r = ref None in begin @@ -689,7 +865,9 @@ let findi f seq = end; !r -let find_pred f seq = find (fun x -> if f x then Some x else None) seq +let findi = find_mapi + +let find_pred f seq = find_map (fun x -> if f x then Some x else None) seq let find_pred_exn f seq = match find_pred f seq with | Some x -> x diff --git a/src/sequence.mli b/src/Sequence.mli similarity index 87% rename from src/sequence.mli rename to src/Sequence.mli index 40406f0..a510c99 100644 --- a/src/sequence.mli +++ b/src/Sequence.mli @@ -39,6 +39,9 @@ type +'a sequence = 'a t type (+'a, +'b) t2 = ('a -> 'b -> unit) -> unit (** Sequence of pairs of values of type ['a] and ['b]. 
*) +type 'a equal = 'a -> 'a -> bool +type 'a hash = 'a -> int + (** {2 Build a sequence} *) val from_iter : (('a -> unit) -> unit) -> 'a t @@ -144,10 +147,18 @@ val find : ('a -> 'b option) -> 'a t -> 'b option (** Find the first element on which the function doesn't return [None] @since 0.5 *) +val find_map : ('a -> 'b option) -> 'a t -> 'b option +(** Alias to {!find} + @since 0.10 *) + val findi : (int -> 'a -> 'b option) -> 'a t -> 'b option (** Indexed version of {!find} @since 0.9 *) +val find_mapi : (int -> 'a -> 'b option) -> 'a t -> 'b option +(** Alias to {!findi} + @since 0.10 *) + val find_pred : ('a -> bool) -> 'a t -> 'a option (** [find_pred p l] finds the first element of [l] that satisfies [p], or returns [None] if no element satisfies [p] @@ -241,6 +252,12 @@ val group_by : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) -> The result sequence is traversable as many times as required. @since 0.6 *) +val count : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) -> + 'a t -> ('a * int) t +(** Map each distinct element to its number of occurrences in the whole seq. + Similar to [group_by seq |> map (fun l->List.hd l, List.length l)] + @since 0.10 *) + val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t (** Remove consecutive duplicate elements. Basically this is like [fun seq -> map List.hd (group seq)]. *) @@ -271,6 +288,93 @@ val join : join_row:('a -> 'b -> 'c option) -> 'a t -> 'b t -> 'c t the two elements do not combine. Assume that [b] allows for multiple iterations. *) +val join_by : ?eq:'key equal -> ?hash:'key hash -> + ('a -> 'key) -> ('b -> 'key) -> + merge:('key -> 'a -> 'b -> 'c option) -> + 'a t -> + 'b t -> + 'c t +(** [join_by key1 key2 ~merge] is a binary operation + that takes two sequences [a] and [b], projects their + elements resp. with [key1] and [key2], and combines + values [(x,y)] from [(a,b)] with the same [key] + using [merge]. If [merge] returns [None], the combination + of values is discarded. 
+ @since 0.10 *) + +val join_all_by : ?eq:'key equal -> ?hash:'key hash -> + ('a -> 'key) -> ('b -> 'key) -> + merge:('key -> 'a list -> 'b list -> 'c option) -> + 'a t -> + 'b t -> + 'c t +(** [join_all_by key1 key2 ~merge] is a binary operation + that takes two sequences [a] and [b], projects their + elements resp. with [key1] and [key2], and, for each key [k] + occurring in at least one of them: + - compute the list [l1] of elements of [a] that map to [k] + - compute the list [l2] of elements of [b] that map to [k] + - call [merge k l1 l2]. If [merge] returns [None], the combination + of values is discarded, otherwise it returns [Some c] + and [c] is inserted in the result. + @since 0.10 *) + +val group_join_by : ?eq:'a equal -> ?hash:'a hash -> + ('b -> 'a) -> + 'a t -> + 'b t -> + ('a * 'b list) t +(** [group_join_by key2] associates to every element [x] of + the first sequence, all the elements [y] of the second + sequence such that [eq x (key2 y)]. Elements of the first + sequence without corresponding values in the second one + are mapped to [[]] + @since 0.10 *) + +val inter : + ?eq:'a equal -> ?hash:'a hash -> + 'a t -> 'a t -> 'a t +(** Intersection of two collections. Each element will occur at most once + in the result. Eager. + @since 0.10 *) + +(*$= + [2;4;5;6] (inter (1--6) (cons 2 (4--10)) |> sort |> to_list) + [] (inter (0--5) (6--10) |> to_list) +*) + +val union : + ?eq:'a equal -> ?hash:'a hash -> + 'a t -> 'a t -> 'a t +(** Union of two collections. Each element will occur at most once + in the result. Eager. + @since 0.10 *) + +(*$= + [2;4;5;6] (union (4--6) (cons 2 (4--5)) |> sort |> to_list) +*) + +val diff : + ?eq:'a equal -> ?hash:'a hash -> + 'a t -> 'a t -> 'a t +(** Set difference. Eager. + @since 0.10 *) + +(*$= + [1;2;8;9;10] (diff (1--10) (3--7) |> to_list) +*) + +val subset : + ?eq:'a equal -> ?hash:'a hash -> + 'a t -> 'a t -> bool +(** [subset a b] returns [true] if all elements of [a] belong to [b]. Eager. 
+ @since 0.10 *) + +(*$T + subset (2 -- 4) (1 -- 4) + not (subset (1 -- 4) (2 -- 10)) +*) + val unfoldr : ('b -> ('a * 'b) option) -> 'b -> 'a t (** [unfoldr f b] will apply [f] to [b]. If it yields [Some (x,b')] then [x] is returned @@ -284,10 +388,20 @@ val max : ?lt:('a -> 'a -> bool) -> 'a t -> 'a option @return None if the sequence is empty, Some [m] where [m] is the maximal element otherwise *) +val max_exn : ?lt:('a -> 'a -> bool) -> 'a t -> 'a +(** Unsafe version of {!max} + @raise Not_found if the sequence is empty + @since 0.10 *) + val min : ?lt:('a -> 'a -> bool) -> 'a t -> 'a option (** Min element of the sequence, using the given comparison function. see {!max} for more details. *) +val min_exn : ?lt:('a -> 'a -> bool) -> 'a t -> 'a +(** Unsafe version of {!min} + @raise Not_found if the sequence is empty + @since 0.10 *) + val head : 'a t -> 'a option (** First element, if any, otherwise [None] @since 0.5.1 *) diff --git a/src/sequenceLabels.ml b/src/sequenceLabels.ml index 663d7cb..b990b32 120000 --- a/src/sequenceLabels.ml +++ b/src/sequenceLabels.ml @@ -1 +1 @@ -sequence.ml \ No newline at end of file +Sequence.ml \ No newline at end of file diff --git a/src/sequenceLabels.mli b/src/sequenceLabels.mli index 06bd60f..b28e492 100644 --- a/src/sequenceLabels.mli +++ b/src/sequenceLabels.mli @@ -212,6 +212,12 @@ val group_by : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) -> The result sequence is traversable as many times as required. @since 0.6 *) +val count : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) -> + 'a t -> ('a * int) t +(** Map each distinct element to its number of occurrences in the whole seq. + Similar to [group_by seq |> map (fun l->List.hd l, List.length l)] + @since 0.10 *) + val uniq : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t (** Remove consecutive duplicate elements. Basically this is like [fun seq -> map List.hd (group seq)]. *)