From 2c3af875b983b322e110c5ad6f22e65d2250446e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 20 Aug 2013 23:33:14 +0200 Subject: [PATCH] new container, AbsSet, that presents an abstract view of sets and relational operators --- README.md | 1 + absSet.ml | 228 +++++++++++++++++++++++++++++++++++++++++++++++ absSet.mli | 152 +++++++++++++++++++++++++++++++ containers.mllib | 1 + containers.odocl | 27 +----- 5 files changed, 383 insertions(+), 26 deletions(-) create mode 100644 absSet.ml create mode 100644 absSet.mli mode change 100644 => 120000 containers.odocl diff --git a/README.md b/README.md index 7b31e7e5..5f9966f6 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ data structures access to elements by their index. - `Leftistheap`, a polymorphic heap structure. - `SmallSet`, a sorted list implementation behaving like a set. +- `AbsSet`, an abstract Set data structure, a bit like `LazyGraph`. Other structures are: diff --git a/absSet.ml b/absSet.ml new file mode 100644 index 00000000..9ee90fbf --- /dev/null +++ b/absSet.ml @@ -0,0 +1,228 @@ +(* +copyright (c) 2013, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +this software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are +disclaimed. in no event shall the copyright holder or contributors be liable +for any direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute goods or + services; loss of use, data, or profits; or business interruption) however + caused and on any theory of liability, whether in contract, strict liability, + or tort (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. +*) + +(** {1 Abstract set/relation} *) + +type 'a t = { + mem : 'a -> bool; + iter : ('a -> unit) -> unit; + cardinal : unit -> int; +} (** The abstract set *) + +let empty = { + mem = (fun _ -> false); + iter = (fun _ -> ()); + cardinal = (fun () -> 0); +} + +let mem set x = set.mem x + +let iter set k = set.iter k + +let fold set acc f = + let acc = ref acc in + set.iter (fun x -> acc := f !acc x); + !acc + +let cardinal set = set.cardinal () + +let singleton ?(eq=(=)) x = + let mem y = eq x y in + let iter k = k x in + let cardinal () = 1 in + { mem; iter; cardinal; } + +(* basic cardinal computation, by counting elements *) +let __default_cardinal iter = + fun () -> + let r = ref 0 in + iter (fun _ -> incr r); + !r + +let mk_generic ?cardinal ~mem ~iter = + let cardinal = match cardinal with + | Some c -> c + | None -> __default_cardinal iter (* default implementation *) + in + { mem; iter; cardinal; } + +let of_hashtbl h = + let mem x = Hashtbl.mem h x in + let iter k = Hashtbl.iter (fun x _ -> k x) h in + let cardinal () = Hashtbl.length h in + { mem; iter; cardinal; } + +let filter set pred = + let mem x = set.mem x && pred x in + let iter k = set.iter (fun x -> if pred x then k x) in + let cardinal = __default_cardinal iter in + { mem; iter; cardinal; } + +let union s1 s2 = + let mem x = s1.mem x || s2.mem x in + let iter k = + s1.iter k; + s2.iter (fun x -> if not (s1.mem x) then k x); + in + let cardinal = __default_cardinal iter in + { mem; iter; cardinal; } + +let intersection s1 s2 = + let mem x = s1.mem x && s2.mem x in + let iter k = s1.iter (fun x -> if s2.mem x then k x) in + let cardinal = __default_cardinal iter in + { mem; iter; cardinal; } + +let product s1 s2 = + let mem (x,y) = s1.mem x && s2.mem y in + let iter k = + s1.iter (fun x -> s2.iter (fun y -> k (x,y))) in + let cardinal () = s1.cardinal () * s2.cardinal () in + { mem; iter; cardinal; } + +let to_seq set = + Sequence.from_iter (fun k -> set.iter k) + +let to_list set = + let l = ref [] in + set.iter (fun x -> l := x :: !l); + !l + +(** {2 Set builders} *) + +(** A set builder is a value that serves to build a set, element by element. + Several implementations can be provided, but the two operations that + must be present are: + + - add an element to the builder + - extract the set composed of all elements added so far +*) + +type 'a builder = { + add : 'a -> unit; + get : unit -> 'a t; +} + +let mk_builder ~add ~get = + { add; get; } + +let builder_hash (type k) ?(size=15) ?(eq=(=)) ?(hash=Hashtbl.hash) () = + let module H = Hashtbl.Make(struct type t = k let equal = eq let hash = hash end) in + let h = H.create size in + let add x = H.replace h x () in + let get () = + let mem x = H.mem h x in + let iter k = H.iter (fun x _ -> k x) h in + let cardinal () = H.length h in + mk_generic ~cardinal ~mem ~iter + in + mk_builder ~add ~get + +let builder_cmp (type k) ?(cmp=Pervasives.compare) () = + let module S = Set.Make(struct type t = k let compare = cmp end) in + let s = ref S.empty in + let add x = s := S.add x !s in + let get () = + let s' = !s in + let mem x = S.mem x s' in + let iter k = S.iter k s' in + let cardinal () = S.cardinal s' in + mk_generic ~cardinal ~mem ~iter + in + mk_builder ~add ~get + +let of_seq_builder ~builder seq = + Sequence.iter builder.add seq; + builder.get () + +let of_seq_hash ?eq ?hash seq = + let b = builder_hash ?eq ?hash () in + of_seq_builder b seq + +let of_seq_cmp ?cmp seq = + let b = builder_cmp ?cmp () in + of_seq_builder b seq + +let of_list l = of_seq_hash (Sequence.of_list l) + +let map ?(builder=builder_hash ()) set ~f = + set.iter + (fun x -> + let y = f x in + builder.add y); + builder.get () + +(* relational join *) +let hash_join + (type k) ?(eq=(=)) ?(size=20) ?(hash=Hashtbl.hash) ?(builder=builder_hash ()) + ~project1 ~project2 ~merge s1 s2 + = + let module H = Hashtbl.Make(struct type t = k let equal = eq let hash = hash end) in + let h = H.create size in + s1.iter + (fun x -> + let key = project1 x in + H.add h key x); + s2.iter + (fun y -> + let key = project2 y in + let xs = H.find_all h key in + List.iter (fun x -> builder.add (merge x y)) xs); + builder.get () + +(** {2 Functorial interfaces} *) + +module MakeHash(X : Hashtbl.HashedType) = struct + type elt = X.t + (** Elements of the set are hashable *) + + module H = Hashtbl.Make(X) + + let of_seq ?(size=5) seq = + let h = Hashtbl.create size in + Sequence.iter (fun x -> Hashtbl.add h x ()) seq; + let mem x = Hashtbl.mem h x in + let iter k = Hashtbl.iter (fun x () -> k x) h in + let cardinal () = Hashtbl.length h in + mk_generic ~cardinal ~mem ~iter +end + + +module MakeSet(S : Set.S) = struct + type elt = S.elt + + let of_set set = + let mem x = S.mem x set in + let iter k = S.iter k set in + let cardinal () = S.cardinal set in + mk_generic ~cardinal ~mem ~iter + + let of_seq ?(init=S.empty) seq = + let set = Sequence.fold (fun s x -> S.add x s) init seq in + of_set set + + let to_set set = + fold set S.empty (fun set x -> S.add x set) +end diff --git a/absSet.mli b/absSet.mli new file mode 100644 index 00000000..1ce06212 --- /dev/null +++ b/absSet.mli @@ -0,0 +1,152 @@ +(* +copyright (c) 2013, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +this software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are +disclaimed. in no event shall the copyright holder or contributors be liable +for any direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute goods or + services; loss of use, data, or profits; or business interruption) however + caused and on any theory of liability, whether in contract, strict liability, + or tort (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. +*) + +(** {1 Abstract set/relation} *) + +type 'a t + +val empty : 'a t + (** Empty set *) + +val mem : 'a t -> 'a -> bool + (** [mem set x] returns true iff [x] belongs to the set *) + +val iter : 'a t -> ('a -> unit) -> unit + (** Iterate on the set elements **) + +val fold : 'a t -> 'b -> ('b -> 'a -> 'b) -> 'b + (** Fold on the set *) + +val cardinal : _ t -> int + (** Number of elements *) + +val singleton : ?eq:('a -> 'a -> bool) -> 'a -> 'a t + (** Single-element set *) + +val mk_generic : ?cardinal:(unit -> int) -> + mem:('a -> bool) -> + iter:(('a -> unit) -> unit) -> 'a t + (** Generic constructor. Takes a membership function and an iteration + function, and possibly a cardinal function (supposed to return + the number of elements) *) + +val of_hashtbl : ('a, _) Hashtbl.t -> 'a t + (** Set composed of the keys of this hashtable. The cardinal is computed + using the number of bindings, so keys with multiple bindings will + entail errors in {!cardinal} !*) + +val filter : 'a t -> ('a -> bool) -> 'a t + (** Filter the set *) + +val union : 'a t -> 'a t -> 'a t + +val intersection : 'a t -> 'a t -> 'a t + +val product : 'a t -> 'b t -> ('a * 'b) t + (** Cartesian product *) + +val to_seq : 'a t -> 'a Sequence.t + +val to_list : 'a t -> 'a list + +(** {2 Set builders} *) + +(** A set builder is a value that serves to build a set, element by element. + Several implementations can be provided, but the two operations that + must be present are: + + - add an element to the builder + - extract the set composed of all elements added so far +*) + +type 'a builder + +val mk_builder : add:('a -> unit) -> get:(unit -> 'a t) -> 'a builder + (** Generic set builder *) + +val builder_hash : ?size:int -> + ?eq:('a -> 'a -> bool) -> + ?hash:('a -> int) -> unit -> 'a builder + (** Builds a set from a Hashtable. [size] is the initial size *) + +val builder_cmp : ?cmp:('a -> 'a -> int) -> unit -> 'a builder + +val of_seq_builder : builder:'a builder -> 'a Sequence.t -> 'a t + (** Uses the given builder to construct a set from a sequence of elements *) + +val of_seq_hash : ?eq:('a -> 'a -> bool) -> ?hash:('a -> int) -> 'a Sequence.t -> 'a t + (** Construction of a set from a sequence of hashable elements *) + +val of_seq_cmp : ?cmp:('a -> 'a -> int) -> 'a Sequence.t -> 'a t + (** Construction of a set from a sequence of comparable elements *) + +val of_list : 'a list -> 'a t + (** Helper that uses default hash function and equality to build a set *) + +val map : ?builder:'b builder -> 'a t -> f:('a -> 'b) -> 'b t + (** Eager map from a set to another set. The result is built immediately + using a set builder *) + +val hash_join : ?eq:('key -> 'key -> bool) -> + ?size:int -> + ?hash:('key -> int) -> + ?builder:'res builder -> + project1:('a -> 'key) -> + project2:('b -> 'key) -> + merge:('a -> 'b -> 'res) -> + 'a t -> 'b t -> 'res t + (** Relational join between two sets. The two sets are joined on + the 'key type, and rows are merged into 'res. + This takes at least three functions + in addition to optional parameters: + + - [project1] extracts keys from rows of the first set + - [project2] extracts keys from rows of the second set + - [merge] merges rows that have the same key together + *) + +(** {2 Functorial interfaces} *) + +module MakeHash(X : Hashtbl.HashedType) : sig + type elt = X.t + (** Elements of the set are hashable *) + + val of_seq : ?size:int -> elt Sequence.t -> elt t + (** Build a set from a sequence *) +end + + +module MakeSet(S : Set.S) : sig + type elt = S.elt + + val of_seq : ?init:S.t -> elt Sequence.t -> elt t + (** Build a set from a sequence *) + + val of_set : S.t -> elt t + (** Explicit conversion from a tree set *) + + val to_set : elt t -> S.t + (** Conversion to a set (linear time) *) +end diff --git a/containers.mllib b/containers.mllib index 7741c056..ce087f15 100644 --- a/containers.mllib +++ b/containers.mllib @@ -25,3 +25,4 @@ MultiSet UnionFind SmallSet Leftistheap +AbsSet diff --git a/containers.odocl b/containers.odocl deleted file mode 100644 index fe678cdd..00000000 --- a/containers.odocl +++ /dev/null @@ -1,26 +0,0 @@ -Cache -Deque -Gen -FHashtbl -FQueue -FlatHashtbl -Hashset -Heap -LazyGraph -PersistentGraph -PersistentHashtbl -PHashtbl -Sequence -SkipList -SplayTree -SplayMap -Univ -Vector -Bij -PiCalculus -Bencode -Sexp -RAL -MultiSet -UnionFind -SmallSet diff --git a/containers.odocl b/containers.odocl new file mode 120000 index 00000000..31676fb5 --- /dev/null +++ b/containers.odocl @@ -0,0 +1 @@ +containers.mllib \ No newline at end of file