From 8bd2706ed5c602357f4d698c49d9c0c98b3ef06e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Jun 2014 03:28:41 +0200 Subject: [PATCH] CCKTree, abstract tree structure with dot printing --- README.md | 1 + _oasis | 2 +- core/CCKTree.ml | 292 +++++++++++++++++++++++++++++++++++++++++++++++ core/CCKTree.mli | 127 +++++++++++++++++++++ 4 files changed, 421 insertions(+), 1 deletion(-) create mode 100644 core/CCKTree.ml create mode 100644 core/CCKTree.mli diff --git a/README.md b/README.md index 640a8177..984e4a14 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,7 @@ structures comprise (some modules in `misc/`, some other in `core/`): - `CCArray`, utilities on arrays and slices - `CCLinq`, high-level query language over collections - `CCMultimap` and `CCMultiset`, functors defining persistent structures +- `CCKTree`, an abstract lazy tree structure (similar to what `CCKlist` is to lists) - small modules (basic types, utilities): - `CCInt` - `CCPair` (cartesian products) diff --git a/_oasis b/_oasis index 627456dc..4c830279 100644 --- a/_oasis +++ b/_oasis @@ -42,7 +42,7 @@ Library "containers" CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError, CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash, CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd, - CCRandom, CCLinq + CCRandom, CCLinq, CCKTree FindlibName: containers Library "containers_string" diff --git a/core/CCKTree.ml b/core/CCKTree.ml new file mode 100644 index 00000000..f1f86131 --- /dev/null +++ b/core/CCKTree.ml @@ -0,0 +1,292 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Lazy Tree Structure} +This structure can be used to represent trees and directed +graphs (as infinite trees) in a lazy fashion. Like {!CCKList}, it +is a structural type. *) + +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a printer = Buffer.t -> 'a -> unit + +type +'a t = unit -> [`Nil | `Node of 'a * 'a t list] + +let empty () = `Nil + +let is_empty t = match t() with + | `Nil -> true + | `Node _ -> false + +let singleton x () = `Node (x, []) + +let node x l () = `Node(x,l) + +let node1 x t () = `Node(x,[t]) +let node2 x t1 t2 () = `Node(x,[t1;t2]) + +let rec fold f acc t = match t() with + | `Nil -> acc + | `Node (x,l) -> + let acc = f acc x in + List.fold_left (fold f) acc l + +let rec iter f t = match t() with + | `Nil -> () + | `Node(x,l) -> f x; List.iter (iter f) l + +let size t = fold (fun n _ -> n+1) 0 t + +let height t = + let rec aux t k = match t() with + | `Nil -> k 0 + | `Node (_, l) -> aux_l 0 l k + and aux_l acc l k = match l with + | [] -> k acc + | t'::l' -> + aux t' (fun n -> aux_l (max acc n) l' k) + in aux t (fun x->x) + +let rec map f t () = match t() with + | `Nil -> `Nil + | `Node(x,l) -> + `Node (f x, List.map (map f) l) + +let (>|=) t f = map f t + +let rec cut_depth n t () = match t() with + | `Nil -> `Nil + | `Node _ when n=0 -> `Nil + | `Node(x,l) -> + `Node(x, List.map (cut_depth (n-1)) l) + +(** {2 Graph Traversals} *) + +(** Abstract Set structure *) +class type ['a] pset = object + method add : 'a -> 'a pset + method mem : 'a -> bool +end + +let set_of_cmp (type elt) ?(cmp=Pervasives.compare) () = + let module S = Set.Make(struct + type t = elt + let compare = cmp + end) in + object + val s = S.empty + method add x = {< s = S.add x s >} + method mem x = S.mem x s + end + +let _nil () = `Nil +let _cons x l = `Cons (x, l) + +let dfs ?(pset=set_of_cmp ()) t = + let rec dfs pset stack () = match stack with + | [] -> `Nil + | `Explore t :: stack' -> + begin match t() with + | `Nil -> dfs pset stack' () + | `Node (x, _) when pset#mem x -> + dfs pset stack' () (* loop *) + | `Node (x, l) -> + let pset' = pset#add x in + let stack' = + List.rev_append (List.rev_map (fun x -> `Explore x) l) (`Exit x :: stack') + in + _cons (`Enter x) (dfs pset' stack') + end + | `Exit x :: stack' -> + _cons (`Exit x) (dfs pset stack') + in + dfs pset [`Explore t] + +(** Functional queues for BFS *) +module FQ = struct + type 'a t = { + hd : 'a list; + tl : 'a list; + } + + exception Empty + + (* invariant: if hd=[], then tl=[] *) + let _make hd tl = match hd with + | [] -> {hd=List.rev tl; tl=[] } + | _::_ -> {hd; tl; } + + let empty = _make [] [] + + let is_empty q = q.hd = [] + + let push q x = _make q.hd (x::q.tl) + + let pop_exn q = + match q.hd with + | [] -> assert (q.tl = []); raise Empty + | x::hd' -> + let q' = _make hd' q.tl in + x, q' +end + +let bfs ?(pset=set_of_cmp ()) t = + let rec bfs pset q () = + if FQ.is_empty q then `Nil + else + let t, q' = FQ.pop_exn q in + match t() with + | `Nil -> bfs pset q' () + | `Node(x,_) when pset#mem x -> + bfs pset q' () (* loop *) + | `Node(x,l) -> + let q' = List.fold_left FQ.push q' l in + let pset' = pset#add x in + _cons x (bfs pset' q') + in + bfs pset (FQ.push FQ.empty t) + +let find ?pset f t = + let rec _find_kl f l = match l() with + | `Nil -> None + | `Cons (x, l') -> + match f x with + | None -> _find_kl f l' + | Some _ as res -> res + in + bfs ?pset t |> _find_kl f + +(** {2 Pretty printing in the DOT (graphviz) format} *) + +module Dot = struct + type attribute = [ + | `Color of string + | `Shape of string + | `Weight of int + | `Style of string + | `Label of string + | `Id of string + | `Other of string * string + ] (** Dot attributes for nodes *) + + type graph = (string * attribute list t list) + (** A dot graph is a name, plus a list of trees labelled with attributes *) + + let mk_id format = + let buf = Buffer.create 64 in + Printf.kbprintf + (fun fmt -> `Id (Buffer.contents buf)) + buf + format + + let mk_label format = + let buf = Buffer.create 64 in + Printf.kbprintf + (fun fmt -> `Label(Buffer.contents buf)) + buf + format + + let make ~name l = (name,l) + + let singleton ~name t = (name, [t]) + + (* find and remove the `Id attribute, if any *) + let rec _find_id acc l = match l with + | [] -> raise Not_found + | `Id n :: l' -> n, List.rev_append acc l' + | x :: l' -> _find_id (x::acc) l' + + let _pp_attr buf attr = match attr with + | `Color c -> Printf.bprintf buf "color=%s" c + | `Shape s -> Printf.bprintf buf "shape=%s" s + | `Weight w -> Printf.bprintf buf "weight=%d" w + | `Style s -> Printf.bprintf buf "style=%s" s + | `Label l -> Printf.bprintf buf "label=\"%s\"" l + | `Other (name, value) -> Printf.bprintf buf "%s=\"%s\"" name value + | `Id _ -> () (* should not be here *) + + let rec _pp_attrs buf l = match l with + | [] -> () + | [x] -> _pp_attr buf x + | x::l' -> + _pp_attr buf x; + Buffer.add_char buf ','; + _pp_attrs buf l' + + let pp buf (name,l) = + (* nodes already printed *) + let tbl = Hashtbl.create 32 in + (* fresh name generator *) + let new_name = + let n = ref 0 in + fun () -> + let s = Printf.sprintf "node_%d" !n in + incr n; s + in + (* the name for some node is either defined, either a fresh random + name *) + let get_name x = + try _find_id [] x + with Not_found -> new_name (), x + in + (* recursive printing (bfs) *) + let rec aux q = + if FQ.is_empty q + then () + else + let (parent,x), q' = FQ.pop_exn q in + let q' = pp_node q' ?parent x in + aux q' + and pp_node q ?parent t = match t() with + | `Nil -> q + | `Node (x,l) -> + let name, attrs = get_name x in + begin match parent with + | None -> () + | Some n -> Printf.bprintf buf " %s -> %s;\n" n name + end; + if not (Hashtbl.mem tbl name) then ( + Hashtbl.add tbl name (); + Printf.bprintf buf " %s [%a];\n" name _pp_attrs attrs; + List.fold_left + (fun q y -> FQ.push q (Some name, y)) q l + ) else q + in + let q = + List.fold_left + (fun q y -> FQ.push q (None, y)) + FQ.empty l + in + (* preamble *) + Printf.bprintf buf "digraph %s {\n" name; + aux q; + Printf.bprintf buf "}\n"; + () + + let pp_single name buf t = pp buf (singleton name t) +end + diff --git a/core/CCKTree.mli b/core/CCKTree.mli new file mode 100644 index 00000000..b34183a1 --- /dev/null +++ b/core/CCKTree.mli @@ -0,0 +1,127 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Lazy Tree Structure} +This structure can be used to represent trees and directed +graphs (as infinite trees) in a lazy fashion. Like {!CCKList}, it +is a structural type. *) + +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a printer = Buffer.t -> 'a -> unit + +(** {2 Basics} *) + +type +'a t = unit -> [`Nil | `Node of 'a * 'a t list] + +val empty : 'a t + +val is_empty : _ t -> bool + +val singleton : 'a -> 'a t +(** Tree with only one label *) + +val node : 'a -> 'a t list -> 'a t +(** Build a node from a label and a list of children *) + +val node1 : 'a -> 'a t -> 'a t +(** Node with one child *) + +val node2 : 'a -> 'a t -> 'a t -> 'a t +(** Node with two children *) + +val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a +(** Fold on values in no specified order. May not terminate if the + tree is infinite. *) + +val iter : ('a -> unit) -> 'a t -> unit + +val size : _ t -> int +(** Number of elements *) + +val height : _ t -> int +(** Length of the longest path to empty leaves *) + +val map : ('a -> 'b) -> 'a t -> 'b t + +val (>|=) : 'a t -> ('a -> 'b) -> 'b t + +val cut_depth : int -> 'a t -> 'a t +(** Cut the tree at the given depth, so it becomes finite. *) + +(** {2 Graph Traversals} *) + +(** Abstract Set structure *) +class type ['a] pset = object + method add : 'a -> 'a pset + method mem : 'a -> bool +end + +val set_of_cmp : ?cmp:('a -> 'a -> int) -> unit -> 'a pset +(** Build a set structure given a total ordering *) + +val dfs : ?pset:'a pset -> 'a t -> [ `Enter of 'a | `Exit of 'a ] klist +(** Depth-first traversal of the tree *) + +val bfs : ?pset:'a pset -> 'a t -> 'a klist +(** Breadth first traversal of the tree *) + +val find : ?pset:'a pset -> ('a -> 'b option) -> 'a t -> 'b option +(** Look for an element that maps to [Some _] *) + +(** {2 Pretty printing in the DOT (graphviz) format} *) + +module Dot : sig + type attribute = [ + | `Color of string + | `Shape of string + | `Weight of int + | `Style of string + | `Label of string + | `Id of string (** Unique ID in the graph. Allows sharing. *) + | `Other of string * string + ] (** Dot attributes for nodes *) + + type graph = (string * attribute list t list) + (** A dot graph is a name, plus a list of trees labelled with attributes *) + + val mk_id : ('a, Buffer.t, unit, attribute) format4 -> 'a + (** Using a formatter string, build an ID *) + + val mk_label : ('a, Buffer.t, unit, attribute) format4 -> 'a + (** Using a formatter string, build a label *) + + val make : name:string -> attribute list t list -> graph + + val singleton : name:string -> attribute list t -> graph + + val pp : graph printer + (** Print the graph in DOT *) + + val pp_single : string -> attribute list t printer +end +