diff --git a/README.md b/README.md index b286b15b..66ade054 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,15 @@ Containers is: `Containers` (intended to be opened, replaces some stdlib modules with extended ones), and a small S-expression printer and parser that can be functorized over the representation of values. -- Utilities around the `unix` library in `containers.unix` (mainly to spawn - sub-processes easily and deal with resources safely) +- Some sub-libraries with a specific focus each: + * Utilities around the `unix` library in `containers.unix` (mainly to spawn + sub-processes easily and deal with resources safely) + * A bencode codec in `containers.bencode`. This is a tiny json-like + serialization format that is extremely simple. It comes from bittorrent files. + * A [CBOR](https://cbor.io) codec in `containers.cbor`. This is a + compact binary serialization format. + * The [Strongly Connected Component](https://en.wikipedia.org/wiki/Strongly_connected_component) + algorithm, functorized, in `containers.scc` - A separate library `containers-data` with additional data structures that don't have an equivalent in the standard library, typically not as thoroughly maintained. This is now in its own package @@ -35,10 +42,6 @@ Containers is: - A separate library for threaded programming in `containers-thread`, including a blocking queue, semaphores, an extension of `Mutex`, and thread-pool based futures. This is in its own package since 3.0. -- A bencode codec in `containers.bencode`. This is a tiny json-like - serialization format that is extremely simple. It comes from bittorrent files. -- A [CBOR](https://cbor.io) codec in `containers.cbor`. This is a - compact binary serialization format. Some of the modules have been moved to their own repository (e.g. `sequence` (now `iter`), `gen`, `qcheck`) and are on opam for great fun and profit. diff --git a/src/scc/containers_scc.ml b/src/scc/containers_scc.ml new file mode 100644 index 00000000..767a703c --- /dev/null +++ b/src/scc/containers_scc.ml @@ -0,0 +1,104 @@ +type 'a iter = ('a -> unit) -> unit + +module type ARG = sig + type t + type node + + val children : t -> node -> node iter + + module Node_tbl : Hashtbl.S with type key = node +end + +module type S = sig + module A : ARG + + val scc : A.t -> A.node list -> A.node list list +end + +module Make (A : ARG) = struct + module A = A + + type state = { + mutable min_id: int; (* min ID of the vertex' scc *) + id: int; (* ID of the vertex *) + mutable on_stack: bool; + vertex: A.node; + } + + let mk_cell v n = { min_id = n; id = n; on_stack = false; vertex = v } + + (* pop elements of [stack] until we reach node with given [id] *) + let rec pop_down_to ~id acc stack = + assert (not (Stack.is_empty stack)); + let cell = Stack.pop stack in + cell.on_stack <- false; + if cell.id = id then ( + assert (cell.id = cell.min_id); + cell.vertex :: acc (* return SCC *) + ) else + pop_down_to ~id (cell.vertex :: acc) stack + + let scc (graph : A.t) (nodes : A.node list) : _ list list = + let res = ref [] in + let tbl = A.Node_tbl.create 16 in + + (* stack of nodes being explored, for the DFS *) + let to_explore = Stack.create () in + (* stack for Tarjan's algorithm itself *) + let stack = Stack.create () in + (* unique ID for new nodes *) + let n = ref 0 in + + (* exploration starting from [v] *) + let explore_from (v : A.node) : unit = + Stack.push (`Enter v) to_explore; + while not (Stack.is_empty to_explore) do + match Stack.pop to_explore with + | `Enter v -> + if not (A.Node_tbl.mem tbl v) then ( + (* remember unique ID for [v] *) + let id = !n in + incr n; + let cell = mk_cell v id in + cell.on_stack <- true; + A.Node_tbl.add tbl v cell; + Stack.push cell stack; + Stack.push (`Exit (v, cell)) to_explore; + (* explore children *) + let children = A.children graph v in + children (fun v' -> Stack.push (`Enter v') to_explore) + ) + | `Exit (v, cell) -> + (* update [min_id] *) + assert cell.on_stack; + let children = A.children graph v in + children (fun dest -> + (* must not fail, [dest] already explored *) + let dest_cell = A.Node_tbl.find tbl dest in + (* same SCC? yes if [dest] points to [cell.v] *) + if dest_cell.on_stack then + cell.min_id <- min cell.min_id dest_cell.min_id); + (* pop from stack if SCC found *) + if cell.id = cell.min_id then ( + let scc = pop_down_to ~id:cell.id [] stack in + res := scc :: !res + ) + done + in + + List.iter explore_from nodes; + assert (Stack.is_empty stack); + !res +end + +let scc (type graph node) ~(tbl : (module Hashtbl.S with type key = node)) + ~graph ~children ~nodes () : _ list = + let module S = Make (struct + type t = graph + type nonrec node = node + + let children = children + + module Node_tbl = (val tbl) + end) in + S.scc graph nodes diff --git a/src/scc/containers_scc.mli b/src/scc/containers_scc.mli new file mode 100644 index 00000000..b0a91225 --- /dev/null +++ b/src/scc/containers_scc.mli @@ -0,0 +1,26 @@ +type 'a iter = ('a -> unit) -> unit + +module type ARG = sig + type t + type node + + val children : t -> node -> node iter + + module Node_tbl : Hashtbl.S with type key = node +end + +module type S = sig + module A : ARG + + val scc : A.t -> A.node list -> A.node list list +end + +module Make (A : ARG) : S with module A = A + +val scc : + tbl:(module Hashtbl.S with type key = 'node) -> + graph:'graph -> + children:('graph -> 'node -> 'node iter) -> + nodes:'node list -> + unit -> + 'node list list diff --git a/src/scc/dune b/src/scc/dune new file mode 100644 index 00000000..95af7c23 --- /dev/null +++ b/src/scc/dune @@ -0,0 +1,6 @@ + +(library + (name containers_scc) + (public_name containers.scc) + (synopsis "strongly connected components algorithm") + (libraries containers))