(* Copyright (c) 2013, Simon Cruanes All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) (** {1 Functional (persistent) hashtable} *) (** {2 Signatures} *) module type HASH = sig type t val equal : t -> t -> bool val hash : t -> int end (** The signature for such a functional hashtable *) module type S = sig type 'a t type key val empty : int -> 'a t (** The empty hashtable (with sub-hashtables of given size) *) val find : 'a t -> key -> 'a (** Find the binding for this key, or raise Not_found *) val replace : 'a t -> key -> 'a -> 'a t (** [replace t key val] returns a copy of [t] where [key] binds to [val] *) val remove : 'a t -> key -> 'a t (** Remove the bindings for the given key *) val fold : ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b (** Fold on bindings *) val iter : (key -> 'a -> unit) -> 'a t -> unit (** Iterate on bindings *) val size : 'a t -> int (** Number of bindings *) val depth : 'a t -> int (** Depth of the tree *) val to_seq : 'a t -> (key * 'a) Sequence.t val of_seq : ?size:int -> (key * 'a) Sequence.t -> 'a t end (** {2 Persistent array} *) module PArray = struct type 'a t = 'a zipper ref and 'a zipper = | Array of 'a array | Diff of int * 'a * 'a zipper ref (* XXX maybe having a snapshot of the array from point to point may help? *) let make size elt = let a = Array.create size elt in ref (Array a) (** Recover the given version of the shared array. Returns the array itself. *) let rec reroot t = match !t with | Array a -> a | Diff (i, v, t') -> begin let a = reroot t' in let v' = a.(i) in t' := Diff (i, v', t); a.(i) <- v; t := Array a; a end let get t i = match !t with | Array a -> a.(i) | Diff _ -> let a = reroot t in a.(i) let set t i v = let a = match !t with | Array a -> a | Diff _ -> reroot t in let v' = a.(i) in if v == v' then t (* no change *) else begin let t' = ref (Array a) in a.(i) <- v; t := Diff (i, v', t'); t' (* create new array *) end let fold_left f acc t = let a = reroot t in Array.fold_left f acc a let rec length t = match !t with | Array a -> Array.length a | Diff (_, _, t') -> length t' end (** {2 Constructor} *) module Make(X : HASH) = struct (** The hashtable is a binary tree, with persistent arrays as leaves. Nodes at depth n of the tree are split on the n-th digit of the hash (starting with the least significant bit as 0). The left child is for bit=0, the right one for bit=1. *) type key = X.t type 'a t = | Split of 'a t * 'a t (** Split on the last digit of the hash *) | Table of 'a buckets (** Hashtable as a persistent array *) (** The hashtable, as a tree of persistent open addressing hashtables *) and 'a buckets = 'a bucket PArray.t (** A persistent array of buckets *) and 'a bucket = | Empty | Deleted | Used of key * 'a (** One buckets stores one key->value binding *) let empty_buckets size = PArray.make size Empty (** Empty hashtable *) let empty size = Table (empty_buckets size) (** The address in a bucket array, after probing [i] times *) let addr n h i = ((h land max_int) + i) mod n (** Find the bucket that contains the given [key]. [h] is not necessarily the hash of the key, because it can have been shifted to right several times. *) let rec probe_find buckets n h key i = if i = n then raise Not_found else begin let j = addr n h i in match PArray.get buckets j with | Empty -> raise Not_found | Used (key', value) when X.equal key key' -> value (* found *) | Used _ | Deleted -> probe_find buckets n h key (i+1) (* go further *) end (** Find the value bound to the given [key] *) let find t key = let h = X.hash key in (* find the appropriate leaf *) let rec find h t = match t with | Split (l, r) -> if h land 0x1 = 0 then find (h lsr 1) l (* bit=0, goto left *) else find (h lsr 1) r (* bit=1, goto right *) | Table buckets -> probe_find buckets (PArray.length buckets) h key 0 in find h t (** Insert [key] -> [value] in the buckets. *) let rec probe_insert buckets ~depth h key value = let n = PArray.length buckets in let rec probe i = if i = n then (* table seems full, split in two sub-hashtables *) let depth' = depth + 1 in let l, r = PArray.fold_left (fun (l,r) bucket -> match bucket with | Empty | Deleted -> (l,r) | Used (key',value') -> let h' = (X.hash key') lsr depth' in if h' land 0x1 = 0 then let l' = insert l ~depth:depth' h' key' value' in l', r else let r' = insert r ~depth:depth' h' key' value' in l, r') (empty n, empty n) buckets in Split (l, r) else (* look for an empty slot to insert the bucket *) let j = addr n h i in match PArray.get buckets j with | Empty | Deleted -> (* insert here *) let buckets' = PArray.set buckets j (Used (key, value)) in Table buckets' | Used (key', _) when X.equal key key' -> (* replace *) let buckets' = PArray.set buckets j (Used (key, value)) in Table buckets' | Used _ -> probe (i+1) (* probe failed, go further *) in probe 0 (** Insert [key] -> [value] in the sub-hashtable *) and insert t ~depth h key value = match t with | Split (l, r) -> if h land 0x1 = 0 then (* bit=0, goto left *) let l' = insert l ~depth:(depth+1) (h lsr 1) key value in Split (l', r) else (* bit=1, goto right *) let r' = insert r ~depth:(depth+1) (h lsr 1) key value in Split (l, r') | Table buckets -> (* insert in the flat hashtable *) probe_insert buckets ~depth h key value (** [replace t key val] returns a copy of [t] where [key] binds to [val] *) let replace t key value = let h = X.hash key in insert t ~depth:0 h key value (** Remove the bindings for the given key *) let remove t key = failwith "not implemented" (* TODO *) (** Fold on bindings *) let rec fold f acc t = match t with | Split (l, r) -> let acc' = fold f acc l in fold f acc' r | Table buckets -> PArray.fold_left (fun acc bucket -> match bucket with | Empty | Deleted -> acc | Used (key, value) -> f acc key value) acc buckets let iter f t = fold (fun () k v -> f k v) () t let size t = fold (fun n _ _ -> n + 1) 0 t let rec depth t = match t with | Table _ -> 0 | Split (l, r) -> (max (depth l) (depth r)) + 1 let to_seq t = Sequence.from_iter (fun k -> iter (fun key value -> k (key, value)) t) let of_seq ?(size=32) seq = Sequence.fold (fun t (k,v) -> replace t k v) (empty size) seq end