mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-08 04:05:30 -05:00
draft of functional hashtable
This commit is contained in:
parent
14a5210bc2
commit
6532906f1b
2 changed files with 360 additions and 0 deletions
275
fHashtbl.ml
Normal file
275
fHashtbl.ml
Normal file
|
|
@ -0,0 +1,275 @@
|
||||||
|
(*
|
||||||
|
Copyright (c) 2013, Simon Cruanes
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. Redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Functional (persistent) hashtable} *)
|
||||||
|
|
||||||
|
(** {2 Signatures} *)
|
||||||
|
|
||||||
|
module type HASH = sig
|
||||||
|
type t
|
||||||
|
val equal : t -> t -> bool
|
||||||
|
val hash : t -> int
|
||||||
|
end
|
||||||
|
|
||||||
|
(** The signature for such a functional hashtable *)
|
||||||
|
module type S = sig
|
||||||
|
type 'a t
|
||||||
|
type key
|
||||||
|
|
||||||
|
val empty : int -> 'a t
|
||||||
|
(** The empty hashtable (with sub-hashtables of given size) *)
|
||||||
|
|
||||||
|
val find : 'a t -> key -> 'a
|
||||||
|
(** Find the binding for this key, or raise Not_found *)
|
||||||
|
|
||||||
|
val replace : 'a t -> key -> 'a -> 'a t
|
||||||
|
(** [replace t key val] returns a copy of [t] where [key] binds to [val] *)
|
||||||
|
|
||||||
|
val remove : 'a t -> key -> 'a t
|
||||||
|
(** Remove the bindings for the given key *)
|
||||||
|
|
||||||
|
val fold : ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b
|
||||||
|
(** Fold on bindings *)
|
||||||
|
|
||||||
|
val iter : (key -> 'a -> unit) -> 'a t -> unit
|
||||||
|
(** Iterate on bindings *)
|
||||||
|
|
||||||
|
val size : 'a t -> int
|
||||||
|
(** Number of bindings *)
|
||||||
|
|
||||||
|
val to_seq : 'a t -> (key * 'a) Sequence.t
|
||||||
|
|
||||||
|
val of_seq : ?size:int -> (key * 'a) Sequence.t -> 'a t
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Persistent array} *)
|
||||||
|
|
||||||
|
module PArray = struct
|
||||||
|
type 'a t = 'a zipper ref
|
||||||
|
and 'a zipper =
|
||||||
|
| Array of 'a array
|
||||||
|
| Diff of int * 'a * 'a zipper ref
|
||||||
|
|
||||||
|
(* XXX maybe having a snapshot of the array from point to point may help? *)
|
||||||
|
|
||||||
|
let make size elt =
|
||||||
|
let a = Array.create size elt in
|
||||||
|
ref (Array a)
|
||||||
|
|
||||||
|
(** Recover the given version of the shared array. Returns the array
|
||||||
|
itself. *)
|
||||||
|
let rec reroot t =
|
||||||
|
match !t with
|
||||||
|
| Array a -> a
|
||||||
|
| Diff (i, v, t') ->
|
||||||
|
begin
|
||||||
|
let a = reroot t' in
|
||||||
|
let v' = a.(i) in
|
||||||
|
t' := Diff (i, v', t);
|
||||||
|
a.(i) <- v;
|
||||||
|
t := Array a;
|
||||||
|
a
|
||||||
|
end
|
||||||
|
|
||||||
|
let get t i =
|
||||||
|
match !t with
|
||||||
|
| Array a -> a.(i)
|
||||||
|
| Diff _ ->
|
||||||
|
let a = reroot t in
|
||||||
|
a.(i)
|
||||||
|
|
||||||
|
let set t i v =
|
||||||
|
let a =
|
||||||
|
match !t with
|
||||||
|
| Array a -> a
|
||||||
|
| Diff _ -> reroot t in
|
||||||
|
let v' = a.(i) in
|
||||||
|
if v == v'
|
||||||
|
then t (* no change *)
|
||||||
|
else begin
|
||||||
|
let t' = ref (Array a) in
|
||||||
|
a.(i) <- v;
|
||||||
|
t := Diff (i, v', t');
|
||||||
|
t' (* create new array *)
|
||||||
|
end
|
||||||
|
|
||||||
|
let fold_left f acc t =
|
||||||
|
let a = reroot t in
|
||||||
|
Array.fold_left f acc a
|
||||||
|
|
||||||
|
let rec length t =
|
||||||
|
match !t with
|
||||||
|
| Array a -> Array.length a
|
||||||
|
| Diff (_, _, t') -> length t'
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Constructor} *)
|
||||||
|
|
||||||
|
module Make(X : HASH) = struct
|
||||||
|
(** The hashtable is a binary tree, with persistent arrays as leaves.
|
||||||
|
Nodes at depth n of the tree are split on the n-th digit of the hash
|
||||||
|
(starting with the least significant bit as 0).
|
||||||
|
|
||||||
|
The left child is for bit=0, the right one for bit=1. *)
|
||||||
|
|
||||||
|
type key = X.t
|
||||||
|
|
||||||
|
type 'a t =
|
||||||
|
| Split of 'a t * 'a t (** Split on the last digit of the hash *)
|
||||||
|
| Table of 'a buckets (** Hashtable as a persistent array *)
|
||||||
|
(** The hashtable, as a tree of persistent open addressing hashtables *)
|
||||||
|
and 'a buckets = 'a bucket PArray.t
|
||||||
|
(** A persistent array of buckets *)
|
||||||
|
and 'a bucket =
|
||||||
|
| Empty
|
||||||
|
| Deleted
|
||||||
|
| Used of key * 'a
|
||||||
|
(** One buckets stores one key->value binding *)
|
||||||
|
|
||||||
|
let empty_buckets size =
|
||||||
|
PArray.make size Empty
|
||||||
|
|
||||||
|
(** Empty hashtable *)
|
||||||
|
let empty size =
|
||||||
|
Table (empty_buckets size)
|
||||||
|
|
||||||
|
(** The address in a bucket array, after probing [i] times *)
|
||||||
|
let addr n h i = ((h land max_int) + i) mod n
|
||||||
|
|
||||||
|
(** Find the bucket that contains the given [key]. [h] is
|
||||||
|
not necessarily the hash of the key, because it can have been
|
||||||
|
shifted to right several times. *)
|
||||||
|
let rec probe_find buckets n h key i =
|
||||||
|
if i = n then raise Not_found else begin
|
||||||
|
let j = addr n h i in
|
||||||
|
match PArray.get buckets j with
|
||||||
|
| Empty -> raise Not_found
|
||||||
|
| Used (key', value) when X.equal key key' ->
|
||||||
|
value (* found *)
|
||||||
|
| Used _ | Deleted ->
|
||||||
|
probe_find buckets n h key (i+1) (* go further *)
|
||||||
|
end
|
||||||
|
|
||||||
|
(** Find the value bound to the given [key] *)
|
||||||
|
let find t key =
|
||||||
|
let h = X.hash key in
|
||||||
|
(* find the appropriate leaf *)
|
||||||
|
let rec find h t =
|
||||||
|
match t with
|
||||||
|
| Split (l, r) ->
|
||||||
|
if h land 0x1 = 0
|
||||||
|
then find (h lsr 1) l (* bit=0, goto left *)
|
||||||
|
else find (h lsr 1) r (* bit=1, goto right *)
|
||||||
|
| Table buckets ->
|
||||||
|
probe_find buckets (PArray.length buckets) h key 0
|
||||||
|
in
|
||||||
|
find h t
|
||||||
|
|
||||||
|
(** Insert [key] -> [value] in the buckets. *)
|
||||||
|
let rec probe_insert buckets ~depth h key value =
|
||||||
|
let n = PArray.length buckets in
|
||||||
|
let rec probe i =
|
||||||
|
if i = n
|
||||||
|
then (* table seems full, split in two sub-hashtables *)
|
||||||
|
let depth' = depth + 1 in
|
||||||
|
let l, r = PArray.fold_left
|
||||||
|
(fun (l,r) bucket -> match bucket with
|
||||||
|
| Empty | Deleted -> (l,r)
|
||||||
|
| Used (key',value') ->
|
||||||
|
let h' = (X.hash key') lsr depth' in
|
||||||
|
if h' land 0x1 = 0
|
||||||
|
then
|
||||||
|
let l' = insert l ~depth:depth' h' key' value' in
|
||||||
|
l', r
|
||||||
|
else
|
||||||
|
let r' = insert r ~depth:depth' h' key' value' in
|
||||||
|
l, r')
|
||||||
|
(empty n, empty n) buckets in
|
||||||
|
Split (l, r)
|
||||||
|
else (* look for an empty slot to insert the bucket *)
|
||||||
|
let j = addr n h i in
|
||||||
|
match PArray.get buckets j with
|
||||||
|
| Empty | Deleted ->
|
||||||
|
(* insert here *)
|
||||||
|
let buckets' = PArray.set buckets j (Used (key, value)) in
|
||||||
|
Table buckets'
|
||||||
|
| Used (key', _) when X.equal key key' ->
|
||||||
|
(* replace *)
|
||||||
|
let buckets' = PArray.set buckets j (Used (key, value)) in
|
||||||
|
Table buckets'
|
||||||
|
| Used _ -> probe (i+1) (* probe failed, go further *)
|
||||||
|
in
|
||||||
|
probe 0
|
||||||
|
(** Insert [key] -> [value] in the sub-hashtable *)
|
||||||
|
and insert t ~depth h key value =
|
||||||
|
match t with
|
||||||
|
| Split (l, r) ->
|
||||||
|
if h land 0x1 = 0
|
||||||
|
then (* bit=0, goto left *)
|
||||||
|
let l' = insert l ~depth:(depth+1) (h lsr 1) key value in
|
||||||
|
Split (l', r)
|
||||||
|
else (* bit=1, goto right *)
|
||||||
|
let r' = insert r ~depth:(depth+1) (h lsr 1) key value in
|
||||||
|
Split (l, r')
|
||||||
|
| Table buckets ->
|
||||||
|
(* insert in the flat hashtable *)
|
||||||
|
probe_insert buckets ~depth h key value
|
||||||
|
|
||||||
|
(** [replace t key val] returns a copy of [t] where [key] binds to [val] *)
|
||||||
|
let replace t key value =
|
||||||
|
let h = X.hash key in
|
||||||
|
insert t ~depth:0 h key value
|
||||||
|
|
||||||
|
(** Remove the bindings for the given key *)
|
||||||
|
let remove t key =
|
||||||
|
failwith "not implemented" (* TODO *)
|
||||||
|
|
||||||
|
(** Fold on bindings *)
|
||||||
|
let rec fold f acc t =
|
||||||
|
match t with
|
||||||
|
| Split (l, r) ->
|
||||||
|
let acc' = fold f acc l in
|
||||||
|
fold f acc' r
|
||||||
|
| Table buckets ->
|
||||||
|
PArray.fold_left
|
||||||
|
(fun acc bucket -> match bucket with
|
||||||
|
| Empty | Deleted -> acc
|
||||||
|
| Used (key, value) -> f acc key value)
|
||||||
|
acc buckets
|
||||||
|
|
||||||
|
let iter f t =
|
||||||
|
fold (fun () k v -> f k v) () t
|
||||||
|
|
||||||
|
let size t =
|
||||||
|
fold (fun n _ _ -> n + 1) 0 t
|
||||||
|
|
||||||
|
let to_seq t =
|
||||||
|
Sequence.from_iter (fun k -> iter (fun key value -> k (key, value)) t)
|
||||||
|
|
||||||
|
let of_seq ?(size=32) seq =
|
||||||
|
Sequence.fold
|
||||||
|
(fun t (k,v) -> replace t k v)
|
||||||
|
(empty size) seq
|
||||||
|
end
|
||||||
85
fHashtbl.mli
Normal file
85
fHashtbl.mli
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
(*
|
||||||
|
Copyright (c) 2013, Simon Cruanes
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. Redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Functional (persistent) hashtable} *)
|
||||||
|
|
||||||
|
(** {2 Signatures} *)
|
||||||
|
|
||||||
|
module type HASH = sig
|
||||||
|
type t
|
||||||
|
val equal : t -> t -> bool
|
||||||
|
val hash : t -> int
|
||||||
|
end
|
||||||
|
|
||||||
|
(** The signature for such a functional hashtable *)
|
||||||
|
module type S = sig
|
||||||
|
type 'a t
|
||||||
|
type key
|
||||||
|
|
||||||
|
val empty : int -> 'a t
|
||||||
|
(** The empty hashtable (with sub-hashtables of given size) *)
|
||||||
|
|
||||||
|
val find : 'a t -> key -> 'a
|
||||||
|
(** Find the binding for this key, or raise Not_found *)
|
||||||
|
|
||||||
|
val replace : 'a t -> key -> 'a -> 'a t
|
||||||
|
(** [replace t key val] returns a copy of [t] where [key] binds to [val] *)
|
||||||
|
|
||||||
|
val remove : 'a t -> key -> 'a t
|
||||||
|
(** Remove the bindings for the given key *)
|
||||||
|
|
||||||
|
val fold : ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b
|
||||||
|
(** Fold on bindings *)
|
||||||
|
|
||||||
|
val iter : (key -> 'a -> unit) -> 'a t -> unit
|
||||||
|
(** Iterate on bindings *)
|
||||||
|
|
||||||
|
val size : 'a t -> int
|
||||||
|
(** Number of bindings *)
|
||||||
|
|
||||||
|
val to_seq : 'a t -> (key * 'a) Sequence.t
|
||||||
|
|
||||||
|
val of_seq : ?size:int -> (key * 'a) Sequence.t -> 'a t
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Persistent array} *)
|
||||||
|
|
||||||
|
module PArray : sig
|
||||||
|
type 'a t
|
||||||
|
|
||||||
|
val make : int -> 'a -> 'a t
|
||||||
|
|
||||||
|
val get : 'a t -> int -> 'a
|
||||||
|
|
||||||
|
val set : 'a t -> int -> 'a -> 'a t
|
||||||
|
|
||||||
|
val fold_left : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b
|
||||||
|
|
||||||
|
val length : 'a t -> int
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Constructor} *)
|
||||||
|
|
||||||
|
module Make(X : HASH) : S with type key = X.t
|
||||||
Loading…
Add table
Reference in a new issue