moved CCHashtbl to CCFlatHashtbl;

new module CCHashtbl that wraps and extends the standard hashtable
This commit is contained in:
Simon Cruanes 2014-09-03 01:07:48 +02:00
parent 46205b1e26
commit 8ade96b2f6
7 changed files with 649 additions and 259 deletions

View file

@ -62,6 +62,8 @@ structures comprise (some modules in `misc/`, some other in `core/`):
- `CCArray`, utilities on arrays and slices - `CCArray`, utilities on arrays and slices
- `CCLinq`, high-level query language over collections - `CCLinq`, high-level query language over collections
- `CCMultimap` and `CCMultiset`, functors defining persistent structures - `CCMultimap` and `CCMultiset`, functors defining persistent structures
- `CCHashtbl`, an extension of the standard hashtbl module
- `CCFlatHashtbl`, a flat (open-addressing) hashtable functorial implementation
- `CCKTree`, an abstract lazy tree structure (similar to what `CCKlist` is to lists) - `CCKTree`, an abstract lazy tree structure (similar to what `CCKlist` is to lists)
- small modules (basic types, utilities): - small modules (basic types, utilities):
- `CCInt` - `CCInt`

3
_oasis
View file

@ -48,7 +48,8 @@ Library "containers"
CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError, CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError,
CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash, CCCat, CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash, CCCat,
CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd, CCIO, CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd, CCIO,
CCRandom, CCLinq, CCKTree, CCTrie, CCString, CCHashtbl CCRandom, CCLinq, CCKTree, CCTrie, CCString, CCHashtbl,
CCFlatHashtbl
FindlibName: containers FindlibName: containers
Library "containers_string" Library "containers_string"

4
configure vendored
View file

@ -1,7 +1,7 @@
#!/bin/sh #!/bin/sh
# OASIS_START # OASIS_START
# DO NOT EDIT (digest: 6f7b8221311e800a7093dc3b793f67ca) # DO NOT EDIT (digest: 82230d61386befb40bc7377608e1f16e)
set -e set -e
FST=true FST=true
@ -23,5 +23,5 @@ for i in "$@"; do
esac esac
done done
make configure CONFIGUREFLAGS="$*" make configure CONFIGUREFLAGS="$@"
# OASIS_STOP # OASIS_STOP

272
core/CCFlatHashtbl.ml Normal file
View file

@ -0,0 +1,272 @@
(*
copyright (c) 2013-2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Open-Addressing Hash-table}
We use Robin-Hood hashing as described in
http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
with backward shift. *)
type 'a sequence = ('a -> unit) -> unit
module type S = sig
type key
type 'a t
val create : int -> 'a t
(** Create a new table of the given initial capacity *)
val mem : 'a t -> key -> bool
(** [mem tbl k] returns [true] iff [k] is mapped to some value
in [tbl] *)
val find : 'a t -> key -> 'a option
val find_exn : 'a t -> key -> 'a
val get : key -> 'a t -> 'a option
(** [get k tbl] recovers the value for [k] in [tbl], or
returns [None] if [k] doesn't belong *)
val get_exn : key -> 'a t -> 'a
val add : 'a t -> key -> 'a -> unit
(** [add tbl k v] adds [k -> v] to [tbl], possibly replacing the old
value associated with [k]. *)
val remove : 'a t -> key -> unit
(** Remove binding *)
val size : _ t -> int
(** Number of bindings *)
val of_list : (key * 'a) list -> 'a t
val to_list : 'a t -> (key * 'a) list
val of_seq : (key * 'a) sequence -> 'a t
val to_seq : 'a t -> (key * 'a) sequence
val keys : _ t -> key sequence
val values : 'a t -> 'a sequence
end
module type HASHABLE = sig
type t
val equal : t -> t -> bool
val hash : t -> int
end
module Make(X : HASHABLE) = struct
type key = X.t
type 'a bucket =
| Empty
| Key of key * 'a * int (* store the hash too *)
type 'a t = {
mutable arr : 'a bucket array;
mutable size : int;
}
let size tbl = tbl.size
let _reached_max_load tbl =
let n = Array.length tbl.arr in
(n - tbl.size) < n/10 (* full at 9/10 *)
let create i =
let i = min Sys.max_array_length (max i 8) in
{ arr=Array.make i Empty; size=0; }
(* initial index for a value with hash [h] *)
let _initial_idx tbl h =
h mod Array.length tbl.arr
let _succ tbl i =
let i' = i+1 in
if i' = Array.length tbl.arr then 0 else i'
let _pred tbl i =
if i = 0 then Array.length tbl.arr - 1 else i-1
(* distance to initial bucket, at index [i] with hash [h] *)
let _dib tbl h i =
let i0 = _initial_idx tbl h in
if i>=i0
then i-i0
else i+ (Array.length tbl.arr - i0 - 1)
(* insert k->v in [tbl], currently at index [i] *)
let rec _linear_probe tbl k v h_k i =
match tbl.arr.(i) with
| Empty ->
(* add binding *)
tbl.size <- 1 + tbl.size;
tbl.arr.(i) <- Key (k, v, h_k)
| Key (k', _, h_k') when X.equal k k' ->
(* replace *)
assert (h_k = h_k');
tbl.arr.(i) <- Key (k, v, h_k)
| Key (k', v', h_k') ->
if _dib tbl h_k i < _dib tbl h_k' i
then (
(* replace *)
tbl.arr.(i) <- Key (k, v, h_k);
_linear_probe tbl k' v' h_k' (_succ tbl i)
) else
(* go further *)
_linear_probe tbl k v h_k (_succ tbl i)
(* resize table: put a bigger array in it, then insert values
from the old array *)
let _resize tbl =
let size' = min Sys.max_array_length (2 * Array.length tbl.arr) in
let arr' = Array.make size' Empty in
let old_arr = tbl.arr in
(* replace with new table *)
tbl.size <- 0;
tbl.arr <- arr';
Array.iter
(function
| Empty -> ()
| Key (k, v, h_k) -> _linear_probe tbl k v h_k (_initial_idx tbl h_k)
) old_arr
let add tbl k v =
if _reached_max_load tbl
then _resize tbl;
(* insert value *)
let h_k = X.hash k in
_linear_probe tbl k v h_k (_initial_idx tbl h_k)
(* shift back elements that have a DIB > 0 until an empty bucket is
met, or some element doesn't need shifting *)
let rec _backward_shift tbl i =
match tbl.arr.(i) with
| Empty -> ()
| Key (_, _, h_k) when _dib tbl h_k i = 0 ->
() (* stop *)
| Key (k, v, h_k) as bucket ->
assert (_dib tbl h_k i > 0);
(* shift backward *)
tbl.arr.(_pred tbl i) <- bucket;
tbl.arr.(i) <- Empty;
_backward_shift tbl (_succ tbl i)
(* linear probing for removal of [k] *)
let rec _linear_probe_remove tbl k h_k i =
match tbl.arr.(i) with
| Empty -> ()
| Key (k', _, _) when X.equal k k' ->
tbl.arr.(i) <- Empty;
tbl.size <- tbl.size - 1;
_backward_shift tbl (_succ tbl i)
| Key (_, _, h_k') ->
if _dib tbl h_k' i < _dib tbl h_k i
then () (* [k] not present, would be here otherwise *)
else _linear_probe_remove tbl k h_k (_succ tbl i)
let remove tbl k =
let h_k = X.hash k in
_linear_probe_remove tbl k h_k (_initial_idx tbl h_k)
let rec _get_exn tbl k h_k i dib =
match tbl.arr.(i) with
| Empty -> raise Not_found
| Key (k', v', _) when X.equal k k' -> v'
| Key (_, _, h_k') ->
if _dib tbl h_k' i < dib
then raise Not_found (* [k] would be here otherwise *)
else _get_exn tbl k h_k (_succ tbl i) (dib+1)
let get_exn k tbl =
let h_k = X.hash k in
let i0 = _initial_idx tbl h_k in
match tbl.arr.(i0) with
| Empty -> raise Not_found
| Key (k', v, _) ->
if X.equal k k' then v
else let i1 = _succ tbl i0 in
match tbl.arr.(i1) with
| Empty -> raise Not_found
| Key (k', v, _) ->
if X.equal k k' then v
else
let i2 = _succ tbl i1 in
match tbl.arr.(i2) with
| Empty -> raise Not_found
| Key (k', v, _) ->
if X.equal k k' then v
else _get_exn tbl k h_k (_succ tbl i2) 3
let get k tbl =
try Some (get_exn k tbl)
with Not_found -> None
let find_exn tbl k = get_exn k tbl
let find tbl k =
try Some (get_exn k tbl)
with Not_found -> None
let mem tbl k =
try ignore (get_exn k tbl); true
with Not_found -> false
let of_list l =
let tbl = create 16 in
List.iter (fun (k,v) -> add tbl k v) l;
tbl
let to_list tbl =
Array.fold_left
(fun acc bucket -> match bucket with
| Empty -> acc
| Key (k,v,_) -> (k,v)::acc
) [] tbl.arr
let of_seq seq =
let tbl = create 16 in
seq (fun (k,v) -> add tbl k v);
tbl
let to_seq tbl yield =
Array.iter
(function Empty -> () | Key (k, v, _) -> yield (k,v))
tbl.arr
let keys tbl yield =
Array.iter
(function Empty -> () | Key (k, _, _) -> yield k)
tbl.arr
let values tbl yield =
Array.iter
(function Empty -> () | Key (_, v, _) -> yield v)
tbl.arr
end

84
core/CCFlatHashtbl.mli Normal file
View file

@ -0,0 +1,84 @@
(*
copyright (c) 2013-2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Open-Addressing Hash-table}
This module was previously named [CCHashtbl], but the name is now used for
an extension of the standard library's hashtables.
@since NEXT_RELEASE *)
type 'a sequence = ('a -> unit) -> unit
module type S = sig
type key
type 'a t
val create : int -> 'a t
(** Create a new table of the given initial capacity *)
val mem : 'a t -> key -> bool
(** [mem tbl k] returns [true] iff [k] is mapped to some value
in [tbl] *)
val find : 'a t -> key -> 'a option
val find_exn : 'a t -> key -> 'a
val get : key -> 'a t -> 'a option
(** [get k tbl] recovers the value for [k] in [tbl], or
returns [None] if [k] doesn't belong *)
val get_exn : key -> 'a t -> 'a
val add : 'a t -> key -> 'a -> unit
(** [add tbl k v] adds [k -> v] to [tbl], possibly replacing the old
value associated with [k]. *)
val remove : 'a t -> key -> unit
(** Remove binding *)
val size : _ t -> int
(** Number of bindings *)
val of_list : (key * 'a) list -> 'a t
val to_list : 'a t -> (key * 'a) list
val of_seq : (key * 'a) sequence -> 'a t
val to_seq : 'a t -> (key * 'a) sequence
val keys : _ t -> key sequence
val values : 'a t -> 'a sequence
end
module type HASHABLE = sig
type t
val equal : t -> t -> bool
val hash : t -> int
end
module Make(X : HASHABLE) : S with type key = X.t

View file

@ -24,249 +24,207 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*) *)
(** {1 Extension to the standard Hashtbl} *)
(** {1 Open-Addressing Hash-table}
We use Robin-Hood hashing as described in
http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
with backward shift. *)
type 'a sequence = ('a -> unit) -> unit type 'a sequence = ('a -> unit) -> unit
type 'a eq = 'a -> 'a -> bool
type 'a hash = 'a -> int
(** {2 Polymorphic tables} *)
let get tbl x =
try Some (Hashtbl.find tbl x)
with Not_found -> None
let keys tbl k = Hashtbl.iter (fun key _ -> k key) tbl
let values tbl k = Hashtbl.iter (fun _ v -> k v) tbl
let to_seq tbl k = Hashtbl.iter (fun key v -> k (key,v)) tbl
let of_seq seq =
let tbl = Hashtbl.create 32 in
seq (fun (k,v) -> Hashtbl.add tbl k v);
tbl
let to_list tbl =
Hashtbl.fold
(fun k v l -> (k,v) :: l)
tbl []
let of_list l =
let tbl = Hashtbl.create 32 in
List.iter (fun (k,v) -> Hashtbl.add tbl k v) l;
tbl
(** {2 Functor} *)
module type S = sig module type S = sig
type key include Hashtbl.S
type 'a t
val create : int -> 'a t val get : 'a t -> key -> 'a option
(** Create a new table of the given initial capacity *) (** Safe version of {!Hashtbl.find} *)
val mem : 'a t -> key -> bool val keys : 'a t -> key sequence
(** [mem tbl k] returns [true] iff [k] is mapped to some value (** Iterate on keys (similar order as {!Hashtbl.iter}) *)
in [tbl] *)
val find : 'a t -> key -> 'a option val values : 'a t -> 'a sequence
(** Iterate on values in the table *)
val find_exn : 'a t -> key -> 'a val to_seq : 'a t -> (key * 'a) sequence
(** Iterate on values in the table *)
val get : key -> 'a t -> 'a option
(** [get k tbl] recovers the value for [k] in [tbl], or
returns [None] if [k] doesn't belong *)
val get_exn : key -> 'a t -> 'a
val add : 'a t -> key -> 'a -> unit
(** [add tbl k v] adds [k -> v] to [tbl], possibly replacing the old
value associated with [k]. *)
val remove : 'a t -> key -> unit
(** Remove binding *)
val size : _ t -> int
(** Number of bindings *)
val of_list : (key * 'a) list -> 'a t
val to_list : 'a t -> (key * 'a) list
val of_seq : (key * 'a) sequence -> 'a t val of_seq : (key * 'a) sequence -> 'a t
val to_seq : 'a t -> (key * 'a) sequence (** From the given bindings, added in order *)
val keys : _ t -> key sequence val to_list : 'a t -> (key * 'a) list
val values : 'a t -> 'a sequence (** List of bindings (order unspecified) *)
val of_list : (key * 'a) list -> 'a t
(** From the given list of bindings, added in order *)
end end
module type HASHABLE = sig module Make(X : Hashtbl.HashedType) = struct
type t include Hashtbl.Make(X)
val equal : t -> t -> bool
val hash : t -> int
end
module Make(X : HASHABLE) = struct let get tbl x =
type key = X.t try Some (find tbl x)
type 'a bucket =
| Empty
| Key of key * 'a * int (* store the hash too *)
type 'a t = {
mutable arr : 'a bucket array;
mutable size : int;
}
let size tbl = tbl.size
let _reached_max_load tbl =
let n = Array.length tbl.arr in
(n - tbl.size) < n/10 (* full at 9/10 *)
let create i =
let i = min Sys.max_array_length (max i 8) in
{ arr=Array.make i Empty; size=0; }
(* initial index for a value with hash [h] *)
let _initial_idx tbl h =
h mod Array.length tbl.arr
let _succ tbl i =
let i' = i+1 in
if i' = Array.length tbl.arr then 0 else i'
let _pred tbl i =
if i = 0 then Array.length tbl.arr - 1 else i-1
(* distance to initial bucket, at index [i] with hash [h] *)
let _dib tbl h i =
let i0 = _initial_idx tbl h in
if i>=i0
then i-i0
else i+ (Array.length tbl.arr - i0 - 1)
(* insert k->v in [tbl], currently at index [i] *)
let rec _linear_probe tbl k v h_k i =
match tbl.arr.(i) with
| Empty ->
(* add binding *)
tbl.size <- 1 + tbl.size;
tbl.arr.(i) <- Key (k, v, h_k)
| Key (k', _, h_k') when X.equal k k' ->
(* replace *)
assert (h_k = h_k');
tbl.arr.(i) <- Key (k, v, h_k)
| Key (k', v', h_k') ->
if _dib tbl h_k i < _dib tbl h_k' i
then (
(* replace *)
tbl.arr.(i) <- Key (k, v, h_k);
_linear_probe tbl k' v' h_k' (_succ tbl i)
) else
(* go further *)
_linear_probe tbl k v h_k (_succ tbl i)
(* resize table: put a bigger array in it, then insert values
from the old array *)
let _resize tbl =
let size' = min Sys.max_array_length (2 * Array.length tbl.arr) in
let arr' = Array.make size' Empty in
let old_arr = tbl.arr in
(* replace with new table *)
tbl.size <- 0;
tbl.arr <- arr';
Array.iter
(function
| Empty -> ()
| Key (k, v, h_k) -> _linear_probe tbl k v h_k (_initial_idx tbl h_k)
) old_arr
let add tbl k v =
if _reached_max_load tbl
then _resize tbl;
(* insert value *)
let h_k = X.hash k in
_linear_probe tbl k v h_k (_initial_idx tbl h_k)
(* shift back elements that have a DIB > 0 until an empty bucket is
met, or some element doesn't need shifting *)
let rec _backward_shift tbl i =
match tbl.arr.(i) with
| Empty -> ()
| Key (_, _, h_k) when _dib tbl h_k i = 0 ->
() (* stop *)
| Key (k, v, h_k) as bucket ->
assert (_dib tbl h_k i > 0);
(* shift backward *)
tbl.arr.(_pred tbl i) <- bucket;
tbl.arr.(i) <- Empty;
_backward_shift tbl (_succ tbl i)
(* linear probing for removal of [k] *)
let rec _linear_probe_remove tbl k h_k i =
match tbl.arr.(i) with
| Empty -> ()
| Key (k', _, _) when X.equal k k' ->
tbl.arr.(i) <- Empty;
tbl.size <- tbl.size - 1;
_backward_shift tbl (_succ tbl i)
| Key (_, _, h_k') ->
if _dib tbl h_k' i < _dib tbl h_k i
then () (* [k] not present, would be here otherwise *)
else _linear_probe_remove tbl k h_k (_succ tbl i)
let remove tbl k =
let h_k = X.hash k in
_linear_probe_remove tbl k h_k (_initial_idx tbl h_k)
let rec _get_exn tbl k h_k i dib =
match tbl.arr.(i) with
| Empty -> raise Not_found
| Key (k', v', _) when X.equal k k' -> v'
| Key (_, _, h_k') ->
if _dib tbl h_k' i < dib
then raise Not_found (* [k] would be here otherwise *)
else _get_exn tbl k h_k (_succ tbl i) (dib+1)
let get_exn k tbl =
let h_k = X.hash k in
let i0 = _initial_idx tbl h_k in
match tbl.arr.(i0) with
| Empty -> raise Not_found
| Key (k', v, _) ->
if X.equal k k' then v
else let i1 = _succ tbl i0 in
match tbl.arr.(i1) with
| Empty -> raise Not_found
| Key (k', v, _) ->
if X.equal k k' then v
else
let i2 = _succ tbl i1 in
match tbl.arr.(i2) with
| Empty -> raise Not_found
| Key (k', v, _) ->
if X.equal k k' then v
else _get_exn tbl k h_k (_succ tbl i2) 3
let get k tbl =
try Some (get_exn k tbl)
with Not_found -> None with Not_found -> None
let find_exn tbl k = get_exn k tbl let keys tbl k = iter (fun key _ -> k key) tbl
let find tbl k = let values tbl k = iter (fun _ v -> k v) tbl
try Some (get_exn k tbl)
with Not_found -> None
let mem tbl k = let to_seq tbl k = iter (fun key v -> k (key,v)) tbl
try ignore (get_exn k tbl); true
with Not_found -> false
let of_list l =
let tbl = create 16 in
List.iter (fun (k,v) -> add tbl k v) l;
tbl
let to_list tbl =
Array.fold_left
(fun acc bucket -> match bucket with
| Empty -> acc
| Key (k,v,_) -> (k,v)::acc
) [] tbl.arr
let of_seq seq = let of_seq seq =
let tbl = create 16 in let tbl = create 32 in
seq (fun (k,v) -> add tbl k v); seq (fun (k,v) -> add tbl k v);
tbl tbl
let to_seq tbl yield = let to_list tbl =
Array.iter fold
(function Empty -> () | Key (k, v, _) -> yield (k,v)) (fun k v l -> (k,v) :: l)
tbl.arr tbl []
let keys tbl yield = let of_list l =
Array.iter let tbl = create 32 in
(function Empty -> () | Key (k, _, _) -> yield k) List.iter (fun (k,v) -> add tbl k v) l;
tbl.arr tbl
let values tbl yield =
Array.iter
(function Empty -> () | Key (_, v, _) -> yield v)
tbl.arr
end end
(** {2 Default Table} *)
module type DEFAULT = sig
type key
type 'a t
(** A hashtable for keys of type [key] and values of type ['a] *)
val create : ?size:int -> 'a -> 'a t
(** [create d] makes a new table that maps every key to [d] by default.
@param size optional size of the initial table *)
val create_with : ?size:int -> (key -> 'a) -> 'a t
(** Similar to [create d] but here [d] is a function called to obtain a
new default value for each distinct key. Useful if the default
value is stateful. *)
val get : 'a t -> key -> 'a
(** Unfailing retrieval (possibly returns the default value) *)
val set : 'a t -> key -> 'a -> unit
(** Replace the current binding for this key *)
val remove : 'a t -> key -> unit
(** Remove the binding for this key. If [get tbl k] is called later, the
default value for the table will be returned *)
val to_seq : 'a t -> (key * 'a) sequence
(** Pairs of [(elem, count)] for all elements whose count is positive *)
end
module MakeDefault(X : Hashtbl.HashedType) = struct
type key = X.t
module T = Hashtbl.Make(X)
type 'a t = {
default : key -> 'a;
tbl : 'a T.t
}
let create_with ?(size=32) default = { default; tbl=T.create size }
let create ?size d = create_with ?size (fun _ -> d)
let get tbl k =
try T.find tbl.tbl k
with Not_found ->
let v = tbl.default k in
T.add tbl.tbl k v;
v
let set tbl k v = T.replace tbl.tbl k v
let remove tbl k = T.remove tbl.tbl k
let to_seq tbl k = T.iter (fun key v -> k (key,v)) tbl.tbl
end
(** {2 Count occurrences using a Hashtbl} *)
module type COUNTER = sig
type elt
(** Elements that are to be counted *)
type t
val create : int -> t
(** A counter maps elements to natural numbers (the number of times this
element occurred) *)
val incr : t -> elt -> unit
(** Increment the counter for the given element *)
val incr_by : t -> int -> elt -> unit
(** Add several occurrences at once *)
val get : t -> elt -> int
(** Number of occurrences for this element *)
val add_seq : t -> elt sequence -> unit
(** Increment each element of the sequence *)
val of_seq : elt sequence -> t
(** [of_seq s] is the same as [add_seq (create ())] *)
end
module MakeCounter(X : Hashtbl.HashedType) = struct
type elt = X.t
module T = Hashtbl.Make(X)
type t = int T.t
let create size = T.create size
let get tbl x = try T.find tbl x with Not_found -> 0
let incr tbl x =
let n = get tbl x in
T.replace tbl x (n+1)
let incr_by tbl n x =
let n' = get tbl x in
if n' + n <= 0
then T.remove tbl x
else T.replace tbl x (n+n')
let add_seq tbl seq = seq (incr tbl)
let of_seq seq =
let tbl = create 32 in
add_seq tbl seq;
tbl
end

View file

@ -25,55 +25,128 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*) *)
(** {1 Open-Addressing Hash-table} *) (** {1 Extension to the standard Hashtbl}
@since NEXT_RELEASE *)
type 'a sequence = ('a -> unit) -> unit type 'a sequence = ('a -> unit) -> unit
type 'a eq = 'a -> 'a -> bool
type 'a hash = 'a -> int
(** {2 Polymorphic tables} *)
val get : ('a,'b) Hashtbl.t -> 'a -> 'b option
(** Safe version of {!Hashtbl.find} *)
val keys : ('a,'b) Hashtbl.t -> 'a sequence
(** Iterate on keys (similar order as {!Hashtbl.iter}) *)
val values : ('a,'b) Hashtbl.t -> 'b sequence
(** Iterate on values in the table *)
val to_seq : ('a,'b) Hashtbl.t -> ('a * 'b) sequence
(** Iterate on bindings in the table *)
val of_seq : ('a * 'b) sequence -> ('a,'b) Hashtbl.t
(** From the given bindings, added in order *)
val to_list : ('a,'b) Hashtbl.t -> ('a * 'b) list
(** List of bindings (order unspecified) *)
val of_list : ('a * 'b) list -> ('a,'b) Hashtbl.t
(** From the given list of bindings, added in order *)
(** {2 Functor} *)
module type S = sig module type S = sig
type key include Hashtbl.S
type 'a t
val create : int -> 'a t val get : 'a t -> key -> 'a option
(** Create a new table of the given initial capacity *) (** Safe version of {!Hashtbl.find} *)
val mem : 'a t -> key -> bool val keys : 'a t -> key sequence
(** [mem tbl k] returns [true] iff [k] is mapped to some value (** Iterate on keys (similar order as {!Hashtbl.iter}) *)
in [tbl] *)
val find : 'a t -> key -> 'a option val values : 'a t -> 'a sequence
(** Iterate on values in the table *)
val find_exn : 'a t -> key -> 'a val to_seq : 'a t -> (key * 'a) sequence
(** Iterate on values in the table *)
val get : key -> 'a t -> 'a option
(** [get k tbl] recovers the value for [k] in [tbl], or
returns [None] if [k] doesn't belong *)
val get_exn : key -> 'a t -> 'a
val add : 'a t -> key -> 'a -> unit
(** [add tbl k v] adds [k -> v] to [tbl], possibly replacing the old
value associated with [k]. *)
val remove : 'a t -> key -> unit
(** Remove binding *)
val size : _ t -> int
(** Number of bindings *)
val of_list : (key * 'a) list -> 'a t
val to_list : 'a t -> (key * 'a) list
val of_seq : (key * 'a) sequence -> 'a t val of_seq : (key * 'a) sequence -> 'a t
(** From the given bindings, added in order *)
val to_list : 'a t -> (key * 'a) list
(** List of bindings (order unspecified) *)
val of_list : (key * 'a) list -> 'a t
(** From the given list of bindings, added in order *)
end
module Make(X : Hashtbl.HashedType) : S with type key = X.t
(** {2 Default Table}
A table with a default element for keys that were never added. *)
module type DEFAULT = sig
type key
type 'a t
(** A hashtable for keys of type [key] and values of type ['a] *)
val create : ?size:int -> 'a -> 'a t
(** [create d] makes a new table that maps every key to [d] by default.
@param size optional size of the initial table *)
val create_with : ?size:int -> (key -> 'a) -> 'a t
(** Similar to [create d] but here [d] is a function called to obtain a
new default value for each distinct key. Useful if the default
value is stateful. *)
val get : 'a t -> key -> 'a
(** Unfailing retrieval (possibly returns the default value). This will
modify the table if the key wasn't present. *)
val set : 'a t -> key -> 'a -> unit
(** Replace the current binding for this key *)
val remove : 'a t -> key -> unit
(** Remove the binding for this key. If [get tbl k] is called later, the
default value for the table will be returned *)
val to_seq : 'a t -> (key * 'a) sequence val to_seq : 'a t -> (key * 'a) sequence
(** Pairs of [(elem, value)] for all elements on which [get] was called *)
val keys : _ t -> key sequence
val values : 'a t -> 'a sequence
end end
module type HASHABLE = sig module MakeDefault(X : Hashtbl.HashedType) : DEFAULT with type key = X.t
(** {2 Count occurrences using a Hashtbl} *)
module type COUNTER = sig
type elt
(** Elements that are to be counted *)
type t type t
val equal : t -> t -> bool
val hash : t -> int val create : int -> t
(** A counter maps elements to natural numbers (the number of times this
element occurred) *)
val incr : t -> elt -> unit
(** Increment the counter for the given element *)
val incr_by : t -> int -> elt -> unit
(** Add several occurrences at once *)
val get : t -> elt -> int
(** Number of occurrences for this element *)
val add_seq : t -> elt sequence -> unit
(** Increment each element of the sequence *)
val of_seq : elt sequence -> t
(** [of_seq s] is the same as [add_seq (create ())] *)
end end
module Make(X : HASHABLE) : S with type key = X.t module MakeCounter(X : Hashtbl.HashedType) : COUNTER with type elt = X.t