mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
CCHashtbl: open-addressing table (Robin-Hood hashing)
This commit is contained in:
parent
01c9573ae6
commit
af84e2dcc7
4 changed files with 372 additions and 1 deletions
2
_oasis
2
_oasis
|
|
@ -47,7 +47,7 @@ Library "containers"
|
|||
CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError,
|
||||
CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash,
|
||||
CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd,
|
||||
CCRandom, CCLinq, CCKTree, CCTrie, CCString
|
||||
CCRandom, CCLinq, CCKTree, CCTrie, CCString, CCHashtbl
|
||||
FindlibName: containers
|
||||
|
||||
Library "containers_string"
|
||||
|
|
|
|||
259
core/CCHashtbl.ml
Normal file
259
core/CCHashtbl.ml
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2014, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
|
||||
(** {1 Open-Addressing Hash-table}
|
||||
|
||||
We use Robin-Hood hashing as described in
|
||||
http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
|
||||
with backward shift. *)
|
||||
|
||||
type 'a sequence = ('a -> unit) -> unit
|
||||
|
||||
module type S = sig
|
||||
type key
|
||||
type 'a t
|
||||
|
||||
val create : int -> 'a t
|
||||
(** Create a new table of the given initial capacity *)
|
||||
|
||||
val mem : 'a t -> key -> bool
|
||||
(** [mem tbl k] returns [true] iff [k] is mapped to some value
|
||||
in [tbl] *)
|
||||
|
||||
val find : 'a t -> key -> 'a option
|
||||
|
||||
val find_exn : 'a t -> key -> 'a
|
||||
|
||||
val get : key -> 'a t -> 'a option
|
||||
(** [get k tbl] recovers the value for [k] in [tbl], or
|
||||
returns [None] if [k] doesn't belong *)
|
||||
|
||||
val get_exn : key -> 'a t -> 'a
|
||||
|
||||
val add : 'a t -> key -> 'a -> unit
|
||||
(** [add tbl k v] adds [k -> v] to [tbl], possibly replacing the old
|
||||
value associated with [k]. *)
|
||||
|
||||
val remove : 'a t -> key -> unit
|
||||
(** Remove binding *)
|
||||
|
||||
val size : _ t -> int
|
||||
(** Number of bindings *)
|
||||
|
||||
val of_list : (key * 'a) list -> 'a t
|
||||
val to_list : 'a t -> (key * 'a) list
|
||||
|
||||
val of_seq : (key * 'a) sequence -> 'a t
|
||||
val to_seq : 'a t -> (key * 'a) sequence
|
||||
|
||||
val keys : _ t -> key sequence
|
||||
val values : 'a t -> 'a sequence
|
||||
end
|
||||
|
||||
module type HASHABLE = sig
|
||||
type t
|
||||
val equal : t -> t -> bool
|
||||
val hash : t -> int
|
||||
end
|
||||
|
||||
module Make(X : HASHABLE) = struct
|
||||
type key = X.t
|
||||
|
||||
type 'a bucket =
|
||||
| Empty
|
||||
| Key of key * 'a * int (* store the hash too *)
|
||||
|
||||
type 'a t = {
|
||||
mutable arr : 'a bucket array;
|
||||
mutable size : int;
|
||||
}
|
||||
|
||||
let size tbl = tbl.size
|
||||
|
||||
let _reached_max_load tbl =
|
||||
let n = Array.length tbl.arr in
|
||||
(n - tbl.size) < n/10 (* full at 9/10 *)
|
||||
|
||||
let create i =
|
||||
let i = min Sys.max_array_length (max i 8) in
|
||||
{ arr=Array.make i Empty; size=0; }
|
||||
|
||||
(* initial index for a value with hash [h] *)
|
||||
let _initial_idx tbl h =
|
||||
h mod Array.length tbl.arr
|
||||
|
||||
let _succ tbl i =
|
||||
if i = Array.length tbl.arr-1 then 0 else i+1
|
||||
|
||||
let _pred tbl i =
|
||||
if i = 0 then Array.length tbl.arr - 1 else i-1
|
||||
|
||||
(* distance to initial bucket, at index [i] with hash [h] *)
|
||||
let _dib tbl h i =
|
||||
let i0 = _initial_idx tbl h in
|
||||
if i>=i0
|
||||
then i-i0
|
||||
else i+ (Array.length tbl.arr - i0 - 1)
|
||||
|
||||
(* insert k->v in [tbl], currently at index [i] *)
|
||||
let rec _linear_probe tbl k v h_k i =
|
||||
match tbl.arr.(i) with
|
||||
| Empty ->
|
||||
(* add binding *)
|
||||
tbl.size <- 1 + tbl.size;
|
||||
tbl.arr.(i) <- Key (k, v, h_k)
|
||||
| Key (k', _, h_k') when X.equal k k' ->
|
||||
(* replace *)
|
||||
assert (h_k = h_k');
|
||||
tbl.arr.(i) <- Key (k, v, h_k)
|
||||
| Key (k', v', h_k') ->
|
||||
if _dib tbl h_k i < _dib tbl h_k' i
|
||||
then (
|
||||
(* replace *)
|
||||
tbl.arr.(i) <- Key (k, v, h_k);
|
||||
_linear_probe tbl k' v' h_k' (_succ tbl i)
|
||||
) else
|
||||
(* go further *)
|
||||
_linear_probe tbl k v h_k (_succ tbl i)
|
||||
|
||||
(* resize table: put a bigger array in it, then insert values
|
||||
from the old array *)
|
||||
let _resize tbl =
|
||||
let size' = min Sys.max_array_length (2 * Array.length tbl.arr) in
|
||||
let arr' = Array.make size' Empty in
|
||||
let old_arr = tbl.arr in
|
||||
(* replace with new table *)
|
||||
tbl.size <- 0;
|
||||
tbl.arr <- arr';
|
||||
Array.iter
|
||||
(function
|
||||
| Empty -> ()
|
||||
| Key (k, v, h_k) -> _linear_probe tbl k v h_k (_initial_idx tbl h_k)
|
||||
) old_arr
|
||||
|
||||
let add tbl k v =
|
||||
if _reached_max_load tbl
|
||||
then _resize tbl;
|
||||
(* insert value *)
|
||||
let h_k = X.hash k in
|
||||
_linear_probe tbl k v h_k (_initial_idx tbl h_k)
|
||||
|
||||
(* shift back elements that have a DIB > 0 until an empty bucket is
|
||||
met, or some element doesn't need shifting *)
|
||||
let rec _backward_shift tbl i =
|
||||
match tbl.arr.(i) with
|
||||
| Empty -> ()
|
||||
| Key (_, _, h_k) when _dib tbl h_k i = 0 ->
|
||||
() (* stop *)
|
||||
| Key (k, v, h_k) as bucket ->
|
||||
assert (_dib tbl h_k i > 0);
|
||||
(* shift backward *)
|
||||
tbl.arr.(_pred tbl i) <- bucket;
|
||||
tbl.arr.(i) <- Empty;
|
||||
_backward_shift tbl (_succ tbl i)
|
||||
|
||||
(* linear probing for removal of [k] *)
|
||||
let rec _linear_probe_remove tbl k h_k i =
|
||||
match tbl.arr.(i) with
|
||||
| Empty -> ()
|
||||
| Key (k', _, _) when X.equal k k' ->
|
||||
tbl.arr.(i) <- Empty;
|
||||
tbl.size <- tbl.size - 1;
|
||||
_backward_shift tbl (_succ tbl i)
|
||||
| Key (_, _, h_k') ->
|
||||
if _dib tbl h_k' i < _dib tbl h_k i
|
||||
then () (* [k] not present, would be here otherwise *)
|
||||
else _linear_probe_remove tbl k h_k (_succ tbl i)
|
||||
|
||||
let remove tbl k =
|
||||
let h_k = X.hash k in
|
||||
_linear_probe_remove tbl k h_k (_initial_idx tbl h_k)
|
||||
|
||||
let rec _get_exn tbl k h_k i dib =
|
||||
match tbl.arr.(i) with
|
||||
| Empty -> raise Not_found
|
||||
| Key (k', v', _) when X.equal k k' -> v'
|
||||
| Key (_, _, h_k') ->
|
||||
if (dib > 3 && _dib tbl h_k' i < dib)
|
||||
then raise Not_found (* [k] would be here otherwise *)
|
||||
else _get_exn tbl k h_k (_succ tbl i) (dib+1)
|
||||
|
||||
let get_exn k tbl =
|
||||
let h_k = X.hash k in
|
||||
let i0 = _initial_idx tbl h_k in
|
||||
match tbl.arr.(i0) with
|
||||
| Empty -> raise Not_found
|
||||
| Key (k', v, _) when X.equal k k' -> v
|
||||
| Key _ -> _get_exn tbl k h_k (_succ tbl i0) 1
|
||||
|
||||
let get k tbl =
|
||||
try Some (get_exn k tbl)
|
||||
with Not_found -> None
|
||||
|
||||
let find_exn tbl k = get_exn k tbl
|
||||
|
||||
let find tbl k =
|
||||
try Some (get_exn k tbl)
|
||||
with Not_found -> None
|
||||
|
||||
let mem tbl k =
|
||||
try ignore (get_exn k tbl); true
|
||||
with Not_found -> false
|
||||
|
||||
let of_list l =
|
||||
let tbl = create 16 in
|
||||
List.iter (fun (k,v) -> add tbl k v) l;
|
||||
tbl
|
||||
|
||||
let to_list tbl =
|
||||
Array.fold_left
|
||||
(fun acc bucket -> match bucket with
|
||||
| Empty -> acc
|
||||
| Key (k,v,_) -> (k,v)::acc
|
||||
) [] tbl.arr
|
||||
|
||||
let of_seq seq =
|
||||
let tbl = create 16 in
|
||||
seq (fun (k,v) -> add tbl k v);
|
||||
tbl
|
||||
|
||||
let to_seq tbl yield =
|
||||
Array.iter
|
||||
(function Empty -> () | Key (k, v, _) -> yield (k,v))
|
||||
tbl.arr
|
||||
|
||||
let keys tbl yield =
|
||||
Array.iter
|
||||
(function Empty -> () | Key (k, _, _) -> yield k)
|
||||
tbl.arr
|
||||
|
||||
let values tbl yield =
|
||||
Array.iter
|
||||
(function Empty -> () | Key (_, v, _) -> yield v)
|
||||
tbl.arr
|
||||
end
|
||||
|
||||
79
core/CCHashtbl.mli
Normal file
79
core/CCHashtbl.mli
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2014, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
|
||||
(** {1 Open-Addressing Hash-table} *)
|
||||
|
||||
type 'a sequence = ('a -> unit) -> unit
|
||||
|
||||
module type S = sig
|
||||
type key
|
||||
type 'a t
|
||||
|
||||
val create : int -> 'a t
|
||||
(** Create a new table of the given initial capacity *)
|
||||
|
||||
val mem : 'a t -> key -> bool
|
||||
(** [mem tbl k] returns [true] iff [k] is mapped to some value
|
||||
in [tbl] *)
|
||||
|
||||
val find : 'a t -> key -> 'a option
|
||||
|
||||
val find_exn : 'a t -> key -> 'a
|
||||
|
||||
val get : key -> 'a t -> 'a option
|
||||
(** [get k tbl] recovers the value for [k] in [tbl], or
|
||||
returns [None] if [k] doesn't belong *)
|
||||
|
||||
val get_exn : key -> 'a t -> 'a
|
||||
|
||||
val add : 'a t -> key -> 'a -> unit
|
||||
(** [add tbl k v] adds [k -> v] to [tbl], possibly replacing the old
|
||||
value associated with [k]. *)
|
||||
|
||||
val remove : 'a t -> key -> unit
|
||||
(** Remove binding *)
|
||||
|
||||
val size : _ t -> int
|
||||
(** Number of bindings *)
|
||||
|
||||
val of_list : (key * 'a) list -> 'a t
|
||||
val to_list : 'a t -> (key * 'a) list
|
||||
|
||||
val of_seq : (key * 'a) sequence -> 'a t
|
||||
val to_seq : 'a t -> (key * 'a) sequence
|
||||
|
||||
val keys : _ t -> key sequence
|
||||
val values : 'a t -> 'a sequence
|
||||
end
|
||||
|
||||
module type HASHABLE = sig
|
||||
type t
|
||||
val equal : t -> t -> bool
|
||||
val hash : t -> int
|
||||
end
|
||||
|
||||
module Make(X : HASHABLE) : S with type key = X.t
|
||||
|
|
@ -31,6 +31,12 @@ module IMap = Map.Make(struct
|
|||
let compare i j = i - j
|
||||
end)
|
||||
|
||||
module ICCHashtbl = CCHashtbl.Make(struct
|
||||
type t = int
|
||||
let equal i j = i = j
|
||||
let hash i = i
|
||||
end)
|
||||
|
||||
let phashtbl_add n =
|
||||
let h = PHashtbl.create 50 in
|
||||
for i = n downto 0 do
|
||||
|
|
@ -87,6 +93,13 @@ let imap_add n =
|
|||
done;
|
||||
!h
|
||||
|
||||
let icchashtbl_add n =
|
||||
let h = ICCHashtbl.create 50 in
|
||||
for i = n downto 0 do
|
||||
ICCHashtbl.add h i i;
|
||||
done;
|
||||
h
|
||||
|
||||
let bench_maps1 () =
|
||||
Format.printf "----------------------------------------@.";
|
||||
let res = Bench.bench_n
|
||||
|
|
@ -98,6 +111,7 @@ let bench_maps1 () =
|
|||
"ipersistenthashtbl_add", (fun n -> ignore (ipersistenthashtbl_add n));
|
||||
"skiplist_add", (fun n -> ignore (skiplist_add n));
|
||||
"imap_add", (fun n -> ignore (imap_add n));
|
||||
"cchashtbl_add", (fun n -> ignore (icchashtbl_add n))
|
||||
]
|
||||
in
|
||||
Bench.summarize 1. res
|
||||
|
|
@ -182,6 +196,16 @@ let imap_replace n =
|
|||
done;
|
||||
!h
|
||||
|
||||
let icchashtbl_replace n =
|
||||
let h = ICCHashtbl.create 50 in
|
||||
for i = 0 to n do
|
||||
ICCHashtbl.add h i i;
|
||||
done;
|
||||
for i = n downto 0 do
|
||||
ICCHashtbl.add h i i;
|
||||
done;
|
||||
h
|
||||
|
||||
let bench_maps2 () =
|
||||
Format.printf "----------------------------------------@.";
|
||||
let res = Bench.bench_n
|
||||
|
|
@ -193,6 +217,7 @@ let bench_maps2 () =
|
|||
"ipersistenthashtbl_replace", (fun n -> ignore (ipersistenthashtbl_replace n));
|
||||
"skiplist_replace", (fun n -> ignore (skiplist_replace n));
|
||||
"imap_replace", (fun n -> ignore (imap_replace n));
|
||||
"cchashtbl_replace", (fun n -> ignore (icchashtbl_replace n));
|
||||
]
|
||||
in
|
||||
Bench.summarize 1. res
|
||||
|
|
@ -253,6 +278,12 @@ let imap_find m =
|
|||
ignore (IMap.find i m);
|
||||
done
|
||||
|
||||
let icchashtbl_find m =
|
||||
fun n ->
|
||||
for i = 0 to n-1 do
|
||||
ignore (ICCHashtbl.find_exn m i);
|
||||
done
|
||||
|
||||
let bench_maps3 () =
|
||||
List.iter
|
||||
(fun len ->
|
||||
|
|
@ -265,6 +296,7 @@ let bench_maps3 () =
|
|||
let l = skiplist_add len in
|
||||
let a = Array.init len (fun i -> string_of_int i) in
|
||||
let m = imap_add len in
|
||||
let h'''''' = icchashtbl_add len in
|
||||
Format.printf "----------------------------------------@.";
|
||||
Format.printf "try on size %d@.@.@." len;
|
||||
Bench.bench [
|
||||
|
|
@ -277,6 +309,7 @@ let bench_maps3 () =
|
|||
"skiplist_find", (fun () -> skiplist_find l len);
|
||||
"array_find", (fun () -> array_find a len);
|
||||
"imap_find", (fun () -> imap_find m len);
|
||||
"cchashtbl_find", (fun () -> icchashtbl_find h'''''' len);
|
||||
])
|
||||
[10;20;100;1000;10000]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue