robin hood hashing for PHashtbl

This commit is contained in:
Simon Cruanes 2013-03-05 00:49:02 +01:00
parent 7a01248c06
commit deab8c8f62
5 changed files with 86 additions and 80 deletions

View file

@ -23,29 +23,27 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*) *)
(** {1 Open addressing hashtable, with linear probing} *) (** {1 Open addressing hashtable (robin hood hashing)} *)
type ('a, 'b) t = { type ('a, 'b) t = {
mutable buckets : ('a * 'b * state) array; mutable buckets : ('a, 'b) bucket array;
mutable size : int; mutable size : int;
eq : 'a -> 'a -> bool; eq : 'a -> 'a -> bool;
hash : 'a -> int; hash : 'a -> int;
max_load : float; max_load : float;
} (** A hashtable is an array of (key, value) buckets that have a state, } (** A hashtable is an array of (key, value) buckets that have a state,
plus the size of the table and equality/hash functions *) plus the size of the table and equality/hash functions *)
and state = Used | Empty | Deleted and ('a, 'b) bucket =
(** state of a bucket *) | Empty
| Deleted
let my_null () = (Obj.magic None, Obj.magic None, Empty) | Used of 'a * 'b * int (* int: the distance from home of the key *)
(** a bucket *)
let my_deleted key = (key, Obj.magic None, Deleted)
(** Create a table. Size will be >= 2 *) (** Create a table. Size will be >= 2 *)
let create ?(max_load=0.8) ?(eq=fun x y -> x = y) let create ?(max_load=0.8) ?(eq=fun x y -> x = y)
?(hash=fun x -> Hashtbl.hash x) size = ?(hash=fun x -> Hashtbl.hash x) size =
let size = max 2 size in let size = max 2 size in
let null = my_null () in { buckets = Array.make size Empty;
{ buckets = Array.make size null;
size = 0; size = 0;
max_load; max_load;
eq; eq;
@ -73,79 +71,81 @@ let copy t = {
(** clear the table, by resetting all states to Empty *) (** clear the table, by resetting all states to Empty *)
let clear t = let clear t =
let null = my_null () in Array.fill t.buckets 0 (Array.length t.buckets) Empty;
Array.fill t.buckets 0 (Array.length t.buckets) null;
t.size <- 0 t.size <- 0
(** Index of slot, for i-th probing starting from hash [h] in (** Index of slot, for i-th probing starting from hash [h] in
a table of length [n] *) a table of length [n] *)
let addr h n i = (h + i) mod n let addr h n i = (h + i) mod n
(** Insert (key -> value) in buckets, starting with the hash. *) (** Insert (key -> value) in table, starting with the hash. *)
let insert ~eq buckets h key value = let insert t key value =
let n = Array.length buckets in let n = Array.length t.buckets in
let h = t.hash key in
(* lookup an empty slot to insert the key->value in. *) (* lookup an empty slot to insert the key->value in. *)
let rec lookup h n i = let rec lookup h i key value dist =
let j = addr h n i in let j = addr h n i in
match buckets.(j) with match t.buckets.(j) with
| (_, _, Empty) -> buckets.(j) <- (key, value, Used) | Empty | Deleted ->
| (key', _, _) when eq key key' -> () (* insert here *)
| _ -> lookup h n (i+1) t.size <- t.size + 1;
t.buckets.(j) <- Used (key, value, dist)
| Used (key', _, _) when t.eq key key' ->
(* insert here (erase old value) *)
t.size <- t.size + 1;
t.buckets.(j) <- Used (key, value, dist)
| Used (key', value', dist') when dist > dist' ->
(* displace this key/value *)
t.buckets.(j) <- Used (key, value, dist);
(* insert the other value again *)
lookup h (i+1) key' value' (dist+1)
| Used _ ->
(* search further for insertion *)
lookup h (i+1) key value (dist+1)
in in
lookup h n 0 lookup h 0 key value 1
(** Resize the array, by inserting its content into twice as large an array *) (** Resize the array, by inserting its content into twice as large an array *)
let resize ~eq ~hash buckets = let resize t =
let new_size = min (Array.length buckets * 2 + 1) Sys.max_array_length in let new_size = min (Array.length t.buckets * 2 + 1) Sys.max_array_length in
let buckets' = Array.make new_size (my_null ()) in assert (new_size > Array.length t.buckets);
for i = 0 to Array.length buckets - 1 do let old_buckets = t.buckets in
match buckets.(i) with t.buckets <- Array.make new_size Empty;
| (key, value, Used) -> t.size <- 0; (* will be updated again *)
insert ~eq buckets' (hash key) key value (* insert key -> value into new array *) for i = 0 to Array.length old_buckets - 1 do
| _ -> () match old_buckets.(i) with
done; | Used (key, value, _) ->
buckets' (* insert key -> value into new array *)
insert t key value
| Empty | Deleted -> ()
done
(** Lookup [key] in the table *) (** Lookup [key] in the table *)
let find t key = let find t key =
let n = Array.length t.buckets in let n = Array.length t.buckets in
let h = t.hash key in let h = t.hash key in
let buckets = t.buckets in let buckets = t.buckets in
let rec probe h n i num = let rec probe h n i =
if num = n then raise Not_found if i = n then raise Not_found else
else
let j = addr h n i in let j = addr h n i in
match buckets.(j) with match buckets.(j) with
| (key', value, Used) when t.eq key key' -> | Used (key', value, _) when t.eq key key' ->
value (* found value for this key *) value (* found value for this key *)
| (_, _, Deleted) | (_, _, Used) -> | Deleted | Used _ ->
probe h n (i+1) (num + 1) (* try next bucket *) probe h n (i+1) (* try next bucket *)
| (_, _, Empty) -> raise Not_found | Empty -> raise Not_found
in in
probe h n 0 0 probe h n 0
(** put [key] -> [value] in the hashtable *) (** put [key] -> [value] in the hashtable *)
let replace t key value = let replace t key value =
let load = float_of_int t.size /. float_of_int (Array.length t.buckets) in let load = float_of_int t.size /. float_of_int (Array.length t.buckets) in
(if load > t.max_load then t.buckets <- resize ~eq:t.eq ~hash:t.hash t.buckets); (if load > t.max_load then resize t);
let n = Array.length t.buckets in insert t key value
let h = t.hash key in
let buckets = t.buckets in
let rec probe h n i =
let j = addr h n i in
match buckets.(j) with
| (key', _, Used) when t.eq key key' ->
buckets.(j) <- (key, value, Used) (* replace value *)
| (_, _, Deleted) | (_, _, Empty) ->
buckets.(j) <- (key, value, Used);
t.size <- t.size + 1 (* insert and increment size *)
| (_, _, Used) ->
probe h n (i+1) (* go further *)
in
probe h n 0
(** alias for replace *) (** alias for replace *)
let add t key value = replace t key value let add t key value =
replace t key value
(** Remove the key from the table *) (** Remove the key from the table *)
let remove t key = let remove t key =
@ -155,11 +155,12 @@ let remove t key =
let rec probe h n i = let rec probe h n i =
let j = addr h n i in let j = addr h n i in
match buckets.(j) with match buckets.(j) with
| (key', _, Used) when t.eq key key' -> | Used (key', _, _) when t.eq key key' ->
buckets.(j) <- (my_deleted key'); t.size <- t.size - 1 (* remove slot *) buckets.(j) <- Deleted;
| (_, _, Deleted) | (_, _, Used) -> t.size <- t.size - 1 (* remove slot *)
| Deleted | Used _ ->
probe h n (i+1) (* search further *) probe h n (i+1) (* search further *)
| (_, _, Empty) -> () (* not present *) | Empty -> () (* not present *)
in in
probe h n 0 probe h n 0
@ -176,8 +177,8 @@ let iter k t =
let buckets = t.buckets in let buckets = t.buckets in
for i = 0 to Array.length buckets - 1 do for i = 0 to Array.length buckets - 1 do
match buckets.(i) with match buckets.(i) with
| (key, value, Used) -> k key value | Used (key, value, _) -> k key value
| _ -> () | Empty | Deleted -> ()
done done
(** Fold on key -> value pairs *) (** Fold on key -> value pairs *)
@ -186,8 +187,9 @@ let fold f acc t =
let buckets = t.buckets in let buckets = t.buckets in
for i = 0 to Array.length buckets - 1 do for i = 0 to Array.length buckets - 1 do
match buckets.(i) with match buckets.(i) with
| (key, value, Used) -> acc := f !acc key value | Used (key, value, _) ->
| _ -> () acc := f !acc key value
| Empty | Deleted -> ()
done; done;
!acc !acc
@ -196,9 +198,10 @@ let map f t =
let t' = create ~eq:t.eq ~hash:t.hash (Array.length t.buckets) in let t' = create ~eq:t.eq ~hash:t.hash (Array.length t.buckets) in
for i = 0 to Array.length t.buckets - 1 do for i = 0 to Array.length t.buckets - 1 do
match t.buckets.(i) with match t.buckets.(i) with
| (_, _, Empty) -> () | Empty -> ()
| (k, _, Deleted) -> t'.buckets.(i) <- my_deleted k | Deleted -> t'.buckets.(i) <- Deleted
| (k, v, Used) -> t'.buckets.(i) <- (k, f k v, Used) | Used (k, v, dist) ->
t'.buckets.(i) <- Used (k, f k v, dist)
done; done;
t' t'
@ -206,10 +209,10 @@ let map f t =
let filter pred t = let filter pred t =
for i = 0 to Array.length t.buckets - 1 do for i = 0 to Array.length t.buckets - 1 do
match t.buckets.(i) with match t.buckets.(i) with
| (_, _, (Empty | Deleted)) -> () | Empty | Deleted -> ()
| (k, v, Used) when pred k v -> () | Used (k, v, _) when pred k v -> ()
| (k, v, Used) -> (* remove this element *) | Used (k, v, _) -> (* remove this element *)
t.buckets.(i) <- my_deleted k; t.buckets.(i) <- Deleted;
t.size <- t.size - 1 t.size <- t.size - 1
done done

View file

@ -23,18 +23,21 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*) *)
(** {1 Open addressing hashtable, with linear probing} *) (** {1 Open addressing hashtable (robin hood hashing)} *)
type ('a, 'b) t = private { type ('a, 'b) t = {
mutable buckets : ('a * 'b * state) array; mutable buckets : ('a, 'b) bucket array;
mutable size : int; mutable size : int;
eq : 'a -> 'a -> bool; eq : 'a -> 'a -> bool;
hash : 'a -> int; hash : 'a -> int;
max_load : float; max_load : float;
} (** A hashtable is an array of (key, value) buckets that have a state, } (** A hashtable is an array of (key, value) buckets that have a state,
plus the size of the table and equality/hash functions *) plus the size of the table and equality/hash functions *)
and state = Used | Empty | Deleted and ('a, 'b) bucket =
(** state of a bucket *) | Empty
| Deleted
| Used of 'a * 'b * int (* int: the distance from home of the key *)
(** a bucket *)
val create : ?max_load:float -> ?eq:('a -> 'a -> bool) -> val create : ?max_load:float -> ?eq:('a -> 'a -> bool) ->
?hash:('a -> int) -> int -> ('a, 'b) t ?hash:('a -> int) -> int -> ('a, 'b) t

View file

@ -3,14 +3,14 @@
let phashtbl_add n = let phashtbl_add n =
let h = PHashtbl.create 50 in let h = PHashtbl.create 50 in
for i = 0 to n do for i = n downto 0 do
PHashtbl.add h i i; PHashtbl.add h i i;
done; done;
h h
let hashtbl_add n = let hashtbl_add n =
let h = Hashtbl.create 50 in let h = Hashtbl.create 50 in
for i = 0 to n do for i = n downto 0 do
Hashtbl.add h i i; Hashtbl.add h i i;
done; done;
h h

View file

@ -3,7 +3,7 @@
open OUnit open OUnit
let test_empty () = let test_empty () =
let h = Heap.empty ~lt:(fun x y -> x < y) in let h = Heap.empty ~compare:(fun x y -> x - y) in
OUnit.assert_bool "is_empty empty" (Heap.is_empty h) OUnit.assert_bool "is_empty empty" (Heap.is_empty h)
let suite = let suite =

View file

@ -5,7 +5,7 @@ open OUnit
let suite = let suite =
"all_tests" >::: "all_tests" >:::
[ Test_pHashtbl.suite; [ Test_pHashtbl.suite;
Test_heap.suite; (* Test_heap.suite; *)
] ]
let _ = let _ =