more elaborate insertion in FHashtbl (increase size of deeper hashtables);

fixed a stupid bug in the insertion;
FHashtbl.mem function
This commit is contained in:
Simon Cruanes 2013-03-07 14:50:10 +01:00
parent 5c6d9d94f2
commit 40dcbd5224
2 changed files with 36 additions and 7 deletions

View file

@ -44,6 +44,9 @@ module type S = sig
val find : 'a t -> key -> 'a val find : 'a t -> key -> 'a
(** Find the binding for this key, or raise Not_found *) (** Find the binding for this key, or raise Not_found *)
val mem : 'a t -> key -> bool
(** Check whether the key is bound in this hashtable *)
val replace : 'a t -> key -> 'a -> 'a t val replace : 'a t -> key -> 'a -> 'a t
(** [replace t key val] returns a copy of [t] where [key] binds to [val] *) (** [replace t key val] returns a copy of [t] where [key] binds to [val] *)
@ -156,6 +159,7 @@ module Make(X : HASH) = struct
(** Empty hashtable *) (** Empty hashtable *)
let empty size = let empty size =
let size = max size 16 in (* size >= 16 *)
Table (empty_buckets size) Table (empty_buckets size)
(** The address in a bucket array, after probing [i] times *) (** The address in a bucket array, after probing [i] times *)
@ -190,27 +194,49 @@ module Make(X : HASH) = struct
in in
find h t find h t
(** Check whether the key is bound in this hashtable *)
let mem t key =
try ignore (find t key); true
with Not_found -> false
(** Maximal depth of the tree (number of bits of the hash) *)
let max_depth = Sys.word_size - 1
(** [i] is the length of the current probe. [n] is the size of
the buckets array. This decides whether the probe, looking
for a free bucket to insert a binding in, is too long. *)
let probe_too_long n i =
i / 5 > n / 8 (* i/n > 5/8 *)
(** Insert [key] -> [value] in the buckets. *) (** Insert [key] -> [value] in the buckets. *)
let rec probe_insert buckets ~depth h key value = let rec probe_insert buckets ~depth h key value =
let n = PArray.length buckets in let n = PArray.length buckets in
let rec probe i = let rec probe i =
if i = n if n = i then (assert (depth = max_depth); failwith "FHashtbl is full")
then (* table seems full, split in two sub-hashtables *) else if (depth < max_depth && probe_too_long n i)
(* We are not too deep, and the table starts being full, we
split it into two sub-tables *)
then
let depth' = depth + 1 in let depth' = depth + 1 in
(* increase size of sub-arrays by 1.5 *)
let sub_size = min (n + (n lsr 1)) Sys.max_array_length in
let l, r = PArray.fold_left let l, r = PArray.fold_left
(fun (l,r) bucket -> match bucket with (fun (l,r) bucket -> match bucket with
| Empty | Deleted -> (l,r) | Empty | Deleted -> (l,r)
| Used (key',value') -> | Used (key',value') ->
let h' = (X.hash key') lsr depth' in let h' = (X.hash key') lsr depth in
if h' land 0x1 = 0 if h' land 0x1 = 0
then then
let l' = insert l ~depth:depth' h' key' value' in let l' = insert l ~depth:depth' (h' lsr 1) key' value' in
l', r l', r
else else
let r' = insert r ~depth:depth' h' key' value' in let r' = insert r ~depth:depth' (h' lsr 1) key' value' in
l, r') l, r')
(empty n, empty n) buckets in (empty sub_size, empty sub_size) buckets in
Split (l, r) (* the split of those two sub-hashtables *)
let new_table = Split (l, r) in
(* insert in this new hashtable *)
insert new_table ~depth h key value
else (* look for an empty slot to insert the bucket *) else (* look for an empty slot to insert the bucket *)
let j = addr n h i in let j = addr n h i in
match PArray.get buckets j with match PArray.get buckets j with

View file

@ -44,6 +44,9 @@ module type S = sig
val find : 'a t -> key -> 'a val find : 'a t -> key -> 'a
(** Find the binding for this key, or raise Not_found *) (** Find the binding for this key, or raise Not_found *)
val mem : 'a t -> key -> bool
(** Check whether the key is bound in this hashtable *)
val replace : 'a t -> key -> 'a -> 'a t val replace : 'a t -> key -> 'a -> 'a t
(** [replace t key val] returns a copy of [t] where [key] binds to [val] *) (** [replace t key val] returns a copy of [t] where [key] binds to [val] *)