mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-01-27 03:14:49 -05:00
refactor(flat_tbl): use only one array of slots + inline record
This commit is contained in:
parent
bee90ee6ea
commit
76a3fb953e
1 changed files with 107 additions and 154 deletions
|
|
@ -71,120 +71,91 @@ module Make(H : Hashtbl.HashedType) = struct
|
||||||
additional pointer anyway. *)
|
additional pointer anyway. *)
|
||||||
type 'a slot =
|
type 'a slot =
|
||||||
| Empty
|
| Empty
|
||||||
| Used of key * 'a
|
| Used of {
|
||||||
|
k: key;
|
||||||
|
mutable v: 'a;
|
||||||
|
hash: int;
|
||||||
|
mutable dib: int; (* DIB: distance to initial bucket *)
|
||||||
|
}
|
||||||
|
|
||||||
let max_load = 0.8
|
let max_load = 0.92
|
||||||
let probe_dist_n_bits = 7 (* store probe distance on <n> bits *)
|
|
||||||
|
|
||||||
type 'a t = {
|
type 'a t = {
|
||||||
mutable meta: int array;
|
|
||||||
(* [hash | probe_distance[0..10] | present[1]]
|
|
||||||
for key at index [i] *)
|
|
||||||
mutable slots: 'a slot array; (* slot for index [i] *)
|
mutable slots: 'a slot array; (* slot for index [i] *)
|
||||||
mutable size : int;
|
mutable size : int;
|
||||||
(* TODO: [max_dist: int], so we can stop loopup early? *)
|
mutable max_dib : int;
|
||||||
}
|
}
|
||||||
|
|
||||||
let create size : _ t =
|
let create size : _ t =
|
||||||
let size = max 8 size in
|
let size = max 8 size in
|
||||||
{ slots = Array.make size Empty;
|
{ slots = Array.make size Empty;
|
||||||
meta = Array.make size 0;
|
|
||||||
size = 0;
|
size = 0;
|
||||||
|
max_dib = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
let copy self =
|
let copy self =
|
||||||
{ slots = Array.copy self.slots;
|
{ slots = Array.copy self.slots;
|
||||||
meta = Array.copy self.meta;
|
|
||||||
size = self.size;
|
size = self.size;
|
||||||
|
max_dib = self.max_dib;
|
||||||
}
|
}
|
||||||
|
|
||||||
(** clear the table, by resetting all states to Empty *)
|
(** clear the table, by resetting all states to Empty *)
|
||||||
let clear self =
|
let clear self =
|
||||||
let {slots; meta; size=_} = self in
|
let {slots; max_dib=_; size=_} = self in
|
||||||
Array.fill slots 0 (Array.length slots) Empty;
|
Array.fill slots 0 (Array.length slots) Empty;
|
||||||
Array.fill meta 0 (Array.length meta ) 0;
|
self.max_dib <- 0;
|
||||||
self.size <- 0
|
self.size <- 0
|
||||||
|
|
||||||
(* Index of slot, for i-th probing starting from hash [h] in
|
(* Index of slot, for i-th probing starting from hash [h] in
|
||||||
a table of length [n] *)
|
a table of length [n].
|
||||||
let[@inline] addr_ h n dist = (h + dist) mod n
|
Note: we make sure the [h+dist] part is positive first,
|
||||||
|
and we do not use [abs] since it can be negative on [min_int]. *)
|
||||||
(* normalize h by removing bits that will not fit in storage *)
|
let[@inline] addr_ h n dist =
|
||||||
let[@inline] normalize_hash_ h : int =
|
((h + dist) land max_int) mod n
|
||||||
(h lsl (1+probe_dist_n_bits)) lsr (1+probe_dist_n_bits)
|
|
||||||
|
|
||||||
(** [mk_meta_ hash dist] make new metadata *)
|
|
||||||
let mk_meta_ h dist : int =
|
|
||||||
let dist_mask = (1 lsl probe_dist_n_bits)-1 in
|
|
||||||
let dist = dist land dist_mask in
|
|
||||||
(* LSB=1 to indicate presence *)
|
|
||||||
(((h lsl probe_dist_n_bits) lor dist) lsl 1) lor 1
|
|
||||||
|
|
||||||
(* hash of metadata (truncated) *)
|
|
||||||
let[@inline] hash_of_meta_ m : int =
|
|
||||||
m lsr (probe_dist_n_bits+1)
|
|
||||||
|
|
||||||
(* probe distance of metadata (truncated) *)
|
|
||||||
let[@inline] dist_of_meta_ m : int =
|
|
||||||
(m lsr 1) land ((1 lsl probe_dist_n_bits)-1)
|
|
||||||
|
|
||||||
(* presence bit of metadata *)
|
|
||||||
let[@inline] presence_meta_ m : bool =
|
|
||||||
(m land 1) == 1
|
|
||||||
|
|
||||||
(* Insert [k -> v] in [self], starting with the hash [h].
|
(* Insert [k -> v] in [self], starting with the hash [h].
|
||||||
Does not modify the size. *)
|
Does not modify the size. *)
|
||||||
let insert_ (self:_ t) h k v : unit =
|
let insert_ (self:_ t) h k v : unit =
|
||||||
let {slots; meta; size=_} = self in
|
let {slots; max_dib=_; size=_} = self in
|
||||||
let n = Array.length slots in
|
let n = Array.length slots in
|
||||||
assert (n=Array.length meta);
|
|
||||||
|
|
||||||
(* lookup an empty slot to insert the key->value in. *)
|
(* lookup an empty slot to insert the key->value in. *)
|
||||||
let rec insert_rec_ h k v dist =
|
let rec insert_rec_ h k v dib =
|
||||||
let j = addr_ h n dist in
|
let j = addr_ h n dib in
|
||||||
|
|
||||||
let m_j = Array.unsafe_get meta j in
|
match Array.unsafe_get slots j with
|
||||||
let dist_j = dist_of_meta_ m_j in
|
| Empty ->
|
||||||
let hash_j = hash_of_meta_ m_j in
|
Array.unsafe_set slots j (Used {k; v; hash=h; dib});
|
||||||
|
self.max_dib <- max dib self.max_dib
|
||||||
|
| Used used_j ->
|
||||||
|
if used_j.hash = h && H.equal k used_j.k then (
|
||||||
|
(* same key: replace *)
|
||||||
|
used_j.v <- v;
|
||||||
|
used_j.dib <- dib;
|
||||||
|
self.max_dib <- max dib self.max_dib;
|
||||||
|
|
||||||
if not (presence_meta_ m_j) then (
|
) else if used_j.dib < dib then (
|
||||||
(* empty slot *)
|
(* displace element*)
|
||||||
let m = mk_meta_ h dist in
|
let k_j = used_j.k in
|
||||||
meta.(j) <- m;
|
let v_j = used_j.v in
|
||||||
slots.(j) <- Used (k, v);
|
let h_j = used_j.hash in
|
||||||
) else if h <> hash_j && dist_j >= dist then (
|
let dib_j = used_j.dib in
|
||||||
(* different slot and hash (hence, key): try next slot *)
|
|
||||||
insert_rec_ h k v (dist+1)
|
|
||||||
) else (
|
|
||||||
let k_j, v_j =
|
|
||||||
match Array.unsafe_get slots j with
|
|
||||||
| Empty -> assert false
|
|
||||||
| Used (k,v) -> k, v
|
|
||||||
in
|
|
||||||
|
|
||||||
if H.equal k k_j then (
|
Array.unsafe_set slots j (Used {k;v;hash=h;dib});
|
||||||
(* replace slot, same key *)
|
self.max_dib <- max dib self.max_dib;
|
||||||
slots.(j) <- Used (k, v);
|
|
||||||
) else if dist_j < dist then (
|
|
||||||
(* displace this element *)
|
|
||||||
|
|
||||||
let m = mk_meta_ h dist in
|
insert_rec_ h_j k_j v_j dib_j
|
||||||
meta.(j) <- m;
|
|
||||||
slots.(j) <- Used (k, v);
|
|
||||||
|
|
||||||
insert_rec_ hash_j k_j v_j dist_j
|
|
||||||
) else (
|
) else (
|
||||||
(* try next slot *)
|
(* look further *)
|
||||||
insert_rec_ h k v (dist+1)
|
insert_rec_ h k v (dib+1)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
in
|
in
|
||||||
|
|
||||||
insert_rec_ h k v 0
|
insert_rec_ h k v 0
|
||||||
|
|
||||||
(* Resize the array, by inserting its content into twice as large an array *)
|
(* Resize the array, by inserting its content into twice as large an array *)
|
||||||
let resize (self:_ t) : unit =
|
let resize (self:_ t) : unit =
|
||||||
let {slots=old_slots; meta=old_meta; size=_} = self in
|
let {slots=old_slots; max_dib=_; size=_} = self in
|
||||||
|
|
||||||
let new_size =
|
let new_size =
|
||||||
let n = Array.length old_slots in
|
let n = Array.length old_slots in
|
||||||
|
|
@ -194,61 +165,56 @@ module Make(H : Hashtbl.HashedType) = struct
|
||||||
if new_size <= Array.length old_slots then failwith "flat_tbl: cannot resize further";
|
if new_size <= Array.length old_slots then failwith "flat_tbl: cannot resize further";
|
||||||
|
|
||||||
self.slots <- Array.make new_size Empty;
|
self.slots <- Array.make new_size Empty;
|
||||||
self.meta <- Array.make new_size 0;
|
self.max_dib <- 0;
|
||||||
|
|
||||||
(* insert elements into new table *)
|
(* insert elements into new table *)
|
||||||
Array.iteri
|
Array.iter
|
||||||
(fun i slot -> match slot with
|
(fun slot -> match slot with
|
||||||
| Empty -> ()
|
| Empty -> ()
|
||||||
| Used (k,v) ->
|
| Used {k; v; hash; _} ->
|
||||||
let m = Array.unsafe_get old_meta i in
|
insert_ self hash k v)
|
||||||
let h = hash_of_meta_ m in
|
|
||||||
insert_ self h k v)
|
|
||||||
old_slots;
|
old_slots;
|
||||||
()
|
()
|
||||||
|
|
||||||
(* Lookup [key] in the table *)
|
(* Lookup [key] in the table *)
|
||||||
let find_opt self k =
|
let find_opt self k =
|
||||||
let {slots; meta; size=_} = self in
|
let {slots; max_dib; size=_} = self in
|
||||||
let n = Array.length slots in
|
let n = Array.length slots in
|
||||||
let h = normalize_hash_ (H.hash k) in
|
let h = H.hash k in
|
||||||
let slots = self.slots in
|
|
||||||
let[@unroll 2] rec find_rec_ dist =
|
|
||||||
assert (dist < n); (* load factor would be 1 *)
|
|
||||||
let j = addr_ h n dist in
|
|
||||||
|
|
||||||
let m_j = Array.unsafe_get meta j in
|
let rec find_rec_ dib =
|
||||||
if not (presence_meta_ m_j) then (
|
(*assert (dist < n); (* load factor would be 1 *)*)
|
||||||
None (* met empty slot *)
|
let j = addr_ h n dib in
|
||||||
) else (
|
match Array.unsafe_get slots j with
|
||||||
|
| Empty -> None
|
||||||
(* TODO: if we store max_probe_dist, use this for early
|
| Used used_j ->
|
||||||
termination
|
if used_j.hash = h && H.equal used_j.k k then (
|
||||||
let dist_j = dist_of_meta_ m_j in
|
Some used_j.v (* found *)
|
||||||
if dist_j > max_probe_dist then raise Not_found
|
) else if dib >= max_dib then (
|
||||||
*)
|
None (* no need to go further *)
|
||||||
|
|
||||||
let h_j = hash_of_meta_ m_j in
|
|
||||||
if h <> h_j then (
|
|
||||||
(* different hash *)
|
|
||||||
find_rec_ (dist+1)
|
|
||||||
) else (
|
) else (
|
||||||
|
|
||||||
|
(* unroll by hand *)
|
||||||
|
let dib = dib+1 in
|
||||||
|
|
||||||
|
let j = addr_ h n dib in
|
||||||
match Array.unsafe_get slots j with
|
match Array.unsafe_get slots j with
|
||||||
| Used (k2, v) ->
|
| Empty -> None
|
||||||
if H.equal k k2 then Some v
|
| Used used_j ->
|
||||||
else (
|
if used_j.hash = h && H.equal used_j.k k then (
|
||||||
(* different key *)
|
Some used_j.v (* found *)
|
||||||
find_rec_ (dist+1)
|
) else if dib >= max_dib then (
|
||||||
|
None (* no need to go further *)
|
||||||
|
) else (
|
||||||
|
find_rec_ (dib+1)
|
||||||
)
|
)
|
||||||
| Empty -> assert false
|
|
||||||
)
|
)
|
||||||
)
|
|
||||||
in
|
in
|
||||||
|
|
||||||
(* try a direct hit first *)
|
(* try a direct hit first *)
|
||||||
begin match Array.unsafe_get slots (addr_ h n 0) with
|
begin match Array.unsafe_get slots (addr_ h n 0) with
|
||||||
| Empty -> None
|
| Empty -> None
|
||||||
| Used (k2, v) when H.equal k k2 -> Some v
|
| Used {k=k2; v; hash; _} when h = hash && H.equal k k2 -> Some v
|
||||||
| _ -> find_rec_ 1
|
| _ -> find_rec_ 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -265,7 +231,7 @@ module Make(H : Hashtbl.HashedType) = struct
|
||||||
resize self;
|
resize self;
|
||||||
);
|
);
|
||||||
|
|
||||||
let h = normalize_hash_ (H.hash k) in
|
let h = H.hash k in
|
||||||
self.size <- 1 + self.size;
|
self.size <- 1 + self.size;
|
||||||
insert_ self h k v
|
insert_ self h k v
|
||||||
|
|
||||||
|
|
@ -273,59 +239,47 @@ module Make(H : Hashtbl.HashedType) = struct
|
||||||
(see https://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ )
|
(see https://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ )
|
||||||
to keep probe_distance low, instead of using tombstones. *)
|
to keep probe_distance low, instead of using tombstones. *)
|
||||||
let remove self k : unit =
|
let remove self k : unit =
|
||||||
let {slots; meta; size=_} = self in
|
let {slots; max_dib=_; size=_} = self in
|
||||||
let n = Array.length slots in
|
let n = Array.length slots in
|
||||||
let h = normalize_hash_ (H.hash k) in
|
let h = H.hash k in
|
||||||
|
|
||||||
(* given that [i] is empty, and [i_succ = (i+1) mod n],
|
(* given that [i] is empty, and [i_succ = (i+1) mod n],
|
||||||
see if we can shift the element at [i_succ] to the left
|
see if we can shift the element at [i_succ] to the left
|
||||||
to decrease its probe count. *)
|
to decrease its probe count. *)
|
||||||
let rec backward_shift_ i i_succ : unit =
|
let rec backward_shift_ i i_succ : unit =
|
||||||
let m = Array.unsafe_get meta i_succ in
|
match Array.unsafe_get slots i_succ with
|
||||||
if presence_meta_ m then (
|
| Empty -> ()
|
||||||
let dist = dist_of_meta_ m in
|
|
||||||
if dist > 0 then (
|
|
||||||
let slot = Array.unsafe_get slots i_succ in
|
|
||||||
assert (slot != Empty);
|
|
||||||
|
|
||||||
let m = mk_meta_ (hash_of_meta_ m) (dist-1) in
|
| Used used as slot when used.dib > 0 ->
|
||||||
meta.(i) <- m;
|
(* shift to the left, decreasing DIB by one *)
|
||||||
slots.(i) <- slot;
|
Array.unsafe_set slots i slot;
|
||||||
meta.(i_succ) <- 0; (* cleanup i_succ *)
|
used.dib <- used.dib - 1;
|
||||||
slots.(i_succ) <- Empty;
|
Array.unsafe_set slots i_succ Empty;
|
||||||
|
|
||||||
backward_shift_ i_succ ((i_succ + 1) mod n)
|
backward_shift_ i_succ ((i_succ + 1) mod n)
|
||||||
)
|
|
||||||
)
|
| Used _ -> ()
|
||||||
in
|
in
|
||||||
|
|
||||||
let rec find_rec_ dist =
|
let rec find_rec_ dib =
|
||||||
assert (dist<n);
|
assert (dib<n);
|
||||||
|
|
||||||
let j = addr_ h n dist in
|
let j = addr_ h n dib in
|
||||||
let m_j = Array.unsafe_get meta j in
|
begin match Array.unsafe_get slots j with
|
||||||
let hash_j = hash_of_meta_ m_j in
|
| Empty -> ()
|
||||||
|
| _ when dib > self.max_dib -> ()
|
||||||
|
| Used used_j ->
|
||||||
|
if h = used_j.hash && H.equal k used_j.k then (
|
||||||
|
(* found element, remove it *)
|
||||||
|
Array.unsafe_set slots j Empty;
|
||||||
|
self.size <- self.size - 1;
|
||||||
|
|
||||||
if not (presence_meta_ m_j) then () (* early exit, key not present *)
|
backward_shift_ j ((j+1) mod n); (* shift slots that come just next *)
|
||||||
else if h <> hash_j then (
|
|
||||||
find_rec_ (dist+1) (* go further *)
|
|
||||||
) else (
|
|
||||||
let k_j = match Array.unsafe_get slots j with
|
|
||||||
| Empty -> assert false
|
|
||||||
| Used (k, _) -> k
|
|
||||||
in
|
|
||||||
|
|
||||||
if H.equal k k_j then (
|
) else (
|
||||||
(* found element, remove it *)
|
find_rec_ (dib+1)
|
||||||
slots.(j) <- Empty;
|
)
|
||||||
meta.(j) <- 0;
|
end
|
||||||
self.size <- self.size - 1;
|
|
||||||
|
|
||||||
backward_shift_ j ((j+1) mod n); (* shift slots that come just next *)
|
|
||||||
) else (
|
|
||||||
find_rec_ (dist+1)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
in
|
in
|
||||||
|
|
||||||
if self.size > 0 then (
|
if self.size > 0 then (
|
||||||
|
|
@ -343,19 +297,18 @@ module Make(H : Hashtbl.HashedType) = struct
|
||||||
|
|
||||||
(* Iterate on key -> value pairs *)
|
(* Iterate on key -> value pairs *)
|
||||||
let iter f self =
|
let iter f self =
|
||||||
let slots = self.slots in
|
Array.iter
|
||||||
for i = 0 to Array.length slots - 1 do
|
(function
|
||||||
match Array.unsafe_get slots i with
|
| Used {k; v; _} -> f k v
|
||||||
| Used (k, v) -> f k v
|
| Empty -> ())
|
||||||
| _ -> ()
|
self.slots
|
||||||
done
|
|
||||||
|
|
||||||
(* Fold on key -> value pairs *)
|
(* Fold on key -> value pairs *)
|
||||||
let fold f self acc =
|
let fold f self acc =
|
||||||
Array.fold_left
|
Array.fold_left
|
||||||
(fun acc sl -> match sl with
|
(fun acc sl -> match sl with
|
||||||
| Empty -> acc
|
| Empty -> acc
|
||||||
| Used (k,v) -> f k v acc)
|
| Used {k;v;_} -> f k v acc)
|
||||||
acc self.slots
|
acc self.slots
|
||||||
|
|
||||||
let to_iter t yield =
|
let to_iter t yield =
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue