mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-13 06:20:33 -05:00
reimplementation of CCPersistentHashtbl
This commit is contained in:
parent
aa1c5fb0e9
commit
15d5da628d
2 changed files with 242 additions and 113 deletions
2
_tags
2
_tags
|
|
@ -3,6 +3,6 @@
|
||||||
<tests/*.ml{,i}>: thread
|
<tests/*.ml{,i}>: thread
|
||||||
<src/threads/*.ml{,i}>: thread
|
<src/threads/*.ml{,i}>: thread
|
||||||
<src/core/CCVector.cmx>: inline(25)
|
<src/core/CCVector.cmx>: inline(25)
|
||||||
<src/data/CCFlatHashtbl.cm*> or <src/data/CCHashTrie.cm*>: inline(15)
|
<src/data/CCFlatHashtbl.cm*> or <src/data/CCHashTrie.cm*> or <src/data/CCPersistent*>: inline(15)
|
||||||
<src/**/*.ml> and not <src/misc/*.ml>: warn_A, warn(-4), warn(-44)
|
<src/**/*.ml> and not <src/misc/*.ml>: warn_A, warn(-4), warn(-44)
|
||||||
true: no_alias_deps, safe_string
|
true: no_alias_deps, safe_string
|
||||||
|
|
|
||||||
|
|
@ -155,58 +155,85 @@ end
|
||||||
(** {2 Implementation} *)
|
(** {2 Implementation} *)
|
||||||
|
|
||||||
module Make(H : HashedType) : S with type key = H.t = struct
|
module Make(H : HashedType) : S with type key = H.t = struct
|
||||||
module Table = Hashtbl.Make(H)
|
|
||||||
(** Imperative hashtable *)
|
|
||||||
|
|
||||||
type key = H.t
|
type key = H.t
|
||||||
type 'a t = 'a zipper ref
|
|
||||||
and 'a zipper =
|
(* main hashtable *)
|
||||||
| Table of 'a Table.t (** Concrete table *)
|
type 'a t = {
|
||||||
| Add of key * 'a * 'a t (** Add key *)
|
mutable arr: 'a p_array; (* invariant: length is a power of 2 *)
|
||||||
| Replace of key * 'a * 'a t (** Replace key by value *)
|
length: int;
|
||||||
| Remove of key * 'a t (** As the table, but without given key *)
|
}
|
||||||
|
|
||||||
|
(* piece of a persistent array *)
|
||||||
|
and 'a p_array =
|
||||||
|
| Arr of 'a bucket array
|
||||||
|
| Set of int * 'a bucket * 'a t
|
||||||
|
|
||||||
|
(* bucket of the hashtbl *)
|
||||||
|
and 'a bucket =
|
||||||
|
| Nil
|
||||||
|
| Cons of key * 'a * 'a bucket
|
||||||
|
|
||||||
|
(* first power of two that is bigger than [than], starting from [n] *)
|
||||||
|
let rec power_two_larger ~than n =
|
||||||
|
if n>= than then n else power_two_larger ~than (2*n)
|
||||||
|
|
||||||
let create i =
|
let create i =
|
||||||
ref (Table (Table.create i))
|
let i = power_two_larger ~than:i 16 in
|
||||||
|
{ length=0;
|
||||||
|
arr=Arr (Array.make i Nil)
|
||||||
|
}
|
||||||
|
|
||||||
let empty () = create 11
|
let empty () = create 16
|
||||||
|
|
||||||
(* pass continuation to get a tailrec rerooting *)
|
let rec reroot_rec_ t k = match t.arr with
|
||||||
let rec _reroot t k = match !t with
|
| Arr a -> k a
|
||||||
| Table tbl -> k tbl (* done *)
|
| Set (i, v, t') ->
|
||||||
| Add (key, v, t') ->
|
reroot_rec_ t' (fun a ->
|
||||||
_reroot t'
|
let v' = a.(i) in
|
||||||
(fun tbl ->
|
a.(i) <- v;
|
||||||
t' := Remove (key, t);
|
t.arr <- Arr a;
|
||||||
Table.add tbl key v;
|
t'.arr <- Set (i, v', t);
|
||||||
t := Table tbl;
|
k a
|
||||||
k tbl)
|
)
|
||||||
| Replace (key, v, t') ->
|
|
||||||
_reroot t'
|
|
||||||
(fun tbl ->
|
|
||||||
let v' = Table.find tbl key in
|
|
||||||
t' := Replace (key, v', t);
|
|
||||||
t := Table tbl;
|
|
||||||
Table.replace tbl key v;
|
|
||||||
k tbl)
|
|
||||||
| Remove (key, t') ->
|
|
||||||
_reroot t'
|
|
||||||
(fun tbl ->
|
|
||||||
let v = Table.find tbl key in
|
|
||||||
t' := Add (key, v, t);
|
|
||||||
t := Table tbl;
|
|
||||||
Table.remove tbl key;
|
|
||||||
k tbl)
|
|
||||||
|
|
||||||
(* Reroot: modify the zipper so that the current node is a proper
|
(* obtain the array *)
|
||||||
hashtable, and return the hashtable *)
|
let reroot_ t = match t.arr with
|
||||||
let reroot t = match !t with
|
| Arr a -> a
|
||||||
| Table tbl -> tbl
|
| _ -> reroot_rec_ t (fun x -> x)
|
||||||
| _ -> _reroot t (fun x -> x)
|
|
||||||
|
|
||||||
let is_empty t = Table.length (reroot t) = 0
|
let is_empty t = t.length = 0
|
||||||
|
|
||||||
let find t k = Table.find (reroot t) k
|
let length t = t.length
|
||||||
|
|
||||||
|
(* find index of [h] in [a] *)
|
||||||
|
let find_idx_ a ~h =
|
||||||
|
(* bitmask 00001111 if length(a) = 10000 *)
|
||||||
|
h land (Array.length a - 1)
|
||||||
|
|
||||||
|
let rec find_rec_ k l = match l with
|
||||||
|
| Nil -> raise Not_found
|
||||||
|
| Cons (k', v', l') ->
|
||||||
|
if H.equal k k' then v' else find_rec_ k l'
|
||||||
|
|
||||||
|
let find t k =
|
||||||
|
let a = reroot_ t in
|
||||||
|
(* unroll like crazy *)
|
||||||
|
match a.(find_idx_ ~h:(H.hash k) a) with
|
||||||
|
| Nil -> raise Not_found
|
||||||
|
| Cons (k1, v1, l1) ->
|
||||||
|
if H.equal k k1 then v1
|
||||||
|
else match l1 with
|
||||||
|
| Nil -> raise Not_found
|
||||||
|
| Cons (k2,v2,l2) ->
|
||||||
|
if H.equal k k2 then v2
|
||||||
|
else match l2 with
|
||||||
|
| Nil -> raise Not_found
|
||||||
|
| Cons (k3,v3,l3) ->
|
||||||
|
if H.equal k k3 then v3
|
||||||
|
else match l3 with
|
||||||
|
| Nil -> raise Not_found
|
||||||
|
| Cons (k4,v4,l4) ->
|
||||||
|
if H.equal k k4 then v4 else find_rec_ k l4
|
||||||
|
|
||||||
(*$R
|
(*$R
|
||||||
let h = H.of_seq my_seq in
|
let h = H.of_seq my_seq in
|
||||||
|
|
@ -249,9 +276,9 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
||||||
try Some (find t k)
|
try Some (find t k)
|
||||||
with Not_found -> None
|
with Not_found -> None
|
||||||
|
|
||||||
let mem t k = Table.mem (reroot t) k
|
let mem t k =
|
||||||
|
try ignore (find t k); true
|
||||||
let length t = Table.length (reroot t)
|
with Not_found -> false
|
||||||
|
|
||||||
(*$R
|
(*$R
|
||||||
let h = H.of_seq
|
let h = H.of_seq
|
||||||
|
|
@ -267,33 +294,98 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
||||||
)
|
)
|
||||||
*)
|
*)
|
||||||
|
|
||||||
|
let rec buck_rev_iter_ ~f l = match l with
|
||||||
|
| Nil -> ()
|
||||||
|
| Cons (k,v,l') -> buck_rev_iter_ ~f l'; f k v
|
||||||
|
|
||||||
|
(* resize [a] so it has capacity [new_size], and insert [k,v] in it *)
|
||||||
|
let resize_ k v h a new_size =
|
||||||
|
assert (new_size > Array.length a);
|
||||||
|
let a' = Array.make new_size Nil in
|
||||||
|
(* preserve order of elements by iterating on each bucket in rev order *)
|
||||||
|
Array.iter
|
||||||
|
(buck_rev_iter_
|
||||||
|
~f:(fun k v ->
|
||||||
|
let i = find_idx_ ~h:(H.hash k) a' in
|
||||||
|
a'.(i) <- Cons (k,v,a'.(i))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
a;
|
||||||
|
let i = find_idx_ ~h a' in
|
||||||
|
a'.(i) <- Cons (k,v,a'.(i));
|
||||||
|
a'
|
||||||
|
|
||||||
|
(* insert [k,v] in [l] and returns new list and boolean flag indicating
|
||||||
|
whether it's a new element *)
|
||||||
|
let rec replace_rec_ k v l = match l with
|
||||||
|
| Nil -> Cons (k,v,Nil), true
|
||||||
|
| Cons (k',v',l') ->
|
||||||
|
if H.equal k k'
|
||||||
|
then Cons (k,v,l'), false
|
||||||
|
else
|
||||||
|
let l', is_new = replace_rec_ k v l' in
|
||||||
|
Cons (k',v',l'), is_new
|
||||||
|
|
||||||
let replace t k v =
|
let replace t k v =
|
||||||
let tbl = reroot t in
|
let a = reroot_ t in
|
||||||
(* create the new hashtable *)
|
let h = H.hash k in
|
||||||
let t' = ref (Table tbl) in
|
let i = find_idx_ ~h a in
|
||||||
(* update [t] to point to the new hashtable *)
|
match a.(i) with
|
||||||
(try
|
| Nil ->
|
||||||
let v' = Table.find tbl k in
|
if t.length > (Array.length a) lsl 1
|
||||||
t := Replace (k, v', t')
|
then (
|
||||||
with Not_found ->
|
(* resize *)
|
||||||
t := Remove (k, t')
|
let new_size = min (2 * (Array.length a)) Sys.max_array_length in
|
||||||
);
|
let a = resize_ k v h a new_size in
|
||||||
(* modify the underlying hashtable *)
|
{length=t.length+1; arr=Arr a}
|
||||||
Table.replace tbl k v;
|
) else (
|
||||||
t'
|
a.(i) <- Cons (k, v, Nil);
|
||||||
|
let t' = {length=t.length + 1; arr=Arr a} in
|
||||||
|
t.arr <- Set (i,Nil,t');
|
||||||
|
t'
|
||||||
|
)
|
||||||
|
| Cons _ as l ->
|
||||||
|
let l', is_new = replace_rec_ k v l in
|
||||||
|
if is_new && t.length > (Array.length a) lsl 1
|
||||||
|
then (
|
||||||
|
(* resize and insert [k,v] (again, it's new anyway) *)
|
||||||
|
let new_size = min (2 * (Array.length a)) Sys.max_array_length in
|
||||||
|
let a = resize_ k v h a new_size in
|
||||||
|
{length=t.length+1; arr=Arr a}
|
||||||
|
) else (
|
||||||
|
(* no resize *)
|
||||||
|
a.(i) <- l';
|
||||||
|
let t' = {
|
||||||
|
length=if is_new then t.length+1 else t.length;
|
||||||
|
arr=Arr a;
|
||||||
|
} in
|
||||||
|
t.arr <- Set (i,l,t');
|
||||||
|
t'
|
||||||
|
)
|
||||||
|
|
||||||
|
(* return [Some l'] if [l] changed into [l'] by removing [k] *)
|
||||||
|
let rec remove_rec_ k l = match l with
|
||||||
|
| Nil -> None
|
||||||
|
| Cons (k', v', l') ->
|
||||||
|
if H.equal k k'
|
||||||
|
then Some l'
|
||||||
|
else match remove_rec_ k l' with
|
||||||
|
| None -> None
|
||||||
|
| Some l' -> Some (Cons (k', v', l'))
|
||||||
|
|
||||||
let remove t k =
|
let remove t k =
|
||||||
let tbl = reroot t in
|
let a = reroot_ t in
|
||||||
try
|
let i = find_idx_ ~h:(H.hash k) a in
|
||||||
let v' = Table.find tbl k in
|
match a.(i) with
|
||||||
(* value present, make a new hashtable without this value *)
|
| Nil -> t
|
||||||
let t' = ref (Table tbl) in
|
| Cons _ as l ->
|
||||||
t := Add (k, v', t');
|
match remove_rec_ k l with
|
||||||
Table.remove tbl k;
|
| None -> t
|
||||||
t'
|
| Some l' ->
|
||||||
with Not_found ->
|
a.(i) <- l';
|
||||||
(* not member, nothing to do *)
|
let t' = {length=t.length-1; arr=Arr a} in
|
||||||
t
|
t.arr <- Set (i,l,t');
|
||||||
|
t'
|
||||||
|
|
||||||
(*$R
|
(*$R
|
||||||
let h = H.of_seq my_seq in
|
let h = H.of_seq my_seq in
|
||||||
|
|
@ -333,40 +425,78 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
||||||
| _, Some v' -> replace t k v'
|
| _, Some v' -> replace t k v'
|
||||||
|
|
||||||
let copy t =
|
let copy t =
|
||||||
let tbl = reroot t in
|
let a = Array.copy (reroot_ t) in
|
||||||
(* no one will point to the new [t] *)
|
{t with arr=Arr a}
|
||||||
let t = ref (Table (Table.copy tbl)) in
|
|
||||||
t
|
let rec buck_iter_ ~f l = match l with
|
||||||
|
| Nil -> ()
|
||||||
|
| Cons (k,v,l') -> f k v; buck_iter_ ~f l'
|
||||||
|
|
||||||
let iter t f =
|
let iter t f =
|
||||||
let tbl = reroot t in
|
let a = reroot_ t in
|
||||||
Table.iter f tbl
|
Array.iter (buck_iter_ ~f) a
|
||||||
|
|
||||||
|
let rec buck_fold_ f acc l = match l with
|
||||||
|
| Nil -> acc
|
||||||
|
| Cons (k,v,l') ->
|
||||||
|
let acc = f acc k v in
|
||||||
|
buck_fold_ f acc l'
|
||||||
|
|
||||||
let fold f acc t =
|
let fold f acc t =
|
||||||
let tbl = reroot t in
|
let a = reroot_ t in
|
||||||
Table.fold (fun k v acc -> f acc k v) tbl acc
|
Array.fold_left (buck_fold_ f) acc a
|
||||||
|
|
||||||
let map f t =
|
let map f t =
|
||||||
let tbl = reroot t in
|
let rec buck_map_ f l = match l with
|
||||||
let res = Table.create (Table.length tbl) in
|
| Nil -> Nil
|
||||||
Table.iter (fun k v -> Table.replace res k (f k v)) tbl;
|
| Cons (k,v,l') ->
|
||||||
ref (Table res)
|
let v' = f k v in
|
||||||
|
Cons (k,v', buck_map_ f l')
|
||||||
|
in
|
||||||
|
let a = reroot_ t in
|
||||||
|
let a' = Array.map (buck_map_ f) a in
|
||||||
|
{length=t.length; arr=Arr a'}
|
||||||
|
|
||||||
|
let rec buck_filter_ ~f l = match l with
|
||||||
|
| Nil -> Nil
|
||||||
|
| Cons (k,v,l') ->
|
||||||
|
let l' = buck_filter_ ~f l' in
|
||||||
|
if f k v then Cons (k,v,l') else l'
|
||||||
|
|
||||||
|
let buck_length_ b = buck_fold_ (fun n _ _ -> n+1) 0 b
|
||||||
|
|
||||||
let filter p t =
|
let filter p t =
|
||||||
let tbl = reroot t in
|
let a = reroot_ t in
|
||||||
let res = Table.create (Table.length tbl) in
|
let length = ref 0 in
|
||||||
Table.iter (fun k v -> if p k v then Table.replace res k v) tbl;
|
let a' = Array.map
|
||||||
ref (Table res)
|
(fun b ->
|
||||||
|
let b' = buck_filter_ ~f:p b in
|
||||||
|
length := !length + (buck_length_ b');
|
||||||
|
b'
|
||||||
|
) a
|
||||||
|
in
|
||||||
|
{length= !length; arr=Arr a'}
|
||||||
|
|
||||||
|
let rec buck_filter_map_ ~f l = match l with
|
||||||
|
| Nil -> Nil
|
||||||
|
| Cons (k,v,l') ->
|
||||||
|
let l' = buck_filter_map_ ~f l' in
|
||||||
|
match f k v with
|
||||||
|
| None -> l'
|
||||||
|
| Some v' ->
|
||||||
|
Cons (k,v',l')
|
||||||
|
|
||||||
let filter_map f t =
|
let filter_map f t =
|
||||||
let tbl = reroot t in
|
let a = reroot_ t in
|
||||||
let res = Table.create (Table.length tbl) in
|
let length = ref 0 in
|
||||||
Table.iter
|
let a' = Array.map
|
||||||
(fun k v -> match f k v with
|
(fun b ->
|
||||||
| None -> ()
|
let b' = buck_filter_map_ ~f b in
|
||||||
| Some v' -> Table.replace res k v'
|
length := !length + (buck_length_ b');
|
||||||
) tbl;
|
b'
|
||||||
ref (Table res)
|
) a
|
||||||
|
in
|
||||||
|
{length= !length; arr=Arr a'}
|
||||||
|
|
||||||
exception ExitPTbl
|
exception ExitPTbl
|
||||||
|
|
||||||
|
|
@ -383,19 +513,22 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
||||||
with ExitPTbl -> true
|
with ExitPTbl -> true
|
||||||
|
|
||||||
let merge f t1 t2 =
|
let merge f t1 t2 =
|
||||||
let tbl = Table.create (max (length t1) (length t2)) in
|
let tbl = create (max (length t1) (length t2)) in
|
||||||
iter t1
|
let tbl = fold
|
||||||
(fun k v1 ->
|
(fun tbl k v1 ->
|
||||||
let v2 = try Some (find t2 k) with Not_found -> None in
|
let v2 = try Some (find t2 k) with Not_found -> None in
|
||||||
match f k (Some v1) v2 with
|
match f k (Some v1) v2 with
|
||||||
| None -> ()
|
| None -> tbl
|
||||||
| Some v' -> Table.replace tbl k v');
|
| Some v' -> replace tbl k v')
|
||||||
iter t2
|
tbl t1
|
||||||
(fun k v2 ->
|
in
|
||||||
if not (mem t1 k) then match f k None (Some v2) with
|
fold
|
||||||
| None -> ()
|
(fun tbl k v2 ->
|
||||||
| Some _ -> Table.replace tbl k v2);
|
if mem t1 k then tbl
|
||||||
ref (Table tbl)
|
else match f k None (Some v2) with
|
||||||
|
| None -> tbl
|
||||||
|
| Some _ -> replace tbl k v2
|
||||||
|
) tbl t2
|
||||||
|
|
||||||
(*$R
|
(*$R
|
||||||
let t1 = H.of_list [1, "a"; 2, "b1"] in
|
let t1 = H.of_list [1, "a"; 2, "b1"] in
|
||||||
|
|
@ -444,10 +577,7 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
||||||
|
|
||||||
let of_list l = add_list (empty ()) l
|
let of_list l = add_list (empty ()) l
|
||||||
|
|
||||||
let to_list t =
|
let to_list t = fold (fun acc k v -> (k,v)::acc) [] t
|
||||||
let tbl = reroot t in
|
|
||||||
let bindings = Table.fold (fun k v acc -> (k,v)::acc) tbl [] in
|
|
||||||
bindings
|
|
||||||
|
|
||||||
(*$R
|
(*$R
|
||||||
let h = H.of_seq my_seq in
|
let h = H.of_seq my_seq in
|
||||||
|
|
@ -457,8 +587,7 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
||||||
|
|
||||||
let to_seq t =
|
let to_seq t =
|
||||||
fun k ->
|
fun k ->
|
||||||
let tbl = reroot t in
|
iter t (fun x y -> k (x,y))
|
||||||
Table.iter (fun x y -> k (x,y)) tbl
|
|
||||||
|
|
||||||
(*$R
|
(*$R
|
||||||
let h = H.of_seq my_seq in
|
let h = H.of_seq my_seq in
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue