mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-09 12:45:34 -05:00
reimplementation of CCPersistentHashtbl
This commit is contained in:
parent
aa1c5fb0e9
commit
15d5da628d
2 changed files with 242 additions and 113 deletions
2
_tags
2
_tags
|
|
@ -3,6 +3,6 @@
|
|||
<tests/*.ml{,i}>: thread
|
||||
<src/threads/*.ml{,i}>: thread
|
||||
<src/core/CCVector.cmx>: inline(25)
|
||||
<src/data/CCFlatHashtbl.cm*> or <src/data/CCHashTrie.cm*>: inline(15)
|
||||
<src/data/CCFlatHashtbl.cm*> or <src/data/CCHashTrie.cm*> or <src/data/CCPersistent*>: inline(15)
|
||||
<src/**/*.ml> and not <src/misc/*.ml>: warn_A, warn(-4), warn(-44)
|
||||
true: no_alias_deps, safe_string
|
||||
|
|
|
|||
|
|
@ -155,58 +155,85 @@ end
|
|||
(** {2 Implementation} *)
|
||||
|
||||
module Make(H : HashedType) : S with type key = H.t = struct
|
||||
module Table = Hashtbl.Make(H)
|
||||
(** Imperative hashtable *)
|
||||
|
||||
type key = H.t
|
||||
type 'a t = 'a zipper ref
|
||||
and 'a zipper =
|
||||
| Table of 'a Table.t (** Concrete table *)
|
||||
| Add of key * 'a * 'a t (** Add key *)
|
||||
| Replace of key * 'a * 'a t (** Replace key by value *)
|
||||
| Remove of key * 'a t (** As the table, but without given key *)
|
||||
|
||||
(* main hashtable *)
|
||||
type 'a t = {
|
||||
mutable arr: 'a p_array; (* invariant: length is a power of 2 *)
|
||||
length: int;
|
||||
}
|
||||
|
||||
(* piece of a persistent array *)
|
||||
and 'a p_array =
|
||||
| Arr of 'a bucket array
|
||||
| Set of int * 'a bucket * 'a t
|
||||
|
||||
(* bucket of the hashtbl *)
|
||||
and 'a bucket =
|
||||
| Nil
|
||||
| Cons of key * 'a * 'a bucket
|
||||
|
||||
(* first power of two that is bigger than [than], starting from [n] *)
|
||||
let rec power_two_larger ~than n =
|
||||
if n>= than then n else power_two_larger ~than (2*n)
|
||||
|
||||
let create i =
|
||||
ref (Table (Table.create i))
|
||||
let i = power_two_larger ~than:i 16 in
|
||||
{ length=0;
|
||||
arr=Arr (Array.make i Nil)
|
||||
}
|
||||
|
||||
let empty () = create 11
|
||||
let empty () = create 16
|
||||
|
||||
(* pass continuation to get a tailrec rerooting *)
|
||||
let rec _reroot t k = match !t with
|
||||
| Table tbl -> k tbl (* done *)
|
||||
| Add (key, v, t') ->
|
||||
_reroot t'
|
||||
(fun tbl ->
|
||||
t' := Remove (key, t);
|
||||
Table.add tbl key v;
|
||||
t := Table tbl;
|
||||
k tbl)
|
||||
| Replace (key, v, t') ->
|
||||
_reroot t'
|
||||
(fun tbl ->
|
||||
let v' = Table.find tbl key in
|
||||
t' := Replace (key, v', t);
|
||||
t := Table tbl;
|
||||
Table.replace tbl key v;
|
||||
k tbl)
|
||||
| Remove (key, t') ->
|
||||
_reroot t'
|
||||
(fun tbl ->
|
||||
let v = Table.find tbl key in
|
||||
t' := Add (key, v, t);
|
||||
t := Table tbl;
|
||||
Table.remove tbl key;
|
||||
k tbl)
|
||||
let rec reroot_rec_ t k = match t.arr with
|
||||
| Arr a -> k a
|
||||
| Set (i, v, t') ->
|
||||
reroot_rec_ t' (fun a ->
|
||||
let v' = a.(i) in
|
||||
a.(i) <- v;
|
||||
t.arr <- Arr a;
|
||||
t'.arr <- Set (i, v', t);
|
||||
k a
|
||||
)
|
||||
|
||||
(* Reroot: modify the zipper so that the current node is a proper
|
||||
hashtable, and return the hashtable *)
|
||||
let reroot t = match !t with
|
||||
| Table tbl -> tbl
|
||||
| _ -> _reroot t (fun x -> x)
|
||||
(* obtain the array *)
|
||||
let reroot_ t = match t.arr with
|
||||
| Arr a -> a
|
||||
| _ -> reroot_rec_ t (fun x -> x)
|
||||
|
||||
let is_empty t = Table.length (reroot t) = 0
|
||||
let is_empty t = t.length = 0
|
||||
|
||||
let find t k = Table.find (reroot t) k
|
||||
let length t = t.length
|
||||
|
||||
(* find index of [h] in [a] *)
|
||||
let find_idx_ a ~h =
|
||||
(* bitmask 00001111 if length(a) = 10000 *)
|
||||
h land (Array.length a - 1)
|
||||
|
||||
let rec find_rec_ k l = match l with
|
||||
| Nil -> raise Not_found
|
||||
| Cons (k', v', l') ->
|
||||
if H.equal k k' then v' else find_rec_ k l'
|
||||
|
||||
let find t k =
|
||||
let a = reroot_ t in
|
||||
(* unroll like crazy *)
|
||||
match a.(find_idx_ ~h:(H.hash k) a) with
|
||||
| Nil -> raise Not_found
|
||||
| Cons (k1, v1, l1) ->
|
||||
if H.equal k k1 then v1
|
||||
else match l1 with
|
||||
| Nil -> raise Not_found
|
||||
| Cons (k2,v2,l2) ->
|
||||
if H.equal k k2 then v2
|
||||
else match l2 with
|
||||
| Nil -> raise Not_found
|
||||
| Cons (k3,v3,l3) ->
|
||||
if H.equal k k3 then v3
|
||||
else match l3 with
|
||||
| Nil -> raise Not_found
|
||||
| Cons (k4,v4,l4) ->
|
||||
if H.equal k k4 then v4 else find_rec_ k l4
|
||||
|
||||
(*$R
|
||||
let h = H.of_seq my_seq in
|
||||
|
|
@ -249,9 +276,9 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
|||
try Some (find t k)
|
||||
with Not_found -> None
|
||||
|
||||
let mem t k = Table.mem (reroot t) k
|
||||
|
||||
let length t = Table.length (reroot t)
|
||||
let mem t k =
|
||||
try ignore (find t k); true
|
||||
with Not_found -> false
|
||||
|
||||
(*$R
|
||||
let h = H.of_seq
|
||||
|
|
@ -267,33 +294,98 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
|||
)
|
||||
*)
|
||||
|
||||
let rec buck_rev_iter_ ~f l = match l with
|
||||
| Nil -> ()
|
||||
| Cons (k,v,l') -> buck_rev_iter_ ~f l'; f k v
|
||||
|
||||
(* resize [a] so it has capacity [new_size], and insert [k,v] in it *)
|
||||
let resize_ k v h a new_size =
|
||||
assert (new_size > Array.length a);
|
||||
let a' = Array.make new_size Nil in
|
||||
(* preserve order of elements by iterating on each bucket in rev order *)
|
||||
Array.iter
|
||||
(buck_rev_iter_
|
||||
~f:(fun k v ->
|
||||
let i = find_idx_ ~h:(H.hash k) a' in
|
||||
a'.(i) <- Cons (k,v,a'.(i))
|
||||
)
|
||||
)
|
||||
a;
|
||||
let i = find_idx_ ~h a' in
|
||||
a'.(i) <- Cons (k,v,a'.(i));
|
||||
a'
|
||||
|
||||
(* insert [k,v] in [l] and returns new list and boolean flag indicating
|
||||
whether it's a new element *)
|
||||
let rec replace_rec_ k v l = match l with
|
||||
| Nil -> Cons (k,v,Nil), true
|
||||
| Cons (k',v',l') ->
|
||||
if H.equal k k'
|
||||
then Cons (k,v,l'), false
|
||||
else
|
||||
let l', is_new = replace_rec_ k v l' in
|
||||
Cons (k',v',l'), is_new
|
||||
|
||||
let replace t k v =
|
||||
let tbl = reroot t in
|
||||
(* create the new hashtable *)
|
||||
let t' = ref (Table tbl) in
|
||||
(* update [t] to point to the new hashtable *)
|
||||
(try
|
||||
let v' = Table.find tbl k in
|
||||
t := Replace (k, v', t')
|
||||
with Not_found ->
|
||||
t := Remove (k, t')
|
||||
);
|
||||
(* modify the underlying hashtable *)
|
||||
Table.replace tbl k v;
|
||||
t'
|
||||
let a = reroot_ t in
|
||||
let h = H.hash k in
|
||||
let i = find_idx_ ~h a in
|
||||
match a.(i) with
|
||||
| Nil ->
|
||||
if t.length > (Array.length a) lsl 1
|
||||
then (
|
||||
(* resize *)
|
||||
let new_size = min (2 * (Array.length a)) Sys.max_array_length in
|
||||
let a = resize_ k v h a new_size in
|
||||
{length=t.length+1; arr=Arr a}
|
||||
) else (
|
||||
a.(i) <- Cons (k, v, Nil);
|
||||
let t' = {length=t.length + 1; arr=Arr a} in
|
||||
t.arr <- Set (i,Nil,t');
|
||||
t'
|
||||
)
|
||||
| Cons _ as l ->
|
||||
let l', is_new = replace_rec_ k v l in
|
||||
if is_new && t.length > (Array.length a) lsl 1
|
||||
then (
|
||||
(* resize and insert [k,v] (again, it's new anyway) *)
|
||||
let new_size = min (2 * (Array.length a)) Sys.max_array_length in
|
||||
let a = resize_ k v h a new_size in
|
||||
{length=t.length+1; arr=Arr a}
|
||||
) else (
|
||||
(* no resize *)
|
||||
a.(i) <- l';
|
||||
let t' = {
|
||||
length=if is_new then t.length+1 else t.length;
|
||||
arr=Arr a;
|
||||
} in
|
||||
t.arr <- Set (i,l,t');
|
||||
t'
|
||||
)
|
||||
|
||||
(* return [Some l'] if [l] changed into [l'] by removing [k] *)
|
||||
let rec remove_rec_ k l = match l with
|
||||
| Nil -> None
|
||||
| Cons (k', v', l') ->
|
||||
if H.equal k k'
|
||||
then Some l'
|
||||
else match remove_rec_ k l' with
|
||||
| None -> None
|
||||
| Some l' -> Some (Cons (k', v', l'))
|
||||
|
||||
let remove t k =
|
||||
let tbl = reroot t in
|
||||
try
|
||||
let v' = Table.find tbl k in
|
||||
(* value present, make a new hashtable without this value *)
|
||||
let t' = ref (Table tbl) in
|
||||
t := Add (k, v', t');
|
||||
Table.remove tbl k;
|
||||
t'
|
||||
with Not_found ->
|
||||
(* not member, nothing to do *)
|
||||
t
|
||||
let a = reroot_ t in
|
||||
let i = find_idx_ ~h:(H.hash k) a in
|
||||
match a.(i) with
|
||||
| Nil -> t
|
||||
| Cons _ as l ->
|
||||
match remove_rec_ k l with
|
||||
| None -> t
|
||||
| Some l' ->
|
||||
a.(i) <- l';
|
||||
let t' = {length=t.length-1; arr=Arr a} in
|
||||
t.arr <- Set (i,l,t');
|
||||
t'
|
||||
|
||||
(*$R
|
||||
let h = H.of_seq my_seq in
|
||||
|
|
@ -333,40 +425,78 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
|||
| _, Some v' -> replace t k v'
|
||||
|
||||
let copy t =
|
||||
let tbl = reroot t in
|
||||
(* no one will point to the new [t] *)
|
||||
let t = ref (Table (Table.copy tbl)) in
|
||||
t
|
||||
let a = Array.copy (reroot_ t) in
|
||||
{t with arr=Arr a}
|
||||
|
||||
let rec buck_iter_ ~f l = match l with
|
||||
| Nil -> ()
|
||||
| Cons (k,v,l') -> f k v; buck_iter_ ~f l'
|
||||
|
||||
let iter t f =
|
||||
let tbl = reroot t in
|
||||
Table.iter f tbl
|
||||
let a = reroot_ t in
|
||||
Array.iter (buck_iter_ ~f) a
|
||||
|
||||
let rec buck_fold_ f acc l = match l with
|
||||
| Nil -> acc
|
||||
| Cons (k,v,l') ->
|
||||
let acc = f acc k v in
|
||||
buck_fold_ f acc l'
|
||||
|
||||
let fold f acc t =
|
||||
let tbl = reroot t in
|
||||
Table.fold (fun k v acc -> f acc k v) tbl acc
|
||||
let a = reroot_ t in
|
||||
Array.fold_left (buck_fold_ f) acc a
|
||||
|
||||
let map f t =
|
||||
let tbl = reroot t in
|
||||
let res = Table.create (Table.length tbl) in
|
||||
Table.iter (fun k v -> Table.replace res k (f k v)) tbl;
|
||||
ref (Table res)
|
||||
let rec buck_map_ f l = match l with
|
||||
| Nil -> Nil
|
||||
| Cons (k,v,l') ->
|
||||
let v' = f k v in
|
||||
Cons (k,v', buck_map_ f l')
|
||||
in
|
||||
let a = reroot_ t in
|
||||
let a' = Array.map (buck_map_ f) a in
|
||||
{length=t.length; arr=Arr a'}
|
||||
|
||||
let rec buck_filter_ ~f l = match l with
|
||||
| Nil -> Nil
|
||||
| Cons (k,v,l') ->
|
||||
let l' = buck_filter_ ~f l' in
|
||||
if f k v then Cons (k,v,l') else l'
|
||||
|
||||
let buck_length_ b = buck_fold_ (fun n _ _ -> n+1) 0 b
|
||||
|
||||
let filter p t =
|
||||
let tbl = reroot t in
|
||||
let res = Table.create (Table.length tbl) in
|
||||
Table.iter (fun k v -> if p k v then Table.replace res k v) tbl;
|
||||
ref (Table res)
|
||||
let a = reroot_ t in
|
||||
let length = ref 0 in
|
||||
let a' = Array.map
|
||||
(fun b ->
|
||||
let b' = buck_filter_ ~f:p b in
|
||||
length := !length + (buck_length_ b');
|
||||
b'
|
||||
) a
|
||||
in
|
||||
{length= !length; arr=Arr a'}
|
||||
|
||||
let rec buck_filter_map_ ~f l = match l with
|
||||
| Nil -> Nil
|
||||
| Cons (k,v,l') ->
|
||||
let l' = buck_filter_map_ ~f l' in
|
||||
match f k v with
|
||||
| None -> l'
|
||||
| Some v' ->
|
||||
Cons (k,v',l')
|
||||
|
||||
let filter_map f t =
|
||||
let tbl = reroot t in
|
||||
let res = Table.create (Table.length tbl) in
|
||||
Table.iter
|
||||
(fun k v -> match f k v with
|
||||
| None -> ()
|
||||
| Some v' -> Table.replace res k v'
|
||||
) tbl;
|
||||
ref (Table res)
|
||||
let a = reroot_ t in
|
||||
let length = ref 0 in
|
||||
let a' = Array.map
|
||||
(fun b ->
|
||||
let b' = buck_filter_map_ ~f b in
|
||||
length := !length + (buck_length_ b');
|
||||
b'
|
||||
) a
|
||||
in
|
||||
{length= !length; arr=Arr a'}
|
||||
|
||||
exception ExitPTbl
|
||||
|
||||
|
|
@ -383,19 +513,22 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
|||
with ExitPTbl -> true
|
||||
|
||||
let merge f t1 t2 =
|
||||
let tbl = Table.create (max (length t1) (length t2)) in
|
||||
iter t1
|
||||
(fun k v1 ->
|
||||
let tbl = create (max (length t1) (length t2)) in
|
||||
let tbl = fold
|
||||
(fun tbl k v1 ->
|
||||
let v2 = try Some (find t2 k) with Not_found -> None in
|
||||
match f k (Some v1) v2 with
|
||||
| None -> ()
|
||||
| Some v' -> Table.replace tbl k v');
|
||||
iter t2
|
||||
(fun k v2 ->
|
||||
if not (mem t1 k) then match f k None (Some v2) with
|
||||
| None -> ()
|
||||
| Some _ -> Table.replace tbl k v2);
|
||||
ref (Table tbl)
|
||||
| None -> tbl
|
||||
| Some v' -> replace tbl k v')
|
||||
tbl t1
|
||||
in
|
||||
fold
|
||||
(fun tbl k v2 ->
|
||||
if mem t1 k then tbl
|
||||
else match f k None (Some v2) with
|
||||
| None -> tbl
|
||||
| Some _ -> replace tbl k v2
|
||||
) tbl t2
|
||||
|
||||
(*$R
|
||||
let t1 = H.of_list [1, "a"; 2, "b1"] in
|
||||
|
|
@ -444,10 +577,7 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
|||
|
||||
let of_list l = add_list (empty ()) l
|
||||
|
||||
let to_list t =
|
||||
let tbl = reroot t in
|
||||
let bindings = Table.fold (fun k v acc -> (k,v)::acc) tbl [] in
|
||||
bindings
|
||||
let to_list t = fold (fun acc k v -> (k,v)::acc) [] t
|
||||
|
||||
(*$R
|
||||
let h = H.of_seq my_seq in
|
||||
|
|
@ -457,8 +587,7 @@ module Make(H : HashedType) : S with type key = H.t = struct
|
|||
|
||||
let to_seq t =
|
||||
fun k ->
|
||||
let tbl = reroot t in
|
||||
Table.iter (fun x y -> k (x,y)) tbl
|
||||
iter t (fun x y -> k (x,y))
|
||||
|
||||
(*$R
|
||||
let h = H.of_seq my_seq in
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue