reimplementation of CCPersistentHashtbl

This commit is contained in:
Simon Cruanes 2015-10-21 22:19:28 +02:00
parent aa1c5fb0e9
commit 15d5da628d
2 changed files with 242 additions and 113 deletions

2
_tags
View file

@ -3,6 +3,6 @@
<tests/*.ml{,i}>: thread <tests/*.ml{,i}>: thread
<src/threads/*.ml{,i}>: thread <src/threads/*.ml{,i}>: thread
<src/core/CCVector.cmx>: inline(25) <src/core/CCVector.cmx>: inline(25)
<src/data/CCFlatHashtbl.cm*> or <src/data/CCHashTrie.cm*>: inline(15) <src/data/CCFlatHashtbl.cm*> or <src/data/CCHashTrie.cm*> or <src/data/CCPersistent*>: inline(15)
<src/**/*.ml> and not <src/misc/*.ml>: warn_A, warn(-4), warn(-44) <src/**/*.ml> and not <src/misc/*.ml>: warn_A, warn(-4), warn(-44)
true: no_alias_deps, safe_string true: no_alias_deps, safe_string

View file

@ -155,58 +155,85 @@ end
(** {2 Implementation} *) (** {2 Implementation} *)
module Make(H : HashedType) : S with type key = H.t = struct module Make(H : HashedType) : S with type key = H.t = struct
module Table = Hashtbl.Make(H)
(** Imperative hashtable *)
type key = H.t type key = H.t
type 'a t = 'a zipper ref
and 'a zipper = (* main hashtable *)
| Table of 'a Table.t (** Concrete table *) type 'a t = {
| Add of key * 'a * 'a t (** Add key *) mutable arr: 'a p_array; (* invariant: length is a power of 2 *)
| Replace of key * 'a * 'a t (** Replace key by value *) length: int;
| Remove of key * 'a t (** As the table, but without given key *) }
(* piece of a persistent array *)
and 'a p_array =
| Arr of 'a bucket array
| Set of int * 'a bucket * 'a t
(* bucket of the hashtbl *)
and 'a bucket =
| Nil
| Cons of key * 'a * 'a bucket
(* first power of two that is bigger than [than], starting from [n] *)
let rec power_two_larger ~than n =
if n>= than then n else power_two_larger ~than (2*n)
let create i = let create i =
ref (Table (Table.create i)) let i = power_two_larger ~than:i 16 in
{ length=0;
arr=Arr (Array.make i Nil)
}
let empty () = create 11 let empty () = create 16
(* pass continuation to get a tailrec rerooting *) let rec reroot_rec_ t k = match t.arr with
let rec _reroot t k = match !t with | Arr a -> k a
| Table tbl -> k tbl (* done *) | Set (i, v, t') ->
| Add (key, v, t') -> reroot_rec_ t' (fun a ->
_reroot t' let v' = a.(i) in
(fun tbl -> a.(i) <- v;
t' := Remove (key, t); t.arr <- Arr a;
Table.add tbl key v; t'.arr <- Set (i, v', t);
t := Table tbl; k a
k tbl) )
| Replace (key, v, t') ->
_reroot t'
(fun tbl ->
let v' = Table.find tbl key in
t' := Replace (key, v', t);
t := Table tbl;
Table.replace tbl key v;
k tbl)
| Remove (key, t') ->
_reroot t'
(fun tbl ->
let v = Table.find tbl key in
t' := Add (key, v, t);
t := Table tbl;
Table.remove tbl key;
k tbl)
(* Reroot: modify the zipper so that the current node is a proper (* obtain the array *)
hashtable, and return the hashtable *) let reroot_ t = match t.arr with
let reroot t = match !t with | Arr a -> a
| Table tbl -> tbl | _ -> reroot_rec_ t (fun x -> x)
| _ -> _reroot t (fun x -> x)
let is_empty t = Table.length (reroot t) = 0 let is_empty t = t.length = 0
let find t k = Table.find (reroot t) k let length t = t.length
(* find index of [h] in [a] *)
let find_idx_ a ~h =
(* bitmask 00001111 if length(a) = 10000 *)
h land (Array.length a - 1)
let rec find_rec_ k l = match l with
| Nil -> raise Not_found
| Cons (k', v', l') ->
if H.equal k k' then v' else find_rec_ k l'
let find t k =
let a = reroot_ t in
(* unroll like crazy *)
match a.(find_idx_ ~h:(H.hash k) a) with
| Nil -> raise Not_found
| Cons (k1, v1, l1) ->
if H.equal k k1 then v1
else match l1 with
| Nil -> raise Not_found
| Cons (k2,v2,l2) ->
if H.equal k k2 then v2
else match l2 with
| Nil -> raise Not_found
| Cons (k3,v3,l3) ->
if H.equal k k3 then v3
else match l3 with
| Nil -> raise Not_found
| Cons (k4,v4,l4) ->
if H.equal k k4 then v4 else find_rec_ k l4
(*$R (*$R
let h = H.of_seq my_seq in let h = H.of_seq my_seq in
@ -249,9 +276,9 @@ module Make(H : HashedType) : S with type key = H.t = struct
try Some (find t k) try Some (find t k)
with Not_found -> None with Not_found -> None
let mem t k = Table.mem (reroot t) k let mem t k =
try ignore (find t k); true
let length t = Table.length (reroot t) with Not_found -> false
(*$R (*$R
let h = H.of_seq let h = H.of_seq
@ -267,33 +294,98 @@ module Make(H : HashedType) : S with type key = H.t = struct
) )
*) *)
let rec buck_rev_iter_ ~f l = match l with
| Nil -> ()
| Cons (k,v,l') -> buck_rev_iter_ ~f l'; f k v
(* resize [a] so it has capacity [new_size], and insert [k,v] in it *)
let resize_ k v h a new_size =
assert (new_size > Array.length a);
let a' = Array.make new_size Nil in
(* preserve order of elements by iterating on each bucket in rev order *)
Array.iter
(buck_rev_iter_
~f:(fun k v ->
let i = find_idx_ ~h:(H.hash k) a' in
a'.(i) <- Cons (k,v,a'.(i))
)
)
a;
let i = find_idx_ ~h a' in
a'.(i) <- Cons (k,v,a'.(i));
a'
(* insert [k,v] in [l] and returns new list and boolean flag indicating
whether it's a new element *)
let rec replace_rec_ k v l = match l with
| Nil -> Cons (k,v,Nil), true
| Cons (k',v',l') ->
if H.equal k k'
then Cons (k,v,l'), false
else
let l', is_new = replace_rec_ k v l' in
Cons (k',v',l'), is_new
let replace t k v = let replace t k v =
let tbl = reroot t in let a = reroot_ t in
(* create the new hashtable *) let h = H.hash k in
let t' = ref (Table tbl) in let i = find_idx_ ~h a in
(* update [t] to point to the new hashtable *) match a.(i) with
(try | Nil ->
let v' = Table.find tbl k in if t.length > (Array.length a) lsl 1
t := Replace (k, v', t') then (
with Not_found -> (* resize *)
t := Remove (k, t') let new_size = min (2 * (Array.length a)) Sys.max_array_length in
); let a = resize_ k v h a new_size in
(* modify the underlying hashtable *) {length=t.length+1; arr=Arr a}
Table.replace tbl k v; ) else (
t' a.(i) <- Cons (k, v, Nil);
let t' = {length=t.length + 1; arr=Arr a} in
t.arr <- Set (i,Nil,t');
t'
)
| Cons _ as l ->
let l', is_new = replace_rec_ k v l in
if is_new && t.length > (Array.length a) lsl 1
then (
(* resize and insert [k,v] (again, it's new anyway) *)
let new_size = min (2 * (Array.length a)) Sys.max_array_length in
let a = resize_ k v h a new_size in
{length=t.length+1; arr=Arr a}
) else (
(* no resize *)
a.(i) <- l';
let t' = {
length=if is_new then t.length+1 else t.length;
arr=Arr a;
} in
t.arr <- Set (i,l,t');
t'
)
(* return [Some l'] if [l] changed into [l'] by removing [k] *)
let rec remove_rec_ k l = match l with
| Nil -> None
| Cons (k', v', l') ->
if H.equal k k'
then Some l'
else match remove_rec_ k l' with
| None -> None
| Some l' -> Some (Cons (k', v', l'))
let remove t k = let remove t k =
let tbl = reroot t in let a = reroot_ t in
try let i = find_idx_ ~h:(H.hash k) a in
let v' = Table.find tbl k in match a.(i) with
(* value present, make a new hashtable without this value *) | Nil -> t
let t' = ref (Table tbl) in | Cons _ as l ->
t := Add (k, v', t'); match remove_rec_ k l with
Table.remove tbl k; | None -> t
t' | Some l' ->
with Not_found -> a.(i) <- l';
(* not member, nothing to do *) let t' = {length=t.length-1; arr=Arr a} in
t t.arr <- Set (i,l,t');
t'
(*$R (*$R
let h = H.of_seq my_seq in let h = H.of_seq my_seq in
@ -333,40 +425,78 @@ module Make(H : HashedType) : S with type key = H.t = struct
| _, Some v' -> replace t k v' | _, Some v' -> replace t k v'
let copy t = let copy t =
let tbl = reroot t in let a = Array.copy (reroot_ t) in
(* no one will point to the new [t] *) {t with arr=Arr a}
let t = ref (Table (Table.copy tbl)) in
t let rec buck_iter_ ~f l = match l with
| Nil -> ()
| Cons (k,v,l') -> f k v; buck_iter_ ~f l'
let iter t f = let iter t f =
let tbl = reroot t in let a = reroot_ t in
Table.iter f tbl Array.iter (buck_iter_ ~f) a
let rec buck_fold_ f acc l = match l with
| Nil -> acc
| Cons (k,v,l') ->
let acc = f acc k v in
buck_fold_ f acc l'
let fold f acc t = let fold f acc t =
let tbl = reroot t in let a = reroot_ t in
Table.fold (fun k v acc -> f acc k v) tbl acc Array.fold_left (buck_fold_ f) acc a
let map f t = let map f t =
let tbl = reroot t in let rec buck_map_ f l = match l with
let res = Table.create (Table.length tbl) in | Nil -> Nil
Table.iter (fun k v -> Table.replace res k (f k v)) tbl; | Cons (k,v,l') ->
ref (Table res) let v' = f k v in
Cons (k,v', buck_map_ f l')
in
let a = reroot_ t in
let a' = Array.map (buck_map_ f) a in
{length=t.length; arr=Arr a'}
let rec buck_filter_ ~f l = match l with
| Nil -> Nil
| Cons (k,v,l') ->
let l' = buck_filter_ ~f l' in
if f k v then Cons (k,v,l') else l'
let buck_length_ b = buck_fold_ (fun n _ _ -> n+1) 0 b
let filter p t = let filter p t =
let tbl = reroot t in let a = reroot_ t in
let res = Table.create (Table.length tbl) in let length = ref 0 in
Table.iter (fun k v -> if p k v then Table.replace res k v) tbl; let a' = Array.map
ref (Table res) (fun b ->
let b' = buck_filter_ ~f:p b in
length := !length + (buck_length_ b');
b'
) a
in
{length= !length; arr=Arr a'}
let rec buck_filter_map_ ~f l = match l with
| Nil -> Nil
| Cons (k,v,l') ->
let l' = buck_filter_map_ ~f l' in
match f k v with
| None -> l'
| Some v' ->
Cons (k,v',l')
let filter_map f t = let filter_map f t =
let tbl = reroot t in let a = reroot_ t in
let res = Table.create (Table.length tbl) in let length = ref 0 in
Table.iter let a' = Array.map
(fun k v -> match f k v with (fun b ->
| None -> () let b' = buck_filter_map_ ~f b in
| Some v' -> Table.replace res k v' length := !length + (buck_length_ b');
) tbl; b'
ref (Table res) ) a
in
{length= !length; arr=Arr a'}
exception ExitPTbl exception ExitPTbl
@ -383,19 +513,22 @@ module Make(H : HashedType) : S with type key = H.t = struct
with ExitPTbl -> true with ExitPTbl -> true
let merge f t1 t2 = let merge f t1 t2 =
let tbl = Table.create (max (length t1) (length t2)) in let tbl = create (max (length t1) (length t2)) in
iter t1 let tbl = fold
(fun k v1 -> (fun tbl k v1 ->
let v2 = try Some (find t2 k) with Not_found -> None in let v2 = try Some (find t2 k) with Not_found -> None in
match f k (Some v1) v2 with match f k (Some v1) v2 with
| None -> () | None -> tbl
| Some v' -> Table.replace tbl k v'); | Some v' -> replace tbl k v')
iter t2 tbl t1
(fun k v2 -> in
if not (mem t1 k) then match f k None (Some v2) with fold
| None -> () (fun tbl k v2 ->
| Some _ -> Table.replace tbl k v2); if mem t1 k then tbl
ref (Table tbl) else match f k None (Some v2) with
| None -> tbl
| Some _ -> replace tbl k v2
) tbl t2
(*$R (*$R
let t1 = H.of_list [1, "a"; 2, "b1"] in let t1 = H.of_list [1, "a"; 2, "b1"] in
@ -444,10 +577,7 @@ module Make(H : HashedType) : S with type key = H.t = struct
let of_list l = add_list (empty ()) l let of_list l = add_list (empty ()) l
let to_list t = let to_list t = fold (fun acc k v -> (k,v)::acc) [] t
let tbl = reroot t in
let bindings = Table.fold (fun k v acc -> (k,v)::acc) tbl [] in
bindings
(*$R (*$R
let h = H.of_seq my_seq in let h = H.of_seq my_seq in
@ -457,8 +587,7 @@ module Make(H : HashedType) : S with type key = H.t = struct
let to_seq t = let to_seq t =
fun k -> fun k ->
let tbl = reroot t in iter t (fun x y -> k (x,y))
Table.iter (fun x y -> k (x,y)) tbl
(*$R (*$R
let h = H.of_seq my_seq in let h = H.of_seq my_seq in