mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-08 04:05:30 -05:00
use 32-bits and regular integers for popcount in CCHashTrie
This commit is contained in:
parent
47414c7f40
commit
895c8a73d9
2 changed files with 103 additions and 78 deletions
|
|
@ -118,76 +118,78 @@ end
|
||||||
from https://en.wikipedia.org/wiki/Hamming_weight
|
from https://en.wikipedia.org/wiki/Hamming_weight
|
||||||
|
|
||||||
//This uses fewer arithmetic operations than any other known
|
//This uses fewer arithmetic operations than any other known
|
||||||
//implementation on machines with fast multiplication.
|
//implementation on machines with slow multiplication.
|
||||||
//It uses 12 arithmetic operations, one of which is a multiply.
|
//It uses 17 arithmetic operations.
|
||||||
int popcount_3(uint64_t x) {
|
int popcount_2(uint64_t x) {
|
||||||
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
|
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
|
||||||
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
|
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
|
||||||
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
|
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
|
||||||
return (x * h01)>>56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
|
x += x >> 8; //put count of each 16 bits into their lowest 8 bits
|
||||||
|
x += x >> 16; //put count of each 32 bits into their lowest 8 bits
|
||||||
|
x += x >> 32; //put count of each 64 bits into their lowest 8 bits
|
||||||
|
return x & 0x7f;
|
||||||
}
|
}
|
||||||
*)
|
|
||||||
|
|
||||||
let popcount64 (b:int64) =
|
32-bits popcount. int64 is too slow, and there is not use trying to deal
|
||||||
let open Int64 in
|
with 32 bit platforms by defining popcount-16, as there are integer literals
|
||||||
let b = sub b (logand (shift_right_logical b 1) 0x5555555555555555L) in
|
here that will not compile on 32-bits.
|
||||||
let b = add (logand b 0x3333333333333333L)
|
*)
|
||||||
(logand (shift_right_logical b 2) 0x3333333333333333L) in
|
let popcount b =
|
||||||
let b = logand (add b (shift_right_logical b 4)) 0x0F0F0F0F0F0F0F0FL in
|
let b = b - ((b lsr 1) land 0x55555555) in
|
||||||
let b = shift_right_logical (mul b 0x0101010101010101L) 56 in
|
let b = (b land 0x33333333) + ((b lsr 2) land 0x33333333) in
|
||||||
Int64.to_int b
|
let b = (b + (b lsr 4)) land 0x0f0f0f0f in
|
||||||
|
let b = b + (b lsr 8) in
|
||||||
|
let b = b + (b lsr 16) in
|
||||||
|
b land 0x3f
|
||||||
|
|
||||||
(*$T
|
(*$T
|
||||||
popcount64 5L = 2
|
popcount 5 = 2
|
||||||
popcount64 256L = 1
|
popcount 256 = 1
|
||||||
popcount64 255L = 8
|
popcount 255 = 8
|
||||||
popcount64 0xFFFFFFFFL = 32
|
popcount 0xFFFF = 16
|
||||||
popcount64 0xFFFFFFFFFFFFFFFFL = 64
|
popcount 0xFF1F = 13
|
||||||
|
popcount 0xFFFFFFFF = 32
|
||||||
*)
|
*)
|
||||||
|
|
||||||
(*$Q
|
(*$Q
|
||||||
Q.int (fun i -> \
|
Q.int (fun i -> let i = i land (1 lsl 32) in popcount i <= 32)
|
||||||
let i = Int64.of_int i in popcount64 i <= 64)
|
|
||||||
*)
|
*)
|
||||||
|
|
||||||
(* sparse array, using a bitfield and POPCOUNT *)
|
(* sparse array, using a bitfield and POPCOUNT *)
|
||||||
module A_SPARSE : FIXED_ARRAY = struct
|
module A_SPARSE : FIXED_ARRAY = struct
|
||||||
type 'a t = {
|
type 'a t = {
|
||||||
bits: int64;
|
bits: int;
|
||||||
arr: 'a array;
|
arr: 'a array;
|
||||||
empty: 'a;
|
empty: 'a;
|
||||||
}
|
}
|
||||||
|
|
||||||
let length_log = 6
|
let length_log = 5
|
||||||
let length = 1 lsl length_log
|
let length = 1 lsl length_log
|
||||||
|
|
||||||
let popcount = popcount64
|
let create ~empty = { bits=0; arr= [| |]; empty; }
|
||||||
|
|
||||||
let create ~empty = { bits=0L; arr= [| |]; empty; }
|
|
||||||
|
|
||||||
let get a i =
|
let get a i =
|
||||||
let open Int64 in
|
let idx = 1 lsl i in
|
||||||
let idx = shift_left 1L i in
|
if a.bits land idx = 0
|
||||||
if logand a.bits idx = 0L
|
|
||||||
then a.empty
|
then a.empty
|
||||||
else
|
else
|
||||||
let real_idx =popcount (logand a.bits (sub idx 1L)) in
|
let real_idx = popcount (a.bits land (idx- 1)) in
|
||||||
a.arr.(real_idx)
|
a.arr.(real_idx)
|
||||||
|
|
||||||
let set a i x =
|
let set a i x =
|
||||||
let open Int64 in
|
let idx = 1 lsl i in
|
||||||
let idx = shift_left 1L i in
|
let real_idx = popcount (a.bits land (idx -1)) in
|
||||||
let real_idx = popcount (logand a.bits (sub idx 1L)) in
|
if a.bits land idx = 0
|
||||||
if logand a.bits idx = 0L
|
|
||||||
then (
|
then (
|
||||||
(* insert at [real_idx] in a new array *)
|
(* insert at [real_idx] in a new array *)
|
||||||
let bits = logor a.bits idx in
|
let bits = a.bits lor idx in
|
||||||
let arr = Array.init (Array.length a.arr + 1)
|
let n = Array.length a.arr in
|
||||||
(fun j ->
|
let arr = Array.make (n+1) a.empty in
|
||||||
if j<real_idx then a.arr.(j)
|
arr.(real_idx) <- x;
|
||||||
else if j=real_idx then x
|
if real_idx>0
|
||||||
else a.arr.(j-1)
|
then Array.blit a.arr 0 arr 0 real_idx;
|
||||||
) in
|
if real_idx<n
|
||||||
|
then Array.blit a.arr real_idx arr (real_idx+1) (n-real_idx);
|
||||||
{a with bits; arr}
|
{a with bits; arr}
|
||||||
) else (
|
) else (
|
||||||
(* replace element at [real_idx] *)
|
(* replace element at [real_idx] *)
|
||||||
|
|
@ -197,21 +199,21 @@ module A_SPARSE : FIXED_ARRAY = struct
|
||||||
)
|
)
|
||||||
|
|
||||||
let update a i f =
|
let update a i f =
|
||||||
let open Int64 in
|
let idx = 1 lsl i in
|
||||||
let idx = shift_left 1L i in
|
let real_idx = popcount (a.bits land (idx -1)) in
|
||||||
let real_idx = popcount (logand a.bits (sub idx 1L)) in
|
if a.bits land idx = 0
|
||||||
if logand a.bits idx = 0L
|
|
||||||
then (
|
then (
|
||||||
(* not present *)
|
(* not present *)
|
||||||
let x = f a.empty in
|
let x = f a.empty in
|
||||||
(* insert at [real_idx] in a new array *)
|
(* insert at [real_idx] in a new array *)
|
||||||
let bits = logor a.bits idx in
|
let bits = a.bits lor idx in
|
||||||
let arr = Array.init (Array.length a.arr + 1)
|
let n = Array.length a.arr in
|
||||||
(fun j ->
|
let arr = Array.make (n+1) a.empty in
|
||||||
if j<real_idx then a.arr.(j)
|
arr.(real_idx) <- x;
|
||||||
else if j=real_idx then x
|
if real_idx>0
|
||||||
else a.arr.(j-1)
|
then Array.blit a.arr 0 arr 0 real_idx;
|
||||||
) in
|
if real_idx<n
|
||||||
|
then Array.blit a.arr real_idx arr (real_idx+1) (n-real_idx);
|
||||||
{a with bits; arr}
|
{a with bits; arr}
|
||||||
) else (
|
) else (
|
||||||
let x = f a.arr.(real_idx) in
|
let x = f a.arr.(real_idx) in
|
||||||
|
|
@ -222,18 +224,19 @@ module A_SPARSE : FIXED_ARRAY = struct
|
||||||
)
|
)
|
||||||
|
|
||||||
let remove ~empty:_ a i =
|
let remove ~empty:_ a i =
|
||||||
let open Int64 in
|
let idx = 1 lsl i in
|
||||||
let idx = shift_left 1L i in
|
let real_idx = popcount (a.bits land (idx -1)) in
|
||||||
let real_idx = popcount (logand a.bits (sub idx 1L)) in
|
if a.bits land idx = 0
|
||||||
if logand a.bits idx = 0L
|
|
||||||
then a (* not present *)
|
then a (* not present *)
|
||||||
else (
|
else (
|
||||||
(* remove at [real_idx] *)
|
(* remove at [real_idx] *)
|
||||||
let bits = logand a.bits (lognot idx) in
|
let bits = a.bits land (lnot idx) in
|
||||||
let arr = Array.init (Array.length a.arr - 1)
|
let n = Array.length a.arr in
|
||||||
(fun j ->
|
let arr = Array.make (n-1) a.empty in
|
||||||
if j>= real_idx then a.arr.(j+1) else a.arr.(j)
|
if real_idx > 0
|
||||||
) in
|
then Array.blit a.arr 0 arr 0 real_idx;
|
||||||
|
if real_idx+1 < n
|
||||||
|
then Array.blit a.arr (real_idx+1) arr real_idx (n-real_idx-1);
|
||||||
{a with bits; arr}
|
{a with bits; arr}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -278,6 +281,7 @@ module Make(Key : KEY)
|
||||||
|
|
||||||
type 'a t =
|
type 'a t =
|
||||||
| E
|
| E
|
||||||
|
| S of Hash.t * key * 'a (* single pair *)
|
||||||
| L of Hash.t * 'a leaf (* same hash for all elements *)
|
| L of Hash.t * 'a leaf (* same hash for all elements *)
|
||||||
| N of 'a leaf * 'a t A.t (* leaf for hash=0, subnodes *)
|
| N of 'a leaf * 'a t A.t (* leaf for hash=0, subnodes *)
|
||||||
|
|
||||||
|
|
@ -291,6 +295,7 @@ module Make(Key : KEY)
|
||||||
let is_empty = function
|
let is_empty = function
|
||||||
| E -> true
|
| E -> true
|
||||||
| L (_, Nil) -> assert false
|
| L (_, Nil) -> assert false
|
||||||
|
| S _
|
||||||
| L _
|
| L _
|
||||||
| N _ -> false
|
| N _ -> false
|
||||||
|
|
||||||
|
|
@ -305,6 +310,7 @@ module Make(Key : KEY)
|
||||||
|
|
||||||
let rec get_exn_ k ~h m = match m with
|
let rec get_exn_ k ~h m = match m with
|
||||||
| E -> raise Not_found
|
| E -> raise Not_found
|
||||||
|
| S (_, k', v') -> if Key.equal k k' then v' else raise Not_found
|
||||||
| L (_, l) -> get_exn_list_ k l
|
| L (_, l) -> get_exn_list_ k l
|
||||||
| N (leaf, a) ->
|
| N (leaf, a) ->
|
||||||
if Hash.is_0 h then get_exn_list_ k leaf
|
if Hash.is_0 h then get_exn_list_ k leaf
|
||||||
|
|
@ -335,31 +341,42 @@ module Make(Key : KEY)
|
||||||
|
|
||||||
(* [h]: hash, with the part required to reach this leaf removed *)
|
(* [h]: hash, with the part required to reach this leaf removed *)
|
||||||
let rec add_ k v ~h m = match m with
|
let rec add_ k v ~h m = match m with
|
||||||
| E -> leaf_ k v ~h
|
| E -> S (h, k, v)
|
||||||
|
| S (h', k', v') ->
|
||||||
|
if h=h'
|
||||||
|
then if Key.equal k k'
|
||||||
|
then S (h, k, v) (* replace *)
|
||||||
|
else L (h, Cons (k, v, Cons (k', v', Nil)))
|
||||||
|
else
|
||||||
|
make_array_ ~leaf:(Cons (k', v', Nil)) ~h_leaf:h' k v ~h
|
||||||
| L (h', l) ->
|
| L (h', l) ->
|
||||||
if h=h'
|
if h=h'
|
||||||
then L (h, add_list_ k v l)
|
then L (h, add_list_ k v l)
|
||||||
else (* split into N *)
|
else (* split into N *)
|
||||||
let a = A.create ~empty:E in
|
make_array_ ~leaf:l ~h_leaf:h' k v ~h
|
||||||
let a, leaf =
|
|
||||||
if Hash.is_0 h' then a, l
|
|
||||||
else
|
|
||||||
(* put leaf in the right bucket *)
|
|
||||||
let i = Hash.rem h' in
|
|
||||||
let h'' = Hash.quotient h' in
|
|
||||||
A.set a i (L (h'', l)), Nil
|
|
||||||
in
|
|
||||||
(* then add new node *)
|
|
||||||
let a, leaf =
|
|
||||||
if Hash.is_0 h then a, add_list_ k v leaf
|
|
||||||
else add_to_array_ k v ~h a, leaf
|
|
||||||
in
|
|
||||||
N (leaf, a)
|
|
||||||
| N (leaf, a) ->
|
| N (leaf, a) ->
|
||||||
if Hash.is_0 h
|
if Hash.is_0 h
|
||||||
then N (add_list_ k v leaf, a)
|
then N (add_list_ k v leaf, a)
|
||||||
else N (leaf, add_to_array_ k v ~h a)
|
else N (leaf, add_to_array_ k v ~h a)
|
||||||
|
|
||||||
|
(* make an array containing a leaf, and insert (k,v) in it *)
|
||||||
|
and make_array_ ~leaf ~h_leaf:h' k v ~h =
|
||||||
|
let a = A.create ~empty:E in
|
||||||
|
let a, leaf =
|
||||||
|
if Hash.is_0 h' then a, leaf
|
||||||
|
else
|
||||||
|
(* put leaf in the right bucket *)
|
||||||
|
let i = Hash.rem h' in
|
||||||
|
let h'' = Hash.quotient h' in
|
||||||
|
A.set a i (L (h'', leaf)), Nil
|
||||||
|
in
|
||||||
|
(* then add new node *)
|
||||||
|
let a, leaf =
|
||||||
|
if Hash.is_0 h then a, add_list_ k v leaf
|
||||||
|
else add_to_array_ k v ~h a, leaf
|
||||||
|
in
|
||||||
|
N (leaf, a)
|
||||||
|
|
||||||
(* add k->v to [a] *)
|
(* add k->v to [a] *)
|
||||||
and add_to_array_ k v ~h a =
|
and add_to_array_ k v ~h a =
|
||||||
(* insert in a bucket *)
|
(* insert in a bucket *)
|
||||||
|
|
@ -390,6 +407,8 @@ module Make(Key : KEY)
|
||||||
|
|
||||||
let rec remove_rec_ k ~h m = match m with
|
let rec remove_rec_ k ~h m = match m with
|
||||||
| E -> E
|
| E -> E
|
||||||
|
| S (_, k', _) ->
|
||||||
|
if Key.equal k k' then E else m
|
||||||
| L (h, l) ->
|
| L (h, l) ->
|
||||||
let l = remove_list_ k l in
|
let l = remove_list_ k l in
|
||||||
if is_empty_list_ l then E else L (h, l)
|
if is_empty_list_ l then E else L (h, l)
|
||||||
|
|
@ -414,6 +433,7 @@ module Make(Key : KEY)
|
||||||
let iter f t =
|
let iter f t =
|
||||||
let rec aux = function
|
let rec aux = function
|
||||||
| E -> ()
|
| E -> ()
|
||||||
|
| S (_, k, v) -> f k v
|
||||||
| L (_,l) -> aux_list l
|
| L (_,l) -> aux_list l
|
||||||
| N (l,a) -> aux_list l; A.iter aux a
|
| N (l,a) -> aux_list l; A.iter aux a
|
||||||
and aux_list = function
|
and aux_list = function
|
||||||
|
|
@ -425,6 +445,7 @@ module Make(Key : KEY)
|
||||||
let fold f acc t =
|
let fold f acc t =
|
||||||
let rec aux acc t = match t with
|
let rec aux acc t = match t with
|
||||||
| E -> acc
|
| E -> acc
|
||||||
|
| S (_,k,v) -> f acc k v
|
||||||
| L (_,l) -> aux_list acc l
|
| L (_,l) -> aux_list acc l
|
||||||
| N (l,a) -> let acc = aux_list acc l in A.fold aux acc a
|
| N (l,a) -> let acc = aux_list acc l in A.fold aux acc a
|
||||||
and aux_list acc l = match l with
|
and aux_list acc l = match l with
|
||||||
|
|
@ -462,6 +483,7 @@ module Make(Key : KEY)
|
||||||
|
|
||||||
let rec as_tree m () = match m with
|
let rec as_tree m () = match m with
|
||||||
| E -> `Nil
|
| E -> `Nil
|
||||||
|
| S (h,k,v) -> `Node (`L ((h:>int), [k,v]), [])
|
||||||
| L (h,l) -> `Node (`L ((h:>int), list_as_tree_ l), [])
|
| L (h,l) -> `Node (`L ((h:>int), list_as_tree_ l), [])
|
||||||
| N (l,a) -> `Node (`N, as_tree (L (Hash.zero, l)) :: array_as_tree_ a)
|
| N (l,a) -> `Node (`N, as_tree (L (Hash.zero, l)) :: array_as_tree_ a)
|
||||||
and list_as_tree_ l = match l with
|
and list_as_tree_ l = match l with
|
||||||
|
|
@ -472,7 +494,7 @@ end
|
||||||
|
|
||||||
(*$R
|
(*$R
|
||||||
let module M = Make(CCInt) in
|
let module M = Make(CCInt) in
|
||||||
let m = M.of_list CCList.(1 -- 1000 |> map (fun i->i,i)) in
|
let m = M.of_list CCList.( (501 -- 1000) @ (500 -- 1) |> map (fun i->i,i)) in
|
||||||
assert_equal ~printer:CCInt.to_string 1000 (M.cardinal m);
|
assert_equal ~printer:CCInt.to_string 1000 (M.cardinal m);
|
||||||
assert_bool "check all get"
|
assert_bool "check all get"
|
||||||
(Sequence.for_all (fun i -> i = M.get_exn i m) Sequence.(1 -- 1000));
|
(Sequence.for_all (fun i -> i = M.get_exn i m) Sequence.(1 -- 1000));
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,9 @@
|
||||||
update and access {b if} the hash function is good.
|
update and access {b if} the hash function is good.
|
||||||
The trie is not binary, to improve cache locality and decrease depth.
|
The trie is not binary, to improve cache locality and decrease depth.
|
||||||
|
|
||||||
|
Preliminary benchmarks (see the "tbl" section of benchmarks) tend to show
|
||||||
|
that this type is quite efficient for small data sets.
|
||||||
|
|
||||||
{b status: experimental}
|
{b status: experimental}
|
||||||
|
|
||||||
@since NEXT_RELEASE
|
@since NEXT_RELEASE
|
||||||
|
|
@ -97,6 +100,6 @@ end
|
||||||
module Make(K : KEY) : S with type key = K.t
|
module Make(K : KEY) : S with type key = K.t
|
||||||
|
|
||||||
(**/**)
|
(**/**)
|
||||||
val popcount64 : int64 -> int
|
val popcount : int -> int
|
||||||
module A_SPARSE : FIXED_ARRAY
|
module A_SPARSE : FIXED_ARRAY
|
||||||
(**/**)
|
(**/**)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue