use 32-bits and regular integers for popcount in CCHashTrie

This commit is contained in:
Simon Cruanes 2015-09-05 02:55:48 +02:00
parent 47414c7f40
commit 895c8a73d9
2 changed files with 103 additions and 78 deletions

View file

@ -118,76 +118,78 @@ end
from https://en.wikipedia.org/wiki/Hamming_weight from https://en.wikipedia.org/wiki/Hamming_weight
//This uses fewer arithmetic operations than any other known //This uses fewer arithmetic operations than any other known
//implementation on machines with fast multiplication. //implementation on machines with slow multiplication.
//It uses 12 arithmetic operations, one of which is a multiply. //It uses 17 arithmetic operations.
int popcount_3(uint64_t x) { int popcount_2(uint64_t x) {
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
return (x * h01)>>56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ... x += x >> 8; //put count of each 16 bits into their lowest 8 bits
x += x >> 16; //put count of each 32 bits into their lowest 8 bits
x += x >> 32; //put count of each 64 bits into their lowest 8 bits
return x & 0x7f;
} }
*)
let popcount64 (b:int64) = 32-bits popcount. int64 is too slow, and there is not use trying to deal
let open Int64 in with 32 bit platforms by defining popcount-16, as there are integer literals
let b = sub b (logand (shift_right_logical b 1) 0x5555555555555555L) in here that will not compile on 32-bits.
let b = add (logand b 0x3333333333333333L) *)
(logand (shift_right_logical b 2) 0x3333333333333333L) in let popcount b =
let b = logand (add b (shift_right_logical b 4)) 0x0F0F0F0F0F0F0F0FL in let b = b - ((b lsr 1) land 0x55555555) in
let b = shift_right_logical (mul b 0x0101010101010101L) 56 in let b = (b land 0x33333333) + ((b lsr 2) land 0x33333333) in
Int64.to_int b let b = (b + (b lsr 4)) land 0x0f0f0f0f in
let b = b + (b lsr 8) in
let b = b + (b lsr 16) in
b land 0x3f
(*$T (*$T
popcount64 5L = 2 popcount 5 = 2
popcount64 256L = 1 popcount 256 = 1
popcount64 255L = 8 popcount 255 = 8
popcount64 0xFFFFFFFFL = 32 popcount 0xFFFF = 16
popcount64 0xFFFFFFFFFFFFFFFFL = 64 popcount 0xFF1F = 13
popcount 0xFFFFFFFF = 32
*) *)
(*$Q (*$Q
Q.int (fun i -> \ Q.int (fun i -> let i = i land (1 lsl 32) in popcount i <= 32)
let i = Int64.of_int i in popcount64 i <= 64)
*) *)
(* sparse array, using a bitfield and POPCOUNT *) (* sparse array, using a bitfield and POPCOUNT *)
module A_SPARSE : FIXED_ARRAY = struct module A_SPARSE : FIXED_ARRAY = struct
type 'a t = { type 'a t = {
bits: int64; bits: int;
arr: 'a array; arr: 'a array;
empty: 'a; empty: 'a;
} }
let length_log = 6 let length_log = 5
let length = 1 lsl length_log let length = 1 lsl length_log
let popcount = popcount64 let create ~empty = { bits=0; arr= [| |]; empty; }
let create ~empty = { bits=0L; arr= [| |]; empty; }
let get a i = let get a i =
let open Int64 in let idx = 1 lsl i in
let idx = shift_left 1L i in if a.bits land idx = 0
if logand a.bits idx = 0L
then a.empty then a.empty
else else
let real_idx =popcount (logand a.bits (sub idx 1L)) in let real_idx = popcount (a.bits land (idx- 1)) in
a.arr.(real_idx) a.arr.(real_idx)
let set a i x = let set a i x =
let open Int64 in let idx = 1 lsl i in
let idx = shift_left 1L i in let real_idx = popcount (a.bits land (idx -1)) in
let real_idx = popcount (logand a.bits (sub idx 1L)) in if a.bits land idx = 0
if logand a.bits idx = 0L
then ( then (
(* insert at [real_idx] in a new array *) (* insert at [real_idx] in a new array *)
let bits = logor a.bits idx in let bits = a.bits lor idx in
let arr = Array.init (Array.length a.arr + 1) let n = Array.length a.arr in
(fun j -> let arr = Array.make (n+1) a.empty in
if j<real_idx then a.arr.(j) arr.(real_idx) <- x;
else if j=real_idx then x if real_idx>0
else a.arr.(j-1) then Array.blit a.arr 0 arr 0 real_idx;
) in if real_idx<n
then Array.blit a.arr real_idx arr (real_idx+1) (n-real_idx);
{a with bits; arr} {a with bits; arr}
) else ( ) else (
(* replace element at [real_idx] *) (* replace element at [real_idx] *)
@ -197,21 +199,21 @@ module A_SPARSE : FIXED_ARRAY = struct
) )
let update a i f = let update a i f =
let open Int64 in let idx = 1 lsl i in
let idx = shift_left 1L i in let real_idx = popcount (a.bits land (idx -1)) in
let real_idx = popcount (logand a.bits (sub idx 1L)) in if a.bits land idx = 0
if logand a.bits idx = 0L
then ( then (
(* not present *) (* not present *)
let x = f a.empty in let x = f a.empty in
(* insert at [real_idx] in a new array *) (* insert at [real_idx] in a new array *)
let bits = logor a.bits idx in let bits = a.bits lor idx in
let arr = Array.init (Array.length a.arr + 1) let n = Array.length a.arr in
(fun j -> let arr = Array.make (n+1) a.empty in
if j<real_idx then a.arr.(j) arr.(real_idx) <- x;
else if j=real_idx then x if real_idx>0
else a.arr.(j-1) then Array.blit a.arr 0 arr 0 real_idx;
) in if real_idx<n
then Array.blit a.arr real_idx arr (real_idx+1) (n-real_idx);
{a with bits; arr} {a with bits; arr}
) else ( ) else (
let x = f a.arr.(real_idx) in let x = f a.arr.(real_idx) in
@ -222,18 +224,19 @@ module A_SPARSE : FIXED_ARRAY = struct
) )
let remove ~empty:_ a i = let remove ~empty:_ a i =
let open Int64 in let idx = 1 lsl i in
let idx = shift_left 1L i in let real_idx = popcount (a.bits land (idx -1)) in
let real_idx = popcount (logand a.bits (sub idx 1L)) in if a.bits land idx = 0
if logand a.bits idx = 0L
then a (* not present *) then a (* not present *)
else ( else (
(* remove at [real_idx] *) (* remove at [real_idx] *)
let bits = logand a.bits (lognot idx) in let bits = a.bits land (lnot idx) in
let arr = Array.init (Array.length a.arr - 1) let n = Array.length a.arr in
(fun j -> let arr = Array.make (n-1) a.empty in
if j>= real_idx then a.arr.(j+1) else a.arr.(j) if real_idx > 0
) in then Array.blit a.arr 0 arr 0 real_idx;
if real_idx+1 < n
then Array.blit a.arr (real_idx+1) arr real_idx (n-real_idx-1);
{a with bits; arr} {a with bits; arr}
) )
@ -278,6 +281,7 @@ module Make(Key : KEY)
type 'a t = type 'a t =
| E | E
| S of Hash.t * key * 'a (* single pair *)
| L of Hash.t * 'a leaf (* same hash for all elements *) | L of Hash.t * 'a leaf (* same hash for all elements *)
| N of 'a leaf * 'a t A.t (* leaf for hash=0, subnodes *) | N of 'a leaf * 'a t A.t (* leaf for hash=0, subnodes *)
@ -291,6 +295,7 @@ module Make(Key : KEY)
let is_empty = function let is_empty = function
| E -> true | E -> true
| L (_, Nil) -> assert false | L (_, Nil) -> assert false
| S _
| L _ | L _
| N _ -> false | N _ -> false
@ -305,6 +310,7 @@ module Make(Key : KEY)
let rec get_exn_ k ~h m = match m with let rec get_exn_ k ~h m = match m with
| E -> raise Not_found | E -> raise Not_found
| S (_, k', v') -> if Key.equal k k' then v' else raise Not_found
| L (_, l) -> get_exn_list_ k l | L (_, l) -> get_exn_list_ k l
| N (leaf, a) -> | N (leaf, a) ->
if Hash.is_0 h then get_exn_list_ k leaf if Hash.is_0 h then get_exn_list_ k leaf
@ -335,19 +341,34 @@ module Make(Key : KEY)
(* [h]: hash, with the part required to reach this leaf removed *) (* [h]: hash, with the part required to reach this leaf removed *)
let rec add_ k v ~h m = match m with let rec add_ k v ~h m = match m with
| E -> leaf_ k v ~h | E -> S (h, k, v)
| S (h', k', v') ->
if h=h'
then if Key.equal k k'
then S (h, k, v) (* replace *)
else L (h, Cons (k, v, Cons (k', v', Nil)))
else
make_array_ ~leaf:(Cons (k', v', Nil)) ~h_leaf:h' k v ~h
| L (h', l) -> | L (h', l) ->
if h=h' if h=h'
then L (h, add_list_ k v l) then L (h, add_list_ k v l)
else (* split into N *) else (* split into N *)
make_array_ ~leaf:l ~h_leaf:h' k v ~h
| N (leaf, a) ->
if Hash.is_0 h
then N (add_list_ k v leaf, a)
else N (leaf, add_to_array_ k v ~h a)
(* make an array containing a leaf, and insert (k,v) in it *)
and make_array_ ~leaf ~h_leaf:h' k v ~h =
let a = A.create ~empty:E in let a = A.create ~empty:E in
let a, leaf = let a, leaf =
if Hash.is_0 h' then a, l if Hash.is_0 h' then a, leaf
else else
(* put leaf in the right bucket *) (* put leaf in the right bucket *)
let i = Hash.rem h' in let i = Hash.rem h' in
let h'' = Hash.quotient h' in let h'' = Hash.quotient h' in
A.set a i (L (h'', l)), Nil A.set a i (L (h'', leaf)), Nil
in in
(* then add new node *) (* then add new node *)
let a, leaf = let a, leaf =
@ -355,10 +376,6 @@ module Make(Key : KEY)
else add_to_array_ k v ~h a, leaf else add_to_array_ k v ~h a, leaf
in in
N (leaf, a) N (leaf, a)
| N (leaf, a) ->
if Hash.is_0 h
then N (add_list_ k v leaf, a)
else N (leaf, add_to_array_ k v ~h a)
(* add k->v to [a] *) (* add k->v to [a] *)
and add_to_array_ k v ~h a = and add_to_array_ k v ~h a =
@ -390,6 +407,8 @@ module Make(Key : KEY)
let rec remove_rec_ k ~h m = match m with let rec remove_rec_ k ~h m = match m with
| E -> E | E -> E
| S (_, k', _) ->
if Key.equal k k' then E else m
| L (h, l) -> | L (h, l) ->
let l = remove_list_ k l in let l = remove_list_ k l in
if is_empty_list_ l then E else L (h, l) if is_empty_list_ l then E else L (h, l)
@ -414,6 +433,7 @@ module Make(Key : KEY)
let iter f t = let iter f t =
let rec aux = function let rec aux = function
| E -> () | E -> ()
| S (_, k, v) -> f k v
| L (_,l) -> aux_list l | L (_,l) -> aux_list l
| N (l,a) -> aux_list l; A.iter aux a | N (l,a) -> aux_list l; A.iter aux a
and aux_list = function and aux_list = function
@ -425,6 +445,7 @@ module Make(Key : KEY)
let fold f acc t = let fold f acc t =
let rec aux acc t = match t with let rec aux acc t = match t with
| E -> acc | E -> acc
| S (_,k,v) -> f acc k v
| L (_,l) -> aux_list acc l | L (_,l) -> aux_list acc l
| N (l,a) -> let acc = aux_list acc l in A.fold aux acc a | N (l,a) -> let acc = aux_list acc l in A.fold aux acc a
and aux_list acc l = match l with and aux_list acc l = match l with
@ -462,6 +483,7 @@ module Make(Key : KEY)
let rec as_tree m () = match m with let rec as_tree m () = match m with
| E -> `Nil | E -> `Nil
| S (h,k,v) -> `Node (`L ((h:>int), [k,v]), [])
| L (h,l) -> `Node (`L ((h:>int), list_as_tree_ l), []) | L (h,l) -> `Node (`L ((h:>int), list_as_tree_ l), [])
| N (l,a) -> `Node (`N, as_tree (L (Hash.zero, l)) :: array_as_tree_ a) | N (l,a) -> `Node (`N, as_tree (L (Hash.zero, l)) :: array_as_tree_ a)
and list_as_tree_ l = match l with and list_as_tree_ l = match l with
@ -472,7 +494,7 @@ end
(*$R (*$R
let module M = Make(CCInt) in let module M = Make(CCInt) in
let m = M.of_list CCList.(1 -- 1000 |> map (fun i->i,i)) in let m = M.of_list CCList.( (501 -- 1000) @ (500 -- 1) |> map (fun i->i,i)) in
assert_equal ~printer:CCInt.to_string 1000 (M.cardinal m); assert_equal ~printer:CCInt.to_string 1000 (M.cardinal m);
assert_bool "check all get" assert_bool "check all get"
(Sequence.for_all (fun i -> i = M.get_exn i m) Sequence.(1 -- 1000)); (Sequence.for_all (fun i -> i = M.get_exn i m) Sequence.(1 -- 1000));

View file

@ -8,6 +8,9 @@
update and access {b if} the hash function is good. update and access {b if} the hash function is good.
The trie is not binary, to improve cache locality and decrease depth. The trie is not binary, to improve cache locality and decrease depth.
Preliminary benchmarks (see the "tbl" section of benchmarks) tend to show
that this type is quite efficient for small data sets.
{b status: experimental} {b status: experimental}
@since NEXT_RELEASE @since NEXT_RELEASE
@ -97,6 +100,6 @@ end
module Make(K : KEY) : S with type key = K.t module Make(K : KEY) : S with type key = K.t
(**/**) (**/**)
val popcount64 : int64 -> int val popcount : int -> int
module A_SPARSE : FIXED_ARRAY module A_SPARSE : FIXED_ARRAY
(**/**) (**/**)