perf(CCBV): better bitwise operations

- a 8-bit popcount
- simpler logic for LSB masks
This commit is contained in:
Simon Cruanes 2022-01-02 21:45:26 -05:00
parent b8c93f42fa
commit ced66a76e1
No known key found for this signature in database
GPG key ID: 4AC01D0849AA62B6
2 changed files with 78 additions and 23 deletions

View file

@ -22,27 +22,80 @@ let[@inline] mul_ n = n lsl 3
let zero = Char.unsafe_chr 0 let zero = Char.unsafe_chr 0
(** We use OCamls chars to store the bits. We index them from the (* 0b11111111 *)
least significant bit. We create masks to zero out the most significant let all_ones_ = Char.unsafe_chr ((1 lsl width_) - 1)
bits that aren't used to store values. This is necessary when we are
constructing or negating a bit vector. *)
let[@inline] lsb_masks_ n = (1 lsl n) - 1
let all_ones_ = Char.unsafe_chr (lsb_masks_ width_) let() = assert (all_ones_ = Char.chr 0b1111_1111)
let count_bits_ n = (* [lsb_mask_ n] is [0b111111] with [n] ones. *)
let table = [| 0; 1; 1; 2; 1; 2; 2; 3; 1; 2; 2; 3; 2; 3; 3; 4; 1; 2; 2; 3; 2; 3; 3; 4; let[@inline] __lsb_mask n = (1 lsl n) - 1
2; 3; 3; 4; 3; 4; 4; 5; 1; 2; 2; 3; 2; 3; 3; 4; 2; 3; 3; 4; 3; 4; 4; 5;
2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6; 1; 2; 2; 3; 2; 3; 3; 4; (*$= & ~printer:CCInt.to_string
2; 3; 3; 4; 3; 4; 4; 5; 2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6; 0b0 (__lsb_mask 0)
2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6; 3; 4; 4; 5; 4; 5; 5; 6; 0b1 (__lsb_mask 1)
4; 5; 5; 6; 5; 6; 6; 7; 1; 2; 2; 3; 2; 3; 3; 4; 2; 3; 3; 4; 3; 4; 4; 5; 0b11 (__lsb_mask 2)
2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6; 2; 3; 3; 4; 3; 4; 4; 5; 0b111 (__lsb_mask 3)
3; 4; 4; 5; 4; 5; 5; 6; 3; 4; 4; 5; 4; 5; 5; 6; 4; 5; 5; 6; 5; 6; 6; 7; 0b1111 (__lsb_mask 4)
2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6; 3; 4; 4; 5; 4; 5; 5; 6; 0b1_1111 (__lsb_mask 5)
4; 5; 5; 6; 5; 6; 6; 7; 3; 4; 4; 5; 4; 5; 5; 6; 4; 5; 5; 6; 5; 6; 6; 7; 0b11_1111 (__lsb_mask 6)
4; 5; 5; 6; 5; 6; 6; 7; 5; 6; 6; 7; 6; 7; 7; 8; |] in 0b111_1111 (__lsb_mask 7)
Array.unsafe_get table n 0b1111_1111 (__lsb_mask 8)
*)
(*
from https://en.wikipedia.org/wiki/Hamming_weight
//This uses fewer arithmetic operations than any other known
//implementation on machines with slow multiplication.
//It uses 17 arithmetic operations.
int popcount_2(uint64_t x) {
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
// not necessary for int8
// x += x >> 8; //put count of each 16 bits into their lowest 8 bits
// x += x >> 16; //put count of each 32 bits into their lowest 8 bits
// x += x >> 32; //put count of each 64 bits into their lowest 8 bits
return x & 0x7f;
}
m1 = 0x5555555555555555
m2 = 0x3333333333333333
m4 = 0x0f0f0f0f0f0f0f0f
*)
let[@inline] __popcount8 (b:int) : int =
let m1 = 0x55 in
let m2 = 0x33 in
let m4 = 0x0f in
let b = b - ((b lsr 1) land m1) in
let b = (b land m2) + ((b lsr 2) land m2) in
let b = (b + (b lsr 4)) land m4 in
b land 0x7f
(*$inject
let popcount8_ref n =
let rec loop n =
if n=0 then 0
else if (n land 1) = 0 then loop (n lsr 1)
else 1 + loop (n lsr 1)
in
loop n
*)
(* test __popcount8 just to be sure. *)
(*$R
for i=0 to 255 do
let n = __popcount8 i in
let n2 = popcount8_ref i in
if n<>n2 then (
Printf.printf "bad: i=%d => %d,%d\n" i n n2;
assert false
);
done
*)
(* Can I access the "private" members in testing? $Q (* Can I access the "private" members in testing? $Q
(Q.int_bound (Sys.word_size - 1)) (fun i -> count_bits_ lsb_masks_.(i) = i) (Q.int_bound (Sys.word_size - 1)) (fun i -> count_bits_ lsb_masks_.(i) = i)
@ -71,7 +124,7 @@ let create ~size default =
(* adjust last bits *) (* adjust last bits *)
let r = mod_ size in let r = mod_ size in
if default && r <> 0 then ( if default && r <> 0 then (
Bytes.unsafe_set b (n-1) (Char.unsafe_chr (lsb_masks_ r)); Bytes.unsafe_set b (n-1) (Char.unsafe_chr (__lsb_mask r));
); );
{ b; size } { b; size }
) )
@ -102,7 +155,7 @@ let cardinal bv =
else ( else (
let n = ref 0 in let n = ref 0 in
for i = 0 to Bytes.length bv.b - 1 do for i = 0 to Bytes.length bv.b - 1 do
n := !n + count_bits_ (get_ bv.b i) (* MSB of last element are all 0 *) n := !n + __popcount8 (get_ bv.b i) (* MSB of last element are all 0 *)
done; done;
!n !n
) )
@ -420,7 +473,7 @@ let negate_self b =
let r = mod_ b.size in let r = mod_ b.size in
if r <> 0 then if r <> 0 then
let l = Bytes.length b.b - 1 in let l = Bytes.length b.b - 1 in
unsafe_set_ b.b l (lsb_masks_ r land (unsafe_get_ b.b l)) unsafe_set_ b.b l (__lsb_mask r land (unsafe_get_ b.b l))
(*$= & ~printer:(CCFormat.to_string ppli) (*$= & ~printer:(CCFormat.to_string ppli)
[0;3;4;6] (let v = of_list [1;2;5;7;] in negate_self v; to_sorted_list v) [0;3;4;6] (let v = of_list [1;2;5;7;] in negate_self v; to_sorted_list v)
@ -431,7 +484,7 @@ let negate a =
let r = mod_ a.size in let r = mod_ a.size in
if r <> 0 then ( if r <> 0 then (
let l = Bytes.length a.b - 1 in let l = Bytes.length a.b - 1 in
unsafe_set_ b l (lsb_masks_ r land (unsafe_get_ b l)) unsafe_set_ b l (__lsb_mask r land (unsafe_get_ b l))
); );
{ b ; size = a.size } { b ; size = a.size }

View file

@ -152,4 +152,6 @@ val pp : Format.formatter -> t -> unit
(**/**) (**/**)
val __to_word_l : t -> char list val __to_word_l : t -> char list
val __popcount8 : int -> int
val __lsb_mask : int -> int
(**/**) (**/**)