mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-07 03:35:30 -05:00
perf(CCBV): better bitwise operations
- an 8-bit popcount - simpler logic for LSB masks
This commit is contained in:
parent
b8c93f42fa
commit
ced66a76e1
2 changed files with 78 additions and 23 deletions
|
|
@ -22,27 +22,80 @@ let[@inline] mul_ n = n lsl 3
|
||||||
|
|
||||||
let zero = Char.unsafe_chr 0
|
let zero = Char.unsafe_chr 0
|
||||||
|
|
||||||
(** We use OCaml's chars to store the bits. We index them from the
|
(* 0b11111111 *)
|
||||||
least significant bit. We create masks to zero out the most significant
|
let all_ones_ = Char.unsafe_chr ((1 lsl width_) - 1)
|
||||||
bits that aren't used to store values. This is necessary when we are
|
|
||||||
constructing or negating a bit vector. *)
|
|
||||||
let[@inline] lsb_masks_ n = (1 lsl n) - 1
|
|
||||||
|
|
||||||
let all_ones_ = Char.unsafe_chr (lsb_masks_ width_)
|
let() = assert (all_ones_ = Char.chr 0b1111_1111)
|
||||||
|
|
||||||
let count_bits_ n =
|
(* [__lsb_mask n] is [0b111111] with [n] ones. *)
|
||||||
let table = [| 0; 1; 1; 2; 1; 2; 2; 3; 1; 2; 2; 3; 2; 3; 3; 4; 1; 2; 2; 3; 2; 3; 3; 4;
|
let[@inline] __lsb_mask n = (1 lsl n) - 1
|
||||||
2; 3; 3; 4; 3; 4; 4; 5; 1; 2; 2; 3; 2; 3; 3; 4; 2; 3; 3; 4; 3; 4; 4; 5;
|
|
||||||
2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6; 1; 2; 2; 3; 2; 3; 3; 4;
|
(*$= & ~printer:CCInt.to_string
|
||||||
2; 3; 3; 4; 3; 4; 4; 5; 2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6;
|
0b0 (__lsb_mask 0)
|
||||||
2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6; 3; 4; 4; 5; 4; 5; 5; 6;
|
0b1 (__lsb_mask 1)
|
||||||
4; 5; 5; 6; 5; 6; 6; 7; 1; 2; 2; 3; 2; 3; 3; 4; 2; 3; 3; 4; 3; 4; 4; 5;
|
0b11 (__lsb_mask 2)
|
||||||
2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6; 2; 3; 3; 4; 3; 4; 4; 5;
|
0b111 (__lsb_mask 3)
|
||||||
3; 4; 4; 5; 4; 5; 5; 6; 3; 4; 4; 5; 4; 5; 5; 6; 4; 5; 5; 6; 5; 6; 6; 7;
|
0b1111 (__lsb_mask 4)
|
||||||
2; 3; 3; 4; 3; 4; 4; 5; 3; 4; 4; 5; 4; 5; 5; 6; 3; 4; 4; 5; 4; 5; 5; 6;
|
0b1_1111 (__lsb_mask 5)
|
||||||
4; 5; 5; 6; 5; 6; 6; 7; 3; 4; 4; 5; 4; 5; 5; 6; 4; 5; 5; 6; 5; 6; 6; 7;
|
0b11_1111 (__lsb_mask 6)
|
||||||
4; 5; 5; 6; 5; 6; 6; 7; 5; 6; 6; 7; 6; 7; 7; 8; |] in
|
0b111_1111 (__lsb_mask 7)
|
||||||
Array.unsafe_get table n
|
0b1111_1111 (__lsb_mask 8)
|
||||||
|
*)
|
||||||
|
|
||||||
|
(*
|
||||||
|
from https://en.wikipedia.org/wiki/Hamming_weight
|
||||||
|
|
||||||
|
//This uses fewer arithmetic operations than any other known
|
||||||
|
//implementation on machines with slow multiplication.
|
||||||
|
//It uses 17 arithmetic operations.
|
||||||
|
int popcount_2(uint64_t x) {
|
||||||
|
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
|
||||||
|
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
|
||||||
|
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
|
||||||
|
|
||||||
|
// not necessary for int8
|
||||||
|
// x += x >> 8; //put count of each 16 bits into their lowest 8 bits
|
||||||
|
// x += x >> 16; //put count of each 32 bits into their lowest 8 bits
|
||||||
|
// x += x >> 32; //put count of each 64 bits into their lowest 8 bits
|
||||||
|
|
||||||
|
return x & 0x7f;
|
||||||
|
}
|
||||||
|
|
||||||
|
m1 = 0x5555555555555555
|
||||||
|
m2 = 0x3333333333333333
|
||||||
|
m4 = 0x0f0f0f0f0f0f0f0f
|
||||||
|
*)
|
||||||
|
let[@inline] __popcount8 (b:int) : int =
|
||||||
|
let m1 = 0x55 in
|
||||||
|
let m2 = 0x33 in
|
||||||
|
let m4 = 0x0f in
|
||||||
|
|
||||||
|
let b = b - ((b lsr 1) land m1) in
|
||||||
|
let b = (b land m2) + ((b lsr 2) land m2) in
|
||||||
|
let b = (b + (b lsr 4)) land m4 in
|
||||||
|
b land 0x7f
|
||||||
|
|
||||||
|
(*$inject
|
||||||
|
let popcount8_ref n =
|
||||||
|
let rec loop n =
|
||||||
|
if n=0 then 0
|
||||||
|
else if (n land 1) = 0 then loop (n lsr 1)
|
||||||
|
else 1 + loop (n lsr 1)
|
||||||
|
in
|
||||||
|
loop n
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* test __popcount8 just to be sure. *)
|
||||||
|
(*$R
|
||||||
|
for i=0 to 255 do
|
||||||
|
let n = __popcount8 i in
|
||||||
|
let n2 = popcount8_ref i in
|
||||||
|
if n<>n2 then (
|
||||||
|
Printf.printf "bad: i=%d => %d,%d\n" i n n2;
|
||||||
|
assert false
|
||||||
|
);
|
||||||
|
done
|
||||||
|
*)
|
||||||
|
|
||||||
(* Can I access the "private" members in testing? $Q
|
(* Can I access the "private" members in testing? $Q
|
||||||
(Q.int_bound (Sys.word_size - 1)) (fun i -> count_bits_ lsb_masks_.(i) = i)
|
(Q.int_bound (Sys.word_size - 1)) (fun i -> count_bits_ lsb_masks_.(i) = i)
|
||||||
|
|
@ -71,7 +124,7 @@ let create ~size default =
|
||||||
(* adjust last bits *)
|
(* adjust last bits *)
|
||||||
let r = mod_ size in
|
let r = mod_ size in
|
||||||
if default && r <> 0 then (
|
if default && r <> 0 then (
|
||||||
Bytes.unsafe_set b (n-1) (Char.unsafe_chr (lsb_masks_ r));
|
Bytes.unsafe_set b (n-1) (Char.unsafe_chr (__lsb_mask r));
|
||||||
);
|
);
|
||||||
{ b; size }
|
{ b; size }
|
||||||
)
|
)
|
||||||
|
|
@ -102,7 +155,7 @@ let cardinal bv =
|
||||||
else (
|
else (
|
||||||
let n = ref 0 in
|
let n = ref 0 in
|
||||||
for i = 0 to Bytes.length bv.b - 1 do
|
for i = 0 to Bytes.length bv.b - 1 do
|
||||||
n := !n + count_bits_ (get_ bv.b i) (* MSB of last element are all 0 *)
|
n := !n + __popcount8 (get_ bv.b i) (* MSB of last element are all 0 *)
|
||||||
done;
|
done;
|
||||||
!n
|
!n
|
||||||
)
|
)
|
||||||
|
|
@ -420,7 +473,7 @@ let negate_self b =
|
||||||
let r = mod_ b.size in
|
let r = mod_ b.size in
|
||||||
if r <> 0 then
|
if r <> 0 then
|
||||||
let l = Bytes.length b.b - 1 in
|
let l = Bytes.length b.b - 1 in
|
||||||
unsafe_set_ b.b l (lsb_masks_ r land (unsafe_get_ b.b l))
|
unsafe_set_ b.b l (__lsb_mask r land (unsafe_get_ b.b l))
|
||||||
|
|
||||||
(*$= & ~printer:(CCFormat.to_string ppli)
|
(*$= & ~printer:(CCFormat.to_string ppli)
|
||||||
[0;3;4;6] (let v = of_list [1;2;5;7;] in negate_self v; to_sorted_list v)
|
[0;3;4;6] (let v = of_list [1;2;5;7;] in negate_self v; to_sorted_list v)
|
||||||
|
|
@ -431,7 +484,7 @@ let negate a =
|
||||||
let r = mod_ a.size in
|
let r = mod_ a.size in
|
||||||
if r <> 0 then (
|
if r <> 0 then (
|
||||||
let l = Bytes.length a.b - 1 in
|
let l = Bytes.length a.b - 1 in
|
||||||
unsafe_set_ b l (lsb_masks_ r land (unsafe_get_ b l))
|
unsafe_set_ b l (__lsb_mask r land (unsafe_get_ b l))
|
||||||
);
|
);
|
||||||
{ b ; size = a.size }
|
{ b ; size = a.size }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -152,4 +152,6 @@ val pp : Format.formatter -> t -> unit
|
||||||
|
|
||||||
(**/**)
|
(**/**)
|
||||||
val __to_word_l : t -> char list
|
val __to_word_l : t -> char list
|
||||||
|
val __popcount8 : int -> int
|
||||||
|
val __lsb_mask : int -> int
|
||||||
(**/**)
|
(**/**)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue