ocaml-containers/src/data/CCBV.ml
2022-07-04 22:15:06 -04:00

433 lines
11 KiB
OCaml

let width_ = 8
(* Helper functions *)
let[@inline] get_ b i = Char.code (Bytes.get b i)
let[@inline] unsafe_get_ b i = Char.code (Bytes.unsafe_get b i)
let[@inline] set_ b i v = Bytes.set b i (Char.unsafe_chr v)
let[@inline] unsafe_set_ b i v = Bytes.unsafe_set b i (Char.unsafe_chr v)
let[@inline] mod_ n = n land 0b111
let[@inline] div_ n = n lsr 3
let[@inline] mul_ n = n lsl 3
let zero = Char.unsafe_chr 0
(* 0b11111111 *)
let all_ones_ = Char.unsafe_chr ((1 lsl width_) - 1)
let () = assert (all_ones_ = Char.chr 0b1111_1111)
(* [lsb_mask_ n] is [0b111111] with [n] ones. *)
let[@inline] __lsb_mask n = (1 lsl n) - 1
(*
from https://en.wikipedia.org/wiki/Hamming_weight
//This uses fewer arithmetic operations than any other known
//implementation on machines with slow multiplication.
//It uses 17 arithmetic operations.
int popcount_2(uint64_t x) {
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
// not necessary for int8
// x += x >> 8; //put count of each 16 bits into their lowest 8 bits
// x += x >> 16; //put count of each 32 bits into their lowest 8 bits
// x += x >> 32; //put count of each 64 bits into their lowest 8 bits
return x & 0x7f;
}
m1 = 0x5555555555555555
m2 = 0x3333333333333333
m4 = 0x0f0f0f0f0f0f0f0f
*)
let[@inline] __popcount8 (b : int) : int =
let m1 = 0x55 in
let m2 = 0x33 in
let m4 = 0x0f in
let b = b - ((b lsr 1) land m1) in
let b = (b land m2) + ((b lsr 2) land m2) in
let b = (b + (b lsr 4)) land m4 in
b land 0x7f
(*
invariants for [v:t]:
- [Bytes.length v.b >= div_ v.size] (enough storage)
- all bits above [size] are 0 in [v.b]
*)
type t = { mutable b: bytes; mutable size: int }
let length t = t.size
let empty () = { b = Bytes.empty; size = 0 }
let bytes_length_of_size size =
if mod_ size = 0 then
div_ size
else
div_ size + 1
let create ~size default : t =
if size = 0 then
empty ()
else (
let n = bytes_length_of_size size in
let b =
if default then
Bytes.make n all_ones_
else
Bytes.make n zero
in
(* adjust last bits *)
let r = mod_ size in
if default && r <> 0 then unsafe_set_ b (n - 1) (__lsb_mask r);
{ b; size }
)
let copy bv = { bv with b = Bytes.sub bv.b 0 (bytes_length_of_size bv.size) }
let[@inline] capacity bv = mul_ (Bytes.length bv.b)
(* call [f i width(byte[i]) (byte[i])] on each byte.
The last byte might have a width of less than 8. *)
let iter_bytes_ (b : t) ~f : unit =
for n = 0 to div_ b.size - 1 do
f (mul_ n) width_ (unsafe_get_ b.b n)
done;
let r = mod_ b.size in
if r <> 0 then (
let last = div_ b.size in
f (mul_ last) r (__lsb_mask r land unsafe_get_ b.b last)
)
(* set [byte[i]] to [f(byte[i])] *)
let map_bytes_ (b : t) ~f : unit =
for n = 0 to div_ b.size - 1 do
unsafe_set_ b.b n (f (unsafe_get_ b.b n))
done;
let r = mod_ b.size in
if r <> 0 then (
let last = div_ b.size in
let mask = __lsb_mask r in
unsafe_set_ b.b last (mask land f (mask land unsafe_get_ b.b last))
)
let cardinal bv =
if bv.size = 0 then
0
else (
let n = ref 0 in
iter_bytes_ bv ~f:(fun _ _ b -> n := !n + __popcount8 b);
!n
)
let really_resize_ bv ~desired ~current size =
bv.size <- size;
if desired <> current then (
let b = Bytes.make desired zero in
Bytes.blit bv.b 0 b 0 (min desired current);
bv.b <- b
)
(* set bits above [n] to 0 *)
let[@inline never] clear_bits_above_ bv top =
let n = div_ top in
let j = mod_ top in
Bytes.fill bv.b (n + 1)
(bytes_length_of_size bv.size - n - 1)
(Char.unsafe_chr 0);
unsafe_set_ bv.b n (unsafe_get_ bv.b n land __lsb_mask j)
let[@inline never] grow_to_at_least_real_ bv size =
(* beyond capacity *)
let current = Bytes.length bv.b in
let desired = bytes_length_of_size size in
let desired =
min Sys.max_string_length (max desired (current + (current / 2)))
in
assert (desired > current);
really_resize_ bv ~desired ~current size
let grow_to_at_least_ bv size =
if size <= capacity bv then
(* within capacity *)
bv.size <- size
else
(* resize. This is a separate function so it's easier to
inline the happy path. *)
grow_to_at_least_real_ bv size
let shrink_ bv size =
assert (size <= bv.size);
if size < bv.size then (
let desired = bytes_length_of_size size in
let current = Bytes.length bv.b in
clear_bits_above_ bv size;
really_resize_ bv ~desired ~current size
)
let resize bv size =
if size < 0 then invalid_arg "resize: negative size";
if size < bv.size then (
clear_bits_above_ bv size;
bv.size <- size
) else if size > bv.size then
grow_to_at_least_ bv size
let resize_minimize_memory bv size =
if size < 0 then invalid_arg "resize: negative size";
if size < bv.size then
shrink_ bv size
else if size > bv.size then
grow_to_at_least_ bv size
let is_empty bv =
bv.size = 0
||
try
for i = 0 to bytes_length_of_size bv.size - 1 do
if unsafe_get_ bv.b i <> 0 then raise_notrace Exit
done;
true
with Exit -> false
let[@inline] get bv i =
if i < 0 then invalid_arg "get: negative index";
let idx_bucket = div_ i in
let idx_in_byte = mod_ i in
if idx_bucket < Bytes.length bv.b then
unsafe_get_ bv.b idx_bucket land (1 lsl idx_in_byte) <> 0
else
false
let[@inline] set bv i =
if i < 0 then
invalid_arg "set: negative index"
else (
let idx_bucket = div_ i in
let idx_in_byte = mod_ i in
if i >= bv.size then grow_to_at_least_ bv (i + 1);
unsafe_set_ bv.b idx_bucket
(unsafe_get_ bv.b idx_bucket lor (1 lsl idx_in_byte))
)
let init size f : t =
let v = create ~size false in
for i = 0 to size - 1 do
if f i then set v i
done;
v
let[@inline] reset bv i =
if i < 0 then
invalid_arg "reset: negative index"
else (
let n = div_ i in
let j = mod_ i in
if i >= bv.size then grow_to_at_least_ bv (i + 1);
unsafe_set_ bv.b n (unsafe_get_ bv.b n land lnot (1 lsl j))
)
let[@inline] set_bool bv i b =
if b then
set bv i
else
reset bv i
let flip bv i =
if i < 0 then
invalid_arg "reset: negative index"
else (
let n = div_ i in
let j = mod_ i in
if i >= bv.size then grow_to_at_least_ bv (i + 1);
unsafe_set_ bv.b n (unsafe_get_ bv.b n lxor (1 lsl j))
)
let clear bv = Bytes.fill bv.b 0 (Bytes.length bv.b) zero
let clear_and_shrink bv =
clear bv;
bv.size <- 0
let equal_bytes_ size b1 b2 =
try
for i = 0 to bytes_length_of_size size - 1 do
if Bytes.get b1 i <> Bytes.get b2 i then raise_notrace Exit
done;
true
with Exit -> false
let equal x y : bool = x.size = y.size && equal_bytes_ x.size x.b y.b
let iter bv f =
iter_bytes_ bv ~f:(fun off width_n word_n ->
for i = 0 to width_n - 1 do
f (off + i) (word_n land (1 lsl i) <> 0)
done)
let iter_true bv f =
iter bv (fun i b ->
if b then
f i
else
())
let to_list bv =
let l = ref [] in
iter_true bv (fun i -> l := i :: !l);
!l
let to_sorted_list bv = List.rev (to_list bv)
(* Interpret these as indices. *)
let of_list l =
let size =
match l with
| [] -> 0
| _ -> List.fold_left max 0 l + 1
in
let bv = create ~size false in
List.iter (fun i -> set bv i) l;
bv
exception FoundFirst of int
let first_exn bv =
try
iter_true bv (fun i -> raise_notrace (FoundFirst i));
raise Not_found
with FoundFirst i -> i
let first bv = try Some (first_exn bv) with Not_found -> None
let filter bv p = iter_true bv (fun i -> if not (p i) then reset bv i)
let negate_self bv = map_bytes_ bv ~f:(fun b -> lnot b)
let negate a =
let b = copy a in
negate_self b;
b
let union_into_no_resize_ ~into bv =
assert (Bytes.length into.b >= bytes_length_of_size bv.size);
for i = 0 to bytes_length_of_size bv.size - 1 do
unsafe_set_ into.b i (unsafe_get_ into.b i lor unsafe_get_ bv.b i)
done
(* Underlying size grows for union. *)
let union_into ~into bv =
if into.size < bv.size then grow_to_at_least_ into bv.size;
union_into_no_resize_ ~into bv
(* To avoid potentially 2 passes, figure out what we need to copy. *)
let union b1 b2 =
if b1.size <= b2.size then (
let into = copy b2 in
union_into_no_resize_ ~into b1;
into
) else (
let into = copy b1 in
union_into_no_resize_ ~into b2;
into
)
let inter_into_no_resize_ ~into bv =
assert (into.size <= bv.size);
for i = 0 to bytes_length_of_size into.size - 1 do
unsafe_set_ into.b i (unsafe_get_ into.b i land unsafe_get_ bv.b i)
done
(* Underlying size shrinks for inter. *)
let inter_into ~into bv =
if into.size > bv.size then shrink_ into bv.size;
inter_into_no_resize_ ~into bv
let inter b1 b2 =
if b1.size <= b2.size then (
let into = copy b1 in
inter_into_no_resize_ ~into b2;
into
) else (
let into = copy b2 in
inter_into_no_resize_ ~into b1;
into
)
(* Underlying size depends on the [in_] set for diff, so we don't change
its size! *)
let diff_into ~into bv =
let n = min (Bytes.length into.b) (Bytes.length bv.b) in
for i = 0 to n - 1 do
unsafe_set_ into.b i (unsafe_get_ into.b i land lnot (unsafe_get_ bv.b i))
done
let diff in_ not_in =
let into = copy in_ in
diff_into ~into not_in;
into
let select bv arr =
let l = ref [] in
(try
iter_true bv (fun i ->
if i >= Array.length arr then
raise_notrace Exit
else
l := arr.(i) :: !l)
with Exit -> ());
!l
let selecti bv arr =
let l = ref [] in
(try
iter_true bv (fun i ->
if i >= Array.length arr then
raise_notrace Exit
else
l := (arr.(i), i) :: !l)
with Exit -> ());
!l
type 'a iter = ('a -> unit) -> unit
let to_iter bv k = iter_true bv k
let of_iter seq =
let l = ref [] and maxi = ref 0 in
seq (fun x ->
l := x :: !l;
maxi := max !maxi x);
let bv = create ~size:(!maxi + 1) false in
List.iter (fun i -> set bv i) !l;
bv
let pp out bv =
Format.pp_print_string out "bv {";
iter bv (fun _i b ->
Format.pp_print_char out
(if b then
'1'
else
'0'));
Format.pp_print_string out "}"
module Internal_ = struct
let __to_word_l bv =
let l = ref [] in
Bytes.iter (fun c -> l := c :: !l) bv.b;
List.rev !l
let __popcount8 = __popcount8
let __lsb_mask = __lsb_mask
let __check_invariant self =
let n = div_ self.size in
let j = mod_ self.size in
assert (Bytes.length self.b >= n);
if j > 0 then
assert (
let c = get_ self.b n in
c land __lsb_mask j = c);
for i = n + 1 to Bytes.length self.b - 1 do
assert (get_ self.b i = 0)
done
end