mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
433 lines
11 KiB
OCaml
433 lines
11 KiB
OCaml
let width_ = 8
|
|
|
|
(* Helper functions *)
|
|
let[@inline] get_ b i = Char.code (Bytes.get b i)
|
|
let[@inline] unsafe_get_ b i = Char.code (Bytes.unsafe_get b i)
|
|
let[@inline] set_ b i v = Bytes.set b i (Char.unsafe_chr v)
|
|
let[@inline] unsafe_set_ b i v = Bytes.unsafe_set b i (Char.unsafe_chr v)
|
|
let[@inline] mod_ n = n land 0b111
|
|
let[@inline] div_ n = n lsr 3
|
|
let[@inline] mul_ n = n lsl 3
|
|
let zero = Char.unsafe_chr 0
|
|
|
|
(* 0b11111111 *)
|
|
let all_ones_ = Char.unsafe_chr ((1 lsl width_) - 1)
|
|
let () = assert (all_ones_ = Char.chr 0b1111_1111)
|
|
|
|
(* [lsb_mask_ n] is [0b111111] with [n] ones. *)
|
|
let[@inline] __lsb_mask n = (1 lsl n) - 1
|
|
|
|
(*
|
|
from https://en.wikipedia.org/wiki/Hamming_weight
|
|
|
|
//This uses fewer arithmetic operations than any other known
|
|
//implementation on machines with slow multiplication.
|
|
//It uses 17 arithmetic operations.
|
|
int popcount_2(uint64_t x) {
|
|
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
|
|
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
|
|
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
|
|
|
|
// not necessary for int8
|
|
// x += x >> 8; //put count of each 16 bits into their lowest 8 bits
|
|
// x += x >> 16; //put count of each 32 bits into their lowest 8 bits
|
|
// x += x >> 32; //put count of each 64 bits into their lowest 8 bits
|
|
|
|
return x & 0x7f;
|
|
}
|
|
|
|
m1 = 0x5555555555555555
|
|
m2 = 0x3333333333333333
|
|
m4 = 0x0f0f0f0f0f0f0f0f
|
|
*)
|
|
let[@inline] __popcount8 (b : int) : int =
|
|
let m1 = 0x55 in
|
|
let m2 = 0x33 in
|
|
let m4 = 0x0f in
|
|
|
|
let b = b - ((b lsr 1) land m1) in
|
|
let b = (b land m2) + ((b lsr 2) land m2) in
|
|
let b = (b + (b lsr 4)) land m4 in
|
|
b land 0x7f
|
|
|
|
(*
|
|
invariants for [v:t]:
|
|
|
|
- [Bytes.length v.b >= div_ v.size] (enough storage)
|
|
- all bits above [size] are 0 in [v.b]
|
|
*)
|
|
type t = { mutable b: bytes; mutable size: int }
|
|
|
|
let length t = t.size
|
|
let empty () = { b = Bytes.empty; size = 0 }
|
|
|
|
let bytes_length_of_size size =
|
|
if mod_ size = 0 then
|
|
div_ size
|
|
else
|
|
div_ size + 1
|
|
|
|
let create ~size default : t =
|
|
if size = 0 then
|
|
empty ()
|
|
else (
|
|
let n = bytes_length_of_size size in
|
|
let b =
|
|
if default then
|
|
Bytes.make n all_ones_
|
|
else
|
|
Bytes.make n zero
|
|
in
|
|
(* adjust last bits *)
|
|
let r = mod_ size in
|
|
if default && r <> 0 then unsafe_set_ b (n - 1) (__lsb_mask r);
|
|
{ b; size }
|
|
)
|
|
|
|
let copy bv = { bv with b = Bytes.sub bv.b 0 (bytes_length_of_size bv.size) }
|
|
let[@inline] capacity bv = mul_ (Bytes.length bv.b)
|
|
|
|
(* call [f i width(byte[i]) (byte[i])] on each byte.
|
|
The last byte might have a width of less than 8. *)
|
|
let iter_bytes_ (b : t) ~f : unit =
|
|
for n = 0 to div_ b.size - 1 do
|
|
f (mul_ n) width_ (unsafe_get_ b.b n)
|
|
done;
|
|
let r = mod_ b.size in
|
|
if r <> 0 then (
|
|
let last = div_ b.size in
|
|
f (mul_ last) r (__lsb_mask r land unsafe_get_ b.b last)
|
|
)
|
|
|
|
(* set [byte[i]] to [f(byte[i])] *)
|
|
let map_bytes_ (b : t) ~f : unit =
|
|
for n = 0 to div_ b.size - 1 do
|
|
unsafe_set_ b.b n (f (unsafe_get_ b.b n))
|
|
done;
|
|
let r = mod_ b.size in
|
|
if r <> 0 then (
|
|
let last = div_ b.size in
|
|
let mask = __lsb_mask r in
|
|
unsafe_set_ b.b last (mask land f (mask land unsafe_get_ b.b last))
|
|
)
|
|
|
|
let cardinal bv =
|
|
if bv.size = 0 then
|
|
0
|
|
else (
|
|
let n = ref 0 in
|
|
iter_bytes_ bv ~f:(fun _ _ b -> n := !n + __popcount8 b);
|
|
!n
|
|
)
|
|
|
|
let really_resize_ bv ~desired ~current size =
|
|
bv.size <- size;
|
|
if desired <> current then (
|
|
let b = Bytes.make desired zero in
|
|
Bytes.blit bv.b 0 b 0 (min desired current);
|
|
bv.b <- b
|
|
)
|
|
|
|
(* set bits above [n] to 0 *)
|
|
let[@inline never] clear_bits_above_ bv top =
|
|
let n = div_ top in
|
|
let j = mod_ top in
|
|
Bytes.fill bv.b (n + 1)
|
|
(bytes_length_of_size bv.size - n - 1)
|
|
(Char.unsafe_chr 0);
|
|
unsafe_set_ bv.b n (unsafe_get_ bv.b n land __lsb_mask j)
|
|
|
|
let[@inline never] grow_to_at_least_real_ bv size =
|
|
(* beyond capacity *)
|
|
let current = Bytes.length bv.b in
|
|
let desired = bytes_length_of_size size in
|
|
let desired =
|
|
min Sys.max_string_length (max desired (current + (current / 2)))
|
|
in
|
|
assert (desired > current);
|
|
really_resize_ bv ~desired ~current size
|
|
|
|
let grow_to_at_least_ bv size =
|
|
if size <= capacity bv then
|
|
(* within capacity *)
|
|
bv.size <- size
|
|
else
|
|
(* resize. This is a separate function so it's easier to
|
|
inline the happy path. *)
|
|
grow_to_at_least_real_ bv size
|
|
|
|
let shrink_ bv size =
|
|
assert (size <= bv.size);
|
|
if size < bv.size then (
|
|
let desired = bytes_length_of_size size in
|
|
let current = Bytes.length bv.b in
|
|
clear_bits_above_ bv size;
|
|
really_resize_ bv ~desired ~current size
|
|
)
|
|
|
|
let resize bv size =
|
|
if size < 0 then invalid_arg "resize: negative size";
|
|
if size < bv.size then (
|
|
clear_bits_above_ bv size;
|
|
bv.size <- size
|
|
) else if size > bv.size then
|
|
grow_to_at_least_ bv size
|
|
|
|
let resize_minimize_memory bv size =
|
|
if size < 0 then invalid_arg "resize: negative size";
|
|
if size < bv.size then
|
|
shrink_ bv size
|
|
else if size > bv.size then
|
|
grow_to_at_least_ bv size
|
|
|
|
let is_empty bv =
|
|
bv.size = 0
|
|
||
|
|
try
|
|
for i = 0 to bytes_length_of_size bv.size - 1 do
|
|
if unsafe_get_ bv.b i <> 0 then raise_notrace Exit
|
|
done;
|
|
true
|
|
with Exit -> false
|
|
|
|
let[@inline] get bv i =
|
|
if i < 0 then invalid_arg "get: negative index";
|
|
let idx_bucket = div_ i in
|
|
let idx_in_byte = mod_ i in
|
|
if idx_bucket < Bytes.length bv.b then
|
|
unsafe_get_ bv.b idx_bucket land (1 lsl idx_in_byte) <> 0
|
|
else
|
|
false
|
|
|
|
let[@inline] set bv i =
|
|
if i < 0 then
|
|
invalid_arg "set: negative index"
|
|
else (
|
|
let idx_bucket = div_ i in
|
|
let idx_in_byte = mod_ i in
|
|
if i >= bv.size then grow_to_at_least_ bv (i + 1);
|
|
unsafe_set_ bv.b idx_bucket
|
|
(unsafe_get_ bv.b idx_bucket lor (1 lsl idx_in_byte))
|
|
)
|
|
|
|
let init size f : t =
|
|
let v = create ~size false in
|
|
for i = 0 to size - 1 do
|
|
if f i then set v i
|
|
done;
|
|
v
|
|
|
|
let[@inline] reset bv i =
|
|
if i < 0 then
|
|
invalid_arg "reset: negative index"
|
|
else (
|
|
let n = div_ i in
|
|
let j = mod_ i in
|
|
if i >= bv.size then grow_to_at_least_ bv (i + 1);
|
|
unsafe_set_ bv.b n (unsafe_get_ bv.b n land lnot (1 lsl j))
|
|
)
|
|
|
|
let[@inline] set_bool bv i b =
|
|
if b then
|
|
set bv i
|
|
else
|
|
reset bv i
|
|
|
|
let flip bv i =
|
|
if i < 0 then
|
|
invalid_arg "reset: negative index"
|
|
else (
|
|
let n = div_ i in
|
|
let j = mod_ i in
|
|
if i >= bv.size then grow_to_at_least_ bv (i + 1);
|
|
unsafe_set_ bv.b n (unsafe_get_ bv.b n lxor (1 lsl j))
|
|
)
|
|
|
|
let clear bv = Bytes.fill bv.b 0 (Bytes.length bv.b) zero
|
|
|
|
let clear_and_shrink bv =
|
|
clear bv;
|
|
bv.size <- 0
|
|
|
|
let equal_bytes_ size b1 b2 =
|
|
try
|
|
for i = 0 to bytes_length_of_size size - 1 do
|
|
if Bytes.get b1 i <> Bytes.get b2 i then raise_notrace Exit
|
|
done;
|
|
true
|
|
with Exit -> false
|
|
|
|
let equal x y : bool = x.size = y.size && equal_bytes_ x.size x.b y.b
|
|
|
|
let iter bv f =
|
|
iter_bytes_ bv ~f:(fun off width_n word_n ->
|
|
for i = 0 to width_n - 1 do
|
|
f (off + i) (word_n land (1 lsl i) <> 0)
|
|
done)
|
|
|
|
let iter_true bv f =
|
|
iter bv (fun i b ->
|
|
if b then
|
|
f i
|
|
else
|
|
())
|
|
|
|
let to_list bv =
|
|
let l = ref [] in
|
|
iter_true bv (fun i -> l := i :: !l);
|
|
!l
|
|
|
|
let to_sorted_list bv = List.rev (to_list bv)
|
|
|
|
(* Interpret these as indices. *)
|
|
let of_list l =
|
|
let size =
|
|
match l with
|
|
| [] -> 0
|
|
| _ -> List.fold_left max 0 l + 1
|
|
in
|
|
let bv = create ~size false in
|
|
List.iter (fun i -> set bv i) l;
|
|
bv
|
|
|
|
exception FoundFirst of int
|
|
|
|
let first_exn bv =
|
|
try
|
|
iter_true bv (fun i -> raise_notrace (FoundFirst i));
|
|
raise Not_found
|
|
with FoundFirst i -> i
|
|
|
|
let first bv = try Some (first_exn bv) with Not_found -> None
|
|
let filter bv p = iter_true bv (fun i -> if not (p i) then reset bv i)
|
|
let negate_self bv = map_bytes_ bv ~f:(fun b -> lnot b)
|
|
|
|
let negate a =
|
|
let b = copy a in
|
|
negate_self b;
|
|
b
|
|
|
|
let union_into_no_resize_ ~into bv =
|
|
assert (Bytes.length into.b >= bytes_length_of_size bv.size);
|
|
for i = 0 to bytes_length_of_size bv.size - 1 do
|
|
unsafe_set_ into.b i (unsafe_get_ into.b i lor unsafe_get_ bv.b i)
|
|
done
|
|
|
|
(* Underlying size grows for union. *)
|
|
let union_into ~into bv =
|
|
if into.size < bv.size then grow_to_at_least_ into bv.size;
|
|
union_into_no_resize_ ~into bv
|
|
|
|
(* To avoid potentially 2 passes, figure out what we need to copy. *)
|
|
let union b1 b2 =
|
|
if b1.size <= b2.size then (
|
|
let into = copy b2 in
|
|
union_into_no_resize_ ~into b1;
|
|
into
|
|
) else (
|
|
let into = copy b1 in
|
|
union_into_no_resize_ ~into b2;
|
|
into
|
|
)
|
|
|
|
let inter_into_no_resize_ ~into bv =
|
|
assert (into.size <= bv.size);
|
|
for i = 0 to bytes_length_of_size into.size - 1 do
|
|
unsafe_set_ into.b i (unsafe_get_ into.b i land unsafe_get_ bv.b i)
|
|
done
|
|
|
|
(* Underlying size shrinks for inter. *)
|
|
let inter_into ~into bv =
|
|
if into.size > bv.size then shrink_ into bv.size;
|
|
inter_into_no_resize_ ~into bv
|
|
|
|
let inter b1 b2 =
|
|
if b1.size <= b2.size then (
|
|
let into = copy b1 in
|
|
inter_into_no_resize_ ~into b2;
|
|
into
|
|
) else (
|
|
let into = copy b2 in
|
|
inter_into_no_resize_ ~into b1;
|
|
into
|
|
)
|
|
|
|
(* Underlying size depends on the [in_] set for diff, so we don't change
|
|
its size! *)
|
|
let diff_into ~into bv =
|
|
let n = min (Bytes.length into.b) (Bytes.length bv.b) in
|
|
for i = 0 to n - 1 do
|
|
unsafe_set_ into.b i (unsafe_get_ into.b i land lnot (unsafe_get_ bv.b i))
|
|
done
|
|
|
|
let diff in_ not_in =
|
|
let into = copy in_ in
|
|
diff_into ~into not_in;
|
|
into
|
|
|
|
let select bv arr =
|
|
let l = ref [] in
|
|
(try
|
|
iter_true bv (fun i ->
|
|
if i >= Array.length arr then
|
|
raise_notrace Exit
|
|
else
|
|
l := arr.(i) :: !l)
|
|
with Exit -> ());
|
|
!l
|
|
|
|
let selecti bv arr =
|
|
let l = ref [] in
|
|
(try
|
|
iter_true bv (fun i ->
|
|
if i >= Array.length arr then
|
|
raise_notrace Exit
|
|
else
|
|
l := (arr.(i), i) :: !l)
|
|
with Exit -> ());
|
|
!l
|
|
|
|
type 'a iter = ('a -> unit) -> unit
|
|
|
|
let to_iter bv k = iter_true bv k
|
|
|
|
let of_iter seq =
|
|
let l = ref [] and maxi = ref 0 in
|
|
seq (fun x ->
|
|
l := x :: !l;
|
|
maxi := max !maxi x);
|
|
let bv = create ~size:(!maxi + 1) false in
|
|
List.iter (fun i -> set bv i) !l;
|
|
bv
|
|
|
|
let pp out bv =
|
|
Format.pp_print_string out "bv {";
|
|
iter bv (fun _i b ->
|
|
Format.pp_print_char out
|
|
(if b then
|
|
'1'
|
|
else
|
|
'0'));
|
|
Format.pp_print_string out "}"
|
|
|
|
module Internal_ = struct
|
|
let __to_word_l bv =
|
|
let l = ref [] in
|
|
Bytes.iter (fun c -> l := c :: !l) bv.b;
|
|
List.rev !l
|
|
|
|
let __popcount8 = __popcount8
|
|
let __lsb_mask = __lsb_mask
|
|
|
|
let __check_invariant self =
|
|
let n = div_ self.size in
|
|
let j = mod_ self.size in
|
|
assert (Bytes.length self.b >= n);
|
|
if j > 0 then
|
|
assert (
|
|
let c = get_ self.b n in
|
|
c land __lsb_mask j = c);
|
|
for i = n + 1 to Bytes.length self.b - 1 do
|
|
assert (get_ self.b i = 0)
|
|
done
|
|
end
|