Size tracking implementation

This commit is contained in:
Leonid Rozenberg 2017-04-19 02:15:09 -04:00
parent b70a8d875e
commit d8a55a98b9
2 changed files with 240 additions and 111 deletions

View file

@ -1,80 +1,117 @@
(* This file is free software, part of containers. See file "license" for more details. *)
(** {2 Imperative Bitvectors} *) (** {2 Imperative Bitvectors} *)
let __width = Sys.word_size - 2 let __width = Sys.word_size - 1
(* int with [n] ones *) (** We use OCamls ints to store the bits. We index them from the
let rec __shift bv n = least significant bit. We create masks to zero out the most significant
if n = 0 bits that aren't used to store values. *)
then bv let __lsb_masks =
else __shift ((bv lsl 1) lor 1) (n-1) let a = Array.make (__width + 1) 0 in
for i = 1 to __width do
a.(i) <- a.(i-1) lor (1 lsl (i - 1))
done;
a
(* only ones *) let __all_ones = __lsb_masks.(__width)
let __all_ones = __shift 0 __width
type t = {
mutable a : int array;
}
let empty () = { a = [| |] }
let create ~size default =
if size = 0 then { a = [| |] }
else begin
let n = if size mod __width = 0 then size / __width else (size / __width) + 1 in
let arr = if default
then Array.make n __all_ones
else Array.make n 0
in
(* adjust last bits *)
if default && (size mod __width) <> 0
then arr.(n-1) <- __shift 0 (size - (n-1) * __width);
{ a = arr }
end
(*$T
create ~size:17 true |> cardinal = 17
create ~size:32 true |> cardinal= 32
create ~size:132 true |> cardinal = 132
create ~size:200 false |> cardinal = 0
create ~size:29 true |> to_sorted_list = CCList.range 0 28
*)
let copy bv = { a=Array.copy bv.a; }
(*$Q
(Q.list Q.small_int) (fun l -> \
let bv = of_list l in to_list bv = to_list (copy bv))
*)
let length bv = Array.length bv.a
let resize bv len =
if len > Array.length bv.a
then begin
let a' = Array.make len 0 in
Array.blit bv.a 0 a' 0 (Array.length bv.a);
bv.a <- a'
end
(* count the 1 bits in [n]. See https://en.wikipedia.org/wiki/Hamming_weight *) (* count the 1 bits in [n]. See https://en.wikipedia.org/wiki/Hamming_weight *)
let __count_bits n = let __count_bits n =
let rec recurse count n = let rec recurse count n =
if n = 0 then count else recurse (count+1) (n land (n-1)) if n = 0 then count else recurse (count+1) (n land (n-1))
in in
if n < 0 recurse 0 n
then recurse 1 (n lsr 1) (* only on unsigned *)
else recurse 0 n (* Can I access the "private" members in testing? $Q
(Q.int_bound (Sys.word_size - 1)) (fun i -> __count_bits __lsb_masks.(i) = i)
*)
type t = {
mutable a : int array;
mutable size : int;
}
let length t = t.size
let empty () = { a = [| |] ; size = 0 }
let __to_array_legnth size =
if size mod __width = 0 then size / __width else (size / __width) + 1
let create ~size default =
if size = 0 then { a = [| |] ; size }
else begin
let n = __to_array_legnth size in
let arr = if default
then Array.make n __all_ones
else Array.make n 0
in
(* adjust last bits *)
let r = size mod __width in
if default && r <> 0
then Array.unsafe_set arr (n-1) __lsb_masks.(r);
{ a = arr; size }
end
(*$Q
(Q.pair Q.small_int Q.bool) (fun (size, b) -> create ~size b |> length = size)
*)
(*$T
create ~size:17 true |> cardinal = 17
create ~size:32 true |> cardinal = 32
create ~size:132 true |> cardinal = 132
create ~size:200 false |> cardinal = 0
create ~size:29 true |> to_sorted_list = CCList.range 0 28
*)
let copy bv = { a = Array.copy bv.a ; size = bv.size }
(*$Q
(Q.list Q.small_int) (fun l -> \
let bv = of_list l in to_list bv = to_list (copy bv))
*)
let capacity bv = __width * Array.length bv.a
let cardinal bv = let cardinal bv =
let n = ref 0 in let n = ref 0 in
for i = 0 to length bv - 1 do for i = 0 to Array.length bv.a - 1 do
n := !n + __count_bits bv.a.(i) n := !n + __count_bits bv.a.(i)
done; done;
!n !n
(*$Q
Q.small_int (fun size -> create ~size true |> cardinal = size)
*)
let __really_resize bv ~desired ~current size =
let a' = Array.make desired 0 in
Array.blit bv.a 0 a' 0 current;
bv.a <- a';
bv.size <- size
let __grow bv size =
if size <= capacity bv (* within capacity *)
then bv.size <- size
else (* beyond capacity *)
let desired = __to_array_legnth size in
let current = Array.length bv.a in
__really_resize bv ~desired ~current size
let __shrink bv size =
let desired = __to_array_legnth size in
let current = Array.length bv.a in
__really_resize bv ~desired ~current size
let resize bv size =
if size < 0 then invalid_arg "resize: negative size" else
if size < bv.size (* shrink *)
then __shrink bv size
else if size = bv.size
then ()
else __grow bv size
(*$R (*$R
let bv1 = CCBV.create ~size:87 true in let bv1 = CCBV.create ~size:87 true in
assert_equal ~printer:string_of_int 87 (CCBV.cardinal bv1); assert_equal ~printer:string_of_int 87 (CCBV.cardinal bv1);
@ -94,12 +131,12 @@ let is_empty bv =
false false
let get bv i = let get bv i =
let n = i / __width in if i < 0 then invalid_arg "get: negative index" else
if n < Array.length bv.a let n = i / __width in
then let i = i mod __width in
let i = i - n * __width in if n < Array.length bv.a
bv.a.(n) land (1 lsl i) <> 0 then (Array.unsafe_get bv.a n) land (1 lsl i) <> 0
else false else false
(*$R (*$R
let bv = CCBV.create ~size:99 false in let bv = CCBV.create ~size:99 false in
@ -118,11 +155,11 @@ let get bv i =
*) *)
let set bv i = let set bv i =
let n = i / __width in if i < 0 then invalid_arg "set: negative index" else
if n >= Array.length bv.a let n = i / __width in
then resize bv (n+1); let j = i mod __width in
let i = i - n * __width in if i >= bv.size then __grow bv i;
bv.a.(n) <- bv.a.(n) lor (1 lsl i) Array.unsafe_set bv.a n ((Array.unsafe_get bv.a n) lor (1 lsl j))
(*$T (*$T
let bv = create ~size:3 false in set bv 0; get bv 0 let bv = create ~size:3 false in set bv 0; get bv 0
@ -130,22 +167,22 @@ let set bv i =
*) *)
let reset bv i = let reset bv i =
let n = i / __width in if i < 0 then invalid_arg "reset: negative index" else
if n >= Array.length bv.a let n = i / __width in
then resize bv (n+1); let j = i mod __width in
let i = i - n * __width in if i >= bv.size then __grow bv i;
bv.a.(n) <- bv.a.(n) land (lnot (1 lsl i)) Array.unsafe_set bv.a n ((Array.unsafe_get bv.a n) land (lnot (1 lsl j)))
(*$T (*$T
let bv = create ~size:3 false in set bv 0; reset bv 0; not (get bv 0) let bv = create ~size:3 false in set bv 0; reset bv 0; not (get bv 0)
*) *)
let flip bv i = let flip bv i =
let n = i / __width in if i < 0 then invalid_arg "reset: negative index" else
if n >= Array.length bv.a let n = i / __width in
then resize bv (n+1); let j = i mod __width in
let i = i - n * __width in if i >= bv.size then __grow bv i;
bv.a.(n) <- bv.a.(n) lxor (1 lsl i) Array.unsafe_set bv.a n ((Array.unsafe_get bv.a n) lxor (1 lsl j))
(*$R (*$R
let bv = of_list [1;10; 11; 30] in let bv = of_list [1;10; 11; 30] in
@ -163,7 +200,7 @@ let flip bv i =
*) *)
let clear bv = let clear bv =
Array.iteri (fun i _ -> bv.a.(i) <- 0) bv.a Array.fill bv.a 0 (Array.length bv.a) 0
(*$T (*$T
let bv = create ~size:37 true in cardinal bv = 37 && (clear bv; cardinal bv= 0) let bv = create ~size:37 true in cardinal bv = 37 && (clear bv; cardinal bv= 0)
@ -242,8 +279,9 @@ let to_list bv =
let to_sorted_list bv = let to_sorted_list bv =
List.rev (to_list bv) List.rev (to_list bv)
(* Interpret these as indices. *)
let of_list l = let of_list l =
let size = List.fold_left max 0 l in let size = (List.fold_left max 0 l) + 1 in
let bv = create ~size false in let bv = create ~size false in
List.iter (fun i -> set bv i) l; List.iter (fun i -> set bv i) l;
bv bv
@ -276,18 +314,61 @@ let filter bv p =
to_sorted_list bv = [2;4;6] to_sorted_list bv = [2;4;6]
*) *)
let negate_self b =
let len = Array.length b.a in
for n = 0 to len - 1 do
Array.unsafe_set b.a n (lnot (Array.unsafe_get b.a n))
done;
let r = b.size mod __width in
if r <> 0 then
let l = Array.length b.a - 1 in
Array.unsafe_set b.a l (__lsb_masks.(r) land (Array.unsafe_get b.a l))
(*$T
let v = of_list [1;2;5;7;] in negate_self v; \
cardinal v = (List.length [0;3;4;6])
*)
let negate b =
let a = Array.map (lnot) b.a in
let r = b.size mod __width in
if r <> 0 then begin
let l = Array.length b.a - 1 in
Array.unsafe_set a l (__lsb_masks.(r) land (Array.unsafe_get a l))
end;
{ a ; size = b.size }
(*$Q
Q.small_int (fun size -> create ~size false |> negate |> cardinal = size)
*)
(* Underlying size grows for union. *)
let union_into ~into bv = let union_into ~into bv =
if length into < length bv if into.size < bv.size
then resize into (length bv); then __grow into bv.size;
let len = Array.length bv.a in for i = 0 to (Array.length into.a) - 1 do
for i = 0 to len - 1 do Array.unsafe_set into.a i
into.a.(i) <- into.a.(i) lor bv.a.(i) ((Array.unsafe_get into.a i) lor (Array.unsafe_get bv.a i))
done done
let union bv1 bv2 = (* To avoid potentially 2 passes, figure out what we need to copy. *)
let bv = copy bv1 in let union b1 b2 =
union_into ~into:bv bv2; if b1.size <= b2.size
bv then begin
let into = copy b2 in
for i = 0 to (Array.length b1.a) - 1 do
Array.unsafe_set into.a i
((Array.unsafe_get into.a i) lor (Array.unsafe_get b1.a i))
done;
into
end else begin
let into = copy b1 in
for i = 0 to (Array.length b1.a) - 1 do
Array.unsafe_set into.a i
((Array.unsafe_get into.a i) lor (Array.unsafe_get b2.a i))
done;
into
end
(*$R (*$R
let bv1 = CCBV.of_list [1;2;3;4] in let bv1 = CCBV.of_list [1;2;3;4] in
@ -302,22 +383,32 @@ let union bv1 bv2 =
union (of_list [1;2;3;4;5]) (of_list [7;3;5;6]) |> to_sorted_list = CCList.range 1 7 union (of_list [1;2;3;4;5]) (of_list [7;3;5;6]) |> to_sorted_list = CCList.range 1 7
*) *)
(* Underlying size shrinks for inter. *)
let inter_into ~into bv = let inter_into ~into bv =
let n = min (length into) (length bv) in if into.size > bv.size
for i = 0 to n - 1 do then __shrink into bv.size;
into.a.(i) <- into.a.(i) land bv.a.(i) for i = 0 to (Array.length into.a) - 1 do
Array.unsafe_set into.a i
((Array.unsafe_get into.a i) land (Array.unsafe_get bv.a i))
done done
let inter bv1 bv2 = let inter b1 b2 =
if length bv1 < length bv2 if b1.size <= b2.size
then then begin
let bv = copy bv1 in let into = copy b1 in
let () = inter_into ~into:bv bv2 in for i = 0 to (Array.length b1.a) - 1 do
bv Array.unsafe_set into.a i
else ((Array.unsafe_get into.a i) land (Array.unsafe_get b2.a i))
let bv = copy bv2 in done;
let () = inter_into ~into:bv bv1 in into
bv end else begin
let into = copy b2 in
for i = 0 to (Array.length b2.a) - 1 do
Array.unsafe_set into.a i
((Array.unsafe_get into.a i) land (Array.unsafe_get b1.a i))
done;
into
end
(*$T (*$T
inter (of_list [1;2;3;4]) (of_list [2;4;6;1]) |> to_sorted_list = [1;2;4] inter (of_list [1;2;3;4]) (of_list [2;4;6;1]) |> to_sorted_list = [1;2;4]
@ -331,6 +422,20 @@ let inter bv1 bv2 =
assert_equal [3;4] l; assert_equal [3;4] l;
*) *)
(* Underlying size depends on the 'in_' set for diff, so we don't change
it's size! *)
let diff_into ~into bv =
let n = min (Array.length into.a) (Array.length bv.a) in
for i = 0 to n - 1 do
Array.unsafe_set into.a i
((Array.unsafe_get into.a i) land (lnot (Array.unsafe_get bv.a i)))
done
let diff ~in_ not_in =
let into = copy in_ in
diff_into ~into not_in;
into
let select bv arr = let select bv arr =
let l = ref [] in let l = ref [] in
begin try begin try

View file

@ -21,14 +21,19 @@ val copy : t -> t
(** Copy of bitvector *) (** Copy of bitvector *)
val cardinal : t -> int val cardinal : t -> int
(** Number of bits set *) (** Number of set bits. *)
val length : t -> int val length : t -> int
(** Length of underlying array *) (** Length of underlying bitvector. *)
val capacity : t -> int
(** The number of bits this bitvector can store without resizing. *)
val resize : t -> int -> unit val resize : t -> int -> unit
(** Resize the BV so that it has at least the given physical length (** Resize the BV so that it has the specified length. This can grow or shrink
[resize bv n] should make [bv] able to store [(Sys.word_size - 2)* n] bits *) the underlying bitvector.
@raise Invalid_arg on negative sizes. *)
val is_empty : t -> bool val is_empty : t -> bool
(** Any bit set? *) (** Any bit set? *)
@ -62,7 +67,10 @@ val to_sorted_list : t -> int list
increasing order *) increasing order *)
val of_list : int list -> t val of_list : int list -> t
(** From a list of true bits *) (** From a list of true bits.
The bits are interpreted as indices into the returned bitvector, so the final
bitvector will have [length t] equal to 1 more than max of list indices. *)
val first : t -> int val first : t -> int
(** First set bit, or (** First set bit, or
@ -72,11 +80,21 @@ val filter : t -> (int -> bool) -> unit
(** [filter bv p] only keeps the true bits of [bv] whose [index] (** [filter bv p] only keeps the true bits of [bv] whose [index]
satisfies [p index] *) satisfies [p index] *)
val negate_self : t -> unit
(** [negate_self t] flips all of the bits in [t]. *)
val negate : t -> t
(** [negate t] returns a copy of [t] with all of the bits flipped. *)
val union_into : into:t -> t -> unit val union_into : into:t -> t -> unit
(** [union ~into bv] sets [into] to the union of itself and [bv]. *) (** [union ~into bv] sets [into] to the union of itself and [bv].
Note that [into] will grow to accammodate the union. *)
val inter_into : into:t -> t -> unit val inter_into : into:t -> t -> unit
(** [inter ~into bv] sets [into] to the intersection of itself and [bv] *) (** [inter ~into bv] sets [into] to the intersection of itself and [bv]
Note that [into] will shrink to accammodate the union. *)
val union : t -> t -> t val union : t -> t -> t
(** [union bv1 bv2] returns the union of the two sets *) (** [union bv1 bv2] returns the union of the two sets *)
@ -84,6 +102,12 @@ val union : t -> t -> t
val inter : t -> t -> t val inter : t -> t -> t
(** [inter bv1 bv2] returns the intersection of the two sets *) (** [inter bv1 bv2] returns the intersection of the two sets *)
val diff_into : into:t -> t -> unit
(** [diff ~into t] Modify [into] with only the bits set but not in [t]. *)
val diff : in_:t -> t -> t
(** [diff ~in_ t] Return those bits found [in_] but not in [t]. *)
val select : t -> 'a array -> 'a list val select : t -> 'a array -> 'a list
(** [select arr bv] selects the elements of [arr] whose index (** [select arr bv] selects the elements of [arr] whose index
corresponds to a true bit in [bv]. If [bv] is too short, elements of [arr] corresponds to a true bit in [bv]. If [bv] is too short, elements of [arr]