From d8a55a98b9ef052d3c7cff150c78ed40a79cab33 Mon Sep 17 00:00:00 2001 From: Leonid Rozenberg Date: Wed, 19 Apr 2017 02:15:09 -0400 Subject: [PATCH] Size tracking implementation --- src/data/CCBV.ml | 313 +++++++++++++++++++++++++++++++--------------- src/data/CCBV.mli | 38 ++++-- 2 files changed, 240 insertions(+), 111 deletions(-) diff --git a/src/data/CCBV.ml b/src/data/CCBV.ml index f00d69f2..144661d4 100644 --- a/src/data/CCBV.ml +++ b/src/data/CCBV.ml @@ -1,80 +1,117 @@ -(* This file is free software, part of containers. See file "license" for more details. *) - (** {2 Imperative Bitvectors} *) -let __width = Sys.word_size - 2 +let __width = Sys.word_size - 1 -(* int with [n] ones *) -let rec __shift bv n = - if n = 0 - then bv - else __shift ((bv lsl 1) lor 1) (n-1) +(** We use OCamls ints to store the bits. We index them from the + least significant bit. We create masks to zero out the most significant + bits that aren't used to store values. *) +let __lsb_masks = + let a = Array.make (__width + 1) 0 in + for i = 1 to __width do + a.(i) <- a.(i-1) lor (1 lsl (i - 1)) + done; + a -(* only ones *) -let __all_ones = __shift 0 __width - -type t = { - mutable a : int array; -} - -let empty () = { a = [| |] } - -let create ~size default = - if size = 0 then { a = [| |] } - else begin - let n = if size mod __width = 0 then size / __width else (size / __width) + 1 in - let arr = if default - then Array.make n __all_ones - else Array.make n 0 - in - (* adjust last bits *) - if default && (size mod __width) <> 0 - then arr.(n-1) <- __shift 0 (size - (n-1) * __width); - { a = arr } - end - -(*$T - create ~size:17 true |> cardinal = 17 - create ~size:32 true |> cardinal= 32 - create ~size:132 true |> cardinal = 132 - create ~size:200 false |> cardinal = 0 - create ~size:29 true |> to_sorted_list = CCList.range 0 28 -*) - -let copy bv = { a=Array.copy bv.a; } - -(*$Q - (Q.list Q.small_int) (fun l -> \ - let bv = of_list l in to_list bv = to_list (copy bv)) -*) - -let length bv = Array.length bv.a - -let resize bv len = - if len > Array.length bv.a - then begin - let a' = Array.make len 0 in - Array.blit bv.a 0 a' 0 (Array.length bv.a); - bv.a <- a' - end +let __all_ones = __lsb_masks.(__width) (* count the 1 bits in [n]. See https://en.wikipedia.org/wiki/Hamming_weight *) let __count_bits n = let rec recurse count n = if n = 0 then count else recurse (count+1) (n land (n-1)) in - if n < 0 - then recurse 1 (n lsr 1) (* only on unsigned *) - else recurse 0 n + recurse 0 n + +(* Can I access the "private" members in testing? $Q + (Q.int_bound (Sys.word_size - 1)) (fun i -> __count_bits __lsb_masks.(i) = i) +*) + +type t = { + mutable a : int array; + mutable size : int; +} + +let length t = t.size + +let empty () = { a = [| |] ; size = 0 } + +let __to_array_legnth size = + if size mod __width = 0 then size / __width else (size / __width) + 1 + +let create ~size default = + if size = 0 then { a = [| |] ; size } + else begin + let n = __to_array_legnth size in + let arr = if default + then Array.make n __all_ones + else Array.make n 0 + in + (* adjust last bits *) + let r = size mod __width in + if default && r <> 0 + then Array.unsafe_set arr (n-1) __lsb_masks.(r); + { a = arr; size } + end + +(*$Q + (Q.pair Q.small_int Q.bool) (fun (size, b) -> create ~size b |> length = size) +*) + +(*$T + create ~size:17 true |> cardinal = 17 + create ~size:32 true |> cardinal = 32 + create ~size:132 true |> cardinal = 132 + create ~size:200 false |> cardinal = 0 + create ~size:29 true |> to_sorted_list = CCList.range 0 28 +*) + +let copy bv = { a = Array.copy bv.a ; size = bv.size } + +(*$Q + (Q.list Q.small_int) (fun l -> \ + let bv = of_list l in to_list bv = to_list (copy bv)) +*) + +let capacity bv = __width * Array.length bv.a let cardinal bv = let n = ref 0 in - for i = 0 to length bv - 1 do + for i = 0 to Array.length bv.a - 1 do n := !n + __count_bits bv.a.(i) done; !n +(*$Q + Q.small_int (fun size -> create ~size true |> cardinal = size) +*) + +let __really_resize bv ~desired ~current size = + let a' = Array.make desired 0 in + Array.blit bv.a 0 a' 0 current; + bv.a <- a'; + bv.size <- size + +let __grow bv size = + if size <= capacity bv (* within capacity *) + then bv.size <- size + else (* beyond capacity *) + let desired = __to_array_legnth size in + let current = Array.length bv.a in + __really_resize bv ~desired ~current size + +let __shrink bv size = + let desired = __to_array_legnth size in + let current = Array.length bv.a in + __really_resize bv ~desired ~current size + +let resize bv size = + if size < 0 then invalid_arg "resize: negative size" else + if size < bv.size (* shrink *) + then __shrink bv size + else if size = bv.size + then () + else __grow bv size + (*$R let bv1 = CCBV.create ~size:87 true in assert_equal ~printer:string_of_int 87 (CCBV.cardinal bv1); @@ -94,12 +131,12 @@ let is_empty bv = false let get bv i = - let n = i / __width in - if n < Array.length bv.a - then - let i = i - n * __width in - bv.a.(n) land (1 lsl i) <> 0 - else false + if i < 0 then invalid_arg "get: negative index" else + let n = i / __width in + let i = i mod __width in + if n < Array.length bv.a + then (Array.unsafe_get bv.a n) land (1 lsl i) <> 0 + else false (*$R let bv = CCBV.create ~size:99 false in @@ -118,11 +155,11 @@ let get bv i = *) let set bv i = - let n = i / __width in - if n >= Array.length bv.a - then resize bv (n+1); - let i = i - n * __width in - bv.a.(n) <- bv.a.(n) lor (1 lsl i) + if i < 0 then invalid_arg "set: negative index" else + let n = i / __width in + let j = i mod __width in + if i >= bv.size then __grow bv i; + Array.unsafe_set bv.a n ((Array.unsafe_get bv.a n) lor (1 lsl j)) (*$T let bv = create ~size:3 false in set bv 0; get bv 0 @@ -130,22 +167,22 @@ let set bv i = *) let reset bv i = - let n = i / __width in - if n >= Array.length bv.a - then resize bv (n+1); - let i = i - n * __width in - bv.a.(n) <- bv.a.(n) land (lnot (1 lsl i)) + if i < 0 then invalid_arg "reset: negative index" else + let n = i / __width in + let j = i mod __width in + if i >= bv.size then __grow bv i; + Array.unsafe_set bv.a n ((Array.unsafe_get bv.a n) land (lnot (1 lsl j))) (*$T let bv = create ~size:3 false in set bv 0; reset bv 0; not (get bv 0) *) let flip bv i = - let n = i / __width in - if n >= Array.length bv.a - then resize bv (n+1); - let i = i - n * __width in - bv.a.(n) <- bv.a.(n) lxor (1 lsl i) + if i < 0 then invalid_arg "reset: negative index" else + let n = i / __width in + let j = i mod __width in + if i >= bv.size then __grow bv i; + Array.unsafe_set bv.a n ((Array.unsafe_get bv.a n) lxor (1 lsl j)) (*$R let bv = of_list [1;10; 11; 30] in @@ -163,7 +200,7 @@ let flip bv i = *) let clear bv = - Array.iteri (fun i _ -> bv.a.(i) <- 0) bv.a + Array.fill bv.a 0 (Array.length bv.a) 0 (*$T let bv = create ~size:37 true in cardinal bv = 37 && (clear bv; cardinal bv= 0) @@ -242,8 +279,9 @@ let to_list bv = let to_sorted_list bv = List.rev (to_list bv) +(* Interpret these as indices. *) let of_list l = - let size = List.fold_left max 0 l in + let size = (List.fold_left max 0 l) + 1 in let bv = create ~size false in List.iter (fun i -> set bv i) l; bv @@ -276,18 +314,61 @@ let filter bv p = to_sorted_list bv = [2;4;6] *) +let negate_self b = + let len = Array.length b.a in + for n = 0 to len - 1 do + Array.unsafe_set b.a n (lnot (Array.unsafe_get b.a n)) + done; + let r = b.size mod __width in + if r <> 0 then + let l = Array.length b.a - 1 in + Array.unsafe_set b.a l (__lsb_masks.(r) land (Array.unsafe_get b.a l)) + +(*$T + let v = of_list [1;2;5;7;] in negate_self v; \ + cardinal v = (List.length [0;3;4;6]) +*) + +let negate b = + let a = Array.map (lnot) b.a in + let r = b.size mod __width in + if r <> 0 then begin + let l = Array.length b.a - 1 in + Array.unsafe_set a l (__lsb_masks.(r) land (Array.unsafe_get a l)) + end; + { a ; size = b.size } + +(*$Q + Q.small_int (fun size -> create ~size false |> negate |> cardinal = size) +*) + +(* Underlying size grows for union. *) let union_into ~into bv = - if length into < length bv - then resize into (length bv); - let len = Array.length bv.a in - for i = 0 to len - 1 do - into.a.(i) <- into.a.(i) lor bv.a.(i) + if into.size < bv.size + then __grow into bv.size; + for i = 0 to (Array.length into.a) - 1 do + Array.unsafe_set into.a i + ((Array.unsafe_get into.a i) lor (Array.unsafe_get bv.a i)) done -let union bv1 bv2 = - let bv = copy bv1 in - union_into ~into:bv bv2; - bv +(* To avoid potentially 2 passes, figure out what we need to copy. *) +let union b1 b2 = + if b1.size <= b2.size + then begin + let into = copy b2 in + for i = 0 to (Array.length b1.a) - 1 do + Array.unsafe_set into.a i + ((Array.unsafe_get into.a i) lor (Array.unsafe_get b1.a i)) + done; + into + end else begin + let into = copy b1 in + for i = 0 to (Array.length b1.a) - 1 do + Array.unsafe_set into.a i + ((Array.unsafe_get into.a i) lor (Array.unsafe_get b2.a i)) + done; + into + end (*$R let bv1 = CCBV.of_list [1;2;3;4] in @@ -302,22 +383,32 @@ let union bv1 bv2 = union (of_list [1;2;3;4;5]) (of_list [7;3;5;6]) |> to_sorted_list = CCList.range 1 7 *) +(* Underlying size shrinks for inter. *) let inter_into ~into bv = - let n = min (length into) (length bv) in - for i = 0 to n - 1 do - into.a.(i) <- into.a.(i) land bv.a.(i) + if into.size > bv.size + then __shrink into bv.size; + for i = 0 to (Array.length into.a) - 1 do + Array.unsafe_set into.a i + ((Array.unsafe_get into.a i) land (Array.unsafe_get bv.a i)) done -let inter bv1 bv2 = - if length bv1 < length bv2 - then - let bv = copy bv1 in - let () = inter_into ~into:bv bv2 in - bv - else - let bv = copy bv2 in - let () = inter_into ~into:bv bv1 in - bv +let inter b1 b2 = + if b1.size <= b2.size + then begin + let into = copy b1 in + for i = 0 to (Array.length b1.a) - 1 do + Array.unsafe_set into.a i + ((Array.unsafe_get into.a i) land (Array.unsafe_get b2.a i)) + done; + into + end else begin + let into = copy b2 in + for i = 0 to (Array.length b2.a) - 1 do + Array.unsafe_set into.a i + ((Array.unsafe_get into.a i) land (Array.unsafe_get b1.a i)) + done; + into + end (*$T inter (of_list [1;2;3;4]) (of_list [2;4;6;1]) |> to_sorted_list = [1;2;4] @@ -331,6 +422,20 @@ let inter bv1 bv2 = assert_equal [3;4] l; *) +(* Underlying size depends on the 'in_' set for diff, so we don't change + it's size! *) +let diff_into ~into bv = + let n = min (Array.length into.a) (Array.length bv.a) in + for i = 0 to n - 1 do + Array.unsafe_set into.a i + ((Array.unsafe_get into.a i) land (lnot (Array.unsafe_get bv.a i))) + done + +let diff ~in_ not_in = + let into = copy in_ in + diff_into ~into not_in; + into + let select bv arr = let l = ref [] in begin try diff --git a/src/data/CCBV.mli b/src/data/CCBV.mli index 36bb217f..414d50c2 100644 --- a/src/data/CCBV.mli +++ b/src/data/CCBV.mli @@ -21,14 +21,19 @@ val copy : t -> t (** Copy of bitvector *) val cardinal : t -> int -(** Number of bits set *) +(** Number of set bits. *) val length : t -> int -(** Length of underlying array *) +(** Length of underlying bitvector. *) + +val capacity : t -> int +(** The number of bits this bitvector can store without resizing. *) val resize : t -> int -> unit -(** Resize the BV so that it has at least the given physical length - [resize bv n] should make [bv] able to store [(Sys.word_size - 2)* n] bits *) +(** Resize the BV so that it has the specified length. This can grow or shrink + the underlying bitvector. + + @raise Invalid_arg on negative sizes. *) val is_empty : t -> bool (** Any bit set? *) @@ -62,7 +67,10 @@ val to_sorted_list : t -> int list increasing order *) val of_list : int list -> t -(** From a list of true bits *) +(** From a list of true bits. + + The bits are interpreted as indices into the returned bitvector, so the final + bitvector will have [length t] equal to 1 more than max of list indices. *) val first : t -> int (** First set bit, or @@ -72,11 +80,21 @@ val filter : t -> (int -> bool) -> unit (** [filter bv p] only keeps the true bits of [bv] whose [index] satisfies [p index] *) +val negate_self : t -> unit +(** [negate_self t] flips all of the bits in [t]. *) + +val negate : t -> t +(** [negate t] returns a copy of [t] with all of the bits flipped. *) + val union_into : into:t -> t -> unit -(** [union ~into bv] sets [into] to the union of itself and [bv]. *) +(** [union ~into bv] sets [into] to the union of itself and [bv]. + + Note that [into] will grow to accammodate the union. *) val inter_into : into:t -> t -> unit -(** [inter ~into bv] sets [into] to the intersection of itself and [bv] *) +(** [inter ~into bv] sets [into] to the intersection of itself and [bv] + + Note that [into] will shrink to accammodate the union. *) val union : t -> t -> t (** [union bv1 bv2] returns the union of the two sets *) @@ -84,6 +102,12 @@ val union : t -> t -> t val inter : t -> t -> t (** [inter bv1 bv2] returns the intersection of the two sets *) +val diff_into : into:t -> t -> unit +(** [diff ~into t] Modify [into] with only the bits set but not in [t]. *) + +val diff : in_:t -> t -> t +(** [diff ~in_ t] Return those bits found [in_] but not in [t]. *) + val select : t -> 'a array -> 'a list (** [select arr bv] selects the elements of [arr] whose index corresponds to a true bit in [bv]. If [bv] is too short, elements of [arr]