From d849b51892bd535cff7a0643e140a23d237f1e89 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 3 Nov 2016 23:48:46 +0100 Subject: [PATCH] remove `CCBloom` --- README.adoc | 1 - _oasis | 2 +- doc/intro.txt | 1 - src/data/CCBloom.ml | 164 ------------------------------------------- src/data/CCBloom.mli | 77 -------------------- 5 files changed, 1 insertion(+), 244 deletions(-) delete mode 100644 src/data/CCBloom.ml delete mode 100644 src/data/CCBloom.mli diff --git a/README.adoc b/README.adoc index 63d68ad2..b7b3270e 100644 --- a/README.adoc +++ b/README.adoc @@ -141,7 +141,6 @@ Documentation http://cedeela.fr/~simon/software/containers[here]. === Containers.data - `CCBitField`, bitfields embedded in integers -- `CCBloom`, a bloom filter - `CCCache`, memoization caches, LRU, etc. - `CCFlatHashtbl`, a flat (open-addressing) hashtable functorial implementation - `CCTrie`, a prefix tree diff --git a/_oasis b/_oasis index a4f5cd90..61d60ba3 100644 --- a/_oasis +++ b/_oasis @@ -59,7 +59,7 @@ Library "containers_data" CCPersistentHashtbl, CCDeque, CCFQueue, CCBV, CCMixtbl, CCMixmap, CCRingBuffer, CCIntMap, CCPersistentArray, CCMixset, CCHashconsedSet, CCGraph, CCHashSet, CCBitField, - CCHashTrie, CCBloom, CCWBTree, CCRAL, CCAllocCache, + CCHashTrie, CCWBTree, CCRAL, CCAllocCache, CCImmutArray, CCHet, CCZipper BuildDepends: bytes # BuildDepends: bytes, bisect_ppx diff --git a/doc/intro.txt b/doc/intro.txt index 6851f876..e2eeb355 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -72,7 +72,6 @@ Various data structures. {!modules: CCAllocCache CCBitField -CCBloom CCBV CCCache CCDeque diff --git a/src/data/CCBloom.ml b/src/data/CCBloom.ml deleted file mode 100644 index afbf3f2d..00000000 --- a/src/data/CCBloom.ml +++ /dev/null @@ -1,164 +0,0 @@ -(* This file is free software, part of containers. See file "license" for more details. *) - -(** {1 Bloom Filter} *) - -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -type 'a hash_funs = ('a -> int) array - -let primes_ = [| - 2; 3; 5; 7; 11; 13; 17; 19; 23; 29; 31; 37; 41; 43; 47; 53; 59; 61; 67; 71; - 73; 79; 83; 89; 97; 101; 103; 107; 109; 113; 127; 131; 137; 139; - 149; 151; 157; 163; 167; 173 -|] - -let default_hash_funs k = - Array.init k - (fun i -> - let seed = if i Hashtbl.seeded_hash seed x - ) - -(** {2 Bloom Filter} *) - -type 'a t = { - hash_funs : 'a hash_funs; - arr : Bytes.t; -} - -let mk_default_ size = - default_hash_funs (max 2 (size / 20)) - -let create ?hash size = - if size < 2 then invalid_arg "CCBloom.create"; - let hash_funs = match hash with - | None -> mk_default_ size - | Some h -> h - in - let arr = Bytes.make size '\000' in - { hash_funs; arr } - -let create_default ?hash_len size = - let hash = match hash_len with - | None -> mk_default_ size - | Some n -> default_hash_funs n - in - create ~hash size - -let copy f = - {f with arr= Bytes.copy f.arr } - -let size f = 8 * Bytes.length f.arr - -(* number of 1 bits in [c] *) -let rec popcount_byte_ c = - if c=0 then 0 - else - (c land 1) + popcount_byte_ (c lsr 1) - -let () = assert ( - popcount_byte_ 0 = 0 && - popcount_byte_ 3 = 2 && - popcount_byte_ 255 = 8 -) - -(* count the number of 1 bits *) -let rec count_ones_ arr i acc = - if i=Bytes.length arr then acc - else - let c = Char.code (Bytes.get arr i) in - count_ones_ arr (i+1) (acc + popcount_byte_ c) - -let load f = - let ones = count_ones_ f.arr 0 0 in - float_of_int ones /. (float_of_int (Bytes.length f.arr * 8)) - -exception LocalExit - -(* get i-th bit *) -let get_ arr i = - let j = i / 8 in - let c = Char.code (Bytes.get arr j) in - c land (1 lsl (i mod 8)) <> 0 - -(* set i-th bit *) -let set_ arr i = - let j = i / 8 in - let c = Char.code (Bytes.get arr j) in - let c = c lor (1 lsl (i mod 8)) in - Bytes.set arr j (Char.chr c) - -let mem f x = - let n = size f in - try - Array.iter - (fun hash -> if not (get_ f.arr (hash x mod n)) then raise LocalExit) - f.hash_funs; - true - with LocalExit -> false - -let add f x = - let n = size f in - Array.iter - (fun hash -> set_ f.arr (hash x mod n)) - f.hash_funs - -(*$Q - Q.(list int) (fun l -> \ - let f = create 30 in add_list f l ; \ - List.for_all (mem f) l) -*) - -let union_mut ~into f = - if size into <> size f then invalid_arg "CCBloom.union_mut"; - Bytes.iteri - (fun i c -> - Bytes.set into.arr i - (Char.chr (Char.code (Bytes.get into.arr i) lor (Char.code c))) - ) f.arr - -let union a b = - if size a <> size b then invalid_arg "CCBloom.union"; - let into = copy a in - union_mut ~into b; - into - -(*$Q - Q.(pair (list int)(list int)) (fun (l1,l2) -> \ - let f1=create 100 and f2 = create 100 in \ - add_list f1 l1; add_list f2 l2; \ - let f = union f1 f2 in \ - List.for_all (fun i -> not (mem f1 i) || mem f i) l1 && \ - List.for_all (fun i -> not (mem f2 i) || mem f i) l2) -*) - -let inter_mut ~into f = - if size into <> size f then invalid_arg "CCBloom.inter_mut"; - Bytes.iteri - (fun i c -> - Bytes.set into.arr i - (Char.chr (Char.code (Bytes.get into.arr i) land (Char.code c))) - ) f.arr - -let inter a b = - if size a <> size b then invalid_arg "CCBloom.inter"; - let into = copy a in - inter_mut ~into b; - into - -(*$Q - Q.(pair (list int)(list int)) (fun (l1,l2) -> \ - let f1=create 100 and f2 = create 100 in \ - add_list f1 l1; add_list f2 l2; \ - let f = inter f1 f2 in \ - List.for_all (fun i -> not (mem f1 i) || not (mem f2 i) || mem f i) (l1@l2)) -*) - -let add_list f l = List.iter (add f) l - -let add_seq f seq = seq (add f) - -let rec add_gen f g = match g() with - | None -> () - | Some x -> add f x; add_gen f g diff --git a/src/data/CCBloom.mli b/src/data/CCBloom.mli deleted file mode 100644 index db32c37c..00000000 --- a/src/data/CCBloom.mli +++ /dev/null @@ -1,77 +0,0 @@ -(* This file is free software, part of containers. See file "license" for more details. *) - -(** {1 Bloom Filter} - - {b status: experimental} - - @since 0.13 *) - -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -type 'a hash_funs = ('a -> int) array -(** An array of [k] hash functions on values of type ['a]. - Never ever modify such an array after use! *) - -val default_hash_funs : int -> 'a hash_funs -(** Use {!Hashtbl.seeded_hash} on [k] seeds - @param k the number of hash functions required *) - -(** {2 Bloom Filter} *) - -type 'a t -(** Bloom filter containing values of type ['a] *) - -val create : ?hash:('a hash_funs) -> int -> 'a t -(** [create ?hash size] creates a filter with given size, and functions. - By default it uses {!default_hash_funs} - @param size a hint for size *) - -val create_default : ?hash_len:int -> int -> 'a t -(** [create_default ?hash_len size] is the same as - [create ~hash:(default_hash_funs hash_len) size]. - It uses the given number of default hash functions. - @param size a hint for size *) - -val copy : 'a t -> 'a t -(** Copy of the filter *) - -val size : _ t -> int -(** Length of the underlying array. Do not confuse with a cardinal function, - which is impossible to write for bloom filters *) - -val load : _ t -> float -(** Ratio of 1 bits in the underlying array. The closer to [1.], the less - accurate {!mem} is *) - -val mem : 'a t -> 'a -> bool -(** [mem f x] tests whether [x] (probably) belongs to [f] *) - -val add : 'a t -> 'a -> unit -(** [add f x] adds [x] into [f] *) - -val union_mut : into:'a t -> 'a t -> unit -(** [union_mut ~into f] changes [into] into the union of [into] and [f]. - [into] and [f] MUST have the same set of hash functions - @raise Invalid_argument if the two sets do not have the same size *) - -val union : 'a t -> 'a t -> 'a t -(** The sets MUST have the same set of hash functions - @raise Invalid_argument if the two sets do not have the same size *) - -val inter_mut : into:'a t -> 'a t -> unit -(** [inter_mut ~into f] changes [into] into the intersection of [into] and [f] - [into] and [f] MUST have the same set of hash functions - @raise Invalid_argument if the two sets do not have the same size *) - -val inter : 'a t -> 'a t -> 'a t -(** The sets MUST have the same set of hash functions - @raise Invalid_argument if the two sets do not have the same size *) - -(** {2 Conversions} *) - -val add_list : 'a t -> 'a list -> unit - -val add_seq : 'a t -> 'a sequence -> unit - -val add_gen : 'a t -> 'a gen -> unit