From fcd2085190b9ae2ed5e920eb3b1d5c60446c7117 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 23 Jun 2014 23:01:01 +0200 Subject: [PATCH] CCHash now uses murmur hash --- core/CCHash.ml | 77 +++++++++++++++++++++++++++++-------------------- core/CCHash.mli | 51 ++++++++++++++++++-------------- 2 files changed, 76 insertions(+), 52 deletions(-) diff --git a/core/CCHash.ml b/core/CCHash.ml index e485d228..6edbe8c1 100644 --- a/core/CCHash.ml +++ b/core/CCHash.ml @@ -25,51 +25,66 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Hash combinators} *) -type t = int -type 'a hash_fun = 'a -> t +type t = int64 +type 'a hash_fun = 'a -> t -> t -let combine hash i = - (hash * 65599 + i) land max_int +let _r = 47 +let _m = 0xc6a4a7935bd1e995L -let (<<>>) = combine +let init = _m (* TODO? *) -let hash_int i = combine 0 i +(* combine key [k] with the current state [s] *) +let _combine s k = + let k = Int64.mul _m k in + let k = Int64.logxor k (Int64.shift_right k _r) in + let k = Int64.mul _m k in + let s = Int64.logxor s k in + let s = Int64.mul _m s in + s -let hash_int2 i j = combine i j +let finish s = + let s = Int64.logxor s (Int64.shift_right s _r) in + let s = Int64.mul s _m in + let s = Int64.logxor s (Int64.shift_right s _r) in + (Int64.to_int s) land max_int -let hash_int3 i j k = combine (combine i j) k +let apply f x = finish (f x init) -let hash_int4 i j k l = - combine (combine (combine i j) k) l +(** {2 Combinateurs} *) -let rec hash_list f h l = match l with - | [] -> h - | x::l' -> hash_list f (combine h (f x)) l' +let int_ i s = _combine s (Int64.of_int i) +let bool_ x s = _combine s (if x then 1L else 2L) +let char_ x s = _combine s (Int64.of_int (Char.code x)) +let int32_ x s = _combine s (Int64.of_int32 x) +let int64_ x s = _combine s x +let nativeint_ x s = _combine s (Int64.of_nativeint x) +let string_ x s = + let s = ref s in + String.iter (fun c -> s := char_ c !s) x; + !s -let hash_array f h a = - let h = ref h in - Array.iter (fun x -> h := combine !h (f x)) a; - !h +let rec list_ f l s = match l with + | [] -> s + | x::l' -> list_ f l' (f x s) -let hash_string s = Hashtbl.hash s +let array_ f a s = Array.fold_right f a s -let hash_pair h1 h2 (x,y) = combine (h1 x) (h2 y) -let hash_triple h1 h2 h3 (x,y,z) = (h1 x) <<>> (h2 y) <<>> (h3 z) +let pair h1 h2 (x,y) s = h2 y (h1 x s) +let triple h1 h2 h3 (x,y,z) s = h3 z (h2 y (h1 x s)) type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] -let hash_seq f h seq = - let h = ref h in - seq (fun x -> h := !h <<>> f x); - !h +let seq f seq s = + let s = ref s in + seq (fun x -> s := f x !s); + !s -let rec hash_gen f h g = match g () with - | None -> h - | Some x -> - hash_gen f (h <<>> f x) g +let rec gen f g s = match g () with + | None -> s + | Some x -> gen f g (f x s) -let rec hash_klist f h l = match l () with - | `Nil -> h - | `Cons (x,l') -> hash_klist f (h <<>> f x) l' +let rec klist f l s = match l () with + | `Nil -> s + | `Cons (x,l') -> klist f l' (f x s) diff --git a/core/CCHash.mli b/core/CCHash.mli index e250ed10..33c56263 100644 --- a/core/CCHash.mli +++ b/core/CCHash.mli @@ -25,40 +25,49 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Hash combinators} -Combination of hashes based on the -SDBM simple hash (see for instance -{{:http://www.cse.yorku.ca/~oz/hash.html} this page}) +Combination of hashes based on the Murmur Hash (64 bits). See +{{:https://sites.google.com/site/murmurhash/MurmurHash2_64.cpp?attredirects=0} this page} *) -type t = int +(** {2 Definitions} *) -type 'a hash_fun = 'a -> t +type t = private int64 -val combine : t -> t -> t - (** Combine two hashes. Non-commutative. *) +type 'a hash_fun = 'a -> t -> t +(** Hash function for values of type ['a], merging a fingerprint of the + value into the state of type [t] *) -val (<<>>) : t -> t -> t - (** Infix version of {!combine} *) +val init : t +(** Initial value *) -val hash_int : int -> t -val hash_int2 : int -> int -> t -val hash_int3 : int -> int -> int -> t -val hash_int4 : int -> int -> int -> int -> t +val finish : t -> int +(** Extract a usable hash value *) -val hash_string : string -> t +val apply : 'a hash_fun -> 'a -> int +(** Apply a hash function to a value *) -val hash_list : 'a hash_fun -> t -> 'a list hash_fun +(** {2 Basic Combinators} *) + +val bool_ : bool hash_fun +val char_ : char hash_fun +val int_ : int hash_fun +val string_ : string hash_fun +val int32_ : int32 hash_fun +val int64_ : int64 hash_fun +val nativeint_ : nativeint hash_fun + +val list_ : 'a hash_fun -> 'a list hash_fun (** Hash a list. Each element is hashed using [f]. *) -val hash_array : 'a hash_fun -> t -> 'a array hash_fun +val array_ : 'a hash_fun -> 'a array hash_fun -val hash_pair : 'a hash_fun -> 'b hash_fun -> ('a * 'b) hash_fun -val hash_triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) hash_fun +val pair : 'a hash_fun -> 'b hash_fun -> ('a * 'b) hash_fun +val triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) hash_fun type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] -val hash_seq : 'a hash_fun -> t -> 'a sequence hash_fun -val hash_gen : 'a hash_fun -> t -> 'a gen hash_fun -val hash_klist : 'a hash_fun -> t -> 'a klist hash_fun +val seq : 'a hash_fun -> 'a sequence hash_fun +val gen : 'a hash_fun -> 'a gen hash_fun +val klist : 'a hash_fun -> 'a klist hash_fun