CCHash now uses murmur hash

This commit is contained in:
Simon Cruanes 2014-06-23 23:01:01 +02:00
parent ef06d117cb
commit fcd2085190
2 changed files with 76 additions and 52 deletions

View file

@ -25,51 +25,66 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(** {1 Hash combinators} *) (** {1 Hash combinators} *)
type t = int type t = int64
type 'a hash_fun = 'a -> t type 'a hash_fun = 'a -> t -> t
let combine hash i = let _r = 47
(hash * 65599 + i) land max_int let _m = 0xc6a4a7935bd1e995L
let (<<>>) = combine let init = _m (* TODO? *)
let hash_int i = combine 0 i (* combine key [k] with the current state [s] *)
(* Mix the 64-bit key [k] into the running hash state [s] and return the
   new state. This is one round of the 64-bit Murmur hash: the key is
   scrambled (multiply, xor-shift, multiply) and then folded into the
   state with a xor followed by a multiplication by [_m]. *)
let _combine s k =
  let k = Int64.mul _m k in
  (* The shift must be logical (zero-filling). An arithmetic shift copies
     the sign bit into the upper bits, which makes [k] and [lognot k]
     produce the same value here and throws away one bit of every key. *)
  let k = Int64.logxor k (Int64.shift_right_logical k _r) in
  let k = Int64.mul _m k in
  Int64.mul _m (Int64.logxor s k)
let hash_int2 i j = combine i j let finish s =
let s = Int64.logxor s (Int64.shift_right s _r) in
let s = Int64.mul s _m in
let s = Int64.logxor s (Int64.shift_right s _r) in
(Int64.to_int s) land max_int
let hash_int3 i j k = combine (combine i j) k let apply f x = finish (f x init)
let hash_int4 i j k l = (** {2 Combinators} *)
combine (combine (combine i j) k) l
let rec hash_list f h l = match l with let int_ i s = _combine s (Int64.of_int i)
| [] -> h let bool_ x s = _combine s (if x then 1L else 2L)
| x::l' -> hash_list f (combine h (f x)) l' let char_ x s = _combine s (Int64.of_int (Char.code x))
(* Hashers for the fixed-width integer types: the value is converted to
   an int64 key (when it is not one already) and mixed into the state. *)
let int32_ x s =
  let key = Int64.of_int32 x in
  _combine s key
let int64_ key s = _combine s key
let nativeint_ x s =
  let key = Int64.of_nativeint x in
  _combine s key
(* Hash the string [x] into state [s] by feeding its characters to
   [char_] one at a time, from first to last. The empty string leaves
   the state unchanged. *)
let string_ x s =
  let state = ref s in
  for i = 0 to String.length x - 1 do
    state := char_ x.[i] !state
  done;
  !state
let hash_array f h a = let rec list_ f l s = match l with
let h = ref h in | [] -> s
Array.iter (fun x -> h := combine !h (f x)) a; | x::l' -> list_ f l' (f x s)
!h
let hash_string s = Hashtbl.hash s let array_ f a s = Array.fold_right f a s
let hash_pair h1 h2 (x,y) = combine (h1 x) (h2 y) let pair h1 h2 (x,y) s = h2 y (h1 x s)
let hash_triple h1 h2 h3 (x,y,z) = (h1 x) <<>> (h2 y) <<>> (h3 z) let triple h1 h2 h3 (x,y,z) s = h3 z (h2 y (h1 x s))
type 'a sequence = ('a -> unit) -> unit type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option type 'a gen = unit -> 'a option
type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist]
let hash_seq f h seq = let seq f seq s =
let h = ref h in let s = ref s in
seq (fun x -> h := !h <<>> f x); seq (fun x -> s := f x !s);
!h !s
let rec hash_gen f h g = match g () with let rec gen f g s = match g () with
| None -> h | None -> s
| Some x -> | Some x -> gen f g (f x s)
hash_gen f (h <<>> f x) g
let rec hash_klist f h l = match l () with let rec klist f l s = match l () with
| `Nil -> h | `Nil -> s
| `Cons (x,l') -> hash_klist f (h <<>> f x) l' | `Cons (x,l') -> klist f l' (f x s)

View file

@ -25,40 +25,49 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(** {1 Hash combinators} (** {1 Hash combinators}
Combination of hashes based on the Combination of hashes based on the Murmur Hash (64 bits). See
SDBM simple hash (see for instance {{:https://sites.google.com/site/murmurhash/MurmurHash2_64.cpp?attredirects=0} this page}
{{:http://www.cse.yorku.ca/~oz/hash.html} this page})
*) *)
type t = int (** {2 Definitions} *)
type 'a hash_fun = 'a -> t type t = private int64
val combine : t -> t -> t type 'a hash_fun = 'a -> t -> t
(** Combine two hashes. Non-commutative. *) (** Hash function for values of type ['a], merging a fingerprint of the
value into the state of type [t] *)
val (<<>>) : t -> t -> t val init : t
(** Infix version of {!combine} *) (** Initial value *)
val hash_int : int -> t val finish : t -> int
val hash_int2 : int -> int -> t (** Extract a usable hash value *)
val hash_int3 : int -> int -> int -> t
val hash_int4 : int -> int -> int -> int -> t
val hash_string : string -> t val apply : 'a hash_fun -> 'a -> int
(** Apply a hash function to a value *)
val hash_list : 'a hash_fun -> t -> 'a list hash_fun (** {2 Basic Combinators} *)
val bool_ : bool hash_fun
(** Hash a boolean; [true] and [false] are mixed in as two distinct keys. *)
val char_ : char hash_fun
(** Hash a character through its character code. *)
val int_ : int hash_fun
(** Hash a native integer. *)
val string_ : string hash_fun
(** Hash a string by hashing each of its characters in order. *)
val int32_ : int32 hash_fun
(** Hash a 32-bit integer. *)
val int64_ : int64 hash_fun
(** Hash a 64-bit integer. *)
val nativeint_ : nativeint hash_fun
(** Hash a platform-native integer. *)
val list_ : 'a hash_fun -> 'a list hash_fun
(** Hash a list. Each element is hashed using [f]. *) (** Hash a list. Each element is hashed using [f]. *)
val hash_array : 'a hash_fun -> t -> 'a array hash_fun val array_ : 'a hash_fun -> 'a array hash_fun
val hash_pair : 'a hash_fun -> 'b hash_fun -> ('a * 'b) hash_fun val pair : 'a hash_fun -> 'b hash_fun -> ('a * 'b) hash_fun
val hash_triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) hash_fun val triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) hash_fun
type 'a sequence = ('a -> unit) -> unit type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option type 'a gen = unit -> 'a option
type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist]
val hash_seq : 'a hash_fun -> t -> 'a sequence hash_fun val seq : 'a hash_fun -> 'a sequence hash_fun
val hash_gen : 'a hash_fun -> t -> 'a gen hash_fun val gen : 'a hash_fun -> 'a gen hash_fun
val hash_klist : 'a hash_fun -> t -> 'a klist hash_fun val klist : 'a hash_fun -> 'a klist hash_fun