CCHash now uses murmur hash

This commit is contained in:
Simon Cruanes 2014-06-23 23:01:01 +02:00
parent ef06d117cb
commit fcd2085190
2 changed files with 76 additions and 52 deletions

View file

@ -25,51 +25,66 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(** {1 Hash combinators} *)
(* Hash value represented as a native integer. *)
type t = int
(* A hash function maps a value of type ['a] directly to a hash. *)
type 'a hash_fun = 'a -> t
(* Hash state represented as a 64-bit integer; it is threaded through the
   combinators below. *)
type t = int64
(* A hash function takes a value of type ['a] and a state, and returns the
   state updated with that value. *)
type 'a hash_fun = 'a -> t -> t
(* [combine hash i] folds [i] into [hash]: the running hash is multiplied by
   65599, [i] is added, and the result is masked with [max_int] so that it is
   never negative. *)
let combine hash i =
  let mixed = (hash * 65599) + i in
  mixed land max_int
(* Right-shift amount used by the xor-shift steps of [_combine] and
   [finish]. *)
let _r = 47
(* 64-bit multiplier used by every multiplication step of [_combine] and
   [finish]; [init] also reuses it as the starting state. *)
let _m = 0xc6a4a7935bd1e995L
(* Infix form of [combine]: [h <<>> i] is [combine h i]. *)
let (<<>>) h i = combine h i
(* Initial hash state. It currently just reuses the multiplier [_m]; the
   TODO marks that choice as open. *)
let init = _m (* TODO? *)
(* Hash a single integer by combining it into the seed [0]. *)
let hash_int = combine 0
(* [_combine s k] mixes the 64-bit key [k] into the state [s] and returns the
   new state.

   The key is scrambled first (multiply by [_m], xor-shift by [_r], multiply
   by [_m] again), then xor-ed into the state, which is finally multiplied by
   [_m].

   The xor-shift must use a logical (zero-filling) shift. With an arithmetic
   shift the sign bit is replicated, so [x lxor (x asr r)] returns the same
   value for [x] and [lognot x], which loses one bit of the key. *)
let _combine s k =
  let k = Int64.mul _m k in
  let k = Int64.logxor k (Int64.shift_right_logical k _r) in
  let k = Int64.mul _m k in
  let s = Int64.logxor s k in
  Int64.mul _m s
(* Hash two integers: the first is the running hash, the second is combined
   into it. This is exactly [combine]. *)
let hash_int2 = combine
(* [finish s] turns the 64-bit state [s] into a non-negative native [int].

   The state goes through a final avalanche (xor-shift, multiply by [_m],
   xor-shift), is truncated with [Int64.to_int], and is masked with [max_int]
   to clear the sign bit.

   Both xor-shifts use a logical shift. An arithmetic shift would replicate
   the sign bit, which makes [s] and [lognot s] collide after the first
   step. *)
let finish s =
  let s = Int64.logxor s (Int64.shift_right_logical s _r) in
  let s = Int64.mul s _m in
  let s = Int64.logxor s (Int64.shift_right_logical s _r) in
  Int64.to_int s land max_int
(* Hash three integers by chaining [combine] from left to right. *)
let hash_int3 i j k =
  let ij = combine i j in
  combine ij k
(* [apply f x] runs the hash function [f] on [x] starting from [init], then
   extracts the final integer with [finish]. *)
let apply f x = init |> f x |> finish
(* Hash four integers: starting from [i], combine [j], [k] and [l] into the
   running hash in that order. *)
let hash_int4 i j k l = List.fold_left combine i [j; k; l]
(** {2 Combinators} *)
(* [hash_list f h l] hashes every element of [l] with [f] and combines the
   results into the initial hash [h], from the first element to the last. *)
let hash_list f h l =
  List.fold_left (fun acc x -> combine acc (f x)) h l
(* Primitive combinators: each converts its argument to an [int64] key and
   mixes that key into the state [s] with [_combine]. *)

(* Native integer, widened with [Int64.of_int]. *)
let int_ i s = Int64.of_int i |> _combine s
(* Boolean: [true] is keyed as [1L], [false] as [2L]. *)
let bool_ x s =
  let key = match x with true -> 1L | false -> 2L in
  _combine s key
(* Character, keyed by its code. *)
let char_ x s =
  let code = Char.code x in
  _combine s (Int64.of_int code)
(* 32-bit integer, widened with [Int64.of_int32]. *)
let int32_ x s = Int64.of_int32 x |> _combine s
(* 64-bit integer, used directly as the key. *)
let int64_ x s = x |> _combine s
(* Native-width integer, converted with [Int64.of_nativeint]. *)
let nativeint_ x s = Int64.of_nativeint x |> _combine s
(* [string_ x s] mixes every character of [x] into the state [s] with
   [char_], from the first character to the last. *)
let string_ x s =
  let len = String.length x in
  let rec go i acc =
    if i = len then acc else go (i + 1) (char_ x.[i] acc)
  in
  go 0 s
(* [hash_array f h a] hashes every element of [a] with [f] and combines the
   results into the initial hash [h], from index 0 upwards. *)
let hash_array f h a =
  Array.fold_left (fun acc x -> combine acc (f x)) h a
(* [list_ f l s] threads the state [s] through [f] for every element of [l],
   from the head to the tail, and returns the final state. *)
let list_ f l s = List.fold_left (fun acc x -> f x acc) s l
(* Hash a string by delegating to the standard [Hashtbl.hash]. *)
let hash_string s = s |> Hashtbl.hash
(* [array_ f a s] threads the state [s] through [f] for every element of
   [a], from index 0 to the last index, and returns the final state.

   Elements are fed left to right, the same order as [list_], so an array
   and a list holding the same elements produce the same state. *)
let array_ f a s = Array.fold_left (fun acc x -> f x acc) s a
(* Hash a pair: the hash of the first component is combined with the hash of
   the second. *)
let hash_pair h1 h2 (x, y) = h1 x <<>> h2 y
(* Hash a triple: the component hashes are combined from left to right. *)
let hash_triple h1 h2 h3 (x, y, z) =
  combine (combine (h1 x) (h2 y)) (h3 z)
(* [pair h1 h2 (x, y) s] mixes [x] into the state with [h1], then [y] with
   [h2]. *)
let pair h1 h2 (x, y) s = s |> h1 x |> h2 y
(* [triple h1 h2 h3 (x, y, z) s] mixes [x], [y] and [z] into the state, in
   that order, with [h1], [h2] and [h3] respectively. *)
let triple h1 h2 h3 (x, y, z) s = s |> h1 x |> h2 y |> h3 z
(* Iterator: a function that calls the given continuation on each element. *)
type 'a sequence = ('a -> unit) -> unit
(* Generator: each call returns [Some x] for the next element, or [None]. *)
type 'a gen = unit -> 'a option
(* Lazy list: forcing it yields either [`Nil] or [`Cons] of an element and
   the rest of the list. *)
type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist]
(* [hash_seq f h seq] hashes every element produced by the iterator [seq]
   with [f] and combines the results into the initial hash [h], in iteration
   order. *)
let hash_seq f h seq =
  let acc = ref h in
  let step x = acc := !acc <<>> f x in
  seq step;
  !acc
(* [seq f seq s] threads the state [s] through [f] for every element
   produced by the iterator [seq], in iteration order, and returns the final
   state. *)
let seq f seq s =
  let state = ref s in
  let feed x = state := f x !state in
  seq feed;
  !state
(* [hash_gen f h g] pulls elements from the generator [g] until it returns
   [None], hashing each with [f] and combining the results into the initial
   hash [h]. *)
let hash_gen f h g =
  let rec loop acc =
    match g () with
    | Some x -> loop (acc <<>> f x)
    | None -> acc
  in
  loop h
(* [gen f g s] pulls elements from the generator [g] until it returns
   [None], threading the state [s] through [f] for each element, and returns
   the final state. *)
let gen f g s =
  let rec loop acc =
    match g () with
    | Some x -> loop (f x acc)
    | None -> acc
  in
  loop s
(* [hash_klist f h l] walks the lazy list [l] until [`Nil], hashing each
   element with [f] and combining the results into the initial hash [h]. *)
let hash_klist f h l =
  let rec walk node acc =
    match node () with
    | `Cons (x, rest) -> walk rest (acc <<>> f x)
    | `Nil -> acc
  in
  walk l h
(* [klist f l s] walks the lazy list [l] until [`Nil], threading the state
   [s] through [f] for each element, and returns the final state. *)
let klist f l s =
  let rec walk node acc =
    match node () with
    | `Cons (x, rest) -> walk rest (f x acc)
    | `Nil -> acc
  in
  walk l s

View file

@ -25,40 +25,49 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(** {1 Hash combinators}
Combination of hashes based on the
SDBM simple hash (see for instance
{{:http://www.cse.yorku.ca/~oz/hash.html} this page})
Combination of hashes based on the Murmur Hash (64 bits). See
{{:https://sites.google.com/site/murmurhash/MurmurHash2_64.cpp?attredirects=0} this page}
*)
type t = int
(** Hash value represented as a native integer. *)
(** {2 Definitions} *)
type 'a hash_fun = 'a -> t
(** Hash function mapping a value of type ['a] directly to a hash. *)
type t = private int64
(** Hash state. It is a 64-bit integer, exposed as [private] so that code
    outside this module cannot build a state directly. *)
val combine : t -> t -> t
(** Combine two hashes. Non-commutative. *)
type 'a hash_fun = 'a -> t -> t
(** Hash function for values of type ['a], merging a fingerprint of the
value into the state of type [t] *)
val (<<>>) : t -> t -> t
(** Infix version of {!combine} *)
val init : t
(** Initial value *)
val hash_int : int -> t
(** Hash a single integer. *)
val hash_int2 : int -> int -> t
(** Combine two integers into one hash. *)
val hash_int3 : int -> int -> int -> t
(** Combine three integers into one hash, from left to right. *)
val hash_int4 : int -> int -> int -> int -> t
(** Combine four integers into one hash, from left to right. *)
val finish : t -> int
(** Extract a usable hash value *)
val hash_string : string -> t
(** Hash a string. *)
val apply : 'a hash_fun -> 'a -> int
(** Apply a hash function to a value *)
val hash_list : 'a hash_fun -> t -> 'a list hash_fun
(** [hash_list f h l] hashes each element of [l] with [f] and combines the
    results, in list order, into the initial hash [h]. *)
(** {2 Basic Combinators} *)
val bool_ : bool hash_fun
(** Mix a boolean into the state. *)
val char_ : char hash_fun
(** Mix a character into the state. *)
val int_ : int hash_fun
(** Mix a native integer into the state. *)
val string_ : string hash_fun
(** Mix every character of a string into the state, in order. *)
val int32_ : int32 hash_fun
(** Mix a 32-bit integer into the state. *)
val int64_ : int64 hash_fun
(** Mix a 64-bit integer into the state. *)
val nativeint_ : nativeint hash_fun
(** Mix a [nativeint] into the state. *)
val list_ : 'a hash_fun -> 'a list hash_fun
(** Hash a list. Each element is hashed using [f]. *)
val hash_array : 'a hash_fun -> t -> 'a array hash_fun
(** [hash_array f h a] hashes each element of [a] with [f] and combines the
    results into the initial hash [h]. *)
val array_ : 'a hash_fun -> 'a array hash_fun
(** Hash an array. Each element is hashed using [f]. *)
val hash_pair : 'a hash_fun -> 'b hash_fun -> ('a * 'b) hash_fun
(** Hash a pair by combining the hashes of its two components. *)
val hash_triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) hash_fun
(** Hash a triple by combining the hashes of its three components. *)
val pair : 'a hash_fun -> 'b hash_fun -> ('a * 'b) hash_fun
(** Hash a pair: the first component is mixed into the state, then the
    second. *)
val triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) hash_fun
(** Hash a triple: the components are mixed into the state from first to
    last. *)
type 'a sequence = ('a -> unit) -> unit
(** Iterator: a function that calls the given continuation on each element. *)
type 'a gen = unit -> 'a option
(** Generator: each call returns [Some x] for the next element, or [None]. *)
type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist]
(** Lazy list: forcing it yields either [`Nil] or [`Cons] of an element and
    the rest of the list. *)
val hash_seq : 'a hash_fun -> t -> 'a sequence hash_fun
(** [hash_seq f h s] hashes each element of the iterator [s] with [f] and
    combines the results into the initial hash [h]. *)
val hash_gen : 'a hash_fun -> t -> 'a gen hash_fun
(** [hash_gen f h g] hashes each element produced by the generator [g] with
    [f] and combines the results into the initial hash [h]. *)
val hash_klist : 'a hash_fun -> t -> 'a klist hash_fun
(** [hash_klist f h l] hashes each element of the lazy list [l] with [f] and
    combines the results into the initial hash [h]. *)
val seq : 'a hash_fun -> 'a sequence hash_fun
(** Hash an iterator. Each element is hashed using [f], in iteration
    order. *)
val gen : 'a hash_fun -> 'a gen hash_fun
(** Hash a generator, consuming it until it returns [None]. *)
val klist : 'a hash_fun -> 'a klist hash_fun
(** Hash a lazy list, traversing it until [`Nil]. *)