From 0edc5ffb9df74ab22f990c53ba80512869497c1a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 22 Sep 2015 14:15:37 +0200 Subject: [PATCH] update `CCHash` with a functor and module type for generic hashing --- src/core/CCHash.ml | 144 ++++++++++++++++++++++++++++++++------------ src/core/CCHash.mli | 83 ++++++++++++++++++++++++- 2 files changed, 186 insertions(+), 41 deletions(-) diff --git a/src/core/CCHash.ml b/src/core/CCHash.ml index 17a53675..9c7cb60c 100644 --- a/src/core/CCHash.ml +++ b/src/core/CCHash.ml @@ -26,16 +26,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Hash combinators} *) type t = int -type state = int64 -type 'a hash_fun = 'a -> state -> state + +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] let _r = 47 let _m = 0xc6a4a7935bd1e995L -let init = _m (* TODO? *) +let init = _m (* combine key [k] with the current state [s] *) -let _combine s k = +let combine_murmur_ s k = let k = Int64.mul _m k in let k = Int64.logxor k (Int64.shift_right k _r) in let k = Int64.mul _m k in @@ -53,45 +55,111 @@ let apply f x = finish (f x init) (** {2 Combinators} *) -let int_ i s = _combine s (Int64.of_int i) -let bool_ x s = _combine s (if x then 1L else 2L) -let char_ x s = _combine s (Int64.of_int (Char.code x)) -let int32_ x s = _combine s (Int64.of_int32 x) -let int64_ x s = _combine s x -let nativeint_ x s = _combine s (Int64.of_nativeint x) -let string_ x s = - let s = ref s in - String.iter (fun c -> s := char_ c !s) x; - !s +(** {2 Generic Hashing} *) -let rec list_ f l s = match l with - | [] -> s - | x::l' -> list_ f l' (f x s) +module type HASH = sig + type state -let array_ f a s = Array.fold_right f a s + val int : int -> state -> state + val bool : bool -> state -> state + val char : char -> state -> state + val int32 : int32 -> state -> state + val int64 : int64 -> state -> state + val nativeint : nativeint -> state -> state + val slice : string -> int -> int -> state -> state + (** [slice s i len state] hashes the slice [[i, ... i+len)] of [s] + into [state] *) +end -let opt f o h = match o with - | None -> h - | Some x -> f x h -let pair h1 h2 (x,y) s = h2 y (h1 x s) -let triple h1 h2 h3 (x,y,z) s = h3 z (h2 y (h1 x s)) +module type S = sig + include HASH -let if_ b then_ else_ h = - if b then then_ h else else_ h + type 'a hash_fun = 'a -> state -> state -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option -type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] + val string : string hash_fun -let seq f seq s = - let s = ref s in - seq (fun x -> s := f x !s); - !s + val list : 'a hash_fun -> 'a list hash_fun -let rec gen f g s = match g () with - | None -> s - | Some x -> gen f g (f x s) + val array : 'a hash_fun -> 'a array hash_fun -let rec klist f l s = match l () with - | `Nil -> s - | `Cons (x,l') -> klist f l' (f x s) + val opt : 'a hash_fun -> 'a option hash_fun + val pair : 'a hash_fun -> 'b hash_fun -> ('a * 'b) hash_fun + val triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) hash_fun + + val if_ : bool -> 'a hash_fun -> 'a hash_fun -> 'a hash_fun + (** Decide which hash function to use depending on the boolean *) + + (** {2 Iterators} *) + + val seq : 'a hash_fun -> 'a sequence hash_fun + val gen : 'a hash_fun -> 'a gen hash_fun + val klist : 'a hash_fun -> 'a klist hash_fun +end + +module Base = struct + type state = int64 + let int i s = combine_murmur_ s (Int64.of_int i) + let bool x s = combine_murmur_ s (if x then 1L else 2L) + let char x s = combine_murmur_ s (Int64.of_int (Char.code x)) + let int32 x s = combine_murmur_ s (Int64.of_int32 x) + let int64 x s = combine_murmur_ s x + let nativeint x s = combine_murmur_ s (Int64.of_nativeint x) + + let slice x i len s = + let j=i+len in + let rec aux i s = + if i=j then s else aux (i+1) (char x.[i] s) + in + aux i s +end + +module Make(H : HASH) : S with type state = H.state = struct + include H + + type 'a hash_fun = 'a -> state -> state + + let rec list f l s = match l with + | [] -> s + | x::l' -> list f l' (f x s) + + let array f a s = Array.fold_right f a s + + let opt f o h = match o with + | None -> h + | Some x -> f x h + let pair h1 h2 (x,y) s = h2 y (h1 x s) + let triple h1 h2 h3 (x,y,z) s = h3 z (h2 y (h1 x s)) + + let string x s = slice x 0 (String.length x) s + + let if_ b then_ else_ h = + if b then then_ h else else_ h + + let seq f seq s = + let s = ref s in + seq (fun x -> s := f x !s); + !s + + let rec gen f g s = match g () with + | None -> s + | Some x -> gen f g (f x s) + + let rec klist f l s = match l () with + | `Nil -> s + | `Cons (x,l') -> klist f l' (f x s) +end + +include Make(Base) + +(* deprecated aliases *) + +let int_ = int +let bool_ = bool +let char_ = char +let int32_ = int32 +let int64_ = int64 +let nativeint_ = nativeint +let string_ = string + +let list_ = list +let array_ = array diff --git a/src/core/CCHash.mli b/src/core/CCHash.mli index 576e594b..a2c7eada 100644 --- a/src/core/CCHash.mli +++ b/src/core/CCHash.mli @@ -41,6 +41,8 @@ type 'a hash_fun = 'a -> state -> state (** Hash function for values of type ['a], merging a fingerprint of the value into the state of type [t] *) +(** {2 Applying Murmur Hash} *) + val init : state (** Initial value *) @@ -48,22 +50,44 @@ val finish : state -> int (** Extract a usable hash value *) val apply : 'a hash_fun -> 'a -> int -(** Apply a hash function to a value *) +(** Apply a hash function to a value. + [apply f x] is the same as [finish (f x init)] *) -(** {2 Basic Combinators} *) +(** {2 Basic Combinators} + + Those combinators have been renamed in NEXT_RELEASE, so as to + remove the trailing "_". + They are now defined by the application of {!Make} + + *) val bool_ : bool hash_fun +(** @deprecated use {!bool} *) + val char_ : char hash_fun +(** @deprecated use {!char} *) + val int_ : int hash_fun +(** @deprecated use {!int} *) + val string_ : string hash_fun +(** @deprecated use {!string} *) + val int32_ : int32 hash_fun +(** @deprecated use {!int32} *) + val int64_ : int64 hash_fun +(** @deprecated use {!int64} *) + val nativeint_ : nativeint hash_fun +(** @deprecated use {!nativeint} *) val list_ : 'a hash_fun -> 'a list hash_fun -(** Hash a list. Each element is hashed using [f]. *) +(** Hash a list. Each element is hashed using [f]. + @deprecated use {!list} *) val array_ : 'a hash_fun -> 'a array hash_fun +(** @deprecated use {!array} *) val opt : 'a hash_fun -> 'a option hash_fun val pair : 'a hash_fun -> 'b hash_fun -> ('a * 'b) hash_fun @@ -72,6 +96,8 @@ val triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) hash_fu val if_ : bool -> 'a hash_fun -> 'a hash_fun -> 'a hash_fun (** Decide which hash function to use depending on the boolean *) +(** {2 Iterators} *) + type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] @@ -79,3 +105,54 @@ type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] val seq : 'a hash_fun -> 'a sequence hash_fun val gen : 'a hash_fun -> 'a gen hash_fun val klist : 'a hash_fun -> 'a klist hash_fun + +(** {2 Generic Hashing} + + Parametrize over the state, and some primitives to hash basic types. + This can for instance be used for cryptographic hashing or + checksums such as MD5. + + @since NEXT_RELEASE *) + +module type HASH = sig + type state + + val int : int -> state -> state + val bool : bool -> state -> state + val char : char -> state -> state + val int32 : int32 -> state -> state + val int64 : int64 -> state -> state + val nativeint : nativeint -> state -> state + val slice : string -> int -> int -> state -> state + (** [slice s i len state] hashes the slice [[i, ... i+len)] of [s] + into [state] *) +end + +module type S = sig + include HASH + + type 'a hash_fun = 'a -> state -> state + + val string : string hash_fun + + val list : 'a hash_fun -> 'a list hash_fun + + val array : 'a hash_fun -> 'a array hash_fun + + val opt : 'a hash_fun -> 'a option hash_fun + val pair : 'a hash_fun -> 'b hash_fun -> ('a * 'b) hash_fun + val triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) hash_fun + + val if_ : bool -> 'a hash_fun -> 'a hash_fun -> 'a hash_fun + (** Decide which hash function to use depending on the boolean *) + + (** {2 Iterators} *) + + val seq : 'a hash_fun -> 'a sequence hash_fun + val gen : 'a hash_fun -> 'a gen hash_fun + val klist : 'a hash_fun -> 'a klist hash_fun +end + +module Make(H : HASH) : S with type state = H.state + +include S with type state := state and type 'a hash_fun := 'a hash_fun