diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 11ffee55..fd97b91a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,9 +7,11 @@ repos: pass_filenames: false language: system types: [text] + stages: [pre-push] - id: dune-build-opam name: dune build - entry: dune build @opam + entry: dune build @install pass_filenames: false language: system types: [text] + stages: [pre-push] diff --git a/benchs/bench_hash.ml b/benchs/bench_hash.ml index 097c05f7..110f284c 100644 --- a/benchs/bench_hash.ml +++ b/benchs/bench_hash.ml @@ -13,14 +13,16 @@ let repeat = 3 (* --- data setup ---------------------------------------------------------- *) let n_ints = 1_000 -let ints = Array.init n_ints (fun i -> i * 2654435761 (* knuth multiplicative *)) + +let ints = + Array.init n_ints (fun i -> i * 2654435761 (* knuth multiplicative *)) let short_str = String.make 16 'x' let medium_str = String.make 64 'x' let long_str = String.make 512 'x' (* Strings of various lengths with distinct content *) -let strings_short = Array.init n_ints (fun i -> Printf.sprintf "%016d" i) +let strings_short = Array.init n_ints (fun i -> Printf.sprintf "%016d" i) let strings_medium = Array.init n_ints (fun i -> Printf.sprintf "%064d" i) (* --- benchmarks ---------------------------------------------------------- *) @@ -29,9 +31,7 @@ let bench_int_hash ~time () = let r = ref 0 in B.throughputN time ~repeat [ - ( "CCHash.int", - (fun () -> Array.iter (fun x -> r := CCHash.int x) ints), - () ); + "CCHash.int", (fun () -> Array.iter (fun x -> r := CCHash.int x) ints), (); ( "Hashtbl.hash (poly)", (fun () -> Array.iter (fun x -> r := Hashtbl.hash x) ints), () ); @@ -52,15 +52,9 @@ let bench_string_hash ~time () = ( "CCHash.string/64", (fun () -> Array.iter (fun s -> r := CCHash.string s) strings_medium), () ); - ( "CCHash.string literal/16", - (fun () -> r := CCHash.string short_str), - () ); - ( "CCHash.string literal/64", - (fun () -> r := CCHash.string medium_str), - () ); - ( "CCHash.string literal/512", - (fun () -> r := CCHash.string long_str), - () ); + "CCHash.string literal/16", (fun () -> r := CCHash.string short_str), (); + "CCHash.string literal/64", (fun () -> r := CCHash.string medium_str), (); + "CCHash.string literal/512", (fun () -> r := CCHash.string long_str), (); ( "Hashtbl.hash/16", (fun () -> Array.iter (fun s -> r := Hashtbl.hash s) strings_short), () ); @@ -85,7 +79,7 @@ let bench_combine64 ~time () = (Int64.of_int (x lxor 0xaaaa))) (Int64.of_int (x + 1))) (Int64.of_int (x * 3))) - (Int64.of_int (x lxor x lsr 7)))) + (Int64.of_int (x lxor (x lsr 7))))) ints), () ); ( "CCHash.list int [1..5]", @@ -99,9 +93,12 @@ let bench_combine64 ~time () = let () = B.Tree.add_global "hash" B.Tree.( - "int" @>> (fun () -> bench_int_hash ~time:2 ()) - @> "string" @>> (fun () -> bench_string_hash ~time:2 ()) - @> "combine64" @>> (fun () -> bench_combine64 ~time:2 ()) + "int" + @>> (fun () -> bench_int_hash ~time:2 ()) + @> "string" + @>> (fun () -> bench_string_hash ~time:2 ()) + @> "combine64" + @>> (fun () -> bench_combine64 ~time:2 ()) @> nil) let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg diff --git a/benchs/run_bench_hash.ml b/benchs/run_bench_hash.ml index 14cf8830..18f212ba 100644 --- a/benchs/run_bench_hash.ml +++ b/benchs/run_bench_hash.ml @@ -21,7 +21,8 @@ let rec eq t1 t2 = let rec hash_tree t = match t with | Empty -> CCHash.string "empty" - | Node (i, l) -> CCHash.(combine2 (int i) (list hash_tree l)) + | Node (i, l) -> + CCHash.((combine2 [@alert "-deprecated"]) (int i) (list hash_tree l)) module H = Hashtbl.Make (struct type t = tree diff --git a/run_bench_hash.sh b/run_bench_hash.sh index 90114057..cfe3612d 100755 --- a/run_bench_hash.sh +++ b/run_bench_hash.sh @@ -1,4 +1,4 @@ #!/bin/sh OPTS="--profile=release --display=quiet" -exec dune exec $OPTS -- benchs/run_benchs_hash.exe $@ +exec dune exec $OPTS -- ./benchs/run_bench_hash.exe $@ diff --git a/src/bencode/containers_bencode.ml b/src/bencode/containers_bencode.ml index c9c63ca7..3160ca59 100644 --- a/src/bencode/containers_bencode.ml +++ b/src/bencode/containers_bencode.ml @@ -23,8 +23,8 @@ let rec hash t = | List l -> H.combine2 20 (H.list hash l) | Map l -> H.combine2 30 - ( H.iter (H.pair H.string hash) @@ fun k -> - Str_map.iter (fun x y -> k (x, y)) l ) + (H.iter (H.pair H.string hash) @@ fun k -> + Str_map.iter (fun x y -> k (x, y)) l) let int64 i : t = Int i let int i : t = int64 (Int64.of_int i) diff --git a/src/core/CCHash.ml b/src/core/CCHash.ml index 06d64dfd..ef03010d 100644 --- a/src/core/CCHash.ml +++ b/src/core/CCHash.ml @@ -7,56 +7,44 @@ type 'a t = 'a -> hash type 'a iter = ('a -> unit) -> unit type 'a gen = unit -> 'a option -(** {2 Full-strength int64 API} *) - -let seed : int64 = Hash_impl_.seed - -let[@inline] combine64 (s : int64) (c : int64) : int64 = Hash_impl_.combine_i64 s c - -let[@inline] finalize (s : int64) : int = Hash_impl_.finalize s - -let[@inline] finalize_i64 (s : int64) : int64 = Hash_impl_.fmix64 s - -(** {2 Deprecated int-state combinators} *) let[@inline] combine2 a b = - Hash_impl_.(finalize (combine_i64 (Int64.of_int a) (Int64.of_int b))) + Hash_impl_.(finalize (combine_int (combine_int seed a) b)) let[@inline] combine f s x = - Hash_impl_.(finalize (combine_i64 (Int64.of_int s) (Int64.of_int (f x)))) + Hash_impl_.(finalize (combine_int (combine_int seed s) (f x))) let combine3 a b c = Hash_impl_.( - let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in - finalize (combine_i64 s (Int64.of_int c))) + let s = combine_int (combine_int seed a) b in + finalize (combine_int s c)) let combine4 a b c d = Hash_impl_.( - let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in - let s = combine_i64 s (Int64.of_int c) in - finalize (combine_i64 s (Int64.of_int d))) + let s = combine_int (combine_int seed a) b in + let s = combine_int s c in + finalize (combine_int s d)) let combine5 a b c d e = Hash_impl_.( - let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in - let s = combine_i64 s (Int64.of_int c) in - let s = combine_i64 s (Int64.of_int d) in - finalize (combine_i64 s (Int64.of_int e))) + let s = combine_int (combine_int seed a) b in + let s = combine_int s c in + let s = combine_int s d in + finalize (combine_int s e)) let combine6 a b c d e f = Hash_impl_.( - let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in - let s = combine_i64 s (Int64.of_int c) in - let s = combine_i64 s (Int64.of_int d) in - let s = combine_i64 s (Int64.of_int e) in - finalize (combine_i64 s (Int64.of_int f))) + let s = combine_int (combine_int seed a) b in + let s = combine_int s c in + let s = combine_int s d in + let s = combine_int s e in + finalize (combine_int s f)) (** {2 Primitive hashers} *) let const h _ = h let const0 _ = 0 - -let int n = Hash_impl_.(finalize (combine_i64 seed (Int64.of_int n))) +let int n = Hash_impl_.(finalize (combine_int seed n)) let bool b = int @@ -66,12 +54,9 @@ let bool b = 2) let char x = Hash_impl_.(finalize (combine_char seed (Char.code x))) - let int64 (n : int64) : int = Hash_impl_.(finalize (combine_i64 seed n)) - let int32 (x : int32) : int = Hash_impl_.(finalize (combine_i32 seed x)) - -let nativeint (x : nativeint) = int64 (Int64.of_nativeint x) +let nativeint (x : nativeint) = Hash_impl_.(finalize (combine_i64 seed (Int64.of_nativeint x))) let bytes (x : bytes) = Hash_impl_.(finalize (combine_string seed (Bytes.unsafe_to_string x))) @@ -92,40 +77,35 @@ let slice x i len = let opt f = function | None -> 42 | Some x -> - Hash_impl_.(finalize (combine_i64 (combine_i64 seed 43L) (Int64.of_int (f x)))) + Hash_impl_.(finalize (combine_int (combine_int seed 43) (f x))) let list f l = let s = - List.fold_left - (fun s x -> Hash_impl_.combine_i64 s (Int64.of_int (f x))) - Hash_impl_.seed l + List.fold_left (fun s x -> Hash_impl_.combine_int s (f x)) Hash_impl_.seed l in Hash_impl_.finalize s let array f a = let s = - Array.fold_left - (fun s x -> Hash_impl_.combine_i64 s (Int64.of_int (f x))) - Hash_impl_.seed a + Array.fold_left (fun s x -> Hash_impl_.combine_int s (f x)) Hash_impl_.seed a in Hash_impl_.finalize s let pair f g (x, y) = - Hash_impl_.( - finalize (combine_i64 (combine_i64 seed (Int64.of_int (f x))) (Int64.of_int (g y)))) + Hash_impl_.(finalize (combine_int (combine_int seed (f x)) (g y))) let triple f g h (x, y, z) = Hash_impl_.( - let s = combine_i64 seed (Int64.of_int (f x)) in - let s = combine_i64 s (Int64.of_int (g y)) in - finalize (combine_i64 s (Int64.of_int (h z)))) + let s = combine_int seed (f x) in + let s = combine_int s (g y) in + finalize (combine_int s (h z))) let quad f g h i (x, y, z, w) = Hash_impl_.( - let s = combine_i64 seed (Int64.of_int (f x)) in - let s = combine_i64 s (Int64.of_int (g y)) in - let s = combine_i64 s (Int64.of_int (h z)) in - finalize (combine_i64 s (Int64.of_int (i w)))) + let s = combine_int seed (f x) in + let s = combine_int s (g y) in + let s = combine_int s (h z) in + finalize (combine_int s (i w))) let map f h x = h (f x) @@ -140,9 +120,7 @@ let poly x = Hashtbl.hash x let array_of_hashes_ arr = Array.sort CCInt.compare arr; let s = - Array.fold_left - (fun s h -> Hash_impl_.combine_i64 s (Int64.of_int h)) - Hash_impl_.seed arr + Array.fold_left (fun s h -> Hash_impl_.combine_int s h) Hash_impl_.seed arr in Hash_impl_.finalize s @@ -157,18 +135,18 @@ let list_comm f l = let iter f seq = let s = ref Hash_impl_.seed in - seq (fun x -> s := Hash_impl_.combine_i64 !s (Int64.of_int (f x))); + seq (fun x -> s := Hash_impl_.combine_int !s (f x)); Hash_impl_.finalize !s let seq f sq = let s = ref Hash_impl_.seed in - Seq.iter (fun x -> s := Hash_impl_.combine_i64 !s (Int64.of_int (f x))) sq; + Seq.iter (fun x -> s := Hash_impl_.combine_int !s (f x)) sq; Hash_impl_.finalize !s let gen f g = let rec aux s = match g () with | None -> Hash_impl_.finalize s - | Some x -> aux (Hash_impl_.combine_i64 s (Int64.of_int (f x))) + | Some x -> aux (Hash_impl_.combine_int s (f x)) in aux Hash_impl_.seed diff --git a/src/core/CCHash.mli b/src/core/CCHash.mli index eecd2f0d..46140b35 100644 --- a/src/core/CCHash.mli +++ b/src/core/CCHash.mli @@ -1,3 +1,5 @@ +(* This file is free software, part of containers. See file "license" for more details. *) + (** Hash combinators The API of this module is stable as per semantic versioning, like the @@ -5,9 +7,6 @@ can change and should not be relied on (i.e. hashing a value always returns the same integer {b within a run of a program}, not across versions of OCaml and Containers). - - {b Implementation}: xorshift+multiply combiner with fmix64 (Murmur3) finalizer, - via C stubs. Unboxed in native code, boxed in bytecode. *) (* TODO: for 4.xx: @@ -44,7 +43,8 @@ val int64 : int64 t val nativeint : nativeint t val slice : string -> int -> int t -(** [slice s i len] hashes the slice [s[i .. i+len-1]]. *) +(** [slice s i len state] hashes the slice [i, …, i+len-1] of [s] + into [state]. *) val bytes : bytes t (** Hash a byte array. @@ -88,47 +88,17 @@ val array_comm : 'a t -> 'a array t will have the same hash. @since 1.0 *) -(** {2 Full-strength int64 API} *) - -val seed : int64 -(** Initial hash state. *) - -val combine64 : int64 -> int64 -> int64 -(** [combine64 state chunk] mixes [chunk] into [state] using the - xorshift+multiply combiner. Suitable for building streaming hashers - with full 64-bit state. Finalize with {!finalize} or {!finalize_i64}. *) - -val finalize : int64 -> int -(** [finalize state] applies fmix64 (Murmur3 finalizer) and returns a - non-negative [int] (strips sign bit). *) - -val finalize_i64 : int64 -> int64 -(** [finalize_i64 state] applies fmix64 and returns the full 64-bit result. - The result may be negative as a signed [int64]. *) - -(** {2 Deprecated int-state combinators} - - These thread state as [int] (63 bits on 64-bit systems), which is lossy. - Prefer building a pipeline with {!seed}, {!combine64}, and {!finalize}. *) +(** {2 Base hash combinators} *) val combine : 'a t -> hash -> 'a -> hash -[@@deprecated "lossy (63-bit state); use combine64 with int64 state"] - val combine2 : hash -> hash -> hash -[@@deprecated "lossy (63-bit state); use combine64 with int64 state"] - val combine3 : hash -> hash -> hash -> hash -[@@deprecated "lossy (63-bit state); use combine64 with int64 state"] - val combine4 : hash -> hash -> hash -> hash -> hash -[@@deprecated "lossy (63-bit state); use combine64 with int64 state"] val combine5 : hash -> hash -> hash -> hash -> hash -> hash -[@@deprecated "lossy (63-bit state); use combine64 with int64 state"] (** @since 2.1 *) val combine6 : hash -> hash -> hash -> hash -> hash -> hash -> hash -[@@deprecated "lossy (63-bit state); use combine64 with int64 state"] (** @since 2.1 *) (** {2 Iterators} *) diff --git a/src/core/CCHash64.ml b/src/core/CCHash64.ml new file mode 100644 index 00000000..d597cf95 --- /dev/null +++ b/src/core/CCHash64.ml @@ -0,0 +1,72 @@ +(* This file is free software, part of containers. See file "license" for more details. *) + +(** {1 Hash combinators with 64-bit state} *) + +type state = int64 + +let seed : state = Hash_impl_.seed + +let[@inline] finalize64 (s : state) : int64 = Hash_impl_.fmix64 s +let[@inline] finalize (s : state) : int = Hash_impl_.finalize s + +type 'a t = state -> 'a -> state + +let[@inline] int s x = Hash_impl_.combine_int s x +let[@inline] bool s b = Hash_impl_.combine_int s (if b then 1 else 2) +let[@inline] char s c = Hash_impl_.combine_char s (Char.code c) +let[@inline] int64 s (x : int64) = Hash_impl_.combine_i64 s x +let[@inline] int32 s (x : int32) = Hash_impl_.combine_i32 s x +let[@inline] nativeint s (x : nativeint) = Hash_impl_.combine_i64 s (Int64.of_nativeint x) +let[@inline] string s x = Hash_impl_.combine_string s x +let[@inline] bytes s x = Hash_impl_.combine_string s (Bytes.unsafe_to_string x) + +let slice str ofs s len = + let j = ofs + len in + let rec loop k st = + if k = j then + st + else + loop (k + 1) (Hash_impl_.combine_char st (Char.code (String.unsafe_get str k))) + in + loop ofs s + +let opt f s = function + | None -> Hash_impl_.combine_int s 0 + | Some x -> f (Hash_impl_.combine_int s 1) x + +let list f s l = List.fold_left f s l +let array f s a = Array.fold_left f s a +let pair f g s (x, y) = g (f s x) y +let triple f g h s (x, y, z) = h (g (f s x) y) z +let quad f g h k s (x, y, z, w) = k (h (g (f s x) y) z) w + +let map proj f s x = f s (proj x) +let if_ b then_ else_ s x = if b then then_ s x else else_ s x +let poly s x = Hash_impl_.combine_int s (Hashtbl.hash x) + +type 'a iter = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +let iter f s seq = + let st = ref s in + seq (fun x -> st := f !st x); + !st + +let seq f s sq = Seq.fold_left f s sq + +let gen f s g = + let rec aux st = + match g () with + | None -> st + | Some x -> aux (f st x) + in + aux s + +let array_comm f s a = + let hashes = Array.map (fun x -> finalize64 (f seed x)) a in + Array.sort Int64.compare hashes; + Array.fold_left Hash_impl_.combine_i64 s hashes + +let list_comm f s l = + let arr = Array.of_list l in + array_comm f s arr diff --git a/src/core/CCHash64.mli b/src/core/CCHash64.mli new file mode 100644 index 00000000..4c1228c9 --- /dev/null +++ b/src/core/CCHash64.mli @@ -0,0 +1,83 @@ +(** Hash combinators with 64-bit state threading. + + State is threaded explicitly as a private [int64] through combinators, + preserving full 64-bit quality throughout. Finalize with {!finalize} (returns + a positive [int]) or {!finalize64} (returns the raw [int64]). + + Typical use: + {[ + let hash_my_record r = + CCHash64.(finalize (string (int seed r.id) r.name)) + + let hash_pair (a, b) = + CCHash64.(finalize (pair int string seed (a, b))) + ]} + + {b Implementation}: xorshift+multiply combiner with fmix64 (Murmur3) + finalizer, via C stubs. Unboxed in native code. +*) + +type state = private int64 +(** Hash accumulator (64-bit). Create with {!seed}; finish with {!finalize} + or {!finalize64}. *) + +val seed : state +(** Initial hash state (golden-ratio constant). *) + +val finalize64 : state -> int64 +(** Apply fmix64 (Murmur3) and return the full 64-bit result. + The result may be negative as a signed [int64]. *) + +val finalize : state -> int +(** Apply fmix64 and return a non-negative [int] (strips sign bit). *) + +type 'a t = state -> 'a -> state +(** A hash combiner: takes the current state, mixes in a value, returns the + updated state. *) + +val int : int t +val bool : bool t +val char : char t +val int32 : int32 t +val int64 : int64 t +val nativeint : nativeint t +val string : string t + +val bytes : bytes t +(** @since 3.5 *) + +val slice : string -> int -> int t +(** [slice str ofs s len] mixes the byte slice [str[ofs .. ofs+len-1]] into [s]. *) + +val opt : 'a t -> 'a option t +val list : 'a t -> 'a list t +val array : 'a t -> 'a array t +val pair : 'a t -> 'b t -> ('a * 'b) t +val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t +val quad : 'a t -> 'b t -> 'c t -> 'd t -> ('a * 'b * 'c * 'd) t + +val map : ('a -> 'b) -> 'b t -> 'a t +(** [map proj f] applies [proj] before hashing with [f]. + Example: [map fst int] hashes only the first element of a pair. + @since 3.5 *) + +val if_ : bool -> 'a t -> 'a t -> 'a t +(** [if_ b t e] uses hasher [t] when [b] is true, [e] otherwise. *) + +val poly : 'a t +(** Uses [Hashtbl.hash] internally. *) + +val list_comm : 'a t -> 'a list t +(** Commutative: lists equal up to permutation hash the same. *) + +val array_comm : 'a t -> 'a array t +(** Commutative: arrays equal up to permutation hash the same. *) + +(** {2 Iterators} *) + +type 'a iter = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +val seq : 'a t -> 'a Seq.t t +val iter : 'a t -> 'a iter t +val gen : 'a t -> 'a gen t diff --git a/src/core/CCInt32.ml b/src/core/CCInt32.ml index 9981d7b2..e808cfaa 100644 --- a/src/core/CCInt32.ml +++ b/src/core/CCInt32.ml @@ -8,8 +8,8 @@ let min : t -> t -> t = Stdlib.min let max : t -> t -> t = Stdlib.max [@@@endif] -let hash (x : t) : int = Hash_impl_.(finalize (combine_i32 seed x)) +let hash (x : t) : int = Hash_impl_.(finalize (combine_i32 seed x)) let sign i = compare i zero let pow a b = diff --git a/src/core/CCInt64.ml b/src/core/CCInt64.ml index 68c25312..4a2446cf 100644 --- a/src/core/CCInt64.ml +++ b/src/core/CCInt64.ml @@ -10,9 +10,7 @@ let max : t -> t -> t = Stdlib.max [@@@endif] let sign i = compare i zero - let hash_to_int64 (n : t) : t = Hash_impl_.(fmix64 (combine_i64 seed n)) - let[@inline] hash (n : t) : int = Hash_impl_.(finalize (combine_i64 seed n)) (* see {!CCInt.popcount} for more details *) diff --git a/src/core/Hash_impl_.ml b/src/core/Hash_impl_.ml index 9bd1d46f..1bc8d8d2 100644 --- a/src/core/Hash_impl_.ml +++ b/src/core/Hash_impl_.ml @@ -18,17 +18,26 @@ (** Initial hash state (golden-ratio constant). *) let seed : int64 = 0x9e3779b97f4a7c15L -external combine_i64 : (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed]) +external combine_int : + (int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed]) + = "caml_cc_hash_combine_int_byte" "caml_cc_hash_combine_int" +[@@noalloc] +(** [combine_int state x] mixes OCaml int [x] into [state]. *) + +external combine_i64 : + (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed]) = "caml_cc_hash_combine_i64_byte" "caml_cc_hash_combine_i64" [@@noalloc] (** [combine_i64 state chunk] mixes [chunk] into [state]. *) -external combine_i32 : (int64[@unboxed]) -> (int32[@unboxed]) -> (int64[@unboxed]) +external combine_i32 : + (int64[@unboxed]) -> (int32[@unboxed]) -> (int64[@unboxed]) = "caml_cc_hash_combine_i32_byte" "caml_cc_hash_combine_i32" [@@noalloc] (** [combine_i32 state chunk] mixes [chunk] into [state]. *) -external combine_char : (int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed]) +external combine_char : + (int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed]) = "caml_cc_hash_combine_char_byte" "caml_cc_hash_combine_char" [@@noalloc] (** [combine_char state c] mixes character code [c] into [state]. *) diff --git a/src/core/hash_stubs.c b/src/core/hash_stubs.c index c5976a82..f0ac1bdf 100644 --- a/src/core/hash_stubs.c +++ b/src/core/hash_stubs.c @@ -71,6 +71,22 @@ CAMLprim value caml_cc_hash_combine_i32_byte(value v_state, value v_chunk) CAMLreturn(caml_copy_int64((int64_t)r)); } +/* --- combine_int --------------------------------------------------------- */ + +/* chunk is an OCaml int (intnat), passed untagged */ +CAMLprim int64_t caml_cc_hash_combine_int(int64_t state, intnat chunk) +{ + return (int64_t)hash_combine((uint64_t)state, (uint64_t)chunk); +} + +CAMLprim value caml_cc_hash_combine_int_byte(value v_state, value v_chunk) +{ + CAMLparam2(v_state, v_chunk); + uint64_t r = hash_combine((uint64_t)Int64_val(v_state), + (uint64_t)Long_val(v_chunk)); + CAMLreturn(caml_copy_int64((int64_t)r)); +} + /* --- combine_char -------------------------------------------------------- */ /* c is passed as untagged int (Char.code) */ diff --git a/tests/core/t_hash.ml b/tests/core/t_hash.ml index 93012036..37dce0f4 100644 --- a/tests/core/t_hash.ml +++ b/tests/core/t_hash.ml @@ -16,57 +16,61 @@ t @@ fun () -> string "abc" <> string "abcd";; q Q.int (fun i -> Q.assume (i >= 0); - int i = int64 (Int64.of_int i));; + int i = int64 (Int64.of_int i)) +;; (* --- stress tests -------------------------------------------------------- *) (* Chi-squared distribution test over [count] consecutive integers in [buckets] buckets. A uniform hash gives chi2 ~ buckets-1; we allow 4 standard deviations of slack. *) t ~name:"int hash distribution chi2" @@ fun () -> - let count = 50_000 and buckets = 500 in - let counts = Array.make buckets 0 in - for i = 0 to count - 1 do - let b = CCHash.int i mod buckets in - counts.(b) <- counts.(b) + 1 - done; - let expected = float count /. float buckets in - let c2 = - Array.fold_left - (fun acc c -> acc +. ((float c -. expected) ** 2.0 /. expected)) - 0.0 counts - in - let df = float (buckets - 1) in - c2 < df +. 4.0 *. sqrt (2.0 *. df);; +let count = 50_000 and buckets = 500 in +let counts = Array.make buckets 0 in +for i = 0 to count - 1 do + let b = CCHash.int i mod buckets in + counts.(b) <- counts.(b) + 1 +done; +let expected = float count /. float buckets in +let c2 = + Array.fold_left + (fun acc c -> acc +. (((float c -. expected) ** 2.0) /. expected)) + 0.0 counts +in +let df = float (buckets - 1) in +c2 < df +. (4.0 *. sqrt (2.0 *. df)) +;; (* Strict avalanche criterion: flip one input bit, expect ~50% output bits to change. *) t ~name:"int hash avalanche" @@ fun () -> - let bits = Sys.int_size - 1 in - let total_flips = ref 0 in - let total = ref 0 in - let rng = Random.State.make [| 42; 17; 99 |] in - for _ = 1 to 300 do - let x = Random.State.bits rng in - let hx = CCHash.int x in - for b = 0 to bits - 1 do - let hx' = CCHash.int (x lxor (1 lsl b)) in - total_flips := !total_flips + CCInt.popcount (hx lxor hx'); - total := !total + bits - done - done; - let frac = float !total_flips /. float !total in - frac >= 0.45 && frac <= 0.55;; +let bits = Sys.int_size - 1 in +let total_flips = ref 0 in +let total = ref 0 in +let rng = Random.State.make [| 42; 17; 99 |] in +for _ = 1 to 300 do + let x = Random.State.bits rng in + let hx = CCHash.int x in + for b = 0 to bits - 1 do + let hx' = CCHash.int (x lxor (1 lsl b)) in + total_flips := !total_flips + CCInt.popcount (hx lxor hx'); + total := !total + bits + done +done; +let frac = float !total_flips /. float !total in +frac >= 0.45 && frac <= 0.55 +;; (* String hash: no collisions among distinct keys. *) t ~name:"string hash no collisions" @@ fun () -> - let n = 50_000 in - let tbl = Hashtbl.create n in - let ok = ref true in - for i = 0 to n - 1 do - let h = CCHash.string (Printf.sprintf "key:%d" i) in - if Hashtbl.mem tbl h then ok := false; - Hashtbl.replace tbl h () - done; - !ok;; +let n = 50_000 in +let tbl = Hashtbl.create n in +let ok = ref true in +for i = 0 to n - 1 do + let h = CCHash.string (Printf.sprintf "key:%d" i) in + if Hashtbl.mem tbl h then ok := false; + Hashtbl.replace tbl h () +done; +!ok +;; (* CCHash64 pipeline matches CCHash.pair combiner. *) q Q.int (fun i ->