NOTE(review): the header names on the three #include lines of the cc_stubs.c
hunk were missing from the source and are reconstructed here from the symbols
the stubs use (caml_copy_int64, value/CAMLprim/Int64_val, int64_t/INT64_MAX).
Confirm them against the tree before applying. The post-image blob id on the
CCInt64.ml "index" line predates the [@@noalloc] added to hash_to_int64.

diff --git a/src/core/CCInt64.ml b/src/core/CCInt64.ml
index 8bb45ead..0c3f2405 100644
--- a/src/core/CCInt64.ml
+++ b/src/core/CCInt64.ml
@@ -11,21 +11,16 @@ let max : t -> t -> t = Stdlib.max
 
 let sign i = compare i zero
 
-(* use FNV:
+(* use FNV-1:
    https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
-let hash_to_int64 (n : t) =
-  let offset_basis = 0xcbf29ce484222325L in
-  let prime = 0x100000001b3L in
+(* Both hashes are C stubs (cc_stubs.c); each external names the bytecode entry
+   point first and the native one second. NOTE(review): [@@noalloc] assumes
+   cc_fnv_hash_int64 neither allocates nor raises -- confirm in cc_stubs.c. *)
+external hash_to_int64 : (int64[@unboxed]) -> (int64[@unboxed])
+  = "caml_cc_hash_int64_to_int64_byte" "caml_cc_hash_int64_to_int64" [@@noalloc]
 
-  let h = ref offset_basis in
-  for k = 0 to 7 do
-    h := mul !h prime;
-    (* h := h xor (k-th byte of n) *)
-    h := logxor !h (logand (shift_right n (k * 8)) 0xffL)
-  done;
-  logand !h max_int
-
-let[@inline] hash (n : t) : int = to_int (hash_to_int64 n) land Stdlib.max_int
+external hash : (int64[@unboxed]) -> (int[@untagged])
+  = "caml_cc_hash_int64_byte" "caml_cc_hash_int64" [@@noalloc]
 
 (* see {!CCInt.popcount} for more details *)
 let[@inline] popcount (b : t) : int =
diff --git a/src/core/cc_stubs.c b/src/core/cc_stubs.c
index 9c4426ea..af8ee084 100644
--- a/src/core/cc_stubs.c
+++ b/src/core/cc_stubs.c
@@ -1,3 +1,4 @@
+#include <caml/alloc.h>
 #include <caml/mlvalues.h>
 #include <stdint.h>
 
@@ -51,3 +52,16 @@ CAMLprim intnat caml_cc_hash_int64(int64_t n) {
 CAMLprim value caml_cc_hash_int64_byte(value v_n) {
   return Val_long(caml_cc_hash_int64(Int64_val(v_n)));
 }
+
+/* --- int64 -> int64 entry points (for hash_to_int64) --- */
+
+/* native: unboxed int64 in, unboxed int64 out.
+   Masks to non-negative int64 (matches OCaml's Int64.max_int). */
+CAMLprim int64_t caml_cc_hash_int64_to_int64(int64_t n) {
+  return cc_fnv_hash_int64(n) & INT64_MAX;
+}
+
+/* bytecode: boxed int64 in, boxed int64 out */
+CAMLprim value caml_cc_hash_int64_to_int64_byte(value v_n) {
+  return caml_copy_int64(cc_fnv_hash_int64(Int64_val(v_n)) & INT64_MAX);
+}
diff --git a/tests/core/t_int64.ml b/tests/core/t_int64.ml
index 72078a15..64d27bd9 100644
--- a/tests/core/t_int64.ml
+++ b/tests/core/t_int64.ml
@@ -101,3 +101,40 @@ eq' 63 (popcount max_int);;
 eq' 1 (popcount min_int);;
 eq' 10 (popcount 0b1110010110110001010L);;
 eq' 5 (popcount 0b1101110000000000L)
+
+(* hash tests *)
+let ( >= ) = Stdlib.( >= );;
+let ( = ) = Stdlib.( = );;
+let ( <> ) = Stdlib.( <> );;
+
+(* hash is non-negative *)
+t @@ fun () -> hash 0L >= 0;;
+t @@ fun () -> hash 1L >= 0;;
+t @@ fun () -> hash (-1L) >= 0;;
+t @@ fun () -> hash max_int >= 0;;
+t @@ fun () -> hash min_int >= 0;;
+
+(* hash_to_int64 is non-negative *)
+t @@ fun () -> CCInt64.compare (hash_to_int64 0L) 0L >= 0;;
+t @@ fun () -> CCInt64.compare (hash_to_int64 (-1L)) 0L >= 0;;
+t @@ fun () -> CCInt64.compare (hash_to_int64 min_int) 0L >= 0;;
+
+(* hash is consistent with hash_to_int64 *)
+t @@ fun () ->
+  hash 42L = Stdlib.(Int64.to_int (hash_to_int64 42L) land max_int);;
+t @@ fun () ->
+  hash (-1L) = Stdlib.(Int64.to_int (hash_to_int64 (-1L)) land max_int);;
+
+(* different inputs produce different hashes *)
+t @@ fun () -> hash 0L <> hash 1L;;
+t @@ fun () -> hash 1L <> hash 2L;;
+t @@ fun () -> hash 1L <> hash (-1L);;
+t @@ fun () -> hash_to_int64 0L <> hash_to_int64 1L;;
+
+(* deterministic *)
+t @@ fun () -> hash 123L = hash 123L;;
+t @@ fun () -> hash_to_int64 123L = hash_to_int64 123L;;
+
+(* quickcheck: hash is always non-negative *)
+q Q.(map Int64.of_int int) (fun n -> hash n >= 0);;
+q Q.(map Int64.of_int int) (fun n -> CCInt64.compare (hash_to_int64 n) 0L >= 0)