mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-05-05 08:54:22 -04:00
hash mixer and combiner in C
This commit is contained in:
parent
a20eddfdd3
commit
7fdee4a17e
9 changed files with 351 additions and 139 deletions
|
|
@ -7,130 +7,125 @@ type 'a t = 'a -> hash
|
|||
type 'a iter = ('a -> unit) -> unit
|
||||
type 'a gen = unit -> 'a option
|
||||
|
||||
(* FNV hashing
|
||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
||||
*)
|
||||
let fnv_offset_basis = 0xcbf29ce484222325L
|
||||
let fnv_prime = 0x100000001b3L
|
||||
(** {2 Full-strength int64 API} *)
|
||||
|
||||
(* hash an integer *)
|
||||
let hash_int_ n =
|
||||
let h = ref fnv_offset_basis in
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
(* truncate back to int and remove sign *)
|
||||
Int64.to_int !h land max_int
|
||||
let seed : int64 = Hash_impl_.seed
|
||||
|
||||
let combine2 a b =
|
||||
let h = ref fnv_offset_basis in
|
||||
(* we only do one loop, where we mix bytes of [a] and [b], so as
|
||||
to simplify control flow *)
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
Int64.to_int !h land max_int
|
||||
let[@inline] combine64 (s : int64) (c : int64) : int64 = Hash_impl_.combine_i64 s c
|
||||
|
||||
let[@inline] combine f s x = combine2 s (f x)
|
||||
let[@inline] finalize (s : int64) : int = Hash_impl_.finalize s
|
||||
|
||||
let[@inline] finalize_i64 (s : int64) : int64 = Hash_impl_.fmix64 s
|
||||
|
||||
(** {2 Deprecated int-state combinators} *)
|
||||
|
||||
let[@inline] combine2 a b =
|
||||
Hash_impl_.(finalize (combine_i64 (Int64.of_int a) (Int64.of_int b)))
|
||||
|
||||
let[@inline] combine f s x =
|
||||
Hash_impl_.(finalize (combine_i64 (Int64.of_int s) (Int64.of_int (f x))))
|
||||
|
||||
let combine3 a b c =
|
||||
let h = ref fnv_offset_basis in
|
||||
(* we only do one loop, where we mix bytes of [a] [b] and [c], so as
|
||||
to simplify control flow *)
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
Int64.to_int !h land max_int
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in
|
||||
finalize (combine_i64 s (Int64.of_int c)))
|
||||
|
||||
let combine4 a b c d =
|
||||
let h = ref fnv_offset_basis in
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (of_int ((d lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
Int64.to_int !h land max_int
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in
|
||||
let s = combine_i64 s (Int64.of_int c) in
|
||||
finalize (combine_i64 s (Int64.of_int d)))
|
||||
|
||||
let combine5 a b c d e = combine3 a b (combine3 c d e)
|
||||
let combine6 a b c d e f = combine4 a b c (combine3 d e f)
|
||||
let combine5 a b c d e =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in
|
||||
let s = combine_i64 s (Int64.of_int c) in
|
||||
let s = combine_i64 s (Int64.of_int d) in
|
||||
finalize (combine_i64 s (Int64.of_int e)))
|
||||
|
||||
(** {2 Combinators} *)
|
||||
let combine6 a b c d e f =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in
|
||||
let s = combine_i64 s (Int64.of_int c) in
|
||||
let s = combine_i64 s (Int64.of_int d) in
|
||||
let s = combine_i64 s (Int64.of_int e) in
|
||||
finalize (combine_i64 s (Int64.of_int f)))
|
||||
|
||||
(** {2 Primitive hashers} *)
|
||||
|
||||
let const h _ = h
|
||||
let const0 _ = 0
|
||||
let int = hash_int_
|
||||
|
||||
let int n = Hash_impl_.(finalize (combine_i64 seed (Int64.of_int n)))
|
||||
|
||||
let bool b =
|
||||
hash_int_
|
||||
int
|
||||
(if b then
|
||||
1
|
||||
else
|
||||
2)
|
||||
|
||||
let char x = hash_int_ (Char.code x)
|
||||
let char x = Hash_impl_.(finalize (combine_char seed (Char.code x)))
|
||||
|
||||
(* hash an integer *)
|
||||
let int64 n : int =
|
||||
let h = ref fnv_offset_basis in
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (logand (shift_right_logical n (k * 8)) 0xffL))
|
||||
done;
|
||||
(* truncate back to int and remove sign *)
|
||||
Int64.to_int !h land max_int
|
||||
let int64 (n : int64) : int = Hash_impl_.(finalize (combine_i64 seed n))
|
||||
|
||||
let int32 (x : int32) : int = Hash_impl_.(finalize (combine_i32 seed x))
|
||||
|
||||
let int32 (x : int32) = int64 (Int64.of_int32 x)
|
||||
let nativeint (x : nativeint) = int64 (Int64.of_nativeint x)
|
||||
|
||||
(* do not hash more than 128 bytes in strings/bytes *)
|
||||
let max_len_b_ = 128
|
||||
|
||||
let bytes (x : bytes) =
|
||||
let h = ref fnv_offset_basis in
|
||||
for i = 0 to min max_len_b_ (Bytes.length x - 1) do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
let byte = Char.code (Bytes.unsafe_get x i) in
|
||||
h := Int64.(logxor !h (of_int byte))
|
||||
done;
|
||||
Int64.to_int !h land max_int
|
||||
Hash_impl_.(finalize (combine_string seed (Bytes.unsafe_to_string x)))
|
||||
|
||||
let string (x : string) = bytes (Bytes.unsafe_of_string x)
|
||||
let string (x : string) = Hash_impl_.(finalize (combine_string seed x))
|
||||
|
||||
(** [slice x i len] hashes the [len] bytes of [x] starting at offset [i],
    one character at a time, starting from [Hash_impl_.seed].
    @raise Invalid_argument if [i] or [len] is negative, or if the slice
    extends past the end of [x]. *)
let slice x i len =
  (* Validate once up front: the loop below reads with [String.unsafe_get],
     which performs no bounds check. The comparison is written as
     [i > length - len] so that [i + len] cannot overflow. *)
  if i < 0 || len < 0 || i > String.length x - len then
    invalid_arg "CCHash.slice";
  let j = i + len in
  let rec aux k s =
    if k = j then
      Hash_impl_.finalize s
    else
      aux (k + 1)
        (Hash_impl_.combine_char s (Char.code (String.unsafe_get x k)))
  in
  aux i Hash_impl_.seed
|
||||
|
||||
let opt f = function
|
||||
| None -> 42
|
||||
| Some x -> combine2 43 (f x)
|
||||
| Some x ->
|
||||
Hash_impl_.(finalize (combine_i64 (combine_i64 seed 43L) (Int64.of_int (f x))))
|
||||
|
||||
let list f l = List.fold_left (combine f) 0x42 l
|
||||
let array f l = Array.fold_left (combine f) 0x42 l
|
||||
let pair f g (x, y) = combine2 (f x) (g y)
|
||||
let triple f g h (x, y, z) = combine2 (combine2 (f x) (g y)) (h z)
|
||||
let list f l =
|
||||
let s =
|
||||
List.fold_left
|
||||
(fun s x -> Hash_impl_.combine_i64 s (Int64.of_int (f x)))
|
||||
Hash_impl_.seed l
|
||||
in
|
||||
Hash_impl_.finalize s
|
||||
|
||||
let array f a =
|
||||
let s =
|
||||
Array.fold_left
|
||||
(fun s x -> Hash_impl_.combine_i64 s (Int64.of_int (f x)))
|
||||
Hash_impl_.seed a
|
||||
in
|
||||
Hash_impl_.finalize s
|
||||
|
||||
let pair f g (x, y) =
|
||||
Hash_impl_.(
|
||||
finalize (combine_i64 (combine_i64 seed (Int64.of_int (f x))) (Int64.of_int (g y))))
|
||||
|
||||
let triple f g h (x, y, z) =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 seed (Int64.of_int (f x)) in
|
||||
let s = combine_i64 s (Int64.of_int (g y)) in
|
||||
finalize (combine_i64 s (Int64.of_int (h z))))
|
||||
|
||||
let quad f g h i (x, y, z, w) =
|
||||
combine2 (combine2 (f x) (g y)) (combine2 (h z) (i w))
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 seed (Int64.of_int (f x)) in
|
||||
let s = combine_i64 s (Int64.of_int (g y)) in
|
||||
let s = combine_i64 s (Int64.of_int (h z)) in
|
||||
finalize (combine_i64 s (Int64.of_int (i w))))
|
||||
|
||||
let map f h x = h (f x)
|
||||
|
||||
|
|
@ -144,8 +139,12 @@ let poly x = Hashtbl.hash x
|
|||
|
||||
let array_of_hashes_ arr =
|
||||
Array.sort CCInt.compare arr;
|
||||
(* sort the hashes, so their order does not matter *)
|
||||
Array.fold_left combine2 0x42 arr
|
||||
let s =
|
||||
Array.fold_left
|
||||
(fun s h -> Hash_impl_.combine_i64 s (Int64.of_int h))
|
||||
Hash_impl_.seed arr
|
||||
in
|
||||
Hash_impl_.finalize s
|
||||
|
||||
let array_comm f a =
|
||||
let arr = Array.init (Array.length a) (fun i -> f a.(i)) in
|
||||
|
|
@ -157,19 +156,19 @@ let list_comm f l =
|
|||
array_of_hashes_ arr
|
||||
|
||||
let iter f seq =
|
||||
let h = ref 0x43 in
|
||||
seq (fun x -> h := combine f !h x);
|
||||
!h
|
||||
let s = ref Hash_impl_.seed in
|
||||
seq (fun x -> s := Hash_impl_.combine_i64 !s (Int64.of_int (f x)));
|
||||
Hash_impl_.finalize !s
|
||||
|
||||
let seq f seq =
|
||||
let h = ref 0x43 in
|
||||
Seq.iter (fun x -> h := combine f !h x) seq;
|
||||
!h
|
||||
let seq f sq =
|
||||
let s = ref Hash_impl_.seed in
|
||||
Seq.iter (fun x -> s := Hash_impl_.combine_i64 !s (Int64.of_int (f x))) sq;
|
||||
Hash_impl_.finalize !s
|
||||
|
||||
let gen f g =
|
||||
let rec aux s =
|
||||
match g () with
|
||||
| None -> s
|
||||
| Some x -> aux (combine2 s (f x))
|
||||
| None -> Hash_impl_.finalize s
|
||||
| Some x -> aux (Hash_impl_.combine_i64 s (Int64.of_int (f x)))
|
||||
in
|
||||
aux 0x42
|
||||
aux Hash_impl_.seed
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
(* This file is free software, part of containers. See file "license" for more details. *)
|
||||
|
||||
(** Hash combinators
|
||||
|
||||
The API of this module is stable as per semantic versioning, like the
|
||||
|
|
@ -7,8 +5,20 @@
|
|||
can change and should not be relied on (i.e. hashing a value always
|
||||
returns the same integer {b within a run of a program}, not
|
||||
across versions of OCaml and Containers).
|
||||
|
||||
{b Implementation}: xorshift+multiply combiner with fmix64 (Murmur3) finalizer,
|
||||
via C stubs. Unboxed in native code, boxed in bytecode.
|
||||
*)
|
||||
|
||||
(* TODO: for 4.xx:
|
||||
|
||||
{[type state = int64
|
||||
val seed : state
|
||||
type 'a t = state -> 'a -> state
|
||||
val finalize : state -> int64
|
||||
]}
|
||||
*)
|
||||
|
||||
(** {2 Definitions} *)
|
||||
|
||||
type hash = int
|
||||
|
|
@ -34,8 +44,7 @@ val int64 : int64 t
|
|||
val nativeint : nativeint t
|
||||
|
||||
val slice : string -> int -> int t
|
||||
(** [slice s i len state] hashes the slice [i, …, i+len-1] of [s]
|
||||
into [state]. *)
|
||||
(** [slice s i len] hashes the slice [s[i .. i+len-1]]. *)
|
||||
|
||||
val bytes : bytes t
|
||||
(** Hash a byte array.
|
||||
|
|
@ -79,17 +88,47 @@ val array_comm : 'a t -> 'a array t
|
|||
will have the same hash.
|
||||
@since 1.0 *)
|
||||
|
||||
(** {2 Base hash combinators} *)
|
||||
(** {2 Full-strength int64 API} *)
|
||||
|
||||
val seed : int64
|
||||
(** Initial hash state. *)
|
||||
|
||||
val combine64 : int64 -> int64 -> int64
|
||||
(** [combine64 state chunk] mixes [chunk] into [state] using the
|
||||
xorshift+multiply combiner. Suitable for building streaming hashers
|
||||
with full 64-bit state. Finalize with {!finalize} or {!finalize_i64}. *)
|
||||
|
||||
val finalize : int64 -> int
|
||||
(** [finalize state] applies fmix64 (Murmur3 finalizer) and returns a
|
||||
non-negative [int] (strips sign bit). *)
|
||||
|
||||
val finalize_i64 : int64 -> int64
|
||||
(** [finalize_i64 state] applies fmix64 and returns the full 64-bit result.
|
||||
The result may be negative as a signed [int64]. *)
|
||||
|
||||
(** {2 Deprecated int-state combinators}
|
||||
|
||||
These thread state as [int] (63 bits on 64-bit systems), which is lossy.
|
||||
Prefer building a pipeline with {!seed}, {!combine64}, and {!finalize}. *)
|
||||
|
||||
val combine : 'a t -> hash -> 'a -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
|
||||
val combine2 : hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
|
||||
val combine3 : hash -> hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
|
||||
val combine4 : hash -> hash -> hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
|
||||
val combine5 : hash -> hash -> hash -> hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
(** @since 2.1 *)
|
||||
|
||||
val combine6 : hash -> hash -> hash -> hash -> hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
(** @since 2.1 *)
|
||||
|
||||
(** {2 Iterators} *)
|
||||
|
|
|
|||
|
|
@ -4,20 +4,8 @@ include Int
|
|||
|
||||
type 'a iter = ('a -> unit) -> unit
|
||||
|
||||
(* use FNV:
|
||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
|
||||
let hash (n : int) : int =
|
||||
let offset_basis = 0xcbf29ce484222325L in
|
||||
let prime = 0x100000001b3L in
|
||||
|
||||
let h = ref offset_basis in
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h prime));
|
||||
(* h := h xor (k-th byte of n) *)
|
||||
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
(* truncate back to int and remove sign *)
|
||||
Int64.to_int !h land max_int
|
||||
Hash_impl_.(finalize (combine_i64 seed (Int64.of_int n)))
|
||||
|
||||
let range i j yield =
|
||||
let rec up i j yield =
|
||||
|
|
|
|||
|
|
@ -8,11 +8,7 @@ let min : t -> t -> t = Stdlib.min
|
|||
let max : t -> t -> t = Stdlib.max
|
||||
|
||||
[@@@endif]
|
||||
[@@@iflt 5.1]
|
||||
|
||||
let hash x = Stdlib.abs (to_int x)
|
||||
|
||||
[@@@endif]
|
||||
let hash (x : t) : int = Hash_impl_.(finalize (combine_i32 seed x))
|
||||
|
||||
let sign i = compare i zero
|
||||
|
||||
|
|
|
|||
|
|
@ -11,21 +11,9 @@ let max : t -> t -> t = Stdlib.max
|
|||
|
||||
let sign i = compare i zero
|
||||
|
||||
(* use FNV:
|
||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
|
||||
let hash_to_int64 (n : t) =
|
||||
let offset_basis = 0xcbf29ce484222325L in
|
||||
let prime = 0x100000001b3L in
|
||||
let hash_to_int64 (n : t) : t = Hash_impl_.(fmix64 (combine_i64 seed n))
|
||||
|
||||
let h = ref offset_basis in
|
||||
for k = 0 to 7 do
|
||||
h := mul !h prime;
|
||||
(* h := h xor (k-th byte of n) *)
|
||||
h := logxor !h (logand (shift_right n (k * 8)) 0xffL)
|
||||
done;
|
||||
logand !h max_int
|
||||
|
||||
let[@inline] hash (n : t) : int = to_int (hash_to_int64 n) land Stdlib.max_int
|
||||
let[@inline] hash (n : t) : int = Hash_impl_.(finalize (combine_i64 seed n))
|
||||
|
||||
(* see {!CCInt.popcount} for more details *)
|
||||
let[@inline] popcount (b : t) : int =
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ include String
|
|||
|
||||
let compare_int (a : int) b = Stdlib.compare a b
|
||||
let compare = String.compare
|
||||
let hash s = Hashtbl.hash s
|
||||
let hash (s : string) : int = Hash_impl_.(finalize (combine_string seed s))
|
||||
let length = String.length
|
||||
let is_empty s = equal s ""
|
||||
|
||||
|
|
|
|||
49
src/core/Hash_impl_.ml
Normal file
49
src/core/Hash_impl_.ml
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
(* This file is free software, part of containers. See file "license" for more details. *)
|
||||
|
||||
(** Internal hash implementation.
|
||||
|
||||
Combiner: [state ^= chunk; state ^= state >> 32; state *= 0xd6e8feb86659fd93]
|
||||
Finalizer: fmix64 (Murmur3).
|
||||
|
||||
Multiplicative constant 0xd6e8feb86659fd93 (rrmxmx family, Pelle Evensen, 2018):
|
||||
https://mostlymangling.blogspot.com/2018/07/on-mixing-functions-in-fast-hashing.html
|
||||
Also evaluated in Chris Wellons' hash-prospector:
|
||||
https://github.com/skeeto/hash-prospector
|
||||
|
||||
fmix64 constants (Murmur3, Austin Appleby):
|
||||
https://github.com/aappleby/smhasher
|
||||
|
||||
Not part of the public API; use {!CCHash} instead. *)
|
||||
|
||||
(** Starting state for every hash computation: the 64-bit golden-ratio
    constant. *)
let seed : int64 = 0x9e37_79b9_7f4a_7c15L
|
||||
|
||||
(** [combine_i64 state chunk] is [state] after mixing the 64-bit [chunk]
    into it. *)
external combine_i64 :
  (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
  = "caml_cc_hash_combine_i64_byte" "caml_cc_hash_combine_i64"
[@@noalloc]
|
||||
|
||||
(** [combine_i32 state chunk] is [state] after mixing the 32-bit [chunk]
    into it. *)
external combine_i32 :
  (int64[@unboxed]) -> (int32[@unboxed]) -> (int64[@unboxed])
  = "caml_cc_hash_combine_i32_byte" "caml_cc_hash_combine_i32"
[@@noalloc]
|
||||
|
||||
(** [combine_char state c] is [state] after mixing the character code [c]
    into it. *)
external combine_char :
  (int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed])
  = "caml_cc_hash_combine_char_byte" "caml_cc_hash_combine_char"
[@@noalloc]
|
||||
|
||||
(** [combine_string state s] is [state] after mixing every byte of [s] into
    it, consuming the string 8 bytes at a time. *)
external combine_string : (int64[@unboxed]) -> string -> (int64[@unboxed])
  = "caml_cc_hash_combine_string_byte" "caml_cc_hash_combine_string"
[@@noalloc]
|
||||
|
||||
(** [fmix64 state] runs the Murmur3 finalizer over [state] and returns all
    64 bits of the result, which may therefore be negative. *)
external fmix64 : (int64[@unboxed]) -> (int64[@unboxed])
  = "caml_cc_hash_fmix64_byte" "caml_cc_hash_fmix64"
[@@noalloc]
|
||||
|
||||
(** [finalize state] runs fmix64 over [state] and returns the result as a
    non-negative [int]. *)
external finalize : (int64[@unboxed]) -> (int[@untagged])
  = "caml_cc_hash_finalize_byte" "caml_cc_hash_finalize"
[@@noalloc]
|
||||
|
|
@ -6,7 +6,12 @@
|
|||
(action
|
||||
(run %{project_root}/src/core/cpp/cpp.exe %{input-file})))
|
||||
(flags :standard -nolabels -open CCMonomorphic)
|
||||
(libraries either containers.monomorphic containers.domain))
|
||||
(libraries either containers.monomorphic containers.domain)
|
||||
(private_modules Hash_impl_)
|
||||
(foreign_stubs
|
||||
(language c)
|
||||
(flags :standard -O2)
|
||||
(names hash_stubs)))
|
||||
|
||||
(ocamllex
|
||||
(modules CCSexp_lex))
|
||||
|
|
|
|||
148
src/core/hash_stubs.c
Normal file
148
src/core/hash_stubs.c
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
/* This file is free software, part of containers. See file "license" for more details. */
|
||||
|
||||
/* Hash implementation: xorshift+multiply combiner with fmix64 finalizer.
|
||||
Combiner: state ^= chunk; state ^= state >> 32; state *= 0xd6e8feb86659fd93
|
||||
Finalizer (fmix64, Murmur3): three rounds of xorshift-multiply.
|
||||
|
||||
Multiplicative constant 0xd6e8feb86659fd93 (rrmxmx family, Pelle Evensen, 2018):
|
||||
https://mostlymangling.blogspot.com/2018/07/on-mixing-functions-in-fast-hashing.html
|
||||
Also evaluated in Chris Wellons' hash-prospector:
|
||||
https://github.com/skeeto/hash-prospector
|
||||
|
||||
fmix64 constants 0xff51afd7ed558ccd / 0xc4ceb9fe1a85ec53 (Murmur3, Austin Appleby):
|
||||
https://github.com/aappleby/smhasher
|
||||
*/
|
||||
|
||||
#include <caml/mlvalues.h>
|
||||
#include <caml/alloc.h>
|
||||
#include <caml/memory.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Multiplier applied by the combiner after its xorshift step. */
#define HASH_MUL UINT64_C(0xd6e8feb86659fd93)
/* The two multipliers used by the fmix64 finalizer. */
#define FMIX_C1 UINT64_C(0xff51afd7ed558ccd)
#define FMIX_C2 UINT64_C(0xc4ceb9fe1a85ec53)

/* Mix one 64-bit chunk into the running state: xor the chunk in, fold the
   high half onto the low half, then multiply by HASH_MUL. */
static inline uint64_t hash_combine(uint64_t state, uint64_t chunk)
{
  uint64_t mixed = state ^ chunk;
  mixed ^= mixed >> 32;
  return mixed * HASH_MUL;
}

/* fmix64 finalizer: three xorshift-by-33 steps interleaved with two
   multiplications. */
static inline uint64_t fmix64(uint64_t h)
{
  uint64_t x = h;
  x = (x ^ (x >> 33)) * FMIX_C1;
  x = (x ^ (x >> 33)) * FMIX_C2;
  return x ^ (x >> 33);
}
|
||||
|
||||
/* --- combine_i64 --------------------------------------------------------- */
|
||||
|
||||
/* Unboxed entry point for combine_i64: both arguments arrive as raw int64_t
   and the mixed state is returned the same way. */
CAMLprim int64_t caml_cc_hash_combine_i64(int64_t state, int64_t chunk)
{
  const uint64_t mixed = hash_combine((uint64_t)state, (uint64_t)chunk);
  return (int64_t)mixed;
}
|
||||
|
||||
/* Boxed entry point for combine_i64: unboxes both Int64 values, mixes them,
   and returns a freshly allocated Int64. */
CAMLprim value caml_cc_hash_combine_i64_byte(value v_state, value v_chunk)
{
  CAMLparam2(v_state, v_chunk);
  const uint64_t state = (uint64_t)Int64_val(v_state);
  const uint64_t chunk = (uint64_t)Int64_val(v_chunk);
  CAMLreturn(caml_copy_int64((int64_t)hash_combine(state, chunk)));
}
|
||||
|
||||
/* --- combine_i32 --------------------------------------------------------- */
|
||||
|
||||
/* Unboxed entry point for combine_i32. The 32-bit chunk is zero-extended
   (via uint32_t) before being mixed into the state. */
CAMLprim int64_t caml_cc_hash_combine_i32(int64_t state, int32_t chunk)
{
  const uint64_t widened = (uint64_t)(uint32_t)chunk;
  return (int64_t)hash_combine((uint64_t)state, widened);
}
|
||||
|
||||
/* Boxed entry point for combine_i32: unboxes the Int64 state and the Int32
   chunk, zero-extends the chunk, mixes, and returns a fresh Int64. */
CAMLprim value caml_cc_hash_combine_i32_byte(value v_state, value v_chunk)
{
  CAMLparam2(v_state, v_chunk);
  const uint64_t state = (uint64_t)Int64_val(v_state);
  const uint64_t chunk = (uint64_t)(uint32_t)Int32_val(v_chunk);
  CAMLreturn(caml_copy_int64((int64_t)hash_combine(state, chunk)));
}
|
||||
|
||||
/* --- combine_char -------------------------------------------------------- */
|
||||
|
||||
/* c is passed as untagged int (Char.code) */
|
||||
CAMLprim int64_t caml_cc_hash_combine_char(int64_t state, intnat c)
|
||||
{
|
||||
return (int64_t)hash_combine((uint64_t)state, (uint64_t)(unsigned char)c);
|
||||
}
|
||||
|
||||
/* Boxed entry point for combine_char: unboxes the Int64 state, untags the
   character code, truncates it to unsigned char, mixes, and returns a fresh
   Int64. */
CAMLprim value caml_cc_hash_combine_char_byte(value v_state, value v_c)
{
  CAMLparam2(v_state, v_c);
  const uint64_t state = (uint64_t)Int64_val(v_state);
  const uint64_t code = (uint64_t)(unsigned char)Long_val(v_c);
  CAMLreturn(caml_copy_int64((int64_t)hash_combine(state, code)));
}
|
||||
|
||||
/* --- combine_string ------------------------------------------------------ */
|
||||
|
||||
/* Hashes all bytes of [str] into [state] using 8-byte chunks where possible.
|
||||
[str] is a regular OCaml value; [state] is unboxed int64. */
|
||||
CAMLprim int64_t caml_cc_hash_combine_string(int64_t state, value str)
|
||||
{
|
||||
const char *data = String_val(str);
|
||||
mlsize_t len = caml_string_length(str);
|
||||
uint64_t s = (uint64_t)state;
|
||||
mlsize_t i = 0;
|
||||
|
||||
for (; i + 8 <= len; i += 8) {
|
||||
uint64_t chunk;
|
||||
memcpy(&chunk, data + i, 8);
|
||||
s = hash_combine(s, chunk);
|
||||
}
|
||||
if (i < len) {
|
||||
uint64_t chunk = 0;
|
||||
memcpy(&chunk, data + i, len - i);
|
||||
s = hash_combine(s, chunk);
|
||||
}
|
||||
return (int64_t)s;
|
||||
}
|
||||
|
||||
/* Boxed entry point for combine_string: unboxes the Int64 state, delegates
   to the unboxed implementation, and returns a freshly allocated Int64. */
CAMLprim value caml_cc_hash_combine_string_byte(value v_state, value str)
{
  CAMLparam2(v_state, str);
  const int64_t mixed = caml_cc_hash_combine_string(Int64_val(v_state), str);
  CAMLreturn(caml_copy_int64(mixed));
}
|
||||
|
||||
/* --- fmix64 -------------------------------------------------------------- */
|
||||
|
||||
/* Returns full 64-bit fmix64 result; may be "negative" as signed int64. */
|
||||
CAMLprim int64_t caml_cc_hash_fmix64(int64_t state)
|
||||
{
|
||||
return (int64_t)fmix64((uint64_t)state);
|
||||
}
|
||||
|
||||
/* Boxed entry point for fmix64: unboxes the Int64 state, applies the
   finalizer, and returns a freshly allocated Int64. */
CAMLprim value caml_cc_hash_fmix64_byte(value v_state)
{
  CAMLparam1(v_state);
  const uint64_t mixed = fmix64((uint64_t)Int64_val(v_state));
  CAMLreturn(caml_copy_int64((int64_t)mixed));
}
|
||||
|
||||
/* --- finalize ------------------------------------------------------------ */
|
||||
|
||||
/* Applies fmix64 and masks to Max_long (positive OCaml int). */
|
||||
CAMLprim intnat caml_cc_hash_finalize(int64_t state)
|
||||
{
|
||||
return (intnat)(fmix64((uint64_t)state) & (uint64_t)Max_long);
|
||||
}
|
||||
|
||||
/* Boxed entry point for finalize: unboxes the Int64 state, applies fmix64,
   masks with Max_long, and returns the result as a tagged OCaml int. */
CAMLprim value caml_cc_hash_finalize_byte(value v_state)
{
  CAMLparam1(v_state);
  const uint64_t mixed = fmix64((uint64_t)Int64_val(v_state));
  const intnat masked = (intnat)(mixed & (uint64_t)Max_long);
  CAMLreturn(Val_long(masked));
}
|
||||
Loading…
Add table
Reference in a new issue