revamp xxhash API

This commit is contained in:
Simon Cruanes 2026-03-14 03:34:24 +00:00
parent 418e0fbf7f
commit e52574c5fb
3 changed files with 130 additions and 88 deletions

View file

@ -1,22 +1,37 @@
(** [hash_string_aux s seed] is the raw C binding behind the string hasher:
    it takes the string [s] and an unboxed [int64] [seed] and returns an
    unboxed [int64]. The first symbol is the bytecode stub, the second the
    native-code entry point. *)
external hash_string_aux : string -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_string_byte" "caml_cc_xxhash_string"
(** Accumulated hash state, represented as a plain [int64]. *)
type state = int64

(** Initial state: zero. *)
let seed : state = Int64.zero
(** [mix_int64 h v] mixes the [int64] [v] into state [h] and returns the new
    state. Both arguments and the result are unboxed in native code, and the
    binding is declared [noalloc]. *)
external mix_int64 : (state[@unboxed]) -> (int64[@unboxed]) -> (state[@unboxed])
= "caml_cc_xxhash_mix_int64_byte" "caml_cc_xxhash_mix_int64"
[@@noalloc]
(** [hash_string ?seed s] hashes [s] by calling [hash_string_aux]; [seed]
    defaults to [0L]. *)
let[@inline] hash_string ?(seed = 0L) (s : string) : int64 =
  hash_string_aux s seed
(** [hash_int64 v seed] is the C binding that hashes the [int64] [v] with
    [seed]. Both arguments and the result are unboxed in native code. *)
external hash_int64 :
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
(** [mix_int h v] mixes the [int] [v] into state [h] and returns the new
    state. In native code the state is unboxed and [v] is passed untagged;
    the binding is declared [noalloc]. *)
external mix_int : (state[@unboxed]) -> (int[@untagged]) -> (state[@unboxed])
= "caml_cc_xxhash_mix_int_byte" "caml_cc_xxhash_mix_int"
[@@noalloc]
(** [hash_int v seed] is the C binding that hashes the [int] [v] with [seed].
    Both arguments and the result are passed untagged in native code. *)
external hash_int : (int[@untagged]) -> (int[@untagged]) -> (int[@untagged])
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
(** [mix_int32 h v] mixes the [int32] [v] into state [h] and returns the new
    state. Both arguments and the result are unboxed in native code, and the
    binding is declared [noalloc]. *)
external mix_int32 : (state[@unboxed]) -> (int32[@unboxed]) -> (state[@unboxed])
= "caml_cc_xxhash_mix_int32_byte" "caml_cc_xxhash_mix_int32"
[@@noalloc]
(** [mix64 a b] is the C binding that combines two [int64] values into one.
    Both arguments and the result are unboxed in native code. *)
external mix64 : (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_mix64_byte" "caml_cc_xxhash_mix64"
(** [mix_bool h b] mixes [b] into [h] as the integer [Bool.to_int b]. *)
let[@inline] mix_bool h b =
  let as_int = Bool.to_int b in
  mix_int h as_int

(** [mix_char h c] mixes [c] into [h] as its character code. *)
let[@inline] mix_char h c =
  let code = Char.code c in
  mix_int h code

(** [mix_float h f] mixes [f] into [h] through its [int64] bit pattern. *)
let[@inline] mix_float h f =
  let bits = Int64.bits_of_float f in
  mix_int64 h bits
(** [mix_string_aux h s] is the raw C binding that mixes the string [s] into
    state [h]. The state is unboxed in native code; the string is passed as a
    regular OCaml value. The binding is declared [noalloc]. *)
external mix_string_aux : (state[@unboxed]) -> string -> (state[@unboxed])
= "caml_cc_xxhash_mix_string_byte" "caml_cc_xxhash_mix_string"
[@@noalloc]
(** [finalize64 h] is the C binding that turns the [int64] [h] into a final
    hash value. The argument and the result are unboxed in native code. *)
external finalize64 : (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_finalize64_byte" "caml_cc_xxhash_finalize64"
(** [mix_string h s] mixes the string [s] into state [h] by forwarding to the
    C binding [mix_string_aux]. *)
let[@inline] mix_string h (s : string) =
  mix_string_aux h s
(** [finalize h] turns the accumulated state [h] into the final 64-bit hash.
    The argument and the result are unboxed in native code, and the binding
    is declared [noalloc]. *)
external finalize : (state[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_finalize_byte" "caml_cc_xxhash_finalize"
[@@noalloc]
(* One-shot helpers: each mixes a single value into [seed] (which defaults to
   the module-level [seed]) and finalizes the resulting state. *)

(** [hash_string ?seed s] is [finalize (mix_string seed s)]. *)
let[@inline] hash_string ?(seed = seed) s =
  let mixed = mix_string seed s in
  finalize mixed

(** [hash_int64 ?seed v] is [finalize (mix_int64 seed v)]. *)
let[@inline] hash_int64 ?(seed = seed) v =
  let mixed = mix_int64 seed v in
  finalize mixed

(** [hash_int ?seed v] is [finalize (mix_int seed v)]. *)
let[@inline] hash_int ?(seed = seed) v =
  let mixed = mix_int seed v in
  finalize mixed

(** [hash_int32 ?seed v] is [finalize (mix_int32 seed v)]. *)
let[@inline] hash_int32 ?(seed = seed) v =
  let mixed = mix_int32 seed v in
  finalize mixed

(** [hash_bool ?seed b] is [finalize (mix_bool seed b)]. *)
let[@inline] hash_bool ?(seed = seed) b =
  let mixed = mix_bool seed b in
  finalize mixed

(** [hash_char ?seed c] is [finalize (mix_char seed c)]. *)
let[@inline] hash_char ?(seed = seed) c =
  let mixed = mix_char seed c in
  finalize mixed

(** [hash_float ?seed f] is [finalize (mix_float seed f)]. *)
let[@inline] hash_float ?(seed = seed) f =
  let mixed = mix_float seed f in
  finalize mixed

View file

@ -3,36 +3,76 @@
Fast non-cryptographic hash functions from
{{:https://github.com/Cyan4973/xxHash} xxHash}.
String hashing uses XXH3_64bits (modern, fastest).
Integer hashing delegates to the string hasher via a stack-allocated buffer.
The mixer and finalizer use the XXH64 primitive.
Hashing uses XXH64. To hash a single value use the [hash_foo] convenience
functions. To combine several values, fold with [mix_*] and call
{!finalize}:
{[
  let h =
    seed
    |> (fun h -> mix_string h "hello")
    |> (fun h -> mix_int h 42)
    |> finalize
]}
*)
val hash_string : ?seed:int64 -> string -> int64
(** [hash_string ?seed s] hashes string [s] with optional [seed] (default [0L])
using XXH64. *)
type state = private int64
(** Accumulated hash state. Represented as [int64] so the compiler can unbox
it at call sites. The type is [private]: a [state] can be coerced to [int64]
with [(s :> int64)], but an arbitrary [int64] cannot be used as a [state]
outside this module. *)
external hash_int64 :
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
[@@noalloc]
(** [hash_int64 v seed] hashes the 8 bytes of [v] with [seed] using XXH64.
Noalloc and unboxed in native code. *)
val seed : state
(** Initial state. Equal to [0L]. This is the value used by the [hash_*]
convenience functions when no [?seed] is supplied. *)
external hash_int : (int[@untagged]) -> (int[@untagged]) -> (int[@untagged])
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
external mix_int64 : (state[@unboxed]) -> (int64[@unboxed]) -> (state[@unboxed])
= "caml_cc_xxhash_mix_int64_byte" "caml_cc_xxhash_mix_int64"
[@@noalloc]
(** [hash_int v seed] hashes [v] (an OCaml int) with [seed].
Noalloc and untagged in native code. *)
(** [mix_int64 h v] mixes the [int64] value [v] into the state [h] and returns
the updated state. Noalloc and unboxed in native code. *)
external mix64 : (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_mix64_byte" "caml_cc_xxhash_mix64"
external mix_int : (state[@unboxed]) -> (int[@untagged]) -> (state[@unboxed])
= "caml_cc_xxhash_mix_int_byte" "caml_cc_xxhash_mix_int"
[@@noalloc]
(** [mix64 a b] mixes two int64 values using XXH64: [XXH64(&a, 8, b)].
Suitable for combining hash values. Noalloc and unboxed in native code. *)
(** [mix_int h v] mixes the [int] value [v] into the state [h] and returns the
updated state. Noalloc and untagged in native code. *)
external finalize64 : (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_finalize64_byte" "caml_cc_xxhash_finalize64"
external mix_int32 : (state[@unboxed]) -> (int32[@unboxed]) -> (state[@unboxed])
= "caml_cc_xxhash_mix_int32_byte" "caml_cc_xxhash_mix_int32"
[@@noalloc]
(** [finalize64 h] finalizes/avalanches a hash value using XXH64: [XXH64(&h, 8, 0)].
Noalloc and unboxed in native code. *)
(** [mix_int32 h v] mixes the [int32] value [v] into the state [h] and returns
the updated state. Noalloc and unboxed in native code. *)
val mix_bool : state -> bool -> state
(** Mix a [bool] into the state: [mix_bool h b] is
[mix_int h (Bool.to_int b)]. *)
val mix_char : state -> char -> state
(** Mix a [char] into the state: [mix_char h c] is
[mix_int h (Char.code c)]. *)
val mix_float : state -> float -> state
(** Mix a [float] into the state via [Int64.bits_of_float]: [mix_float h f] is
[mix_int64 h (Int64.bits_of_float f)]. *)
val mix_string : state -> string -> state
(** Mix a [string] into the state using XXH64, with the incoming state used as
the XXH64 seed. *)
external finalize : (state[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_finalize_byte" "caml_cc_xxhash_finalize"
[@@noalloc]
(** Finalise the accumulated state into a 64-bit hash. The C stub hashes the
8 bytes of the state with XXH64 and a seed of [0]. Noalloc and unboxed in
native code. *)
val hash_string : ?seed:state -> string -> int64
(** [hash_string ?seed s] is [finalize (mix_string seed s)].
[seed] defaults to {!seed}. *)
val hash_int64 : ?seed:state -> int64 -> int64
(** [hash_int64 ?seed v] is [finalize (mix_int64 seed v)].
[seed] defaults to {!seed}. *)
val hash_int : ?seed:state -> int -> int64
(** [hash_int ?seed v] is [finalize (mix_int seed v)].
[seed] defaults to {!seed}. *)
val hash_int32 : ?seed:state -> int32 -> int64
(** [hash_int32 ?seed v] is [finalize (mix_int32 seed v)].
[seed] defaults to {!seed}. *)
val hash_bool : ?seed:state -> bool -> int64
(** [hash_bool ?seed b] is [finalize (mix_bool seed b)].
[seed] defaults to {!seed}. *)
val hash_char : ?seed:state -> char -> int64
(** [hash_char ?seed c] is [finalize (mix_char seed c)].
[seed] defaults to {!seed}. *)
val hash_float : ?seed:state -> float -> int64
(** [hash_float ?seed f] is [finalize (mix_float seed f)].
[seed] defaults to {!seed}. *)

View file

@ -8,68 +8,55 @@
#include <caml/mlvalues.h>
#include <stdint.h>
/* hash_string: native signature: (value, int64_t) -> int64_t
string is passed as OCaml value (can't be unboxed), seed is unboxed int64 */
CAMLprim int64_t caml_cc_xxhash_string(value v_s, int64_t seed) {
const char *s = String_val(v_s);
size_t len = caml_string_length(v_s);
return (int64_t)XXH64(s, len, (XXH64_hash_t)seed);
/* mix_int64 (native entry point): hash the raw bytes of `value` with XXH64,
   using `state` as the seed, and return the digest as the new state. */
CAMLprim int64_t caml_cc_xxhash_mix_int64(int64_t state, int64_t value) {
  const XXH64_hash_t seed = (XXH64_hash_t)state;
  const XXH64_hash_t digest = XXH64(&value, sizeof value, seed);
  return (int64_t)digest;
}
CAMLprim value caml_cc_xxhash_string_byte(value v_s, value v_seed) {
CAMLparam2(v_s, v_seed);
int64_t seed = Int64_val(v_seed);
const char *s = String_val(v_s);
size_t len = caml_string_length(v_s);
int64_t result = (int64_t)XXH64(s, len, (XXH64_hash_t)seed);
/* mix_int64 (bytecode entry point): unbox both Int64 arguments, call the
   native implementation and box the result. */
CAMLprim value caml_cc_xxhash_mix_int64_byte(value v_state, value v_value) {
  CAMLparam2(v_state, v_value);
  const int64_t state = Int64_val(v_state);
  const int64_t input = Int64_val(v_value);
  const int64_t mixed = caml_cc_xxhash_mix_int64(state, input);
  CAMLreturn(caml_copy_int64(mixed));
}
/* hash_int64: unboxed (int64_t, int64_t) -> int64_t */
CAMLprim int64_t caml_cc_xxhash_int64(int64_t v, int64_t seed) {
return (int64_t)XXH64(&v, sizeof(v), (XXH64_hash_t)seed);
/* mix_int (native entry point): widen the untagged integer to int64_t, then
   hash those bytes with XXH64 using `state` as the seed. */
CAMLprim int64_t caml_cc_xxhash_mix_int(int64_t state, intnat value) {
  const int64_t widened = (int64_t)value;
  const XXH64_hash_t seed = (XXH64_hash_t)state;
  const XXH64_hash_t digest = XXH64(&widened, sizeof widened, seed);
  return (int64_t)digest;
}
CAMLprim value caml_cc_xxhash_int64_byte(value v_v, value v_seed) {
CAMLparam2(v_v, v_seed);
int64_t v = Int64_val(v_v);
int64_t seed = Int64_val(v_seed);
int64_t result = caml_cc_xxhash_int64(v, seed);
/* mix_int (bytecode entry point): unbox the Int64 state, untag the integer,
   call the native implementation and box the result. */
CAMLprim value caml_cc_xxhash_mix_int_byte(value v_state, value v_value) {
  CAMLparam2(v_state, v_value);
  const int64_t state = Int64_val(v_state);
  const int64_t mixed = caml_cc_xxhash_mix_int(state, Long_val(v_value));
  CAMLreturn(caml_copy_int64(mixed));
}
/* hash_int: untagged (intnat, intnat) -> intnat */
CAMLprim intnat caml_cc_xxhash_int(intnat v, intnat seed) {
int64_t v64 = (int64_t)v;
int64_t seed64 = (int64_t)seed;
return (intnat)caml_cc_xxhash_int64(v64, seed64);
/* mix_int32 (native entry point): widen the 32-bit value to int64_t, then
   hash those bytes with XXH64 using `state` as the seed. */
CAMLprim int64_t caml_cc_xxhash_mix_int32(int64_t state, int32_t value) {
  const int64_t widened = (int64_t)value;
  const XXH64_hash_t seed = (XXH64_hash_t)state;
  const XXH64_hash_t digest = XXH64(&widened, sizeof widened, seed);
  return (int64_t)digest;
}
/* mix_int32 (bytecode entry point): unbox the Int64 state and the Int32
   value, call the native implementation and box the result. */
CAMLprim value caml_cc_xxhash_mix_int32_byte(value v_state, value v_value) {
  CAMLparam2(v_state, v_value);
  const int64_t state = Int64_val(v_state);
  const int32_t input = Int32_val(v_value);
  const int64_t mixed = caml_cc_xxhash_mix_int32(state, input);
  CAMLreturn(caml_copy_int64(mixed));
}
CAMLprim value caml_cc_xxhash_int_byte(value v_v, value v_seed) {
intnat v = Long_val(v_v);
intnat seed = Long_val(v_seed);
return Val_long(caml_cc_xxhash_int(v, seed));
/* mix_string (native entry point): the state arrives unboxed, the string as
   an OCaml value. Hashes the string's bytes with XXH64, using `state` as the
   seed. */
CAMLprim int64_t caml_cc_xxhash_mix_string(int64_t state, value v_s) {
  const char *bytes = String_val(v_s);
  const size_t n_bytes = caml_string_length(v_s);
  const XXH64_hash_t digest = XXH64(bytes, n_bytes, (XXH64_hash_t)state);
  return (int64_t)digest;
}
/* mix_string (bytecode entry point): unbox the Int64 state, forward the
   string value to the native implementation and box the result. */
CAMLprim value caml_cc_xxhash_mix_string_byte(value v_state, value v_s) {
  CAMLparam2(v_state, v_s);
  const int64_t state = Int64_val(v_state);
  const int64_t mixed = caml_cc_xxhash_mix_string(state, v_s);
  CAMLreturn(caml_copy_int64(mixed));
}
/* mix64: unboxed (int64_t, int64_t) -> int64_t [uses XXH64] */
CAMLprim int64_t caml_cc_xxhash_mix64(int64_t a, int64_t b) {
return (int64_t)XXH64(&a, sizeof(a), (XXH64_hash_t)b);
/* finalize (native entry point): hash the bytes of the state itself with
   XXH64 and a zero seed to produce the final 64-bit value. */
CAMLprim int64_t caml_cc_xxhash_finalize(int64_t state) {
  const XXH64_hash_t zero_seed = 0;
  const XXH64_hash_t digest = XXH64(&state, sizeof state, zero_seed);
  return (int64_t)digest;
}
/* mix64 (bytecode entry point): unbox both Int64 arguments, call the native
   mix64 and box the result. */
CAMLprim value caml_cc_xxhash_mix64_byte(value v_a, value v_b) {
  CAMLparam2(v_a, v_b);
  const int64_t lhs = Int64_val(v_a);
  const int64_t rhs = Int64_val(v_b);
  const int64_t mixed = caml_cc_xxhash_mix64(lhs, rhs);
  CAMLreturn(caml_copy_int64(mixed));
}
/* finalize64 (native entry point, unboxed int64_t -> int64_t): hash the bytes
   of `h` with XXH64 and a zero seed. */
CAMLprim int64_t caml_cc_xxhash_finalize64(int64_t h) {
  const XXH64_hash_t zero_seed = 0;
  const XXH64_hash_t digest = XXH64(&h, sizeof h, zero_seed);
  return (int64_t)digest;
}
CAMLprim value caml_cc_xxhash_finalize64_byte(value v_h) {
CAMLparam1(v_h);
int64_t h = Int64_val(v_h);
CAMLreturn(caml_copy_int64(caml_cc_xxhash_finalize64(h)));
/* finalize (bytecode entry point): unbox the Int64 state, call the native
   finalizer and box the result. */
CAMLprim value caml_cc_xxhash_finalize_byte(value v_state) {
  CAMLparam1(v_state);
  const int64_t state = Int64_val(v_state);
  const int64_t digest = caml_cc_xxhash_finalize(state);
  CAMLreturn(caml_copy_int64(digest));
}