mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-05-05 17:04:25 -04:00
revamp xxhash API
This commit is contained in:
parent
418e0fbf7f
commit
e52574c5fb
3 changed files with 130 additions and 88 deletions
|
|
@ -1,22 +1,37 @@
|
||||||
external hash_string_aux : string -> (int64[@unboxed]) -> (int64[@unboxed])
|
(** Accumulated hash state. In this implementation it is a plain [int64]. *)
type state = int64
|
||||||
= "caml_cc_xxhash_string_byte" "caml_cc_xxhash_string"
|
|
||||||
|
(** Initial hash state: the 64-bit integer zero. *)
let seed : state = Int64.zero
|
||||||
|
|
||||||
|
external mix_int64 : (state[@unboxed]) -> (int64[@unboxed]) -> (state[@unboxed])
|
||||||
|
= "caml_cc_xxhash_mix_int64_byte" "caml_cc_xxhash_mix_int64"
|
||||||
[@@noalloc]
|
[@@noalloc]
|
||||||
|
|
||||||
let[@inline] hash_string ?(seed = 0L) s = hash_string_aux s seed
|
external mix_int : (state[@unboxed]) -> (int[@untagged]) -> (state[@unboxed])
|
||||||
|
= "caml_cc_xxhash_mix_int_byte" "caml_cc_xxhash_mix_int"
|
||||||
external hash_int64 :
|
|
||||||
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
|
||||||
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
|
|
||||||
[@@noalloc]
|
[@@noalloc]
|
||||||
|
|
||||||
external hash_int : (int[@untagged]) -> (int[@untagged]) -> (int[@untagged])
|
external mix_int32 : (state[@unboxed]) -> (int32[@unboxed]) -> (state[@unboxed])
|
||||||
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
|
= "caml_cc_xxhash_mix_int32_byte" "caml_cc_xxhash_mix_int32"
|
||||||
[@@noalloc]
|
[@@noalloc]
|
||||||
|
|
||||||
external mix64 : (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
(** [mix_bool h b] mixes [b] into [h] as the integer [1] when [b] is [true]
    and [0] when it is [false]. *)
let[@inline] mix_bool h b =
  let as_int = if b then 1 else 0 in
  mix_int h as_int
|
||||||
= "caml_cc_xxhash_mix64_byte" "caml_cc_xxhash_mix64"
|
(** [mix_char h c] mixes the character code of [c] into [h] as an integer. *)
let[@inline] mix_char h c =
  let code = Char.code c in
  mix_int h code
|
||||||
|
(** [mix_float h f] mixes the raw IEEE 754 bit pattern of [f] into [h].
    Since the bits themselves are mixed, floats with different bit patterns
    (for example [0.0] and [-0.0]) are mixed as different inputs. *)
let[@inline] mix_float h f =
  let bits = Int64.bits_of_float f in
  mix_int64 h bits
|
||||||
|
|
||||||
|
external mix_string_aux : (state[@unboxed]) -> string -> (state[@unboxed])
|
||||||
|
= "caml_cc_xxhash_mix_string_byte" "caml_cc_xxhash_mix_string"
|
||||||
[@@noalloc]
|
[@@noalloc]
|
||||||
|
|
||||||
external finalize64 : (int64[@unboxed]) -> (int64[@unboxed])
|
(** [mix_string state str] mixes [str] into [state]. This is a thin wrapper
    that forwards both arguments, unchanged, to the external stub
    [mix_string_aux]. *)
let[@inline] mix_string state str = mix_string_aux state str
|
||||||
= "caml_cc_xxhash_finalize64_byte" "caml_cc_xxhash_finalize64"
|
|
||||||
|
external finalize : (state[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_finalize_byte" "caml_cc_xxhash_finalize"
|
||||||
[@@noalloc]
|
[@@noalloc]
|
||||||
|
|
||||||
|
(** [hash_string ?seed s] mixes [s] into the starting state and finalizes it.
    The starting state is [?seed] when given, otherwise the module-level
    [seed]. *)
let[@inline] hash_string ?seed:(init = seed) s =
  mix_string init s |> finalize
|
||||||
|
(** [hash_int64 ?seed v] mixes the [int64] [v] into the starting state and
    finalizes it. The starting state is [?seed] when given, otherwise the
    module-level [seed]. *)
let[@inline] hash_int64 ?seed:(init = seed) v =
  mix_int64 init v |> finalize
|
||||||
|
(** [hash_int ?seed v] mixes the [int] [v] into the starting state and
    finalizes it. The starting state is [?seed] when given, otherwise the
    module-level [seed]. *)
let[@inline] hash_int ?seed:(init = seed) v =
  mix_int init v |> finalize
|
||||||
|
(** [hash_int32 ?seed v] mixes the [int32] [v] into the starting state and
    finalizes it. The starting state is [?seed] when given, otherwise the
    module-level [seed]. *)
let[@inline] hash_int32 ?seed:(init = seed) v =
  mix_int32 init v |> finalize
|
||||||
|
(** [hash_bool ?seed b] mixes the boolean [b] into the starting state and
    finalizes it. The starting state is [?seed] when given, otherwise the
    module-level [seed]. *)
let[@inline] hash_bool ?seed:(init = seed) b =
  mix_bool init b |> finalize
|
||||||
|
(** [hash_char ?seed c] mixes the character [c] into the starting state and
    finalizes it. The starting state is [?seed] when given, otherwise the
    module-level [seed]. *)
let[@inline] hash_char ?seed:(init = seed) c =
  mix_char init c |> finalize
|
||||||
|
(** [hash_float ?seed f] mixes the float [f] into the starting state and
    finalizes it. The starting state is [?seed] when given, otherwise the
    module-level [seed]. *)
let[@inline] hash_float ?seed:(init = seed) f =
  mix_float init f |> finalize
|
||||||
|
|
|
||||||
|
|
@ -3,36 +3,76 @@
|
||||||
Fast non-cryptographic hash functions from
|
Fast non-cryptographic hash functions from
|
||||||
{{:https://github.com/Cyan4973/xxHash} xxHash}.
|
{{:https://github.com/Cyan4973/xxHash} xxHash}.
|
||||||
|
|
||||||
String hashing uses XXH3_64bits (modern, fastest).
|
Hashing uses XXH64. To hash a single value use the [hash_foo] convenience
|
||||||
Integer hashing delegates to the string hasher via a stack-allocated buffer.
|
functions. To combine several values, fold with [mix_*] and call
|
||||||
The mixer and finalizer use the XXH64 primitive.
|
{!finalize}:
|
||||||
|
|
||||||
|
{[
|
||||||
|
let h =
|
||||||
|
seed
|
||||||
|
|> (fun h -> mix_string h "hello")
|
||||||
|
|> (fun h -> mix_int h 42)
|
||||||
|
|> finalize
|
||||||
|
]}
|
||||||
*)
|
*)
|
||||||
|
|
||||||
val hash_string : ?seed:int64 -> string -> int64
|
type state = private int64
|
||||||
(** [hash_string ?seed s] hashes string [s] with optional [seed] (default [0L])
|
(** Accumulated hash state. Represented as [int64] so the compiler can unbox
|
||||||
using XXH3_64bits_withSeed. *)
|
it at call sites. *)
|
||||||
|
|
||||||
external hash_int64 :
|
val seed : state
|
||||||
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
(** Initial state. Equal to [0L]. *)
|
||||||
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
|
|
||||||
[@@noalloc]
|
|
||||||
(** [hash_int64 v seed] hashes [v] with [seed] using XXH3_64bits_withSeed.
|
|
||||||
Noalloc and unboxed in native code. *)
|
|
||||||
|
|
||||||
external hash_int : (int[@untagged]) -> (int[@untagged]) -> (int[@untagged])
|
external mix_int64 : (state[@unboxed]) -> (int64[@unboxed]) -> (state[@unboxed])
|
||||||
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
|
= "caml_cc_xxhash_mix_int64_byte" "caml_cc_xxhash_mix_int64"
|
||||||
[@@noalloc]
|
[@@noalloc]
|
||||||
(** [hash_int v seed] hashes [v] (an OCaml int) with [seed].
|
(** Mix an [int64] value into the state. Noalloc and unboxed in native code. *)
|
||||||
Noalloc and untagged in native code. *)
|
|
||||||
|
|
||||||
external mix64 : (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
external mix_int : (state[@unboxed]) -> (int[@untagged]) -> (state[@unboxed])
|
||||||
= "caml_cc_xxhash_mix64_byte" "caml_cc_xxhash_mix64"
|
= "caml_cc_xxhash_mix_int_byte" "caml_cc_xxhash_mix_int"
|
||||||
[@@noalloc]
|
[@@noalloc]
|
||||||
(** [mix64 a b] mixes two int64 values using XXH64: [XXH64(&a, 8, b)].
|
(** Mix an [int] value into the state. Noalloc and untagged in native code. *)
|
||||||
Suitable for combining hash values. Noalloc and unboxed in native code. *)
|
|
||||||
|
|
||||||
external finalize64 : (int64[@unboxed]) -> (int64[@unboxed])
|
external mix_int32 : (state[@unboxed]) -> (int32[@unboxed]) -> (state[@unboxed])
|
||||||
= "caml_cc_xxhash_finalize64_byte" "caml_cc_xxhash_finalize64"
|
= "caml_cc_xxhash_mix_int32_byte" "caml_cc_xxhash_mix_int32"
|
||||||
[@@noalloc]
|
[@@noalloc]
|
||||||
(** [finalize64 h] finalizes/avalanches a hash value using XXH64: [XXH64(&h, 8, 0)].
|
(** Mix an [int32] value into the state. Noalloc and unboxed in native code. *)
|
||||||
Noalloc and unboxed in native code. *)
|
|
||||||
|
val mix_bool : state -> bool -> state
|
||||||
|
(** Mix a [bool] into the state. *)
|
||||||
|
|
||||||
|
val mix_char : state -> char -> state
|
||||||
|
(** Mix a [char] into the state. *)
|
||||||
|
|
||||||
|
val mix_float : state -> float -> state
|
||||||
|
(** Mix a [float] into the state via [Int64.bits_of_float]. *)
|
||||||
|
|
||||||
|
val mix_string : state -> string -> state
|
||||||
|
(** Mix a [string] into the state using XXH64. *)
|
||||||
|
|
||||||
|
external finalize : (state[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_finalize_byte" "caml_cc_xxhash_finalize"
|
||||||
|
[@@noalloc]
|
||||||
|
(** Finalise the accumulated state into a 64-bit hash. Noalloc and unboxed in
|
||||||
|
native code. *)
|
||||||
|
|
||||||
|
val hash_string : ?seed:state -> string -> int64
|
||||||
|
(** [hash_string ?seed s] is [finalize (mix_string seed s)]. *)
|
||||||
|
|
||||||
|
val hash_int64 : ?seed:state -> int64 -> int64
|
||||||
|
(** [hash_int64 ?seed v] is [finalize (mix_int64 seed v)]. *)
|
||||||
|
|
||||||
|
val hash_int : ?seed:state -> int -> int64
|
||||||
|
(** [hash_int ?seed v] is [finalize (mix_int seed v)]. *)
|
||||||
|
|
||||||
|
val hash_int32 : ?seed:state -> int32 -> int64
|
||||||
|
(** [hash_int32 ?seed v] is [finalize (mix_int32 seed v)]. *)
|
||||||
|
|
||||||
|
val hash_bool : ?seed:state -> bool -> int64
|
||||||
|
(** [hash_bool ?seed b] is [finalize (mix_bool seed b)]. *)
|
||||||
|
|
||||||
|
val hash_char : ?seed:state -> char -> int64
|
||||||
|
(** [hash_char ?seed c] is [finalize (mix_char seed c)]. *)
|
||||||
|
|
||||||
|
val hash_float : ?seed:state -> float -> int64
|
||||||
|
(** [hash_float ?seed f] is [finalize (mix_float seed f)]. *)
|
||||||
|
|
|
||||||
|
|
@ -8,68 +8,55 @@
|
||||||
#include <caml/mlvalues.h>
|
#include <caml/mlvalues.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
/* hash_string: native signature: (value, int64_t) -> int64_t
|
/* mix_int64: (int64_t state, int64_t value) -> int64_t */
|
||||||
string is passed as OCaml value (can't be unboxed), seed is unboxed int64 */
|
CAMLprim int64_t caml_cc_xxhash_mix_int64(int64_t state, int64_t value) {
|
||||||
CAMLprim int64_t caml_cc_xxhash_string(value v_s, int64_t seed) {
|
return (int64_t)XXH64(&value, sizeof(value), (XXH64_hash_t)state);
|
||||||
const char *s = String_val(v_s);
|
|
||||||
size_t len = caml_string_length(v_s);
|
|
||||||
return (int64_t)XXH64(s, len, (XXH64_hash_t)seed);
|
|
||||||
}
|
}
|
||||||
|
CAMLprim value caml_cc_xxhash_mix_int64_byte(value v_state, value v_value) {
|
||||||
CAMLprim value caml_cc_xxhash_string_byte(value v_s, value v_seed) {
|
CAMLparam2(v_state, v_value);
|
||||||
CAMLparam2(v_s, v_seed);
|
int64_t result = caml_cc_xxhash_mix_int64(Int64_val(v_state), Int64_val(v_value));
|
||||||
int64_t seed = Int64_val(v_seed);
|
|
||||||
const char *s = String_val(v_s);
|
|
||||||
size_t len = caml_string_length(v_s);
|
|
||||||
int64_t result = (int64_t)XXH64(s, len, (XXH64_hash_t)seed);
|
|
||||||
CAMLreturn(caml_copy_int64(result));
|
CAMLreturn(caml_copy_int64(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* hash_int64: unboxed (int64_t, int64_t) -> int64_t */
|
/* mix_int: (int64_t state, intnat value) -> int64_t */
|
||||||
CAMLprim int64_t caml_cc_xxhash_int64(int64_t v, int64_t seed) {
|
CAMLprim int64_t caml_cc_xxhash_mix_int(int64_t state, intnat value) {
|
||||||
return (int64_t)XXH64(&v, sizeof(v), (XXH64_hash_t)seed);
|
int64_t v = (int64_t)value;
|
||||||
|
return (int64_t)XXH64(&v, sizeof(v), (XXH64_hash_t)state);
|
||||||
}
|
}
|
||||||
|
CAMLprim value caml_cc_xxhash_mix_int_byte(value v_state, value v_value) {
|
||||||
CAMLprim value caml_cc_xxhash_int64_byte(value v_v, value v_seed) {
|
CAMLparam2(v_state, v_value);
|
||||||
CAMLparam2(v_v, v_seed);
|
int64_t result = caml_cc_xxhash_mix_int(Int64_val(v_state), Long_val(v_value));
|
||||||
int64_t v = Int64_val(v_v);
|
|
||||||
int64_t seed = Int64_val(v_seed);
|
|
||||||
int64_t result = caml_cc_xxhash_int64(v, seed);
|
|
||||||
CAMLreturn(caml_copy_int64(result));
|
CAMLreturn(caml_copy_int64(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* hash_int: untagged (intnat, intnat) -> intnat */
|
/* mix_int32: (int64_t state, int32_t value) -> int64_t */
|
||||||
CAMLprim intnat caml_cc_xxhash_int(intnat v, intnat seed) {
|
CAMLprim int64_t caml_cc_xxhash_mix_int32(int64_t state, int32_t value) {
|
||||||
int64_t v64 = (int64_t)v;
|
int64_t v = (int64_t)value;
|
||||||
int64_t seed64 = (int64_t)seed;
|
return (int64_t)XXH64(&v, sizeof(v), (XXH64_hash_t)state);
|
||||||
return (intnat)caml_cc_xxhash_int64(v64, seed64);
|
}
|
||||||
|
CAMLprim value caml_cc_xxhash_mix_int32_byte(value v_state, value v_value) {
|
||||||
|
CAMLparam2(v_state, v_value);
|
||||||
|
int64_t result = caml_cc_xxhash_mix_int32(Int64_val(v_state), Int32_val(v_value));
|
||||||
|
CAMLreturn(caml_copy_int64(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
CAMLprim value caml_cc_xxhash_int_byte(value v_v, value v_seed) {
|
/* mix_string: native signature: (int64_t state, value string) -> int64_t */
|
||||||
intnat v = Long_val(v_v);
|
CAMLprim int64_t caml_cc_xxhash_mix_string(int64_t state, value v_s) {
|
||||||
intnat seed = Long_val(v_seed);
|
const char *s = String_val(v_s);
|
||||||
return Val_long(caml_cc_xxhash_int(v, seed));
|
size_t len = caml_string_length(v_s);
|
||||||
|
return (int64_t)XXH64(s, len, (XXH64_hash_t)state);
|
||||||
|
}
|
||||||
|
CAMLprim value caml_cc_xxhash_mix_string_byte(value v_state, value v_s) {
|
||||||
|
CAMLparam2(v_state, v_s);
|
||||||
|
int64_t result = caml_cc_xxhash_mix_string(Int64_val(v_state), v_s);
|
||||||
|
CAMLreturn(caml_copy_int64(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* mix64: unboxed (int64_t, int64_t) -> int64_t [uses XXH64] */
|
/* finalize: int64_t state -> int64_t */
|
||||||
CAMLprim int64_t caml_cc_xxhash_mix64(int64_t a, int64_t b) {
|
CAMLprim int64_t caml_cc_xxhash_finalize(int64_t state) {
|
||||||
return (int64_t)XXH64(&a, sizeof(a), (XXH64_hash_t)b);
|
return (int64_t)XXH64(&state, sizeof(state), 0);
|
||||||
}
|
}
|
||||||
|
CAMLprim value caml_cc_xxhash_finalize_byte(value v_state) {
|
||||||
CAMLprim value caml_cc_xxhash_mix64_byte(value v_a, value v_b) {
|
CAMLparam1(v_state);
|
||||||
CAMLparam2(v_a, v_b);
|
CAMLreturn(caml_copy_int64(caml_cc_xxhash_finalize(Int64_val(v_state))));
|
||||||
int64_t a = Int64_val(v_a);
|
|
||||||
int64_t b = Int64_val(v_b);
|
|
||||||
CAMLreturn(caml_copy_int64(caml_cc_xxhash_mix64(a, b)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* finalize64: unboxed int64_t -> int64_t [uses XXH64 with seed=0] */
|
|
||||||
CAMLprim int64_t caml_cc_xxhash_finalize64(int64_t h) {
|
|
||||||
return (int64_t)XXH64(&h, sizeof(h), 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
CAMLprim value caml_cc_xxhash_finalize64_byte(value v_h) {
|
|
||||||
CAMLparam1(v_h);
|
|
||||||
int64_t h = Int64_val(v_h);
|
|
||||||
CAMLreturn(caml_copy_int64(caml_cc_xxhash_finalize64(h)));
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue