xxhash bindings

This commit is contained in:
Simon Cruanes 2026-03-14 03:09:31 +00:00
parent aba4c79625
commit a999f564d6
7 changed files with 7679 additions and 0 deletions

2
src/xxhash/README.md Normal file
View file

@ -0,0 +1,2 @@
The xxHash code is vendored directly from https://github.com/Cyan4973/xxHash/ and remains under its BSD 2-Clause license (author: Yann Collet).

View file

@ -0,0 +1,22 @@
(* Raw binding behind [hash_string]: takes the string, then the seed.
   The first C symbol is the bytecode stub, the second the native one.
   In native code the seed and the result are unboxed [int64]s, while the
   string is passed as a regular OCaml value. *)
external hash_string_aux : string -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_string_byte" "caml_cc_xxhash_string"
[@@noalloc]
(* [hash_string ?seed s] hashes [s]; when [seed] is omitted, [0L] is used.
   Only reorders the arguments for the raw binding [hash_string_aux]. *)
let[@inline] hash_string ?seed:(seed = 0L) s =
  hash_string_aux s seed
(* [hash_int64 v seed]: hash of the 64-bit integer [v] under [seed].
   Arguments and result are unboxed in native code; the bytecode stub is
   the first C symbol, the native stub the second. *)
external hash_int64 :
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
[@@noalloc]
(* [hash_int v seed]: hash of the OCaml [int] [v] under [seed].
   Arguments and result are passed untagged in native code. *)
external hash_int : (int[@untagged]) -> (int[@untagged]) -> (int[@untagged])
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
[@@noalloc]
(* [mix64 a b]: combines two 64-bit values; the C stub hashes [a] with [b]
   as the seed. Unboxed in native code. *)
external mix64 : (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_mix64_byte" "caml_cc_xxhash_mix64"
[@@noalloc]
(* [finalize64 h]: final scrambling of a 64-bit hash value; the C stub
   hashes [h] with a seed of 0. Unboxed in native code. *)
external finalize64 : (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_finalize64_byte" "caml_cc_xxhash_finalize64"
[@@noalloc]

View file

@ -0,0 +1,38 @@
(** XXHash bindings.
Fast non-cryptographic hash functions from
{{:https://github.com/Cyan4973/xxHash} xxHash}.
All functions are implemented with the 64-bit XXH64 algorithm
(the C stubs are compiled with XXH3 disabled).
Integer hashing feeds the in-memory bytes of the integer to XXH64.
The mixer and finalizer use the same XXH64 primitive.
*)
val hash_string : ?seed:int64 -> string -> int64
(** [hash_string ?seed s] hashes the bytes of string [s] with optional [seed]
(default [0L]) using XXH64. *)
external hash_int64 :
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
[@@noalloc]
(** [hash_int64 v seed] hashes the 8 bytes of [v] with [seed] using XXH64:
[XXH64(&v, 8, seed)].
Noalloc and unboxed in native code. *)
external hash_int : (int[@untagged]) -> (int[@untagged]) -> (int[@untagged])
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
[@@noalloc]
(** [hash_int v seed] hashes [v] (an OCaml int) with [seed].
Both arguments are converted to 64-bit integers and hashed like
{!hash_int64}; the 64-bit result is then converted back to an OCaml [int].
Noalloc and untagged in native code. *)
external mix64 : (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_mix64_byte" "caml_cc_xxhash_mix64"
[@@noalloc]
(** [mix64 a b] mixes two int64 values using XXH64: [XXH64(&a, 8, b)].
[a] is the data being hashed and [b] is used as the seed, so the two
arguments play different roles.
Suitable for combining hash values. Noalloc and unboxed in native code. *)
external finalize64 : (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_xxhash_finalize64_byte" "caml_cc_xxhash_finalize64"
[@@noalloc]
(** [finalize64 h] finalizes/avalanches a hash value using XXH64: [XXH64(&h, 8, 0)].
This is the same computation as [mix64 h 0L].
Noalloc and unboxed in native code. *)

10
src/xxhash/dune Normal file
View file

@ -0,0 +1,10 @@
; Library exposing the xxHash bindings as containers.xxhash.
(library
(name containers_xxhash)
(public_name containers.xxhash)
(synopsis "xxHash bindings for containers")
(libraries containers)
; Only stubs.c is compiled as a C stub; it includes xxhash.h with
; XXH_INLINE_ALL, so the hashing code is compiled into that same unit.
(foreign_stubs
(language c)
(flags :standard -O2)
(names stubs))
; Raise the ocamlopt inlining level for this library.
(ocamlopt_flags :standard -inline 100))

75
src/xxhash/stubs.c Normal file
View file

@ -0,0 +1,75 @@
#define XXH_NO_XXH3
#define XXH_NO_STREAM
#define XXH_INLINE_ALL
#include "xxhash.h"
#include <caml/alloc.h>
#include <caml/memory.h>
#include <caml/mlvalues.h>
#include <stdint.h>
/*
 * hash_string, native-code stub: (value, int64_t) -> int64_t.
 * The string stays a regular OCaml value (strings cannot be unboxed);
 * the seed and the result are raw 64-bit integers.
 * Hashes the whole string payload with XXH64 under the given seed.
 */
CAMLprim int64_t caml_cc_xxhash_string(value v_s, int64_t seed) {
  const XXH64_hash_t digest =
      XXH64(String_val(v_s), caml_string_length(v_s), (XXH64_hash_t)seed);
  return (int64_t)digest;
}
/*
 * hash_string, bytecode stub: both arguments are boxed OCaml values.
 * Unboxes the seed, hashes the string payload with XXH64, and returns the
 * result as a freshly allocated boxed int64.
 */
CAMLprim value caml_cc_xxhash_string_byte(value v_s, value v_seed) {
  CAMLparam2(v_s, v_seed);
  const XXH64_hash_t seed = (XXH64_hash_t)Int64_val(v_seed);
  /* The hash is fully computed before the allocation below, so the string
     pointer is never used after a possible GC. */
  const XXH64_hash_t digest =
      XXH64(String_val(v_s), caml_string_length(v_s), seed);
  CAMLreturn(caml_copy_int64((int64_t)digest));
}
/*
 * hash_int64, native-code stub: unboxed (int64_t, int64_t) -> int64_t.
 * Runs XXH64 over the in-memory bytes of `v`, using `seed` as the seed.
 */
CAMLprim int64_t caml_cc_xxhash_int64(int64_t v, int64_t seed) {
  const XXH64_hash_t digest = XXH64(&v, sizeof v, (XXH64_hash_t)seed);
  return (int64_t)digest;
}
/*
 * hash_int64, bytecode stub: unboxes both arguments, delegates to the
 * native stub, and boxes the 64-bit result.
 */
CAMLprim value caml_cc_xxhash_int64_byte(value v_v, value v_seed) {
  CAMLparam2(v_v, v_seed);
  const int64_t digest =
      caml_cc_xxhash_int64(Int64_val(v_v), Int64_val(v_seed));
  CAMLreturn(caml_copy_int64(digest));
}
/*
 * hash_int, native-code stub: untagged (intnat, intnat) -> intnat.
 * Converts both arguments to int64_t, reuses the int64 hasher, and converts
 * the 64-bit result back to intnat.
 */
CAMLprim intnat caml_cc_xxhash_int(intnat v, intnat seed) {
  return (intnat)caml_cc_xxhash_int64((int64_t)v, (int64_t)seed);
}
/*
 * hash_int, bytecode stub: untags both OCaml ints, delegates to the native
 * stub, and re-tags the result. Only immediate integers are handled here.
 */
CAMLprim value caml_cc_xxhash_int_byte(value v_v, value v_seed) {
  const intnat hashed = caml_cc_xxhash_int(Long_val(v_v), Long_val(v_seed));
  return Val_long(hashed);
}
/*
 * mix64, native-code stub: unboxed (int64_t, int64_t) -> int64_t.
 * Runs XXH64 over the in-memory bytes of `a`, with `b` acting as the seed.
 */
CAMLprim int64_t caml_cc_xxhash_mix64(int64_t a, int64_t b) {
  const XXH64_hash_t seed = (XXH64_hash_t)b;
  return (int64_t)XXH64(&a, sizeof a, seed);
}
/*
 * mix64, bytecode stub: unboxes both arguments, delegates to the native
 * stub, and boxes the 64-bit result.
 */
CAMLprim value caml_cc_xxhash_mix64_byte(value v_a, value v_b) {
  CAMLparam2(v_a, v_b);
  const int64_t mixed = caml_cc_xxhash_mix64(Int64_val(v_a), Int64_val(v_b));
  CAMLreturn(caml_copy_int64(mixed));
}
/*
 * finalize64, native-code stub: unboxed int64_t -> int64_t.
 * Runs XXH64 over the in-memory bytes of `h` with a fixed seed of 0.
 */
CAMLprim int64_t caml_cc_xxhash_finalize64(int64_t h) {
  const XXH64_hash_t digest = XXH64(&h, sizeof h, 0);
  return (int64_t)digest;
}
/*
 * finalize64, bytecode stub: unboxes the argument, delegates to the native
 * stub, and boxes the 64-bit result.
 */
CAMLprim value caml_cc_xxhash_finalize64_byte(value v_h) {
  CAMLparam1(v_h);
  const int64_t finalized = caml_cc_xxhash_finalize64(Int64_val(v_h));
  CAMLreturn(caml_copy_int64(finalized));
}

42
src/xxhash/xxhash.c Normal file
View file

@ -0,0 +1,42 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2023 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*
* xxhash.c instantiates functions defined in xxhash.h
*/
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"

7490
src/xxhash/xxhash.h Normal file

File diff suppressed because it is too large Load diff