Compare commits

..

4 commits

Author SHA1 Message Date
Simon Cruanes
2827011b37 ocamlformat 2026-02-17 02:19:04 +00:00
Simon Cruanes
5a50d42352 CCInt64.hash: migrate to C stub, add tests
Replace the pure-OCaml FNV-1 implementation in CCInt64 with C stubs
that call the same cc_fnv_hash_int64 core as CCInt.hash:

- hash: int64 -> int via caml_cc_hash_int64 ([@unboxed] + [@@noalloc])
- hash_to_int64: int64 -> int64 via the new caml_cc_hash_int64_to_int64
  entry points (the result is masked to be non-negative; the bytecode stub
  allocates via caml_copy_int64, so the external cannot be [@@noalloc])

Add 18 tests in t_int64.ml covering:
- Non-negativity of hash and hash_to_int64
- Consistency between hash and hash_to_int64
- Different inputs produce different hashes
- Determinism
- QuickCheck: hash is non-negative for random inputs
2026-02-16 23:58:49 +00:00
Simon Cruanes
d493f6696b Fix FNV variant in comments: this is FNV-1, not FNV-1a
The implementation uses multiply-then-XOR order, which is FNV-1.
FNV-1a would XOR first, then multiply. The constants (offset_basis
and prime) are the same for both variants; only the operation order
differs. Fix comments in cc_stubs.c, CCInt.ml, and CCHash.ml.

No behavioral change — just correcting the documentation.
2026-02-16 12:48:12 +00:00
Simon Cruanes
bb6de2ff05 CCInt.hash: replace OCaml Int64 loop with C stub (FNV-1)
Implement FNV-1 hashing for CCInt.hash as a C stub instead of a
pure-OCaml Int64-based loop. The core hash function operates on int64
with separate native/bytecode entry points for both int and int64.

- cc_stubs.c: cc_fnv_hash_int64 core, with int and int64 wrappers
- Uses [@untagged] for int args; the int64 entry points are ready for [@unboxed] use
- OCAML_INT_MASK ensures correct byte extraction for 63-bit ints
- Hash values are unchanged from the previous OCaml implementation

Also adds a throughput benchmark in benchs/run_benchs.ml comparing
the old pure-OCaml FNV hash vs the new C stub (~2x faster).
2026-02-16 12:48:12 +00:00
6 changed files with 71 additions and 22 deletions

View file

@ -1830,7 +1830,7 @@ module Hash = struct
let prime = 0x100000001b3L in let prime = 0x100000001b3L in
let h = ref offset_basis in let h = ref offset_basis in
for k = 0 to 7 do for k = 0 to 7 do
h := Int64.(mul !h prime); (h := Int64.(mul !h prime));
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff))) h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
done; done;
Int64.to_int !h land max_int Int64.to_int !h land max_int
@ -1846,10 +1846,7 @@ module Hash = struct
done done
in in
B.throughputN 3 ~repeat B.throughputN 3 ~repeat
[ [ "ocaml_fnv", run_ocaml, (); "c_stub", run_c_stub, () ]
"ocaml_fnv", run_ocaml, ();
"c_stub", run_c_stub, ();
]
let () = let () =
B.Tree.register ("hash" @>>> [ "int" @>> app_ints bench_hash [ 1_000 ] ]) B.Tree.register ("hash" @>>> [ "int" @>> app_ints bench_hash [ 1_000 ] ])

View file

@ -7,7 +7,7 @@ type 'a t = 'a -> hash
type 'a iter = ('a -> unit) -> unit type 'a iter = ('a -> unit) -> unit
type 'a gen = unit -> 'a option type 'a gen = unit -> 'a option
(* FNV hashing (* FNV-1 hashing (multiply then XOR)
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
*) *)
let fnv_offset_basis = 0xcbf29ce484222325L let fnv_offset_basis = 0xcbf29ce484222325L

View file

@ -4,10 +4,11 @@ include Int
type 'a iter = ('a -> unit) -> unit type 'a iter = ('a -> unit) -> unit
(* use FNV: (* use FNV-1:
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *) https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
external hash : (int[@untagged]) -> (int[@untagged]) external hash : (int[@untagged]) -> (int[@untagged])
= "caml_cc_hash_int_byte" "caml_cc_hash_int" [@@noalloc] = "caml_cc_hash_int_byte" "caml_cc_hash_int"
[@@noalloc]
let range i j yield = let range i j yield =
let rec up i j yield = let rec up i j yield =

View file

@ -11,21 +11,14 @@ let max : t -> t -> t = Stdlib.max
let sign i = compare i zero let sign i = compare i zero
(* use FNV: (* use FNV-1:
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *) https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
let hash_to_int64 (n : t) = external hash_to_int64 : (int64[@unboxed]) -> (int64[@unboxed])
let offset_basis = 0xcbf29ce484222325L in = "caml_cc_hash_int64_to_int64_byte" "caml_cc_hash_int64_to_int64"
let prime = 0x100000001b3L in
let h = ref offset_basis in external hash : (int64[@unboxed]) -> (int[@untagged])
for k = 0 to 7 do = "caml_cc_hash_int64_byte" "caml_cc_hash_int64"
h := mul !h prime; [@@noalloc]
(* h := h xor (k-th byte of n) *)
h := logxor !h (logand (shift_right n (k * 8)) 0xffL)
done;
logand !h max_int
let[@inline] hash (n : t) : int = to_int (hash_to_int64 n) land Stdlib.max_int
(* see {!CCInt.popcount} for more details *) (* see {!CCInt.popcount} for more details *)
let[@inline] popcount (b : t) : int = let[@inline] popcount (b : t) : int =

View file

@ -1,9 +1,15 @@
#include <caml/alloc.h>
#include <caml/mlvalues.h> #include <caml/mlvalues.h>
#include <stdint.h> #include <stdint.h>
/* FNV-1a hash for a 64-bit integer. /* FNV-1 hash for a 64-bit integer.
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
FNV-1 order: multiply then XOR (as opposed to FNV-1a which XORs first).
Uses the standard 64-bit FNV parameters:
offset_basis = 0xcbf29ce484222325
prime = 0x00000100000001b3
Core routine: operates on all 8 bytes of an int64_t. */ Core routine: operates on all 8 bytes of an int64_t. */
static inline int64_t cc_fnv_hash_int64(int64_t n) { static inline int64_t cc_fnv_hash_int64(int64_t n) {
@ -46,3 +52,16 @@ CAMLprim intnat caml_cc_hash_int64(int64_t n) {
CAMLprim value caml_cc_hash_int64_byte(value v_n) { CAMLprim value caml_cc_hash_int64_byte(value v_n) {
return Val_long(caml_cc_hash_int64(Int64_val(v_n))); return Val_long(caml_cc_hash_int64(Int64_val(v_n)));
} }
/* --- int64 -> int64 entry points (for hash_to_int64) --- */
/* Native entry point for hash_to_int64: receives and returns a raw
   (unboxed) int64_t. The sign bit of the hash is cleared, so the result is
   never negative (it is at most OCaml's Int64.max_int). */
CAMLprim int64_t caml_cc_hash_int64_to_int64(int64_t n) {
  const int64_t hashed = cc_fnv_hash_int64(n);
  return hashed & INT64_MAX;
}
/* Bytecode entry point for hash_to_int64: unwraps the boxed int64 argument,
   reuses the native entry point to compute the masked (non-negative) hash,
   and returns it as a newly boxed int64. */
CAMLprim value caml_cc_hash_int64_to_int64_byte(value v_n) {
  return caml_copy_int64(caml_cc_hash_int64_to_int64(Int64_val(v_n)));
}

View file

@ -101,3 +101,42 @@ eq' 63 (popcount max_int);;
eq' 1 (popcount min_int);; eq' 1 (popcount min_int);;
eq' 10 (popcount 0b1110010110110001010L);; eq' 10 (popcount 0b1110010110110001010L);;
eq' 5 (popcount 0b1101110000000000L) eq' 5 (popcount 0b1101110000000000L)
(* hash tests *)

(* Use the polymorphic Stdlib comparisons for the checks below: they are
   applied both to the [int] results of [hash] and to the [int64] results of
   [hash_to_int64]. *)
let ( >= ) = Stdlib.( >= )
let ( = ) = Stdlib.( = )
let ( <> ) = Stdlib.( <> );;
(* hash is non-negative, including for the extreme int64 inputs *)
t @@ fun () -> hash 0L >= 0;;
t @@ fun () -> hash 1L >= 0;;
t @@ fun () -> hash (-1L) >= 0;;
t @@ fun () -> hash max_int >= 0;;
t @@ fun () -> hash min_int >= 0;;
(* hash_to_int64 is non-negative *)
t @@ fun () -> CCInt64.compare (hash_to_int64 0L) 0L >= 0;;
t @@ fun () -> CCInt64.compare (hash_to_int64 (-1L)) 0L >= 0;;
t @@ fun () -> CCInt64.compare (hash_to_int64 min_int) 0L >= 0;;
(* hash is consistent with hash_to_int64: converting the int64 hash to a
   native int and clearing its sign bit must give the same value as [hash] *)
t @@ fun () -> hash 42L = Stdlib.(Int64.to_int (hash_to_int64 42L) land max_int)
;;
t @@ fun () ->
hash (-1L) = Stdlib.(Int64.to_int (hash_to_int64 (-1L)) land max_int)
;;
(* different inputs produce different hashes (spot checks on a few pairs) *)
t @@ fun () -> hash 0L <> hash 1L;;
t @@ fun () -> hash 1L <> hash 2L;;
t @@ fun () -> hash 1L <> hash (-1L);;
t @@ fun () -> hash_to_int64 0L <> hash_to_int64 1L;;
(* deterministic: hashing the same input twice gives the same result *)
t @@ fun () -> hash 123L = hash 123L;;
t @@ fun () -> hash_to_int64 123L = hash_to_int64 123L;;
(* quickcheck: both hashes are always non-negative. Inputs come from the
   int64 generator rather than from mapped native ints, so values that do
   not fit in a native [int] are exercised too. *)
q Q.int64 (fun n -> hash n >= 0);;
q Q.int64 (fun n -> CCInt64.compare (hash_to_int64 n) 0L >= 0)