ocamlformat

CCInt64.hash: migrate to C stub, add tests
Replace the pure-OCaml FNV-1 implementation in CCInt64 with C stubs that call the same cc_fnv_hash_int64 core as CCInt.hash:
- hash: int64 -> int via caml_cc_hash_int64 ([@unboxed] + [@@noalloc])
- hash_to_int64: int64 -> int64 via new caml_cc_hash_int64_to_int64 entry points (masks result to non-negative, needs caml_copy_int64 for bytecode so cannot be [@@noalloc])

Add 18 tests in t_int64.ml covering:
- Non-negativity of hash and hash_to_int64
- Consistency between hash and hash_to_int64
- Different inputs produce different hashes
- Determinism
- QuickCheck: hash is non-negative for random inputs
2026-03-07 21:27:55 -05:00 · 2026-02-17 02:19:04 +00:00 · 2026-02-16 23:58:49 +00:00 · 2026-02-16 12:48:12 +00:00 · 2026-02-16 12:48:12 +00:00 · 2026-02-12 11:47:50 +00:00
7 changed files with 62 additions and 63 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,6 @@
 - fix leb128 slice bug
 - fix leb128 `Int64.min_int` bug
 - add tests for leb128 library (#486)
- fix size explosion in `t_pvec.ml` found in CI
 - some breaking changes after the big bump to 4.08 as lower bound, thanks to @fardale for the cleanup
  * breaking: CCListLabel.compare and CCListLabel.equal takes the function on the elements as named arguments
  * breaking: CCListLabel.init now takes the length as a named arguments to follow the Stdlib
--- a/benchs/run_benchs.ml
+++ b/benchs/run_benchs.ml
@@ -1535,7 +1535,7 @@ module Str = struct
  let rand_str_ ?(among = "abcdefgh") n =
    let module Q = QCheck in
    let st = Random.State.make [| n + 17 |] in
-    let gen_c = QCheck.Gen.oneof_list (CCString.to_list among) in
+    let gen_c = QCheck.Gen.oneofl (CCString.to_list among) in
    QCheck.Gen.string_size ~gen:gen_c (QCheck.Gen.return n) st

  let find ?(start = 0) ~sub s =
@@ -1830,8 +1830,8 @@ module Hash = struct
    let prime = 0x100000001b3L in
    let h = ref offset_basis in
    for k = 0 to 7 do
-      (h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff))));
-      h := Int64.(mul !h prime)
+      (h := Int64.(mul !h prime));
+      h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
    done;
    Int64.to_int !h land max_int

--- a/src/core/CCHash.ml
+++ b/src/core/CCHash.ml
@@ -7,33 +7,31 @@ type 'a t = 'a -> hash
 type 'a iter = ('a -> unit) -> unit
 type 'a gen = unit -> 'a option

-(* same as CCInt: *)
-open struct
-  external hash_int_ : (int[@untagged]) -> (int[@untagged])
-    = "caml_cc_hash_int_byte" "caml_cc_hash_int"
-  [@@noalloc]
-
-  external hash64_ : (int64[@unboxed]) -> (int[@untagged])
-    = "caml_cc_hash_int64_byte" "caml_cc_hash_int64"
-  [@@noalloc]
-
-  (* FNV-1a hashing (XOR then multiply )
+(* FNV-1 hashing (multiply then XOR)
   https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
 *)
 let fnv_offset_basis = 0xcbf29ce484222325L
 let fnv_prime = 0x100000001b3L
-end

-(* TODO: also port to C *)
+(* hash an integer *)
+let hash_int_ n =
+  let h = ref fnv_offset_basis in
+  for k = 0 to 7 do
+    (h := Int64.(mul !h fnv_prime));
+    h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
+  done;
+  (* truncate back to int and remove sign *)
+  Int64.to_int !h land max_int
+
 let combine2 a b =
  let h = ref fnv_offset_basis in
  (* we only do one loop, where we mix bytes of [a] and [b], so as
     to simplify control flow *)
  for k = 0 to 7 do
+    (h := Int64.(mul !h fnv_prime));
    (h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
    (h := Int64.(mul !h fnv_prime));
-    (h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
-    h := Int64.(mul !h fnv_prime)
+    h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff)))
  done;
  Int64.to_int !h land max_int

@@ -44,26 +42,26 @@ let combine3 a b c =
  (* we only do one loop, where we mix bytes of [a] [b] and [c], so as
     to simplify control flow *)
  for k = 0 to 7 do
+    (h := Int64.(mul !h fnv_prime));
    (h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
    (h := Int64.(mul !h fnv_prime));
    (h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
    (h := Int64.(mul !h fnv_prime));
-    (h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
-    h := Int64.(mul !h fnv_prime)
+    h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff)))
  done;
  Int64.to_int !h land max_int

 let combine4 a b c d =
  let h = ref fnv_offset_basis in
  for k = 0 to 7 do
+    (h := Int64.(mul !h fnv_prime));
    (h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
    (h := Int64.(mul !h fnv_prime));
    (h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
    (h := Int64.(mul !h fnv_prime));
    (h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
    (h := Int64.(mul !h fnv_prime));
-    (h := Int64.(logxor !h (of_int ((d lsr (k * 8)) land 0xff))));
-    h := Int64.(mul !h fnv_prime)
+    h := Int64.(logxor !h (of_int ((d lsr (k * 8)) land 0xff)))
  done;
  Int64.to_int !h land max_int

@@ -74,19 +72,29 @@ let combine6 a b c d e f = combine4 a b c (combine3 d e f)

 let const h _ = h
 let const0 _ = 0
-let[@inline] int i = hash_int_ i
+let int = hash_int_

-let[@inline] bool b =
+let bool b =
  hash_int_
    (if b then
       1
     else
       2)

-let[@inline] char x = hash_int_ (Char.code x)
-let int64 = hash64_
-let[@inline] int32 (x : int32) = int64 (Int64.of_int32 x)
-let[@inline] nativeint (x : nativeint) = int64 (Int64.of_nativeint x)
+let char x = hash_int_ (Char.code x)
+
+(* hash an integer *)
+let int64 n : int =
+  let h = ref fnv_offset_basis in
+  for k = 0 to 7 do
+    (h := Int64.(mul !h fnv_prime));
+    h := Int64.(logxor !h (logand (shift_right_logical n (k * 8)) 0xffL))
+  done;
+  (* truncate back to int and remove sign *)
+  Int64.to_int !h land max_int
+
+let int32 (x : int32) = int64 (Int64.of_int32 x)
+let nativeint (x : nativeint) = int64 (Int64.of_nativeint x)

 (* do not hash more than 128 bytes in strings/bytes *)
 let max_len_b_ = 128
@@ -94,9 +102,9 @@ let max_len_b_ = 128
 let bytes (x : bytes) =
  let h = ref fnv_offset_basis in
  for i = 0 to min max_len_b_ (Bytes.length x - 1) do
+    (h := Int64.(mul !h fnv_prime));
    let byte = Char.code (Bytes.unsafe_get x i) in
-    (h := Int64.(logxor !h (of_int byte)));
-    h := Int64.(mul !h fnv_prime)
+    h := Int64.(logxor !h (of_int byte))
  done;
  Int64.to_int !h land max_int

--- a/src/core/CCInt64.ml
+++ b/src/core/CCInt64.ml
@@ -11,6 +11,8 @@ let max : t -> t -> t = Stdlib.max

 let sign i = compare i zero

+(* use FNV-1:
+   https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
 external hash_to_int64 : (int64[@unboxed]) -> (int64[@unboxed])
  = "caml_cc_hash_int64_to_int64_byte" "caml_cc_hash_int64_to_int64"

--- a/src/core/cc_stubs.c
+++ b/src/core/cc_stubs.c
@@ -2,20 +2,29 @@
 #include <caml/mlvalues.h>
 #include <stdint.h>

-/* FNV-1a hash for a 64-bit integer.
-   https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function */
+/* FNV-1 hash for a 64-bit integer.
+   https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+
+   FNV-1 order: multiply then XOR (as opposed to FNV-1a which XORs first).
+   Uses the standard 64-bit FNV parameters:
+     offset_basis = 0xcbf29ce484222325
+     prime        = 0x00000100000001b3
+
+   Core routine: operates on all 8 bytes of an int64_t. */

 static inline int64_t cc_fnv_hash_int64(int64_t n) {
  uint64_t un = (uint64_t)n;
  uint64_t h = UINT64_C(0xcbf29ce484222325);
  const uint64_t prime = UINT64_C(0x100000001b3);
  for (int k = 0; k < 8; k++) {
-    h ^= (un >> (k * 8)) & 0xff;
    h *= prime;
+    h ^= (un >> (k * 8)) & 0xff;
  }
  return (int64_t)h;
 }

+/* --- CCInt.hash entry points (int -> int) --- */
+
 /* Mask to the OCaml int range (63 bits on 64-bit, 31 on 32-bit)
   before hashing, so negative OCaml ints hash the same as
   the unsigned representation seen by OCaml's [lsr]. */
@@ -32,6 +41,8 @@ CAMLprim value caml_cc_hash_int_byte(value v_n) {
  return Val_long(caml_cc_hash_int(Long_val(v_n)));
 }

+/* --- int64 hash entry points (int64 -> int) --- */
+
 /* native: unboxed int64 in, untagged int out */
 CAMLprim intnat caml_cc_hash_int64(int64_t n) {
  return (intnat)((uint64_t)cc_fnv_hash_int64(n) & Max_long);
@@ -42,6 +53,8 @@ CAMLprim value caml_cc_hash_int64_byte(value v_n) {
  return Val_long(caml_cc_hash_int64(Int64_val(v_n)));
 }

+/* --- int64 -> int64 entry points (for hash_to_int64) --- */
+
 /* native: unboxed int64 in, unboxed int64 out.
   Masks to non-negative int64 (matches OCaml's Int64.max_int). */
 CAMLprim int64_t caml_cc_hash_int64_to_int64(int64_t n) {
--- a/tests/core/t_hash.ml
+++ b/tests/core/t_hash.ml
@@ -1,19 +1,6 @@
 open CCHash
 module T = (val Containers_testlib.make ~__FILE__ ())
-include T
-
-open struct
-  let hash_ocaml64 (n : int64) : int =
-    let offset_basis = 0xcbf29ce484222325L in
-    let prime = 0x100000001b3L in
-    let h = ref offset_basis in
-    for k = 0 to 7 do
-      (h := Int64.(logxor !h (Int64.logand (Int64.shift_left n (k * 8)) 0xffL)));
-      h := Int64.(mul !h prime)
-    done;
-    Int64.to_int !h land max_int
-end
-;;
+include T;;

 t @@ fun () -> int 42 >= 0;;
 t @@ fun () -> int max_int >= 0;;
@@ -30,7 +17,3 @@ t @@ fun () -> string "abc" <> string "abcd";;
 q Q.int (fun i ->
    Q.assume (i >= 0);
    int i = int64 (Int64.of_int i))
-;;
-
-q Q.int64
-  Q.(fun i -> Int64.compare i 0L >= 0 ==> (CCInt64.hash i = hash_ocaml64 i))
--- a/tests/pvec/t_pvec.ml
+++ b/tests/pvec/t_pvec.ml
@@ -265,16 +265,10 @@ module Op = struct
                   ( 1,
                     list_small gen_x >|= fun l ->
                     Append l, size + List.length l );
-                 ];
-                 (if size < 10_000 then
-                    [
-                      (* flat map can explode, only do it if list isn't too big *)
                   ( 1,
                     list_size (0 -- 5) gen_x >|= fun l ->
                     Flat_map l, size * (1 + List.length l) );
-                    ]
-                  else
-                    []);
+                 ];
               ]
        in
Author	SHA1	Message	Date
Simon Cruanes	2827011b37	ocamlformat	2026-02-17 02:19:04 +00:00
Simon Cruanes	5a50d42352	CCInt64.hash: migrate to C stub, add tests Replace the pure-OCaml FNV-1 implementation in CCInt64 with C stubs that call the same cc_fnv_hash_int64 core as CCInt.hash: - hash: int64 -> int via caml_cc_hash_int64 ([@unboxed] + [@@noalloc]) - hash_to_int64: int64 -> int64 via new caml_cc_hash_int64_to_int64 entry points (masks result to non-negative, needs caml_copy_int64 for bytecode so cannot be [@@noalloc]) Add 18 tests in t_int64.ml covering: - Non-negativity of hash and hash_to_int64 - Consistency between hash and hash_to_int64 - Different inputs produce different hashes - Determinism - QuickCheck: hash is non-negative for random inputs	2026-02-16 23:58:49 +00:00
Simon Cruanes	d493f6696b	Fix FNV variant in comments: this is FNV-1, not FNV-1a The implementation uses multiply-then-XOR order, which is FNV-1. FNV-1a would XOR first, then multiply. The constants (offset_basis and prime) are the same for both variants; only the operation order differs. Fix comments in cc_stubs.c, CCInt.ml, and CCHash.ml. No behavioral change — just correcting the documentation.	2026-02-16 12:48:12 +00:00
Simon Cruanes	bb6de2ff05	CCInt.hash: replace OCaml Int64 loop with C stub (FNV-1) Implement FNV-1 hashing for CCInt.hash as a C stub instead of a pure-OCaml Int64-based loop. The core hash function operates on int64 with separate native/bytecode entry points for both int and int64. - cc_stubs.c: cc_fnv_hash_int64 core, with int and int64 wrappers - Uses [@untagged] for int args, [@unboxed] ready for int64 - OCAML_INT_MASK ensures correct byte extraction for 63-bit ints - Hash values are unchanged from the previous OCaml implementation Also adds a throughput benchmark in benchs/run_benchs.ml comparing the old pure-OCaml FNV hash vs the new C stub (~2x faster).	2026-02-16 12:48:12 +00:00
Simon Cruanes	1aa8b869e5	fix(CCSeq): correct conditional compilation version for init Seq.init was added in OCaml 4.14, not 4.11. This aligns the implementation with the interface which was already correctly marked with [@@@iflt 4.14].	2026-02-12 11:47:50 +00:00
Simon Cruanes	df7619786c	prepare for 3.18 Some checks failed format / format (push) Has been cancelled Details Build and Test / build (push) Has been cancelled Details	2026-02-10 21:08:02 -05:00