Compare commits

...

5 commits

Author SHA1 Message Date
Simon Cruanes
73ea1a2e88
CCInt.hash: replace OCaml Int64 loop with C stub (FNV-1a) 2026-02-17 09:29:22 -05:00
Simon Cruanes
bc9f361e56
forgot to format
Some checks failed
format / format (push) Has been cancelled
Build and Test / build (push) Has been cancelled
2026-02-14 21:08:29 -05:00
Simon Cruanes
42bfe9c8c6
prepare for 3.18 2026-02-14 20:38:37 -05:00
Simon Cruanes
ea5d9bbdf4
test pvec: avoid size explosion 2026-02-14 20:38:00 -05:00
Simon Cruanes
9ec34f8bf8
fix(CCSeq): correct conditional compilation version for init
Seq.init was added in OCaml 4.14, not 4.11. This aligns the
implementation with the interface which was already correctly
marked with [@@@iflt 4.14].
2026-02-12 08:52:46 -05:00
17 changed files with 209 additions and 82 deletions

View file

@ -1,10 +1,16 @@
## main
- breaking: CCListLabel.compare and CCListLabel.equal takes the function on the elements as named arguments
- breaking: CCListLabel.init now takes the length as a named arguments to follow the Stdlib
- breaking: change the semantic of CCFloat.{min,max} with respect to NaN to follow the Stdlib
- breaking: change the semantic of CCInt.rem with respect to negative number to follow the Stdlib
- breaking: change the order of argument of CCMap.add_seq to align with the stdlib.
## 3.18
- fix leb128 slice bug
- fix leb128 `Int64.min_int` bug
- add tests for leb128 library (#486)
- fix size explosion in `t_pvec.ml` found in CI
- some breaking changes after the big bump to 4.08 as lower bound, thanks to @fardale for the cleanup
* breaking: CCListLabel.compare and CCListLabel.equal takes the function on the elements as named arguments
* breaking: CCListLabel.init now takes the length as a named arguments to follow the Stdlib
* breaking: change the semantic of CCFloat.{min,max} with respect to NaN to follow the Stdlib
* breaking: change the semantic of CCInt.rem with respect to negative number to follow the Stdlib
* breaking: change the order of argument of `CCMap.add_seq` to align with the stdlib.
## 3.17

View file

@ -10,7 +10,6 @@
qcheck
oseq
batteries
base
sek)
(flags :standard -warn-error -3-5 -w -60 -safe-string -color always)
(optional)

View file

@ -1535,7 +1535,7 @@ module Str = struct
let rand_str_ ?(among = "abcdefgh") n =
let module Q = QCheck in
let st = Random.State.make [| n + 17 |] in
let gen_c = QCheck.Gen.oneofl (CCString.to_list among) in
let gen_c = QCheck.Gen.oneof_list (CCString.to_list among) in
QCheck.Gen.string_size ~gen:gen_c (QCheck.Gen.return n) st
let find ?(start = 0) ~sub s =
@ -1824,4 +1824,32 @@ module Str = struct
])
end
module Hash = struct
let hash_ocaml (n : int) : int =
let offset_basis = 0xcbf29ce484222325L in
let prime = 0x100000001b3L in
let h = ref offset_basis in
for k = 0 to 7 do
(h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff))));
h := Int64.(mul !h prime)
done;
Int64.to_int !h land max_int
let bench_hash n =
let run_ocaml () =
for i = 0 to n - 1 do
opaque_ignore (hash_ocaml i)
done
and run_c_stub () =
for i = 0 to n - 1 do
opaque_ignore (CCInt.hash i)
done
in
B.throughputN 3 ~repeat
[ "ocaml_fnv", run_ocaml, (); "c_stub", run_c_stub, () ]
let () =
B.Tree.register ("hash" @>>> [ "int" @>> app_ints bench_hash [ 1_000 ] ])
end
let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg

View file

@ -1,6 +1,6 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
version: "3.17"
version: "3.18"
synopsis: "A set of advanced datatypes for containers"
maintainer: ["c-cube"]
authors: ["c-cube"]

View file

@ -1,6 +1,6 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
version: "3.17"
version: "3.18"
synopsis:
"A modular, clean and powerful extension of the OCaml standard library"
maintainer: ["c-cube"]

View file

@ -4,7 +4,7 @@
(generate_opam_files true)
(version 3.17)
(version 3.18)
(authors c-cube)

View file

@ -104,7 +104,7 @@ val option : ?none:unit printer -> 'a printer -> 'a option printer
- [Some x] will become [pp x]
- [None] will become [none ()]
Alias of {!Format.pp_print_option}
@since NEXT_RELEASE *)
@since 3.18 *)
val opt : 'a printer -> 'a option printer
(** [opt pp] prints options as follows:

View file

@ -7,31 +7,33 @@ type 'a t = 'a -> hash
type 'a iter = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
(* FNV hashing
(* same as CCInt: *)
open struct
external hash_int_ : (int[@untagged]) -> (int[@untagged])
= "caml_cc_hash_int_byte" "caml_cc_hash_int"
[@@noalloc]
external hash64_ : (int64[@unboxed]) -> (int[@untagged])
= "caml_cc_hash_int64_byte" "caml_cc_hash_int64"
[@@noalloc]
(* FNV-1a hashing (XOR then multiply )
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
*)
let fnv_offset_basis = 0xcbf29ce484222325L
let fnv_prime = 0x100000001b3L
(* hash an integer *)
let hash_int_ n =
let h = ref fnv_offset_basis in
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
done;
(* truncate back to int and remove sign *)
Int64.to_int !h land max_int
let fnv_offset_basis = 0xcbf29ce484222325L
let fnv_prime = 0x100000001b3L
end
(* TODO: also port to C *)
let combine2 a b =
let h = ref fnv_offset_basis in
(* we only do one loop, where we mix bytes of [a] and [b], so as
to simplify control flow *)
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff)))
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
h := Int64.(mul !h fnv_prime)
done;
Int64.to_int !h land max_int
@ -42,26 +44,26 @@ let combine3 a b c =
(* we only do one loop, where we mix bytes of [a] [b] and [c], so as
to simplify control flow *)
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff)))
(h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
h := Int64.(mul !h fnv_prime)
done;
Int64.to_int !h land max_int
let combine4 a b c d =
let h = ref fnv_offset_basis in
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int ((d lsr (k * 8)) land 0xff)))
(h := Int64.(logxor !h (of_int ((d lsr (k * 8)) land 0xff))));
h := Int64.(mul !h fnv_prime)
done;
Int64.to_int !h land max_int
@ -72,29 +74,19 @@ let combine6 a b c d e f = combine4 a b c (combine3 d e f)
let const h _ = h
let const0 _ = 0
let int = hash_int_
let[@inline] int i = hash_int_ i
let bool b =
let[@inline] bool b =
hash_int_
(if b then
1
else
2)
let char x = hash_int_ (Char.code x)
(* hash an integer *)
let int64 n : int =
let h = ref fnv_offset_basis in
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (logand (shift_right_logical n (k * 8)) 0xffL))
done;
(* truncate back to int and remove sign *)
Int64.to_int !h land max_int
let int32 (x : int32) = int64 (Int64.of_int32 x)
let nativeint (x : nativeint) = int64 (Int64.of_nativeint x)
let[@inline] char x = hash_int_ (Char.code x)
let int64 = hash64_
let[@inline] int32 (x : int32) = int64 (Int64.of_int32 x)
let[@inline] nativeint (x : nativeint) = int64 (Int64.of_nativeint x)
(* do not hash more than 128 bytes in strings/bytes *)
let max_len_b_ = 128
@ -102,9 +94,9 @@ let max_len_b_ = 128
let bytes (x : bytes) =
let h = ref fnv_offset_basis in
for i = 0 to min max_len_b_ (Bytes.length x - 1) do
(h := Int64.(mul !h fnv_prime));
let byte = Char.code (Bytes.unsafe_get x i) in
h := Int64.(logxor !h (of_int byte))
(h := Int64.(logxor !h (of_int byte)));
h := Int64.(mul !h fnv_prime)
done;
Int64.to_int !h land max_int

View file

@ -4,20 +4,11 @@ include Int
type 'a iter = ('a -> unit) -> unit
(* use FNV:
(* use FNV-1:
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
let hash (n : int) : int =
let offset_basis = 0xcbf29ce484222325L in
let prime = 0x100000001b3L in
let h = ref offset_basis in
for k = 0 to 7 do
(h := Int64.(mul !h prime));
(* h := h xor (k-th byte of n) *)
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
done;
(* truncate back to int and remove sign *)
Int64.to_int !h land max_int
external hash : (int[@untagged]) -> (int[@untagged])
= "caml_cc_hash_int_byte" "caml_cc_hash_int"
[@@noalloc]
let range i j yield =
let rec up i j yield =

View file

@ -11,21 +11,12 @@ let max : t -> t -> t = Stdlib.max
let sign i = compare i zero
(* use FNV:
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
let hash_to_int64 (n : t) =
let offset_basis = 0xcbf29ce484222325L in
let prime = 0x100000001b3L in
external hash_to_int64 : (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_hash_int64_to_int64_byte" "caml_cc_hash_int64_to_int64"
let h = ref offset_basis in
for k = 0 to 7 do
h := mul !h prime;
(* h := h xor (k-th byte of n) *)
h := logxor !h (logand (shift_right n (k * 8)) 0xffL)
done;
logand !h max_int
let[@inline] hash (n : t) : int = to_int (hash_to_int64 n) land Stdlib.max_int
external hash : (int64[@unboxed]) -> (int[@untagged])
= "caml_cc_hash_int64_byte" "caml_cc_hash_int64"
[@@noalloc]
(* see {!CCInt.popcount} for more details *)
let[@inline] popcount (b : t) : int =

View file

@ -20,7 +20,7 @@ let cons a b () = Cons (a, b)
let singleton x () = Cons (x, nil)
[@@@endif]
[@@@iflt 4.11]
[@@@iflt 4.14]
let init n f =
let rec aux i () =

View file

@ -49,7 +49,7 @@ val forever : (unit -> 'a) -> 'a t
val cycle : 'a t -> 'a t
(** Cycle through the sequence infinitely. The sequence should be persistent.
@since NEXT_RELEASE the sequence can be empty, in this case cycle return an empty sequence. *)
@since 3.18 the sequence can be empty, in this case cycle return an empty sequence. *)
val iterate : ('a -> 'a) -> 'a -> 'a t
(** [iterate f a] corresponds to the infinite sequence containing [a], [f a], [f (f a)],

54
src/core/cc_stubs.c Normal file
View file

@ -0,0 +1,54 @@
#include <caml/alloc.h>
#include <caml/mlvalues.h>
#include <stdint.h>
/* FNV-1a hash for a 64-bit integer.
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function */
static inline int64_t cc_fnv_hash_int64(int64_t n) {
uint64_t un = (uint64_t)n;
uint64_t h = UINT64_C(0xcbf29ce484222325);
const uint64_t prime = UINT64_C(0x100000001b3);
for (int k = 0; k < 8; k++) {
h ^= (un >> (k * 8)) & 0xff;
h *= prime;
}
return (int64_t)h;
}
/* Mask to the OCaml int range (63 bits on 64-bit, 31 on 32-bit)
before hashing, so negative OCaml ints hash the same as
the unsigned representation seen by OCaml's [lsr]. */
#define OCAML_INT_MASK ((UINT64_C(1) << (8 * sizeof(value) - 1)) - 1)
/* native: untagged int in, untagged int out */
CAMLprim intnat caml_cc_hash_int(intnat n) {
int64_t projected = (int64_t)((uint64_t)n & OCAML_INT_MASK);
return (intnat)((uint64_t)cc_fnv_hash_int64(projected) & Max_long);
}
/* bytecode: boxed value in, boxed value out */
CAMLprim value caml_cc_hash_int_byte(value v_n) {
return Val_long(caml_cc_hash_int(Long_val(v_n)));
}
/* native: unboxed int64 in, untagged int out */
CAMLprim intnat caml_cc_hash_int64(int64_t n) {
return (intnat)((uint64_t)cc_fnv_hash_int64(n) & Max_long);
}
/* bytecode: boxed int64 value in, boxed value out */
CAMLprim value caml_cc_hash_int64_byte(value v_n) {
return Val_long(caml_cc_hash_int64(Int64_val(v_n)));
}
/* native: unboxed int64 in, unboxed int64 out.
Masks to non-negative int64 (matches OCaml's Int64.max_int). */
CAMLprim int64_t caml_cc_hash_int64_to_int64(int64_t n) {
return cc_fnv_hash_int64(n) & INT64_MAX;
}
/* bytecode: boxed int64 in, boxed int64 out */
CAMLprim value caml_cc_hash_int64_to_int64_byte(value v_n) {
return caml_copy_int64(cc_fnv_hash_int64(Int64_val(v_n)) & INT64_MAX);
}

View file

@ -6,6 +6,10 @@
(action
(run %{project_root}/src/core/cpp/cpp.exe %{input-file})))
(flags :standard -nolabels -open CCMonomorphic)
(foreign_stubs
(language c)
(flags :standard -std=c99 -O2)
(names cc_stubs))
(libraries either containers.monomorphic containers.domain))
(ocamllex

View file

@ -1,6 +1,19 @@
open CCHash
module T = (val Containers_testlib.make ~__FILE__ ())
include T;;
include T
open struct
let hash_ocaml64 (n : int64) : int =
let offset_basis = 0xcbf29ce484222325L in
let prime = 0x100000001b3L in
let h = ref offset_basis in
for k = 0 to 7 do
(h := Int64.(logxor !h (Int64.logand (Int64.shift_left n (k * 8)) 0xffL)));
h := Int64.(mul !h prime)
done;
Int64.to_int !h land max_int
end
;;
t @@ fun () -> int 42 >= 0;;
t @@ fun () -> int max_int >= 0;;
@ -17,3 +30,7 @@ t @@ fun () -> string "abc" <> string "abcd";;
q Q.int (fun i ->
Q.assume (i >= 0);
int i = int64 (Int64.of_int i))
;;
q Q.int64
Q.(fun i -> Int64.compare i 0L >= 0 ==> (CCInt64.hash i = hash_ocaml64 i))

View file

@ -101,3 +101,42 @@ eq' 63 (popcount max_int);;
eq' 1 (popcount min_int);;
eq' 10 (popcount 0b1110010110110001010L);;
eq' 5 (popcount 0b1101110000000000L)
(* hash tests *)
let ( >= ) = Stdlib.( >= )
let ( = ) = Stdlib.( = )
let ( <> ) = Stdlib.( <> );;
(* hash is non-negative *)
t @@ fun () -> hash 0L >= 0;;
t @@ fun () -> hash 1L >= 0;;
t @@ fun () -> hash (-1L) >= 0;;
t @@ fun () -> hash max_int >= 0;;
t @@ fun () -> hash min_int >= 0;;
(* hash_to_int64 is non-negative *)
t @@ fun () -> CCInt64.compare (hash_to_int64 0L) 0L >= 0;;
t @@ fun () -> CCInt64.compare (hash_to_int64 (-1L)) 0L >= 0;;
t @@ fun () -> CCInt64.compare (hash_to_int64 min_int) 0L >= 0;;
(* hash is consistent with hash_to_int64 *)
t @@ fun () -> hash 42L = Stdlib.(Int64.to_int (hash_to_int64 42L) land max_int)
;;
t @@ fun () ->
hash (-1L) = Stdlib.(Int64.to_int (hash_to_int64 (-1L)) land max_int)
;;
(* different inputs produce different hashes *)
t @@ fun () -> hash 0L <> hash 1L;;
t @@ fun () -> hash 1L <> hash 2L;;
t @@ fun () -> hash 1L <> hash (-1L);;
t @@ fun () -> hash_to_int64 0L <> hash_to_int64 1L;;
(* deterministic *)
t @@ fun () -> hash 123L = hash 123L;;
t @@ fun () -> hash_to_int64 123L = hash_to_int64 123L;;
(* quickcheck: hash is always non-negative *)
q Q.(map Int64.of_int int) (fun n -> hash n >= 0);;
q Q.(map Int64.of_int int) (fun n -> CCInt64.compare (hash_to_int64 n) 0L >= 0)

View file

@ -265,10 +265,16 @@ module Op = struct
( 1,
list_small gen_x >|= fun l ->
Append l, size + List.length l );
];
(if size < 10_000 then
[
(* flat map can explode, only do it if list isn't too big *)
( 1,
list_size (0 -- 5) gen_x >|= fun l ->
Flat_map l, size * (1 + List.length l) );
];
]
else
[]);
]
in