Compare commits

..

11 commits

Author SHA1 Message Date
Simon Cruanes
4948d74e45 fix comments about hash mixer constant
Some checks failed
format / format (push) Has been cancelled
github pages / Deploy doc (push) Has been cancelled
Build and Test / build (push) Has been cancelled
2026-04-24 12:30:44 -04:00
Simon Cruanes
efa429e3c8 format 2026-04-08 20:59:13 -04:00
Simon Cruanes
fec6a8f58f format with 5.3 2026-04-08 20:58:52 -04:00
Simon Cruanes
e42caf3b6a more tests 2026-04-09 00:58:25 +00:00
Simon Cruanes
749f973528 byte_slice: add clear and of_string 2026-04-09 00:58:12 +00:00
Simon Cruanes
c02c76eb0b add Byte_buffer.copy, fix bugs 2026-04-09 00:56:37 +00:00
Simon Cruanes
0ad561a8e7 use better docker image for doc 2026-04-06 17:58:19 -04:00
Simon Cruanes
7fdacb4a29 use prebuilt docker images for CI 2026-04-06 17:43:05 -04:00
Simon Cruanes
c99883b841
amend changelog for 3.18 2026-03-23 11:46:48 -04:00
Simon Cruanes
479fe31c27
CCMap.to_list: always in increasing order now 2026-03-23 11:46:38 -04:00
Simon Cruanes
7f8df6d63d
improve default hash (#489)
* add pre commit hook

* hash mixer and combiner in C

* tests for hashes

* bench of hash

* fix warnings

format and minor fixes

* format

* doc

* test hash: add a per-bit distribution check

* hash tests: count bit probability

* cchash64: add apply and combine*

* makefile

* cleanup hash bench

* change cchash constant

from murmur2

* make the hash test stronger
2026-03-19 21:44:34 -04:00
33 changed files with 970 additions and 266 deletions

View file

@ -9,20 +9,9 @@ on:
jobs:
format:
name: format
strategy:
matrix:
ocaml-compiler:
- '5.3'
runs-on: 'ubuntu-latest'
runs-on: ubuntu-latest
container: ghcr.io/c-cube/c-cube-commmon/ci-doc-5.3:latest
steps:
- uses: actions/checkout@main
- name: Use OCaml ${{ matrix.ocaml-compiler }}
uses: ocaml/setup-ocaml@v3
with:
ocaml-compiler: ${{ matrix.ocaml-compiler }}
dune-cache: true
allow-prerelease-opam: true
- run: opam install ocamlformat.0.27.0
- uses: actions/checkout@v6
- run: opam exec -- make format-check

View file

@ -3,23 +3,19 @@ name: github pages
on:
push:
branches:
- master # Set a branch name to trigger deployment
- main
jobs:
deploy:
name: Deploy doc
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@main
container: ghcr.io/c-cube/c-cube-commmon/ci-doc-5.3:latest
- name: Use OCaml
uses: ocaml/setup-ocaml@v3
with:
ocaml-compiler: '5.2'
dune-cache: false
steps:
- uses: actions/checkout@v6
- name: Deps
run: opam install odig containers containers-data
run: opam install containers containers-data
- name: Build
run: opam exec -- odig odoc --cache-dir=_doc/ containers containers-data
@ -29,5 +25,5 @@ jobs:
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./_doc/html/
destination_dir: dev
enable_jekyll: true
destination_dir: .
enable_jekyll: false

View file

@ -7,58 +7,25 @@ on:
pull_request:
jobs:
run:
build:
name: build
timeout-minutes: 15
strategy:
fail-fast: true
fail-fast: false
matrix:
os:
- ubuntu-latest
ocaml-compiler:
- '4.08'
- '4.10'
- '4.14'
- '5.3'
- 'ocaml-variants.5.0.0+options,ocaml-option-bytecode-only'
include:
- ocaml-version: "4.08"
container: ghcr.io/c-cube/c-cube-commmon/ci-4.08:latest
- ocaml-version: "4.14"
container: ghcr.io/c-cube/c-cube-commmon/ci-4.14:latest
- ocaml-version: "5.4"
container: ghcr.io/c-cube/c-cube-commmon/ci-5.4:latest
runs-on: ubuntu-latest
container: ${{ matrix.container }}
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@main
- name: Use OCaml ${{ matrix.ocaml-compiler }}
uses: ocaml/setup-ocaml@v3
with:
ocaml-compiler: ${{ matrix.ocaml-compiler }}
dune-cache: true
allow-prerelease-opam: true
- uses: actions/checkout@v6
- run: opam install -t containers containers-data --deps-only
- run: opam exec -- dune build '@install'
- run: opam exec -- dune runtest --force --profile=release
compat:
name: build
timeout-minutes: 15
strategy:
fail-fast: true
matrix:
os:
- macos-latest
- ubuntu-latest
#- windows-latest
ocaml-compiler:
- '5.1'
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@main
- name: Use OCaml ${{ matrix.ocaml-compiler }}
uses: ocaml/setup-ocaml@v3
with:
ocaml-compiler: ${{ matrix.ocaml-compiler }}
dune-cache: true
allow-prerelease-opam: true
- run: |
opam install -t containers --deps-only ;
opam install containers-data --deps-only # no test deps
- run: opam exec -- dune build '@install'
- run: opam exec -- dune runtest -j 1 -p containers --profile=release # test only core on non-ubuntu platform

17
.pre-commit-config.yaml Normal file
View file

@ -0,0 +1,17 @@
repos:
- repo: local
hooks:
- id: dune-build-fmt
name: dune build @fmt
entry: dune build @fmt --auto-promote
pass_filenames: false
language: system
types: [text]
stages: [pre-push]
- id: dune-build-opam
name: dune build
entry: dune build @install
pass_filenames: false
language: system
types: [text]
stages: [pre-push]

View file

@ -11,6 +11,7 @@
* breaking: change the semantic of CCFloat.{min,max} with respect to NaN to follow the Stdlib
* breaking: change the semantic of CCInt.rem with respect to negative number to follow the Stdlib
* breaking: change the order of argument of `CCMap.add_seq` to align with the stdlib.
* breaking: `CCMap.to_list` is now in increasing, not decreasing, order for OCaml >= 5.1
## 3.17

View file

@ -24,6 +24,8 @@ format:
format-check:
@dune build $(DUNE_OPTS) @fmt --display=quiet
install-pre-commit-hook:
uvx pre-commit install --hook-type pre-push
VERSION=$(shell awk '/^version:/ {print $$2}' containers.opam)

78
benchs/bench_hash.ml Normal file
View file

@ -0,0 +1,78 @@
(** Benchmarks for CCHash primitives.
*)
[@@@warning "-5"]
module B = Benchmark
let repeat = 3
let n_ints = 1_000
let ints = Array.init n_ints (fun i -> i * 2654435761)
let bench_int_hash ~time () =
B.throughputN time ~repeat
[
( "CCHash.int",
(fun () ->
Array.iter
(fun x -> ignore @@ Sys.opaque_identity (CCHash.int x))
ints),
() );
( "Hashtbl.hash (poly)",
(fun () ->
Array.iter
(fun x -> ignore @@ Sys.opaque_identity (Hashtbl.hash x))
ints),
() );
( "CCHash.int64",
(fun () ->
Array.iter
(fun x ->
ignore @@ Sys.opaque_identity (CCHash.int64 (Int64.of_int x)))
ints),
() );
]
let bench_combine64 ~time () =
B.throughputN time ~repeat
[
( "combine64 chain x5",
(fun () ->
Array.iter
(fun x ->
ignore
@@ Sys.opaque_identity
CCHash64.(
combine2
(combine2
(combine2
(combine2 (Int64.of_int x)
(Int64.of_int (x lxor 0xaaaa)))
(Int64.of_int (x + 1)))
(Int64.of_int (x * 3)))
(Int64.of_int (x lxor (x lsr 7)))))
ints),
() );
( "CCHash.list int [1..5]",
(fun () ->
Array.iter
(fun x ->
ignore
@@ Sys.opaque_identity
(Int64.of_int
CCHash.(list int [ x + 1; x + 2; x + 3; x + 4; x + 5 ])))
ints),
() );
]
(* --- tree for run_global ------------------------------------------------- *)
let () =
B.Tree.(
register @@ "hash"
@>>> [
"int" @> lazy (bench_int_hash ~time:2 ());
"combine64" @> lazy (bench_combine64 ~time:2 ());
])
let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg

View file

@ -4,6 +4,7 @@
containers
containers_pvec
containers-data
containers.xxhash
benchmark
gen
iter
@ -22,3 +23,17 @@
-unbox-closures
-unbox-closures-factor
20))
(executable
(name bench_hash)
(libraries containers benchmark)
(flags :standard -warn-error -3-5 -w -60 -safe-string -color always)
(optional)
(ocamlopt_flags
:standard
-O3
-color
always
-unbox-closures
-unbox-closures-factor
20))

View file

@ -21,7 +21,8 @@ let rec eq t1 t2 =
let rec hash_tree t =
match t with
| Empty -> CCHash.string "empty"
| Node (i, l) -> CCHash.(combine2 (int i) (list hash_tree l))
| Node (i, l) ->
CCHash.((combine2 [@alert "-deprecated"]) (int i) (list hash_tree l))
module H = Hashtbl.Make (struct
type t = tree

View file

@ -1535,7 +1535,7 @@ module Str = struct
let rand_str_ ?(among = "abcdefgh") n =
let module Q = QCheck in
let st = Random.State.make [| n + 17 |] in
let gen_c = QCheck.Gen.oneofl (CCString.to_list among) in
let gen_c = QCheck.Gen.oneof_list (CCString.to_list among) in
QCheck.Gen.string_size ~gen:gen_c (QCheck.Gen.return n) st
let find ?(start = 0) ~sub s =
@ -1824,4 +1824,60 @@ module Str = struct
])
end
module Hash = struct
(* Old FNV-based string hash (from before the rrmxmx+fmix64 C implementation) *)
let fnv_offset_basis = 0xcbf29ce484222325L
let fnv_prime = 0x100000001b3L
let string_fnv (s : string) =
let h = ref fnv_offset_basis in
for i = 0 to String.length s - 1 do
let c = String.unsafe_get s i in
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int (Char.code c)))
done;
Int64.to_int !h land max_int
let rand = Random.State.make [| 42 |]
let mk_strings n len =
Array.init n (fun _ ->
String.init len (fun _ -> Char.chr (65 + Random.State.int rand 26)))
let bench_string_hash ?(time = 2) ~len n =
let strings = mk_strings n len in
let bench_new () =
Array.iter (fun s -> opaque_ignore (CCHash.string s)) strings
and bench_fnv () =
Array.iter (fun s -> opaque_ignore (string_fnv s)) strings
and bench_poly () =
Array.iter (fun s -> opaque_ignore (Hashtbl.hash s)) strings
and bench_xxhash () =
Array.iter
(fun s ->
opaque_ignore (Int64.to_int (Containers_xxhash.hash_string s)))
strings
in
B.throughputN time ~repeat
[
"CCHash.string (new)", bench_new, ();
"string_fnv (old)", bench_fnv, ();
"Hashtbl.hash (poly)", bench_poly, ();
"xxhash", bench_xxhash, ();
]
let () =
B.Tree.register
("hash"
@>>> [
"string"
@>> B.Tree.concat
[
app_int (bench_string_hash ~time:2 ~len:16) 1_000;
app_int (bench_string_hash ~time:2 ~len:64) 1_000;
app_int (bench_string_hash ~time:2 ~len:256) 1_000;
];
])
end
let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg

View file

@ -1,4 +1,4 @@
#!/bin/sh
OPTS="--profile=release --display=quiet"
exec dune exec $OPTS -- benchs/run_benchs_hash.exe $@
exec dune exec $OPTS -- ./benchs/run_bench_hash.exe $@

View file

@ -96,7 +96,7 @@ let fold_left f acc self =
(* capture current content *)
let acc = ref acc in
for i = 0 to len do
for i = 0 to len - 1 do
acc := f !acc (Bytes.unsafe_get bs i)
done;
!acc
@ -104,16 +104,20 @@ let fold_left f acc self =
let[@inline] iter f self =
(* capture current content *)
let { bs; len } = self in
for i = 0 to len do
for i = 0 to len - 1 do
f (Bytes.unsafe_get bs i)
done
let[@inline] iteri f self =
let { bs; len } = self in
for i = 0 to len do
for i = 0 to len - 1 do
f i (Bytes.unsafe_get bs i)
done
let copy self =
let bs = Bytes.copy self.bs in
{ bs; len = self.len }
let of_seq seq =
let self = create ~cap:32 () in
append_seq self seq;

View file

@ -91,6 +91,9 @@ val to_slice : t -> CCByte_slice.t
The slice shares the same byte array as [buf] (until [buf] is resized).
@since 3.13.1 *)
val copy : t -> t
(** [copy buf] returns an independent copy of [buf]. *)
val contents : t -> string
(** Copy the internal data to a string. Allocates. *)

View file

@ -21,6 +21,12 @@ let create ?(off = 0) ?len bs =
let[@inline] unsafe_of_string ?off ?len s =
create ?off ?len (Bytes.unsafe_of_string s)
let[@inline] of_string s = create (Bytes.of_string s)
let[@inline] clear self =
self.off <- 0;
self.len <- 0
let[@inline] len self = self.len
let[@inline] contents self = Bytes.sub_string self.bs self.off self.len

View file

@ -26,6 +26,12 @@ val unsafe_of_string : ?off:int -> ?len:int -> string -> t
This is unsafe because mutating the bytes is forbidden
(just like with {!Bytes.unsafe_of_string} *)
val of_string : string -> t
(** [of_string s] makes a slice from a copy of [s]. Safe to mutate. *)
val clear : t -> unit
(** [clear sl] resets [off] and [len] to 0. *)
val len : t -> int
(** Access the length *)

View file

@ -7,130 +7,107 @@ type 'a t = 'a -> hash
type 'a iter = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
(* FNV hashing
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
*)
let fnv_offset_basis = 0xcbf29ce484222325L
let fnv_prime = 0x100000001b3L
let[@inline] combine2 a b =
Hash_impl_.(finalize (combine_int (combine_int seed a) b))
(* hash an integer *)
let hash_int_ n =
let h = ref fnv_offset_basis in
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
done;
(* truncate back to int and remove sign *)
Int64.to_int !h land max_int
let combine2 a b =
let h = ref fnv_offset_basis in
(* we only do one loop, where we mix bytes of [a] and [b], so as
to simplify control flow *)
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff)))
done;
Int64.to_int !h land max_int
let[@inline] combine f s x = combine2 s (f x)
let[@inline] combine f s x =
Hash_impl_.(finalize (combine_int (combine_int seed s) (f x)))
let combine3 a b c =
let h = ref fnv_offset_basis in
(* we only do one loop, where we mix bytes of [a] [b] and [c], so as
to simplify control flow *)
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff)))
done;
Int64.to_int !h land max_int
Hash_impl_.(
let s = combine_int (combine_int seed a) b in
finalize (combine_int s c))
let combine4 a b c d =
let h = ref fnv_offset_basis in
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
(h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int ((d lsr (k * 8)) land 0xff)))
done;
Int64.to_int !h land max_int
Hash_impl_.(
let s = combine_int (combine_int seed a) b in
let s = combine_int s c in
finalize (combine_int s d))
let combine5 a b c d e = combine3 a b (combine3 c d e)
let combine6 a b c d e f = combine4 a b c (combine3 d e f)
let combine5 a b c d e =
Hash_impl_.(
let s = combine_int (combine_int seed a) b in
let s = combine_int s c in
let s = combine_int s d in
finalize (combine_int s e))
(** {2 Combinators} *)
let combine6 a b c d e f =
Hash_impl_.(
let s = combine_int (combine_int seed a) b in
let s = combine_int s c in
let s = combine_int s d in
let s = combine_int s e in
finalize (combine_int s f))
(** {2 Primitive hashers} *)
let const h _ = h
let const0 _ = 0
let int = hash_int_
let int n = Hash_impl_.(finalize (combine_int seed n))
let bool b =
hash_int_
int
(if b then
1
else
2)
let char x = hash_int_ (Char.code x)
let char x = Hash_impl_.(finalize (combine_char seed (Char.code x)))
let int64 (n : int64) : int = Hash_impl_.(finalize (combine_i64 seed n))
let int32 (x : int32) : int = Hash_impl_.(finalize (combine_i32 seed x))
(* hash an integer *)
let int64 n : int =
let h = ref fnv_offset_basis in
for k = 0 to 7 do
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (logand (shift_right_logical n (k * 8)) 0xffL))
done;
(* truncate back to int and remove sign *)
Int64.to_int !h land max_int
let int32 (x : int32) = int64 (Int64.of_int32 x)
let nativeint (x : nativeint) = int64 (Int64.of_nativeint x)
(* do not hash more than 128 bytes in strings/bytes *)
let max_len_b_ = 128
let nativeint (x : nativeint) =
Hash_impl_.(finalize (combine_i64 seed (Int64.of_nativeint x)))
let bytes (x : bytes) =
let h = ref fnv_offset_basis in
for i = 0 to min max_len_b_ (Bytes.length x - 1) do
(h := Int64.(mul !h fnv_prime));
let byte = Char.code (Bytes.unsafe_get x i) in
h := Int64.(logxor !h (of_int byte))
done;
Int64.to_int !h land max_int
Hash_impl_.(finalize (combine_string seed (Bytes.unsafe_to_string x)))
let string (x : string) = bytes (Bytes.unsafe_of_string x)
let string (x : string) = Hash_impl_.(finalize (combine_string seed x))
let slice x i len =
let j = i + len in
let rec aux i s =
if i = j then
s
let rec aux k s =
if k = j then
Hash_impl_.finalize s
else
aux (i + 1) (combine2 (Char.code x.[i]) s)
aux (k + 1)
(Hash_impl_.combine_char s (Char.code (String.unsafe_get x k)))
in
aux i 0
aux i Hash_impl_.seed
let opt f = function
| None -> 42
| Some x -> combine2 43 (f x)
| Some x -> Hash_impl_.(finalize (combine_int (combine_int seed 43) (f x)))
let list f l = List.fold_left (combine f) 0x42 l
let array f l = Array.fold_left (combine f) 0x42 l
let pair f g (x, y) = combine2 (f x) (g y)
let triple f g h (x, y, z) = combine2 (combine2 (f x) (g y)) (h z)
let list f l =
let s =
List.fold_left (fun s x -> Hash_impl_.combine_int s (f x)) Hash_impl_.seed l
in
Hash_impl_.finalize s
let array f a =
let s =
Array.fold_left
(fun s x -> Hash_impl_.combine_int s (f x))
Hash_impl_.seed a
in
Hash_impl_.finalize s
let pair f g (x, y) =
Hash_impl_.(finalize (combine_int (combine_int seed (f x)) (g y)))
let triple f g h (x, y, z) =
Hash_impl_.(
let s = combine_int seed (f x) in
let s = combine_int s (g y) in
finalize (combine_int s (h z)))
let quad f g h i (x, y, z, w) =
combine2 (combine2 (f x) (g y)) (combine2 (h z) (i w))
Hash_impl_.(
let s = combine_int seed (f x) in
let s = combine_int s (g y) in
let s = combine_int s (h z) in
finalize (combine_int s (i w)))
let map f h x = h (f x)
@ -144,8 +121,10 @@ let poly x = Hashtbl.hash x
let array_of_hashes_ arr =
Array.sort CCInt.compare arr;
(* sort the hashes, so their order does not matter *)
Array.fold_left combine2 0x42 arr
let s =
Array.fold_left (fun s h -> Hash_impl_.combine_int s h) Hash_impl_.seed arr
in
Hash_impl_.finalize s
let array_comm f a =
let arr = Array.init (Array.length a) (fun i -> f a.(i)) in
@ -157,19 +136,19 @@ let list_comm f l =
array_of_hashes_ arr
let iter f seq =
let h = ref 0x43 in
seq (fun x -> h := combine f !h x);
!h
let s = ref Hash_impl_.seed in
seq (fun x -> s := Hash_impl_.combine_int !s (f x));
Hash_impl_.finalize !s
let seq f seq =
let h = ref 0x43 in
Seq.iter (fun x -> h := combine f !h x) seq;
!h
let seq f sq =
let s = ref Hash_impl_.seed in
Seq.iter (fun x -> s := Hash_impl_.combine_int !s (f x)) sq;
Hash_impl_.finalize !s
let gen f g =
let rec aux s =
match g () with
| None -> s
| Some x -> aux (combine2 s (f x))
| None -> Hash_impl_.finalize s
| Some x -> aux (Hash_impl_.combine_int s (f x))
in
aux 0x42
aux Hash_impl_.seed

View file

@ -9,6 +9,15 @@
across versions of OCaml and Containers).
*)
(* TODO: for 4.xx:
{[type state = int64
val seed : state
type 'a t = state -> 'a -> state
val finalize : state -> int64
]}
*)
(** {2 Definitions} *)
type hash = int

103
src/core/CCHash64.ml Normal file
View file

@ -0,0 +1,103 @@
(* This file is free software, part of containers. See file "license" for more details. *)
(** {1 Hash combinators with 64-bit state} *)
type state = int64
let seed : state = Hash_impl_.seed
let[@inline] finalize64 (s : state) : int64 = Hash_impl_.fmix64 s
let[@inline] finalize (s : state) : int = Hash_impl_.finalize s
type 'a t = state -> 'a -> state
let[@inline] apply h x = finalize64 (h seed x)
let apply_int h x = Int64.to_int (finalize64 (h seed x))
let[@inline] int s x = Hash_impl_.combine_int s x
let[@inline] bool s b =
Hash_impl_.combine_int s
(if b then
1
else
2)
let[@inline] char s c = Hash_impl_.combine_char s (Char.code c)
let[@inline] int64 s (x : int64) = Hash_impl_.combine_i64 s x
let[@inline] int32 s (x : int32) = Hash_impl_.combine_i32 s x
let[@inline] nativeint s (x : nativeint) =
Hash_impl_.combine_i64 s (Int64.of_nativeint x)
let[@inline] string s x = Hash_impl_.combine_string s x
let[@inline] bytes s x = Hash_impl_.combine_string s (Bytes.unsafe_to_string x)
let slice str ofs s len =
let j = ofs + len in
let rec loop k st =
if k = j then
st
else
loop (k + 1)
(Hash_impl_.combine_char st (Char.code (String.unsafe_get str k)))
in
loop ofs s
let opt f s = function
| None -> Hash_impl_.combine_int s 0
| Some x -> f (Hash_impl_.combine_int s 1) x
let list f s l = List.fold_left f s l
let array f s a = Array.fold_left f s a
let pair f g s (x, y) = g (f s x) y
let triple f g h s (x, y, z) = h (g (f s x) y) z
let quad f g h k s (x, y, z, w) = k (h (g (f s x) y) z) w
let map proj f s x = f s (proj x)
let if_ b then_ else_ s x =
if b then
then_ s x
else
else_ s x
let poly s x = Hash_impl_.combine_int s (Hashtbl.hash x)
type 'a iter = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
let iter f s seq =
let st = ref s in
seq (fun x -> st := f !st x);
!st
let seq f s sq = Seq.fold_left f s sq
let gen f s g =
let rec aux st =
match g () with
| None -> st
| Some x -> aux (f st x)
in
aux s
let[@inline] combine2 a b =
Hash_impl_.(finalize64 (combine_i64 (combine_i64 seed a) b))
let combine3 a b c =
Hash_impl_.(
let s = combine_i64 (combine_i64 seed a) b in
finalize64 (combine_i64 s c))
let combine4 a b c d =
Hash_impl_.(
let s = combine_i64 (combine_i64 seed a) b in
let s = combine_i64 s c in
finalize64 (combine_i64 s d))
let array_comm f s a =
let hashes = Array.map (fun x -> finalize64 (f seed x)) a in
Array.sort Int64.compare hashes;
Array.fold_left Hash_impl_.combine_i64 s hashes
let list_comm f s l =
let arr = Array.of_list l in
array_comm f s arr

95
src/core/CCHash64.mli Normal file
View file

@ -0,0 +1,95 @@
(** Hash combinators with 64-bit state threading.
State is threaded explicitly as a private [int64] through combinators.
Finalize with {!finalize} (returns a positive [int]) or {!finalize64}
(returns the raw [int64]).
Typical use:
{[
let hash_my_record r =
CCHash64.(finalize (string (int seed r.id) r.name))
let hash_pair (a, b) =
CCHash64.(finalize (pair int string seed (a, b)))
]}
{b Implementation}: xorshift+multiply combiner with fmix64 (Murmur3)
finalizer, via C stubs. Unboxed in native code.
@since NEXT_RELEASE
*)
type state = private int64
(** Hash accumulator (64-bit). Create with {!seed}; finish with {!finalize}
or {!finalize64}. *)
val seed : state
(** Initial hash state (golden-ratio constant). *)
val finalize64 : state -> int64
(** Apply fmix64 (Murmur3) and return the full 64-bit result.
The result may be negative as a signed [int64]. *)
val finalize : state -> int
(** Apply fmix64 and return a non-negative [int] (strips sign bit). *)
type 'a t = state -> 'a -> state
(** A hash combiner: takes the current state, mixes in a value, returns the
updated state. *)
val apply : 'a t -> 'a -> int64
(** Hash the input *)
val apply_int : 'a t -> 'a -> int
(** Hash the input and truncate to [int] *)
val int : int t
val bool : bool t
val char : char t
val int32 : int32 t
val int64 : int64 t
val nativeint : nativeint t
val string : string t
val bytes : bytes t
(** @since 3.5 *)
val slice : string -> int -> int t
(** [slice str ofs s len] mixes the byte slice [str[ofs .. ofs+len-1]] into [s]. *)
val opt : 'a t -> 'a option t
val list : 'a t -> 'a list t
val array : 'a t -> 'a array t
val pair : 'a t -> 'b t -> ('a * 'b) t
val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t
val quad : 'a t -> 'b t -> 'c t -> 'd t -> ('a * 'b * 'c * 'd) t
val map : ('a -> 'b) -> 'b t -> 'a t
(** [map proj f] applies [proj] before hashing with [f].
Example: [map fst int] hashes only the first element of a pair.
@since 3.5 *)
val if_ : bool -> 'a t -> 'a t -> 'a t
(** [if_ b t e] uses hasher [t] when [b] is true, [e] otherwise. *)
val combine2 : int64 -> int64 -> int64
val combine3 : int64 -> int64 -> int64 -> int64
val combine4 : int64 -> int64 -> int64 -> int64 -> int64
val poly : 'a t
(** Uses [Hashtbl.hash] internally. *)
val list_comm : 'a t -> 'a list t
(** Commutative: lists equal up to permutation hash the same. *)
val array_comm : 'a t -> 'a array t
(** Commutative: arrays equal up to permutation hash the same. *)
(** {2 Iterators} *)
type 'a iter = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
val seq : 'a t -> 'a Seq.t t
val iter : 'a t -> 'a iter t
val gen : 'a t -> 'a gen t

View file

@ -4,20 +4,8 @@ include Int
type 'a iter = ('a -> unit) -> unit
(* use FNV:
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
let hash (n : int) : int =
let offset_basis = 0xcbf29ce484222325L in
let prime = 0x100000001b3L in
let h = ref offset_basis in
for k = 0 to 7 do
(h := Int64.(mul !h prime));
(* h := h xor (k-th byte of n) *)
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
done;
(* truncate back to int and remove sign *)
Int64.to_int !h land max_int
Hash_impl_.(finalize (combine_i64 seed (Int64.of_int n)))
let range i j yield =
let rec up i j yield =

View file

@ -7,13 +7,9 @@ include Int32
let min : t -> t -> t = Stdlib.min
let max : t -> t -> t = Stdlib.max
[@@@endif]
[@@@iflt 5.1]
let hash x = Stdlib.abs (to_int x)
[@@@endif]
let hash (x : t) : int = Hash_impl_.(finalize (combine_i32 seed x))
let sign i = compare i zero
let pow a b =

View file

@ -10,22 +10,8 @@ let max : t -> t -> t = Stdlib.max
[@@@endif]
let sign i = compare i zero
(* use FNV:
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
let hash_to_int64 (n : t) =
let offset_basis = 0xcbf29ce484222325L in
let prime = 0x100000001b3L in
let h = ref offset_basis in
for k = 0 to 7 do
h := mul !h prime;
(* h := h xor (k-th byte of n) *)
h := logxor !h (logand (shift_right n (k * 8)) 0xffL)
done;
logand !h max_int
let[@inline] hash (n : t) : int = to_int (hash_to_int64 n) land Stdlib.max_int
let hash_to_int64 (n : t) : t = Hash_impl_.(fmix64 (combine_i64 seed n))
let[@inline] hash (n : t) : int = Hash_impl_.(finalize (combine_i64 seed n))
(* see {!CCInt.popcount} for more details *)
let[@inline] popcount (b : t) : int =

View file

@ -191,7 +191,7 @@ module Make (O : Map.OrderedType) = struct
[@@@iflt 5.1]
let to_list m = fold (fun k v acc -> (k, v) :: acc) m []
let to_list = bindings
[@@@endif]

View file

@ -11,7 +11,7 @@ include String
let compare_int (a : int) b = Stdlib.compare a b
let compare = String.compare
let hash s = Hashtbl.hash s
let hash (s : string) : int = Hash_impl_.(finalize (combine_string seed s))
let length = String.length
let is_empty s = equal s ""

56
src/core/Hash_impl_.ml Normal file
View file

@ -0,0 +1,56 @@
(* This file is free software, part of containers. See file "license" for more details. *)
(** Internal hash implementation.
Combiner: [state ^= chunk; state ^= state >> 32; state *= 0xc6a4a7935bd1e995]
Finalizer: fmix64 (Murmur3).
Multiplicative constant 0xc6a4a7935bd1e995 (MurmurHash2, Austin Appleby):
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
fmix64 constants (Murmur3, Austin Appleby):
https://github.com/aappleby/smhasher
Not part of the public API; use {!CCHash} instead. *)
(** Initial hash state (golden-ratio constant). *)
let seed : int64 = 0x9e3779b97f4a7c15L
external combine_int :
(int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed])
= "caml_cc_hash_combine_int_byte" "caml_cc_hash_combine_int"
[@@noalloc]
(** [combine_int state x] mixes OCaml int [x] into [state]. *)
external combine_i64 :
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_hash_combine_i64_byte" "caml_cc_hash_combine_i64"
[@@noalloc]
(** [combine_i64 state chunk] mixes [chunk] into [state]. *)
external combine_i32 :
(int64[@unboxed]) -> (int32[@unboxed]) -> (int64[@unboxed])
= "caml_cc_hash_combine_i32_byte" "caml_cc_hash_combine_i32"
[@@noalloc]
(** [combine_i32 state chunk] mixes [chunk] into [state]. *)
external combine_char :
(int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed])
= "caml_cc_hash_combine_char_byte" "caml_cc_hash_combine_char"
[@@noalloc]
(** [combine_char state c] mixes character code [c] into [state]. *)
external combine_string : (int64[@unboxed]) -> string -> (int64[@unboxed])
= "caml_cc_hash_combine_string_byte" "caml_cc_hash_combine_string"
[@@noalloc]
(** [combine_string state s] mixes all bytes of [s] into [state] in 8-byte chunks. *)
external fmix64 : (int64[@unboxed]) -> (int64[@unboxed])
= "caml_cc_hash_fmix64_byte" "caml_cc_hash_fmix64"
[@@noalloc]
(** [fmix64 state] applies the Murmur3 finalizer. Result may be negative. *)
external finalize : (int64[@unboxed]) -> (int[@untagged])
= "caml_cc_hash_finalize_byte" "caml_cc_hash_finalize"
[@@noalloc]
(** [finalize state] applies fmix64 and returns a non-negative [int]. *)

View file

@ -6,7 +6,12 @@
(action
(run %{project_root}/src/core/cpp/cpp.exe %{input-file})))
(flags :standard -nolabels -open CCMonomorphic)
(libraries either containers.monomorphic containers.domain))
(libraries either containers.monomorphic containers.domain)
(private_modules Hash_impl_)
(foreign_stubs
(language c)
(flags :standard -O2)
(names hash_stubs)))
(ocamllex
(modules CCSexp_lex))

166
src/core/hash_stubs.c Normal file
View file

@ -0,0 +1,166 @@
/* This file is free software, part of containers. See file "license" for more details. */
/* Hash implementation: xorshift+multiply combiner with fmix64 finalizer.
Combiner: state ^= chunk; state ^= state >> 32; state *= 0xc6a4a7935bd1e995
Finalizer (fmix64, Murmur3): three rounds of xorshift-multiply.
Multiplicative constant 0xc6a4a7935bd1e995 (MurmurHash2, Austin Appleby):
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
fmix64 constants 0xff51afd7ed558ccd / 0xc4ceb9fe1a85ec53 (Murmur3, Austin Appleby):
https://github.com/aappleby/smhasher
*/
#include <caml/mlvalues.h>
#include <caml/alloc.h>
#include <caml/memory.h>
#include <stdint.h>
#include <string.h>
// from murmur2: https://chromium.googlesource.com/external/smhasher/+/c8e8bf81bc6041d6d836365a501a0a96830d2d81/MurmurHash2.cpp
#define HASH_MUL UINT64_C(0xc6a4a7935bd1e995)
// from murmur3: https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L81
#define FMIX_C1 UINT64_C(0xff51afd7ed558ccd)
#define FMIX_C2 UINT64_C(0xc4ceb9fe1a85ec53)
static inline uint64_t hash_combine(uint64_t state, uint64_t chunk)
{
state ^= chunk;
state ^= state >> 32;
state *= HASH_MUL;
return state;
}
// fmix64 from murmur3
static inline uint64_t fmix64(uint64_t h)
{
h ^= h >> 33;
h *= FMIX_C1;
h ^= h >> 33;
h *= FMIX_C2;
h ^= h >> 33;
return h;
}
/* --- combine_i64 --------------------------------------------------------- */
CAMLprim int64_t caml_cc_hash_combine_i64(int64_t state, int64_t chunk)
{
return (int64_t)hash_combine((uint64_t)state, (uint64_t)chunk);
}
CAMLprim value caml_cc_hash_combine_i64_byte(value v_state, value v_chunk)
{
CAMLparam2(v_state, v_chunk);
uint64_t r = hash_combine((uint64_t)Int64_val(v_state),
(uint64_t)Int64_val(v_chunk));
CAMLreturn(caml_copy_int64((int64_t)r));
}
/* --- combine_i32 --------------------------------------------------------- */
CAMLprim int64_t caml_cc_hash_combine_i32(int64_t state, int32_t chunk)
{
return (int64_t)hash_combine((uint64_t)state, (uint64_t)(uint32_t)chunk);
}
CAMLprim value caml_cc_hash_combine_i32_byte(value v_state, value v_chunk)
{
CAMLparam2(v_state, v_chunk);
uint64_t r = hash_combine((uint64_t)Int64_val(v_state),
(uint64_t)(uint32_t)Int32_val(v_chunk));
CAMLreturn(caml_copy_int64((int64_t)r));
}
/* --- combine_int --------------------------------------------------------- */
/* chunk is an OCaml int (intnat), passed untagged */
CAMLprim int64_t caml_cc_hash_combine_int(int64_t state, intnat chunk)
{
return (int64_t)hash_combine((uint64_t)state, (uint64_t)chunk);
}
CAMLprim value caml_cc_hash_combine_int_byte(value v_state, value v_chunk)
{
CAMLparam2(v_state, v_chunk);
uint64_t r = hash_combine((uint64_t)Int64_val(v_state),
(uint64_t)Long_val(v_chunk));
CAMLreturn(caml_copy_int64((int64_t)r));
}
/* --- combine_char -------------------------------------------------------- */
/* c is passed as untagged int (Char.code) */
CAMLprim int64_t caml_cc_hash_combine_char(int64_t state, intnat c)
{
return (int64_t)hash_combine((uint64_t)state, (uint64_t)(unsigned char)c);
}
CAMLprim value caml_cc_hash_combine_char_byte(value v_state, value v_c)
{
CAMLparam2(v_state, v_c);
uint64_t r = hash_combine((uint64_t)Int64_val(v_state),
(uint64_t)(unsigned char)Long_val(v_c));
CAMLreturn(caml_copy_int64((int64_t)r));
}
/* --- combine_string ------------------------------------------------------ */
/* Hashes all bytes of [str] into [state] using 8-byte chunks where possible.
[str] is a regular OCaml value; [state] is unboxed int64. */
CAMLprim int64_t caml_cc_hash_combine_string(int64_t state, value str)
{
const char *data = String_val(str);
mlsize_t len = caml_string_length(str);
uint64_t s = (uint64_t)state;
mlsize_t i = 0;
for (; i + 8 <= len; i += 8) {
uint64_t chunk;
memcpy(&chunk, data + i, 8);
s = hash_combine(s, chunk);
}
if (i < len) {
uint64_t chunk = 0;
memcpy(&chunk, data + i, len - i);
s = hash_combine(s, chunk);
}
return (int64_t)s;
}
CAMLprim value caml_cc_hash_combine_string_byte(value v_state, value str)
{
CAMLparam2(v_state, str);
int64_t r = caml_cc_hash_combine_string(Int64_val(v_state), str);
CAMLreturn(caml_copy_int64(r));
}
/* --- fmix64 -------------------------------------------------------------- */
/* Returns full 64-bit fmix64 result; may be "negative" as signed int64. */
CAMLprim int64_t caml_cc_hash_fmix64(int64_t state)
{
return (int64_t)fmix64((uint64_t)state);
}
CAMLprim value caml_cc_hash_fmix64_byte(value v_state)
{
CAMLparam1(v_state);
CAMLreturn(caml_copy_int64((int64_t)fmix64((uint64_t)Int64_val(v_state))));
}
/* --- finalize ------------------------------------------------------------ */
/* Applies fmix64 and masks to Max_long (positive OCaml int). */
CAMLprim intnat caml_cc_hash_finalize(int64_t state)
{
return (intnat)(fmix64((uint64_t)state) & (uint64_t)Max_long);
}
CAMLprim value caml_cc_hash_finalize_byte(value v_state)
{
CAMLparam1(v_state);
intnat r = (intnat)(fmix64((uint64_t)Int64_val(v_state)) & (uint64_t)Max_long);
CAMLreturn(Val_long(r));
}

View file

@ -8,7 +8,14 @@
(name test_hash)
(modules test_hash)
(flags :standard -warn-error -a+8)
(libraries containers iter))
(libraries containers iter containers_xxhash))
(rule
(alias runtest)
(locks /ctest)
(package containers)
(action
(run ./test_hash.exe)))
(executable
(name test_random)

View file

@ -1,54 +1,59 @@
(* test hash functions a bit *)
module H = CCHash
module H64 = CCHash64
module XXH = Containers_xxhash
module Hist = struct
type t = {
tbl: (int, int) Hashtbl.t;
mutable n_samples: int;
}
let n = ref 100_000
let verbose = ref false
let create () : t = { tbl = Hashtbl.create 32; n_samples = 0 }
let check_bit_proba name hash_fn n_samples =
let rand = Random.State.make [| 42 |] in
let bits = Array.make 64 0 in
let add_n self x n =
Hashtbl.replace self.tbl x (n + try Hashtbl.find self.tbl x with _ -> 0);
self.n_samples <- n + self.n_samples
let pp out (self : t) : unit =
let max = Hashtbl.fold (fun k _ n -> max k n) self.tbl 0 in
let min = Hashtbl.fold (fun k _ n -> min k n) self.tbl max in
for i = min to max do
let n = try Hashtbl.find self.tbl i with _ -> 0 in
Format.fprintf out "[v=%-4d, n-inputs %-6d] %s@." i n
(String.make (int_of_float @@ ceil (log (float n))) '#')
let n_loops = 30 in
for _i = 1 to n_loops do
let base = Random.State.int64 rand Int64.(pred max_int) |> Int64.to_int in
for i = 1 to n_samples do
let h = hash_fn (base + i) in
for b = 0 to 63 do
if Int64.(logand h (shift_left 1L b)) <> 0L then
bits.(b) <- bits.(b) + 1
done
done
end
let reset_line = "\x1b[2K\r"
let t_int n1 n2 =
Printf.printf "test hash_int on %d--%d\n" n1 n2;
let count = Hashtbl.create 128 in
for i = n1 to n2 do
Printf.printf "%shash %d…%!" reset_line i;
let h = H.int i in
Hashtbl.replace count h (1 + CCHashtbl.get_or count h ~default:0);
if i mod 1024 * 1024 * 1024 = 0 then Gc.major ()
done;
Printf.printf "%s%!" reset_line;
(* reverse table *)
let by_count =
CCHashtbl.to_iter count
|> Iter.map (fun (_h, n) -> n)
|> Iter.count ~hash:H.int
in
let hist = Hist.create () in
by_count (fun (n, i) -> Hist.add_n hist n i);
Format.printf "histogram:@.%a@." Hist.pp hist;
(*assert (Hist.check_uniform hist);*)
()
let n_samples = n_loops * n_samples in
if !verbose then (
Format.printf "%s bit probabilities after %d samples:@." name n_samples;
for b = 0 to 63 do
let prob = float bits.(b) /. float n_samples in
Format.printf "bit %2d: %.4f@." b prob
done
);
let ok = ref true in
for b = 0 to 63 do
let prob = float bits.(b) /. float n_samples in
if prob < 0.48 || prob > 0.52 then (
Format.printf "FAIL: bit %d has proba %.4f (outside 0.48-0.52)@." b prob;
ok := false
)
done;
if !ok then
Format.printf "%s: OK@." name
else
();
!ok
let speclist =
[
"-v", Arg.Set verbose, " verbose mode";
"-n", Arg.Set_int n, " size of the range";
]
let () =
t_int 0 2_000_000;
t_int (-4_000_000) (-3_500_000);
()
Arg.parse (Arg.align speclist) (fun _ -> ()) "test_hash.exe";
let ok1 =
check_bit_proba "CCHash64" (fun i -> H64.finalize64 (H64.int H64.seed i)) !n
in
let ok2 = check_bit_proba "XXH" (fun i -> XXH.hash_int i) !n in
if (not ok1) || not ok2 then exit 1

View file

@ -5,6 +5,7 @@ Containers_testlib.run_all ~descr:"containers"
T_array.get ();
T_bool.get ();
T_byte_buffer.get ();
T_byte_slice.get ();
T_canonical_sexp.get ();
T_char.get ();
T_either.get ();

View file

@ -151,4 +151,71 @@ let prop_consistent ops =
with Nope str -> Test.fail_reportf "consistent ops failed:\n%s" str
;;
q arb (fun ops -> prop_consistent ops)
q arb (fun ops -> prop_consistent ops);;
(* --- iter/fold_left/iteri off-by-one --- *)
t @@ fun () ->
(* empty buffer: iter should call f zero times *)
let b = create () in
let n = ref 0 in
iter (fun _ -> incr n) b;
!n = 0
;;
t @@ fun () ->
(* non-empty buffer: iter visits exactly [length b] chars *)
let b = create () in
append_string b "abc";
let chars = ref [] in
iter (fun c -> chars := c :: !chars) b;
List.rev !chars = [ 'a'; 'b'; 'c' ]
;;
t @@ fun () ->
(* fold_left on empty buffer returns accumulator unchanged *)
let b = create () in
fold_left (fun acc _ -> acc + 1) 0 b = 0
;;
t @@ fun () ->
(* fold_left counts exactly [length b] chars *)
let b = create () in
append_string b "hello";
fold_left (fun acc _ -> acc + 1) 0 b = 5
;;
t @@ fun () ->
(* iteri visits exactly [length b] indices *)
let b = create () in
append_string b "ab";
let pairs = ref [] in
iteri (fun i c -> pairs := (i, c) :: !pairs) b;
List.rev !pairs = [ 0, 'a'; 1, 'b' ]
;;
(* --- copy --- *)
t @@ fun () ->
let b = create () in
append_string b "hello";
let b2 = copy b in
contents b = contents b2
;;
t @@ fun () ->
(* copy is independent: mutating original doesn't affect copy *)
let b = create () in
append_string b "hello";
let b2 = copy b in
add_char b '!';
contents b2 = "hello"
;;
t @@ fun () ->
(* copy is independent: mutating copy doesn't affect original *)
let b = create () in
append_string b "hello";
let b2 = copy b in
add_char b2 '?';
contents b = "hello"

View file

@ -0,0 +1,37 @@
module T = (val Containers_testlib.make ~__FILE__ ())
include T
open CCByte_slice;;
(* --- of_string --- *)
t @@ fun () ->
let sl = of_string "hello" in
len sl = 5
;;
t @@ fun () ->
let sl = of_string "hello" in
contents sl = "hello"
;;
t @@ fun () ->
let sl = of_string "" in
len sl = 0
;;
(* --- clear --- *)
t @@ fun () ->
let sl = of_string "hello" in
clear sl;
len sl = 0
;;
t @@ fun () ->
(* after clear, get raises *)
let sl = of_string "hello" in
clear sl;
try
let _ = get sl 0 in
false
with Invalid_argument _ -> true

View file

@ -17,3 +17,66 @@ t @@ fun () -> string "abc" <> string "abcd";;
q Q.int (fun i ->
Q.assume (i >= 0);
int i = int64 (Int64.of_int i))
;;
(* --- stress tests -------------------------------------------------------- *)
(* Chi-squared distribution test over [count] consecutive integers in [buckets] buckets.
A uniform hash gives chi2 ~ buckets-1; we allow 4 standard deviations of slack. *)
t ~name:"int hash distribution chi2" @@ fun () ->
let count = 50_000 and buckets = 500 in
let counts = Array.make buckets 0 in
for i = 0 to count - 1 do
let b = CCHash.int i mod buckets in
counts.(b) <- counts.(b) + 1
done;
let expected = float count /. float buckets in
let c2 =
Array.fold_left
(fun acc c -> acc +. (((float c -. expected) ** 2.0) /. expected))
0.0 counts
in
let df = float (buckets - 1) in
c2 < df +. (4.0 *. sqrt (2.0 *. df))
;;
(* Strict avalanche criterion: flip one input bit, expect ~50% output bits to change. *)
t ~name:"int hash avalanche" @@ fun () ->
let bits = Sys.int_size - 1 in
let total_flips = ref 0 in
let total = ref 0 in
let rng = Random.State.make [| 42; 17; 99 |] in
for _ = 1 to 300 do
let x = Random.State.bits rng in
let hx = CCHash.int x in
for b = 0 to bits - 1 do
let hx' = CCHash.int (x lxor (1 lsl b)) in
total_flips := !total_flips + CCInt.popcount (hx lxor hx');
total := !total + bits
done
done;
let frac = float !total_flips /. float !total in
frac >= 0.45 && frac <= 0.55
;;
(* String hash: no collisions among distinct keys. *)
t ~name:"string hash no collisions" @@ fun () ->
let n = 50_000 in
let tbl = Hashtbl.create n in
let ok = ref true in
for i = 0 to n - 1 do
let h = CCHash.string (Printf.sprintf "key:%d" i) in
if Hashtbl.mem tbl h then ok := false;
Hashtbl.replace tbl h ()
done;
!ok
;;
(* CCHash64 pipeline matches CCHash.pair combiner. *)
q Q.int (fun i ->
let j = i lxor 0xdeadbeef in
let h_pair = CCHash.pair CCHash.int CCHash.int (i, j) in
let h_manual =
CCHash64.(finalize (int (int seed (CCHash.int i)) (CCHash.int j)))
in
h_pair = h_manual)