hash tests: count bit probability

This commit is contained in:
Simon Cruanes 2026-03-16 18:58:15 +00:00
parent 1d5b529730
commit e404dd26ab
2 changed files with 47 additions and 60 deletions

View file

@ -8,7 +8,14 @@
(name test_hash)
(modules test_hash)
(flags :standard -warn-error -a+8)
(libraries containers iter))
(libraries containers iter containers_xxhash))
(rule
(alias runtest)
(locks /ctest)
(package containers)
(action
(run ./test_hash.exe)))
(executable
(name test_random)

View file

@ -1,70 +1,50 @@
(* test hash functions a bit *)
module H = CCHash
module H64 = CCHash64
module XXH = Containers_xxhash
module Hist = struct
type t = {
tbl: (int, int) Hashtbl.t;
mutable n_samples: int;
}
let n = ref 1_000_000
let verbose = ref false
let create () : t = { tbl = Hashtbl.create 32; n_samples = 0 }
let add_n self x n =
Hashtbl.replace self.tbl x (n + try Hashtbl.find self.tbl x with _ -> 0);
self.n_samples <- n + self.n_samples
let pp out (self : t) : unit =
let max = Hashtbl.fold (fun k _ n -> max k n) self.tbl 0 in
let min = Hashtbl.fold (fun k _ n -> min k n) self.tbl max in
for i = min to max do
let n = try Hashtbl.find self.tbl i with _ -> 0 in
Format.fprintf out "[v=%-4d, n-inputs %-6d] %s@." i n
(String.make (int_of_float @@ ceil (log (float n))) '#')
done
end
let reset_line = "\x1b[2K\r"
let t_int n1 n2 =
Printf.printf "test hash_int on %d--%d\n" n1 n2;
let count = Hashtbl.create 128 in
for i = n1 to n2 do
Printf.printf "%shash %d…%!" reset_line i;
let h = H.int i in
Hashtbl.replace count h (1 + CCHashtbl.get_or count h ~default:0);
if i mod 1024 * 1024 * 1024 = 0 then Gc.major ()
done;
Printf.printf "%s%!" reset_line;
(* reverse table *)
let by_count =
CCHashtbl.to_iter count
|> Iter.map (fun (_h, n) -> n)
|> Iter.count ~hash:H.int
in
let hist = Hist.create () in
by_count (fun (n, i) -> Hist.add_n hist n i);
Format.printf "histogram:@.%a@." Hist.pp hist;
(*assert (Hist.check_uniform hist);*)
()
let () =
t_int 0 2_000_000;
t_int (-4_000_000) (-3_500_000)
let () =
let n_samples = 200_000 in
let check_bit_proba name hash_fn n_samples =
let bits = Array.make 64 0 in
for i = 0 to n_samples - 1 do
let h = H64.finalize64 (H64.int H64.seed i) in
for i = 1 to n_samples do
let h = hash_fn i in
for b = 0 to 63 do
if Int64.(logand h (shift_left 1L b)) <> 0L then
bits.(b) <- bits.(b) + 1
if Int64.(logand h (shift_left 1L b)) <> 0L then bits.(b) <- bits.(b) + 1
done
done;
Format.printf "Bit probabilities after %d samples:@." n_samples;
if !verbose then (
Format.printf "%s bit probabilities after %d samples:@." name n_samples;
for b = 0 to 63 do
let prob = float bits.(b) /. float n_samples in
Format.printf "bit %2d: %.4f@." b prob
done
);
let ok = ref true in
for b = 0 to 63 do
let prob = float bits.(b) /. float n_samples in
Format.printf "bit %2d: %.4f@." b prob
done
if prob < 0.48 || prob > 0.52 then (
Format.printf "FAIL: bit %d has proba %.4f (outside 0.48-0.52)@." b prob;
ok := false
)
done;
if !ok then
Format.printf "%s: OK@." name
else
();
!ok
let speclist =
[
"-v", Arg.Set verbose, " verbose mode";
"-n", Arg.Set_int n, " size of the range";
]
let () =
Arg.parse (Arg.align speclist) (fun _ -> ()) "test_hash.exe";
let ok1 =
check_bit_proba "CCHash64" (fun i -> H64.finalize64 (H64.int H64.seed i)) !n
in
let ok2 = check_bit_proba "XXH" (fun i -> XXH.hash_int i) !n in
if (not ok1) || not ok2 then exit 1