From 45b3956421bf25bd2ad79e3b27beaaec65c82cda Mon Sep 17 00:00:00 2001
From: Simon Cruanes
Date: Mon, 5 Apr 2021 00:13:33 -0400
Subject: [PATCH] improve test hash

---
Review notes (this area is ignored when the patch is applied):
* In t_int the GC guard is now written `i mod (1024*1024*1024) = 0`.
  `mod` and `*` share one left-associative precedence level in OCaml, so
  the unparenthesised form tested `i mod 1024 = 0` and forced a major
  collection every 1024 iterations.
* Line breaks and indentation were reconstructed from the hunk counts;
  verify context-line whitespace against the tree before applying.
* The post-image blob id on the test_hash.ml index line was not recomputed.

 src/core/tests/dune         |  2 +-
 src/core/tests/test_hash.ml | 71 ++++++++++---------------------------
 2 files changed, 19 insertions(+), 54 deletions(-)

diff --git a/src/core/tests/dune b/src/core/tests/dune
index 4d56aece..ef6300eb 100644
--- a/src/core/tests/dune
+++ b/src/core/tests/dune
@@ -10,7 +10,7 @@
   (name test_hash)
   (modules test_hash)
   (flags :standard -warn-error -a+8)
-  (libraries containers))
+  (libraries containers iter))
 
 (executable
   (name test_random)
diff --git a/src/core/tests/test_hash.ml b/src/core/tests/test_hash.ml
index 4ebb50ea..d261309f 100644
--- a/src/core/tests/test_hash.ml
+++ b/src/core/tests/test_hash.ml
@@ -13,6 +13,9 @@ module Hist = struct
   let add self x =
     Hashtbl.replace self.tbl x (1 + try Hashtbl.find self.tbl x with _ ->0);
     self.n_samples <- 1 + self.n_samples
+  let add_n self x n =
+    Hashtbl.replace self.tbl x (n + try Hashtbl.find self.tbl x with _ ->0);
+    self.n_samples <- n + self.n_samples
 
   let pp out (self:t) : unit =
     let max = Hashtbl.fold (fun k _ n -> max k n) self.tbl 0 in
@@ -24,69 +27,31 @@ module Hist = struct
     done
 end
 
-(* how long does it take to get back to [n0]?
-   @param max if provided, stop when reaching it *)
-let orbit_size ?(max=max_int) n0 : int =
-  let size = ref 1 in
-  let n = ref (H.int n0) in
-  while !n <> n0 && !size < max do
-    n := H.int !n;
-    incr size;
-  done;
-  !size
-
 let reset_line = "\x1b[2K\r"
 
 let t_int n1 n2 =
   Printf.printf "test hash_int on %d--%d\n" n1 n2;
-  let hist = Hist.create() in
+  let count = Hashtbl.create 128 in
   for i=n1 to n2 do
-    Printf.printf "%sorbit for %d…%!" reset_line i;
-    let orb = orbit_size ~max:30_000 i in
-    Hist.add hist orb;
+    Printf.printf "%shash %d…%!" reset_line i;
+    let h = H.int i in
+    Hashtbl.replace count h (1 + CCHashtbl.get_or count h ~default:0);
+    if i mod (1024*1024*1024) = 0 then Gc.major();
   done;
   Printf.printf "%s%!" reset_line;
+  (* reverse table *)
+  let by_count =
+    CCHashtbl.to_iter count
+    |> Iter.map (fun (_h,n) -> n)
+    |> Iter.count ~hash:H.int
+  in
+  let hist = Hist.create() in
+  by_count (fun (n,i) -> Hist.add_n hist n i);
   Format.printf "histogram:@.%a@." Hist.pp hist;
   (*assert (Hist.check_uniform hist);*)
   ()
 
-(* how long does it take to get back to [n0] by
-   hashing [n] with [m0] ?
-   @param max if provided, stop when reaching it *)
-let left_orbit_size ?(max=max_int) n0 m0 : int =
-  let size = ref 1 in
-  let n = ref (H.combine2 (H.int n0) (H.int m0)) in
-  while !n <> n0 && !size < max do
-    n := H.combine2 (H.int !n) (H.int m0);
-    incr size;
-  done;
-  !size
-
-let t_int2 n1 n2 n3 n4 =
-  Printf.printf "test hash combine2 on %d--%d x %d--%d\n" n1 n2 n3 n4;
-  let hist = Hist.create() in
-  for i=n1 to n2 do
-    for j=n3 to n4 do
-      Printf.printf "%sleft-orbit for %d x %d…%!" reset_line i j;
-      let orb = left_orbit_size ~max:20_000 i j in
-      Hist.add hist orb;
-    done;
-  done;
-  Printf.printf "%s%!" reset_line;
-  Format.printf "histogram:@.%a@." Hist.pp hist;
-  ()
-
-let help =
-{|Test the hash function on integers, by computing orbits for inputs.
-An orbit is a cycle [n -> h(n) -> h(h(n)) -> h^3(n) -> … h^k(n)]
-where [h^k(n) == n]. For a good hash functions, orbits should be as long as
-possible.
-|}
-
 let () =
-  print_endline help;
-  t_int 0 80_000;
-  t_int 3_000_000 3_040_000;
-  t_int (-40_000) (-17_000);
-  t_int2 10 300 600 800;
+  t_int 0 2_000_000;
+  t_int (-4_000_000) (-3_500_000);
   ()