mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
improve test hash
This commit is contained in:
parent
8d6c7470eb
commit
45b3956421
2 changed files with 19 additions and 54 deletions
|
|
@ -10,7 +10,7 @@
|
||||||
(name test_hash)
|
(name test_hash)
|
||||||
(modules test_hash)
|
(modules test_hash)
|
||||||
(flags :standard -warn-error -a+8)
|
(flags :standard -warn-error -a+8)
|
||||||
(libraries containers))
|
(libraries containers iter))
|
||||||
|
|
||||||
(executable
|
(executable
|
||||||
(name test_random)
|
(name test_random)
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,9 @@ module Hist = struct
|
||||||
let add self x =
|
let add self x =
|
||||||
Hashtbl.replace self.tbl x (1 + try Hashtbl.find self.tbl x with _ ->0);
|
(* A missing key counts as 0. Only [Not_found] is handled, so unrelated
   exceptions are no longer silently swallowed by a catch-all. *)
Hashtbl.replace self.tbl x (1 + try Hashtbl.find self.tbl x with Not_found -> 0);
|
||||||
self.n_samples <- 1 + self.n_samples
|
self.n_samples <- 1 + self.n_samples
|
||||||
|
let add_n self x n =
|
||||||
|
(* Add [n] to the count stored for [x]; a missing key counts as 0. Only
   [Not_found] is handled, so unrelated exceptions are not swallowed. *)
Hashtbl.replace self.tbl x (n + try Hashtbl.find self.tbl x with Not_found -> 0);
|
||||||
|
self.n_samples <- n + self.n_samples
|
||||||
|
|
||||||
let pp out (self:t) : unit =
|
let pp out (self:t) : unit =
|
||||||
let max = Hashtbl.fold (fun k _ n -> max k n) self.tbl 0 in
|
let max = Hashtbl.fold (fun k _ n -> max k n) self.tbl 0 in
|
||||||
|
|
@ -24,69 +27,31 @@ module Hist = struct
|
||||||
done
|
done
|
||||||
end
|
end
|
||||||
|
|
||||||
(* how long does it take to get back to [n0]?
|
|
||||||
@param max if provided, stop when reaching it *)
|
|
||||||
let orbit_size ?(max=max_int) n0 : int =
|
|
||||||
let size = ref 1 in
|
|
||||||
let n = ref (H.int n0) in
|
|
||||||
while !n <> n0 && !size < max do
|
|
||||||
n := H.int !n;
|
|
||||||
incr size;
|
|
||||||
done;
|
|
||||||
!size
|
|
||||||
|
|
||||||
let reset_line = "\x1b[2K\r"
|
let reset_line = "\x1b[2K\r"
|
||||||
|
|
||||||
let t_int n1 n2 =
|
let t_int n1 n2 =
|
||||||
Printf.printf "test hash_int on %d--%d\n" n1 n2;
|
Printf.printf "test hash_int on %d--%d\n" n1 n2;
|
||||||
let hist = Hist.create() in
|
let count = Hashtbl.create 128 in
|
||||||
for i=n1 to n2 do
|
for i=n1 to n2 do
|
||||||
Printf.printf "%sorbit for %d…%!" reset_line i;
|
Printf.printf "%shash %d…%!" reset_line i;
|
||||||
let orb = orbit_size ~max:30_000 i in
|
let h = H.int i in
|
||||||
Hist.add hist orb;
|
Hashtbl.replace count h (1 + CCHashtbl.get_or count h ~default:0);
|
||||||
|
(* Parenthesised on purpose: [mod] and [*] share one precedence level and
   associate to the left, so the unparenthesised form parsed as
   [(i mod 1024) * 1024 * 1024 = 0] and forced a major GC every 1024
   iterations instead of every 1024^3. *)
if i mod (1024*1024*1024) = 0 then Gc.major();
|
||||||
done;
|
done;
|
||||||
Printf.printf "%s%!" reset_line;
|
Printf.printf "%s%!" reset_line;
|
||||||
|
(* reverse table *)
|
||||||
|
let by_count =
|
||||||
|
CCHashtbl.to_iter count
|
||||||
|
|> Iter.map (fun (_h,n) -> n)
|
||||||
|
|> Iter.count ~hash:H.int
|
||||||
|
in
|
||||||
|
let hist = Hist.create() in
|
||||||
|
by_count (fun (n,i) -> Hist.add_n hist n i);
|
||||||
Format.printf "histogram:@.%a@." Hist.pp hist;
|
Format.printf "histogram:@.%a@." Hist.pp hist;
|
||||||
(*assert (Hist.check_uniform hist);*)
|
(*assert (Hist.check_uniform hist);*)
|
||||||
()
|
()
|
||||||
|
|
||||||
(* how long does it take to get back to [n0] by
|
|
||||||
hashing [n] with [m0] ?
|
|
||||||
@param max if provided, stop when reaching it *)
|
|
||||||
let left_orbit_size ?(max=max_int) n0 m0 : int =
|
|
||||||
let size = ref 1 in
|
|
||||||
let n = ref (H.combine2 (H.int n0) (H.int m0)) in
|
|
||||||
while !n <> n0 && !size < max do
|
|
||||||
n := H.combine2 (H.int !n) (H.int m0);
|
|
||||||
incr size;
|
|
||||||
done;
|
|
||||||
!size
|
|
||||||
|
|
||||||
let t_int2 n1 n2 n3 n4 =
|
|
||||||
Printf.printf "test hash combine2 on %d--%d x %d--%d\n" n1 n2 n3 n4;
|
|
||||||
let hist = Hist.create() in
|
|
||||||
for i=n1 to n2 do
|
|
||||||
for j=n3 to n4 do
|
|
||||||
Printf.printf "%sleft-orbit for %d x %d…%!" reset_line i j;
|
|
||||||
let orb = left_orbit_size ~max:20_000 i j in
|
|
||||||
Hist.add hist orb;
|
|
||||||
done;
|
|
||||||
done;
|
|
||||||
Printf.printf "%s%!" reset_line;
|
|
||||||
Format.printf "histogram:@.%a@." Hist.pp hist;
|
|
||||||
()
|
|
||||||
|
|
||||||
let help =
|
|
||||||
{|Test the hash function on integers, by computing orbits for inputs.
|
|
||||||
An orbit is a cycle [n -> h(n) -> h(h(n)) -> h^3(n) -> … h^k(n)]
|
|
||||||
where [h^k(n) == n]. For a good hash functions, orbits should be as long as
|
|
||||||
possible.
|
|
||||||
|}
|
|
||||||
|
|
||||||
let () =
|
let () =
|
||||||
print_endline help;
|
t_int 0 2_000_000;
|
||||||
t_int 0 80_000;
|
t_int (-4_000_000) (-3_500_000);
|
||||||
t_int 3_000_000 3_040_000;
|
|
||||||
t_int (-40_000) (-17_000);
|
|
||||||
t_int2 10 300 600 800;
|
|
||||||
()
|
()
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue