bench of hash

This commit is contained in:
Simon Cruanes 2026-03-14 19:28:18 +00:00
parent fe5231a376
commit ac8e8bdda7
3 changed files with 179 additions and 1 deletions

107
benchs/bench_hash.ml Normal file
View file

@ -0,0 +1,107 @@
(** Benchmarks for CCHash primitives.
Run with: dune exec benchs/bench_hash.exe -- [options]
See: dune exec benchs/bench_hash.exe -- --help
*)
[@@@warning "-5"]
module B = Benchmark
let repeat = 3
(* --- data setup ---------------------------------------------------------- *)
let n_ints = 1_000
let ints = Array.init n_ints (fun i -> i * 2654435761 (* knuth multiplicative *))
let short_str = String.make 16 'x'
let medium_str = String.make 64 'x'
let long_str = String.make 512 'x'
(* Strings of various lengths with distinct content *)
let strings_short = Array.init n_ints (fun i -> Printf.sprintf "%016d" i)
let strings_medium = Array.init n_ints (fun i -> Printf.sprintf "%064d" i)
(* --- benchmarks ---------------------------------------------------------- *)
let bench_int_hash ~time () =
let r = ref 0 in
B.throughputN time ~repeat
[
( "CCHash.int",
(fun () -> Array.iter (fun x -> r := CCHash.int x) ints),
() );
( "Hashtbl.hash (poly)",
(fun () -> Array.iter (fun x -> r := Hashtbl.hash x) ints),
() );
( "CCHash.int64",
(fun () ->
Array.iter (fun x -> r := CCHash.int64 (Int64.of_int x)) ints),
() );
];
ignore !r
let bench_string_hash ~time () =
let r = ref 0 in
B.throughputN time ~repeat
[
( "CCHash.string/16",
(fun () -> Array.iter (fun s -> r := CCHash.string s) strings_short),
() );
( "CCHash.string/64",
(fun () -> Array.iter (fun s -> r := CCHash.string s) strings_medium),
() );
( "CCHash.string literal/16",
(fun () -> r := CCHash.string short_str),
() );
( "CCHash.string literal/64",
(fun () -> r := CCHash.string medium_str),
() );
( "CCHash.string literal/512",
(fun () -> r := CCHash.string long_str),
() );
( "Hashtbl.hash/16",
(fun () -> Array.iter (fun s -> r := Hashtbl.hash s) strings_short),
() );
];
ignore !r
let bench_combine64 ~time () =
let r = ref 0L in
B.throughputN time ~repeat
[
( "combine64 chain x5",
(fun () ->
Array.iter
(fun x ->
r :=
CCHash.(
combine64
(combine64
(combine64
(combine64
(combine64 seed (Int64.of_int x))
(Int64.of_int (x lxor 0xaaaa)))
(Int64.of_int (x + 1)))
(Int64.of_int (x * 3)))
(Int64.of_int (x lxor x lsr 7))))
ints),
() );
( "CCHash.list int [1..5]",
(fun () -> r := Int64.of_int CCHash.(list int [ 1; 2; 3; 4; 5 ])),
() );
];
ignore !r
(* --- tree for run_global ------------------------------------------------- *)
let () =
B.Tree.add_global "hash"
B.Tree.(
"int" @>> (fun () -> bench_int_hash ~time:2 ())
@> "string" @>> (fun () -> bench_string_hash ~time:2 ())
@> "combine64" @>> (fun () -> bench_combine64 ~time:2 ())
@> nil)
let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg

View file

@ -4,6 +4,7 @@
containers
containers_pvec
containers-data
containers.xxhash
benchmark
gen
iter
@ -22,3 +23,17 @@
-unbox-closures
-unbox-closures-factor
20))
(executable
(name bench_hash)
(libraries containers benchmark)
(flags :standard -warn-error -3-5 -w -60 -safe-string -color always)
(optional)
(ocamlopt_flags
:standard
-O3
-color
always
-unbox-closures
-unbox-closures-factor
20))

View file

@ -1535,7 +1535,7 @@ module Str = struct
let rand_str_ ?(among = "abcdefgh") n =
let module Q = QCheck in
let st = Random.State.make [| n + 17 |] in
let gen_c = QCheck.Gen.oneofl (CCString.to_list among) in
let gen_c = QCheck.Gen.oneof_list (CCString.to_list among) in
QCheck.Gen.string_size ~gen:gen_c (QCheck.Gen.return n) st
let find ?(start = 0) ~sub s =
@ -1824,4 +1824,60 @@ module Str = struct
])
end
module Hash = struct
(* Old FNV-based string hash (from before the rrmxmx+fmix64 C implementation) *)
let fnv_offset_basis = 0xcbf29ce484222325L
let fnv_prime = 0x100000001b3L
let string_fnv (s : string) =
let h = ref fnv_offset_basis in
for i = 0 to String.length s - 1 do
let c = String.unsafe_get s i in
(h := Int64.(mul !h fnv_prime));
h := Int64.(logxor !h (of_int (Char.code c)))
done;
Int64.to_int !h land max_int
let rand = Random.State.make [| 42 |]
let mk_strings n len =
Array.init n (fun _ ->
String.init len (fun _ -> Char.chr (65 + Random.State.int rand 26)))
let bench_string_hash ?(time = 2) ~len n =
let strings = mk_strings n len in
let bench_new () =
Array.iter (fun s -> opaque_ignore (CCHash.string s)) strings
and bench_fnv () =
Array.iter (fun s -> opaque_ignore (string_fnv s)) strings
and bench_poly () =
Array.iter (fun s -> opaque_ignore (Hashtbl.hash s)) strings
and bench_xxhash () =
Array.iter
(fun s ->
opaque_ignore (Int64.to_int (Containers_xxhash.hash_string s)))
strings
in
B.throughputN time ~repeat
[
"CCHash.string (new)", bench_new, ();
"string_fnv (old)", bench_fnv, ();
"Hashtbl.hash (poly)", bench_poly, ();
"xxhash", bench_xxhash, ();
]
let () =
B.Tree.register
("hash"
@>>> [
"string"
@>> B.Tree.concat
[
app_int (bench_string_hash ~time:2 ~len:16) 1_000;
app_int (bench_string_hash ~time:2 ~len:64) 1_000;
app_int (bench_string_hash ~time:2 ~len:256) 1_000;
];
])
end
let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg