From ac8e8bdda7aca87388c357b10cf5a006b3e6ae76 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Mar 2026 19:28:18 +0000 Subject: [PATCH] bench of hash --- benchs/bench_hash.ml | 107 +++++++++++++++++++++++++++++++++++++++++++ benchs/dune | 15 ++++++ benchs/run_benchs.ml | 58 ++++++++++++++++++++++- 3 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 benchs/bench_hash.ml diff --git a/benchs/bench_hash.ml b/benchs/bench_hash.ml new file mode 100644 index 00000000..097c05f7 --- /dev/null +++ b/benchs/bench_hash.ml @@ -0,0 +1,107 @@ +(** Benchmarks for CCHash primitives. + + Run with: dune exec benchs/bench_hash.exe -- [options] + See: dune exec benchs/bench_hash.exe -- --help +*) + +[@@@warning "-5"] + +module B = Benchmark + +let repeat = 3 + +(* --- data setup ---------------------------------------------------------- *) + +let n_ints = 1_000 +let ints = Array.init n_ints (fun i -> i * 2654435761 (* knuth multiplicative *)) + +let short_str = String.make 16 'x' +let medium_str = String.make 64 'x' +let long_str = String.make 512 'x' + +(* Strings of various lengths with distinct content *) +let strings_short = Array.init n_ints (fun i -> Printf.sprintf "%016d" i) +let strings_medium = Array.init n_ints (fun i -> Printf.sprintf "%064d" i) + +(* --- benchmarks ---------------------------------------------------------- *) + +let bench_int_hash ~time () = + let r = ref 0 in + B.throughputN time ~repeat + [ + ( "CCHash.int", + (fun () -> Array.iter (fun x -> r := CCHash.int x) ints), + () ); + ( "Hashtbl.hash (poly)", + (fun () -> Array.iter (fun x -> r := Hashtbl.hash x) ints), + () ); + ( "CCHash.int64", + (fun () -> + Array.iter (fun x -> r := CCHash.int64 (Int64.of_int x)) ints), + () ); + ]; + ignore !r + +let bench_string_hash ~time () = + let r = ref 0 in + B.throughputN time ~repeat + [ + ( "CCHash.string/16", + (fun () -> Array.iter (fun s -> r := CCHash.string s) strings_short), + () ); + ( "CCHash.string/64", + (fun () -> Array.iter (fun s -> r := CCHash.string s) strings_medium), + () ); + ( "CCHash.string literal/16", + (fun () -> r := CCHash.string short_str), + () ); + ( "CCHash.string literal/64", + (fun () -> r := CCHash.string medium_str), + () ); + ( "CCHash.string literal/512", + (fun () -> r := CCHash.string long_str), + () ); + ( "Hashtbl.hash/16", + (fun () -> Array.iter (fun s -> r := Hashtbl.hash s) strings_short), + () ); + ]; + ignore !r + +let bench_combine64 ~time () = + let r = ref 0L in + B.throughputN time ~repeat + [ + ( "combine64 chain x5", + (fun () -> + Array.iter + (fun x -> + r := + CCHash.( + combine64 + (combine64 + (combine64 + (combine64 + (combine64 seed (Int64.of_int x)) + (Int64.of_int (x lxor 0xaaaa))) + (Int64.of_int (x + 1))) + (Int64.of_int (x * 3))) + (Int64.of_int (x lxor x lsr 7)))) + ints), + () ); + ( "CCHash.list int [1..5]", + (fun () -> r := Int64.of_int CCHash.(list int [ 1; 2; 3; 4; 5 ])), + () ); + ]; + ignore !r + +(* --- tree for run_global ------------------------------------------------- *) + +let () = + B.Tree.add_global "hash" + B.Tree.( + "int" @>> (fun () -> bench_int_hash ~time:2 ()) + @> "string" @>> (fun () -> bench_string_hash ~time:2 ()) + @> "combine64" @>> (fun () -> bench_combine64 ~time:2 ()) + @> nil) + +let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg diff --git a/benchs/dune b/benchs/dune index f0d36ef7..ac88101d 100644 --- a/benchs/dune +++ b/benchs/dune @@ -4,6 +4,7 @@ containers containers_pvec containers-data + containers.xxhash benchmark gen iter @@ -22,3 +23,17 @@ -unbox-closures -unbox-closures-factor 20)) + +(executable + (name bench_hash) + (libraries containers benchmark) + (flags :standard -warn-error -3-5 -w -60 -safe-string -color always) + (optional) + (ocamlopt_flags + :standard + -O3 + -color + always + -unbox-closures + -unbox-closures-factor + 20)) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index db23a3c4..dddf908e 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -1535,7 +1535,7 @@ module Str = struct let rand_str_ ?(among = "abcdefgh") n = let module Q = QCheck in let st = Random.State.make [| n + 17 |] in - let gen_c = QCheck.Gen.oneofl (CCString.to_list among) in + let gen_c = QCheck.Gen.oneof_list (CCString.to_list among) in QCheck.Gen.string_size ~gen:gen_c (QCheck.Gen.return n) st let find ?(start = 0) ~sub s = @@ -1824,4 +1824,60 @@ module Str = struct ]) end +module Hash = struct + (* Old FNV-based string hash (from before the rrmxmx+fmix64 C implementation) *) + let fnv_offset_basis = 0xcbf29ce484222325L + let fnv_prime = 0x100000001b3L + + let string_fnv (s : string) = + let h = ref fnv_offset_basis in + for i = 0 to String.length s - 1 do + let c = String.unsafe_get s i in + (h := Int64.(mul !h fnv_prime)); + h := Int64.(logxor !h (of_int (Char.code c))) + done; + Int64.to_int !h land max_int + + let rand = Random.State.make [| 42 |] + + let mk_strings n len = + Array.init n (fun _ -> + String.init len (fun _ -> Char.chr (65 + Random.State.int rand 26))) + + let bench_string_hash ?(time = 2) ~len n = + let strings = mk_strings n len in + let bench_new () = + Array.iter (fun s -> opaque_ignore (CCHash.string s)) strings + and bench_fnv () = + Array.iter (fun s -> opaque_ignore (string_fnv s)) strings + and bench_poly () = + Array.iter (fun s -> opaque_ignore (Hashtbl.hash s)) strings + and bench_xxhash () = + Array.iter + (fun s -> + opaque_ignore (Int64.to_int (Containers_xxhash.hash_string s))) + strings + in + B.throughputN time ~repeat + [ + "CCHash.string (new)", bench_new, (); + "string_fnv (old)", bench_fnv, (); + "Hashtbl.hash (poly)", bench_poly, (); + "xxhash", bench_xxhash, (); + ] + + let () = + B.Tree.register + ("hash" + @>>> [ + "string" + @>> B.Tree.concat + [ + app_int (bench_string_hash ~time:2 ~len:16) 1_000; + app_int (bench_string_hash ~time:2 ~len:64) 1_000; + app_int (bench_string_hash ~time:2 ~len:256) 1_000; + ]; + ]) +end + let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg