mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-03-07 21:27:55 -05:00
CCInt.hash: replace OCaml Int64 loop with C stub (FNV-1)
Implement FNV-1 hashing for CCInt.hash as a C stub instead of a pure-OCaml Int64-based loop. The core hash function operates on int64, with separate native and bytecode entry points for both int and int64.

- cc_stubs.c: cc_fnv_hash_int64 core, with int and int64 wrappers
- Uses [@untagged] for int args; the int64 entry points are ready to be bound with [@unboxed]
- OCAML_INT_MASK ensures correct byte extraction for 63-bit ints
- Hash values are unchanged from the previous OCaml implementation

Also adds a throughput benchmark in benchs/run_benchs.ml comparing the old pure-OCaml FNV hash vs the new C stub (~2x faster).
This commit is contained in:
parent
1aa8b869e5
commit
bb6de2ff05
5 changed files with 85 additions and 13 deletions
|
|
@ -10,7 +10,6 @@
|
|||
qcheck
|
||||
oseq
|
||||
batteries
|
||||
base
|
||||
sek)
|
||||
(flags :standard -warn-error -3-5 -w -60 -safe-string -color always)
|
||||
(optional)
|
||||
|
|
|
|||
|
|
@ -1824,4 +1824,35 @@ module Str = struct
|
|||
])
|
||||
end
|
||||
|
||||
module Hash = struct
  (* Baseline hasher: FNV-1 computed in pure OCaml with an Int64 accumulator,
     one multiply-then-xor round per byte of [n], lowest byte first.
     It is deliberately kept as an imperative loop so the benchmark keeps
     timing the same kind of code. *)
  let hash_ocaml (n : int) : int =
    let fnv_prime = 0x100000001b3L in
    let acc = ref 0xcbf29ce484222325L in
    for byte_idx = 0 to 7 do
      let byte = (n lsr (byte_idx * 8)) land 0xff in
      acc := Int64.logxor (Int64.mul !acc fnv_prime) (Int64.of_int byte)
    done;
    (* truncate to a native int, then clear the sign bit *)
    Int64.to_int !acc land max_int

  (* Hash every integer of [0, n) once with each implementation and compare
     them with [B.throughputN]. Results go through [opaque_ignore]
     (presumably so the calls are not optimised away; confirm against its
     definition). *)
  let bench_hash n =
    let last = n - 1 in
    let run_ocaml () =
      for i = 0 to last do
        opaque_ignore (hash_ocaml i)
      done
    in
    let run_c_stub () =
      for i = 0 to last do
        opaque_ignore (CCInt.hash i)
      done
    in
    let contenders = [ "ocaml_fnv", run_ocaml, (); "c_stub", run_c_stub, () ] in
    B.throughputN 3 ~repeat contenders

  (* Register the benchmark under the names "hash" and "int", sized at 1_000. *)
  let () =
    let int_benchs = "int" @>> app_ints bench_hash [ 1_000 ] in
    B.Tree.register ("hash" @>>> [ int_benchs ])
end
|
||||
|
||||
(* Entry point: call [B.Tree.run_global]; if it raises [Arg.Help], print the
   message carried by the exception instead of failing. *)
let () =
  match B.Tree.run_global () with
  | () -> ()
  | exception Arg.Help msg -> print_endline msg
|
||||
|
|
|
|||
|
|
@ -6,18 +6,8 @@ type 'a iter = ('a -> unit) -> unit
|
|||
|
||||
(* use FNV:
|
||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
|
||||
(* [hash n] is the FNV-1 hash of [n]: starting from the 64-bit offset basis,
   each of the 8 rounds multiplies the state by the FNV prime and then xors
   in the next byte of [n] (lowest byte first). The 64-bit state is finally
   truncated to a native int and its sign bit cleared, so the result is
   never negative. *)
let hash (n : int) : int =
  let fnv_prime = 0x100000001b3L in
  let rec mix state byte_idx =
    if byte_idx > 7 then
      state
    else (
      (* byte number [byte_idx] of [n], as an Int64 *)
      let byte = Int64.of_int ((n lsr (byte_idx * 8)) land 0xff) in
      mix (Int64.logxor (Int64.mul state fnv_prime) byte) (byte_idx + 1)
    )
  in
  Int64.to_int (mix 0xcbf29ce484222325L 0) land max_int
|
||||
(* [hash n] hashes the integer [n] through a C primitive.
   Two symbols are named: [caml_cc_hash_int_byte] is the bytecode entry
   point (it receives and returns ordinary OCaml values) and
   [caml_cc_hash_int] is the native entry point. With [@untagged] the
   native stub receives and returns raw machine integers, and [@@noalloc]
   declares that the stub never allocates on the OCaml heap. *)
external hash : (int[@untagged]) -> (int[@untagged])
|
||||
= "caml_cc_hash_int_byte" "caml_cc_hash_int" [@@noalloc]
|
||||
|
||||
let range i j yield =
|
||||
let rec up i j yield =
|
||||
|
|
|
|||
48
src/core/cc_stubs.c
Normal file
48
src/core/cc_stubs.c
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
#include <caml/mlvalues.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* FNV-1 hash for a 64-bit integer (multiply, then xor -- not FNV-1a).
|
||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
||||
|
||||
Core routine: operates on all 8 bytes of an int64_t. */
|
||||
|
||||
static inline int64_t cc_fnv_hash_int64(int64_t n) {
  /* 64-bit FNV prime; the state starts from the 64-bit FNV offset basis. */
  const uint64_t fnv_prime = UINT64_C(0x100000001b3);
  uint64_t state = UINT64_C(0xcbf29ce484222325);

  /* Work on the unsigned bit pattern so right shifts never sign-extend. */
  uint64_t remaining = (uint64_t)n;

  /* One round per byte, least-significant byte first:
     multiply the state by the prime, then xor in the byte. */
  int rounds = 8;
  while (rounds-- > 0) {
    state = (state * fnv_prime) ^ (remaining & 0xff);
    remaining >>= 8;
  }
  return (int64_t)state;
}
|
||||
|
||||
/* --- CCInt.hash entry points (int -> int) --- */
|
||||
|
||||
/* Mask to the OCaml int range (63 bits on 64-bit, 31 on 32-bit)
|
||||
before hashing, so negative OCaml ints hash the same as
|
||||
the unsigned representation seen by OCaml's [lsr]. */
|
||||
#define OCAML_INT_MASK ((UINT64_C(1) << (8 * sizeof(value) - 1)) - 1)
|
||||
|
||||
/* native: untagged int in, untagged int out */
|
||||
CAMLprim intnat caml_cc_hash_int(intnat n) {
  /* Keep only the bits selected by OCAML_INT_MASK before hashing. */
  const uint64_t low_bits = (uint64_t)n & OCAML_INT_MASK;
  const uint64_t digest = (uint64_t)cc_fnv_hash_int64((int64_t)low_bits);
  /* Mask with Max_long so the result is a non-negative OCaml int. */
  return (intnat)(digest & Max_long);
}
|
||||
|
||||
/* bytecode: tagged int in, tagged int out */
|
||||
CAMLprim value caml_cc_hash_int_byte(value v_n) {
  /* Untag the argument, reuse the native entry point, retag the result. */
  const intnat untagged = Long_val(v_n);
  const intnat hashed = caml_cc_hash_int(untagged);
  return Val_long(hashed);
}
|
||||
|
||||
/* --- int64 hash entry points (int64 -> int) --- */
|
||||
|
||||
/* native: unboxed int64 in, untagged int out */
|
||||
CAMLprim intnat caml_cc_hash_int64(int64_t n) {
  /* Hash all 64 bits, then mask with Max_long so the result is a
     non-negative OCaml int. */
  const uint64_t digest = (uint64_t)cc_fnv_hash_int64(n);
  return (intnat)(digest & Max_long);
}
|
||||
|
||||
/* bytecode: boxed int64 in, tagged int out */
|
||||
CAMLprim value caml_cc_hash_int64_byte(value v_n) {
  /* Read the int64 payload, reuse the native entry point, tag the result. */
  const int64_t payload = Int64_val(v_n);
  const intnat hashed = caml_cc_hash_int64(payload);
  return Val_long(hashed);
}
|
||||
|
|
@ -6,6 +6,10 @@
|
|||
(action
|
||||
(run %{project_root}/src/core/cpp/cpp.exe %{input-file})))
|
||||
(flags :standard -nolabels -open CCMonomorphic)
|
||||
(foreign_stubs
|
||||
(language c)
|
||||
(flags :standard -std=c99 -O2)
|
||||
(names cc_stubs))
|
||||
(libraries either containers.monomorphic containers.domain))
|
||||
|
||||
(ocamllex
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue