mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-05-05 08:54:22 -04:00
fix warnings
format and minor fixes
This commit is contained in:
parent
ac8e8bdda7
commit
abe924eb57
14 changed files with 288 additions and 158 deletions
|
|
@ -7,9 +7,11 @@ repos:
|
|||
pass_filenames: false
|
||||
language: system
|
||||
types: [text]
|
||||
stages: [pre-push]
|
||||
- id: dune-build-opam
|
||||
name: dune build
|
||||
entry: dune build @opam
|
||||
entry: dune build @install
|
||||
pass_filenames: false
|
||||
language: system
|
||||
types: [text]
|
||||
stages: [pre-push]
|
||||
|
|
|
|||
|
|
@ -13,14 +13,16 @@ let repeat = 3
|
|||
(* --- data setup ---------------------------------------------------------- *)
|
||||
|
||||
let n_ints = 1_000
|
||||
let ints = Array.init n_ints (fun i -> i * 2654435761 (* knuth multiplicative *))
|
||||
|
||||
let ints =
|
||||
Array.init n_ints (fun i -> i * 2654435761 (* knuth multiplicative *))
|
||||
|
||||
let short_str = String.make 16 'x'
|
||||
let medium_str = String.make 64 'x'
|
||||
let long_str = String.make 512 'x'
|
||||
|
||||
(* Strings of various lengths with distinct content *)
|
||||
let strings_short = Array.init n_ints (fun i -> Printf.sprintf "%016d" i)
|
||||
let strings_short = Array.init n_ints (fun i -> Printf.sprintf "%016d" i)
|
||||
let strings_medium = Array.init n_ints (fun i -> Printf.sprintf "%064d" i)
|
||||
|
||||
(* --- benchmarks ---------------------------------------------------------- *)
|
||||
|
|
@ -29,9 +31,7 @@ let bench_int_hash ~time () =
|
|||
let r = ref 0 in
|
||||
B.throughputN time ~repeat
|
||||
[
|
||||
( "CCHash.int",
|
||||
(fun () -> Array.iter (fun x -> r := CCHash.int x) ints),
|
||||
() );
|
||||
"CCHash.int", (fun () -> Array.iter (fun x -> r := CCHash.int x) ints), ();
|
||||
( "Hashtbl.hash (poly)",
|
||||
(fun () -> Array.iter (fun x -> r := Hashtbl.hash x) ints),
|
||||
() );
|
||||
|
|
@ -52,15 +52,9 @@ let bench_string_hash ~time () =
|
|||
( "CCHash.string/64",
|
||||
(fun () -> Array.iter (fun s -> r := CCHash.string s) strings_medium),
|
||||
() );
|
||||
( "CCHash.string literal/16",
|
||||
(fun () -> r := CCHash.string short_str),
|
||||
() );
|
||||
( "CCHash.string literal/64",
|
||||
(fun () -> r := CCHash.string medium_str),
|
||||
() );
|
||||
( "CCHash.string literal/512",
|
||||
(fun () -> r := CCHash.string long_str),
|
||||
() );
|
||||
"CCHash.string literal/16", (fun () -> r := CCHash.string short_str), ();
|
||||
"CCHash.string literal/64", (fun () -> r := CCHash.string medium_str), ();
|
||||
"CCHash.string literal/512", (fun () -> r := CCHash.string long_str), ();
|
||||
( "Hashtbl.hash/16",
|
||||
(fun () -> Array.iter (fun s -> r := Hashtbl.hash s) strings_short),
|
||||
() );
|
||||
|
|
@ -85,7 +79,7 @@ let bench_combine64 ~time () =
|
|||
(Int64.of_int (x lxor 0xaaaa)))
|
||||
(Int64.of_int (x + 1)))
|
||||
(Int64.of_int (x * 3)))
|
||||
(Int64.of_int (x lxor x lsr 7))))
|
||||
(Int64.of_int (x lxor (x lsr 7)))))
|
||||
ints),
|
||||
() );
|
||||
( "CCHash.list int [1..5]",
|
||||
|
|
@ -99,9 +93,12 @@ let bench_combine64 ~time () =
|
|||
let () =
|
||||
B.Tree.add_global "hash"
|
||||
B.Tree.(
|
||||
"int" @>> (fun () -> bench_int_hash ~time:2 ())
|
||||
@> "string" @>> (fun () -> bench_string_hash ~time:2 ())
|
||||
@> "combine64" @>> (fun () -> bench_combine64 ~time:2 ())
|
||||
"int"
|
||||
@>> (fun () -> bench_int_hash ~time:2 ())
|
||||
@> "string"
|
||||
@>> (fun () -> bench_string_hash ~time:2 ())
|
||||
@> "combine64"
|
||||
@>> (fun () -> bench_combine64 ~time:2 ())
|
||||
@> nil)
|
||||
|
||||
let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg
|
||||
|
|
|
|||
|
|
@ -21,7 +21,8 @@ let rec eq t1 t2 =
|
|||
let rec hash_tree t =
|
||||
match t with
|
||||
| Empty -> CCHash.string "empty"
|
||||
| Node (i, l) -> CCHash.(combine2 (int i) (list hash_tree l))
|
||||
| Node (i, l) ->
|
||||
CCHash.((combine2 [@alert "-deprecated"]) (int i) (list hash_tree l))
|
||||
|
||||
module H = Hashtbl.Make (struct
|
||||
type t = tree
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#!/bin/sh
|
||||
|
||||
OPTS="--profile=release --display=quiet"
|
||||
exec dune exec $OPTS -- benchs/run_benchs_hash.exe $@
|
||||
exec dune exec $OPTS -- ./benchs/run_bench_hash.exe $@
|
||||
|
|
|
|||
|
|
@ -23,8 +23,8 @@ let rec hash t =
|
|||
| List l -> H.combine2 20 (H.list hash l)
|
||||
| Map l ->
|
||||
H.combine2 30
|
||||
( H.iter (H.pair H.string hash) @@ fun k ->
|
||||
Str_map.iter (fun x y -> k (x, y)) l )
|
||||
(H.iter (H.pair H.string hash) @@ fun k ->
|
||||
Str_map.iter (fun x y -> k (x, y)) l)
|
||||
|
||||
let int64 i : t = Int i
|
||||
let int i : t = int64 (Int64.of_int i)
|
||||
|
|
|
|||
|
|
@ -7,56 +7,44 @@ type 'a t = 'a -> hash
|
|||
type 'a iter = ('a -> unit) -> unit
|
||||
type 'a gen = unit -> 'a option
|
||||
|
||||
(** {2 Full-strength int64 API} *)
|
||||
|
||||
let seed : int64 = Hash_impl_.seed
|
||||
|
||||
let[@inline] combine64 (s : int64) (c : int64) : int64 = Hash_impl_.combine_i64 s c
|
||||
|
||||
let[@inline] finalize (s : int64) : int = Hash_impl_.finalize s
|
||||
|
||||
let[@inline] finalize_i64 (s : int64) : int64 = Hash_impl_.fmix64 s
|
||||
|
||||
(** {2 Deprecated int-state combinators} *)
|
||||
|
||||
let[@inline] combine2 a b =
|
||||
Hash_impl_.(finalize (combine_i64 (Int64.of_int a) (Int64.of_int b)))
|
||||
Hash_impl_.(finalize (combine_int (combine_int seed a) b))
|
||||
|
||||
let[@inline] combine f s x =
|
||||
Hash_impl_.(finalize (combine_i64 (Int64.of_int s) (Int64.of_int (f x))))
|
||||
Hash_impl_.(finalize (combine_int (combine_int seed s) (f x)))
|
||||
|
||||
let combine3 a b c =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in
|
||||
finalize (combine_i64 s (Int64.of_int c)))
|
||||
let s = combine_int (combine_int seed a) b in
|
||||
finalize (combine_int s c))
|
||||
|
||||
let combine4 a b c d =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in
|
||||
let s = combine_i64 s (Int64.of_int c) in
|
||||
finalize (combine_i64 s (Int64.of_int d)))
|
||||
let s = combine_int (combine_int seed a) b in
|
||||
let s = combine_int s c in
|
||||
finalize (combine_int s d))
|
||||
|
||||
let combine5 a b c d e =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in
|
||||
let s = combine_i64 s (Int64.of_int c) in
|
||||
let s = combine_i64 s (Int64.of_int d) in
|
||||
finalize (combine_i64 s (Int64.of_int e)))
|
||||
let s = combine_int (combine_int seed a) b in
|
||||
let s = combine_int s c in
|
||||
let s = combine_int s d in
|
||||
finalize (combine_int s e))
|
||||
|
||||
let combine6 a b c d e f =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (Int64.of_int a) (Int64.of_int b) in
|
||||
let s = combine_i64 s (Int64.of_int c) in
|
||||
let s = combine_i64 s (Int64.of_int d) in
|
||||
let s = combine_i64 s (Int64.of_int e) in
|
||||
finalize (combine_i64 s (Int64.of_int f)))
|
||||
let s = combine_int (combine_int seed a) b in
|
||||
let s = combine_int s c in
|
||||
let s = combine_int s d in
|
||||
let s = combine_int s e in
|
||||
finalize (combine_int s f))
|
||||
|
||||
(** {2 Primitive hashers} *)
|
||||
|
||||
let const h _ = h
|
||||
let const0 _ = 0
|
||||
|
||||
let int n = Hash_impl_.(finalize (combine_i64 seed (Int64.of_int n)))
|
||||
let int n = Hash_impl_.(finalize (combine_int seed n))
|
||||
|
||||
let bool b =
|
||||
int
|
||||
|
|
@ -66,12 +54,9 @@ let bool b =
|
|||
2)
|
||||
|
||||
let char x = Hash_impl_.(finalize (combine_char seed (Char.code x)))
|
||||
|
||||
let int64 (n : int64) : int = Hash_impl_.(finalize (combine_i64 seed n))
|
||||
|
||||
let int32 (x : int32) : int = Hash_impl_.(finalize (combine_i32 seed x))
|
||||
|
||||
let nativeint (x : nativeint) = int64 (Int64.of_nativeint x)
|
||||
let nativeint (x : nativeint) = Hash_impl_.(finalize (combine_i64 seed (Int64.of_nativeint x)))
|
||||
|
||||
let bytes (x : bytes) =
|
||||
Hash_impl_.(finalize (combine_string seed (Bytes.unsafe_to_string x)))
|
||||
|
|
@ -92,40 +77,35 @@ let slice x i len =
|
|||
let opt f = function
|
||||
| None -> 42
|
||||
| Some x ->
|
||||
Hash_impl_.(finalize (combine_i64 (combine_i64 seed 43L) (Int64.of_int (f x))))
|
||||
Hash_impl_.(finalize (combine_int (combine_int seed 43) (f x)))
|
||||
|
||||
let list f l =
|
||||
let s =
|
||||
List.fold_left
|
||||
(fun s x -> Hash_impl_.combine_i64 s (Int64.of_int (f x)))
|
||||
Hash_impl_.seed l
|
||||
List.fold_left (fun s x -> Hash_impl_.combine_int s (f x)) Hash_impl_.seed l
|
||||
in
|
||||
Hash_impl_.finalize s
|
||||
|
||||
let array f a =
|
||||
let s =
|
||||
Array.fold_left
|
||||
(fun s x -> Hash_impl_.combine_i64 s (Int64.of_int (f x)))
|
||||
Hash_impl_.seed a
|
||||
Array.fold_left (fun s x -> Hash_impl_.combine_int s (f x)) Hash_impl_.seed a
|
||||
in
|
||||
Hash_impl_.finalize s
|
||||
|
||||
let pair f g (x, y) =
|
||||
Hash_impl_.(
|
||||
finalize (combine_i64 (combine_i64 seed (Int64.of_int (f x))) (Int64.of_int (g y))))
|
||||
Hash_impl_.(finalize (combine_int (combine_int seed (f x)) (g y)))
|
||||
|
||||
let triple f g h (x, y, z) =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 seed (Int64.of_int (f x)) in
|
||||
let s = combine_i64 s (Int64.of_int (g y)) in
|
||||
finalize (combine_i64 s (Int64.of_int (h z))))
|
||||
let s = combine_int seed (f x) in
|
||||
let s = combine_int s (g y) in
|
||||
finalize (combine_int s (h z)))
|
||||
|
||||
let quad f g h i (x, y, z, w) =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 seed (Int64.of_int (f x)) in
|
||||
let s = combine_i64 s (Int64.of_int (g y)) in
|
||||
let s = combine_i64 s (Int64.of_int (h z)) in
|
||||
finalize (combine_i64 s (Int64.of_int (i w))))
|
||||
let s = combine_int seed (f x) in
|
||||
let s = combine_int s (g y) in
|
||||
let s = combine_int s (h z) in
|
||||
finalize (combine_int s (i w)))
|
||||
|
||||
let map f h x = h (f x)
|
||||
|
||||
|
|
@ -140,9 +120,7 @@ let poly x = Hashtbl.hash x
|
|||
let array_of_hashes_ arr =
|
||||
Array.sort CCInt.compare arr;
|
||||
let s =
|
||||
Array.fold_left
|
||||
(fun s h -> Hash_impl_.combine_i64 s (Int64.of_int h))
|
||||
Hash_impl_.seed arr
|
||||
Array.fold_left (fun s h -> Hash_impl_.combine_int s h) Hash_impl_.seed arr
|
||||
in
|
||||
Hash_impl_.finalize s
|
||||
|
||||
|
|
@ -157,18 +135,18 @@ let list_comm f l =
|
|||
|
||||
let iter f seq =
|
||||
let s = ref Hash_impl_.seed in
|
||||
seq (fun x -> s := Hash_impl_.combine_i64 !s (Int64.of_int (f x)));
|
||||
seq (fun x -> s := Hash_impl_.combine_int !s (f x));
|
||||
Hash_impl_.finalize !s
|
||||
|
||||
let seq f sq =
|
||||
let s = ref Hash_impl_.seed in
|
||||
Seq.iter (fun x -> s := Hash_impl_.combine_i64 !s (Int64.of_int (f x))) sq;
|
||||
Seq.iter (fun x -> s := Hash_impl_.combine_int !s (f x)) sq;
|
||||
Hash_impl_.finalize !s
|
||||
|
||||
let gen f g =
|
||||
let rec aux s =
|
||||
match g () with
|
||||
| None -> Hash_impl_.finalize s
|
||||
| Some x -> aux (Hash_impl_.combine_i64 s (Int64.of_int (f x)))
|
||||
| Some x -> aux (Hash_impl_.combine_int s (f x))
|
||||
in
|
||||
aux Hash_impl_.seed
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
(* This file is free software, part of containers. See file "license" for more details. *)
|
||||
|
||||
(** Hash combinators
|
||||
|
||||
The API of this module is stable as per semantic versioning, like the
|
||||
|
|
@ -5,9 +7,6 @@
|
|||
can change and should not be relied on (i.e. hashing a value always
|
||||
returns the same integer {b within a run of a program}, not
|
||||
across versions of OCaml and Containers).
|
||||
|
||||
{b Implementation}: xorshift+multiply combiner with fmix64 (Murmur3) finalizer,
|
||||
via C stubs. Unboxed in native code, boxed in bytecode.
|
||||
*)
|
||||
|
||||
(* TODO: for 4.xx:
|
||||
|
|
@ -44,7 +43,8 @@ val int64 : int64 t
|
|||
val nativeint : nativeint t
|
||||
|
||||
val slice : string -> int -> int t
|
||||
(** [slice s i len] hashes the slice [s[i .. i+len-1]]. *)
|
||||
(** [slice s i len state] hashes the slice [i, …, i+len-1] of [s]
|
||||
into [state]. *)
|
||||
|
||||
val bytes : bytes t
|
||||
(** Hash a byte array.
|
||||
|
|
@ -88,47 +88,17 @@ val array_comm : 'a t -> 'a array t
|
|||
will have the same hash.
|
||||
@since 1.0 *)
|
||||
|
||||
(** {2 Full-strength int64 API} *)
|
||||
|
||||
val seed : int64
|
||||
(** Initial hash state. *)
|
||||
|
||||
val combine64 : int64 -> int64 -> int64
|
||||
(** [combine64 state chunk] mixes [chunk] into [state] using the
|
||||
xorshift+multiply combiner. Suitable for building streaming hashers
|
||||
with full 64-bit state. Finalize with {!finalize} or {!finalize_i64}. *)
|
||||
|
||||
val finalize : int64 -> int
|
||||
(** [finalize state] applies fmix64 (Murmur3 finalizer) and returns a
|
||||
non-negative [int] (strips sign bit). *)
|
||||
|
||||
val finalize_i64 : int64 -> int64
|
||||
(** [finalize_i64 state] applies fmix64 and returns the full 64-bit result.
|
||||
The result may be negative as a signed [int64]. *)
|
||||
|
||||
(** {2 Deprecated int-state combinators}
|
||||
|
||||
These thread state as [int] (63 bits on 64-bit systems), which is lossy.
|
||||
Prefer building a pipeline with {!seed}, {!combine64}, and {!finalize}. *)
|
||||
(** {2 Base hash combinators} *)
|
||||
|
||||
val combine : 'a t -> hash -> 'a -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
|
||||
val combine2 : hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
|
||||
val combine3 : hash -> hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
|
||||
val combine4 : hash -> hash -> hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
|
||||
val combine5 : hash -> hash -> hash -> hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
(** @since 2.1 *)
|
||||
|
||||
val combine6 : hash -> hash -> hash -> hash -> hash -> hash -> hash
|
||||
[@@deprecated "lossy (63-bit state); use combine64 with int64 state"]
|
||||
(** @since 2.1 *)
|
||||
|
||||
(** {2 Iterators} *)
|
||||
|
|
|
|||
72
src/core/CCHash64.ml
Normal file
72
src/core/CCHash64.ml
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
(* This file is free software, part of containers. See file "license" for more details. *)
|
||||
|
||||
(** {1 Hash combinators with 64-bit state} *)
|
||||
|
||||
type state = int64
|
||||
|
||||
let seed : state = Hash_impl_.seed
|
||||
|
||||
let[@inline] finalize64 (s : state) : int64 = Hash_impl_.fmix64 s
|
||||
let[@inline] finalize (s : state) : int = Hash_impl_.finalize s
|
||||
|
||||
type 'a t = state -> 'a -> state
|
||||
|
||||
let[@inline] int s x = Hash_impl_.combine_int s x
|
||||
let[@inline] bool s b = Hash_impl_.combine_int s (if b then 1 else 2)
|
||||
let[@inline] char s c = Hash_impl_.combine_char s (Char.code c)
|
||||
let[@inline] int64 s (x : int64) = Hash_impl_.combine_i64 s x
|
||||
let[@inline] int32 s (x : int32) = Hash_impl_.combine_i32 s x
|
||||
let[@inline] nativeint s (x : nativeint) = Hash_impl_.combine_i64 s (Int64.of_nativeint x)
|
||||
let[@inline] string s x = Hash_impl_.combine_string s x
|
||||
let[@inline] bytes s x = Hash_impl_.combine_string s (Bytes.unsafe_to_string x)
|
||||
|
||||
let slice str ofs s len =
|
||||
let j = ofs + len in
|
||||
let rec loop k st =
|
||||
if k = j then
|
||||
st
|
||||
else
|
||||
loop (k + 1) (Hash_impl_.combine_char st (Char.code (String.unsafe_get str k)))
|
||||
in
|
||||
loop ofs s
|
||||
|
||||
let opt f s = function
|
||||
| None -> Hash_impl_.combine_int s 0
|
||||
| Some x -> f (Hash_impl_.combine_int s 1) x
|
||||
|
||||
let list f s l = List.fold_left f s l
|
||||
let array f s a = Array.fold_left f s a
|
||||
let pair f g s (x, y) = g (f s x) y
|
||||
let triple f g h s (x, y, z) = h (g (f s x) y) z
|
||||
let quad f g h k s (x, y, z, w) = k (h (g (f s x) y) z) w
|
||||
|
||||
let map proj f s x = f s (proj x)
|
||||
let if_ b then_ else_ s x = if b then then_ s x else else_ s x
|
||||
let poly s x = Hash_impl_.combine_int s (Hashtbl.hash x)
|
||||
|
||||
type 'a iter = ('a -> unit) -> unit
|
||||
type 'a gen = unit -> 'a option
|
||||
|
||||
let iter f s seq =
|
||||
let st = ref s in
|
||||
seq (fun x -> st := f !st x);
|
||||
!st
|
||||
|
||||
let seq f s sq = Seq.fold_left f s sq
|
||||
|
||||
let gen f s g =
|
||||
let rec aux st =
|
||||
match g () with
|
||||
| None -> st
|
||||
| Some x -> aux (f st x)
|
||||
in
|
||||
aux s
|
||||
|
||||
let array_comm f s a =
|
||||
let hashes = Array.map (fun x -> finalize64 (f seed x)) a in
|
||||
Array.sort Int64.compare hashes;
|
||||
Array.fold_left Hash_impl_.combine_i64 s hashes
|
||||
|
||||
let list_comm f s l =
|
||||
let arr = Array.of_list l in
|
||||
array_comm f s arr
|
||||
83
src/core/CCHash64.mli
Normal file
83
src/core/CCHash64.mli
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
(** Hash combinators with 64-bit state threading.
|
||||
|
||||
State is threaded explicitly as a private [int64] through combinators,
|
||||
preserving full 64-bit quality throughout. Finalize with {!finalize} (returns
|
||||
a positive [int]) or {!finalize64} (returns the raw [int64]).
|
||||
|
||||
Typical use:
|
||||
{[
|
||||
let hash_my_record r =
|
||||
CCHash64.(finalize (string (int seed r.id) r.name))
|
||||
|
||||
let hash_pair (a, b) =
|
||||
CCHash64.(finalize (pair int string seed (a, b)))
|
||||
]}
|
||||
|
||||
{b Implementation}: xorshift+multiply combiner with fmix64 (Murmur3)
|
||||
finalizer, via C stubs. Unboxed in native code.
|
||||
*)
|
||||
|
||||
type state = private int64
|
||||
(** Hash accumulator (64-bit). Create with {!seed}; finish with {!finalize}
|
||||
or {!finalize64}. *)
|
||||
|
||||
val seed : state
|
||||
(** Initial hash state (golden-ratio constant). *)
|
||||
|
||||
val finalize64 : state -> int64
|
||||
(** Apply fmix64 (Murmur3) and return the full 64-bit result.
|
||||
The result may be negative as a signed [int64]. *)
|
||||
|
||||
val finalize : state -> int
|
||||
(** Apply fmix64 and return a non-negative [int] (strips sign bit). *)
|
||||
|
||||
type 'a t = state -> 'a -> state
|
||||
(** A hash combiner: takes the current state, mixes in a value, returns the
|
||||
updated state. *)
|
||||
|
||||
val int : int t
|
||||
val bool : bool t
|
||||
val char : char t
|
||||
val int32 : int32 t
|
||||
val int64 : int64 t
|
||||
val nativeint : nativeint t
|
||||
val string : string t
|
||||
|
||||
val bytes : bytes t
|
||||
(** @since 3.5 *)
|
||||
|
||||
val slice : string -> int -> int t
|
||||
(** [slice str ofs s len] mixes the byte slice [str[ofs .. ofs+len-1]] into [s]. *)
|
||||
|
||||
val opt : 'a t -> 'a option t
|
||||
val list : 'a t -> 'a list t
|
||||
val array : 'a t -> 'a array t
|
||||
val pair : 'a t -> 'b t -> ('a * 'b) t
|
||||
val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t
|
||||
val quad : 'a t -> 'b t -> 'c t -> 'd t -> ('a * 'b * 'c * 'd) t
|
||||
|
||||
val map : ('a -> 'b) -> 'b t -> 'a t
|
||||
(** [map proj f] applies [proj] before hashing with [f].
|
||||
Example: [map fst int] hashes only the first element of a pair.
|
||||
@since 3.5 *)
|
||||
|
||||
val if_ : bool -> 'a t -> 'a t -> 'a t
|
||||
(** [if_ b t e] uses hasher [t] when [b] is true, [e] otherwise. *)
|
||||
|
||||
val poly : 'a t
|
||||
(** Uses [Hashtbl.hash] internally. *)
|
||||
|
||||
val list_comm : 'a t -> 'a list t
|
||||
(** Commutative: lists equal up to permutation hash the same. *)
|
||||
|
||||
val array_comm : 'a t -> 'a array t
|
||||
(** Commutative: arrays equal up to permutation hash the same. *)
|
||||
|
||||
(** {2 Iterators} *)
|
||||
|
||||
type 'a iter = ('a -> unit) -> unit
|
||||
type 'a gen = unit -> 'a option
|
||||
|
||||
val seq : 'a t -> 'a Seq.t t
|
||||
val iter : 'a t -> 'a iter t
|
||||
val gen : 'a t -> 'a gen t
|
||||
|
|
@ -8,8 +8,8 @@ let min : t -> t -> t = Stdlib.min
|
|||
let max : t -> t -> t = Stdlib.max
|
||||
|
||||
[@@@endif]
|
||||
let hash (x : t) : int = Hash_impl_.(finalize (combine_i32 seed x))
|
||||
|
||||
let hash (x : t) : int = Hash_impl_.(finalize (combine_i32 seed x))
|
||||
let sign i = compare i zero
|
||||
|
||||
let pow a b =
|
||||
|
|
|
|||
|
|
@ -10,9 +10,7 @@ let max : t -> t -> t = Stdlib.max
|
|||
[@@@endif]
|
||||
|
||||
let sign i = compare i zero
|
||||
|
||||
let hash_to_int64 (n : t) : t = Hash_impl_.(fmix64 (combine_i64 seed n))
|
||||
|
||||
let[@inline] hash (n : t) : int = Hash_impl_.(finalize (combine_i64 seed n))
|
||||
|
||||
(* see {!CCInt.popcount} for more details *)
|
||||
|
|
|
|||
|
|
@ -18,17 +18,26 @@
|
|||
(** Initial hash state (golden-ratio constant). *)
|
||||
let seed : int64 = 0x9e3779b97f4a7c15L
|
||||
|
||||
external combine_i64 : (int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
external combine_int :
|
||||
(int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed])
|
||||
= "caml_cc_hash_combine_int_byte" "caml_cc_hash_combine_int"
|
||||
[@@noalloc]
|
||||
(** [combine_int state x] mixes OCaml int [x] into [state]. *)
|
||||
|
||||
external combine_i64 :
|
||||
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_hash_combine_i64_byte" "caml_cc_hash_combine_i64"
|
||||
[@@noalloc]
|
||||
(** [combine_i64 state chunk] mixes [chunk] into [state]. *)
|
||||
|
||||
external combine_i32 : (int64[@unboxed]) -> (int32[@unboxed]) -> (int64[@unboxed])
|
||||
external combine_i32 :
|
||||
(int64[@unboxed]) -> (int32[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_hash_combine_i32_byte" "caml_cc_hash_combine_i32"
|
||||
[@@noalloc]
|
||||
(** [combine_i32 state chunk] mixes [chunk] into [state]. *)
|
||||
|
||||
external combine_char : (int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed])
|
||||
external combine_char :
|
||||
(int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed])
|
||||
= "caml_cc_hash_combine_char_byte" "caml_cc_hash_combine_char"
|
||||
[@@noalloc]
|
||||
(** [combine_char state c] mixes character code [c] into [state]. *)
|
||||
|
|
|
|||
|
|
@ -71,6 +71,22 @@ CAMLprim value caml_cc_hash_combine_i32_byte(value v_state, value v_chunk)
|
|||
CAMLreturn(caml_copy_int64((int64_t)r));
|
||||
}
|
||||
|
||||
/* --- combine_int --------------------------------------------------------- */
|
||||
|
||||
/* chunk is an OCaml int (intnat), passed untagged */
|
||||
CAMLprim int64_t caml_cc_hash_combine_int(int64_t state, intnat chunk)
|
||||
{
|
||||
return (int64_t)hash_combine((uint64_t)state, (uint64_t)chunk);
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_hash_combine_int_byte(value v_state, value v_chunk)
|
||||
{
|
||||
CAMLparam2(v_state, v_chunk);
|
||||
uint64_t r = hash_combine((uint64_t)Int64_val(v_state),
|
||||
(uint64_t)Long_val(v_chunk));
|
||||
CAMLreturn(caml_copy_int64((int64_t)r));
|
||||
}
|
||||
|
||||
/* --- combine_char -------------------------------------------------------- */
|
||||
|
||||
/* c is passed as untagged int (Char.code) */
|
||||
|
|
|
|||
|
|
@ -16,57 +16,61 @@ t @@ fun () -> string "abc" <> string "abcd";;
|
|||
|
||||
q Q.int (fun i ->
|
||||
Q.assume (i >= 0);
|
||||
int i = int64 (Int64.of_int i));;
|
||||
int i = int64 (Int64.of_int i))
|
||||
;;
|
||||
|
||||
(* --- stress tests -------------------------------------------------------- *)
|
||||
|
||||
(* Chi-squared distribution test over [count] consecutive integers in [buckets] buckets.
|
||||
A uniform hash gives chi2 ~ buckets-1; we allow 4 standard deviations of slack. *)
|
||||
t ~name:"int hash distribution chi2" @@ fun () ->
|
||||
let count = 50_000 and buckets = 500 in
|
||||
let counts = Array.make buckets 0 in
|
||||
for i = 0 to count - 1 do
|
||||
let b = CCHash.int i mod buckets in
|
||||
counts.(b) <- counts.(b) + 1
|
||||
done;
|
||||
let expected = float count /. float buckets in
|
||||
let c2 =
|
||||
Array.fold_left
|
||||
(fun acc c -> acc +. ((float c -. expected) ** 2.0 /. expected))
|
||||
0.0 counts
|
||||
in
|
||||
let df = float (buckets - 1) in
|
||||
c2 < df +. 4.0 *. sqrt (2.0 *. df);;
|
||||
let count = 50_000 and buckets = 500 in
|
||||
let counts = Array.make buckets 0 in
|
||||
for i = 0 to count - 1 do
|
||||
let b = CCHash.int i mod buckets in
|
||||
counts.(b) <- counts.(b) + 1
|
||||
done;
|
||||
let expected = float count /. float buckets in
|
||||
let c2 =
|
||||
Array.fold_left
|
||||
(fun acc c -> acc +. (((float c -. expected) ** 2.0) /. expected))
|
||||
0.0 counts
|
||||
in
|
||||
let df = float (buckets - 1) in
|
||||
c2 < df +. (4.0 *. sqrt (2.0 *. df))
|
||||
;;
|
||||
|
||||
(* Strict avalanche criterion: flip one input bit, expect ~50% output bits to change. *)
|
||||
t ~name:"int hash avalanche" @@ fun () ->
|
||||
let bits = Sys.int_size - 1 in
|
||||
let total_flips = ref 0 in
|
||||
let total = ref 0 in
|
||||
let rng = Random.State.make [| 42; 17; 99 |] in
|
||||
for _ = 1 to 300 do
|
||||
let x = Random.State.bits rng in
|
||||
let hx = CCHash.int x in
|
||||
for b = 0 to bits - 1 do
|
||||
let hx' = CCHash.int (x lxor (1 lsl b)) in
|
||||
total_flips := !total_flips + CCInt.popcount (hx lxor hx');
|
||||
total := !total + bits
|
||||
done
|
||||
done;
|
||||
let frac = float !total_flips /. float !total in
|
||||
frac >= 0.45 && frac <= 0.55;;
|
||||
let bits = Sys.int_size - 1 in
|
||||
let total_flips = ref 0 in
|
||||
let total = ref 0 in
|
||||
let rng = Random.State.make [| 42; 17; 99 |] in
|
||||
for _ = 1 to 300 do
|
||||
let x = Random.State.bits rng in
|
||||
let hx = CCHash.int x in
|
||||
for b = 0 to bits - 1 do
|
||||
let hx' = CCHash.int (x lxor (1 lsl b)) in
|
||||
total_flips := !total_flips + CCInt.popcount (hx lxor hx');
|
||||
total := !total + bits
|
||||
done
|
||||
done;
|
||||
let frac = float !total_flips /. float !total in
|
||||
frac >= 0.45 && frac <= 0.55
|
||||
;;
|
||||
|
||||
(* String hash: no collisions among distinct keys. *)
|
||||
t ~name:"string hash no collisions" @@ fun () ->
|
||||
let n = 50_000 in
|
||||
let tbl = Hashtbl.create n in
|
||||
let ok = ref true in
|
||||
for i = 0 to n - 1 do
|
||||
let h = CCHash.string (Printf.sprintf "key:%d" i) in
|
||||
if Hashtbl.mem tbl h then ok := false;
|
||||
Hashtbl.replace tbl h ()
|
||||
done;
|
||||
!ok;;
|
||||
let n = 50_000 in
|
||||
let tbl = Hashtbl.create n in
|
||||
let ok = ref true in
|
||||
for i = 0 to n - 1 do
|
||||
let h = CCHash.string (Printf.sprintf "key:%d" i) in
|
||||
if Hashtbl.mem tbl h then ok := false;
|
||||
Hashtbl.replace tbl h ()
|
||||
done;
|
||||
!ok
|
||||
;;
|
||||
|
||||
(* CCHash64 pipeline matches CCHash.pair combiner. *)
|
||||
q Q.int (fun i ->
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue