mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-05-05 08:54:22 -04:00
Compare commits
11 commits
aeae7c1039
...
4948d74e45
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4948d74e45 | ||
|
|
efa429e3c8 | ||
|
|
fec6a8f58f | ||
|
|
e42caf3b6a | ||
|
|
749f973528 | ||
|
|
c02c76eb0b | ||
|
|
0ad561a8e7 | ||
|
|
7fdacb4a29 | ||
|
|
c99883b841 | ||
|
|
479fe31c27 | ||
|
|
7f8df6d63d |
33 changed files with 970 additions and 266 deletions
19
.github/workflows/format.yml
vendored
19
.github/workflows/format.yml
vendored
|
|
@ -9,20 +9,9 @@ on:
|
|||
jobs:
|
||||
format:
|
||||
name: format
|
||||
strategy:
|
||||
matrix:
|
||||
ocaml-compiler:
|
||||
- '5.3'
|
||||
runs-on: 'ubuntu-latest'
|
||||
runs-on: ubuntu-latest
|
||||
container: ghcr.io/c-cube/c-cube-commmon/ci-doc-5.3:latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@main
|
||||
- name: Use OCaml ${{ matrix.ocaml-compiler }}
|
||||
uses: ocaml/setup-ocaml@v3
|
||||
with:
|
||||
ocaml-compiler: ${{ matrix.ocaml-compiler }}
|
||||
dune-cache: true
|
||||
allow-prerelease-opam: true
|
||||
|
||||
- run: opam install ocamlformat.0.27.0
|
||||
- uses: actions/checkout@v6
|
||||
- run: opam exec -- make format-check
|
||||
|
||||
|
|
|
|||
18
.github/workflows/gh-pages.yml
vendored
18
.github/workflows/gh-pages.yml
vendored
|
|
@ -3,23 +3,19 @@ name: github pages
|
|||
on:
|
||||
push:
|
||||
branches:
|
||||
- master # Set a branch name to trigger deployment
|
||||
- main
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
name: Deploy doc
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@main
|
||||
container: ghcr.io/c-cube/c-cube-commmon/ci-doc-5.3:latest
|
||||
|
||||
- name: Use OCaml
|
||||
uses: ocaml/setup-ocaml@v3
|
||||
with:
|
||||
ocaml-compiler: '5.2'
|
||||
dune-cache: false
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Deps
|
||||
run: opam install odig containers containers-data
|
||||
run: opam install containers containers-data
|
||||
|
||||
- name: Build
|
||||
run: opam exec -- odig odoc --cache-dir=_doc/ containers containers-data
|
||||
|
|
@ -29,5 +25,5 @@ jobs:
|
|||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
publish_dir: ./_doc/html/
|
||||
destination_dir: dev
|
||||
enable_jekyll: true
|
||||
destination_dir: .
|
||||
enable_jekyll: false
|
||||
|
|
|
|||
59
.github/workflows/main.yml
vendored
59
.github/workflows/main.yml
vendored
|
|
@ -7,58 +7,25 @@ on:
|
|||
pull_request:
|
||||
|
||||
jobs:
|
||||
run:
|
||||
build:
|
||||
name: build
|
||||
timeout-minutes: 15
|
||||
strategy:
|
||||
fail-fast: true
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os:
|
||||
- ubuntu-latest
|
||||
ocaml-compiler:
|
||||
- '4.08'
|
||||
- '4.10'
|
||||
- '4.14'
|
||||
- '5.3'
|
||||
- 'ocaml-variants.5.0.0+options,ocaml-option-bytecode-only'
|
||||
include:
|
||||
- ocaml-version: "4.08"
|
||||
container: ghcr.io/c-cube/c-cube-commmon/ci-4.08:latest
|
||||
- ocaml-version: "4.14"
|
||||
container: ghcr.io/c-cube/c-cube-commmon/ci-4.14:latest
|
||||
- ocaml-version: "5.4"
|
||||
container: ghcr.io/c-cube/c-cube-commmon/ci-5.4:latest
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
container: ${{ matrix.container }}
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- uses: actions/checkout@main
|
||||
- name: Use OCaml ${{ matrix.ocaml-compiler }}
|
||||
uses: ocaml/setup-ocaml@v3
|
||||
with:
|
||||
ocaml-compiler: ${{ matrix.ocaml-compiler }}
|
||||
dune-cache: true
|
||||
allow-prerelease-opam: true
|
||||
- uses: actions/checkout@v6
|
||||
- run: opam install -t containers containers-data --deps-only
|
||||
- run: opam exec -- dune build '@install'
|
||||
- run: opam exec -- dune runtest --force --profile=release
|
||||
|
||||
compat:
|
||||
name: build
|
||||
timeout-minutes: 15
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os:
|
||||
- macos-latest
|
||||
- ubuntu-latest
|
||||
#- windows-latest
|
||||
ocaml-compiler:
|
||||
- '5.1'
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- uses: actions/checkout@main
|
||||
- name: Use OCaml ${{ matrix.ocaml-compiler }}
|
||||
uses: ocaml/setup-ocaml@v3
|
||||
with:
|
||||
ocaml-compiler: ${{ matrix.ocaml-compiler }}
|
||||
dune-cache: true
|
||||
allow-prerelease-opam: true
|
||||
- run: |
|
||||
opam install -t containers --deps-only ;
|
||||
opam install containers-data --deps-only # no test deps
|
||||
- run: opam exec -- dune build '@install'
|
||||
- run: opam exec -- dune runtest -j 1 -p containers --profile=release # test only core on non-ubuntu platform
|
||||
|
|
|
|||
17
.pre-commit-config.yaml
Normal file
17
.pre-commit-config.yaml
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
repos:
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: dune-build-fmt
|
||||
name: dune build @fmt
|
||||
entry: dune build @fmt --auto-promote
|
||||
pass_filenames: false
|
||||
language: system
|
||||
types: [text]
|
||||
stages: [pre-push]
|
||||
- id: dune-build-opam
|
||||
name: dune build
|
||||
entry: dune build @install
|
||||
pass_filenames: false
|
||||
language: system
|
||||
types: [text]
|
||||
stages: [pre-push]
|
||||
|
|
@ -11,6 +11,7 @@
|
|||
* breaking: change the semantic of CCFloat.{min,max} with respect to NaN to follow the Stdlib
|
||||
* breaking: change the semantic of CCInt.rem with respect to negative number to follow the Stdlib
|
||||
* breaking: change the order of argument of `CCMap.add_seq` to align with the stdlib.
|
||||
* breaking: `CCMap.to_list` is now in increasing, not decreasing, order for OCaml >= 5.1
|
||||
|
||||
## 3.17
|
||||
|
||||
|
|
|
|||
2
Makefile
2
Makefile
|
|
@ -24,6 +24,8 @@ format:
|
|||
format-check:
|
||||
@dune build $(DUNE_OPTS) @fmt --display=quiet
|
||||
|
||||
install-pre-commit-hook:
|
||||
uvx pre-commit install --hook-type pre-push
|
||||
|
||||
VERSION=$(shell awk '/^version:/ {print $$2}' containers.opam)
|
||||
|
||||
|
|
|
|||
78
benchs/bench_hash.ml
Normal file
78
benchs/bench_hash.ml
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
(** Benchmarks for CCHash primitives.
|
||||
*)
|
||||
|
||||
[@@@warning "-5"]
|
||||
|
||||
module B = Benchmark
|
||||
|
||||
let repeat = 3
|
||||
let n_ints = 1_000
|
||||
let ints = Array.init n_ints (fun i -> i * 2654435761)
|
||||
|
||||
let bench_int_hash ~time () =
|
||||
B.throughputN time ~repeat
|
||||
[
|
||||
( "CCHash.int",
|
||||
(fun () ->
|
||||
Array.iter
|
||||
(fun x -> ignore @@ Sys.opaque_identity (CCHash.int x))
|
||||
ints),
|
||||
() );
|
||||
( "Hashtbl.hash (poly)",
|
||||
(fun () ->
|
||||
Array.iter
|
||||
(fun x -> ignore @@ Sys.opaque_identity (Hashtbl.hash x))
|
||||
ints),
|
||||
() );
|
||||
( "CCHash.int64",
|
||||
(fun () ->
|
||||
Array.iter
|
||||
(fun x ->
|
||||
ignore @@ Sys.opaque_identity (CCHash.int64 (Int64.of_int x)))
|
||||
ints),
|
||||
() );
|
||||
]
|
||||
|
||||
let bench_combine64 ~time () =
|
||||
B.throughputN time ~repeat
|
||||
[
|
||||
( "combine64 chain x5",
|
||||
(fun () ->
|
||||
Array.iter
|
||||
(fun x ->
|
||||
ignore
|
||||
@@ Sys.opaque_identity
|
||||
CCHash64.(
|
||||
combine2
|
||||
(combine2
|
||||
(combine2
|
||||
(combine2 (Int64.of_int x)
|
||||
(Int64.of_int (x lxor 0xaaaa)))
|
||||
(Int64.of_int (x + 1)))
|
||||
(Int64.of_int (x * 3)))
|
||||
(Int64.of_int (x lxor (x lsr 7)))))
|
||||
ints),
|
||||
() );
|
||||
( "CCHash.list int [1..5]",
|
||||
(fun () ->
|
||||
Array.iter
|
||||
(fun x ->
|
||||
ignore
|
||||
@@ Sys.opaque_identity
|
||||
(Int64.of_int
|
||||
CCHash.(list int [ x + 1; x + 2; x + 3; x + 4; x + 5 ])))
|
||||
ints),
|
||||
() );
|
||||
]
|
||||
|
||||
(* --- tree for run_global ------------------------------------------------- *)
|
||||
|
||||
let () =
|
||||
B.Tree.(
|
||||
register @@ "hash"
|
||||
@>>> [
|
||||
"int" @> lazy (bench_int_hash ~time:2 ());
|
||||
"combine64" @> lazy (bench_combine64 ~time:2 ());
|
||||
])
|
||||
|
||||
let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg
|
||||
15
benchs/dune
15
benchs/dune
|
|
@ -4,6 +4,7 @@
|
|||
containers
|
||||
containers_pvec
|
||||
containers-data
|
||||
containers.xxhash
|
||||
benchmark
|
||||
gen
|
||||
iter
|
||||
|
|
@ -22,3 +23,17 @@
|
|||
-unbox-closures
|
||||
-unbox-closures-factor
|
||||
20))
|
||||
|
||||
(executable
|
||||
(name bench_hash)
|
||||
(libraries containers benchmark)
|
||||
(flags :standard -warn-error -3-5 -w -60 -safe-string -color always)
|
||||
(optional)
|
||||
(ocamlopt_flags
|
||||
:standard
|
||||
-O3
|
||||
-color
|
||||
always
|
||||
-unbox-closures
|
||||
-unbox-closures-factor
|
||||
20))
|
||||
|
|
|
|||
|
|
@ -21,7 +21,8 @@ let rec eq t1 t2 =
|
|||
let rec hash_tree t =
|
||||
match t with
|
||||
| Empty -> CCHash.string "empty"
|
||||
| Node (i, l) -> CCHash.(combine2 (int i) (list hash_tree l))
|
||||
| Node (i, l) ->
|
||||
CCHash.((combine2 [@alert "-deprecated"]) (int i) (list hash_tree l))
|
||||
|
||||
module H = Hashtbl.Make (struct
|
||||
type t = tree
|
||||
|
|
|
|||
|
|
@ -1535,7 +1535,7 @@ module Str = struct
|
|||
let rand_str_ ?(among = "abcdefgh") n =
|
||||
let module Q = QCheck in
|
||||
let st = Random.State.make [| n + 17 |] in
|
||||
let gen_c = QCheck.Gen.oneofl (CCString.to_list among) in
|
||||
let gen_c = QCheck.Gen.oneof_list (CCString.to_list among) in
|
||||
QCheck.Gen.string_size ~gen:gen_c (QCheck.Gen.return n) st
|
||||
|
||||
let find ?(start = 0) ~sub s =
|
||||
|
|
@ -1824,4 +1824,60 @@ module Str = struct
|
|||
])
|
||||
end
|
||||
|
||||
module Hash = struct
|
||||
(* Old FNV-based string hash (from before the rrmxmx+fmix64 C implementation) *)
|
||||
let fnv_offset_basis = 0xcbf29ce484222325L
|
||||
let fnv_prime = 0x100000001b3L
|
||||
|
||||
let string_fnv (s : string) =
|
||||
let h = ref fnv_offset_basis in
|
||||
for i = 0 to String.length s - 1 do
|
||||
let c = String.unsafe_get s i in
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (of_int (Char.code c)))
|
||||
done;
|
||||
Int64.to_int !h land max_int
|
||||
|
||||
let rand = Random.State.make [| 42 |]
|
||||
|
||||
let mk_strings n len =
|
||||
Array.init n (fun _ ->
|
||||
String.init len (fun _ -> Char.chr (65 + Random.State.int rand 26)))
|
||||
|
||||
let bench_string_hash ?(time = 2) ~len n =
|
||||
let strings = mk_strings n len in
|
||||
let bench_new () =
|
||||
Array.iter (fun s -> opaque_ignore (CCHash.string s)) strings
|
||||
and bench_fnv () =
|
||||
Array.iter (fun s -> opaque_ignore (string_fnv s)) strings
|
||||
and bench_poly () =
|
||||
Array.iter (fun s -> opaque_ignore (Hashtbl.hash s)) strings
|
||||
and bench_xxhash () =
|
||||
Array.iter
|
||||
(fun s ->
|
||||
opaque_ignore (Int64.to_int (Containers_xxhash.hash_string s)))
|
||||
strings
|
||||
in
|
||||
B.throughputN time ~repeat
|
||||
[
|
||||
"CCHash.string (new)", bench_new, ();
|
||||
"string_fnv (old)", bench_fnv, ();
|
||||
"Hashtbl.hash (poly)", bench_poly, ();
|
||||
"xxhash", bench_xxhash, ();
|
||||
]
|
||||
|
||||
let () =
|
||||
B.Tree.register
|
||||
("hash"
|
||||
@>>> [
|
||||
"string"
|
||||
@>> B.Tree.concat
|
||||
[
|
||||
app_int (bench_string_hash ~time:2 ~len:16) 1_000;
|
||||
app_int (bench_string_hash ~time:2 ~len:64) 1_000;
|
||||
app_int (bench_string_hash ~time:2 ~len:256) 1_000;
|
||||
];
|
||||
])
|
||||
end
|
||||
|
||||
let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#!/bin/sh
|
||||
|
||||
OPTS="--profile=release --display=quiet"
|
||||
exec dune exec $OPTS -- benchs/run_benchs_hash.exe $@
|
||||
exec dune exec $OPTS -- ./benchs/run_bench_hash.exe $@
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ let fold_left f acc self =
|
|||
|
||||
(* capture current content *)
|
||||
let acc = ref acc in
|
||||
for i = 0 to len do
|
||||
for i = 0 to len - 1 do
|
||||
acc := f !acc (Bytes.unsafe_get bs i)
|
||||
done;
|
||||
!acc
|
||||
|
|
@ -104,16 +104,20 @@ let fold_left f acc self =
|
|||
let[@inline] iter f self =
|
||||
(* capture current content *)
|
||||
let { bs; len } = self in
|
||||
for i = 0 to len do
|
||||
for i = 0 to len - 1 do
|
||||
f (Bytes.unsafe_get bs i)
|
||||
done
|
||||
|
||||
let[@inline] iteri f self =
|
||||
let { bs; len } = self in
|
||||
for i = 0 to len do
|
||||
for i = 0 to len - 1 do
|
||||
f i (Bytes.unsafe_get bs i)
|
||||
done
|
||||
|
||||
let copy self =
|
||||
let bs = Bytes.copy self.bs in
|
||||
{ bs; len = self.len }
|
||||
|
||||
let of_seq seq =
|
||||
let self = create ~cap:32 () in
|
||||
append_seq self seq;
|
||||
|
|
|
|||
|
|
@ -91,6 +91,9 @@ val to_slice : t -> CCByte_slice.t
|
|||
The slice shares the same byte array as [buf] (until [buf] is resized).
|
||||
@since 3.13.1 *)
|
||||
|
||||
val copy : t -> t
|
||||
(** [copy buf] returns an independent copy of [buf]. *)
|
||||
|
||||
val contents : t -> string
|
||||
(** Copy the internal data to a string. Allocates. *)
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,12 @@ let create ?(off = 0) ?len bs =
|
|||
let[@inline] unsafe_of_string ?off ?len s =
|
||||
create ?off ?len (Bytes.unsafe_of_string s)
|
||||
|
||||
let[@inline] of_string s = create (Bytes.of_string s)
|
||||
|
||||
let[@inline] clear self =
|
||||
self.off <- 0;
|
||||
self.len <- 0
|
||||
|
||||
let[@inline] len self = self.len
|
||||
let[@inline] contents self = Bytes.sub_string self.bs self.off self.len
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,12 @@ val unsafe_of_string : ?off:int -> ?len:int -> string -> t
|
|||
This is unsafe because mutating the bytes is forbidden
|
||||
(just like with {!Bytes.unsafe_of_string} *)
|
||||
|
||||
val of_string : string -> t
|
||||
(** [of_string s] makes a slice from a copy of [s]. Safe to mutate. *)
|
||||
|
||||
val clear : t -> unit
|
||||
(** [clear sl] resets [off] and [len] to 0. *)
|
||||
|
||||
val len : t -> int
|
||||
(** Access the length *)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,130 +7,107 @@ type 'a t = 'a -> hash
|
|||
type 'a iter = ('a -> unit) -> unit
|
||||
type 'a gen = unit -> 'a option
|
||||
|
||||
(* FNV hashing
|
||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
||||
*)
|
||||
let fnv_offset_basis = 0xcbf29ce484222325L
|
||||
let fnv_prime = 0x100000001b3L
|
||||
let[@inline] combine2 a b =
|
||||
Hash_impl_.(finalize (combine_int (combine_int seed a) b))
|
||||
|
||||
(* hash an integer *)
|
||||
let hash_int_ n =
|
||||
let h = ref fnv_offset_basis in
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
(* truncate back to int and remove sign *)
|
||||
Int64.to_int !h land max_int
|
||||
|
||||
let combine2 a b =
|
||||
let h = ref fnv_offset_basis in
|
||||
(* we only do one loop, where we mix bytes of [a] and [b], so as
|
||||
to simplify control flow *)
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
Int64.to_int !h land max_int
|
||||
|
||||
let[@inline] combine f s x = combine2 s (f x)
|
||||
let[@inline] combine f s x =
|
||||
Hash_impl_.(finalize (combine_int (combine_int seed s) (f x)))
|
||||
|
||||
let combine3 a b c =
|
||||
let h = ref fnv_offset_basis in
|
||||
(* we only do one loop, where we mix bytes of [a] [b] and [c], so as
|
||||
to simplify control flow *)
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
Int64.to_int !h land max_int
|
||||
Hash_impl_.(
|
||||
let s = combine_int (combine_int seed a) b in
|
||||
finalize (combine_int s c))
|
||||
|
||||
let combine4 a b c d =
|
||||
let h = ref fnv_offset_basis in
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
(h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (of_int ((d lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
Int64.to_int !h land max_int
|
||||
Hash_impl_.(
|
||||
let s = combine_int (combine_int seed a) b in
|
||||
let s = combine_int s c in
|
||||
finalize (combine_int s d))
|
||||
|
||||
let combine5 a b c d e = combine3 a b (combine3 c d e)
|
||||
let combine6 a b c d e f = combine4 a b c (combine3 d e f)
|
||||
let combine5 a b c d e =
|
||||
Hash_impl_.(
|
||||
let s = combine_int (combine_int seed a) b in
|
||||
let s = combine_int s c in
|
||||
let s = combine_int s d in
|
||||
finalize (combine_int s e))
|
||||
|
||||
(** {2 Combinators} *)
|
||||
let combine6 a b c d e f =
|
||||
Hash_impl_.(
|
||||
let s = combine_int (combine_int seed a) b in
|
||||
let s = combine_int s c in
|
||||
let s = combine_int s d in
|
||||
let s = combine_int s e in
|
||||
finalize (combine_int s f))
|
||||
|
||||
(** {2 Primitive hashers} *)
|
||||
|
||||
let const h _ = h
|
||||
let const0 _ = 0
|
||||
let int = hash_int_
|
||||
let int n = Hash_impl_.(finalize (combine_int seed n))
|
||||
|
||||
let bool b =
|
||||
hash_int_
|
||||
int
|
||||
(if b then
|
||||
1
|
||||
else
|
||||
2)
|
||||
|
||||
let char x = hash_int_ (Char.code x)
|
||||
let char x = Hash_impl_.(finalize (combine_char seed (Char.code x)))
|
||||
let int64 (n : int64) : int = Hash_impl_.(finalize (combine_i64 seed n))
|
||||
let int32 (x : int32) : int = Hash_impl_.(finalize (combine_i32 seed x))
|
||||
|
||||
(* hash an integer *)
|
||||
let int64 n : int =
|
||||
let h = ref fnv_offset_basis in
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
h := Int64.(logxor !h (logand (shift_right_logical n (k * 8)) 0xffL))
|
||||
done;
|
||||
(* truncate back to int and remove sign *)
|
||||
Int64.to_int !h land max_int
|
||||
|
||||
let int32 (x : int32) = int64 (Int64.of_int32 x)
|
||||
let nativeint (x : nativeint) = int64 (Int64.of_nativeint x)
|
||||
|
||||
(* do not hash more than 128 bytes in strings/bytes *)
|
||||
let max_len_b_ = 128
|
||||
let nativeint (x : nativeint) =
|
||||
Hash_impl_.(finalize (combine_i64 seed (Int64.of_nativeint x)))
|
||||
|
||||
let bytes (x : bytes) =
|
||||
let h = ref fnv_offset_basis in
|
||||
for i = 0 to min max_len_b_ (Bytes.length x - 1) do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
let byte = Char.code (Bytes.unsafe_get x i) in
|
||||
h := Int64.(logxor !h (of_int byte))
|
||||
done;
|
||||
Int64.to_int !h land max_int
|
||||
Hash_impl_.(finalize (combine_string seed (Bytes.unsafe_to_string x)))
|
||||
|
||||
let string (x : string) = bytes (Bytes.unsafe_of_string x)
|
||||
let string (x : string) = Hash_impl_.(finalize (combine_string seed x))
|
||||
|
||||
let slice x i len =
|
||||
let j = i + len in
|
||||
let rec aux i s =
|
||||
if i = j then
|
||||
s
|
||||
let rec aux k s =
|
||||
if k = j then
|
||||
Hash_impl_.finalize s
|
||||
else
|
||||
aux (i + 1) (combine2 (Char.code x.[i]) s)
|
||||
aux (k + 1)
|
||||
(Hash_impl_.combine_char s (Char.code (String.unsafe_get x k)))
|
||||
in
|
||||
aux i 0
|
||||
aux i Hash_impl_.seed
|
||||
|
||||
let opt f = function
|
||||
| None -> 42
|
||||
| Some x -> combine2 43 (f x)
|
||||
| Some x -> Hash_impl_.(finalize (combine_int (combine_int seed 43) (f x)))
|
||||
|
||||
let list f l = List.fold_left (combine f) 0x42 l
|
||||
let array f l = Array.fold_left (combine f) 0x42 l
|
||||
let pair f g (x, y) = combine2 (f x) (g y)
|
||||
let triple f g h (x, y, z) = combine2 (combine2 (f x) (g y)) (h z)
|
||||
let list f l =
|
||||
let s =
|
||||
List.fold_left (fun s x -> Hash_impl_.combine_int s (f x)) Hash_impl_.seed l
|
||||
in
|
||||
Hash_impl_.finalize s
|
||||
|
||||
let array f a =
|
||||
let s =
|
||||
Array.fold_left
|
||||
(fun s x -> Hash_impl_.combine_int s (f x))
|
||||
Hash_impl_.seed a
|
||||
in
|
||||
Hash_impl_.finalize s
|
||||
|
||||
let pair f g (x, y) =
|
||||
Hash_impl_.(finalize (combine_int (combine_int seed (f x)) (g y)))
|
||||
|
||||
let triple f g h (x, y, z) =
|
||||
Hash_impl_.(
|
||||
let s = combine_int seed (f x) in
|
||||
let s = combine_int s (g y) in
|
||||
finalize (combine_int s (h z)))
|
||||
|
||||
let quad f g h i (x, y, z, w) =
|
||||
combine2 (combine2 (f x) (g y)) (combine2 (h z) (i w))
|
||||
Hash_impl_.(
|
||||
let s = combine_int seed (f x) in
|
||||
let s = combine_int s (g y) in
|
||||
let s = combine_int s (h z) in
|
||||
finalize (combine_int s (i w)))
|
||||
|
||||
let map f h x = h (f x)
|
||||
|
||||
|
|
@ -144,8 +121,10 @@ let poly x = Hashtbl.hash x
|
|||
|
||||
let array_of_hashes_ arr =
|
||||
Array.sort CCInt.compare arr;
|
||||
(* sort the hashes, so their order does not matter *)
|
||||
Array.fold_left combine2 0x42 arr
|
||||
let s =
|
||||
Array.fold_left (fun s h -> Hash_impl_.combine_int s h) Hash_impl_.seed arr
|
||||
in
|
||||
Hash_impl_.finalize s
|
||||
|
||||
let array_comm f a =
|
||||
let arr = Array.init (Array.length a) (fun i -> f a.(i)) in
|
||||
|
|
@ -157,19 +136,19 @@ let list_comm f l =
|
|||
array_of_hashes_ arr
|
||||
|
||||
let iter f seq =
|
||||
let h = ref 0x43 in
|
||||
seq (fun x -> h := combine f !h x);
|
||||
!h
|
||||
let s = ref Hash_impl_.seed in
|
||||
seq (fun x -> s := Hash_impl_.combine_int !s (f x));
|
||||
Hash_impl_.finalize !s
|
||||
|
||||
let seq f seq =
|
||||
let h = ref 0x43 in
|
||||
Seq.iter (fun x -> h := combine f !h x) seq;
|
||||
!h
|
||||
let seq f sq =
|
||||
let s = ref Hash_impl_.seed in
|
||||
Seq.iter (fun x -> s := Hash_impl_.combine_int !s (f x)) sq;
|
||||
Hash_impl_.finalize !s
|
||||
|
||||
let gen f g =
|
||||
let rec aux s =
|
||||
match g () with
|
||||
| None -> s
|
||||
| Some x -> aux (combine2 s (f x))
|
||||
| None -> Hash_impl_.finalize s
|
||||
| Some x -> aux (Hash_impl_.combine_int s (f x))
|
||||
in
|
||||
aux 0x42
|
||||
aux Hash_impl_.seed
|
||||
|
|
|
|||
|
|
@ -9,6 +9,15 @@
|
|||
across versions of OCaml and Containers).
|
||||
*)
|
||||
|
||||
(* TODO: for 4.xx:
|
||||
|
||||
{[type state = int64
|
||||
val seed : state
|
||||
type 'a t = state -> 'a -> state
|
||||
val finalize : state -> int64
|
||||
]}
|
||||
*)
|
||||
|
||||
(** {2 Definitions} *)
|
||||
|
||||
type hash = int
|
||||
|
|
|
|||
103
src/core/CCHash64.ml
Normal file
103
src/core/CCHash64.ml
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
(* This file is free software, part of containers. See file "license" for more details. *)
|
||||
|
||||
(** {1 Hash combinators with 64-bit state} *)
|
||||
|
||||
type state = int64
|
||||
|
||||
let seed : state = Hash_impl_.seed
|
||||
let[@inline] finalize64 (s : state) : int64 = Hash_impl_.fmix64 s
|
||||
let[@inline] finalize (s : state) : int = Hash_impl_.finalize s
|
||||
|
||||
type 'a t = state -> 'a -> state
|
||||
|
||||
let[@inline] apply h x = finalize64 (h seed x)
|
||||
let apply_int h x = Int64.to_int (finalize64 (h seed x))
|
||||
let[@inline] int s x = Hash_impl_.combine_int s x
|
||||
|
||||
let[@inline] bool s b =
|
||||
Hash_impl_.combine_int s
|
||||
(if b then
|
||||
1
|
||||
else
|
||||
2)
|
||||
|
||||
let[@inline] char s c = Hash_impl_.combine_char s (Char.code c)
|
||||
let[@inline] int64 s (x : int64) = Hash_impl_.combine_i64 s x
|
||||
let[@inline] int32 s (x : int32) = Hash_impl_.combine_i32 s x
|
||||
|
||||
let[@inline] nativeint s (x : nativeint) =
|
||||
Hash_impl_.combine_i64 s (Int64.of_nativeint x)
|
||||
|
||||
let[@inline] string s x = Hash_impl_.combine_string s x
|
||||
let[@inline] bytes s x = Hash_impl_.combine_string s (Bytes.unsafe_to_string x)
|
||||
|
||||
let slice str ofs s len =
|
||||
let j = ofs + len in
|
||||
let rec loop k st =
|
||||
if k = j then
|
||||
st
|
||||
else
|
||||
loop (k + 1)
|
||||
(Hash_impl_.combine_char st (Char.code (String.unsafe_get str k)))
|
||||
in
|
||||
loop ofs s
|
||||
|
||||
let opt f s = function
|
||||
| None -> Hash_impl_.combine_int s 0
|
||||
| Some x -> f (Hash_impl_.combine_int s 1) x
|
||||
|
||||
let list f s l = List.fold_left f s l
|
||||
let array f s a = Array.fold_left f s a
|
||||
let pair f g s (x, y) = g (f s x) y
|
||||
let triple f g h s (x, y, z) = h (g (f s x) y) z
|
||||
let quad f g h k s (x, y, z, w) = k (h (g (f s x) y) z) w
|
||||
let map proj f s x = f s (proj x)
|
||||
|
||||
let if_ b then_ else_ s x =
|
||||
if b then
|
||||
then_ s x
|
||||
else
|
||||
else_ s x
|
||||
|
||||
let poly s x = Hash_impl_.combine_int s (Hashtbl.hash x)
|
||||
|
||||
type 'a iter = ('a -> unit) -> unit
|
||||
type 'a gen = unit -> 'a option
|
||||
|
||||
let iter f s seq =
|
||||
let st = ref s in
|
||||
seq (fun x -> st := f !st x);
|
||||
!st
|
||||
|
||||
let seq f s sq = Seq.fold_left f s sq
|
||||
|
||||
let gen f s g =
|
||||
let rec aux st =
|
||||
match g () with
|
||||
| None -> st
|
||||
| Some x -> aux (f st x)
|
||||
in
|
||||
aux s
|
||||
|
||||
let[@inline] combine2 a b =
|
||||
Hash_impl_.(finalize64 (combine_i64 (combine_i64 seed a) b))
|
||||
|
||||
let combine3 a b c =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (combine_i64 seed a) b in
|
||||
finalize64 (combine_i64 s c))
|
||||
|
||||
let combine4 a b c d =
|
||||
Hash_impl_.(
|
||||
let s = combine_i64 (combine_i64 seed a) b in
|
||||
let s = combine_i64 s c in
|
||||
finalize64 (combine_i64 s d))
|
||||
|
||||
let array_comm f s a =
|
||||
let hashes = Array.map (fun x -> finalize64 (f seed x)) a in
|
||||
Array.sort Int64.compare hashes;
|
||||
Array.fold_left Hash_impl_.combine_i64 s hashes
|
||||
|
||||
let list_comm f s l =
|
||||
let arr = Array.of_list l in
|
||||
array_comm f s arr
|
||||
95
src/core/CCHash64.mli
Normal file
95
src/core/CCHash64.mli
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
(** Hash combinators with 64-bit state threading.
|
||||
|
||||
State is threaded explicitly as a private [int64] through combinators.
|
||||
Finalize with {!finalize} (returns a positive [int]) or {!finalize64}
|
||||
(returns the raw [int64]).
|
||||
|
||||
Typical use:
|
||||
{[
|
||||
let hash_my_record r =
|
||||
CCHash64.(finalize (string (int seed r.id) r.name))
|
||||
|
||||
let hash_pair (a, b) =
|
||||
CCHash64.(finalize (pair int string seed (a, b)))
|
||||
]}
|
||||
|
||||
{b Implementation}: xorshift+multiply combiner with fmix64 (Murmur3)
|
||||
finalizer, via C stubs. Unboxed in native code.
|
||||
|
||||
@since NEXT_RELEASE
|
||||
*)
|
||||
|
||||
type state = private int64
|
||||
(** Hash accumulator (64-bit). Create with {!seed}; finish with {!finalize}
|
||||
or {!finalize64}. *)
|
||||
|
||||
val seed : state
|
||||
(** Initial hash state (golden-ratio constant). *)
|
||||
|
||||
val finalize64 : state -> int64
|
||||
(** Apply fmix64 (Murmur3) and return the full 64-bit result.
|
||||
The result may be negative as a signed [int64]. *)
|
||||
|
||||
val finalize : state -> int
|
||||
(** Apply fmix64 and return a non-negative [int] (strips sign bit). *)
|
||||
|
||||
type 'a t = state -> 'a -> state
|
||||
(** A hash combiner: takes the current state, mixes in a value, returns the
|
||||
updated state. *)
|
||||
|
||||
val apply : 'a t -> 'a -> int64
|
||||
(** Hash the input *)
|
||||
|
||||
val apply_int : 'a t -> 'a -> int
|
||||
(** Hash the input and truncate to [int] *)
|
||||
|
||||
val int : int t
|
||||
val bool : bool t
|
||||
val char : char t
|
||||
val int32 : int32 t
|
||||
val int64 : int64 t
|
||||
val nativeint : nativeint t
|
||||
val string : string t
|
||||
|
||||
val bytes : bytes t
|
||||
(** @since 3.5 *)
|
||||
|
||||
val slice : string -> int -> int t
|
||||
(** [slice str ofs s len] mixes the byte slice [str[ofs .. ofs+len-1]] into [s]. *)
|
||||
|
||||
val opt : 'a t -> 'a option t
|
||||
val list : 'a t -> 'a list t
|
||||
val array : 'a t -> 'a array t
|
||||
val pair : 'a t -> 'b t -> ('a * 'b) t
|
||||
val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t
|
||||
val quad : 'a t -> 'b t -> 'c t -> 'd t -> ('a * 'b * 'c * 'd) t
|
||||
|
||||
val map : ('a -> 'b) -> 'b t -> 'a t
|
||||
(** [map proj f] applies [proj] before hashing with [f].
|
||||
Example: [map fst int] hashes only the first element of a pair.
|
||||
@since 3.5 *)
|
||||
|
||||
val if_ : bool -> 'a t -> 'a t -> 'a t
|
||||
(** [if_ b t e] uses hasher [t] when [b] is true, [e] otherwise. *)
|
||||
|
||||
val combine2 : int64 -> int64 -> int64
|
||||
val combine3 : int64 -> int64 -> int64 -> int64
|
||||
val combine4 : int64 -> int64 -> int64 -> int64 -> int64
|
||||
|
||||
val poly : 'a t
|
||||
(** Uses [Hashtbl.hash] internally. *)
|
||||
|
||||
val list_comm : 'a t -> 'a list t
|
||||
(** Commutative: lists equal up to permutation hash the same. *)
|
||||
|
||||
val array_comm : 'a t -> 'a array t
|
||||
(** Commutative: arrays equal up to permutation hash the same. *)
|
||||
|
||||
(** {2 Iterators} *)
|
||||
|
||||
type 'a iter = ('a -> unit) -> unit
|
||||
type 'a gen = unit -> 'a option
|
||||
|
||||
val seq : 'a t -> 'a Seq.t t
|
||||
val iter : 'a t -> 'a iter t
|
||||
val gen : 'a t -> 'a gen t
|
||||
|
|
@ -4,20 +4,8 @@ include Int
|
|||
|
||||
type 'a iter = ('a -> unit) -> unit
|
||||
|
||||
(* use FNV:
|
||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
|
||||
let hash (n : int) : int =
|
||||
let offset_basis = 0xcbf29ce484222325L in
|
||||
let prime = 0x100000001b3L in
|
||||
|
||||
let h = ref offset_basis in
|
||||
for k = 0 to 7 do
|
||||
(h := Int64.(mul !h prime));
|
||||
(* h := h xor (k-th byte of n) *)
|
||||
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
|
||||
done;
|
||||
(* truncate back to int and remove sign *)
|
||||
Int64.to_int !h land max_int
|
||||
Hash_impl_.(finalize (combine_i64 seed (Int64.of_int n)))
|
||||
|
||||
let range i j yield =
|
||||
let rec up i j yield =
|
||||
|
|
|
|||
|
|
@ -7,13 +7,9 @@ include Int32
|
|||
let min : t -> t -> t = Stdlib.min
|
||||
let max : t -> t -> t = Stdlib.max
|
||||
|
||||
[@@@endif]
|
||||
[@@@iflt 5.1]
|
||||
|
||||
let hash x = Stdlib.abs (to_int x)
|
||||
|
||||
[@@@endif]
|
||||
|
||||
let hash (x : t) : int = Hash_impl_.(finalize (combine_i32 seed x))
|
||||
let sign i = compare i zero
|
||||
|
||||
let pow a b =
|
||||
|
|
|
|||
|
|
@ -10,22 +10,8 @@ let max : t -> t -> t = Stdlib.max
|
|||
[@@@endif]
|
||||
|
||||
let sign i = compare i zero
|
||||
|
||||
(* use FNV:
|
||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
|
||||
let hash_to_int64 (n : t) =
|
||||
let offset_basis = 0xcbf29ce484222325L in
|
||||
let prime = 0x100000001b3L in
|
||||
|
||||
let h = ref offset_basis in
|
||||
for k = 0 to 7 do
|
||||
h := mul !h prime;
|
||||
(* h := h xor (k-th byte of n) *)
|
||||
h := logxor !h (logand (shift_right n (k * 8)) 0xffL)
|
||||
done;
|
||||
logand !h max_int
|
||||
|
||||
let[@inline] hash (n : t) : int = to_int (hash_to_int64 n) land Stdlib.max_int
|
||||
let hash_to_int64 (n : t) : t = Hash_impl_.(fmix64 (combine_i64 seed n))
|
||||
let[@inline] hash (n : t) : int = Hash_impl_.(finalize (combine_i64 seed n))
|
||||
|
||||
(* see {!CCInt.popcount} for more details *)
|
||||
let[@inline] popcount (b : t) : int =
|
||||
|
|
|
|||
|
|
@ -191,7 +191,7 @@ module Make (O : Map.OrderedType) = struct
|
|||
|
||||
[@@@iflt 5.1]
|
||||
|
||||
let to_list m = fold (fun k v acc -> (k, v) :: acc) m []
|
||||
let to_list = bindings
|
||||
|
||||
[@@@endif]
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ include String
|
|||
|
||||
let compare_int (a : int) b = Stdlib.compare a b
|
||||
let compare = String.compare
|
||||
let hash s = Hashtbl.hash s
|
||||
let hash (s : string) : int = Hash_impl_.(finalize (combine_string seed s))
|
||||
let length = String.length
|
||||
let is_empty s = equal s ""
|
||||
|
||||
|
|
|
|||
56
src/core/Hash_impl_.ml
Normal file
56
src/core/Hash_impl_.ml
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
(* This file is free software, part of containers. See file "license" for more details. *)
|
||||
|
||||
(** Internal hash implementation.
|
||||
|
||||
Combiner: [state ^= chunk; state ^= state >> 32; state *= 0xc6a4a7935bd1e995]
|
||||
Finalizer: fmix64 (Murmur3).
|
||||
|
||||
Multiplicative constant 0xc6a4a7935bd1e995 (MurmurHash2, Austin Appleby):
|
||||
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
|
||||
|
||||
fmix64 constants (Murmur3, Austin Appleby):
|
||||
https://github.com/aappleby/smhasher
|
||||
|
||||
Not part of the public API; use {!CCHash} instead. *)
|
||||
|
||||
(** Initial hash state (golden-ratio constant). *)
|
||||
let seed : int64 = 0x9e3779b97f4a7c15L
|
||||
|
||||
external combine_int :
|
||||
(int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed])
|
||||
= "caml_cc_hash_combine_int_byte" "caml_cc_hash_combine_int"
|
||||
[@@noalloc]
|
||||
(** [combine_int state x] mixes OCaml int [x] into [state]. *)
|
||||
|
||||
external combine_i64 :
|
||||
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_hash_combine_i64_byte" "caml_cc_hash_combine_i64"
|
||||
[@@noalloc]
|
||||
(** [combine_i64 state chunk] mixes [chunk] into [state]. *)
|
||||
|
||||
external combine_i32 :
|
||||
(int64[@unboxed]) -> (int32[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_hash_combine_i32_byte" "caml_cc_hash_combine_i32"
|
||||
[@@noalloc]
|
||||
(** [combine_i32 state chunk] mixes [chunk] into [state]. *)
|
||||
|
||||
external combine_char :
|
||||
(int64[@unboxed]) -> (int[@untagged]) -> (int64[@unboxed])
|
||||
= "caml_cc_hash_combine_char_byte" "caml_cc_hash_combine_char"
|
||||
[@@noalloc]
|
||||
(** [combine_char state c] mixes character code [c] into [state]. *)
|
||||
|
||||
external combine_string : (int64[@unboxed]) -> string -> (int64[@unboxed])
|
||||
= "caml_cc_hash_combine_string_byte" "caml_cc_hash_combine_string"
|
||||
[@@noalloc]
|
||||
(** [combine_string state s] mixes all bytes of [s] into [state] in 8-byte chunks. *)
|
||||
|
||||
external fmix64 : (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_hash_fmix64_byte" "caml_cc_hash_fmix64"
|
||||
[@@noalloc]
|
||||
(** [fmix64 state] applies the Murmur3 finalizer. Result may be negative. *)
|
||||
|
||||
external finalize : (int64[@unboxed]) -> (int[@untagged])
|
||||
= "caml_cc_hash_finalize_byte" "caml_cc_hash_finalize"
|
||||
[@@noalloc]
|
||||
(** [finalize state] applies fmix64 and returns a non-negative [int]. *)
|
||||
|
|
@ -6,7 +6,12 @@
|
|||
(action
|
||||
(run %{project_root}/src/core/cpp/cpp.exe %{input-file})))
|
||||
(flags :standard -nolabels -open CCMonomorphic)
|
||||
(libraries either containers.monomorphic containers.domain))
|
||||
(libraries either containers.monomorphic containers.domain)
|
||||
(private_modules Hash_impl_)
|
||||
(foreign_stubs
|
||||
(language c)
|
||||
(flags :standard -O2)
|
||||
(names hash_stubs)))
|
||||
|
||||
(ocamllex
|
||||
(modules CCSexp_lex))
|
||||
|
|
|
|||
166
src/core/hash_stubs.c
Normal file
166
src/core/hash_stubs.c
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
/* This file is free software, part of containers. See file "license" for more details. */
|
||||
|
||||
/* Hash implementation: xorshift+multiply combiner with fmix64 finalizer.
|
||||
Combiner: state ^= chunk; state ^= state >> 32; state *= 0xc6a4a7935bd1e995
|
||||
Finalizer (fmix64, Murmur3): three rounds of xorshift-multiply.
|
||||
|
||||
Multiplicative constant 0xc6a4a7935bd1e995 (MurmurHash2, Austin Appleby):
|
||||
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
|
||||
|
||||
fmix64 constants 0xff51afd7ed558ccd / 0xc4ceb9fe1a85ec53 (Murmur3, Austin Appleby):
|
||||
https://github.com/aappleby/smhasher
|
||||
*/
|
||||
|
||||
#include <caml/mlvalues.h>
|
||||
#include <caml/alloc.h>
|
||||
#include <caml/memory.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
// from murmur2: https://chromium.googlesource.com/external/smhasher/+/c8e8bf81bc6041d6d836365a501a0a96830d2d81/MurmurHash2.cpp
|
||||
#define HASH_MUL UINT64_C(0xc6a4a7935bd1e995)
|
||||
|
||||
// from murmur3: https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L81
|
||||
#define FMIX_C1 UINT64_C(0xff51afd7ed558ccd)
|
||||
#define FMIX_C2 UINT64_C(0xc4ceb9fe1a85ec53)
|
||||
|
||||
static inline uint64_t hash_combine(uint64_t state, uint64_t chunk)
|
||||
{
|
||||
state ^= chunk;
|
||||
state ^= state >> 32;
|
||||
state *= HASH_MUL;
|
||||
return state;
|
||||
}
|
||||
|
||||
// fmix64 from murmur3
|
||||
static inline uint64_t fmix64(uint64_t h)
|
||||
{
|
||||
h ^= h >> 33;
|
||||
h *= FMIX_C1;
|
||||
h ^= h >> 33;
|
||||
h *= FMIX_C2;
|
||||
h ^= h >> 33;
|
||||
return h;
|
||||
}
|
||||
|
||||
/* --- combine_i64 --------------------------------------------------------- */
|
||||
|
||||
CAMLprim int64_t caml_cc_hash_combine_i64(int64_t state, int64_t chunk)
|
||||
{
|
||||
return (int64_t)hash_combine((uint64_t)state, (uint64_t)chunk);
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_hash_combine_i64_byte(value v_state, value v_chunk)
|
||||
{
|
||||
CAMLparam2(v_state, v_chunk);
|
||||
uint64_t r = hash_combine((uint64_t)Int64_val(v_state),
|
||||
(uint64_t)Int64_val(v_chunk));
|
||||
CAMLreturn(caml_copy_int64((int64_t)r));
|
||||
}
|
||||
|
||||
/* --- combine_i32 --------------------------------------------------------- */
|
||||
|
||||
CAMLprim int64_t caml_cc_hash_combine_i32(int64_t state, int32_t chunk)
|
||||
{
|
||||
return (int64_t)hash_combine((uint64_t)state, (uint64_t)(uint32_t)chunk);
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_hash_combine_i32_byte(value v_state, value v_chunk)
|
||||
{
|
||||
CAMLparam2(v_state, v_chunk);
|
||||
uint64_t r = hash_combine((uint64_t)Int64_val(v_state),
|
||||
(uint64_t)(uint32_t)Int32_val(v_chunk));
|
||||
CAMLreturn(caml_copy_int64((int64_t)r));
|
||||
}
|
||||
|
||||
/* --- combine_int --------------------------------------------------------- */
|
||||
|
||||
/* chunk is an OCaml int (intnat), passed untagged */
|
||||
CAMLprim int64_t caml_cc_hash_combine_int(int64_t state, intnat chunk)
|
||||
{
|
||||
return (int64_t)hash_combine((uint64_t)state, (uint64_t)chunk);
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_hash_combine_int_byte(value v_state, value v_chunk)
|
||||
{
|
||||
CAMLparam2(v_state, v_chunk);
|
||||
uint64_t r = hash_combine((uint64_t)Int64_val(v_state),
|
||||
(uint64_t)Long_val(v_chunk));
|
||||
CAMLreturn(caml_copy_int64((int64_t)r));
|
||||
}
|
||||
|
||||
/* --- combine_char -------------------------------------------------------- */
|
||||
|
||||
/* c is passed as untagged int (Char.code) */
|
||||
CAMLprim int64_t caml_cc_hash_combine_char(int64_t state, intnat c)
|
||||
{
|
||||
return (int64_t)hash_combine((uint64_t)state, (uint64_t)(unsigned char)c);
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_hash_combine_char_byte(value v_state, value v_c)
|
||||
{
|
||||
CAMLparam2(v_state, v_c);
|
||||
uint64_t r = hash_combine((uint64_t)Int64_val(v_state),
|
||||
(uint64_t)(unsigned char)Long_val(v_c));
|
||||
CAMLreturn(caml_copy_int64((int64_t)r));
|
||||
}
|
||||
|
||||
/* --- combine_string ------------------------------------------------------ */
|
||||
|
||||
/* Hashes all bytes of [str] into [state] using 8-byte chunks where possible.
|
||||
[str] is a regular OCaml value; [state] is unboxed int64. */
|
||||
CAMLprim int64_t caml_cc_hash_combine_string(int64_t state, value str)
|
||||
{
|
||||
const char *data = String_val(str);
|
||||
mlsize_t len = caml_string_length(str);
|
||||
uint64_t s = (uint64_t)state;
|
||||
mlsize_t i = 0;
|
||||
|
||||
for (; i + 8 <= len; i += 8) {
|
||||
uint64_t chunk;
|
||||
memcpy(&chunk, data + i, 8);
|
||||
s = hash_combine(s, chunk);
|
||||
}
|
||||
if (i < len) {
|
||||
uint64_t chunk = 0;
|
||||
memcpy(&chunk, data + i, len - i);
|
||||
s = hash_combine(s, chunk);
|
||||
}
|
||||
return (int64_t)s;
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_hash_combine_string_byte(value v_state, value str)
|
||||
{
|
||||
CAMLparam2(v_state, str);
|
||||
int64_t r = caml_cc_hash_combine_string(Int64_val(v_state), str);
|
||||
CAMLreturn(caml_copy_int64(r));
|
||||
}
|
||||
|
||||
/* --- fmix64 -------------------------------------------------------------- */
|
||||
|
||||
/* Returns full 64-bit fmix64 result; may be "negative" as signed int64. */
|
||||
CAMLprim int64_t caml_cc_hash_fmix64(int64_t state)
|
||||
{
|
||||
return (int64_t)fmix64((uint64_t)state);
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_hash_fmix64_byte(value v_state)
|
||||
{
|
||||
CAMLparam1(v_state);
|
||||
CAMLreturn(caml_copy_int64((int64_t)fmix64((uint64_t)Int64_val(v_state))));
|
||||
}
|
||||
|
||||
/* --- finalize ------------------------------------------------------------ */
|
||||
|
||||
/* Applies fmix64 and masks to Max_long (positive OCaml int). */
|
||||
CAMLprim intnat caml_cc_hash_finalize(int64_t state)
|
||||
{
|
||||
return (intnat)(fmix64((uint64_t)state) & (uint64_t)Max_long);
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_hash_finalize_byte(value v_state)
|
||||
{
|
||||
CAMLparam1(v_state);
|
||||
intnat r = (intnat)(fmix64((uint64_t)Int64_val(v_state)) & (uint64_t)Max_long);
|
||||
CAMLreturn(Val_long(r));
|
||||
}
|
||||
|
|
@ -8,7 +8,14 @@
|
|||
(name test_hash)
|
||||
(modules test_hash)
|
||||
(flags :standard -warn-error -a+8)
|
||||
(libraries containers iter))
|
||||
(libraries containers iter containers_xxhash))
|
||||
|
||||
(rule
|
||||
(alias runtest)
|
||||
(locks /ctest)
|
||||
(package containers)
|
||||
(action
|
||||
(run ./test_hash.exe)))
|
||||
|
||||
(executable
|
||||
(name test_random)
|
||||
|
|
|
|||
|
|
@ -1,54 +1,59 @@
|
|||
(* test hash functions a bit *)
|
||||
|
||||
module H = CCHash
|
||||
module H64 = CCHash64
|
||||
module XXH = Containers_xxhash
|
||||
|
||||
module Hist = struct
|
||||
type t = {
|
||||
tbl: (int, int) Hashtbl.t;
|
||||
mutable n_samples: int;
|
||||
}
|
||||
let n = ref 100_000
|
||||
let verbose = ref false
|
||||
|
||||
let create () : t = { tbl = Hashtbl.create 32; n_samples = 0 }
|
||||
let check_bit_proba name hash_fn n_samples =
|
||||
let rand = Random.State.make [| 42 |] in
|
||||
let bits = Array.make 64 0 in
|
||||
|
||||
let add_n self x n =
|
||||
Hashtbl.replace self.tbl x (n + try Hashtbl.find self.tbl x with _ -> 0);
|
||||
self.n_samples <- n + self.n_samples
|
||||
|
||||
let pp out (self : t) : unit =
|
||||
let max = Hashtbl.fold (fun k _ n -> max k n) self.tbl 0 in
|
||||
let min = Hashtbl.fold (fun k _ n -> min k n) self.tbl max in
|
||||
for i = min to max do
|
||||
let n = try Hashtbl.find self.tbl i with _ -> 0 in
|
||||
Format.fprintf out "[v=%-4d, n-inputs %-6d] %s@." i n
|
||||
(String.make (int_of_float @@ ceil (log (float n))) '#')
|
||||
let n_loops = 30 in
|
||||
for _i = 1 to n_loops do
|
||||
let base = Random.State.int64 rand Int64.(pred max_int) |> Int64.to_int in
|
||||
for i = 1 to n_samples do
|
||||
let h = hash_fn (base + i) in
|
||||
for b = 0 to 63 do
|
||||
if Int64.(logand h (shift_left 1L b)) <> 0L then
|
||||
bits.(b) <- bits.(b) + 1
|
||||
done
|
||||
done
|
||||
end
|
||||
|
||||
let reset_line = "\x1b[2K\r"
|
||||
|
||||
let t_int n1 n2 =
|
||||
Printf.printf "test hash_int on %d--%d\n" n1 n2;
|
||||
let count = Hashtbl.create 128 in
|
||||
for i = n1 to n2 do
|
||||
Printf.printf "%shash %d…%!" reset_line i;
|
||||
let h = H.int i in
|
||||
Hashtbl.replace count h (1 + CCHashtbl.get_or count h ~default:0);
|
||||
if i mod 1024 * 1024 * 1024 = 0 then Gc.major ()
|
||||
done;
|
||||
Printf.printf "%s%!" reset_line;
|
||||
(* reverse table *)
|
||||
let by_count =
|
||||
CCHashtbl.to_iter count
|
||||
|> Iter.map (fun (_h, n) -> n)
|
||||
|> Iter.count ~hash:H.int
|
||||
in
|
||||
let hist = Hist.create () in
|
||||
by_count (fun (n, i) -> Hist.add_n hist n i);
|
||||
Format.printf "histogram:@.%a@." Hist.pp hist;
|
||||
(*assert (Hist.check_uniform hist);*)
|
||||
()
|
||||
let n_samples = n_loops * n_samples in
|
||||
|
||||
if !verbose then (
|
||||
Format.printf "%s bit probabilities after %d samples:@." name n_samples;
|
||||
for b = 0 to 63 do
|
||||
let prob = float bits.(b) /. float n_samples in
|
||||
Format.printf "bit %2d: %.4f@." b prob
|
||||
done
|
||||
);
|
||||
let ok = ref true in
|
||||
for b = 0 to 63 do
|
||||
let prob = float bits.(b) /. float n_samples in
|
||||
if prob < 0.48 || prob > 0.52 then (
|
||||
Format.printf "FAIL: bit %d has proba %.4f (outside 0.48-0.52)@." b prob;
|
||||
ok := false
|
||||
)
|
||||
done;
|
||||
if !ok then
|
||||
Format.printf "%s: OK@." name
|
||||
else
|
||||
();
|
||||
!ok
|
||||
|
||||
let speclist =
|
||||
[
|
||||
"-v", Arg.Set verbose, " verbose mode";
|
||||
"-n", Arg.Set_int n, " size of the range";
|
||||
]
|
||||
|
||||
let () =
|
||||
t_int 0 2_000_000;
|
||||
t_int (-4_000_000) (-3_500_000);
|
||||
()
|
||||
Arg.parse (Arg.align speclist) (fun _ -> ()) "test_hash.exe";
|
||||
let ok1 =
|
||||
check_bit_proba "CCHash64" (fun i -> H64.finalize64 (H64.int H64.seed i)) !n
|
||||
in
|
||||
let ok2 = check_bit_proba "XXH" (fun i -> XXH.hash_int i) !n in
|
||||
if (not ok1) || not ok2 then exit 1
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ Containers_testlib.run_all ~descr:"containers"
|
|||
T_array.get ();
|
||||
T_bool.get ();
|
||||
T_byte_buffer.get ();
|
||||
T_byte_slice.get ();
|
||||
T_canonical_sexp.get ();
|
||||
T_char.get ();
|
||||
T_either.get ();
|
||||
|
|
|
|||
|
|
@ -151,4 +151,71 @@ let prop_consistent ops =
|
|||
with Nope str -> Test.fail_reportf "consistent ops failed:\n%s" str
|
||||
;;
|
||||
|
||||
q arb (fun ops -> prop_consistent ops)
|
||||
q arb (fun ops -> prop_consistent ops);;
|
||||
|
||||
(* --- iter/fold_left/iteri off-by-one --- *)
|
||||
|
||||
t @@ fun () ->
|
||||
(* empty buffer: iter should call f zero times *)
|
||||
let b = create () in
|
||||
let n = ref 0 in
|
||||
iter (fun _ -> incr n) b;
|
||||
!n = 0
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
(* non-empty buffer: iter visits exactly [length b] chars *)
|
||||
let b = create () in
|
||||
append_string b "abc";
|
||||
let chars = ref [] in
|
||||
iter (fun c -> chars := c :: !chars) b;
|
||||
List.rev !chars = [ 'a'; 'b'; 'c' ]
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
(* fold_left on empty buffer returns accumulator unchanged *)
|
||||
let b = create () in
|
||||
fold_left (fun acc _ -> acc + 1) 0 b = 0
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
(* fold_left counts exactly [length b] chars *)
|
||||
let b = create () in
|
||||
append_string b "hello";
|
||||
fold_left (fun acc _ -> acc + 1) 0 b = 5
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
(* iteri visits exactly [length b] indices *)
|
||||
let b = create () in
|
||||
append_string b "ab";
|
||||
let pairs = ref [] in
|
||||
iteri (fun i c -> pairs := (i, c) :: !pairs) b;
|
||||
List.rev !pairs = [ 0, 'a'; 1, 'b' ]
|
||||
;;
|
||||
|
||||
(* --- copy --- *)
|
||||
|
||||
t @@ fun () ->
|
||||
let b = create () in
|
||||
append_string b "hello";
|
||||
let b2 = copy b in
|
||||
contents b = contents b2
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
(* copy is independent: mutating original doesn't affect copy *)
|
||||
let b = create () in
|
||||
append_string b "hello";
|
||||
let b2 = copy b in
|
||||
add_char b '!';
|
||||
contents b2 = "hello"
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
(* copy is independent: mutating copy doesn't affect original *)
|
||||
let b = create () in
|
||||
append_string b "hello";
|
||||
let b2 = copy b in
|
||||
add_char b2 '?';
|
||||
contents b = "hello"
|
||||
|
|
|
|||
37
tests/core/t_byte_slice.ml
Normal file
37
tests/core/t_byte_slice.ml
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
module T = (val Containers_testlib.make ~__FILE__ ())
|
||||
include T
|
||||
open CCByte_slice;;
|
||||
|
||||
(* --- of_string --- *)
|
||||
|
||||
t @@ fun () ->
|
||||
let sl = of_string "hello" in
|
||||
len sl = 5
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
let sl = of_string "hello" in
|
||||
contents sl = "hello"
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
let sl = of_string "" in
|
||||
len sl = 0
|
||||
;;
|
||||
|
||||
(* --- clear --- *)
|
||||
|
||||
t @@ fun () ->
|
||||
let sl = of_string "hello" in
|
||||
clear sl;
|
||||
len sl = 0
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
(* after clear, get raises *)
|
||||
let sl = of_string "hello" in
|
||||
clear sl;
|
||||
try
|
||||
let _ = get sl 0 in
|
||||
false
|
||||
with Invalid_argument _ -> true
|
||||
|
|
@ -17,3 +17,66 @@ t @@ fun () -> string "abc" <> string "abcd";;
|
|||
q Q.int (fun i ->
|
||||
Q.assume (i >= 0);
|
||||
int i = int64 (Int64.of_int i))
|
||||
;;
|
||||
|
||||
(* --- stress tests -------------------------------------------------------- *)
|
||||
|
||||
(* Chi-squared distribution test over [count] consecutive integers in [buckets] buckets.
|
||||
A uniform hash gives chi2 ~ buckets-1; we allow 4 standard deviations of slack. *)
|
||||
t ~name:"int hash distribution chi2" @@ fun () ->
|
||||
let count = 50_000 and buckets = 500 in
|
||||
let counts = Array.make buckets 0 in
|
||||
for i = 0 to count - 1 do
|
||||
let b = CCHash.int i mod buckets in
|
||||
counts.(b) <- counts.(b) + 1
|
||||
done;
|
||||
let expected = float count /. float buckets in
|
||||
let c2 =
|
||||
Array.fold_left
|
||||
(fun acc c -> acc +. (((float c -. expected) ** 2.0) /. expected))
|
||||
0.0 counts
|
||||
in
|
||||
let df = float (buckets - 1) in
|
||||
c2 < df +. (4.0 *. sqrt (2.0 *. df))
|
||||
;;
|
||||
|
||||
(* Strict avalanche criterion: flip one input bit, expect ~50% output bits to change. *)
|
||||
t ~name:"int hash avalanche" @@ fun () ->
|
||||
let bits = Sys.int_size - 1 in
|
||||
let total_flips = ref 0 in
|
||||
let total = ref 0 in
|
||||
let rng = Random.State.make [| 42; 17; 99 |] in
|
||||
for _ = 1 to 300 do
|
||||
let x = Random.State.bits rng in
|
||||
let hx = CCHash.int x in
|
||||
for b = 0 to bits - 1 do
|
||||
let hx' = CCHash.int (x lxor (1 lsl b)) in
|
||||
total_flips := !total_flips + CCInt.popcount (hx lxor hx');
|
||||
total := !total + bits
|
||||
done
|
||||
done;
|
||||
let frac = float !total_flips /. float !total in
|
||||
frac >= 0.45 && frac <= 0.55
|
||||
;;
|
||||
|
||||
(* String hash: no collisions among distinct keys. *)
|
||||
t ~name:"string hash no collisions" @@ fun () ->
|
||||
let n = 50_000 in
|
||||
let tbl = Hashtbl.create n in
|
||||
let ok = ref true in
|
||||
for i = 0 to n - 1 do
|
||||
let h = CCHash.string (Printf.sprintf "key:%d" i) in
|
||||
if Hashtbl.mem tbl h then ok := false;
|
||||
Hashtbl.replace tbl h ()
|
||||
done;
|
||||
!ok
|
||||
;;
|
||||
|
||||
(* CCHash64 pipeline matches CCHash.pair combiner. *)
|
||||
q Q.int (fun i ->
|
||||
let j = i lxor 0xdeadbeef in
|
||||
let h_pair = CCHash.pair CCHash.int CCHash.int (i, j) in
|
||||
let h_manual =
|
||||
CCHash64.(finalize (int (int seed (CCHash.int i)) (CCHash.int j)))
|
||||
in
|
||||
h_pair = h_manual)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue