mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-05-05 17:04:25 -04:00
commit
aeae7c1039
9 changed files with 7833 additions and 0 deletions
2
src/xxhash/README.md
Normal file
2
src/xxhash/README.md
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
|
||||||
|
xxhash code directly vendored from https://github.com/Cyan4973/xxHash/ and remains under BSD license, author Yann Collet.
|
||||||
32
src/xxhash/containers_xxhash.ml
Normal file
32
src/xxhash/containers_xxhash.ml
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
module Raw = struct
|
||||||
|
external hash_string : string -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_string_byte" "caml_cc_xxhash_string"
|
||||||
|
[@@noalloc]
|
||||||
|
|
||||||
|
external hash_int64 :
|
||||||
|
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
|
||||||
|
[@@noalloc]
|
||||||
|
|
||||||
|
external hash_int32 :
|
||||||
|
(int32[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_int32_byte" "caml_cc_xxhash_int32"
|
||||||
|
[@@noalloc]
|
||||||
|
|
||||||
|
external hash_int : (int[@untagged]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
|
||||||
|
[@@noalloc]
|
||||||
|
end
|
||||||
|
|
||||||
|
let[@inline] hash_string s = Raw.hash_string s 0L
|
||||||
|
let[@inline] hash_string_seed s seed = Raw.hash_string s seed
|
||||||
|
let[@inline] hash_int64 v = Raw.hash_int64 v 0L
|
||||||
|
let[@inline] hash_int64_seed v seed = Raw.hash_int64 v seed
|
||||||
|
let[@inline] hash_int32 v = Raw.hash_int32 v 0L
|
||||||
|
let[@inline] hash_int32_seed v seed = Raw.hash_int32 v seed
|
||||||
|
let[@inline] hash_int v = Raw.hash_int v 0L
|
||||||
|
let[@inline] hash_int_seed v seed = Raw.hash_int v seed
|
||||||
|
let[@inline] hash_bool b = Raw.hash_int (Bool.to_int b) 0L
|
||||||
|
let[@inline] hash_bool_seed b seed = Raw.hash_int (Bool.to_int b) seed
|
||||||
|
let[@inline] hash_char c = Raw.hash_int (Char.code c) 0L
|
||||||
|
let[@inline] hash_char_seed c seed = Raw.hash_int (Char.code c) seed
|
||||||
69
src/xxhash/containers_xxhash.mli
Normal file
69
src/xxhash/containers_xxhash.mli
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
(** XXHash bindings.
|
||||||
|
|
||||||
|
Fast non-cryptographic hash functions from
|
||||||
|
{{:https://github.com/Cyan4973/xxHash} xxHash}.
|
||||||
|
|
||||||
|
All functions use XXH64 and are noalloc in native code.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** Raw bindings with explicit seed argument. *)
|
||||||
|
module Raw : sig
|
||||||
|
external hash_string : string -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_string_byte" "caml_cc_xxhash_string"
|
||||||
|
[@@noalloc]
|
||||||
|
(** [hash_string s seed] hashes [s] with [seed] using XXH64. *)
|
||||||
|
|
||||||
|
external hash_int64 :
|
||||||
|
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
|
||||||
|
[@@noalloc]
|
||||||
|
(** [hash_int64 v seed] hashes the 8-byte representation of [v] with [seed]. *)
|
||||||
|
|
||||||
|
external hash_int32 :
|
||||||
|
(int32[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_int32_byte" "caml_cc_xxhash_int32"
|
||||||
|
[@@noalloc]
|
||||||
|
(** [hash_int32 v seed] hashes the 4-byte representation of [v] with [seed]. *)
|
||||||
|
|
||||||
|
external hash_int : (int[@untagged]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||||
|
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
|
||||||
|
[@@noalloc]
|
||||||
|
(** [hash_int v seed] hashes [v] as a 64-bit integer with [seed]. Noalloc
|
||||||
|
and untagged in native code. *)
|
||||||
|
end
|
||||||
|
|
||||||
|
val hash_string : string -> int64
|
||||||
|
(** [hash_string s] hashes [s] using XXH64 with seed [0L]. *)
|
||||||
|
|
||||||
|
val hash_string_seed : string -> int64 -> int64
|
||||||
|
(** [hash_string_seed s seed] hashes [s] with an explicit seed. *)
|
||||||
|
|
||||||
|
val hash_int64 : int64 -> int64
|
||||||
|
(** [hash_int64 v] hashes the 8-byte representation of [v] with seed [0L]. *)
|
||||||
|
|
||||||
|
val hash_int64_seed : int64 -> int64 -> int64
|
||||||
|
(** [hash_int64_seed v seed] hashes [v] with an explicit seed. *)
|
||||||
|
|
||||||
|
val hash_int32 : int32 -> int64
|
||||||
|
(** [hash_int32 v] hashes the 4-byte representation of [v] with seed [0L]. *)
|
||||||
|
|
||||||
|
val hash_int32_seed : int32 -> int64 -> int64
|
||||||
|
(** [hash_int32_seed v seed] hashes [v] with an explicit seed. *)
|
||||||
|
|
||||||
|
val hash_int : int -> int64
|
||||||
|
(** [hash_int v] hashes [v] as a 64-bit integer with seed [0L]. *)
|
||||||
|
|
||||||
|
val hash_int_seed : int -> int64 -> int64
|
||||||
|
(** [hash_int_seed v seed] hashes [v] with an explicit seed. *)
|
||||||
|
|
||||||
|
val hash_bool : bool -> int64
|
||||||
|
(** [hash_bool b] hashes [b] as an integer (0 or 1) with seed [0L]. *)
|
||||||
|
|
||||||
|
val hash_bool_seed : bool -> int64 -> int64
|
||||||
|
(** [hash_bool_seed b seed] hashes [b] with an explicit seed. *)
|
||||||
|
|
||||||
|
val hash_char : char -> int64
|
||||||
|
(** [hash_char c] hashes [c] as its character code with seed [0L]. *)
|
||||||
|
|
||||||
|
val hash_char_seed : char -> int64 -> int64
|
||||||
|
(** [hash_char_seed c seed] hashes [c] with an explicit seed. *)
|
||||||
10
src/xxhash/dune
Normal file
10
src/xxhash/dune
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
(library
|
||||||
|
(name containers_xxhash)
|
||||||
|
(public_name containers.xxhash)
|
||||||
|
(synopsis "xxHash bindings for containers")
|
||||||
|
(libraries containers)
|
||||||
|
(foreign_stubs
|
||||||
|
(language c)
|
||||||
|
(flags :standard -O2)
|
||||||
|
(names stubs))
|
||||||
|
(ocamlopt_flags :standard -inline 100))
|
||||||
52
src/xxhash/stubs.c
Normal file
52
src/xxhash/stubs.c
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
#define XXH_NO_XXH3
|
||||||
|
#define XXH_NO_STREAM
|
||||||
|
#define XXH_INLINE_ALL
|
||||||
|
#include "xxhash.h"
|
||||||
|
|
||||||
|
#include <caml/alloc.h>
|
||||||
|
#include <caml/memory.h>
|
||||||
|
#include <caml/mlvalues.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/* hash_string: (value string, int64_t seed) -> int64_t */
|
||||||
|
CAMLprim int64_t caml_cc_xxhash_string(value v_s, int64_t seed) {
|
||||||
|
return (int64_t)XXH64(String_val(v_s), caml_string_length(v_s),
|
||||||
|
(XXH64_hash_t)seed);
|
||||||
|
}
|
||||||
|
CAMLprim value caml_cc_xxhash_string_byte(value v_s, value v_seed) {
|
||||||
|
CAMLparam2(v_s, v_seed);
|
||||||
|
int64_t result =
|
||||||
|
caml_cc_xxhash_string(v_s, (int64_t)Int64_val(v_seed));
|
||||||
|
CAMLreturn(caml_copy_int64(result));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* hash_int64: (int64_t v, int64_t seed) -> int64_t */
|
||||||
|
CAMLprim int64_t caml_cc_xxhash_int64(int64_t v, int64_t seed) {
|
||||||
|
return (int64_t)XXH64(&v, sizeof(v), (XXH64_hash_t)seed);
|
||||||
|
}
|
||||||
|
CAMLprim value caml_cc_xxhash_int64_byte(value v_v, value v_seed) {
|
||||||
|
CAMLparam2(v_v, v_seed);
|
||||||
|
int64_t result = caml_cc_xxhash_int64(Int64_val(v_v), Int64_val(v_seed));
|
||||||
|
CAMLreturn(caml_copy_int64(result));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* hash_int32: (int32_t v, int64_t seed) -> int64_t */
|
||||||
|
CAMLprim int64_t caml_cc_xxhash_int32(int32_t v, int64_t seed) {
|
||||||
|
return (int64_t)XXH64(&v, sizeof(v), (XXH64_hash_t)seed);
|
||||||
|
}
|
||||||
|
CAMLprim value caml_cc_xxhash_int32_byte(value v_v, value v_seed) {
|
||||||
|
CAMLparam2(v_v, v_seed);
|
||||||
|
int64_t result = caml_cc_xxhash_int32(Int32_val(v_v), Int64_val(v_seed));
|
||||||
|
CAMLreturn(caml_copy_int64(result));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* hash_int: (intnat v, int64_t seed) -> int64_t */
|
||||||
|
CAMLprim int64_t caml_cc_xxhash_int(intnat v, int64_t seed) {
|
||||||
|
int64_t v64 = (int64_t)(uintnat)v; /* zero-extend on 32-bit platforms */
|
||||||
|
return (int64_t)XXH64(&v64, sizeof(v64), (XXH64_hash_t)seed);
|
||||||
|
}
|
||||||
|
CAMLprim value caml_cc_xxhash_int_byte(value v_v, value v_seed) {
|
||||||
|
CAMLparam2(v_v, v_seed);
|
||||||
|
int64_t result = caml_cc_xxhash_int(Long_val(v_v), Int64_val(v_seed));
|
||||||
|
CAMLreturn(caml_copy_int64(result));
|
||||||
|
}
|
||||||
42
src/xxhash/xxhash.c
Normal file
42
src/xxhash/xxhash.c
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
/*
|
||||||
|
* xxHash - Extremely Fast Hash algorithm
|
||||||
|
* Copyright (C) 2012-2023 Yann Collet
|
||||||
|
*
|
||||||
|
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above
|
||||||
|
* copyright notice, this list of conditions and the following disclaimer
|
||||||
|
* in the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* You can contact the author at:
|
||||||
|
* - xxHash homepage: https://www.xxhash.com
|
||||||
|
* - xxHash source repository: https://github.com/Cyan4973/xxHash
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* xxhash.c instantiates functions defined in xxhash.h
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
|
||||||
|
#define XXH_IMPLEMENTATION /* access definitions */
|
||||||
|
|
||||||
|
#include "xxhash.h"
|
||||||
7490
src/xxhash/xxhash.h
Normal file
7490
src/xxhash/xxhash.h
Normal file
File diff suppressed because it is too large
Load diff
11
tests/xxhash/dune
Normal file
11
tests/xxhash/dune
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
(executable
|
||||||
|
(name t_xxhash)
|
||||||
|
(modules t_xxhash)
|
||||||
|
(libraries containers containers.xxhash containers_testlib qcheck-core))
|
||||||
|
|
||||||
|
(rule
|
||||||
|
(alias runtest)
|
||||||
|
(deps t_xxhash.exe)
|
||||||
|
(package containers)
|
||||||
|
(action
|
||||||
|
(run ./t_xxhash.exe)))
|
||||||
125
tests/xxhash/t_xxhash.ml
Normal file
125
tests/xxhash/t_xxhash.ml
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
include (val Containers_testlib.make ~__FILE__ ())
|
||||||
|
module H = Containers_xxhash
|
||||||
|
|
||||||
|
(* Gold tests: hash_string *)
|
||||||
|
;;
|
||||||
|
|
||||||
|
t @@ fun () ->
|
||||||
|
assert_equal ~printer:Int64.to_string (-1205034819632174695L) (H.hash_string "");
|
||||||
|
assert_equal ~printer:Int64.to_string (-7444071767201028348L)
|
||||||
|
(H.hash_string_seed "" 42L);
|
||||||
|
assert_equal ~printer:Int64.to_string 2794345569481354659L
|
||||||
|
(H.hash_string "hello");
|
||||||
|
assert_equal ~printer:Int64.to_string (-4367754540140381902L)
|
||||||
|
(H.hash_string_seed "hello" 42L);
|
||||||
|
assert_equal ~printer:Int64.to_string 1513236774081638803L
|
||||||
|
(H.hash_string "the quick brown fox");
|
||||||
|
assert_equal ~printer:Int64.to_string 6882318601984224800L
|
||||||
|
(H.hash_string_seed "the quick brown fox" 42L);
|
||||||
|
true
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Gold tests: hash_int64 *)
|
||||||
|
t @@ fun () ->
|
||||||
|
assert_equal ~printer:Int64.to_string 3803688792395291579L (H.hash_int64 0L);
|
||||||
|
assert_equal ~printer:Int64.to_string (-6977822845260490347L) (H.hash_int64 1L);
|
||||||
|
assert_equal ~printer:Int64.to_string (-8804195676797548855L)
|
||||||
|
(H.hash_int64 (-1L));
|
||||||
|
assert_equal ~printer:Int64.to_string (-7296932117151183542L)
|
||||||
|
(H.hash_int64 1234567890123456789L);
|
||||||
|
true
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Gold tests: hash_int32 *)
|
||||||
|
t @@ fun () ->
|
||||||
|
assert_equal ~printer:Int64.to_string 4246796580750024372L (H.hash_int32 0l);
|
||||||
|
assert_equal ~printer:Int64.to_string (-851299076295404719L) (H.hash_int32 1l);
|
||||||
|
assert_equal ~printer:Int64.to_string 9185342943168159635L (H.hash_int32 (-1l));
|
||||||
|
assert_equal ~printer:Int64.to_string (-2929917330072466447L) (H.hash_int32 42l);
|
||||||
|
true
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Gold tests: hash_int *)
|
||||||
|
t @@ fun () ->
|
||||||
|
assert_equal ~printer:Int64.to_string 3803688792395291579L (H.hash_int 0);
|
||||||
|
assert_equal ~printer:Int64.to_string (-6977822845260490347L) (H.hash_int 1);
|
||||||
|
assert_equal ~printer:Int64.to_string (-8804195676797548855L) (H.hash_int (-1));
|
||||||
|
assert_equal ~printer:Int64.to_string (-5379971487550586029L) (H.hash_int 42);
|
||||||
|
true
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Gold tests: hash_bool *)
|
||||||
|
t @@ fun () ->
|
||||||
|
assert_equal ~printer:Int64.to_string 3803688792395291579L (H.hash_bool false);
|
||||||
|
assert_equal ~printer:Int64.to_string (-6977822845260490347L) (H.hash_bool true);
|
||||||
|
true
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Gold tests: hash_char *)
|
||||||
|
t @@ fun () ->
|
||||||
|
(* 'a' = 97, '0' = 48 *)
|
||||||
|
assert_equal ~printer:Int64.to_string (H.hash_int 97) (H.hash_char 'a');
|
||||||
|
assert_equal ~printer:Int64.to_string (H.hash_int 48) (H.hash_char '0');
|
||||||
|
true
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Property tests: determinism *)
|
||||||
|
q ~count:10_000 Q.string @@ fun s ->
|
||||||
|
Int64.equal (H.hash_string s) (H.hash_string s)
|
||||||
|
;;
|
||||||
|
|
||||||
|
q ~count:10_000 Q.int64 @@ fun v ->
|
||||||
|
Int64.equal (H.hash_int64 v) (H.hash_int64 v)
|
||||||
|
;;
|
||||||
|
|
||||||
|
q ~count:10_000 Q.int @@ fun v -> Int64.equal (H.hash_int v) (H.hash_int v);;
|
||||||
|
q ~count:10_000 Q.bool @@ fun b -> Int64.equal (H.hash_bool b) (H.hash_bool b);;
|
||||||
|
q ~count:10_000 Q.char @@ fun c -> Int64.equal (H.hash_char c) (H.hash_char c);;
|
||||||
|
|
||||||
|
(* Different seeds give different results for the same input *)
|
||||||
|
q ~count:10_000 (Q.pair Q.string Q.int64) @@ fun (s, seed) ->
|
||||||
|
Q.assume (not (Int64.equal seed 0L));
|
||||||
|
not (Int64.equal (H.hash_string s) (H.hash_string_seed s seed))
|
||||||
|
;;
|
||||||
|
|
||||||
|
q ~count:10_000 (Q.pair Q.int64 Q.int64) @@ fun (v, seed) ->
|
||||||
|
Q.assume (not (Int64.equal seed 0L));
|
||||||
|
not (Int64.equal (H.hash_int64 v) (H.hash_int64_seed v seed))
|
||||||
|
;;
|
||||||
|
|
||||||
|
q ~count:10_000 (Q.pair Q.int Q.int64) @@ fun (v, seed) ->
|
||||||
|
Q.assume (not (Int64.equal seed 0L));
|
||||||
|
not (Int64.equal (H.hash_int v) (H.hash_int_seed v seed))
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Different inputs give different results for the same seed *)
|
||||||
|
q ~count:10_000 (Q.pair Q.string Q.string) @@ fun (s1, s2) ->
|
||||||
|
Q.assume (not (String.equal s1 s2));
|
||||||
|
not (Int64.equal (H.hash_string s1) (H.hash_string s2))
|
||||||
|
;;
|
||||||
|
|
||||||
|
q ~count:10_000 (Q.pair Q.int64 Q.int64) @@ fun (a, b) ->
|
||||||
|
Q.assume (not (Int64.equal a b));
|
||||||
|
not (Int64.equal (H.hash_int64 a) (H.hash_int64 b))
|
||||||
|
;;
|
||||||
|
|
||||||
|
q ~count:10_000 (Q.pair Q.int Q.int) @@ fun (a, b) ->
|
||||||
|
Q.assume (a <> b);
|
||||||
|
not (Int64.equal (H.hash_int a) (H.hash_int b))
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Stress test: hash 100k strings of varying lengths, non-empty => non-zero *)
|
||||||
|
t @@ fun () ->
|
||||||
|
for len = 0 to 99 do
|
||||||
|
for _ = 1 to 1000 do
|
||||||
|
let s = String.make len 'x' in
|
||||||
|
let h = H.hash_string s in
|
||||||
|
if len > 0 then
|
||||||
|
if Int64.equal h 0L then
|
||||||
|
failwith
|
||||||
|
(Printf.sprintf "unexpected zero hash for string of len %d" len)
|
||||||
|
done
|
||||||
|
done;
|
||||||
|
true
|
||||||
|
|
||||||
|
let () = Containers_testlib.run_all ~descr:"test xxhash" [ get () ]
|
||||||
Loading…
Add table
Reference in a new issue