mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-05-05 17:04:25 -04:00
commit
aeae7c1039
9 changed files with 7833 additions and 0 deletions
2
src/xxhash/README.md
Normal file
2
src/xxhash/README.md
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
|
||||
xxhash code directly vendored from https://github.com/Cyan4973/xxHash/ and remains under BSD license, author Yann Collet.
|
||||
32
src/xxhash/containers_xxhash.ml
Normal file
32
src/xxhash/containers_xxhash.ml
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
module Raw = struct
|
||||
external hash_string : string -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_xxhash_string_byte" "caml_cc_xxhash_string"
|
||||
[@@noalloc]
|
||||
|
||||
external hash_int64 :
|
||||
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
|
||||
[@@noalloc]
|
||||
|
||||
external hash_int32 :
|
||||
(int32[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_xxhash_int32_byte" "caml_cc_xxhash_int32"
|
||||
[@@noalloc]
|
||||
|
||||
external hash_int : (int[@untagged]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
|
||||
[@@noalloc]
|
||||
end
|
||||
|
||||
let[@inline] hash_string s = Raw.hash_string s 0L
|
||||
let[@inline] hash_string_seed s seed = Raw.hash_string s seed
|
||||
let[@inline] hash_int64 v = Raw.hash_int64 v 0L
|
||||
let[@inline] hash_int64_seed v seed = Raw.hash_int64 v seed
|
||||
let[@inline] hash_int32 v = Raw.hash_int32 v 0L
|
||||
let[@inline] hash_int32_seed v seed = Raw.hash_int32 v seed
|
||||
let[@inline] hash_int v = Raw.hash_int v 0L
|
||||
let[@inline] hash_int_seed v seed = Raw.hash_int v seed
|
||||
let[@inline] hash_bool b = Raw.hash_int (Bool.to_int b) 0L
|
||||
let[@inline] hash_bool_seed b seed = Raw.hash_int (Bool.to_int b) seed
|
||||
let[@inline] hash_char c = Raw.hash_int (Char.code c) 0L
|
||||
let[@inline] hash_char_seed c seed = Raw.hash_int (Char.code c) seed
|
||||
69
src/xxhash/containers_xxhash.mli
Normal file
69
src/xxhash/containers_xxhash.mli
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
(** XXHash bindings.
|
||||
|
||||
Fast non-cryptographic hash functions from
|
||||
{{:https://github.com/Cyan4973/xxHash} xxHash}.
|
||||
|
||||
All functions use XXH64 and are noalloc in native code.
|
||||
*)
|
||||
|
||||
(** Raw bindings with explicit seed argument. *)
|
||||
module Raw : sig
|
||||
external hash_string : string -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_xxhash_string_byte" "caml_cc_xxhash_string"
|
||||
[@@noalloc]
|
||||
(** [hash_string s seed] hashes [s] with [seed] using XXH64. *)
|
||||
|
||||
external hash_int64 :
|
||||
(int64[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_xxhash_int64_byte" "caml_cc_xxhash_int64"
|
||||
[@@noalloc]
|
||||
(** [hash_int64 v seed] hashes the 8-byte representation of [v] with [seed]. *)
|
||||
|
||||
external hash_int32 :
|
||||
(int32[@unboxed]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_xxhash_int32_byte" "caml_cc_xxhash_int32"
|
||||
[@@noalloc]
|
||||
(** [hash_int32 v seed] hashes the 4-byte representation of [v] with [seed]. *)
|
||||
|
||||
external hash_int : (int[@untagged]) -> (int64[@unboxed]) -> (int64[@unboxed])
|
||||
= "caml_cc_xxhash_int_byte" "caml_cc_xxhash_int"
|
||||
[@@noalloc]
|
||||
(** [hash_int v seed] hashes [v] as a 64-bit integer with [seed]. Noalloc
|
||||
and untagged in native code. *)
|
||||
end
|
||||
|
||||
val hash_string : string -> int64
|
||||
(** [hash_string s] hashes [s] using XXH64 with seed [0L]. *)
|
||||
|
||||
val hash_string_seed : string -> int64 -> int64
|
||||
(** [hash_string_seed s seed] hashes [s] with an explicit seed. *)
|
||||
|
||||
val hash_int64 : int64 -> int64
|
||||
(** [hash_int64 v] hashes the 8-byte representation of [v] with seed [0L]. *)
|
||||
|
||||
val hash_int64_seed : int64 -> int64 -> int64
|
||||
(** [hash_int64_seed v seed] hashes [v] with an explicit seed. *)
|
||||
|
||||
val hash_int32 : int32 -> int64
|
||||
(** [hash_int32 v] hashes the 4-byte representation of [v] with seed [0L]. *)
|
||||
|
||||
val hash_int32_seed : int32 -> int64 -> int64
|
||||
(** [hash_int32_seed v seed] hashes [v] with an explicit seed. *)
|
||||
|
||||
val hash_int : int -> int64
|
||||
(** [hash_int v] hashes [v] as a 64-bit integer with seed [0L]. *)
|
||||
|
||||
val hash_int_seed : int -> int64 -> int64
|
||||
(** [hash_int_seed v seed] hashes [v] with an explicit seed. *)
|
||||
|
||||
val hash_bool : bool -> int64
|
||||
(** [hash_bool b] hashes [b] as an integer (0 or 1) with seed [0L]. *)
|
||||
|
||||
val hash_bool_seed : bool -> int64 -> int64
|
||||
(** [hash_bool_seed b seed] hashes [b] with an explicit seed. *)
|
||||
|
||||
val hash_char : char -> int64
|
||||
(** [hash_char c] hashes [c] as its character code with seed [0L]. *)
|
||||
|
||||
val hash_char_seed : char -> int64 -> int64
|
||||
(** [hash_char_seed c seed] hashes [c] with an explicit seed. *)
|
||||
10
src/xxhash/dune
Normal file
10
src/xxhash/dune
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
(library
|
||||
(name containers_xxhash)
|
||||
(public_name containers.xxhash)
|
||||
(synopsis "xxHash bindings for containers")
|
||||
(libraries containers)
|
||||
(foreign_stubs
|
||||
(language c)
|
||||
(flags :standard -O2)
|
||||
(names stubs))
|
||||
(ocamlopt_flags :standard -inline 100))
|
||||
52
src/xxhash/stubs.c
Normal file
52
src/xxhash/stubs.c
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
#define XXH_NO_XXH3
|
||||
#define XXH_NO_STREAM
|
||||
#define XXH_INLINE_ALL
|
||||
#include "xxhash.h"
|
||||
|
||||
#include <caml/alloc.h>
|
||||
#include <caml/memory.h>
|
||||
#include <caml/mlvalues.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* hash_string: (value string, int64_t seed) -> int64_t */
|
||||
CAMLprim int64_t caml_cc_xxhash_string(value v_s, int64_t seed) {
|
||||
return (int64_t)XXH64(String_val(v_s), caml_string_length(v_s),
|
||||
(XXH64_hash_t)seed);
|
||||
}
|
||||
CAMLprim value caml_cc_xxhash_string_byte(value v_s, value v_seed) {
|
||||
CAMLparam2(v_s, v_seed);
|
||||
int64_t result =
|
||||
caml_cc_xxhash_string(v_s, (int64_t)Int64_val(v_seed));
|
||||
CAMLreturn(caml_copy_int64(result));
|
||||
}
|
||||
|
||||
/* hash_int64: (int64_t v, int64_t seed) -> int64_t */
|
||||
CAMLprim int64_t caml_cc_xxhash_int64(int64_t v, int64_t seed) {
|
||||
return (int64_t)XXH64(&v, sizeof(v), (XXH64_hash_t)seed);
|
||||
}
|
||||
CAMLprim value caml_cc_xxhash_int64_byte(value v_v, value v_seed) {
|
||||
CAMLparam2(v_v, v_seed);
|
||||
int64_t result = caml_cc_xxhash_int64(Int64_val(v_v), Int64_val(v_seed));
|
||||
CAMLreturn(caml_copy_int64(result));
|
||||
}
|
||||
|
||||
/* hash_int32: (int32_t v, int64_t seed) -> int64_t */
|
||||
CAMLprim int64_t caml_cc_xxhash_int32(int32_t v, int64_t seed) {
|
||||
return (int64_t)XXH64(&v, sizeof(v), (XXH64_hash_t)seed);
|
||||
}
|
||||
CAMLprim value caml_cc_xxhash_int32_byte(value v_v, value v_seed) {
|
||||
CAMLparam2(v_v, v_seed);
|
||||
int64_t result = caml_cc_xxhash_int32(Int32_val(v_v), Int64_val(v_seed));
|
||||
CAMLreturn(caml_copy_int64(result));
|
||||
}
|
||||
|
||||
/* hash_int: (intnat v, int64_t seed) -> int64_t */
|
||||
CAMLprim int64_t caml_cc_xxhash_int(intnat v, int64_t seed) {
|
||||
int64_t v64 = (int64_t)(uintnat)v; /* zero-extend on 32-bit platforms */
|
||||
return (int64_t)XXH64(&v64, sizeof(v64), (XXH64_hash_t)seed);
|
||||
}
|
||||
CAMLprim value caml_cc_xxhash_int_byte(value v_v, value v_seed) {
|
||||
CAMLparam2(v_v, v_seed);
|
||||
int64_t result = caml_cc_xxhash_int(Long_val(v_v), Int64_val(v_seed));
|
||||
CAMLreturn(caml_copy_int64(result));
|
||||
}
|
||||
42
src/xxhash/xxhash.c
Normal file
42
src/xxhash/xxhash.c
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* xxHash - Extremely Fast Hash algorithm
|
||||
* Copyright (C) 2012-2023 Yann Collet
|
||||
*
|
||||
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* You can contact the author at:
|
||||
* - xxHash homepage: https://www.xxhash.com
|
||||
* - xxHash source repository: https://github.com/Cyan4973/xxHash
|
||||
*/
|
||||
|
||||
/*
|
||||
* xxhash.c instantiates functions defined in xxhash.h
|
||||
*/
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
|
||||
#define XXH_IMPLEMENTATION /* access definitions */
|
||||
|
||||
#include "xxhash.h"
|
||||
7490
src/xxhash/xxhash.h
Normal file
7490
src/xxhash/xxhash.h
Normal file
File diff suppressed because it is too large
Load diff
11
tests/xxhash/dune
Normal file
11
tests/xxhash/dune
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
(executable
|
||||
(name t_xxhash)
|
||||
(modules t_xxhash)
|
||||
(libraries containers containers.xxhash containers_testlib qcheck-core))
|
||||
|
||||
(rule
|
||||
(alias runtest)
|
||||
(deps t_xxhash.exe)
|
||||
(package containers)
|
||||
(action
|
||||
(run ./t_xxhash.exe)))
|
||||
125
tests/xxhash/t_xxhash.ml
Normal file
125
tests/xxhash/t_xxhash.ml
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
include (val Containers_testlib.make ~__FILE__ ())
|
||||
module H = Containers_xxhash
|
||||
|
||||
(* Gold tests: hash_string *)
|
||||
;;
|
||||
|
||||
t @@ fun () ->
|
||||
assert_equal ~printer:Int64.to_string (-1205034819632174695L) (H.hash_string "");
|
||||
assert_equal ~printer:Int64.to_string (-7444071767201028348L)
|
||||
(H.hash_string_seed "" 42L);
|
||||
assert_equal ~printer:Int64.to_string 2794345569481354659L
|
||||
(H.hash_string "hello");
|
||||
assert_equal ~printer:Int64.to_string (-4367754540140381902L)
|
||||
(H.hash_string_seed "hello" 42L);
|
||||
assert_equal ~printer:Int64.to_string 1513236774081638803L
|
||||
(H.hash_string "the quick brown fox");
|
||||
assert_equal ~printer:Int64.to_string 6882318601984224800L
|
||||
(H.hash_string_seed "the quick brown fox" 42L);
|
||||
true
|
||||
;;
|
||||
|
||||
(* Gold tests: hash_int64 *)
|
||||
t @@ fun () ->
|
||||
assert_equal ~printer:Int64.to_string 3803688792395291579L (H.hash_int64 0L);
|
||||
assert_equal ~printer:Int64.to_string (-6977822845260490347L) (H.hash_int64 1L);
|
||||
assert_equal ~printer:Int64.to_string (-8804195676797548855L)
|
||||
(H.hash_int64 (-1L));
|
||||
assert_equal ~printer:Int64.to_string (-7296932117151183542L)
|
||||
(H.hash_int64 1234567890123456789L);
|
||||
true
|
||||
;;
|
||||
|
||||
(* Gold tests: hash_int32 *)
|
||||
t @@ fun () ->
|
||||
assert_equal ~printer:Int64.to_string 4246796580750024372L (H.hash_int32 0l);
|
||||
assert_equal ~printer:Int64.to_string (-851299076295404719L) (H.hash_int32 1l);
|
||||
assert_equal ~printer:Int64.to_string 9185342943168159635L (H.hash_int32 (-1l));
|
||||
assert_equal ~printer:Int64.to_string (-2929917330072466447L) (H.hash_int32 42l);
|
||||
true
|
||||
;;
|
||||
|
||||
(* Gold tests: hash_int *)
|
||||
t @@ fun () ->
|
||||
assert_equal ~printer:Int64.to_string 3803688792395291579L (H.hash_int 0);
|
||||
assert_equal ~printer:Int64.to_string (-6977822845260490347L) (H.hash_int 1);
|
||||
assert_equal ~printer:Int64.to_string (-8804195676797548855L) (H.hash_int (-1));
|
||||
assert_equal ~printer:Int64.to_string (-5379971487550586029L) (H.hash_int 42);
|
||||
true
|
||||
;;
|
||||
|
||||
(* Gold tests: hash_bool *)
|
||||
t @@ fun () ->
|
||||
assert_equal ~printer:Int64.to_string 3803688792395291579L (H.hash_bool false);
|
||||
assert_equal ~printer:Int64.to_string (-6977822845260490347L) (H.hash_bool true);
|
||||
true
|
||||
;;
|
||||
|
||||
(* Gold tests: hash_char *)
|
||||
t @@ fun () ->
|
||||
(* 'a' = 97, '0' = 48 *)
|
||||
assert_equal ~printer:Int64.to_string (H.hash_int 97) (H.hash_char 'a');
|
||||
assert_equal ~printer:Int64.to_string (H.hash_int 48) (H.hash_char '0');
|
||||
true
|
||||
;;
|
||||
|
||||
(* Property tests: determinism *)
|
||||
q ~count:10_000 Q.string @@ fun s ->
|
||||
Int64.equal (H.hash_string s) (H.hash_string s)
|
||||
;;
|
||||
|
||||
q ~count:10_000 Q.int64 @@ fun v ->
|
||||
Int64.equal (H.hash_int64 v) (H.hash_int64 v)
|
||||
;;
|
||||
|
||||
q ~count:10_000 Q.int @@ fun v -> Int64.equal (H.hash_int v) (H.hash_int v);;
|
||||
q ~count:10_000 Q.bool @@ fun b -> Int64.equal (H.hash_bool b) (H.hash_bool b);;
|
||||
q ~count:10_000 Q.char @@ fun c -> Int64.equal (H.hash_char c) (H.hash_char c);;
|
||||
|
||||
(* Different seeds give different results for the same input *)
|
||||
q ~count:10_000 (Q.pair Q.string Q.int64) @@ fun (s, seed) ->
|
||||
Q.assume (not (Int64.equal seed 0L));
|
||||
not (Int64.equal (H.hash_string s) (H.hash_string_seed s seed))
|
||||
;;
|
||||
|
||||
q ~count:10_000 (Q.pair Q.int64 Q.int64) @@ fun (v, seed) ->
|
||||
Q.assume (not (Int64.equal seed 0L));
|
||||
not (Int64.equal (H.hash_int64 v) (H.hash_int64_seed v seed))
|
||||
;;
|
||||
|
||||
q ~count:10_000 (Q.pair Q.int Q.int64) @@ fun (v, seed) ->
|
||||
Q.assume (not (Int64.equal seed 0L));
|
||||
not (Int64.equal (H.hash_int v) (H.hash_int_seed v seed))
|
||||
;;
|
||||
|
||||
(* Different inputs give different results for the same seed *)
|
||||
q ~count:10_000 (Q.pair Q.string Q.string) @@ fun (s1, s2) ->
|
||||
Q.assume (not (String.equal s1 s2));
|
||||
not (Int64.equal (H.hash_string s1) (H.hash_string s2))
|
||||
;;
|
||||
|
||||
q ~count:10_000 (Q.pair Q.int64 Q.int64) @@ fun (a, b) ->
|
||||
Q.assume (not (Int64.equal a b));
|
||||
not (Int64.equal (H.hash_int64 a) (H.hash_int64 b))
|
||||
;;
|
||||
|
||||
q ~count:10_000 (Q.pair Q.int Q.int) @@ fun (a, b) ->
|
||||
Q.assume (a <> b);
|
||||
not (Int64.equal (H.hash_int a) (H.hash_int b))
|
||||
;;
|
||||
|
||||
(* Stress test: hash 100k strings of varying lengths, non-empty => non-zero *)
|
||||
t @@ fun () ->
|
||||
for len = 0 to 99 do
|
||||
for _ = 1 to 1000 do
|
||||
let s = String.make len 'x' in
|
||||
let h = H.hash_string s in
|
||||
if len > 0 then
|
||||
if Int64.equal h 0L then
|
||||
failwith
|
||||
(Printf.sprintf "unexpected zero hash for string of len %d" len)
|
||||
done
|
||||
done;
|
||||
true
|
||||
|
||||
let () = Containers_testlib.run_all ~descr:"test xxhash" [ get () ]
|
||||
Loading…
Add table
Reference in a new issue