mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-03-07 21:27:55 -05:00
Compare commits
5 commits
2827011b37
...
73ea1a2e88
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
73ea1a2e88 | ||
|
|
bc9f361e56 | ||
|
|
42bfe9c8c6 | ||
|
|
ea5d9bbdf4 | ||
|
|
9ec34f8bf8 |
7 changed files with 63 additions and 62 deletions
|
|
@ -4,6 +4,7 @@
|
||||||
- fix leb128 slice bug
|
- fix leb128 slice bug
|
||||||
- fix leb128 `Int64.min_int` bug
|
- fix leb128 `Int64.min_int` bug
|
||||||
- add tests for leb128 library (#486)
|
- add tests for leb128 library (#486)
|
||||||
|
- fix size explosion in `t_pvec.ml` found in CI
|
||||||
- some breaking changes after the big bump to 4.08 as lower bound, thanks to @fardale for the cleanup
|
- some breaking changes after the big bump to 4.08 as lower bound, thanks to @fardale for the cleanup
|
||||||
* breaking: CCListLabel.compare and CCListLabel.equal take the function on the elements as a named argument
|
* breaking: CCListLabel.compare and CCListLabel.equal take the function on the elements as a named argument
|
||||||
* breaking: CCListLabel.init now takes the length as a named argument to follow the Stdlib
|
* breaking: CCListLabel.init now takes the length as a named argument to follow the Stdlib
|
||||||
|
|
|
||||||
|
|
@ -1535,7 +1535,7 @@ module Str = struct
|
||||||
let rand_str_ ?(among = "abcdefgh") n =
|
let rand_str_ ?(among = "abcdefgh") n =
|
||||||
let module Q = QCheck in
|
let module Q = QCheck in
|
||||||
let st = Random.State.make [| n + 17 |] in
|
let st = Random.State.make [| n + 17 |] in
|
||||||
let gen_c = QCheck.Gen.oneofl (CCString.to_list among) in
|
let gen_c = QCheck.Gen.oneof_list (CCString.to_list among) in
|
||||||
QCheck.Gen.string_size ~gen:gen_c (QCheck.Gen.return n) st
|
QCheck.Gen.string_size ~gen:gen_c (QCheck.Gen.return n) st
|
||||||
|
|
||||||
let find ?(start = 0) ~sub s =
|
let find ?(start = 0) ~sub s =
|
||||||
|
|
@ -1830,8 +1830,8 @@ module Hash = struct
|
||||||
let prime = 0x100000001b3L in
|
let prime = 0x100000001b3L in
|
||||||
let h = ref offset_basis in
|
let h = ref offset_basis in
|
||||||
for k = 0 to 7 do
|
for k = 0 to 7 do
|
||||||
(h := Int64.(mul !h prime));
|
(h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff))));
|
||||||
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
|
h := Int64.(mul !h prime)
|
||||||
done;
|
done;
|
||||||
Int64.to_int !h land max_int
|
Int64.to_int !h land max_int
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,31 +7,33 @@ type 'a t = 'a -> hash
|
||||||
type 'a iter = ('a -> unit) -> unit
|
type 'a iter = ('a -> unit) -> unit
|
||||||
type 'a gen = unit -> 'a option
|
type 'a gen = unit -> 'a option
|
||||||
|
|
||||||
(* FNV-1 hashing (multiply then XOR)
|
(* same as CCInt: *)
|
||||||
|
open struct
|
||||||
|
external hash_int_ : (int[@untagged]) -> (int[@untagged])
|
||||||
|
= "caml_cc_hash_int_byte" "caml_cc_hash_int"
|
||||||
|
[@@noalloc]
|
||||||
|
|
||||||
|
external hash64_ : (int64[@unboxed]) -> (int[@untagged])
|
||||||
|
= "caml_cc_hash_int64_byte" "caml_cc_hash_int64"
|
||||||
|
[@@noalloc]
|
||||||
|
|
||||||
|
(* FNV-1a hashing (XOR then multiply)
|
||||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
||||||
*)
|
*)
|
||||||
let fnv_offset_basis = 0xcbf29ce484222325L
|
let fnv_offset_basis = 0xcbf29ce484222325L
|
||||||
let fnv_prime = 0x100000001b3L
|
let fnv_prime = 0x100000001b3L
|
||||||
|
end
|
||||||
(* hash an integer *)
|
|
||||||
let hash_int_ n =
|
|
||||||
let h = ref fnv_offset_basis in
|
|
||||||
for k = 0 to 7 do
|
|
||||||
(h := Int64.(mul !h fnv_prime));
|
|
||||||
h := Int64.(logxor !h (of_int ((n lsr (k * 8)) land 0xff)))
|
|
||||||
done;
|
|
||||||
(* truncate back to int and remove sign *)
|
|
||||||
Int64.to_int !h land max_int
|
|
||||||
|
|
||||||
|
(* TODO: also port to C *)
|
||||||
let combine2 a b =
|
let combine2 a b =
|
||||||
let h = ref fnv_offset_basis in
|
let h = ref fnv_offset_basis in
|
||||||
(* we only do one loop, where we mix bytes of [a] and [b], so as
|
(* we only do one loop, where we mix bytes of [a] and [b], so as
|
||||||
to simplify control flow *)
|
to simplify control flow *)
|
||||||
for k = 0 to 7 do
|
for k = 0 to 7 do
|
||||||
(h := Int64.(mul !h fnv_prime));
|
|
||||||
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
||||||
(h := Int64.(mul !h fnv_prime));
|
(h := Int64.(mul !h fnv_prime));
|
||||||
h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff)))
|
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
|
||||||
|
h := Int64.(mul !h fnv_prime)
|
||||||
done;
|
done;
|
||||||
Int64.to_int !h land max_int
|
Int64.to_int !h land max_int
|
||||||
|
|
||||||
|
|
@ -42,26 +44,26 @@ let combine3 a b c =
|
||||||
(* we only do one loop, where we mix bytes of [a] [b] and [c], so as
|
(* we only do one loop, where we mix bytes of [a] [b] and [c], so as
|
||||||
to simplify control flow *)
|
to simplify control flow *)
|
||||||
for k = 0 to 7 do
|
for k = 0 to 7 do
|
||||||
(h := Int64.(mul !h fnv_prime));
|
|
||||||
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
||||||
(h := Int64.(mul !h fnv_prime));
|
(h := Int64.(mul !h fnv_prime));
|
||||||
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
|
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
|
||||||
(h := Int64.(mul !h fnv_prime));
|
(h := Int64.(mul !h fnv_prime));
|
||||||
h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff)))
|
(h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
|
||||||
|
h := Int64.(mul !h fnv_prime)
|
||||||
done;
|
done;
|
||||||
Int64.to_int !h land max_int
|
Int64.to_int !h land max_int
|
||||||
|
|
||||||
let combine4 a b c d =
|
let combine4 a b c d =
|
||||||
let h = ref fnv_offset_basis in
|
let h = ref fnv_offset_basis in
|
||||||
for k = 0 to 7 do
|
for k = 0 to 7 do
|
||||||
(h := Int64.(mul !h fnv_prime));
|
|
||||||
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
(h := Int64.(logxor !h (of_int ((a lsr (k * 8)) land 0xff))));
|
||||||
(h := Int64.(mul !h fnv_prime));
|
(h := Int64.(mul !h fnv_prime));
|
||||||
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
|
(h := Int64.(logxor !h (of_int ((b lsr (k * 8)) land 0xff))));
|
||||||
(h := Int64.(mul !h fnv_prime));
|
(h := Int64.(mul !h fnv_prime));
|
||||||
(h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
|
(h := Int64.(logxor !h (of_int ((c lsr (k * 8)) land 0xff))));
|
||||||
(h := Int64.(mul !h fnv_prime));
|
(h := Int64.(mul !h fnv_prime));
|
||||||
h := Int64.(logxor !h (of_int ((d lsr (k * 8)) land 0xff)))
|
(h := Int64.(logxor !h (of_int ((d lsr (k * 8)) land 0xff))));
|
||||||
|
h := Int64.(mul !h fnv_prime)
|
||||||
done;
|
done;
|
||||||
Int64.to_int !h land max_int
|
Int64.to_int !h land max_int
|
||||||
|
|
||||||
|
|
@ -72,29 +74,19 @@ let combine6 a b c d e f = combine4 a b c (combine3 d e f)
|
||||||
|
|
||||||
let const h _ = h
|
let const h _ = h
|
||||||
let const0 _ = 0
|
let const0 _ = 0
|
||||||
let int = hash_int_
|
let[@inline] int i = hash_int_ i
|
||||||
|
|
||||||
let bool b =
|
let[@inline] bool b =
|
||||||
hash_int_
|
hash_int_
|
||||||
(if b then
|
(if b then
|
||||||
1
|
1
|
||||||
else
|
else
|
||||||
2)
|
2)
|
||||||
|
|
||||||
let char x = hash_int_ (Char.code x)
|
let[@inline] char x = hash_int_ (Char.code x)
|
||||||
|
let int64 = hash64_
|
||||||
(* hash an integer *)
|
let[@inline] int32 (x : int32) = int64 (Int64.of_int32 x)
|
||||||
let int64 n : int =
|
let[@inline] nativeint (x : nativeint) = int64 (Int64.of_nativeint x)
|
||||||
let h = ref fnv_offset_basis in
|
|
||||||
for k = 0 to 7 do
|
|
||||||
(h := Int64.(mul !h fnv_prime));
|
|
||||||
h := Int64.(logxor !h (logand (shift_right_logical n (k * 8)) 0xffL))
|
|
||||||
done;
|
|
||||||
(* truncate back to int and remove sign *)
|
|
||||||
Int64.to_int !h land max_int
|
|
||||||
|
|
||||||
let int32 (x : int32) = int64 (Int64.of_int32 x)
|
|
||||||
let nativeint (x : nativeint) = int64 (Int64.of_nativeint x)
|
|
||||||
|
|
||||||
(* do not hash more than 128 bytes in strings/bytes *)
|
(* do not hash more than 128 bytes in strings/bytes *)
|
||||||
let max_len_b_ = 128
|
let max_len_b_ = 128
|
||||||
|
|
@ -102,9 +94,9 @@ let max_len_b_ = 128
|
||||||
let bytes (x : bytes) =
|
let bytes (x : bytes) =
|
||||||
let h = ref fnv_offset_basis in
|
let h = ref fnv_offset_basis in
|
||||||
for i = 0 to min max_len_b_ (Bytes.length x - 1) do
|
for i = 0 to min max_len_b_ (Bytes.length x - 1) do
|
||||||
(h := Int64.(mul !h fnv_prime));
|
|
||||||
let byte = Char.code (Bytes.unsafe_get x i) in
|
let byte = Char.code (Bytes.unsafe_get x i) in
|
||||||
h := Int64.(logxor !h (of_int byte))
|
(h := Int64.(logxor !h (of_int byte)));
|
||||||
|
h := Int64.(mul !h fnv_prime)
|
||||||
done;
|
done;
|
||||||
Int64.to_int !h land max_int
|
Int64.to_int !h land max_int
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,6 @@ let max : t -> t -> t = Stdlib.max
|
||||||
|
|
||||||
let sign i = compare i zero
|
let sign i = compare i zero
|
||||||
|
|
||||||
(* use FNV-1:
|
|
||||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function *)
|
|
||||||
external hash_to_int64 : (int64[@unboxed]) -> (int64[@unboxed])
|
external hash_to_int64 : (int64[@unboxed]) -> (int64[@unboxed])
|
||||||
= "caml_cc_hash_int64_to_int64_byte" "caml_cc_hash_int64_to_int64"
|
= "caml_cc_hash_int64_to_int64_byte" "caml_cc_hash_int64_to_int64"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,29 +2,20 @@
|
||||||
#include <caml/mlvalues.h>
|
#include <caml/mlvalues.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
/* FNV-1 hash for a 64-bit integer.
|
/* FNV-1a hash for a 64-bit integer.
|
||||||
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function */
|
||||||
|
|
||||||
FNV-1 order: multiply then XOR (as opposed to FNV-1a which XORs first).
|
|
||||||
Uses the standard 64-bit FNV parameters:
|
|
||||||
offset_basis = 0xcbf29ce484222325
|
|
||||||
prime = 0x00000100000001b3
|
|
||||||
|
|
||||||
Core routine: operates on all 8 bytes of an int64_t. */
|
|
||||||
|
|
||||||
static inline int64_t cc_fnv_hash_int64(int64_t n) {
|
static inline int64_t cc_fnv_hash_int64(int64_t n) {
|
||||||
uint64_t un = (uint64_t)n;
|
uint64_t un = (uint64_t)n;
|
||||||
uint64_t h = UINT64_C(0xcbf29ce484222325);
|
uint64_t h = UINT64_C(0xcbf29ce484222325);
|
||||||
const uint64_t prime = UINT64_C(0x100000001b3);
|
const uint64_t prime = UINT64_C(0x100000001b3);
|
||||||
for (int k = 0; k < 8; k++) {
|
for (int k = 0; k < 8; k++) {
|
||||||
h *= prime;
|
|
||||||
h ^= (un >> (k * 8)) & 0xff;
|
h ^= (un >> (k * 8)) & 0xff;
|
||||||
|
h *= prime;
|
||||||
}
|
}
|
||||||
return (int64_t)h;
|
return (int64_t)h;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* --- CCInt.hash entry points (int -> int) --- */
|
|
||||||
|
|
||||||
/* Mask to the OCaml int range (63 bits on 64-bit, 31 on 32-bit)
|
/* Mask to the OCaml int range (63 bits on 64-bit, 31 on 32-bit)
|
||||||
before hashing, so negative OCaml ints hash the same as
|
before hashing, so negative OCaml ints hash the same as
|
||||||
the unsigned representation seen by OCaml's [lsr]. */
|
the unsigned representation seen by OCaml's [lsr]. */
|
||||||
|
|
@ -41,8 +32,6 @@ CAMLprim value caml_cc_hash_int_byte(value v_n) {
|
||||||
return Val_long(caml_cc_hash_int(Long_val(v_n)));
|
return Val_long(caml_cc_hash_int(Long_val(v_n)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* --- int64 hash entry points (int64 -> int) --- */
|
|
||||||
|
|
||||||
/* native: unboxed int64 in, untagged int out */
|
/* native: unboxed int64 in, untagged int out */
|
||||||
CAMLprim intnat caml_cc_hash_int64(int64_t n) {
|
CAMLprim intnat caml_cc_hash_int64(int64_t n) {
|
||||||
return (intnat)((uint64_t)cc_fnv_hash_int64(n) & Max_long);
|
return (intnat)((uint64_t)cc_fnv_hash_int64(n) & Max_long);
|
||||||
|
|
@ -53,8 +42,6 @@ CAMLprim value caml_cc_hash_int64_byte(value v_n) {
|
||||||
return Val_long(caml_cc_hash_int64(Int64_val(v_n)));
|
return Val_long(caml_cc_hash_int64(Int64_val(v_n)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* --- int64 -> int64 entry points (for hash_to_int64) --- */
|
|
||||||
|
|
||||||
/* native: unboxed int64 in, unboxed int64 out.
|
/* native: unboxed int64 in, unboxed int64 out.
|
||||||
Masks to non-negative int64 (matches OCaml's Int64.max_int). */
|
Masks to non-negative int64 (matches OCaml's Int64.max_int). */
|
||||||
CAMLprim int64_t caml_cc_hash_int64_to_int64(int64_t n) {
|
CAMLprim int64_t caml_cc_hash_int64_to_int64(int64_t n) {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,19 @@
|
||||||
open CCHash
|
open CCHash
|
||||||
module T = (val Containers_testlib.make ~__FILE__ ())
|
module T = (val Containers_testlib.make ~__FILE__ ())
|
||||||
include T;;
|
include T
|
||||||
|
|
||||||
|
open struct
|
||||||
|
let hash_ocaml64 (n : int64) : int =
|
||||||
|
let offset_basis = 0xcbf29ce484222325L in
|
||||||
|
let prime = 0x100000001b3L in
|
||||||
|
let h = ref offset_basis in
|
||||||
|
for k = 0 to 7 do
|
||||||
|
(h := Int64.(logxor !h (Int64.logand (Int64.shift_left n (k * 8)) 0xffL)));
|
||||||
|
h := Int64.(mul !h prime)
|
||||||
|
done;
|
||||||
|
Int64.to_int !h land max_int
|
||||||
|
end
|
||||||
|
;;
|
||||||
|
|
||||||
t @@ fun () -> int 42 >= 0;;
|
t @@ fun () -> int 42 >= 0;;
|
||||||
t @@ fun () -> int max_int >= 0;;
|
t @@ fun () -> int max_int >= 0;;
|
||||||
|
|
@ -17,3 +30,7 @@ t @@ fun () -> string "abc" <> string "abcd";;
|
||||||
q Q.int (fun i ->
|
q Q.int (fun i ->
|
||||||
Q.assume (i >= 0);
|
Q.assume (i >= 0);
|
||||||
int i = int64 (Int64.of_int i))
|
int i = int64 (Int64.of_int i))
|
||||||
|
;;
|
||||||
|
|
||||||
|
q Q.int64
|
||||||
|
Q.(fun i -> Int64.compare i 0L >= 0 ==> (CCInt64.hash i = hash_ocaml64 i))
|
||||||
|
|
|
||||||
|
|
@ -265,10 +265,16 @@ module Op = struct
|
||||||
( 1,
|
( 1,
|
||||||
list_small gen_x >|= fun l ->
|
list_small gen_x >|= fun l ->
|
||||||
Append l, size + List.length l );
|
Append l, size + List.length l );
|
||||||
( 1,
|
|
||||||
list_size (0 -- 5) gen_x >|= fun l ->
|
|
||||||
Flat_map l, size * (1 + List.length l) );
|
|
||||||
];
|
];
|
||||||
|
(if size < 10_000 then
|
||||||
|
[
|
||||||
|
(* flat map can explode, only do it if list isn't too big *)
|
||||||
|
( 1,
|
||||||
|
list_size (0 -- 5) gen_x >|= fun l ->
|
||||||
|
Flat_map l, size * (1 + List.length l) );
|
||||||
|
]
|
||||||
|
else
|
||||||
|
[]);
|
||||||
]
|
]
|
||||||
in
|
in
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue