mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-05 19:00:31 -05:00
Compare commits
8 commits
e5f038d632
...
330e4a0c9f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
330e4a0c9f | ||
|
|
99dba20fa6 | ||
|
|
f934db1e9c | ||
|
|
14ad8c1f2a | ||
|
|
0ff9614520 | ||
|
|
fd1495324a | ||
|
|
765a8da876 | ||
|
|
18ffdd707b |
13 changed files with 270 additions and 7 deletions
15
CHANGELOG.md
15
CHANGELOG.md
|
|
@ -1,6 +1,17 @@
|
|||
# Changelog
|
||||
|
||||
## main
|
||||
## 3.16
|
||||
|
||||
|
||||
- breaking: Renamed predicate parameter of `take_while`, `rtake_while` from `p` to `f`, aligining it with pre-existing `drop_while`.
|
||||
|
||||
- feat: add `containers.leb128` library
|
||||
- feat: add `CCFun.with_return`
|
||||
- Added functions to the `Char` module to check common character properties.
|
||||
- feat: add `CCVector.findi`
|
||||
|
||||
|
||||
- fix: compat with OCaml 5.4
|
||||
- fix: oob(!!) in CCHash.bytes
|
||||
|
||||
## 3.15
|
||||
|
||||
|
|
|
|||
|
|
@ -1153,12 +1153,14 @@ module Iter_ = struct
|
|||
let bench_to_array n =
|
||||
let iter () = Iter.to_array Iter.(1 -- n)
|
||||
and gen () = Gen.to_array Gen.(1 -- n)
|
||||
and oseq () = OSeq.to_array OSeq.(1 -- n) in
|
||||
and oseq () = OSeq.to_array OSeq.(1 -- n)
|
||||
and of_iter () = CCArray.of_iter Iter.(1 -- n) in
|
||||
B.throughputN 3 ~repeat
|
||||
[
|
||||
"iter.to_array", iter, ();
|
||||
"gen.to_array", gen, ();
|
||||
"oseq.to_array", oseq, ();
|
||||
"ccarray.of_iter", of_iter, ();
|
||||
]
|
||||
|
||||
let bench_cons n =
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ let sort_indices cmp a =
|
|||
Array.sort (fun k1 k2 -> cmp a.(k1) a.(k2)) b;
|
||||
b
|
||||
|
||||
let sort_ranking cmp a = sort_indices compare (sort_indices cmp a)
|
||||
let sort_ranking cmp a = sort_indices CCInt.compare (sort_indices cmp a)
|
||||
|
||||
let rev a =
|
||||
let b = Array.copy a in
|
||||
|
|
@ -466,6 +466,12 @@ let to_seq a =
|
|||
|
||||
let to_iter a k = iter k a
|
||||
|
||||
let of_iter (i : 'a iter) : 'a array =
|
||||
let open CCVector in
|
||||
let vec = create () in
|
||||
i (push vec);
|
||||
to_array vec
|
||||
|
||||
let to_gen a =
|
||||
let k = ref 0 in
|
||||
fun () ->
|
||||
|
|
|
|||
|
|
@ -240,6 +240,11 @@ val to_iter : 'a t -> 'a iter
|
|||
in modification of the iterator.
|
||||
@since 2.8 *)
|
||||
|
||||
val of_iter : 'a iter -> 'a t
|
||||
(** [of_iter iter] builds a array from a given [iter].
|
||||
In the result, elements appear in the same order as they did in the source [iter].
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val to_seq : 'a t -> 'a Seq.t
|
||||
(** [to_seq a] returns a [Seq.t] of the elements of an array [a].
|
||||
The input array [a] is shared with the sequence and modification of it will result
|
||||
|
|
|
|||
|
|
@ -248,6 +248,11 @@ val to_iter : 'a t -> 'a iter
|
|||
in modification of the iterator.
|
||||
@since 2.8 *)
|
||||
|
||||
val of_iter : 'a iter -> 'a t
|
||||
(** [of_iter iter] builds a array from a given [iter].
|
||||
In the result, elements appear in the same order as they did in the source [iter].
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val to_seq : 'a t -> 'a Seq.t
|
||||
(** [to_seq a] returns a [Seq.t] of the elements of an array [a].
|
||||
The input array [a] is shared with the sequence and modification of it will result
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ let find_array arr x =
|
|||
-1
|
||||
]}
|
||||
|
||||
@since NEXT_RELEASE *)
|
||||
@since 3.15 *)
|
||||
|
||||
(** {2 Infix}
|
||||
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ val find : ('a -> bool) -> ('a, _) t -> 'a option
|
|||
|
||||
val findi : ('a -> bool) -> ('a, _) t -> (int * 'a) option
|
||||
(** Find an element and its index that satisfies the predicate.
|
||||
@since NEXT_RELEASE *)
|
||||
@since 3.15 *)
|
||||
|
||||
val find_exn : ('a -> bool) -> ('a, _) t -> 'a
|
||||
(** Find an element that satisfies the predicate, or
|
||||
|
|
|
|||
98
src/leb128/containers_leb128.ml
Normal file
98
src/leb128/containers_leb128.ml
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
(* adapted from ocaml-protoc from code by c-cube *)
|
||||
|
||||
module Byte_slice = CCByte_slice
|
||||
module Byte_buffer = CCByte_buffer
|
||||
|
||||
module Decode = struct
|
||||
let skip (sl : Byte_slice.t) off : int =
|
||||
let shift = ref 0 in
|
||||
let continue = ref true in
|
||||
|
||||
let off = ref off in
|
||||
let n_consumed = ref 0 in
|
||||
|
||||
while !continue do
|
||||
if sl.len <= 0 then invalid_arg "out of bound";
|
||||
incr n_consumed;
|
||||
let b = Char.code (Bytes.get sl.bs !off) in
|
||||
let cur = b land 0x7f in
|
||||
if cur <> b then (
|
||||
(* at least one byte follows this one *)
|
||||
incr off;
|
||||
shift := !shift + 7
|
||||
) else if !shift < 63 || b land 0x7f <= 1 then
|
||||
continue := false
|
||||
else
|
||||
invalid_arg "leb128 varint is too long"
|
||||
done;
|
||||
|
||||
!n_consumed
|
||||
|
||||
let u64 (sl : Byte_slice.t) (off : int) : int64 * int =
|
||||
let shift = ref 0 in
|
||||
let res = ref 0L in
|
||||
let continue = ref true in
|
||||
|
||||
let off = ref off in
|
||||
let n_consumed = ref 0 in
|
||||
|
||||
while !continue do
|
||||
if sl.len <= 0 then invalid_arg "out of bound";
|
||||
incr n_consumed;
|
||||
let b = Char.code (Bytes.get sl.bs !off) in
|
||||
let cur = b land 0x7f in
|
||||
if cur <> b then (
|
||||
(* at least one byte follows this one *)
|
||||
(res := Int64.(logor !res (shift_left (of_int cur) !shift)));
|
||||
incr off;
|
||||
shift := !shift + 7
|
||||
) else if !shift < 63 || b land 0x7f <= 1 then (
|
||||
(res := Int64.(logor !res (shift_left (of_int b) !shift)));
|
||||
continue := false
|
||||
) else
|
||||
invalid_arg "leb128 varint is too long"
|
||||
done;
|
||||
|
||||
!res, !n_consumed
|
||||
|
||||
let[@inline] uint_truncate sl off =
|
||||
let v, n_consumed = u64 sl off in
|
||||
Int64.to_int v, n_consumed
|
||||
|
||||
let[@inline] decode_zigzag (v : int64) : int64 =
|
||||
Int64.(logxor (shift_right v 1) (neg (logand v Int64.one)))
|
||||
|
||||
let[@inline] i64 sl off : int64 * int =
|
||||
let v, n_consumed = u64 sl off in
|
||||
decode_zigzag v, n_consumed
|
||||
|
||||
let[@inline] int_truncate sl off =
|
||||
let v, n_consumed = u64 sl off in
|
||||
Int64.to_int (decode_zigzag v), n_consumed
|
||||
end
|
||||
|
||||
module Encode = struct
|
||||
let[@inline] encode_zigzag (i : int64) : int64 =
|
||||
Int64.(logxor (shift_left i 1) (shift_right i 63))
|
||||
|
||||
external varint_size : (int64[@unboxed]) -> int
|
||||
= "caml_cc_leb128_varint_size_byte" "caml_cc_leb128_varint_size"
|
||||
[@@noalloc]
|
||||
(** Compute how many bytes this int would occupy as varint *)
|
||||
|
||||
external varint_slice : bytes -> (int[@untagged]) -> (int64[@unboxed]) -> unit
|
||||
= "caml_cc_leb128_varint_byte" "caml_cc_leb128_varint"
|
||||
[@@noalloc]
|
||||
(** Write this int as varint into the given slice *)
|
||||
|
||||
let[@inline] u64 (buf : Byte_buffer.t) (i : int64) =
|
||||
let n = varint_size i in
|
||||
Byte_buffer.ensure_free buf n;
|
||||
assert (buf.len + n <= Bytes.length buf.bs);
|
||||
varint_slice buf.bs buf.len i;
|
||||
buf.len <- buf.len + n
|
||||
|
||||
let[@inline] i64 buf i : unit = u64 buf (encode_zigzag i)
|
||||
let[@inline] uint buf i : unit = u64 buf (Int64.of_int i)
|
||||
let[@inline] int buf i : unit = u64 buf (encode_zigzag (Int64.of_int i))
|
||||
end
|
||||
49
src/leb128/containers_leb128.mli
Normal file
49
src/leb128/containers_leb128.mli
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
(** LEB128 encoding and decoding.
|
||||
|
||||
See https://en.wikipedia.org/wiki/LEB128 . *)
|
||||
|
||||
module Byte_slice = CCByte_slice
|
||||
module Byte_buffer = CCByte_buffer
|
||||
|
||||
module Decode : sig
|
||||
val decode_zigzag : int64 -> int64
|
||||
(** Turn an unsigned integer into a signed one.
|
||||
|
||||
See https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding
|
||||
*)
|
||||
|
||||
val skip : Byte_slice.t -> int -> int
|
||||
(** [skip slice off] reads an integer at offset [off], and returns how many
|
||||
bytes the integer occupies. *)
|
||||
|
||||
val u64 : Byte_slice.t -> int -> int64 * int
|
||||
(** [u64 slice off] reads an integer at offset [off], and returns a pair
|
||||
[v, n_consumed]. [v] is the read integer, [n_consumed] is the number of
|
||||
bytes consumed during reading. *)
|
||||
|
||||
val i64 : Byte_slice.t -> int -> int64 * int
|
||||
(** Read a signed int64 by reading a u64 and zigzag decoding it *)
|
||||
|
||||
val int_truncate : Byte_slice.t -> int -> int * int
|
||||
(** Like {!i64} but truncates to integer. Returns a pair [v, n_consumed]. *)
|
||||
|
||||
val uint_truncate : Byte_slice.t -> int -> int * int
|
||||
(** Like {!u64} but truncates to integer. *)
|
||||
end
|
||||
|
||||
module Encode : sig
|
||||
val encode_zigzag : int64 -> int64
|
||||
(** Turn a signed int64 into a u64 via zigzag encoding. *)
|
||||
|
||||
val u64 : Byte_buffer.t -> int64 -> unit
|
||||
(** Write a unsigned int *)
|
||||
|
||||
val i64 : Byte_buffer.t -> int64 -> unit
|
||||
(** Write a signed int via zigzag encoding *)
|
||||
|
||||
val uint : Byte_buffer.t -> int -> unit
|
||||
(** Turn an uint into a u64 and write it *)
|
||||
|
||||
val int : Byte_buffer.t -> int -> unit
|
||||
(** Turn an int into a int64 and write it *)
|
||||
end
|
||||
11
src/leb128/dune
Normal file
11
src/leb128/dune
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
(library
|
||||
(name containers_leb128)
|
||||
(public_name containers.leb128)
|
||||
(synopsis
|
||||
"LEB128 encoding (https://en.wikipedia.org/wiki/LEB128) for cephalopod")
|
||||
(libraries containers)
|
||||
(foreign_stubs
|
||||
(language c)
|
||||
(flags :standard -std=c99 -O2)
|
||||
(names stubs))
|
||||
(ocamlopt_flags :standard -inline 100))
|
||||
73
src/leb128/stubs.c
Normal file
73
src/leb128/stubs.c
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
// readapted from ocaml-protoc, original code also from c-cube
|
||||
|
||||
#include <caml/alloc.h>
|
||||
#include <caml/memory.h>
|
||||
#include <caml/mlvalues.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
static inline int ix_leb128_varint_size(uint64_t i) {
|
||||
/* generated with:
|
||||
for i in range(1,10):
|
||||
ceiling = (1 << (i*7))-1
|
||||
print(f'if (i <= {ceiling}L) return {i};')
|
||||
*/
|
||||
|
||||
if (i <= 127L) return 1;
|
||||
if (i <= 16383L) return 2;
|
||||
if (i <= 2097151L) return 3;
|
||||
if (i <= 268435455L) return 4;
|
||||
if (i <= 34359738367L) return 5;
|
||||
if (i <= 4398046511103L) return 6;
|
||||
if (i <= 562949953421311L) return 7;
|
||||
if (i <= 72057594037927935L) return 8;
|
||||
if (i <= 9223372036854775807L) return 9;
|
||||
return 10;
|
||||
}
|
||||
|
||||
// number of bytes for i
|
||||
CAMLprim value caml_cc_leb128_varint_size(int64_t i) {
|
||||
int res = ix_leb128_varint_size(i);
|
||||
return Val_int(res);
|
||||
}
|
||||
|
||||
// boxed version, for bytecode
|
||||
CAMLprim value caml_cc_leb128_varint_size_byte(value v_i) {
|
||||
CAMLparam1(v_i);
|
||||
|
||||
int64_t i = Int64_val(v_i);
|
||||
int res = ix_leb128_varint_size(i);
|
||||
CAMLreturn(Val_int(res));
|
||||
}
|
||||
|
||||
// write i at str[idx…] in varint
|
||||
static inline void ix_leb128_varint(unsigned char *str, uint64_t i) {
|
||||
while (true) {
|
||||
uint64_t cur = i & 0x7f;
|
||||
if (cur == i) {
|
||||
*str = (unsigned char)cur;
|
||||
break;
|
||||
} else {
|
||||
*str = (unsigned char)(cur | 0x80);
|
||||
i = i >> 7;
|
||||
++str;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write `i` starting at `idx`
|
||||
CAMLprim value caml_cc_leb128_varint(value _str, intnat idx, int64_t i) {
|
||||
char *str = Bytes_val(_str);
|
||||
ix_leb128_varint(str + idx, i);
|
||||
return Val_unit;
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_leb128_varint_byte(value _str, value _idx, value _i) {
|
||||
CAMLparam3(_str, _idx, _i);
|
||||
char *str = Bytes_val(_str);
|
||||
int idx = Int_val(_idx);
|
||||
int64_t i = Int64_val(_i);
|
||||
ix_leb128_varint(str + idx, i);
|
||||
CAMLreturn(Val_unit);
|
||||
}
|
||||
|
|
@ -309,3 +309,6 @@ q ~count:300 arr_arbitrary (fun a ->
|
|||
Array.sort CCInt.compare a1;
|
||||
sort_generic (module IA) ~cmp:CCInt.compare a2;
|
||||
a1 = a2)
|
||||
;;
|
||||
|
||||
q Q.(array int) (fun a -> of_iter (to_iter a) = a)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue