mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
Compare commits
1 commit
330e4a0c9f
...
e5f038d632
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e5f038d632 |
9 changed files with 6 additions and 248 deletions
15
CHANGELOG.md
15
CHANGELOG.md
|
|
@ -1,17 +1,6 @@
|
|||
# Changelog
|
||||
|
||||
## 3.16
|
||||
|
||||
|
||||
- breaking: Renamed predicate parameter of `take_while`, `rtake_while` from `p` to `f`, aligining it with pre-existing `drop_while`.
|
||||
|
||||
- feat: add `containers.leb128` library
|
||||
- feat: add `CCFun.with_return`
|
||||
- Added functions to the `Char` module to check common character properties.
|
||||
- feat: add `CCVector.findi`
|
||||
|
||||
|
||||
- fix: compat with OCaml 5.4
|
||||
- fix: oob(!!) in CCHash.bytes
|
||||
## main
|
||||
|
||||
## 3.15
|
||||
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ let sort_indices cmp a =
|
|||
Array.sort (fun k1 k2 -> cmp a.(k1) a.(k2)) b;
|
||||
b
|
||||
|
||||
let sort_ranking cmp a = sort_indices CCInt.compare (sort_indices cmp a)
|
||||
let sort_ranking cmp a = sort_indices compare (sort_indices cmp a)
|
||||
|
||||
let rev a =
|
||||
let b = Array.copy a in
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ let find_array arr x =
|
|||
-1
|
||||
]}
|
||||
|
||||
@since 3.15 *)
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
(** {2 Infix}
|
||||
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ let max_len_b_ = 128
|
|||
|
||||
let bytes (x : bytes) =
|
||||
let h = ref fnv_offset_basis in
|
||||
for i = 0 to min max_len_b_ (Bytes.length x - 1) do
|
||||
for i = 0 to min max_len_b_ (Bytes.length x-1) do
|
||||
(h := Int64.(mul !h fnv_prime));
|
||||
let byte = Char.code (Bytes.unsafe_get x i) in
|
||||
h := Int64.(logxor !h (of_int byte))
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ val find : ('a -> bool) -> ('a, _) t -> 'a option
|
|||
|
||||
val findi : ('a -> bool) -> ('a, _) t -> (int * 'a) option
|
||||
(** Find an element and its index that satisfies the predicate.
|
||||
@since 3.15 *)
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val find_exn : ('a -> bool) -> ('a, _) t -> 'a
|
||||
(** Find an element that satisfies the predicate, or
|
||||
|
|
|
|||
|
|
@ -1,98 +0,0 @@
|
|||
(* adapted from ocaml-protoc from code by c-cube *)
|
||||
|
||||
module Byte_slice = CCByte_slice
|
||||
module Byte_buffer = CCByte_buffer
|
||||
|
||||
module Decode = struct
|
||||
let skip (sl : Byte_slice.t) off : int =
|
||||
let shift = ref 0 in
|
||||
let continue = ref true in
|
||||
|
||||
let off = ref off in
|
||||
let n_consumed = ref 0 in
|
||||
|
||||
while !continue do
|
||||
if sl.len <= 0 then invalid_arg "out of bound";
|
||||
incr n_consumed;
|
||||
let b = Char.code (Bytes.get sl.bs !off) in
|
||||
let cur = b land 0x7f in
|
||||
if cur <> b then (
|
||||
(* at least one byte follows this one *)
|
||||
incr off;
|
||||
shift := !shift + 7
|
||||
) else if !shift < 63 || b land 0x7f <= 1 then
|
||||
continue := false
|
||||
else
|
||||
invalid_arg "leb128 varint is too long"
|
||||
done;
|
||||
|
||||
!n_consumed
|
||||
|
||||
let u64 (sl : Byte_slice.t) (off : int) : int64 * int =
|
||||
let shift = ref 0 in
|
||||
let res = ref 0L in
|
||||
let continue = ref true in
|
||||
|
||||
let off = ref off in
|
||||
let n_consumed = ref 0 in
|
||||
|
||||
while !continue do
|
||||
if sl.len <= 0 then invalid_arg "out of bound";
|
||||
incr n_consumed;
|
||||
let b = Char.code (Bytes.get sl.bs !off) in
|
||||
let cur = b land 0x7f in
|
||||
if cur <> b then (
|
||||
(* at least one byte follows this one *)
|
||||
(res := Int64.(logor !res (shift_left (of_int cur) !shift)));
|
||||
incr off;
|
||||
shift := !shift + 7
|
||||
) else if !shift < 63 || b land 0x7f <= 1 then (
|
||||
(res := Int64.(logor !res (shift_left (of_int b) !shift)));
|
||||
continue := false
|
||||
) else
|
||||
invalid_arg "leb128 varint is too long"
|
||||
done;
|
||||
|
||||
!res, !n_consumed
|
||||
|
||||
let[@inline] uint_truncate sl off =
|
||||
let v, n_consumed = u64 sl off in
|
||||
Int64.to_int v, n_consumed
|
||||
|
||||
let[@inline] decode_zigzag (v : int64) : int64 =
|
||||
Int64.(logxor (shift_right v 1) (neg (logand v Int64.one)))
|
||||
|
||||
let[@inline] i64 sl off : int64 * int =
|
||||
let v, n_consumed = u64 sl off in
|
||||
decode_zigzag v, n_consumed
|
||||
|
||||
let[@inline] int_truncate sl off =
|
||||
let v, n_consumed = u64 sl off in
|
||||
Int64.to_int (decode_zigzag v), n_consumed
|
||||
end
|
||||
|
||||
module Encode = struct
|
||||
let[@inline] encode_zigzag (i : int64) : int64 =
|
||||
Int64.(logxor (shift_left i 1) (shift_right i 63))
|
||||
|
||||
external varint_size : (int64[@unboxed]) -> int
|
||||
= "caml_cc_leb128_varint_size_byte" "caml_cc_leb128_varint_size"
|
||||
[@@noalloc]
|
||||
(** Compute how many bytes this int would occupy as varint *)
|
||||
|
||||
external varint_slice : bytes -> (int[@untagged]) -> (int64[@unboxed]) -> unit
|
||||
= "caml_cc_leb128_varint_byte" "caml_cc_leb128_varint"
|
||||
[@@noalloc]
|
||||
(** Write this int as varint into the given slice *)
|
||||
|
||||
let[@inline] u64 (buf : Byte_buffer.t) (i : int64) =
|
||||
let n = varint_size i in
|
||||
Byte_buffer.ensure_free buf n;
|
||||
assert (buf.len + n <= Bytes.length buf.bs);
|
||||
varint_slice buf.bs buf.len i;
|
||||
buf.len <- buf.len + n
|
||||
|
||||
let[@inline] i64 buf i : unit = u64 buf (encode_zigzag i)
|
||||
let[@inline] uint buf i : unit = u64 buf (Int64.of_int i)
|
||||
let[@inline] int buf i : unit = u64 buf (encode_zigzag (Int64.of_int i))
|
||||
end
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
(** LEB128 encoding and decoding.
|
||||
|
||||
See https://en.wikipedia.org/wiki/LEB128 . *)
|
||||
|
||||
module Byte_slice = CCByte_slice
|
||||
module Byte_buffer = CCByte_buffer
|
||||
|
||||
module Decode : sig
|
||||
val decode_zigzag : int64 -> int64
|
||||
(** Turn an unsigned integer into a signed one.
|
||||
|
||||
See https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding
|
||||
*)
|
||||
|
||||
val skip : Byte_slice.t -> int -> int
|
||||
(** [skip slice off] reads an integer at offset [off], and returns how many
|
||||
bytes the integer occupies. *)
|
||||
|
||||
val u64 : Byte_slice.t -> int -> int64 * int
|
||||
(** [u64 slice off] reads an integer at offset [off], and returns a pair
|
||||
[v, n_consumed]. [v] is the read integer, [n_consumed] is the number of
|
||||
bytes consumed during reading. *)
|
||||
|
||||
val i64 : Byte_slice.t -> int -> int64 * int
|
||||
(** Read a signed int64 by reading a u64 and zigzag decoding it *)
|
||||
|
||||
val int_truncate : Byte_slice.t -> int -> int * int
|
||||
(** Like {!i64} but truncates to integer. Returns a pair [v, n_consumed]. *)
|
||||
|
||||
val uint_truncate : Byte_slice.t -> int -> int * int
|
||||
(** Like {!u64} but truncates to integer. *)
|
||||
end
|
||||
|
||||
module Encode : sig
|
||||
val encode_zigzag : int64 -> int64
|
||||
(** Turn a signed int64 into a u64 via zigzag encoding. *)
|
||||
|
||||
val u64 : Byte_buffer.t -> int64 -> unit
|
||||
(** Write a unsigned int *)
|
||||
|
||||
val i64 : Byte_buffer.t -> int64 -> unit
|
||||
(** Write a signed int via zigzag encoding *)
|
||||
|
||||
val uint : Byte_buffer.t -> int -> unit
|
||||
(** Turn an uint into a u64 and write it *)
|
||||
|
||||
val int : Byte_buffer.t -> int -> unit
|
||||
(** Turn an int into a int64 and write it *)
|
||||
end
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
(library
|
||||
(name containers_leb128)
|
||||
(public_name containers.leb128)
|
||||
(synopsis
|
||||
"LEB128 encoding (https://en.wikipedia.org/wiki/LEB128) for cephalopod")
|
||||
(libraries containers)
|
||||
(foreign_stubs
|
||||
(language c)
|
||||
(flags :standard -std=c99 -O2)
|
||||
(names stubs))
|
||||
(ocamlopt_flags :standard -inline 100))
|
||||
|
|
@ -1,73 +0,0 @@
|
|||
|
||||
// readapted from ocaml-protoc, original code also from c-cube
|
||||
|
||||
#include <caml/alloc.h>
|
||||
#include <caml/memory.h>
|
||||
#include <caml/mlvalues.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
static inline int ix_leb128_varint_size(uint64_t i) {
|
||||
/* generated with:
|
||||
for i in range(1,10):
|
||||
ceiling = (1 << (i*7))-1
|
||||
print(f'if (i <= {ceiling}L) return {i};')
|
||||
*/
|
||||
|
||||
if (i <= 127L) return 1;
|
||||
if (i <= 16383L) return 2;
|
||||
if (i <= 2097151L) return 3;
|
||||
if (i <= 268435455L) return 4;
|
||||
if (i <= 34359738367L) return 5;
|
||||
if (i <= 4398046511103L) return 6;
|
||||
if (i <= 562949953421311L) return 7;
|
||||
if (i <= 72057594037927935L) return 8;
|
||||
if (i <= 9223372036854775807L) return 9;
|
||||
return 10;
|
||||
}
|
||||
|
||||
// number of bytes for i
|
||||
CAMLprim value caml_cc_leb128_varint_size(int64_t i) {
|
||||
int res = ix_leb128_varint_size(i);
|
||||
return Val_int(res);
|
||||
}
|
||||
|
||||
// boxed version, for bytecode
|
||||
CAMLprim value caml_cc_leb128_varint_size_byte(value v_i) {
|
||||
CAMLparam1(v_i);
|
||||
|
||||
int64_t i = Int64_val(v_i);
|
||||
int res = ix_leb128_varint_size(i);
|
||||
CAMLreturn(Val_int(res));
|
||||
}
|
||||
|
||||
// write i at str[idx…] in varint
|
||||
static inline void ix_leb128_varint(unsigned char *str, uint64_t i) {
|
||||
while (true) {
|
||||
uint64_t cur = i & 0x7f;
|
||||
if (cur == i) {
|
||||
*str = (unsigned char)cur;
|
||||
break;
|
||||
} else {
|
||||
*str = (unsigned char)(cur | 0x80);
|
||||
i = i >> 7;
|
||||
++str;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write `i` starting at `idx`
|
||||
CAMLprim value caml_cc_leb128_varint(value _str, intnat idx, int64_t i) {
|
||||
char *str = Bytes_val(_str);
|
||||
ix_leb128_varint(str + idx, i);
|
||||
return Val_unit;
|
||||
}
|
||||
|
||||
CAMLprim value caml_cc_leb128_varint_byte(value _str, value _idx, value _i) {
|
||||
CAMLparam3(_str, _idx, _i);
|
||||
char *str = Bytes_val(_str);
|
||||
int idx = Int_val(_idx);
|
||||
int64_t i = Int64_val(_i);
|
||||
ix_leb128_varint(str + idx, i);
|
||||
CAMLreturn(Val_unit);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue