Compare commits

...

2 commits

Author SHA1 Message Date
Simon Cruanes
14ad8c1f2a
format
Some checks failed
Build and Test / build (push) Has been cancelled
Build and Test / format (push) Has been cancelled
2025-05-06 22:24:04 -04:00
Simon Cruanes
0ff9614520
feat: add containers.leb128 library
adapted from pbrt
2025-05-06 21:53:26 -04:00
5 changed files with 232 additions and 1 deletions

View file

@ -101,7 +101,7 @@ let max_len_b_ = 128
let bytes (x : bytes) =
let h = ref fnv_offset_basis in
for i = 0 to min max_len_b_ (Bytes.length x-1) do
for i = 0 to min max_len_b_ (Bytes.length x - 1) do
(h := Int64.(mul !h fnv_prime));
let byte = Char.code (Bytes.unsafe_get x i) in
h := Int64.(logxor !h (of_int byte))

View file

@ -0,0 +1,98 @@
(* adapted from ocaml-protoc from code by c-cube *)
module Byte_slice = CCByte_slice
module Byte_buffer = CCByte_buffer
module Decode = struct
  (* Shift reached by the 10th byte of a varint. A 64-bit value never needs
     more bytes than that: nine 7-bit groups cover bits 0..62 and the 10th
     byte may only contribute bit 63. *)
  let max_shift_ = 63

  (* [check_readable_ sl idx] raises [Invalid_argument] unless the byte at
     index [idx] of [sl.bs] may be read as part of [sl]. The upper bound is
     the end of the slice window, so decoding never wanders into bytes of
     [sl.bs] that lie past the slice.

     NOTE(review): offsets are used as direct indices into [sl.bs], as they
     were before this change; the bound below never rejects an offset that
     would be valid if callers instead treat offsets as relative to [sl.off].
     Confirm which convention callers rely on. *)
  let[@inline] check_readable_ (sl : Byte_slice.t) (idx : int) : unit =
    if sl.len <= 0 || idx >= sl.off + sl.len then invalid_arg "out of bound"

  (** [skip sl off] scans the varint starting at [off] without decoding its
      value and returns the number of bytes it occupies.
      @raise Invalid_argument
        if the read goes out of bounds or the varint does not fit in 64 bits.
  *)
  let skip (sl : Byte_slice.t) off : int =
    let shift = ref 0 in
    let continue = ref true in
    let off = ref off in
    let n_consumed = ref 0 in
    while !continue do
      check_readable_ sl !off;
      incr n_consumed;
      let b = Char.code (Bytes.get sl.bs !off) in
      let cur = b land 0x7f in
      if cur <> b then (
        (* continuation bit set: at least one byte follows this one. An 11th
           byte can never be part of a 64-bit value, so reject it here rather
           than keep scanning. *)
        if !shift >= max_shift_ then invalid_arg "leb128 varint is too long";
        incr off;
        shift := !shift + 7
      ) else if !shift < max_shift_ || cur <= 1 then
        (* last byte; when it is the 10th one, only bit 0 may be set *)
        continue := false
      else
        invalid_arg "leb128 varint is too long"
    done;
    !n_consumed

  (** [u64 sl off] decodes the unsigned varint starting at [off] and returns
      [(v, n_consumed)], where [n_consumed] is the number of bytes read.
      @raise Invalid_argument
        if the read goes out of bounds or the varint does not fit in 64 bits.
  *)
  let u64 (sl : Byte_slice.t) (off : int) : int64 * int =
    let shift = ref 0 in
    let res = ref 0L in
    let continue = ref true in
    let off = ref off in
    let n_consumed = ref 0 in
    while !continue do
      check_readable_ sl !off;
      incr n_consumed;
      let b = Char.code (Bytes.get sl.bs !off) in
      let cur = b land 0x7f in
      if cur <> b then (
        (* continuation bit set: at least one byte follows this one. Refusing
           to go past the 10th byte also keeps every shift count below 64;
           [Int64.shift_left] is unspecified for larger counts. *)
        if !shift >= max_shift_ then invalid_arg "leb128 varint is too long";
        (res := Int64.(logor !res (shift_left (of_int cur) !shift)));
        incr off;
        shift := !shift + 7
      ) else if !shift < max_shift_ || cur <= 1 then (
        (* last byte; when it is the 10th one, only bit 0 may be set *)
        (res := Int64.(logor !res (shift_left (of_int b) !shift)));
        continue := false
      ) else
        invalid_arg "leb128 varint is too long"
    done;
    !res, !n_consumed

  (** Same as {!u64}, with the value converted using [Int64.to_int]. *)
  let[@inline] uint_truncate sl off =
    let v, n_consumed = u64 sl off in
    Int64.to_int v, n_consumed

  (** Undo zigzag encoding: bit 0 of [v] carries the sign, the remaining bits
      carry the magnitude. *)
  let[@inline] decode_zigzag (v : int64) : int64 =
    Int64.(logxor (shift_right v 1) (neg (logand v Int64.one)))

  (** Read a varint with {!u64}, then zigzag-decode it into a signed value. *)
  let[@inline] i64 sl off : int64 * int =
    let v, n_consumed = u64 sl off in
    decode_zigzag v, n_consumed

  (** Same as {!i64}, with the value converted using [Int64.to_int]. *)
  let[@inline] int_truncate sl off =
    let v, n_consumed = u64 sl off in
    Int64.to_int (decode_zigzag v), n_consumed
end
module Encode = struct
  (** Zigzag-encode a signed value: the sign ends up in bit 0 so that values
      of small magnitude, positive or negative, yield short varints. *)
  let[@inline] encode_zigzag (i : int64) : int64 =
    let doubled = Int64.shift_left i 1 in
    let sign_mask = Int64.shift_right i 63 in
    Int64.logxor doubled sign_mask

  external varint_size : (int64[@unboxed]) -> int
    = "caml_cc_leb128_varint_size_byte" "caml_cc_leb128_varint_size"
  [@@noalloc]
  (** Number of bytes the varint encoding of this integer takes. *)

  external varint_slice : bytes -> (int[@untagged]) -> (int64[@unboxed]) -> unit
    = "caml_cc_leb128_varint_byte" "caml_cc_leb128_varint"
  [@@noalloc]
  (** [varint_slice bs off i] writes the varint encoding of [i] into [bs],
      starting at index [off]. *)

  (** [u64 buf i] appends the varint encoding of [i] at the end of [buf]. *)
  let[@inline] u64 (buf : Byte_buffer.t) (i : int64) =
    let size = varint_size i in
    (* make room first: the C stub writes without checking bounds itself *)
    Byte_buffer.ensure_free buf size;
    let start = buf.len in
    assert (start + size <= Bytes.length buf.bs);
    varint_slice buf.bs start i;
    buf.len <- start + size

  (** Append a signed [int64], zigzag-encoded. *)
  let[@inline] i64 buf i : unit = i |> encode_zigzag |> u64 buf

  (** Append an [int], converted with [Int64.of_int] and written as-is. *)
  let[@inline] uint buf i : unit = i |> Int64.of_int |> u64 buf

  (** Append an [int], converted with [Int64.of_int] then zigzag-encoded. *)
  let[@inline] int buf i : unit =
    i |> Int64.of_int |> encode_zigzag |> u64 buf
end

View file

@ -0,0 +1,49 @@
(** LEB128 encoding and decoding.
See https://en.wikipedia.org/wiki/LEB128 . *)
module Byte_slice = CCByte_slice
module Byte_buffer = CCByte_buffer
module Decode : sig
val decode_zigzag : int64 -> int64
(** Turn a zigzag-encoded unsigned integer back into a signed one.
See https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding
*)
val skip : Byte_slice.t -> int -> int
(** [skip slice off] scans the varint found at offset [off] without decoding
its value, and returns how many bytes it occupies.
@raise Invalid_argument
  if the read is out of bounds or the varint is too long for 64 bits. *)
val u64 : Byte_slice.t -> int -> int64 * int
(** [u64 slice off] reads an unsigned varint at offset [off], and returns a
pair [v, n_consumed]. [v] is the read integer, [n_consumed] is the number of
bytes consumed during reading.
@raise Invalid_argument
  if the read is out of bounds or the varint is too long for 64 bits. *)
val i64 : Byte_slice.t -> int -> int64 * int
(** [i64 slice off] reads a signed int64 by reading a {!u64} and zigzag
decoding it. Returns a pair [v, n_consumed]; raises like {!u64}. *)
val int_truncate : Byte_slice.t -> int -> int * int
(** Like {!i64} but truncates the value to [int] (using [Int64.to_int]).
Returns a pair [v, n_consumed]. *)
val uint_truncate : Byte_slice.t -> int -> int * int
(** Like {!u64} but truncates the value to [int] (using [Int64.to_int]).
Returns a pair [v, n_consumed]. *)
end
module Encode : sig
val encode_zigzag : int64 -> int64
(** Turn a signed int64 into a u64 via zigzag encoding, so that values of
small magnitude (positive or negative) get a short varint encoding. *)
val u64 : Byte_buffer.t -> int64 -> unit
(** [u64 buf i] appends [i] to [buf] as an unsigned LEB128 varint, growing
the buffer if needed. *)
val i64 : Byte_buffer.t -> int64 -> unit
(** [i64 buf i] appends the signed [i] to [buf], zigzag-encoding it first
(see {!encode_zigzag}) and then writing it like {!u64}. *)
val uint : Byte_buffer.t -> int -> unit
(** [uint buf i] converts [i] to an int64 (with [Int64.of_int]) and writes it
like {!u64}, without zigzag encoding. *)
val int : Byte_buffer.t -> int -> unit
(** [int buf i] converts [i] to an int64, zigzag-encodes it, and writes it
like {!u64}. *)
end

11
src/leb128/dune Normal file
View file

@ -0,0 +1,11 @@
; LEB128 varint encoding/decoding, published as the [containers.leb128]
; sub-library.
(library
(name containers_leb128)
(public_name containers.leb128)
; NOTE(review): the synopsis mentions "cephalopod", which looks like a
; leftover from another project -- confirm the intended wording.
(synopsis
"LEB128 encoding (https://en.wikipedia.org/wiki/LEB128) for cephalopod")
(libraries containers)
; The encoder is implemented by C stubs (module [stubs]), built as C99 at -O2.
(foreign_stubs
(language c)
(flags :standard -std=c99 -O2)
(names stubs))
; Raise the native-code inlining level for this library.
(ocamlopt_flags :standard -inline 100))

73
src/leb128/stubs.c Normal file
View file

@ -0,0 +1,73 @@
// readapted from ocaml-protoc, original code also from c-cube
#include <caml/alloc.h>
#include <caml/memory.h>
#include <caml/mlvalues.h>
#include <stdbool.h>
#include <stdint.h>
// Number of bytes needed to encode `i` as a LEB128 varint: one byte per
// started group of 7 significant bits, with a minimum of one byte (for 0).
// The result is always in 1..10 for a 64-bit input.
static inline int ix_leb128_varint_size(uint64_t i) {
  int n_bytes = 1;
  // each extra 7-bit group beyond the first costs one more byte
  for (uint64_t rest = i >> 7; rest != 0; rest >>= 7) {
    ++n_bytes;
  }
  return n_bytes;
}
// Native-code stub: takes the int64 unboxed and returns, as a tagged OCaml
// int, the number of bytes its varint encoding occupies. The argument is
// reinterpreted as unsigned, so negative values take the full 10 bytes.
CAMLprim value caml_cc_leb128_varint_size(int64_t i) {
  return Val_int(ix_leb128_varint_size((uint64_t)i));
}
// Bytecode stub: same as caml_cc_leb128_varint_size, but the int64 argument
// arrives boxed and has to be read out of its OCaml block first.
CAMLprim value caml_cc_leb128_varint_size_byte(value v_i) {
  CAMLparam1(v_i);
  const int64_t unboxed = Int64_val(v_i);
  const int n_bytes = ix_leb128_varint_size(unboxed);
  CAMLreturn(Val_int(n_bytes));
}
// Write `i` as a LEB128 varint starting at `str`, least-significant 7-bit
// group first. Every byte but the last has its high bit (0x80) set. No bounds
// are checked here: the caller must provide enough room (see
// ix_leb128_varint_size).
static inline void ix_leb128_varint(unsigned char *str, uint64_t i) {
  // emit continuation bytes while more than 7 significant bits remain
  for (; i > 0x7f; i >>= 7) {
    *str++ = (unsigned char)((i & 0x7f) | 0x80);
  }
  // final byte: what is left fits in 7 bits, high bit stays clear
  *str = (unsigned char)i;
}
// Native-code stub: write `i` as a LEB128 varint into the OCaml bytes `_str`,
// starting at byte offset `idx`. `idx` arrives untagged and `i` unboxed.
// No bounds check is done here; the caller must have reserved enough room.
CAMLprim value caml_cc_leb128_varint(value _str, intnat idx, int64_t i) {
  // Bytes_val yields `unsigned char *`; keep that type so the pointer matches
  // ix_leb128_varint's parameter instead of round-tripping through `char *`.
  unsigned char *dst = Bytes_val(_str) + idx;
  ix_leb128_varint(dst, (uint64_t)i);
  return Val_unit;
}
// Bytecode stub: same as caml_cc_leb128_varint, but the offset arrives as a
// tagged OCaml int and the value as a boxed int64.
// No bounds check is done here; the caller must have reserved enough room.
CAMLprim value caml_cc_leb128_varint_byte(value _str, value _idx, value _i) {
  CAMLparam3(_str, _idx, _i);
  // Bytes_val yields `unsigned char *`, the type ix_leb128_varint expects.
  unsigned char *str = Bytes_val(_str);
  // Long_val keeps the full native width, matching the `intnat` offset of the
  // native stub (Int_val would narrow it to `int`).
  intnat idx = Long_val(_idx);
  int64_t i = Int64_val(_i);
  ix_leb128_varint(str + idx, (uint64_t)i);
  CAMLreturn(Val_unit);
}