mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
feat: add containers.leb128 library
adapted from pbrt
This commit is contained in:
parent
ab7d0fcc09
commit
0ff9614520
4 changed files with 231 additions and 0 deletions
98
src/leb128/containers_leb128.ml
Normal file
98
src/leb128/containers_leb128.ml
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
(* adapted from ocaml-protoc from code by c-cube *)
|
||||||
|
|
||||||
|
module Byte_slice = CCByte_slice
|
||||||
|
module Byte_buffer = CCByte_buffer
|
||||||
|
|
||||||
|
module Decode = struct
|
||||||
|
let skip (sl : Byte_slice.t) off : int =
|
||||||
|
let shift = ref 0 in
|
||||||
|
let continue = ref true in
|
||||||
|
|
||||||
|
let off = ref off in
|
||||||
|
let n_consumed = ref 0 in
|
||||||
|
|
||||||
|
while !continue do
|
||||||
|
if sl.len <= 0 then invalid_arg "out of bound";
|
||||||
|
incr n_consumed;
|
||||||
|
let b = Char.code (Bytes.get sl.bs !off) in
|
||||||
|
let cur = b land 0x7f in
|
||||||
|
if cur <> b then (
|
||||||
|
(* at least one byte follows this one *)
|
||||||
|
incr off;
|
||||||
|
shift := !shift + 7
|
||||||
|
) else if !shift < 63 || b land 0x7f <= 1 then
|
||||||
|
continue := false
|
||||||
|
else
|
||||||
|
invalid_arg "leb128 varint is too long"
|
||||||
|
done;
|
||||||
|
|
||||||
|
!n_consumed
|
||||||
|
|
||||||
|
let u64 (sl : Byte_slice.t) (off : int) : int64 * int =
|
||||||
|
let shift = ref 0 in
|
||||||
|
let res = ref 0L in
|
||||||
|
let continue = ref true in
|
||||||
|
|
||||||
|
let off = ref off in
|
||||||
|
let n_consumed = ref 0 in
|
||||||
|
|
||||||
|
while !continue do
|
||||||
|
if sl.len <= 0 then invalid_arg "out of bound";
|
||||||
|
incr n_consumed;
|
||||||
|
let b = Char.code (Bytes.get sl.bs !off) in
|
||||||
|
let cur = b land 0x7f in
|
||||||
|
if cur <> b then (
|
||||||
|
(* at least one byte follows this one *)
|
||||||
|
(res := Int64.(logor !res (shift_left (of_int cur) !shift)));
|
||||||
|
incr off;
|
||||||
|
shift := !shift + 7
|
||||||
|
) else if !shift < 63 || b land 0x7f <= 1 then (
|
||||||
|
(res := Int64.(logor !res (shift_left (of_int b) !shift)));
|
||||||
|
continue := false
|
||||||
|
) else
|
||||||
|
invalid_arg "leb128 varint is too long"
|
||||||
|
done;
|
||||||
|
|
||||||
|
!res, !n_consumed
|
||||||
|
|
||||||
|
let[@inline] uint_truncate sl off =
|
||||||
|
let v, n_consumed = u64 sl off in
|
||||||
|
Int64.to_int v, n_consumed
|
||||||
|
|
||||||
|
let[@inline] decode_zigzag (v : int64) : int64 =
|
||||||
|
Int64.(logxor (shift_right v 1) (neg (logand v Int64.one)))
|
||||||
|
|
||||||
|
let[@inline] i64 sl off : int64 * int =
|
||||||
|
let v, n_consumed = u64 sl off in
|
||||||
|
decode_zigzag v, n_consumed
|
||||||
|
|
||||||
|
let[@inline] int_truncate sl off =
|
||||||
|
let v, n_consumed = u64 sl off in
|
||||||
|
Int64.to_int (decode_zigzag v), n_consumed
|
||||||
|
end
|
||||||
|
|
||||||
|
module Encode = struct
|
||||||
|
let[@inline] encode_zigzag (i : int64) : int64 =
|
||||||
|
Int64.(logxor (shift_left i 1) (shift_right i 63))
|
||||||
|
|
||||||
|
external varint_size : (int64[@unboxed]) -> int
|
||||||
|
= "caml_cc_leb128_varint_size_byte" "caml_cc_leb128_varint_size"
|
||||||
|
[@@noalloc]
|
||||||
|
(** Compute how many bytes this int would occupy as varint *)
|
||||||
|
|
||||||
|
external varint_slice : bytes -> (int[@untagged]) -> (int64[@unboxed]) -> unit
|
||||||
|
= "caml_cc_leb128_varint_byte" "caml_cc_leb128_varint"
|
||||||
|
[@@noalloc]
|
||||||
|
(** Write this int as varint into the given slice *)
|
||||||
|
|
||||||
|
let[@inline] u64 (buf : Byte_buffer.t) (i : int64) =
|
||||||
|
let n = varint_size i in
|
||||||
|
Byte_buffer.ensure_free buf n;
|
||||||
|
assert (buf.len + n <= Bytes.length buf.bs);
|
||||||
|
varint_slice buf.bs buf.len i;
|
||||||
|
buf.len <- buf.len + n
|
||||||
|
|
||||||
|
let[@inline] i64 buf i : unit = u64 buf (encode_zigzag i)
|
||||||
|
let[@inline] uint buf i : unit = u64 buf (Int64.of_int i)
|
||||||
|
let[@inline] int buf i : unit = u64 buf (encode_zigzag (Int64.of_int i))
|
||||||
|
end
|
||||||
49
src/leb128/containers_leb128.mli
Normal file
49
src/leb128/containers_leb128.mli
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
(** LEB128 encoding and decoding.
|
||||||
|
|
||||||
|
See https://en.wikipedia.org/wiki/LEB128 . *)
|
||||||
|
|
||||||
|
module Byte_slice = CCByte_slice
|
||||||
|
module Byte_buffer = CCByte_buffer
|
||||||
|
|
||||||
|
module Decode : sig
|
||||||
|
val decode_zigzag : int64 -> int64
|
||||||
|
(** Turn an unsigned integer into a signed one.
|
||||||
|
|
||||||
|
See https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding
|
||||||
|
*)
|
||||||
|
|
||||||
|
val skip : Byte_slice.t -> int -> int
|
||||||
|
(** [skip slice off] reads an integer at offset [off], and returns how many
|
||||||
|
bytes the integer occupies. *)
|
||||||
|
|
||||||
|
val u64 : Byte_slice.t -> int -> int64 * int
|
||||||
|
(** [u64 slice off] reads an integer at offset [off], and returns a pair
|
||||||
|
[v, n_consumed]. [v] is the read integer, [n_consumed] is the number of
|
||||||
|
bytes consumed during reading. *)
|
||||||
|
|
||||||
|
val i64 : Byte_slice.t -> int -> int64 * int
|
||||||
|
(** Read a signed int64 by reading a u64 and zigzag decoding it *)
|
||||||
|
|
||||||
|
val int_truncate : Byte_slice.t -> int -> int * int
|
||||||
|
(** Like {!i64} but truncates to integer. Returns a pair [v, n_consumed]. *)
|
||||||
|
|
||||||
|
val uint_truncate : Byte_slice.t -> int -> int * int
|
||||||
|
(** Like {!u64} but truncates to integer. *)
|
||||||
|
end
|
||||||
|
|
||||||
|
module Encode : sig
|
||||||
|
val encode_zigzag : int64 -> int64
|
||||||
|
(** Turn a signed int64 into a u64 via zigzag encoding. *)
|
||||||
|
|
||||||
|
val u64 : Byte_buffer.t -> int64 -> unit
|
||||||
|
(** Write a unsigned int *)
|
||||||
|
|
||||||
|
val i64 : Byte_buffer.t -> int64 -> unit
|
||||||
|
(** Write a signed int via zigzag encoding *)
|
||||||
|
|
||||||
|
val uint : Byte_buffer.t -> int -> unit
|
||||||
|
(** Turn an uint into a u64 and write it *)
|
||||||
|
|
||||||
|
val int : Byte_buffer.t -> int -> unit
|
||||||
|
(** Turn an int into a int64 and write it *)
|
||||||
|
end
|
||||||
11
src/leb128/dune
Normal file
11
src/leb128/dune
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
(library
|
||||||
|
(name containers_leb128)
|
||||||
|
(public_name containers.leb128)
|
||||||
|
(synopsis
|
||||||
|
"LEB128 encoding (https://en.wikipedia.org/wiki/LEB128) for cephalopod")
|
||||||
|
(libraries containers)
|
||||||
|
(foreign_stubs
|
||||||
|
(language c)
|
||||||
|
(flags :standard -std=c99 -O2)
|
||||||
|
(names stubs))
|
||||||
|
(ocamlopt_flags :standard -inline 100))
|
||||||
73
src/leb128/stubs.c
Normal file
73
src/leb128/stubs.c
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
|
||||||
|
// readapted from ocaml-protoc, original code also from c-cube
|
||||||
|
|
||||||
|
#include <caml/alloc.h>
|
||||||
|
#include <caml/memory.h>
|
||||||
|
#include <caml/mlvalues.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
static inline int ix_leb128_varint_size(uint64_t i) {
|
||||||
|
/* generated with:
|
||||||
|
for i in range(1,10):
|
||||||
|
ceiling = (1 << (i*7))-1
|
||||||
|
print(f'if (i <= {ceiling}L) return {i};')
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (i <= 127L) return 1;
|
||||||
|
if (i <= 16383L) return 2;
|
||||||
|
if (i <= 2097151L) return 3;
|
||||||
|
if (i <= 268435455L) return 4;
|
||||||
|
if (i <= 34359738367L) return 5;
|
||||||
|
if (i <= 4398046511103L) return 6;
|
||||||
|
if (i <= 562949953421311L) return 7;
|
||||||
|
if (i <= 72057594037927935L) return 8;
|
||||||
|
if (i <= 9223372036854775807L) return 9;
|
||||||
|
return 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
// number of bytes for i
|
||||||
|
CAMLprim value caml_cc_leb128_varint_size(int64_t i) {
|
||||||
|
int res = ix_leb128_varint_size(i);
|
||||||
|
return Val_int(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
// boxed version, for bytecode
|
||||||
|
CAMLprim value caml_cc_leb128_varint_size_byte(value v_i) {
|
||||||
|
CAMLparam1(v_i);
|
||||||
|
|
||||||
|
int64_t i = Int64_val(v_i);
|
||||||
|
int res = ix_leb128_varint_size(i);
|
||||||
|
CAMLreturn(Val_int(res));
|
||||||
|
}
|
||||||
|
|
||||||
|
// write i at str[idx…] in varint
|
||||||
|
static inline void ix_leb128_varint(unsigned char *str, uint64_t i) {
|
||||||
|
while (true) {
|
||||||
|
uint64_t cur = i & 0x7f;
|
||||||
|
if (cur == i) {
|
||||||
|
*str = (unsigned char)cur;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
*str = (unsigned char)(cur | 0x80);
|
||||||
|
i = i >> 7;
|
||||||
|
++str;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// write `i` starting at `idx`
|
||||||
|
CAMLprim value caml_cc_leb128_varint(value _str, intnat idx, int64_t i) {
|
||||||
|
char *str = Bytes_val(_str);
|
||||||
|
ix_leb128_varint(str + idx, i);
|
||||||
|
return Val_unit;
|
||||||
|
}
|
||||||
|
|
||||||
|
CAMLprim value caml_cc_leb128_varint_byte(value _str, value _idx, value _i) {
|
||||||
|
CAMLparam3(_str, _idx, _i);
|
||||||
|
char *str = Bytes_val(_str);
|
||||||
|
int idx = Int_val(_idx);
|
||||||
|
int64_t i = Int64_val(_i);
|
||||||
|
ix_leb128_varint(str + idx, i);
|
||||||
|
CAMLreturn(Val_unit);
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue