From 8b60f5237701f1894cbdedacb004bb96a90de5fe Mon Sep 17 00:00:00 2001
From: Simon Cruanes
Date: Mon, 13 May 2024 21:05:06 -0400
Subject: [PATCH] add byte_slice module, fix warnings

---
Review notes (text between the three-dash line and the first diff is not
part of the commit message):
- CCByte_buffer: [iter] and [fold_left] looped over [0..len] inclusive and
  read one byte past the active content; they now stop at [len - 1].
- CCByte_slice: negative offsets, indices and lengths are now rejected, and
  the format string of [show] is restored (reconstructed: it was empty).
- CCByte_slice.mli: every value is documented.
- The `index` blob ids below were not recomputed for the edited content.

 fuzz/dune                  |  2 +-
 src/core/CCByte_buffer.ml  | 44 ++++++++++++++++++++++----------------------
 src/core/CCByte_buffer.mli |  7 ++++--
 src/core/CCByte_slice.ml   | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/core/CCByte_slice.mli  | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/core/containers.ml     |  1 +
 tests/core/t_fun.ml        |  2 ++
 7 files changed, 134 insertions(+), 25 deletions(-)
 create mode 100644 src/core/CCByte_slice.ml
 create mode 100644 src/core/CCByte_slice.mli

diff --git a/fuzz/dune b/fuzz/dune
index 8cc8a87c..0f377574 100644
--- a/fuzz/dune
+++ b/fuzz/dune
@@ -1,6 +1,6 @@
 (executables
  (flags
-  (-w "+a-4-9-29-37-40-42-44-48-50-32" -g))
+  (-w "+a-4-9-29-37-40-42-44-48-50-32-70" -g))
  (names
   ccsexp_parse_string_does_not_crash
   ccutf8_string_uchar_to_bytes_is_same_as_simple_version
diff --git a/src/core/CCByte_buffer.ml b/src/core/CCByte_buffer.ml
index c15dbf61..258e4cac 100644
--- a/src/core/CCByte_buffer.ml
+++ b/src/core/CCByte_buffer.ml
@@ -1,21 +1,21 @@
 type 'a iter = ('a -> unit) -> unit
 
 type t = {
-  mutable bytes: bytes;
+  mutable bs: bytes;
   mutable len: int;
 }
 
 let create ?(cap = 0) () : t =
-  let bytes =
+  let bs =
     if cap = 0 then
       Bytes.unsafe_of_string ""
     else
       Bytes.create cap
   in
-  { len = 0; bytes }
+  { len = 0; bs }
 
-let[@inline] capacity self : int = Bytes.length self.bytes
-let[@inline] bytes self = self.bytes
+let[@inline] capacity self : int = Bytes.length self.bs
+let[@inline] bytes self = self.bs
 let[@inline] length self = self.len
 let[@inline] is_empty self = self.len = 0
 let[@inline] clear self = self.len <- 0
@@ -28,8 +28,8 @@ let grow_cap_ self =
 let grow_to_ self newcap =
   if newcap = capacity self then invalid_arg "byte_buf: cannot grow further";
   let newbytes = Bytes.create newcap in
-  Bytes.blit self.bytes 0 newbytes 0 self.len;
-  self.bytes <- newbytes
+  Bytes.blit self.bs 0 newbytes 0 self.len;
+  self.bs <- newbytes
 
 let[@inline never] grow_ self =
   let newcap = grow_cap_ self in
@@ -46,12 +46,12 @@ let shrink_to self n = if self.len > n then self.len <- n
 let append_buf (self : t) buf : unit =
   let n = Buffer.length buf in
   ensure_cap self (length self + n);
-  Buffer.blit buf 0 self.bytes self.len n;
+  Buffer.blit buf 0 self.bs self.len n;
   self.len <- self.len + n
 
 let append_subbytes self b off len =
   ensure_cap self (length self + len);
-  Bytes.blit b off self.bytes self.len len;
+  Bytes.blit b off self.bs self.len len;
   self.len <- self.len + len
 
 let append_bytes self b = append_subbytes self b 0 (Bytes.length b)
@@ -61,15 +61,15 @@ let append_substring self s off len =
   append_subbytes self (Bytes.unsafe_of_string s) off len
 
 let[@inline] add_char_unsafe_ self c =
-  Bytes.unsafe_set self.bytes self.len c;
+  Bytes.unsafe_set self.bs self.len c;
   self.len <- self.len + 1
 
 let[@inline] add_char self c =
   if self.len = capacity self then grow_ self;
   add_char_unsafe_ self c
 
-let[@inline] unsafe_get self i = Bytes.unsafe_get self.bytes i
-let[@inline] unsafe_set self i c = Bytes.unsafe_set self.bytes i c
+let[@inline] unsafe_get self i = Bytes.unsafe_get self.bs i
+let[@inline] unsafe_set self i c = Bytes.unsafe_set self.bs i c
 
 let[@inline] get self i =
   if i < 0 || i >= self.len then invalid_arg "Byte_buf.get";
@@ -79,26 +79,26 @@ let[@inline] set self i c =
   if i < 0 || i >= self.len then invalid_arg "Byte_buf.set";
   unsafe_set self i c
 
-let[@inline] contents self = Bytes.sub_string self.bytes 0 self.len
-let[@inline] contents_bytes self = Bytes.sub self.bytes 0 self.len
+let[@inline] contents self = Bytes.sub_string self.bs 0 self.len
+let[@inline] contents_bytes self = Bytes.sub self.bs 0 self.len
 let[@inline] append_iter self i = i (add_char self)
 let[@inline] append_seq self seq = Seq.iter (add_char self) seq
 
 let fold_left f acc self =
-  let { bytes; len } = self in
+  let { bs; len } = self in
 
   (* capture current content *)
   let acc = ref acc in
-  for i = 0 to len do
-    acc := f !acc (Bytes.unsafe_get bytes i)
+  for i = 0 to len - 1 do
+    acc := f !acc (Bytes.unsafe_get bs i)
   done;
   !acc
 
 let iter f self =
-  let { bytes; len } = self in
+  let { bs; len } = self in
   (* capture current content *)
-  for i = 0 to len do
-    f (Bytes.unsafe_get bytes i)
+  for i = 0 to len - 1 do
+    f (Bytes.unsafe_get bs i)
   done
 
 let of_seq seq =
@@ -114,12 +114,12 @@ let of_iter iter =
 let to_iter self yield = iter yield self
 
 let to_seq self =
-  let { bytes; len } = self in
+  let { bs; len } = self in
   let rec s i () =
     if i = len then
       Seq.Nil
     else
-      Seq.Cons (Bytes.unsafe_get bytes i, s (i + 1))
+      Seq.Cons (Bytes.unsafe_get bs i, s (i + 1))
   in
   s 0
 
diff --git a/src/core/CCByte_buffer.mli b/src/core/CCByte_buffer.mli
index 77325013..09460266 100644
--- a/src/core/CCByte_buffer.mli
+++ b/src/core/CCByte_buffer.mli
@@ -5,11 +5,14 @@
 *)
 
 type t = {
-  mutable bytes: bytes;
+  mutable bs: bytes;  (** The backing bytes buffer *)
   mutable len: int;
+      (** Length of the "active" slice in [bs]. The actual content
+          of the buffer is [bs[0]..bs[len-1]]. What comes after
+          is undefined garbage. *)
 }
 (** The byte buffer.
-    The definition is public since NEXT_RELEASE *)
+    The definition is public since NEXT_RELEASE . *)
 
 type 'a iter = ('a -> unit) -> unit
 
diff --git a/src/core/CCByte_slice.ml b/src/core/CCByte_slice.ml
new file mode 100644
index 00000000..61c41fec
--- /dev/null
+++ b/src/core/CCByte_slice.ml
@@ -0,0 +1,51 @@
+type t = {
+  bs: bytes;
+  mutable off: int;
+  mutable len: int;
+}
+
+let show self = Printf.sprintf "<slice len=%d>" self.len
+let pp out self = Format.pp_print_string out (show self)
+
+let create ?(off = 0) ?len bs =
+  (* check [off] first so that the default length cannot be negative *)
+  if off < 0 || off > Bytes.length bs then
+    invalid_arg "Bslice: invalid offset";
+  let len =
+    match len with
+    | None -> Bytes.length bs - off
+    | Some n ->
+      (* compare against the remaining room: [off + n] could overflow *)
+      if n < 0 || n > Bytes.length bs - off then
+        invalid_arg "Bslice: invalid length";
+      n
+  in
+  { bs; off; len }
+
+let[@inline] of_string s = create (Bytes.unsafe_of_string s)
+let[@inline] len self = self.len
+let[@inline] contents self = Bytes.sub_string self.bs self.off self.len
+
+let[@inline] clear self =
+  self.len <- 0;
+  self.off <- 0
+
+let[@inline] get self i : char =
+  (* negative indices must be rejected too: [unsafe_get] checks nothing *)
+  if i < 0 || i >= self.len then invalid_arg "Bslice: out of bound access";
+  Bytes.unsafe_get self.bs (self.off + i)
+
+let[@inline] set self i c : unit =
+  if i < 0 || i >= self.len then invalid_arg "Bslice: out of bound access";
+  Bytes.unsafe_set self.bs (self.off + i) c
+
+let sub self off len =
+  if off < 0 || len < 0 || len > self.len - off then
+    invalid_arg "Bslice: invalid length";
+  { bs = self.bs; off = self.off + off; len }
+
+let[@inline] consume self n : unit =
+  if n < 0 || n > self.len then
+    invalid_arg "Bslice: consuming too many bytes";
+  self.off <- self.off + n;
+  self.len <- self.len - n
diff --git a/src/core/CCByte_slice.mli b/src/core/CCByte_slice.mli
new file mode 100644
index 00000000..62376218
--- /dev/null
+++ b/src/core/CCByte_slice.mli
@@ -0,0 +1,52 @@
+(** A simple byte slice.
+
+    @since NEXT_RELEASE *)
+
+type t = {
+  bs: bytes;  (** The bytes, potentially shared between many slices *)
+  mutable off: int;  (** Offset in [bs] *)
+  mutable len: int;
+      (** Length of the slice. Valid indices are [bs[off]…bs[off+len-1]],
+          inclusive. *)
+}
+[@@deriving show]
+
+val create : ?off:int -> ?len:int -> bytes -> t
+(** [create ?off ?len bs] is a slice of [bs] that starts at [off] (default
+    [0]) and spans [len] bytes (default: everything from [off] to the end
+    of [bs]). The bytes are shared with [bs], not copied.
+    @raise Invalid_argument if [off] or [len] is negative, or if the slice
+    would extend past the end of [bs]. *)
+
+val clear : t -> unit
+(** [clear sl] resets both the offset and the length of [sl] to [0]. *)
+
+val of_string : string -> t
+(** [of_string s] is a slice over all of [s]. The string is not copied, so
+    the slice must never be written to (e.g. with {!set}). *)
+
+val len : t -> int
+(** Length of the slice, in bytes. *)
+
+val get : t -> int -> char
+(** [get sl i] is the byte at index [i] of the slice (index [0] is the
+    first byte of the slice, not of [bs]).
+    @raise Invalid_argument if [i < 0] or [i >= len sl]. *)
+
+val set : t -> int -> char -> unit
+(** [set sl i c] stores [c] at index [i] of the slice.
+    @raise Invalid_argument if [i < 0] or [i >= len sl]. *)
+
+val consume : t -> int -> unit
+(** [consume sl n] drops the first [n] bytes of [sl]: the offset advances
+    by [n] and the length shrinks by [n].
+    @raise Invalid_argument if [n < 0] or [n > len sl]. *)
+
+val contents : t -> string
+(** [contents sl] is a fresh string holding a copy of the slice's bytes. *)
+
+val sub : t -> int -> int -> t
+(** [sub sl off len] is the slice of [len] bytes that starts [off] bytes
+    into [sl]. It shares the underlying bytes with [sl].
+    @raise Invalid_argument if [off] or [len] is negative, or if
+    [off + len] exceeds the length of [sl]. *)
diff --git a/src/core/containers.ml b/src/core/containers.ml
index be529eba..94988ff5 100644
--- a/src/core/containers.ml
+++ b/src/core/containers.ml
@@ -5,6 +5,7 @@
 module Array = CCArray
 module Bool = CCBool
 module Byte_buffer = CCByte_buffer
+module Byte_slice = CCByte_slice
 module Char = CCChar
 module Equal = CCEqual
 module Either = CCEither
diff --git a/tests/core/t_fun.ml b/tests/core/t_fun.ml
index 59b6eb4f..2b196592 100644
--- a/tests/core/t_fun.ml
+++ b/tests/core/t_fun.ml
@@ -1,3 +1,5 @@
+
+[@@@ocaml.warning "-33"]
 open CCFun
 module T = (val Containers_testlib.make ~__FILE__ ())
 include T;;