diff --git a/src/base/Chunk_stack.ml b/src/base/Chunk_stack.ml
deleted file mode 100644
index d9e3af3b..00000000
--- a/src/base/Chunk_stack.ml
+++ /dev/null
@@ -1,112 +0,0 @@
-
-module Buf = struct
-  type t = {
-    mutable b: bytes;
-    mutable len: int;
-  }
-
-  let create ?(cap=16) () : t =
-    { len=0; b=Bytes.create cap; }
-
-  let ensure_size_ (self:t) (new_len:int) : unit =
-    if new_len > Bytes.length self.b then (
-      let size = min (new_len + new_len / 4 + 5) Sys.max_string_length in
-      if new_len > size then failwith "max buf size exceeded";
-
-      let b2 = Bytes.create size in
-      Bytes.blit self.b 0 b2 0 self.len;
-      self.b <- b2
-    )
-
-  let add_bytes (self:t) (b:bytes) (off:int) (len:int) =
-    ensure_size_ self (self.len + len);
-    Bytes.blit self.b self.len b off len;
-    self.len <- self.len + len
-
-  let[@inline] add_buf (self:t) (other:t) =
-    add_bytes self other.b 0 other.len
-
-  let clear self = self.len <- 0
-
-  let contents self = Bytes.sub_string self.b 0 self.len
-end
-
-module Writer = struct
-  type t = {
-    write: Buf.t -> unit;
-  }
-
-  let nop_ _ = ()
-
-  let dummy : t = { write=nop_; }
-
-  let into_buf (into:Buf.t) : t =
-    let blen = Bytes.create 4 in
-    let write buf =
-      Buf.add_buf into buf;
-      (* add len *)
-      Bytes.set_int32_le blen 0 (Int32.of_int buf.Buf.len);
-      Buf.add_bytes into blen 0 4;
-    in
-    { write; }
-
-  let into_channel (oc:out_channel) : t =
-    let blen = Bytes.create 4 in
-    let write buf =
-      output oc buf.Buf.b 0 buf.Buf.len;
-      (* add len *)
-      Bytes.set_int32_le blen 0 (Int32.of_int buf.Buf.len);
-      output oc blen 0 4
-    in
-    { write; }
-end
-
-module Reader = struct
-  type t = {
-    read: Buf.t -> bool;
-  }
-
-  let[@inline] next (self:t) buf : bool = self.read buf
-
-  let empty : t = { read=fun _ -> false }
-
-  let from_buf (buf:Buf.t) : t =
-    assert false (* TODO *)
-
-  let with_file_backward (filename:string) f =
-    CCIO.with_in ~flags:[Open_binary; Open_rdonly] filename @@ fun ic ->
-
-    let len = in_channel_length ic in
-    seek_in ic len;
-
-    (* read length *)
-    let blen = Bytes.create 4 in
-
-    let read buf : bool =
-      let pos = pos_in ic in
-      if pos > 0 then (
-        (* read length of preceding chunk *)
-        assert (pos>=4);
-        seek_in ic (pos - 4);
-
-        really_input ic blen 0 4;
-        let chunk_len = Int32.to_int (Bytes.get_int32_le blen 0) in
-        Printf.printf "read chunk of len %d\n%!" chunk_len;
-
-        (* now read chunk *)
-        Buf.ensure_size_ buf chunk_len;
-        seek_in ic (pos - 4 - chunk_len);
-        really_input ic buf.Buf.b 0 chunk_len;
-        buf.Buf.len <- chunk_len;
-
-        true
-      ) else (
-        false
-      )
-    in
-    f {read}
-end
-
-(*$T
-  false
-  *)
diff --git a/src/util/Chunk_stack.ml b/src/util/Chunk_stack.ml
new file mode 100644
index 00000000..eef6ff36
--- /dev/null
+++ b/src/util/Chunk_stack.ml
@@ -0,0 +1,163 @@
+
+module Buf = struct
+  type t = {
+    mutable b: bytes;
+    mutable len: int;
+  }
+
+  let create ?(cap=16) () : t =
+    { len=0; b=Bytes.create cap; }
+
+  (* grow the storage so that it can hold at least [new_len] bytes,
+     keeping the first [self.len] bytes *)
+  let resize_ self new_len : unit =
+    let size = min (new_len + new_len / 4 + 5) Sys.max_string_length in
+    if new_len > size then failwith "max buf size exceeded";
+
+    let b2 = Bytes.create size in
+    Bytes.blit self.b 0 b2 0 self.len;
+    self.b <- b2
+
+  let[@inline] ensure_size_ (self:t) (new_len:int) : unit =
+    if new_len > Bytes.length self.b then (
+      resize_ self new_len
+    )
+
+  (* append the slice [b[off .. off+len-1]] at the end of [self] *)
+  let add_bytes (self:t) (b:bytes) (off:int) (len:int) =
+    ensure_size_ self (self.len + len);
+    Bytes.blit b off self.b self.len len;
+    self.len <- self.len + len
+
+  let[@inline] add_buf (self:t) (other:t) =
+    add_bytes self other.b 0 other.len
+
+  let clear self = self.len <- 0
+
+  let contents self = Bytes.sub_string self.b 0 self.len
+end
+
+module Writer = struct
+  type t = {
+    write: bytes -> int -> int -> unit;
+  }
+
+  let nop_ _ = ()
+
+  let dummy : t = { write=fun _ _ _ -> (); }
+
+  (* store [len] in [blen] as a little-endian int32. Fails, before anything
+     is written, if [len] does not fit in 32 bits (instead of truncating). *)
+  let set_len_ (blen:bytes) (len:int) : unit =
+    let n = Int32.of_int len in
+    if Int32.to_int n <> len then failwith "chunk too large";
+    Bytes.set_int32_le blen 0 n
+
+  (* each chunk is written as its payload followed by its length, so
+     that the stack can be read backward starting from the end *)
+  let into_buf (into:Buf.t) : t =
+    let blen = Bytes.create 4 in
+    let write b i len =
+      set_len_ blen len;
+      Buf.add_bytes into b i len;
+      (* add len *)
+      Buf.add_bytes into blen 0 4;
+    in
+    { write; }
+
+  let into_channel (oc:out_channel) : t =
+    let blen = Bytes.create 4 in
+    let write b i len =
+      set_len_ blen len;
+      output oc b i len;
+      (* add len *)
+      output oc blen 0 4
+    in
+    { write; }
+
+  let[@inline] add_buf self buf = self.write buf.Buf.b 0 buf.Buf.len
+  let[@inline] add_bytes self b i len = self.write b i len
+  let[@inline] add_string self s =
+    add_bytes self (Bytes.unsafe_of_string s) 0 (String.length s)
+end
+
+module Reader = struct
+  type t = {
+    read: 'a. yield:(bytes -> int -> int -> 'a) -> finish:(unit -> 'a) -> 'a;
+  } [@@unboxed]
+
+  let[@inline] next (self:t) f : bool =
+    self.read
+      ~yield:(fun b i len -> f b i len; true)
+      ~finish:(fun () -> false)
+
+  let next_string (self:t) : string option =
+    self.read
+      ~yield:(fun b i len -> Some (Bytes.sub_string b i len))
+      ~finish:(fun () -> None)
+
+  let empty : t = { read=fun ~yield:_ ~finish -> finish() }
+
+  let from_buf (ibuf:Buf.t) : t =
+    let i = ref ibuf.Buf.len in
+
+    (* read next record *)
+    let read ~yield ~finish =
+      if !i > 0 then (
+        if !i < 4 then failwith "chunk stack: truncated length";
+        let chunk_size = Int32.to_int (Bytes.get_int32_le ibuf.Buf.b (!i - 4)) in
+        (* the chunk must fit in what precedes its length *)
+        if chunk_size < 0 || chunk_size > !i - 4 then
+          failwith "chunk stack: invalid chunk length";
+
+        i := !i - 4 - chunk_size;
+        yield ibuf.Buf.b !i chunk_size
+      ) else (
+        finish()
+      )
+    in
+    { read; }
+
+  let with_file_backward (filename:string) f =
+    CCIO.with_in ~flags:[Open_binary; Open_rdonly] filename @@ fun ic ->
+
+    (* offset just past the next chunk to read (payload + length);
+       starts at the end of the file and moves backward *)
+    let pos = ref (in_channel_length ic) in
+
+    let blen = Bytes.create 4 in (* to read length *)
+    let buf = Buf.create() in (* local buffer *)
+
+    let read ~yield ~finish =
+      if !pos > 0 then (
+        (* read length of preceding chunk *)
+        if !pos < 4 then failwith "chunk stack: truncated length";
+        seek_in ic (!pos - 4);
+        really_input ic blen 0 4;
+        let chunk_len = Int32.to_int (Bytes.get_int32_le blen 0) in
+        if chunk_len < 0 || chunk_len > !pos - 4 then
+          failwith "chunk stack: invalid chunk length";
+
+        (* now read chunk *)
+        let start = !pos - 4 - chunk_len in
+        Buf.clear buf; (* nothing to preserve if [buf] is resized *)
+        Buf.ensure_size_ buf chunk_len;
+        seek_in ic start;
+        really_input ic buf.Buf.b 0 chunk_len;
+        buf.Buf.len <- chunk_len;
+
+        (* reading moved the channel forward again: remember where this
+           chunk starts, so that the next call reads the one before it *)
+        pos := start;
+
+        yield buf.Buf.b 0 buf.Buf.len
+      ) else (
+        finish()
+      )
+    in
+    f {read}
+end
+
+(*$T
+  false
+  *)
diff --git a/src/base/Chunk_stack.mli b/src/util/Chunk_stack.mli
similarity index 80%
rename from src/base/Chunk_stack.mli
rename to src/util/Chunk_stack.mli
index e508e673..3ee6ab6b 100644
--- a/src/base/Chunk_stack.mli
+++ b/src/util/Chunk_stack.mli
@@ -35,15 +35,30 @@ module Writer : sig
   val into_buf : Buf.t -> t
 
   val into_channel: out_channel -> t
+
+  val add_buf : t -> Buf.t -> unit
+  (** [add_buf w buf] passes the current content of [buf] to [w] as one chunk. *)
+
+  val add_bytes : t -> bytes -> int -> int -> unit
+  (** [add_bytes w b i len] passes the slice of [b] of length [len]
+      starting at offset [i] to [w] as one chunk. *)
+
+  val add_string : t -> string -> unit
+  (** [add_string w s] passes [s] to [w] as one chunk. *)
 end
 
 module Reader : sig
   type t
 
-  val next : t -> Buf.t -> bool
-  (** Read next chunk into buf.
+  val next : t -> (bytes -> int -> int -> unit) -> bool
+  (** Read next chunk, call the function with a slice of bytes.
+      The slice is only valid during the call, as the underlying bytes
+      may be reused for the next chunk.
       Returns [true] if a chunk was read, [false] if no more chunks are there. *)
 
+  val next_string : t -> string option
+  (** Read next chunk as a string, or [None] if no more chunks are there. *)
+
   val empty : t
 
   val from_buf : Buf.t -> t
diff --git a/src/util/Sidekick_util.ml b/src/util/Sidekick_util.ml
index 3e70f3da..b49f6c31 100644
--- a/src/util/Sidekick_util.ml
+++ b/src/util/Sidekick_util.ml
@@ -28,5 +28,6 @@ module Bag = Bag
 module Stat = Stat
 module Hash = Hash
 module Profile = Profile
+module Chunk_stack = Chunk_stack
 
 module Intf = Sidekick_sigs