move chunk_stack to util, fix some bugs

This commit is contained in:
Simon Cruanes 2021-10-14 23:18:21 -04:00
parent 3a56fb0763
commit 73cca4ca18
No known key found for this signature in database
GPG key ID: 4AC01D0849AA62B6
4 changed files with 158 additions and 114 deletions

View file

@ -1,112 +0,0 @@
module Buf = struct
  (** Growable byte buffer. The first [len] bytes of [b] are the contents;
      the rest of [b] is spare capacity. *)
  type t = {
    mutable b: bytes;
    mutable len: int;
  }

  (** [create ?cap ()] returns an empty buffer backed by [cap] bytes
      (16 by default). *)
  let create ?(cap=16) () : t =
    { len=0; b=Bytes.create cap; }

  (** [ensure_size_ self new_len] makes sure the backing storage holds at
      least [new_len] bytes, reallocating (and keeping the current contents)
      if needed.
      @raise Failure if [new_len] exceeds [Sys.max_string_length]. *)
  let ensure_size_ (self:t) (new_len:int) : unit =
    if new_len > Bytes.length self.b then (
      (* grow by ~25% on top of the request to amortize repeated appends *)
      let size = min (new_len + new_len / 4 + 5) Sys.max_string_length in
      if new_len > size then failwith "max buf size exceeded";
      let b2 = Bytes.create size in
      Bytes.blit self.b 0 b2 0 self.len;
      self.b <- b2
    )

  (** [add_bytes self b off len] appends the slice [b.[off .. off+len-1]]
      to [self].
      @raise Invalid_argument if the slice is not within [b]. *)
  let add_bytes (self:t) (b:bytes) (off:int) (len:int) =
    ensure_size_ self (self.len + len);
    (* source is the caller's slice, destination is our storage
       (the arguments used to be swapped, which copied the wrong way) *)
    Bytes.blit b off self.b self.len len;
    self.len <- self.len + len

  (** [add_buf self other] appends the contents of [other] to [self]. *)
  let[@inline] add_buf (self:t) (other:t) =
    add_bytes self other.b 0 other.len

  (** Empty the buffer; the backing storage is kept. *)
  let clear self = self.len <- 0

  (** Copy of the current contents as a string. *)
  let contents self = Bytes.sub_string self.b 0 self.len
end
module Writer = struct
  (** Sink for chunks: [write buf] pushes the current contents of [buf]
      as one chunk. *)
  type t = {
    write: Buf.t -> unit;
  }

  let nop_ _ = ()

  (** Writer that ignores everything it is given. *)
  let dummy : t = { write=nop_; }

  (** [into_buf into] is a writer that appends each chunk to [into]: first
      the payload, then the payload length as a 4-byte little-endian
      int32. *)
  let into_buf (into:Buf.t) : t =
    (* scratch space for the encoded length, reused by every write *)
    let len_bytes = Bytes.create 4 in
    { write =
        (fun chunk ->
           Buf.add_buf into chunk;
           let n = Int32.of_int chunk.Buf.len in
           Bytes.set_int32_le len_bytes 0 n;
           Buf.add_bytes into len_bytes 0 4);
    }

  (** [into_channel oc] is a writer that outputs each chunk on [oc]: first
      the payload, then the payload length as a 4-byte little-endian
      int32. *)
  let into_channel (oc:out_channel) : t =
    (* scratch space for the encoded length, reused by every write *)
    let len_bytes = Bytes.create 4 in
    { write =
        (fun chunk ->
           output oc chunk.Buf.b 0 chunk.Buf.len;
           let n = Int32.of_int chunk.Buf.len in
           Bytes.set_int32_le len_bytes 0 n;
           output oc len_bytes 0 4);
    }
end
module Reader = struct
  (** A reader pops chunks off a chunk stack, last written chunk first.
      [read buf] stores the next chunk in [buf] (replacing its contents) and
      returns [true], or returns [false] once no chunk is left. *)
  type t = {
    read: Buf.t -> bool;
  }

  (** [next self buf] reads the next chunk into [buf]; [false] means that
      there was no chunk to read. *)
  let[@inline] next (self:t) buf : bool = self.read buf

  (** Reader that never yields a chunk. *)
  let empty : t = { read=fun _ -> false }

  (* Decode the little-endian int32 length stored at [b.[off .. off+3]].
     [avail] is the number of bytes that precede the length field, i.e. the
     largest length that can possibly be valid. *)
  let chunk_len_ (b:bytes) (off:int) ~(avail:int) : int =
    let n = Int32.to_int (Bytes.get_int32_le b off) in
    if n < 0 || n > avail then
      failwith "Chunk_stack.Reader: invalid chunk length";
    n

  (** [from_buf buf] reads the chunks stored in [buf] (as laid out by
      [Writer.into_buf]: payload followed by its 4-byte length), starting
      from the end. The buffer passed to [read] should be distinct
      from [buf].
      @raise Failure (from [read]) if a stored length is out of range. *)
  let from_buf (buf:Buf.t) : t =
    (* offset just past the next record to read; 0 when exhausted *)
    let i = ref buf.Buf.len in
    let read (out:Buf.t) : bool =
      if !i > 0 then (
        assert (!i >= 4);
        let len_pos = !i - 4 in
        let chunk_len = chunk_len_ buf.Buf.b len_pos ~avail:len_pos in
        let chunk_pos = len_pos - chunk_len in
        (* drop the old contents first so growing does not copy them *)
        Buf.clear out;
        Buf.ensure_size_ out chunk_len;
        Bytes.blit buf.Buf.b chunk_pos out.Buf.b 0 chunk_len;
        out.Buf.len <- chunk_len;
        i := chunk_pos;
        true
      ) else (
        false
      )
    in
    { read }

  (** [with_file_backward filename f] opens [filename] and calls [f] with a
      reader that yields the chunks of the file from the last one to the
      first one.
      @raise Failure (from [read]) if a stored length is out of range. *)
  let with_file_backward (filename:string) f =
    CCIO.with_in ~flags:[Open_binary; Open_rdonly] filename @@ fun ic ->
    (* Offset just past the next record to read. It is tracked explicitly:
       after [really_input] the channel position is at the END of the chunk
       payload, so [pos_in] cannot be used to find the preceding record. *)
    let pos = ref (in_channel_length ic) in
    let blen = Bytes.create 4 in (* to read length *)
    let read buf : bool =
      if !pos > 0 then (
        (* read length of preceding chunk *)
        assert (!pos >= 4);
        let len_pos = !pos - 4 in
        seek_in ic len_pos;
        really_input ic blen 0 4;
        let chunk_len = chunk_len_ blen 0 ~avail:len_pos in
        let chunk_pos = len_pos - chunk_len in
        (* now read chunk *)
        Buf.clear buf;
        Buf.ensure_size_ buf chunk_len;
        seek_in ic chunk_pos;
        really_input ic buf.Buf.b 0 chunk_len;
        buf.Buf.len <- chunk_len;
        pos := chunk_pos;
        true
      ) else (
        false
      )
    in
    f {read}
end
(*$T
false
*)

146
src/util/Chunk_stack.ml Normal file
View file

@ -0,0 +1,146 @@
module Buf = struct
  (** Growable byte buffer. The first [len] bytes of [b] are the contents;
      the rest of [b] is spare capacity. *)
  type t = {
    mutable b: bytes;
    mutable len: int;
  }

  (** [create ?cap ()] returns an empty buffer backed by [cap] bytes
      (16 by default). *)
  let create ?(cap=16) () : t =
    { len=0; b=Bytes.create cap; }

  (* Reallocate the storage so that it can hold [new_len] bytes, keeping the
     current contents. Grows by ~25% on top of the request to amortize
     repeated appends.
     Raises [Failure] if [new_len] exceeds [Sys.max_string_length]. *)
  let resize_ self new_len : unit =
    let size = min (new_len + new_len / 4 + 5) Sys.max_string_length in
    if new_len > size then failwith "max buf size exceeded";
    let b2 = Bytes.create size in
    Bytes.blit self.b 0 b2 0 self.len;
    self.b <- b2

  (** [ensure_size_ self new_len] makes sure the backing storage holds at
      least [new_len] bytes.
      @raise Failure if [new_len] exceeds [Sys.max_string_length]. *)
  let[@inline] ensure_size_ (self:t) (new_len:int) : unit =
    if new_len > Bytes.length self.b then (
      resize_ self new_len
    )

  (** [add_bytes self b off len] appends the slice [b.[off .. off+len-1]]
      to [self].
      @raise Invalid_argument if the slice is not within [b]. *)
  let add_bytes (self:t) (b:bytes) (off:int) (len:int) =
    (* reject bad slices up front, before the buffer is grown for them *)
    if off < 0 || len < 0 || off > Bytes.length b - len then
      invalid_arg "Chunk_stack.Buf.add_bytes";
    ensure_size_ self (self.len + len);
    Bytes.blit b off self.b self.len len;
    self.len <- self.len + len

  (** [add_buf self other] appends the contents of [other] to [self]. *)
  let[@inline] add_buf (self:t) (other:t) =
    add_bytes self other.b 0 other.len

  (** Empty the buffer; the backing storage is kept. *)
  let clear self = self.len <- 0

  (** Copy of the current contents as a string. *)
  let contents self = Bytes.sub_string self.b 0 self.len
end
module Writer = struct
  (** A writer pushes chunks onto a chunk stack. [write b i len] pushes the
      slice [b.[i .. i+len-1]] as one chunk: the payload, followed by its
      length as a 4-byte little-endian int32. *)
  type t = {
    write: bytes -> int -> int -> unit;
  }

  let nop_ _ = ()

  (** Writer that ignores everything it is given. *)
  let dummy : t = { write=fun _ _ _ -> (); }

  (* Store [len] in the first 4 bytes of [blen] as a little-endian int32.
     [Int32.of_int] wraps silently when the value does not fit, which would
     record a wrong length and make the stack unreadable; the round-trip
     comparison detects that on 32-bit and 64-bit platforms alike. *)
  let set_len_ (blen:bytes) (len:int) : unit =
    let n = Int32.of_int len in
    if len < 0 || Int32.to_int n <> len then
      invalid_arg "Chunk_stack.Writer: chunk length must fit in an int32";
    Bytes.set_int32_le blen 0 n

  (** [into_buf into] is a writer that appends chunks to [into].
      @raise Invalid_argument (from [write]) if the chunk length is negative
      or does not fit in an int32; nothing is written in that case. *)
  let into_buf (into:Buf.t) : t =
    let blen = Bytes.create 4 in
    let write b i len =
      (* encode (and validate) the length before touching [into] *)
      set_len_ blen len;
      Buf.add_bytes into b i len;
      (* add len *)
      Buf.add_bytes into blen 0 4;
    in
    { write; }

  (** [into_channel oc] is a writer that outputs chunks on [oc].
      @raise Invalid_argument (from [write]) if the chunk length is negative
      or does not fit in an int32; nothing is written in that case. *)
  let into_channel (oc:out_channel) : t =
    let blen = Bytes.create 4 in
    let write b i len =
      (* encode (and validate) the length before touching [oc] *)
      set_len_ blen len;
      output oc b i len;
      (* add len *)
      output oc blen 0 4
    in
    { write; }

  (** [add_buf self buf] pushes the contents of [buf] as one chunk. *)
  let[@inline] add_buf self buf = self.write buf.Buf.b 0 buf.Buf.len

  (** [add_bytes self b i len] pushes the slice [b.[i .. i+len-1]] as one
      chunk. *)
  let[@inline] add_bytes self b i len = self.write b i len

  (** [add_string self s] pushes [s] as one chunk. *)
  let[@inline] add_string self s =
    (* no copy: the bytes view of [s] is only passed to [write] *)
    add_bytes self (Bytes.unsafe_of_string s) 0 (String.length s)
end
module Reader = struct
  (** A reader pops chunks off a chunk stack, last written chunk first.

      [read ~yield ~finish] calls [yield b off len] with the slice holding
      the next chunk, or [finish ()] once no chunk is left, and returns the
      result of that call. The slice is only meant to be used during the
      call to [yield]: readers may reuse the underlying bytes. *)
  type t = {
    read: 'a. yield:(bytes -> int -> int -> 'a) -> finish:(unit -> 'a) -> 'a;
  } [@@unboxed]

  (** [next self f] calls [f] on the slice of the next chunk and returns
      [true], or returns [false] if there is no chunk left. *)
  let[@inline] next (self:t) f : bool =
    self.read
      ~yield:(fun b i len -> f b i len; true)
      ~finish:(fun () -> false)

  (** [next_string self] is a copy of the next chunk as a string, or [None]
      if there is no chunk left. *)
  let next_string (self:t) : string option =
    self.read
      ~yield:(fun b i len -> Some (Bytes.sub_string b i len))
      ~finish:(fun () -> None)

  (** Reader that never yields a chunk. *)
  let empty : t = { read=fun ~yield:_ ~finish -> finish() }

  (* Decode the little-endian int32 length stored at [b.[off .. off+3]].
     [avail] is the number of bytes that precede the length field, i.e. the
     largest length that can possibly be valid. Rejecting anything else keeps
     the read cursor moving strictly backward on corrupted input. *)
  let chunk_len_ (b:bytes) (off:int) ~(avail:int) : int =
    let n = Int32.to_int (Bytes.get_int32_le b off) in
    if n < 0 || n > avail then
      failwith "Chunk_stack.Reader: invalid chunk length";
    n

  (** [from_buf ibuf] reads the chunks stored in [ibuf], starting from the
      end. Slices passed to [yield] point directly into [ibuf]'s storage.
      @raise Failure (from [read]) if a stored length is out of range. *)
  let from_buf (ibuf:Buf.t) : t =
    (* offset just past the next record to read; 0 when exhausted *)
    let i = ref ibuf.Buf.len in
    (* read next record *)
    let read ~yield ~finish =
      if !i > 0 then (
        assert (!i >= 4);
        let len_pos = !i - 4 in
        let chunk_size = chunk_len_ ibuf.Buf.b len_pos ~avail:len_pos in
        i := len_pos - chunk_size;
        yield ibuf.Buf.b !i chunk_size
      ) else (
        finish()
      )
    in
    { read; }

  (** [with_file_backward filename f] opens [filename] and calls [f] with a
      reader that yields the chunks of the file from the last one to the
      first one. Slices passed to [yield] point into a buffer that is
      overwritten by the following read. The reader should presumably not be
      used once [f] has returned (assuming [CCIO.with_in] closes the channel
      at that point).
      @raise Failure (from [read]) if a stored length is out of range. *)
  let with_file_backward (filename:string) f =
    CCIO.with_in ~flags:[Open_binary; Open_rdonly] filename @@ fun ic ->
    (* Offset just past the next record to read. It is tracked explicitly:
       after [really_input] the channel position is at the END of the chunk
       payload, so [pos_in] cannot be used to find the preceding record. *)
    let pos = ref (in_channel_length ic) in
    let blen = Bytes.create 4 in (* to read length *)
    let buf = Buf.create() in (* local buffer *)
    let read ~yield ~finish =
      if !pos > 0 then (
        (* read length of preceding chunk *)
        assert (!pos >= 4);
        let len_pos = !pos - 4 in
        seek_in ic len_pos;
        really_input ic blen 0 4;
        let chunk_len = chunk_len_ blen 0 ~avail:len_pos in
        let chunk_pos = len_pos - chunk_len in
        (* now read chunk; clear first so growing does not copy stale data *)
        Buf.clear buf;
        Buf.ensure_size_ buf chunk_len;
        seek_in ic chunk_pos;
        really_input ic buf.Buf.b 0 chunk_len;
        buf.Buf.len <- chunk_len;
        pos := chunk_pos;
        yield buf.Buf.b 0 chunk_len
      ) else (
        finish()
      )
    in
    f {read}
end
(*$T
false
*)

View file

@ -35,15 +35,24 @@ module Writer : sig
val into_buf : Buf.t -> t
val into_channel: out_channel -> t
val add_buf : t -> Buf.t -> unit
val add_bytes : t -> bytes -> int -> int -> unit
val add_string : t -> string -> unit
end
module Reader : sig
type t
val next : t -> Buf.t -> bool
(** Read next chunk into buf.
val next : t -> (bytes -> int -> int -> unit) -> bool
(** Read next chunk, call the function with a slice of bytes.
Returns [true] if a chunk was read, [false] if no more chunks are there. *)
val next_string : t -> string option
(** Read next chunk as a string *)
val empty : t
val from_buf : Buf.t -> t

View file

@ -28,5 +28,6 @@ module Bag = Bag
module Stat = Stat
module Hash = Hash
module Profile = Profile
module Chunk_stack = Chunk_stack
module Intf = Sidekick_sigs