From 65c4c059386bb156165a9b6642e2b4ae918d176a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 10 Apr 2014 22:57:57 +0200 Subject: [PATCH] progress in BencodeStream.Encode, and basic features in Tell --- bencodeStream.ml | 210 +++++++--------------------------------------- bencodeStream.mli | 15 +--- tell.ml | 59 ++++++++++++- tell.mli | 6 +- 4 files changed, 94 insertions(+), 196 deletions(-) diff --git a/bencodeStream.ml b/bencodeStream.ml index 8e9b3992..5d2fa2fa 100644 --- a/bencodeStream.ml +++ b/bencodeStream.ml @@ -30,9 +30,8 @@ type token = | Int of int | String of string | BeginDict - | EndDict | BeginList - | EndList + | End module Encode = struct type sink = @@ -56,7 +55,7 @@ module Encode = struct ; on_close = nop } | `File f -> - let o = open_in f in + let o = open_out f in { write_string=output_string o ; write_char=output_char o ; on_close = (fun () -> close_out o) @@ -78,97 +77,41 @@ module Encode = struct out.write_string s | BeginDict -> out.write_char 'd' - | EndDict -> + | End -> out.write_char 'e' | BeginList -> out.write_char 'l' - | EndList -> - out.write_char 'e' end module Decode = struct - type source = - [ `File of string - | `In of in_channel - | `String of string - | `Manual - ] - type result = | Yield of token | Error of string - | End | Await (** The user needs to call {!feed} with some input *) + type state = + | Start + | ParsingInt of int + | ParsingString of string + type t = { - read_string : string -> int -> int -> int; - read_char : unit -> char; mutable buf : string; (* buffer *) mutable i : int; (* index in buf *) mutable len : int; (* length of substring to read *) mutable c : int; (* line *) mutable l : int; (* column *) - mutable state : result; - mutable stack : partial_state list; + mutable state : state; } - let __default = { - read_string = (fun _ _ _ -> assert false); - read_char = (fun _ -> '\000'); + let create () = { buf = ""; i = 0; len = 0; c = 0; l = 0; - state = Error "no input"; - stack = []; + state = Start; } - let create = function - | `File f -> - - - val create : source -> t - (** Create a new decoder with the given source. *) - - type decoder = { - mutable buf : string; (* buffer *) - mutable i : int; (* index in buf *) - mutable len : int; (* length of substring to read *) - mutable c : int; (* line *) - mutable l : int; (* column *) - mutable state : parse_result; - mutable stack : partial_state list; - } - - (** Result of parsing *) - and parse_result = - | ParseOk of t - | ParseError of string - | ParsePartial - - (** Partial state of the parser *) - and partial_state = - | PS_I of bool * int (* sign and integer *) - | PS_S of int ref * string (* index in string, plus string *) - | PS_L of t list - | PS_D of t SMap.t (* in dictionary *) - | PS_D_key of string * t SMap.t (* parsed key, wait for value *) - | PS_return of t (* bottom of stack *) - | PS_error of string (* error *) - - let mk_decoder () = - let dec = { - buf = ""; - i = 0; - len = 0; - c = 0; - l = 0; - state = ParsePartial; - stack = []; - } in - dec - let is_empty dec = dec.len = 0 let cur dec = dec.buf.[dec.i] @@ -185,120 +128,29 @@ module Decode = struct junk dec; c + (* (* parse value *) let rec parse_rec dec = - match dec.stack with - | [PS_return v] -> (* return value *) - dec.stack <- []; - dec.state <- ParseOk v; - dec.state - | [PS_error s] -> (* failure *) - dec.stack <- []; - dec.state <- ParseError s; - dec.state - | _ -> - if is_empty dec then ParsePartial (* wait *) - else begin - let c = next dec in - (match dec.stack, c with - | (PS_I (sign, i)) :: stack, '0' .. '9' -> - dec.stack <- PS_I (sign, (Char.code c - Char.code '0') + 10 * i) :: stack; - | (PS_I (_, 0)) :: stack, '-' -> - dec.stack <- PS_I (false, 0) :: stack (* negative number *) - | (PS_I (sign, i)) :: stack, 'e' -> - dec.stack <- stack; - push_value dec (I (if sign then i else ~- i)) - | ((PS_D _ | PS_D_key _ | PS_L _) :: _ | []), '0' .. '9' -> - (* initial length of string *) - dec.stack <- (PS_I (true, Char.code c - Char.code '0')) :: dec.stack - | (PS_I (sign, i)) :: stack, ':' -> - if i < 0 - then error dec "string length cannot be negative" - else if i = 0 then (* empty string *) - let _ = dec.stack <- stack in - push_value dec (S "") - else (* prepare to parse a string *) - dec.stack <- (PS_S (ref 0, String.create i)) :: stack; - | (PS_S (n, s)) :: stack, _ -> - s.[!n] <- c; - incr n; - (* value completed *) - (if !n = String.length s - then - let _ = dec.stack <- stack in - push_value dec (S s)); - | stack, 'i' -> - dec.stack <- (PS_I (true, 0)) :: stack - | stack, 'l' -> - dec.stack <- PS_L [] :: stack; - | stack, 'd' -> - dec.stack <- PS_D SMap.empty :: stack - | (PS_L l) :: stack, 'e' -> (* end of list *) - dec.stack <- stack; - push_value dec (L (List.rev l)) - | (PS_D d) :: stack, 'e' -> (* end of dict *) - dec.stack <- stack; - push_value dec (D d) - | (PS_D_key _) :: _, 'e' -> (* error *) - error dec "missing value in dict" - | _ -> (* generic error *) - error dec (Printf.sprintf "expected value, got %c" c)); - parse_rec dec - end - (* When a value is parsed, push it on the stack (possibly collapsing it) *) - and push_value dec v = - match v, dec.stack with - | _, [] -> - dec.stack <- [PS_return v] (* finished *) - | _, (PS_L l) :: stack -> - (* add to list *) - dec.stack <- (PS_L (v :: l)) :: stack; - | S key, ((PS_D d) :: stack) -> - (* new key for the map *) - dec.stack <- (PS_D_key (key, d)) :: stack; - | _, ((PS_D d) :: _) -> - (* error: key must be string *) - error dec "dict keys must be strings" - | _, (PS_D_key (key, d)) :: stack -> - (* new binding for the map *) - dec.stack <- (PS_D (SMap.add key v d)) :: stack; - | _ -> assert false - (* signal error *) - and error dec msg = - let msg = Printf.sprintf "Bencode: error at line %d, column %d: %s" - dec.l dec.c msg in - dec.stack <- [PS_error msg] + if is_empty dec then Await (* wait *) + else begin + let c = next dec in + match dec.state, c with + | Start, 'l' -> + Yield StartList + | Start, 'd' -> + Yield StartDict + | Start, 'e' -> + Yield End + | Start, 'i' -> + dec.state <- ParsingInt 0 + | ParsingString i, 'e' -> + dec.state <- Start; + Yield (Int i) + | + *) - (* exported parse function *) - let parse dec s i len = - (if i < 0 || i+len > String.length s - then invalid_arg "Bencode.parse: not a valid substring"); - (* add the input to [dec] *) - if dec.len = 0 - then begin - dec.buf <- String.copy s; - dec.i <- i; - dec.len <- len; - end else begin - (* use a buffer to merge the stored input and the new input *) - let buf' = String.create (dec.len + len - dec.i) in - String.blit dec.buf dec.i buf' 0 dec.len; - String.blit s i buf' dec.len len; - dec.buf <- buf'; - dec.i <- 0; - dec.len <- dec.len + len - dec.i; - end; - (* state machine *) - parse_rec dec + let feed dec = assert false - val feed : t -> string -> unit - (** For manual mode, provide some input *) - - type result = - | Yield of token - | End - | Await (** The user needs to call {!feed} with some input *) - - val next : t -> result + let next dec = assert false end diff --git a/bencodeStream.mli b/bencodeStream.mli index b13ba0d6..bb5f2d87 100644 --- a/bencodeStream.mli +++ b/bencodeStream.mli @@ -30,9 +30,8 @@ type token = | Int of int | String of string | BeginDict - | EndDict | BeginList - | EndList + | End module Encode : sig type t @@ -51,14 +50,7 @@ end module Decode : sig type t - type source = - [ `File of string - | `In of in_channel - | `String of string - | `Manual - ] - - val create : source -> t + val create : unit -> t (** Create a new decoder with the given source. *) val feed : t -> string -> unit @@ -66,8 +58,7 @@ module Decode : sig type result = | Yield of token - | Error of string - | End + | Error of string (** Invalid B-encode *) | Await (** The user needs to call {!feed} with some input *) val next : t -> result diff --git a/tell.ml b/tell.ml index 030403a8..530a4bcc 100644 --- a/tell.ml +++ b/tell.ml @@ -26,9 +26,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Hierarchic logging} *) +module BS = BencodeStream + type t = { name : string; out : out_channel; + encoder : BS.Encode.t; cleanup : bool; mutable context : string list; } @@ -43,6 +46,7 @@ let __new_name = let to_chan ?(cleanup=false) o = { name = __new_name (); out = o; + encoder = BS.Encode.create (`Out o); cleanup; context = []; } @@ -51,4 +55,57 @@ let to_file filename = let o = open_out filename in to_chan ~cleanup:true o -let step +let close log = + if log.cleanup + then close_out log.out + +let step log msg = + BS.Encode.push log.encoder BS.BeginDict; + BS.Encode.push log.encoder (BS.String "step"); + BS.Encode.push log.encoder (BS.String msg); + BS.Encode.push log.encoder BS.End + +let enter log = + BS.Encode.push log.encoder BS.BeginList + +let exit log = + BS.Encode.push log.encoder BS.End + +let within ~log f = + BS.Encode.push log.encoder BS.BeginDict; + BS.Encode.push log.encoder (BS.String "section"); + try + let x = f () in + BS.Encode.push log.encoder BS.End; + x + with e -> + BS.Encode.push log.encoder BS.End; + raise e + +module B = struct + let step ~log format = + exit log; + let b = Buffer.create 24 in + Printf.kbprintf + (fun b -> + BS.Encode.push log.encoder (BS.String (Buffer.contents b))) + b format + + let enter ~log format = + let b = Buffer.create 24 in + let x = Printf.kbprintf + (fun b -> + BS.Encode.push log.encoder (BS.String (Buffer.contents b))) + b format + in + enter log; + x + + let exit ~log format = + exit log; + let b = Buffer.create 24 in + Printf.kbprintf + (fun b -> + BS.Encode.push log.encoder (BS.String (Buffer.contents b))) + b format +end diff --git a/tell.mli b/tell.mli index 4ef0a9a4..6f17ffc6 100644 --- a/tell.mli +++ b/tell.mli @@ -36,8 +36,6 @@ val to_chan : ?cleanup:bool -> out_channel -> t @param cleanup if true, will close the channel on exit; if false or not explicited, won't do anything. *) -exception Error of string - (** {2 Raw functions} *) val step : t -> string -> unit @@ -53,8 +51,8 @@ val enter : t -> unit val exit : t -> unit (** Exit the current subsection *) -val within : ?descr:string -> log:t -> (unit -> 'a) -> 'a -(** Enter a subsection named [descr], evaluate the given function, +val within : log:t -> (unit -> 'a) -> 'a +(** Enter a new subsection, evaluate the given function, exit the subsection and return the function's result. Also protects against exceptions. *)