preparation for Tell and BencodeStream

This commit is contained in:
Simon Cruanes 2014-04-10 19:36:26 +02:00
parent cfed1c44a9
commit 9c18698b22
9 changed files with 518 additions and 8 deletions

2
_oasis
View file

@ -41,7 +41,7 @@ Library "containers"
UnionFind, SmallSet, Leftistheap, AbsSet, CSM, MultiMap,
ActionMan, BV, QCheck, BencodeOnDisk, Show, TTree,
HGraph, Automaton, Conv, Levenshtein, Bidir, Iteratee,
Ty
Ty, Tell
BuildDepends: unix
Library "containers_thread"

3
_tags
View file

@ -1,5 +1,5 @@
# OASIS_START
# DO NOT EDIT (digest: 1f820e710eb0e25ebe36cbb436da1e21)
# DO NOT EDIT (digest: 7cc04ba603fa39231957d867b707c9af)
# Ignore VCS directories, you can use the same kind of rule outside
# OASIS_START/STOP if you want to exclude directories that contains
# useless stuff for the build process
@ -58,6 +58,7 @@
"bidir.cmx": for-pack(Containers)
"iteratee.cmx": for-pack(Containers)
"ty.cmx": for-pack(Containers)
"tell.cmx": for-pack(Containers)
# Library containers_thread
"threads/containers_thread.cmxs": use_containers_thread
<threads/*.ml{,i}>: package(threads)

304
bencodeStream.ml Normal file
View file

@ -0,0 +1,304 @@
(*
copyright (c) 2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Full-Streaming API of Bencode} *)
type token =
| Int of int
| String of string
| BeginDict
| EndDict
| BeginList
| EndList
module Encode = struct
type sink =
[ `File of string
| `Out of out_channel
| `Buf of Buffer.t
]
type t = {
write_string : string -> unit;
write_char : char -> unit;
on_close : unit -> unit;
}
let nop() = ()
let create = function
| `Out o ->
{ write_string=output_string o
; write_char=output_char o
; on_close = nop
}
| `File f ->
let o = open_in f in
{ write_string=output_string o
; write_char=output_char o
; on_close = (fun () -> close_out o)
}
| `Buf b ->
{ write_string=Buffer.add_string b
; write_char=Buffer.add_char b
; on_close =nop
}
let push out tok = match tok with
| Int i ->
out.write_char 'i';
out.write_string (string_of_int i);
out.write_char 'e'
| String s ->
out.write_string (string_of_int (String.length s));
out.write_char ':';
out.write_string s
| BeginDict ->
out.write_char 'd'
| EndDict ->
out.write_char 'e'
| BeginList ->
out.write_char 'l'
| EndList ->
out.write_char 'e'
end
module Decode = struct
type source =
[ `File of string
| `In of in_channel
| `String of string
| `Manual
]
type result =
| Yield of token
| Error of string
| End
| Await (** The user needs to call {!feed} with some input *)
type t = {
read_string : string -> int -> int -> int;
read_char : unit -> char;
mutable buf : string; (* buffer *)
mutable i : int; (* index in buf *)
mutable len : int; (* length of substring to read *)
mutable c : int; (* line *)
mutable l : int; (* column *)
mutable state : result;
mutable stack : partial_state list;
}
let __default = {
read_string = (fun _ _ _ -> assert false);
read_char = (fun _ -> '\000');
buf = "";
i = 0;
len = 0;
c = 0;
l = 0;
state = Error "no input";
stack = [];
}
let create = function
| `File f ->
val create : source -> t
(** Create a new decoder with the given source. *)
type decoder = {
mutable buf : string; (* buffer *)
mutable i : int; (* index in buf *)
mutable len : int; (* length of substring to read *)
mutable c : int; (* line *)
mutable l : int; (* column *)
mutable state : parse_result;
mutable stack : partial_state list;
}
(** Result of parsing *)
and parse_result =
| ParseOk of t
| ParseError of string
| ParsePartial
(** Partial state of the parser *)
and partial_state =
| PS_I of bool * int (* sign and integer *)
| PS_S of int ref * string (* index in string, plus string *)
| PS_L of t list
| PS_D of t SMap.t (* in dictionary *)
| PS_D_key of string * t SMap.t (* parsed key, wait for value *)
| PS_return of t (* bottom of stack *)
| PS_error of string (* error *)
let mk_decoder () =
let dec = {
buf = "";
i = 0;
len = 0;
c = 0;
l = 0;
state = ParsePartial;
stack = [];
} in
dec
let is_empty dec = dec.len = 0
let cur dec = dec.buf.[dec.i]
let junk dec =
(* update line/column *)
(if cur dec = '\n'
then (dec.c <- 0; dec.l <- dec.l + 1)
else dec.c <- dec.c + 1);
dec.i <- dec.i + 1;
dec.len <- dec.len - 1
let next dec =
let c = cur dec in
junk dec;
c
(* parse value *)
let rec parse_rec dec =
match dec.stack with
| [PS_return v] -> (* return value *)
dec.stack <- [];
dec.state <- ParseOk v;
dec.state
| [PS_error s] -> (* failure *)
dec.stack <- [];
dec.state <- ParseError s;
dec.state
| _ ->
if is_empty dec then ParsePartial (* wait *)
else begin
let c = next dec in
(match dec.stack, c with
| (PS_I (sign, i)) :: stack, '0' .. '9' ->
dec.stack <- PS_I (sign, (Char.code c - Char.code '0') + 10 * i) :: stack;
| (PS_I (_, 0)) :: stack, '-' ->
dec.stack <- PS_I (false, 0) :: stack (* negative number *)
| (PS_I (sign, i)) :: stack, 'e' ->
dec.stack <- stack;
push_value dec (I (if sign then i else ~- i))
| ((PS_D _ | PS_D_key _ | PS_L _) :: _ | []), '0' .. '9' ->
(* initial length of string *)
dec.stack <- (PS_I (true, Char.code c - Char.code '0')) :: dec.stack
| (PS_I (sign, i)) :: stack, ':' ->
if i < 0
then error dec "string length cannot be negative"
else if i = 0 then (* empty string *)
let _ = dec.stack <- stack in
push_value dec (S "")
else (* prepare to parse a string *)
dec.stack <- (PS_S (ref 0, String.create i)) :: stack;
| (PS_S (n, s)) :: stack, _ ->
s.[!n] <- c;
incr n;
(* value completed *)
(if !n = String.length s
then
let _ = dec.stack <- stack in
push_value dec (S s));
| stack, 'i' ->
dec.stack <- (PS_I (true, 0)) :: stack
| stack, 'l' ->
dec.stack <- PS_L [] :: stack;
| stack, 'd' ->
dec.stack <- PS_D SMap.empty :: stack
| (PS_L l) :: stack, 'e' -> (* end of list *)
dec.stack <- stack;
push_value dec (L (List.rev l))
| (PS_D d) :: stack, 'e' -> (* end of dict *)
dec.stack <- stack;
push_value dec (D d)
| (PS_D_key _) :: _, 'e' -> (* error *)
error dec "missing value in dict"
| _ -> (* generic error *)
error dec (Printf.sprintf "expected value, got %c" c));
parse_rec dec
end
(* When a value is parsed, push it on the stack (possibly collapsing it) *)
and push_value dec v =
match v, dec.stack with
| _, [] ->
dec.stack <- [PS_return v] (* finished *)
| _, (PS_L l) :: stack ->
(* add to list *)
dec.stack <- (PS_L (v :: l)) :: stack;
| S key, ((PS_D d) :: stack) ->
(* new key for the map *)
dec.stack <- (PS_D_key (key, d)) :: stack;
| _, ((PS_D d) :: _) ->
(* error: key must be string *)
error dec "dict keys must be strings"
| _, (PS_D_key (key, d)) :: stack ->
(* new binding for the map *)
dec.stack <- (PS_D (SMap.add key v d)) :: stack;
| _ -> assert false
(* signal error *)
and error dec msg =
let msg = Printf.sprintf "Bencode: error at line %d, column %d: %s"
dec.l dec.c msg in
dec.stack <- [PS_error msg]
(* exported parse function *)
let parse dec s i len =
(if i < 0 || i+len > String.length s
then invalid_arg "Bencode.parse: not a valid substring");
(* add the input to [dec] *)
if dec.len = 0
then begin
dec.buf <- String.copy s;
dec.i <- i;
dec.len <- len;
end else begin
(* use a buffer to merge the stored input and the new input *)
let buf' = String.create (dec.len + len - dec.i) in
String.blit dec.buf dec.i buf' 0 dec.len;
String.blit s i buf' dec.len len;
dec.buf <- buf';
dec.i <- 0;
dec.len <- dec.len + len - dec.i;
end;
(* state machine *)
parse_rec dec
val feed : t -> string -> unit
(** For manual mode, provide some input *)
type result =
| Yield of token
| End
| Await (** The user needs to call {!feed} with some input *)
val next : t -> result
end

74
bencodeStream.mli Normal file
View file

@ -0,0 +1,74 @@
(*
copyright (c) 2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Full-Streaming API of Bencode} *)
type token =
| Int of int
| String of string
| BeginDict
| EndDict
| BeginList
| EndList
module Encode : sig
type t
type sink =
[ `File of string
| `Out of out_channel
| `Buf of Buffer.t
]
val create : sink -> t
val push : t -> token -> unit
end
module Decode : sig
type t
type source =
[ `File of string
| `In of in_channel
| `String of string
| `Manual
]
val create : source -> t
(** Create a new decoder with the given source. *)
val feed : t -> string -> unit
(** For manual mode, provide some input *)
type result =
| Yield of token
| Error of string
| End
| Await (** The user needs to call {!feed} with some input *)
val next : t -> result
end

View file

@ -1,5 +1,5 @@
# OASIS_START
# DO NOT EDIT (digest: a57e9337d5ca2ca5de2c29c113a55d57)
# DO NOT EDIT (digest: 4f32d3f650d22abadbd792d7ed9a300a)
Cache
Deque
Gen
@ -43,4 +43,5 @@ Levenshtein
Bidir
Iteratee
Ty
Tell
# OASIS_STOP

View file

@ -1,5 +1,5 @@
# OASIS_START
# DO NOT EDIT (digest: a57e9337d5ca2ca5de2c29c113a55d57)
# DO NOT EDIT (digest: 4f32d3f650d22abadbd792d7ed9a300a)
Cache
Deque
Gen
@ -43,4 +43,5 @@ Levenshtein
Bidir
Iteratee
Ty
Tell
# OASIS_STOP

View file

@ -1,7 +1,7 @@
(* setup.ml generated for the first time by OASIS v0.3.0 *)
(* OASIS_START *)
(* DO NOT EDIT (digest: 607ac444249450197cccc8d204000609) *)
(* DO NOT EDIT (digest: 64cad3d2ac0a699d2cce214751e56a41) *)
(*
Regenerated by OASIS v0.4.4
Visit http://oasis.forge.ocamlcore.org for more information and
@ -7001,7 +7001,8 @@ let setup_t =
"Levenshtein";
"Bidir";
"Iteratee";
"Ty"
"Ty";
"Tell"
];
lib_pack = true;
lib_internal_modules = [];
@ -7388,8 +7389,7 @@ let setup_t =
};
oasis_fn = Some "_oasis";
oasis_version = "0.4.4";
oasis_digest =
Some "\234\208\170\146/\020\006\020k\186\024\145\237\193\148\145";
oasis_digest = Some "\145(\131\246\004\000\023}\183\228*\141]B\1918";
oasis_exec = None;
oasis_setup_args = [];
setup_update = false

54
tell.ml Normal file
View file

@ -0,0 +1,54 @@
(*
copyright (c) 2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Hierarchic logging} *)
type t = {
name : string;
out : out_channel;
cleanup : bool;
mutable context : string list;
}
let __new_name =
let r = ref 0 in
fun () ->
let name = Printf.sprintf "Tell.log_%d" !r in
incr r;
name
let to_chan ?(cleanup=false) o = {
name = __new_name ();
out = o;
cleanup;
context = [];
}
let to_file filename =
let o = open_out filename in
to_chan ~cleanup:true o
let step

75
tell.mli Normal file
View file

@ -0,0 +1,75 @@
(*
copyright (c) 2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Hierarchic logging} *)
type t
val to_file : string -> t
(** Create a logger that outputs to the given file *)
val to_chan : ?cleanup:bool out_channel -> t
(** Obtain a logger that outputs to the given channel.
@param cleanup if true, will close the channel on exit;
if false or not explicited, won't do anything. *)
exception Error of string
(** {2 Raw functions} *)
val step : t -> string -> unit
val close : t -> unit
(** Close the logger. It will be unusable afterwards. *)
(** {2 Hierarchy} *)
val enter : t -> unit
(** Enter a new subsection *)
val exit : t -> unit
(** Exit the current subsection *)
val within : ?descr:string -> log:t -> (unit -> 'a) -> 'a
(** Enter a subsection named [descr], evaluate the given function,
exit the subsection and return the function's result.
Also protects against exceptions. *)
(** {2 Buffer-formatting output}
The following functions use a {!Buffer.t} to create the message,
then send it to their logger. *)
module B : sig
val enter : log:t -> ('a, Buffer.t, unit, unit) format4 -> 'a
(** Enter a new (sub-)section with the given message *)
val exit : log:t -> ('a, Buffer.t, unit, unit) format4 -> 'a
(** Exit (close) the current sub-section. *)
val step : log:t -> ('a, Buffer.t, unit, unit) format4 -> 'a
(** Unit step within the current section *)
end