mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-05 19:00:31 -05:00
Bencode module, for the eponym encoding format
This commit is contained in:
parent
d9c8007548
commit
689908f9b9
3 changed files with 407 additions and 0 deletions
269
bencode.ml
Normal file
269
bencode.ml
Normal file
|
|
@ -0,0 +1,269 @@
|
|||
(*
|
||||
Copyright (c) 2013, Simon Cruanes
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. Redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {6 B-encoding} *)
|
||||
|
||||
|
||||
module SMap = Map.Make(String)
|
||||
|
||||
type t =
|
||||
| I of int
|
||||
| S of string
|
||||
| L of t list
|
||||
| D of t SMap.t
|
||||
|
||||
let rec eq t1 t2 = match t1, t2 with
|
||||
| I i1, I i2 -> i1 = i2
|
||||
| S s1, S s2 -> s1 = s2
|
||||
| L l1, L l2 ->
|
||||
(try List.for_all2 eq l1 l2 with Invalid_argument _ -> false)
|
||||
| D d1, D d2 ->
|
||||
SMap.equal eq d1 d2
|
||||
| _ -> false
|
||||
|
||||
let hash t = Hashtbl.hash t
|
||||
|
||||
let dict_of_list l =
|
||||
let d = List.fold_left
|
||||
(fun d (k, v) -> SMap.add k v d)
|
||||
SMap.empty l
|
||||
in
|
||||
D d
|
||||
|
||||
(** {2 Serialization (encoding)} *)
|
||||
|
||||
let rec to_buf buf t = match t with
|
||||
| I i -> Printf.bprintf buf "i%de" i
|
||||
| S s ->
|
||||
Printf.bprintf buf "%d:" (String.length s);
|
||||
Buffer.add_string buf s
|
||||
| L l ->
|
||||
Buffer.add_char buf 'l';
|
||||
List.iter (fun t' -> to_buf buf t') l;
|
||||
Buffer.add_char buf 'e'
|
||||
| D m ->
|
||||
Buffer.add_char buf 'd';
|
||||
SMap.iter (fun key t' -> to_buf buf (S key); to_buf buf t') m;
|
||||
Buffer.add_char buf 'e'
|
||||
|
||||
let to_string t =
|
||||
let b = Buffer.create 25 in
|
||||
to_buf b t;
|
||||
Buffer.contents b
|
||||
|
||||
let to_chan ch t =
|
||||
let b = Buffer.create 25 in
|
||||
to_buf b t;
|
||||
Buffer.output_buffer ch b
|
||||
|
||||
let fmt formatter t =
|
||||
let b = Buffer.create 25 in
|
||||
to_buf b t;
|
||||
Format.pp_print_string formatter (Buffer.contents b)
|
||||
|
||||
(** {2 Deserialization (decoding)} *)
|
||||
|
||||
(** Deserialization is based on the {! decoder} type. Parsing can be
|
||||
incremental, in which case the input is provided chunk by chunk and
|
||||
the decoder contains the parsing state. Once a B-encoded value
|
||||
has been parsed, other values can still be read. *)
|
||||
|
||||
type decoder = {
|
||||
mutable buf : string; (* buffer *)
|
||||
mutable i : int; (* index in buf *)
|
||||
mutable len : int; (* length of substring to read *)
|
||||
mutable c : int; (* line *)
|
||||
mutable l : int; (* column *)
|
||||
mutable state : parse_result;
|
||||
mutable stack : partial_state list;
|
||||
}
|
||||
|
||||
(** Result of parsing *)
|
||||
and parse_result =
|
||||
| ParseOk of t
|
||||
| ParseError of string
|
||||
| ParsePartial
|
||||
|
||||
(** Partial state of the parser *)
|
||||
and partial_state =
|
||||
| PS_I of bool * int (* sign and integer *)
|
||||
| PS_S of int ref * string (* index in string, plus string *)
|
||||
| PS_L of t list
|
||||
| PS_D of t SMap.t (* in dictionary *)
|
||||
| PS_D_key of string * t SMap.t (* parsed key, wait for value *)
|
||||
| PS_return of t (* bottom of stack *)
|
||||
| PS_error of string (* error *)
|
||||
|
||||
let mk_decoder () =
|
||||
let dec = {
|
||||
buf = "";
|
||||
i = 0;
|
||||
len = 0;
|
||||
c = 0;
|
||||
l = 0;
|
||||
state = ParsePartial;
|
||||
stack = [];
|
||||
} in
|
||||
dec
|
||||
|
||||
let is_empty dec = dec.len = 0
|
||||
let cur dec = dec.buf.[dec.i]
|
||||
|
||||
let junk dec =
|
||||
(* update line/column *)
|
||||
(if cur dec = '\n'
|
||||
then (dec.c <- 0; dec.l <- dec.l + 1)
|
||||
else dec.c <- dec.c + 1);
|
||||
dec.i <- dec.i + 1;
|
||||
dec.len <- dec.len - 1
|
||||
|
||||
let next dec =
|
||||
let c = cur dec in
|
||||
junk dec;
|
||||
c
|
||||
|
||||
(* parse value *)
|
||||
let rec parse_rec dec =
|
||||
match dec.stack with
|
||||
| [PS_return v] -> (* return value *)
|
||||
dec.stack <- [];
|
||||
dec.state <- ParseOk v;
|
||||
dec.state
|
||||
| [PS_error s] -> (* failure *)
|
||||
dec.stack <- [];
|
||||
dec.state <- ParseError s;
|
||||
dec.state
|
||||
| _ ->
|
||||
if is_empty dec then ParsePartial (* wait *)
|
||||
else begin
|
||||
let c = next dec in
|
||||
(match dec.stack, c with
|
||||
| (PS_I (sign, i)) :: stack, '0' .. '9' ->
|
||||
dec.stack <- PS_I (sign, (Char.code c - Char.code '0') + 10 * i) :: stack;
|
||||
| (PS_I (_, 0)) :: stack, '-' ->
|
||||
dec.stack <- PS_I (false, 0) :: stack (* negative number *)
|
||||
| (PS_I (sign, i)) :: stack, 'e' ->
|
||||
dec.stack <- stack;
|
||||
push_value dec (I (if sign then i else ~- i))
|
||||
| ((PS_D _ | PS_D_key _ | PS_L _) :: _ | []), '0' .. '9' ->
|
||||
(* initial length of string *)
|
||||
dec.stack <- (PS_I (true, Char.code c - Char.code '0')) :: dec.stack
|
||||
| (PS_I (sign, i)) :: stack, ':' ->
|
||||
if i < 0
|
||||
then error dec "string length cannot be negative"
|
||||
else if i = 0 then (* empty string *)
|
||||
let _ = dec.stack <- stack in
|
||||
push_value dec (S "")
|
||||
else (* prepare to parse a string *)
|
||||
dec.stack <- (PS_S (ref 0, String.create i)) :: stack;
|
||||
| (PS_S (n, s)) :: stack, _ ->
|
||||
s.[!n] <- c;
|
||||
incr n;
|
||||
(* value completed *)
|
||||
(if !n = String.length s
|
||||
then
|
||||
let _ = dec.stack <- stack in
|
||||
push_value dec (S s));
|
||||
| stack, 'i' ->
|
||||
dec.stack <- (PS_I (true, 0)) :: stack
|
||||
| stack, 'l' ->
|
||||
dec.stack <- PS_L [] :: stack;
|
||||
| stack, 'd' ->
|
||||
dec.stack <- PS_D SMap.empty :: stack
|
||||
| (PS_L l) :: stack, 'e' -> (* end of list *)
|
||||
dec.stack <- stack;
|
||||
push_value dec (L (List.rev l))
|
||||
| (PS_D d) :: stack, 'e' -> (* end of dict *)
|
||||
dec.stack <- stack;
|
||||
push_value dec (D d)
|
||||
| (PS_D_key _) :: _, 'e' -> (* error *)
|
||||
error dec "missing value in dict"
|
||||
| _ -> (* generic error *)
|
||||
error dec (Printf.sprintf "expected value, got %c" c));
|
||||
parse_rec dec
|
||||
end
|
||||
(* When a value is parsed, push it on the stack (possibly collapsing it) *)
|
||||
and push_value dec v =
|
||||
match v, dec.stack with
|
||||
| _, [] ->
|
||||
dec.stack <- [PS_return v] (* finished *)
|
||||
| _, (PS_L l) :: stack ->
|
||||
(* add to list *)
|
||||
dec.stack <- (PS_L (v :: l)) :: stack;
|
||||
| S key, ((PS_D d) :: stack) ->
|
||||
(* new key for the map *)
|
||||
dec.stack <- (PS_D_key (key, d)) :: stack;
|
||||
| _, ((PS_D d) :: _) ->
|
||||
(* error: key must be string *)
|
||||
error dec "dict keys must be strings"
|
||||
| _, (PS_D_key (key, d)) :: stack ->
|
||||
(* new binding for the map *)
|
||||
dec.stack <- (PS_D (SMap.add key v d)) :: stack;
|
||||
| _ -> assert false
|
||||
(* signal error *)
|
||||
and error dec msg =
|
||||
let msg = Printf.sprintf "Bencode: error at line %d, column %d: %s"
|
||||
dec.l dec.c msg in
|
||||
dec.stack <- [PS_error msg]
|
||||
|
||||
(* exported parse function *)
|
||||
let parse dec s i len =
|
||||
(if i < 0 || i+len > String.length s
|
||||
then invalid_arg "Bencode.parse: not a valid substring");
|
||||
(* add the input to [dec] *)
|
||||
if dec.len = 0
|
||||
then begin
|
||||
dec.buf <- s;
|
||||
dec.i <- i;
|
||||
dec.len <- len;
|
||||
end else begin
|
||||
(* use a buffer to merge the stored input and the new input *)
|
||||
let b = Buffer.create (dec.len + len) in
|
||||
Buffer.add_substring b dec.buf dec.i dec.len;
|
||||
Buffer.add_substring b s i len;
|
||||
dec.buf <- Buffer.contents b;
|
||||
dec.i <- 0;
|
||||
dec.len <- dec.len + len;
|
||||
end;
|
||||
(* state machine *)
|
||||
parse_rec dec
|
||||
|
||||
let state dec = dec.state
|
||||
|
||||
let rest dec =
|
||||
String.sub dec.buf dec.i dec.len
|
||||
|
||||
let rest_size dec =
|
||||
dec.len
|
||||
|
||||
let parse_string s =
|
||||
let dec = mk_decoder () in
|
||||
parse dec s 0 (String.length s)
|
||||
|
||||
let of_string s =
|
||||
match parse_string s with
|
||||
| ParseOk t -> t
|
||||
| ParsePartial -> invalid_arg "Bencode: partial parse"
|
||||
| ParseError msg -> invalid_arg msg
|
||||
92
bencode.mli
Normal file
92
bencode.mli
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
(*
|
||||
Copyright (c) 2013, Simon Cruanes
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. Redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {6 B-encoding} *)
|
||||
|
||||
(** This implements encoding and decoding using the {i B-encode} format.
|
||||
See {{: http://en.wikipedia.org/wiki/Bencode} wikipedia} for more details
|
||||
*)
|
||||
|
||||
module SMap : Map.S with type key = string
|
||||
|
||||
type t =
|
||||
| I of int
|
||||
| S of string
|
||||
| L of t list
|
||||
| D of t SMap.t
|
||||
|
||||
val eq : t -> t -> bool
|
||||
val hash : t -> int
|
||||
|
||||
val dict_of_list : (string * t) list -> t
|
||||
|
||||
(** {2 Serialization (encoding)} *)
|
||||
|
||||
val to_buf : Buffer.t -> t -> unit
|
||||
val to_string : t -> string
|
||||
val to_chan : out_channel -> t -> unit
|
||||
val fmt : Format.formatter -> t -> unit
|
||||
|
||||
(** {2 Deserialization (decoding)} *)
|
||||
|
||||
(** Deserialization is based on the {! decoder} type. Parsing can be
|
||||
incremental, in which case the input is provided chunk by chunk and
|
||||
the decoder contains the parsing state. Once a B-encoded value
|
||||
has been parsed, other values can still be read.
|
||||
|
||||
This implementation does accept leading zeros, because it simplifies
|
||||
the code. *)
|
||||
|
||||
type decoder
|
||||
(** Decoding state *)
|
||||
|
||||
val mk_decoder : unit -> decoder
|
||||
(** Create a new decoder *)
|
||||
|
||||
type parse_result =
|
||||
| ParseOk of t
|
||||
| ParseError of string
|
||||
| ParsePartial
|
||||
|
||||
val parse : decoder -> string -> int -> int -> parse_result
|
||||
(** [parse dec s i len] uses the partial state stored in [dec] and
|
||||
the substring of [s] starting at index [i] with length [len].
|
||||
It can return an error, a value or just [ParsePartial] if
|
||||
more input is needed *)
|
||||
|
||||
val state : decoder -> parse_result
|
||||
(** Current state of the decoder *)
|
||||
|
||||
val rest : decoder -> string
|
||||
(** What remains after parsing (the additional, unused input) *)
|
||||
|
||||
val rest_size : decoder -> int
|
||||
(** Length of [rest d]. 0 indicates that the whole input has been consumed. *)
|
||||
|
||||
val parse_string : string -> parse_result
|
||||
(** Parse a full value from this string. *)
|
||||
|
||||
val of_string : string -> t
|
||||
(** Parse the string. @raise Invalid_argument if it fails to parse. *)
|
||||
46
tests/test_bencode.ml
Normal file
46
tests/test_bencode.ml
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
open OUnit
|
||||
|
||||
module B = Bencode
|
||||
|
||||
let test1 () =
|
||||
let s = "li42ei0ei-200ee" in
|
||||
match B.parse_string s with
|
||||
| B.ParseError msg ->
|
||||
OUnit.assert_failure (Printf.sprintf "should parse, got %s" msg)
|
||||
| B.ParsePartial ->
|
||||
OUnit.assert_failure "should parse, got partial"
|
||||
| B.ParseOk b ->
|
||||
OUnit.assert_equal (B.L [B.I 42; B.I 0; B.I ~-200]) b
|
||||
|
||||
let test2 () =
|
||||
let b =
|
||||
B.dict_of_list [
|
||||
"foo", B.I 42;
|
||||
"bar", B.L [B.I 0; B.S "caramba si"];
|
||||
"", B.S "";
|
||||
]
|
||||
in
|
||||
let s = B.to_string b in
|
||||
(* Printf.printf "serialized to %s\n" s; *)
|
||||
let b' = B.of_string s in
|
||||
OUnit.assert_equal ~cmp:B.eq ~printer:B.to_string b b'
|
||||
|
||||
let test3 () =
|
||||
let b = B.dict_of_list [
|
||||
"a", B.I 1;
|
||||
"b", B.S "bbbb";
|
||||
"l", B.L [B.I 0; B.I 0; B.S "zero\n\t \x00"];
|
||||
"d", B.dict_of_list ["foo", B.S "bar"];
|
||||
] in
|
||||
let s = B.to_string b in
|
||||
(* Printf.printf "serialized to %s\n" s; *)
|
||||
let b' = B.of_string s in
|
||||
OUnit.assert_equal ~cmp:B.eq ~printer:B.to_string b b'
|
||||
|
||||
let suite =
|
||||
"test_bencode" >:::
|
||||
[ "test1" >:: test1;
|
||||
"test2" >:: test2;
|
||||
"test3" >:: test3;
|
||||
]
|
||||
Loading…
Add table
Reference in a new issue