add Containers_bencode

A small module to parse/print Bencode values. Bencode is really simple
and can embed binary strings easily, unlike JSON.
This commit is contained in:
Simon Cruanes 2022-06-09 20:53:10 -04:00
parent e63383174e
commit 911e9faff7
No known key found for this signature in database
GPG key ID: EBFFF6F283F3A2B4
5 changed files with 276 additions and 3 deletions

View file

@ -17,7 +17,7 @@ depends: [
"seq" # compat
"either" # compat
"qtest" { with-test }
"qcheck" { with-test }
"qcheck" { >= "0.14" & with-test }
"ounit" { with-test }
"iter" { with-test }
"gen" { with-test }

View file

@ -7,7 +7,7 @@
(rule
(targets run_qtest.ml)
(deps ./make.exe (source_tree ../src))
(action (run ./make.exe -target %{targets} ../src/core ../src/unix/)))
(action (run ./make.exe -target %{targets} ../src/core ../src/bencode ../src/unix/)))
(executable
(name run_qtest)
@ -15,7 +15,7 @@
(modules run_qtest)
; disable some warnings in qtests
(flags :standard -warn-error -a -w -3-33-35-27-39-50)
(libraries iter gen qcheck containers containers.unix unix uutf threads))
(libraries iter gen qcheck containers containers.unix containers.bencode unix uutf threads))
(rule
(alias runtest)

View file

@ -0,0 +1,214 @@
module Str_map = Map.Make(String)
type t =
| Int of int64
| String of string
| List of t list
| Map of t Str_map.t
let rec equal t1 t2 = match t1, t2 with
| Int i1, Int i2 -> i1 = i2
| String s1, String s2 -> s1 = s2
| List l1, List l2 ->
(try List.for_all2 equal l1 l2 with Invalid_argument _ -> false)
| Map d1, Map d2 -> Str_map.equal equal d1 d2
| (Int _ | String _ | List _ | Map _), _ -> false
let rec hash t =
let module H = CCHash in
match t with
| Int i -> H.int64 i
| String s -> H.combine2 10 (H.string s)
| List l -> H.combine2 20 (H.list hash l)
| Map l ->
H.combine2 30
(H.iter (H.pair H.string hash) @@
(fun k -> Str_map.iter (fun x y -> k(x,y)) l))
let int64 i : t = Int i
let int i : t = int64 (Int64.of_int i)
let string s : t = String s
let list l : t = List l
let map m : t = Map m
let map_of_list l : t =
map @@ List.fold_left (fun m (k,v) -> Str_map.add k v m) Str_map.empty l
let rec pp_debug out (self:t) : unit =
let fpf = Format.fprintf in
match self with
| Int i -> fpf out "%Ld" i
| String s -> fpf out "%S" s
| List l ->
fpf out "[@[<hv>";
List.iteri (fun i v ->
if i>0 then fpf out ";@ ";
pp_debug out v) l;
fpf out "@]]"
| Map m ->
fpf out "{@[<hv>";
let i = ref 0 in
Str_map.iter (fun k v ->
if !i>0 then fpf out ";@ ";
incr i;
fpf out "@[<1>%S:@ %a@]" k pp_debug v) m;
fpf out "@]}"
let to_string_debug self = Format.asprintf "%a" pp_debug self
module Encode = struct
let bpf = Printf.bprintf
let fpf = Printf.fprintf
let rec to_buffer (buf:Buffer.t) (self:t) : unit =
let recurse = to_buffer buf in
let addc = Buffer.add_char in
match self with
| Int i -> bpf buf "i%Lde" i
| String s -> bpf buf "%d:%s" (String.length s) s
| List l -> addc buf 'l'; List.iter recurse l; addc buf 'e'
| Map l ->
addc buf 'd';
Str_map.iter (fun k v -> bpf buf "%d:%s%a" (String.length k) k to_buffer v) l;
addc buf 'e'
let to_string (self:t) : string =
let buf = Buffer.create 32 in
to_buffer buf self;
Buffer.contents buf
let rec to_chan (oc:out_channel) (self:t) : unit =
let recurse = to_chan oc in
let addc = output_char in
match self with
| Int i -> fpf oc "i%Lde" i
| String s -> fpf oc "%d:%s" (String.length s) s
| List l -> addc oc 'l'; List.iter recurse l; addc oc 'e'
| Map l ->
addc oc 'd';
Str_map.iter (fun k v -> fpf oc "%d:%s%a" (String.length k) k to_chan v) l;
addc oc 'e'
let to_fmt out self =
Format.pp_print_string out (to_string self)
end
module Decode = struct
exception Fail
let of_string s =
let i = ref 0 in
let[@inline] check_not_eof() =
if !i >= String.length s then raise_notrace Fail;
in
let rec top () : t =
check_not_eof ();
match String.unsafe_get s !i with
| 'l' ->
incr i;
read_list []
| 'd' ->
incr i;
read_map Str_map.empty
| 'i' -> incr i; let n = read_int 'e' true 0 in int n
| '0' .. '9' -> String (parse_str_len ())
| _ -> raise_notrace Fail
(* read integer until char [stop] is met, consume [stop], return int *)
and read_int stop sign n : int =
check_not_eof ();
match String.unsafe_get s !i with
| c when c == stop -> incr i; if sign then n else -n
| '-' when stop == 'e' && sign && n=0 ->
incr i; read_int stop false n
| '0' .. '9' as c ->
incr i; read_int stop sign (Char.code c - Char.code '0' + 10 * n)
| _ -> raise_notrace Fail
and parse_str_len () : string =
let n = read_int ':' true 0 in
if !i + n > String.length s then raise_notrace Fail;
let s = String.sub s !i n in
i := !i + n;
s
and read_list acc =
check_not_eof();
match String.unsafe_get s !i with
| 'e' -> incr i; List (List.rev acc)
| _ -> let x = top() in read_list (x::acc)
and read_map acc =
check_not_eof();
match String.unsafe_get s !i with
| 'e' -> incr i; Map acc
| _ ->
let k = parse_str_len () in
let v = top() in
read_map (Str_map.add k v acc)
in
try Some (top())
with Fail -> None
let of_string_exn s =
match of_string s with
| Some x -> x
| None -> failwith "bencode.decode: invalid string"
end
(*$= & ~printer:to_string_debug
(map_of_list []) (Decode.of_string_exn "de")
(list [int 1; int 2; string "foo"]) (Decode.of_string_exn "li1ei2e3:fooe")
*)
(*$inject
module B = Containers_bencode
let rec size = function
| Int _ | String _ -> 1
| List l -> List.fold_left (fun n x -> n + size x) 0 l
| Map m -> Str_map.fold(fun _ v n -> size v + n) m 0
let g_rand_b =
Q.Gen.(
sized_size (0--7) @@ fix @@ fun self n ->
let str n = string_size ~gen:char (0 -- n) in
let base = [
int >|= B.int;
str 100 >|= B.string;
] in
match n with
| 0 -> oneof base
| n ->
frequency @@
List.map (fun x -> 2, x) base @
[ 1, list_size (0 -- 10) (self (n-1)) >|= B.list;
1, list_size (0 -- 10) (pair (str 10) (self (n-1)) ) >|= B.map_of_list;
]
)
let rec shrink_b self = Q.(Iter.(
match self with
| Int i -> Shrink.int64 i >|= B.int64
| String s -> Shrink.string s >|= B.string
| List l -> Shrink.list ~shrink:shrink_b l >|= B.list
| Map l ->
let l = Str_map.fold (fun k v l -> (k,v) :: l) l [] in
Shrink.list ~shrink:(fun (k,v) ->
(Shrink.string k >|= fun k -> k,v) <+>
(shrink_b v >|= fun v -> k,v))
l
>|= B.map_of_list
))
let rand_b = Q.make ~print:to_string_debug ~stats:["size", size]
~shrink:shrink_b g_rand_b
*)
(*$Q
rand_b (fun b -> \
let s=Encode.to_string b in \
equal (Decode.of_string_exn s) b)
*)

View file

@ -0,0 +1,54 @@
(** Basic Bencode decoder/encoder.
See https://en.wikipedia.org/wiki/Bencode .
@since NEXT_RELEASE *)
module Str_map : module type of Map.Make(String)
type t =
| Int of int64
| String of string
| List of t list
| Map of t Str_map.t
val equal : t -> t -> bool
val hash : t -> int
val pp_debug : Format.formatter -> t -> unit
(** Printer for diagnostic/human consumption *)
val to_string_debug : t -> string
val int : int -> t
val int64 : int64 -> t
val string : string -> t
val list : t list -> t
val map_of_list : (string * t) list -> t
val map : t Str_map.t -> t
(** Encoding *)
module Encode : sig
val to_string : t -> string
val to_buffer : Buffer.t -> t -> unit
val to_chan : out_channel -> t -> unit
val to_fmt : Format.formatter -> t -> unit
end
(** Decoding *)
module Decode : sig
val of_string : string -> t option
val of_string_exn : string -> t
(** Parse string.
@raise Failure if the string is not valid bencode. *)
end

5
src/bencode/dune Normal file
View file

@ -0,0 +1,5 @@
(library
(name containers_bencode)
(public_name containers.bencode)
(libraries containers)
(synopsis "Bencode codec for containers (the format for bittorrent files)"))