From 911e9faff7a2c658b4e8796461b0c263d16f2ecf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 9 Jun 2022 20:53:10 -0400 Subject: [PATCH] add Containers_bencode A small module to parse/print Bencode values. Bencode is really simple and can embed binary strings easily, unlike JSON. --- containers.opam | 2 +- qtest/dune | 4 +- src/bencode/containers_bencode.ml | 214 +++++++++++++++++++++++++++++ src/bencode/containers_bencode.mli | 54 ++++++++ src/bencode/dune | 5 + 5 files changed, 276 insertions(+), 3 deletions(-) create mode 100644 src/bencode/containers_bencode.ml create mode 100644 src/bencode/containers_bencode.mli create mode 100644 src/bencode/dune diff --git a/containers.opam b/containers.opam index a0cb951a..333d4a28 100644 --- a/containers.opam +++ b/containers.opam @@ -17,7 +17,7 @@ depends: [ "seq" # compat "either" # compat "qtest" { with-test } - "qcheck" { with-test } + "qcheck" { >= "0.14" & with-test } "ounit" { with-test } "iter" { with-test } "gen" { with-test } diff --git a/qtest/dune b/qtest/dune index f55c6a09..b74244da 100644 --- a/qtest/dune +++ b/qtest/dune @@ -7,7 +7,7 @@ (rule (targets run_qtest.ml) (deps ./make.exe (source_tree ../src)) - (action (run ./make.exe -target %{targets} ../src/core ../src/unix/))) + (action (run ./make.exe -target %{targets} ../src/core ../src/bencode ../src/unix/))) (executable (name run_qtest) @@ -15,7 +15,7 @@ (modules run_qtest) ; disable some warnings in qtests (flags :standard -warn-error -a -w -3-33-35-27-39-50) - (libraries iter gen qcheck containers containers.unix unix uutf threads)) + (libraries iter gen qcheck containers containers.unix containers.bencode unix uutf threads)) (rule (alias runtest) diff --git a/src/bencode/containers_bencode.ml b/src/bencode/containers_bencode.ml new file mode 100644 index 00000000..1ec9f692 --- /dev/null +++ b/src/bencode/containers_bencode.ml @@ -0,0 +1,214 @@ +module Str_map = Map.Make(String) + +type t = + | Int of int64 + | String of string + | List of t list + | Map of t Str_map.t + +let rec equal t1 t2 = match t1, t2 with + | Int i1, Int i2 -> i1 = i2 + | String s1, String s2 -> s1 = s2 + | List l1, List l2 -> + (try List.for_all2 equal l1 l2 with Invalid_argument _ -> false) + | Map d1, Map d2 -> Str_map.equal equal d1 d2 + | (Int _ | String _ | List _ | Map _), _ -> false + +let rec hash t = + let module H = CCHash in + match t with + | Int i -> H.int64 i + | String s -> H.combine2 10 (H.string s) + | List l -> H.combine2 20 (H.list hash l) + | Map l -> + H.combine2 30 + (H.iter (H.pair H.string hash) @@ + (fun k -> Str_map.iter (fun x y -> k(x,y)) l)) + +let int64 i : t = Int i +let int i : t = int64 (Int64.of_int i) +let string s : t = String s +let list l : t = List l +let map m : t = Map m +let map_of_list l : t = + map @@ List.fold_left (fun m (k,v) -> Str_map.add k v m) Str_map.empty l + +let rec pp_debug out (self:t) : unit = + let fpf = Format.fprintf in + match self with + | Int i -> fpf out "%Ld" i + | String s -> fpf out "%S" s + | List l -> + fpf out "[@["; + List.iteri (fun i v -> + if i>0 then fpf out ";@ "; + pp_debug out v) l; + fpf out "@]]" + | Map m -> + fpf out "{@["; + let i = ref 0 in + Str_map.iter (fun k v -> + if !i>0 then fpf out ";@ "; + incr i; + fpf out "@[<1>%S:@ %a@]" k pp_debug v) m; + fpf out "@]}" + +let to_string_debug self = Format.asprintf "%a" pp_debug self + +module Encode = struct + let bpf = Printf.bprintf + let fpf = Printf.fprintf + + let rec to_buffer (buf:Buffer.t) (self:t) : unit = + let recurse = to_buffer buf in + let addc = Buffer.add_char in + match self with + | Int i -> bpf buf "i%Lde" i + | String s -> bpf buf "%d:%s" (String.length s) s + | List l -> addc buf 'l'; List.iter recurse l; addc buf 'e' + | Map l -> + addc buf 'd'; + Str_map.iter (fun k v -> bpf buf "%d:%s%a" (String.length k) k to_buffer v) l; + addc buf 'e' + + let to_string (self:t) : string = + let buf = Buffer.create 32 in + to_buffer buf self; + Buffer.contents buf + + let rec to_chan (oc:out_channel) (self:t) : unit = + let recurse = to_chan oc in + let addc = output_char in + match self with + | Int i -> fpf oc "i%Lde" i + | String s -> fpf oc "%d:%s" (String.length s) s + | List l -> addc oc 'l'; List.iter recurse l; addc oc 'e' + | Map l -> + addc oc 'd'; + Str_map.iter (fun k v -> fpf oc "%d:%s%a" (String.length k) k to_chan v) l; + addc oc 'e' + + let to_fmt out self = + Format.pp_print_string out (to_string self) +end + +module Decode = struct + exception Fail + + let of_string s = + let i = ref 0 in + + let[@inline] check_not_eof() = + if !i >= String.length s then raise_notrace Fail; + in + + let rec top () : t = + check_not_eof (); + match String.unsafe_get s !i with + | 'l' -> + incr i; + read_list [] + | 'd' -> + incr i; + read_map Str_map.empty + | 'i' -> incr i; let n = read_int 'e' true 0 in int n + | '0' .. '9' -> String (parse_str_len ()) + | _ -> raise_notrace Fail + + (* read integer until char [stop] is met, consume [stop], return int *) + and read_int stop sign n : int = + check_not_eof (); + match String.unsafe_get s !i with + | c when c == stop -> incr i; if sign then n else -n + | '-' when stop == 'e' && sign && n=0 -> + incr i; read_int stop false n + | '0' .. '9' as c -> + incr i; read_int stop sign (Char.code c - Char.code '0' + 10 * n) + | _ -> raise_notrace Fail + + and parse_str_len () : string = + let n = read_int ':' true 0 in + if !i + n > String.length s then raise_notrace Fail; + let s = String.sub s !i n in + i := !i + n; + s + + and read_list acc = + check_not_eof(); + match String.unsafe_get s !i with + | 'e' -> incr i; List (List.rev acc) + | _ -> let x = top() in read_list (x::acc) + + and read_map acc = + check_not_eof(); + match String.unsafe_get s !i with + | 'e' -> incr i; Map acc + | _ -> + let k = parse_str_len () in + let v = top() in + read_map (Str_map.add k v acc) + in + + try Some (top()) + with Fail -> None + + let of_string_exn s = + match of_string s with + | Some x -> x + | None -> failwith "bencode.decode: invalid string" +end + +(*$= & ~printer:to_string_debug + (map_of_list []) (Decode.of_string_exn "de") + (list [int 1; int 2; string "foo"]) (Decode.of_string_exn "li1ei2e3:fooe") +*) + +(*$inject + module B = Containers_bencode + + let rec size = function + | Int _ | String _ -> 1 + | List l -> List.fold_left (fun n x -> n + size x) 0 l + | Map m -> Str_map.fold(fun _ v n -> size v + n) m 0 + + let g_rand_b = + Q.Gen.( + sized_size (0--7) @@ fix @@ fun self n -> + let str n = string_size ~gen:char (0 -- n) in + let base = [ + int >|= B.int; + str 100 >|= B.string; + ] in + match n with + | 0 -> oneof base + | n -> + frequency @@ + List.map (fun x -> 2, x) base @ + [ 1, list_size (0 -- 10) (self (n-1)) >|= B.list; + 1, list_size (0 -- 10) (pair (str 10) (self (n-1)) ) >|= B.map_of_list; + ] + ) + + let rec shrink_b self = Q.(Iter.( + match self with + | Int i -> Shrink.int64 i >|= B.int64 + | String s -> Shrink.string s >|= B.string + | List l -> Shrink.list ~shrink:shrink_b l >|= B.list + | Map l -> + let l = Str_map.fold (fun k v l -> (k,v) :: l) l [] in + Shrink.list ~shrink:(fun (k,v) -> + (Shrink.string k >|= fun k -> k,v) <+> + (shrink_b v >|= fun v -> k,v)) + l + >|= B.map_of_list + )) + + let rand_b = Q.make ~print:to_string_debug ~stats:["size", size] + ~shrink:shrink_b g_rand_b + *) + +(*$Q + rand_b (fun b -> \ + let s=Encode.to_string b in \ + equal (Decode.of_string_exn s) b) +*) diff --git a/src/bencode/containers_bencode.mli b/src/bencode/containers_bencode.mli new file mode 100644 index 00000000..ac55c02a --- /dev/null +++ b/src/bencode/containers_bencode.mli @@ -0,0 +1,54 @@ +(** Basic Bencode decoder/encoder. + + See https://en.wikipedia.org/wiki/Bencode . + + @since NEXT_RELEASE *) + +module Str_map : module type of Map.Make(String) + +type t = + | Int of int64 + | String of string + | List of t list + | Map of t Str_map.t + +val equal : t -> t -> bool + +val hash : t -> int + +val pp_debug : Format.formatter -> t -> unit +(** Printer for diagnostic/human consumption *) + +val to_string_debug : t -> string + +val int : int -> t + +val int64 : int64 -> t + +val string : string -> t + +val list : t list -> t + +val map_of_list : (string * t) list -> t + +val map : t Str_map.t -> t + +(** Encoding *) +module Encode : sig + val to_string : t -> string + + val to_buffer : Buffer.t -> t -> unit + + val to_chan : out_channel -> t -> unit + + val to_fmt : Format.formatter -> t -> unit +end + +(** Decoding *) +module Decode : sig + val of_string : string -> t option + + val of_string_exn : string -> t + (** Parse string. + @raise Failure if the string is not valid bencode. *) +end diff --git a/src/bencode/dune b/src/bencode/dune new file mode 100644 index 00000000..6be34f3f --- /dev/null +++ b/src/bencode/dune @@ -0,0 +1,5 @@ +(library + (name containers_bencode) + (public_name containers.bencode) + (libraries containers) + (synopsis "Bencode codec for containers (the format for bittorrent files)"))