mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 19:25:28 -05:00
add Containers_bencode
A small module to parse/print Bencode values. Bencode is really simple and can embed binary strings easily, unlike JSON.
This commit is contained in:
parent
e63383174e
commit
911e9faff7
5 changed files with 276 additions and 3 deletions
|
|
@ -17,7 +17,7 @@ depends: [
|
||||||
"seq" # compat
|
"seq" # compat
|
||||||
"either" # compat
|
"either" # compat
|
||||||
"qtest" { with-test }
|
"qtest" { with-test }
|
||||||
"qcheck" { with-test }
|
"qcheck" { >= "0.14" & with-test }
|
||||||
"ounit" { with-test }
|
"ounit" { with-test }
|
||||||
"iter" { with-test }
|
"iter" { with-test }
|
||||||
"gen" { with-test }
|
"gen" { with-test }
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
(rule
|
(rule
|
||||||
(targets run_qtest.ml)
|
(targets run_qtest.ml)
|
||||||
(deps ./make.exe (source_tree ../src))
|
(deps ./make.exe (source_tree ../src))
|
||||||
(action (run ./make.exe -target %{targets} ../src/core ../src/unix/)))
|
(action (run ./make.exe -target %{targets} ../src/core ../src/bencode ../src/unix/)))
|
||||||
|
|
||||||
(executable
|
(executable
|
||||||
(name run_qtest)
|
(name run_qtest)
|
||||||
|
|
@ -15,7 +15,7 @@
|
||||||
(modules run_qtest)
|
(modules run_qtest)
|
||||||
; disable some warnings in qtests
|
; disable some warnings in qtests
|
||||||
(flags :standard -warn-error -a -w -3-33-35-27-39-50)
|
(flags :standard -warn-error -a -w -3-33-35-27-39-50)
|
||||||
(libraries iter gen qcheck containers containers.unix unix uutf threads))
|
(libraries iter gen qcheck containers containers.unix containers.bencode unix uutf threads))
|
||||||
|
|
||||||
(rule
|
(rule
|
||||||
(alias runtest)
|
(alias runtest)
|
||||||
|
|
|
||||||
214
src/bencode/containers_bencode.ml
Normal file
214
src/bencode/containers_bencode.ml
Normal file
|
|
@ -0,0 +1,214 @@
|
||||||
|
module Str_map = Map.Make(String)
|
||||||
|
|
||||||
|
type t =
|
||||||
|
| Int of int64
|
||||||
|
| String of string
|
||||||
|
| List of t list
|
||||||
|
| Map of t Str_map.t
|
||||||
|
|
||||||
|
let rec equal t1 t2 = match t1, t2 with
|
||||||
|
| Int i1, Int i2 -> i1 = i2
|
||||||
|
| String s1, String s2 -> s1 = s2
|
||||||
|
| List l1, List l2 ->
|
||||||
|
(try List.for_all2 equal l1 l2 with Invalid_argument _ -> false)
|
||||||
|
| Map d1, Map d2 -> Str_map.equal equal d1 d2
|
||||||
|
| (Int _ | String _ | List _ | Map _), _ -> false
|
||||||
|
|
||||||
|
let rec hash t =
|
||||||
|
let module H = CCHash in
|
||||||
|
match t with
|
||||||
|
| Int i -> H.int64 i
|
||||||
|
| String s -> H.combine2 10 (H.string s)
|
||||||
|
| List l -> H.combine2 20 (H.list hash l)
|
||||||
|
| Map l ->
|
||||||
|
H.combine2 30
|
||||||
|
(H.iter (H.pair H.string hash) @@
|
||||||
|
(fun k -> Str_map.iter (fun x y -> k(x,y)) l))
|
||||||
|
|
||||||
|
let int64 i : t = Int i
|
||||||
|
let int i : t = int64 (Int64.of_int i)
|
||||||
|
let string s : t = String s
|
||||||
|
let list l : t = List l
|
||||||
|
let map m : t = Map m
|
||||||
|
let map_of_list l : t =
|
||||||
|
map @@ List.fold_left (fun m (k,v) -> Str_map.add k v m) Str_map.empty l
|
||||||
|
|
||||||
|
let rec pp_debug out (self:t) : unit =
|
||||||
|
let fpf = Format.fprintf in
|
||||||
|
match self with
|
||||||
|
| Int i -> fpf out "%Ld" i
|
||||||
|
| String s -> fpf out "%S" s
|
||||||
|
| List l ->
|
||||||
|
fpf out "[@[<hv>";
|
||||||
|
List.iteri (fun i v ->
|
||||||
|
if i>0 then fpf out ";@ ";
|
||||||
|
pp_debug out v) l;
|
||||||
|
fpf out "@]]"
|
||||||
|
| Map m ->
|
||||||
|
fpf out "{@[<hv>";
|
||||||
|
let i = ref 0 in
|
||||||
|
Str_map.iter (fun k v ->
|
||||||
|
if !i>0 then fpf out ";@ ";
|
||||||
|
incr i;
|
||||||
|
fpf out "@[<1>%S:@ %a@]" k pp_debug v) m;
|
||||||
|
fpf out "@]}"
|
||||||
|
|
||||||
|
let to_string_debug self = Format.asprintf "%a" pp_debug self
|
||||||
|
|
||||||
|
module Encode = struct
|
||||||
|
let bpf = Printf.bprintf
|
||||||
|
let fpf = Printf.fprintf
|
||||||
|
|
||||||
|
let rec to_buffer (buf:Buffer.t) (self:t) : unit =
|
||||||
|
let recurse = to_buffer buf in
|
||||||
|
let addc = Buffer.add_char in
|
||||||
|
match self with
|
||||||
|
| Int i -> bpf buf "i%Lde" i
|
||||||
|
| String s -> bpf buf "%d:%s" (String.length s) s
|
||||||
|
| List l -> addc buf 'l'; List.iter recurse l; addc buf 'e'
|
||||||
|
| Map l ->
|
||||||
|
addc buf 'd';
|
||||||
|
Str_map.iter (fun k v -> bpf buf "%d:%s%a" (String.length k) k to_buffer v) l;
|
||||||
|
addc buf 'e'
|
||||||
|
|
||||||
|
let to_string (self:t) : string =
|
||||||
|
let buf = Buffer.create 32 in
|
||||||
|
to_buffer buf self;
|
||||||
|
Buffer.contents buf
|
||||||
|
|
||||||
|
let rec to_chan (oc:out_channel) (self:t) : unit =
|
||||||
|
let recurse = to_chan oc in
|
||||||
|
let addc = output_char in
|
||||||
|
match self with
|
||||||
|
| Int i -> fpf oc "i%Lde" i
|
||||||
|
| String s -> fpf oc "%d:%s" (String.length s) s
|
||||||
|
| List l -> addc oc 'l'; List.iter recurse l; addc oc 'e'
|
||||||
|
| Map l ->
|
||||||
|
addc oc 'd';
|
||||||
|
Str_map.iter (fun k v -> fpf oc "%d:%s%a" (String.length k) k to_chan v) l;
|
||||||
|
addc oc 'e'
|
||||||
|
|
||||||
|
let to_fmt out self =
|
||||||
|
Format.pp_print_string out (to_string self)
|
||||||
|
end
|
||||||
|
|
||||||
|
module Decode = struct
|
||||||
|
exception Fail
|
||||||
|
|
||||||
|
let of_string s =
|
||||||
|
let i = ref 0 in
|
||||||
|
|
||||||
|
let[@inline] check_not_eof() =
|
||||||
|
if !i >= String.length s then raise_notrace Fail;
|
||||||
|
in
|
||||||
|
|
||||||
|
let rec top () : t =
|
||||||
|
check_not_eof ();
|
||||||
|
match String.unsafe_get s !i with
|
||||||
|
| 'l' ->
|
||||||
|
incr i;
|
||||||
|
read_list []
|
||||||
|
| 'd' ->
|
||||||
|
incr i;
|
||||||
|
read_map Str_map.empty
|
||||||
|
| 'i' -> incr i; let n = read_int 'e' true 0 in int n
|
||||||
|
| '0' .. '9' -> String (parse_str_len ())
|
||||||
|
| _ -> raise_notrace Fail
|
||||||
|
|
||||||
|
(* read integer until char [stop] is met, consume [stop], return int *)
|
||||||
|
and read_int stop sign n : int =
|
||||||
|
check_not_eof ();
|
||||||
|
match String.unsafe_get s !i with
|
||||||
|
| c when c == stop -> incr i; if sign then n else -n
|
||||||
|
| '-' when stop == 'e' && sign && n=0 ->
|
||||||
|
incr i; read_int stop false n
|
||||||
|
| '0' .. '9' as c ->
|
||||||
|
incr i; read_int stop sign (Char.code c - Char.code '0' + 10 * n)
|
||||||
|
| _ -> raise_notrace Fail
|
||||||
|
|
||||||
|
and parse_str_len () : string =
|
||||||
|
let n = read_int ':' true 0 in
|
||||||
|
if !i + n > String.length s then raise_notrace Fail;
|
||||||
|
let s = String.sub s !i n in
|
||||||
|
i := !i + n;
|
||||||
|
s
|
||||||
|
|
||||||
|
and read_list acc =
|
||||||
|
check_not_eof();
|
||||||
|
match String.unsafe_get s !i with
|
||||||
|
| 'e' -> incr i; List (List.rev acc)
|
||||||
|
| _ -> let x = top() in read_list (x::acc)
|
||||||
|
|
||||||
|
and read_map acc =
|
||||||
|
check_not_eof();
|
||||||
|
match String.unsafe_get s !i with
|
||||||
|
| 'e' -> incr i; Map acc
|
||||||
|
| _ ->
|
||||||
|
let k = parse_str_len () in
|
||||||
|
let v = top() in
|
||||||
|
read_map (Str_map.add k v acc)
|
||||||
|
in
|
||||||
|
|
||||||
|
try Some (top())
|
||||||
|
with Fail -> None
|
||||||
|
|
||||||
|
let of_string_exn s =
|
||||||
|
match of_string s with
|
||||||
|
| Some x -> x
|
||||||
|
| None -> failwith "bencode.decode: invalid string"
|
||||||
|
end
|
||||||
|
|
||||||
|
(*$= & ~printer:to_string_debug
|
||||||
|
(map_of_list []) (Decode.of_string_exn "de")
|
||||||
|
(list [int 1; int 2; string "foo"]) (Decode.of_string_exn "li1ei2e3:fooe")
|
||||||
|
*)
|
||||||
|
|
||||||
|
(*$inject
|
||||||
|
module B = Containers_bencode
|
||||||
|
|
||||||
|
let rec size = function
|
||||||
|
| Int _ | String _ -> 1
|
||||||
|
| List l -> List.fold_left (fun n x -> n + size x) 0 l
|
||||||
|
| Map m -> Str_map.fold(fun _ v n -> size v + n) m 0
|
||||||
|
|
||||||
|
let g_rand_b =
|
||||||
|
Q.Gen.(
|
||||||
|
sized_size (0--7) @@ fix @@ fun self n ->
|
||||||
|
let str n = string_size ~gen:char (0 -- n) in
|
||||||
|
let base = [
|
||||||
|
int >|= B.int;
|
||||||
|
str 100 >|= B.string;
|
||||||
|
] in
|
||||||
|
match n with
|
||||||
|
| 0 -> oneof base
|
||||||
|
| n ->
|
||||||
|
frequency @@
|
||||||
|
List.map (fun x -> 2, x) base @
|
||||||
|
[ 1, list_size (0 -- 10) (self (n-1)) >|= B.list;
|
||||||
|
1, list_size (0 -- 10) (pair (str 10) (self (n-1)) ) >|= B.map_of_list;
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
let rec shrink_b self = Q.(Iter.(
|
||||||
|
match self with
|
||||||
|
| Int i -> Shrink.int64 i >|= B.int64
|
||||||
|
| String s -> Shrink.string s >|= B.string
|
||||||
|
| List l -> Shrink.list ~shrink:shrink_b l >|= B.list
|
||||||
|
| Map l ->
|
||||||
|
let l = Str_map.fold (fun k v l -> (k,v) :: l) l [] in
|
||||||
|
Shrink.list ~shrink:(fun (k,v) ->
|
||||||
|
(Shrink.string k >|= fun k -> k,v) <+>
|
||||||
|
(shrink_b v >|= fun v -> k,v))
|
||||||
|
l
|
||||||
|
>|= B.map_of_list
|
||||||
|
))
|
||||||
|
|
||||||
|
let rand_b = Q.make ~print:to_string_debug ~stats:["size", size]
|
||||||
|
~shrink:shrink_b g_rand_b
|
||||||
|
*)
|
||||||
|
|
||||||
|
(*$Q
|
||||||
|
rand_b (fun b -> \
|
||||||
|
let s=Encode.to_string b in \
|
||||||
|
equal (Decode.of_string_exn s) b)
|
||||||
|
*)
|
||||||
54
src/bencode/containers_bencode.mli
Normal file
54
src/bencode/containers_bencode.mli
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
(** Basic Bencode decoder/encoder.
|
||||||
|
|
||||||
|
See https://en.wikipedia.org/wiki/Bencode .
|
||||||
|
|
||||||
|
@since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
module Str_map : module type of Map.Make(String)
|
||||||
|
|
||||||
|
type t =
|
||||||
|
| Int of int64
|
||||||
|
| String of string
|
||||||
|
| List of t list
|
||||||
|
| Map of t Str_map.t
|
||||||
|
|
||||||
|
val equal : t -> t -> bool
|
||||||
|
|
||||||
|
val hash : t -> int
|
||||||
|
|
||||||
|
val pp_debug : Format.formatter -> t -> unit
|
||||||
|
(** Printer for diagnostic/human consumption *)
|
||||||
|
|
||||||
|
val to_string_debug : t -> string
|
||||||
|
|
||||||
|
val int : int -> t
|
||||||
|
|
||||||
|
val int64 : int64 -> t
|
||||||
|
|
||||||
|
val string : string -> t
|
||||||
|
|
||||||
|
val list : t list -> t
|
||||||
|
|
||||||
|
val map_of_list : (string * t) list -> t
|
||||||
|
|
||||||
|
val map : t Str_map.t -> t
|
||||||
|
|
||||||
|
(** Encoding *)
|
||||||
|
module Encode : sig
|
||||||
|
val to_string : t -> string
|
||||||
|
|
||||||
|
val to_buffer : Buffer.t -> t -> unit
|
||||||
|
|
||||||
|
val to_chan : out_channel -> t -> unit
|
||||||
|
|
||||||
|
val to_fmt : Format.formatter -> t -> unit
|
||||||
|
end
|
||||||
|
|
||||||
|
(** Decoding *)
|
||||||
|
module Decode : sig
|
||||||
|
val of_string : string -> t option
|
||||||
|
|
||||||
|
val of_string_exn : string -> t
|
||||||
|
(** Parse string.
|
||||||
|
@raise Failure if the string is not valid bencode. *)
|
||||||
|
end
|
||||||
5
src/bencode/dune
Normal file
5
src/bencode/dune
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
(library
|
||||||
|
(name containers_bencode)
|
||||||
|
(public_name containers.bencode)
|
||||||
|
(libraries containers)
|
||||||
|
(synopsis "Bencode codec for containers (the format for bittorrent files)"))
|
||||||
Loading…
Add table
Reference in a new issue