mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
much simpler Bij module:removed the generic encoding/decoding
stuff, replaced by translation to/from Bencode; switch now based on strings rather than chars to choose branch; fix uses a lazy value rather than a function
This commit is contained in:
parent
ec066c5386
commit
e1064845a7
4 changed files with 145 additions and 515 deletions
|
|
@ -5,7 +5,7 @@
|
||||||
#load "containers.cma";;
|
#load "containers.cma";;
|
||||||
#require "threads";;
|
#require "threads";;
|
||||||
#load "thread_containers.cma";;
|
#load "thread_containers.cma";;
|
||||||
open Gen.Infix;;
|
open Sequence.Infix;;
|
||||||
#install_printer Bencode.pretty;;
|
#install_printer Bencode.pretty;;
|
||||||
(* vim:syntax=ocaml:
|
(* vim:syntax=ocaml:
|
||||||
*)
|
*)
|
||||||
|
|
|
||||||
477
bij.ml
477
bij.ml
|
|
@ -40,7 +40,8 @@ type _ t =
|
||||||
| Quint : 'a t * 'b t * 'c t * 'd t * 'e t -> ('a * 'b * 'c * 'd * 'e) t
|
| Quint : 'a t * 'b t * 'c t * 'd t * 'e t -> ('a * 'b * 'c * 'd * 'e) t
|
||||||
| Guard : ('a -> bool) * 'a t -> 'a t
|
| Guard : ('a -> bool) * 'a t -> 'a t
|
||||||
| Map : ('a -> 'b) * ('b -> 'a) * 'b t -> 'a t
|
| Map : ('a -> 'b) * ('b -> 'a) * 'b t -> 'a t
|
||||||
| Switch : ('a -> char * 'a inject_branch) * (char -> 'a extract_branch) -> 'a t
|
| Switch : ('a -> string * 'a inject_branch) *
|
||||||
|
(string-> 'a extract_branch) -> 'a t
|
||||||
and _ inject_branch =
|
and _ inject_branch =
|
||||||
| BranchTo : 'b t * 'b -> 'a inject_branch
|
| BranchTo : 'b t * 'b -> 'a inject_branch
|
||||||
and _ extract_branch =
|
and _ extract_branch =
|
||||||
|
|
@ -69,10 +70,8 @@ let switch ~inject ~extract = Switch (inject, extract)
|
||||||
|
|
||||||
(** {2 Exceptions} *)
|
(** {2 Exceptions} *)
|
||||||
|
|
||||||
exception EOF
|
|
||||||
|
|
||||||
exception EncodingError of string
|
exception EncodingError of string
|
||||||
(** Raised when decoding is impossible *)
|
(** Raised when encoding is impossible *)
|
||||||
|
|
||||||
exception DecodingError of string
|
exception DecodingError of string
|
||||||
(** Raised when decoding is impossible *)
|
(** Raised when decoding is impossible *)
|
||||||
|
|
@ -80,7 +79,7 @@ exception DecodingError of string
|
||||||
(** {2 Helpers} *)
|
(** {2 Helpers} *)
|
||||||
|
|
||||||
let fix f =
|
let fix f =
|
||||||
let rec bij = lazy (f (fun () -> Lazy.force bij)) in
|
let rec bij = lazy (f bij) in
|
||||||
Lazy.force bij
|
Lazy.force bij
|
||||||
|
|
||||||
let with_version v t =
|
let with_version v t =
|
||||||
|
|
@ -107,389 +106,103 @@ let hashtbl ma mb =
|
||||||
h)
|
h)
|
||||||
(list_ (pair ma mb))
|
(list_ (pair ma mb))
|
||||||
|
|
||||||
(** {2 Source of parsing} *)
|
(** {2 Translations} *)
|
||||||
|
|
||||||
module type SOURCE = sig
|
module TrBencode = struct
|
||||||
type t
|
module B = Bencode
|
||||||
|
|
||||||
val eof : t -> bool
|
let rec encode: type a. bij:a t -> a -> B.t =
|
||||||
(** End of input reached? *)
|
fun ~bij x -> match bij, x with
|
||||||
|
| Unit, () -> B.I 0
|
||||||
|
| String, s -> B.S s
|
||||||
|
| Int, i -> B.I i
|
||||||
|
| Float, f -> B.S (string_of_float f)
|
||||||
|
| Bool, b -> B.I (if b then 1 else 0)
|
||||||
|
| List bij', l ->
|
||||||
|
let l' = List.map (fun x -> encode ~bij:bij' x) l in
|
||||||
|
B.L l'
|
||||||
|
| Many bij', [] -> raise (EncodingError "many: got empty list")
|
||||||
|
| Many bij', l ->
|
||||||
|
let l' = List.map (fun x -> encode ~bij:bij' x) l in
|
||||||
|
B.L l'
|
||||||
|
| Opt bij', None -> B.L []
|
||||||
|
| Opt bij', Some x -> B.L [encode ~bij:bij' x]
|
||||||
|
| Pair (bija, bijb), (a, b) ->
|
||||||
|
B.L [encode ~bij:bija a; encode ~bij:bijb b]
|
||||||
|
| Triple (bija, bijb, bijc), (a, b, c) ->
|
||||||
|
B.L [encode ~bij:bija a; encode ~bij:bijb b; encode ~bij:bijc c]
|
||||||
|
| Quad (bija, bijb, bijc, bijd), (a, b, c, d) ->
|
||||||
|
B.L [encode ~bij:bija a; encode ~bij:bijb b;
|
||||||
|
encode ~bij:bijc c; encode ~bij:bijd d]
|
||||||
|
| Quint (bija, bijb, bijc, bijd, bije), (a, b, c, d, e) ->
|
||||||
|
B.L [encode ~bij:bija a; encode ~bij:bijb b;
|
||||||
|
encode ~bij:bijc c; encode ~bij:bijd d;
|
||||||
|
encode ~bij:bije e]
|
||||||
|
| Guard (check, bij'), x ->
|
||||||
|
if not (check x) then raise (EncodingError "check failed");
|
||||||
|
encode ~bij:bij' x
|
||||||
|
| Map (inject, _, bij'), x ->
|
||||||
|
encode ~bij:bij' (inject x)
|
||||||
|
| Switch (inject, _), x ->
|
||||||
|
let key, BranchTo (bij',y) = inject x in
|
||||||
|
B.D (B.SMap.singleton key (encode ~bij:bij' y))
|
||||||
|
|
||||||
val cur : t -> char
|
let rec decode: type a. bij:a t -> B.t -> a
|
||||||
(** Current char *)
|
= fun ~bij b -> match bij, b with
|
||||||
|
| Unit, B.I 0 -> ()
|
||||||
|
| String, B.S s -> s
|
||||||
|
| Int, B.I i -> i
|
||||||
|
| Float, B.S s ->
|
||||||
|
begin try
|
||||||
|
let f = float_of_string s in
|
||||||
|
f
|
||||||
|
with Failure _ ->
|
||||||
|
raise (DecodingError "expected float")
|
||||||
|
end
|
||||||
|
| Bool, B.I 0 -> false
|
||||||
|
| Bool, B.I _ -> true
|
||||||
|
| List bij', B.L l ->
|
||||||
|
List.map (fun b -> decode ~bij:bij' b) l
|
||||||
|
| Many bij', B.L [] ->
|
||||||
|
raise (DecodingError "expected nonempty list")
|
||||||
|
| Many bij', B.L l ->
|
||||||
|
List.map (fun b -> decode ~bij:bij' b) l
|
||||||
|
| Opt bij', B.L [] -> None
|
||||||
|
| Opt bij', B.L [x] -> Some (decode ~bij:bij' x)
|
||||||
|
| Opt bij', B.L _ ->
|
||||||
|
raise (DecodingError "expected [] or [_]")
|
||||||
|
| Pair (bija, bijb), B.L [a; b] ->
|
||||||
|
decode ~bij:bija a, decode ~bij:bijb b
|
||||||
|
| Triple (bija, bijb, bijc), B.L [a; b; c] ->
|
||||||
|
decode ~bij:bija a, decode ~bij:bijb b, decode ~bij:bijc c
|
||||||
|
| Quad (bija, bijb, bijc, bijd), B.L [a; b; c; d] ->
|
||||||
|
decode ~bij:bija a, decode ~bij:bijb b,
|
||||||
|
decode ~bij:bijc c, decode ~bij:bijd d
|
||||||
|
| Quint (bija, bijb, bijc, bijd, bije), B.L [a; b; c; d; e] ->
|
||||||
|
decode ~bij:bija a, decode ~bij:bijb b,
|
||||||
|
decode ~bij:bijc c, decode ~bij:bijd d,
|
||||||
|
decode ~bij:bije e
|
||||||
|
| Guard (check, bij'), x ->
|
||||||
|
let y = decode ~bij:bij' x in
|
||||||
|
if not (check y) then raise (DecodingError "check failed");
|
||||||
|
y
|
||||||
|
| Map (_, extract, bij'), b ->
|
||||||
|
let x = decode ~bij:bij' b in
|
||||||
|
extract x
|
||||||
|
| Switch (_, extract), B.D d when B.SMap.cardinal d = 1 ->
|
||||||
|
let key, value = B.SMap.choose d in
|
||||||
|
let BranchFrom (bij', convert) = extract key in
|
||||||
|
convert (decode ~bij:bij' value)
|
||||||
|
| _ -> raise (DecodingError "bad case")
|
||||||
|
|
||||||
val junk : t -> unit
|
let to_string ~bij x = B.to_string (encode ~bij x)
|
||||||
(** Discard current char *)
|
|
||||||
end
|
|
||||||
|
|
||||||
module SourceStr = struct
|
|
||||||
type t = {
|
|
||||||
str : string;
|
|
||||||
mutable idx : int;
|
|
||||||
}
|
|
||||||
|
|
||||||
let create str =
|
|
||||||
{ str;
|
|
||||||
idx = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
let eof t = t.idx = String.length t.str
|
|
||||||
|
|
||||||
let cur t =
|
|
||||||
if eof t then raise EOF else t.str.[t.idx]
|
|
||||||
|
|
||||||
let junk t =
|
|
||||||
if t.idx >= String.length t.str
|
|
||||||
then raise EOF
|
|
||||||
else t.idx <- t.idx + 1
|
|
||||||
end
|
|
||||||
|
|
||||||
module SourceStream = struct
|
|
||||||
type t = char Stream.t
|
|
||||||
|
|
||||||
let eof t = match Stream.peek t with
|
|
||||||
| None -> true
|
|
||||||
| Some _ -> false
|
|
||||||
|
|
||||||
let cur t = match Stream.peek t with
|
|
||||||
| None -> raise EOF
|
|
||||||
| Some c -> c
|
|
||||||
|
|
||||||
let junk t = Stream.junk t
|
|
||||||
end
|
|
||||||
|
|
||||||
module SourceChan = struct
|
|
||||||
type t = {
|
|
||||||
chan : in_channel;
|
|
||||||
buf : string;
|
|
||||||
mutable len : int;
|
|
||||||
mutable idx : int;
|
|
||||||
}
|
|
||||||
|
|
||||||
let create ?(bufsize=256) ic =
|
|
||||||
let t = { chan = ic;
|
|
||||||
buf = String.make bufsize ' ';
|
|
||||||
len = 0;
|
|
||||||
idx = 0;
|
|
||||||
} in
|
|
||||||
(* fill the buffer *)
|
|
||||||
t.len <- input t.chan t.buf 0 bufsize;
|
|
||||||
t
|
|
||||||
|
|
||||||
let eof t = t.len = 0
|
|
||||||
|
|
||||||
let cur t =
|
|
||||||
if eof t
|
|
||||||
then raise EOF
|
|
||||||
else t.buf.[t.idx]
|
|
||||||
|
|
||||||
let junk t =
|
|
||||||
(if t.len = 0 then raise EOF);
|
|
||||||
t.idx <- t.idx + 1;
|
|
||||||
if t.idx = t.len
|
|
||||||
then begin (* refill *)
|
|
||||||
t.idx <- 0;
|
|
||||||
t.len <- input t.chan t.buf 0 (String.length t.buf)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
(** {2 Sink: Where to print} *)
|
|
||||||
|
|
||||||
module type SINK = sig
|
|
||||||
type t
|
|
||||||
val write : t -> string -> int -> int -> unit (* write substring [i..i+len] *)
|
|
||||||
val write_char : t -> char -> unit
|
|
||||||
val write_int : t -> int -> unit
|
|
||||||
val write_bool : t -> bool -> unit
|
|
||||||
val write_float : t -> float -> unit
|
|
||||||
end
|
|
||||||
|
|
||||||
module SinkBuf = struct
|
|
||||||
type t = Buffer.t
|
|
||||||
|
|
||||||
let write t str i len = Buffer.add_substring t str i len
|
|
||||||
let write_char t c = Buffer.add_char t c
|
|
||||||
let write_int t i = Printf.bprintf t "%d" i
|
|
||||||
let write_bool t b = Printf.bprintf t "%B" b
|
|
||||||
let write_float t f = Printf.bprintf t "%f" f
|
|
||||||
end
|
|
||||||
|
|
||||||
module SinkChan = struct
|
|
||||||
type t = out_channel
|
|
||||||
|
|
||||||
let write t str i len = output t str i len
|
|
||||||
let write_char t c = output_char t c
|
|
||||||
let write_int t i = Printf.fprintf t "%d" i
|
|
||||||
let write_bool t b = Printf.fprintf t "%B" b
|
|
||||||
let write_float t f = Printf.fprintf t "%f" f
|
|
||||||
end
|
|
||||||
|
|
||||||
(** {2 Encoding/decoding} *)
|
|
||||||
|
|
||||||
module type ENCODE = sig
|
|
||||||
type sink
|
|
||||||
val encode : bij:'a t -> sink -> 'a -> unit
|
|
||||||
end
|
|
||||||
|
|
||||||
module type DECODE = sig
|
|
||||||
type source
|
|
||||||
val decode : bij:'a t -> source -> 'a
|
|
||||||
end
|
|
||||||
|
|
||||||
module SexpEncode(Sink : SINK) = struct
|
|
||||||
type sink = Sink.t
|
|
||||||
|
|
||||||
(* print escaped string to [sink] *)
|
|
||||||
let escape sink s =
|
|
||||||
(* function that escapes into the given sink *)
|
|
||||||
let rec really_escape sink s i =
|
|
||||||
if i = String.length s
|
|
||||||
then () (* done *)
|
|
||||||
else begin
|
|
||||||
(match s.[i] with
|
|
||||||
| '\n' -> Sink.write sink "\\n" 0 2
|
|
||||||
| '\t' -> Sink.write sink "\\t" 0 2
|
|
||||||
| ' ' | ')' ->
|
|
||||||
Sink.write_char sink '\\';
|
|
||||||
Sink.write_char sink s.[i];
|
|
||||||
| c ->
|
|
||||||
Sink.write_char sink c);
|
|
||||||
really_escape sink s (i+1)
|
|
||||||
end
|
|
||||||
in
|
|
||||||
(* search for a char to escape, if any *)
|
|
||||||
let rec search s i =
|
|
||||||
if i = String.length s
|
|
||||||
then Sink.write sink s 0 i (* no escaping needed *)
|
|
||||||
else match s.[i] with
|
|
||||||
| ' ' | '\t' | '\n' | ')' -> (* must escape *)
|
|
||||||
Sink.write sink s 0 i;
|
|
||||||
really_escape sink s i (* escape starting at i *)
|
|
||||||
| _ -> search s (i+1)
|
|
||||||
in
|
|
||||||
search s 0
|
|
||||||
|
|
||||||
let encode ~bij sink x =
|
|
||||||
let open Sink in
|
|
||||||
let rec encode : type a. a bij -> a -> unit = fun bij x ->
|
|
||||||
match bij, x with
|
|
||||||
| Unit, () -> ()
|
|
||||||
| String, s -> escape sink s
|
|
||||||
| Int, i -> Sink.write_int sink i
|
|
||||||
| Bool, b -> Sink.write_bool sink b
|
|
||||||
| Float, f -> Sink.write_float sink f
|
|
||||||
| List bij', l ->
|
|
||||||
Sink.write_char sink '(';
|
|
||||||
List.iteri
|
|
||||||
(fun i x -> (if i > 0 then Sink.write_char sink ' '); encode bij' x)
|
|
||||||
l;
|
|
||||||
Sink.write_char sink ')'
|
|
||||||
| Many _, [] -> failwith "Bij.encode: expected non-empty list"
|
|
||||||
| Many bij', l ->
|
|
||||||
Sink.write_char sink '(';
|
|
||||||
List.iteri
|
|
||||||
(fun i x -> (if i > 0 then Sink.write_char sink ' '); encode bij' x)
|
|
||||||
l;
|
|
||||||
Sink.write_char sink ')'
|
|
||||||
| Opt bij, None ->
|
|
||||||
encode (List bij) []
|
|
||||||
| Opt bij, Some x ->
|
|
||||||
encode (List bij) [x]
|
|
||||||
| Pair (bij_a, bij_b), (a, b) ->
|
|
||||||
Sink.write_char sink '(';
|
|
||||||
encode bij_a a;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_b b;
|
|
||||||
Sink.write_char sink ')'
|
|
||||||
| Guard (check, bij'), _ ->
|
|
||||||
(if not (check x) then raise (EncodingError ("check failed")));
|
|
||||||
encode bij' x
|
|
||||||
| Triple (bij_a, bij_b, bij_c), (a, b, c) ->
|
|
||||||
Sink.write_char sink '(';
|
|
||||||
encode bij_a a;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_b b;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_c c;
|
|
||||||
Sink.write_char sink ')'
|
|
||||||
| Quad (bij_a, bij_b, bij_c, bij_d), (a, b, c, d) ->
|
|
||||||
Sink.write_char sink '(';
|
|
||||||
encode bij_a a;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_b b;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_c c;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_d d;
|
|
||||||
Sink.write_char sink ')'
|
|
||||||
| Quint (bij_a, bij_b, bij_c, bij_d, bij_e), (a, b, c, d, e) ->
|
|
||||||
Sink.write_char sink '(';
|
|
||||||
encode bij_a a;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_b b;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_c c;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_d d;
|
|
||||||
Sink.write_char sink ' ';
|
|
||||||
encode bij_e e;
|
|
||||||
Sink.write_char sink ')'
|
|
||||||
| Map (inject, _, bij'), x ->
|
|
||||||
let y = inject x in
|
|
||||||
encode bij' y
|
|
||||||
| Switch (inject, _), x ->
|
|
||||||
let c, BranchTo (bij', y) = inject x in
|
|
||||||
Sink.write_char sink c;
|
|
||||||
encode bij' y
|
|
||||||
in encode bij x
|
|
||||||
end
|
|
||||||
|
|
||||||
module SexpDecode(Source : SOURCE) = struct
|
|
||||||
type source = Source.t
|
|
||||||
|
|
||||||
let decode ~bij source =
|
|
||||||
let rec cur () = Source.cur source
|
|
||||||
and junk () = Source.junk source
|
|
||||||
and eof () = Source.eof source
|
|
||||||
in
|
|
||||||
(* eat whitespace *)
|
|
||||||
let rec whitespace () =
|
|
||||||
if not (eof ()) then match cur () with
|
|
||||||
| ' ' | '\t' | '\n' -> junk (); whitespace ()
|
|
||||||
| _ -> ()
|
|
||||||
in
|
|
||||||
(* decode using the [bij] *)
|
|
||||||
let rec decode : type a. a bij -> a = fun bij ->
|
|
||||||
whitespace ();
|
|
||||||
match bij with
|
|
||||||
| Unit -> ()
|
|
||||||
| String -> decode_string (Buffer.create 5)
|
|
||||||
| Int -> decode_int 0
|
|
||||||
| Float ->
|
|
||||||
begin try float_of_string (decode_string (Buffer.create 3))
|
|
||||||
with Failure _ -> raise (DecodingError ("expected float"))
|
|
||||||
end
|
|
||||||
| Bool ->
|
|
||||||
begin match decode_string (Buffer.create 4) with
|
|
||||||
| "true" -> true
|
|
||||||
| "false" -> false
|
|
||||||
| s -> raise (DecodingError ("expected bool, got " ^ s))
|
|
||||||
end
|
|
||||||
| List bij' ->
|
|
||||||
decode_open ();
|
|
||||||
let l = decode_list bij' [] in
|
|
||||||
decode_close ();
|
|
||||||
l
|
|
||||||
| Many bij' ->
|
|
||||||
decode_open ();
|
|
||||||
let l = decode_list bij' [] in
|
|
||||||
decode_close ();
|
|
||||||
if l = [] then raise (DecodingError "expected non empty list") else l
|
|
||||||
| Opt bij' ->
|
|
||||||
decode_open ();
|
|
||||||
let l = decode_list bij' [] in
|
|
||||||
decode_close ();
|
|
||||||
begin match l with
|
|
||||||
| [] -> None
|
|
||||||
| [x] -> Some x
|
|
||||||
| _ -> raise (DecodingError "expected option")
|
|
||||||
end
|
|
||||||
| Pair (bija, bijb) ->
|
|
||||||
decode_open ();
|
|
||||||
let a = decode bija in
|
|
||||||
let b = decode bijb in
|
|
||||||
decode_close ();
|
|
||||||
a, b
|
|
||||||
| Triple (bija, bijb, bijc) ->
|
|
||||||
decode_open ();
|
|
||||||
let a = decode bija in
|
|
||||||
let b = decode bijb in
|
|
||||||
let c = decode bijc in
|
|
||||||
decode_close ();
|
|
||||||
a, b, c
|
|
||||||
| Quad (bija, bijb, bijc, bijd) ->
|
|
||||||
decode_open ();
|
|
||||||
let a = decode bija in
|
|
||||||
let b = decode bijb in
|
|
||||||
let c = decode bijc in
|
|
||||||
let d = decode bijd in
|
|
||||||
decode_close ();
|
|
||||||
a, b, c, d
|
|
||||||
| Quint (bija, bijb, bijc, bijd, bije) ->
|
|
||||||
decode_open ();
|
|
||||||
let a = decode bija in
|
|
||||||
let b = decode bijb in
|
|
||||||
let c = decode bijc in
|
|
||||||
let d = decode bijd in
|
|
||||||
let e = decode bije in
|
|
||||||
decode_close ();
|
|
||||||
a, b, c, d, e
|
|
||||||
| Guard (check, bij') ->
|
|
||||||
let x = decode bij' in
|
|
||||||
(if not (check x) then raise (DecodingError "check failed"));
|
|
||||||
x
|
|
||||||
| Map (_, extract, bij') ->
|
|
||||||
let x = decode bij' in
|
|
||||||
extract x
|
|
||||||
| Switch (_, extract) ->
|
|
||||||
let c = cur () in
|
|
||||||
let BranchFrom (bij', convert) = extract c in
|
|
||||||
junk (); (* remove c *)
|
|
||||||
let y = decode bij' in
|
|
||||||
convert y (* translate back *)
|
|
||||||
and decode_open : unit -> unit = fun () -> match cur () with
|
|
||||||
| '(' -> junk () (* done *)
|
|
||||||
| _ -> raise (DecodingError "expected '('")
|
|
||||||
and decode_close : unit -> unit = fun () ->
|
|
||||||
whitespace (); (* on close, first eat whitespace *)
|
|
||||||
match cur () with
|
|
||||||
| ')' -> junk () (* done *)
|
|
||||||
| _ -> raise (DecodingError "expected ')'")
|
|
||||||
and decode_int : int -> int = fun i ->
|
|
||||||
if eof () then i
|
|
||||||
else match cur () with
|
|
||||||
| '-' when i = 0 -> junk (); ~- (decode_int 0) (* negative *)
|
|
||||||
| c when Char.code c >= Char.code '0' && Char.code c <= Char.code '9' ->
|
|
||||||
junk ();
|
|
||||||
decode_int (i * 10 + (Char.code c - Char.code '0'))
|
|
||||||
| _ -> i
|
|
||||||
and decode_string : Buffer.t -> string = fun buf ->
|
|
||||||
if eof () then Buffer.contents buf
|
|
||||||
else match cur() with
|
|
||||||
| ' ' | '\t' | '\n' | ')' -> Buffer.contents buf
|
|
||||||
| '\\' -> junk (); Buffer.add_char buf (cur ()); junk (); decode_string buf
|
|
||||||
| c -> Buffer.add_char buf c; junk (); decode_string buf
|
|
||||||
and decode_list : type a. a t -> a list -> a list = fun bij l ->
|
|
||||||
whitespace ();
|
|
||||||
match cur() with
|
|
||||||
| ')' -> List.rev l (* done *)
|
|
||||||
| _ ->
|
|
||||||
let x = decode bij in
|
|
||||||
decode_list bij (x :: l)
|
|
||||||
in
|
|
||||||
try
|
|
||||||
decode bij
|
|
||||||
with EOF ->
|
|
||||||
raise (DecodingError "unexpected EOF")
|
|
||||||
end
|
|
||||||
|
|
||||||
module SexpStr = struct
|
|
||||||
module SexpEncodeBuf = SexpEncode(SinkBuf)
|
|
||||||
module SexpDecodeString = SexpDecode(SourceStr)
|
|
||||||
|
|
||||||
let to_string ?(bufsize=64) ~bij x =
|
|
||||||
let b = Buffer.create bufsize in
|
|
||||||
SexpEncodeBuf.encode ~bij b x;
|
|
||||||
Buffer.contents b
|
|
||||||
|
|
||||||
let of_string ~bij s =
|
let of_string ~bij s =
|
||||||
SexpDecodeString.decode ~bij (SourceStr.create s)
|
let b = B.of_string s in
|
||||||
end
|
decode ~bij b
|
||||||
|
|
||||||
module SexpChan = struct
|
(* TODO *)
|
||||||
module SexpEncodeChan = SexpEncode(SinkChan)
|
let read ~bij ic = failwith "read: not implemented"
|
||||||
module SexpDecodeChan = SexpDecode(SourceChan)
|
|
||||||
|
|
||||||
include SexpEncodeChan
|
let write ~bij x oc = failwith "write: not implemented"
|
||||||
include SexpDecodeChan
|
|
||||||
end
|
end
|
||||||
|
|
|
||||||
116
bij.mli
116
bij.mli
|
|
@ -25,26 +25,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
(** {1 Bijective Serializer/Deserializer} *)
|
(** {1 Bijective Serializer/Deserializer} *)
|
||||||
|
|
||||||
type 'a t = private
|
type 'a t
|
||||||
| Unit : unit t
|
|
||||||
| String : string t
|
|
||||||
| Int : int t
|
|
||||||
| Bool : bool t
|
|
||||||
| Float : float t
|
|
||||||
| List : 'a t -> 'a list t
|
|
||||||
| Many : 'a t -> 'a list t
|
|
||||||
| Opt : 'a t -> 'a option t
|
|
||||||
| Pair : 'a t * 'b t -> ('a * 'b) t
|
|
||||||
| Triple : 'a t * 'b t * 'c t -> ('a * 'b * 'c) t
|
|
||||||
| Quad : 'a t * 'b t * 'c t * 'd t -> ('a * 'b * 'c * 'd) t
|
|
||||||
| Quint : 'a t * 'b t * 'c t * 'd t * 'e t -> ('a * 'b * 'c * 'd * 'e) t
|
|
||||||
| Guard : ('a -> bool) * 'a t -> 'a t
|
|
||||||
| Map : ('a -> 'b) * ('b -> 'a) * 'b t -> 'a t
|
|
||||||
| Switch : ('a -> char * 'a inject_branch) * (char -> 'a extract_branch) -> 'a t
|
|
||||||
and _ inject_branch =
|
|
||||||
| BranchTo : 'b t * 'b -> 'a inject_branch
|
|
||||||
and _ extract_branch =
|
|
||||||
| BranchFrom : 'b t * ('b -> 'a) -> 'a extract_branch
|
|
||||||
|
|
||||||
(** {2 Bijection description} *)
|
(** {2 Bijection description} *)
|
||||||
|
|
||||||
|
|
@ -66,100 +47,49 @@ val guard : ('a -> bool) -> 'a t -> 'a t
|
||||||
|
|
||||||
val map : inject:('a -> 'b) -> extract:('b -> 'a) -> 'b t -> 'a t
|
val map : inject:('a -> 'b) -> extract:('b -> 'a) -> 'b t -> 'a t
|
||||||
|
|
||||||
val switch : inject:('a -> char * 'a inject_branch) ->
|
type _ inject_branch =
|
||||||
extract:(char -> 'a extract_branch) -> 'a t
|
| BranchTo : 'b t * 'b -> 'a inject_branch
|
||||||
|
type _ extract_branch =
|
||||||
|
| BranchFrom : 'b t * ('b -> 'a) -> 'a extract_branch
|
||||||
|
|
||||||
|
val switch : inject:('a -> string * 'a inject_branch) ->
|
||||||
|
extract:(string -> 'a extract_branch) -> 'a t
|
||||||
(** Discriminates unions based on the next character.
|
(** Discriminates unions based on the next character.
|
||||||
[inject] is used to select a character, as well as mapping to another
|
[inject] must give a unique key for each branch, as well as mapping to another
|
||||||
type (the argument of the algebraic constructor);
|
type (the argument of the algebraic constructor);
|
||||||
[extract] retrieves which type to parse based on the character. *)
|
[extract] retrieves which type to parse based on the key. *)
|
||||||
|
|
||||||
(** {2 Helpers} *)
|
(** {2 Helpers} *)
|
||||||
|
|
||||||
val fix : ((unit -> 'a t) -> 'a t) -> 'a t
|
val fix : ('a t lazy_t -> 'a t) -> 'a t
|
||||||
(** Helper for recursive encodings *)
|
(** Helper for recursive encodings. The parameter is the recursive bijection
|
||||||
|
itself. It must be lazy. *)
|
||||||
|
|
||||||
val with_version : string -> 'a t -> 'a t
|
val with_version : string -> 'a t -> 'a t
|
||||||
(** Guards the values with a given version. Only values encoded with
|
(** Guards the values with a given version. Only values encoded with
|
||||||
the same version will fit. *)
|
the same version will fit. *)
|
||||||
|
|
||||||
val array_ : 'a t -> 'a array t
|
|
||||||
val hashtbl : 'a t -> 'b t -> ('a, 'b) Hashtbl.t t
|
|
||||||
|
|
||||||
(** {2 Exceptions} *)
|
(** {2 Exceptions} *)
|
||||||
|
|
||||||
exception EOF
|
|
||||||
|
|
||||||
exception EncodingError of string
|
exception EncodingError of string
|
||||||
(** Raised when decoding is impossible *)
|
(** Raised when encoding is impossible *)
|
||||||
|
|
||||||
exception DecodingError of string
|
exception DecodingError of string
|
||||||
(** Raised when decoding is impossible *)
|
(** Raised when decoding is impossible *)
|
||||||
|
|
||||||
(** {2 Source of parsing} *)
|
(** {2 Translations} *)
|
||||||
|
|
||||||
module type SOURCE = sig
|
module TrBencode : sig
|
||||||
type t
|
val encode : bij:'a t -> 'a -> Bencode.t
|
||||||
|
|
||||||
val eof : t -> bool
|
val decode : bij:'a t -> Bencode.t -> 'a
|
||||||
(** End of input reached? *)
|
|
||||||
|
|
||||||
val cur : t -> char
|
val to_string : bij:'a t -> 'a -> string
|
||||||
(** Current char *)
|
|
||||||
|
|
||||||
val junk : t -> unit
|
|
||||||
(** Discard current char *)
|
|
||||||
end
|
|
||||||
|
|
||||||
module SourceStr : sig
|
|
||||||
include SOURCE
|
|
||||||
val create : string -> t
|
|
||||||
end
|
|
||||||
|
|
||||||
module SourceStream : SOURCE with type t = char Stream.t
|
|
||||||
|
|
||||||
module SourceChan : sig
|
|
||||||
include SOURCE
|
|
||||||
val create : ?bufsize:int -> in_channel -> t
|
|
||||||
end
|
|
||||||
|
|
||||||
(** {2 Sink: Where to print} *)
|
|
||||||
|
|
||||||
module type SINK = sig
|
|
||||||
type t
|
|
||||||
val write : t -> string -> int -> int -> unit (* write substring [i..i+len] *)
|
|
||||||
val write_char : t -> char -> unit
|
|
||||||
val write_int : t -> int -> unit
|
|
||||||
val write_bool : t -> bool -> unit
|
|
||||||
val write_float : t -> float -> unit
|
|
||||||
end
|
|
||||||
|
|
||||||
module SinkBuf : SINK with type t = Buffer.t
|
|
||||||
|
|
||||||
module SinkChan : SINK with type t = out_channel
|
|
||||||
|
|
||||||
(** {2 Encoding/decoding} *)
|
|
||||||
|
|
||||||
module type ENCODE = sig
|
|
||||||
type sink
|
|
||||||
val encode : bij:'a t -> sink -> 'a -> unit
|
|
||||||
end
|
|
||||||
|
|
||||||
module type DECODE = sig
|
|
||||||
type source
|
|
||||||
val decode : bij:'a t -> source -> 'a
|
|
||||||
end
|
|
||||||
|
|
||||||
module SexpEncode(Sink : SINK) : ENCODE with type sink = Sink.t
|
|
||||||
module SexpDecode(Source : SOURCE) : DECODE with type source = Source.t
|
|
||||||
|
|
||||||
(** Specific instance for encoding to/from strings *)
|
|
||||||
module SexpStr : sig
|
|
||||||
val to_string : ?bufsize:int -> bij:'a t -> 'a -> string
|
|
||||||
val of_string : bij:'a t -> string -> 'a
|
val of_string : bij:'a t -> string -> 'a
|
||||||
|
|
||||||
|
val read : bij:'a t -> in_channel -> 'a
|
||||||
|
|
||||||
|
val write : bij:'a t -> 'a t -> out_channel -> unit
|
||||||
end
|
end
|
||||||
|
|
||||||
(** Specific instance for encoding to/from channels *)
|
|
||||||
module SexpChan : sig
|
|
||||||
include ENCODE with type sink = SinkChan.t
|
|
||||||
include DECODE with type source = SourceChan.t
|
|
||||||
end
|
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,6 @@
|
||||||
|
|
||||||
open OUnit
|
open OUnit
|
||||||
|
|
||||||
open Bij
|
|
||||||
|
|
||||||
let test_int2 () =
|
|
||||||
let bij = pair int_ int_ in
|
|
||||||
let s = SexpStr.to_string bij (1,2) in
|
|
||||||
OUnit.assert_equal ~printer:(fun x -> x) "(1 2)" s
|
|
||||||
|
|
||||||
let test_escape () =
|
|
||||||
let bij = pair int_ (pair string_ string_) in
|
|
||||||
let s = SexpStr.to_string bij (1,("foo()","bar\n hello")) in
|
|
||||||
OUnit.assert_equal ~printer:(fun x -> x) "(1 (foo(\\) bar\\n\\ hello))" s
|
|
||||||
|
|
||||||
let pp_int_list l =
|
let pp_int_list l =
|
||||||
let b = Buffer.create 4 in
|
let b = Buffer.create 4 in
|
||||||
Format.fprintf (Format.formatter_of_buffer b) "%a@?"
|
Format.fprintf (Format.formatter_of_buffer b) "%a@?"
|
||||||
|
|
@ -20,10 +8,10 @@ let pp_int_list l =
|
||||||
Buffer.contents b
|
Buffer.contents b
|
||||||
|
|
||||||
let test_intlist n () =
|
let test_intlist n () =
|
||||||
let bij = list_ int_ in
|
let bij = Bij.(list_ int_) in
|
||||||
let l = Sequence.to_list (Sequence.int_range ~start:0 ~stop:n) in
|
let l = Sequence.to_list (Sequence.int_range ~start:0 ~stop:n) in
|
||||||
let s = SexpStr.to_string ~bij l in
|
let s = Bij.TrBencode.to_string ~bij l in
|
||||||
let l' = SexpStr.of_string ~bij s in
|
let l' = Bij.TrBencode.of_string ~bij s in
|
||||||
OUnit.assert_equal ~printer:pp_int_list l l'
|
OUnit.assert_equal ~printer:pp_int_list l l'
|
||||||
|
|
||||||
type term =
|
type term =
|
||||||
|
|
@ -32,26 +20,27 @@ type term =
|
||||||
| App of term list
|
| App of term list
|
||||||
|
|
||||||
let bij_term =
|
let bij_term =
|
||||||
let bij = fix
|
let bij = Bij.(fix
|
||||||
(fun bij ->
|
(fun bij ->
|
||||||
switch
|
switch
|
||||||
~inject:(function
|
~inject:(function
|
||||||
| Const s -> 'c', BranchTo (string_, s)
|
| Const s -> "const", BranchTo (string_, s)
|
||||||
| Int i -> 'i', BranchTo (int_, i)
|
| Int i -> "int", BranchTo (int_, i)
|
||||||
| App l -> 'a', BranchTo (list_ (bij ()), l))
|
| App l -> "app", BranchTo (list_ (Lazy.force bij), l))
|
||||||
~extract:(function
|
~extract:(function
|
||||||
| 'c' -> BranchFrom (string_, fun x -> Const x)
|
| "const" -> BranchFrom (string_, fun x -> Const x)
|
||||||
| 'i' -> BranchFrom (int_, fun x -> Int x)
|
| "int" -> BranchFrom (int_, fun x -> Int x)
|
||||||
| 'a' -> BranchFrom (list_ (bij ()), fun l -> App l)
|
| "app" -> BranchFrom (list_ (Lazy.force bij), fun l -> App l)
|
||||||
| _ -> raise (DecodingError "unexpected case switch")))
|
| _ -> raise (DecodingError "unexpected case switch")))
|
||||||
|
)
|
||||||
in
|
in
|
||||||
bij
|
bij
|
||||||
|
|
||||||
let test_rec () =
|
let test_rec () =
|
||||||
let t = App [Const "foo"; App [Const "bar"; Int 1; Int 2]; Int 3; Const "hello"] in
|
let t = App [Const "foo"; App [Const "bar"; Int 1; Int 2]; Int 3; Const "hello"] in
|
||||||
let s = SexpStr.to_string ~bij:bij_term t in
|
let s = Bij.TrBencode.to_string ~bij:bij_term t in
|
||||||
(* Printf.printf "to: %s\n" s; *)
|
(* Printf.printf "to: %s\n" s; *)
|
||||||
let t' = SexpStr.of_string ~bij:bij_term s in
|
let t' = Bij.TrBencode.of_string ~bij:bij_term s in
|
||||||
OUnit.assert_equal t t'
|
OUnit.assert_equal t t'
|
||||||
|
|
||||||
let random_str len =
|
let random_str len =
|
||||||
|
|
@ -75,28 +64,26 @@ let rec random_term depth =
|
||||||
let test_term_random ?(depth=5) n () =
|
let test_term_random ?(depth=5) n () =
|
||||||
for i = 0 to n - 1 do
|
for i = 0 to n - 1 do
|
||||||
let t = random_term depth in
|
let t = random_term depth in
|
||||||
let s = SexpStr.to_string ~bij:bij_term t in
|
let s = Bij.TrBencode.to_string ~bij:bij_term t in
|
||||||
let t' = SexpStr.of_string ~bij:bij_term s in
|
let t' = Bij.TrBencode.of_string ~bij:bij_term s in
|
||||||
OUnit.assert_equal t t'
|
OUnit.assert_equal t t'
|
||||||
done
|
done
|
||||||
|
|
||||||
let test_complicated () =
|
let test_complicated () =
|
||||||
let bij = triple int_ (pair bool_ (many float_))
|
let bij = Bij.(triple int_ (pair bool_ (many float_))
|
||||||
(map ~inject:(fun (a,b) -> (b,a)) ~extract:(fun (b,a) -> a,b) (pair int_ bool_)) in
|
(map ~inject:(fun (a,b) -> (b,a)) ~extract:(fun (b,a) -> a,b) (pair int_ bool_))) in
|
||||||
let x = (1, (true, [1.; 2.; 3.]), (false, 42)) in
|
let x = (1, (true, [1.; 2.; 3.]), (false, 42)) in
|
||||||
let s = SexpStr.to_string ~bij x in
|
let s = Bij.TrBencode.to_string ~bij x in
|
||||||
let x' = SexpStr.of_string ~bij s in
|
let x' = Bij.TrBencode.of_string ~bij s in
|
||||||
OUnit.assert_equal x x'
|
OUnit.assert_equal x x'
|
||||||
|
|
||||||
let suite =
|
let suite =
|
||||||
"test_bij" >:::
|
"test_bij" >:::
|
||||||
[ "test_int2" >:: test_int2;
|
[ "test_intlist10" >:: test_intlist 10
|
||||||
"test_escape" >:: test_escape;
|
; "test_intlist100" >:: test_intlist 100
|
||||||
"test_intlist10" >:: test_intlist 10;
|
; "test_intlist10_000" >:: test_intlist 10_000
|
||||||
"test_intlist100" >:: test_intlist 100;
|
; "test_rec" >:: test_rec
|
||||||
"test_intlist10_000" >:: test_intlist 10_000;
|
; "test_term_random100" >:: test_term_random 100
|
||||||
"test_rec" >:: test_rec;
|
; "test_term_random100_depth10" >:: test_term_random ~depth:10 100
|
||||||
"test_term_random100" >:: test_term_random 100;
|
; "test_complicated" >:: test_complicated
|
||||||
"test_term_random100_depth10" >:: test_term_random ~depth:10 100;
|
|
||||||
"test_complicated" >:: test_complicated;
|
|
||||||
]
|
]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue