modules everywhere for sources, sinks, encoders, decoders...;

more primitives in Sink; direct access to eof/cur/junk in Source
This commit is contained in:
Simon Cruanes 2013-05-15 11:12:53 +02:00
parent a41baa1174
commit a570a34951
3 changed files with 232 additions and 130 deletions

282
bij.ml
View file

@ -57,6 +57,8 @@ let triple a b c = Triple (a,b,c)
let map ~inject ~extract b = Map (inject, extract, b)
let switch select l = Switch (select, l)
exception EOF
exception EncodingError of string
(** Raised when decoding is impossible *)
@ -65,93 +67,162 @@ exception DecodingError of string
(** {2 Source of parsing} *)
module Source = struct
type t = string -> int (* fills the buffer *)
module type SOURCE = sig
type t
let of_str s =
let i = ref 0 in
fun buf ->
let len = min (String.length s - !i) (String.length buf) in
if len = 0
then 0 (* done *)
else begin
String.blit s !i buf 0 len;
i := !i + len;
len
end
val eof : t -> bool
(** End of input reached? *)
let of_stream str =
fun buf ->
let rec fill i =
if i = String.length buf
then i
else match Stream.peek str with
| None -> i (* done *)
| Some c ->
buf.[i] <- c;
Stream.junk str;
fill (i+1)
in
fill 0
val cur : t -> char
(** Current char *)
let of_chan ic =
fun buf ->
input ic buf 0 (String.length buf)
val junk : t -> unit
(** Discard current char *)
end
module SourceStr = struct
type t = {
str : string;
mutable idx : int;
}
let create str =
{ str;
idx = 0;
}
let eof t = t.idx = String.length t.str
let cur t =
if eof t then raise EOF else t.str.[t.idx]
let junk t =
if t.idx >= String.length t.str
then raise EOF
else t.idx <- t.idx + 1
end
module SourceStream = struct
type t = char Stream.t
let eof t = match Stream.peek t with
| None -> true
| Some _ -> false
let cur t = match Stream.peek t with
| None -> raise EOF
| Some c -> c
let junk t = Stream.junk t
end
module SourceChan = struct
type t = {
chan : in_channel;
buf : string;
mutable len : int;
mutable idx : int;
}
let create ?(bufsize=256) ic =
let t = { chan = ic;
buf = String.make bufsize ' ';
len = 0;
idx = 0;
} in
(* fill the buffer *)
t.len <- input t.chan t.buf 0 bufsize;
t
let eof t = t.len = 0
let cur t =
if eof t
then raise EOF
else t.buf.[t.idx]
let junk t =
(if t.len = 0 then raise EOF);
t.idx <- t.idx + 1;
if t.idx = t.len
then begin (* refill *)
t.idx <- 0;
t.len <- input t.chan t.buf 0 (String.length t.buf)
end
end
(** {2 Sink: Where to print} *)
module Sink = struct
type t = {
mutable write : string -> unit;
mutable write_int : int -> unit;
mutable write_bool : bool -> unit;
mutable write_float : float -> unit;
}
module type SINK = sig
type t
val write : t -> string -> int -> int -> unit (* write substring [i..i+len] *)
val write_char : t -> char -> unit
val write_int : t -> int -> unit
val write_bool : t -> bool -> unit
val write_float : t -> float -> unit
end
let of_buf buf =
{ write = (fun s -> Buffer.add_string buf s);
write_int = (fun i -> Printf.bprintf buf "%d" i);
write_bool = (fun b -> Printf.bprintf buf "%B" b);
write_float = (fun f -> Printf.bprintf buf "%f" f);
}
module SinkBuf = struct
type t = Buffer.t
let of_chan oc =
{ write = (fun s -> output_string oc s);
write_int = (fun i -> Printf.fprintf oc "%d" i);
write_bool = (fun b -> Printf.fprintf oc "%B" b);
write_float = (fun f -> Printf.fprintf oc "%f" f);
}
let write t str i len = Buffer.add_substring t str i len
let write_char t c = Buffer.add_char t c
let write_int t i = Printf.bprintf t "%d" i
let write_bool t b = Printf.bprintf t "%B" b
let write_float t f = Printf.bprintf t "%f" f
end
module SinkChan = struct
type t = out_channel
let write t str i len = output t str i len
let write_char t c = output_char t c
let write_int t i = Printf.fprintf t "%d" i
let write_bool t b = Printf.fprintf t "%B" b
let write_float t f = Printf.fprintf t "%f" f
end
(** {2 Encoding/decoding} *)
module Sexp = struct
(* escape string *)
let escape s =
(* function that escapes into the given buffer *)
let rec really_escape buf s i =
module type ENCODE = sig
type sink
val encode : bij:'a t -> sink -> 'a -> unit
end
module type DECODE = sig
type source
val decode : bij:'a t -> source -> 'a
end
module SexpEncode(Sink : SINK) = struct
type sink = Sink.t
(* print escaped string to [sink] *)
let escape sink s =
(* function that escapes into the given sink *)
let rec really_escape sink s i =
if i = String.length s
then Buffer.contents buf
then () (* done *)
else begin
(match s.[i] with
| '\n' -> Buffer.add_string buf "\\n"
| '\t' -> Buffer.add_string buf "\\t"
| '\n' -> Sink.write sink "\\n" 0 2
| '\t' -> Sink.write sink "\\t" 0 2
| ' ' | ')' ->
Buffer.add_char buf '\\';
Buffer.add_char buf s.[i]
| c -> Buffer.add_char buf c);
really_escape buf s (i+1)
Sink.write_char sink '\\';
Sink.write_char sink s.[i];
| c ->
Sink.write_char sink c);
really_escape sink s (i+1)
end
in
(* search for a char to escape, if any *)
let rec search s i =
if i = String.length s then s (* no escaping needed *)
else match s.[i] with
if i = String.length s
then Sink.write sink s 0 i (* no escaping needed *)
else match s.[i] with
| ' ' | '\t' | '\n' | ')' -> (* must escape *)
let buf = Buffer.create (String.length s + 1) in
Buffer.add_substring buf s 0 i;
really_escape buf s i (* escape starting at i *)
Sink.write sink s 0 i;
really_escape sink s i (* escape starting at i *)
| _ -> search s (i+1)
in
search s 0
@ -161,41 +232,41 @@ module Sexp = struct
let rec encode : type a. a bij -> a -> unit = fun bij x ->
match bij, x with
| Unit, () -> ()
| String, s -> sink.write (escape s)
| Int, i -> sink.write_int i
| Bool, b -> sink.write_bool b
| Float, f -> sink.write_float f
| String, s -> escape sink s
| Int, i -> Sink.write_int sink i
| Bool, b -> Sink.write_bool sink b
| Float, f -> Sink.write_float sink f
| List bij', l ->
sink.write "(";
Sink.write_char sink '(';
List.iter
(fun x -> sink.write " "; encode bij' x)
(fun x -> Sink.write_char sink ' '; encode bij' x)
l;
sink.write ")"
Sink.write_char sink ')'
| Many _, [] -> failwith "Bij.encode: expected non-empty list"
| Many bij', l ->
sink.write "(";
List.iter
(fun x -> sink.write " "; encode bij' x)
Sink.write_char sink '(';
List.iteri
(fun i x -> (if i > 0 then Sink.write_char sink ' '); encode bij' x)
l;
sink.write ")"
Sink.write_char sink ')'
| Opt bij, None ->
encode (List bij) []
| Opt bij, Some x ->
encode (List bij) [x]
| Pair (bij_a, bij_b), (a, b) ->
sink.write "(";
Sink.write_char sink '(';
encode bij_a a;
sink.write " ";
Sink.write_char sink ' ';
encode bij_b b;
sink.write ")"
Sink.write_char sink ')'
| Triple (bij_a, bij_b, bij_c), (a, b, c) ->
sink.write "(";
Sink.write_char sink '(';
encode bij_a a;
sink.write " ";
Sink.write_char sink ' ';
encode bij_b b;
sink.write " ";
Sink.write_char sink ' ';
encode bij_c c;
sink.write ")"
Sink.write_char sink ')'
| Map (inject, _, bij'), x ->
let y = inject x in
encode bij' y
@ -208,29 +279,18 @@ module Sexp = struct
raise (EncodingError "no encoding in switch")
in encode bij x
let to_string ~bij x =
let b = Buffer.create 15 in
encode ~bij (Sink.of_buf b) x;
Buffer.contents b
end
module SexpDecode(Source : SOURCE) = struct
type source = Source.t
let decode ~bij source =
let str = String.make 64 '_' in
let pos = ref 0 in
let len = ref 0 in
(* current token *)
let rec cur () =
if eof ()
then raise (EncodingError "unexpected EOF")
else str.[!pos]
and eof () = !len = 0
and refill () =
len := source str;
pos := 0
and junk () =
incr pos;
if !pos >= !len then refill ()
let rec cur () = Source.cur source
and junk () = Source.junk source
and eof () = Source.eof source
in
(* eat whitespace *)
and whitespace () =
let rec whitespace () =
if not (eof ()) then match cur () with
| ' ' | '\t' | '\n' -> junk (); whitespace ()
| _ -> ()
@ -328,8 +388,18 @@ module Sexp = struct
in
decode bij
in
refill (); (* first input *)
decode bij
let of_string ~bij s = decode ~bij (Source.of_str s)
end
module SexpStr = struct
module SexpEncodeBuf = SexpEncode(SinkBuf)
module SexpDecodeString = SexpDecode(SourceStr)
let to_string ~bij x =
let b = Buffer.create 15 in
SexpEncodeBuf.encode ~bij b x;
Buffer.contents b
let of_string ~bij s =
SexpDecodeString.decode ~bij (SourceStr.create s)
end

72
bij.mli
View file

@ -48,6 +48,8 @@ val switch : ('a -> char) -> (char * 'a t) list -> 'a t
bijection depending on the value.
' ' means "default" *)
exception EOF
exception EncodingError of string
(** Raised when decoding is impossible *)
@ -56,33 +58,63 @@ exception DecodingError of string
(** {2 Source of parsing} *)
module Source : sig
type t = string -> int (* fills the buffer *)
module type SOURCE = sig
type t
val of_str : string -> t
val of_stream : char Stream.t -> t
val of_chan : in_channel -> t
val eof : t -> bool
(** End of input reached? *)
val cur : t -> char
(** Current char *)
val junk : t -> unit
(** Discard current char *)
end
module SourceStr : sig
include SOURCE
val create : string -> t
end
module SourceStream : SOURCE with type t = char Stream.t
module SourceChan : sig
include SOURCE
val create : ?bufsize:int -> in_channel -> t
end
(** {2 Sink: Where to print} *)
module Sink : sig
type t = {
mutable write : string -> unit;
mutable write_int : int -> unit;
mutable write_bool : bool -> unit;
mutable write_float : float -> unit;
}
val of_buf : Buffer.t -> t
val of_chan : out_channel -> t
module type SINK = sig
type t
val write : t -> string -> int -> int -> unit (* write substring [i..i+len] *)
val write_char : t -> char -> unit
val write_int : t -> int -> unit
val write_bool : t -> bool -> unit
val write_float : t -> float -> unit
end
module SinkBuf : SINK with type t := Buffer.t
module SinkChan : SINK with type t := out_channel
(** {2 Encoding/decoding} *)
module Sexp : sig
val encode : bij:'a t -> Sink.t -> 'a -> unit
val to_string : bij:'a t -> 'a -> string
val decode : bij:'a t -> Source.t -> 'a
val of_string : bij:'a t -> string -> 'a
module type ENCODE = sig
type sink
val encode : bij:'a t -> sink -> 'a -> unit
end
module type DECODE = sig
type source
val decode : bij:'a t -> source -> 'a
end
module SexpEncode(Sink : SINK) : ENCODE with type sink = Sink.t
module SexpDecode(Source : SOURCE) : DECODE with type source = Source.t
(** Specific instance for encoding to/from strings *)
module SexpStr : sig
val to_string : bij:'a t -> 'a -> string
val of_string : bij:'a t -> string -> 'a
end

View file

@ -5,12 +5,12 @@ open Bij
let test_int2 () =
let bij = pair int_ int_ in
let s = Sexp.to_string bij (1,2) in
let s = SexpStr.to_string bij (1,2) in
OUnit.assert_equal ~printer:(fun x -> x) "(1 2)" s
let test_escape () =
let bij = pair int_ (pair string_ string_) in
let s = Sexp.to_string bij (1,("foo()","bar\n hello")) in
let s = SexpStr.to_string bij (1,("foo()","bar\n hello")) in
OUnit.assert_equal ~printer:(fun x -> x) "(1 (foo(\\) bar\\n\\ hello))" s
let pp_int_list l =
@ -22,8 +22,8 @@ let pp_int_list l =
let test_intlist n () =
let bij = list_ int_ in
let l = Sequence.to_list (Sequence.int_range ~start:0 ~stop:n) in
let s = Sexp.to_string ~bij l in
let l' = Sexp.of_string ~bij s in
let s = SexpStr.to_string ~bij l in
let l' = SexpStr.of_string ~bij s in
OUnit.assert_equal ~printer:pp_int_list l l'
let suite =