mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-08 12:15:32 -05:00
modules everywhere for sources, sinks, encoders, decoders...;
more primitives in Sink; direct access to eof/cur/junk in Source
This commit is contained in:
parent
a41baa1174
commit
a570a34951
3 changed files with 232 additions and 130 deletions
282
bij.ml
282
bij.ml
|
|
@ -57,6 +57,8 @@ let triple a b c = Triple (a,b,c)
|
||||||
let map ~inject ~extract b = Map (inject, extract, b)
|
let map ~inject ~extract b = Map (inject, extract, b)
|
||||||
let switch select l = Switch (select, l)
|
let switch select l = Switch (select, l)
|
||||||
|
|
||||||
|
exception EOF
|
||||||
|
|
||||||
exception EncodingError of string
|
exception EncodingError of string
|
||||||
(** Raised when decoding is impossible *)
|
(** Raised when decoding is impossible *)
|
||||||
|
|
||||||
|
|
@ -65,93 +67,162 @@ exception DecodingError of string
|
||||||
|
|
||||||
(** {2 Source of parsing} *)
|
(** {2 Source of parsing} *)
|
||||||
|
|
||||||
module Source = struct
|
module type SOURCE = sig
|
||||||
type t = string -> int (* fills the buffer *)
|
type t
|
||||||
|
|
||||||
let of_str s =
|
val eof : t -> bool
|
||||||
let i = ref 0 in
|
(** End of input reached? *)
|
||||||
fun buf ->
|
|
||||||
let len = min (String.length s - !i) (String.length buf) in
|
val cur : t -> char
|
||||||
if len = 0
|
(** Current char *)
|
||||||
then 0 (* done *)
|
|
||||||
else begin
|
val junk : t -> unit
|
||||||
String.blit s !i buf 0 len;
|
(** Discard current char *)
|
||||||
i := !i + len;
|
end
|
||||||
len
|
|
||||||
|
module SourceStr = struct
|
||||||
|
type t = {
|
||||||
|
str : string;
|
||||||
|
mutable idx : int;
|
||||||
|
}
|
||||||
|
|
||||||
|
let create str =
|
||||||
|
{ str;
|
||||||
|
idx = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
let eof t = t.idx = String.length t.str
|
||||||
|
|
||||||
|
let cur t =
|
||||||
|
if eof t then raise EOF else t.str.[t.idx]
|
||||||
|
|
||||||
|
let junk t =
|
||||||
|
if t.idx >= String.length t.str
|
||||||
|
then raise EOF
|
||||||
|
else t.idx <- t.idx + 1
|
||||||
|
end
|
||||||
|
|
||||||
|
module SourceStream = struct
|
||||||
|
type t = char Stream.t
|
||||||
|
|
||||||
|
let eof t = match Stream.peek t with
|
||||||
|
| None -> true
|
||||||
|
| Some _ -> false
|
||||||
|
|
||||||
|
let cur t = match Stream.peek t with
|
||||||
|
| None -> raise EOF
|
||||||
|
| Some c -> c
|
||||||
|
|
||||||
|
let junk t = Stream.junk t
|
||||||
|
end
|
||||||
|
|
||||||
|
module SourceChan = struct
|
||||||
|
type t = {
|
||||||
|
chan : in_channel;
|
||||||
|
buf : string;
|
||||||
|
mutable len : int;
|
||||||
|
mutable idx : int;
|
||||||
|
}
|
||||||
|
|
||||||
|
let create ?(bufsize=256) ic =
|
||||||
|
let t = { chan = ic;
|
||||||
|
buf = String.make bufsize ' ';
|
||||||
|
len = 0;
|
||||||
|
idx = 0;
|
||||||
|
} in
|
||||||
|
(* fill the buffer *)
|
||||||
|
t.len <- input t.chan t.buf 0 bufsize;
|
||||||
|
t
|
||||||
|
|
||||||
|
let eof t = t.len = 0
|
||||||
|
|
||||||
|
let cur t =
|
||||||
|
if eof t
|
||||||
|
then raise EOF
|
||||||
|
else t.buf.[t.idx]
|
||||||
|
|
||||||
|
let junk t =
|
||||||
|
(if t.len = 0 then raise EOF);
|
||||||
|
t.idx <- t.idx + 1;
|
||||||
|
if t.idx = t.len
|
||||||
|
then begin (* refill *)
|
||||||
|
t.idx <- 0;
|
||||||
|
t.len <- input t.chan t.buf 0 (String.length t.buf)
|
||||||
end
|
end
|
||||||
|
|
||||||
let of_stream str =
|
|
||||||
fun buf ->
|
|
||||||
let rec fill i =
|
|
||||||
if i = String.length buf
|
|
||||||
then i
|
|
||||||
else match Stream.peek str with
|
|
||||||
| None -> i (* done *)
|
|
||||||
| Some c ->
|
|
||||||
buf.[i] <- c;
|
|
||||||
Stream.junk str;
|
|
||||||
fill (i+1)
|
|
||||||
in
|
|
||||||
fill 0
|
|
||||||
|
|
||||||
let of_chan ic =
|
|
||||||
fun buf ->
|
|
||||||
input ic buf 0 (String.length buf)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
(** {2 Sink: Where to print} *)
|
(** {2 Sink: Where to print} *)
|
||||||
|
|
||||||
module Sink = struct
|
module type SINK = sig
|
||||||
type t = {
|
type t
|
||||||
mutable write : string -> unit;
|
val write : t -> string -> int -> int -> unit (* write substring [i..i+len] *)
|
||||||
mutable write_int : int -> unit;
|
val write_char : t -> char -> unit
|
||||||
mutable write_bool : bool -> unit;
|
val write_int : t -> int -> unit
|
||||||
mutable write_float : float -> unit;
|
val write_bool : t -> bool -> unit
|
||||||
}
|
val write_float : t -> float -> unit
|
||||||
|
end
|
||||||
|
|
||||||
let of_buf buf =
|
module SinkBuf = struct
|
||||||
{ write = (fun s -> Buffer.add_string buf s);
|
type t = Buffer.t
|
||||||
write_int = (fun i -> Printf.bprintf buf "%d" i);
|
|
||||||
write_bool = (fun b -> Printf.bprintf buf "%B" b);
|
|
||||||
write_float = (fun f -> Printf.bprintf buf "%f" f);
|
|
||||||
}
|
|
||||||
|
|
||||||
let of_chan oc =
|
let write t str i len = Buffer.add_substring t str i len
|
||||||
{ write = (fun s -> output_string oc s);
|
let write_char t c = Buffer.add_char t c
|
||||||
write_int = (fun i -> Printf.fprintf oc "%d" i);
|
let write_int t i = Printf.bprintf t "%d" i
|
||||||
write_bool = (fun b -> Printf.fprintf oc "%B" b);
|
let write_bool t b = Printf.bprintf t "%B" b
|
||||||
write_float = (fun f -> Printf.fprintf oc "%f" f);
|
let write_float t f = Printf.bprintf t "%f" f
|
||||||
}
|
end
|
||||||
|
|
||||||
|
module SinkChan = struct
|
||||||
|
type t = out_channel
|
||||||
|
|
||||||
|
let write t str i len = output t str i len
|
||||||
|
let write_char t c = output_char t c
|
||||||
|
let write_int t i = Printf.fprintf t "%d" i
|
||||||
|
let write_bool t b = Printf.fprintf t "%B" b
|
||||||
|
let write_float t f = Printf.fprintf t "%f" f
|
||||||
end
|
end
|
||||||
|
|
||||||
(** {2 Encoding/decoding} *)
|
(** {2 Encoding/decoding} *)
|
||||||
|
|
||||||
module Sexp = struct
|
module type ENCODE = sig
|
||||||
(* escape string *)
|
type sink
|
||||||
let escape s =
|
val encode : bij:'a t -> sink -> 'a -> unit
|
||||||
(* function that escapes into the given buffer *)
|
end
|
||||||
let rec really_escape buf s i =
|
|
||||||
|
module type DECODE = sig
|
||||||
|
type source
|
||||||
|
val decode : bij:'a t -> source -> 'a
|
||||||
|
end
|
||||||
|
|
||||||
|
module SexpEncode(Sink : SINK) = struct
|
||||||
|
type sink = Sink.t
|
||||||
|
|
||||||
|
(* print escaped string to [sink] *)
|
||||||
|
let escape sink s =
|
||||||
|
(* function that escapes into the given sink *)
|
||||||
|
let rec really_escape sink s i =
|
||||||
if i = String.length s
|
if i = String.length s
|
||||||
then Buffer.contents buf
|
then () (* done *)
|
||||||
else begin
|
else begin
|
||||||
(match s.[i] with
|
(match s.[i] with
|
||||||
| '\n' -> Buffer.add_string buf "\\n"
|
| '\n' -> Sink.write sink "\\n" 0 2
|
||||||
| '\t' -> Buffer.add_string buf "\\t"
|
| '\t' -> Sink.write sink "\\t" 0 2
|
||||||
| ' ' | ')' ->
|
| ' ' | ')' ->
|
||||||
Buffer.add_char buf '\\';
|
Sink.write_char sink '\\';
|
||||||
Buffer.add_char buf s.[i]
|
Sink.write_char sink s.[i];
|
||||||
| c -> Buffer.add_char buf c);
|
| c ->
|
||||||
really_escape buf s (i+1)
|
Sink.write_char sink c);
|
||||||
|
really_escape sink s (i+1)
|
||||||
end
|
end
|
||||||
in
|
in
|
||||||
(* search for a char to escape, if any *)
|
(* search for a char to escape, if any *)
|
||||||
let rec search s i =
|
let rec search s i =
|
||||||
if i = String.length s then s (* no escaping needed *)
|
if i = String.length s
|
||||||
|
then Sink.write sink s 0 i (* no escaping needed *)
|
||||||
else match s.[i] with
|
else match s.[i] with
|
||||||
| ' ' | '\t' | '\n' | ')' -> (* must escape *)
|
| ' ' | '\t' | '\n' | ')' -> (* must escape *)
|
||||||
let buf = Buffer.create (String.length s + 1) in
|
Sink.write sink s 0 i;
|
||||||
Buffer.add_substring buf s 0 i;
|
really_escape sink s i (* escape starting at i *)
|
||||||
really_escape buf s i (* escape starting at i *)
|
|
||||||
| _ -> search s (i+1)
|
| _ -> search s (i+1)
|
||||||
in
|
in
|
||||||
search s 0
|
search s 0
|
||||||
|
|
@ -161,41 +232,41 @@ module Sexp = struct
|
||||||
let rec encode : type a. a bij -> a -> unit = fun bij x ->
|
let rec encode : type a. a bij -> a -> unit = fun bij x ->
|
||||||
match bij, x with
|
match bij, x with
|
||||||
| Unit, () -> ()
|
| Unit, () -> ()
|
||||||
| String, s -> sink.write (escape s)
|
| String, s -> escape sink s
|
||||||
| Int, i -> sink.write_int i
|
| Int, i -> Sink.write_int sink i
|
||||||
| Bool, b -> sink.write_bool b
|
| Bool, b -> Sink.write_bool sink b
|
||||||
| Float, f -> sink.write_float f
|
| Float, f -> Sink.write_float sink f
|
||||||
| List bij', l ->
|
| List bij', l ->
|
||||||
sink.write "(";
|
Sink.write_char sink '(';
|
||||||
List.iter
|
List.iter
|
||||||
(fun x -> sink.write " "; encode bij' x)
|
(fun x -> Sink.write_char sink ' '; encode bij' x)
|
||||||
l;
|
l;
|
||||||
sink.write ")"
|
Sink.write_char sink ')'
|
||||||
| Many _, [] -> failwith "Bij.encode: expected non-empty list"
|
| Many _, [] -> failwith "Bij.encode: expected non-empty list"
|
||||||
| Many bij', l ->
|
| Many bij', l ->
|
||||||
sink.write "(";
|
Sink.write_char sink '(';
|
||||||
List.iter
|
List.iteri
|
||||||
(fun x -> sink.write " "; encode bij' x)
|
(fun i x -> (if i > 0 then Sink.write_char sink ' '); encode bij' x)
|
||||||
l;
|
l;
|
||||||
sink.write ")"
|
Sink.write_char sink ')'
|
||||||
| Opt bij, None ->
|
| Opt bij, None ->
|
||||||
encode (List bij) []
|
encode (List bij) []
|
||||||
| Opt bij, Some x ->
|
| Opt bij, Some x ->
|
||||||
encode (List bij) [x]
|
encode (List bij) [x]
|
||||||
| Pair (bij_a, bij_b), (a, b) ->
|
| Pair (bij_a, bij_b), (a, b) ->
|
||||||
sink.write "(";
|
Sink.write_char sink '(';
|
||||||
encode bij_a a;
|
encode bij_a a;
|
||||||
sink.write " ";
|
Sink.write_char sink ' ';
|
||||||
encode bij_b b;
|
encode bij_b b;
|
||||||
sink.write ")"
|
Sink.write_char sink ')'
|
||||||
| Triple (bij_a, bij_b, bij_c), (a, b, c) ->
|
| Triple (bij_a, bij_b, bij_c), (a, b, c) ->
|
||||||
sink.write "(";
|
Sink.write_char sink '(';
|
||||||
encode bij_a a;
|
encode bij_a a;
|
||||||
sink.write " ";
|
Sink.write_char sink ' ';
|
||||||
encode bij_b b;
|
encode bij_b b;
|
||||||
sink.write " ";
|
Sink.write_char sink ' ';
|
||||||
encode bij_c c;
|
encode bij_c c;
|
||||||
sink.write ")"
|
Sink.write_char sink ')'
|
||||||
| Map (inject, _, bij'), x ->
|
| Map (inject, _, bij'), x ->
|
||||||
let y = inject x in
|
let y = inject x in
|
||||||
encode bij' y
|
encode bij' y
|
||||||
|
|
@ -208,29 +279,18 @@ module Sexp = struct
|
||||||
raise (EncodingError "no encoding in switch")
|
raise (EncodingError "no encoding in switch")
|
||||||
in encode bij x
|
in encode bij x
|
||||||
|
|
||||||
let to_string ~bij x =
|
end
|
||||||
let b = Buffer.create 15 in
|
|
||||||
encode ~bij (Sink.of_buf b) x;
|
module SexpDecode(Source : SOURCE) = struct
|
||||||
Buffer.contents b
|
type source = Source.t
|
||||||
|
|
||||||
let decode ~bij source =
|
let decode ~bij source =
|
||||||
let str = String.make 64 '_' in
|
let rec cur () = Source.cur source
|
||||||
let pos = ref 0 in
|
and junk () = Source.junk source
|
||||||
let len = ref 0 in
|
and eof () = Source.eof source
|
||||||
(* current token *)
|
in
|
||||||
let rec cur () =
|
|
||||||
if eof ()
|
|
||||||
then raise (EncodingError "unexpected EOF")
|
|
||||||
else str.[!pos]
|
|
||||||
and eof () = !len = 0
|
|
||||||
and refill () =
|
|
||||||
len := source str;
|
|
||||||
pos := 0
|
|
||||||
and junk () =
|
|
||||||
incr pos;
|
|
||||||
if !pos >= !len then refill ()
|
|
||||||
(* eat whitespace *)
|
(* eat whitespace *)
|
||||||
and whitespace () =
|
let rec whitespace () =
|
||||||
if not (eof ()) then match cur () with
|
if not (eof ()) then match cur () with
|
||||||
| ' ' | '\t' | '\n' -> junk (); whitespace ()
|
| ' ' | '\t' | '\n' -> junk (); whitespace ()
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
|
|
@ -328,8 +388,18 @@ module Sexp = struct
|
||||||
in
|
in
|
||||||
decode bij
|
decode bij
|
||||||
in
|
in
|
||||||
refill (); (* first input *)
|
|
||||||
decode bij
|
decode bij
|
||||||
|
end
|
||||||
let of_string ~bij s = decode ~bij (Source.of_str s)
|
|
||||||
|
module SexpStr = struct
|
||||||
|
module SexpEncodeBuf = SexpEncode(SinkBuf)
|
||||||
|
module SexpDecodeString = SexpDecode(SourceStr)
|
||||||
|
|
||||||
|
let to_string ~bij x =
|
||||||
|
let b = Buffer.create 15 in
|
||||||
|
SexpEncodeBuf.encode ~bij b x;
|
||||||
|
Buffer.contents b
|
||||||
|
|
||||||
|
let of_string ~bij s =
|
||||||
|
SexpDecodeString.decode ~bij (SourceStr.create s)
|
||||||
end
|
end
|
||||||
|
|
|
||||||
68
bij.mli
68
bij.mli
|
|
@ -48,6 +48,8 @@ val switch : ('a -> char) -> (char * 'a t) list -> 'a t
|
||||||
bijection depending on the value.
|
bijection depending on the value.
|
||||||
' ' means "default" *)
|
' ' means "default" *)
|
||||||
|
|
||||||
|
exception EOF
|
||||||
|
|
||||||
exception EncodingError of string
|
exception EncodingError of string
|
||||||
(** Raised when decoding is impossible *)
|
(** Raised when decoding is impossible *)
|
||||||
|
|
||||||
|
|
@ -56,33 +58,63 @@ exception DecodingError of string
|
||||||
|
|
||||||
(** {2 Source of parsing} *)
|
(** {2 Source of parsing} *)
|
||||||
|
|
||||||
module Source : sig
|
module type SOURCE = sig
|
||||||
type t = string -> int (* fills the buffer *)
|
type t
|
||||||
|
|
||||||
val of_str : string -> t
|
val eof : t -> bool
|
||||||
val of_stream : char Stream.t -> t
|
(** End of input reached? *)
|
||||||
val of_chan : in_channel -> t
|
|
||||||
|
val cur : t -> char
|
||||||
|
(** Current char *)
|
||||||
|
|
||||||
|
val junk : t -> unit
|
||||||
|
(** Discard current char *)
|
||||||
|
end
|
||||||
|
|
||||||
|
module SourceStr : sig
|
||||||
|
include SOURCE
|
||||||
|
val create : string -> t
|
||||||
|
end
|
||||||
|
|
||||||
|
module SourceStream : SOURCE with type t = char Stream.t
|
||||||
|
|
||||||
|
module SourceChan : sig
|
||||||
|
include SOURCE
|
||||||
|
val create : ?bufsize:int -> in_channel -> t
|
||||||
end
|
end
|
||||||
|
|
||||||
(** {2 Sink: Where to print} *)
|
(** {2 Sink: Where to print} *)
|
||||||
|
|
||||||
module Sink : sig
|
module type SINK = sig
|
||||||
type t = {
|
type t
|
||||||
mutable write : string -> unit;
|
val write : t -> string -> int -> int -> unit (* write substring [i..i+len] *)
|
||||||
mutable write_int : int -> unit;
|
val write_char : t -> char -> unit
|
||||||
mutable write_bool : bool -> unit;
|
val write_int : t -> int -> unit
|
||||||
mutable write_float : float -> unit;
|
val write_bool : t -> bool -> unit
|
||||||
}
|
val write_float : t -> float -> unit
|
||||||
|
|
||||||
val of_buf : Buffer.t -> t
|
|
||||||
val of_chan : out_channel -> t
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
module SinkBuf : SINK with type t := Buffer.t
|
||||||
|
|
||||||
|
module SinkChan : SINK with type t := out_channel
|
||||||
|
|
||||||
(** {2 Encoding/decoding} *)
|
(** {2 Encoding/decoding} *)
|
||||||
|
|
||||||
module Sexp : sig
|
module type ENCODE = sig
|
||||||
val encode : bij:'a t -> Sink.t -> 'a -> unit
|
type sink
|
||||||
|
val encode : bij:'a t -> sink -> 'a -> unit
|
||||||
|
end
|
||||||
|
|
||||||
|
module type DECODE = sig
|
||||||
|
type source
|
||||||
|
val decode : bij:'a t -> source -> 'a
|
||||||
|
end
|
||||||
|
|
||||||
|
module SexpEncode(Sink : SINK) : ENCODE with type sink = Sink.t
|
||||||
|
module SexpDecode(Source : SOURCE) : DECODE with type source = Source.t
|
||||||
|
|
||||||
|
(** Specific instance for encoding to/from strings *)
|
||||||
|
module SexpStr : sig
|
||||||
val to_string : bij:'a t -> 'a -> string
|
val to_string : bij:'a t -> 'a -> string
|
||||||
val decode : bij:'a t -> Source.t -> 'a
|
|
||||||
val of_string : bij:'a t -> string -> 'a
|
val of_string : bij:'a t -> string -> 'a
|
||||||
end
|
end
|
||||||
|
|
|
||||||
|
|
@ -5,12 +5,12 @@ open Bij
|
||||||
|
|
||||||
let test_int2 () =
|
let test_int2 () =
|
||||||
let bij = pair int_ int_ in
|
let bij = pair int_ int_ in
|
||||||
let s = Sexp.to_string bij (1,2) in
|
let s = SexpStr.to_string bij (1,2) in
|
||||||
OUnit.assert_equal ~printer:(fun x -> x) "(1 2)" s
|
OUnit.assert_equal ~printer:(fun x -> x) "(1 2)" s
|
||||||
|
|
||||||
let test_escape () =
|
let test_escape () =
|
||||||
let bij = pair int_ (pair string_ string_) in
|
let bij = pair int_ (pair string_ string_) in
|
||||||
let s = Sexp.to_string bij (1,("foo()","bar\n hello")) in
|
let s = SexpStr.to_string bij (1,("foo()","bar\n hello")) in
|
||||||
OUnit.assert_equal ~printer:(fun x -> x) "(1 (foo(\\) bar\\n\\ hello))" s
|
OUnit.assert_equal ~printer:(fun x -> x) "(1 (foo(\\) bar\\n\\ hello))" s
|
||||||
|
|
||||||
let pp_int_list l =
|
let pp_int_list l =
|
||||||
|
|
@ -22,8 +22,8 @@ let pp_int_list l =
|
||||||
let test_intlist n () =
|
let test_intlist n () =
|
||||||
let bij = list_ int_ in
|
let bij = list_ int_ in
|
||||||
let l = Sequence.to_list (Sequence.int_range ~start:0 ~stop:n) in
|
let l = Sequence.to_list (Sequence.int_range ~start:0 ~stop:n) in
|
||||||
let s = Sexp.to_string ~bij l in
|
let s = SexpStr.to_string ~bij l in
|
||||||
let l' = Sexp.of_string ~bij s in
|
let l' = SexpStr.of_string ~bij s in
|
||||||
OUnit.assert_equal ~printer:pp_int_list l l'
|
OUnit.assert_equal ~printer:pp_int_list l l'
|
||||||
|
|
||||||
let suite =
|
let suite =
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue