mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
Bij module, a try at the serialization problem using GADT
This commit is contained in:
parent
d7b3f45f3c
commit
20097ddae3
6 changed files with 435 additions and 0 deletions
316
bij.ml
Normal file
316
bij.ml
Normal file
|
|
@ -0,0 +1,316 @@
|
||||||
|
(*
|
||||||
|
Copyright (c) 2013, Simon Cruanes
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. Redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Bijective Serializer/Deserializer} *)
|
||||||
|
|
||||||
|
type _ t =
|
||||||
|
| Unit : unit t
|
||||||
|
| String : string t
|
||||||
|
| Int : int t
|
||||||
|
| Bool : bool t
|
||||||
|
| Float : float t
|
||||||
|
| List : 'a t -> 'a list t
|
||||||
|
| Many : 'a t -> 'a list t
|
||||||
|
| Opt : 'a t -> 'a option t
|
||||||
|
| Pair : 'a t * 'b t -> ('a * 'b) t
|
||||||
|
| Triple : 'a t * 'b t * 'c t -> ('a * 'b * 'c) t
|
||||||
|
| Map : ('a -> 'b) * ('b -> 'a) * 'b t -> 'a t
|
||||||
|
| Switch : ('a -> char) * (char * 'a t) list -> 'a t
|
||||||
|
|
||||||
|
type 'a bij = 'a t
|
||||||
|
|
||||||
|
(** {2 Bijection description} *)
|
||||||
|
|
||||||
|
let unit_ = Unit
|
||||||
|
let string_ = String
|
||||||
|
let int_ = Int
|
||||||
|
let bool_ = Bool
|
||||||
|
let float_ = Float
|
||||||
|
let list_ l = List l
|
||||||
|
let many l = Many l
|
||||||
|
let opt t = Opt t
|
||||||
|
let pair a b = Pair(a,b)
|
||||||
|
let triple a b c = Triple (a,b,c)
|
||||||
|
|
||||||
|
let map ~inject ~extract b = Map (inject, extract, b)
|
||||||
|
let switch select l = Switch (select, l)
|
||||||
|
|
||||||
|
exception EncodingError of string
|
||||||
|
(** Raised when decoding is impossible *)
|
||||||
|
|
||||||
|
exception DecodingError of string
|
||||||
|
(** Raised when decoding is impossible *)
|
||||||
|
|
||||||
|
(** {2 Source of parsing} *)
|
||||||
|
|
||||||
|
module Source = struct
|
||||||
|
type t = string -> int (* fills the buffer *)
|
||||||
|
|
||||||
|
let of_str s =
|
||||||
|
let i = ref 0 in
|
||||||
|
fun buf ->
|
||||||
|
let len = min (String.length s - !i) (String.length buf) in
|
||||||
|
if len = 0
|
||||||
|
then 0 (* done *)
|
||||||
|
else begin
|
||||||
|
String.blit s !i buf 0 len;
|
||||||
|
i := !i + len;
|
||||||
|
len
|
||||||
|
end
|
||||||
|
|
||||||
|
let of_stream str =
|
||||||
|
fun buf ->
|
||||||
|
let rec fill i =
|
||||||
|
if i = String.length buf
|
||||||
|
then i
|
||||||
|
else match Stream.peek str with
|
||||||
|
| None -> i (* done *)
|
||||||
|
| Some c ->
|
||||||
|
buf.[i] <- c;
|
||||||
|
Stream.junk str;
|
||||||
|
fill (i+1)
|
||||||
|
in
|
||||||
|
fill 0
|
||||||
|
|
||||||
|
let of_chan ic =
|
||||||
|
fun buf ->
|
||||||
|
input ic buf 0 (String.length buf)
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Sink: Where to print} *)
|
||||||
|
|
||||||
|
module Sink = struct
|
||||||
|
type t = {
|
||||||
|
mutable write : string -> unit;
|
||||||
|
mutable write_int : int -> unit;
|
||||||
|
mutable write_bool : bool -> unit;
|
||||||
|
mutable write_float : float -> unit;
|
||||||
|
}
|
||||||
|
|
||||||
|
let of_buf buf =
|
||||||
|
{ write = (fun s -> Buffer.add_string buf s);
|
||||||
|
write_int = (fun i -> Printf.bprintf buf "%d" i);
|
||||||
|
write_bool = (fun b -> Printf.bprintf buf "%B" b);
|
||||||
|
write_float = (fun f -> Printf.bprintf buf "%f" f);
|
||||||
|
}
|
||||||
|
|
||||||
|
let of_chan oc =
|
||||||
|
{ write = (fun s -> output_string oc s);
|
||||||
|
write_int = (fun i -> Printf.fprintf oc "%d" i);
|
||||||
|
write_bool = (fun b -> Printf.fprintf oc "%B" b);
|
||||||
|
write_float = (fun f -> Printf.fprintf oc "%f" f);
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Encoding/decoding} *)
|
||||||
|
|
||||||
|
module Sexp = struct
|
||||||
|
(* escape string *)
|
||||||
|
let escape s =
|
||||||
|
(* function that escapes into the given buffer *)
|
||||||
|
let rec really_escape buf s i =
|
||||||
|
if i = String.length s
|
||||||
|
then Buffer.contents buf
|
||||||
|
else begin
|
||||||
|
(match s.[i] with
|
||||||
|
| '\n' -> Buffer.add_string buf "\\n"
|
||||||
|
| '\t' -> Buffer.add_string buf "\\t"
|
||||||
|
| ' ' | ')' ->
|
||||||
|
Buffer.add_char buf '\\';
|
||||||
|
Buffer.add_char buf s.[i]
|
||||||
|
| c -> Buffer.add_char buf c);
|
||||||
|
really_escape buf s (i+1)
|
||||||
|
end
|
||||||
|
in
|
||||||
|
(* search for a char to escape, if any *)
|
||||||
|
let rec search s i =
|
||||||
|
if i = String.length s then s (* no escaping needed *)
|
||||||
|
else match s.[i] with
|
||||||
|
| ' ' | '\t' | '\n' | ')' -> (* must escape *)
|
||||||
|
let buf = Buffer.create (String.length s + 1) in
|
||||||
|
Buffer.add_substring buf s 0 i;
|
||||||
|
really_escape buf s i (* escape starting at i *)
|
||||||
|
| _ -> search s (i+1)
|
||||||
|
in
|
||||||
|
search s 0
|
||||||
|
|
||||||
|
let encode ~bij sink x =
|
||||||
|
let open Sink in
|
||||||
|
let rec encode : type a. a bij -> a -> unit = fun bij x ->
|
||||||
|
match bij, x with
|
||||||
|
| Unit, () -> ()
|
||||||
|
| String, s -> sink.write (escape s)
|
||||||
|
| Int, i -> sink.write_int i
|
||||||
|
| Bool, b -> sink.write_bool b
|
||||||
|
| Float, f -> sink.write_float f
|
||||||
|
| List bij', l ->
|
||||||
|
sink.write "(";
|
||||||
|
List.iter
|
||||||
|
(fun x -> sink.write " "; encode bij' x)
|
||||||
|
l;
|
||||||
|
sink.write ")"
|
||||||
|
| Many _, [] -> failwith "Bij.encode: expected non-empty list"
|
||||||
|
| Many bij', l ->
|
||||||
|
sink.write "(";
|
||||||
|
List.iter
|
||||||
|
(fun x -> sink.write " "; encode bij' x)
|
||||||
|
l;
|
||||||
|
sink.write ")"
|
||||||
|
| Opt bij, None ->
|
||||||
|
encode (List bij) []
|
||||||
|
| Opt bij, Some x ->
|
||||||
|
encode (List bij) [x]
|
||||||
|
| Pair (bij_a, bij_b), (a, b) ->
|
||||||
|
sink.write "(";
|
||||||
|
encode bij_a a;
|
||||||
|
sink.write " ";
|
||||||
|
encode bij_b b;
|
||||||
|
sink.write ")"
|
||||||
|
| Triple (bij_a, bij_b, bij_c), (a, b, c) ->
|
||||||
|
sink.write "(";
|
||||||
|
encode bij_a a;
|
||||||
|
sink.write " ";
|
||||||
|
encode bij_b b;
|
||||||
|
sink.write " ";
|
||||||
|
encode bij_c c;
|
||||||
|
sink.write ")"
|
||||||
|
| Map (inject, _, bij'), x ->
|
||||||
|
let y = inject x in
|
||||||
|
encode bij' y
|
||||||
|
| Switch (select, l), x ->
|
||||||
|
let c = select x in
|
||||||
|
try
|
||||||
|
let bij' = List.assq c l in
|
||||||
|
encode bij' x
|
||||||
|
with Not_found ->
|
||||||
|
raise (EncodingError "no encoding in switch")
|
||||||
|
in encode bij x
|
||||||
|
|
||||||
|
let to_string ~bij x =
|
||||||
|
let b = Buffer.create 15 in
|
||||||
|
encode ~bij (Sink.of_buf b) x;
|
||||||
|
Buffer.contents b
|
||||||
|
|
||||||
|
let decode ~bij source =
|
||||||
|
let str = String.make 64 '_' in
|
||||||
|
let pos = ref 0 in
|
||||||
|
let len = ref 0 in
|
||||||
|
(* current token *)
|
||||||
|
let rec cur () =
|
||||||
|
if !len = 0
|
||||||
|
then raise (DecodingError "unexpected EOF")
|
||||||
|
else if !pos = !len
|
||||||
|
then (refill (); cur ())
|
||||||
|
else
|
||||||
|
str.[!pos]
|
||||||
|
and refill () =
|
||||||
|
len := source str;
|
||||||
|
pos := 0
|
||||||
|
and junk () =
|
||||||
|
if !pos = !len then refill () else incr pos
|
||||||
|
(* eat whitespace *)
|
||||||
|
and whitespace () =
|
||||||
|
match cur () with
|
||||||
|
| ' ' | '\t' | '\n' -> junk (); whitespace ()
|
||||||
|
| _ -> ()
|
||||||
|
(* decode using the [bij] *)
|
||||||
|
and decode : type a. a bij -> int -> int -> a = fun bij i n ->
|
||||||
|
whitespace ();
|
||||||
|
match bij with
|
||||||
|
| Unit -> ()
|
||||||
|
| String -> decode_string (Buffer.create 5)
|
||||||
|
| Int -> decode_int 0
|
||||||
|
| Float ->
|
||||||
|
begin try float_of_string (decode_string (Buffer.create 3))
|
||||||
|
with Failure _ -> raise (DecodingError ("expected float"))
|
||||||
|
end
|
||||||
|
| Bool ->
|
||||||
|
begin match decode_string (Buffer.create 4) with
|
||||||
|
| "true" -> true
|
||||||
|
| "false" -> false
|
||||||
|
| s -> raise (DecodingError ("expected bool, got " ^ s))
|
||||||
|
end
|
||||||
|
| List bij' ->
|
||||||
|
decode_open ();
|
||||||
|
let l = decode_list bij' [] in
|
||||||
|
decode_close ();
|
||||||
|
l
|
||||||
|
| Many bij' ->
|
||||||
|
decode_open ();
|
||||||
|
let l = decode_list bij' [] in
|
||||||
|
decode_close ();
|
||||||
|
if l = [] then raise (DecodingError "expected non empty list") else l
|
||||||
|
| Opt bij' ->
|
||||||
|
decode_open ();
|
||||||
|
let l = decode_list bij' [] in
|
||||||
|
decode_close ();
|
||||||
|
begin match l with
|
||||||
|
| [] -> None
|
||||||
|
| [x] -> Some x
|
||||||
|
| _ -> raise (DecodingError "expected option")
|
||||||
|
end
|
||||||
|
| Pair (bija, bijb) ->
|
||||||
|
decode_open ();
|
||||||
|
let a = decode bija in
|
||||||
|
whitespace ();
|
||||||
|
let b = decode bijb in
|
||||||
|
decode_close ();
|
||||||
|
a, b
|
||||||
|
| Triple (bija, bijb, bijc) ->
|
||||||
|
decode_open ();
|
||||||
|
let a = decode bija in
|
||||||
|
whitespace ();
|
||||||
|
let b = decode bijb in
|
||||||
|
whitespace ();
|
||||||
|
let c = decode bijc in
|
||||||
|
decode_close ();
|
||||||
|
a, b, c
|
||||||
|
| Map (_, extract, bij') ->
|
||||||
|
let x = decode bij' in
|
||||||
|
extract x
|
||||||
|
| Switch (_, choices) -> decode_switch choices
|
||||||
|
and decode_open () = match cur () with
|
||||||
|
| '(' -> junk (); whitespace ()
|
||||||
|
| _ -> raise (DecodingError "expected '('")
|
||||||
|
and decode_close () =
|
||||||
|
whitespace (); (* on close, first eat whitespace *)
|
||||||
|
match cur () with
|
||||||
|
| ')' -> junk (); whitespace ()
|
||||||
|
| _ -> raise (DecodingError "expected ')'")
|
||||||
|
and decode_int i = match cur () with
|
||||||
|
| '-' when i = 0 -> ~- (decode_int 0) (* negative *)
|
||||||
|
| c when Char.code c >= Char.code '0' && Char.code c <= Char.code '9' ->
|
||||||
|
junk ();
|
||||||
|
decode_int (i * 10 + (Char.code c - Char.code '0'))
|
||||||
|
| _ -> i
|
||||||
|
and decode_string buf = match cur() with
|
||||||
|
| ' ' | '\t' | '\n' | ')' -> Buffer.contents buf
|
||||||
|
| '\\' -> junk (); Buffer.add_char buf (cur ()); junk (); decode_string buf
|
||||||
|
| c -> Buffer.add_char buf c; junk (); decode_string buf
|
||||||
|
in
|
||||||
|
decode bij
|
||||||
|
|
||||||
|
let of_string ~bij s = decode ~bij (Source.of_str s)
|
||||||
|
end
|
||||||
88
bij.mli
Normal file
88
bij.mli
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
(*
|
||||||
|
Copyright (c) 2013, Simon Cruanes
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. Redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Bijective Serializer/Deserializer} *)
|
||||||
|
|
||||||
|
type 'a t
|
||||||
|
|
||||||
|
(** {2 Bijection description} *)
|
||||||
|
|
||||||
|
val unit_ : unit t
|
||||||
|
val string_ : string t
|
||||||
|
val int_ : int t
|
||||||
|
val bool_ : bool t
|
||||||
|
val float_ : float t
|
||||||
|
|
||||||
|
val list_ : 'a t -> 'a list t
|
||||||
|
val many : 'a t -> 'a list t (* non empty *)
|
||||||
|
val opt : 'a t -> 'a option t
|
||||||
|
val pair : 'a t -> 'b t -> ('a * 'b) t
|
||||||
|
val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t
|
||||||
|
|
||||||
|
val map : inject:('a -> 'b) -> extract:('b -> 'a) -> 'b t -> 'a t
|
||||||
|
val switch : ('a -> char) -> (char * 'a t) list -> 'a t
|
||||||
|
(** discriminates based on the next character.
|
||||||
|
The selection function, with type ['a -> char], is used to select a
|
||||||
|
bijection depending on the value.
|
||||||
|
' ' means "default" *)
|
||||||
|
|
||||||
|
exception EncodingError of string
|
||||||
|
(** Raised when decoding is impossible *)
|
||||||
|
|
||||||
|
exception DecodingError of string
|
||||||
|
(** Raised when decoding is impossible *)
|
||||||
|
|
||||||
|
(** {2 Source of parsing} *)
|
||||||
|
|
||||||
|
module Source : sig
|
||||||
|
type t = string -> int (* fills the buffer *)
|
||||||
|
|
||||||
|
val of_str : string -> t
|
||||||
|
val of_stream : char Stream.t -> t
|
||||||
|
val of_chan : in_channel -> t
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Sink: Where to print} *)
|
||||||
|
|
||||||
|
module Sink : sig
|
||||||
|
type t = {
|
||||||
|
mutable write : string -> unit;
|
||||||
|
mutable write_int : int -> unit;
|
||||||
|
mutable write_bool : bool -> unit;
|
||||||
|
mutable write_float : float -> unit;
|
||||||
|
}
|
||||||
|
|
||||||
|
val of_buf : Buffer.t -> t
|
||||||
|
val of_chan : out_channel -> t
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Encoding/decoding} *)
|
||||||
|
|
||||||
|
module Sexp : sig
|
||||||
|
val encode : bij:'a t -> Sink.t -> 'a -> unit
|
||||||
|
val to_string : bij:'a t -> 'a -> string
|
||||||
|
val decode : bij:'a t -> Source.t -> 'a
|
||||||
|
val of_string : bij:'a t -> string -> 'a
|
||||||
|
end
|
||||||
|
|
@ -16,3 +16,4 @@ SplayTree
|
||||||
SplayMap
|
SplayMap
|
||||||
Univ
|
Univ
|
||||||
Vector
|
Vector
|
||||||
|
Bij
|
||||||
|
|
|
||||||
|
|
@ -7,3 +7,4 @@ LazyGraph
|
||||||
PersistentHashtbl
|
PersistentHashtbl
|
||||||
Sequence
|
Sequence
|
||||||
Univ
|
Univ
|
||||||
|
Bij
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ let suite =
|
||||||
[ Test_pHashtbl.suite;
|
[ Test_pHashtbl.suite;
|
||||||
Test_PersistentHashtbl.suite;
|
Test_PersistentHashtbl.suite;
|
||||||
Test_splayMap.suite;
|
Test_splayMap.suite;
|
||||||
|
Test_bij.suite;
|
||||||
Test_leftistheap.suite;
|
Test_leftistheap.suite;
|
||||||
Test_cc.suite;
|
Test_cc.suite;
|
||||||
Test_puf.suite;
|
Test_puf.suite;
|
||||||
|
|
|
||||||
28
tests/test_bij.ml
Normal file
28
tests/test_bij.ml
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
|
||||||
|
open OUnit
|
||||||
|
|
||||||
|
open Bij
|
||||||
|
|
||||||
|
let test_int2 () =
|
||||||
|
let bij = pair int_ int_ in
|
||||||
|
let s = Sexp.to_string bij (1,2) in
|
||||||
|
OUnit.assert_equal ~printer:(fun x -> x) "(1 2)" s
|
||||||
|
|
||||||
|
let test_escape () =
|
||||||
|
let bij = pair int_ (pair string_ string_) in
|
||||||
|
let s = Sexp.to_string bij (1,("foo()","bar\n hello")) in
|
||||||
|
OUnit.assert_equal ~printer:(fun x -> x) "(1 (foo(\\) bar\\n\\ hello))" s
|
||||||
|
|
||||||
|
let test_intlist () =
|
||||||
|
let bij = list_ int_ in
|
||||||
|
let l = Sequence.to_list (Sequence.int_range ~start:0 ~stop:10000) in
|
||||||
|
let s = Sexp.to_string ~bij l in
|
||||||
|
let l' = Sexp.of_string ~bij s in
|
||||||
|
OUnit.assert_equal l l'
|
||||||
|
|
||||||
|
let suite =
|
||||||
|
"test_bij" >:::
|
||||||
|
[ "test_int2" >:: test_int2;
|
||||||
|
"test_escape" >:: test_escape;
|
||||||
|
"test_intlist" >:: test_intlist;
|
||||||
|
]
|
||||||
Loading…
Add table
Reference in a new issue