From 20097ddae3f488937dc8f6e35bfd4ed323d844f7 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 14 May 2013 18:32:23 +0200 Subject: [PATCH] Bij module, a try at the serialization problem using GADT --- bij.ml | 316 +++++++++++++++++++++++++++++++++++++++++++++ bij.mli | 88 +++++++++++++ containers.mllib | 1 + containers.odocl | 1 + tests/run_tests.ml | 1 + tests/test_bij.ml | 28 ++++ 6 files changed, 435 insertions(+) create mode 100644 bij.ml create mode 100644 bij.mli create mode 100644 tests/test_bij.ml diff --git a/bij.ml b/bij.ml new file mode 100644 index 00000000..7ee0a3fa --- /dev/null +++ b/bij.ml @@ -0,0 +1,316 @@ +(* +Copyright (c) 2013, Simon Cruanes +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. Redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Bijective Serializer/Deserializer} *) + +type _ t = + | Unit : unit t + | String : string t + | Int : int t + | Bool : bool t + | Float : float t + | List : 'a t -> 'a list t + | Many : 'a t -> 'a list t + | Opt : 'a t -> 'a option t + | Pair : 'a t * 'b t -> ('a * 'b) t + | Triple : 'a t * 'b t * 'c t -> ('a * 'b * 'c) t + | Map : ('a -> 'b) * ('b -> 'a) * 'b t -> 'a t + | Switch : ('a -> char) * (char * 'a t) list -> 'a t + +type 'a bij = 'a t + +(** {2 Bijection description} *) + +let unit_ = Unit +let string_ = String +let int_ = Int +let bool_ = Bool +let float_ = Float +let list_ l = List l +let many l = Many l +let opt t = Opt t +let pair a b = Pair(a,b) +let triple a b c = Triple (a,b,c) + +let map ~inject ~extract b = Map (inject, extract, b) +let switch select l = Switch (select, l) + +exception EncodingError of string + (** Raised when decoding is impossible *) + +exception DecodingError of string + (** Raised when decoding is impossible *) + +(** {2 Source of parsing} *) + +module Source = struct + type t = string -> int (* fills the buffer *) + + let of_str s = + let i = ref 0 in + fun buf -> + let len = min (String.length s - !i) (String.length buf) in + if len = 0 + then 0 (* done *) + else begin + String.blit s !i buf 0 len; + i := !i + len; + len + end + + let of_stream str = + fun buf -> + let rec fill i = + if i = String.length buf + then i + else match Stream.peek str with + | None -> i (* done *) + | Some c -> + buf.[i] <- c; + Stream.junk str; + fill (i+1) + in + fill 0 + + let of_chan ic = + fun buf -> + input ic buf 0 (String.length buf) +end + +(** {2 Sink: Where to print} *) + +module Sink = struct + type t = { + mutable write : string -> unit; + mutable write_int : int -> unit; + mutable write_bool : bool -> unit; + mutable write_float : float -> unit; + } + + let of_buf buf = + { write = (fun s -> Buffer.add_string buf s); + write_int = (fun i -> Printf.bprintf buf "%d" i); + write_bool = (fun b -> Printf.bprintf buf "%B" b); + write_float = (fun f -> Printf.bprintf buf "%f" f); + } + + let of_chan oc = + { write = (fun s -> output_string oc s); + write_int = (fun i -> Printf.fprintf oc "%d" i); + write_bool = (fun b -> Printf.fprintf oc "%B" b); + write_float = (fun f -> Printf.fprintf oc "%f" f); + } +end + +(** {2 Encoding/decoding} *) + +module Sexp = struct + (* escape string *) + let escape s = + (* function that escapes into the given buffer *) + let rec really_escape buf s i = + if i = String.length s + then Buffer.contents buf + else begin + (match s.[i] with + | '\n' -> Buffer.add_string buf "\\n" + | '\t' -> Buffer.add_string buf "\\t" + | ' ' | ')' -> + Buffer.add_char buf '\\'; + Buffer.add_char buf s.[i] + | c -> Buffer.add_char buf c); + really_escape buf s (i+1) + end + in + (* search for a char to escape, if any *) + let rec search s i = + if i = String.length s then s (* no escaping needed *) + else match s.[i] with + | ' ' | '\t' | '\n' | ')' -> (* must escape *) + let buf = Buffer.create (String.length s + 1) in + Buffer.add_substring buf s 0 i; + really_escape buf s i (* escape starting at i *) + | _ -> search s (i+1) + in + search s 0 + + let encode ~bij sink x = + let open Sink in + let rec encode : type a. a bij -> a -> unit = fun bij x -> + match bij, x with + | Unit, () -> () + | String, s -> sink.write (escape s) + | Int, i -> sink.write_int i + | Bool, b -> sink.write_bool b + | Float, f -> sink.write_float f + | List bij', l -> + sink.write "("; + List.iter + (fun x -> sink.write " "; encode bij' x) + l; + sink.write ")" + | Many _, [] -> failwith "Bij.encode: expected non-empty list" + | Many bij', l -> + sink.write "("; + List.iter + (fun x -> sink.write " "; encode bij' x) + l; + sink.write ")" + | Opt bij, None -> + encode (List bij) [] + | Opt bij, Some x -> + encode (List bij) [x] + | Pair (bij_a, bij_b), (a, b) -> + sink.write "("; + encode bij_a a; + sink.write " "; + encode bij_b b; + sink.write ")" + | Triple (bij_a, bij_b, bij_c), (a, b, c) -> + sink.write "("; + encode bij_a a; + sink.write " "; + encode bij_b b; + sink.write " "; + encode bij_c c; + sink.write ")" + | Map (inject, _, bij'), x -> + let y = inject x in + encode bij' y + | Switch (select, l), x -> + let c = select x in + try + let bij' = List.assq c l in + encode bij' x + with Not_found -> + raise (EncodingError "no encoding in switch") + in encode bij x + + let to_string ~bij x = + let b = Buffer.create 15 in + encode ~bij (Sink.of_buf b) x; + Buffer.contents b + + let decode ~bij source = + let str = String.make 64 '_' in + let pos = ref 0 in + let len = ref 0 in + (* current token *) + let rec cur () = + if !len = 0 + then raise (DecodingError "unexpected EOF") + else if !pos = !len + then (refill (); cur ()) + else + str.[!pos] + and refill () = + len := source str; + pos := 0 + and junk () = + if !pos = !len then refill () else incr pos + (* eat whitespace *) + and whitespace () = + match cur () with + | ' ' | '\t' | '\n' -> junk (); whitespace () + | _ -> () + (* decode using the [bij] *) + and decode : type a. a bij -> int -> int -> a = fun bij i n -> + whitespace (); + match bij with + | Unit -> () + | String -> decode_string (Buffer.create 5) + | Int -> decode_int 0 + | Float -> + begin try float_of_string (decode_string (Buffer.create 3)) + with Failure _ -> raise (DecodingError ("expected float")) + end + | Bool -> + begin match decode_string (Buffer.create 4) with + | "true" -> true + | "false" -> false + | s -> raise (DecodingError ("expected bool, got " ^ s)) + end + | List bij' -> + decode_open (); + let l = decode_list bij' [] in + decode_close (); + l + | Many bij' -> + decode_open (); + let l = decode_list bij' [] in + decode_close (); + if l = [] then raise (DecodingError "expected non empty list") else l + | Opt bij' -> + decode_open (); + let l = decode_list bij' [] in + decode_close (); + begin match l with + | [] -> None + | [x] -> Some x + | _ -> raise (DecodingError "expected option") + end + | Pair (bija, bijb) -> + decode_open (); + let a = decode bija in + whitespace (); + let b = decode bijb in + decode_close (); + a, b + | Triple (bija, bijb, bijc) -> + decode_open (); + let a = decode bija in + whitespace (); + let b = decode bijb in + whitespace (); + let c = decode bijc in + decode_close (); + a, b, c + | Map (_, extract, bij') -> + let x = decode bij' in + extract x + | Switch (_, choices) -> decode_switch choices + and decode_open () = match cur () with + | '(' -> junk (); whitespace () + | _ -> raise (DecodingError "expected '('") + and decode_close () = + whitespace (); (* on close, first eat whitespace *) + match cur () with + | ')' -> junk (); whitespace () + | _ -> raise (DecodingError "expected ')'") + and decode_int i = match cur () with + | '-' when i = 0 -> ~- (decode_int 0) (* negative *) + | c when Char.code c >= Char.code '0' && Char.code c <= Char.code '9' -> + junk (); + decode_int (i * 10 + (Char.code c - Char.code '0')) + | _ -> i + and decode_string buf = match cur() with + | ' ' | '\t' | '\n' | ')' -> Buffer.contents buf + | '\\' -> junk (); Buffer.add_char buf (cur ()); junk (); decode_string buf + | c -> Buffer.add_char buf c; junk (); decode_string buf + in + decode bij + + let of_string ~bij s = decode ~bij (Source.of_str s) +end diff --git a/bij.mli b/bij.mli new file mode 100644 index 00000000..c3f79af9 --- /dev/null +++ b/bij.mli @@ -0,0 +1,88 @@ +(* +Copyright (c) 2013, Simon Cruanes +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. Redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Bijective Serializer/Deserializer} *) + +type 'a t + +(** {2 Bijection description} *) + +val unit_ : unit t +val string_ : string t +val int_ : int t +val bool_ : bool t +val float_ : float t + +val list_ : 'a t -> 'a list t +val many : 'a t -> 'a list t (* non empty *) +val opt : 'a t -> 'a option t +val pair : 'a t -> 'b t -> ('a * 'b) t +val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t + +val map : inject:('a -> 'b) -> extract:('b -> 'a) -> 'b t -> 'a t +val switch : ('a -> char) -> (char * 'a t) list -> 'a t + (** discriminates based on the next character. + The selection function, with type ['a -> char], is used to select a + bijection depending on the value. + ' ' means "default" *) + +exception EncodingError of string + (** Raised when decoding is impossible *) + +exception DecodingError of string + (** Raised when decoding is impossible *) + +(** {2 Source of parsing} *) + +module Source : sig + type t = string -> int (* fills the buffer *) + + val of_str : string -> t + val of_stream : char Stream.t -> t + val of_chan : in_channel -> t +end + +(** {2 Sink: Where to print} *) + +module Sink : sig + type t = { + mutable write : string -> unit; + mutable write_int : int -> unit; + mutable write_bool : bool -> unit; + mutable write_float : float -> unit; + } + + val of_buf : Buffer.t -> t + val of_chan : out_channel -> t +end + +(** {2 Encoding/decoding} *) + +module Sexp : sig + val encode : bij:'a t -> Sink.t -> 'a -> unit + val to_string : bij:'a t -> 'a -> string + val decode : bij:'a t -> Source.t -> 'a + val of_string : bij:'a t -> string -> 'a +end diff --git a/containers.mllib b/containers.mllib index 57cd05f2..8d13be01 100644 --- a/containers.mllib +++ b/containers.mllib @@ -16,3 +16,4 @@ SplayTree SplayMap Univ Vector +Bij diff --git a/containers.odocl b/containers.odocl index e53d5c98..80c7ab88 100644 --- a/containers.odocl +++ b/containers.odocl @@ -7,3 +7,4 @@ LazyGraph PersistentHashtbl Sequence Univ +Bij diff --git a/tests/run_tests.ml b/tests/run_tests.ml index 5f9423b8..cd2b3a6a 100644 --- a/tests/run_tests.ml +++ b/tests/run_tests.ml @@ -7,6 +7,7 @@ let suite = [ Test_pHashtbl.suite; Test_PersistentHashtbl.suite; Test_splayMap.suite; + Test_bij.suite; Test_leftistheap.suite; Test_cc.suite; Test_puf.suite; diff --git a/tests/test_bij.ml b/tests/test_bij.ml new file mode 100644 index 00000000..d913272c --- /dev/null +++ b/tests/test_bij.ml @@ -0,0 +1,28 @@ + +open OUnit + +open Bij + +let test_int2 () = + let bij = pair int_ int_ in + let s = Sexp.to_string bij (1,2) in + OUnit.assert_equal ~printer:(fun x -> x) "(1 2)" s + +let test_escape () = + let bij = pair int_ (pair string_ string_) in + let s = Sexp.to_string bij (1,("foo()","bar\n hello")) in + OUnit.assert_equal ~printer:(fun x -> x) "(1 (foo(\\) bar\\n\\ hello))" s + +let test_intlist () = + let bij = list_ int_ in + let l = Sequence.to_list (Sequence.int_range ~start:0 ~stop:10000) in + let s = Sexp.to_string ~bij l in + let l' = Sexp.of_string ~bij s in + OUnit.assert_equal l l' + +let suite = + "test_bij" >::: + [ "test_int2" >:: test_int2; + "test_escape" >:: test_escape; + "test_intlist" >:: test_intlist; + ]