mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
187 lines
6.6 KiB
OCaml
187 lines
6.6 KiB
OCaml
(*
|
|
Copyright (c) 2013, Simon Cruanes
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
Redistributions of source code must retain the above copyright notice, this
|
|
list of conditions and the following disclaimer. Redistributions in binary
|
|
form must reproduce the above copyright notice, this list of conditions and the
|
|
following disclaimer in the documentation and/or other materials provided with
|
|
the distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*)
|
|
|
|
(** {1 Bijective Serializer/Deserializer} *)
|
|
|
|
(** This module helps writing serialization/deserialization code in
|
|
a type-safe way. It uses GADTs, and as such requires OCaml >= 4.00.1.
|
|
|
|
Conceptually, a value of type ['a] {! t} describes the (persistent) structure
|
|
of the type ['a]. Combinators, listed in the next section (e.g., {!list_}
|
|
or {!pair}), are used to describe complicated structures from simpler
|
|
ones.
|
|
|
|
For instance, to serialize a value of type [(int * string) list]:
|
|
|
|
{[let bij = Bij.(list_ (pair int_ string_));;
|
|
|
|
let l = [(1, "foo"); (2, "bar")];;
|
|
|
|
Bij.TrBencode.to_string ~bij l;;
|
|
- : string = "lli1e3:fooeli2e3:baree"
|
|
]}
|
|
|
|
Some types may not be directly describable, for instance records or
|
|
algebraic types. For those, more subtle combinators exist:
|
|
|
|
- {!map} is a bijection between two types, and should be typically used to
|
|
map records to tuples (for which combinators exist)
|
|
|
|
- {!switch} is a case disjunction. Each case can map to a different type,
|
|
thank to the power of GADT, and a {b key} needs to be provided for
|
|
each case, so that de-serialization can know which type to read.
|
|
|
|
- {!fix} allows to describe recursive encodings. The user provides a function
|
|
which, given a ['a t lazy_t], builds a ['a t], and return its fixpoint.
|
|
|
|
For instance, let's take a simple symbolic expressions structure (can
|
|
be found in the corresponding test file "tests/test_bij.ml"):
|
|
|
|
{[
|
|
type term =
|
|
| Const of string
|
|
| Int of int
|
|
| App of term list;;
|
|
|
|
let bij_term =
|
|
Bij.(fix
|
|
(fun bij ->
|
|
switch
|
|
~inject:(function
|
|
| Const s -> "const", BranchTo (string_, s)
|
|
| Int i -> "int", BranchTo (int_, i)
|
|
| App l -> "app", BranchTo (list_ (Lazy.force bij), l))
|
|
~extract:(function
|
|
| "const" -> BranchFrom (string_, fun x -> Const x)
|
|
| "int" -> BranchFrom (int_, fun x -> Int x)
|
|
| "app" -> BranchFrom (list_ (Lazy.force bij), fun l -> App l)
|
|
| _ -> raise (DecodingError "unexpected case switch")))
|
|
)
|
|
]}
|
|
|
|
A bijection could be used for many things, but here our focus is on
|
|
serialization and de-serialization. The idea is that we can map a value
|
|
[x : 'a] to some general-purpose serialization format
|
|
(json, XML, B-encode, etc.) that we can then write to the disk or network;
|
|
the reverse operation is also possible (and bijectivity is enforced
|
|
by the fact that we use a single datatype ['a t] to describe both mappings).
|
|
|
|
For now, only a bijection to B-encode (see {!Bencode} and {!Bij.TrBencode})
|
|
is provided. The code is quite straightforward and could be extended
|
|
to XML or Json without hassle.
|
|
*)
|
|
|
|
type _ t = private
|
|
| Unit : unit t
|
|
| String : string t
|
|
| Int : int t
|
|
| Bool : bool t
|
|
| Float : float t
|
|
| List : 'a t -> 'a list t
|
|
| Many : 'a t -> 'a list t
|
|
| Opt : 'a t -> 'a option t
|
|
| Pair : 'a t * 'b t -> ('a * 'b) t
|
|
| Triple : 'a t * 'b t * 'c t -> ('a * 'b * 'c) t
|
|
| Quad : 'a t * 'b t * 'c t * 'd t -> ('a * 'b * 'c * 'd) t
|
|
| Quint : 'a t * 'b t * 'c t * 'd t * 'e t -> ('a * 'b * 'c * 'd * 'e) t
|
|
| Guard : ('a -> bool) * 'a t -> 'a t
|
|
| Map : ('a -> 'b) * ('b -> 'a) * 'b t -> 'a t
|
|
| Switch : ('a -> string * 'a inject_branch) *
|
|
(string-> 'a extract_branch) -> 'a t
|
|
and _ inject_branch =
|
|
| BranchTo : 'b t * 'b -> 'a inject_branch
|
|
and _ extract_branch =
|
|
| BranchFrom : 'b t * ('b -> 'a) -> 'a extract_branch
|
|
|
|
(** {2 Bijection description} *)
|
|
|
|
val unit_ : unit t
|
|
val string_ : string t
|
|
val int_ : int t
|
|
val bool_ : bool t
|
|
val float_ : float t
|
|
|
|
val list_ : 'a t -> 'a list t
|
|
val many : 'a t -> 'a list t (* non empty *)
|
|
val opt : 'a t -> 'a option t
|
|
val pair : 'a t -> 'b t -> ('a * 'b) t
|
|
val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t
|
|
val quad : 'a t -> 'b t -> 'c t -> 'd t -> ('a * 'b * 'c * 'd) t
|
|
val quint : 'a t -> 'b t -> 'c t -> 'd t -> 'e t -> ('a * 'b * 'c * 'd * 'e) t
|
|
val guard : ('a -> bool) -> 'a t -> 'a t
|
|
(** Validate values at encoding and decoding *)
|
|
|
|
val map : inject:('a -> 'b) -> extract:('b -> 'a) -> 'b t -> 'a t
|
|
|
|
val switch : inject:('a -> string * 'a inject_branch) ->
|
|
extract:(string -> 'a extract_branch) -> 'a t
|
|
(** Discriminates unions based on the next character.
|
|
[inject] must give a unique key for each branch, as well as mapping to another
|
|
type (the argument of the algebraic constructor);
|
|
[extract] retrieves which type to parse based on the key. *)
|
|
|
|
val fix : ('a t lazy_t -> 'a t) -> 'a t
|
|
(** Helper for recursive encodings. The parameter is the recursive bijection
|
|
itself. It must be lazy. *)
|
|
|
|
(** {2 Helpers} *)
|
|
|
|
val with_version : string -> 'a t -> 'a t
|
|
(** Guards the values with a given version. Only values encoded with
|
|
the same version will fit. *)
|
|
|
|
val array_ : 'a t -> 'a array t
|
|
|
|
val hashtbl : 'a t -> 'b t -> ('a, 'b) Hashtbl.t t
|
|
|
|
(** {2 Exceptions} *)
|
|
|
|
exception EncodingError of string
|
|
(** Raised when encoding is impossible *)
|
|
|
|
exception DecodingError of string
|
|
(** Raised when decoding is impossible *)
|
|
|
|
(** {2 Translations} *)
|
|
|
|
module TrBencode : sig
|
|
val encode : bij:'a t -> 'a -> Bencode.t
|
|
|
|
val decode : bij:'a t -> Bencode.t -> 'a
|
|
|
|
val to_string : bij:'a t -> 'a -> string
|
|
|
|
val of_string : bij:'a t -> string -> 'a
|
|
|
|
val read : bij:'a t -> in_channel -> 'a
|
|
(** Read a single value from the channel *)
|
|
|
|
val read_stream : bij:'a t -> in_channel -> 'a Stream.t
|
|
|
|
val write : bij:'a t -> out_channel -> 'a -> unit
|
|
|
|
val write_stream : bij:'a t -> out_channel -> 'a Stream.t -> unit
|
|
end
|
|
|