diff --git a/bij.mli b/bij.mli index 0492290d..e2827c65 100644 --- a/bij.mli +++ b/bij.mli @@ -25,7 +25,95 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Bijective Serializer/Deserializer} *) -type 'a t +(** This module helps writing serialization/deserialization code in + a type-safe way. It uses GADTs, and as such requires OCaml >= 4.00.1. *) + +type 'a t = private + | Unit : unit t + | String : string t + | Int : int t + | Bool : bool t + | Float : float t + | List : 'a t -> 'a list t + | Many : 'a t -> 'a list t + | Opt : 'a t -> 'a option t + | Pair : 'a t * 'b t -> ('a * 'b) t + | Triple : 'a t * 'b t * 'c t -> ('a * 'b * 'c) t + | Quad : 'a t * 'b t * 'c t * 'd t -> ('a * 'b * 'c * 'd) t + | Quint : 'a t * 'b t * 'c t * 'd t * 'e t -> ('a * 'b * 'c * 'd * 'e) t + | Guard : ('a -> bool) * 'a t -> 'a t + | Map : ('a -> 'b) * ('b -> 'a) * 'b t -> 'a t + | Switch : ('a -> string * 'a inject_branch) * + (string-> 'a extract_branch) -> 'a t +and _ inject_branch = + | BranchTo : 'b t * 'b -> 'a inject_branch +and _ extract_branch = + | BranchFrom : 'b t * ('b -> 'a) -> 'a extract_branch + +(** Conceptually, a value of type ['a t] describes the (persistent) structure + of the type ['a]. Combinators, listed in the next section (e.g., {!list_} + or {!pair}), are used to describe complicated structures from simpler + ones. + + For instance, to serialize a [(int * string) list]: + +{[let bij = Bij.(list_ (pair int_ string_));; + +let l = [(1, "foo"); (2, "bar")];; + +Bij.TrBencode.to_string ~bij l;; +- : string = "lli1e3:fooeli2e3:baree" +]} + + Some types may not be directly describable, for instance records or + algebraic types. For those, more subtle combinators exist: + + - [map] is a bijection between two types, and should be typically used to + map records to tuples (for which combinators exist) + + - [switch] is a case disjunction. 
Each case can map to a different type, + thanks to the power of GADTs, and a {b key} needs to be provided for + each case, so that de-serialization can know which type to read. + + - [fix] makes it possible to describe recursive encodings. The user provides a function + which, given a ['a t lazy_t], builds a ['a t], and [fix] returns its fixpoint. + + For instance, let's take a simple symbolic expression structure (it can + be found in the corresponding test file "tests/test_bij.ml"): + +{[ +type term = + | Const of string + | Int of int + | App of term list;; + +let bij_term = + Bij.(fix + (fun bij -> + switch + ~inject:(function + | Const s -> "const", BranchTo (string_, s) + | Int i -> "int", BranchTo (int_, i) + | App l -> "app", BranchTo (list_ (Lazy.force bij), l)) + ~extract:(function + | "const" -> BranchFrom (string_, fun x -> Const x) + | "int" -> BranchFrom (int_, fun x -> Int x) + | "app" -> BranchFrom (list_ (Lazy.force bij), fun l -> App l) + | _ -> raise (DecodingError "unexpected case switch"))) + ) +]} + + A bijection could be used for many things, but here our focus is on + serialization and de-serialization. The idea is that we can map a value + [x : 'a] to some general-purpose serialization format + (JSON, XML, B-encode, etc.) that we can then write to the disk or network; + the reverse operation is also possible (and bijectivity is enforced + by the fact that we use a single datatype ['a t] to describe both mappings). + + For now, only a bijection to B-encode (see {!Bencode} and {!Bij.TrBencode}) + is provided. The code is quite straightforward and could be extended + to XML or JSON without hassle. 
+*) (** {2 Bijection description} *) @@ -47,11 +135,6 @@ val guard : ('a -> bool) -> 'a t -> 'a t val map : inject:('a -> 'b) -> extract:('b -> 'a) -> 'b t -> 'a t -type _ inject_branch = - | BranchTo : 'b t * 'b -> 'a inject_branch -type _ extract_branch = - | BranchFrom : 'b t * ('b -> 'a) -> 'a extract_branch - val switch : inject:('a -> string * 'a inject_branch) -> extract:(string -> 'a extract_branch) -> 'a t (** Discriminates unions based on the next character. @@ -59,12 +142,12 @@ val switch : inject:('a -> string * 'a inject_branch) -> type (the argument of the algebraic constructor); [extract] retrieves which type to parse based on the key. *) -(** {2 Helpers} *) - val fix : ('a t lazy_t -> 'a t) -> 'a t (** Helper for recursive encodings. The parameter is the recursive bijection itself. It must be lazy. *) +(** {2 Helpers} *) + val with_version : string -> 'a t -> 'a t (** Guards the values with a given version. Only values encoded with the same version will fit. *)