diff --git a/_oasis b/_oasis index 2839b24e..86cb7993 100644 --- a/_oasis +++ b/_oasis @@ -58,8 +58,8 @@ Library "containers_unix" Library "containers_sexp" Path: src/sexp - Modules: CCSexp, CCSexpM - BuildDepends: bytes + Modules: CCSexp, CCSexp_lex + BuildDepends: bytes, result FindlibParent: containers FindlibName: sexp diff --git a/examples/id_sexp.ml b/examples/id_sexp.ml index cac7b040..e8e4b3ce 100644 --- a/examples/id_sexp.ml +++ b/examples/id_sexp.ml @@ -1,18 +1,20 @@ +open Result + let pp_sexp s = match s with - | `Ok l -> + | Ok l -> List.iter - (fun s -> Format.printf "@[%a@]@." CCSexpM.print s) + (fun s -> Format.printf "@[%a@]@." CCSexp.pp s) l - | `Error msg -> + | Error msg -> Format.printf "error: %s@." msg let () = match Sys.argv with | [| _ |] -> - let s = CCSexpM.parse_chan_list stdin in + let s = CCSexp.parse_chan_list stdin in pp_sexp s | [| _; file |] -> - let s = CCSexpM.parse_file_list file in + let s = CCSexp.parse_file_list file in pp_sexp s | _ -> failwith "usage: id_sexp [file]" diff --git a/src/sexp/CCSexp.ml b/src/sexp/CCSexp.ml index 597dea62..bc83efa0 100644 --- a/src/sexp/CCSexp.ml +++ b/src/sexp/CCSexp.ml @@ -1,34 +1,17 @@ -(* -Copyright (c) 2013, Simon Cruanes -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. Redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) +(* This file is free software, part of containers. See file "license" for more details. *) (** {1 Simple S-expression parsing/printing} *) +type 'a or_error = ('a, string) Result.result +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + type t = [ | `Atom of string | `List of t list ] +type sexp = t let equal a b = a = b @@ -52,122 +35,241 @@ let of_field name t = `List [`Atom name; t] let of_record l = `List (List.map (fun (n,x) -> of_field n x) l) -(** {6 Traversal of S-exp} *) +(** {2 Printing} *) -module Traverse = struct - type 'a conv = t -> 'a option +let _with_out filename f = + let oc = open_out filename in + try + let x = f oc in + close_out oc; + x + with e -> + close_out oc; + raise e - let return x = Some x +(* shall we escape the string because of one of its chars? *) +let _must_escape s = + try + for i = 0 to String.length s - 1 do + let c = String.unsafe_get s i in + match c with + | ' ' | ')' | '(' | '"' | ';' | '\\' | '\n' | '\t' | '\r' -> raise Exit + | _ when Char.code c > 127 -> raise Exit (* non-ascii *) + | _ -> () + done; + false + with Exit -> true - let (>|=) e f = match e with - | None -> None - | Some x -> Some (f x) +let rec to_buf b t = match t with + | `Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s) + | `Atom s -> Buffer.add_string b s + | `List [] -> Buffer.add_string b "()" + | `List [x] -> Printf.bprintf b "(%a)" to_buf x + | `List l -> + Buffer.add_char b '('; + List.iteri + (fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t')) + l; + Buffer.add_char b ')' - let (>>=) e f = match e with - | None -> None - | Some x -> f x +let to_string t = + let b = Buffer.create 128 in + to_buf b t; + Buffer.contents b - let map_opt f l = - let rec recurse acc l = match l with - | [] -> Some (List.rev acc) - | x::l' -> - match f x with - | None -> None - | Some y -> recurse (y::acc) l' - in recurse [] l +let rec pp fmt t = match t with + | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) + | `Atom s -> Format.pp_print_string fmt s + | `List [] -> Format.pp_print_string fmt "()" + | `List [x] -> Format.fprintf fmt "@[(%a)@]" pp x + | `List l -> + Format.fprintf fmt "@[("; + List.iteri + (fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; pp fmt t')) + l; + Format.fprintf fmt ")@]" - let rec _list_any f l = match l with - | [] -> None - | x::tl -> - match f x with - | Some _ as res -> res - | None -> _list_any f tl +let rec pp_noindent fmt t = match t with + | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) + | `Atom s -> Format.pp_print_string fmt s + | `List [] -> Format.pp_print_string fmt "()" + | `List [x] -> Format.fprintf fmt "(%a)" pp_noindent x + | `List l -> + Format.pp_print_char fmt '('; + List.iteri + (fun i t' -> (if i > 0 then Format.pp_print_char fmt ' '; pp_noindent fmt t')) + l; + Format.pp_print_char fmt ')' - let list_any f e = match e with - | `Atom _ -> None - | `List l -> _list_any f l +let to_chan oc t = + let fmt = Format.formatter_of_out_channel oc in + pp fmt t; + Format.pp_print_flush fmt () - let rec _list_all f acc l = match l with - | [] -> List.rev acc - | x::tl -> - match f x with - | Some y -> _list_all f (y::acc) tl - | None -> _list_all f acc tl +let to_file_seq filename seq = + _with_out filename + (fun oc -> + seq (fun t -> to_chan oc t; output_char oc '\n') + ) - let list_all f e = match e with - | `Atom _ -> [] - | `List l -> _list_all f [] l +let to_file filename t = to_file_seq filename (fun k -> k t) - let _try_atom e f = match e with - | `List _ -> None - | `Atom x -> try Some (f x) with _ -> None +(** {2 Parsing} *) - let to_int e = _try_atom e int_of_string - let to_bool e = _try_atom e bool_of_string - let to_float e = _try_atom e float_of_string - let to_string e = _try_atom e (fun x->x) +let _with_in filename f = + let ic = open_in filename in + try + let x = f ic in + close_in ic; + x + with e -> + close_in ic; + Result.Error (Printexc.to_string e) - let to_pair e = match e with - | `List [x;y] -> Some (x,y) - | _ -> None +(** A parser of ['a] can return [Yield x] when it parsed a value, + or [Fail e] when a parse error was encountered, or + [End] if the input was empty *) +type 'a parse_result = + | Yield of 'a + | Fail of string + | End - let to_pair_with f1 f2 e = - to_pair e >>= fun (x,y) -> - f1 x >>= fun x -> - f2 y >>= fun y -> - return (x,y) +module Decoder = struct + module L = CCSexp_lex - let to_triple e = match e with - | `List [x;y;z] -> Some (x,y,z) - | _ -> None + type t = { + buf: Lexing.lexbuf; + mutable cur_tok: L.token option; (* current token *) + } - let to_triple_with f1 f2 f3 e = - to_triple e >>= fun (x,y,z) -> - f1 x >>= fun x -> - f2 y >>= fun y -> - f3 z >>= fun z -> - return (x,y,z) + let cur (t:t): L.token = match t.cur_tok with + | Some L.EOI -> assert false + | Some t -> t + | None -> + (* fetch token *) + let tok = L.token t.buf in + t.cur_tok <- Some tok; + tok - let to_list e = match e with - | `List l -> Some l - | `Atom _ -> None + let junk t = t.cur_tok <- None - let to_list_with f (e:t) = match e with - | `List l -> map_opt f l - | `Atom _ -> None + let of_lexbuf buf = { + buf; + cur_tok=None; + } - let rec _get_field name l = match l with - | `List [`Atom n; x] :: _ when name=n -> Some x - | _ :: tl -> _get_field name tl - | [] -> None + exception E_end + exception E_error of int * int * string - let get_field name e = match e with - | `List l -> _get_field name l - | `Atom _ -> None + let error_ lexbuf msg = + let start = Lexing.lexeme_start_p lexbuf in + let line = start.Lexing.pos_lnum in + let col = start.Lexing.pos_cnum - start.Lexing.pos_bol in + raise (E_error (line,col,msg)) - let field name f e = - get_field name e >>= f - - let rec _get_field_list name l = match l with - | `List (`Atom n :: tl) :: _ when name=n -> Some tl - | _ :: tl -> _get_field_list name tl - | [] -> None - - let field_list name f e = match e with - | `List l -> _get_field_list name l >>= f - | `Atom _ -> None - - let rec _get_variant s args l = match l with - | [] -> None - | (s', f) :: _ when s=s' -> f args - | _ :: tl -> _get_variant s args tl - - let get_variant l e = match e with - | `List (`Atom s :: args) -> _get_variant s args l - | `List _ -> None - | `Atom s -> _get_variant s [] l - - let get_exn e = match e with - | None -> failwith "CCSexp.Traverse.get_exn" - | Some x -> x + let next (t:t) = + let rec expr () = match cur t with + | L.EOI -> raise E_end + | L.ATOM s -> junk t; `Atom s + | L.LIST_OPEN -> + junk t; + let l = lst [] in + begin match cur t with + | L.LIST_CLOSE -> junk t; `List l + | _ -> error_ t.buf "expected ')'" + end + | L.LIST_CLOSE -> error_ t.buf "expected expression" + and lst acc = match cur t with + | L.LIST_CLOSE -> List.rev acc + | L.LIST_OPEN | L.ATOM _ -> + let sub = expr () in + lst (sub::acc) + | L.EOI -> error_ t.buf "unexpected EOI" + in + try Yield (expr ()) + with + | E_end -> End + | E_error (line,col,msg) + | CCSexp_lex.Error (line,col,msg) -> + Fail (Printf.sprintf "parse error at %d:%d: %s" line col msg) end + +let parse_string s : t or_error = + let buf = Lexing.from_string s in + let d = Decoder.of_lexbuf buf in + match Decoder.next d with + | End -> Result.Error "unexpected end of file" + | Yield x -> Result.Ok x + | Fail s -> Result.Error s + +(*$T + CCResult.to_opt (parse_string "(abc d/e/f \"hello \\\" () world\" )") <> None + CCResult.to_opt (parse_string "(abc ( d e ffff ) \"hello/world\")") <> None +*) + +(*$inject + let sexp_gen = + let mkatom a = `Atom a and mklist l = `List l in + let atom = Q.Gen.(map mkatom (string_size ~gen:printable (1 -- 30))) in + let gen = Q.Gen.( + sized (fix + (fun self n st -> match n with + | 0 -> atom st + | _ -> + frequency + [ 1, atom + ; 2, map mklist (list_size (0 -- 10) (self (n/10))) + ] st + ) + )) in + let rec small = function + | `Atom s -> String.length s + | `List l -> List.fold_left (fun n x->n+small x) 0 l + and print = function + | `Atom s -> Printf.sprintf "`Atom \"%s\"" s + | `List l -> "`List " ^ Q.Print.list print l + and shrink = function + | `Atom s -> Q.Iter.map mkatom (Q.Shrink.string s) + | `List l -> Q.Iter.map mklist (Q.Shrink.list ~shrink l) + in + Q.make ~print ~small ~shrink gen + + let rec sexp_valid = function + | `Atom "" -> false + | `Atom _ -> true + | `List l -> List.for_all sexp_valid l +*) + +(*$Q & ~count:100 + sexp_gen (fun s -> sexp_valid s ==> (to_string s |> parse_string = Result.Ok s)) +*) + +let parse_chan ic : sexp or_error = + let buf = Lexing.from_channel ic in + let d = Decoder.of_lexbuf buf in + match Decoder.next d with + | End -> Result.Error "unexpected end of file" + | Yield x -> Result.Ok x + | Fail e -> Result.Error e + +let parse_chan_list ic = + let buf = Lexing.from_channel ic in + let d = Decoder.of_lexbuf buf in + let rec iter acc = match Decoder.next d with + | End -> Result.Ok (List.rev acc) + | Yield x -> iter (x::acc) + | Fail e -> Result.Error e + in + iter [] + +let parse_chan_gen ic = + let buf = Lexing.from_channel ic in + let d = Decoder.of_lexbuf buf in + fun () -> match Decoder.next d with + | End -> None + | Fail e -> Some (Result.Error e) + | Yield x -> Some (Result.Ok x) + +let parse_file filename = _with_in filename parse_chan + +let parse_file_list filename = _with_in filename parse_chan_list diff --git a/src/sexp/CCSexp.mli b/src/sexp/CCSexp.mli index 17acdf7b..b7b4f236 100644 --- a/src/sexp/CCSexp.mli +++ b/src/sexp/CCSexp.mli @@ -1,35 +1,11 @@ -(* -Copyright (c) 2013, Simon Cruanes -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: +(* This file is free software, part of containers. See file "license" for more details. *) -Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. Redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. +(** {1 Handling S-expressions} *) -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 Handling S-expressions} - -@since 0.4 - -@since 0.7 -Moved the streaming parser to CCSexpStream -*) +type 'a or_error = ('a, string) Result.result +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option (** {2 Basics} *) @@ -37,6 +13,7 @@ type t = [ | `Atom of string | `List of t list ] +type sexp = t val equal : t -> t -> bool val compare : t -> t -> int @@ -65,104 +42,61 @@ val of_field : string -> t -> t val of_record : (string * t) list -> t (** Represent a record by its named fields *) -(** {6 Traversal of S-exp} +(** {2 Printing} *) -Example: serializing 2D points -{[ -type pt = {x:int; y:int };; +val to_buf : Buffer.t -> t -> unit -let pt_of_sexp e = - Sexp.Traverse.( - field "x" to_int e >>= fun x -> - field "y" to_int e >>= fun y -> - return {x;y} - );; +val to_string : t -> string -let sexp_of_pt pt = Sexp.(of_record ["x", of_int pt.x; "y", of_int pt.y]);; +val to_file : string -> t -> unit -let l = [{x=1;y=1}; {x=2;y=10}];; +val to_file_seq : string -> t sequence -> unit +(** Print the given sequence of expressions to a file *) -let sexp = Sexp.(of_list (List.map sexp_of_pt l));; +val to_chan : out_channel -> t -> unit -Sexp.Traverse.list_all pt_of_sexp sexp;; -]} +val pp : Format.formatter -> t -> unit +(** Pretty-printer nice on human eyes (including indentation) *) -*) +val pp_noindent : Format.formatter -> t -> unit +(** Raw, direct printing as compact as possible *) -module Traverse : sig - type 'a conv = t -> 'a option - (** A converter from S-expressions to 'a is a function [sexp -> 'a option]. - @since 0.4.1 *) +(** {2 Parsing} *) - val map_opt : ('a -> 'b option) -> 'a list -> 'b list option - (** Map over a list, failing as soon as the function fails on any element - @since 0.4.1 *) +(** A parser of ['a] can return [Yield x] when it parsed a value, + or [Fail e] when a parse error was encountered, or + [End] if the input was empty *) +type 'a parse_result = + | Yield of 'a + | Fail of string + | End - val list_any : 'a conv -> t -> 'a option - (** [list_any f (List l)] tries [f x] for every element [x] in [List l], - and returns the first non-None result (if any). *) +module Decoder : sig + type t + (** Decoder *) - val list_all : 'a conv -> t -> 'a list - (** [list_all f (List l)] returns the list of all [y] such that [x] in [l] - and [f x = Some y] *) + val of_lexbuf : Lexing.lexbuf -> t - val to_int : int conv - (** Expect an integer *) - - val to_string : string conv - (** Expect a string (an atom) *) - - val to_bool : bool conv - (** Expect a boolean *) - - val to_float : float conv - (** Expect a float *) - - val to_list : t list conv - (** Expect a list *) - - val to_list_with : (t -> 'a option) -> 'a list conv - (** Expect a list, applies [f] to all the elements of the list, and succeeds - only if [f] succeeded on every element - @since 0.4.1 *) - - val to_pair : (t * t) conv - (** Expect a list of two elements *) - - val to_pair_with : 'a conv -> 'b conv -> ('a * 'b) conv - (** Same as {!to_pair} but applies conversion functions - @since 0.4.1 *) - - val to_triple : (t * t * t) conv - - val to_triple_with : 'a conv -> 'b conv -> 'c conv -> ('a * 'b * 'c) conv - (* @since 0.4.1 *) - - val get_field : string -> t conv - (** [get_field name e], when [e = List [(n1,x1); (n2,x2) ... ]], extracts - the [xi] such that [name = ni], if it can find it. *) - - val field : string -> 'a conv -> 'a conv - (** Enriched version of {!get_field}, with a converter as argument *) - - val get_variant : (string * (t list -> 'a option)) list -> 'a conv - (** [get_variant l e] checks whether [e = List (Atom s :: args)], and - if some pair of [l] is [s, f]. In this case, it calls [f args] - and returns its result, otherwise it returns None. *) - - val field_list : string -> (t list -> 'a option) -> 'a conv - (** [field_list name f "(... (name a b c d) ...record)"] - will look for a field based on the given [name], and expect it to have a - list of arguments dealt with by [f] (here, "a b c d"). - @since 0.4.1 *) - - val (>>=) : 'a option -> ('a -> 'b option) -> 'b option - - val (>|=) : 'a option -> ('a -> 'b) -> 'b option - - val return : 'a -> 'a option - - val get_exn : 'a option -> 'a - (** Unwrap an option, possibly failing. - @raise Invalid_argument if the argument is [None] *) + val next : t -> sexp parse_result + (** Parse the next S-expression or return an error if the input isn't + long enough or isn't a proper S-expression *) end + +val parse_string : string -> t or_error +(** Parse a string *) + +val parse_chan : in_channel -> t or_error +(** Parse a S-expression from the given channel. Can read more data than + necessary, so don't use this if you need finer-grained control (e.g. + to read something else {b after} the S-exp) *) + +val parse_chan_gen : in_channel -> t or_error gen +(** Parse a channel into a generator of S-expressions *) + +val parse_chan_list : in_channel -> t list or_error + +val parse_file : string -> t or_error +(** Open the file and read a S-exp from it *) + +val parse_file_list : string -> t list or_error +(** Open the file and read a S-exp from it *) diff --git a/src/sexp/CCSexpM.ml b/src/sexp/CCSexpM.ml deleted file mode 100644 index 3ecaf953..00000000 --- a/src/sexp/CCSexpM.ml +++ /dev/null @@ -1,379 +0,0 @@ - -(* This file is free software, part of containers. See file "license" for more details. *) - -(** {1 Simple S-expression parsing/printing} *) - -type 'a or_error = [ `Ok of 'a | `Error of string ] -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -type t = [ - | `Atom of string - | `List of t list - ] -type sexp = t - -let _with_in filename f = - let ic = open_in filename in - try - let x = f ic in - close_in ic; - x - with e -> - close_in ic; - `Error (Printexc.to_string e) - -let _with_out filename f = - let oc = open_out filename in - try - let x = f oc in - close_out oc; - x - with e -> - close_out oc; - raise e - -(** {2 Serialization (encoding)} *) - -(* shall we escape the string because of one of its chars? *) -let _must_escape s = - try - for i = 0 to String.length s - 1 do - let c = String.unsafe_get s i in - match c with - | ' ' | ';' | ')' | '(' | '"' | '\\' | '\n' | '\t' | '\r' -> raise Exit - | _ when Char.code c > 127 -> raise Exit (* non-ascii *) - | _ -> () - done; - false - with Exit -> true - -let rec to_buf b t = match t with - | `Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s) - | `Atom s -> Buffer.add_string b s - | `List [] -> Buffer.add_string b "()" - | `List [x] -> Printf.bprintf b "(%a)" to_buf x - | `List l -> - Buffer.add_char b '('; - List.iteri - (fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t')) - l; - Buffer.add_char b ')' - -let to_string t = - let b = Buffer.create 128 in - to_buf b t; - Buffer.contents b - -let rec print fmt t = match t with - | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) - | `Atom s -> Format.pp_print_string fmt s - | `List [] -> Format.pp_print_string fmt "()" - | `List [x] -> Format.fprintf fmt "@[(%a)@]" print x - | `List l -> - Format.fprintf fmt "@[("; - List.iteri - (fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; print fmt t')) - l; - Format.fprintf fmt ")@]" - -let rec print_noindent fmt t = match t with - | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) - | `Atom s -> Format.pp_print_string fmt s - | `List [] -> Format.pp_print_string fmt "()" - | `List [x] -> Format.fprintf fmt "(%a)" print_noindent x - | `List l -> - Format.pp_print_char fmt '('; - List.iteri - (fun i t' -> (if i > 0 then Format.pp_print_char fmt ' '; print_noindent fmt t')) - l; - Format.pp_print_char fmt ')' - -let to_chan oc t = - let fmt = Format.formatter_of_out_channel oc in - print fmt t; - Format.pp_print_flush fmt () - -let to_file_seq filename seq = - _with_out filename - (fun oc -> - seq (fun t -> to_chan oc t; output_char oc '\n') - ) - -let to_file filename t = to_file_seq filename (fun k -> k t) - -(** {2 Deserialization (decoding)} *) - -module type MONAD = sig - type 'a t - val return : 'a -> 'a t - val (>>=) : 'a t -> ('a -> 'b t) -> 'b t -end - -type 'a parse_result = ['a or_error | `End ] - -module MakeDecode(M : MONAD) = struct - let (>>=) = M.(>>=) - - type t = { - buf : Bytes.t; - refill : Bytes.t -> int -> int -> int M.t; - atom : Buffer.t; - mutable i : int; (* offset in [buf] *) - mutable len : int; (* how many bytes of [buf] are usable *) - mutable line : int; - mutable col : int; - } - - let make ?(bufsize=1024) refill = - let bufsize = min (max bufsize 16) Sys.max_string_length in - { buf=Bytes.create bufsize; - refill; - atom = Buffer.create 32; - i=0; - len=0; - line=1; - col=1; - } - - let _is_digit c = Char.code '0' <= Char.code c && Char.code c <= Char.code '9' - let _digit2i c = Char.code c - Char.code '0' - - (* refill buffer. If it works, call k_succ, otherwise call k_fail *) - let _refill t k_succ k_fail = - t.refill t.buf 0 (Bytes.length t.buf) - >>= fun n -> - t.i <- 0; - t.len <- n; - if n = 0 then k_fail t else k_succ t - - (* get next char, assuming t.i < t.len *) - let _get t = - assert (t.i < t.len); - let c = Bytes.get t.buf t.i in - t.i <- t.i + 1; - if c = '\n' then (t.col <- 1; t.line <- t.line + 1) else t.col <- t.col + 1; - c - - (* return an error *) - let _error t msg = - let b = Buffer.create 32 in - Printf.bprintf b "at %d, %d: " t.line t.col; - Printf.kbprintf - (fun b -> - let msg' = Buffer.contents b in - M.return (`Error msg') - ) b msg - - let _error_eof t = _error t "unexpected end of input" - - (* The parsers all take a success continuation, and the decoder as - last arguments. The continuation is used to minimize the - number of calls to [>>=] and take two parameters, the next - char (if not consumed), and the returned expression itself *) - - (* read expression *) - let rec expr k t = - if t.i = t.len then _refill t (expr k) _error_eof - else match _get t with - | ' ' | '\t' | '\r' | '\n' -> expr k t - | c -> expr_starting_with c k t - - and expr_starting_with c k t = match c with - | ' ' | '\t' | '\r' | '\n' -> assert false - | ';' -> skip_comment (fun _ () -> expr k t) t - | '(' -> expr_list [] k t - | ')' -> _error t "unexpected ')'" - | '\\' -> _error t "unexpected '\\'" - | '"' -> quoted k t - | c -> - Buffer.add_char t.atom c; - atom k t - - (* parse list *) - and expr_list acc k t = - if t.i = t.len then _refill t (expr_list acc k) _error_eof - else match _get t with - | ' ' | '\t' | '\r' | '\n' -> expr_list acc k t - | ')' -> k None (`List (List.rev acc)) - | c -> - expr_starting_with c - (fun last e -> match last with - | Some '(' -> expr_list [] (fun _ l -> expr_list (l::acc) k t) t - | Some ')' -> k None (`List (List.rev (e::acc))) - | _ -> expr_list (e::acc) k t - ) t - - (* return the current atom (last char: c) *) - and _return_atom last k t = - let s = Buffer.contents t.atom in - Buffer.clear t.atom; - k last (`Atom s) - - (* parse atom *) - and atom k t = - if t.i = t.len then _refill t (atom k) (_return_atom None k) - else match _get t with - | '\\' -> _error t "unexpected '\\' in non-quoted string" - | '"' -> _error t "unexpected '\"' in the middle of an atom" - | (' ' | '\r' | '\n' | '\t' | '(' | ')') as c -> - _return_atom (Some c) k t - | c -> - Buffer.add_char t.atom c; - atom k t - - (* quoted string *) - and quoted k t = - if t.i = t.len then _refill t (quoted k) _error_eof - else match _get t with - | '\\' -> - (* read escaped char and continue *) - escaped - (fun c -> - Buffer.add_char t.atom c; - quoted k t - ) t - | '"' -> _return_atom None k t - | c -> - Buffer.add_char t.atom c; - quoted k t - - (* read escaped char *) - and escaped k t = - if t.i = t.len then _refill t (escaped k) _error_eof - else match _get t with - | 'n' -> k '\n' - | 't' -> k '\t' - | 'r' -> k '\r' - | 'b' -> k '\b' - | '\\' -> k '\\' - | '"' -> k '"' - | c when _is_digit c -> - read2int (_digit2i c) (fun n -> k (Char.chr n)) t - | c -> _error t "unexpected escaped char '%c'" c - - and read2int i k t = - if t.i = t.len then _refill t (read2int i k) _error_eof - else match _get t with - | c when _is_digit c -> read1int (10 * i + _digit2i c) k t - | c -> _error t "unexpected char '%c' when reading byte" c - - and read1int i k t = - if t.i = t.len then _refill t (read1int i k) _error_eof - else match _get t with - | c when _is_digit c -> k (10 * i + _digit2i c) - | c -> _error t "unexpected char '%c' when reading byte" c - - (* skip until end of line, then call next() *) - and skip_comment k t = - if t.i = t.len - then _refill t (skip_comment k) _error_eof - else match _get t with - | '\n' -> k None () - | _ -> skip_comment k t - - (* top-level expression *) - let rec expr_or_end k t = - if t.i = t.len - then _refill t (expr_or_end k) (fun _ -> M.return `End) - else match _get t with - | ' ' | '\t' | '\r' | '\n' -> expr_or_end k t - | c -> expr_starting_with c k t - - (* entry point *) - let next t : sexp parse_result M.t = - expr_or_end (fun _ x -> M.return (`Ok x)) t -end - -module ID_MONAD = struct - type 'a t = 'a - let return x = x - let (>>=) x f = f x -end - -module D = MakeDecode(ID_MONAD) - -let parse_string s : t or_error = - let n = String.length s in - let stop = ref false in - let refill bytes i _len = - if !stop then 0 - else (stop := true; Bytes.blit_string s 0 bytes i n; n) - in - let d = D.make ~bufsize:n refill in - match D.next d with - | `End -> `Error "unexpected end of file" - | (`Ok _ | `Error _) as res -> res - -(*$T - CCError.to_opt (parse_string "(abc d/e/f \"hello \\\" () world\" )") <> None - CCError.to_opt (parse_string "(abc ( d e ffff ) \"hello/world\")") <> None - (parse_string "(abc\r\n ( d e \r\tffff ))") \ - = `Ok (`List [`Atom "abc"; `List [`Atom "d"; `Atom "e"; `Atom "ffff"]]) -*) - -(*$inject - let sexp_gen = - let mkatom a = `Atom a and mklist l = `List l in - let atom = Q.Gen.(map mkatom (string_size ~gen:printable (1 -- 30))) in - let gen = Q.Gen.( - sized (fix - (fun self n st -> match n with - | 0 -> atom st - | _ -> - frequency - [ 1, atom - ; 2, map mklist (list_size (0 -- 10) (self (n/10))) - ] st - ) - )) in - let rec small = function - | `Atom s -> String.length s - | `List l -> List.fold_left (fun n x->n+small x) 0 l - and print = function - | `Atom s -> Printf.sprintf "`Atom \"%s\"" s - | `List l -> "`List " ^ Q.Print.list print l - and shrink = function - | `Atom s -> Q.Iter.map mkatom (Q.Shrink.string s) - | `List l -> Q.Iter.map mklist (Q.Shrink.list ~shrink l) - in - Q.make ~print ~small ~shrink gen - - let rec sexp_valid = function - | `Atom "" -> false - | `Atom _ -> true - | `List l -> List.for_all sexp_valid l -*) - -(*$Q & ~count:100 - sexp_gen (fun s -> sexp_valid s ==> (to_string s |> parse_string = `Ok s)) -*) - -let parse_chan ?bufsize ic = - let d = D.make ?bufsize (input ic) in - match D.next d with - | `End -> `Error "unexpected end of file" - | (`Ok _ | `Error _) as res -> res - -let parse_chan_gen ?bufsize ic = - let d = D.make ?bufsize (input ic) in - fun () -> - match D.next d with - | `End -> None - | `Error _ as e -> Some e - | `Ok _ as res -> Some res - -let parse_chan_list ?bufsize ic = - let d = D.make ?bufsize (input ic) in - let rec iter acc = match D.next d with - | `End -> `Ok (List.rev acc) - | `Ok x -> iter (x::acc) - | `Error _ as e -> e - in - iter [] - -let parse_file filename = - _with_in filename (fun ic -> parse_chan ic) - -let parse_file_list filename = - _with_in filename (fun ic -> parse_chan_list ic) diff --git a/src/sexp/CCSexpM.mli b/src/sexp/CCSexpM.mli deleted file mode 100644 index 74f723c7..00000000 --- a/src/sexp/CCSexpM.mli +++ /dev/null @@ -1,93 +0,0 @@ - -(* This file is free software, part of containers. See file "license" for more details. *) - -(** {1 Simple and efficient S-expression parsing/printing} - - @since 0.7 *) - -type 'a or_error = [ `Ok of 'a | `Error of string ] -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -(** {2 Basics} *) - -type t = [ - | `Atom of string - | `List of t list - ] -type sexp = t - -(** {2 Serialization (encoding)} *) - -val to_buf : Buffer.t -> t -> unit - -val to_string : t -> string - -val to_file : string -> t -> unit - -val to_file_seq : string -> t sequence -> unit -(** Print the given sequence of expressions to a file *) - -val to_chan : out_channel -> t -> unit - -val print : Format.formatter -> t -> unit -(** Pretty-printer nice on human eyes (including indentation) *) - -val print_noindent : Format.formatter -> t -> unit -(** Raw, direct printing as compact as possible *) - -(** {2 Deserialization (decoding)} *) - -module type MONAD = sig - type 'a t - val return : 'a -> 'a t - val (>>=) : 'a t -> ('a -> 'b t) -> 'b t -end - -type 'a parse_result = ['a or_error | `End ] -(** A parser of ['a] can return [`Ok x] when it parsed a value, - or [`Error e] when a parse error was encountered, or - [`End] if the input was empty *) - -module MakeDecode(M : MONAD) : sig - type t - (** Decoder *) - - val make : ?bufsize:int -> (Bytes.t -> int -> int -> int M.t) -> t - (** Make a decoder with the given function used to refill an - internal buffer. The function might return [0] if the - input is exhausted. - @param bufsize size of internal buffer *) - - val next : t -> sexp parse_result M.t - (** Parse the next S-expression or return an error if the input isn't - long enough or isn't a proper S-expression *) -end - -module ID_MONAD : MONAD with type 'a t = 'a -(** The monad that just uses blocking calls as bind - @since 0.14 - ['a t = 'a] contraint is @since 0.16 *) - -module D : module type of MakeDecode(ID_MONAD) -(** Decoder that just blocks when input is not available - @since 0.14 *) - -val parse_string : string -> t or_error -(** Parse a string *) - -val parse_chan : ?bufsize:int -> in_channel -> t or_error -(** Parse a S-expression from the given channel. Can read more data than - necessary, so don't use this if you need finer-grained control (e.g. - to read something else {b after} the S-exp) *) - -val parse_chan_gen : ?bufsize:int -> in_channel -> t or_error gen -(** Parse a channel into a generator of S-expressions *) - -val parse_chan_list : ?bufsize:int -> in_channel -> t list or_error - -val parse_file : string -> t or_error -(** Open the file and read a S-exp from it *) - -val parse_file_list : string -> t list or_error -(** Open the file and read a S-exp from it *) diff --git a/src/sexp/CCSexp_lex.mll b/src/sexp/CCSexp_lex.mll new file mode 100644 index 00000000..4470a75b --- /dev/null +++ b/src/sexp/CCSexp_lex.mll @@ -0,0 +1,56 @@ +{ + type token = + | ATOM of string + | LIST_OPEN + | LIST_CLOSE + | EOI + + (* location + message *) + exception Error of int * int * string + + let error lexbuf msg = + let start = Lexing.lexeme_start_p lexbuf in + let line = start.Lexing.pos_lnum in + let col = start.Lexing.pos_cnum - start.Lexing.pos_bol in + raise (Error (line,col,msg)) + + (* remove quotes + unescape *) + let remove_quotes lexbuf s = + assert (s.[0] = '"' && s.[String.length s - 1] = '"'); + let buf = Buffer.create (String.length s) in + let escaped = ref false in + for i = 1 to String.length s-2 do + match s.[i] with + | '\\' when !escaped -> Buffer.add_char buf '\\'; escaped := false + | '\\' -> escaped := true + | 'n' when !escaped -> Buffer.add_char buf '\n'; escaped := false + | 'r' when !escaped -> Buffer.add_char buf '\r'; escaped := false + | 't' when !escaped -> Buffer.add_char buf '\t'; escaped := false + | '"' when !escaped -> Buffer.add_char buf '"'; escaped := false + | c when !escaped -> error lexbuf (Printf.sprintf "wrong escape `%c`" c) + | c -> Buffer.add_char buf c; + done; + Buffer.contents buf +} + +let newline = '\n' | "\r\n" +let white = [' ' '\r' '\t'] | newline + +let comment_line = ';' [^ '\n']* +let printable_char = [^ '\n'] + +let id = [^ ')' '(' '"' ' ' '\t' '\r' '\n']+ +let string = '"' ([^ '"' '\\'] | "\\\"" | "\\\\" | "\\n" | "\\t" | "\\r")* '"' + +rule token = parse + | comment_line { token lexbuf } + | newline { Lexing.new_line lexbuf; token lexbuf } + | white { token lexbuf } + | eof { EOI } + | '(' { LIST_OPEN } + | ')' { LIST_CLOSE } + | id { ATOM (Lexing.lexeme lexbuf) } + | string { ATOM (remove_quotes lexbuf (Lexing.lexeme lexbuf)) } + | _ as c + { error lexbuf (Printf.sprintf "lexing failed on char `%c`" c) } +