From d6f98032c8be171cdc163f47928c5cbfd2fe1521 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 5 Nov 2019 19:24:28 -0600 Subject: [PATCH] feat(sexp): functorize the parser/printer --- src/sexp/CCSexp.ml | 468 +++++++++++++++++++++++--------------------- src/sexp/CCSexp.mli | 108 ++-------- 2 files changed, 266 insertions(+), 310 deletions(-) diff --git a/src/sexp/CCSexp.ml b/src/sexp/CCSexp.ml index c5486528..850ded67 100644 --- a/src/sexp/CCSexp.ml +++ b/src/sexp/CCSexp.ml @@ -9,13 +9,247 @@ type 'a or_error = ('a, string) Result.result type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option +module type SEXP = CCSexp_intf.SEXP +module type S = CCSexp_intf.S + +let equal_string (a : string) b = Stdlib.(=) a b +let compare_string (a : string) b = Stdlib.compare a b + +module Make(Sexp : SEXP) = struct + type t = Sexp.t + type sexp = t + + let of_int x = Sexp.atom (string_of_int x) + let of_float x = Sexp.atom (string_of_float x) + let of_bool x = Sexp.atom (string_of_bool x) + let atom x = Sexp.atom x + let of_unit = Sexp.list [] + let of_list l = Sexp.list l + let of_rev_list l = Sexp.list (List.rev l) + let of_pair (x,y) = Sexp.list [x;y] + let of_triple (x,y,z) = Sexp.list [x;y;z] + let of_quad (x,y,z,u) = Sexp.list [x;y;z;u] + + let of_variant name args = Sexp.list (Sexp.atom name :: args) + let of_field name t = Sexp.list [Sexp.atom name; t] + let of_record l = + Sexp.list (List.map (fun (n,x) -> of_field n x) l) + + (** {2 Printing} *) + + let _with_out filename f = + let oc = open_out filename in + try + let x = f oc in + close_out oc; + x + with e -> + close_out oc; + raise e + + (* shall we escape the string because of one of its chars? *) + let _must_escape s = + try + for i = 0 to String.length s - 1 do + let c = String.unsafe_get s i in + match c with + | ' ' | ')' | '(' | '"' | ';' | '\\' | '\n' | '\t' | '\r' -> raise Exit + | _ when Char.code c > 127 -> raise Exit (* non-ascii *) + | _ -> () + done; + false + with Exit -> true + + let rec to_buf b t = + Sexp.match_ t + ~atom:(fun s -> + if _must_escape s then Printf.bprintf b "\"%s\"" (String.escaped s) + else Buffer.add_string b s) + ~list:(function + | [] -> Buffer.add_string b "()" + | [x] -> Printf.bprintf b "(%a)" to_buf x + | l -> + Buffer.add_char b '('; + List.iteri + (fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t')) + l; + Buffer.add_char b ')') + + let to_string t = + let b = Buffer.create 128 in + to_buf b t; + Buffer.contents b + + let rec pp fmt t = + Sexp.match_ t + ~atom:(fun s -> + if _must_escape s then Format.fprintf fmt "\"%s\"" (String.escaped s) + else Format.pp_print_string fmt s) + ~list:(function + | [] -> Format.pp_print_string fmt "()" + | [x] -> Format.fprintf fmt "@[(%a)@]" pp x + | l -> + Format.fprintf fmt "@[("; + List.iteri + (fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; pp fmt t')) + l; + Format.fprintf fmt ")@]") + + let rec pp_noindent fmt t = + Sexp.match_ t + ~atom:(fun s -> + if _must_escape s then Format.fprintf fmt "\"%s\"" (String.escaped s) + else Format.pp_print_string fmt s) + ~list:(function + | [] -> Format.pp_print_string fmt "()" + | [x] -> Format.fprintf fmt "(%a)" pp_noindent x + | l -> + Format.pp_print_char fmt '('; + List.iteri + (fun i t' -> (if i > 0 then Format.pp_print_char fmt ' '; pp_noindent fmt t')) + l; + Format.pp_print_char fmt ')') + + let to_chan oc t = + let fmt = Format.formatter_of_out_channel oc in + pp fmt t; + Format.pp_print_flush fmt () + + let to_file_seq filename seq = + _with_out filename + (fun oc -> + seq (fun t -> to_chan oc t; output_char oc '\n') + ) + + let to_file filename t = to_file_seq filename (fun k -> k t) + + (** {2 Parsing} *) + + let _with_in filename f = + let ic = open_in filename in + try + let x = f ic in + close_in ic; + x + with e -> + close_in ic; + Result.Error (Printexc.to_string e) + + (** A parser of ['a] can return [Yield x] when it parsed a value, + or [Fail e] when a parse error was encountered, or + [End] if the input was empty *) + type 'a parse_result = + | Yield of 'a + | Fail of string + | End + + module Decoder = struct + module L = CCSexp_lex + + type t = { + buf: Lexing.lexbuf; + mutable cur_tok: L.token option; (* current token *) + } + + let cur (t:t): L.token = match t.cur_tok with + | Some L.EOI -> assert false + | Some t -> t + | None -> + (* fetch token *) + let tok = L.token t.buf in + t.cur_tok <- Some tok; + tok + + let junk t = t.cur_tok <- None + + let of_lexbuf buf = { + buf; + cur_tok=None; + } + + exception E_end + exception E_error of int * int * string + + let error_ lexbuf msg = + let start = Lexing.lexeme_start_p lexbuf in + let line = start.Lexing.pos_lnum in + let col = start.Lexing.pos_cnum - start.Lexing.pos_bol in + raise (E_error (line,col,msg)) + + let next (t:t) = + let rec expr () = match cur t with + | L.EOI -> raise E_end + | L.SEXP_COMMENT -> + junk t; + let _u = expr() in (* discard next sexp *) + expr() + | L.ATOM s -> junk t; Sexp.atom s + | L.LIST_OPEN -> + junk t; + let l = lst [] in + begin match cur t with + | L.LIST_CLOSE -> junk t; Sexp.list l + | _ -> error_ t.buf "expected ')'" + end + | L.LIST_CLOSE -> error_ t.buf "expected expression" + and lst acc = match cur t with + | L.LIST_CLOSE -> List.rev acc + | L.LIST_OPEN | L.ATOM _ | L.SEXP_COMMENT -> + let sub = expr () in + lst (sub::acc) + | L.EOI -> error_ t.buf "unexpected EOI" + in + try Yield (expr ()) + with + | E_end -> End + | E_error (line,col,msg) + | CCSexp_lex.Error (line,col,msg) -> + Fail (Printf.sprintf "parse error at %d:%d: %s" line col msg) + end + + let parse_string s : t or_error = + let buf = Lexing.from_string s in + let d = Decoder.of_lexbuf buf in + match Decoder.next d with + | End -> Result.Error "unexpected end of file" + | Yield x -> Result.Ok x + | Fail s -> Result.Error s + + let parse_chan ic : sexp or_error = + let buf = Lexing.from_channel ic in + let d = Decoder.of_lexbuf buf in + match Decoder.next d with + | End -> Result.Error "unexpected end of file" + | Yield x -> Result.Ok x + | Fail e -> Result.Error e + + let parse_chan_list ic = + let buf = Lexing.from_channel ic in + let d = Decoder.of_lexbuf buf in + let rec iter acc = match Decoder.next d with + | End -> Result.Ok (List.rev acc) + | Yield x -> iter (x::acc) + | Fail e -> Result.Error e + in + iter [] + + let parse_chan_gen ic = + let buf = Lexing.from_channel ic in + let d = Decoder.of_lexbuf buf in + fun () -> match Decoder.next d with + | End -> None + | Fail e -> Some (Result.Error e) + | Yield x -> Some (Result.Ok x) + + let parse_file filename = _with_in filename parse_chan + + let parse_file_list filename = _with_in filename parse_chan_list +end + type t = [ | `Atom of string | `List of t list ] -type sexp = t - -let equal_string (a : string) b = Stdlib.(=) a b let rec equal a b = match a, b with | `Atom s1, `Atom s2 -> @@ -24,8 +258,6 @@ let rec equal a b = match a, b with begin try List.for_all2 equal l1 l2 with Invalid_argument _ -> false end | `Atom _, _ | `List _, _ -> false -let compare_string (a : string) b = Stdlib.compare a b - let rec compare_list a b = match a, b with | [], [] -> 0 | [], _::_ -> -1 @@ -42,194 +274,16 @@ and compare a b = match a, b with | `Atom _, _ -> -1 | `List _, _ -> 1 -let hash a = Hashtbl.hash a +include (Make(struct + type t_ = t + type t = t_ + let atom x = `Atom x + let list x = `List x -let of_int x = `Atom (string_of_int x) -let of_float x = `Atom (string_of_float x) -let of_bool x = `Atom (string_of_bool x) -let atom x = `Atom x -let of_unit = `List [] -let of_list l = `List l -let of_rev_list l = `List (List.rev l) -let of_pair (x,y) = `List[x;y] -let of_triple (x,y,z) = `List[x;y;z] -let of_quad (x,y,z,u) = `List[x;y;z;u] - -let of_variant name args = `List (`Atom name :: args) -let of_field name t = `List [`Atom name; t] -let of_record l = - `List (List.map (fun (n,x) -> of_field n x) l) - -(** {2 Printing} *) - -let _with_out filename f = - let oc = open_out filename in - try - let x = f oc in - close_out oc; - x - with e -> - close_out oc; - raise e - -(* shall we escape the string because of one of its chars? *) -let _must_escape s = - try - for i = 0 to String.length s - 1 do - let c = String.unsafe_get s i in - match c with - | ' ' | ')' | '(' | '"' | ';' | '\\' | '\n' | '\t' | '\r' -> raise Exit - | _ when Char.code c > 127 -> raise Exit (* non-ascii *) - | _ -> () - done; - false - with Exit -> true - -let rec to_buf b t = match t with - | `Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s) - | `Atom s -> Buffer.add_string b s - | `List [] -> Buffer.add_string b "()" - | `List [x] -> Printf.bprintf b "(%a)" to_buf x - | `List l -> - Buffer.add_char b '('; - List.iteri - (fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t')) - l; - Buffer.add_char b ')' - -let to_string t = - let b = Buffer.create 128 in - to_buf b t; - Buffer.contents b - -let rec pp fmt t = match t with - | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) - | `Atom s -> Format.pp_print_string fmt s - | `List [] -> Format.pp_print_string fmt "()" - | `List [x] -> Format.fprintf fmt "@[(%a)@]" pp x - | `List l -> - Format.fprintf fmt "@[("; - List.iteri - (fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; pp fmt t')) - l; - Format.fprintf fmt ")@]" - -let rec pp_noindent fmt t = match t with - | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) - | `Atom s -> Format.pp_print_string fmt s - | `List [] -> Format.pp_print_string fmt "()" - | `List [x] -> Format.fprintf fmt "(%a)" pp_noindent x - | `List l -> - Format.pp_print_char fmt '('; - List.iteri - (fun i t' -> (if i > 0 then Format.pp_print_char fmt ' '; pp_noindent fmt t')) - l; - Format.pp_print_char fmt ')' - -let to_chan oc t = - let fmt = Format.formatter_of_out_channel oc in - pp fmt t; - Format.pp_print_flush fmt () - -let to_file_seq filename seq = - _with_out filename - (fun oc -> - seq (fun t -> to_chan oc t; output_char oc '\n') - ) - -let to_file filename t = to_file_seq filename (fun k -> k t) - -(** {2 Parsing} *) - -let _with_in filename f = - let ic = open_in filename in - try - let x = f ic in - close_in ic; - x - with e -> - close_in ic; - Result.Error (Printexc.to_string e) - -(** A parser of ['a] can return [Yield x] when it parsed a value, - or [Fail e] when a parse error was encountered, or - [End] if the input was empty *) -type 'a parse_result = - | Yield of 'a - | Fail of string - | End - -module Decoder = struct - module L = CCSexp_lex - - type t = { - buf: Lexing.lexbuf; - mutable cur_tok: L.token option; (* current token *) - } - - let cur (t:t): L.token = match t.cur_tok with - | Some L.EOI -> assert false - | Some t -> t - | None -> - (* fetch token *) - let tok = L.token t.buf in - t.cur_tok <- Some tok; - tok - - let junk t = t.cur_tok <- None - - let of_lexbuf buf = { - buf; - cur_tok=None; - } - - exception E_end - exception E_error of int * int * string - - let error_ lexbuf msg = - let start = Lexing.lexeme_start_p lexbuf in - let line = start.Lexing.pos_lnum in - let col = start.Lexing.pos_cnum - start.Lexing.pos_bol in - raise (E_error (line,col,msg)) - - let next (t:t) = - let rec expr () = match cur t with - | L.EOI -> raise E_end - | L.SEXP_COMMENT -> - junk t; - let _u = expr() in (* discard next sexp *) - expr() - | L.ATOM s -> junk t; `Atom s - | L.LIST_OPEN -> - junk t; - let l = lst [] in - begin match cur t with - | L.LIST_CLOSE -> junk t; `List l - | _ -> error_ t.buf "expected ')'" - end - | L.LIST_CLOSE -> error_ t.buf "expected expression" - and lst acc = match cur t with - | L.LIST_CLOSE -> List.rev acc - | L.LIST_OPEN | L.ATOM _ | L.SEXP_COMMENT -> - let sub = expr () in - lst (sub::acc) - | L.EOI -> error_ t.buf "unexpected EOI" - in - try Yield (expr ()) - with - | E_end -> End - | E_error (line,col,msg) - | CCSexp_lex.Error (line,col,msg) -> - Fail (Printf.sprintf "parse error at %d:%d: %s" line col msg) -end - -let parse_string s : t or_error = - let buf = Lexing.from_string s in - let d = Decoder.of_lexbuf buf in - match Decoder.next d with - | End -> Result.Error "unexpected end of file" - | Yield x -> Result.Ok x - | Fail s -> Result.Error s + let match_ x ~atom ~list = match x with + | `Atom x -> atom x + | `List l -> list l + end) : S with type t := t) (*$T CCResult.to_opt (parse_string "(abc d/e/f \"hello \\\" () world\" )") <> None @@ -282,33 +336,3 @@ let parse_string s : t or_error = (*$Q & ~count:100 sexp_gen (fun s -> sexp_valid s ==> (to_string s |> parse_string = Result.Ok s)) *) - -let parse_chan ic : sexp or_error = - let buf = Lexing.from_channel ic in - let d = Decoder.of_lexbuf buf in - match Decoder.next d with - | End -> Result.Error "unexpected end of file" - | Yield x -> Result.Ok x - | Fail e -> Result.Error e - -let parse_chan_list ic = - let buf = Lexing.from_channel ic in - let d = Decoder.of_lexbuf buf in - let rec iter acc = match Decoder.next d with - | End -> Result.Ok (List.rev acc) - | Yield x -> iter (x::acc) - | Fail e -> Result.Error e - in - iter [] - -let parse_chan_gen ic = - let buf = Lexing.from_channel ic in - let d = Decoder.of_lexbuf buf in - fun () -> match Decoder.next d with - | End -> None - | Fail e -> Some (Result.Error e) - | Yield x -> Some (Result.Ok x) - -let parse_file filename = _with_in filename parse_chan - -let parse_file_list filename = _with_in filename parse_chan_list diff --git a/src/sexp/CCSexp.mli b/src/sexp/CCSexp.mli index b1cd713e..34929f0f 100644 --- a/src/sexp/CCSexp.mli +++ b/src/sexp/CCSexp.mli @@ -7,98 +7,30 @@ type 'a or_error = ('a, string) Result.result type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option +(** {2 Abstract representation of S-expressions} + + @since NEXT_RELEASE *) +module type SEXP = CCSexp_intf.SEXP + +(** {2 Operations over S-expressions} + + @since NEXT_RELEASE *) +module type S = CCSexp_intf.S + +(** {2 Functorized operations} + + This builds a parser and printer for S-expressions represented as + in the [Sexp] argument. + + @since NEXT_RELEASE *) +module Make(Sexp : SEXP) : S with type t = Sexp.t + (** {2 Basics} *) +(** A simple, structural representation of S-expressions. *) type t = [ | `Atom of string | `List of t list ] -type sexp = t -val equal : t -> t -> bool -val compare : t -> t -> int -val hash : t -> int - -val atom : string -> t -(** Build an atom directly from a string. *) - -val of_int : int -> t -val of_bool : bool -> t -val of_list : t list -> t -val of_rev_list : t list -> t -(** Reverse the list. *) -val of_float : float -> t -val of_unit : t -val of_pair : t * t -> t -val of_triple : t * t * t -> t -val of_quad : t * t * t * t -> t - -val of_variant : string -> t list -> t -(** [of_variant name args] is used to encode algebraic variants - into a S-expr. For instance [of_variant "some" [of_int 1]] - represents the value [Some 1]. *) - -val of_field : string -> t -> t -(** Used to represent one record field. *) - -val of_record : (string * t) list -> t -(** Represent a record by its named fields. *) - -(** {2 Printing} *) - -val to_buf : Buffer.t -> t -> unit - -val to_string : t -> string - -val to_file : string -> t -> unit - -val to_file_seq : string -> t sequence -> unit -(** Print the given sequence of expressions to a file. *) - -val to_chan : out_channel -> t -> unit - -val pp : Format.formatter -> t -> unit -(** Pretty-printer nice on human eyes (including indentation). *) - -val pp_noindent : Format.formatter -> t -> unit -(** Raw, direct printing as compact as possible. *) - -(** {2 Parsing} *) - -(** A parser of ['a] can return [Yield x] when it parsed a value, - or [Fail e] when a parse error was encountered, or - [End] if the input was empty. *) -type 'a parse_result = - | Yield of 'a - | Fail of string - | End - -module Decoder : sig - type t - (** Decoder *) - - val of_lexbuf : Lexing.lexbuf -> t - - val next : t -> sexp parse_result - (** Parse the next S-expression or return an error if the input isn't - long enough or isn't a proper S-expression. *) -end - -val parse_string : string -> t or_error -(** Parse a string. *) - -val parse_chan : in_channel -> t or_error -(** Parse a S-expression from the given channel. Can read more data than - necessary, so don't use this if you need finer-grained control (e.g. - to read something else {b after} the S-exp). *) - -val parse_chan_gen : in_channel -> t or_error gen -(** Parse a channel into a generator of S-expressions. *) - -val parse_chan_list : in_channel -> t list or_error - -val parse_file : string -> t or_error -(** Open the file and read a S-exp from it. *) - -val parse_file_list : string -> t list or_error -(** Open the file and read a S-exp from it. *) +include S with type t := t