From 14d651b83622b73cf355b4b0522eb9640266d901 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 4 Feb 2013 23:29:09 +0100 Subject: [PATCH] S-expression module, with a strong accent on sequences (o'rly?). S-exprs can be converted to and from streams of 'tokens' --- sexpr.ml | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ sexpr.mli | 57 +++++++++++++++++++++++++++ tests.ml | 6 ++- 3 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 sexpr.ml create mode 100644 sexpr.mli diff --git a/sexpr.ml b/sexpr.ml new file mode 100644 index 0000000..20f3c63 --- /dev/null +++ b/sexpr.ml @@ -0,0 +1,113 @@ +(* +Zipperposition: a functional superposition prover for prototyping +Copyright (C) 2012 Simon Cruanes + +This is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. +*) + +(* {1 Basic S-expressions, with printing and parsing} *) + +(** S-expression *) +type t = + | Atom of string (** An atom *) + | List of t list (** A list of S-expressions *) + +(** Token that compose a Sexpr once serialized *) +type token = [`Open | `Close | `Atom of string] + +(** Iterate on the S-expression, calling the callback with tokens *) +let rec iter f s = match s with + | Atom a -> f (`Atom a) + | List l -> f `Open; iter_list f l; f `Close +and iter_list f l = match l with + | [] -> () + | x::l' -> iter f x; iter_list f l' + +(** Traverse. This yields a sequence of tokens *) +let traverse s = Sequence.from_iter (fun k -> iter k s) + +(** Lex: create a sequence of tokens from the given in_channel. *) +let lex input = + let seq_fun k = + let in_word = ref false in + let buf = Buffer.create 128 in + (* loop. TODO handle escaping of (), and "" *) + let rec next c = + match c with + | '(' -> k `Open + | ')' -> flush_word(); k `Close + | ' ' | '\t' | '\n' -> flush_word () + | c -> in_word := true; Buffer.add_char buf c + (* finish the previous word token *) + and flush_word () = + if !in_word then begin + (* this whitespace follows a word *) + let word = Buffer.contents buf in + Buffer.clear buf; + in_word := false; + k (`Atom word) + end + in + Sequence.iter next input + in + Sequence.from_iter seq_fun + +(** Build a Sexpr from a sequence of tokens *) +let of_seq seq = + (* called on every token *) + let rec k stack token = match token with + | `Open -> `Open :: stack + | `Close -> collapse [] stack + | `Atom a -> (`Expr (Atom a)) :: stack + (* collapse last list into an `Expr *) + and collapse acc stack = match stack with + | `Open::stack' -> `Expr (List acc) :: stack' + | `Expr a::stack' -> collapse (a :: acc) stack' + | _ -> assert false + in + (* iterate on the sequence, given an empty initial stack *) + let stack = Sequence.fold k [] seq in + (* stack should contain exactly one expression *) + match stack with + | [`Expr expr] -> expr + | [] -> failwith "no Sexpr could be parsed" + | _ -> failwith "too many elements on the stack" + +(** Print a token on the given formatter *) +let pp_token formatter token = match token with + | `Open -> Format.fprintf formatter "@[(" + | `Close -> Format.fprintf formatter ")@]" + | `Atom s -> Format.pp_print_string formatter s + +(** Print a sequence of Sexpr tokens on the given formatter *) +let pp_tokens formatter tokens = + let first = ref true in + Sequence.iter + (fun token -> + (match token with + | `Open -> (if not !first then Format.fprintf formatter " "); first := true + | `Close -> first := false + | _ -> if !first then first := false else Format.fprintf formatter " "); + pp_token formatter token) + tokens + +(** Pretty-print the S-expr. If [indent] is true, the S-expression + is printed with indentation. *) +let pp_sexpr ?(indent=false) formatter s = + if indent + then Format.fprintf formatter "@[%a@]" pp_tokens (traverse s) + else pp_tokens formatter (traverse s) + diff --git a/sexpr.mli b/sexpr.mli new file mode 100644 index 0000000..3a03bad --- /dev/null +++ b/sexpr.mli @@ -0,0 +1,57 @@ +(* +Zipperposition: a functional superposition prover for prototyping +Copyright (C) 2012 Simon Cruanes + +This is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. +*) + +(* {1 Basic S-expressions, with printing and parsing} *) + +type t = + | Atom of string (** An atom *) + | List of t list (** A list of S-expressions *) + (** S-expression *) + +type token = [`Open | `Close | `Atom of string] + (** Token that compose a Sexpr once serialized *) + +(** {2 Traverse a sequence of tokens} *) + +val iter : (token -> unit) -> t -> unit + (** Iterate on the S-expression, calling the callback with tokens *) + +val traverse : t -> token Sequence.t + (** Traverse. This yields a sequence of tokens *) + +(** {2 Text <-> tokens} *) + +val lex : char Sequence.t -> token Sequence.t + (** Lex: create a sequence of tokens from the given sequence of chars. *) + +val of_seq : token Sequence.t -> t + (** Build a Sexpr from a sequence of tokens, or raise Failure *) + +(** {2 Printing} *) + +val pp_token : Format.formatter -> token -> unit + (** Print a token on the given formatter *) + +val pp_tokens : Format.formatter -> token Sequence.t -> unit + (** Print a sequence of Sexpr tokens on the given formatter *) + +val pp_sexpr : ?indent:bool -> Format.formatter -> t -> unit + (** Pretty-print the S-expr. If [indent] is true, the S-expression + is printed with indentation. *) diff --git a/tests.ml b/tests.ml index 901b059..a5d8f30 100644 --- a/tests.ml +++ b/tests.ml @@ -9,6 +9,8 @@ let pp_list ?(sep=", ") pp_item formatter l = module ISet = Set.Make(struct type t = int let compare = compare end) module ISetSeq = Sequence.Set(ISet) +let sexpr = "(foo bar (bazz quux hello 42) world (zoo foo bar (1 2 (3 4))))" + let _ = let l = [0;1;2;3;4;5;6] in let l' = Sequence.List.of_seq @@ -38,8 +40,10 @@ let _ = (Sequence.Array.of_seq (Sequence.append (Sequence.take 5 (Sequence.List.to_seq l3)) (Sequence.List.to_seq l4)))); (* sum *) - let n = 200000000 in + let n = 100000000 in let sum = Sequence.fold (+) 0 (Sequence.take n (Sequence.Int.repeat 1)) in Format.printf "%dx1 = %d@." n sum; assert (n=sum); + let s = Sexpr.of_seq (Sexpr.lex (Sequence.String.to_seq sexpr)) in + Format.printf "parse @[%s@] into @[%a@]@." sexpr (Sexpr.pp_sexpr ~indent:false) s; ()