S-expression module, with a strong accent on

sequences (o'rly?). S-exprs can be converted to and from streams of 'tokens'
This commit is contained in:
Simon Cruanes 2013-02-04 23:29:09 +01:00
parent 28332a45df
commit 14d651b836
3 changed files with 175 additions and 1 deletions

113
sexpr.ml Normal file
View file

@ -0,0 +1,113 @@
(*
Zipperposition: a functional superposition prover for prototyping
Copyright (C) 2012 Simon Cruanes
This is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
*)
(* {1 Basic S-expressions, with printing and parsing} *)
(** S-expression *)
type t =
| Atom of string (** An atom *)
| List of t list (** A list of S-expressions *)
(** Token that compose a Sexpr once serialized *)
type token = [`Open | `Close | `Atom of string]
(** Iterate on the S-expression, calling the callback with tokens *)
let rec iter f s = match s with
| Atom a -> f (`Atom a)
| List l -> f `Open; iter_list f l; f `Close
and iter_list f l = match l with
| [] -> ()
| x::l' -> iter f x; iter_list f l'
(** Traverse. This yields a sequence of tokens *)
let traverse s = Sequence.from_iter (fun k -> iter k s)
(** Lex: create a sequence of tokens from the given in_channel. *)
let lex input =
let seq_fun k =
let in_word = ref false in
let buf = Buffer.create 128 in
(* loop. TODO handle escaping of (), and "" *)
let rec next c =
match c with
| '(' -> k `Open
| ')' -> flush_word(); k `Close
| ' ' | '\t' | '\n' -> flush_word ()
| c -> in_word := true; Buffer.add_char buf c
(* finish the previous word token *)
and flush_word () =
if !in_word then begin
(* this whitespace follows a word *)
let word = Buffer.contents buf in
Buffer.clear buf;
in_word := false;
k (`Atom word)
end
in
Sequence.iter next input
in
Sequence.from_iter seq_fun
(** Build a Sexpr from a sequence of tokens *)
let of_seq seq =
(* called on every token *)
let rec k stack token = match token with
| `Open -> `Open :: stack
| `Close -> collapse [] stack
| `Atom a -> (`Expr (Atom a)) :: stack
(* collapse last list into an `Expr *)
and collapse acc stack = match stack with
| `Open::stack' -> `Expr (List acc) :: stack'
| `Expr a::stack' -> collapse (a :: acc) stack'
| _ -> assert false
in
(* iterate on the sequence, given an empty initial stack *)
let stack = Sequence.fold k [] seq in
(* stack should contain exactly one expression *)
match stack with
| [`Expr expr] -> expr
| [] -> failwith "no Sexpr could be parsed"
| _ -> failwith "too many elements on the stack"
(** Print a token on the given formatter *)
let pp_token formatter token = match token with
| `Open -> Format.fprintf formatter "@[("
| `Close -> Format.fprintf formatter ")@]"
| `Atom s -> Format.pp_print_string formatter s
(** Print a sequence of Sexpr tokens on the given formatter *)
let pp_tokens formatter tokens =
let first = ref true in
Sequence.iter
(fun token ->
(match token with
| `Open -> (if not !first then Format.fprintf formatter " "); first := true
| `Close -> first := false
| _ -> if !first then first := false else Format.fprintf formatter " ");
pp_token formatter token)
tokens
(** Pretty-print the S-expr. If [indent] is true, the S-expression
is printed with indentation. *)
let pp_sexpr ?(indent=false) formatter s =
if indent
then Format.fprintf formatter "@[<hov 4>%a@]" pp_tokens (traverse s)
else pp_tokens formatter (traverse s)

57
sexpr.mli Normal file
View file

@ -0,0 +1,57 @@
(*
Zipperposition: a functional superposition prover for prototyping
Copyright (C) 2012 Simon Cruanes
This is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
*)
(* {1 Basic S-expressions, with printing and parsing} *)
type t =
| Atom of string (** An atom *)
| List of t list (** A list of S-expressions *)
(** S-expression *)
type token = [`Open | `Close | `Atom of string]
(** Token that compose a Sexpr once serialized *)
(** {2 Traverse a sequence of tokens} *)
val iter : (token -> unit) -> t -> unit
(** Iterate on the S-expression, calling the callback with tokens *)
val traverse : t -> token Sequence.t
(** Traverse. This yields a sequence of tokens *)
(** {2 Text <-> tokens} *)
val lex : char Sequence.t -> token Sequence.t
(** Lex: create a sequence of tokens from the given sequence of chars. *)
val of_seq : token Sequence.t -> t
(** Build a Sexpr from a sequence of tokens, or raise Failure *)
(** {2 Printing} *)
val pp_token : Format.formatter -> token -> unit
(** Print a token on the given formatter *)
val pp_tokens : Format.formatter -> token Sequence.t -> unit
(** Print a sequence of Sexpr tokens on the given formatter *)
val pp_sexpr : ?indent:bool -> Format.formatter -> t -> unit
(** Pretty-print the S-expr. If [indent] is true, the S-expression
is printed with indentation. *)

View file

@ -9,6 +9,8 @@ let pp_list ?(sep=", ") pp_item formatter l =
module ISet = Set.Make(struct type t = int let compare = compare end)
module ISetSeq = Sequence.Set(ISet)
let sexpr = "(foo bar (bazz quux hello 42) world (zoo foo bar (1 2 (3 4))))"
let _ =
let l = [0;1;2;3;4;5;6] in
let l' = Sequence.List.of_seq
@ -38,8 +40,10 @@ let _ =
(Sequence.Array.of_seq (Sequence.append
(Sequence.take 5 (Sequence.List.to_seq l3)) (Sequence.List.to_seq l4))));
(* sum *)
let n = 200000000 in
let n = 100000000 in
let sum = Sequence.fold (+) 0 (Sequence.take n (Sequence.Int.repeat 1)) in
Format.printf "%dx1 = %d@." n sum;
assert (n=sum);
let s = Sexpr.of_seq (Sexpr.lex (Sequence.String.to_seq sexpr)) in
Format.printf "parse @[<h>%s@] into @[<h>%a@]@." sexpr (Sexpr.pp_sexpr ~indent:false) s;
()