mirror of
https://github.com/c-cube/iter.git
synced 2025-12-05 19:00:31 -05:00
349 lines
9 KiB
OCaml
349 lines
9 KiB
OCaml
(*
|
|
Copyright (C) 2012 Simon Cruanes
|
|
|
|
This is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
02110-1301 USA.
|
|
*)
|
|
|
|
(* {1 Basic S-expressions, with printing and parsing} *)
|
|
|
|
(** S-expression *)
|
|
type t =
|
|
| Atom of string (** An atom *)
|
|
| List of t list (** A list of S-expressions *)
|
|
|
|
type token = [ `Open | `Close | `Atom of string ]
|
|
(** Token that compose a Sexpr once serialized *)
|
|
|
|
(** {2 Traverse an iterator of tokens} *)
|
|
|
|
(** Iterate on the S-expression, calling the callback with tokens *)
|
|
let rec iter f s =
|
|
match s with
|
|
| Atom a -> f (`Atom a)
|
|
| List l ->
|
|
f `Open;
|
|
iter_list f l;
|
|
f `Close
|
|
|
|
and iter_list f l =
|
|
match l with
|
|
| [] -> ()
|
|
| x :: l' ->
|
|
iter f x;
|
|
iter_list f l'
|
|
|
|
(** Traverse. This yields an iterator of tokens *)
|
|
let traverse s = Iter.from_iter (fun k -> iter k s)
|
|
|
|
(** Returns the same iterator of tokens, but during iteration, if
|
|
the structure of the Sexpr corresponding to the iterator
|
|
is wrong (bad parenthesing), Invalid_argument is raised
|
|
and iteration is stoped *)
|
|
let validate seq =
|
|
let depth = ref 0 in
|
|
Iter.map
|
|
(fun tok ->
|
|
match tok with
|
|
| `Open ->
|
|
incr depth;
|
|
tok
|
|
| `Close ->
|
|
if !depth = 0 then
|
|
raise (Invalid_argument "wrong parenthesing")
|
|
else
|
|
decr depth;
|
|
tok
|
|
| _ -> tok)
|
|
seq
|
|
|
|
(** {2 Text <-> tokens} *)
|
|
|
|
(** Lex: create an iterator of tokens from the given in_channel. *)
|
|
let lex input =
|
|
let seq_fun k =
|
|
let in_word = ref false in
|
|
let buf = Buffer.create 128 in
|
|
(* loop. TODO handle escaping of (), and "" *)
|
|
let rec next c =
|
|
match c with
|
|
| '(' -> k `Open
|
|
| ')' ->
|
|
flush_word ();
|
|
k `Close
|
|
| ' ' | '\t' | '\n' -> flush_word ()
|
|
| c ->
|
|
in_word := true;
|
|
Buffer.add_char buf c
|
|
(* finish the previous word token *)
|
|
and flush_word () =
|
|
if !in_word then (
|
|
(* this whitespace follows a word *)
|
|
let word = Buffer.contents buf in
|
|
Buffer.clear buf;
|
|
in_word := false;
|
|
k (`Atom word)
|
|
)
|
|
in
|
|
Iter.iter next input
|
|
in
|
|
Iter.from_iter seq_fun
|
|
|
|
(** Build a Sexpr from an iterator of tokens *)
|
|
let of_seq seq =
|
|
(* called on every token *)
|
|
let rec k stack token =
|
|
match token with
|
|
| `Open -> `Open :: stack
|
|
| `Close -> collapse [] stack
|
|
| `Atom a -> `Expr (Atom a) :: stack
|
|
(* collapse last list into an `Expr *)
|
|
and collapse acc stack =
|
|
match stack with
|
|
| `Open :: stack' -> `Expr (List acc) :: stack'
|
|
| `Expr a :: stack' -> collapse (a :: acc) stack'
|
|
| _ -> assert false
|
|
in
|
|
(* iterate, given an empty initial stack *)
|
|
let stack = Iter.fold k [] seq in
|
|
(* stack should contain exactly one expression *)
|
|
match stack with
|
|
| [ `Expr expr ] -> expr
|
|
| [] -> failwith "no Sexpr could be parsed"
|
|
| _ -> failwith "too many elements on the stack"
|
|
|
|
(** {2 Printing} *)
|
|
|
|
(** Print a token on the given formatter *)
|
|
let pp_token formatter token =
|
|
match token with
|
|
| `Open -> Format.fprintf formatter "@[("
|
|
| `Close -> Format.fprintf formatter ")@]"
|
|
| `Atom s -> Format.pp_print_string formatter s
|
|
|
|
(** Print an iterator of Sexpr tokens on the given formatter *)
|
|
let pp_tokens formatter tokens =
|
|
let first = ref true in
|
|
let last = ref false in
|
|
Iter.iter
|
|
(fun token ->
|
|
(match token with
|
|
| `Open ->
|
|
if not !first then Format.fprintf formatter " ";
|
|
first := true
|
|
| `Close ->
|
|
first := false;
|
|
last := true
|
|
| _ ->
|
|
if !first then
|
|
first := false
|
|
else
|
|
Format.fprintf formatter " ");
|
|
pp_token formatter token;
|
|
if !last then last := false)
|
|
tokens
|
|
|
|
(** Pretty-print the S-expr. If [indent] is true, the S-expression
|
|
is printed with indentation. *)
|
|
let pp_sexpr ?(indent = false) formatter s =
|
|
if indent then
|
|
Format.fprintf formatter "@[<hov 4>%a@]" pp_tokens (traverse s)
|
|
else
|
|
pp_tokens formatter (traverse s)
|
|
|
|
(** {2 Serializing} *)
|
|
|
|
let output_seq name subexpr k =
|
|
k `Open;
|
|
k (`Atom name);
|
|
Iter.iter k subexpr;
|
|
k `Close
|
|
|
|
let output_str name str k =
|
|
k `Open;
|
|
k (`Atom name);
|
|
k (`Atom str);
|
|
k `Close
|
|
|
|
(** {2 Parsing} *)
|
|
|
|
(** Monadic combinators for parsing data from an iterator of tokens,
|
|
without converting to concrete S-expressions.
|
|
|
|
The [one] parser can raise ParseFailure if it fails to parse
|
|
the atomic type. *)
|
|
|
|
(** parser that returns a 'a *)
|
|
type 'a parser =
|
|
| Return : 'a -> 'a parser
|
|
| One : (token -> 'a) -> 'a parser
|
|
| Zero : (token -> 'a parser) -> 'a parser
|
|
(* | Maybe of (token -> 'a option) *)
|
|
| Bind : ('b parser * ('b -> 'a parser)) -> 'a parser
|
|
| Fail : string -> 'a parser
|
|
|
|
exception ParseFailure of string
|
|
|
|
let ( >>= ) p f = Bind (p, f)
|
|
let ( >> ) p p' = p >>= fun _ -> p'
|
|
let return x = Return x
|
|
let fail reason = Fail reason
|
|
let one f = One f
|
|
let skip = One (fun _ -> ())
|
|
let lookahead f = Zero f
|
|
|
|
let left =
|
|
One
|
|
(function
|
|
| `Open -> ()
|
|
| _ -> raise (ParseFailure "expected '('"))
|
|
|
|
let right =
|
|
One
|
|
(function
|
|
| `Close -> ()
|
|
| _ -> raise (ParseFailure "expected ')'"))
|
|
|
|
let pair f g =
|
|
f >>= fun x ->
|
|
g >>= fun y -> return (x, y)
|
|
|
|
let triple f g h =
|
|
f >>= fun x ->
|
|
g >>= fun y ->
|
|
h >>= fun z -> return (x, y, z)
|
|
|
|
(** [(name,p) ^|| p'] behaves as p if the next token is [`Atom name], and
|
|
like [p'] otherwise *)
|
|
let ( ^|| ) (name, p) p' =
|
|
lookahead (fun token ->
|
|
match token with
|
|
| `Atom s when s = name -> skip >> p ()
|
|
| _ -> p')
|
|
|
|
(** Maps the value returned by the parser *)
|
|
let map p f = p >>= fun x -> return (f x)
|
|
|
|
let p_str =
|
|
one (function
|
|
| `Atom s -> s
|
|
| _ -> raise (ParseFailure "expected string"))
|
|
|
|
let p_int =
|
|
one (function
|
|
| `Atom s ->
|
|
(try int_of_string s
|
|
with Failure _ -> raise (ParseFailure "expected int"))
|
|
| _ -> raise (ParseFailure "expected int"))
|
|
|
|
let p_bool =
|
|
one (function
|
|
| `Atom s ->
|
|
(try bool_of_string s
|
|
with Failure _ -> raise (ParseFailure "expected bool"))
|
|
| _ -> raise (ParseFailure "expected bool"))
|
|
|
|
let p_float =
|
|
one (function
|
|
| `Atom s ->
|
|
(try float_of_string s
|
|
with Failure _ -> raise (ParseFailure "expected float"))
|
|
| _ -> raise (ParseFailure "expected float"))
|
|
|
|
let many p =
|
|
let rec elements token =
|
|
match token with
|
|
| `Close -> return []
|
|
| _ ->
|
|
p >>= fun x ->
|
|
lookahead elements >>= fun l -> return (x :: l)
|
|
in
|
|
left >> lookahead elements >>= fun l -> right >> return l
|
|
|
|
let many1 p =
|
|
p >>= fun x ->
|
|
many p >>= fun l -> return (x :: l)
|
|
|
|
(** parsing state that returns a 'a *)
|
|
type 'a state =
|
|
| Bottom : 'a state
|
|
| Push : ('b parser * ('b -> 'a state)) -> 'a state
|
|
|
|
(** Actually parse the iterator of tokens, with a callback to be called
|
|
on every parsed value. The callback decides whether to push another
|
|
state or whether to continue. *)
|
|
let parse_k p tokens k =
|
|
let rec state =
|
|
Push
|
|
( p,
|
|
fun x ->
|
|
match k x with
|
|
| `Stop -> Bottom
|
|
| `Continue -> state )
|
|
in
|
|
(* Token handler. It also takes the current parser. *)
|
|
let rec one_step state token =
|
|
match reduce state with
|
|
| Bottom ->
|
|
(* should not happen, unless there are too many tokens *)
|
|
raise (ParseFailure "unexpected ')'")
|
|
| Push (Return _, _cont) -> assert false (* should be reduced *)
|
|
| Push (Zero f, cont) ->
|
|
let p' = f token in
|
|
let state' = Push (p', cont) in
|
|
one_step state' token (* do not consume token *)
|
|
| Push (One f, cont) ->
|
|
let x = f token in
|
|
let state' = cont x in
|
|
reduce state' (* consume token *)
|
|
(* | Maybe f, _ -> let x = f token in (Obj.magic cont) x *)
|
|
| Push (Bind (p', cont'), cont) ->
|
|
let cont'' x =
|
|
let p'' = cont' x in
|
|
Push (p'', cont)
|
|
in
|
|
let state' = Push (p', cont'') in
|
|
one_step state' token (* do not consume token *)
|
|
| Push (Fail reason, _) -> raise (ParseFailure reason)
|
|
(* Reduce parser state *)
|
|
and reduce state =
|
|
match state with
|
|
| Push (Return x, cont) ->
|
|
let state' = cont x in
|
|
reduce state'
|
|
| _ -> state
|
|
in
|
|
(* iterate on the tokens *)
|
|
ignore (Iter.fold one_step state tokens)
|
|
|
|
(** Parse one value *)
|
|
let parse p tokens =
|
|
let res = ref None in
|
|
parse_k p tokens (fun x ->
|
|
res := Some x;
|
|
`Stop);
|
|
(* return result *)
|
|
match !res with
|
|
| None -> raise (ParseFailure "incomplete input")
|
|
| Some x -> x
|
|
|
|
(** Parse an iterator of values *)
|
|
let parse_seq p tokens =
|
|
let seq_fun k =
|
|
parse_k p tokens (fun x ->
|
|
k x;
|
|
`Continue)
|
|
in
|
|
Iter.from_iter seq_fun
|