mirror of
https://github.com/c-cube/iter.git
synced 2025-12-08 04:05:32 -05:00
parser combinators for deserializing token sequences directly into
any type, without building the intermediate Sexpr.t. GADT are now necessary, to keep the Sexpr.parse function type-safe
This commit is contained in:
parent
af0c94dab4
commit
cce20838c4
2 changed files with 205 additions and 0 deletions
148
sexpr.ml
148
sexpr.ml
|
|
@ -28,6 +28,8 @@ type t =
|
||||||
(** Token that compose a Sexpr once serialized *)
|
(** Token that compose a Sexpr once serialized *)
|
||||||
type token = [`Open | `Close | `Atom of string]
|
type token = [`Open | `Close | `Atom of string]
|
||||||
|
|
||||||
|
(** {2 Traverse a sequence of tokens} *)
|
||||||
|
|
||||||
(** Iterate on the S-expression, calling the callback with tokens *)
|
(** Iterate on the S-expression, calling the callback with tokens *)
|
||||||
let rec iter f s = match s with
|
let rec iter f s = match s with
|
||||||
| Atom a -> f (`Atom a)
|
| Atom a -> f (`Atom a)
|
||||||
|
|
@ -39,6 +41,23 @@ and iter_list f l = match l with
|
||||||
(** Traverse. This yields a sequence of tokens *)
|
(** Traverse. This yields a sequence of tokens *)
|
||||||
let traverse s = Sequence.from_iter (fun k -> iter k s)
|
let traverse s = Sequence.from_iter (fun k -> iter k s)
|
||||||
|
|
||||||
|
(** Returns the same sequence of tokens, but during iteration, if
|
||||||
|
the structure of the Sexpr corresponding to the sequence
|
||||||
|
is wrong (bad parenthesing), Invalid_argument is raised
|
||||||
|
and iteration is stoped *)
|
||||||
|
let validate seq =
|
||||||
|
let depth = ref 0 in
|
||||||
|
Sequence.map
|
||||||
|
(fun tok -> match tok with
|
||||||
|
| `Open -> incr depth; tok
|
||||||
|
| `Close -> if !depth = 0
|
||||||
|
then raise (Invalid_argument "wrong parenthesing")
|
||||||
|
else decr depth; tok
|
||||||
|
| _ -> tok)
|
||||||
|
seq
|
||||||
|
|
||||||
|
(** {2 Text <-> tokens} *)
|
||||||
|
|
||||||
(** Lex: create a sequence of tokens from the given in_channel. *)
|
(** Lex: create a sequence of tokens from the given in_channel. *)
|
||||||
let lex input =
|
let lex input =
|
||||||
let seq_fun k =
|
let seq_fun k =
|
||||||
|
|
@ -86,6 +105,8 @@ let of_seq seq =
|
||||||
| [] -> failwith "no Sexpr could be parsed"
|
| [] -> failwith "no Sexpr could be parsed"
|
||||||
| _ -> failwith "too many elements on the stack"
|
| _ -> failwith "too many elements on the stack"
|
||||||
|
|
||||||
|
(** {2 Printing} *)
|
||||||
|
|
||||||
(** Print a token on the given formatter *)
|
(** Print a token on the given formatter *)
|
||||||
let pp_token formatter token = match token with
|
let pp_token formatter token = match token with
|
||||||
| `Open -> Format.fprintf formatter "@[("
|
| `Open -> Format.fprintf formatter "@[("
|
||||||
|
|
@ -113,3 +134,130 @@ let pp_sexpr ?(indent=false) formatter s =
|
||||||
then Format.fprintf formatter "@[<hov 4>%a@]" pp_tokens (traverse s)
|
then Format.fprintf formatter "@[<hov 4>%a@]" pp_tokens (traverse s)
|
||||||
else pp_tokens formatter (traverse s)
|
else pp_tokens formatter (traverse s)
|
||||||
|
|
||||||
|
(** {2 Parsing} *)
|
||||||
|
|
||||||
|
(** Monadic combinators for parsing data from a sequence of tokens,
|
||||||
|
without converting to concrete S-expressions.
|
||||||
|
|
||||||
|
The [one] parser can raise ParseFailure if it fails to parse
|
||||||
|
the atomic type. *)
|
||||||
|
|
||||||
|
(** parser that returns a 'a *)
|
||||||
|
type 'a parser =
|
||||||
|
| Return : 'a -> 'a parser
|
||||||
|
| One : (token -> 'a) -> 'a parser
|
||||||
|
| Zero : (token -> 'a parser) -> 'a parser
|
||||||
|
(* | Maybe of (token -> 'a option) *)
|
||||||
|
| Bind : ('b parser * ('b -> 'a parser)) -> 'a parser
|
||||||
|
| Fail : string -> 'a parser
|
||||||
|
|
||||||
|
exception ParseFailure of string
|
||||||
|
|
||||||
|
let (>>=) p f = Bind (p, f)
|
||||||
|
|
||||||
|
let (>>) p p' = p >>= fun _ -> p'
|
||||||
|
|
||||||
|
let return x = Return x
|
||||||
|
|
||||||
|
let fail reason = Fail reason
|
||||||
|
|
||||||
|
let one f = One f
|
||||||
|
|
||||||
|
let lookahead f = Zero f
|
||||||
|
|
||||||
|
let left = One (function | `Open -> ()
|
||||||
|
| _ -> raise (ParseFailure "expected '('"))
|
||||||
|
|
||||||
|
let right = One (function | `Close -> ()
|
||||||
|
| _ -> raise (ParseFailure "expected ')'"))
|
||||||
|
|
||||||
|
let pair f g =
|
||||||
|
f >>= fun x ->
|
||||||
|
g >>= fun y ->
|
||||||
|
return (x, y)
|
||||||
|
|
||||||
|
let triple f g h =
|
||||||
|
f >>= fun x ->
|
||||||
|
g >>= fun y ->
|
||||||
|
h >>= fun z ->
|
||||||
|
return (x, y, z)
|
||||||
|
|
||||||
|
(** Maps the value returned by the parser *)
|
||||||
|
let map p f = p >>= fun x -> return (f x)
|
||||||
|
|
||||||
|
let p_str = one
|
||||||
|
(function | `Atom s -> s | _ -> raise (ParseFailure "expected string"))
|
||||||
|
|
||||||
|
let p_int = one
|
||||||
|
(function | `Atom s -> (try int_of_string s
|
||||||
|
with Failure _ -> raise (ParseFailure "expected int"))
|
||||||
|
| _ -> raise (ParseFailure "expected int"))
|
||||||
|
|
||||||
|
let p_bool = one
|
||||||
|
(function | `Atom s -> (try bool_of_string s
|
||||||
|
with Failure _ -> raise (ParseFailure "expected bool"))
|
||||||
|
| _ -> raise (ParseFailure "expected bool"))
|
||||||
|
|
||||||
|
let p_float = one
|
||||||
|
(function | `Atom s -> (try float_of_string s
|
||||||
|
with Failure _ -> raise (ParseFailure "expected float"))
|
||||||
|
| _ -> raise (ParseFailure "expected float"))
|
||||||
|
|
||||||
|
let many p =
|
||||||
|
let rec elements token =
|
||||||
|
match token with
|
||||||
|
| `Close -> return []
|
||||||
|
| _ ->
|
||||||
|
p >>= fun x ->
|
||||||
|
lookahead elements >>= fun l ->
|
||||||
|
return (x :: l)
|
||||||
|
in
|
||||||
|
left >> lookahead elements >>= fun l -> right >> return l
|
||||||
|
|
||||||
|
let many1 p =
|
||||||
|
p >>= fun x ->
|
||||||
|
many p >>= fun l ->
|
||||||
|
return (x::l)
|
||||||
|
|
||||||
|
(** parsing state that returns a 'a *)
|
||||||
|
type 'a state =
|
||||||
|
| Bottom : 'a state
|
||||||
|
| Push : ('b parser * ('b -> 'a state)) -> 'a state
|
||||||
|
|
||||||
|
(** Actually parse the sequence of tokens *)
|
||||||
|
let parse p tokens =
|
||||||
|
let res = ref None in
|
||||||
|
let state = Push(p, fun x -> (res := Some x; Bottom)) in
|
||||||
|
(* Token handler. It also takes the current parser. *)
|
||||||
|
let rec one_step state token =
|
||||||
|
match state with
|
||||||
|
| Bottom -> (* should not happen, unless there are too many tokens *)
|
||||||
|
raise (ParseFailure "unexpected ')'")
|
||||||
|
| Push (Return x, cont) ->
|
||||||
|
let state' = cont x in
|
||||||
|
one_step state' token (* do not consume token *)
|
||||||
|
| Push (Zero f, cont) ->
|
||||||
|
let p' = f token in
|
||||||
|
let state' = Push (p', cont) in
|
||||||
|
one_step state' token (* do not consume token *)
|
||||||
|
| Push (One f, cont) ->
|
||||||
|
let x = f token in
|
||||||
|
let state' = cont x in
|
||||||
|
state' (* consume token *)
|
||||||
|
(* | Maybe f, _ -> let x = f token in (Obj.magic cont) x *)
|
||||||
|
| Push (Bind (p', cont'), cont) ->
|
||||||
|
let cont'' x =
|
||||||
|
let p'' = cont' x in
|
||||||
|
Push (p'', cont)
|
||||||
|
in
|
||||||
|
let state' = Push (p', cont'') in
|
||||||
|
one_step state' token (* do not consume token *)
|
||||||
|
| Push (Fail reason, _) -> raise (ParseFailure reason)
|
||||||
|
in
|
||||||
|
(* iterate on the tokens *)
|
||||||
|
ignore (Sequence.fold one_step state tokens);
|
||||||
|
(* return result *)
|
||||||
|
match !res with
|
||||||
|
| None -> raise (ParseFailure "incomplete input")
|
||||||
|
| Some x -> x
|
||||||
|
|
||||||
|
|
|
||||||
57
sexpr.mli
57
sexpr.mli
|
|
@ -36,6 +36,12 @@ val iter : (token -> unit) -> t -> unit
|
||||||
val traverse : t -> token Sequence.t
|
val traverse : t -> token Sequence.t
|
||||||
(** Traverse. This yields a sequence of tokens *)
|
(** Traverse. This yields a sequence of tokens *)
|
||||||
|
|
||||||
|
val validate : token Sequence.t -> token Sequence.t
|
||||||
|
(** Returns the same sequence of tokens, but during iteration, if
|
||||||
|
the structure of the Sexpr corresponding to the sequence
|
||||||
|
is wrong (bad parenthesing), Invalid_argument is raised
|
||||||
|
and iteration is stoped *)
|
||||||
|
|
||||||
(** {2 Text <-> tokens} *)
|
(** {2 Text <-> tokens} *)
|
||||||
|
|
||||||
val lex : char Sequence.t -> token Sequence.t
|
val lex : char Sequence.t -> token Sequence.t
|
||||||
|
|
@ -55,3 +61,54 @@ val pp_tokens : Format.formatter -> token Sequence.t -> unit
|
||||||
val pp_sexpr : ?indent:bool -> Format.formatter -> t -> unit
|
val pp_sexpr : ?indent:bool -> Format.formatter -> t -> unit
|
||||||
(** Pretty-print the S-expr. If [indent] is true, the S-expression
|
(** Pretty-print the S-expr. If [indent] is true, the S-expression
|
||||||
is printed with indentation. *)
|
is printed with indentation. *)
|
||||||
|
|
||||||
|
(** {2 Parsing} *)
|
||||||
|
|
||||||
|
(** Monadic combinators for parsing data from a sequence of tokens,
|
||||||
|
without converting to concrete S-expressions. *)
|
||||||
|
|
||||||
|
type 'a parser
|
||||||
|
|
||||||
|
exception ParseFailure of string
|
||||||
|
|
||||||
|
val (>>=) : 'a parser -> ('a -> 'b parser) -> 'b parser
|
||||||
|
(** Monadic bind: computes a parser from the result of
|
||||||
|
the first parser *)
|
||||||
|
|
||||||
|
val (>>) : 'a parser -> 'b parser -> 'b parser
|
||||||
|
(** Like (>>=), but ignores the result of the first parser *)
|
||||||
|
|
||||||
|
val return : 'a -> 'a parser
|
||||||
|
(** Parser that consumes no input and return the given value *)
|
||||||
|
|
||||||
|
val fail : string -> 'a parser
|
||||||
|
(** Fails parsing with the given message *)
|
||||||
|
|
||||||
|
val one : (token -> 'a) -> 'a parser
|
||||||
|
(** consumes one token with the function *)
|
||||||
|
|
||||||
|
val lookahead : (token -> 'a parser) -> 'a parser
|
||||||
|
(** choose parser given current token *)
|
||||||
|
|
||||||
|
val left : unit parser
|
||||||
|
(** Parses a `Open *)
|
||||||
|
|
||||||
|
val right : unit parser
|
||||||
|
(** Parses a `Close *)
|
||||||
|
|
||||||
|
val pair : 'a parser -> 'b parser -> ('a * 'b) parser
|
||||||
|
val triple : 'a parser -> 'b parser -> 'c parser -> ('a * 'b * 'c) parser
|
||||||
|
|
||||||
|
val map : 'a parser -> ('a -> 'b) -> 'b parser
|
||||||
|
(** Maps the value returned by the parser *)
|
||||||
|
|
||||||
|
val p_str : string parser
|
||||||
|
val p_int : int parser
|
||||||
|
val p_bool : bool parser
|
||||||
|
|
||||||
|
val many : 'a parser -> 'a list parser
|
||||||
|
val many1 : 'a parser -> 'a list parser
|
||||||
|
|
||||||
|
val parse : 'a parser -> token Sequence.t -> 'a
|
||||||
|
(** Actually parse the sequence of tokens. Raises
|
||||||
|
ParseFailure if anything goes wrong. *)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue