mirror of
https://github.com/c-cube/sidekick.git
synced 2025-12-06 11:15:43 -05:00
feat(parser): move to menhir
infix operators need parentheses when nested, there is no precedence yet
This commit is contained in:
parent
3317171971
commit
8866f032fe
12 changed files with 406 additions and 1854 deletions
|
|
@ -1,34 +1,54 @@
|
||||||
module Pos = Parser_comb.Position
|
type position = Position.t
|
||||||
|
type loc = Loc.t = { start: position; end_: position }
|
||||||
|
|
||||||
type position = Pos.t
|
open struct
|
||||||
type loc = { start: position; end_: position }
|
let mk_loc = Loc.mk
|
||||||
|
end
|
||||||
|
|
||||||
let mk_loc ~start ~end_ : loc = { start; end_ }
|
type 'a with_loc = { view: 'a; loc: loc }
|
||||||
|
|
||||||
let pp_loc out (loc : loc) =
|
type term = term_view with_loc
|
||||||
Fmt.fprintf out "%a - %a" Pos.pp loc.start Pos.pp loc.end_
|
|
||||||
|
|
||||||
let loc_merge a b : loc =
|
|
||||||
{ start = Pos.min a.start b.start; end_ = Pos.max a.end_ b.end_ }
|
|
||||||
|
|
||||||
type term = { view: term_view; loc: loc }
|
|
||||||
(** Expressions *)
|
(** Expressions *)
|
||||||
|
|
||||||
and term_view =
|
and term_view =
|
||||||
| Var of string
|
| Var of string
|
||||||
| Int of string
|
| Int of string
|
||||||
| App of term * term list
|
| App of term * term list
|
||||||
| Let of (var * term) list * term
|
| Let of let_binding list * term
|
||||||
| Lambda of var list * term
|
| Lambda of var list * term
|
||||||
| Pi of var list * term
|
| Pi of var list * term
|
||||||
| Arrow of term list * term
|
| Arrow of term list * term
|
||||||
| Error_node of string
|
| Error_node of string
|
||||||
|
|
||||||
|
and let_binding = var * term
|
||||||
|
|
||||||
and var = { name: string; ty: term option }
|
and var = { name: string; ty: term option }
|
||||||
(** Variable *)
|
(** Variable *)
|
||||||
|
|
||||||
|
type decl = decl_view with_loc
|
||||||
|
(** Declarations *)
|
||||||
|
|
||||||
|
(* TODO: axiom *)
|
||||||
|
and decl_view =
|
||||||
|
| D_def of { name: string; args: var list; ty_ret: term option; rhs: term }
|
||||||
|
| D_hash of string * term
|
||||||
|
| D_theorem of { name: string; goal: term; proof: proof }
|
||||||
|
|
||||||
|
and proof = proof_view with_loc
|
||||||
|
|
||||||
|
and proof_view =
|
||||||
|
| P_by of term
|
||||||
|
| P_exact of term
|
||||||
|
| P_steps of {
|
||||||
|
steps: proof_step list; (** intermediate steps *)
|
||||||
|
ret: proof; (** proof for the result *)
|
||||||
|
}
|
||||||
|
|
||||||
|
and proof_step = proof_step_view with_loc
|
||||||
|
and proof_step_view = { name: string; goal: term; proof: proof }
|
||||||
|
|
||||||
open struct
|
open struct
|
||||||
let mk_ ~loc view : term = { loc; view }
|
let mk_ ~loc view : _ with_loc = { loc; view }
|
||||||
end
|
end
|
||||||
|
|
||||||
let view (t : term) = t.view
|
let view (t : term) = t.view
|
||||||
|
|
@ -40,7 +60,7 @@ let mk_app f args : term =
|
||||||
if args = [] then
|
if args = [] then
|
||||||
f
|
f
|
||||||
else (
|
else (
|
||||||
let loc = List.fold_left (fun loc a -> loc_merge loc a.loc) f.loc args in
|
let loc = List.fold_left (fun loc a -> Loc.merge loc a.loc) f.loc args in
|
||||||
mk_ ~loc (App (f, args))
|
mk_ ~loc (App (f, args))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -68,12 +88,25 @@ let mk_let ~loc bs bod : term =
|
||||||
if bs = [] then
|
if bs = [] then
|
||||||
bod
|
bod
|
||||||
else (
|
else (
|
||||||
let loc = loc_merge loc bod.loc in
|
let loc = Loc.merge loc bod.loc in
|
||||||
mk_ ~loc (Let (bs, bod))
|
mk_ ~loc (Let (bs, bod))
|
||||||
)
|
)
|
||||||
|
|
||||||
let mk_error ~loc msg : term = mk_ ~loc (Error_node msg)
|
let mk_error ~loc msg : term = mk_ ~loc (Error_node msg)
|
||||||
|
|
||||||
|
let decl_def ~loc ?ty_ret name args rhs : decl =
|
||||||
|
mk_ ~loc @@ D_def { name; args; ty_ret; rhs }
|
||||||
|
|
||||||
|
let decl_hash ~loc s t : decl = mk_ ~loc @@ D_hash (s, t)
|
||||||
|
|
||||||
|
let decl_theorem ~loc name goal proof : decl =
|
||||||
|
mk_ ~loc @@ D_theorem { name; goal; proof }
|
||||||
|
|
||||||
|
let proof_by ~loc t : proof = mk_ ~loc @@ P_by t
|
||||||
|
let proof_exact ~loc t : proof = mk_ ~loc @@ P_exact t
|
||||||
|
let proof_steps ~loc steps ret : proof = mk_ ~loc @@ P_steps { steps; ret }
|
||||||
|
let step ~loc name goal proof : proof_step = mk_ ~loc @@ { name; goal; proof }
|
||||||
|
|
||||||
(** Pretty print terms *)
|
(** Pretty print terms *)
|
||||||
let rec pp_term out (e : term) : unit =
|
let rec pp_term out (e : term) : unit =
|
||||||
let pp = pp_term in
|
let pp = pp_term in
|
||||||
|
|
@ -84,12 +117,6 @@ let rec pp_term out (e : term) : unit =
|
||||||
| Var _ | Error_node _ | Int _ -> pp out e
|
| Var _ | Error_node _ | Int _ -> pp out e
|
||||||
in
|
in
|
||||||
|
|
||||||
let pp_tyvar out x =
|
|
||||||
match x.ty with
|
|
||||||
| None -> Fmt.string out x.name
|
|
||||||
| Some ty -> Fmt.fprintf out "(@[%s : %a@])" x.name pp ty
|
|
||||||
in
|
|
||||||
|
|
||||||
match e.view with
|
match e.view with
|
||||||
| Var v -> Fmt.string out v
|
| Var v -> Fmt.string out v
|
||||||
| Error_node msg -> Fmt.fprintf out "<error %s>" msg
|
| Error_node msg -> Fmt.fprintf out "<error %s>" msg
|
||||||
|
|
@ -101,11 +128,47 @@ let rec pp_term out (e : term) : unit =
|
||||||
(Util.pp_list ~sep:" -> " pp_sub)
|
(Util.pp_list ~sep:" -> " pp_sub)
|
||||||
args pp_sub ret
|
args pp_sub ret
|
||||||
| Let (bs, bod) ->
|
| Let (bs, bod) ->
|
||||||
let ppb out (x, t) = Fmt.fprintf out "@[<2>%s :=@ %a@]" x.name pp t in
|
let ppb out ((x, t) : let_binding) =
|
||||||
Fmt.fprintf out "@[@[<2>let@ %a@] in@ %a@]"
|
Fmt.fprintf out "@[<2>%s :=@ %a@]" x.name pp t
|
||||||
(Util.pp_list ~sep:"and" ppb)
|
in
|
||||||
|
Fmt.fprintf out "@[@[<2>let@ @[<hv>%a@]@] in@ %a@]"
|
||||||
|
(Util.pp_list ~sep:" and " ppb)
|
||||||
bs pp bod
|
bs pp bod
|
||||||
| Lambda (args, bod) ->
|
| Lambda (args, bod) ->
|
||||||
Fmt.fprintf out "@[lam %a.@ %a@]" (Util.pp_list pp_tyvar) args pp_sub bod
|
Fmt.fprintf out "@[lam %a.@ %a@]" (Util.pp_list pp_tyvar) args pp_sub bod
|
||||||
| Pi (args, bod) ->
|
| Pi (args, bod) ->
|
||||||
Fmt.fprintf out "@[pi %a.@ %a@]" (Util.pp_list pp_tyvar) args pp_sub bod
|
Fmt.fprintf out "@[pi %a.@ %a@]" (Util.pp_list pp_tyvar) args pp_sub bod
|
||||||
|
|
||||||
|
and pp_tyvar out (x : var) : unit =
|
||||||
|
match x.ty with
|
||||||
|
| None -> Fmt.string out x.name
|
||||||
|
| Some ty -> Fmt.fprintf out "(@[%s : %a@])" x.name pp_term ty
|
||||||
|
|
||||||
|
let rec pp_proof out (p : proof) : unit =
|
||||||
|
match p.view with
|
||||||
|
| P_by t -> Fmt.fprintf out "@[by@ %a@]" pp_term t
|
||||||
|
| P_exact t -> Fmt.fprintf out "@[exact@ %a@]" pp_term t
|
||||||
|
| P_steps { steps; ret } ->
|
||||||
|
Fmt.fprintf out "{@[%a;@ %a@]}"
|
||||||
|
(Util.pp_list ~sep:";" pp_proof_step)
|
||||||
|
steps pp_proof ret
|
||||||
|
|
||||||
|
and pp_proof_step out (step : proof_step) : unit =
|
||||||
|
let s = step.view in
|
||||||
|
Fmt.fprintf out "@[<2>have %s := %a@ proof %a@]" s.name pp_term s.goal
|
||||||
|
pp_proof s.proof
|
||||||
|
|
||||||
|
let pp_decl out (d : decl) =
|
||||||
|
match d.view with
|
||||||
|
| D_def { name; args; ty_ret; rhs } ->
|
||||||
|
let pp_tyret out () =
|
||||||
|
match ty_ret with
|
||||||
|
| None -> ()
|
||||||
|
| Some ty -> Fmt.fprintf out " @[: %a@]" pp_term ty
|
||||||
|
in
|
||||||
|
Fmt.fprintf out "@[<2>def %s%a%a :=@ %a@];" name (Util.pp_list pp_tyvar)
|
||||||
|
args pp_tyret () pp_term rhs
|
||||||
|
| D_hash (name, t) -> Fmt.fprintf out "@[<2>#%s@ %a@];" name pp_term t
|
||||||
|
| D_theorem { name; goal; proof } ->
|
||||||
|
Fmt.fprintf out "@[<hv2>theorem %s :=@ %a@ @[<hv2>proof %a@]@];" name
|
||||||
|
pp_term goal pp_proof proof
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,12 @@
|
||||||
(library
|
(library
|
||||||
(name sidekick_parser)
|
(name sidekick_parser)
|
||||||
(public_name sidekick.parser)
|
(public_name sidekick-parser)
|
||||||
(synopsis "A term parser")
|
(synopsis "High level syntax for sidekick")
|
||||||
(libraries sidekick.core sidekick.util)
|
(libraries sidekick.core sidekick.util pp_loc)
|
||||||
(flags :standard -open Sidekick_util))
|
(flags :standard -open Sidekick_util))
|
||||||
|
|
||||||
|
(ocamllex
|
||||||
|
(modules lex))
|
||||||
|
|
||||||
|
(menhir
|
||||||
|
(modules parse))
|
||||||
|
|
|
||||||
50
src/parser/lex.mll
Normal file
50
src/parser/lex.mll
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
open Parse
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
let num = ['0' - '9']
|
||||||
|
let alpha = ['a' - 'z' 'A' - 'Z']
|
||||||
|
let alphanum = alpha | num
|
||||||
|
let alphanum_under = alpha | num | ['_']
|
||||||
|
let whitespace = [' ' '\t' '\n']
|
||||||
|
let sym = '_' | '=' | '+' | '-' | '*' | '^' | '%' | '$' | '#' | '@' | '!' | '>' | '<' | '[' | ']'
|
||||||
|
|
||||||
|
let id = (alpha | '_') alphanum_under*
|
||||||
|
let int = '-'? num+
|
||||||
|
let symbol = sym+
|
||||||
|
let hash = '#' alphanum_under+
|
||||||
|
|
||||||
|
rule token = parse
|
||||||
|
| whitespace { token lexbuf }
|
||||||
|
| ";" { SEMICOLON }
|
||||||
|
| ":" { COLON }
|
||||||
|
| "(" { LPAREN }
|
||||||
|
| ")" { RPAREN }
|
||||||
|
| ":=" { EQDEF }
|
||||||
|
| "->" { ARROW }
|
||||||
|
| "." { DOT }
|
||||||
|
| "pi" { PI }
|
||||||
|
| "fn" { FUNCTION }
|
||||||
|
| "let" { LET }
|
||||||
|
| "in" { IN }
|
||||||
|
| "and" { AND }
|
||||||
|
| "have" { HAVE }
|
||||||
|
| "theorem" { THEOREM }
|
||||||
|
| "by" { BY }
|
||||||
|
| "exact" { EXACT }
|
||||||
|
| "have" { HAVE }
|
||||||
|
| int { INT(Lexing.lexeme lexbuf) }
|
||||||
|
| id { ID (Lexing.lexeme lexbuf) }
|
||||||
|
| symbol { SYMBOL (Lexing.lexeme lexbuf) }
|
||||||
|
| hash { HASH (Lexing.lexeme lexbuf) }
|
||||||
|
| eof { EOF }
|
||||||
|
| _ as c { ERROR(String.make 1 c) }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
13
src/parser/loc.ml
Normal file
13
src/parser/loc.ml
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
type position = Position.t
|
||||||
|
type t = { start: position; end_: position }
|
||||||
|
|
||||||
|
let mk ~start ~end_ : t = { start; end_ }
|
||||||
|
|
||||||
|
let pp out (self : t) =
|
||||||
|
Fmt.fprintf out "%a - %a" Position.pp self.start Position.pp self.end_
|
||||||
|
|
||||||
|
let merge a b : t =
|
||||||
|
{ start = Position.min a.start b.start; end_ = Position.max a.end_ b.end_ }
|
||||||
|
|
||||||
|
let of_lexloc ((p1, p2) : Lexing.position * Lexing.position) : t =
|
||||||
|
{ start = Position.of_lexpos p1; end_ = Position.of_lexpos p2 }
|
||||||
172
src/parser/parse.mly
Normal file
172
src/parser/parse.mly
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
|
||||||
|
%{
|
||||||
|
|
||||||
|
open struct
|
||||||
|
module A = Ast_term
|
||||||
|
end
|
||||||
|
|
||||||
|
%}
|
||||||
|
|
||||||
|
%token <string> ID
|
||||||
|
%token <string> INT
|
||||||
|
%token <string> SYMBOL
|
||||||
|
%token <string> HASH
|
||||||
|
|
||||||
|
%token <string> ERROR
|
||||||
|
|
||||||
|
%token LET
|
||||||
|
%token IN
|
||||||
|
%token AND
|
||||||
|
%token EQDEF
|
||||||
|
%token COLON
|
||||||
|
%token FUNCTION
|
||||||
|
%token PI
|
||||||
|
%token ARROW
|
||||||
|
%token DOT
|
||||||
|
|
||||||
|
%token SEMICOLON
|
||||||
|
|
||||||
|
%token LPAREN
|
||||||
|
%token RPAREN
|
||||||
|
/* TODO: implicit arguments in def */
|
||||||
|
%token LBRACE
|
||||||
|
%token RBRACE
|
||||||
|
|
||||||
|
%token DEF
|
||||||
|
%token THEOREM
|
||||||
|
%token BY
|
||||||
|
%token EXACT
|
||||||
|
%token HAVE
|
||||||
|
|
||||||
|
%token EOF
|
||||||
|
|
||||||
|
%start <Ast_term.term> top_term
|
||||||
|
%start <Ast_term.decl> top_decl
|
||||||
|
%start <Ast_term.decl list> top_decls
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
top_decls: d=decl* EOF { d }
|
||||||
|
top_decl: d=decl EOF { d }
|
||||||
|
top_term: t=term EOF { t }
|
||||||
|
|
||||||
|
decl:
|
||||||
|
| h=HASH t=term SEMICOLON {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.decl_hash ~loc h t
|
||||||
|
}
|
||||||
|
| DEF name=name args=tyvars* ty_ret=optional_ty EQDEF rhs=term SEMICOLON {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
let args = List.flatten args in
|
||||||
|
A.decl_def ~loc name args ?ty_ret rhs
|
||||||
|
}
|
||||||
|
| THEOREM name=name EQDEF goal=term proof=proof SEMICOLON {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.decl_theorem ~loc name goal proof
|
||||||
|
}
|
||||||
|
|
||||||
|
proof:
|
||||||
|
| BY t=term {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.proof_by ~loc t
|
||||||
|
}
|
||||||
|
| EXACT t=term {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.proof_exact ~loc t
|
||||||
|
}
|
||||||
|
| LBRACE steps=proof_step+ ret=proof RBRACE {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.proof_steps ~loc steps ret
|
||||||
|
}
|
||||||
|
|
||||||
|
proof_step:
|
||||||
|
| HAVE name=name EQDEF goal=term proof=proof SEMICOLON {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.step ~loc name goal proof
|
||||||
|
}
|
||||||
|
|
||||||
|
tyvar:
|
||||||
|
| name=name ty=optional_ty { A.var ?ty name }
|
||||||
|
|
||||||
|
tyvars:
|
||||||
|
| name=name { [A.var name] }
|
||||||
|
| LPAREN names=name+ COLON ty=term RPAREN {
|
||||||
|
List.map (fun name -> A.var ~ty name) names
|
||||||
|
}
|
||||||
|
|
||||||
|
%inline optional_ty:
|
||||||
|
| { None }
|
||||||
|
| COLON t=term { Some t }
|
||||||
|
|
||||||
|
term:
|
||||||
|
| t=binder_term { t }
|
||||||
|
| LET bs=let_bindings IN rhs=term {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.mk_let ~loc bs rhs
|
||||||
|
}
|
||||||
|
|
||||||
|
let_binding:
|
||||||
|
| x=tyvar EQDEF t=term {x,t}
|
||||||
|
|
||||||
|
let_bindings:
|
||||||
|
| b=let_binding { [b] }
|
||||||
|
| b=let_binding AND l=let_bindings { b::l }
|
||||||
|
|
||||||
|
binder_term:
|
||||||
|
| t=sym_term { t }
|
||||||
|
| FUNCTION vars=tyvars+ DOT rhs=binder_term {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
let vars = List.flatten vars in
|
||||||
|
A.mk_lam ~loc vars rhs
|
||||||
|
}
|
||||||
|
| PI vars=tyvars+ DOT rhs=binder_term {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
let vars = List.flatten vars in
|
||||||
|
A.mk_pi ~loc vars rhs
|
||||||
|
}
|
||||||
|
|
||||||
|
sym_term:
|
||||||
|
| t=arrow_term { t }
|
||||||
|
| t=arrow_term sym=SYMBOL u=arrow_term {
|
||||||
|
let locsym = Loc.of_lexloc $loc(sym) in
|
||||||
|
A.mk_app (A.mk_var ~loc:locsym sym) [t;u]
|
||||||
|
}
|
||||||
|
|
||||||
|
arrow_term:
|
||||||
|
| t=apply_term { t }
|
||||||
|
| t=apply_term ARROW u=arrow_term {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.mk_arrow ~loc [t] u
|
||||||
|
}
|
||||||
|
|
||||||
|
apply_term:
|
||||||
|
| t=atomic_term { t }
|
||||||
|
| f=atomic_term args=atomic_term+ {
|
||||||
|
A.mk_app f args
|
||||||
|
}
|
||||||
|
|
||||||
|
(* TODO: lambda, pi, arrow *)
|
||||||
|
|
||||||
|
|
||||||
|
atomic_term:
|
||||||
|
| v=name {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.mk_var ~loc v
|
||||||
|
}
|
||||||
|
| i=INT {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.mk_int ~loc i
|
||||||
|
}
|
||||||
|
| LPAREN t=term RPAREN { t }
|
||||||
|
| err=ERROR {
|
||||||
|
let loc = Loc.of_lexloc $loc in
|
||||||
|
A.mk_error ~loc err
|
||||||
|
}
|
||||||
|
|
||||||
|
name:
|
||||||
|
| x=ID { x }
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,112 +0,0 @@
|
||||||
module A = Ast_term
|
|
||||||
module P = Parser_comb
|
|
||||||
open P.Infix
|
|
||||||
|
|
||||||
let is_alpha = function
|
|
||||||
| 'a' .. 'z' | 'A' .. 'Z' -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_num = function
|
|
||||||
| '0' .. '9' -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_alphanum = function
|
|
||||||
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let id : string P.t =
|
|
||||||
P.chars_fold_transduce `Start ~f:(fun st c ->
|
|
||||||
match st, c with
|
|
||||||
| `Start, c when is_alpha c -> `Yield (`Inside, c)
|
|
||||||
| `Inside, c when is_alphanum c -> `Yield (`Inside, c)
|
|
||||||
| `Start, _ -> `Fail "expected identifier"
|
|
||||||
| `Inside, _ -> `Stop)
|
|
||||||
>|= snd
|
|
||||||
|
|
||||||
let int : string P.t =
|
|
||||||
P.chars_fold_transduce `Start ~f:(fun st c ->
|
|
||||||
match st, c with
|
|
||||||
| `Start, '-' -> `Yield (`Post_neg, c)
|
|
||||||
| (`Start | `Post_neg | `Inside), c when is_num c -> `Yield (`Inside, c)
|
|
||||||
| `Start, _ -> `Fail "expected integer"
|
|
||||||
| `Post_neg, _ -> `Fail "expected a number after '-'"
|
|
||||||
| `Inside, _ -> `Stop)
|
|
||||||
>|= snd
|
|
||||||
|
|
||||||
let with_loc (p : 'a P.t) : ('a * A.loc) P.t =
|
|
||||||
let+ start = P.pos and+ x = p and+ end_ = P.pos in
|
|
||||||
let loc = A.mk_loc ~start ~end_ in
|
|
||||||
x, loc
|
|
||||||
|
|
||||||
(* TODO: also skip comments *)
|
|
||||||
let skip_white : unit P.t = P.skip_white
|
|
||||||
|
|
||||||
let p_var : A.term P.t =
|
|
||||||
let+ name, loc = with_loc id in
|
|
||||||
A.mk_var ~loc name
|
|
||||||
|
|
||||||
let p_int : A.term P.t =
|
|
||||||
let+ x, loc = with_loc int in
|
|
||||||
A.mk_int ~loc x
|
|
||||||
|
|
||||||
(* main parser *)
|
|
||||||
let rec p_term () : A.term P.t =
|
|
||||||
P.suspend @@ fun () ->
|
|
||||||
P.skip_white
|
|
||||||
*> (P.try_or_l ~msg:"expected term"
|
|
||||||
@@ List.flatten
|
|
||||||
[
|
|
||||||
[
|
|
||||||
( P.lookahead_ignore (P.guard (String.equal "let") id),
|
|
||||||
let+ _id_let, loc = with_loc id
|
|
||||||
and+ x = skip_white *> id
|
|
||||||
and+ _ = skip_white *> P.string ":="
|
|
||||||
and+ t = p_term ()
|
|
||||||
and+ _ = skip_white *> P.string "in"
|
|
||||||
and+ bod = p_term () in
|
|
||||||
assert (_id_let = "let");
|
|
||||||
(* TODO: allow [let x : _ := t in bod] *)
|
|
||||||
let x = A.var x in
|
|
||||||
A.mk_let ~loc [ x, t ] bod );
|
|
||||||
];
|
|
||||||
p_term_atomic_cases () ~f:(fun t -> p_term_apply t []);
|
|
||||||
])
|
|
||||||
|
|
||||||
and p_term_atomic_cases ~f () : _ list =
|
|
||||||
[
|
|
||||||
P.lookahead_ignore id, p_var >>= f;
|
|
||||||
P.lookahead_ignore int, p_int >>= f;
|
|
||||||
( P.lookahead_ignore (P.char '('),
|
|
||||||
P.char '(' *> p_term () <* skip_white *> P.char ')' >>= f );
|
|
||||||
]
|
|
||||||
|
|
||||||
and p_term_atomic ?else_ ~f () =
|
|
||||||
P.suspend @@ fun () ->
|
|
||||||
P.try_or_l ?else_ ~msg:"expected atomic term" @@ p_term_atomic_cases ~f ()
|
|
||||||
|
|
||||||
(* TODO: handle infix symbols, with a table (sym -> precedence * parser).
|
|
||||||
Start by registering "=" and "->" in there. *)
|
|
||||||
(* TODO: handle lambda and pi *)
|
|
||||||
|
|
||||||
(* parse application of [t] to 0 or more arguments *)
|
|
||||||
and p_term_apply t args : A.term P.t =
|
|
||||||
P.suspend @@ fun () ->
|
|
||||||
let ret = P.suspend @@ fun () -> P.return @@ A.mk_app t (List.rev args) in
|
|
||||||
skip_white
|
|
||||||
*> (P.try_or_l ~else_:ret
|
|
||||||
@@ List.flatten
|
|
||||||
[
|
|
||||||
[
|
|
||||||
P.eoi, ret;
|
|
||||||
( P.lookahead_ignore (P.guard (fun s -> s = "let" || s = "in") id),
|
|
||||||
(* if we meet some keyword, we stop *)
|
|
||||||
ret );
|
|
||||||
];
|
|
||||||
p_term_atomic_cases () ~f:(fun a -> p_term_apply t (a :: args));
|
|
||||||
])
|
|
||||||
|
|
||||||
let p = p_term ()
|
|
||||||
let of_string s = P.parse_string_e p s
|
|
||||||
let of_string_exn s = P.parse_string_exn p s
|
|
||||||
let of_string_l s = P.parse_string_e (P.many p) s
|
|
||||||
let of_string_l_exn s = P.parse_string_exn (P.many p) s
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
module A = Ast_term
|
|
||||||
|
|
||||||
val p : A.term Parser_comb.t
|
|
||||||
(** Term parser *)
|
|
||||||
|
|
||||||
val of_string : string -> A.term Parser_comb.or_error
|
|
||||||
val of_string_exn : string -> A.term
|
|
||||||
val of_string_l : string -> A.term list Parser_comb.or_error
|
|
||||||
val of_string_l_exn : string -> A.term list
|
|
||||||
44
src/parser/parse_util.ml
Normal file
44
src/parser/parse_util.ml
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
module Error = struct
|
||||||
|
type t = { loc: Loc.t; msg: string }
|
||||||
|
|
||||||
|
(* TODO: use pp_loc *)
|
||||||
|
|
||||||
|
let mk ~loc msg : t = { msg; loc }
|
||||||
|
|
||||||
|
let pp out (self : t) =
|
||||||
|
Fmt.fprintf out "parse error: %s@ %a" self.msg Loc.pp self.loc
|
||||||
|
|
||||||
|
let to_string = Fmt.to_string pp
|
||||||
|
end
|
||||||
|
|
||||||
|
exception Exn_parse_error of Error.t
|
||||||
|
|
||||||
|
open struct
|
||||||
|
module A = Ast_term
|
||||||
|
|
||||||
|
let guard buf f =
|
||||||
|
try f ()
|
||||||
|
with Parse.Error ->
|
||||||
|
let loc =
|
||||||
|
Loc.of_lexloc (Lexing.lexeme_start_p buf, Lexing.lexeme_end_p buf)
|
||||||
|
in
|
||||||
|
raise (Exn_parse_error (Error.mk ~loc @@ "syntax error"))
|
||||||
|
end
|
||||||
|
|
||||||
|
let term_of_string_exn ?filename (s : string) : A.term =
|
||||||
|
let buf = Lexing.from_string s in
|
||||||
|
let@ () = guard buf in
|
||||||
|
Option.iter (Lexing.set_filename buf) filename;
|
||||||
|
Parse.top_term Lex.token buf
|
||||||
|
|
||||||
|
let term_of_string ?filename (s : string) : _ result =
|
||||||
|
try Ok (term_of_string_exn ?filename s) with Exn_parse_error e -> Error e
|
||||||
|
|
||||||
|
let decls_of_string_exn ?filename (s : string) : A.decl list =
|
||||||
|
let buf = Lexing.from_string s in
|
||||||
|
let@ () = guard buf in
|
||||||
|
Option.iter (Lexing.set_filename buf) filename;
|
||||||
|
Parse.top_decls Lex.token buf
|
||||||
|
|
||||||
|
let decls_of_string ?filename (s : string) : _ result =
|
||||||
|
try Ok (decls_of_string_exn ?filename s) with Exn_parse_error e -> Error e
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,695 +0,0 @@
|
||||||
(** Simple Parser Combinators
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
type position
|
|
||||||
(** A position in the input. Typically it'll point at the {b beginning} of
|
|
||||||
an error location. *)
|
|
||||||
|
|
||||||
(** {2 Positions in input}
|
|
||||||
|
|
||||||
*)
|
|
||||||
module Position : sig
|
|
||||||
type t = position
|
|
||||||
|
|
||||||
val line : t -> int
|
|
||||||
(** Line number, 0 based *)
|
|
||||||
|
|
||||||
val column : t -> int
|
|
||||||
(** Column number, 0 based *)
|
|
||||||
|
|
||||||
val line_and_column : t -> int * int
|
|
||||||
(** Line and column number *)
|
|
||||||
|
|
||||||
val min : t -> t -> t
|
|
||||||
val max : t -> t -> t
|
|
||||||
|
|
||||||
val pp : Format.formatter -> t -> unit
|
|
||||||
(** Unspecified pretty-printed version of the position. *)
|
|
||||||
end
|
|
||||||
|
|
||||||
(** {2 Errors}
|
|
||||||
*)
|
|
||||||
module Error : sig
|
|
||||||
type t
|
|
||||||
(** A parse error.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val position : t -> position
|
|
||||||
(** Returns position of the error *)
|
|
||||||
|
|
||||||
val line_and_column : t -> int * int
|
|
||||||
(** Line and column numbers of the error position. *)
|
|
||||||
|
|
||||||
val msg : t -> string
|
|
||||||
|
|
||||||
val to_string : t -> string
|
|
||||||
(** Prints the error *)
|
|
||||||
|
|
||||||
val pp : Format.formatter -> t -> unit
|
|
||||||
(** Pretty prints the error *)
|
|
||||||
end
|
|
||||||
|
|
||||||
type +'a or_error = ('a, Error.t) result
|
|
||||||
(** ['a or_error] is either [Ok x] for some result [x : 'a],
|
|
||||||
or an error {!Error.t}.
|
|
||||||
|
|
||||||
See {!stringify_result} and {!Error.to_string} to print the
|
|
||||||
error message. *)
|
|
||||||
|
|
||||||
exception ParseError of Error.t
|
|
||||||
|
|
||||||
(** {2 Input} *)
|
|
||||||
|
|
||||||
(** {2 Combinators} *)
|
|
||||||
|
|
||||||
type 'a t
|
|
||||||
(** The abstract type of parsers that return a value of type ['a] (or fail).
|
|
||||||
|
|
||||||
@raise ParseError in case of failure.
|
|
||||||
the type is private.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val return : 'a -> 'a t
|
|
||||||
(** Always succeeds, without consuming its input. *)
|
|
||||||
|
|
||||||
val pure : 'a -> 'a t
|
|
||||||
(** Synonym to {!return}. *)
|
|
||||||
|
|
||||||
val map : ('a -> 'b) -> 'a t -> 'b t
|
|
||||||
val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t
|
|
||||||
val map3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t
|
|
||||||
|
|
||||||
val bind : ('a -> 'b t) -> 'a t -> 'b t
|
|
||||||
(** [bind f p] results in a new parser which behaves as [p] then,
|
|
||||||
in case of success, applies [f] to the result.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val ap : ('a -> 'b) t -> 'a t -> 'b t
|
|
||||||
(** Applicative.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val eoi : unit t
|
|
||||||
(** Expect the end of input, fails otherwise. *)
|
|
||||||
|
|
||||||
val empty : unit t
|
|
||||||
(** Succeed with [()].
|
|
||||||
*)
|
|
||||||
|
|
||||||
val fail : string -> 'a t
|
|
||||||
(** [fail msg] fails with the given message. It can trigger a backtrack. *)
|
|
||||||
|
|
||||||
val failf : ('a, unit, string, 'b t) format4 -> 'a
|
|
||||||
(** [Format.sprintf] version of {!fail}. *)
|
|
||||||
|
|
||||||
val fail_lazy : (unit -> string) -> 'a t
|
|
||||||
(** Like {!fail}, but only produce an error message on demand.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val parsing : string -> 'a t -> 'a t
|
|
||||||
(** [parsing s p] behaves the same as [p], with the information that
|
|
||||||
we are parsing [s], if [p] fails.
|
|
||||||
The message [s] is added to the error, it does not replace it,
|
|
||||||
not does the location change (the error still points to
|
|
||||||
the same location as in [p]). *)
|
|
||||||
|
|
||||||
val set_error_message : string -> 'a t -> 'a t
|
|
||||||
(** [set_error_message msg p] behaves like [p], but if [p] fails,
|
|
||||||
[set_error_message msg p] fails with [msg] instead and at the current
|
|
||||||
position. The internal error message of [p] is just discarded.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val pos : position t
|
|
||||||
(** [pos] returns the current position in the buffer.
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val with_pos : 'a t -> ('a * position) t
|
|
||||||
(** [with_pos p] behaves like [p], but returns the (starting) position
|
|
||||||
along with [p]'s result.
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val any_char : char t
|
|
||||||
(** [any_char] parses any character.
|
|
||||||
It still fails if the end of input was reached.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val any_char_n : int -> string t
|
|
||||||
(** [any_char_n len] parses exactly [len] characters from the input.
|
|
||||||
Fails if the input doesn't contain at least [len] chars.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val char : char -> char t
|
|
||||||
(** [char c] parses the character [c] and nothing else. *)
|
|
||||||
|
|
||||||
type slice
|
|
||||||
(** A slice of the input, as returned by some combinators such
|
|
||||||
as {!split_1} or {!split_list} or {!take}.
|
|
||||||
|
|
||||||
The idea is that one can use some parsers to cut the input into slices,
|
|
||||||
e.g. split into lines, or split a line into fields (think CSV or TSV).
|
|
||||||
Then a variety of parsers can be used on each slice to extract data from
|
|
||||||
it using {!recurse}.
|
|
||||||
|
|
||||||
Slices contain enough information to make it possible
|
|
||||||
for [recurse slice p] to report failures (if [p] fails) using locations
|
|
||||||
from the original input, not relative to the slice.
|
|
||||||
Therefore, even after splitting the input into lines using, say, {!each_line},
|
|
||||||
a failure to parse the 500th line will be reported at line 500 and
|
|
||||||
not at line 1.
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
(** Functions on slices.
|
|
||||||
*)
|
|
||||||
module Slice : sig
|
|
||||||
type t = slice
|
|
||||||
|
|
||||||
val is_empty : t -> bool
|
|
||||||
(** Is the slice empty? *)
|
|
||||||
|
|
||||||
val length : t -> int
|
|
||||||
(** Length of the slice *)
|
|
||||||
|
|
||||||
val to_string : t -> string
|
|
||||||
(** Convert the slice into a string.
|
|
||||||
Linear time and memory in [length slice] *)
|
|
||||||
end
|
|
||||||
|
|
||||||
val recurse : slice -> 'a t -> 'a t
|
|
||||||
(** [recurse slice p] parses the [slice]
|
|
||||||
(most likely obtained via another combinator, such as {!split_1}
|
|
||||||
or {!split_n}), using [p].
|
|
||||||
|
|
||||||
The slice contains a position which is used to relocate error
|
|
||||||
messages to their position in the whole input, not just relative to
|
|
||||||
the slice.
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val set_current_slice : slice -> unit t
|
|
||||||
(** [set_current_slice slice] replaces the parser's state with [slice].
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val chars_fold :
|
|
||||||
f:
|
|
||||||
('acc ->
|
|
||||||
char ->
|
|
||||||
[ `Continue of 'acc
|
|
||||||
| `Consume_and_stop of 'acc
|
|
||||||
| `Stop of 'acc
|
|
||||||
| `Fail of string ]) ->
|
|
||||||
'acc ->
|
|
||||||
('acc * slice) t
|
|
||||||
(** [chars_fold f acc0] folds over characters of the input.
|
|
||||||
Each char [c] is passed, along with the current accumulator, to [f];
|
|
||||||
[f] can either:
|
|
||||||
|
|
||||||
- stop, by returning [`Stop acc]. In this case the final accumulator [acc]
|
|
||||||
is returned, and [c] is not consumed.
|
|
||||||
- consume char and stop, by returning [`Consume_and_stop acc].
|
|
||||||
- fail, by returning [`Fail msg]. In this case the parser fails
|
|
||||||
with the given message.
|
|
||||||
- continue, by returning [`Continue acc]. The parser continues to the
|
|
||||||
next char with the new accumulator.
|
|
||||||
|
|
||||||
This is a generalization of of {!chars_if} that allows one to transform
|
|
||||||
characters on the fly, skip some, handle escape sequences, etc.
|
|
||||||
It can also be useful as a base component for a lexer.
|
|
||||||
|
|
||||||
@return a pair of the final accumular, and the slice matched by the fold.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val chars_fold_transduce :
|
|
||||||
f:
|
|
||||||
('acc ->
|
|
||||||
char ->
|
|
||||||
[ `Continue of 'acc
|
|
||||||
| `Yield of 'acc * char
|
|
||||||
| `Consume_and_stop
|
|
||||||
| `Stop
|
|
||||||
| `Fail of string ]) ->
|
|
||||||
'acc ->
|
|
||||||
('acc * string) t
|
|
||||||
(** Same as {!char_fold} but with the following differences:
|
|
||||||
|
|
||||||
- returns a string along with the accumulator, rather than the slice
|
|
||||||
of all the characters accepted by [`Continue _].
|
|
||||||
The string is built from characters returned by [`Yield].
|
|
||||||
- new case [`Yield (acc, c)] adds [c] to the returned string
|
|
||||||
and continues parsing with [acc].
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val guard : ?descr:string -> ('a -> bool) -> 'a t -> 'a t
|
|
||||||
(** [guard f p] is like [p], but fails if the value returned by [p]
|
|
||||||
does not satisfy [f].
|
|
||||||
@param descr used to provide a better error message *)
|
|
||||||
|
|
||||||
val take : int -> slice t
|
|
||||||
(** [take len] parses exactly [len] characters from the input.
|
|
||||||
Fails if the input doesn't contain at least [len] chars.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val take_if : (char -> bool) -> slice t
|
|
||||||
(** [take_if f] takes characters as long as they satisfy the predicate [f].
|
|
||||||
*)
|
|
||||||
|
|
||||||
val take1_if : ?descr:string -> (char -> bool) -> slice t
|
|
||||||
(** [take1_if f] takes characters as long as they satisfy the predicate [f].
|
|
||||||
Fails if no character satisfies [f].
|
|
||||||
@param descr describes what kind of character was expected, in case of error
|
|
||||||
*)
|
|
||||||
|
|
||||||
val char_if : ?descr:string -> (char -> bool) -> char t
|
|
||||||
(** [char_if f] parses a character [c] if [f c = true].
|
|
||||||
Fails if the next char does not satisfy [f].
|
|
||||||
@param descr describes what kind of character was expected, in case of error *)
|
|
||||||
|
|
||||||
val chars_if : (char -> bool) -> string t
|
|
||||||
(** [chars_if f] parses a string of chars that satisfy [f].
|
|
||||||
Cannot fail. *)
|
|
||||||
|
|
||||||
val chars1_if : ?descr:string -> (char -> bool) -> string t
|
|
||||||
(** Like {!chars_if}, but accepts only non-empty strings.
|
|
||||||
[chars1_if p] fails if the string accepted by [chars_if p] is empty.
|
|
||||||
[chars1_if p] is equivalent to [take1_if p >|= Slice.to_string].
|
|
||||||
@param descr describes what kind of character was expected, in case of error *)
|
|
||||||
|
|
||||||
val endline : char t
|
|
||||||
(** Parse '\n'. *)
|
|
||||||
|
|
||||||
val space : char t
|
|
||||||
(** Tab or space. *)
|
|
||||||
|
|
||||||
val white : char t
|
|
||||||
(** Tab or space or newline. *)
|
|
||||||
|
|
||||||
val skip_chars : (char -> bool) -> unit t
|
|
||||||
(** Skip 0 or more chars satisfying the predicate. *)
|
|
||||||
|
|
||||||
val skip_space : unit t
|
|
||||||
(** Skip ' ' and '\t'. *)
|
|
||||||
|
|
||||||
val skip_white : unit t
|
|
||||||
(** Skip ' ' and '\t' and '\n'. *)
|
|
||||||
|
|
||||||
val is_alpha : char -> bool
|
|
||||||
(** Is the char a letter? *)
|
|
||||||
|
|
||||||
val is_num : char -> bool
|
|
||||||
(** Is the char a digit? *)
|
|
||||||
|
|
||||||
val is_alpha_num : char -> bool
|
|
||||||
(** Is the char a letter or a digit? *)
|
|
||||||
|
|
||||||
val is_space : char -> bool
|
|
||||||
(** True on ' ' and '\t'. *)
|
|
||||||
|
|
||||||
val is_white : char -> bool
|
|
||||||
(** True on ' ' and '\t' and '\n'. *)
|
|
||||||
|
|
||||||
val suspend : (unit -> 'a t) -> 'a t
|
|
||||||
(** [suspend f] is the same as [f ()], but evaluates [f ()] only
|
|
||||||
when needed.
|
|
||||||
|
|
||||||
A practical use case is to implement recursive parsers manually,
|
|
||||||
as described in {!fix}. The parser is [let rec p () = …],
|
|
||||||
and [suspend p] can be used in the definition to use [p].
|
|
||||||
*)
|
|
||||||
|
|
||||||
val string : string -> string t
|
|
||||||
(** [string s] parses exactly the string [s], and nothing else. *)
|
|
||||||
|
|
||||||
val exact : string -> string t
|
|
||||||
(** Alias to {!string}.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val many : 'a t -> 'a list t
|
|
||||||
(** [many p] parses [p] repeatedly, until [p] fails, and
|
|
||||||
collects the results into a list. *)
|
|
||||||
|
|
||||||
val optional : _ t -> unit t
|
|
||||||
(** [optional p] tries to parse [p], and return [()] whether it
|
|
||||||
succeeded or failed. Cannot fail itself.
|
|
||||||
It consumes input if [p] succeeded (as much as [p] consumed), but
|
|
||||||
consumes not input if [p] failed.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val try_ : 'a t -> 'a t
|
|
||||||
[@@deprecated "plays no role anymore, just replace [try foo] with [foo]"]
|
|
||||||
(** [try_ p] is just like [p] (it used to play a role in backtracking
|
|
||||||
semantics but no more).
|
|
||||||
|
|
||||||
@deprecated since 3.6 it can just be removed. See {!try_opt} if you want
|
|
||||||
to detect failure. *)
|
|
||||||
|
|
||||||
val try_opt : 'a t -> 'a option t
|
|
||||||
(** [try_opt p] tries to parse using [p], and return [Some x] if [p]
|
|
||||||
succeeded with [x] (and consumes what [p] consumed).
|
|
||||||
Otherwise it returns [None] and consumes nothing. This cannot fail.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val many_until : until:_ t -> 'a t -> 'a list t
|
|
||||||
(** [many_until ~until p] parses as many [p] as it can until
|
|
||||||
the [until] parser successfully returns.
|
|
||||||
If [p] fails before that then [many_until ~until p] fails as well.
|
|
||||||
Typically [until] can be a closing ')' or another termination condition,
|
|
||||||
and what is consumed by [until] is also consumed by [many_until ~until p].
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val try_or : 'a t -> f:('a -> 'b t) -> else_:'b t -> 'b t
|
|
||||||
(** [try_or p1 ~f ~else_:p2] attempts to parse [x] using [p1],
|
|
||||||
and then becomes [f x].
|
|
||||||
If [p1] fails, then it becomes [p2]. This can be useful if [f] is expensive
|
|
||||||
but only ever works if [p1] matches (e.g. after an opening parenthesis
|
|
||||||
or some sort of prefix).
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val try_or_l : ?msg:string -> ?else_:'a t -> (unit t * 'a t) list -> 'a t
|
|
||||||
(** [try_or_l ?else_ l] tries each pair [(test, p)] in order.
|
|
||||||
If the n-th [test] succeeds, then [try_or_l l] behaves like n-th [p],
|
|
||||||
whether [p] fails or not. If [test] consumes input, the state is restored
|
|
||||||
before calling [p].
|
|
||||||
If they all fail, and [else_] is defined, then it behaves like [else_].
|
|
||||||
If all fail, and [else_] is [None], then it fails as well.
|
|
||||||
|
|
||||||
This is a performance optimization compared to {!(<|>)}. We commit to a
|
|
||||||
branch if the test succeeds, without backtracking at all.
|
|
||||||
It can also provide better error messages, because failures in the parser
|
|
||||||
will not be reported as failures in [try_or_l].
|
|
||||||
|
|
||||||
See {!lookahead_ignore} for a convenient way of writing the test conditions.
|
|
||||||
|
|
||||||
@param msg error message if all options fail
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val or_ : 'a t -> 'a t -> 'a t
|
|
||||||
(** [or_ p1 p2] tries to parse [p1], and if it fails, tries [p2]
|
|
||||||
from the same position.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val both : 'a t -> 'b t -> ('a * 'b) t
|
|
||||||
(** [both a b] parses [a], then [b], then returns the pair of their results.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val many1 : 'a t -> 'a list t
|
|
||||||
(** [many1 p] is like [many p] excepts it fails if the
|
|
||||||
list is empty (i.e. it needs [p] to succeed at least once). *)
|
|
||||||
|
|
||||||
val skip : _ t -> unit t
|
|
||||||
(** [skip p] parses zero or more times [p] and ignores its result.
|
|
||||||
It is eager, meaning it will continue as long as [p] succeeds.
|
|
||||||
As soon as [p] fails, [skip p] stops consuming any input. *)
|
|
||||||
|
|
||||||
val sep : by:_ t -> 'a t -> 'a list t
|
|
||||||
(** [sep ~by p] parses a list of [p] separated by [by]. *)
|
|
||||||
|
|
||||||
val sep_until : until:_ t -> by:_ t -> 'a t -> 'a list t
|
|
||||||
(** Same as {!sep} but stop when [until] parses successfully.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val sep1 : by:_ t -> 'a t -> 'a list t
|
|
||||||
(** [sep1 ~by p] parses a non empty list of [p], separated by [by]. *)
|
|
||||||
|
|
||||||
val lookahead : 'a t -> 'a t
|
|
||||||
(** [lookahead p] behaves like [p], except it doesn't consume any input.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val lookahead_ignore : 'a t -> unit t
|
|
||||||
(** [lookahead_ignore p] tries to parse input with [p],
|
|
||||||
and succeeds if [p] succeeds. However it doesn't consume any input
|
|
||||||
and returns [()], so in effect its only use-case is to detect
|
|
||||||
whether [p] succeeds, e.g. in {!try_or_l}.
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val fix : ('a t -> 'a t) -> 'a t
|
|
||||||
(** Fixpoint combinator. [fix (fun self -> p)] is the parser [p],
|
|
||||||
in which [self] refers to the parser [p] itself (which is useful to
|
|
||||||
parse recursive structures.
|
|
||||||
|
|
||||||
An alternative, manual implementation to [let p = fix (fun self -> q)]
|
|
||||||
is:
|
|
||||||
{[ let rec p () =
|
|
||||||
let self = suspend p in
|
|
||||||
q
|
|
||||||
]}
|
|
||||||
*)
|
|
||||||
|
|
||||||
val line : slice t
|
|
||||||
(** Parse a line, ['\n'] excluded, and position the cursor after the ['\n'].
|
|
||||||
*)
|
|
||||||
|
|
||||||
val line_str : string t
|
|
||||||
(** [line_str] is [line >|= Slice.to_string].
|
|
||||||
It parses the next line and turns the slice into a string.
|
|
||||||
The state points to the character immediately after the ['\n'] character.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val each_line : 'a t -> 'a list t
|
|
||||||
(** [each_line p] runs [p] on each line of the input.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val split_1 : on_char:char -> (slice * slice option) t
|
|
||||||
(** [split_1 ~on_char] looks for [on_char] in the input, and returns a
|
|
||||||
pair [sl1, sl2], where:
|
|
||||||
|
|
||||||
- [sl1] is the slice of the input the precedes the first occurrence
|
|
||||||
of [on_char], or the whole input if [on_char] cannot be found.
|
|
||||||
It does not contain [on_char].
|
|
||||||
- [sl2] is the slice that comes after [on_char],
|
|
||||||
or [None] if [on_char] couldn't be found. It doesn't contain the first
|
|
||||||
occurrence of [on_char] (if any).
|
|
||||||
|
|
||||||
The parser is now positioned at the end of the input.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val split_list : on_char:char -> slice list t
|
|
||||||
(** [split_list ~on_char] splits the input on all occurrences of [on_char],
|
|
||||||
returning a list of slices.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val split_list_at_most : on_char:char -> int -> slice list t
|
|
||||||
(** [split_list_at_most ~on_char n] applies [split_1 ~on_char] at most
|
|
||||||
[n] times, to get a list of [n+1] elements.
|
|
||||||
The last element might contain [on_char]. This is useful to limit the
|
|
||||||
amount of work done by {!split_list}.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val split_2 : on_char:char -> (slice * slice) t
|
|
||||||
(** [split_2 ~on_char] splits the input into exactly 2 fields,
|
|
||||||
and fails if the split yields less or more than 2 items.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val split_3 : on_char:char -> (slice * slice * slice) t
|
|
||||||
(** See {!split_2}
|
|
||||||
*)
|
|
||||||
|
|
||||||
val split_4 : on_char:char -> (slice * slice * slice * slice) t
|
|
||||||
(** See {!split_2}
|
|
||||||
*)
|
|
||||||
|
|
||||||
val each_split : on_char:char -> 'a t -> 'a list t
|
|
||||||
(** [split_list_map ~on_char p] uses [split_list ~on_char] to split
|
|
||||||
the input, then parses each chunk of the input thus obtained using [p].
|
|
||||||
|
|
||||||
The difference with [sep ~by:(char on_char) p] is that
|
|
||||||
[sep] calls [p] first, and only tries to find [on_char] after [p] returns.
|
|
||||||
While it is more flexible, this technique also means [p] has to be careful
|
|
||||||
not to consume [on_char] by error.
|
|
||||||
|
|
||||||
A useful specialization of this is {!each_line}, which is
|
|
||||||
basically [each_split ~on_char:'\n' p].
|
|
||||||
*)
|
|
||||||
|
|
||||||
val all : slice t
|
|
||||||
(** [all] returns all the unconsumed input as a slice, and consumes it.
|
|
||||||
Use {!Slice.to_string} to turn it into a string.
|
|
||||||
|
|
||||||
Note that [lookahead all] can be used to {i peek} at the rest of the input
|
|
||||||
without consuming anything.
|
|
||||||
|
|
||||||
*)
|
|
||||||
|
|
||||||
val all_str : string t
|
|
||||||
(** [all_str] accepts all the remaining chars and extracts them into a
|
|
||||||
string. Similar to {!all} but with a string.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val memo : 'a t -> 'a t
|
|
||||||
(** Memoize the parser. [memo p] will behave like [p], but when called
|
|
||||||
in a state (read: position in input) it has already processed, [memo p]
|
|
||||||
returns a result directly. The implementation uses an underlying
|
|
||||||
hashtable.
|
|
||||||
This can be costly in memory, but improve the run time a lot if there
|
|
||||||
is a lot of backtracking involving [p].
|
|
||||||
|
|
||||||
Do not call {!memo} inside other functions, especially with {!(>>=)},
|
|
||||||
{!map}, etc. being so prevalent. Instead the correct way to use it
|
|
||||||
is in a toplevel definition:
|
|
||||||
|
|
||||||
{[
|
|
||||||
let my_expensive_parser = memo (foo *> bar >>= fun i -> …)
|
|
||||||
]}
|
|
||||||
|
|
||||||
This function is not thread-safe. *)
|
|
||||||
|
|
||||||
val fix_memo : ('a t -> 'a t) -> 'a t
|
|
||||||
(** Like {!fix}, but the fixpoint is memoized. *)
|
|
||||||
|
|
||||||
(** {2 Infix} *)
|
|
||||||
|
|
||||||
module Infix : sig
|
|
||||||
val ( >|= ) : 'a t -> ('a -> 'b) -> 'b t
|
|
||||||
(** Alias to {!map}. [p >|= f] parses an item [x] using [p],
|
|
||||||
and returns [f x]. *)
|
|
||||||
|
|
||||||
val ( >>= ) : 'a t -> ('a -> 'b t) -> 'b t
|
|
||||||
(** Alias to {!bind}.
|
|
||||||
[p >>= f] results in a new parser which behaves as [p] then,
|
|
||||||
in case of success, applies [f] to the result. *)
|
|
||||||
|
|
||||||
val ( <*> ) : ('a -> 'b) t -> 'a t -> 'b t
|
|
||||||
(** Applicative. *)
|
|
||||||
|
|
||||||
val ( <* ) : 'a t -> _ t -> 'a t
|
|
||||||
(** [a <* b] parses [a] into [x], parses [b] and ignores its result,
|
|
||||||
and returns [x]. *)
|
|
||||||
|
|
||||||
val ( *> ) : _ t -> 'a t -> 'a t
|
|
||||||
(** [a *> b] parses [a], then parses [b] into [x], and returns [x]. The
|
|
||||||
result of [a] is ignored. *)
|
|
||||||
|
|
||||||
val ( <|> ) : 'a t -> 'a t -> 'a t
|
|
||||||
(** Alias to {!or_}.
|
|
||||||
|
|
||||||
[a <|> b] tries to parse [a], and if [a] fails without
|
|
||||||
consuming any input, backtracks and tries
|
|
||||||
to parse [b], otherwise it fails as [a]. *)
|
|
||||||
|
|
||||||
val ( <?> ) : 'a t -> string -> 'a t
|
|
||||||
(** [a <?> msg] behaves like [a], but if [a] fails,
|
|
||||||
[a <?> msg] fails with [msg] instead.
|
|
||||||
Useful as the last choice in a series of [<|>]. For example:
|
|
||||||
[a <|> b <|> c <?> "expected one of a, b, c"]. *)
|
|
||||||
|
|
||||||
val ( ||| ) : 'a t -> 'b t -> ('a * 'b) t
|
|
||||||
(** Alias to {!both}.
|
|
||||||
[a ||| b] parses [a], then [b], then returns the pair of their results.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val ( let+ ) : 'a t -> ('a -> 'b) -> 'b t
|
|
||||||
val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t
|
|
||||||
val ( and+ ) : 'a t -> 'b t -> ('a * 'b) t
|
|
||||||
val ( and* ) : 'a t -> 'b t -> ('a * 'b) t
|
|
||||||
end
|
|
||||||
|
|
||||||
include module type of Infix
|
|
||||||
|
|
||||||
(** {2 Parse input} *)
|
|
||||||
|
|
||||||
val stringify_result : 'a or_error -> ('a, string) result
|
|
||||||
(** Turn a {!Error.t}-oriented result into a more basic string result.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val parse_string : 'a t -> string -> ('a, string) result
|
|
||||||
(** Parse a string using the parser. *)
|
|
||||||
|
|
||||||
val parse_string_e : 'a t -> string -> 'a or_error
|
|
||||||
(** Version of {!parse_string} that returns a more detailed error. *)
|
|
||||||
|
|
||||||
val parse_string_exn : 'a t -> string -> 'a
|
|
||||||
(** @raise ParseError if it fails. *)
|
|
||||||
|
|
||||||
val parse_file : 'a t -> string -> ('a, string) result
|
|
||||||
(** [parse_file p filename] parses file named [filename] with [p]
|
|
||||||
by opening the file and reading it whole. *)
|
|
||||||
|
|
||||||
val parse_file_e : 'a t -> string -> 'a or_error
|
|
||||||
(** Version of {!parse_file} that returns a more detailed error. *)
|
|
||||||
|
|
||||||
val parse_file_exn : 'a t -> string -> 'a
|
|
||||||
(** Same as {!parse_file}, but
|
|
||||||
@raise ParseError if it fails. *)
|
|
||||||
|
|
||||||
(** {2 Utils}
|
|
||||||
|
|
||||||
This is useful to parse OCaml-like values in a simple way.
|
|
||||||
All the parsers are whitespace-insensitive (they skip whitespace). *)
|
|
||||||
module U : sig
|
|
||||||
val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t
|
|
||||||
(** [list p] parses a list of [p], with the OCaml conventions for
|
|
||||||
start token "\[", stop token "\]" and separator ";".
|
|
||||||
Whitespace between items are skipped. *)
|
|
||||||
|
|
||||||
(* TODO: parse option? *)
|
|
||||||
(* TODO: split on whitespace? *)
|
|
||||||
|
|
||||||
val int : int t
|
|
||||||
(** Parse an int in decimal representation. *)
|
|
||||||
|
|
||||||
val in_paren : 'a t -> 'a t
|
|
||||||
(** [in_paren p] parses an opening "(",[p] , and then ")".
|
|
||||||
*)
|
|
||||||
|
|
||||||
val in_parens_opt : 'a t -> 'a t
|
|
||||||
(** [in_parens_opt p] parses [p] in an arbitrary number of nested
|
|
||||||
parenthesis (possibly 0).
|
|
||||||
*)
|
|
||||||
|
|
||||||
val option : 'a t -> 'a option t
|
|
||||||
(** [option p] parses "Some <x>" into [Some x] if [p] parses "<x>" into [x],
|
|
||||||
and parses "None" into [None].
|
|
||||||
*)
|
|
||||||
|
|
||||||
val hexa_int : int t
|
|
||||||
(** Parse an int int hexadecimal format. Accepts an optional [0x] prefix,
|
|
||||||
and ignores capitalization.
|
|
||||||
*)
|
|
||||||
|
|
||||||
val word : string t
|
|
||||||
(** Non empty string of alpha num, start with alpha. *)
|
|
||||||
|
|
||||||
val bool : bool t
|
|
||||||
(** Accepts "true" or "false"
|
|
||||||
*)
|
|
||||||
|
|
||||||
(* TODO: quoted string *)
|
|
||||||
|
|
||||||
val pair :
|
|
||||||
?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'b t -> ('a * 'b) t
|
|
||||||
(** Parse a pair using OCaml syntactic conventions.
|
|
||||||
The default is "(a, b)". *)
|
|
||||||
|
|
||||||
val triple :
|
|
||||||
?start:string ->
|
|
||||||
?stop:string ->
|
|
||||||
?sep:string ->
|
|
||||||
'a t ->
|
|
||||||
'b t ->
|
|
||||||
'c t ->
|
|
||||||
('a * 'b * 'c) t
|
|
||||||
(** Parse a triple using OCaml syntactic conventions.
|
|
||||||
The default is "(a, b, c)". *)
|
|
||||||
end
|
|
||||||
|
|
||||||
(** Debugging utils. *)
|
|
||||||
module Debug_ : sig
|
|
||||||
val trace_fail : string -> 'a t -> 'a t
|
|
||||||
(** [trace_fail name p] behaves like [p], but prints the error message of [p]
|
|
||||||
on stderr whenever [p] fails.
|
|
||||||
@param name used as a prefix of all trace messages. *)
|
|
||||||
|
|
||||||
val trace_success : string -> print:('a -> string) -> 'a t -> 'a t
|
|
||||||
(** [trace_success name ~print p] behaves like [p], but
|
|
||||||
prints successful runs of [p] using [print]. *)
|
|
||||||
|
|
||||||
val trace_success_or_fail : string -> print:('a -> string) -> 'a t -> 'a t
|
|
||||||
(** Trace both error or success *)
|
|
||||||
end
|
|
||||||
26
src/parser/position.ml
Normal file
26
src/parser/position.ml
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
type t = { file: string; line: int; col: int }
|
||||||
|
|
||||||
|
let line self = self.line
|
||||||
|
let col self = self.col
|
||||||
|
let ( <= ) x y = x.line < y.line || (x.line = y.line && x.col <= y.col)
|
||||||
|
let ( < ) x y = x.line < y.line || (x.line = y.line && x.col < y.col)
|
||||||
|
let ( >= ) a b = b <= a
|
||||||
|
let ( > ) a b = b < a
|
||||||
|
|
||||||
|
let min a b =
|
||||||
|
if a <= b then
|
||||||
|
a
|
||||||
|
else
|
||||||
|
b
|
||||||
|
|
||||||
|
let max a b =
|
||||||
|
if a >= b then
|
||||||
|
a
|
||||||
|
else
|
||||||
|
b
|
||||||
|
|
||||||
|
let pp out self = Format.fprintf out "at line %d, column %d" self.line self.col
|
||||||
|
|
||||||
|
(** Build position from a lexing position *)
|
||||||
|
let of_lexpos (p : Lexing.position) : t =
|
||||||
|
{ file = p.pos_fname; line = p.pos_lnum; col = p.pos_cnum - p.pos_bol }
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
module Ast_term = Ast_term
|
module Ast_term = Ast_term
|
||||||
module Parser_comb = Parser_comb
|
module Parse_util = Parse_util
|
||||||
module Parse_term = Parse_term
|
module Parse = Parse
|
||||||
|
module Lex = Lex
|
||||||
|
module Loc = Loc
|
||||||
|
module Position = Position
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue