mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
add CCParse into core, a simple, lightweight version of parser combs
This commit is contained in:
parent
8f7b29c3bd
commit
72d43c6eeb
4 changed files with 783 additions and 1 deletions
2
_oasis
2
_oasis
|
|
@ -41,7 +41,7 @@ Library "containers"
|
|||
Modules: CCVector, CCHeap, CCList, CCOpt, CCPair,
|
||||
CCFun, CCHash, CCInt, CCBool, CCFloat, CCArray, CCRef, CCSet,
|
||||
CCOrd, CCRandom, CCString, CCHashtbl, CCMap, CCFormat, CCIO,
|
||||
CCInt64, CCChar, CCResult, Containers
|
||||
CCInt64, CCChar, CCResult, CCParse, Containers
|
||||
BuildDepends: bytes, result
|
||||
# BuildDepends: bytes, bisect_ppx
|
||||
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ CCMap
|
|||
CCOpt
|
||||
CCOrd
|
||||
CCPair
|
||||
CCParse
|
||||
CCRandom
|
||||
CCRef
|
||||
CCResult
|
||||
|
|
|
|||
414
src/core/CCParse.ml
Normal file
414
src/core/CCParse.ml
Normal file
|
|
@ -0,0 +1,414 @@
|
|||
|
||||
(* This file is free software. See file "license" for more details. *)
|
||||
|
||||
(** {1 Very Simple Parser Combinators} *)
|
||||
|
||||
type 'a or_error = ('a, string) Result.result
|
||||
|
||||
type line_num = int
|
||||
type col_num = int
|
||||
|
||||
module MemoTbl = struct
|
||||
module H = Hashtbl.Make(struct
|
||||
type t = int * int (* id of parser, position *)
|
||||
let equal ((a,b):t)(c,d) = a=c && b=d
|
||||
let hash = Hashtbl.hash
|
||||
end)
|
||||
|
||||
(* table of closures, used to implement universal type *)
|
||||
type t = (unit -> unit) H.t lazy_t
|
||||
|
||||
let create n = lazy (H.create n)
|
||||
|
||||
(* unique ID for each parser *)
|
||||
let id_ = ref 0
|
||||
|
||||
type 'a res =
|
||||
| Fail of exn
|
||||
| Ok of 'a
|
||||
end
|
||||
|
||||
type position = int * int * int (* pos, line, column *)
|
||||
|
||||
type parse_branch = (line_num * col_num * string option) list
|
||||
|
||||
type state = {
|
||||
str: string; (* the input *)
|
||||
mutable i: int; (* offset *)
|
||||
mutable lnum : line_num; (* Line number *)
|
||||
mutable cnum : col_num; (* Column number *)
|
||||
mutable branch: parse_branch;
|
||||
memo : MemoTbl.t; (* Memoization table, if any *)
|
||||
}
|
||||
|
||||
exception ParseError of parse_branch * (unit -> string)
|
||||
|
||||
let rec string_of_branch l =
|
||||
let pp_s () = function
|
||||
| None -> ""
|
||||
| Some s -> Format.sprintf "while parsing %s, " s
|
||||
in
|
||||
match l with
|
||||
| [] -> ""
|
||||
| [l,c,s] ->
|
||||
Format.sprintf "@[%aat line %d, col %d@]" pp_s s l c
|
||||
| (l,c,s) :: tail ->
|
||||
Format.sprintf "@[%aat line %d, col %d@]@,%s" pp_s s l c (string_of_branch tail)
|
||||
|
||||
let () = Printexc.register_printer
|
||||
(function
|
||||
| ParseError (b,msg) ->
|
||||
Some (Format.sprintf "@[<v>%s@ %s@]" (msg()) (string_of_branch b))
|
||||
| _ -> None)
|
||||
|
||||
let const_ x () = x
|
||||
|
||||
let state_of_string str =
|
||||
let s = {
|
||||
str;
|
||||
i=0;
|
||||
lnum=1;
|
||||
cnum=1;
|
||||
branch=[];
|
||||
memo=MemoTbl.create 32;
|
||||
} in
|
||||
s
|
||||
|
||||
let is_done st = st.i = String.length st.str
|
||||
let cur st = st.str.[st.i]
|
||||
|
||||
let fail_ ~err st msg =
|
||||
let b = (st.lnum, st.cnum, None) :: st.branch in
|
||||
err (ParseError (b, msg))
|
||||
|
||||
let next st ~ok ~err =
|
||||
if st.i = String.length st.str
|
||||
then fail_ st ~err (const_ "unexpected end of input")
|
||||
else (
|
||||
let c = st.str.[st.i] in
|
||||
st.i <- st.i + 1;
|
||||
if c='\n'
|
||||
then (st.lnum <- st.lnum + 1; st.cnum <- 1)
|
||||
else st.cnum <- st.cnum + 1;
|
||||
ok c
|
||||
)
|
||||
|
||||
let pos st = st.i, st.lnum, st.cnum
|
||||
|
||||
let backtrack st (i',l',c') =
|
||||
assert (0 <= i' && i' <= st.i);
|
||||
st.i <- i';
|
||||
st.lnum <- l';
|
||||
st.cnum <- c';
|
||||
()
|
||||
|
||||
type 'a t = state -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
||||
|
||||
let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x
|
||||
let pure = return
|
||||
let (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
||||
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> ok (f x))
|
||||
let (>>=) : 'a t -> ('a -> 'b t) -> 'b t
|
||||
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> f x st ~err ~ok)
|
||||
let (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
||||
= fun f x st ~ok ~err ->
|
||||
f st ~err ~ok:(fun f' -> x st ~err ~ok:(fun x' -> ok (f' x')))
|
||||
let (<* ) : 'a t -> _ t -> 'a t
|
||||
= fun x y st ~ok ~err ->
|
||||
x st ~err ~ok:(fun res -> y st ~err ~ok:(fun _ -> ok res))
|
||||
let ( *>) : _ t -> 'a t -> 'a t
|
||||
= fun x y st ~ok ~err ->
|
||||
x st ~err ~ok:(fun _ -> y st ~err ~ok)
|
||||
|
||||
let map f x = x >|= f
|
||||
let map2 f x y = pure f <*> x <*> y
|
||||
let map3 f x y z = pure f <*> x <*> y <*> z
|
||||
|
||||
let junk_ st = next st ~err:(fun _ -> assert false) ~ok:ignore
|
||||
|
||||
let eoi st ~ok ~err =
|
||||
if is_done st
|
||||
then ok ()
|
||||
else fail_ ~err st (const_ "expected EOI")
|
||||
|
||||
let fail msg st ~ok:_ ~err = fail_ ~err st (const_ msg)
|
||||
let failf msg = Printf.ksprintf fail msg
|
||||
|
||||
let parsing s p st ~ok ~err =
|
||||
st.branch <- (st.lnum, st.cnum, Some s) :: st.branch;
|
||||
p st
|
||||
~ok:(fun x -> st.branch <- List.tl st.branch; ok x)
|
||||
~err:(fun e -> st.branch <- List.tl st.branch; err e)
|
||||
|
||||
let nop _ ~ok ~err:_ = ok()
|
||||
|
||||
let char c =
|
||||
let msg = Printf.sprintf "expected '%c'" c in
|
||||
fun st ~ok ~err ->
|
||||
next st ~err
|
||||
~ok:(fun c' -> if c=c' then ok c else fail_ ~err st (const_ msg))
|
||||
|
||||
let char_if p st ~ok ~err =
|
||||
next st ~err
|
||||
~ok:(fun c ->
|
||||
if p c then ok c
|
||||
else fail_ ~err st (fun () -> Printf.sprintf "unexpected char '%c'" c)
|
||||
)
|
||||
|
||||
let chars_if p st ~ok ~err:_ =
|
||||
let i = st.i in
|
||||
let len = ref 0 in
|
||||
while not (is_done st) && p (cur st) do junk_ st; incr len done;
|
||||
ok (String.sub st.str i !len)
|
||||
|
||||
let chars1_if p st ~ok ~err =
|
||||
chars_if p st ~err
|
||||
~ok:(fun s ->
|
||||
if s = ""
|
||||
then fail_ ~err st (const_ "unexpected sequence of chars")
|
||||
else ok s)
|
||||
|
||||
let rec skip_chars p st ~ok ~err =
|
||||
if not (is_done st) && p (cur st) then (
|
||||
junk_ st;
|
||||
skip_chars p st ~ok ~err
|
||||
) else ok()
|
||||
|
||||
let is_alpha = function
|
||||
| 'a' .. 'z' | 'A' .. 'Z' -> true
|
||||
| _ -> false
|
||||
let is_num = function '0' .. '9' -> true | _ -> false
|
||||
let is_alpha_num = function
|
||||
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
|
||||
| _ -> false
|
||||
let is_space = function ' ' | '\t' -> true | _ -> false
|
||||
let is_white = function ' ' | '\t' | '\n' -> true | _ -> false
|
||||
|
||||
let space = char_if is_space
|
||||
let white = char_if is_white
|
||||
|
||||
let endline st ~ok ~err =
|
||||
next st ~err
|
||||
~ok:(function
|
||||
| '\n' as c -> ok c
|
||||
| _ -> fail_ ~err st (const_ "expected end-of-line"))
|
||||
|
||||
let skip_space = skip_chars is_space
|
||||
let skip_white = skip_chars is_white
|
||||
|
||||
let (<|>) : 'a t -> 'a t -> 'a t
|
||||
= fun x y st ~ok ~err ->
|
||||
let i = st.i in
|
||||
x st ~ok
|
||||
~err:(fun e ->
|
||||
let j = st.i in
|
||||
if i=j then y st ~ok ~err (* try [y] *)
|
||||
else err e (* fail *)
|
||||
)
|
||||
|
||||
let try_ : 'a t -> 'a t
|
||||
= fun p st ~ok ~err ->
|
||||
let i = pos st in
|
||||
p st ~ok
|
||||
~err:(fun e ->
|
||||
backtrack st i;
|
||||
err e)
|
||||
|
||||
let suspend f st ~ok ~err = f () st ~ok ~err
|
||||
|
||||
let (<?>) : 'a t -> string -> 'a t
|
||||
= fun x msg st ~ok ~err ->
|
||||
let i = st.i in
|
||||
x st ~ok
|
||||
~err:(fun e ->
|
||||
if st.i = i
|
||||
then fail_ ~err st (fun () -> msg)
|
||||
else err e)
|
||||
|
||||
let string s st ~ok ~err =
|
||||
let rec check i =
|
||||
if i = String.length s then ok s
|
||||
else
|
||||
next st ~err
|
||||
~ok:(fun c ->
|
||||
if c = s.[i]
|
||||
then check (i+1)
|
||||
else fail_ ~err st (fun () -> Printf.sprintf "expected \"%s\"" s))
|
||||
in
|
||||
check 0
|
||||
|
||||
let rec many_rec : 'a t -> 'a list -> 'a list t = fun p acc st ~ok ~err ->
|
||||
if is_done st then ok(List.rev acc)
|
||||
else
|
||||
p st ~err
|
||||
~ok:(fun x ->
|
||||
let i = pos st in
|
||||
many_rec p (x :: acc) st ~ok
|
||||
~err:(fun _ ->
|
||||
backtrack st i;
|
||||
ok(List.rev acc))
|
||||
)
|
||||
|
||||
let many : 'a t -> 'a list t
|
||||
= fun p st ~ok ~err -> many_rec p [] st ~ok ~err
|
||||
|
||||
let many1 : 'a t -> 'a list t =
|
||||
fun p st ~ok ~err ->
|
||||
p st ~err ~ok:(fun x -> many_rec p [x] st ~err ~ok)
|
||||
|
||||
let rec skip p st ~ok ~err =
|
||||
let i = pos st in
|
||||
p st
|
||||
~ok:(fun _ -> skip p st ~ok ~err)
|
||||
~err:(fun _ ->
|
||||
backtrack st i;
|
||||
ok()
|
||||
)
|
||||
|
||||
(* by (sep1 ~by p) *)
|
||||
let rec sep_rec ~by p = try_ by *> sep1 ~by p
|
||||
|
||||
and sep1 ~by p =
|
||||
p >>= fun x ->
|
||||
(sep_rec ~by p >|= fun tl -> x::tl)
|
||||
<|> return [x]
|
||||
|
||||
let sep ~by p =
|
||||
(try_ p >>= fun x ->
|
||||
(sep_rec ~by p >|= fun tl -> x::tl)
|
||||
<|> return [x])
|
||||
<|> return []
|
||||
|
||||
let fix f =
|
||||
let rec p st ~ok ~err = f p st ~ok ~err in
|
||||
p
|
||||
|
||||
let memo (type a) (p:a t):a t =
|
||||
let id = !MemoTbl.id_ in
|
||||
incr MemoTbl.id_;
|
||||
let r = ref None in (* used for universal encoding *)
|
||||
fun st ~ok ~err ->
|
||||
let i = st.i in
|
||||
let (lazy tbl) = st.memo in
|
||||
try
|
||||
let f = MemoTbl.H.find tbl (i,id) in
|
||||
(* extract hidden value *)
|
||||
r := None;
|
||||
f ();
|
||||
begin match !r with
|
||||
| None -> assert false
|
||||
| Some (MemoTbl.Ok x) -> ok x
|
||||
| Some (MemoTbl.Fail e) -> err e
|
||||
end
|
||||
with Not_found ->
|
||||
(* parse, and save *)
|
||||
p st
|
||||
~err:(fun e ->
|
||||
MemoTbl.H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Fail e));
|
||||
err e)
|
||||
~ok:(fun x ->
|
||||
MemoTbl.H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Ok x));
|
||||
ok x)
|
||||
|
||||
let fix_memo f =
|
||||
let rec p =
|
||||
let p' = lazy (memo p) in
|
||||
fun st ~ok ~err -> f (Lazy.force p') st ~ok ~err
|
||||
in
|
||||
p
|
||||
|
||||
let get_lnum = fun st ~ok ~err:_ -> ok st.lnum
|
||||
let get_cnum = fun st ~ok ~err:_ -> ok st.cnum
|
||||
let get_pos = fun st ~ok ~err:_ -> ok (st.lnum, st.cnum)
|
||||
|
||||
let parse_exn p st =
|
||||
let res = ref None in
|
||||
p st ~ok:(fun x -> res := Some x) ~err:(fun e -> raise e);
|
||||
match !res with
|
||||
| None -> assert false
|
||||
| Some x -> x
|
||||
|
||||
let exn_to_err e =Result.Error (Printexc.to_string e)
|
||||
|
||||
let parse p st =
|
||||
try Result.Ok (parse_exn p st)
|
||||
with e -> exn_to_err e
|
||||
|
||||
let parse_string_exn p s = parse_exn p (state_of_string s)
|
||||
|
||||
let parse_string p s = parse p (state_of_string s)
|
||||
|
||||
let read_all_ ic =
|
||||
let buf = Buffer.create 1024 in
|
||||
begin
|
||||
try
|
||||
while true do
|
||||
let line = input_line ic in
|
||||
Buffer.add_string buf line;
|
||||
Buffer.add_char buf '\n';
|
||||
done;
|
||||
assert false
|
||||
with End_of_file -> ()
|
||||
end;
|
||||
Buffer.contents buf
|
||||
|
||||
let parse_file_exn p file =
|
||||
let ic = open_in file in
|
||||
let st = state_of_string (read_all_ ic) in
|
||||
try
|
||||
let res = parse_exn p st in
|
||||
close_in ic;
|
||||
res
|
||||
with e ->
|
||||
close_in_noerr ic;
|
||||
raise e
|
||||
|
||||
let parse_file p file =
|
||||
try Result.Ok (parse_file_exn p file)
|
||||
with e -> exn_to_err e
|
||||
|
||||
module Infix = struct
|
||||
let (>|=) = (>|=)
|
||||
let (>>=) = (>>=)
|
||||
let (<*>) = (<*>)
|
||||
let (<* ) = (<* )
|
||||
let ( *>) = ( *>)
|
||||
let (<|>) = (<|>)
|
||||
let (<?>) = (<?>)
|
||||
end
|
||||
|
||||
module U = struct
|
||||
let sep_ = sep
|
||||
|
||||
let list ?(start="[") ?(stop="]") ?(sep=";") p =
|
||||
string start *> skip_white *>
|
||||
sep_ ~by:(skip_white *> string sep *> skip_white) p <*
|
||||
skip_white <* string stop
|
||||
|
||||
let int =
|
||||
chars1_if (fun c -> is_num c || c='-')
|
||||
>>= fun s ->
|
||||
try return (int_of_string s)
|
||||
with Failure _ -> fail "expected an int"
|
||||
|
||||
let prepend_str c s = String.make 1 c ^ s
|
||||
|
||||
let word =
|
||||
map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num)
|
||||
|
||||
let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 =
|
||||
string start *> skip_white *>
|
||||
p1 >>= fun x1 ->
|
||||
skip_white *> string sep *> skip_white *>
|
||||
p2 >>= fun x2 ->
|
||||
string stop *> return (x1,x2)
|
||||
|
||||
let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 =
|
||||
string start *> skip_white *>
|
||||
p1 >>= fun x1 ->
|
||||
skip_white *> string sep *> skip_white *>
|
||||
p2 >>= fun x2 ->
|
||||
skip_white *> string sep *> skip_white *>
|
||||
p3 >>= fun x3 ->
|
||||
string stop *> return (x1,x2,x3)
|
||||
end
|
||||
367
src/core/CCParse.mli
Normal file
367
src/core/CCParse.mli
Normal file
|
|
@ -0,0 +1,367 @@
|
|||
|
||||
(* This file is free software. See file "license" for more details. *)
|
||||
|
||||
(** {1 Very Simple Parser Combinators}
|
||||
|
||||
{[
|
||||
open CCParse;;
|
||||
|
||||
type tree = L of int | N of tree * tree;;
|
||||
|
||||
let mk_leaf x = L x
|
||||
let mk_node x y = N(x,y)
|
||||
|
||||
let ptree = fix @@ fun self ->
|
||||
skip_space *>
|
||||
( (try_ (char '(') *> (pure mk_node <*> self <*> self) <* char ')')
|
||||
<|>
|
||||
(U.int >|= mk_leaf) )
|
||||
;;
|
||||
|
||||
parse_string_exn ptree "(1 (2 3))" ;;
|
||||
parse_string_exn ptree "((1 2) (3 (4 5)))" ;;
|
||||
|
||||
]}
|
||||
|
||||
{6 Parse a list of words}
|
||||
|
||||
{[
|
||||
open Containers_string.Parse;;
|
||||
let p = U.list ~sep:"," U.word;;
|
||||
parse_string_exn p "[abc , de, hello ,world ]";;
|
||||
]}
|
||||
|
||||
{6 Stress Test}
|
||||
This makes a list of 100_000 integers, prints it and parses it back.
|
||||
|
||||
{[
|
||||
let p = CCParse.(U.list ~sep:"," U.int);;
|
||||
|
||||
let l = CCList.(1 -- 100_000);;
|
||||
let l_printed =
|
||||
CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l;;
|
||||
|
||||
let l' = CCParse.parse_string_exn p l_printed;;
|
||||
|
||||
assert (l=l');;
|
||||
]}
|
||||
|
||||
*)
|
||||
|
||||
(*$inject
|
||||
module T = struct
|
||||
type tree = L of int | N of tree * tree
|
||||
end
|
||||
open T
|
||||
open Result
|
||||
|
||||
let mk_leaf x = L x
|
||||
let mk_node x y = N(x,y)
|
||||
|
||||
let ptree = fix @@ fun self ->
|
||||
skip_space *>
|
||||
( (try_ (char '(') *> (pure mk_node <*> self <*> self) <* char ')')
|
||||
<|>
|
||||
(U.int >|= mk_leaf) )
|
||||
|
||||
let ptree' = fix_memo @@ fun self ->
|
||||
skip_space *>
|
||||
( (try_ (char '(') *> (pure mk_node <*> self <*> self) <* char ')')
|
||||
<|>
|
||||
(U.int >|= mk_leaf) )
|
||||
|
||||
let rec pptree = function
|
||||
| N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b)
|
||||
| L x -> Printf.sprintf "L %d" x
|
||||
|
||||
let errpptree = function
|
||||
| Ok x -> "Ok " ^ pptree x
|
||||
| Error s -> "Error " ^ s
|
||||
*)
|
||||
|
||||
(*$= & ~printer:errpptree
|
||||
(Ok (N (L 1, N (L 2, L 3)))) \
|
||||
(parse_string ptree "(1 (2 3))" )
|
||||
(Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
|
||||
(parse_string ptree "((1 2) (3 (4 5)))" )
|
||||
(Ok (N (L 1, N (L 2, L 3)))) \
|
||||
(parse_string ptree' "(1 (2 3))" )
|
||||
(Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
|
||||
(parse_string ptree' "((1 2) (3 (4 5)))" )
|
||||
*)
|
||||
|
||||
(*$R
|
||||
let p = U.list ~sep:"," U.word in
|
||||
let printer = function
|
||||
| Ok l -> "Ok " ^ CCFormat.(to_string (list string)) l
|
||||
| Error s -> "Error " ^ s
|
||||
in
|
||||
assert_equal ~printer
|
||||
(Ok ["abc"; "de"; "hello"; "world"])
|
||||
(parse_string p "[abc , de, hello ,world ]");
|
||||
*)
|
||||
|
||||
(*$R
|
||||
let test n =
|
||||
let p = CCParse.(U.list ~sep:"," U.int) in
|
||||
|
||||
let l = CCList.(1 -- n) in
|
||||
let l_printed =
|
||||
CCFormat.(to_string (list ~start:"[" ~stop:"]" ~sep:"," int)) l in
|
||||
|
||||
let l' = CCParse.parse_string_exn p l_printed in
|
||||
|
||||
assert_equal ~printer:Q.Print.(list int) l l'
|
||||
in
|
||||
test 100_000;
|
||||
test 400_000;
|
||||
|
||||
*)
|
||||
|
||||
type 'a or_error = ('a, string) Result.result
|
||||
|
||||
type line_num = int
|
||||
type col_num = int
|
||||
|
||||
type parse_branch
|
||||
|
||||
val string_of_branch : parse_branch -> string
|
||||
|
||||
exception ParseError of parse_branch * (unit -> string)
|
||||
(** parsing branch * message *)
|
||||
|
||||
(** {2 Input} *)
|
||||
|
||||
type position
|
||||
|
||||
type state
|
||||
|
||||
val state_of_string : string -> state
|
||||
|
||||
(** {2 Combinators} *)
|
||||
|
||||
type 'a t = state -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
||||
(** Takes the input and two continuations:
|
||||
{ul
|
||||
{- [ok] to call with the result when it's done}
|
||||
{- [err] to call when the parser met an error}
|
||||
}
|
||||
@raise ParseError in case of failure *)
|
||||
|
||||
val return : 'a -> 'a t
|
||||
(** Always succeeds, without consuming its input *)
|
||||
|
||||
val pure : 'a -> 'a t
|
||||
(** Synonym to {!return} *)
|
||||
|
||||
val (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
||||
(** Map *)
|
||||
|
||||
val map : ('a -> 'b) -> 'a t -> 'b t
|
||||
|
||||
val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t
|
||||
|
||||
val map3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t
|
||||
|
||||
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
|
||||
(** Monadic bind *)
|
||||
|
||||
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
||||
(** Applicative *)
|
||||
|
||||
val (<* ) : 'a t -> _ t -> 'a t
|
||||
(** [a <* b] parses [a] into [x], parses [b] and ignores its result,
|
||||
and returns [x] *)
|
||||
|
||||
val ( *>) : _ t -> 'a t -> 'a t
|
||||
(** [a *> b] parses [a], then parses [b] into [x], and returns [x]. The
|
||||
results of [a] is ignored. *)
|
||||
|
||||
val fail : string -> 'a t
|
||||
(** [fail msg] fails with the given message. It can trigger a backtrack *)
|
||||
|
||||
val failf: ('a, unit, string, 'b t) format4 -> 'a
|
||||
(** [Format.sprintf] version of {!fail} *)
|
||||
|
||||
val parsing : string -> 'a t -> 'a t
|
||||
(** [parsing s p] behaves the same as [p], with the information that
|
||||
we are parsing [s], if [p] fails *)
|
||||
|
||||
val eoi : unit t
|
||||
(** Expect the end of input, fails otherwise *)
|
||||
|
||||
val nop : unit t
|
||||
(** Succeed with [()] *)
|
||||
|
||||
val char : char -> char t
|
||||
(** [char c] parses the char [c] and nothing else *)
|
||||
|
||||
val char_if : (char -> bool) -> char t
|
||||
(** [char_if f] parses a character [c] if [f c = true] *)
|
||||
|
||||
val chars_if : (char -> bool) -> string t
|
||||
(** [chars_if f] parses a string of chars that satisfy [f] *)
|
||||
|
||||
val chars1_if : (char -> bool) -> string t
|
||||
(** Same as {!chars_if}, but only non-empty strings *)
|
||||
|
||||
val endline : char t
|
||||
(** Parses '\n' *)
|
||||
|
||||
val space : char t
|
||||
(** Tab or space *)
|
||||
|
||||
val white : char t
|
||||
(** Tab or space or newline *)
|
||||
|
||||
val skip_chars : (char -> bool) -> unit t
|
||||
(** Skip 0 or more chars satisfying the predicate *)
|
||||
|
||||
val skip_space : unit t
|
||||
(** Skip ' ' and '\t' *)
|
||||
|
||||
val skip_white : unit t
|
||||
(** Skip ' ' and '\t' and '\n' *)
|
||||
|
||||
val is_alpha : char -> bool
|
||||
(** Is the char a letter? *)
|
||||
|
||||
val is_num : char -> bool
|
||||
(** Is the char a digit? *)
|
||||
|
||||
val is_alpha_num : char -> bool
|
||||
|
||||
val is_space : char -> bool
|
||||
(** True on ' ' and '\t' *)
|
||||
|
||||
val is_white : char -> bool
|
||||
(** True on ' ' and '\t' and '\n' *)
|
||||
|
||||
val (<|>) : 'a t -> 'a t -> 'a t
|
||||
(** [a <|> b] tries to parse [a], and if [a] fails without
|
||||
consuming any input, backtracks and tries
|
||||
to parse [b], otherwise it fails as [a].
|
||||
See {!try_} to ensure [a] does not consume anything (but it is best
|
||||
to avoid wrapping large parsers with {!try_}) *)
|
||||
|
||||
val (<?>) : 'a t -> string -> 'a t
|
||||
(** [a <?> msg] behaves like [a], but if [a] fails without
|
||||
consuming any input, it fails with [msg]
|
||||
instead. Useful as the last choice in a series of [<|>]:
|
||||
[a <|> b <|> c <?> "expected a|b|c"] *)
|
||||
|
||||
val try_ : 'a t -> 'a t
|
||||
(** [try_ p] tries to parse like [p], but backtracks if [p] fails.
|
||||
Useful in combination with [<|>] *)
|
||||
|
||||
val suspend : (unit -> 'a t) -> 'a t
|
||||
(** [suspend f] is the same as [f ()], but evaluates [f ()] only
|
||||
when needed *)
|
||||
|
||||
val string : string -> string t
|
||||
(** [string s] parses exactly the string [s], and nothing else *)
|
||||
|
||||
val many : 'a t -> 'a list t
|
||||
(** [many p] parses a list of [p], eagerly (as long as possible) *)
|
||||
|
||||
val many1 : 'a t -> 'a list t
|
||||
(** parses a non empty list *)
|
||||
|
||||
val skip : _ t -> unit t
|
||||
(** [skip p] parses zero or more times [p] and ignores its result *)
|
||||
|
||||
val sep : by:_ t -> 'a t -> 'a list t
|
||||
(** [sep ~by p] parses a list of [p] separated by [by] *)
|
||||
|
||||
val sep1 : by:_ t -> 'a t -> 'a list t
|
||||
(** [sep1 ~by p] parses a non empty list of [p], separated by [by] *)
|
||||
|
||||
|
||||
val fix : ('a t -> 'a t) -> 'a t
|
||||
(** Fixpoint combinator *)
|
||||
|
||||
val memo : 'a t -> 'a t
|
||||
(** Memoize the parser. [memo p] will behave like [p], but when called
|
||||
in a state (read: position in input) it has already processed, [memo p]
|
||||
returns a result directly. The implementation uses an underlying
|
||||
hashtable.
|
||||
This can be costly in memory, but improve the run time a lot if there
|
||||
is a lot of backtracking involving [p].
|
||||
|
||||
This function is not thread-safe. *)
|
||||
|
||||
val fix_memo : ('a t -> 'a t) -> 'a t
|
||||
(** Same as {!fix}, but the fixpoint is memoized. *)
|
||||
|
||||
val get_lnum : int t
|
||||
(** Reflects the current line number *)
|
||||
|
||||
val get_cnum : int t
|
||||
(** Reflects the current column number *)
|
||||
|
||||
val get_pos : (int * int) t
|
||||
(** Reflects the current (line, column) numbers *)
|
||||
|
||||
(** {2 Parse}
|
||||
|
||||
Those functions have a label [~p] on the parser, since 0.14.
|
||||
*)
|
||||
|
||||
val parse : 'a t -> state -> 'a or_error
|
||||
(** [parse p st] applies [p] on the input, and returns [Ok x] if
|
||||
[p] succeeds with [x], or [Error s] otherwise *)
|
||||
|
||||
val parse_exn : 'a t -> state -> 'a
|
||||
(** Unsafe version of {!parse}
|
||||
@raise ParseError if it fails *)
|
||||
|
||||
val parse_string : 'a t -> string -> 'a or_error
|
||||
(** Specialization of {!parse} for string inputs *)
|
||||
|
||||
val parse_string_exn : 'a t -> string -> 'a
|
||||
(** @raise ParseError if it fails *)
|
||||
|
||||
val parse_file : 'a t -> string -> 'a or_error
|
||||
(** [parse_file p file] parses [file] with [p] by opening the file
|
||||
and reading it whole. *)
|
||||
|
||||
val parse_file_exn : 'a t -> string -> 'a
|
||||
(** @raise ParseError if it fails *)
|
||||
|
||||
(** {2 Infix} *)
|
||||
|
||||
module Infix : sig
|
||||
val (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
||||
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
|
||||
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
||||
val (<* ) : 'a t -> _ t -> 'a t
|
||||
val ( *>) : _ t -> 'a t -> 'a t
|
||||
val (<|>) : 'a t -> 'a t -> 'a t
|
||||
val (<?>) : 'a t -> string -> 'a t
|
||||
end
|
||||
|
||||
(** {2 Utils}
|
||||
|
||||
This is useful to parse OCaml-like values in a simple way. *)
|
||||
|
||||
module U : sig
|
||||
val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t
|
||||
(** [list p] parses a list of [p], with the OCaml conventions for
|
||||
start token "[", stop token "]" and separator ";".
|
||||
Whitespace between items are skipped *)
|
||||
|
||||
val int : int t
|
||||
|
||||
val word : string t
|
||||
(** non empty string of alpha num, start with alpha *)
|
||||
|
||||
val pair : ?start:string -> ?stop:string -> ?sep:string ->
|
||||
'a t -> 'b t -> ('a * 'b) t
|
||||
(** Parse a pair using OCaml whitespace conventions.
|
||||
The default is "(a, b)". *)
|
||||
|
||||
val triple : ?start:string -> ?stop:string -> ?sep:string ->
|
||||
'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t
|
||||
(** Parse a triple using OCaml whitespace conventions.
|
||||
The default is "(a, b, c)". *)
|
||||
end
|
||||
Loading…
Add table
Reference in a new issue