mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
417 lines
10 KiB
OCaml
417 lines
10 KiB
OCaml
|
|
(* This file is free software. See file "license" for more details. *)
|
|
|
|
(** {1 Very Simple Parser Combinators} *)
|
|
|
|
type 'a or_error = ('a, string) Result.result
|
|
|
|
type line_num = int
|
|
type col_num = int
|
|
|
|
module MemoTbl = struct
|
|
module H = Hashtbl.Make(struct
|
|
type t = int * int (* id of parser, position *)
|
|
let equal ((a,b):t)(c,d) = a=c && b=d
|
|
let hash = Hashtbl.hash
|
|
end)
|
|
|
|
(* table of closures, used to implement universal type *)
|
|
type t = (unit -> unit) H.t lazy_t
|
|
|
|
let create n = lazy (H.create n)
|
|
|
|
(* unique ID for each parser *)
|
|
let id_ = ref 0
|
|
|
|
type 'a res =
|
|
| Fail of exn
|
|
| Ok of 'a
|
|
end
|
|
|
|
type position = int * int * int (* pos, line, column *)
|
|
|
|
type parse_branch = (line_num * col_num * string option) list
|
|
|
|
type state = {
|
|
str: string; (* the input *)
|
|
mutable i: int; (* offset *)
|
|
mutable lnum : line_num; (* Line number *)
|
|
mutable cnum : col_num; (* Column number *)
|
|
mutable branch: parse_branch;
|
|
memo : MemoTbl.t; (* Memoization table, if any *)
|
|
}
|
|
|
|
exception ParseError of parse_branch * (unit -> string)
|
|
|
|
let char_equal (a : char) b = Pervasives.(=) a b
|
|
let string_equal (a : string) b = Pervasives.(=) a b
|
|
|
|
let rec string_of_branch l =
|
|
let pp_s () = function
|
|
| None -> ""
|
|
| Some s -> Format.sprintf "while parsing %s, " s
|
|
in
|
|
match l with
|
|
| [] -> ""
|
|
| [l,c,s] ->
|
|
Format.sprintf "@[%aat line %d, col %d@]" pp_s s l c
|
|
| (l,c,s) :: tail ->
|
|
Format.sprintf "@[%aat line %d, col %d@]@,%s" pp_s s l c (string_of_branch tail)
|
|
|
|
let () = Printexc.register_printer
|
|
(function
|
|
| ParseError (b,msg) ->
|
|
Some (Format.sprintf "@[<v>%s@ %s@]" (msg()) (string_of_branch b))
|
|
| _ -> None)
|
|
|
|
let const_ x () = x
|
|
|
|
let state_of_string str =
|
|
let s = {
|
|
str;
|
|
i=0;
|
|
lnum=1;
|
|
cnum=1;
|
|
branch=[];
|
|
memo=MemoTbl.create 32;
|
|
} in
|
|
s
|
|
|
|
let is_done st = st.i = String.length st.str
|
|
let cur st = st.str.[st.i]
|
|
|
|
let fail_ ~err st msg =
|
|
let b = (st.lnum, st.cnum, None) :: st.branch in
|
|
err (ParseError (b, msg))
|
|
|
|
let next st ~ok ~err =
|
|
if st.i = String.length st.str
|
|
then fail_ st ~err (const_ "unexpected end of input")
|
|
else (
|
|
let c = st.str.[st.i] in
|
|
st.i <- st.i + 1;
|
|
if char_equal c '\n'
|
|
then (st.lnum <- st.lnum + 1; st.cnum <- 1)
|
|
else st.cnum <- st.cnum + 1;
|
|
ok c
|
|
)
|
|
|
|
let pos st = st.i, st.lnum, st.cnum
|
|
|
|
let backtrack st (i',l',c') =
|
|
assert (0 <= i' && i' <= st.i);
|
|
st.i <- i';
|
|
st.lnum <- l';
|
|
st.cnum <- c';
|
|
()
|
|
|
|
type 'a t = state -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
|
|
|
let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x
|
|
let pure = return
|
|
let (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
|
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> ok (f x))
|
|
let (>>=) : 'a t -> ('a -> 'b t) -> 'b t
|
|
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> f x st ~err ~ok)
|
|
let (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
|
= fun f x st ~ok ~err ->
|
|
f st ~err ~ok:(fun f' -> x st ~err ~ok:(fun x' -> ok (f' x')))
|
|
let (<* ) : 'a t -> _ t -> 'a t
|
|
= fun x y st ~ok ~err ->
|
|
x st ~err ~ok:(fun res -> y st ~err ~ok:(fun _ -> ok res))
|
|
let ( *>) : _ t -> 'a t -> 'a t
|
|
= fun x y st ~ok ~err ->
|
|
x st ~err ~ok:(fun _ -> y st ~err ~ok)
|
|
|
|
let map f x = x >|= f
|
|
let map2 f x y = pure f <*> x <*> y
|
|
let map3 f x y z = pure f <*> x <*> y <*> z
|
|
|
|
let junk_ st = next st ~err:(fun _ -> assert false) ~ok:ignore
|
|
|
|
let eoi st ~ok ~err =
|
|
if is_done st
|
|
then ok ()
|
|
else fail_ ~err st (const_ "expected EOI")
|
|
|
|
let fail msg st ~ok:_ ~err = fail_ ~err st (const_ msg)
|
|
let failf msg = Printf.ksprintf fail msg
|
|
|
|
let parsing s p st ~ok ~err =
|
|
st.branch <- (st.lnum, st.cnum, Some s) :: st.branch;
|
|
p st
|
|
~ok:(fun x -> st.branch <- List.tl st.branch; ok x)
|
|
~err:(fun e -> st.branch <- List.tl st.branch; err e)
|
|
|
|
let nop _ ~ok ~err:_ = ok()
|
|
|
|
let char c =
|
|
let msg = Printf.sprintf "expected '%c'" c in
|
|
fun st ~ok ~err ->
|
|
next st ~err
|
|
~ok:(fun c' -> if char_equal c c' then ok c else fail_ ~err st (const_ msg))
|
|
|
|
let char_if p st ~ok ~err =
|
|
next st ~err
|
|
~ok:(fun c ->
|
|
if p c then ok c
|
|
else fail_ ~err st (fun () -> Printf.sprintf "unexpected char '%c'" c)
|
|
)
|
|
|
|
let chars_if p st ~ok ~err:_ =
|
|
let i = st.i in
|
|
let len = ref 0 in
|
|
while not (is_done st) && p (cur st) do junk_ st; incr len done;
|
|
ok (String.sub st.str i !len)
|
|
|
|
let chars1_if p st ~ok ~err =
|
|
chars_if p st ~err
|
|
~ok:(fun s ->
|
|
if string_equal s ""
|
|
then fail_ ~err st (const_ "unexpected sequence of chars")
|
|
else ok s)
|
|
|
|
let rec skip_chars p st ~ok ~err =
|
|
if not (is_done st) && p (cur st) then (
|
|
junk_ st;
|
|
skip_chars p st ~ok ~err
|
|
) else ok()
|
|
|
|
let is_alpha = function
|
|
| 'a' .. 'z' | 'A' .. 'Z' -> true
|
|
| _ -> false
|
|
let is_num = function '0' .. '9' -> true | _ -> false
|
|
let is_alpha_num = function
|
|
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
|
|
| _ -> false
|
|
let is_space = function ' ' | '\t' -> true | _ -> false
|
|
let is_white = function ' ' | '\t' | '\n' -> true | _ -> false
|
|
|
|
let space = char_if is_space
|
|
let white = char_if is_white
|
|
|
|
let endline st ~ok ~err =
|
|
next st ~err
|
|
~ok:(function
|
|
| '\n' as c -> ok c
|
|
| _ -> fail_ ~err st (const_ "expected end-of-line"))
|
|
|
|
let skip_space = skip_chars is_space
|
|
let skip_white = skip_chars is_white
|
|
|
|
let (<|>) : 'a t -> 'a t -> 'a t
|
|
= fun x y st ~ok ~err ->
|
|
let i = st.i in
|
|
x st ~ok
|
|
~err:(fun e ->
|
|
let j = st.i in
|
|
if i=j then y st ~ok ~err (* try [y] *)
|
|
else err e (* fail *)
|
|
)
|
|
|
|
let try_ : 'a t -> 'a t
|
|
= fun p st ~ok ~err ->
|
|
let i = pos st in
|
|
p st ~ok
|
|
~err:(fun e ->
|
|
backtrack st i;
|
|
err e)
|
|
|
|
let suspend f st ~ok ~err = f () st ~ok ~err
|
|
|
|
let (<?>) : 'a t -> string -> 'a t
|
|
= fun x msg st ~ok ~err ->
|
|
let i = st.i in
|
|
x st ~ok
|
|
~err:(fun e ->
|
|
if st.i = i
|
|
then fail_ ~err st (fun () -> msg)
|
|
else err e)
|
|
|
|
let string s st ~ok ~err =
|
|
let rec check i =
|
|
if i = String.length s then ok s
|
|
else
|
|
next st ~err
|
|
~ok:(fun c ->
|
|
if char_equal c s.[i]
|
|
then check (i+1)
|
|
else fail_ ~err st (fun () -> Printf.sprintf "expected \"%s\"" s))
|
|
in
|
|
check 0
|
|
|
|
let rec many_rec : 'a t -> 'a list -> 'a list t = fun p acc st ~ok ~err ->
|
|
if is_done st then ok(List.rev acc)
|
|
else
|
|
p st ~err
|
|
~ok:(fun x ->
|
|
let i = pos st in
|
|
many_rec p (x :: acc) st ~ok
|
|
~err:(fun _ ->
|
|
backtrack st i;
|
|
ok(List.rev acc))
|
|
)
|
|
|
|
let many : 'a t -> 'a list t
|
|
= fun p st ~ok ~err -> many_rec p [] st ~ok ~err
|
|
|
|
let many1 : 'a t -> 'a list t =
|
|
fun p st ~ok ~err ->
|
|
p st ~err ~ok:(fun x -> many_rec p [x] st ~err ~ok)
|
|
|
|
let rec skip p st ~ok ~err =
|
|
let i = pos st in
|
|
p st
|
|
~ok:(fun _ -> skip p st ~ok ~err)
|
|
~err:(fun _ ->
|
|
backtrack st i;
|
|
ok()
|
|
)
|
|
|
|
(* by (sep1 ~by p) *)
|
|
let rec sep_rec ~by p = try_ by *> sep1 ~by p
|
|
|
|
and sep1 ~by p =
|
|
p >>= fun x ->
|
|
(sep_rec ~by p >|= fun tl -> x::tl)
|
|
<|> return [x]
|
|
|
|
let sep ~by p =
|
|
(try_ p >>= fun x ->
|
|
(sep_rec ~by p >|= fun tl -> x::tl)
|
|
<|> return [x])
|
|
<|> return []
|
|
|
|
let fix f =
|
|
let rec p st ~ok ~err = f p st ~ok ~err in
|
|
p
|
|
|
|
let memo (type a) (p:a t):a t =
|
|
let id = !MemoTbl.id_ in
|
|
incr MemoTbl.id_;
|
|
let r = ref None in (* used for universal encoding *)
|
|
fun st ~ok ~err ->
|
|
let i = st.i in
|
|
let (lazy tbl) = st.memo in
|
|
try
|
|
let f = MemoTbl.H.find tbl (i,id) in
|
|
(* extract hidden value *)
|
|
r := None;
|
|
f ();
|
|
begin match !r with
|
|
| None -> assert false
|
|
| Some (MemoTbl.Ok x) -> ok x
|
|
| Some (MemoTbl.Fail e) -> err e
|
|
end
|
|
with Not_found ->
|
|
(* parse, and save *)
|
|
p st
|
|
~err:(fun e ->
|
|
MemoTbl.H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Fail e));
|
|
err e)
|
|
~ok:(fun x ->
|
|
MemoTbl.H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Ok x));
|
|
ok x)
|
|
|
|
let fix_memo f =
|
|
let rec p =
|
|
let p' = lazy (memo p) in
|
|
fun st ~ok ~err -> f (Lazy.force p') st ~ok ~err
|
|
in
|
|
p
|
|
|
|
let get_lnum = fun st ~ok ~err:_ -> ok st.lnum
|
|
let get_cnum = fun st ~ok ~err:_ -> ok st.cnum
|
|
let get_pos = fun st ~ok ~err:_ -> ok (st.lnum, st.cnum)
|
|
|
|
let parse_exn p st =
|
|
let res = ref None in
|
|
p st ~ok:(fun x -> res := Some x) ~err:(fun e -> raise e);
|
|
match !res with
|
|
| None -> assert false
|
|
| Some x -> x
|
|
|
|
let exn_to_err e =Result.Error (Printexc.to_string e)
|
|
|
|
let parse p st =
|
|
try Result.Ok (parse_exn p st)
|
|
with e -> exn_to_err e
|
|
|
|
let parse_string_exn p s = parse_exn p (state_of_string s)
|
|
|
|
let parse_string p s = parse p (state_of_string s)
|
|
|
|
let read_all_ ic =
|
|
let buf = Buffer.create 1024 in
|
|
begin
|
|
try
|
|
while true do
|
|
let line = input_line ic in
|
|
Buffer.add_string buf line;
|
|
Buffer.add_char buf '\n';
|
|
done;
|
|
assert false
|
|
with End_of_file -> ()
|
|
end;
|
|
Buffer.contents buf
|
|
|
|
let parse_file_exn p file =
|
|
let ic = open_in file in
|
|
let st = state_of_string (read_all_ ic) in
|
|
try
|
|
let res = parse_exn p st in
|
|
close_in ic;
|
|
res
|
|
with e ->
|
|
close_in_noerr ic;
|
|
raise e
|
|
|
|
let parse_file p file =
|
|
try Result.Ok (parse_file_exn p file)
|
|
with e -> exn_to_err e
|
|
|
|
module Infix = struct
|
|
let (>|=) = (>|=)
|
|
let (>>=) = (>>=)
|
|
let (<*>) = (<*>)
|
|
let (<* ) = (<* )
|
|
let ( *>) = ( *>)
|
|
let (<|>) = (<|>)
|
|
let (<?>) = (<?>)
|
|
end
|
|
|
|
module U = struct
|
|
let sep_ = sep
|
|
|
|
let list ?(start="[") ?(stop="]") ?(sep=";") p =
|
|
string start *> skip_white *>
|
|
sep_ ~by:(skip_white *> string sep *> skip_white) p <*
|
|
skip_white <* string stop
|
|
|
|
let int =
|
|
chars1_if (fun c -> is_num c || char_equal c '-')
|
|
>>= fun s ->
|
|
try return (int_of_string s)
|
|
with Failure _ -> fail "expected an int"
|
|
|
|
let prepend_str c s = String.make 1 c ^ s
|
|
|
|
let word =
|
|
map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num)
|
|
|
|
let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 =
|
|
string start *> skip_white *>
|
|
p1 >>= fun x1 ->
|
|
skip_white *> string sep *> skip_white *>
|
|
p2 >>= fun x2 ->
|
|
string stop *> return (x1,x2)
|
|
|
|
let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 =
|
|
string start *> skip_white *>
|
|
p1 >>= fun x1 ->
|
|
skip_white *> string sep *> skip_white *>
|
|
p2 >>= fun x2 ->
|
|
skip_white *> string sep *> skip_white *>
|
|
p3 >>= fun x3 ->
|
|
string stop *> return (x1,x2,x3)
|
|
end
|