mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
1164 lines
29 KiB
OCaml
1164 lines
29 KiB
OCaml
(* This file is free software. See file "license" for more details. *)
|
|
|
|
(** {1 Very Simple Parser Combinators} *)
|
|
|
|
open CCShims_
|
|
|
|
(*$inject
|
|
module T = struct
|
|
type tree = L of int | N of tree * tree
|
|
end
|
|
open T
|
|
|
|
let mk_leaf x = L x
|
|
let mk_node x y = N(x,y)
|
|
|
|
let ptree = fix @@ fun self ->
|
|
skip_space *>
|
|
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
|
|
<|>
|
|
(U.int >|= mk_leaf) )
|
|
|
|
let ptree' = fix_memo @@ fun self ->
|
|
skip_space *>
|
|
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
|
|
<|>
|
|
(U.int >|= mk_leaf) )
|
|
|
|
let rec pptree = function
|
|
| N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b)
|
|
| L x -> Printf.sprintf "L %d" x
|
|
|
|
let errpp pp = function
|
|
| Ok x -> "Ok " ^ pp x
|
|
| Error s -> "Error " ^ s
|
|
|
|
let errpptree = errpp pptree
|
|
|
|
let erreq eq x y = match x, y with
|
|
| Ok x, Ok y -> eq x y
|
|
| Error _ , Error _ -> true
|
|
| _ -> false ;;
|
|
*)
|
|
|
|
(*$= & ~printer:errpptree
|
|
(Ok (N (L 1, N (L 2, L 3)))) \
|
|
(parse_string ptree "(1 (2 3))" )
|
|
(Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
|
|
(parse_string ptree "((1 2) (3 (4 5)))" )
|
|
(Ok (N (L 1, N (L 2, L 3)))) \
|
|
(parse_string ptree' "(1 (2 3))" )
|
|
(Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
|
|
(parse_string ptree' "((1 2) (3 (4 5)))" )
|
|
*)
|
|
|
|
(*$R
|
|
let p = U.list ~sep:"," U.word in
|
|
let printer = function
|
|
| Ok l -> "Ok " ^ CCFormat.(to_string (Dump.list string_quoted)) l
|
|
| Error s -> "Error " ^ s
|
|
in
|
|
assert_equal ~printer
|
|
(Ok ["abc"; "de"; "hello"; "world"])
|
|
(parse_string p "[abc , de, hello ,world ]");
|
|
*)
|
|
|
|
(*$R
|
|
let test n =
|
|
let p = CCParse.(U.list ~sep:"," U.int) in
|
|
|
|
let l = CCList.(1 -- n) in
|
|
let l_printed =
|
|
CCFormat.(to_string (within "[" "]" (list ~sep:(return ",") int))) l in
|
|
|
|
let l' = CCParse.parse_string_exn p l_printed in
|
|
|
|
assert_equal ~printer:Q.Print.(list int) l l'
|
|
in
|
|
test 300_000;
|
|
|
|
*)
|
|
|
|
(*$R
|
|
let open CCParse.Infix in
|
|
let module P = CCParse in
|
|
|
|
let parens p = P.char '(' *> p <* P.char ')' in
|
|
let add = P.char '+' *> P.return (+) in
|
|
let sub = P.char '-' *> P.return (-) in
|
|
let mul = P.char '*' *> P.return ( * ) in
|
|
let div = P.char '/' *> P.return ( / ) in
|
|
let integer =
|
|
P.chars1_if (function '0'..'9'->true|_->false) >|= int_of_string in
|
|
|
|
let chainl1 e op =
|
|
P.fix (fun r ->
|
|
e >>= fun x -> (op <*> P.return x <*> r) <|> P.return x) in
|
|
|
|
let expr : int P.t =
|
|
P.fix (fun expr ->
|
|
let factor = parens expr <|> integer in
|
|
let term = chainl1 factor (mul <|> div) in
|
|
chainl1 term (add <|> sub)) in
|
|
|
|
assert_equal (Ok 6) (P.parse_string expr "4*1+2");
|
|
assert_equal (Ok 12) (P.parse_string expr "4*(1+2)");
|
|
()
|
|
*)
|
|
|
|
(* TODO: [type position = {state: state; i: int}] and recompute line, col
|
|
on demand *)
|
|
type position = {
|
|
pos_buffer: string;
|
|
pos_offset: int;
|
|
mutable pos_lc: (int * int) option;
|
|
}
|
|
|
|
module Position = struct
|
|
type t = position
|
|
|
|
(* actually re-compute line and column from the buffer *)
|
|
let compute_line_and_col_ (s:string) (off:int) : int * int =
|
|
let i = ref 0 in
|
|
let continue = ref true in
|
|
let line = ref 1 in
|
|
let col = ref 1 in
|
|
while !continue && !i < off && !i < String.length s do
|
|
match String.index_from s !i '\n' with
|
|
| exception Not_found ->
|
|
col := off - !i; continue := false;
|
|
| j when j > off ->
|
|
col := off - !i; continue := false;
|
|
| j -> incr line; i := j+1;
|
|
done;
|
|
!line, !col
|
|
|
|
let line_and_column self =
|
|
match self.pos_lc with
|
|
| Some tup -> tup
|
|
| None ->
|
|
let tup = compute_line_and_col_ self.pos_buffer self.pos_offset in
|
|
self.pos_lc <- Some tup; (* save *)
|
|
tup
|
|
|
|
let line self = fst (line_and_column self)
|
|
let column self = snd (line_and_column self)
|
|
let pp out self =
|
|
let l, c = line_and_column self in
|
|
Format.fprintf out "at line %d, column %d" l c
|
|
end
|
|
|
|
module Error = struct
|
|
type t = {
|
|
msg: unit -> string;
|
|
pos: position;
|
|
}
|
|
|
|
let position self = self.pos
|
|
let line_and_column self = Position.line_and_column self.pos
|
|
|
|
let msg self = self.msg()
|
|
let to_string self =
|
|
let line,col = line_and_column self in
|
|
Printf.sprintf "at line %d, char %d: %s" line col (self.msg())
|
|
|
|
let pp out self =
|
|
let line,col = line_and_column self in
|
|
Format.fprintf out "@[<hv>at line %d, char %d:@ %s@]" line col (self.msg())
|
|
end
|
|
|
|
type +'a or_error = ('a, Error.t) result
|
|
|
|
module Memo_tbl = Hashtbl.Make(struct
|
|
type t = int * int (* id of parser, position *)
|
|
let equal ((a,b):t)(c,d) = a=c && b=d
|
|
let hash = Hashtbl.hash
|
|
end)
|
|
|
|
module Memo_state = struct
|
|
(* table of closures, used to implement universal type *)
|
|
type t = (unit -> unit) Memo_tbl.t
|
|
|
|
(* unique ID for each parser *)
|
|
let id_ = ref 0
|
|
end
|
|
|
|
(** Purely functional state passed around *)
|
|
type state = {
|
|
str: string; (* the input *)
|
|
i: int; (* offset in [str] *)
|
|
j: int; (* end pointer in [str], excluded. [len = j-i] *)
|
|
memo : Memo_state.t option ref; (* Memoization table, if any *)
|
|
}
|
|
(* FIXME: replace memo with:
|
|
[global : global_st ref]
|
|
|
|
where:
|
|
[type global = {
|
|
mutable memo: Memo_state.t option;
|
|
line_offsets: int CCVector.vector;
|
|
}
|
|
|
|
with line_offsets used to cache the offset where each line begins,
|
|
and is computed lazily, to make {!Position.line_and_column}
|
|
faster if called many times.
|
|
*)
|
|
|
|
let[@inline] char_equal (a : char) b = Stdlib.(=) a b
|
|
let string_equal = String.equal
|
|
|
|
(* FIXME: printer for error
|
|
let () = Printexc.register_printer
|
|
(function
|
|
| ParseError (b,msg) ->
|
|
Some (Format.sprintf "@[<v>%s@ %s@]" (msg()) (string_of_branch b))
|
|
| _ -> None)
|
|
*)
|
|
|
|
let[@inline] const_str_ x () : string = x
|
|
|
|
let state_of_string str =
|
|
let s = {
|
|
str;
|
|
i=0;
|
|
j=String.length str;
|
|
memo=ref None;
|
|
} in
|
|
s
|
|
|
|
let[@inline] is_done st = st.i >= st.j
|
|
let[@inline] cur st = st.str.[st.i]
|
|
|
|
let pos_of_st_ st : position = {pos_buffer=st.str; pos_offset=st.i; pos_lc=None}
|
|
let mk_error_ st msg : Error.t = {Error.msg; pos=pos_of_st_ st}
|
|
|
|
(* consume one char, passing it to [ok]. *)
|
|
let consume_ st ~ok ~err =
|
|
if is_done st then (
|
|
let msg = const_str_ "unexpected end of input" in
|
|
err (mk_error_ st msg)
|
|
) else (
|
|
let c = st.str.[st.i] in
|
|
ok {st with i=st.i + 1} c
|
|
)
|
|
|
|
type 'a t = {
|
|
run: 'b. state -> ok:(state -> 'a -> 'b) -> err:(Error.t -> 'b) -> 'b;
|
|
} [@@unboxed]
|
|
(** Takes the input and two continuations:
|
|
{ul
|
|
{- [ok] to call with the result and new state when it's done}
|
|
{- [err] to call when the parser met an error}
|
|
}
|
|
*)
|
|
|
|
let return x : _ t = {
|
|
run=fun st ~ok ~err:_ -> ok st x
|
|
}
|
|
|
|
let pure = return
|
|
|
|
let map f (p: 'a t) : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st
|
|
~ok:(fun st x -> ok st (f x))
|
|
~err
|
|
}
|
|
|
|
let bind f (p:'a t) : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st
|
|
~ok:(fun st x ->
|
|
let p2 = f x in
|
|
p2.run st ~ok ~err)
|
|
~err
|
|
}
|
|
|
|
let ap (f:_ t) (a:_ t) : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
f.run st
|
|
~ok:(fun st f ->
|
|
a.run st ~ok:(fun st x -> ok st (f x)) ~err)
|
|
~err
|
|
}
|
|
|
|
let ap_left (a:_ t) (b:_ t) : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
a.run st
|
|
~ok:(fun st x ->
|
|
b.run st ~ok:(fun st _ -> ok st x) ~err)
|
|
~err
|
|
}
|
|
|
|
let ap_right (a:_ t) (b:_ t) : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
a.run st
|
|
~ok:(fun st _ ->
|
|
b.run st ~ok:(fun st x -> ok st x) ~err)
|
|
~err
|
|
}
|
|
|
|
let or_ (p1:'a t) (p2:'a t) : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
p1.run st ~ok
|
|
~err:(fun _e -> p2.run st ~ok ~err)
|
|
}
|
|
|
|
let both a b = {
|
|
run=fun st ~ok ~err ->
|
|
a.run st
|
|
~ok:(fun st xa ->
|
|
b.run st ~ok:(fun st xb -> ok st (xa,xb)) ~err)
|
|
~err
|
|
}
|
|
|
|
let set_error_message msg (p:'a t) : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st ~ok
|
|
~err:(fun _e -> err (mk_error_ st (const_str_ msg)))
|
|
}
|
|
|
|
|
|
module Infix = struct
|
|
let[@inline] (>|=) p f = map f p
|
|
let[@inline] (>>=) p f = bind f p
|
|
let (<*>) = ap
|
|
let (<* ) = ap_left
|
|
let ( *>) = ap_right
|
|
let (<|>) = or_
|
|
let (|||) = both
|
|
let[@inline] (<?>) p msg = set_error_message msg p
|
|
|
|
include CCShimsMkLet_.Make(struct
|
|
type nonrec 'a t = 'a t
|
|
let (>>=) = (>>=)
|
|
let (>|=) = (>|=)
|
|
let monoid_product = both
|
|
end)
|
|
end
|
|
|
|
include Infix
|
|
|
|
let map2 f x y = pure f <*> x <*> y
|
|
let map3 f x y z = pure f <*> x <*> y <*> z
|
|
|
|
let junk_ (st:state) : state =
|
|
assert (st.i < st.j);
|
|
{st with i=st.i + 1}
|
|
|
|
let eoi = {
|
|
run=fun st ~ok ~err ->
|
|
if is_done st
|
|
then ok st ()
|
|
else err (mk_error_ st (const_str_ "expected end of input"))
|
|
}
|
|
|
|
(*$= & ~printer:(errpp Q.Print.bool) ~cmp:(erreq (=))
|
|
(Ok true) (parse_string (U.bool <* eoi) "true")
|
|
(Error "") (parse_string (U.bool <* eoi) "true ")
|
|
(Ok true) (parse_string (U.bool <* skip_white <* eoi) "true")
|
|
*)
|
|
|
|
let with_pos p : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st
|
|
~ok:(fun st' x ->
|
|
ok st' (x, pos_of_st_ st))
|
|
~err
|
|
}
|
|
|
|
(* a slice is just a state, which makes {!recurse} quite easy. *)
|
|
type slice = state
|
|
|
|
module Slice = struct
|
|
type t = slice
|
|
let length sl = sl.j - sl.i
|
|
let is_empty sl = sl.i = sl.j
|
|
let to_string sl = String.sub sl.str sl.i (length sl)
|
|
end
|
|
|
|
let recurse slice p : _ t = {
|
|
run=fun _st ~ok ~err ->
|
|
(* make sure these states are related. all slices share the
|
|
same reference as the initial state they derive from. *)
|
|
assert CCShims_.Stdlib.(_st.memo == slice.memo);
|
|
p.run slice ~ok ~err
|
|
}
|
|
|
|
let all = {
|
|
run=fun st ~ok ~err:_ ->
|
|
if is_done st then ok st st
|
|
else (
|
|
let st_done = {st with i=st.j} in
|
|
ok st_done st
|
|
)
|
|
}
|
|
|
|
let all_str = all >|= Slice.to_string
|
|
|
|
(*$= & ~printer:(errpp Q.Print.string) ~cmp:(erreq (=))
|
|
(Ok "abcd") (parse_string all_str "abcd")
|
|
(Ok "cd") (parse_string (string "ab" *> all_str) "abcd")
|
|
(Ok "") (parse_string (string "ab" *> all_str) "ab")
|
|
*)
|
|
|
|
(*$= & ~printer:(errpp Q.Print.(pair string string)) ~cmp:(erreq (=))
|
|
(Ok ("foobar", "")) (parse_string (both all_str all_str) "foobar")
|
|
*)
|
|
|
|
let fail msg : _ t = {
|
|
run=fun st ~ok:_ ~err ->
|
|
err (mk_error_ st (const_str_ msg))
|
|
}
|
|
let failf msg = Printf.ksprintf fail msg
|
|
let fail_lazy msg = {
|
|
run=fun st ~ok:_ ~err ->
|
|
err (mk_error_ st msg)
|
|
}
|
|
|
|
let parsing what p = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st ~ok
|
|
~err:(fun e ->
|
|
let msg() =
|
|
Printf.sprintf "while parsing %s:\n%s" what (e.Error.msg ())
|
|
in
|
|
err {e with Error.msg})
|
|
}
|
|
|
|
let empty = {
|
|
run=fun st ~ok ~err:_ -> ok st ();
|
|
}
|
|
let nop = empty
|
|
|
|
let any_char = {
|
|
run=fun st ~ok ~err -> consume_ st ~ok ~err
|
|
}
|
|
|
|
let char c : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
consume_ st
|
|
~ok:(fun st c2 ->
|
|
if char_equal c c2 then ok st c
|
|
else (
|
|
let msg() = Printf.sprintf "expected '%c', got '%c'" c c2 in
|
|
err (mk_error_ st msg)
|
|
))
|
|
~err
|
|
}
|
|
|
|
let char_if ?descr p = {
|
|
run=fun st ~ok ~err ->
|
|
consume_ st
|
|
~ok:(fun st c ->
|
|
if p c then ok st c
|
|
else (
|
|
let msg() =
|
|
let rest = match descr with
|
|
| None -> ""
|
|
| Some d -> Printf.sprintf ", expected %s" d
|
|
in
|
|
Printf.sprintf "unexpected char '%c'%s" c rest
|
|
in
|
|
err (mk_error_ st msg)
|
|
))
|
|
~err
|
|
}
|
|
|
|
let take_if p : slice t = {
|
|
run=fun st ~ok ~err:_ ->
|
|
let i = ref st.i in
|
|
while
|
|
let st = {st with i = !i} in
|
|
not (is_done st) && p (cur st)
|
|
do
|
|
incr i;
|
|
done;
|
|
ok {st with i = !i} {st with j= !i}
|
|
}
|
|
|
|
let take1_if ?descr p =
|
|
take_if p >>= fun sl ->
|
|
if Slice.is_empty sl then (
|
|
let msg() =
|
|
let what = match descr with
|
|
| None -> ""
|
|
| Some d -> Printf.sprintf " for %s" d
|
|
in
|
|
Printf.sprintf "expected non-empty sequence of chars%s" what
|
|
in
|
|
fail_lazy msg
|
|
) else (
|
|
return sl
|
|
)
|
|
|
|
let chars_if p = take_if p >|= Slice.to_string
|
|
|
|
let chars1_if ?descr p = {
|
|
run=fun st ~ok ~err ->
|
|
(chars_if p).run st
|
|
~ok:(fun st s ->
|
|
if string_equal s ""
|
|
then (
|
|
let msg() =
|
|
let what = match descr with
|
|
| None -> ""
|
|
| Some d -> Printf.sprintf " for %s" d
|
|
in
|
|
Printf.sprintf "expected non-empty sequence of chars%s" what
|
|
in
|
|
err (mk_error_ st msg)
|
|
) else ok st s)
|
|
~err
|
|
}
|
|
|
|
exception Fold_fail of state * string
|
|
|
|
let chars_fold ~f acc0 = {
|
|
run=fun st ~ok ~err ->
|
|
let i0 = st.i in
|
|
let i = ref i0 in
|
|
let acc = ref acc0 in
|
|
let continue = ref true in
|
|
try
|
|
while !continue do
|
|
let st = {st with i = !i} in
|
|
if is_done st then (
|
|
continue := false;
|
|
) else (
|
|
let c = cur st in
|
|
match f !acc c with
|
|
| `Continue acc' ->
|
|
incr i;
|
|
acc := acc'
|
|
| `Stop a -> acc := a; continue := false;
|
|
| `Consume_and_stop a -> acc := a; incr i; continue := false
|
|
| `Fail msg -> raise (Fold_fail (st,msg))
|
|
)
|
|
done;
|
|
ok {st with i= !i} (!acc, {st with j= !i})
|
|
with Fold_fail (st,msg) -> err (mk_error_ st (const_str_ msg))
|
|
}
|
|
|
|
let chars_fold_transduce ~f acc0 = {
|
|
run=fun st ~ok ~err ->
|
|
let i0 = st.i in
|
|
let i = ref i0 in
|
|
let acc = ref acc0 in
|
|
let continue = ref true in
|
|
let buf = Buffer.create 16 in
|
|
try
|
|
while !continue do
|
|
let st = {st with i = !i} in
|
|
if is_done st then (
|
|
continue := false;
|
|
) else (
|
|
let c = cur st in
|
|
match f !acc c with
|
|
| `Continue acc' ->
|
|
incr i;
|
|
acc := acc'
|
|
| `Yield (acc', c') ->
|
|
incr i;
|
|
acc := acc';
|
|
Buffer.add_char buf c';
|
|
| `Stop -> continue := false;
|
|
| `Consume_and_stop -> incr i; continue := false
|
|
| `Fail msg -> raise (Fold_fail (st,msg))
|
|
)
|
|
done;
|
|
ok {st with i= !i} (!acc, Buffer.contents buf)
|
|
with Fold_fail (st,msg) -> err (mk_error_ st (const_str_ msg))
|
|
}
|
|
|
|
let skip_chars p : _ t =
|
|
let rec self = {
|
|
run=fun st ~ok ~err ->
|
|
if not (is_done st) && p (cur st) then (
|
|
let st = junk_ st in
|
|
self.run st ~ok ~err
|
|
) else ok st ()
|
|
}
|
|
in
|
|
self
|
|
|
|
let is_alpha = function
|
|
| 'a' .. 'z' | 'A' .. 'Z' -> true
|
|
| _ -> false
|
|
let is_num = function '0' .. '9' -> true | _ -> false
|
|
let is_alpha_num = function
|
|
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
|
|
| _ -> false
|
|
let is_space = function ' ' | '\t' -> true | _ -> false
|
|
let is_white = function ' ' | '\t' | '\n' -> true | _ -> false
|
|
|
|
let space = char_if is_space
|
|
let white = char_if is_white
|
|
|
|
let endline =
|
|
char_if ~descr:"end-of-line ('\\n')" (function '\n' -> true | _ -> false)
|
|
let skip_space = skip_chars is_space
|
|
let skip_white = skip_chars is_white
|
|
|
|
let try_or p1 ~f ~else_:p2 = {
|
|
run=fun st ~ok ~err ->
|
|
p1.run st
|
|
~ok:(fun st x -> (f x).run st ~ok ~err)
|
|
~err:(fun _ -> p2.run st ~ok ~err)
|
|
}
|
|
|
|
let try_or_l ?(msg="try_or_l ran out of options") ?else_ l : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
let rec loop = function
|
|
| (test, p) :: tl ->
|
|
test.run st
|
|
~ok:(fun _ _ -> p.run st ~ok ~err) (* commit *)
|
|
~err:(fun _ -> loop tl)
|
|
| [] ->
|
|
begin match else_ with
|
|
| None -> err (mk_error_ st (const_str_ msg))
|
|
| Some p -> p.run st ~ok ~err
|
|
end
|
|
in
|
|
loop l
|
|
}
|
|
|
|
let suspend f = {
|
|
run=fun st ~ok ~err ->
|
|
let p = f () in
|
|
p.run st ~ok ~err
|
|
}
|
|
|
|
(* read [len] chars at once *)
|
|
let take len : slice t = {
|
|
run=fun st ~ok ~err ->
|
|
if st.i + len <= st.j then (
|
|
let slice = {st with j = st.i + len} in
|
|
let st = {st with i = st.i + len} in
|
|
ok st slice
|
|
) else (
|
|
let msg() =
|
|
Printf.sprintf "expected to be able to consume %d chars" len
|
|
in
|
|
err (mk_error_ st msg)
|
|
)
|
|
}
|
|
|
|
let any_chars len : _ t = take len >|= Slice.to_string
|
|
|
|
let exact s = {
|
|
run=fun st ~ok ~err ->
|
|
(* parse a string of length [String.length s] and compare with [s] *)
|
|
(any_chars (String.length s)).run st
|
|
~ok:(fun st s2 ->
|
|
if string_equal s s2 then ok st s
|
|
else (
|
|
let msg() = Printf.sprintf "expected %S, got %S" s s2 in
|
|
err (mk_error_ st msg)
|
|
)
|
|
)
|
|
~err
|
|
}
|
|
|
|
let string = exact
|
|
|
|
let fix f =
|
|
let rec self = {
|
|
run=fun st ~ok ~err ->
|
|
(Lazy.force f_self).run st ~ok ~err
|
|
}
|
|
and f_self = lazy (f self) in
|
|
self
|
|
|
|
let try_ p = p
|
|
|
|
let try_opt p : _ t = {
|
|
run=fun st ~ok ~err:_ ->
|
|
p.run st
|
|
~ok:(fun st x -> ok st (Some x))
|
|
~err:(fun _ -> ok st None)
|
|
}
|
|
|
|
let optional p : _ t = {
|
|
run=fun st ~ok ~err:_ ->
|
|
p.run st
|
|
~ok:(fun st _x -> ok st ())
|
|
~err:(fun _ -> ok st ())
|
|
}
|
|
|
|
let many_until ~until p : _ t =
|
|
fix
|
|
(fun self ->
|
|
try_or until ~f:(fun _ -> pure [])
|
|
~else_:(
|
|
p >>= fun x ->
|
|
self >|= fun l -> x :: l
|
|
))
|
|
|
|
let many p : _ t =
|
|
fix
|
|
(fun self ->
|
|
try_or p
|
|
~f:(fun x -> self >|= fun tl -> x :: tl)
|
|
(pure []))
|
|
|
|
(*
|
|
(* parse many [p], as a difference list *)
|
|
let many_rec_ p : (_ list -> _ list) t =
|
|
let rec self = {
|
|
run=fun st ~ok ~err ->
|
|
if is_done st then ok st (fun l->l) (* empty list *)
|
|
else (
|
|
p.run st
|
|
~ok:(fun st x ->
|
|
self.run st
|
|
~ok:(fun st f -> ok st (fun l -> x :: f l))
|
|
~err)
|
|
~err
|
|
)
|
|
} in
|
|
self
|
|
|
|
let many p : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
(many_rec_ p).run st
|
|
~ok:(fun st f -> ok st (f []))
|
|
~err
|
|
}
|
|
*)
|
|
|
|
(*$R
|
|
let p0 = skip_white *> U.int in
|
|
let p = (skip_white *> char '(' *> many p0) <* (skip_white <* char ')') in
|
|
let printer = CCFormat.(to_string @@ Dump.result @@ Dump.list int) in
|
|
assert_equal ~printer
|
|
(Ok [1;2;3]) (parse_string p "(1 2 3)");
|
|
assert_equal ~printer
|
|
(Ok [1;2; -30; 4]) (parse_string p "( 1 2 -30 4 )")
|
|
*)
|
|
|
|
|
|
let many1 p =
|
|
p >>= fun x ->
|
|
many p >|= fun l -> x :: l
|
|
|
|
(* skip can be made efficient by not allocating intermediate parsers *)
|
|
let skip p : _ t =
|
|
let rec self = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st
|
|
~ok:(fun st _ -> self.run st ~ok ~err)
|
|
~err:(fun _ ->
|
|
ok st ())
|
|
} in
|
|
self
|
|
|
|
let sep_until ~until ~by p =
|
|
let rec read_p = lazy (
|
|
p >>= fun x ->
|
|
(until *> pure [x])
|
|
<|>
|
|
(by *> (Lazy.force read_p >|= fun tl -> x :: tl))
|
|
) in
|
|
(until *> pure [])
|
|
<|> (Lazy.force read_p)
|
|
|
|
let sep ~by p =
|
|
let rec read_p = lazy (
|
|
try_or p
|
|
~f:(fun x ->
|
|
(eoi *> pure [x])
|
|
<|>
|
|
try_or by
|
|
~f:(fun _ -> Lazy.force read_p >|= fun tl -> x :: tl)
|
|
(pure [x]))
|
|
(pure [])
|
|
) in
|
|
Lazy.force read_p
|
|
|
|
(*$inject
|
|
let aword = chars1_if (function 'a'..'z'|'A'..'Z'->true|_ -> false);;
|
|
*)
|
|
(*$= & ~printer:(errpp Q.Print.(list string))
|
|
(Ok ["a";"b";"c"]) \
|
|
(parse_string (optional (char '/') *> sep ~by:(char '/') aword) "/a/b/c")
|
|
(Ok ["a";"b";"c"]) \
|
|
(parse_string (optional (char '/') *> sep ~by:(char '/') aword) "a/b/c")
|
|
*)
|
|
|
|
let sep1 ~by p =
|
|
p >>= fun x ->
|
|
sep ~by p >|= fun tl ->
|
|
x :: tl
|
|
|
|
let lookahead p : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st
|
|
~ok:(fun _st x -> ok st x) (* discard old state *)
|
|
~err
|
|
}
|
|
|
|
let lookahead_ignore p : _ t = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st
|
|
~ok:(fun _st _x -> ok st ())
|
|
~err
|
|
}
|
|
|
|
let set_current_slice sl : _ t = {
|
|
run=fun _st ~ok ~err:_ ->
|
|
assert CCShims_.Stdlib.(_st.memo == sl.memo);
|
|
ok sl () (* jump to slice *)
|
|
}
|
|
|
|
(*$= & ~printer:(errpp Q.Print.(string))
|
|
(Ok "abc") (parse_string (lookahead (string "ab") *> (string "abc")) "abcd")
|
|
*)
|
|
|
|
(*$= & ~printer:(errpp Q.Print.(string))
|
|
(Ok "1234") (parse_string line_str "1234\nyolo")
|
|
*)
|
|
|
|
(*$= & ~printer:(errpp Q.Print.(pair String.escaped String.escaped))
|
|
(Ok ("1234", "yolo")) (parse_string (line_str ||| line_str) "1234\nyolo\nswag")
|
|
*)
|
|
|
|
let split_1 ~on_char : _ t = {
|
|
run=fun st ~ok ~err:_ ->
|
|
if st.i >= st.j then (
|
|
ok st (st, None)
|
|
) else (
|
|
match String.index_from st.str st.i on_char with
|
|
| j ->
|
|
let x = {st with j} in
|
|
let y = {st with i=min st.j (j+1)} in
|
|
let st_done = {st with i=st.j} in (* empty *)
|
|
ok st_done (x, Some y)
|
|
| exception Not_found ->
|
|
let st_done = {st with i=st.j} in (* empty *)
|
|
ok st_done (st, None)
|
|
)
|
|
}
|
|
|
|
let split_list_at_most ~on_char n : slice list t =
|
|
let rec loop acc n =
|
|
if n <= 0 then (
|
|
(* add the rest to [acc] *)
|
|
all >|= fun rest ->
|
|
let acc = rest :: acc in
|
|
List.rev acc
|
|
) else (
|
|
try_or
|
|
eoi
|
|
~f:(fun _ -> return (List.rev acc))
|
|
~else_:(parse_1 acc n)
|
|
)
|
|
and parse_1 acc n =
|
|
split_1 ~on_char >>= fun (sl1, rest) ->
|
|
let acc = sl1 :: acc in
|
|
match rest with
|
|
| None -> return (List.rev acc)
|
|
| Some rest -> recurse rest (loop acc (n-1))
|
|
in
|
|
loop [] n
|
|
|
|
(*$= & ~printer:(errpp Q.Print.(list string)) ~cmp:(erreq (=))
|
|
(Ok ["a";"b";"c";"d,e,f"]) \
|
|
(parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,b,c,d,e,f")
|
|
(Ok ["a";"bc"]) \
|
|
(parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,bc")
|
|
*)
|
|
|
|
let split_list ~on_char : _ t =
|
|
split_list_at_most ~on_char max_int
|
|
|
|
let split_2 ~on_char : _ t =
|
|
split_list_at_most ~on_char 3 >>= function
|
|
| [a; b] -> return (a,b)
|
|
| _ -> fail "split_2: expected 2 fields exactly"
|
|
|
|
let split_3 ~on_char : _ t =
|
|
split_list_at_most ~on_char 4 >>= function
|
|
| [a; b; c] -> return (a,b,c)
|
|
| _ -> fail "split_3: expected 3 fields exactly"
|
|
|
|
let split_4 ~on_char : _ t =
|
|
split_list_at_most ~on_char 5 >>= function
|
|
| [a; b; c; d] -> return (a,b,c,d)
|
|
| _ -> fail "split_4: expected 4 fields exactly"
|
|
|
|
let split_list ~on_char : slice list t =
|
|
let rec loop acc =
|
|
try_or
|
|
eoi
|
|
~f:(fun _ -> return (List.rev acc))
|
|
~else_:(parse_1 acc)
|
|
and parse_1 acc =
|
|
split_1 ~on_char >>= fun (sl1, rest) ->
|
|
let acc = sl1 :: acc in
|
|
match rest with
|
|
| None -> return (List.rev acc)
|
|
| Some rest -> recurse rest (loop acc)
|
|
in
|
|
loop []
|
|
|
|
let each_split ~on_char p : 'a list t =
|
|
let rec loop acc =
|
|
split_1 ~on_char >>= fun (sl1, rest) ->
|
|
(* parse [sl1] with [p] *)
|
|
recurse sl1 p >>= fun x ->
|
|
let acc = x :: acc in
|
|
match rest with
|
|
| None -> return (List.rev acc)
|
|
| Some rest -> recurse rest (loop acc)
|
|
in
|
|
loop []
|
|
|
|
let line : slice t =
|
|
split_1 ~on_char:'\n' >>= fun (sl, rest) ->
|
|
match rest with
|
|
| None -> return sl
|
|
| Some rest ->
|
|
set_current_slice rest >|= fun () -> sl
|
|
|
|
let line_str = line >|= Slice.to_string
|
|
|
|
let each_line p : _ t =
|
|
each_split ~on_char:'\n' p
|
|
|
|
(*$= & ~printer:(errpp Q.Print.(list @@ list int))
|
|
(Ok ([[1;1];[2;2];[3;3];[]])) \
|
|
(parse_string (each_line (sep ~by:skip_space U.int)) "1 1\n2 2\n3 3\n")
|
|
*)
|
|
|
|
let memo (type a) (p:a t) : a t =
|
|
let id = !Memo_state.id_ in
|
|
incr Memo_state.id_;
|
|
let r = ref None in (* used for universal encoding *)
|
|
|
|
{run=fun st ~ok ~err ->
|
|
let tbl = match !(st.memo) with
|
|
| Some t -> t
|
|
| None ->
|
|
let tbl = Memo_tbl.create 32 in
|
|
st.memo := Some tbl;
|
|
tbl
|
|
in
|
|
|
|
match
|
|
r := None;
|
|
let f = Memo_tbl.find tbl (st.i, id) in
|
|
f();
|
|
!r
|
|
with
|
|
| None -> assert false
|
|
| Some (Ok (st,x)) -> ok st x
|
|
| Some (Error e) -> err e
|
|
| exception Not_found ->
|
|
(* parse, and save *)
|
|
p.run st
|
|
~ok:(fun st' x ->
|
|
Memo_tbl.replace tbl (st.i,id) (fun () -> r := Some (Ok (st',x)));
|
|
ok st' x)
|
|
~err:(fun e ->
|
|
Memo_tbl.replace tbl (st.i,id) (fun () -> r := Some (Error e));
|
|
err e)
|
|
}
|
|
|
|
let fix_memo f =
|
|
let rec p = {
|
|
run=fun st ~ok ~err -> (Lazy.force p').run st ~ok ~err
|
|
}
|
|
and p' = lazy (memo (f p)) in
|
|
p
|
|
|
|
exception ParseError of Error.t
|
|
|
|
let stringify_result = function
|
|
| Ok _ as x -> x
|
|
| Error e -> Error (Error.to_string e)
|
|
|
|
let parse_string_exn p s =
|
|
p.run (state_of_string s)
|
|
~ok:(fun _st x -> x)
|
|
~err:(fun e -> raise (ParseError e))
|
|
|
|
let parse_string_e p s =
|
|
p.run (state_of_string s)
|
|
~ok:(fun _st x -> Ok x)
|
|
~err:(fun e -> Error e)
|
|
|
|
let parse_string p s = parse_string_e p s |> stringify_result
|
|
|
|
let read_all_ ic =
|
|
let buf = Buffer.create 1024 in
|
|
begin
|
|
try
|
|
while true do
|
|
let line = input_line ic in
|
|
Buffer.add_string buf line;
|
|
Buffer.add_char buf '\n';
|
|
done;
|
|
assert false
|
|
with End_of_file -> ()
|
|
end;
|
|
Buffer.contents buf
|
|
|
|
let parse_file_e p file =
|
|
let ic = open_in file in
|
|
let s = read_all_ ic in
|
|
let r = parse_string_e p s in
|
|
close_in ic;
|
|
r
|
|
|
|
let parse_file p file = parse_file_e p file |> stringify_result
|
|
|
|
let parse_file_exn p file =
|
|
match parse_file_e p file with
|
|
| Ok x -> x
|
|
| Error e -> raise (ParseError e)
|
|
|
|
module U = struct
|
|
let sep_ = sep
|
|
|
|
let list ?(start="[") ?(stop="]") ?(sep=";") p =
|
|
string start *> skip_white *>
|
|
sep_until
|
|
~until:(skip_white <* string stop)
|
|
~by:(skip_white *> string sep *> skip_white) p
|
|
|
|
let int =
|
|
skip_white *>
|
|
chars1_if ~descr:"integer" (fun c -> is_num c || char_equal c '-')
|
|
>>= fun s ->
|
|
try return (int_of_string s)
|
|
with Failure _ -> fail "expected an int"
|
|
|
|
(*$= & ~printer:(errpp Q.Print.int) ~cmp:(erreq (=))
|
|
(Ok 42) (parse_string U.int " 42")
|
|
(Ok 2) (parse_string U.int "2")
|
|
(Error "") (parse_string U.int "abc")
|
|
(Error "") (parse_string U.int "")
|
|
*)
|
|
|
|
let in_paren (p:'a t) : 'a t =
|
|
skip_white *>
|
|
(char '(' *> skip_white *> p <* skip_white <* char ')')
|
|
|
|
let in_parens_opt (p:'a t) : 'a t =
|
|
fix (fun self ->
|
|
skip_white *>
|
|
try_or
|
|
(char '(')
|
|
~f:(fun _ -> skip_white *> self <* skip_white <* char ')')
|
|
~else_:p)
|
|
|
|
(*$= & ~printer:(errpp Q.Print.int) ~cmp:(erreq (=))
|
|
(Ok 15) (parse_string (U.in_paren (U.in_paren U.int)) "( ( 15) )")
|
|
(Ok 2) (parse_string (U.in_paren U.int) "(2)")
|
|
(Error "") (parse_string (U.in_paren U.int) "2")
|
|
(Error "") (parse_string (U.in_paren U.int) "")
|
|
(Ok 2) (parse_string (U.in_parens_opt U.int) "((((2))))")
|
|
(Ok 2) (parse_string (U.in_parens_opt U.int) "2")
|
|
(Ok 200) (parse_string (U.in_parens_opt U.int) "( ( 200 ) )")
|
|
*)
|
|
|
|
let option p =
|
|
skip_white *>
|
|
try_or
|
|
(string "Some")
|
|
~f:(fun _ -> skip_white *> p >|= fun x -> Some x)
|
|
~else_:(string "None" *> return None)
|
|
|
|
(*$= & ~printer:(errpp Q.Print.(option int)) ~cmp:(erreq (=))
|
|
(Ok (Some 12)) (parse_string U.(option int) " Some 12")
|
|
(Ok None) (parse_string U.(option int) " None")
|
|
(Ok (Some 0)) (parse_string U.(option int) "Some 0")
|
|
(Ok (Some 0)) (parse_string U.(in_parens_opt @@ option int) "(( Some 0) )")
|
|
*)
|
|
|
|
let hexa_int =
|
|
(exact "0x" <|> return "") *>
|
|
begin
|
|
chars1_if (function '0' .. '9' | 'a'..'f' | 'A'..'F' -> true | _ -> false)
|
|
>|= fun s ->
|
|
let i = ref 0 in
|
|
String.iter
|
|
(fun c ->
|
|
let n = match c with
|
|
| '0' .. '9' -> Char.code c - Char.code '0'
|
|
| 'a' .. 'f' -> Char.code c - Char.code 'a' + 10
|
|
| 'A' .. 'F' -> Char.code c - Char.code 'A' + 10
|
|
| _ -> assert false
|
|
in
|
|
i := !i * 16 + n)
|
|
s;
|
|
!i
|
|
end
|
|
|
|
(*$= & ~printer:(errpp Q.Print.int) ~cmp:(erreq (=))
|
|
(Ok 16) (parse_string U.hexa_int "0x10")
|
|
(Ok 16) (parse_string U.hexa_int "10")
|
|
(Error "") (parse_string U.hexa_int "x10")
|
|
(Error "") (parse_string U.hexa_int "0xz")
|
|
*)
|
|
|
|
let prepend_str c s = String.make 1 c ^ s
|
|
|
|
let word =
|
|
map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num)
|
|
|
|
let bool =
|
|
skip_white *>
|
|
((string "true" *> return true) <|> (string "false" *> return false))
|
|
(*$= & ~printer:(errpp Q.Print.bool) ~cmp:(erreq (=))
|
|
(Ok true) (parse_string U.bool "true")
|
|
(Ok false) (parse_string U.bool "false")
|
|
*)
|
|
|
|
let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 =
|
|
skip_white *> string start *> skip_white *>
|
|
p1 >>= fun x1 ->
|
|
skip_white *> string sep *> skip_white *>
|
|
p2 >>= fun x2 ->
|
|
skip_white *> string stop *> return (x1,x2)
|
|
|
|
(*$= & ~printer:Q.Print.(errpp (pair int int))
|
|
(Ok(1,2)) U.(parse_string (pair int int) "(1 , 2 )")
|
|
*)
|
|
|
|
let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 =
|
|
string start *> skip_white *>
|
|
p1 >>= fun x1 ->
|
|
skip_white *> string sep *> skip_white *>
|
|
p2 >>= fun x2 ->
|
|
skip_white *> string sep *> skip_white *>
|
|
p3 >>= fun x3 ->
|
|
string stop *> return (x1,x2,x3)
|
|
end
|
|
|
|
module Debug_ = struct
|
|
let trace_fail name p = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st ~ok
|
|
~err:(fun e ->
|
|
Printf.eprintf "trace %s: fail with %s\n%!" name (Error.to_string e);
|
|
err e)
|
|
}
|
|
|
|
let trace_ ~both name ~print p = {
|
|
run=fun st ~ok ~err ->
|
|
p.run st
|
|
~ok:(fun st x ->
|
|
Printf.eprintf "trace %s: parsed %s\n%!" name (print x);
|
|
ok st x)
|
|
~err:(fun e ->
|
|
if both then (
|
|
Printf.eprintf "trace %s: fail with %s\n%!" name (Error.to_string e);
|
|
);
|
|
err e)
|
|
}
|
|
|
|
let trace_success name ~print p = trace_ ~both:false name ~print p
|
|
let trace_success_or_fail name ~print p = trace_ ~both:true name ~print p
|
|
end
|