add CCParse.{char_fold, chars_fold_map}

useful for non-trivial lexing
This commit is contained in:
Simon Cruanes 2021-06-06 18:49:55 -04:00
parent c63a2b7b37
commit 88fe234a4c
2 changed files with 140 additions and 2 deletions

View file

@ -160,11 +160,11 @@ module Error = struct
let msg self = self.msg()
let to_string self =
let line,col = line_and_column self in
Printf.sprintf "at line %d, char %d:\n%s" line col (self.msg())
Printf.sprintf "at line %d, char %d: %s" line col (self.msg())
let pp out self =
let line,col = line_and_column self in
Format.fprintf out "at line %d, char %d:@ %s" line col (self.msg())
Format.fprintf out "@[<hv>at line %d, char %d:@ %s@]" line col (self.msg())
end
type 'a or_error = ('a, Error.t) result
@ -429,6 +429,65 @@ let chars1_if ?descr p = {
~err
}
exception Fold_fail of state * string
let chars_fold ~f acc0 = {
run=fun st ~ok ~err ->
let i0 = st.i in
let i = ref i0 in
let acc = ref acc0 in
let continue = ref true in
try
while !continue do
let st = {st with i = !i} in
if is_done st then (
continue := false;
) else (
let c = cur st in
match f !acc c with
| `Continue acc' ->
incr i;
acc := acc'
| `Stop -> continue := false;
| `Consume_and_stop -> incr i; continue := false
| `Fail msg -> raise (Fold_fail (st,msg))
)
done;
ok {st with i= !i} !acc
with Fold_fail (st,msg) -> err (mk_error_ st (const_str_ msg))
}
let chars_fold_map ~f acc0 = {
run=fun st ~ok ~err ->
let i0 = st.i in
let i = ref i0 in
let acc = ref acc0 in
let continue = ref true in
let buf = Buffer.create 16 in
try
while !continue do
let st = {st with i = !i} in
if is_done st then (
continue := false;
) else (
let c = cur st in
match f !acc c with
| `Continue acc' ->
incr i;
acc := acc'
| `Yield (acc', c') ->
incr i;
acc := acc';
Buffer.add_char buf c';
| `Stop -> continue := false;
| `Consume_and_stop -> incr i; continue := false
| `Fail msg -> raise (Fold_fail (st,msg))
)
done;
ok {st with i= !i} (!acc, Buffer.contents buf)
with Fold_fail (st,msg) -> err (mk_error_ st (const_str_ msg))
}
let skip_chars p : _ t =
let rec self = {
run=fun st ~ok ~err ->
@ -901,3 +960,29 @@ module U = struct
p3 >>= fun x3 ->
string stop *> return (x1,x2,x3)
end
module Debug_ = struct
let trace_fail name p = {
run=fun st ~ok ~err ->
p.run st ~ok
~err:(fun e ->
Printf.eprintf "trace %s: fail with %s\n%!" name (Error.to_string e);
err e)
}
let trace_ ~both name ~print p = {
run=fun st ~ok ~err ->
p.run st
~ok:(fun st x ->
Printf.eprintf "trace %s: parsed %s\n%!" name (print x);
ok st x)
~err:(fun e ->
if both then (
Printf.eprintf "trace %s: fail with %s\n%!" name (Error.to_string e);
);
err e)
}
let trace_success name ~print p = trace_ ~both:false name ~print p
let trace_success_or_fail name ~print p = trace_ ~both:true name ~print p
end

View file

@ -185,6 +185,43 @@ val chars1_if : ?descr:string -> (char -> bool) -> string t
(** Like {!chars_if}, but only non-empty strings.
@param descr describes what kind of character was expected *)
val chars_fold :
f:('acc -> char ->
[`Continue of 'acc | `Consume_and_stop | `Stop | `Fail of string]) ->
'acc ->
'acc t
(** [chars_fold f acc0] folds over characters of the input.
Each char [c] is passed, along with the current accumulator, to [f];
[f] can either:
- stop, by returning [`Stop]. In this case the current accumulator
is returned, and [c] is not consumed.
- consume char and stop, by returning [`Consume_and_stop].
- fail, by returning [`Fail msg]. In this case the parser fails
with the given message.
- continue, by returning [`Continue acc]. The parser continues to the
next char with the new accumulator.
This is a generalization of of {!chars_if} that allows one to transform
characters on the fly, skip some, handle escape sequences, etc.
@since NEXT_RELEASE *)
val chars_fold_map :
f:('acc -> char ->
[`Continue of 'acc | `Yield of 'acc * char
| `Consume_and_stop | `Stop | `Fail of string]) ->
'acc ->
('acc * string) t
(** Same as {!char_fold} but with the following differences:
- returns a string along with the accumulator. The string is built from
characters returned by [`Yield].
- new case [`Yield (acc, c)] adds [c] to the returned string
and continues parsing with [acc].
@since NEXT_RELEASE *)
val endline : char t
(** Parse '\n'. *)
@ -471,3 +508,19 @@ module U : sig
(** Parse a triple using OCaml syntactic conventions.
The default is "(a, b, c)". *)
end
(** Debugging utils.
{b EXPERIMENTAL} *)
module Debug_ : sig
val trace_fail : string -> 'a t -> 'a t
(** [trace_fail name p] behaves like [p], but prints the error message of [p]
on stderr whenever [p] fails.
@param name used as a prefix of all trace messages. *)
val trace_success : string -> print:('a -> string) -> 'a t -> 'a t
(** [trace_success name ~print p] behaves like [p], but
prints successful runs of [p] using [print]. *)
val trace_success_or_fail : string -> print:('a -> string) -> 'a t -> 'a t
(** Trace both error or success *)
end