mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
CCParse: add slice and the ability to recurse on them
the idea is that it's often convenient to split the input into smaller part (e.g. lines), or do a first pass of parsing that just returns a slice of the input; and then later to use another parser on that slice to extract the actual data. The new notion of `slice` allows that, while preserving locations wrt the original input.
This commit is contained in:
parent
7bdc3cff24
commit
1517f64f55
2 changed files with 427 additions and 84 deletions
|
|
@ -167,7 +167,7 @@ module Error = struct
|
|||
Format.fprintf out "@[<hv>at line %d, char %d:@ %s@]" line col (self.msg())
|
||||
end
|
||||
|
||||
type 'a or_error = ('a, Error.t) result
|
||||
type +'a or_error = ('a, Error.t) result
|
||||
|
||||
module Memo_tbl = Hashtbl.Make(struct
|
||||
type t = int * int (* id of parser, position *)
|
||||
|
|
@ -186,7 +186,8 @@ end
|
|||
(** Purely functional state passed around *)
|
||||
type state = {
|
||||
str: string; (* the input *)
|
||||
i: int; (* offset in [input.str] *)
|
||||
i: int; (* offset in [str] *)
|
||||
j: int; (* end pointer in [str], excluded. [len = j-i] *)
|
||||
memo : Memo_state.t option ref; (* Memoization table, if any *)
|
||||
}
|
||||
|
||||
|
|
@ -207,11 +208,12 @@ let state_of_string str =
|
|||
let s = {
|
||||
str;
|
||||
i=0;
|
||||
j=String.length str;
|
||||
memo=ref None;
|
||||
} in
|
||||
s
|
||||
|
||||
let[@inline] is_done st = st.i >= String.length st.str
|
||||
let[@inline] is_done st = st.i >= st.j
|
||||
let[@inline] cur st = st.str.[st.i]
|
||||
|
||||
let pos_of_st_ st : position = {pos_buffer=st.str; pos_offset=st.i; pos_lc=None}
|
||||
|
|
@ -328,7 +330,7 @@ let map2 f x y = pure f <*> x <*> y
|
|||
let map3 f x y z = pure f <*> x <*> y <*> z
|
||||
|
||||
let junk_ (st:state) : state =
|
||||
assert (st.i < String.length st.str);
|
||||
assert (st.i < st.j);
|
||||
{st with i=st.i + 1}
|
||||
|
||||
let eoi = {
|
||||
|
|
@ -344,11 +346,61 @@ let eoi = {
|
|||
(Ok true) (parse_string (U.bool <* skip_white <* eoi) "true")
|
||||
*)
|
||||
|
||||
let with_pos p : _ t = {
|
||||
run=fun st ~ok ~err ->
|
||||
p.run st
|
||||
~ok:(fun st' x ->
|
||||
ok st' (x, pos_of_st_ st))
|
||||
~err
|
||||
}
|
||||
|
||||
(* a slice is just a state, which makes {!recurse} quite easy. *)
|
||||
type slice = state
|
||||
|
||||
module Slice = struct
|
||||
type t = slice
|
||||
let length sl = sl.j - sl.i
|
||||
let is_empty sl = sl.i = sl.j
|
||||
let to_string sl = String.sub sl.str sl.i (length sl)
|
||||
end
|
||||
|
||||
let recurse slice p : _ t = {
|
||||
run=fun _st ~ok ~err ->
|
||||
(* make sure these states are related. all slices share the
|
||||
same reference as the initial state they derive from. *)
|
||||
assert CCShims_.Stdlib.(_st.memo == slice.memo);
|
||||
p.run slice ~ok ~err
|
||||
}
|
||||
|
||||
let all = {
|
||||
run=fun st ~ok ~err:_ ->
|
||||
if is_done st then ok st st
|
||||
else (
|
||||
let st_done = {st with i=st.j} in
|
||||
ok st_done st
|
||||
)
|
||||
}
|
||||
|
||||
let all_str = all >|= Slice.to_string
|
||||
|
||||
(*$= & ~printer:(errpp Q.Print.string) ~cmp:(erreq (=))
|
||||
(Ok "abcd") (parse_string all_str "abcd")
|
||||
(Ok "cd") (parse_string (string "ab" *> all_str) "ab")
|
||||
*)
|
||||
|
||||
(*$= & ~printer:(errpp Q.Print.(pair string string)) ~cmp:(erreq (=))
|
||||
(Ok ("foobar", "")) (parse_string (both all_str all_str) "foobar")
|
||||
*)
|
||||
|
||||
let fail msg : _ t = {
|
||||
run=fun st ~ok:_ ~err ->
|
||||
err (mk_error_ st (const_str_ msg))
|
||||
}
|
||||
let failf msg = Printf.ksprintf fail msg
|
||||
let fail_lazy msg = {
|
||||
run=fun st ~ok:_ ~err ->
|
||||
err (mk_error_ st msg)
|
||||
}
|
||||
|
||||
let parsing what p = {
|
||||
run=fun st ~ok ~err ->
|
||||
|
|
@ -360,9 +412,10 @@ let parsing what p = {
|
|||
err {e with Error.msg})
|
||||
}
|
||||
|
||||
let nop = {
|
||||
let empty = {
|
||||
run=fun st ~ok ~err:_ -> ok st ();
|
||||
}
|
||||
let nop = empty
|
||||
|
||||
let any_char = {
|
||||
run=fun st ~ok ~err -> consume_ st ~ok ~err
|
||||
|
|
@ -398,19 +451,35 @@ let char_if ?descr p = {
|
|||
~err
|
||||
}
|
||||
|
||||
let chars_if p = {
|
||||
let take_if p : slice t = {
|
||||
run=fun st ~ok ~err:_ ->
|
||||
let i0 = st.i in
|
||||
let i = ref i0 in
|
||||
let i = ref st.i in
|
||||
while
|
||||
let st = {st with i = !i} in
|
||||
not (is_done st) && p (cur st)
|
||||
do
|
||||
incr i;
|
||||
done;
|
||||
ok {st with i = !i} (String.sub st.str i0 (!i - i0))
|
||||
ok {st with i = !i} {st with j= !i}
|
||||
}
|
||||
|
||||
let take1_if ?descr p =
|
||||
take_if p >>= fun sl ->
|
||||
if Slice.is_empty sl then (
|
||||
let msg() =
|
||||
let what = match descr with
|
||||
| None -> ""
|
||||
| Some d -> Printf.sprintf " for %s" d
|
||||
in
|
||||
Printf.sprintf "expected non-empty sequence of chars%s" what
|
||||
in
|
||||
fail_lazy msg
|
||||
) else (
|
||||
return sl
|
||||
)
|
||||
|
||||
let chars_if p = take_if p >|= Slice.to_string
|
||||
|
||||
let chars1_if ?descr p = {
|
||||
run=fun st ~ok ~err ->
|
||||
(chars_if p).run st
|
||||
|
|
@ -453,11 +522,11 @@ let chars_fold ~f acc0 = {
|
|||
| `Fail msg -> raise (Fold_fail (st,msg))
|
||||
)
|
||||
done;
|
||||
ok {st with i= !i} !acc
|
||||
ok {st with i= !i} (!acc, {st with j= !i})
|
||||
with Fold_fail (st,msg) -> err (mk_error_ st (const_str_ msg))
|
||||
}
|
||||
|
||||
let chars_fold_map ~f acc0 = {
|
||||
let chars_fold_transduce ~f acc0 = {
|
||||
run=fun st ~ok ~err ->
|
||||
let i0 = st.i in
|
||||
let i = ref i0 in
|
||||
|
|
@ -524,6 +593,22 @@ let try_or p1 ~f ~else_:p2 = {
|
|||
~err:(fun _ -> p2.run st ~ok ~err)
|
||||
}
|
||||
|
||||
let try_or_l ?(msg="try_or_l ran out of options") ?else_ l : _ t = {
|
||||
run=fun st ~ok ~err ->
|
||||
let rec loop = function
|
||||
| (test, p) :: tl ->
|
||||
test.run st
|
||||
~ok:(fun _ _ -> p.run st ~ok ~err) (* commit *)
|
||||
~err:(fun _ -> loop tl)
|
||||
| [] ->
|
||||
begin match else_ with
|
||||
| None -> err (mk_error_ st (const_str_ msg))
|
||||
| Some p -> p.run st ~ok ~err
|
||||
end
|
||||
in
|
||||
loop l
|
||||
}
|
||||
|
||||
let suspend f = {
|
||||
run=fun st ~ok ~err ->
|
||||
let p = f () in
|
||||
|
|
@ -531,12 +616,12 @@ let suspend f = {
|
|||
}
|
||||
|
||||
(* read [len] chars at once *)
|
||||
let any_chars len : _ t = {
|
||||
let take len : slice t = {
|
||||
run=fun st ~ok ~err ->
|
||||
if st.i + len <= String.length st.str then (
|
||||
let s = String.sub st.str st.i len in
|
||||
if st.i + len <= st.j then (
|
||||
let slice = {st with j = st.i + len} in
|
||||
let st = {st with i = st.i + len} in
|
||||
ok st s
|
||||
ok st slice
|
||||
) else (
|
||||
let msg() =
|
||||
Printf.sprintf "expected to be able to consume %d chars" len
|
||||
|
|
@ -545,6 +630,8 @@ let any_chars len : _ t = {
|
|||
)
|
||||
}
|
||||
|
||||
let any_chars len : _ t = take len >|= Slice.to_string
|
||||
|
||||
let exact s = {
|
||||
run=fun st ~ok ~err ->
|
||||
(* parse a string of length [String.length s] and compare with [s] *)
|
||||
|
|
@ -697,58 +784,116 @@ let lookahead p : _ t = {
|
|||
~err
|
||||
}
|
||||
|
||||
let lookahead_ignore p : _ t = {
|
||||
run=fun st ~ok ~err ->
|
||||
p.run st
|
||||
~ok:(fun _st _x -> ok st ())
|
||||
~err
|
||||
}
|
||||
|
||||
(*$= & ~printer:(errpp Q.Print.(string))
|
||||
(Ok "abc") (parse_string (lookahead (string "ab") *> (string "abc")) "abcd")
|
||||
*)
|
||||
|
||||
let line : _ t = {
|
||||
run=fun st ~ok ~err ->
|
||||
if is_done st then err (mk_error_ st (const_str_ "expected a line, not EOI"))
|
||||
else (
|
||||
match String.index_from st.str st.i '\n' with
|
||||
(*$=
|
||||
(Ok "1234") (parse_string line_str "1234\nyolo")
|
||||
(Ok ("1234", "yolo")) (parse_string (line_str ||| line_str) "1234\nyolo\nswag")
|
||||
*)
|
||||
|
||||
let split_1 ~on_char : _ t = {
|
||||
run=fun st ~ok ~err:_ ->
|
||||
if st.i >= st.j then (
|
||||
ok st (st, None)
|
||||
) else (
|
||||
match String.index_from st.str st.i on_char with
|
||||
| j ->
|
||||
let s = String.sub st.str st.i (j - st.i) in
|
||||
ok {st with i=j+1} s
|
||||
let x = {st with j} in
|
||||
let y = {st with i=min st.j (j+1)} in
|
||||
let st_done = {st with i=st.j} in (* empty *)
|
||||
ok st_done (x, Some y)
|
||||
| exception Not_found ->
|
||||
err (mk_error_ st (const_str_ "unterminated line"))
|
||||
let st_done = {st with i=st.j} in (* empty *)
|
||||
ok st_done (st, None)
|
||||
)
|
||||
}
|
||||
|
||||
(*$=
|
||||
(Ok "1234") (parse_string line "1234\nyolo")
|
||||
(Ok ("1234", "yolo")) (parse_string (line ||| line) "1234\nyolo\nswag")
|
||||
let split_list_at_most ~on_char n : slice list t =
|
||||
let rec loop acc n =
|
||||
if n <= 0 then return (List.rev acc)
|
||||
else (
|
||||
try_or
|
||||
eoi
|
||||
~f:(fun _ -> return (List.rev acc))
|
||||
~else_:(parse_1 acc n)
|
||||
)
|
||||
and parse_1 acc n =
|
||||
split_1 ~on_char >>= fun (sl1, rest) ->
|
||||
let acc = sl1 :: acc in
|
||||
match rest with
|
||||
| None -> return (List.rev acc)
|
||||
| Some rest -> recurse rest (loop acc (n-1))
|
||||
in
|
||||
loop [] n
|
||||
|
||||
(*$= & ~printer:(errpp Q.Print.(list string)) ~cmp:(erreq (=))
|
||||
(Ok ["a";"b";"c";"d,e,f"]) \
|
||||
(parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,b,c,d,e,f")
|
||||
(Ok ["a";"bc"]) \
|
||||
(parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,bc")
|
||||
*)
|
||||
|
||||
(* parse a string [s] using [p_sub], then parse [s] using [p].
|
||||
The result is that of parsing [s] using [p], but the state is
|
||||
the one after using [p_sub], and errors are translated back into the context
|
||||
of [p_sub].
|
||||
This can be useful for example in [p_sub line some_line_parser]. *)
|
||||
let parse_sub_ p_sub p : _ t = {
|
||||
run=fun st0 ~ok ~err ->
|
||||
let p = p <* eoi in (* make sure [p] reads all *)
|
||||
p_sub.run st0
|
||||
~ok:(fun st1 s ->
|
||||
p.run (state_of_string s)
|
||||
~ok:(fun _ r -> ok st1 r)
|
||||
~err:(fun e ->
|
||||
let pos = e.pos in
|
||||
let pos = {
|
||||
pos_buffer=pos.pos_buffer;
|
||||
pos_offset=pos.pos_offset + st0.i;
|
||||
pos_lc=None;
|
||||
} in
|
||||
err {e with pos}))
|
||||
~err
|
||||
}
|
||||
let split_list ~on_char : _ t =
|
||||
split_list_at_most ~on_char max_int
|
||||
|
||||
let split_2 ~on_char : _ t =
|
||||
split_list_at_most ~on_char 2 >>= function
|
||||
| [a; b] -> return (a,b)
|
||||
| _ -> fail "split_2: expected 2 fields exactly"
|
||||
|
||||
let split_3 ~on_char : _ t =
|
||||
split_list_at_most ~on_char 3 >>= function
|
||||
| [a; b; c] -> return (a,b,c)
|
||||
| _ -> fail "split_3: expected 3 fields exactly"
|
||||
|
||||
let split_4 ~on_char : _ t =
|
||||
split_list_at_most ~on_char 4 >>= function
|
||||
| [a; b; c; d] -> return (a,b,c,d)
|
||||
| _ -> fail "split_4: expected 4 fields exactly"
|
||||
|
||||
let split_list ~on_char : slice list t =
|
||||
let rec loop acc =
|
||||
try_or
|
||||
eoi
|
||||
~f:(fun _ -> return (List.rev acc))
|
||||
~else_:(parse_1 acc)
|
||||
and parse_1 acc =
|
||||
split_1 ~on_char >>= fun (sl1, rest) ->
|
||||
let acc = sl1 :: acc in
|
||||
match rest with
|
||||
| None -> return (List.rev acc)
|
||||
| Some rest -> recurse rest (loop acc)
|
||||
in
|
||||
loop []
|
||||
|
||||
let each_split ~on_char p : 'a list t =
|
||||
let rec loop acc =
|
||||
split_1 ~on_char >>= fun (sl1, rest) ->
|
||||
(* parse [sl1] with [p] *)
|
||||
recurse sl1 p >>= fun x ->
|
||||
let acc = x :: acc in
|
||||
match rest with
|
||||
| None -> return (List.rev acc)
|
||||
| Some rest -> recurse rest (loop acc)
|
||||
in
|
||||
loop []
|
||||
|
||||
let line : slice t =
|
||||
split_1 ~on_char:'\n' >|= fst
|
||||
|
||||
let line_str = line >|= Slice.to_string
|
||||
|
||||
let each_line p : _ t =
|
||||
fix
|
||||
(fun self ->
|
||||
try_or eoi
|
||||
~f:(fun _ -> pure [])
|
||||
(parse_sub_ line p >>= fun x ->
|
||||
self >|= fun tl -> x :: tl))
|
||||
each_split ~on_char:'\n' p
|
||||
|
||||
(*$= & ~printer:(errpp Q.Print.(list @@ list int))
|
||||
(Ok ([[1;1];[2;2];[3;3]])) \
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
{2 A few examples}
|
||||
|
||||
Some more advanced example(s) can be found in the [/examples] directory.
|
||||
|
||||
{4 Parse a tree}
|
||||
|
||||
{[
|
||||
|
|
@ -104,7 +106,7 @@ module Error : sig
|
|||
(** Pretty prints the error *)
|
||||
end
|
||||
|
||||
type 'a or_error = ('a, Error.t) result
|
||||
type +'a or_error = ('a, Error.t) result
|
||||
(** ['a or_error] is either [Ok x] for some result [x : 'a],
|
||||
or an error {!Error.t}.
|
||||
|
||||
|
|
@ -146,21 +148,45 @@ val ap : ('a -> 'b) t -> 'a t -> 'b t
|
|||
(** Applicative.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val eoi : unit t
|
||||
(** Expect the end of input, fails otherwise. *)
|
||||
|
||||
val nop : unit t
|
||||
(** Succeed with [()]. *)
|
||||
|
||||
val empty : unit t
|
||||
(** Succeed with [()], same as {!nop}.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val fail : string -> 'a t
|
||||
(** [fail msg] fails with the given message. It can trigger a backtrack. *)
|
||||
|
||||
val failf: ('a, unit, string, 'b t) format4 -> 'a
|
||||
(** [Format.sprintf] version of {!fail}. *)
|
||||
|
||||
val fail_lazy : (unit -> string) -> 'a t
|
||||
(** Like {!fail}, but only produce an error message on demand.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val parsing : string -> 'a t -> 'a t
|
||||
(** [parsing s p] behaves the same as [p], with the information that
|
||||
we are parsing [s], if [p] fails. *)
|
||||
we are parsing [s], if [p] fails.
|
||||
The message [s] is added to the error, it does not replace it,
|
||||
not does the location change (the error still points to
|
||||
the same location as in [p]). *)
|
||||
|
||||
val eoi : unit t
|
||||
(** Expect the end of input, fails otherwise. *)
|
||||
val set_error_message : string -> 'a t -> 'a t
|
||||
(** [set_error_message msg p] behaves like [p], but if [p] fails,
|
||||
[set_error_message msg p] fails with [msg] instead and at the current
|
||||
position. The internal error message of [p] is just discarded.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val nop : unit t
|
||||
(** Succeed with [()]. *)
|
||||
val with_pos : 'a t -> ('a * position) t
|
||||
(** [with_pos p] behaves like [p], but returns the (starting) position
|
||||
along with [p]'s result.
|
||||
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val any_char : char t
|
||||
(** [any_char] parses any character.
|
||||
|
|
@ -169,27 +195,52 @@ val any_char : char t
|
|||
|
||||
val any_chars : int -> string t
|
||||
(** [any_chars len] parses exactly [len] characters from the input.
|
||||
Fails if the input doesn't contain at least [len] chars.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val char : char -> char t
|
||||
(** [char c] parses the character [c] and nothing else. *)
|
||||
|
||||
val char_if : ?descr:string -> (char -> bool) -> char t
|
||||
(** [char_if f] parses a character [c] if [f c = true].
|
||||
@param descr describes what kind of character was expected *)
|
||||
type slice
|
||||
(** A slice of the input, as returned by some combinators such
|
||||
as {!split_1} or {split_n}.
|
||||
|
||||
val chars_if : (char -> bool) -> string t
|
||||
(** [chars_if f] parses a string of chars that satisfy [f]. *)
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val chars1_if : ?descr:string -> (char -> bool) -> string t
|
||||
(** Like {!chars_if}, but only non-empty strings.
|
||||
@param descr describes what kind of character was expected *)
|
||||
(** Functions on slices.
|
||||
@since NEXT_RELEASE *)
|
||||
module Slice : sig
|
||||
type t = slice
|
||||
|
||||
val is_empty : t -> bool
|
||||
(** Is the slice empty? *)
|
||||
|
||||
val length : t -> int
|
||||
(** Length of the slice *)
|
||||
|
||||
val to_string : t -> string
|
||||
(** Convert the slice into a string.
|
||||
Linear time and memory in [length slice] *)
|
||||
end
|
||||
|
||||
val recurse : slice -> 'a t -> 'a t
|
||||
(** [recurse slice p] parses the [slice]
|
||||
(most likely obtained via another combinator, such as {!split_1}
|
||||
or {!split_n}), using [p].
|
||||
|
||||
The slice contains a position which is used to relocate error
|
||||
messages to their position in the whole input, not just relative to
|
||||
the slice.
|
||||
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val chars_fold :
|
||||
f:('acc -> char ->
|
||||
[`Continue of 'acc | `Consume_and_stop | `Stop | `Fail of string]) ->
|
||||
'acc ->
|
||||
'acc t
|
||||
('acc * slice) t
|
||||
(** [chars_fold f acc0] folds over characters of the input.
|
||||
Each char [c] is passed, along with the current accumulator, to [f];
|
||||
[f] can either:
|
||||
|
|
@ -204,24 +255,54 @@ val chars_fold :
|
|||
|
||||
This is a generalization of of {!chars_if} that allows one to transform
|
||||
characters on the fly, skip some, handle escape sequences, etc.
|
||||
It can also be useful as a base component for a lexer.
|
||||
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val chars_fold_map :
|
||||
val chars_fold_transduce :
|
||||
f:('acc -> char ->
|
||||
[`Continue of 'acc | `Yield of 'acc * char
|
||||
[ `Continue of 'acc | `Yield of 'acc * char
|
||||
| `Consume_and_stop | `Stop | `Fail of string]) ->
|
||||
'acc ->
|
||||
('acc * string) t
|
||||
(** Same as {!char_fold} but with the following differences:
|
||||
|
||||
- returns a string along with the accumulator. The string is built from
|
||||
characters returned by [`Yield].
|
||||
- returns a string along with the accumulator, rather than the slice
|
||||
of all the characters accepted by [`Continue _].
|
||||
The string is built from characters returned by [`Yield].
|
||||
- new case [`Yield (acc, c)] adds [c] to the returned string
|
||||
and continues parsing with [acc].
|
||||
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val take : int -> slice t
|
||||
(** [slice_of_len len] parses exactly [len] characters from the input.
|
||||
Fails if the input doesn't contain at least [len] chars.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val take_if : (char -> bool) -> slice t
|
||||
(** [take_if f] takes characters as long as they satisfy the predicate [f].
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val take1_if : ?descr:string -> (char -> bool) -> slice t
|
||||
(** [take1_if f] takes characters as long as they satisfy the predicate [f].
|
||||
Fails if no character satisfies [f].
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val char_if : ?descr:string -> (char -> bool) -> char t
|
||||
(** [char_if f] parses a character [c] if [f c = true].
|
||||
Fails if the next char does not satisfy [f].
|
||||
@param descr describes what kind of character was expected *)
|
||||
|
||||
val chars_if : (char -> bool) -> string t
|
||||
(** [chars_if f] parses a string of chars that satisfy [f].
|
||||
Cannot fail. *)
|
||||
|
||||
val chars1_if : ?descr:string -> (char -> bool) -> string t
|
||||
(** Like {!chars_if}, but only non-empty strings.
|
||||
Fails if the string is empty.
|
||||
@param descr describes what kind of character was expected *)
|
||||
|
||||
val endline : char t
|
||||
(** Parse '\n'. *)
|
||||
|
||||
|
|
@ -272,7 +353,9 @@ val many : 'a t -> 'a list t
|
|||
|
||||
val optional : _ t -> unit t
|
||||
(** [optional p] tries to parse [p], and return [()] whether it
|
||||
succeeded or failed. Cannot fail.
|
||||
succeeded or failed. Cannot fail itself.
|
||||
It consumes input if [p] succeeded (as much as [p] consumed), but
|
||||
consumes not input if [p] failed.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val try_ : 'a t -> 'a t
|
||||
|
|
@ -307,6 +390,27 @@ val try_or : 'a t -> f:('a -> 'b t) -> else_:'b t -> 'b t
|
|||
@since NEXT_RELEASE
|
||||
*)
|
||||
|
||||
val try_or_l :
|
||||
?msg:string ->
|
||||
?else_:'a t ->
|
||||
(unit t * 'a t) list ->
|
||||
'a t
|
||||
(** [try_or_l ?else_ l] tries each pair [(test, p)] in order.
|
||||
If the n-th [test] succeeds, then [try_or_l l] behaves like n-th [p],
|
||||
whether [p] fails or not.
|
||||
If they all fail, and [else_] is defined, then it behaves like [else_].
|
||||
If all fail, and [else_] is [None], then it fails as well.
|
||||
|
||||
This is a performance optimization compared to {!(<|>)}. We commit to a
|
||||
branch if the test succeeds, without backtracking at all.
|
||||
|
||||
See {!lookahead_ignore} for a convenient way of writing the test conditions.
|
||||
|
||||
@param msg error message if all options fail
|
||||
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val or_ : 'a t -> 'a t -> 'a t
|
||||
(** [or_ p1 p2] tries to parse [p1], and if it fails, tries [p2]
|
||||
from the same position.
|
||||
|
|
@ -316,12 +420,6 @@ val both : 'a t -> 'b t -> ('a * 'b) t
|
|||
(** [both a b] parses [a], then [b], then returns the pair of their results.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val set_error_message : string -> 'a t -> 'a t
|
||||
(** [set_error_message msg p] behaves like [p], but if [p] fails,
|
||||
[set_error_message msg p] fails with [msg] instead.
|
||||
@since NEXT_RELEASE
|
||||
*)
|
||||
|
||||
val many1 : 'a t -> 'a list t
|
||||
(** [many1 p] is like [many p] excepts it fails if the
|
||||
list is empty (i.e. it needs [p] to succeed at least once). *)
|
||||
|
|
@ -345,8 +443,26 @@ val lookahead : 'a t -> 'a t
|
|||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val line : string t
|
||||
(** Parse a line, '\n' excluded.
|
||||
val lookahead_ignore : 'a t -> unit t
|
||||
(** [lookahead_ignore p] tries to parse input with [p],
|
||||
and succeeds if [p] succeeds. However it doesn't consume any input
|
||||
and returns [()], so in effect its only use-case is to detect
|
||||
whether [p] succeeds, e.g. in {!cond}.
|
||||
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val fix : ('a t -> 'a t) -> 'a t
|
||||
(** Fixpoint combinator. *)
|
||||
|
||||
val line : slice t
|
||||
(** Parse a line, ['\n'] excluded, and position the cursor after the ['\n'].
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val line_str : string t
|
||||
(** [line_str] is [line >|= Slice.to_string].
|
||||
It parses the next line and turns the slice into a string.
|
||||
The state points to after the ['\n'] character.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val each_line : 'a t -> 'a list t
|
||||
|
|
@ -354,8 +470,90 @@ val each_line : 'a t -> 'a list t
|
|||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val fix : ('a t -> 'a t) -> 'a t
|
||||
(** Fixpoint combinator. *)
|
||||
val split_1 : on_char:char -> (slice * slice option) t
|
||||
(** [split_1 ~on_char] looks for [on_char] in the input, and returns a
|
||||
pair [sl1, sl2], where:
|
||||
|
||||
- [sl1] is the slice of the input the precedes the first occurrence
|
||||
of [on_char], or the whole input if [on_char] cannot be found.
|
||||
- [sl2] is the slice that comes after [on_char],
|
||||
or [None] if [on_char] couldn't be found.
|
||||
|
||||
The parser is now positioned at the end of the input.
|
||||
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val split_list : on_char:char -> slice list t
|
||||
(** [split_n ~on_char] splits the input on all occurrences of [on_char],
|
||||
returning a list of slices.
|
||||
|
||||
A useful specialization of this is {!each_line}, which is
|
||||
basically [split_n ~on_char:'\n' p].
|
||||
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val split_list_at_most : on_char:char -> int -> slice list t
|
||||
(** [split_list_at_most ~on_char n] applies [split_1 ~on_char] at most
|
||||
[n] times, to get a list of [n+1] elements.
|
||||
The last element might contain [on_char]. This is useful to limit the
|
||||
amount of work done by {!split_list}.
|
||||
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
|
||||
val split_2 : on_char:char -> (slice * slice) t
|
||||
(** [split_2 ~on_char] splits the input into exactly 2 fields,
|
||||
and fails if the split yields less or more than 2 items.
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val split_3 : on_char:char -> (slice * slice * slice) t
|
||||
(** See {!split_2}
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val split_4 : on_char:char -> (slice * slice * slice * slice) t
|
||||
(** See {!split_2}
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val each_split : on_char:char -> 'a t -> 'a list t
|
||||
(** [split_list_map ~on_char p] uses [split_list ~on_char] to split
|
||||
the input, then parses each chunk of the input thus obtained using [p].
|
||||
|
||||
The difference with [sep ~by:(char on_char) p] is that
|
||||
[sep] calls [p] first, and only tries to find [on_char] after [p] returns.
|
||||
While it is more flexible, this technique also means [p] has to be careful
|
||||
not to consume [on_char] by error.
|
||||
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val all : slice t
|
||||
(** [all] returns all the unconsumed input as a slice, and consumes it.
|
||||
Use {!Slice.to_string} to turn it into a string.
|
||||
|
||||
Note that [lookahead all] can be used to {i peek} at the rest of the input
|
||||
without consuming anything.
|
||||
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val all_str : string t
|
||||
(** [all_str] accepts all the remaining chars and extracts them into a
|
||||
string. Similar to {!rest_of_input} but with a string.
|
||||
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
(* TODO
|
||||
val trim : slice t
|
||||
(** [trim] is like {!all}, but removes whitespace on the left and right.
|
||||
{b EXPERIMENTAL}
|
||||
@since NEXT_RELEASE *)
|
||||
*)
|
||||
|
||||
val memo : 'a t -> 'a t
|
||||
(** Memoize the parser. [memo p] will behave like [p], but when called
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue