CCParse: add slice and the ability to recurse on them

the idea is that it's often convenient to split the input into smaller
part (e.g. lines), or do a first pass of parsing that just returns a
slice of the input; and then later to use another parser on that slice
to extract the actual data. The new notion of `slice` allows that,
while preserving locations wrt the original input.
This commit is contained in:
Simon Cruanes 2021-06-06 22:42:20 -04:00
parent 7bdc3cff24
commit 1517f64f55
2 changed files with 427 additions and 84 deletions

View file

@ -167,7 +167,7 @@ module Error = struct
Format.fprintf out "@[<hv>at line %d, char %d:@ %s@]" line col (self.msg()) Format.fprintf out "@[<hv>at line %d, char %d:@ %s@]" line col (self.msg())
end end
type 'a or_error = ('a, Error.t) result type +'a or_error = ('a, Error.t) result
module Memo_tbl = Hashtbl.Make(struct module Memo_tbl = Hashtbl.Make(struct
type t = int * int (* id of parser, position *) type t = int * int (* id of parser, position *)
@ -186,7 +186,8 @@ end
(** Purely functional state passed around *) (** Purely functional state passed around *)
type state = { type state = {
str: string; (* the input *) str: string; (* the input *)
i: int; (* offset in [input.str] *) i: int; (* offset in [str] *)
j: int; (* end pointer in [str], excluded. [len = j-i] *)
memo : Memo_state.t option ref; (* Memoization table, if any *) memo : Memo_state.t option ref; (* Memoization table, if any *)
} }
@ -207,11 +208,12 @@ let state_of_string str =
let s = { let s = {
str; str;
i=0; i=0;
j=String.length str;
memo=ref None; memo=ref None;
} in } in
s s
let[@inline] is_done st = st.i >= String.length st.str let[@inline] is_done st = st.i >= st.j
let[@inline] cur st = st.str.[st.i] let[@inline] cur st = st.str.[st.i]
let pos_of_st_ st : position = {pos_buffer=st.str; pos_offset=st.i; pos_lc=None} let pos_of_st_ st : position = {pos_buffer=st.str; pos_offset=st.i; pos_lc=None}
@ -328,7 +330,7 @@ let map2 f x y = pure f <*> x <*> y
let map3 f x y z = pure f <*> x <*> y <*> z let map3 f x y z = pure f <*> x <*> y <*> z
let junk_ (st:state) : state = let junk_ (st:state) : state =
assert (st.i < String.length st.str); assert (st.i < st.j);
{st with i=st.i + 1} {st with i=st.i + 1}
let eoi = { let eoi = {
@ -344,11 +346,61 @@ let eoi = {
(Ok true) (parse_string (U.bool <* skip_white <* eoi) "true") (Ok true) (parse_string (U.bool <* skip_white <* eoi) "true")
*) *)
let with_pos p : _ t = {
run=fun st ~ok ~err ->
p.run st
~ok:(fun st' x ->
ok st' (x, pos_of_st_ st))
~err
}
(* a slice is just a state, which makes {!recurse} quite easy. *)
type slice = state
module Slice = struct
type t = slice
let length sl = sl.j - sl.i
let is_empty sl = sl.i = sl.j
let to_string sl = String.sub sl.str sl.i (length sl)
end
let recurse slice p : _ t = {
run=fun _st ~ok ~err ->
(* make sure these states are related. all slices share the
same reference as the initial state they derive from. *)
assert CCShims_.Stdlib.(_st.memo == slice.memo);
p.run slice ~ok ~err
}
let all = {
run=fun st ~ok ~err:_ ->
if is_done st then ok st st
else (
let st_done = {st with i=st.j} in
ok st_done st
)
}
let all_str = all >|= Slice.to_string
(*$= & ~printer:(errpp Q.Print.string) ~cmp:(erreq (=))
(Ok "abcd") (parse_string all_str "abcd")
(Ok "cd") (parse_string (string "ab" *> all_str) "ab")
*)
(*$= & ~printer:(errpp Q.Print.(pair string string)) ~cmp:(erreq (=))
(Ok ("foobar", "")) (parse_string (both all_str all_str) "foobar")
*)
let fail msg : _ t = { let fail msg : _ t = {
run=fun st ~ok:_ ~err -> run=fun st ~ok:_ ~err ->
err (mk_error_ st (const_str_ msg)) err (mk_error_ st (const_str_ msg))
} }
let failf msg = Printf.ksprintf fail msg let failf msg = Printf.ksprintf fail msg
let fail_lazy msg = {
run=fun st ~ok:_ ~err ->
err (mk_error_ st msg)
}
let parsing what p = { let parsing what p = {
run=fun st ~ok ~err -> run=fun st ~ok ~err ->
@ -360,9 +412,10 @@ let parsing what p = {
err {e with Error.msg}) err {e with Error.msg})
} }
let nop = { let empty = {
run=fun st ~ok ~err:_ -> ok st (); run=fun st ~ok ~err:_ -> ok st ();
} }
let nop = empty
let any_char = { let any_char = {
run=fun st ~ok ~err -> consume_ st ~ok ~err run=fun st ~ok ~err -> consume_ st ~ok ~err
@ -398,19 +451,35 @@ let char_if ?descr p = {
~err ~err
} }
let chars_if p = { let take_if p : slice t = {
run=fun st ~ok ~err:_ -> run=fun st ~ok ~err:_ ->
let i0 = st.i in let i = ref st.i in
let i = ref i0 in
while while
let st = {st with i = !i} in let st = {st with i = !i} in
not (is_done st) && p (cur st) not (is_done st) && p (cur st)
do do
incr i; incr i;
done; done;
ok {st with i = !i} (String.sub st.str i0 (!i - i0)) ok {st with i = !i} {st with j= !i}
} }
let take1_if ?descr p =
take_if p >>= fun sl ->
if Slice.is_empty sl then (
let msg() =
let what = match descr with
| None -> ""
| Some d -> Printf.sprintf " for %s" d
in
Printf.sprintf "expected non-empty sequence of chars%s" what
in
fail_lazy msg
) else (
return sl
)
let chars_if p = take_if p >|= Slice.to_string
let chars1_if ?descr p = { let chars1_if ?descr p = {
run=fun st ~ok ~err -> run=fun st ~ok ~err ->
(chars_if p).run st (chars_if p).run st
@ -453,11 +522,11 @@ let chars_fold ~f acc0 = {
| `Fail msg -> raise (Fold_fail (st,msg)) | `Fail msg -> raise (Fold_fail (st,msg))
) )
done; done;
ok {st with i= !i} !acc ok {st with i= !i} (!acc, {st with j= !i})
with Fold_fail (st,msg) -> err (mk_error_ st (const_str_ msg)) with Fold_fail (st,msg) -> err (mk_error_ st (const_str_ msg))
} }
let chars_fold_map ~f acc0 = { let chars_fold_transduce ~f acc0 = {
run=fun st ~ok ~err -> run=fun st ~ok ~err ->
let i0 = st.i in let i0 = st.i in
let i = ref i0 in let i = ref i0 in
@ -524,6 +593,22 @@ let try_or p1 ~f ~else_:p2 = {
~err:(fun _ -> p2.run st ~ok ~err) ~err:(fun _ -> p2.run st ~ok ~err)
} }
let try_or_l ?(msg="try_or_l ran out of options") ?else_ l : _ t = {
run=fun st ~ok ~err ->
let rec loop = function
| (test, p) :: tl ->
test.run st
~ok:(fun _ _ -> p.run st ~ok ~err) (* commit *)
~err:(fun _ -> loop tl)
| [] ->
begin match else_ with
| None -> err (mk_error_ st (const_str_ msg))
| Some p -> p.run st ~ok ~err
end
in
loop l
}
let suspend f = { let suspend f = {
run=fun st ~ok ~err -> run=fun st ~ok ~err ->
let p = f () in let p = f () in
@ -531,12 +616,12 @@ let suspend f = {
} }
(* read [len] chars at once *) (* read [len] chars at once *)
let any_chars len : _ t = { let take len : slice t = {
run=fun st ~ok ~err -> run=fun st ~ok ~err ->
if st.i + len <= String.length st.str then ( if st.i + len <= st.j then (
let s = String.sub st.str st.i len in let slice = {st with j = st.i + len} in
let st = {st with i = st.i + len} in let st = {st with i = st.i + len} in
ok st s ok st slice
) else ( ) else (
let msg() = let msg() =
Printf.sprintf "expected to be able to consume %d chars" len Printf.sprintf "expected to be able to consume %d chars" len
@ -545,6 +630,8 @@ let any_chars len : _ t = {
) )
} }
let any_chars len : _ t = take len >|= Slice.to_string
let exact s = { let exact s = {
run=fun st ~ok ~err -> run=fun st ~ok ~err ->
(* parse a string of length [String.length s] and compare with [s] *) (* parse a string of length [String.length s] and compare with [s] *)
@ -697,58 +784,116 @@ let lookahead p : _ t = {
~err ~err
} }
let lookahead_ignore p : _ t = {
run=fun st ~ok ~err ->
p.run st
~ok:(fun _st _x -> ok st ())
~err
}
(*$= & ~printer:(errpp Q.Print.(string)) (*$= & ~printer:(errpp Q.Print.(string))
(Ok "abc") (parse_string (lookahead (string "ab") *> (string "abc")) "abcd") (Ok "abc") (parse_string (lookahead (string "ab") *> (string "abc")) "abcd")
*) *)
let line : _ t = { (*$=
run=fun st ~ok ~err -> (Ok "1234") (parse_string line_str "1234\nyolo")
if is_done st then err (mk_error_ st (const_str_ "expected a line, not EOI")) (Ok ("1234", "yolo")) (parse_string (line_str ||| line_str) "1234\nyolo\nswag")
else ( *)
match String.index_from st.str st.i '\n' with
let split_1 ~on_char : _ t = {
run=fun st ~ok ~err:_ ->
if st.i >= st.j then (
ok st (st, None)
) else (
match String.index_from st.str st.i on_char with
| j -> | j ->
let s = String.sub st.str st.i (j - st.i) in let x = {st with j} in
ok {st with i=j+1} s let y = {st with i=min st.j (j+1)} in
let st_done = {st with i=st.j} in (* empty *)
ok st_done (x, Some y)
| exception Not_found -> | exception Not_found ->
err (mk_error_ st (const_str_ "unterminated line")) let st_done = {st with i=st.j} in (* empty *)
ok st_done (st, None)
) )
} }
(*$= let split_list_at_most ~on_char n : slice list t =
(Ok "1234") (parse_string line "1234\nyolo") let rec loop acc n =
(Ok ("1234", "yolo")) (parse_string (line ||| line) "1234\nyolo\nswag") if n <= 0 then return (List.rev acc)
else (
try_or
eoi
~f:(fun _ -> return (List.rev acc))
~else_:(parse_1 acc n)
)
and parse_1 acc n =
split_1 ~on_char >>= fun (sl1, rest) ->
let acc = sl1 :: acc in
match rest with
| None -> return (List.rev acc)
| Some rest -> recurse rest (loop acc (n-1))
in
loop [] n
(*$= & ~printer:(errpp Q.Print.(list string)) ~cmp:(erreq (=))
(Ok ["a";"b";"c";"d,e,f"]) \
(parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,b,c,d,e,f")
(Ok ["a";"bc"]) \
(parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,bc")
*) *)
(* parse a string [s] using [p_sub], then parse [s] using [p]. let split_list ~on_char : _ t =
The result is that of parsing [s] using [p], but the state is split_list_at_most ~on_char max_int
the one after using [p_sub], and errors are translated back into the context
of [p_sub]. let split_2 ~on_char : _ t =
This can be useful for example in [p_sub line some_line_parser]. *) split_list_at_most ~on_char 2 >>= function
let parse_sub_ p_sub p : _ t = { | [a; b] -> return (a,b)
run=fun st0 ~ok ~err -> | _ -> fail "split_2: expected 2 fields exactly"
let p = p <* eoi in (* make sure [p] reads all *)
p_sub.run st0 let split_3 ~on_char : _ t =
~ok:(fun st1 s -> split_list_at_most ~on_char 3 >>= function
p.run (state_of_string s) | [a; b; c] -> return (a,b,c)
~ok:(fun _ r -> ok st1 r) | _ -> fail "split_3: expected 3 fields exactly"
~err:(fun e ->
let pos = e.pos in let split_4 ~on_char : _ t =
let pos = { split_list_at_most ~on_char 4 >>= function
pos_buffer=pos.pos_buffer; | [a; b; c; d] -> return (a,b,c,d)
pos_offset=pos.pos_offset + st0.i; | _ -> fail "split_4: expected 4 fields exactly"
pos_lc=None;
} in let split_list ~on_char : slice list t =
err {e with pos})) let rec loop acc =
~err try_or
} eoi
~f:(fun _ -> return (List.rev acc))
~else_:(parse_1 acc)
and parse_1 acc =
split_1 ~on_char >>= fun (sl1, rest) ->
let acc = sl1 :: acc in
match rest with
| None -> return (List.rev acc)
| Some rest -> recurse rest (loop acc)
in
loop []
let each_split ~on_char p : 'a list t =
let rec loop acc =
split_1 ~on_char >>= fun (sl1, rest) ->
(* parse [sl1] with [p] *)
recurse sl1 p >>= fun x ->
let acc = x :: acc in
match rest with
| None -> return (List.rev acc)
| Some rest -> recurse rest (loop acc)
in
loop []
let line : slice t =
split_1 ~on_char:'\n' >|= fst
let line_str = line >|= Slice.to_string
let each_line p : _ t = let each_line p : _ t =
fix each_split ~on_char:'\n' p
(fun self ->
try_or eoi
~f:(fun _ -> pure [])
(parse_sub_ line p >>= fun x ->
self >|= fun tl -> x :: tl))
(*$= & ~printer:(errpp Q.Print.(list @@ list int)) (*$= & ~printer:(errpp Q.Print.(list @@ list int))
(Ok ([[1;1];[2;2];[3;3]])) \ (Ok ([[1;1];[2;2];[3;3]])) \

View file

@ -8,6 +8,8 @@
{2 A few examples} {2 A few examples}
Some more advanced example(s) can be found in the [/examples] directory.
{4 Parse a tree} {4 Parse a tree}
{[ {[
@ -104,7 +106,7 @@ module Error : sig
(** Pretty prints the error *) (** Pretty prints the error *)
end end
type 'a or_error = ('a, Error.t) result type +'a or_error = ('a, Error.t) result
(** ['a or_error] is either [Ok x] for some result [x : 'a], (** ['a or_error] is either [Ok x] for some result [x : 'a],
or an error {!Error.t}. or an error {!Error.t}.
@ -146,21 +148,45 @@ val ap : ('a -> 'b) t -> 'a t -> 'b t
(** Applicative. (** Applicative.
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
val eoi : unit t
(** Expect the end of input, fails otherwise. *)
val nop : unit t
(** Succeed with [()]. *)
val empty : unit t
(** Succeed with [()], same as {!nop}.
@since NEXT_RELEASE *)
val fail : string -> 'a t val fail : string -> 'a t
(** [fail msg] fails with the given message. It can trigger a backtrack. *) (** [fail msg] fails with the given message. It can trigger a backtrack. *)
val failf: ('a, unit, string, 'b t) format4 -> 'a val failf: ('a, unit, string, 'b t) format4 -> 'a
(** [Format.sprintf] version of {!fail}. *) (** [Format.sprintf] version of {!fail}. *)
val fail_lazy : (unit -> string) -> 'a t
(** Like {!fail}, but only produce an error message on demand.
@since NEXT_RELEASE *)
val parsing : string -> 'a t -> 'a t val parsing : string -> 'a t -> 'a t
(** [parsing s p] behaves the same as [p], with the information that (** [parsing s p] behaves the same as [p], with the information that
we are parsing [s], if [p] fails. *) we are parsing [s], if [p] fails.
The message [s] is added to the error, it does not replace it,
not does the location change (the error still points to
the same location as in [p]). *)
val eoi : unit t val set_error_message : string -> 'a t -> 'a t
(** Expect the end of input, fails otherwise. *) (** [set_error_message msg p] behaves like [p], but if [p] fails,
[set_error_message msg p] fails with [msg] instead and at the current
position. The internal error message of [p] is just discarded.
@since NEXT_RELEASE *)
val nop : unit t val with_pos : 'a t -> ('a * position) t
(** Succeed with [()]. *) (** [with_pos p] behaves like [p], but returns the (starting) position
along with [p]'s result.
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val any_char : char t val any_char : char t
(** [any_char] parses any character. (** [any_char] parses any character.
@ -169,27 +195,52 @@ val any_char : char t
val any_chars : int -> string t val any_chars : int -> string t
(** [any_chars len] parses exactly [len] characters from the input. (** [any_chars len] parses exactly [len] characters from the input.
Fails if the input doesn't contain at least [len] chars.
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
val char : char -> char t val char : char -> char t
(** [char c] parses the character [c] and nothing else. *) (** [char c] parses the character [c] and nothing else. *)
val char_if : ?descr:string -> (char -> bool) -> char t type slice
(** [char_if f] parses a character [c] if [f c = true]. (** A slice of the input, as returned by some combinators such
@param descr describes what kind of character was expected *) as {!split_1} or {split_n}.
val chars_if : (char -> bool) -> string t {b EXPERIMENTAL}
(** [chars_if f] parses a string of chars that satisfy [f]. *) @since NEXT_RELEASE *)
val chars1_if : ?descr:string -> (char -> bool) -> string t (** Functions on slices.
(** Like {!chars_if}, but only non-empty strings. @since NEXT_RELEASE *)
@param descr describes what kind of character was expected *) module Slice : sig
type t = slice
val is_empty : t -> bool
(** Is the slice empty? *)
val length : t -> int
(** Length of the slice *)
val to_string : t -> string
(** Convert the slice into a string.
Linear time and memory in [length slice] *)
end
val recurse : slice -> 'a t -> 'a t
(** [recurse slice p] parses the [slice]
(most likely obtained via another combinator, such as {!split_1}
or {!split_n}), using [p].
The slice contains a position which is used to relocate error
messages to their position in the whole input, not just relative to
the slice.
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val chars_fold : val chars_fold :
f:('acc -> char -> f:('acc -> char ->
[`Continue of 'acc | `Consume_and_stop | `Stop | `Fail of string]) -> [`Continue of 'acc | `Consume_and_stop | `Stop | `Fail of string]) ->
'acc -> 'acc ->
'acc t ('acc * slice) t
(** [chars_fold f acc0] folds over characters of the input. (** [chars_fold f acc0] folds over characters of the input.
Each char [c] is passed, along with the current accumulator, to [f]; Each char [c] is passed, along with the current accumulator, to [f];
[f] can either: [f] can either:
@ -204,24 +255,54 @@ val chars_fold :
This is a generalization of of {!chars_if} that allows one to transform This is a generalization of of {!chars_if} that allows one to transform
characters on the fly, skip some, handle escape sequences, etc. characters on the fly, skip some, handle escape sequences, etc.
It can also be useful as a base component for a lexer.
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
val chars_fold_map : val chars_fold_transduce :
f:('acc -> char -> f:('acc -> char ->
[`Continue of 'acc | `Yield of 'acc * char [ `Continue of 'acc | `Yield of 'acc * char
| `Consume_and_stop | `Stop | `Fail of string]) -> | `Consume_and_stop | `Stop | `Fail of string]) ->
'acc -> 'acc ->
('acc * string) t ('acc * string) t
(** Same as {!char_fold} but with the following differences: (** Same as {!char_fold} but with the following differences:
- returns a string along with the accumulator. The string is built from - returns a string along with the accumulator, rather than the slice
characters returned by [`Yield]. of all the characters accepted by [`Continue _].
The string is built from characters returned by [`Yield].
- new case [`Yield (acc, c)] adds [c] to the returned string - new case [`Yield (acc, c)] adds [c] to the returned string
and continues parsing with [acc]. and continues parsing with [acc].
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
val take : int -> slice t
(** [slice_of_len len] parses exactly [len] characters from the input.
Fails if the input doesn't contain at least [len] chars.
@since NEXT_RELEASE *)
val take_if : (char -> bool) -> slice t
(** [take_if f] takes characters as long as they satisfy the predicate [f].
@since NEXT_RELEASE *)
val take1_if : ?descr:string -> (char -> bool) -> slice t
(** [take1_if f] takes characters as long as they satisfy the predicate [f].
Fails if no character satisfies [f].
@since NEXT_RELEASE *)
val char_if : ?descr:string -> (char -> bool) -> char t
(** [char_if f] parses a character [c] if [f c = true].
Fails if the next char does not satisfy [f].
@param descr describes what kind of character was expected *)
val chars_if : (char -> bool) -> string t
(** [chars_if f] parses a string of chars that satisfy [f].
Cannot fail. *)
val chars1_if : ?descr:string -> (char -> bool) -> string t
(** Like {!chars_if}, but only non-empty strings.
Fails if the string is empty.
@param descr describes what kind of character was expected *)
val endline : char t val endline : char t
(** Parse '\n'. *) (** Parse '\n'. *)
@ -272,7 +353,9 @@ val many : 'a t -> 'a list t
val optional : _ t -> unit t val optional : _ t -> unit t
(** [optional p] tries to parse [p], and return [()] whether it (** [optional p] tries to parse [p], and return [()] whether it
succeeded or failed. Cannot fail. succeeded or failed. Cannot fail itself.
It consumes input if [p] succeeded (as much as [p] consumed), but
consumes not input if [p] failed.
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
val try_ : 'a t -> 'a t val try_ : 'a t -> 'a t
@ -307,6 +390,27 @@ val try_or : 'a t -> f:('a -> 'b t) -> else_:'b t -> 'b t
@since NEXT_RELEASE @since NEXT_RELEASE
*) *)
val try_or_l :
?msg:string ->
?else_:'a t ->
(unit t * 'a t) list ->
'a t
(** [try_or_l ?else_ l] tries each pair [(test, p)] in order.
If the n-th [test] succeeds, then [try_or_l l] behaves like n-th [p],
whether [p] fails or not.
If they all fail, and [else_] is defined, then it behaves like [else_].
If all fail, and [else_] is [None], then it fails as well.
This is a performance optimization compared to {!(<|>)}. We commit to a
branch if the test succeeds, without backtracking at all.
See {!lookahead_ignore} for a convenient way of writing the test conditions.
@param msg error message if all options fail
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val or_ : 'a t -> 'a t -> 'a t val or_ : 'a t -> 'a t -> 'a t
(** [or_ p1 p2] tries to parse [p1], and if it fails, tries [p2] (** [or_ p1 p2] tries to parse [p1], and if it fails, tries [p2]
from the same position. from the same position.
@ -316,12 +420,6 @@ val both : 'a t -> 'b t -> ('a * 'b) t
(** [both a b] parses [a], then [b], then returns the pair of their results. (** [both a b] parses [a], then [b], then returns the pair of their results.
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
val set_error_message : string -> 'a t -> 'a t
(** [set_error_message msg p] behaves like [p], but if [p] fails,
[set_error_message msg p] fails with [msg] instead.
@since NEXT_RELEASE
*)
val many1 : 'a t -> 'a list t val many1 : 'a t -> 'a list t
(** [many1 p] is like [many p] excepts it fails if the (** [many1 p] is like [many p] excepts it fails if the
list is empty (i.e. it needs [p] to succeed at least once). *) list is empty (i.e. it needs [p] to succeed at least once). *)
@ -345,8 +443,26 @@ val lookahead : 'a t -> 'a t
{b EXPERIMENTAL} {b EXPERIMENTAL}
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
val line : string t val lookahead_ignore : 'a t -> unit t
(** Parse a line, '\n' excluded. (** [lookahead_ignore p] tries to parse input with [p],
and succeeds if [p] succeeds. However it doesn't consume any input
and returns [()], so in effect its only use-case is to detect
whether [p] succeeds, e.g. in {!cond}.
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val fix : ('a t -> 'a t) -> 'a t
(** Fixpoint combinator. *)
val line : slice t
(** Parse a line, ['\n'] excluded, and position the cursor after the ['\n'].
@since NEXT_RELEASE *)
val line_str : string t
(** [line_str] is [line >|= Slice.to_string].
It parses the next line and turns the slice into a string.
The state points to after the ['\n'] character.
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
val each_line : 'a t -> 'a list t val each_line : 'a t -> 'a list t
@ -354,8 +470,90 @@ val each_line : 'a t -> 'a list t
{b EXPERIMENTAL} {b EXPERIMENTAL}
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
val fix : ('a t -> 'a t) -> 'a t val split_1 : on_char:char -> (slice * slice option) t
(** Fixpoint combinator. *) (** [split_1 ~on_char] looks for [on_char] in the input, and returns a
pair [sl1, sl2], where:
- [sl1] is the slice of the input the precedes the first occurrence
of [on_char], or the whole input if [on_char] cannot be found.
- [sl2] is the slice that comes after [on_char],
or [None] if [on_char] couldn't be found.
The parser is now positioned at the end of the input.
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val split_list : on_char:char -> slice list t
(** [split_n ~on_char] splits the input on all occurrences of [on_char],
returning a list of slices.
A useful specialization of this is {!each_line}, which is
basically [split_n ~on_char:'\n' p].
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val split_list_at_most : on_char:char -> int -> slice list t
(** [split_list_at_most ~on_char n] applies [split_1 ~on_char] at most
[n] times, to get a list of [n+1] elements.
The last element might contain [on_char]. This is useful to limit the
amount of work done by {!split_list}.
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val split_2 : on_char:char -> (slice * slice) t
(** [split_2 ~on_char] splits the input into exactly 2 fields,
and fails if the split yields less or more than 2 items.
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val split_3 : on_char:char -> (slice * slice * slice) t
(** See {!split_2}
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val split_4 : on_char:char -> (slice * slice * slice * slice) t
(** See {!split_2}
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val each_split : on_char:char -> 'a t -> 'a list t
(** [split_list_map ~on_char p] uses [split_list ~on_char] to split
the input, then parses each chunk of the input thus obtained using [p].
The difference with [sep ~by:(char on_char) p] is that
[sep] calls [p] first, and only tries to find [on_char] after [p] returns.
While it is more flexible, this technique also means [p] has to be careful
not to consume [on_char] by error.
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
val all : slice t
(** [all] returns all the unconsumed input as a slice, and consumes it.
Use {!Slice.to_string} to turn it into a string.
Note that [lookahead all] can be used to {i peek} at the rest of the input
without consuming anything.
@since NEXT_RELEASE *)
val all_str : string t
(** [all_str] accepts all the remaining chars and extracts them into a
string. Similar to {!rest_of_input} but with a string.
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
(* TODO
val trim : slice t
(** [trim] is like {!all}, but removes whitespace on the left and right.
{b EXPERIMENTAL}
@since NEXT_RELEASE *)
*)
val memo : 'a t -> 'a t val memo : 'a t -> 'a t
(** Memoize the parser. [memo p] will behave like [p], but when called (** Memoize the parser. [memo p] will behave like [p], but when called