CCParse: add slice and the ability to recurse on them

the idea is that it's often convenient to split the input into smaller part (e.g. lines), or do a first pass of parsing that just returns a slice of the input; and then later to use another parser on that slice to extract the actual data. The new notion of `slice` allows that, while preserving locations wrt the original input.
2026-03-09 23:23:35 -04:00 · 2021-06-06 22:42:20 -04:00 · 2021-06-06 22:42:20 -04:00 · 1517f64f55
commit 1517f64f55
parent 7bdc3cff24
2 changed files with 427 additions and 84 deletions
--- a/src/core/CCParse.ml
+++ b/src/core/CCParse.ml
@ -167,7 +167,7 @@ module Error = struct
    Format.fprintf out "@[<hv>at line %d, char %d:@ %s@]" line col (self.msg())
 end

-type 'a or_error = ('a, Error.t) result
+type +'a or_error = ('a, Error.t) result

 module Memo_tbl = Hashtbl.Make(struct
    type t = int * int  (* id of parser, position *)
@ -186,7 +186,8 @@ end
 (** Purely functional state passed around *)
 type state = {
  str: string; (* the input *)
-  i: int; (* offset in [input.str] *)
+  i: int; (* offset in [str] *)
+  j: int; (* end pointer in [str], excluded. [len = j-i] *)
  memo : Memo_state.t option ref; (* Memoization table, if any *)
 }

@ -207,11 +208,12 @@ let state_of_string str =
  let s = {
    str;
    i=0;
+    j=String.length str;
    memo=ref None;
  } in
  s

-let[@inline] is_done st = st.i >= String.length st.str
+let[@inline] is_done st = st.i >= st.j
 let[@inline] cur st = st.str.[st.i]

 let pos_of_st_ st : position = {pos_buffer=st.str; pos_offset=st.i; pos_lc=None}
@ -328,7 +330,7 @@ let map2 f x y = pure f <*> x <*> y
 let map3 f x y z = pure f <*> x <*> y <*> z

 let junk_ (st:state) : state =
-  assert (st.i < String.length st.str);
+  assert (st.i < st.j);
  {st with i=st.i + 1}

 let eoi = {
@ -344,11 +346,61 @@ let eoi = {
  (Ok true) (parse_string (U.bool <* skip_white <* eoi) "true")
 *)

+let with_pos p : _ t = {
+  run=fun st ~ok ~err ->
+    p.run st
+      ~ok:(fun st' x ->
+          ok st' (x, pos_of_st_ st))
+      ~err
+}
+
+(* a slice is just a state, which makes {!recurse} quite easy. *)
+type slice = state
+
+module Slice = struct
+  type t = slice
+  let length sl = sl.j - sl.i
+  let is_empty sl = sl.i = sl.j
+  let to_string sl = String.sub sl.str sl.i (length sl)
+end
+
+let recurse slice p : _ t = {
+  run=fun _st ~ok ~err ->
+    (* make sure these states are related. all slices share the
+       same reference as the initial state they derive from. *)
+    assert CCShims_.Stdlib.(_st.memo == slice.memo);
+    p.run slice ~ok ~err
+}
+
+let all = {
+  run=fun st ~ok ~err:_ ->
+    if is_done st then ok st st
+    else (
+      let st_done = {st with i=st.j} in
+      ok st_done st
+    )
+}
+
+let all_str = all >|= Slice.to_string
+
+(*$= & ~printer:(errpp Q.Print.string) ~cmp:(erreq (=))
+  (Ok "abcd") (parse_string all_str "abcd")
+  (Ok "cd") (parse_string (string "ab" *> all_str) "ab")
+*)
+
+(*$= & ~printer:(errpp Q.Print.(pair string string)) ~cmp:(erreq (=))
+  (Ok ("foobar", "")) (parse_string (both all_str all_str) "foobar")
+  *)
+
 let fail msg : _ t = {
  run=fun st ~ok:_ ~err ->
    err (mk_error_ st (const_str_ msg))
 }
 let failf msg = Printf.ksprintf fail msg
+let fail_lazy msg = {
+  run=fun st ~ok:_ ~err ->
+    err (mk_error_ st msg)
+}

 let parsing what p = {
  run=fun st ~ok ~err ->
@ -360,9 +412,10 @@ let parsing what p = {
          err {e with Error.msg})
 }

-let nop = {
+let empty = {
  run=fun st ~ok ~err:_ -> ok st ();
 }
+let nop = empty

 let any_char = {
  run=fun st ~ok ~err -> consume_ st ~ok ~err
@ -398,19 +451,35 @@ let char_if ?descr p = {
      ~err
 }

-let chars_if p = {
+let take_if p : slice t = {
  run=fun st ~ok ~err:_ ->
-    let i0 = st.i in
-    let i = ref i0 in
+    let i = ref st.i in
    while
      let st = {st with i = !i} in
      not (is_done st) && p (cur st)
    do
      incr i;
    done;
-    ok {st with i = !i} (String.sub st.str i0 (!i - i0))
+    ok {st with i = !i} {st with j= !i}
 }

+let take1_if ?descr p =
+  take_if p >>= fun sl ->
+  if Slice.is_empty sl then (
+    let msg() =
+      let what = match descr with
+        | None -> ""
+        | Some d -> Printf.sprintf " for %s" d
+      in
+      Printf.sprintf "expected non-empty sequence of chars%s" what
+    in
+    fail_lazy msg
+  ) else (
+    return sl
+  )
+
+let chars_if p = take_if p >|= Slice.to_string
+
 let chars1_if ?descr p = {
  run=fun st ~ok ~err ->
    (chars_if p).run st
@ -453,11 +522,11 @@ let chars_fold ~f acc0 = {
            | `Fail msg -> raise (Fold_fail (st,msg))
        )
      done;
-      ok {st with i= !i} !acc
+      ok {st with i= !i} (!acc, {st with j= !i})
    with Fold_fail (st,msg) -> err (mk_error_ st (const_str_ msg))
 }

-let chars_fold_map ~f acc0 = {
+let chars_fold_transduce ~f acc0 = {
  run=fun st ~ok ~err ->
    let i0 = st.i in
    let i = ref i0 in
@ -524,6 +593,22 @@ let try_or p1 ~f ~else_:p2 = {
      ~err:(fun _ -> p2.run st ~ok ~err)
 }

+let try_or_l ?(msg="try_or_l ran out of options") ?else_ l : _ t = {
+  run=fun st ~ok ~err ->
+    let rec loop = function
+      | (test, p) :: tl ->
+        test.run st
+          ~ok:(fun _ _ -> p.run st ~ok ~err) (* commit *)
+          ~err:(fun _ -> loop tl)
+      | [] ->
+        begin match else_ with
+          | None -> err (mk_error_ st (const_str_ msg))
+          | Some p -> p.run st ~ok ~err
+        end
+    in
+    loop l
+}
+
 let suspend f = {
  run=fun st ~ok ~err ->
    let p = f () in
@ -531,12 +616,12 @@ let suspend f = {
 }

 (* read [len] chars at once *)
-let any_chars len : _ t = {
+let take len : slice t = {
  run=fun st ~ok ~err ->
-    if st.i + len <= String.length st.str then (
-      let s = String.sub st.str st.i len in
+    if st.i + len <= st.j then (
+      let slice = {st with j = st.i + len} in
      let st = {st with i = st.i + len} in
-      ok st s
+      ok st slice
    ) else (
      let msg() =
        Printf.sprintf "expected to be able to consume %d chars" len
@ -545,6 +630,8 @@ let any_chars len : _ t = {
    )
 }

+let any_chars len : _ t = take len >|= Slice.to_string
+
 let exact s = {
  run=fun st ~ok ~err ->
    (* parse a string of length [String.length s] and compare with [s] *)
@ -697,58 +784,116 @@ let lookahead p : _ t = {
      ~err
 }

+let lookahead_ignore p : _ t = {
+  run=fun st ~ok ~err ->
+    p.run st
+      ~ok:(fun _st _x -> ok st ())
+      ~err
+}
+
 (*$= & ~printer:(errpp Q.Print.(string))
  (Ok "abc") (parse_string (lookahead (string "ab") *> (string "abc")) "abcd")
 *)

-let line : _ t = {
-  run=fun st ~ok ~err ->
-    if is_done st then err (mk_error_ st (const_str_ "expected a line, not EOI"))
-    else (
-      match String.index_from st.str st.i '\n' with
+(*$=
+  (Ok "1234") (parse_string line_str "1234\nyolo")
+  (Ok ("1234", "yolo")) (parse_string (line_str ||| line_str) "1234\nyolo\nswag")
+*)
+
+let split_1 ~on_char : _ t = {
+  run=fun st ~ok ~err:_ ->
+    if st.i >= st.j then (
+      ok st (st, None)
+    ) else (
+      match String.index_from st.str st.i on_char with
      | j ->
-        let s = String.sub st.str st.i (j - st.i) in
-        ok {st with i=j+1} s
+        let x = {st with j} in
+        let y = {st with i=min st.j (j+1)} in
+        let st_done = {st with i=st.j} in (* empty *)
+        ok st_done (x, Some y)
      | exception Not_found ->
-        err (mk_error_ st (const_str_ "unterminated line"))
+        let st_done = {st with i=st.j} in (* empty *)
+        ok st_done (st, None)
    )
 }

-(*$=
-  (Ok "1234") (parse_string line "1234\nyolo")
-  (Ok ("1234", "yolo")) (parse_string (line ||| line) "1234\nyolo\nswag")
+let split_list_at_most ~on_char n : slice list t =
+  let rec loop acc n =
+    if n <= 0 then return (List.rev acc)
+    else (
+      try_or
+        eoi
+        ~f:(fun _ -> return (List.rev acc))
+        ~else_:(parse_1 acc n)
+    )
+  and parse_1 acc n =
+    split_1 ~on_char >>= fun (sl1, rest) ->
+    let acc = sl1 :: acc in
+    match rest with
+      | None -> return (List.rev acc)
+      | Some rest -> recurse rest (loop acc (n-1))
+  in
+  loop [] n
+
+(*$= & ~printer:(errpp Q.Print.(list string)) ~cmp:(erreq (=))
+  (Ok ["a";"b";"c";"d,e,f"]) \
+    (parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,b,c,d,e,f")
+  (Ok ["a";"bc"]) \
+    (parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,bc")
 *)

-(* parse a string [s] using [p_sub], then parse [s] using [p].
-   The result is that of parsing [s] using [p], but the state is
-   the one after using [p_sub], and errors are translated back into the context
-   of [p_sub].
-   This can be useful for example in [p_sub line some_line_parser]. *)
-let parse_sub_ p_sub p : _ t = {
-  run=fun st0 ~ok ~err ->
-    let p = p <* eoi in (* make sure [p] reads all *)
-    p_sub.run st0
-      ~ok:(fun st1 s ->
-          p.run (state_of_string s)
-            ~ok:(fun _ r -> ok st1 r)
-            ~err:(fun e ->
-                let pos = e.pos in
-                let pos = {
-                  pos_buffer=pos.pos_buffer;
-                  pos_offset=pos.pos_offset + st0.i;
-                  pos_lc=None;
-                } in
-                err {e with pos}))
-      ~err
-}
+let split_list ~on_char : _ t =
+  split_list_at_most ~on_char max_int
+
+let split_2 ~on_char : _ t =
+  split_list_at_most ~on_char 2 >>= function
+  | [a; b] -> return (a,b)
+  | _ -> fail "split_2: expected 2 fields exactly"
+
+let split_3 ~on_char : _ t =
+  split_list_at_most ~on_char 3 >>= function
+  | [a; b; c] -> return (a,b,c)
+  | _ -> fail "split_3: expected 3 fields exactly"
+
+let split_4 ~on_char : _ t =
+  split_list_at_most ~on_char 4 >>= function
+  | [a; b; c; d] -> return (a,b,c,d)
+  | _ -> fail "split_4: expected 4 fields exactly"
+
+let split_list ~on_char : slice list t =
+  let rec loop acc =
+    try_or
+      eoi
+      ~f:(fun _ -> return (List.rev acc))
+      ~else_:(parse_1 acc)
+  and parse_1 acc =
+    split_1 ~on_char >>= fun (sl1, rest) ->
+    let acc = sl1 :: acc in
+    match rest with
+      | None -> return (List.rev acc)
+      | Some rest -> recurse rest (loop acc)
+  in
+  loop []
+
+let each_split ~on_char p : 'a list t =
+  let rec loop acc =
+    split_1 ~on_char >>= fun (sl1, rest) ->
+    (* parse [sl1] with [p] *)
+    recurse sl1 p >>= fun x ->
+    let acc = x :: acc in
+    match rest with
+      | None -> return (List.rev acc)
+      | Some rest -> recurse rest (loop acc)
+  in
+  loop []
+
+let line : slice t =
+  split_1 ~on_char:'\n' >|= fst
+
+let line_str = line >|= Slice.to_string

 let each_line p : _ t =
-  fix
-    (fun self ->
-       try_or eoi
-         ~f:(fun _ -> pure [])
-         (parse_sub_ line p >>= fun x ->
-          self >|= fun tl -> x :: tl))
+  each_split ~on_char:'\n' p

 (*$= & ~printer:(errpp Q.Print.(list @@ list int))
  (Ok ([[1;1];[2;2];[3;3]])) \
--- a/src/core/CCParse.mli
+++ b/src/core/CCParse.mli
@ -8,6 +8,8 @@

    {2 A few examples}

+    Some more advanced example(s) can be found in the [/examples] directory.
+
    {4 Parse a tree}

    {[
@ -104,7 +106,7 @@ module Error : sig
  (** Pretty prints the error *)
 end

-type 'a or_error = ('a, Error.t) result
+type +'a or_error = ('a, Error.t) result
 (** ['a or_error] is either [Ok x] for some result [x : 'a],
    or an error {!Error.t}.

@ -146,21 +148,45 @@ val ap : ('a -> 'b) t -> 'a t -> 'b t
 (** Applicative.
    @since NEXT_RELEASE *)

+val eoi : unit t
+(** Expect the end of input, fails otherwise. *)
+
+val nop : unit t
+(** Succeed with [()]. *)
+
+val empty : unit t
+(** Succeed with [()], same as {!nop}.
+    @since NEXT_RELEASE *)
+
 val fail : string -> 'a t
 (** [fail msg] fails with the given message. It can trigger a backtrack. *)

 val failf: ('a, unit, string, 'b t) format4 -> 'a
 (** [Format.sprintf] version of {!fail}. *)

+val fail_lazy : (unit -> string) -> 'a t
+(** Like {!fail}, but only produce an error message on demand.
+    @since NEXT_RELEASE *)
+
 val parsing : string -> 'a t -> 'a t
 (** [parsing s p] behaves the same as [p], with the information that
-    we are parsing [s], if [p] fails. *)
+    we are parsing [s], if [p] fails.
+    The message [s] is added to the error, it does not replace it,
+    not does the location change (the error still points to
+    the same location as in [p]). *)

-val eoi : unit t
-(** Expect the end of input, fails otherwise. *)
+val set_error_message : string -> 'a t -> 'a t
+(** [set_error_message msg p] behaves like [p], but if [p] fails,
+    [set_error_message msg p] fails with [msg] instead and at the current
+    position. The internal error message of [p] is just discarded.
+    @since NEXT_RELEASE *)

-val nop : unit t
-(** Succeed with [()]. *)
+val with_pos : 'a t -> ('a * position) t
+(** [with_pos p] behaves like [p], but returns the (starting) position
+    along with [p]'s result.
+
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)

 val any_char : char t
 (** [any_char] parses any character.
@ -169,27 +195,52 @@ val any_char : char t

 val any_chars : int -> string t
 (** [any_chars len] parses exactly [len] characters from the input.
+    Fails if the input doesn't contain at least [len] chars.
    @since NEXT_RELEASE *)

 val char : char -> char t
 (** [char c] parses the character [c] and nothing else. *)

-val char_if : ?descr:string -> (char -> bool) -> char t
-(** [char_if f] parses a character [c] if [f c = true].
-    @param descr describes what kind of character was expected *)
+type slice
+(** A slice of the input, as returned by some combinators such
+    as {!split_1} or {split_n}.

-val chars_if : (char -> bool) -> string t
-(** [chars_if f] parses a string of chars that satisfy [f]. *)
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)

-val chars1_if : ?descr:string -> (char -> bool) -> string t
-(** Like {!chars_if}, but only non-empty strings.
-    @param descr describes what kind of character was expected *)
+(** Functions on slices.
+    @since NEXT_RELEASE *)
+module Slice : sig
+  type t = slice
+
+  val is_empty : t -> bool
+  (** Is the slice empty? *)
+
+  val length : t -> int
+  (** Length of the slice *)
+
+  val to_string : t -> string
+  (** Convert the slice into a string.
+      Linear time and memory in [length slice] *)
+end
+
+val recurse : slice -> 'a t -> 'a t
+(** [recurse slice p] parses the [slice]
+    (most likely obtained via another combinator, such as {!split_1}
+    or {!split_n}), using [p].
+
+    The slice contains a position which is used to relocate error
+    messages to their position in the whole input, not just relative to
+    the slice.
+
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)

 val chars_fold :
  f:('acc -> char ->
     [`Continue of 'acc | `Consume_and_stop | `Stop | `Fail of string]) ->
  'acc ->
-  'acc t
+  ('acc * slice) t
 (** [chars_fold f acc0] folds over characters of the input.
    Each char [c] is passed, along with the current accumulator, to [f];
    [f] can either:
@ -204,24 +255,54 @@ val chars_fold :

    This is a generalization of of {!chars_if} that allows one to transform
    characters on the fly, skip some, handle escape sequences, etc.
+    It can also be useful as a base component for a lexer.

   @since NEXT_RELEASE *)

-val chars_fold_map :
+val chars_fold_transduce :
  f:('acc -> char ->
-     [`Continue of 'acc | `Yield of 'acc * char
+     [ `Continue of 'acc | `Yield of 'acc * char
     | `Consume_and_stop | `Stop | `Fail of string]) ->
  'acc ->
  ('acc * string) t
 (** Same as {!char_fold} but with the following differences:

-    - returns a string along with the accumulator. The string is built from
-      characters returned by [`Yield].
+    - returns a string along with the accumulator, rather than the slice
+      of all the characters accepted by [`Continue _].
+      The string is built from characters returned by [`Yield].
    - new case [`Yield (acc, c)] adds [c] to the returned string
      and continues parsing with [acc].

    @since NEXT_RELEASE *)

+val take : int -> slice t
+(** [slice_of_len len] parses exactly [len] characters from the input.
+    Fails if the input doesn't contain at least [len] chars.
+    @since NEXT_RELEASE *)
+
+val take_if : (char -> bool) -> slice t
+(** [take_if f] takes characters as long as they satisfy the predicate [f].
+    @since NEXT_RELEASE *)
+
+val take1_if : ?descr:string -> (char -> bool) -> slice t
+(** [take1_if f] takes characters as long as they satisfy the predicate [f].
+    Fails if no character satisfies [f].
+    @since NEXT_RELEASE *)
+
+val char_if : ?descr:string -> (char -> bool) -> char t
+(** [char_if f] parses a character [c] if [f c = true].
+    Fails if  the next char does not satisfy [f].
+    @param descr describes what kind of character was expected *)
+
+val chars_if : (char -> bool) -> string t
+(** [chars_if f] parses a string of chars that satisfy [f].
+    Cannot fail. *)
+
+val chars1_if : ?descr:string -> (char -> bool) -> string t
+(** Like {!chars_if}, but only non-empty strings.
+    Fails if the string is empty.
+    @param descr describes what kind of character was expected *)
+
 val endline : char t
 (** Parse '\n'. *)

@ -272,7 +353,9 @@ val many : 'a t -> 'a list t

 val optional : _ t -> unit t
 (** [optional p] tries to parse [p], and return [()] whether it
-    succeeded or failed. Cannot fail.
+    succeeded or failed. Cannot fail itself.
+    It consumes input if [p] succeeded (as much as [p] consumed), but
+    consumes not input if [p] failed.
    @since NEXT_RELEASE *)

 val try_ : 'a t -> 'a t
@ -307,6 +390,27 @@ val try_or : 'a t -> f:('a -> 'b t) -> else_:'b t -> 'b t
    @since NEXT_RELEASE
 *)

+val try_or_l :
+  ?msg:string ->
+  ?else_:'a t ->
+  (unit t * 'a t) list ->
+  'a t
+(** [try_or_l ?else_ l] tries each pair [(test, p)] in order.
+    If the n-th [test] succeeds, then [try_or_l l] behaves like n-th [p],
+    whether [p] fails or not.
+    If they all fail, and [else_] is defined, then it behaves like [else_].
+    If all fail, and [else_] is [None], then it fails as well.
+
+    This is a performance optimization compared to {!(<|>)}. We commit to a
+    branch if the test succeeds, without backtracking at all.
+
+    See {!lookahead_ignore} for a convenient way of writing the test conditions.
+
+    @param msg error message if all options fail
+
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
 val or_ : 'a t -> 'a t -> 'a t
 (** [or_ p1 p2] tries to parse [p1], and if it fails, tries [p2]
    from the same position.
@ -316,12 +420,6 @@ val both : 'a t -> 'b t -> ('a * 'b) t
 (** [both a b] parses [a], then [b], then returns the pair of their results.
    @since NEXT_RELEASE *)

-val set_error_message : string -> 'a t -> 'a t
-(** [set_error_message msg p] behaves like [p], but if [p] fails,
-    [set_error_message msg p] fails with [msg] instead.
-    @since NEXT_RELEASE
-*)
-
 val many1 : 'a t -> 'a list t
 (** [many1 p] is like [many p] excepts it fails if the
    list is empty (i.e. it needs [p] to succeed at least once). *)
@ -345,8 +443,26 @@ val lookahead : 'a t -> 'a t
    {b EXPERIMENTAL}
    @since NEXT_RELEASE *)

-val line : string t
-(** Parse a line, '\n' excluded.
+val lookahead_ignore : 'a t -> unit t
+(** [lookahead_ignore p] tries to parse input with [p],
+    and succeeds if [p] succeeds. However it doesn't consume any input
+    and returns [()], so in effect its only use-case is to detect
+    whether [p] succeeds, e.g. in {!cond}.
+
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
+val fix : ('a t -> 'a t) -> 'a t
+(** Fixpoint combinator. *)
+
+val line : slice t
+(** Parse a line, ['\n'] excluded, and position the cursor after the ['\n'].
+    @since NEXT_RELEASE *)
+
+val line_str : string t
+(** [line_str] is [line >|= Slice.to_string].
+    It parses the next line and turns the slice into a string.
+    The state points to after the ['\n'] character.
    @since NEXT_RELEASE *)

 val each_line : 'a t -> 'a list t
@ -354,8 +470,90 @@ val each_line : 'a t -> 'a list t
    {b EXPERIMENTAL}
    @since NEXT_RELEASE *)

-val fix : ('a t -> 'a t) -> 'a t
-(** Fixpoint combinator. *)
+val split_1 : on_char:char -> (slice * slice option) t
+(** [split_1 ~on_char] looks for [on_char] in the input, and returns a
+    pair [sl1, sl2], where:
+
+    - [sl1] is the slice of the input the precedes the first occurrence
+      of [on_char], or the whole input if [on_char] cannot be found.
+    - [sl2] is the slice that comes after [on_char],
+      or [None] if [on_char] couldn't be found.
+
+    The parser is now positioned at the end of the input.
+
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
+val split_list : on_char:char -> slice list t
+(** [split_n ~on_char] splits the input on all occurrences of [on_char],
+    returning a list of slices.
+
+    A useful specialization of this is {!each_line}, which is
+    basically [split_n ~on_char:'\n' p].
+
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
+val split_list_at_most : on_char:char -> int -> slice list t
+(** [split_list_at_most ~on_char n] applies [split_1 ~on_char] at most
+    [n] times, to get a list of [n+1] elements.
+    The last element might contain [on_char]. This is useful to limit the
+    amount of work done by {!split_list}.
+
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
+
+val split_2 : on_char:char -> (slice * slice) t
+(** [split_2 ~on_char] splits the input into exactly 2 fields,
+    and fails if the split yields less or more than 2 items.
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
+val split_3 : on_char:char -> (slice * slice * slice) t
+(** See {!split_2}
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
+val split_4 : on_char:char -> (slice * slice * slice * slice) t
+(** See {!split_2}
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
+val each_split : on_char:char -> 'a t -> 'a list t
+(** [split_list_map ~on_char p] uses [split_list ~on_char] to split
+    the input, then parses each chunk of the input thus obtained using [p].
+
+    The difference with [sep ~by:(char on_char) p] is that
+    [sep] calls [p] first, and only tries to find [on_char] after [p] returns.
+    While it is more flexible, this technique also means [p] has to be careful
+    not to consume [on_char] by error.
+
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
+val all : slice t
+(** [all] returns all the unconsumed input as a slice, and consumes it.
+    Use {!Slice.to_string} to turn it into a string.
+
+    Note that [lookahead all] can be used to {i peek} at the rest of the input
+    without consuming anything.
+
+    @since NEXT_RELEASE *)
+
+val all_str : string t
+(** [all_str] accepts all the remaining chars and extracts them into a
+    string. Similar to {!rest_of_input} but with a string.
+
+    {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+
+(* TODO
+val trim : slice t
+(** [trim] is like {!all}, but removes whitespace on the left and right.
+   {b EXPERIMENTAL}
+    @since NEXT_RELEASE *)
+ *)

 val memo : 'a t -> 'a t
 (** Memoize the parser. [memo p] will behave like [p], but when called