From 84173382db57bda7b605db39e29a4cb60103ffb3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 7 Apr 2023 11:45:12 -0400 Subject: [PATCH] feat(CCParse): add `take_until_success` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this reads a slice using the given parser to parse the end delimiter (e.g "end gpg signature" 😉) --- src/core/CCParse.ml | 30 ++++++++++++++++++++++++++++++ src/core/CCParse.mli | 9 +++++++++ tests/core/t_parse.ml | 11 +++++++++++ 3 files changed, 50 insertions(+) diff --git a/src/core/CCParse.ml b/src/core/CCParse.ml index fdb43481..aafe1a8a 100644 --- a/src/core/CCParse.ml +++ b/src/core/CCParse.ml @@ -600,6 +600,36 @@ let take len : slice t = )); } +let take_until_success p : (slice * _) t = + { + run = + (fun st ~ok ~err -> + let i = ref st.i in + let st_after_p = ref st in + let continue = ref true in + let res = ref None in + + while !continue && !i < st.j do + let st' = { st with i = !i } in + p.run st' + ~ok:(fun new_st x -> + (* success *) + res := Some x; + continue := false; + (* parsing will continue where [p] left off *) + st_after_p := new_st) + ~err:(fun _ -> incr i) + done; + + match !res with + | None -> + err + (mk_error_ st (const_str_ "take_until_success: no position worked")) + | Some x -> + let slice = { st with j = !i } in + ok !st_after_p (slice, x)); + } + let any_char_n len : _ t = take len >|= Slice.to_string let exact s = diff --git a/src/core/CCParse.mli b/src/core/CCParse.mli index 7d1da442..0c8b44bd 100644 --- a/src/core/CCParse.mli +++ b/src/core/CCParse.mli @@ -304,6 +304,15 @@ val chars_fold_transduce : @since 3.6 *) +val take_until_success : 'a t -> (slice * 'a) t +(** [take_until_success p] accumulates characters of the input into a slice, + until [p] successfully parses a value [x]; then it returns [slice, x]. 
+ + {b NOTE} performance-wise, if [p] does a lot of work at each position, + this can be costly (think naive substring search if [p] is [string "very long needle"]). + + @since NEXT_RELEASE *) + val take : int -> slice t (** [take len] parses exactly [len] characters from the input. Fails if the input doesn't contain at least [len] chars. diff --git a/tests/core/t_parse.ml b/tests/core/t_parse.ml index d83844ee..102fe132 100644 --- a/tests/core/t_parse.ml +++ b/tests/core/t_parse.ml @@ -287,3 +287,14 @@ eq ~printer:Q.Print.(errpp (pair int int)) (Ok (1, 2)) U.(parse_string (pair int int) "(1 , 2 )") +;; + +eq + ~printer:Q.Print.(errpp (pair (pair string string) string)) + (Ok (("!this is the text between!", "LOL"), " and a lot of other stuff")) + (parse_string + (string "COUCOU" + *> let* slice, x = take_until_success (string "LOL") in + let+ rest = take_if (fun _ -> true) <* eoi in + (Slice.to_string slice, x), Slice.to_string rest) + "COUCOU!this is the text between!LOL and a lot of other stuff")