From 352fc10d3b64bd2158a3c32ab76ca378a475b990 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 6 Jun 2021 22:49:43 -0400 Subject: [PATCH] more doc for CCParse --- src/core/CCParse.ml | 13 +++++++++++++ src/core/CCParse.mli | 14 +++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/core/CCParse.ml b/src/core/CCParse.ml index 2fbdf451..175ef525 100644 --- a/src/core/CCParse.ml +++ b/src/core/CCParse.ml @@ -190,6 +190,19 @@ type state = { j: int; (* end pointer in [str], excluded. [len = j-i] *) memo : Memo_state.t option ref; (* Memoization table, if any *) } +(* FIXME: replace memo with: + [global : global_st ref] + + where: + [type global_st = { + mutable memo: Memo_state.t option; + line_offsets: int CCVector.vector; + }] + + with line_offsets used to cache the offset where each line begins, + and is computed lazily, to make {!Position.line_and_column} + faster if called many times. + *) let[@inline] char_equal (a : char) b = Stdlib.(=) a b let string_equal = String.equal diff --git a/src/core/CCParse.mli b/src/core/CCParse.mli index c3a035c6..2edf94b5 100644 --- a/src/core/CCParse.mli +++ b/src/core/CCParse.mli @@ -203,7 +203,19 @@ val char : char -> char t type slice (** A slice of the input, as returned by some combinators such - as {!split_1} or {split_n}. + as {!split_1} or {!split_list} or {!take}. + + The idea is that one can use some parsers to cut the input into slices, + e.g. split into lines, or split a line into fields (think CSV or TSV). + Then a variety of parsers can be used on each slice to extract data from + it using {!recurse}. + + Slices contain enough information to make it possible + for [recurse slice p] to report failures (if [p] fails) using locations + from the original input, not relative to the slice. + Therefore, even after splitting the input into lines using, say, {!each_line}, + a failure to parse the 500th line will be reported at line 500 and + not at line 1. {b EXPERIMENTAL} @since NEXT_RELEASE *)