From 7318162c5552d5bdd3cdb2f29c3eaa1d5a5cef3f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 4 May 2021 17:48:49 -0400 Subject: [PATCH] wip: rework CCParse --- src/core/CCParse.ml | 60 +++++++++++++++++++++++++++++++++++++++++--- src/core/CCParse.mli | 27 ++++++++++++-------- 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/src/core/CCParse.ml b/src/core/CCParse.ml index 027dd46a..f8a15cc8 100644 --- a/src/core/CCParse.ml +++ b/src/core/CCParse.ml @@ -30,9 +30,16 @@ open CCShims_ | N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b) | L x -> Printf.sprintf "L %d" x - let errpptree = function - | Ok x -> "Ok " ^ pptree x + let errpp pp = function + | Ok x -> "Ok " ^ pp x | Error s -> "Error " ^ s + + let errpptree = errpp pptree + + let erreq eq x y = match x, y with + | Ok x, Ok y -> eq x y + | Error _ , Error _ -> true + | _ -> false *) (*$= & ~printer:errpptree @@ -125,6 +132,8 @@ module MemoTbl = struct | Ok of 'a end +(* TODO: [type position = {state: state; i: int}] and recompute line, col + on demand *) type position = int * int * int (* pos, line, column *) type parse_branch = (line_num * col_num * string option) list @@ -137,6 +146,7 @@ type state = { mutable branch: parse_branch; memo : MemoTbl.t; (* Memoization table, if any *) } +(* TODO: remove lnum/cnum, recompute them lazily in errors *) exception ParseError of parse_branch * (unit -> string) @@ -202,7 +212,17 @@ let backtrack st (i',l',c') = st.cnum <- c'; () +(* FIXME: + remove all backtracking stuff and instead, pass the state as parameter + to [ok] and [err], with an explicit offset that changes. *) + type 'a t = state -> ok:('a -> unit) -> err:(exn -> unit) -> unit +(** Takes the input and two continuations: + {ul + {- [ok] to call with the result when it's done} + {- [err] to call when the parser met an error} + } +*) let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x let pure = return @@ -500,23 +520,55 @@ module U = struct sep_ ~by:(skip_white *> string sep *> skip_white) p <* skip_white <* string stop + let int = + skip_white *> chars1_if (fun c -> is_num c || char_equal c '-') >>= fun s -> try return (int_of_string s) with Failure _ -> fail "expected an int" + let hexa_int = + (try_ (string "0x") <|> return "") *> + begin + chars1_if (function '0' .. '9' | 'a'..'f' | 'A'..'F' -> true | _ -> false) + >|= fun s -> + let i = ref 0 in + String.iter + (fun c -> + let n = match c with + | '0' .. '9' -> Char.code c - Char.code '0' + | 'a' .. 'f' -> Char.code c - Char.code 'a' + 10 + | 'A' .. 'F' -> Char.code c - Char.code 'A' + 10 + | _ -> assert false + in + i := !i * 16 + n) + s; + !i + end + + (*$= & ~printer:(errpp Q.Print.int) ~cmp:(erreq (=)) + (Ok 16) (parse_string U.hexa_int "0x10") + (Ok 16) (parse_string U.hexa_int "10") + (Error "") (parse_string U.hexa_int "x10") + (Error "") (parse_string U.hexa_int "0xz") + *) + let prepend_str c s = String.make 1 c ^ s let word = map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num) let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 = - string start *> skip_white *> + skip_white *> string start *> skip_white *> p1 >>= fun x1 -> skip_white *> string sep *> skip_white *> p2 >>= fun x2 -> - string stop *> return (x1,x2) + skip_white *> string stop *> return (x1,x2) + + (*$= & ~printer:Q.Print.(errpp (pair int int)) + (Ok(1,2)) U.(parse_string (pair int int) "(1 , 2 )") + *) let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 = string start *> skip_white *> diff --git a/src/core/CCParse.mli b/src/core/CCParse.mli index ecf90fa5..599c066c 100644 --- a/src/core/CCParse.mli +++ b/src/core/CCParse.mli @@ -3,6 +3,9 @@ (** {1 Very Simple Parser Combinators} + These combinators can be used to write very simple parsers, for example + to extract data from a line-oriented file. + {[ open CCParse;; @@ -70,13 +73,12 @@ val state_of_string : string -> state (** {2 Combinators} *) -type 'a t = state -> ok:('a -> unit) -> err:(exn -> unit) -> unit -(** Takes the input and two continuations: - {ul - {- [ok] to call with the result when it's done} - {- [err] to call when the parser met an error} - } - @raise ParseError in case of failure. *) +type 'a t +(** The abstract type of parsers that return a value of type ['a] (or fail). + + @raise ParseError in case of failure. + @since NEXT_RELEASE the type is private. +*) val return : 'a -> 'a t (** Always succeeds, without consuming its input. *) @@ -299,8 +301,8 @@ end (** {2 Utils} - This is useful to parse OCaml-like values in a simple way. *) - + This is useful to parse OCaml-like values in a simple way. + All the parsers are whitespace-insensitive (they skip whitespace). *) module U : sig val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t (** [list p] parses a list of [p], with the OCaml conventions for @@ -308,7 +310,12 @@ module U : sig Whitespace between items are skipped. *) val int : int t - (** Parse an int. *) + (** Parse an int in decimal representation. *) + + val hexa_int : int t + (** Parse an int int hexadecimal format. Accepts an optional [0x] prefix, + and ignores capitalization. + @since NEXT_RELEASE *) val word : string t (** Non empty string of alpha num, start with alpha. *)