diff --git a/src/string/CCParse.ml b/src/string/CCParse.ml
index c227fe09..dc816a79 100644
--- a/src/string/CCParse.ml
+++ b/src/string/CCParse.ml
@@ -101,6 +101,51 @@ let input_of_string s =
     sub=(fun j len -> assert (j + len <= !i); String.sub s j len);
   }
 
+(* [input_of_chan ?size ic] is an input that reads [ic] lazily, in chunks of
+   at most [size] bytes. Every byte read so far is kept in the buffer [b], so
+   backtracking to any earlier position stays possible. *)
+let input_of_chan ?(size=1024) ic =
+  assert (size > 0);
+  let b = ref (Bytes.make size ' ') in
+  let n = ref 0 in  (* number of valid bytes in [b] *)
+  let i = ref 0 in  (* current index in buffer *)
+  let exhausted = ref false in (* input fully read? *)
+  let eoi() = raise (ParseError (!i, "unexpected EOI")) in
+  (* read a chunk of input; must not be called once [exhausted] is set *)
+  let read_more () =
+    assert (not !exhausted);
+    (* grow the buffer so that [size] more bytes always fit after [n] *)
+    if Bytes.length !b - !n < size then (
+      let b' = Bytes.make (Bytes.length !b + 2 * size) ' ' in
+      Bytes.blit !b 0 b' 0 !n;
+      b := b';
+    );
+    let len = input ic !b !n size in
+    exhausted := len = 0; (* [input] returns 0 only at end of file *)
+    n := !n + len
+  in
+  (* return the current char and advance; raise [ParseError] at end of input *)
+  let next() =
+    if !exhausted && !i = !n then eoi();
+    let c = Bytes.get !b !i in
+    incr i;
+    (* Prefetch when the buffer is drained so that [is_done] stays accurate.
+       Reaching EOF here is not an error: [c] was read successfully, so the
+       last char of the input must still be returned. The [exhausted] guard
+       also keeps [read_more] from being re-run after a backtrack. *)
+    if !i = !n && not !exhausted then read_more();
+    c
+  and is_done () = !exhausted && !i = !n in
+  (* fetch first chars *)
+  read_more();
+  { is_done;
+    cur=(fun () -> assert (not (is_done())); Bytes.get !b !i);
+    next;
+    pos=(fun() -> !i);
+    backtrack=(fun j -> assert (0 <= j && j <= !i); i:=j);
+    sub=(fun j len -> assert (j + len <= !i); Bytes.sub_string !b j len);
+  }
+
 type 'a t = input -> 'a
 
 let return x _ = x
@@ -234,11 +279,37 @@ let parse_exn ~input p = p input
 let parse ~input p =
   try `Ok (parse_exn ~input p)
   with ParseError (i, msg) ->
-    `Error (Printf.sprintf "at position %d: error %s" i msg)
+    `Error (Printf.sprintf "at position %d: error, %s" i msg)
 
 let parse_string s p = parse ~input:(input_of_string s) p
 let parse_string_exn s p = parse_exn ~input:(input_of_string s) p
 
+(* Parse the content of [file] with [p]; the channel is closed in all cases.
+   Raises [ParseError] if parsing fails and [Sys_error] on I/O errors. *)
+let parse_file_exn ?size ~file p =
+  let ic = open_in file in
+  try
+    (* [input_of_chan] reads a first chunk right away and may raise
+       [Sys_error], so it has to run under the handler that closes [ic] *)
+    let input = input_of_chan ?size ic in
+    let res = parse_exn ~input p in
+    close_in ic;
+    res
+  with e ->
+    (* [close_in_noerr] so that a failing close cannot mask [e] *)
+    close_in_noerr ic;
+    raise e
+
+(* Same as [parse_file_exn], but parse and I/O failures become [`Error]. *)
+let parse_file ?size ~file p =
+  try
+    `Ok (parse_file_exn ?size ~file p)
+  with
+  | ParseError (i, msg) ->
+    `Error (Printf.sprintf "at position %d: error, %s" i msg)
+  | Sys_error s ->
+    `Error (Printf.sprintf "error while reading %s: %s" file s)
+
 module U = struct
   let sep_ = sep
 
diff --git a/src/string/CCParse.mli b/src/string/CCParse.mli
index 106abc73..363e45a2 100644
--- a/src/string/CCParse.mli
+++ b/src/string/CCParse.mli
@@ -70,13 +70,25 @@ exception ParseError of int * string (** position * message *)
 type input = {
   is_done : unit -> bool; (** End of input? *)
   cur : unit -> char; (** Current char *)
-  next : unit -> char; (** if not {!is_done}, move to next char *)
+  next : unit -> char;
+  (** Returns the current char and moves to the next one.
+      Raises [ParseError] when already at the end of input (see {!is_done});
+      the last char of the input is still returned normally. *)
+
   pos : unit -> int; (** Current pos *)
   backtrack : int -> unit; (** Restore to previous pos *)
   sub : int -> int -> string; (** [sub pos len] extracts slice from [pos] with [len] *)
 }
 
 val input_of_string : string -> input
+(** [input_of_string s] is an input that reads from the string [s] *)
+
+val input_of_chan : ?size:int -> in_channel -> input
+(** [input_of_chan ic] lazily reads the content of [ic] as parsing goes.
+    All content that is read is saved to an internal buffer for backtracking.
+    @param size number of bytes read at once from [ic] (default 1024,
+    must be positive)
+    @since NEXT_RELEASE *)
 
 (** {2 Combinators} *)
 
@@ -136,6 +148,18 @@ val parse_exn : input:input -> 'a t -> 'a (** @raise ParseError if it fails *)
 
 val parse_string : string -> 'a t -> 'a or_error
 val parse_string_exn : string -> 'a t -> 'a (** @raise ParseError if it fails *)
+val parse_file : ?size:int -> file:string -> 'a t -> 'a or_error
+(** [parse_file ~file p] parses [file] with [p] by opening the file
+    and using {!input_of_chan}. Parse errors and [Sys_error]s are
+    returned as [`Error].
+    @param size size of chunks read from file
+    @since NEXT_RELEASE *)
+
+val parse_file_exn : ?size:int -> file:string -> 'a t -> 'a
+(** Unsafe version of {!parse_file}.
+    @raise ParseError if parsing fails
+    @raise Sys_error if the file cannot be opened or read
+    @since NEXT_RELEASE *)
 
 (** {2 Utils} *)
 