diff --git a/_oasis b/_oasis index 12c1bf34..2f10d395 100644 --- a/_oasis +++ b/_oasis @@ -80,7 +80,7 @@ Library "containers_iter" Library "containers_string" Path: src/string - Modules: Containers_string, CCKMP, CCLevenshtein, CCApp_parse, CCParse + Modules: Containers_string, CCLevenshtein, CCApp_parse BuildDepends: bytes FindlibName: string FindlibParent: containers diff --git a/src/string/CCKMP.ml b/src/string/CCKMP.ml deleted file mode 100644 index 5511fad1..00000000 --- a/src/string/CCKMP.ml +++ /dev/null @@ -1,154 +0,0 @@ - -(* This file is free software, part of containers. See file "license" for more details. *) - -(** {1 Knuth-Morris-Pratt} *) - -module type STRING = sig - type t - type char - - val length : t -> int - val get : t -> int -> char - val char_equal : char -> char -> bool -end - -type 'a gen = unit -> 'a option -type 'a sequence = ('a -> unit) -> unit - -module type S = sig - type string - - type pattern - (** Compiled pattern (needle: string to search in another string) *) - - val compile : string -> pattern - (** Compile a string into a pattern *) - - val find : pattern:pattern -> string -> int -> int option - (** [find ~pattern s i] finds the next occurrence of [pattern] - in [s] starting at offset [i], and returns it, - or returns [None] if the pattern doesn't occur. *) - - val search : pattern:pattern -> string -> int option - (** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *) - - val find_all : pattern:pattern -> string -> int -> int gen - (** Generator on all occurrences of the pattern *) - - val seq : pattern:pattern -> string -> int -> int sequence - (** Iterate on matching positions *) - - (** {6 One-shot functions that compile the pattern on-the-fly} *) - - val search' : pattern:string -> string -> int option - - val find_all' : pattern:string -> string -> int gen - - val seq' : pattern:string -> string -> int sequence -end - -module Make(Str : STRING) = struct - type string = Str.t - type pattern = { - failure : int array; - str : Str.t; - len : int; (* = length str = length failure *) - } - - let compile str = - let len = Str.length str in - match len with - | 0 -> {failure=[| |]; len; str;} - | 1 -> {failure=[| -1 |]; len; str;} - | _ -> - (* at least 2 elements, the algorithm can work *) - let failure = Array.make len 0 in - (* i: current index in str *) - let i = ref 1 in - (* j: index of candidate substring *) - let j = ref 0 in - while !i < len-1 do - match !j with - | _ when Str.char_equal (Str.get str !i) (Str.get str !j) -> - (* substring starting at !j continues matching current char *) - i := !i+1; - j := !j+1; - failure.(!i) <- !j; - | 0 -> - (* back to the beginning *) - i := !i+1; - failure.(!i) <- 0; - | _ -> - (* fallback for the prefix string *) - assert (!j > 0); - j := failure.(!j) - done; - { failure; str; len; } - - let find ~pattern s idx = - (* proper search function. - [i] index in [s] - [j] index in [pattern] - [len] length of [s] *) - let len = Str.length s in - let i = ref idx in - let j = ref 0 in - while !i < len && !j < pattern.len do - let c = Str.get s !i in - let expected = Str.get pattern.str !j in - if Str.char_equal c expected - then ( - (* char matches *) - i := !i + 1; j := !j + 1 - ) else - if !j=0 - then (* beginning of the pattern *) - i := !i + 1 - else (* follow the failure link *) - j := pattern.failure.(!j) - done; - if !j = pattern.len - then Some (!i-pattern.len) - else None - - let search ~pattern s = find ~pattern s 0 - - let find_all ~pattern s i = - let i = ref i in - fun () -> - if !i >= Str.length s - then None - else match find ~pattern s !i with - | None -> None - | (Some j) as res -> - i := j + pattern.len; - res - - let seq ~pattern s i k = - let rec iter i = - match find ~pattern s i with - | None -> () - | Some j -> - k j; - iter (j+pattern.len) - in - iter i - - let search' ~pattern s = - search ~pattern:(compile pattern) s - - let find_all' ~pattern s = - find_all ~pattern:(compile pattern) s 0 - - let seq' ~pattern s = - seq ~pattern:(compile pattern) s 0 -end - -include Make(struct - type char_ = char - type char = char_ - type t = string - let char_equal a b = a=b - let get = String.get - let length = String.length -end) diff --git a/src/string/CCKMP.mli b/src/string/CCKMP.mli deleted file mode 100644 index 13b059f5..00000000 --- a/src/string/CCKMP.mli +++ /dev/null @@ -1,52 +0,0 @@ - -(* This file is free software, part of containers. See file "license" for more details. *) - -(** {1 Knuth-Morris-Pratt} *) - -module type STRING = sig - type t - type char - - val length : t -> int - val get : t -> int -> char - val char_equal : char -> char -> bool -end - -type 'a gen = unit -> 'a option -type 'a sequence = ('a -> unit) -> unit - -module type S = sig - type string - - type pattern - (** Compiled pattern (needle: string to search in another string) *) - - val compile : string -> pattern - (** Compile a string into a pattern *) - - val find : pattern:pattern -> string -> int -> int option - (** [find ~pattern s i] finds the next occurrence of [pattern] - in [s] starting at offset [i], and returns it, - or returns [None] if the pattern doesn't occur. *) - - val search : pattern:pattern -> string -> int option - (** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *) - - val find_all : pattern:pattern -> string -> int -> int gen - (** Generator on all occurrences of the pattern *) - - val seq : pattern:pattern -> string -> int -> int sequence - (** iterate on matching positions *) - - (** {6 One-shot functions that compile the pattern on-the-fly} *) - - val search' : pattern:string -> string -> int option - - val find_all' : pattern:string -> string -> int gen - - val seq' : pattern:string -> string -> int sequence -end - -module Make(Str : STRING) : S with type string = Str.t - -include S with type string = string diff --git a/src/string/CCParse.ml b/src/string/CCParse.ml deleted file mode 100644 index 2c23f504..00000000 --- a/src/string/CCParse.ml +++ /dev/null @@ -1,482 +0,0 @@ -(* -copyright (c) 2013-2015, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 Very Simple Parser Combinators} *) - -type 'a or_error = [`Ok of 'a | `Error of string] - -type line_num = int -type col_num = int - -module H = Hashtbl.Make(struct - type t = int * int (* id of parser, position *) - let equal ((a,b):t)(c,d) = a=c && b=d - let hash = Hashtbl.hash -end) - -type memo_ = (unit -> unit) H.t lazy_t - -type input = { - is_done : unit -> bool; (** End of input? *) - cur : unit -> char; (** Current char *) - next : unit -> char; (** if not {!is_done}, move to next char *) - pos : unit -> int; (** Current pos *) - lnum : unit -> line_num; (** Line number @since 0.13 *) - cnum : unit -> col_num; (** Column number @since 0.13 *) - memo : memo_; (** Memoization table, if any *) - backtrack : int -> unit; (** Restore to previous pos *) - sub : int -> int -> string; (** Extract slice from [pos] with [len] *) -} - -exception ParseError of line_num * col_num * (unit -> string) - -(*$inject - module T = struct - type tree = L of int | N of tree * tree - end - open T - - let mk_leaf x = L x - let mk_node x y = N(x,y) - - let ptree = fix @@ fun self -> - skip_space *> - ( (char '(' *> (pure mk_node <*> self <*> self) <* char ')') - <|> - (U.int >|= mk_leaf) ) - - let ptree' = fix_memo @@ fun self -> - skip_space *> - ( (char '(' *> (pure mk_node <*> self <*> self) <* char ')') - <|> - (U.int >|= mk_leaf) ) - - let rec pptree = function - | N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b) - | L x -> Printf.sprintf "L %d" x - - let errpptree = function - | `Ok x -> "Ok " ^ pptree x - | `Error s -> "Error " ^ s -*) - -(*$= & ~printer:errpptree - (`Ok (N (L 1, N (L 2, L 3)))) \ - (parse_string ~p:ptree "(1 (2 3))" ) - (`Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \ - (parse_string ~p:ptree "((1 2) (3 (4 5)))" ) - (`Ok (N (L 1, N (L 2, L 3)))) \ - (parse_string ~p:ptree' "(1 (2 3))" ) - (`Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \ - (parse_string ~p:ptree' "((1 2) (3 (4 5)))" ) -*) - -(*$R - let p = U.list ~sep:"," U.word in - let printer = function - | `Ok l -> "Ok " ^ CCFormat.(to_string (list string)) l - | `Error s -> "Error " ^ s - in - assert_equal ~printer - (`Ok ["abc"; "de"; "hello"; "world"]) - (parse_string ~p "[abc , de, hello ,world ]"); - *) - -(*$R - let test n = - let p = CCParse.(U.list ~sep:"," U.int) in - - let l = CCList.(1 -- n) in - let l_printed = - CCFormat.(to_string (list ~start:"[" ~stop:"]" ~sep:"," int)) l in - - let l' = CCParse.parse_string_exn ~p l_printed in - - assert_equal ~printer:Q.Print.(list int) l l' - in - test 100_000; - test 400_000; - -*) - -(* test with a temporary file *) -(*$R - let test n = - let p = CCParse.(U.list ~sep:"," U.int) in - - let l = CCList.(1 -- n) in - let l' = - CCIO.File.with_temp ~temp_dir:"/tmp/" - ~prefix:"containers_test" ~suffix:"" - (fun name -> - (* write test into file *) - CCIO.with_out name - (fun oc -> - let fmt = Format.formatter_of_out_channel oc in - Format.fprintf fmt "@[%a@]@." - CCFormat.(list ~start:"[" ~stop:"]" ~sep:"," int) l); - (* parse it back *) - CCParse.parse_file_exn ~size:1024 ~file:name ~p) - in - assert_equal ~printer:Q.Print.(list int) l l' - in - test 100_000; - test 400_000; -*) - -let const_ x () = x - -let input_of_string s = - let i = ref 0 in - let line = ref 1 in (* line *) - let col = ref 1 in (* column *) - { is_done=(fun () -> !i = String.length s); - cur=(fun () -> s.[!i]); - next=(fun () -> - if !i = String.length s - then raise (ParseError (!line, !col, const_ "unexpected EOI")) - else ( - let c = s.[!i] in - incr i; - if c='\n' then (incr line; col:=1) else incr col; - c - ) - ); - lnum=(fun () -> !line); - cnum=(fun () -> !col); - memo=lazy (H.create 32); - pos=(fun () -> !i); - backtrack=(fun j -> assert (0 <= j && j <= !i); i := j); - sub=(fun j len -> assert (j + len <= !i); String.sub s j len); - } - -let input_of_chan ?(size=1024) ic = - assert (size > 0); - let b = ref (Bytes.make size ' ') in - let n = ref 0 in (* length of buffer *) - let i = ref 0 in (* current index in buffer *) - let line = ref 1 in - let col = ref 1 in - let exhausted = ref false in (* input fully read? *) - let eoi() = raise (ParseError (!line, !col, const_ "unexpected EOI")) in - (* read a chunk of input *) - let read_more () = - assert (not !exhausted); - (* resize *) - if Bytes.length !b - !n < size then ( - let b' = Bytes.make (Bytes.length !b + 2 * size) ' ' in - Bytes.blit !b 0 b' 0 !n; - b := b'; - ); - let len = input ic !b !n size in - exhausted := len = 0; - n := !n + len - in - (* read next char *) - let next() = - if !exhausted && !i = !n then eoi(); - let c = Bytes.get !b !i in - incr i; - if c='\n' then (incr line; col := 1) else incr col; - if !i = !n then ( - read_more(); - if !exhausted then eoi(); - assert (!i < !n); - ); - c - and is_done () = !exhausted && !i = !n in - (* fetch first chars *) - read_more(); - { is_done=(fun () -> !exhausted && !i = !n); - cur=(fun () -> assert (not (is_done())); Bytes.get !b !i); - next; - pos=(fun() -> !i); - lnum=(fun () -> !line); - cnum=(fun () -> !col); - memo=lazy (H.create 32); - backtrack=(fun j -> assert (0 <= j && j <= !i); i:=j); - sub=(fun j len -> assert (j + len <= !i); Bytes.sub_string !b j len); - } - -type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit - -let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x -let pure = return -let (>|=) : 'a t -> ('a -> 'b) -> 'b t - = fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> ok (f x)) -let (>>=) : 'a t -> ('a -> 'b t) -> 'b t - = fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> f x st ~err ~ok) -let (<*>) : ('a -> 'b) t -> 'a t -> 'b t - = fun f x st ~ok ~err -> - f st ~err ~ok:(fun f' -> x st ~err ~ok:(fun x' -> ok (f' x'))) -let (<* ) : 'a t -> _ t -> 'a t - = fun x y st ~ok ~err -> - x st ~err ~ok:(fun res -> y st ~err ~ok:(fun _ -> ok res)) -let ( *>) : _ t -> 'a t -> 'a t - = fun x y st ~ok ~err -> - x st ~err ~ok:(fun _ -> y st ~err ~ok) - -let junk_ st = ignore (st.next ()) -let pf = Printf.sprintf -let fail_ ~err st msg = err (ParseError (st.lnum(), st.cnum(), msg)) - -let eoi st ~ok ~err = - if st.is_done() - then ok () - else fail_ ~err st (const_ "expected EOI") - -let fail msg st ~ok:_ ~err = fail_ ~err st (const_ msg) -let nop _ ~ok ~err:_ = ok() - -let char c = - let msg = pf "expected '%c'" c in - fun st ~ok ~err -> if st.next () = c then ok c else fail_ ~err st (const_ msg) - -let char_if p st ~ok ~err = - let c = st.next () in - if p c then ok c else fail_ ~err st (fun () -> pf "unexpected char '%c'" c) - -let chars_if p st ~ok ~err:_ = - let i = st.pos () in - let len = ref 0 in - while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done; - ok (st.sub i !len) - -let chars1_if p st ~ok ~err = - chars_if p st ~err - ~ok:(fun s -> - if s = "" then fail_ ~err st (const_ "unexpected sequence of chars"); - ok s - ) - -let rec skip_chars p st ~ok ~err = - if not (st.is_done ()) && p (st.cur ()) then ( - junk_ st; - skip_chars p st ~ok ~err - ) else ok() - -let is_alpha = function - | 'a' .. 'z' | 'A' .. 'Z' -> true - | _ -> false -let is_num = function '0' .. '9' -> true | _ -> false -let is_alpha_num = function - | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true - | _ -> false -let is_space = function ' ' | '\t' -> true | _ -> false -let is_white = function ' ' | '\t' | '\n' -> true | _ -> false -let (~~~) p c = not (p c) -let (|||) p1 p2 c = p1 c || p2 c -let (&&&) p1 p2 c = p1 c && p2 c - -let endline = char '\n' -let space = char_if is_space -let white = char_if is_white - -let skip_space = skip_chars is_space -let skip_white = skip_chars is_white - -(* XXX: combine errors? *) - -let (<|>) : 'a t -> 'a t -> 'a t - = fun x y st ~ok ~err -> - let i = st.pos () in - x st ~ok - ~err:(fun _ -> - st.backtrack i; (* restore pos *) - y st ~ok ~err - ) - -let string s st ~ok ~err = - let rec check i = - i = String.length s || - (s.[i] = st.next () && check (i+1)) - in - if check 0 then ok s else fail_ ~err st (fun () -> pf "expected \"%s\"" s) - -let rec many_rec : 'a t -> 'a list -> 'a list t = fun p acc st ~ok ~err -> - if st.is_done () then ok(List.rev acc) - else - let i = st.pos () in - p st ~err - ~ok:(fun x -> - many_rec p (x :: acc) st ~ok - ~err:(fun _ -> - st.backtrack i; - ok(List.rev acc) - ) - ) - -let many : 'a t -> 'a list t - = fun p st ~ok ~err -> many_rec p [] st ~ok ~err - -let many1 : 'a t -> 'a list t = - fun p st ~ok ~err -> - p st ~err ~ok:(fun x -> many_rec p [x] st ~err ~ok) - -let rec skip p st ~ok ~err = - let i = st.pos () in - p st - ~ok:(fun _ -> skip p st ~ok ~err) - ~err:(fun _ -> - st.backtrack i; - ok() - ) - -let rec sep1 ~by p = - p >>= fun x -> - let cont = by *> sep ~by p >|= fun tl -> x :: tl in - cont <|> return [x] -and sep ~by p = - sep1 ~by p <|> return [] - -module MemoTbl = struct - (* table of closures, used to implement universal type *) - type t = memo_ - - let create n = lazy (H.create n) - - (* unique ID for each parser *) - let id_ = ref 0 - - type 'a res = - | Fail of exn - | Ok of 'a -end - -let fix f = - let rec p st ~ok ~err = f p st ~ok ~err in - p - -let memo (type a) (p:a t):a t = - let id = !MemoTbl.id_ in - incr MemoTbl.id_; - let r = ref None in (* used for universal encoding *) - fun input ~ok ~err -> - let i = input.pos () in - let (lazy tbl) = input.memo in - try - let f = H.find tbl (i, id) in - (* extract hidden value *) - r := None; - f (); - begin match !r with - | None -> assert false - | Some (MemoTbl.Ok x) -> ok x - | Some (MemoTbl.Fail e) -> err e - end - with Not_found -> - (* parse, and save *) - p input - ~err:(fun e -> - H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Fail e)); - err e - ) - ~ok:(fun x -> - H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Ok x)); - ok x - ) - -let fix_memo f = - let rec p = - let p' = lazy (memo p) in - fun st ~ok ~err -> f (Lazy.force p') st ~ok ~err - in - p - -let parse_exn ~input ~p = - let res = ref None in - p input ~ok:(fun x -> res := Some x) ~err:(fun e -> raise e); - match !res with - | None -> failwith "no input returned by parser" - | Some x -> x - -let parse ~input ~p = - try `Ok (parse_exn ~input ~p) - with ParseError (lnum, cnum, msg) -> - `Error (Printf.sprintf "at line %d, column %d: error, %s" lnum cnum (msg ())) - -let parse_string s ~p = parse ~input:(input_of_string s) ~p -let parse_string_exn s ~p = parse_exn ~input:(input_of_string s) ~p - -let parse_file_exn ?size ~file ~p = - let ic = open_in file in - let input = input_of_chan ?size ic in - try - let res = parse_exn ~input ~p in - close_in ic; - res - with e -> - close_in ic; - raise e - -let parse_file ?size ~file ~p = - try - `Ok (parse_file_exn ?size ~file ~p) - with - | ParseError (lnum, cnum, msg) -> - `Error (Printf.sprintf "at line %d, column %d: error, %s" lnum cnum (msg ())) - | Sys_error s -> - `Error (Printf.sprintf "error while reading %s: %s" file s) - -module U = struct - let sep_ = sep - - let list ?(start="[") ?(stop="]") ?(sep=";") p = - string start *> skip_white *> - sep_ ~by:(skip_white *> string sep *> skip_white) p <* - skip_white <* string stop - - let int = - chars1_if (is_num ||| (=) '-') - >>= fun s -> - try return (int_of_string s) - with Failure _ -> fail "expected an int" - - let map f x = x >|= f - let map2 f x y = pure f <*> x <*> y - let map3 f x y z = pure f <*> x <*> y <*> z - - let prepend_str c s = String.make 1 c ^ s - - let word = - map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num) - - let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 = - string start *> skip_white *> - p1 >>= fun x1 -> - skip_white *> string sep *> skip_white *> - p2 >>= fun x2 -> - string stop *> return (x1,x2) - - let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 = - string start *> skip_white *> - p1 >>= fun x1 -> - skip_white *> string sep *> skip_white *> - p2 >>= fun x2 -> - skip_white *> string sep *> skip_white *> - p3 >>= fun x3 -> - string stop *> return (x1,x2,x3) - -end diff --git a/src/string/CCParse.mli b/src/string/CCParse.mli deleted file mode 100644 index 66f16870..00000000 --- a/src/string/CCParse.mli +++ /dev/null @@ -1,321 +0,0 @@ -(* -copyright (c) 2013-2015, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** -{1 Very Simple Parser Combinators} - -{b status} still a bit unstable, the type {!'a t} might still change. - -Examples: - -{6 parse recursive structures} - -{[ -open Containers_string.Parse;; - -type tree = L of int | N of tree * tree;; - -let mk_leaf x = L x -let mk_node x y = N(x,y) - -let ptree = fix @@ fun self -> - skip_space *> - ( (char '(' *> (pure mk_node <*> self <*> self) <* char ')') - <|> - (U.int >|= mk_leaf) ) -;; - -parse_string_exn "(1 (2 3))" ptree;; -parse_string_exn "((1 2) (3 (4 5)))" ptree;; - -]} - -{6 Parse a list of words} - -{[ -open Containers_string.Parse;; -let p = U.list ~sep:"," U.word;; -parse_string_exn "[abc , de, hello ,world ]" p;; -]} - -{6 Stress Test} -This makes a list of 100_000 integers, prints it and parses it back. - -{[ -let p = CCParse.(U.list ~sep:"," U.int);; - -let l = CCList.(1 -- 100_000);; -let l_printed = - CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l;; - -let l' = CCParse.parse_string_exn ~p l_printed;; - -assert (l=l');; -]} - -@since 0.11 -*) - -type 'a or_error = [`Ok of 'a | `Error of string] - -type line_num = int (** @since 0.13 *) -type col_num = int (** @since 0.13 *) - -exception ParseError of line_num * col_num * (unit -> string) -(** position * message - - This type changed at 0.13 *) - -(** {2 Input} *) - -(** @since 0.13 *) -module MemoTbl : sig - type t - val create: int -> t (** New memoization table *) -end - -type input = { - is_done : unit -> bool; (** End of input? *) - cur : unit -> char; (** Current char *) - next : unit -> char; - (** Returns current char; - if not {!is_done}, move to next char, - otherwise throw ParseError *) - - pos : unit -> int; (** Current pos *) - lnum : unit -> line_num; (** Line number @since 0.13 *) - cnum : unit -> col_num; (** Column number @since 0.13 *) - memo : MemoTbl.t; (** Memoization table, if any *) - backtrack : int -> unit; (** Restore to previous pos *) - sub : int -> int -> string; (** [sub pos len] extracts slice from [pos] with [len] *) -} -(** The type of input, which must allow for backtracking somehow. - This type is {b unstable} and its details might change. *) - -val input_of_string : string -> input -(** Parse the string *) - -val input_of_chan : ?size:int -> in_channel -> input -(** [input_of_chan ic] reads lazily the content of [ic] as parsing goes. - All content that is read is saved to an internal buffer for backtracking. - @param size number of bytes read at once from [ic] - @since 0.13 *) - -(** {2 Combinators} *) - -type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit -(** Takes the input and two continuations: - {ul - {- [ok] to call with the result when it's done} - {- [err] to call when the parser met an error} - } - The type definition changed since 0.14 to avoid stack overflows - @raise ParseError in case of failure *) - -val return : 'a -> 'a t -(** Always succeeds, without consuming its input *) - -val pure : 'a -> 'a t -(** Synonym to {!return} *) - -val (>|=) : 'a t -> ('a -> 'b) -> 'b t -(** Map *) - -val (>>=) : 'a t -> ('a -> 'b t) -> 'b t -(** Monadic bind *) - -val (<*>) : ('a -> 'b) t -> 'a t -> 'b t -(** Applicative *) - -val (<* ) : 'a t -> _ t -> 'a t -(** [a <* b] parses [a] into [x], parses [b] and ignores its result, - and returns [x] *) - -val ( *>) : _ t -> 'a t -> 'a t -(** [a *> b] parses [a], then parses [b] into [x], and returns [x]. The - results of [a] is ignored. *) - -val fail : string -> 'a t -(** [fail msg] fails with the given message. It can trigger a backtrack *) - -val eoi : unit t -(** Expect the end of input, fails otherwise *) - -val nop : unit t -(** Succeed with [()] *) - -val char : char -> char t -(** [char c] parses the char [c] and nothing else *) - -val char_if : (char -> bool) -> char t -(** [char_if f] parses a character [c] if [f c = true] *) - -val chars_if : (char -> bool) -> string t -(** [chars_if f] parses a string of chars that satisfy [f] *) - -val chars1_if : (char -> bool) -> string t -(** Same as {!chars_if}, but only non-empty strings *) - -val endline : char t -(** Parses '\n' *) - -val space : char t -(** Tab or space *) - -val white : char t -(** Tab or space or newline *) - -val skip_chars : (char -> bool) -> unit t -(** Skip 0 or more chars satisfying the predicate *) - -val skip_space : unit t -(** Skip ' ' and '\t' *) - -val skip_white : unit t -(** Skip ' ' and '\t' and '\n' *) - -val is_alpha : char -> bool -(** Is the char a letter? *) - -val is_num : char -> bool -(** Is the char a digit? *) - -val is_alpha_num : char -> bool - -val is_space : char -> bool -(** True on ' ' and '\t' *) - -val is_white : char -> bool -(** True on ' ' and '\t' and '\n' - @since 0.13 *) - -val (~~~) : (char -> bool) -> char -> bool -(** Negation on predicates *) - -val (|||) : (char -> bool) -> (char -> bool) -> char -> bool -(** Disjunction on predicates *) - -val (&&&) : (char -> bool) -> (char -> bool) -> char -> bool -(** Conjunction on predicates *) - -val (<|>) : 'a t -> 'a t -> 'a t -(** [a <|> b] tries to parse [a], and if [a] fails, backtracks and tries - to parse [b]. Therefore, it succeeds if either succeeds *) - -val string : string -> string t -(** [string s] parses exactly the string [s], and nothing else *) - -val many : 'a t -> 'a list t -(** [many p] parses a list of [p], eagerly (as long as possible) *) - -val many1 : 'a t -> 'a list t -(** parses a non empty list *) - -val skip : _ t -> unit t -(** [skip p] parses [p] and ignores its result *) - -val sep : by:_ t -> 'a t -> 'a list t -(** [sep ~by p] parses a list of [p] separated by [by] *) - -val sep1 : by:_ t -> 'a t -> 'a list t -(** [sep1 ~by p] parses a non empty list of [p], separated by [by] *) - -val fix : ('a t -> 'a t) -> 'a t -(** Fixpoint combinator *) - -val memo : 'a t -> 'a t -(** Memoize the parser. [memo p] will behave like [p], but when called - in a state (read: position in input) it has already processed, [memo p] - returns a result directly. The implementation uses an underlying - hashtable. - This can be costly in memory, but improve the run time a lot if there - is a lot of backtracking involving [p]. - - This function is not thread-safe. - @since 0.13 *) - -val fix_memo : ('a t -> 'a t) -> 'a t -(** Same as {!fix}, but the fixpoint is memoized. - @since 0.13 *) - -(** {2 Parse} - - Those functions have a label [~p] on the parser, since 0.14. -*) - -val parse : input:input -> p:'a t -> 'a or_error -(** [parse ~input p] applies [p] on the input, and returns [`Ok x] if - [p] succeeds with [x], or [`Error s] otherwise *) - -val parse_exn : input:input -> p:'a t -> 'a -(** @raise ParseError if it fails *) - -val parse_string : string -> p:'a t -> 'a or_error -(** Specialization of {!parse} for string inputs *) - -val parse_string_exn : string -> p:'a t -> 'a -(** @raise ParseError if it fails *) - -val parse_file : ?size:int -> file:string -> p:'a t -> 'a or_error -(** [parse_file ~file p] parses [file] with [p] by opening the file - and using {!input_of_chan}. - @param size size of chunks read from file - @since 0.13 *) - -val parse_file_exn : ?size:int -> file:string -> p:'a t -> 'a -(** Unsafe version of {!parse_file} - @since 0.13 *) - -(** {2 Utils} *) - -module U : sig - val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t - (** [list p] parses a list of [p], with the OCaml conventions for - start token "[", stop token "]" and separator ";". - Whitespace between items are skipped *) - - val int : int t - - val word : string t - (** non empty string of alpha num, start with alpha *) - - val map : ('a -> 'b) -> 'a t -> 'b t - - val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t - - val map3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t - - val pair : ?start:string -> ?stop:string -> ?sep:string -> - 'a t -> 'b t -> ('a * 'b) t - (** Parse a pair using OCaml whitespace conventions. - The default is "(a, b)". - @since 0.14 *) - - val triple : ?start:string -> ?stop:string -> ?sep:string -> - 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t - (** Parse a triple using OCaml whitespace conventions. - The default is "(a, b, c)". - @since 0.14 *) -end