remove CCParse and CCKMP (will be replaced in core)

This commit is contained in:
Simon Cruanes 2016-11-03 19:08:00 +01:00
parent 46cee7096c
commit 8f7b29c3bd
5 changed files with 1 additions and 1010 deletions

2
_oasis
View file

@ -80,7 +80,7 @@ Library "containers_iter"
Library "containers_string"
Path: src/string
Modules: Containers_string, CCKMP, CCLevenshtein, CCApp_parse, CCParse
Modules: Containers_string, CCLevenshtein, CCApp_parse
BuildDepends: bytes
FindlibName: string
FindlibParent: containers

View file

@ -1,154 +0,0 @@
(* This file is free software, part of containers. See file "license" for more details. *)
(** {1 Knuth-Morris-Pratt} *)
module type STRING = sig
type t
type char
val length : t -> int
val get : t -> int -> char
val char_equal : char -> char -> bool
end
type 'a gen = unit -> 'a option
type 'a sequence = ('a -> unit) -> unit
module type S = sig
type string
type pattern
(** Compiled pattern (needle: string to search in another string) *)
val compile : string -> pattern
(** Compile a string into a pattern *)
val find : pattern:pattern -> string -> int -> int option
(** [find ~pattern s i] finds the next occurrence of [pattern]
in [s] starting at offset [i], and returns it,
or returns [None] if the pattern doesn't occur. *)
val search : pattern:pattern -> string -> int option
(** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *)
val find_all : pattern:pattern -> string -> int -> int gen
(** Generator on all occurrences of the pattern *)
val seq : pattern:pattern -> string -> int -> int sequence
(** Iterate on matching positions *)
(** {6 One-shot functions that compile the pattern on-the-fly} *)
val search' : pattern:string -> string -> int option
val find_all' : pattern:string -> string -> int gen
val seq' : pattern:string -> string -> int sequence
end
module Make(Str : STRING) = struct
type string = Str.t
type pattern = {
failure : int array;
str : Str.t;
len : int; (* = length str = length failure *)
}
let compile str =
let len = Str.length str in
match len with
| 0 -> {failure=[| |]; len; str;}
| 1 -> {failure=[| -1 |]; len; str;}
| _ ->
(* at least 2 elements, the algorithm can work *)
let failure = Array.make len 0 in
(* i: current index in str *)
let i = ref 1 in
(* j: index of candidate substring *)
let j = ref 0 in
while !i < len-1 do
match !j with
| _ when Str.char_equal (Str.get str !i) (Str.get str !j) ->
(* substring starting at !j continues matching current char *)
i := !i+1;
j := !j+1;
failure.(!i) <- !j;
| 0 ->
(* back to the beginning *)
i := !i+1;
failure.(!i) <- 0;
| _ ->
(* fallback for the prefix string *)
assert (!j > 0);
j := failure.(!j)
done;
{ failure; str; len; }
let find ~pattern s idx =
(* proper search function.
[i] index in [s]
[j] index in [pattern]
[len] length of [s] *)
let len = Str.length s in
let i = ref idx in
let j = ref 0 in
while !i < len && !j < pattern.len do
let c = Str.get s !i in
let expected = Str.get pattern.str !j in
if Str.char_equal c expected
then (
(* char matches *)
i := !i + 1; j := !j + 1
) else
if !j=0
then (* beginning of the pattern *)
i := !i + 1
else (* follow the failure link *)
j := pattern.failure.(!j)
done;
if !j = pattern.len
then Some (!i-pattern.len)
else None
let search ~pattern s = find ~pattern s 0
let find_all ~pattern s i =
let i = ref i in
fun () ->
if !i >= Str.length s
then None
else match find ~pattern s !i with
| None -> None
| (Some j) as res ->
i := j + pattern.len;
res
let seq ~pattern s i k =
let rec iter i =
match find ~pattern s i with
| None -> ()
| Some j ->
k j;
iter (j+pattern.len)
in
iter i
let search' ~pattern s =
search ~pattern:(compile pattern) s
let find_all' ~pattern s =
find_all ~pattern:(compile pattern) s 0
let seq' ~pattern s =
seq ~pattern:(compile pattern) s 0
end
include Make(struct
type char_ = char
type char = char_
type t = string
let char_equal a b = a=b
let get = String.get
let length = String.length
end)

View file

@ -1,52 +0,0 @@
(* This file is free software, part of containers. See file "license" for more details. *)
(** {1 Knuth-Morris-Pratt} *)
module type STRING = sig
type t
type char
val length : t -> int
val get : t -> int -> char
val char_equal : char -> char -> bool
end
type 'a gen = unit -> 'a option
type 'a sequence = ('a -> unit) -> unit
module type S = sig
type string
type pattern
(** Compiled pattern (needle: string to search in another string) *)
val compile : string -> pattern
(** Compile a string into a pattern *)
val find : pattern:pattern -> string -> int -> int option
(** [find ~pattern s i] finds the next occurrence of [pattern]
in [s] starting at offset [i], and returns it,
or returns [None] if the pattern doesn't occur. *)
val search : pattern:pattern -> string -> int option
(** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *)
val find_all : pattern:pattern -> string -> int -> int gen
(** Generator on all occurrences of the pattern *)
val seq : pattern:pattern -> string -> int -> int sequence
(** iterate on matching positions *)
(** {6 One-shot functions that compile the pattern on-the-fly} *)
val search' : pattern:string -> string -> int option
val find_all' : pattern:string -> string -> int gen
val seq' : pattern:string -> string -> int sequence
end
module Make(Str : STRING) : S with type string = Str.t
include S with type string = string

View file

@ -1,482 +0,0 @@
(*
copyright (c) 2013-2015, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Very Simple Parser Combinators} *)
type 'a or_error = [`Ok of 'a | `Error of string]
type line_num = int
type col_num = int
module H = Hashtbl.Make(struct
type t = int * int (* id of parser, position *)
let equal ((a,b):t)(c,d) = a=c && b=d
let hash = Hashtbl.hash
end)
type memo_ = (unit -> unit) H.t lazy_t
type input = {
is_done : unit -> bool; (** End of input? *)
cur : unit -> char; (** Current char *)
next : unit -> char; (** if not {!is_done}, move to next char *)
pos : unit -> int; (** Current pos *)
lnum : unit -> line_num; (** Line number @since 0.13 *)
cnum : unit -> col_num; (** Column number @since 0.13 *)
memo : memo_; (** Memoization table, if any *)
backtrack : int -> unit; (** Restore to previous pos *)
sub : int -> int -> string; (** Extract slice from [pos] with [len] *)
}
exception ParseError of line_num * col_num * (unit -> string)
(*$inject
module T = struct
type tree = L of int | N of tree * tree
end
open T
let mk_leaf x = L x
let mk_node x y = N(x,y)
let ptree = fix @@ fun self ->
skip_space *>
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
<|>
(U.int >|= mk_leaf) )
let ptree' = fix_memo @@ fun self ->
skip_space *>
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
<|>
(U.int >|= mk_leaf) )
let rec pptree = function
| N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b)
| L x -> Printf.sprintf "L %d" x
let errpptree = function
| `Ok x -> "Ok " ^ pptree x
| `Error s -> "Error " ^ s
*)
(*$= & ~printer:errpptree
(`Ok (N (L 1, N (L 2, L 3)))) \
(parse_string ~p:ptree "(1 (2 3))" )
(`Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
(parse_string ~p:ptree "((1 2) (3 (4 5)))" )
(`Ok (N (L 1, N (L 2, L 3)))) \
(parse_string ~p:ptree' "(1 (2 3))" )
(`Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
(parse_string ~p:ptree' "((1 2) (3 (4 5)))" )
*)
(*$R
let p = U.list ~sep:"," U.word in
let printer = function
| `Ok l -> "Ok " ^ CCFormat.(to_string (list string)) l
| `Error s -> "Error " ^ s
in
assert_equal ~printer
(`Ok ["abc"; "de"; "hello"; "world"])
(parse_string ~p "[abc , de, hello ,world ]");
*)
(*$R
let test n =
let p = CCParse.(U.list ~sep:"," U.int) in
let l = CCList.(1 -- n) in
let l_printed =
CCFormat.(to_string (list ~start:"[" ~stop:"]" ~sep:"," int)) l in
let l' = CCParse.parse_string_exn ~p l_printed in
assert_equal ~printer:Q.Print.(list int) l l'
in
test 100_000;
test 400_000;
*)
(* test with a temporary file *)
(*$R
let test n =
let p = CCParse.(U.list ~sep:"," U.int) in
let l = CCList.(1 -- n) in
let l' =
CCIO.File.with_temp ~temp_dir:"/tmp/"
~prefix:"containers_test" ~suffix:""
(fun name ->
(* write test into file *)
CCIO.with_out name
(fun oc ->
let fmt = Format.formatter_of_out_channel oc in
Format.fprintf fmt "@[%a@]@."
CCFormat.(list ~start:"[" ~stop:"]" ~sep:"," int) l);
(* parse it back *)
CCParse.parse_file_exn ~size:1024 ~file:name ~p)
in
assert_equal ~printer:Q.Print.(list int) l l'
in
test 100_000;
test 400_000;
*)
let const_ x () = x
let input_of_string s =
let i = ref 0 in
let line = ref 1 in (* line *)
let col = ref 1 in (* column *)
{ is_done=(fun () -> !i = String.length s);
cur=(fun () -> s.[!i]);
next=(fun () ->
if !i = String.length s
then raise (ParseError (!line, !col, const_ "unexpected EOI"))
else (
let c = s.[!i] in
incr i;
if c='\n' then (incr line; col:=1) else incr col;
c
)
);
lnum=(fun () -> !line);
cnum=(fun () -> !col);
memo=lazy (H.create 32);
pos=(fun () -> !i);
backtrack=(fun j -> assert (0 <= j && j <= !i); i := j);
sub=(fun j len -> assert (j + len <= !i); String.sub s j len);
}
let input_of_chan ?(size=1024) ic =
assert (size > 0);
let b = ref (Bytes.make size ' ') in
let n = ref 0 in (* length of buffer *)
let i = ref 0 in (* current index in buffer *)
let line = ref 1 in
let col = ref 1 in
let exhausted = ref false in (* input fully read? *)
let eoi() = raise (ParseError (!line, !col, const_ "unexpected EOI")) in
(* read a chunk of input *)
let read_more () =
assert (not !exhausted);
(* resize *)
if Bytes.length !b - !n < size then (
let b' = Bytes.make (Bytes.length !b + 2 * size) ' ' in
Bytes.blit !b 0 b' 0 !n;
b := b';
);
let len = input ic !b !n size in
exhausted := len = 0;
n := !n + len
in
(* read next char *)
let next() =
if !exhausted && !i = !n then eoi();
let c = Bytes.get !b !i in
incr i;
if c='\n' then (incr line; col := 1) else incr col;
if !i = !n then (
read_more();
if !exhausted then eoi();
assert (!i < !n);
);
c
and is_done () = !exhausted && !i = !n in
(* fetch first chars *)
read_more();
{ is_done=(fun () -> !exhausted && !i = !n);
cur=(fun () -> assert (not (is_done())); Bytes.get !b !i);
next;
pos=(fun() -> !i);
lnum=(fun () -> !line);
cnum=(fun () -> !col);
memo=lazy (H.create 32);
backtrack=(fun j -> assert (0 <= j && j <= !i); i:=j);
sub=(fun j len -> assert (j + len <= !i); Bytes.sub_string !b j len);
}
type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit
let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x
let pure = return
let (>|=) : 'a t -> ('a -> 'b) -> 'b t
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> ok (f x))
let (>>=) : 'a t -> ('a -> 'b t) -> 'b t
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> f x st ~err ~ok)
let (<*>) : ('a -> 'b) t -> 'a t -> 'b t
= fun f x st ~ok ~err ->
f st ~err ~ok:(fun f' -> x st ~err ~ok:(fun x' -> ok (f' x')))
let (<* ) : 'a t -> _ t -> 'a t
= fun x y st ~ok ~err ->
x st ~err ~ok:(fun res -> y st ~err ~ok:(fun _ -> ok res))
let ( *>) : _ t -> 'a t -> 'a t
= fun x y st ~ok ~err ->
x st ~err ~ok:(fun _ -> y st ~err ~ok)
let junk_ st = ignore (st.next ())
let pf = Printf.sprintf
let fail_ ~err st msg = err (ParseError (st.lnum(), st.cnum(), msg))
let eoi st ~ok ~err =
if st.is_done()
then ok ()
else fail_ ~err st (const_ "expected EOI")
let fail msg st ~ok:_ ~err = fail_ ~err st (const_ msg)
let nop _ ~ok ~err:_ = ok()
let char c =
let msg = pf "expected '%c'" c in
fun st ~ok ~err -> if st.next () = c then ok c else fail_ ~err st (const_ msg)
let char_if p st ~ok ~err =
let c = st.next () in
if p c then ok c else fail_ ~err st (fun () -> pf "unexpected char '%c'" c)
let chars_if p st ~ok ~err:_ =
let i = st.pos () in
let len = ref 0 in
while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done;
ok (st.sub i !len)
let chars1_if p st ~ok ~err =
chars_if p st ~err
~ok:(fun s ->
if s = "" then fail_ ~err st (const_ "unexpected sequence of chars");
ok s
)
let rec skip_chars p st ~ok ~err =
if not (st.is_done ()) && p (st.cur ()) then (
junk_ st;
skip_chars p st ~ok ~err
) else ok()
let is_alpha = function
| 'a' .. 'z' | 'A' .. 'Z' -> true
| _ -> false
let is_num = function '0' .. '9' -> true | _ -> false
let is_alpha_num = function
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
| _ -> false
let is_space = function ' ' | '\t' -> true | _ -> false
let is_white = function ' ' | '\t' | '\n' -> true | _ -> false
let (~~~) p c = not (p c)
let (|||) p1 p2 c = p1 c || p2 c
let (&&&) p1 p2 c = p1 c && p2 c
let endline = char '\n'
let space = char_if is_space
let white = char_if is_white
let skip_space = skip_chars is_space
let skip_white = skip_chars is_white
(* XXX: combine errors? *)
let (<|>) : 'a t -> 'a t -> 'a t
= fun x y st ~ok ~err ->
let i = st.pos () in
x st ~ok
~err:(fun _ ->
st.backtrack i; (* restore pos *)
y st ~ok ~err
)
let string s st ~ok ~err =
let rec check i =
i = String.length s ||
(s.[i] = st.next () && check (i+1))
in
if check 0 then ok s else fail_ ~err st (fun () -> pf "expected \"%s\"" s)
let rec many_rec : 'a t -> 'a list -> 'a list t = fun p acc st ~ok ~err ->
if st.is_done () then ok(List.rev acc)
else
let i = st.pos () in
p st ~err
~ok:(fun x ->
many_rec p (x :: acc) st ~ok
~err:(fun _ ->
st.backtrack i;
ok(List.rev acc)
)
)
let many : 'a t -> 'a list t
= fun p st ~ok ~err -> many_rec p [] st ~ok ~err
let many1 : 'a t -> 'a list t =
fun p st ~ok ~err ->
p st ~err ~ok:(fun x -> many_rec p [x] st ~err ~ok)
let rec skip p st ~ok ~err =
let i = st.pos () in
p st
~ok:(fun _ -> skip p st ~ok ~err)
~err:(fun _ ->
st.backtrack i;
ok()
)
let rec sep1 ~by p =
p >>= fun x ->
let cont = by *> sep ~by p >|= fun tl -> x :: tl in
cont <|> return [x]
and sep ~by p =
sep1 ~by p <|> return []
module MemoTbl = struct
(* table of closures, used to implement universal type *)
type t = memo_
let create n = lazy (H.create n)
(* unique ID for each parser *)
let id_ = ref 0
type 'a res =
| Fail of exn
| Ok of 'a
end
let fix f =
let rec p st ~ok ~err = f p st ~ok ~err in
p
let memo (type a) (p:a t):a t =
let id = !MemoTbl.id_ in
incr MemoTbl.id_;
let r = ref None in (* used for universal encoding *)
fun input ~ok ~err ->
let i = input.pos () in
let (lazy tbl) = input.memo in
try
let f = H.find tbl (i, id) in
(* extract hidden value *)
r := None;
f ();
begin match !r with
| None -> assert false
| Some (MemoTbl.Ok x) -> ok x
| Some (MemoTbl.Fail e) -> err e
end
with Not_found ->
(* parse, and save *)
p input
~err:(fun e ->
H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Fail e));
err e
)
~ok:(fun x ->
H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Ok x));
ok x
)
let fix_memo f =
let rec p =
let p' = lazy (memo p) in
fun st ~ok ~err -> f (Lazy.force p') st ~ok ~err
in
p
let parse_exn ~input ~p =
let res = ref None in
p input ~ok:(fun x -> res := Some x) ~err:(fun e -> raise e);
match !res with
| None -> failwith "no input returned by parser"
| Some x -> x
let parse ~input ~p =
try `Ok (parse_exn ~input ~p)
with ParseError (lnum, cnum, msg) ->
`Error (Printf.sprintf "at line %d, column %d: error, %s" lnum cnum (msg ()))
let parse_string s ~p = parse ~input:(input_of_string s) ~p
let parse_string_exn s ~p = parse_exn ~input:(input_of_string s) ~p
let parse_file_exn ?size ~file ~p =
let ic = open_in file in
let input = input_of_chan ?size ic in
try
let res = parse_exn ~input ~p in
close_in ic;
res
with e ->
close_in ic;
raise e
let parse_file ?size ~file ~p =
try
`Ok (parse_file_exn ?size ~file ~p)
with
| ParseError (lnum, cnum, msg) ->
`Error (Printf.sprintf "at line %d, column %d: error, %s" lnum cnum (msg ()))
| Sys_error s ->
`Error (Printf.sprintf "error while reading %s: %s" file s)
module U = struct
let sep_ = sep
let list ?(start="[") ?(stop="]") ?(sep=";") p =
string start *> skip_white *>
sep_ ~by:(skip_white *> string sep *> skip_white) p <*
skip_white <* string stop
let int =
chars1_if (is_num ||| (=) '-')
>>= fun s ->
try return (int_of_string s)
with Failure _ -> fail "expected an int"
let map f x = x >|= f
let map2 f x y = pure f <*> x <*> y
let map3 f x y z = pure f <*> x <*> y <*> z
let prepend_str c s = String.make 1 c ^ s
let word =
map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num)
let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 =
string start *> skip_white *>
p1 >>= fun x1 ->
skip_white *> string sep *> skip_white *>
p2 >>= fun x2 ->
string stop *> return (x1,x2)
let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 =
string start *> skip_white *>
p1 >>= fun x1 ->
skip_white *> string sep *> skip_white *>
p2 >>= fun x2 ->
skip_white *> string sep *> skip_white *>
p3 >>= fun x3 ->
string stop *> return (x1,x2,x3)
end

View file

@ -1,321 +0,0 @@
(*
copyright (c) 2013-2015, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(**
{1 Very Simple Parser Combinators}
{b status} still a bit unstable, the type {!'a t} might still change.
Examples:
{6 parse recursive structures}
{[
open Containers_string.Parse;;
type tree = L of int | N of tree * tree;;
let mk_leaf x = L x
let mk_node x y = N(x,y)
let ptree = fix @@ fun self ->
skip_space *>
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
<|>
(U.int >|= mk_leaf) )
;;
parse_string_exn "(1 (2 3))" ptree;;
parse_string_exn "((1 2) (3 (4 5)))" ptree;;
]}
{6 Parse a list of words}
{[
open Containers_string.Parse;;
let p = U.list ~sep:"," U.word;;
parse_string_exn "[abc , de, hello ,world ]" p;;
]}
{6 Stress Test}
This makes a list of 100_000 integers, prints it and parses it back.
{[
let p = CCParse.(U.list ~sep:"," U.int);;
let l = CCList.(1 -- 100_000);;
let l_printed =
CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l;;
let l' = CCParse.parse_string_exn ~p l_printed;;
assert (l=l');;
]}
@since 0.11
*)
type 'a or_error = [`Ok of 'a | `Error of string]
type line_num = int (** @since 0.13 *)
type col_num = int (** @since 0.13 *)
exception ParseError of line_num * col_num * (unit -> string)
(** position * message
This type changed at 0.13 *)
(** {2 Input} *)
(** @since 0.13 *)
module MemoTbl : sig
type t
val create: int -> t (** New memoization table *)
end
type input = {
is_done : unit -> bool; (** End of input? *)
cur : unit -> char; (** Current char *)
next : unit -> char;
(** Returns current char;
if not {!is_done}, move to next char,
otherwise throw ParseError *)
pos : unit -> int; (** Current pos *)
lnum : unit -> line_num; (** Line number @since 0.13 *)
cnum : unit -> col_num; (** Column number @since 0.13 *)
memo : MemoTbl.t; (** Memoization table, if any *)
backtrack : int -> unit; (** Restore to previous pos *)
sub : int -> int -> string; (** [sub pos len] extracts slice from [pos] with [len] *)
}
(** The type of input, which must allow for backtracking somehow.
This type is {b unstable} and its details might change. *)
val input_of_string : string -> input
(** Parse the string *)
val input_of_chan : ?size:int -> in_channel -> input
(** [input_of_chan ic] reads lazily the content of [ic] as parsing goes.
All content that is read is saved to an internal buffer for backtracking.
@param size number of bytes read at once from [ic]
@since 0.13 *)
(** {2 Combinators} *)
type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit
(** Takes the input and two continuations:
{ul
{- [ok] to call with the result when it's done}
{- [err] to call when the parser met an error}
}
The type definition changed since 0.14 to avoid stack overflows
@raise ParseError in case of failure *)
val return : 'a -> 'a t
(** Always succeeds, without consuming its input *)
val pure : 'a -> 'a t
(** Synonym to {!return} *)
val (>|=) : 'a t -> ('a -> 'b) -> 'b t
(** Map *)
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
(** Monadic bind *)
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t
(** Applicative *)
val (<* ) : 'a t -> _ t -> 'a t
(** [a <* b] parses [a] into [x], parses [b] and ignores its result,
and returns [x] *)
val ( *>) : _ t -> 'a t -> 'a t
(** [a *> b] parses [a], then parses [b] into [x], and returns [x]. The
results of [a] is ignored. *)
val fail : string -> 'a t
(** [fail msg] fails with the given message. It can trigger a backtrack *)
val eoi : unit t
(** Expect the end of input, fails otherwise *)
val nop : unit t
(** Succeed with [()] *)
val char : char -> char t
(** [char c] parses the char [c] and nothing else *)
val char_if : (char -> bool) -> char t
(** [char_if f] parses a character [c] if [f c = true] *)
val chars_if : (char -> bool) -> string t
(** [chars_if f] parses a string of chars that satisfy [f] *)
val chars1_if : (char -> bool) -> string t
(** Same as {!chars_if}, but only non-empty strings *)
val endline : char t
(** Parses '\n' *)
val space : char t
(** Tab or space *)
val white : char t
(** Tab or space or newline *)
val skip_chars : (char -> bool) -> unit t
(** Skip 0 or more chars satisfying the predicate *)
val skip_space : unit t
(** Skip ' ' and '\t' *)
val skip_white : unit t
(** Skip ' ' and '\t' and '\n' *)
val is_alpha : char -> bool
(** Is the char a letter? *)
val is_num : char -> bool
(** Is the char a digit? *)
val is_alpha_num : char -> bool
val is_space : char -> bool
(** True on ' ' and '\t' *)
val is_white : char -> bool
(** True on ' ' and '\t' and '\n'
@since 0.13 *)
val (~~~) : (char -> bool) -> char -> bool
(** Negation on predicates *)
val (|||) : (char -> bool) -> (char -> bool) -> char -> bool
(** Disjunction on predicates *)
val (&&&) : (char -> bool) -> (char -> bool) -> char -> bool
(** Conjunction on predicates *)
val (<|>) : 'a t -> 'a t -> 'a t
(** [a <|> b] tries to parse [a], and if [a] fails, backtracks and tries
to parse [b]. Therefore, it succeeds if either succeeds *)
val string : string -> string t
(** [string s] parses exactly the string [s], and nothing else *)
val many : 'a t -> 'a list t
(** [many p] parses a list of [p], eagerly (as long as possible) *)
val many1 : 'a t -> 'a list t
(** parses a non empty list *)
val skip : _ t -> unit t
(** [skip p] parses [p] and ignores its result *)
val sep : by:_ t -> 'a t -> 'a list t
(** [sep ~by p] parses a list of [p] separated by [by] *)
val sep1 : by:_ t -> 'a t -> 'a list t
(** [sep1 ~by p] parses a non empty list of [p], separated by [by] *)
val fix : ('a t -> 'a t) -> 'a t
(** Fixpoint combinator *)
val memo : 'a t -> 'a t
(** Memoize the parser. [memo p] will behave like [p], but when called
in a state (read: position in input) it has already processed, [memo p]
returns a result directly. The implementation uses an underlying
hashtable.
This can be costly in memory, but improve the run time a lot if there
is a lot of backtracking involving [p].
This function is not thread-safe.
@since 0.13 *)
val fix_memo : ('a t -> 'a t) -> 'a t
(** Same as {!fix}, but the fixpoint is memoized.
@since 0.13 *)
(** {2 Parse}
Those functions have a label [~p] on the parser, since 0.14.
*)
val parse : input:input -> p:'a t -> 'a or_error
(** [parse ~input p] applies [p] on the input, and returns [`Ok x] if
[p] succeeds with [x], or [`Error s] otherwise *)
val parse_exn : input:input -> p:'a t -> 'a
(** @raise ParseError if it fails *)
val parse_string : string -> p:'a t -> 'a or_error
(** Specialization of {!parse} for string inputs *)
val parse_string_exn : string -> p:'a t -> 'a
(** @raise ParseError if it fails *)
val parse_file : ?size:int -> file:string -> p:'a t -> 'a or_error
(** [parse_file ~file p] parses [file] with [p] by opening the file
and using {!input_of_chan}.
@param size size of chunks read from file
@since 0.13 *)
val parse_file_exn : ?size:int -> file:string -> p:'a t -> 'a
(** Unsafe version of {!parse_file}
@since 0.13 *)
(** {2 Utils} *)
module U : sig
val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t
(** [list p] parses a list of [p], with the OCaml conventions for
start token "[", stop token "]" and separator ";".
Whitespace between items are skipped *)
val int : int t
val word : string t
(** non empty string of alpha num, start with alpha *)
val map : ('a -> 'b) -> 'a t -> 'b t
val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t
val map3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t
val pair : ?start:string -> ?stop:string -> ?sep:string ->
'a t -> 'b t -> ('a * 'b) t
(** Parse a pair using OCaml whitespace conventions.
The default is "(a, b)".
@since 0.14 *)
val triple : ?start:string -> ?stop:string -> ?sep:string ->
'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t
(** Parse a triple using OCaml whitespace conventions.
The default is "(a, b, c)".
@since 0.14 *)
end