mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
remove CCParse and CCKMP (will be replaced in core)
This commit is contained in:
parent
46cee7096c
commit
8f7b29c3bd
5 changed files with 1 additions and 1010 deletions
2
_oasis
2
_oasis
|
|
@ -80,7 +80,7 @@ Library "containers_iter"
|
|||
|
||||
Library "containers_string"
|
||||
Path: src/string
|
||||
Modules: Containers_string, CCKMP, CCLevenshtein, CCApp_parse, CCParse
|
||||
Modules: Containers_string, CCLevenshtein, CCApp_parse
|
||||
BuildDepends: bytes
|
||||
FindlibName: string
|
||||
FindlibParent: containers
|
||||
|
|
|
|||
|
|
@ -1,154 +0,0 @@
|
|||
|
||||
(* This file is free software, part of containers. See file "license" for more details. *)
|
||||
|
||||
(** {1 Knuth-Morris-Pratt} *)
|
||||
|
||||
module type STRING = sig
|
||||
type t
|
||||
type char
|
||||
|
||||
val length : t -> int
|
||||
val get : t -> int -> char
|
||||
val char_equal : char -> char -> bool
|
||||
end
|
||||
|
||||
type 'a gen = unit -> 'a option
|
||||
type 'a sequence = ('a -> unit) -> unit
|
||||
|
||||
module type S = sig
|
||||
type string
|
||||
|
||||
type pattern
|
||||
(** Compiled pattern (needle: string to search in another string) *)
|
||||
|
||||
val compile : string -> pattern
|
||||
(** Compile a string into a pattern *)
|
||||
|
||||
val find : pattern:pattern -> string -> int -> int option
|
||||
(** [find ~pattern s i] finds the next occurrence of [pattern]
|
||||
in [s] starting at offset [i], and returns it,
|
||||
or returns [None] if the pattern doesn't occur. *)
|
||||
|
||||
val search : pattern:pattern -> string -> int option
|
||||
(** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *)
|
||||
|
||||
val find_all : pattern:pattern -> string -> int -> int gen
|
||||
(** Generator on all occurrences of the pattern *)
|
||||
|
||||
val seq : pattern:pattern -> string -> int -> int sequence
|
||||
(** Iterate on matching positions *)
|
||||
|
||||
(** {6 One-shot functions that compile the pattern on-the-fly} *)
|
||||
|
||||
val search' : pattern:string -> string -> int option
|
||||
|
||||
val find_all' : pattern:string -> string -> int gen
|
||||
|
||||
val seq' : pattern:string -> string -> int sequence
|
||||
end
|
||||
|
||||
module Make(Str : STRING) = struct
|
||||
type string = Str.t
|
||||
type pattern = {
|
||||
failure : int array;
|
||||
str : Str.t;
|
||||
len : int; (* = length str = length failure *)
|
||||
}
|
||||
|
||||
let compile str =
|
||||
let len = Str.length str in
|
||||
match len with
|
||||
| 0 -> {failure=[| |]; len; str;}
|
||||
| 1 -> {failure=[| -1 |]; len; str;}
|
||||
| _ ->
|
||||
(* at least 2 elements, the algorithm can work *)
|
||||
let failure = Array.make len 0 in
|
||||
(* i: current index in str *)
|
||||
let i = ref 1 in
|
||||
(* j: index of candidate substring *)
|
||||
let j = ref 0 in
|
||||
while !i < len-1 do
|
||||
match !j with
|
||||
| _ when Str.char_equal (Str.get str !i) (Str.get str !j) ->
|
||||
(* substring starting at !j continues matching current char *)
|
||||
i := !i+1;
|
||||
j := !j+1;
|
||||
failure.(!i) <- !j;
|
||||
| 0 ->
|
||||
(* back to the beginning *)
|
||||
i := !i+1;
|
||||
failure.(!i) <- 0;
|
||||
| _ ->
|
||||
(* fallback for the prefix string *)
|
||||
assert (!j > 0);
|
||||
j := failure.(!j)
|
||||
done;
|
||||
{ failure; str; len; }
|
||||
|
||||
let find ~pattern s idx =
|
||||
(* proper search function.
|
||||
[i] index in [s]
|
||||
[j] index in [pattern]
|
||||
[len] length of [s] *)
|
||||
let len = Str.length s in
|
||||
let i = ref idx in
|
||||
let j = ref 0 in
|
||||
while !i < len && !j < pattern.len do
|
||||
let c = Str.get s !i in
|
||||
let expected = Str.get pattern.str !j in
|
||||
if Str.char_equal c expected
|
||||
then (
|
||||
(* char matches *)
|
||||
i := !i + 1; j := !j + 1
|
||||
) else
|
||||
if !j=0
|
||||
then (* beginning of the pattern *)
|
||||
i := !i + 1
|
||||
else (* follow the failure link *)
|
||||
j := pattern.failure.(!j)
|
||||
done;
|
||||
if !j = pattern.len
|
||||
then Some (!i-pattern.len)
|
||||
else None
|
||||
|
||||
let search ~pattern s = find ~pattern s 0
|
||||
|
||||
let find_all ~pattern s i =
|
||||
let i = ref i in
|
||||
fun () ->
|
||||
if !i >= Str.length s
|
||||
then None
|
||||
else match find ~pattern s !i with
|
||||
| None -> None
|
||||
| (Some j) as res ->
|
||||
i := j + pattern.len;
|
||||
res
|
||||
|
||||
let seq ~pattern s i k =
|
||||
let rec iter i =
|
||||
match find ~pattern s i with
|
||||
| None -> ()
|
||||
| Some j ->
|
||||
k j;
|
||||
iter (j+pattern.len)
|
||||
in
|
||||
iter i
|
||||
|
||||
let search' ~pattern s =
|
||||
search ~pattern:(compile pattern) s
|
||||
|
||||
let find_all' ~pattern s =
|
||||
find_all ~pattern:(compile pattern) s 0
|
||||
|
||||
let seq' ~pattern s =
|
||||
seq ~pattern:(compile pattern) s 0
|
||||
end
|
||||
|
||||
include Make(struct
|
||||
type char_ = char
|
||||
type char = char_
|
||||
type t = string
|
||||
let char_equal a b = a=b
|
||||
let get = String.get
|
||||
let length = String.length
|
||||
end)
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
|
||||
(* This file is free software, part of containers. See file "license" for more details. *)
|
||||
|
||||
(** {1 Knuth-Morris-Pratt} *)
|
||||
|
||||
module type STRING = sig
|
||||
type t
|
||||
type char
|
||||
|
||||
val length : t -> int
|
||||
val get : t -> int -> char
|
||||
val char_equal : char -> char -> bool
|
||||
end
|
||||
|
||||
type 'a gen = unit -> 'a option
|
||||
type 'a sequence = ('a -> unit) -> unit
|
||||
|
||||
module type S = sig
|
||||
type string
|
||||
|
||||
type pattern
|
||||
(** Compiled pattern (needle: string to search in another string) *)
|
||||
|
||||
val compile : string -> pattern
|
||||
(** Compile a string into a pattern *)
|
||||
|
||||
val find : pattern:pattern -> string -> int -> int option
|
||||
(** [find ~pattern s i] finds the next occurrence of [pattern]
|
||||
in [s] starting at offset [i], and returns it,
|
||||
or returns [None] if the pattern doesn't occur. *)
|
||||
|
||||
val search : pattern:pattern -> string -> int option
|
||||
(** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *)
|
||||
|
||||
val find_all : pattern:pattern -> string -> int -> int gen
|
||||
(** Generator on all occurrences of the pattern *)
|
||||
|
||||
val seq : pattern:pattern -> string -> int -> int sequence
|
||||
(** iterate on matching positions *)
|
||||
|
||||
(** {6 One-shot functions that compile the pattern on-the-fly} *)
|
||||
|
||||
val search' : pattern:string -> string -> int option
|
||||
|
||||
val find_all' : pattern:string -> string -> int gen
|
||||
|
||||
val seq' : pattern:string -> string -> int sequence
|
||||
end
|
||||
|
||||
module Make(Str : STRING) : S with type string = Str.t
|
||||
|
||||
include S with type string = string
|
||||
|
|
@ -1,482 +0,0 @@
|
|||
(*
|
||||
copyright (c) 2013-2015, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Very Simple Parser Combinators} *)
|
||||
|
||||
type 'a or_error = [`Ok of 'a | `Error of string]
|
||||
|
||||
type line_num = int
|
||||
type col_num = int
|
||||
|
||||
module H = Hashtbl.Make(struct
|
||||
type t = int * int (* id of parser, position *)
|
||||
let equal ((a,b):t)(c,d) = a=c && b=d
|
||||
let hash = Hashtbl.hash
|
||||
end)
|
||||
|
||||
type memo_ = (unit -> unit) H.t lazy_t
|
||||
|
||||
type input = {
|
||||
is_done : unit -> bool; (** End of input? *)
|
||||
cur : unit -> char; (** Current char *)
|
||||
next : unit -> char; (** if not {!is_done}, move to next char *)
|
||||
pos : unit -> int; (** Current pos *)
|
||||
lnum : unit -> line_num; (** Line number @since 0.13 *)
|
||||
cnum : unit -> col_num; (** Column number @since 0.13 *)
|
||||
memo : memo_; (** Memoization table, if any *)
|
||||
backtrack : int -> unit; (** Restore to previous pos *)
|
||||
sub : int -> int -> string; (** Extract slice from [pos] with [len] *)
|
||||
}
|
||||
|
||||
exception ParseError of line_num * col_num * (unit -> string)
|
||||
|
||||
(*$inject
|
||||
module T = struct
|
||||
type tree = L of int | N of tree * tree
|
||||
end
|
||||
open T
|
||||
|
||||
let mk_leaf x = L x
|
||||
let mk_node x y = N(x,y)
|
||||
|
||||
let ptree = fix @@ fun self ->
|
||||
skip_space *>
|
||||
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
|
||||
<|>
|
||||
(U.int >|= mk_leaf) )
|
||||
|
||||
let ptree' = fix_memo @@ fun self ->
|
||||
skip_space *>
|
||||
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
|
||||
<|>
|
||||
(U.int >|= mk_leaf) )
|
||||
|
||||
let rec pptree = function
|
||||
| N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b)
|
||||
| L x -> Printf.sprintf "L %d" x
|
||||
|
||||
let errpptree = function
|
||||
| `Ok x -> "Ok " ^ pptree x
|
||||
| `Error s -> "Error " ^ s
|
||||
*)
|
||||
|
||||
(*$= & ~printer:errpptree
|
||||
(`Ok (N (L 1, N (L 2, L 3)))) \
|
||||
(parse_string ~p:ptree "(1 (2 3))" )
|
||||
(`Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
|
||||
(parse_string ~p:ptree "((1 2) (3 (4 5)))" )
|
||||
(`Ok (N (L 1, N (L 2, L 3)))) \
|
||||
(parse_string ~p:ptree' "(1 (2 3))" )
|
||||
(`Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
|
||||
(parse_string ~p:ptree' "((1 2) (3 (4 5)))" )
|
||||
*)
|
||||
|
||||
(*$R
|
||||
let p = U.list ~sep:"," U.word in
|
||||
let printer = function
|
||||
| `Ok l -> "Ok " ^ CCFormat.(to_string (list string)) l
|
||||
| `Error s -> "Error " ^ s
|
||||
in
|
||||
assert_equal ~printer
|
||||
(`Ok ["abc"; "de"; "hello"; "world"])
|
||||
(parse_string ~p "[abc , de, hello ,world ]");
|
||||
*)
|
||||
|
||||
(*$R
|
||||
let test n =
|
||||
let p = CCParse.(U.list ~sep:"," U.int) in
|
||||
|
||||
let l = CCList.(1 -- n) in
|
||||
let l_printed =
|
||||
CCFormat.(to_string (list ~start:"[" ~stop:"]" ~sep:"," int)) l in
|
||||
|
||||
let l' = CCParse.parse_string_exn ~p l_printed in
|
||||
|
||||
assert_equal ~printer:Q.Print.(list int) l l'
|
||||
in
|
||||
test 100_000;
|
||||
test 400_000;
|
||||
|
||||
*)
|
||||
|
||||
(* test with a temporary file *)
|
||||
(*$R
|
||||
let test n =
|
||||
let p = CCParse.(U.list ~sep:"," U.int) in
|
||||
|
||||
let l = CCList.(1 -- n) in
|
||||
let l' =
|
||||
CCIO.File.with_temp ~temp_dir:"/tmp/"
|
||||
~prefix:"containers_test" ~suffix:""
|
||||
(fun name ->
|
||||
(* write test into file *)
|
||||
CCIO.with_out name
|
||||
(fun oc ->
|
||||
let fmt = Format.formatter_of_out_channel oc in
|
||||
Format.fprintf fmt "@[%a@]@."
|
||||
CCFormat.(list ~start:"[" ~stop:"]" ~sep:"," int) l);
|
||||
(* parse it back *)
|
||||
CCParse.parse_file_exn ~size:1024 ~file:name ~p)
|
||||
in
|
||||
assert_equal ~printer:Q.Print.(list int) l l'
|
||||
in
|
||||
test 100_000;
|
||||
test 400_000;
|
||||
*)
|
||||
|
||||
let const_ x () = x
|
||||
|
||||
let input_of_string s =
|
||||
let i = ref 0 in
|
||||
let line = ref 1 in (* line *)
|
||||
let col = ref 1 in (* column *)
|
||||
{ is_done=(fun () -> !i = String.length s);
|
||||
cur=(fun () -> s.[!i]);
|
||||
next=(fun () ->
|
||||
if !i = String.length s
|
||||
then raise (ParseError (!line, !col, const_ "unexpected EOI"))
|
||||
else (
|
||||
let c = s.[!i] in
|
||||
incr i;
|
||||
if c='\n' then (incr line; col:=1) else incr col;
|
||||
c
|
||||
)
|
||||
);
|
||||
lnum=(fun () -> !line);
|
||||
cnum=(fun () -> !col);
|
||||
memo=lazy (H.create 32);
|
||||
pos=(fun () -> !i);
|
||||
backtrack=(fun j -> assert (0 <= j && j <= !i); i := j);
|
||||
sub=(fun j len -> assert (j + len <= !i); String.sub s j len);
|
||||
}
|
||||
|
||||
let input_of_chan ?(size=1024) ic =
|
||||
assert (size > 0);
|
||||
let b = ref (Bytes.make size ' ') in
|
||||
let n = ref 0 in (* length of buffer *)
|
||||
let i = ref 0 in (* current index in buffer *)
|
||||
let line = ref 1 in
|
||||
let col = ref 1 in
|
||||
let exhausted = ref false in (* input fully read? *)
|
||||
let eoi() = raise (ParseError (!line, !col, const_ "unexpected EOI")) in
|
||||
(* read a chunk of input *)
|
||||
let read_more () =
|
||||
assert (not !exhausted);
|
||||
(* resize *)
|
||||
if Bytes.length !b - !n < size then (
|
||||
let b' = Bytes.make (Bytes.length !b + 2 * size) ' ' in
|
||||
Bytes.blit !b 0 b' 0 !n;
|
||||
b := b';
|
||||
);
|
||||
let len = input ic !b !n size in
|
||||
exhausted := len = 0;
|
||||
n := !n + len
|
||||
in
|
||||
(* read next char *)
|
||||
let next() =
|
||||
if !exhausted && !i = !n then eoi();
|
||||
let c = Bytes.get !b !i in
|
||||
incr i;
|
||||
if c='\n' then (incr line; col := 1) else incr col;
|
||||
if !i = !n then (
|
||||
read_more();
|
||||
if !exhausted then eoi();
|
||||
assert (!i < !n);
|
||||
);
|
||||
c
|
||||
and is_done () = !exhausted && !i = !n in
|
||||
(* fetch first chars *)
|
||||
read_more();
|
||||
{ is_done=(fun () -> !exhausted && !i = !n);
|
||||
cur=(fun () -> assert (not (is_done())); Bytes.get !b !i);
|
||||
next;
|
||||
pos=(fun() -> !i);
|
||||
lnum=(fun () -> !line);
|
||||
cnum=(fun () -> !col);
|
||||
memo=lazy (H.create 32);
|
||||
backtrack=(fun j -> assert (0 <= j && j <= !i); i:=j);
|
||||
sub=(fun j len -> assert (j + len <= !i); Bytes.sub_string !b j len);
|
||||
}
|
||||
|
||||
type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
||||
|
||||
let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x
|
||||
let pure = return
|
||||
let (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
||||
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> ok (f x))
|
||||
let (>>=) : 'a t -> ('a -> 'b t) -> 'b t
|
||||
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> f x st ~err ~ok)
|
||||
let (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
||||
= fun f x st ~ok ~err ->
|
||||
f st ~err ~ok:(fun f' -> x st ~err ~ok:(fun x' -> ok (f' x')))
|
||||
let (<* ) : 'a t -> _ t -> 'a t
|
||||
= fun x y st ~ok ~err ->
|
||||
x st ~err ~ok:(fun res -> y st ~err ~ok:(fun _ -> ok res))
|
||||
let ( *>) : _ t -> 'a t -> 'a t
|
||||
= fun x y st ~ok ~err ->
|
||||
x st ~err ~ok:(fun _ -> y st ~err ~ok)
|
||||
|
||||
let junk_ st = ignore (st.next ())
|
||||
let pf = Printf.sprintf
|
||||
let fail_ ~err st msg = err (ParseError (st.lnum(), st.cnum(), msg))
|
||||
|
||||
let eoi st ~ok ~err =
|
||||
if st.is_done()
|
||||
then ok ()
|
||||
else fail_ ~err st (const_ "expected EOI")
|
||||
|
||||
let fail msg st ~ok:_ ~err = fail_ ~err st (const_ msg)
|
||||
let nop _ ~ok ~err:_ = ok()
|
||||
|
||||
let char c =
|
||||
let msg = pf "expected '%c'" c in
|
||||
fun st ~ok ~err -> if st.next () = c then ok c else fail_ ~err st (const_ msg)
|
||||
|
||||
let char_if p st ~ok ~err =
|
||||
let c = st.next () in
|
||||
if p c then ok c else fail_ ~err st (fun () -> pf "unexpected char '%c'" c)
|
||||
|
||||
let chars_if p st ~ok ~err:_ =
|
||||
let i = st.pos () in
|
||||
let len = ref 0 in
|
||||
while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done;
|
||||
ok (st.sub i !len)
|
||||
|
||||
let chars1_if p st ~ok ~err =
|
||||
chars_if p st ~err
|
||||
~ok:(fun s ->
|
||||
if s = "" then fail_ ~err st (const_ "unexpected sequence of chars");
|
||||
ok s
|
||||
)
|
||||
|
||||
let rec skip_chars p st ~ok ~err =
|
||||
if not (st.is_done ()) && p (st.cur ()) then (
|
||||
junk_ st;
|
||||
skip_chars p st ~ok ~err
|
||||
) else ok()
|
||||
|
||||
let is_alpha = function
|
||||
| 'a' .. 'z' | 'A' .. 'Z' -> true
|
||||
| _ -> false
|
||||
let is_num = function '0' .. '9' -> true | _ -> false
|
||||
let is_alpha_num = function
|
||||
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
|
||||
| _ -> false
|
||||
let is_space = function ' ' | '\t' -> true | _ -> false
|
||||
let is_white = function ' ' | '\t' | '\n' -> true | _ -> false
|
||||
let (~~~) p c = not (p c)
|
||||
let (|||) p1 p2 c = p1 c || p2 c
|
||||
let (&&&) p1 p2 c = p1 c && p2 c
|
||||
|
||||
let endline = char '\n'
|
||||
let space = char_if is_space
|
||||
let white = char_if is_white
|
||||
|
||||
let skip_space = skip_chars is_space
|
||||
let skip_white = skip_chars is_white
|
||||
|
||||
(* XXX: combine errors? *)
|
||||
|
||||
let (<|>) : 'a t -> 'a t -> 'a t
|
||||
= fun x y st ~ok ~err ->
|
||||
let i = st.pos () in
|
||||
x st ~ok
|
||||
~err:(fun _ ->
|
||||
st.backtrack i; (* restore pos *)
|
||||
y st ~ok ~err
|
||||
)
|
||||
|
||||
let string s st ~ok ~err =
|
||||
let rec check i =
|
||||
i = String.length s ||
|
||||
(s.[i] = st.next () && check (i+1))
|
||||
in
|
||||
if check 0 then ok s else fail_ ~err st (fun () -> pf "expected \"%s\"" s)
|
||||
|
||||
let rec many_rec : 'a t -> 'a list -> 'a list t = fun p acc st ~ok ~err ->
|
||||
if st.is_done () then ok(List.rev acc)
|
||||
else
|
||||
let i = st.pos () in
|
||||
p st ~err
|
||||
~ok:(fun x ->
|
||||
many_rec p (x :: acc) st ~ok
|
||||
~err:(fun _ ->
|
||||
st.backtrack i;
|
||||
ok(List.rev acc)
|
||||
)
|
||||
)
|
||||
|
||||
let many : 'a t -> 'a list t
|
||||
= fun p st ~ok ~err -> many_rec p [] st ~ok ~err
|
||||
|
||||
let many1 : 'a t -> 'a list t =
|
||||
fun p st ~ok ~err ->
|
||||
p st ~err ~ok:(fun x -> many_rec p [x] st ~err ~ok)
|
||||
|
||||
let rec skip p st ~ok ~err =
|
||||
let i = st.pos () in
|
||||
p st
|
||||
~ok:(fun _ -> skip p st ~ok ~err)
|
||||
~err:(fun _ ->
|
||||
st.backtrack i;
|
||||
ok()
|
||||
)
|
||||
|
||||
let rec sep1 ~by p =
|
||||
p >>= fun x ->
|
||||
let cont = by *> sep ~by p >|= fun tl -> x :: tl in
|
||||
cont <|> return [x]
|
||||
and sep ~by p =
|
||||
sep1 ~by p <|> return []
|
||||
|
||||
module MemoTbl = struct
|
||||
(* table of closures, used to implement universal type *)
|
||||
type t = memo_
|
||||
|
||||
let create n = lazy (H.create n)
|
||||
|
||||
(* unique ID for each parser *)
|
||||
let id_ = ref 0
|
||||
|
||||
type 'a res =
|
||||
| Fail of exn
|
||||
| Ok of 'a
|
||||
end
|
||||
|
||||
let fix f =
|
||||
let rec p st ~ok ~err = f p st ~ok ~err in
|
||||
p
|
||||
|
||||
let memo (type a) (p:a t):a t =
|
||||
let id = !MemoTbl.id_ in
|
||||
incr MemoTbl.id_;
|
||||
let r = ref None in (* used for universal encoding *)
|
||||
fun input ~ok ~err ->
|
||||
let i = input.pos () in
|
||||
let (lazy tbl) = input.memo in
|
||||
try
|
||||
let f = H.find tbl (i, id) in
|
||||
(* extract hidden value *)
|
||||
r := None;
|
||||
f ();
|
||||
begin match !r with
|
||||
| None -> assert false
|
||||
| Some (MemoTbl.Ok x) -> ok x
|
||||
| Some (MemoTbl.Fail e) -> err e
|
||||
end
|
||||
with Not_found ->
|
||||
(* parse, and save *)
|
||||
p input
|
||||
~err:(fun e ->
|
||||
H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Fail e));
|
||||
err e
|
||||
)
|
||||
~ok:(fun x ->
|
||||
H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Ok x));
|
||||
ok x
|
||||
)
|
||||
|
||||
let fix_memo f =
|
||||
let rec p =
|
||||
let p' = lazy (memo p) in
|
||||
fun st ~ok ~err -> f (Lazy.force p') st ~ok ~err
|
||||
in
|
||||
p
|
||||
|
||||
let parse_exn ~input ~p =
|
||||
let res = ref None in
|
||||
p input ~ok:(fun x -> res := Some x) ~err:(fun e -> raise e);
|
||||
match !res with
|
||||
| None -> failwith "no input returned by parser"
|
||||
| Some x -> x
|
||||
|
||||
let parse ~input ~p =
|
||||
try `Ok (parse_exn ~input ~p)
|
||||
with ParseError (lnum, cnum, msg) ->
|
||||
`Error (Printf.sprintf "at line %d, column %d: error, %s" lnum cnum (msg ()))
|
||||
|
||||
let parse_string s ~p = parse ~input:(input_of_string s) ~p
|
||||
let parse_string_exn s ~p = parse_exn ~input:(input_of_string s) ~p
|
||||
|
||||
let parse_file_exn ?size ~file ~p =
|
||||
let ic = open_in file in
|
||||
let input = input_of_chan ?size ic in
|
||||
try
|
||||
let res = parse_exn ~input ~p in
|
||||
close_in ic;
|
||||
res
|
||||
with e ->
|
||||
close_in ic;
|
||||
raise e
|
||||
|
||||
let parse_file ?size ~file ~p =
|
||||
try
|
||||
`Ok (parse_file_exn ?size ~file ~p)
|
||||
with
|
||||
| ParseError (lnum, cnum, msg) ->
|
||||
`Error (Printf.sprintf "at line %d, column %d: error, %s" lnum cnum (msg ()))
|
||||
| Sys_error s ->
|
||||
`Error (Printf.sprintf "error while reading %s: %s" file s)
|
||||
|
||||
module U = struct
|
||||
let sep_ = sep
|
||||
|
||||
let list ?(start="[") ?(stop="]") ?(sep=";") p =
|
||||
string start *> skip_white *>
|
||||
sep_ ~by:(skip_white *> string sep *> skip_white) p <*
|
||||
skip_white <* string stop
|
||||
|
||||
let int =
|
||||
chars1_if (is_num ||| (=) '-')
|
||||
>>= fun s ->
|
||||
try return (int_of_string s)
|
||||
with Failure _ -> fail "expected an int"
|
||||
|
||||
let map f x = x >|= f
|
||||
let map2 f x y = pure f <*> x <*> y
|
||||
let map3 f x y z = pure f <*> x <*> y <*> z
|
||||
|
||||
let prepend_str c s = String.make 1 c ^ s
|
||||
|
||||
let word =
|
||||
map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num)
|
||||
|
||||
let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 =
|
||||
string start *> skip_white *>
|
||||
p1 >>= fun x1 ->
|
||||
skip_white *> string sep *> skip_white *>
|
||||
p2 >>= fun x2 ->
|
||||
string stop *> return (x1,x2)
|
||||
|
||||
let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 =
|
||||
string start *> skip_white *>
|
||||
p1 >>= fun x1 ->
|
||||
skip_white *> string sep *> skip_white *>
|
||||
p2 >>= fun x2 ->
|
||||
skip_white *> string sep *> skip_white *>
|
||||
p3 >>= fun x3 ->
|
||||
string stop *> return (x1,x2,x3)
|
||||
|
||||
end
|
||||
|
|
@ -1,321 +0,0 @@
|
|||
(*
|
||||
copyright (c) 2013-2015, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(**
|
||||
{1 Very Simple Parser Combinators}
|
||||
|
||||
{b status} still a bit unstable, the type {!'a t} might still change.
|
||||
|
||||
Examples:
|
||||
|
||||
{6 parse recursive structures}
|
||||
|
||||
{[
|
||||
open Containers_string.Parse;;
|
||||
|
||||
type tree = L of int | N of tree * tree;;
|
||||
|
||||
let mk_leaf x = L x
|
||||
let mk_node x y = N(x,y)
|
||||
|
||||
let ptree = fix @@ fun self ->
|
||||
skip_space *>
|
||||
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
|
||||
<|>
|
||||
(U.int >|= mk_leaf) )
|
||||
;;
|
||||
|
||||
parse_string_exn "(1 (2 3))" ptree;;
|
||||
parse_string_exn "((1 2) (3 (4 5)))" ptree;;
|
||||
|
||||
]}
|
||||
|
||||
{6 Parse a list of words}
|
||||
|
||||
{[
|
||||
open Containers_string.Parse;;
|
||||
let p = U.list ~sep:"," U.word;;
|
||||
parse_string_exn "[abc , de, hello ,world ]" p;;
|
||||
]}
|
||||
|
||||
{6 Stress Test}
|
||||
This makes a list of 100_000 integers, prints it and parses it back.
|
||||
|
||||
{[
|
||||
let p = CCParse.(U.list ~sep:"," U.int);;
|
||||
|
||||
let l = CCList.(1 -- 100_000);;
|
||||
let l_printed =
|
||||
CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l;;
|
||||
|
||||
let l' = CCParse.parse_string_exn ~p l_printed;;
|
||||
|
||||
assert (l=l');;
|
||||
]}
|
||||
|
||||
@since 0.11
|
||||
*)
|
||||
|
||||
type 'a or_error = [`Ok of 'a | `Error of string]
|
||||
|
||||
type line_num = int (** @since 0.13 *)
|
||||
type col_num = int (** @since 0.13 *)
|
||||
|
||||
exception ParseError of line_num * col_num * (unit -> string)
|
||||
(** position * message
|
||||
|
||||
This type changed at 0.13 *)
|
||||
|
||||
(** {2 Input} *)
|
||||
|
||||
(** @since 0.13 *)
|
||||
module MemoTbl : sig
|
||||
type t
|
||||
val create: int -> t (** New memoization table *)
|
||||
end
|
||||
|
||||
type input = {
|
||||
is_done : unit -> bool; (** End of input? *)
|
||||
cur : unit -> char; (** Current char *)
|
||||
next : unit -> char;
|
||||
(** Returns current char;
|
||||
if not {!is_done}, move to next char,
|
||||
otherwise throw ParseError *)
|
||||
|
||||
pos : unit -> int; (** Current pos *)
|
||||
lnum : unit -> line_num; (** Line number @since 0.13 *)
|
||||
cnum : unit -> col_num; (** Column number @since 0.13 *)
|
||||
memo : MemoTbl.t; (** Memoization table, if any *)
|
||||
backtrack : int -> unit; (** Restore to previous pos *)
|
||||
sub : int -> int -> string; (** [sub pos len] extracts slice from [pos] with [len] *)
|
||||
}
|
||||
(** The type of input, which must allow for backtracking somehow.
|
||||
This type is {b unstable} and its details might change. *)
|
||||
|
||||
val input_of_string : string -> input
|
||||
(** Parse the string *)
|
||||
|
||||
val input_of_chan : ?size:int -> in_channel -> input
|
||||
(** [input_of_chan ic] reads lazily the content of [ic] as parsing goes.
|
||||
All content that is read is saved to an internal buffer for backtracking.
|
||||
@param size number of bytes read at once from [ic]
|
||||
@since 0.13 *)
|
||||
|
||||
(** {2 Combinators} *)
|
||||
|
||||
type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
||||
(** Takes the input and two continuations:
|
||||
{ul
|
||||
{- [ok] to call with the result when it's done}
|
||||
{- [err] to call when the parser met an error}
|
||||
}
|
||||
The type definition changed since 0.14 to avoid stack overflows
|
||||
@raise ParseError in case of failure *)
|
||||
|
||||
val return : 'a -> 'a t
|
||||
(** Always succeeds, without consuming its input *)
|
||||
|
||||
val pure : 'a -> 'a t
|
||||
(** Synonym to {!return} *)
|
||||
|
||||
val (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
||||
(** Map *)
|
||||
|
||||
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
|
||||
(** Monadic bind *)
|
||||
|
||||
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
||||
(** Applicative *)
|
||||
|
||||
val (<* ) : 'a t -> _ t -> 'a t
|
||||
(** [a <* b] parses [a] into [x], parses [b] and ignores its result,
|
||||
and returns [x] *)
|
||||
|
||||
val ( *>) : _ t -> 'a t -> 'a t
|
||||
(** [a *> b] parses [a], then parses [b] into [x], and returns [x]. The
|
||||
results of [a] is ignored. *)
|
||||
|
||||
val fail : string -> 'a t
|
||||
(** [fail msg] fails with the given message. It can trigger a backtrack *)
|
||||
|
||||
val eoi : unit t
|
||||
(** Expect the end of input, fails otherwise *)
|
||||
|
||||
val nop : unit t
|
||||
(** Succeed with [()] *)
|
||||
|
||||
val char : char -> char t
|
||||
(** [char c] parses the char [c] and nothing else *)
|
||||
|
||||
val char_if : (char -> bool) -> char t
|
||||
(** [char_if f] parses a character [c] if [f c = true] *)
|
||||
|
||||
val chars_if : (char -> bool) -> string t
|
||||
(** [chars_if f] parses a string of chars that satisfy [f] *)
|
||||
|
||||
val chars1_if : (char -> bool) -> string t
|
||||
(** Same as {!chars_if}, but only non-empty strings *)
|
||||
|
||||
val endline : char t
|
||||
(** Parses '\n' *)
|
||||
|
||||
val space : char t
|
||||
(** Tab or space *)
|
||||
|
||||
val white : char t
|
||||
(** Tab or space or newline *)
|
||||
|
||||
val skip_chars : (char -> bool) -> unit t
|
||||
(** Skip 0 or more chars satisfying the predicate *)
|
||||
|
||||
val skip_space : unit t
|
||||
(** Skip ' ' and '\t' *)
|
||||
|
||||
val skip_white : unit t
|
||||
(** Skip ' ' and '\t' and '\n' *)
|
||||
|
||||
val is_alpha : char -> bool
|
||||
(** Is the char a letter? *)
|
||||
|
||||
val is_num : char -> bool
|
||||
(** Is the char a digit? *)
|
||||
|
||||
val is_alpha_num : char -> bool
|
||||
|
||||
val is_space : char -> bool
|
||||
(** True on ' ' and '\t' *)
|
||||
|
||||
val is_white : char -> bool
|
||||
(** True on ' ' and '\t' and '\n'
|
||||
@since 0.13 *)
|
||||
|
||||
val (~~~) : (char -> bool) -> char -> bool
|
||||
(** Negation on predicates *)
|
||||
|
||||
val (|||) : (char -> bool) -> (char -> bool) -> char -> bool
|
||||
(** Disjunction on predicates *)
|
||||
|
||||
val (&&&) : (char -> bool) -> (char -> bool) -> char -> bool
|
||||
(** Conjunction on predicates *)
|
||||
|
||||
val (<|>) : 'a t -> 'a t -> 'a t
|
||||
(** [a <|> b] tries to parse [a], and if [a] fails, backtracks and tries
|
||||
to parse [b]. Therefore, it succeeds if either succeeds *)
|
||||
|
||||
val string : string -> string t
|
||||
(** [string s] parses exactly the string [s], and nothing else *)
|
||||
|
||||
val many : 'a t -> 'a list t
|
||||
(** [many p] parses a list of [p], eagerly (as long as possible) *)
|
||||
|
||||
val many1 : 'a t -> 'a list t
|
||||
(** parses a non empty list *)
|
||||
|
||||
val skip : _ t -> unit t
|
||||
(** [skip p] parses [p] and ignores its result *)
|
||||
|
||||
val sep : by:_ t -> 'a t -> 'a list t
|
||||
(** [sep ~by p] parses a list of [p] separated by [by] *)
|
||||
|
||||
val sep1 : by:_ t -> 'a t -> 'a list t
|
||||
(** [sep1 ~by p] parses a non empty list of [p], separated by [by] *)
|
||||
|
||||
val fix : ('a t -> 'a t) -> 'a t
|
||||
(** Fixpoint combinator *)
|
||||
|
||||
val memo : 'a t -> 'a t
|
||||
(** Memoize the parser. [memo p] will behave like [p], but when called
|
||||
in a state (read: position in input) it has already processed, [memo p]
|
||||
returns a result directly. The implementation uses an underlying
|
||||
hashtable.
|
||||
This can be costly in memory, but improve the run time a lot if there
|
||||
is a lot of backtracking involving [p].
|
||||
|
||||
This function is not thread-safe.
|
||||
@since 0.13 *)
|
||||
|
||||
val fix_memo : ('a t -> 'a t) -> 'a t
|
||||
(** Same as {!fix}, but the fixpoint is memoized.
|
||||
@since 0.13 *)
|
||||
|
||||
(** {2 Parse}
|
||||
|
||||
Those functions have a label [~p] on the parser, since 0.14.
|
||||
*)
|
||||
|
||||
val parse : input:input -> p:'a t -> 'a or_error
|
||||
(** [parse ~input p] applies [p] on the input, and returns [`Ok x] if
|
||||
[p] succeeds with [x], or [`Error s] otherwise *)
|
||||
|
||||
val parse_exn : input:input -> p:'a t -> 'a
|
||||
(** @raise ParseError if it fails *)
|
||||
|
||||
val parse_string : string -> p:'a t -> 'a or_error
|
||||
(** Specialization of {!parse} for string inputs *)
|
||||
|
||||
val parse_string_exn : string -> p:'a t -> 'a
|
||||
(** @raise ParseError if it fails *)
|
||||
|
||||
val parse_file : ?size:int -> file:string -> p:'a t -> 'a or_error
|
||||
(** [parse_file ~file p] parses [file] with [p] by opening the file
|
||||
and using {!input_of_chan}.
|
||||
@param size size of chunks read from file
|
||||
@since 0.13 *)
|
||||
|
||||
val parse_file_exn : ?size:int -> file:string -> p:'a t -> 'a
|
||||
(** Unsafe version of {!parse_file}
|
||||
@since 0.13 *)
|
||||
|
||||
(** {2 Utils} *)
|
||||
|
||||
module U : sig
|
||||
val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t
|
||||
(** [list p] parses a list of [p], with the OCaml conventions for
|
||||
start token "[", stop token "]" and separator ";".
|
||||
Whitespace between items are skipped *)
|
||||
|
||||
val int : int t
|
||||
|
||||
val word : string t
|
||||
(** non empty string of alpha num, start with alpha *)
|
||||
|
||||
val map : ('a -> 'b) -> 'a t -> 'b t
|
||||
|
||||
val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t
|
||||
|
||||
val map3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t
|
||||
|
||||
val pair : ?start:string -> ?stop:string -> ?sep:string ->
|
||||
'a t -> 'b t -> ('a * 'b) t
|
||||
(** Parse a pair using OCaml whitespace conventions.
|
||||
The default is "(a, b)".
|
||||
@since 0.14 *)
|
||||
|
||||
val triple : ?start:string -> ?stop:string -> ?sep:string ->
|
||||
'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t
|
||||
(** Parse a triple using OCaml whitespace conventions.
|
||||
The default is "(a, b, c)".
|
||||
@since 0.14 *)
|
||||
end
|
||||
Loading…
Add table
Reference in a new issue