change the type 'a CCParse.t with continuations

- goal: avoid stack overflows (tailcalls!)
- add stress test in comments and test
This commit is contained in:
Simon Cruanes 2015-10-21 23:51:56 +02:00
parent 49c2e6fba2
commit d30e86f628
2 changed files with 115 additions and 67 deletions

View file

@ -105,6 +105,23 @@ exception ParseError of line_num * col_num * (unit -> string)
(parse_string "[abc , de, hello ,world ]" p); (parse_string "[abc , de, hello ,world ]" p);
*) *)
(*$R
let test n =
let p = CCParse.(U.list ~sep:"," U.int) in
let l = CCList.(1 -- n) in
let l_printed =
CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l in
let l' = CCParse.parse_string_exn ~p l_printed in
assert_equal ~printer:Q.Print.(list int) l l'
in
test 100_000;
test 400_000;
*)
let const_ x () = x let const_ x () = x
let input_of_string s = let input_of_string s =
@ -179,59 +196,62 @@ let input_of_chan ?(size=1024) ic =
sub=(fun j len -> assert (j + len <= !i); Bytes.sub_string !b j len); sub=(fun j len -> assert (j + len <= !i); Bytes.sub_string !b j len);
} }
type 'a t = input -> 'a type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit
let return x _ = x let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x
let pure = return let pure = return
let (>|=) p f st = f (p st) let (>|=) : 'a t -> ('a -> 'b) -> 'b t
let (>>=) p f st = = fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> ok (f x))
let x = p st in let (>>=) : 'a t -> ('a -> 'b t) -> 'b t
f x st = fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> f x st ~err ~ok)
let (<*>) x y st = let (<*>) : ('a -> 'b) t -> 'a t -> 'b t
let f = x st in = fun f x st ~ok ~err ->
let g = y st in f st ~err ~ok:(fun f' -> x st ~err ~ok:(fun x' -> ok (f' x')))
f g let (<* ) : 'a t -> _ t -> 'a t
let (<* ) x y st = = fun x y st ~ok ~err ->
let res = x st in x st ~err ~ok:(fun res -> y st ~err ~ok:(fun _ -> ok res))
let _ = y st in let ( *>) : _ t -> 'a t -> 'a t
res = fun x y st ~ok ~err ->
let ( *>) x y st = x st ~err ~ok:(fun _ -> y st ~err ~ok)
let _ = x st in
let res = y st in
res
let junk_ st = ignore (st.next ()) let junk_ st = ignore (st.next ())
let pf = Printf.sprintf let pf = Printf.sprintf
let fail_ st msg = raise (ParseError (st.lnum(), st.cnum(), msg)) let fail_ ~err st msg = err (ParseError (st.lnum(), st.cnum(), msg))
let eoi st = if st.is_done() then () else fail_ st (const_ "expected EOI") let eoi st ~ok ~err =
let fail msg st = fail_ st (const_ msg) if st.is_done()
let nop _ = () then ok ()
else fail_ ~err st (const_ "expected EOI")
let fail msg st ~ok:_ ~err = fail_ ~err st (const_ msg)
let nop _ ~ok ~err:_ = ok()
let char c = let char c =
let msg = pf "expected '%c'" c in let msg = pf "expected '%c'" c in
fun st -> if st.next () = c then c else fail_ st (const_ msg) fun st ~ok ~err -> if st.next () = c then ok c else fail_ ~err st (const_ msg)
let char_if p st = let char_if p st ~ok ~err =
let c = st.next () in let c = st.next () in
if p c then c else fail_ st (fun () -> pf "unexpected char '%c'" c) if p c then ok c else fail_ ~err st (fun () -> pf "unexpected char '%c'" c)
let chars_if p st = let chars_if p st ~ok ~err:_ =
let i = st.pos () in let i = st.pos () in
let len = ref 0 in let len = ref 0 in
while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done; while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done;
st.sub i !len ok (st.sub i !len)
let chars1_if p st = let chars1_if p st ~ok ~err =
let s = chars_if p st in chars_if p st ~err
if s = "" then fail_ st (const_ "unexpected sequence of chars"); ~ok:(fun s ->
s if s = "" then fail_ ~err st (const_ "unexpected sequence of chars");
ok s
)
let rec skip_chars p st = let rec skip_chars p st ~ok ~err =
if not (st.is_done ()) && p (st.cur ()) then ( if not (st.is_done ()) && p (st.cur ()) then (
junk_ st; junk_ st;
skip_chars p st skip_chars p st ~ok ~err
) ) else ok()
let is_alpha = function let is_alpha = function
| 'a' .. 'z' | 'A' .. 'Z' -> true | 'a' .. 'z' | 'A' .. 'Z' -> true
@ -255,48 +275,50 @@ let skip_white = skip_chars is_white
(* XXX: combine errors? *) (* XXX: combine errors? *)
let (<|>) x y st = let (<|>) : 'a t -> 'a t -> 'a t
let i = st.pos () in = fun x y st ~ok ~err ->
try let i = st.pos () in
x st x st ~ok
with ParseError _ -> ~err:(fun _ ->
st.backtrack i; (* restore pos *) st.backtrack i; (* restore pos *)
y st y st ~ok ~err
)
let string s st = let string s st ~ok ~err =
let rec check i = let rec check i =
i = String.length s || i = String.length s ||
(s.[i] = st.next () && check (i+1)) (s.[i] = st.next () && check (i+1))
in in
if check 0 then s else fail_ st (fun () -> pf "expected \"%s\"" s) if check 0 then ok s else fail_ ~err st (fun () -> pf "expected \"%s\"" s)
let rec many_rec p st acc = let rec many_rec : 'a t -> 'a list -> 'a list t = fun p acc st ~ok ~err ->
if st.is_done () then List.rev acc if st.is_done () then ok(List.rev acc)
else else
let i = st.pos () in let i = st.pos () in
try p st ~err
let x = p st in ~ok:(fun x ->
many_rec p st (x :: acc) many_rec p (x :: acc) st ~ok
with ParseError _ -> ~err:(fun _ ->
st.backtrack i; st.backtrack i;
List.rev acc ok(List.rev acc)
)
)
let many p st = many_rec p st [] let many : 'a t -> 'a list t
= fun p st ~ok ~err -> many_rec p [] st ~ok ~err
let many1 p st = let many1 : 'a t -> 'a list t =
let x = p st in fun p st ~ok ~err ->
many_rec p st [x] p st ~err ~ok:(fun x -> many_rec p [x] st ~err ~ok)
let rec skip p st = let rec skip p st ~ok ~err =
let i = st.pos () in let i = st.pos () in
let matched = p st
try ~ok:(fun _ -> skip p st ~ok ~err)
let _ = p st in ~err:(fun _ ->
true st.backtrack i;
with ParseError _ -> ok()
false )
in
if matched then skip p st else st.backtrack i
let rec sep1 ~by p = let rec sep1 ~by p =
p >>= fun x -> p >>= fun x ->
@ -357,7 +379,12 @@ let fix_memo f =
in in
p p
let parse_exn ~input ~p = p input let parse_exn ~input ~p =
let res = ref None in
p input ~ok:(fun x -> res := Some x) ~err:(fun e -> raise e);
match !res with
| None -> failwith "no input returned by parser"
| Some x -> x
let parse ~input ~p = let parse ~input ~p =
try `Ok (parse_exn ~input ~p) try `Ok (parse_exn ~input ~p)

View file

@ -61,6 +61,21 @@ let p = U.list ~sep:"," U.word;;
parse_string_exn "[abc , de, hello ,world ]" p;; parse_string_exn "[abc , de, hello ,world ]" p;;
]} ]}
{6 Stress Test}
This makes a list of 100_000 integers, prints it and parses it back.
{[
let p = CCParse.(U.list ~sep:"," U.int);;
let l = CCList.(1 -- 100_000);;
let l_printed =
CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l;;
let l' = CCParse.parse_string_exn ~p l_printed;;
assert (l=l');;
]}
@since 0.11 @since 0.11
*) *)
@ -111,8 +126,14 @@ val input_of_chan : ?size:int -> in_channel -> input
(** {2 Combinators} *) (** {2 Combinators} *)
type 'a t = input -> 'a type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit
(** @raise ParseError in case of failure *) (** Takes the input and two continuations:
{ul
{- [ok] to call with the result when it's done}
{- [err] to call when the parser met an error}
}
The type definition changed since NEXT_RELEASE to avoid stack overflows
@raise ParseError in case of failure *)
val return : 'a -> 'a t val return : 'a -> 'a t
(** Always succeeds, without consuming its input *) (** Always succeeds, without consuming its input *)