mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
change the type 'a CCParse.t with continuations
- goal: avoid stack overflows (tailcalls!) - add stress test in comments and test
This commit is contained in:
parent
49c2e6fba2
commit
d30e86f628
2 changed files with 115 additions and 67 deletions
|
|
@ -105,6 +105,23 @@ exception ParseError of line_num * col_num * (unit -> string)
|
||||||
(parse_string "[abc , de, hello ,world ]" p);
|
(parse_string "[abc , de, hello ,world ]" p);
|
||||||
*)
|
*)
|
||||||
|
|
||||||
|
(*$R
|
||||||
|
let test n =
|
||||||
|
let p = CCParse.(U.list ~sep:"," U.int) in
|
||||||
|
|
||||||
|
let l = CCList.(1 -- n) in
|
||||||
|
let l_printed =
|
||||||
|
CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l in
|
||||||
|
|
||||||
|
let l' = CCParse.parse_string_exn ~p l_printed in
|
||||||
|
|
||||||
|
assert_equal ~printer:Q.Print.(list int) l l'
|
||||||
|
in
|
||||||
|
test 100_000;
|
||||||
|
test 400_000;
|
||||||
|
|
||||||
|
*)
|
||||||
|
|
||||||
let const_ x () = x
|
let const_ x () = x
|
||||||
|
|
||||||
let input_of_string s =
|
let input_of_string s =
|
||||||
|
|
@ -179,59 +196,62 @@ let input_of_chan ?(size=1024) ic =
|
||||||
sub=(fun j len -> assert (j + len <= !i); Bytes.sub_string !b j len);
|
sub=(fun j len -> assert (j + len <= !i); Bytes.sub_string !b j len);
|
||||||
}
|
}
|
||||||
|
|
||||||
type 'a t = input -> 'a
|
type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
||||||
|
|
||||||
let return x _ = x
|
let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x
|
||||||
let pure = return
|
let pure = return
|
||||||
let (>|=) p f st = f (p st)
|
let (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
||||||
let (>>=) p f st =
|
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> ok (f x))
|
||||||
let x = p st in
|
let (>>=) : 'a t -> ('a -> 'b t) -> 'b t
|
||||||
f x st
|
= fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> f x st ~err ~ok)
|
||||||
let (<*>) x y st =
|
let (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
||||||
let f = x st in
|
= fun f x st ~ok ~err ->
|
||||||
let g = y st in
|
f st ~err ~ok:(fun f' -> x st ~err ~ok:(fun x' -> ok (f' x')))
|
||||||
f g
|
let (<* ) : 'a t -> _ t -> 'a t
|
||||||
let (<* ) x y st =
|
= fun x y st ~ok ~err ->
|
||||||
let res = x st in
|
x st ~err ~ok:(fun res -> y st ~err ~ok:(fun _ -> ok res))
|
||||||
let _ = y st in
|
let ( *>) : _ t -> 'a t -> 'a t
|
||||||
res
|
= fun x y st ~ok ~err ->
|
||||||
let ( *>) x y st =
|
x st ~err ~ok:(fun _ -> y st ~err ~ok)
|
||||||
let _ = x st in
|
|
||||||
let res = y st in
|
|
||||||
res
|
|
||||||
|
|
||||||
let junk_ st = ignore (st.next ())
|
let junk_ st = ignore (st.next ())
|
||||||
let pf = Printf.sprintf
|
let pf = Printf.sprintf
|
||||||
let fail_ st msg = raise (ParseError (st.lnum(), st.cnum(), msg))
|
let fail_ ~err st msg = err (ParseError (st.lnum(), st.cnum(), msg))
|
||||||
|
|
||||||
let eoi st = if st.is_done() then () else fail_ st (const_ "expected EOI")
|
let eoi st ~ok ~err =
|
||||||
let fail msg st = fail_ st (const_ msg)
|
if st.is_done()
|
||||||
let nop _ = ()
|
then ok ()
|
||||||
|
else fail_ ~err st (const_ "expected EOI")
|
||||||
|
|
||||||
|
let fail msg st ~ok:_ ~err = fail_ ~err st (const_ msg)
|
||||||
|
let nop _ ~ok ~err:_ = ok()
|
||||||
|
|
||||||
let char c =
|
let char c =
|
||||||
let msg = pf "expected '%c'" c in
|
let msg = pf "expected '%c'" c in
|
||||||
fun st -> if st.next () = c then c else fail_ st (const_ msg)
|
fun st ~ok ~err -> if st.next () = c then ok c else fail_ ~err st (const_ msg)
|
||||||
|
|
||||||
let char_if p st =
|
let char_if p st ~ok ~err =
|
||||||
let c = st.next () in
|
let c = st.next () in
|
||||||
if p c then c else fail_ st (fun () -> pf "unexpected char '%c'" c)
|
if p c then ok c else fail_ ~err st (fun () -> pf "unexpected char '%c'" c)
|
||||||
|
|
||||||
let chars_if p st =
|
let chars_if p st ~ok ~err:_ =
|
||||||
let i = st.pos () in
|
let i = st.pos () in
|
||||||
let len = ref 0 in
|
let len = ref 0 in
|
||||||
while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done;
|
while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done;
|
||||||
st.sub i !len
|
ok (st.sub i !len)
|
||||||
|
|
||||||
let chars1_if p st =
|
let chars1_if p st ~ok ~err =
|
||||||
let s = chars_if p st in
|
chars_if p st ~err
|
||||||
if s = "" then fail_ st (const_ "unexpected sequence of chars");
|
~ok:(fun s ->
|
||||||
s
|
if s = "" then fail_ ~err st (const_ "unexpected sequence of chars");
|
||||||
|
ok s
|
||||||
|
)
|
||||||
|
|
||||||
let rec skip_chars p st =
|
let rec skip_chars p st ~ok ~err =
|
||||||
if not (st.is_done ()) && p (st.cur ()) then (
|
if not (st.is_done ()) && p (st.cur ()) then (
|
||||||
junk_ st;
|
junk_ st;
|
||||||
skip_chars p st
|
skip_chars p st ~ok ~err
|
||||||
)
|
) else ok()
|
||||||
|
|
||||||
let is_alpha = function
|
let is_alpha = function
|
||||||
| 'a' .. 'z' | 'A' .. 'Z' -> true
|
| 'a' .. 'z' | 'A' .. 'Z' -> true
|
||||||
|
|
@ -255,48 +275,50 @@ let skip_white = skip_chars is_white
|
||||||
|
|
||||||
(* XXX: combine errors? *)
|
(* XXX: combine errors? *)
|
||||||
|
|
||||||
let (<|>) x y st =
|
let (<|>) : 'a t -> 'a t -> 'a t
|
||||||
let i = st.pos () in
|
= fun x y st ~ok ~err ->
|
||||||
try
|
let i = st.pos () in
|
||||||
x st
|
x st ~ok
|
||||||
with ParseError _ ->
|
~err:(fun _ ->
|
||||||
st.backtrack i; (* restore pos *)
|
st.backtrack i; (* restore pos *)
|
||||||
y st
|
y st ~ok ~err
|
||||||
|
)
|
||||||
|
|
||||||
let string s st =
|
let string s st ~ok ~err =
|
||||||
let rec check i =
|
let rec check i =
|
||||||
i = String.length s ||
|
i = String.length s ||
|
||||||
(s.[i] = st.next () && check (i+1))
|
(s.[i] = st.next () && check (i+1))
|
||||||
in
|
in
|
||||||
if check 0 then s else fail_ st (fun () -> pf "expected \"%s\"" s)
|
if check 0 then ok s else fail_ ~err st (fun () -> pf "expected \"%s\"" s)
|
||||||
|
|
||||||
let rec many_rec p st acc =
|
let rec many_rec : 'a t -> 'a list -> 'a list t = fun p acc st ~ok ~err ->
|
||||||
if st.is_done () then List.rev acc
|
if st.is_done () then ok(List.rev acc)
|
||||||
else
|
else
|
||||||
let i = st.pos () in
|
let i = st.pos () in
|
||||||
try
|
p st ~err
|
||||||
let x = p st in
|
~ok:(fun x ->
|
||||||
many_rec p st (x :: acc)
|
many_rec p (x :: acc) st ~ok
|
||||||
with ParseError _ ->
|
~err:(fun _ ->
|
||||||
st.backtrack i;
|
st.backtrack i;
|
||||||
List.rev acc
|
ok(List.rev acc)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
let many p st = many_rec p st []
|
let many : 'a t -> 'a list t
|
||||||
|
= fun p st ~ok ~err -> many_rec p [] st ~ok ~err
|
||||||
|
|
||||||
let many1 p st =
|
let many1 : 'a t -> 'a list t =
|
||||||
let x = p st in
|
fun p st ~ok ~err ->
|
||||||
many_rec p st [x]
|
p st ~err ~ok:(fun x -> many_rec p [x] st ~err ~ok)
|
||||||
|
|
||||||
let rec skip p st =
|
let rec skip p st ~ok ~err =
|
||||||
let i = st.pos () in
|
let i = st.pos () in
|
||||||
let matched =
|
p st
|
||||||
try
|
~ok:(fun _ -> skip p st ~ok ~err)
|
||||||
let _ = p st in
|
~err:(fun _ ->
|
||||||
true
|
st.backtrack i;
|
||||||
with ParseError _ ->
|
ok()
|
||||||
false
|
)
|
||||||
in
|
|
||||||
if matched then skip p st else st.backtrack i
|
|
||||||
|
|
||||||
let rec sep1 ~by p =
|
let rec sep1 ~by p =
|
||||||
p >>= fun x ->
|
p >>= fun x ->
|
||||||
|
|
@ -357,7 +379,12 @@ let fix_memo f =
|
||||||
in
|
in
|
||||||
p
|
p
|
||||||
|
|
||||||
let parse_exn ~input ~p = p input
|
let parse_exn ~input ~p =
|
||||||
|
let res = ref None in
|
||||||
|
p input ~ok:(fun x -> res := Some x) ~err:(fun e -> raise e);
|
||||||
|
match !res with
|
||||||
|
| None -> failwith "no input returned by parser"
|
||||||
|
| Some x -> x
|
||||||
|
|
||||||
let parse ~input ~p =
|
let parse ~input ~p =
|
||||||
try `Ok (parse_exn ~input ~p)
|
try `Ok (parse_exn ~input ~p)
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,21 @@ let p = U.list ~sep:"," U.word;;
|
||||||
parse_string_exn "[abc , de, hello ,world ]" p;;
|
parse_string_exn "[abc , de, hello ,world ]" p;;
|
||||||
]}
|
]}
|
||||||
|
|
||||||
|
{6 Stress Test}
|
||||||
|
This makes a list of 100_000 integers, prints it and parses it back.
|
||||||
|
|
||||||
|
{[
|
||||||
|
let p = CCParse.(U.list ~sep:"," U.int);;
|
||||||
|
|
||||||
|
let l = CCList.(1 -- 100_000);;
|
||||||
|
let l_printed =
|
||||||
|
CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l;;
|
||||||
|
|
||||||
|
let l' = CCParse.parse_string_exn ~p l_printed;;
|
||||||
|
|
||||||
|
assert (l=l');;
|
||||||
|
]}
|
||||||
|
|
||||||
@since 0.11
|
@since 0.11
|
||||||
*)
|
*)
|
||||||
|
|
||||||
|
|
@ -111,8 +126,14 @@ val input_of_chan : ?size:int -> in_channel -> input
|
||||||
|
|
||||||
(** {2 Combinators} *)
|
(** {2 Combinators} *)
|
||||||
|
|
||||||
type 'a t = input -> 'a
|
type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
||||||
(** @raise ParseError in case of failure *)
|
(** Takes the input and two continuations:
|
||||||
|
{ul
|
||||||
|
{- [ok] to call with the result when it's done}
|
||||||
|
{- [err] to call when the parser met an error}
|
||||||
|
}
|
||||||
|
The type definition changed since NEXT_RELEASE to avoid stack overflows
|
||||||
|
@raise ParseError in case of failure *)
|
||||||
|
|
||||||
val return : 'a -> 'a t
|
val return : 'a -> 'a t
|
||||||
(** Always succeeds, without consuming its input *)
|
(** Always succeeds, without consuming its input *)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue