ocaml-containers/misc/parseReact.ml
2014-06-11 21:53:47 +02:00

237 lines
6.9 KiB
OCaml

(*
copyright (c) 2013-2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Parser combinators driven by the input} *)
type ('a, 'b) t =
| Return : 'b -> ('a,'b) t
| Delay : (unit -> ('a, 'b) t) -> ('a, 'b) t
| One : ('a, 'a) t
| Stop : ('a, unit) t
| Bind : ('a, 'b) t * ('b -> ('a, 'c) t) -> ('a, 'c) t
| Choice : ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t
| Map : ('a, 'b) t * ('b -> 'c) -> ('a, 'c) t
| Guard : ('a, 'b) t * ('b -> bool) -> ('a, 'b) t
| Skip : ('a, unit) t
| IfThenElse: ('a -> bool) * ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t
| Fail : ('a, 'b) t
let stop = Stop
let return x = Return x
let delay f = Delay f
let return' f = Delay (fun () -> return (f ()))
let fail = Fail
let one = One
let skip = Skip
let bind f p = Bind (p, f)
let (>>=) p f = bind f p
let exact ?(eq=(=)) x =
one
>>= fun y ->
if eq x y then Return () else Fail
let guard f p = Guard (p, f)
let (>>) p1 p2 = p1 >>= fun _ -> p2
let map f p = Map (p, f)
let (>>|) p f = Map (p, f)
let (<|>) p1 p2 = Choice (p1, p2)
let pair p1 p2 =
p1 >>= fun x1 ->
p2 >>= fun x2 ->
return (x1, x2)
let triple p1 p2 p3 =
p1 >>= fun x1 ->
p2 >>= fun x2 ->
p3 >>= fun x3 ->
return (x1, x2, x3)
let if_then_else p a b = IfThenElse (p, a, b)
(** {6 Utils} *)
let take_while pred =
let rec next acc =
if_then_else pred
(one >>= fun x -> next (x::acc))
(return' (fun () -> List.rev acc))
in
next []
let take_n n =
let rec next acc n =
if n = 0
then return (List.rev acc)
else one >>= fun x -> next (x::acc) (n-1)
in
next [] n
let skip_spaces =
let rec next () =
if_then_else
(fun c -> c = ' ' || c = '\t' || c = '\n')
(skip >> delay next)
(return ())
in next ()
let ident =
let accept = function
| c when Char.code c >= Char.code 'a' && Char.code c <= Char.code 'z' -> true
| c when Char.code c >= Char.code 'A' && Char.code c <= Char.code 'Z' -> true
| c when Char.code c >= Char.code '0' && Char.code c <= Char.code '9' -> true
| _ -> false
in
let rec aggregate buf =
if_then_else
accept
(one >>= fun c -> Buffer.add_char buf c; aggregate buf)
(return (Buffer.contents buf))
in
(* create buffer on demand, to avoid sharing it *)
delay (fun () -> aggregate (Buffer.create 32))
let many ~sep p =
let rec next acc =
(return (List.rev acc))
<|> (p >>= fun x -> sep >> next (x::acc))
in
next []
let many1 ~sep p =
let rec next acc =
p >>= fun x ->
let acc = x :: acc in
(return (List.rev acc))
<|> (sep >> next acc)
in
next []
(** {6 Run} *)
type 'a sequence = ('a -> unit) -> unit
let _fold_seq f acc seq =
let acc = ref acc in
seq (fun x -> acc := f !acc x);
!acc
(** Partial state during parsing: a tree of continuations *)
type (_, _) state =
| STBottom : 'b -> ('a, 'b) state
| STPush : ('a, 'c) t * ('c -> ('a, 'b) state list) -> ('a, 'b) state
let (>>>) p cont = STPush (p, cont)
let run p seq =
(* normalize the stack (do not let a "return" on top) *)
let rec reduce : type a b. (a,b)state -> (a,b) state list
= fun stack -> match stack with
| STPush (Return x, cont) -> CCList.flat_map reduce (cont x)
| STPush (Delay f, cont) -> reduce (f () >>> cont)
| STPush (Bind (p, f), cont) ->
let stack' = p >>> fun x -> [f x >>> cont] in
reduce stack'
| STPush (Choice (a, b), cont) ->
(* fork into sub-stacks *)
CCList.append (reduce (a >>> cont)) (reduce (b >>> cont))
| STPush (Map (p, f), cont) ->
let stack' = p >>> fun x -> cont (f x) in
reduce stack'
| STPush (Guard (p, f), cont) ->
let stack' = p >>> fun x -> if f x then cont x else [] in
reduce stack'
| _ -> [stack]
in
(* consume one input token *)
let rec consume_one : type a b. (a,b) state -> a -> (a,b) state list
= fun stack x -> match stack with
| STBottom _ -> [] (* fail *)
| STPush (Stop, _) -> [] (* fail *)
| STPush (Fail, _) -> [] (* fail *)
| STPush (One, cont) -> CCList.flat_map reduce (cont x)
| STPush (Skip, cont) -> CCList.flat_map reduce (cont ())
| STPush (IfThenElse (p, yay, nay), cont) ->
let l = if p x
then reduce (yay >>> cont)
else reduce (nay >>> cont)
in
CCList.flat_map (fun stack -> consume_one stack x) l
| STPush (Return _, _) -> assert false
| STPush (Delay _, _) -> assert false
| STPush (Bind _, _) -> assert false
| STPush (Choice _, _) -> assert false
| STPush (Map _, _) -> assert false
| STPush (Guard _, _) -> assert false
in
(* to be called at the end of input *)
let finish : type a b. (a,b) state -> (a,b) state list
= fun stack -> match stack with
| STPush (Stop, cont) -> CCList.flat_map reduce (cont ())
| STPush (Fail, _) -> []
| _ -> [stack]
in
(* how to parse the input: step by step, starting with [p] as initial parser *)
let step l x = CCList.flat_map (fun p -> consume_one p x) l in
let initial_state = p >>> fun x -> [STBottom x] in
let res = _fold_seq step (reduce initial_state) seq in
(* signal "end of input" *)
let res = CCList.flat_map finish res in
(* recover results *)
CCList.filter_map
(function
| STBottom x -> Some x
| _ -> None
) res
(*$R
let module S = struct type t = Atom of string | List of t list end in
let open S in
let (%) f g x = f (g x) in
let atom i = Atom i in
let list_ i = List i in
let rec p () =
(skip_spaces >> ident >>= (return % atom))
<|> (skip_spaces >> exact '(' >> many1 ~sep:(exact ' ') (delay p) >>= fun l ->
skip_spaces >> exact ')' >> return (list_ l))
in
let res = run (p ()) (CCSequence.of_str "(a b (c d))") in
assert_equal res [list_ [atom "a"; atom "b"; list_ [atom "c"; atom "d"]]]
*)