mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-09 12:45:34 -05:00
crazy input-driven parser combinators
This commit is contained in:
parent
9ca1f76bd7
commit
39f5e135bd
3 changed files with 349 additions and 1 deletions
2
_oasis
2
_oasis
|
|
@ -61,7 +61,7 @@ Library "containers_misc"
|
||||||
UnionFind, SmallSet, AbsSet, CSM,
|
UnionFind, SmallSet, AbsSet, CSM,
|
||||||
ActionMan, QCheck, BencodeOnDisk, TTree,
|
ActionMan, QCheck, BencodeOnDisk, TTree,
|
||||||
HGraph, Automaton, Conv, Bidir, Iteratee,
|
HGraph, Automaton, Conv, Bidir, Iteratee,
|
||||||
Ty, Tell, BencodeStream, RatTerm, Cause, AVL
|
Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact
|
||||||
BuildDepends: unix,containers
|
BuildDepends: unix,containers
|
||||||
FindlibName: misc
|
FindlibName: misc
|
||||||
FindlibParent: containers
|
FindlibParent: containers
|
||||||
|
|
|
||||||
235
misc/parseReact.ml
Normal file
235
misc/parseReact.ml
Normal file
|
|
@ -0,0 +1,235 @@
|
||||||
|
|
||||||
|
(*
|
||||||
|
copyright (c) 2013-2014, simon cruanes
|
||||||
|
all rights reserved.
|
||||||
|
|
||||||
|
redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Parser combinators driven by the input} *)
|
||||||
|
|
||||||
|
type ('a, 'b) t =
|
||||||
|
| Return : 'b -> ('a,'b) t
|
||||||
|
| Delay : (unit -> ('a, 'b) t) -> ('a, 'b) t
|
||||||
|
| One : ('a, 'a) t
|
||||||
|
| Stop : ('a, unit) t
|
||||||
|
| Bind : ('a, 'b) t * ('b -> ('a, 'c) t) -> ('a, 'c) t
|
||||||
|
| Choice : ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t
|
||||||
|
| Map : ('a, 'b) t * ('b -> 'c) -> ('a, 'c) t
|
||||||
|
| Guard : ('a, 'b) t * ('b -> bool) -> ('a, 'b) t
|
||||||
|
| Skip : ('a, unit) t
|
||||||
|
| IfThenElse: ('a -> bool) * ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t
|
||||||
|
| Fail : ('a, 'b) t
|
||||||
|
|
||||||
|
let stop = Stop
|
||||||
|
|
||||||
|
let return x = Return x
|
||||||
|
|
||||||
|
let delay f = Delay f
|
||||||
|
|
||||||
|
let return' f = Delay (fun () -> return (f ()))
|
||||||
|
|
||||||
|
let fail = Fail
|
||||||
|
|
||||||
|
let one = One
|
||||||
|
|
||||||
|
let skip = Skip
|
||||||
|
|
||||||
|
let bind f p = Bind (p, f)
|
||||||
|
|
||||||
|
let (>>=) p f = bind f p
|
||||||
|
|
||||||
|
let exact ?(eq=(=)) x =
|
||||||
|
one
|
||||||
|
>>= fun y ->
|
||||||
|
if eq x y then Return () else Fail
|
||||||
|
|
||||||
|
let guard f p = Guard (p, f)
|
||||||
|
|
||||||
|
let (>>) p1 p2 = p1 >>= fun _ -> p2
|
||||||
|
|
||||||
|
let map f p = Map (p, f)
|
||||||
|
|
||||||
|
let (>>|) p f = Map (p, f)
|
||||||
|
|
||||||
|
let (<|>) p1 p2 = Choice (p1, p2)
|
||||||
|
|
||||||
|
let pair p1 p2 =
|
||||||
|
p1 >>= fun x1 ->
|
||||||
|
p2 >>= fun x2 ->
|
||||||
|
return (x1, x2)
|
||||||
|
|
||||||
|
let triple p1 p2 p3 =
|
||||||
|
p1 >>= fun x1 ->
|
||||||
|
p2 >>= fun x2 ->
|
||||||
|
p3 >>= fun x3 ->
|
||||||
|
return (x1, x2, x3)
|
||||||
|
|
||||||
|
let if_then_else p a b = IfThenElse (p, a, b)
|
||||||
|
|
||||||
|
(** {6 Utils} *)
|
||||||
|
|
||||||
|
let take_while pred =
|
||||||
|
let rec next acc =
|
||||||
|
if_then_else pred
|
||||||
|
(one >>= fun x -> next (x::acc))
|
||||||
|
(return' (fun () -> List.rev acc))
|
||||||
|
in
|
||||||
|
next []
|
||||||
|
|
||||||
|
let take_n n =
|
||||||
|
let rec next acc n =
|
||||||
|
if n = 0
|
||||||
|
then return (List.rev acc)
|
||||||
|
else one >>= fun x -> next (x::acc) (n-1)
|
||||||
|
in
|
||||||
|
next [] n
|
||||||
|
|
||||||
|
let skip_spaces =
|
||||||
|
let rec next () =
|
||||||
|
if_then_else
|
||||||
|
(fun c -> c = ' ' || c = '\t' || c = '\n')
|
||||||
|
(skip >> delay next)
|
||||||
|
(return ())
|
||||||
|
in next ()
|
||||||
|
|
||||||
|
let ident =
|
||||||
|
let accept = function
|
||||||
|
| c when Char.code c >= Char.code 'a' && Char.code c <= Char.code 'z' -> true
|
||||||
|
| c when Char.code c >= Char.code 'A' && Char.code c <= Char.code 'Z' -> true
|
||||||
|
| c when Char.code c >= Char.code '0' && Char.code c <= Char.code '9' -> true
|
||||||
|
| _ -> false
|
||||||
|
in
|
||||||
|
let rec aggregate buf =
|
||||||
|
if_then_else
|
||||||
|
accept
|
||||||
|
(one >>= fun c -> Buffer.add_char buf c; aggregate buf)
|
||||||
|
(return (Buffer.contents buf))
|
||||||
|
in
|
||||||
|
(* create buffer on demand, to avoid sharing it *)
|
||||||
|
delay (fun () -> aggregate (Buffer.create 32))
|
||||||
|
|
||||||
|
let many ~sep p =
|
||||||
|
let rec next acc =
|
||||||
|
(return (List.rev acc))
|
||||||
|
<|> (p >>= fun x -> sep >> next (x::acc))
|
||||||
|
in
|
||||||
|
next []
|
||||||
|
|
||||||
|
let many1 ~sep p =
|
||||||
|
let rec next acc =
|
||||||
|
p >>= fun x ->
|
||||||
|
let acc = x :: acc in
|
||||||
|
(return (List.rev acc))
|
||||||
|
<|> (sep >> next acc)
|
||||||
|
in
|
||||||
|
next []
|
||||||
|
|
||||||
|
(** {6 Run} *)
|
||||||
|
|
||||||
|
type 'a sequence = ('a -> unit) -> unit
|
||||||
|
|
||||||
|
let _fold_seq f acc seq =
|
||||||
|
let acc = ref acc in
|
||||||
|
seq (fun x -> acc := f !acc x);
|
||||||
|
!acc
|
||||||
|
|
||||||
|
(** Partial state during parsing: a tree of continuations *)
|
||||||
|
type (_, _) state =
|
||||||
|
| STBottom : 'b -> ('a, 'b) state
|
||||||
|
| STPush : ('a, 'c) t * ('c -> ('a, 'b) state list) -> ('a, 'b) state
|
||||||
|
|
||||||
|
let (>>>) p cont = STPush (p, cont)
|
||||||
|
|
||||||
|
let run p seq =
|
||||||
|
(* normalize the stack (do not let a "return" on top) *)
|
||||||
|
let rec reduce : type a b. (a,b)state -> (a,b) state list
|
||||||
|
= fun stack -> match stack with
|
||||||
|
| STPush (Return x, cont) -> CCList.flat_map reduce (cont x)
|
||||||
|
| STPush (Delay f, cont) -> reduce (f () >>> cont)
|
||||||
|
| STPush (Bind (p, f), cont) ->
|
||||||
|
let stack' = p >>> fun x -> [f x >>> cont] in
|
||||||
|
reduce stack'
|
||||||
|
| STPush (Choice (a, b), cont) ->
|
||||||
|
(* fork into sub-stacks *)
|
||||||
|
CCList.append (reduce (a >>> cont)) (reduce (b >>> cont))
|
||||||
|
| STPush (Map (p, f), cont) ->
|
||||||
|
let stack' = p >>> fun x -> cont (f x) in
|
||||||
|
reduce stack'
|
||||||
|
| STPush (Guard (p, f), cont) ->
|
||||||
|
let stack' = p >>> fun x -> if f x then cont x else [] in
|
||||||
|
reduce stack'
|
||||||
|
| _ -> [stack]
|
||||||
|
in
|
||||||
|
(* consume one input token *)
|
||||||
|
let rec consume_one : type a b. (a,b) state -> a -> (a,b) state list
|
||||||
|
= fun stack x -> match stack with
|
||||||
|
| STBottom _ -> [] (* fail *)
|
||||||
|
| STPush (Stop, _) -> [] (* fail *)
|
||||||
|
| STPush (Fail, _) -> [] (* fail *)
|
||||||
|
| STPush (One, cont) -> CCList.flat_map reduce (cont x)
|
||||||
|
| STPush (Skip, cont) -> CCList.flat_map reduce (cont ())
|
||||||
|
| STPush (IfThenElse (p, yay, nay), cont) ->
|
||||||
|
let l = if p x
|
||||||
|
then reduce (yay >>> cont)
|
||||||
|
else reduce (nay >>> cont)
|
||||||
|
in
|
||||||
|
CCList.flat_map (fun stack -> consume_one stack x) l
|
||||||
|
| STPush (Return _, _) -> assert false
|
||||||
|
| STPush (Delay _, _) -> assert false
|
||||||
|
| STPush (Bind _, _) -> assert false
|
||||||
|
| STPush (Choice _, _) -> assert false
|
||||||
|
| STPush (Map _, _) -> assert false
|
||||||
|
| STPush (Guard _, _) -> assert false
|
||||||
|
in
|
||||||
|
(* to be called at the end of input *)
|
||||||
|
let finish : type a b. (a,b) state -> (a,b) state list
|
||||||
|
= fun stack -> match stack with
|
||||||
|
| STPush (Stop, cont) -> CCList.flat_map reduce (cont ())
|
||||||
|
| STPush (Fail, _) -> []
|
||||||
|
| _ -> [stack]
|
||||||
|
in
|
||||||
|
(* how to parse the input: step by step, starting with [p] as initial parser *)
|
||||||
|
let step l x = CCList.flat_map (fun p -> consume_one p x) l in
|
||||||
|
let initial_state = p >>> fun x -> [STBottom x] in
|
||||||
|
let res = _fold_seq step [initial_state] seq in
|
||||||
|
(* signal "end of input" *)
|
||||||
|
let res = CCList.flat_map finish res in
|
||||||
|
(* recover results *)
|
||||||
|
CCList.filter_map
|
||||||
|
(function
|
||||||
|
| STBottom x -> Some x
|
||||||
|
| _ -> None
|
||||||
|
) res
|
||||||
|
|
||||||
|
(*$R
|
||||||
|
type sexp = Atom of string | List of sexp list \
|
||||||
|
let atom i = Atom i \
|
||||||
|
let list_ i = List i \
|
||||||
|
|
||||||
|
let rec p () =
|
||||||
|
(skip_spaces >> ident >>= atom)
|
||||||
|
<|> (skip_spaces >> exact '(' >> many1 ~sep:(exact ' ') (delay p) >>= fun l
|
||||||
|
>> skip_spaces >> exact ')' >> return (list_ l))
|
||||||
|
in
|
||||||
|
run (p ()) (CCSequence.of_string "(a b (c d))") =
|
||||||
|
[list_ [atom "a"; atom "b"; list_ [atom "c"; atom "d"]]]
|
||||||
|
*)
|
||||||
113
misc/parseReact.mli
Normal file
113
misc/parseReact.mli
Normal file
|
|
@ -0,0 +1,113 @@
|
||||||
|
|
||||||
|
(*
|
||||||
|
copyright (c) 2013-2014, simon cruanes
|
||||||
|
all rights reserved.
|
||||||
|
|
||||||
|
redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Parser combinators driven by the input} *)
|
||||||
|
|
||||||
|
type ('input, 'result) t
|
||||||
|
(** parser that takes some type as input and outputs a value of type 'result
|
||||||
|
when it's done *)
|
||||||
|
|
||||||
|
(** {6 Basic Building Blocs} *)
|
||||||
|
|
||||||
|
val stop : ('a, unit) t
|
||||||
|
(** Succeed exactly at the end of input *)
|
||||||
|
|
||||||
|
val return : 'b -> ('a, 'b) t
|
||||||
|
(** Return a value *)
|
||||||
|
|
||||||
|
val return' : (unit -> 'b) -> ('a, 'b) t
|
||||||
|
(** Suspended version of {!return}, does not evaluate yet *)
|
||||||
|
|
||||||
|
val delay : (unit -> ('a, 'b) t) -> ('a, 'b) t
|
||||||
|
(** Delay evaluation of the parser *)
|
||||||
|
|
||||||
|
val fail : ('a, 'b) t
|
||||||
|
(** Failure *)
|
||||||
|
|
||||||
|
val one : ('a, 'a) t
|
||||||
|
(** Parse one value exactly *)
|
||||||
|
|
||||||
|
val skip : ('a, unit) t
|
||||||
|
(** Ignore the next value *)
|
||||||
|
|
||||||
|
val exact : ?eq:('a -> 'a -> bool) -> 'a -> ('a, unit) t
|
||||||
|
(** Accept one value as input exactly *)
|
||||||
|
|
||||||
|
val guard : ('b -> bool) -> ('a, 'b) t -> ('a, 'b) t
|
||||||
|
(** Ensure the return value of the given parser satisfies the predicate.
|
||||||
|
[guard f p] will be the same as [p] if [p] returns
|
||||||
|
some [x] with [f x = true]. If [not (f x)], then [guard f p] fails. *)
|
||||||
|
|
||||||
|
val bind : ('b -> ('a, 'c) t) -> ('a, 'b) t -> ('a, 'c) t
|
||||||
|
|
||||||
|
val (>>=) : ('a, 'b) t -> ('b -> ('a, 'c) t) -> ('a, 'c) t
|
||||||
|
|
||||||
|
val (>>) : ('a, 'b) t -> ('a, 'c) t -> ('a, 'c) t
|
||||||
|
(** Wait for the first parser to succeed, then switch to the second one *)
|
||||||
|
|
||||||
|
val map : ('b -> 'c) -> ('a, 'b) t -> ('a, 'c) t
|
||||||
|
(** Map outputs *)
|
||||||
|
|
||||||
|
val (>>|) : ('a, 'b) t -> ('b -> 'c) -> ('a, 'c) t
|
||||||
|
(** Infix version of {!map} *)
|
||||||
|
|
||||||
|
val (<|>) : ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t
|
||||||
|
(** Non-deterministic choice. Both branches are evaluated in parallel *)
|
||||||
|
|
||||||
|
val pair : ('a,'b) t -> ('a, 'c) t -> ('a, ('b * 'c)) t
|
||||||
|
val triple : ('a,'b) t -> ('a, 'c) t -> ('a, 'd) t -> ('a, ('b * 'c * 'd)) t
|
||||||
|
|
||||||
|
val if_then_else : ('a -> bool) -> ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t
|
||||||
|
(** Test the next input, and choose the parser based on it. Does not consume
|
||||||
|
the input token for the test *)
|
||||||
|
|
||||||
|
(** {6 Utils} *)
|
||||||
|
|
||||||
|
val take_while : ('a -> bool) -> ('a, 'a list) t
|
||||||
|
(** Take input while it satisfies the given predicate *)
|
||||||
|
|
||||||
|
val take_n : int -> ('a, 'a list) t
|
||||||
|
(** Take n input elements *)
|
||||||
|
|
||||||
|
val skip_spaces : (char, unit) t
|
||||||
|
(** Skip whitespace (space,tab,newline) *)
|
||||||
|
|
||||||
|
val ident : (char, string) t
|
||||||
|
(** Parse identifiers (stops on whitespaces) *)
|
||||||
|
|
||||||
|
val many : sep:('a,_) t -> ('a, 'b) t -> ('a, 'b list) t
|
||||||
|
(** [many ~sep p] parses as many [p] as possible, separated by [sep]. *)
|
||||||
|
|
||||||
|
val many1 : sep:('a,_) t -> ('a, 'b) t -> ('a, 'b list) t
|
||||||
|
|
||||||
|
(** {6 Run} *)
|
||||||
|
|
||||||
|
type 'a sequence = ('a -> unit) -> unit
|
||||||
|
|
||||||
|
val run : ('a,'b) t -> 'a sequence -> 'b list
|
||||||
|
(** List of results. Each element of the list comes from a successful
|
||||||
|
series of choices [<|>]. If no choice operator was used, the list
|
||||||
|
contains 0 or 1 elements *)
|
||||||
Loading…
Add table
Reference in a new issue