mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-09 04:35:29 -05:00
crazy input-driven parser combinators
This commit is contained in:
parent
9ca1f76bd7
commit
39f5e135bd
3 changed files with 349 additions and 1 deletions
2
_oasis
2
_oasis
|
|
@ -61,7 +61,7 @@ Library "containers_misc"
|
|||
UnionFind, SmallSet, AbsSet, CSM,
|
||||
ActionMan, QCheck, BencodeOnDisk, TTree,
|
||||
HGraph, Automaton, Conv, Bidir, Iteratee,
|
||||
Ty, Tell, BencodeStream, RatTerm, Cause, AVL
|
||||
Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact
|
||||
BuildDepends: unix,containers
|
||||
FindlibName: misc
|
||||
FindlibParent: containers
|
||||
|
|
|
|||
235
misc/parseReact.ml
Normal file
235
misc/parseReact.ml
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2014, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Parser combinators driven by the input} *)
|
||||
|
||||
type ('a, 'b) t =
|
||||
| Return : 'b -> ('a,'b) t
|
||||
| Delay : (unit -> ('a, 'b) t) -> ('a, 'b) t
|
||||
| One : ('a, 'a) t
|
||||
| Stop : ('a, unit) t
|
||||
| Bind : ('a, 'b) t * ('b -> ('a, 'c) t) -> ('a, 'c) t
|
||||
| Choice : ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t
|
||||
| Map : ('a, 'b) t * ('b -> 'c) -> ('a, 'c) t
|
||||
| Guard : ('a, 'b) t * ('b -> bool) -> ('a, 'b) t
|
||||
| Skip : ('a, unit) t
|
||||
| IfThenElse: ('a -> bool) * ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t
|
||||
| Fail : ('a, 'b) t
|
||||
|
||||
let stop = Stop
|
||||
|
||||
let return x = Return x
|
||||
|
||||
let delay f = Delay f
|
||||
|
||||
let return' f = Delay (fun () -> return (f ()))
|
||||
|
||||
let fail = Fail
|
||||
|
||||
let one = One
|
||||
|
||||
let skip = Skip
|
||||
|
||||
let bind f p = Bind (p, f)
|
||||
|
||||
let (>>=) p f = bind f p
|
||||
|
||||
let exact ?(eq=(=)) x =
|
||||
one
|
||||
>>= fun y ->
|
||||
if eq x y then Return () else Fail
|
||||
|
||||
let guard f p = Guard (p, f)
|
||||
|
||||
let (>>) p1 p2 = p1 >>= fun _ -> p2
|
||||
|
||||
let map f p = Map (p, f)
|
||||
|
||||
let (>>|) p f = Map (p, f)
|
||||
|
||||
let (<|>) p1 p2 = Choice (p1, p2)
|
||||
|
||||
let pair p1 p2 =
|
||||
p1 >>= fun x1 ->
|
||||
p2 >>= fun x2 ->
|
||||
return (x1, x2)
|
||||
|
||||
let triple p1 p2 p3 =
|
||||
p1 >>= fun x1 ->
|
||||
p2 >>= fun x2 ->
|
||||
p3 >>= fun x3 ->
|
||||
return (x1, x2, x3)
|
||||
|
||||
let if_then_else p a b = IfThenElse (p, a, b)
|
||||
|
||||
(** {6 Utils} *)
|
||||
|
||||
let take_while pred =
|
||||
let rec next acc =
|
||||
if_then_else pred
|
||||
(one >>= fun x -> next (x::acc))
|
||||
(return' (fun () -> List.rev acc))
|
||||
in
|
||||
next []
|
||||
|
||||
let take_n n =
|
||||
let rec next acc n =
|
||||
if n = 0
|
||||
then return (List.rev acc)
|
||||
else one >>= fun x -> next (x::acc) (n-1)
|
||||
in
|
||||
next [] n
|
||||
|
||||
let skip_spaces =
|
||||
let rec next () =
|
||||
if_then_else
|
||||
(fun c -> c = ' ' || c = '\t' || c = '\n')
|
||||
(skip >> delay next)
|
||||
(return ())
|
||||
in next ()
|
||||
|
||||
let ident =
|
||||
let accept = function
|
||||
| c when Char.code c >= Char.code 'a' && Char.code c <= Char.code 'z' -> true
|
||||
| c when Char.code c >= Char.code 'A' && Char.code c <= Char.code 'Z' -> true
|
||||
| c when Char.code c >= Char.code '0' && Char.code c <= Char.code '9' -> true
|
||||
| _ -> false
|
||||
in
|
||||
let rec aggregate buf =
|
||||
if_then_else
|
||||
accept
|
||||
(one >>= fun c -> Buffer.add_char buf c; aggregate buf)
|
||||
(return (Buffer.contents buf))
|
||||
in
|
||||
(* create buffer on demand, to avoid sharing it *)
|
||||
delay (fun () -> aggregate (Buffer.create 32))
|
||||
|
||||
let many ~sep p =
|
||||
let rec next acc =
|
||||
(return (List.rev acc))
|
||||
<|> (p >>= fun x -> sep >> next (x::acc))
|
||||
in
|
||||
next []
|
||||
|
||||
let many1 ~sep p =
|
||||
let rec next acc =
|
||||
p >>= fun x ->
|
||||
let acc = x :: acc in
|
||||
(return (List.rev acc))
|
||||
<|> (sep >> next acc)
|
||||
in
|
||||
next []
|
||||
|
||||
(** {6 Run} *)
|
||||
|
||||
type 'a sequence = ('a -> unit) -> unit
|
||||
|
||||
let _fold_seq f acc seq =
|
||||
let acc = ref acc in
|
||||
seq (fun x -> acc := f !acc x);
|
||||
!acc
|
||||
|
||||
(** Partial state during parsing: a tree of continuations *)
|
||||
type (_, _) state =
|
||||
| STBottom : 'b -> ('a, 'b) state
|
||||
| STPush : ('a, 'c) t * ('c -> ('a, 'b) state list) -> ('a, 'b) state
|
||||
|
||||
let (>>>) p cont = STPush (p, cont)
|
||||
|
||||
let run p seq =
|
||||
(* normalize the stack (do not let a "return" on top) *)
|
||||
let rec reduce : type a b. (a,b)state -> (a,b) state list
|
||||
= fun stack -> match stack with
|
||||
| STPush (Return x, cont) -> CCList.flat_map reduce (cont x)
|
||||
| STPush (Delay f, cont) -> reduce (f () >>> cont)
|
||||
| STPush (Bind (p, f), cont) ->
|
||||
let stack' = p >>> fun x -> [f x >>> cont] in
|
||||
reduce stack'
|
||||
| STPush (Choice (a, b), cont) ->
|
||||
(* fork into sub-stacks *)
|
||||
CCList.append (reduce (a >>> cont)) (reduce (b >>> cont))
|
||||
| STPush (Map (p, f), cont) ->
|
||||
let stack' = p >>> fun x -> cont (f x) in
|
||||
reduce stack'
|
||||
| STPush (Guard (p, f), cont) ->
|
||||
let stack' = p >>> fun x -> if f x then cont x else [] in
|
||||
reduce stack'
|
||||
| _ -> [stack]
|
||||
in
|
||||
(* consume one input token *)
|
||||
let rec consume_one : type a b. (a,b) state -> a -> (a,b) state list
|
||||
= fun stack x -> match stack with
|
||||
| STBottom _ -> [] (* fail *)
|
||||
| STPush (Stop, _) -> [] (* fail *)
|
||||
| STPush (Fail, _) -> [] (* fail *)
|
||||
| STPush (One, cont) -> CCList.flat_map reduce (cont x)
|
||||
| STPush (Skip, cont) -> CCList.flat_map reduce (cont ())
|
||||
| STPush (IfThenElse (p, yay, nay), cont) ->
|
||||
let l = if p x
|
||||
then reduce (yay >>> cont)
|
||||
else reduce (nay >>> cont)
|
||||
in
|
||||
CCList.flat_map (fun stack -> consume_one stack x) l
|
||||
| STPush (Return _, _) -> assert false
|
||||
| STPush (Delay _, _) -> assert false
|
||||
| STPush (Bind _, _) -> assert false
|
||||
| STPush (Choice _, _) -> assert false
|
||||
| STPush (Map _, _) -> assert false
|
||||
| STPush (Guard _, _) -> assert false
|
||||
in
|
||||
(* to be called at the end of input *)
|
||||
let finish : type a b. (a,b) state -> (a,b) state list
|
||||
= fun stack -> match stack with
|
||||
| STPush (Stop, cont) -> CCList.flat_map reduce (cont ())
|
||||
| STPush (Fail, _) -> []
|
||||
| _ -> [stack]
|
||||
in
|
||||
(* how to parse the input: step by step, starting with [p] as initial parser *)
|
||||
let step l x = CCList.flat_map (fun p -> consume_one p x) l in
|
||||
let initial_state = p >>> fun x -> [STBottom x] in
|
||||
let res = _fold_seq step [initial_state] seq in
|
||||
(* signal "end of input" *)
|
||||
let res = CCList.flat_map finish res in
|
||||
(* recover results *)
|
||||
CCList.filter_map
|
||||
(function
|
||||
| STBottom x -> Some x
|
||||
| _ -> None
|
||||
) res
|
||||
|
||||
(*$R
|
||||
type sexp = Atom of string | List of sexp list \
|
||||
let atom i = Atom i \
|
||||
let list_ i = List i \
|
||||
|
||||
let rec p () =
|
||||
(skip_spaces >> ident >>= atom)
|
||||
<|> (skip_spaces >> exact '(' >> many1 ~sep:(exact ' ') (delay p) >>= fun l
|
||||
>> skip_spaces >> exact ')' >> return (list_ l))
|
||||
in
|
||||
run (p ()) (CCSequence.of_string "(a b (c d))") =
|
||||
[list_ [atom "a"; atom "b"; list_ [atom "c"; atom "d"]]]
|
||||
*)
|
||||
113
misc/parseReact.mli
Normal file
113
misc/parseReact.mli
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2014, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Parser combinators driven by the input} *)
|
||||
|
||||
type ('input, 'result) t
|
||||
(** parser that takes some type as input and outputs a value of type 'result
|
||||
when it's done *)
|
||||
|
||||
(** {6 Basic Building Blocs} *)
|
||||
|
||||
val stop : ('a, unit) t
|
||||
(** Succeed exactly at the end of input *)
|
||||
|
||||
val return : 'b -> ('a, 'b) t
|
||||
(** Return a value *)
|
||||
|
||||
val return' : (unit -> 'b) -> ('a, 'b) t
|
||||
(** Suspended version of {!return}, does not evaluate yet *)
|
||||
|
||||
val delay : (unit -> ('a, 'b) t) -> ('a, 'b) t
|
||||
(** Delay evaluation of the parser *)
|
||||
|
||||
val fail : ('a, 'b) t
|
||||
(** Failure *)
|
||||
|
||||
val one : ('a, 'a) t
|
||||
(** Parse one value exactly *)
|
||||
|
||||
val skip : ('a, unit) t
|
||||
(** Ignore the next value *)
|
||||
|
||||
val exact : ?eq:('a -> 'a -> bool) -> 'a -> ('a, unit) t
|
||||
(** Accept one value as input exactly *)
|
||||
|
||||
val guard : ('b -> bool) -> ('a, 'b) t -> ('a, 'b) t
|
||||
(** Ensure the return value of the given parser satisfies the predicate.
|
||||
[guard f p] will be the same as [p] if [p] returns
|
||||
some [x] with [f x = true]. If [not (f x)], then [guard f p] fails. *)
|
||||
|
||||
val bind : ('b -> ('a, 'c) t) -> ('a, 'b) t -> ('a, 'c) t
|
||||
|
||||
val (>>=) : ('a, 'b) t -> ('b -> ('a, 'c) t) -> ('a, 'c) t
|
||||
|
||||
val (>>) : ('a, 'b) t -> ('a, 'c) t -> ('a, 'c) t
|
||||
(** Wait for the first parser to succeed, then switch to the second one *)
|
||||
|
||||
val map : ('b -> 'c) -> ('a, 'b) t -> ('a, 'c) t
|
||||
(** Map outputs *)
|
||||
|
||||
val (>>|) : ('a, 'b) t -> ('b -> 'c) -> ('a, 'c) t
|
||||
(** Infix version of {!map} *)
|
||||
|
||||
val (<|>) : ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t
|
||||
(** Non-deterministic choice. Both branches are evaluated in parallel *)
|
||||
|
||||
val pair : ('a,'b) t -> ('a, 'c) t -> ('a, ('b * 'c)) t
|
||||
val triple : ('a,'b) t -> ('a, 'c) t -> ('a, 'd) t -> ('a, ('b * 'c * 'd)) t
|
||||
|
||||
val if_then_else : ('a -> bool) -> ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t
|
||||
(** Test the next input, and choose the parser based on it. Does not consume
|
||||
the input token for the test *)
|
||||
|
||||
(** {6 Utils} *)
|
||||
|
||||
val take_while : ('a -> bool) -> ('a, 'a list) t
|
||||
(** Take input while it satisfies the given predicate *)
|
||||
|
||||
val take_n : int -> ('a, 'a list) t
|
||||
(** Take n input elements *)
|
||||
|
||||
val skip_spaces : (char, unit) t
|
||||
(** Skip whitespace (space,tab,newline) *)
|
||||
|
||||
val ident : (char, string) t
|
||||
(** Parse identifiers (stops on whitespaces) *)
|
||||
|
||||
val many : sep:('a,_) t -> ('a, 'b) t -> ('a, 'b list) t
|
||||
(** [many ~sep p] parses as many [p] as possible, separated by [sep]. *)
|
||||
|
||||
val many1 : sep:('a,_) t -> ('a, 'b) t -> ('a, 'b list) t
|
||||
|
||||
(** {6 Run} *)
|
||||
|
||||
type 'a sequence = ('a -> unit) -> unit
|
||||
|
||||
val run : ('a,'b) t -> 'a sequence -> 'b list
|
||||
(** List of results. Each element of the list comes from a successful
|
||||
series of choices [<|>]. If no choice operator was used, the list
|
||||
contains 0 or 1 elements *)
|
||||
Loading…
Add table
Reference in a new issue