mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
237 lines
6.9 KiB
OCaml
237 lines
6.9 KiB
OCaml
|
|
(*
|
|
copyright (c) 2013-2014, simon cruanes
|
|
all rights reserved.
|
|
|
|
redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
redistributions of source code must retain the above copyright notice, this
|
|
list of conditions and the following disclaimer. redistributions in binary
|
|
form must reproduce the above copyright notice, this list of conditions and the
|
|
following disclaimer in the documentation and/or other materials provided with
|
|
the distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*)
|
|
|
|
(** {1 Parser combinators driven by the input} *)
|
|
|
|
type ('a, 'b) t =
|
|
| Return : 'b -> ('a,'b) t
|
|
| Delay : (unit -> ('a, 'b) t) -> ('a, 'b) t
|
|
| One : ('a, 'a) t
|
|
| Stop : ('a, unit) t
|
|
| Bind : ('a, 'b) t * ('b -> ('a, 'c) t) -> ('a, 'c) t
|
|
| Choice : ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t
|
|
| Map : ('a, 'b) t * ('b -> 'c) -> ('a, 'c) t
|
|
| Guard : ('a, 'b) t * ('b -> bool) -> ('a, 'b) t
|
|
| Skip : ('a, unit) t
|
|
| IfThenElse: ('a -> bool) * ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t
|
|
| Fail : ('a, 'b) t
|
|
|
|
let stop = Stop
|
|
|
|
let return x = Return x
|
|
|
|
let delay f = Delay f
|
|
|
|
let return' f = Delay (fun () -> return (f ()))
|
|
|
|
let fail = Fail
|
|
|
|
let one = One
|
|
|
|
let skip = Skip
|
|
|
|
let bind f p = Bind (p, f)
|
|
|
|
let (>>=) p f = bind f p
|
|
|
|
let exact ?(eq=(=)) x =
|
|
one
|
|
>>= fun y ->
|
|
if eq x y then Return () else Fail
|
|
|
|
let guard f p = Guard (p, f)
|
|
|
|
let (>>) p1 p2 = p1 >>= fun _ -> p2
|
|
|
|
let map f p = Map (p, f)
|
|
|
|
let (>>|) p f = Map (p, f)
|
|
|
|
let (<|>) p1 p2 = Choice (p1, p2)
|
|
|
|
let pair p1 p2 =
|
|
p1 >>= fun x1 ->
|
|
p2 >>= fun x2 ->
|
|
return (x1, x2)
|
|
|
|
let triple p1 p2 p3 =
|
|
p1 >>= fun x1 ->
|
|
p2 >>= fun x2 ->
|
|
p3 >>= fun x3 ->
|
|
return (x1, x2, x3)
|
|
|
|
let if_then_else p a b = IfThenElse (p, a, b)
|
|
|
|
(** {6 Utils} *)
|
|
|
|
let take_while pred =
|
|
let rec next acc =
|
|
if_then_else pred
|
|
(one >>= fun x -> next (x::acc))
|
|
(return' (fun () -> List.rev acc))
|
|
in
|
|
next []
|
|
|
|
let take_n n =
|
|
let rec next acc n =
|
|
if n = 0
|
|
then return (List.rev acc)
|
|
else one >>= fun x -> next (x::acc) (n-1)
|
|
in
|
|
next [] n
|
|
|
|
let skip_spaces =
|
|
let rec next () =
|
|
if_then_else
|
|
(fun c -> c = ' ' || c = '\t' || c = '\n')
|
|
(skip >> delay next)
|
|
(return ())
|
|
in next ()
|
|
|
|
let ident =
|
|
let accept = function
|
|
| c when Char.code c >= Char.code 'a' && Char.code c <= Char.code 'z' -> true
|
|
| c when Char.code c >= Char.code 'A' && Char.code c <= Char.code 'Z' -> true
|
|
| c when Char.code c >= Char.code '0' && Char.code c <= Char.code '9' -> true
|
|
| _ -> false
|
|
in
|
|
let rec aggregate buf =
|
|
if_then_else
|
|
accept
|
|
(one >>= fun c -> Buffer.add_char buf c; aggregate buf)
|
|
(return (Buffer.contents buf))
|
|
in
|
|
(* create buffer on demand, to avoid sharing it *)
|
|
delay (fun () -> aggregate (Buffer.create 32))
|
|
|
|
let many ~sep p =
|
|
let rec next acc =
|
|
(return (List.rev acc))
|
|
<|> (p >>= fun x -> sep >> next (x::acc))
|
|
in
|
|
next []
|
|
|
|
let many1 ~sep p =
|
|
let rec next acc =
|
|
p >>= fun x ->
|
|
let acc = x :: acc in
|
|
(return (List.rev acc))
|
|
<|> (sep >> next acc)
|
|
in
|
|
next []
|
|
|
|
(** {6 Run} *)
|
|
|
|
type 'a sequence = ('a -> unit) -> unit
|
|
|
|
let _fold_seq f acc seq =
|
|
let acc = ref acc in
|
|
seq (fun x -> acc := f !acc x);
|
|
!acc
|
|
|
|
(** Partial state during parsing: a tree of continuations *)
|
|
type (_, _) state =
|
|
| STBottom : 'b -> ('a, 'b) state
|
|
| STPush : ('a, 'c) t * ('c -> ('a, 'b) state list) -> ('a, 'b) state
|
|
|
|
let (>>>) p cont = STPush (p, cont)
|
|
|
|
let run p seq =
|
|
(* normalize the stack (do not let a "return" on top) *)
|
|
let rec reduce : type a b. (a,b)state -> (a,b) state list
|
|
= fun stack -> match stack with
|
|
| STPush (Return x, cont) -> CCList.flat_map reduce (cont x)
|
|
| STPush (Delay f, cont) -> reduce (f () >>> cont)
|
|
| STPush (Bind (p, f), cont) ->
|
|
let stack' = p >>> fun x -> [f x >>> cont] in
|
|
reduce stack'
|
|
| STPush (Choice (a, b), cont) ->
|
|
(* fork into sub-stacks *)
|
|
CCList.append (reduce (a >>> cont)) (reduce (b >>> cont))
|
|
| STPush (Map (p, f), cont) ->
|
|
let stack' = p >>> fun x -> cont (f x) in
|
|
reduce stack'
|
|
| STPush (Guard (p, f), cont) ->
|
|
let stack' = p >>> fun x -> if f x then cont x else [] in
|
|
reduce stack'
|
|
| _ -> [stack]
|
|
in
|
|
(* consume one input token *)
|
|
let rec consume_one : type a b. (a,b) state -> a -> (a,b) state list
|
|
= fun stack x -> match stack with
|
|
| STBottom _ -> [] (* fail *)
|
|
| STPush (Stop, _) -> [] (* fail *)
|
|
| STPush (Fail, _) -> [] (* fail *)
|
|
| STPush (One, cont) -> CCList.flat_map reduce (cont x)
|
|
| STPush (Skip, cont) -> CCList.flat_map reduce (cont ())
|
|
| STPush (IfThenElse (p, yay, nay), cont) ->
|
|
let l = if p x
|
|
then reduce (yay >>> cont)
|
|
else reduce (nay >>> cont)
|
|
in
|
|
CCList.flat_map (fun stack -> consume_one stack x) l
|
|
| STPush (Return _, _) -> assert false
|
|
| STPush (Delay _, _) -> assert false
|
|
| STPush (Bind _, _) -> assert false
|
|
| STPush (Choice _, _) -> assert false
|
|
| STPush (Map _, _) -> assert false
|
|
| STPush (Guard _, _) -> assert false
|
|
in
|
|
(* to be called at the end of input *)
|
|
let finish : type a b. (a,b) state -> (a,b) state list
|
|
= fun stack -> match stack with
|
|
| STPush (Stop, cont) -> CCList.flat_map reduce (cont ())
|
|
| STPush (Fail, _) -> []
|
|
| _ -> [stack]
|
|
in
|
|
(* how to parse the input: step by step, starting with [p] as initial parser *)
|
|
let step l x = CCList.flat_map (fun p -> consume_one p x) l in
|
|
let initial_state = p >>> fun x -> [STBottom x] in
|
|
let res = _fold_seq step (reduce initial_state) seq in
|
|
(* signal "end of input" *)
|
|
let res = CCList.flat_map finish res in
|
|
(* recover results *)
|
|
CCList.filter_map
|
|
(function
|
|
| STBottom x -> Some x
|
|
| _ -> None
|
|
) res
|
|
|
|
|
|
(*$R
|
|
let module S = struct type t = Atom of string | List of t list end in
|
|
let open S in
|
|
let (%) f g x = f (g x) in
|
|
let atom i = Atom i in
|
|
let list_ i = List i in
|
|
let rec p () =
|
|
(skip_spaces >> ident >>= (return % atom))
|
|
<|> (skip_spaces >> exact '(' >> many1 ~sep:(exact ' ') (delay p) >>= fun l ->
|
|
skip_spaces >> exact ')' >> return (list_ l))
|
|
in
|
|
let res = run (p ()) (CCSequence.of_str "(a b (c d))") in
|
|
assert_equal res [list_ [atom "a"; atom "b"; list_ [atom "c"; atom "d"]]]
|
|
*)
|