add Containers_string.Parse, very simple monadic parser combinators

This commit is contained in:
Simon Cruanes 2015-05-19 03:11:29 +02:00
parent 074234c47f
commit a17f8d5cfb
3 changed files with 369 additions and 1 deletions

2
_oasis
View file

@ -97,7 +97,7 @@ Library "containers_iter"
Library "containers_string"
Path: src/string
Pack: true
Modules: KMP, Levenshtein, App_parse
Modules: KMP, Levenshtein, App_parse, Parse
BuildDepends: bytes
FindlibName: string
FindlibParent: containers

220
src/string/parse.ml Normal file
View file

@ -0,0 +1,220 @@
(*
copyright (c) 2013-2015, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Very Simple Parser Combinators} *)
type 'a or_error = [`Ok of 'a | `Error of string]
type input = {
is_done : unit -> bool; (** End of input? *)
cur : unit -> char; (** Current char *)
next : unit -> char; (** if not {!is_done}, move to next char *)
pos : unit -> int; (** Current pos *)
backtrack : int -> unit; (** Restore to previous pos *)
sub : int -> int -> string; (** Extract slice from [pos] with [len] *)
}
exception ParseError of int * string (** position * message *)
let input_of_string s =
let i = ref 0 in
{ is_done=(fun () -> !i = String.length s);
cur=(fun () -> s.[!i]);
next=(fun () ->
if !i = String.length s
then raise (ParseError (!i, "unexpected EOI"))
else (
let c = s.[!i] in
incr i;
c
)
);
pos=(fun () -> !i);
backtrack=(fun j -> assert (0 <= j && j <= !i); i := j);
sub=(fun j len -> assert (j + len <= !i); String.sub s j len);
}
type 'a t = input -> 'a
let return x _ = x
let pure = return
let (>|=) p f st = f (p st)
let (>>=) p f st =
let x = p st in
f x st
let (<*>) x y st =
let f = x st in
let g = y st in
f g
let (<* ) x y st =
let res = x st in
let _ = y st in
res
let ( *>) x y st =
let _ = x st in
let res = y st in
res
let junk_ st = ignore (st.next ())
let fail_ st fmt =
Printf.ksprintf
(fun msg -> raise (ParseError (st.pos (), msg))) fmt
let eoi st = if st.is_done() then () else fail_ st "expected EOI"
let fail msg st = fail_ st "%s" msg
let nop _ = ()
let char c st =
if st.next () = c then c else fail_ st "expected '%c'" c
let char_if p st =
let c = st.next () in
if p c then c else fail_ st "unexpected char '%c'" c
let chars_if p st =
let i = st.pos () in
let len = ref 0 in
while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done;
st.sub i !len
let chars1_if p st =
let s = chars_if p st in
if s = "" then fail_ st "unexpected sequence of chars";
s
let rec skip_chars p st =
if not (st.is_done ()) && p (st.cur ()) then (
junk_ st;
skip_chars p st
)
let is_alpha = function
| 'a' .. 'z' | 'A' .. 'Z' -> true
| _ -> false
let is_num = function '0' .. '9' -> true | _ -> false
let is_alpha_num = function
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
| _ -> false
let is_space = function ' ' | '\t' -> true | _ -> false
let is_white = function ' ' | '\t' | '\n' -> true | _ -> false
let (~~~) p c = not (p c)
let (|||) p1 p2 c = p1 c || p2 c
let (&&&) p1 p2 c = p1 c && p2 c
let endline = char '\n'
let space = char_if is_space
let white = char_if is_white
let skip_space = skip_chars is_space
let skip_white = skip_chars is_white
let (<|>) x y st =
let i = st.pos () in
try
x st
with ParseError _ ->
st.backtrack i; (* restore pos *)
y st
let string s st =
let rec check i =
i = String.length s ||
(s.[i] = st.next () && check (i+1))
in
if check 0 then s else fail_ st "expected \"%s\"" s
let rec many_rec p st acc =
if st.is_done () then List.rev acc
else
let i = st.pos () in
try
let x = p st in
many_rec p st (x :: acc)
with ParseError _ ->
st.backtrack i;
List.rev acc
let many p st = many_rec p st []
let many1 p st =
let x = p st in
many_rec p st [x]
let rec skip p st =
let i = st.pos () in
let matched =
try
let _ = p st in
true
with ParseError _ ->
false
in
if matched then skip p st else st.backtrack i
let rec sep1 ~by p =
p >>= fun x ->
let cont = by *> sep ~by p >|= fun tl -> x :: tl in
cont <|> return [x]
and sep ~by p =
sep1 ~by p <|> return []
let fix f =
let rec p st = f p st in
p
let parse_exn ~input p = p input
let parse ~input p =
try `Ok (parse_exn ~input p)
with ParseError (i, msg) ->
`Error (Printf.sprintf "at position %d: error %s" i msg)
let parse_string s p = parse ~input:(input_of_string s) p
let parse_string_exn s p = parse_exn ~input:(input_of_string s) p
module U = struct
let sep_ = sep
let list ?(start="[") ?(stop="]") ?(sep=";") p =
string start *> skip_space *>
sep_ ~by:(skip_space *> string sep *> skip_space) p <*
skip_space <* string stop
let int =
chars1_if (is_num ||| (=) '-')
>>= fun s ->
try return (int_of_string s)
with Failure _ -> fail "expected an int"
let map f x = x >|= f
let map2 f x y = pure f <*> x <*> y
let map3 f x y z = pure f <*> x <*> y <*> z
let prepend_str c s = String.make 1 c ^ s
let word =
map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num)
end

148
src/string/parse.mli Normal file
View file

@ -0,0 +1,148 @@
(*
copyright (c) 2013-2015, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(**
{1 Very Simple Parser Combinators}
Examples:
{6 parse recursive structures}
{[
open Containers_string.Parse;;
type tree = L of int | N of tree * tree;;
let mk_leaf x = L x
let mk_node x y = N(x,y)
let ptree = fix @@ fun self ->
(char '(' *> (pure mk_node <*> self <* skip_chars is_space <*> self) <* char ')')
<|>
(U.int >|= mk_leaf)
;;
parse_string_exn "(1 (2 3))" ptree;;
parse_string_exn "((1 2) (3 (4 5)))" ptree;;
]}
{6 Parse a list of words}
{[
open Containers_string.Parse;;
let p = U.list ~sep:"," U.word;;
parse_string_exn "[abc , de, hello ,world ]" p;;
]}
@since NEXT_RELEASE
*)
type 'a or_error = [`Ok of 'a | `Error of string]
exception ParseError of int * string (** position * message *)
(** {2 Input} *)
type input = {
is_done : unit -> bool; (** End of input? *)
cur : unit -> char; (** Current char *)
next : unit -> char; (** if not {!is_done}, move to next char *)
pos : unit -> int; (** Current pos *)
backtrack : int -> unit; (** Restore to previous pos *)
sub : int -> int -> string; (** [sub pos len] extracts slice from [pos] with [len] *)
}
val input_of_string : string -> input
(** {2 Combinators} *)
type 'a t = input -> 'a (** @raise ParseError in case of failure *)
val return : 'a -> 'a t
val pure : 'a -> 'a t (** synonym to {!return} *)
val (>|=) : 'a t -> ('a -> 'b) -> 'b t
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t
val (<* ) : 'a t -> _ t -> 'a t
val ( *>) : _ t -> 'a t -> 'a t
val fail : string -> 'a t
val eoi : unit t (** end of string *)
val nop : unit t (** do nothing *)
val char : char -> char t
val char_if : (char -> bool) -> char t
val chars_if : (char -> bool) -> string t
val chars1_if : (char -> bool) -> string t (** non empty *)
val endline : char t
val space : char t (** tab or space *)
val white : char t (** tab or space or newline *)
val skip_chars : (char -> bool) -> unit t (** Skip 0 or more chars *)
val skip_space : unit t
val skip_white : unit t
val is_alpha : char -> bool
val is_num : char -> bool
val is_alpha_num : char -> bool
val is_space : char -> bool
val (~~~) : (char -> bool) -> char -> bool
val (|||) : (char -> bool) -> (char -> bool) -> char -> bool
val (&&&) : (char -> bool) -> (char -> bool) -> char -> bool
val (<|>) : 'a t -> 'a t -> 'a t (* succeeds if either succeeds *)
val string : string -> string t
val many : 'a t -> 'a list t
val many1 : 'a t -> 'a list t (** non empty *)
val skip : _ t -> unit t
val sep : by:_ t -> 'a t -> 'a list t
val sep1 : by:_ t -> 'a t -> 'a list t (** non empty *)
val fix : ('a t -> 'a t) -> 'a t
(** Fixpoint combinator *)
(** {2 Parse} *)
val parse : input:input -> 'a t -> 'a or_error
val parse_exn : input:input -> 'a t -> 'a (** @raise ParseError if it fails *)
val parse_string : string -> 'a t -> 'a or_error
val parse_string_exn : string -> 'a t -> 'a (** @raise ParseError if it fails *)
(** {2 Utils} *)
module U : sig
val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t
val int : int t
val word : string t (** alpha num, start with alpha *)
val map : ('a -> 'b) -> 'a t -> 'b t
val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t
val map3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t
end