diff --git a/_oasis b/_oasis index 568502b7..cea3fd13 100644 --- a/_oasis +++ b/_oasis @@ -97,7 +97,7 @@ Library "containers_iter" Library "containers_string" Path: src/string Pack: true - Modules: KMP, Levenshtein, App_parse + Modules: KMP, Levenshtein, App_parse, Parse BuildDepends: bytes FindlibName: string FindlibParent: containers diff --git a/src/string/parse.ml b/src/string/parse.ml new file mode 100644 index 00000000..cbc710a4 --- /dev/null +++ b/src/string/parse.ml @@ -0,0 +1,220 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Very Simple Parser Combinators} *) + +type 'a or_error = [`Ok of 'a | `Error of string] + +type input = { + is_done : unit -> bool; (** End of input? *) + cur : unit -> char; (** Current char *) + next : unit -> char; (** if not {!is_done}, move to next char *) + pos : unit -> int; (** Current pos *) + backtrack : int -> unit; (** Restore to previous pos *) + sub : int -> int -> string; (** Extract slice from [pos] with [len] *) +} + +exception ParseError of int * string (** position * message *) + +let input_of_string s = + let i = ref 0 in + { is_done=(fun () -> !i = String.length s); + cur=(fun () -> s.[!i]); + next=(fun () -> + if !i = String.length s + then raise (ParseError (!i, "unexpected EOI")) + else ( + let c = s.[!i] in + incr i; + c + ) + ); + pos=(fun () -> !i); + backtrack=(fun j -> assert (0 <= j && j <= !i); i := j); + sub=(fun j len -> assert (j + len <= !i); String.sub s j len); + } + +type 'a t = input -> 'a + +let return x _ = x +let pure = return +let (>|=) p f st = f (p st) +let (>>=) p f st = + let x = p st in + f x st +let (<*>) x y st = + let f = x st in + let g = y st in + f g +let (<* ) x y st = + let res = x st in + let _ = y st in + res +let ( *>) x y st = + let _ = x st in + let res = y st in + res + +let junk_ st = ignore (st.next ()) +let fail_ st fmt = + Printf.ksprintf + (fun msg -> raise (ParseError (st.pos (), msg))) fmt + +let eoi st = if st.is_done() then () else fail_ st "expected EOI" +let fail msg st = fail_ st "%s" msg +let nop _ = () + +let char c st = + if st.next () = c then c else fail_ st "expected '%c'" c + +let char_if p st = + let c = st.next () in + if p c then c else fail_ st "unexpected char '%c'" c + +let chars_if p st = + let i = st.pos () in + let len = ref 0 in + while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done; + st.sub i !len + +let chars1_if p st = + let s = chars_if p st in + if s = "" then fail_ st "unexpected sequence of chars"; + s + +let rec skip_chars p st = + if not (st.is_done ()) && p (st.cur ()) then ( + junk_ st; + skip_chars p st + ) + +let is_alpha = function + | 'a' .. 'z' | 'A' .. 'Z' -> true + | _ -> false +let is_num = function '0' .. '9' -> true | _ -> false +let is_alpha_num = function + | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true + | _ -> false +let is_space = function ' ' | '\t' -> true | _ -> false +let is_white = function ' ' | '\t' | '\n' -> true | _ -> false +let (~~~) p c = not (p c) +let (|||) p1 p2 c = p1 c || p2 c +let (&&&) p1 p2 c = p1 c && p2 c + +let endline = char '\n' +let space = char_if is_space +let white = char_if is_white + +let skip_space = skip_chars is_space +let skip_white = skip_chars is_white + +let (<|>) x y st = + let i = st.pos () in + try + x st + with ParseError _ -> + st.backtrack i; (* restore pos *) + y st + +let string s st = + let rec check i = + i = String.length s || + (s.[i] = st.next () && check (i+1)) + in + if check 0 then s else fail_ st "expected \"%s\"" s + +let rec many_rec p st acc = + if st.is_done () then List.rev acc + else + let i = st.pos () in + try + let x = p st in + many_rec p st (x :: acc) + with ParseError _ -> + st.backtrack i; + List.rev acc + +let many p st = many_rec p st [] + +let many1 p st = + let x = p st in + many_rec p st [x] + +let rec skip p st = + let i = st.pos () in + let matched = + try + let _ = p st in + true + with ParseError _ -> + false + in + if matched then skip p st else st.backtrack i + +let rec sep1 ~by p = + p >>= fun x -> + let cont = by *> sep ~by p >|= fun tl -> x :: tl in + cont <|> return [x] +and sep ~by p = + sep1 ~by p <|> return [] + +let fix f = + let rec p st = f p st in + p + +let parse_exn ~input p = p input + +let parse ~input p = + try `Ok (parse_exn ~input p) + with ParseError (i, msg) -> + `Error (Printf.sprintf "at position %d: error %s" i msg) + +let parse_string s p = parse ~input:(input_of_string s) p +let parse_string_exn s p = parse_exn ~input:(input_of_string s) p + +module U = struct + let sep_ = sep + + let list ?(start="[") ?(stop="]") ?(sep=";") p = + string start *> skip_space *> + sep_ ~by:(skip_space *> string sep *> skip_space) p <* + skip_space <* string stop + + let int = + chars1_if (is_num ||| (=) '-') + >>= fun s -> + try return (int_of_string s) + with Failure _ -> fail "expected an int" + + let map f x = x >|= f + let map2 f x y = pure f <*> x <*> y + let map3 f x y z = pure f <*> x <*> y <*> z + + let prepend_str c s = String.make 1 c ^ s + + let word = + map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num) +end diff --git a/src/string/parse.mli b/src/string/parse.mli new file mode 100644 index 00000000..c6f286ee --- /dev/null +++ b/src/string/parse.mli @@ -0,0 +1,148 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** +{1 Very Simple Parser Combinators} + +Examples: + +{6 parse recursive structures} + +{[ +open Containers_string.Parse;; + +type tree = L of int | N of tree * tree;; + +let mk_leaf x = L x +let mk_node x y = N(x,y) + +let ptree = fix @@ fun self -> + (char '(' *> (pure mk_node <*> self <* skip_chars is_space <*> self) <* char ')') + <|> + (U.int >|= mk_leaf) +;; + +parse_string_exn "(1 (2 3))" ptree;; +parse_string_exn "((1 2) (3 (4 5)))" ptree;; + +]} + +{6 Parse a list of words} + +{[ +open Containers_string.Parse;; +let p = U.list ~sep:"," U.word;; +parse_string_exn "[abc , de, hello ,world ]" p;; +]} + +@since NEXT_RELEASE +*) + +type 'a or_error = [`Ok of 'a | `Error of string] +exception ParseError of int * string (** position * message *) + +(** {2 Input} *) + +type input = { + is_done : unit -> bool; (** End of input? *) + cur : unit -> char; (** Current char *) + next : unit -> char; (** if not {!is_done}, move to next char *) + pos : unit -> int; (** Current pos *) + backtrack : int -> unit; (** Restore to previous pos *) + sub : int -> int -> string; (** [sub pos len] extracts slice from [pos] with [len] *) +} + +val input_of_string : string -> input + +(** {2 Combinators} *) + +type 'a t = input -> 'a (** @raise ParseError in case of failure *) + +val return : 'a -> 'a t +val pure : 'a -> 'a t (** synonym to {!return} *) +val (>|=) : 'a t -> ('a -> 'b) -> 'b t +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +val (<*>) : ('a -> 'b) t -> 'a t -> 'b t +val (<* ) : 'a t -> _ t -> 'a t +val ( *>) : _ t -> 'a t -> 'a t + +val fail : string -> 'a t +val eoi : unit t (** end of string *) +val nop : unit t (** do nothing *) + +val char : char -> char t +val char_if : (char -> bool) -> char t +val chars_if : (char -> bool) -> string t +val chars1_if : (char -> bool) -> string t (** non empty *) +val endline : char t +val space : char t (** tab or space *) +val white : char t (** tab or space or newline *) + +val skip_chars : (char -> bool) -> unit t (** Skip 0 or more chars *) +val skip_space : unit t +val skip_white : unit t + +val is_alpha : char -> bool +val is_num : char -> bool +val is_alpha_num : char -> bool +val is_space : char -> bool +val (~~~) : (char -> bool) -> char -> bool +val (|||) : (char -> bool) -> (char -> bool) -> char -> bool +val (&&&) : (char -> bool) -> (char -> bool) -> char -> bool + +val (<|>) : 'a t -> 'a t -> 'a t (* succeeds if either succeeds *) + +val string : string -> string t + +val many : 'a t -> 'a list t +val many1 : 'a t -> 'a list t (** non empty *) +val skip : _ t -> unit t + +val sep : by:_ t -> 'a t -> 'a list t +val sep1 : by:_ t -> 'a t -> 'a list t (** non empty *) + +val fix : ('a t -> 'a t) -> 'a t +(** Fixpoint combinator *) + +(** {2 Parse} *) + +val parse : input:input -> 'a t -> 'a or_error +val parse_exn : input:input -> 'a t -> 'a (** @raise ParseError if it fails *) + +val parse_string : string -> 'a t -> 'a or_error +val parse_string_exn : string -> 'a t -> 'a (** @raise ParseError if it fails *) + + +(** {2 Utils} *) + +module U : sig + val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t + val int : int t + val word : string t (** alpha num, start with alpha *) + val map : ('a -> 'b) -> 'a t -> 'b t + val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t + val map3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t +end