mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-07 03:35:30 -05:00
add Containers_string.Parse, very simple monadic parser combinators
This commit is contained in:
parent
074234c47f
commit
a17f8d5cfb
3 changed files with 369 additions and 1 deletions
2
_oasis
2
_oasis
|
|
@ -97,7 +97,7 @@ Library "containers_iter"
|
|||
Library "containers_string"
|
||||
Path: src/string
|
||||
Pack: true
|
||||
Modules: KMP, Levenshtein, App_parse
|
||||
Modules: KMP, Levenshtein, App_parse, Parse
|
||||
BuildDepends: bytes
|
||||
FindlibName: string
|
||||
FindlibParent: containers
|
||||
|
|
|
|||
220
src/string/parse.ml
Normal file
220
src/string/parse.ml
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2015, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Very Simple Parser Combinators} *)
|
||||
|
||||
type 'a or_error = [`Ok of 'a | `Error of string]
|
||||
|
||||
type input = {
|
||||
is_done : unit -> bool; (** End of input? *)
|
||||
cur : unit -> char; (** Current char *)
|
||||
next : unit -> char; (** if not {!is_done}, move to next char *)
|
||||
pos : unit -> int; (** Current pos *)
|
||||
backtrack : int -> unit; (** Restore to previous pos *)
|
||||
sub : int -> int -> string; (** Extract slice from [pos] with [len] *)
|
||||
}
|
||||
|
||||
exception ParseError of int * string (** position * message *)
|
||||
|
||||
let input_of_string s =
|
||||
let i = ref 0 in
|
||||
{ is_done=(fun () -> !i = String.length s);
|
||||
cur=(fun () -> s.[!i]);
|
||||
next=(fun () ->
|
||||
if !i = String.length s
|
||||
then raise (ParseError (!i, "unexpected EOI"))
|
||||
else (
|
||||
let c = s.[!i] in
|
||||
incr i;
|
||||
c
|
||||
)
|
||||
);
|
||||
pos=(fun () -> !i);
|
||||
backtrack=(fun j -> assert (0 <= j && j <= !i); i := j);
|
||||
sub=(fun j len -> assert (j + len <= !i); String.sub s j len);
|
||||
}
|
||||
|
||||
type 'a t = input -> 'a
|
||||
|
||||
let return x _ = x
|
||||
let pure = return
|
||||
let (>|=) p f st = f (p st)
|
||||
let (>>=) p f st =
|
||||
let x = p st in
|
||||
f x st
|
||||
let (<*>) x y st =
|
||||
let f = x st in
|
||||
let g = y st in
|
||||
f g
|
||||
let (<* ) x y st =
|
||||
let res = x st in
|
||||
let _ = y st in
|
||||
res
|
||||
let ( *>) x y st =
|
||||
let _ = x st in
|
||||
let res = y st in
|
||||
res
|
||||
|
||||
let junk_ st = ignore (st.next ())
|
||||
let fail_ st fmt =
|
||||
Printf.ksprintf
|
||||
(fun msg -> raise (ParseError (st.pos (), msg))) fmt
|
||||
|
||||
let eoi st = if st.is_done() then () else fail_ st "expected EOI"
|
||||
let fail msg st = fail_ st "%s" msg
|
||||
let nop _ = ()
|
||||
|
||||
let char c st =
|
||||
if st.next () = c then c else fail_ st "expected '%c'" c
|
||||
|
||||
let char_if p st =
|
||||
let c = st.next () in
|
||||
if p c then c else fail_ st "unexpected char '%c'" c
|
||||
|
||||
let chars_if p st =
|
||||
let i = st.pos () in
|
||||
let len = ref 0 in
|
||||
while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done;
|
||||
st.sub i !len
|
||||
|
||||
let chars1_if p st =
|
||||
let s = chars_if p st in
|
||||
if s = "" then fail_ st "unexpected sequence of chars";
|
||||
s
|
||||
|
||||
let rec skip_chars p st =
|
||||
if not (st.is_done ()) && p (st.cur ()) then (
|
||||
junk_ st;
|
||||
skip_chars p st
|
||||
)
|
||||
|
||||
let is_alpha = function
|
||||
| 'a' .. 'z' | 'A' .. 'Z' -> true
|
||||
| _ -> false
|
||||
let is_num = function '0' .. '9' -> true | _ -> false
|
||||
let is_alpha_num = function
|
||||
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
|
||||
| _ -> false
|
||||
let is_space = function ' ' | '\t' -> true | _ -> false
|
||||
let is_white = function ' ' | '\t' | '\n' -> true | _ -> false
|
||||
let (~~~) p c = not (p c)
|
||||
let (|||) p1 p2 c = p1 c || p2 c
|
||||
let (&&&) p1 p2 c = p1 c && p2 c
|
||||
|
||||
let endline = char '\n'
|
||||
let space = char_if is_space
|
||||
let white = char_if is_white
|
||||
|
||||
let skip_space = skip_chars is_space
|
||||
let skip_white = skip_chars is_white
|
||||
|
||||
let (<|>) x y st =
|
||||
let i = st.pos () in
|
||||
try
|
||||
x st
|
||||
with ParseError _ ->
|
||||
st.backtrack i; (* restore pos *)
|
||||
y st
|
||||
|
||||
let string s st =
|
||||
let rec check i =
|
||||
i = String.length s ||
|
||||
(s.[i] = st.next () && check (i+1))
|
||||
in
|
||||
if check 0 then s else fail_ st "expected \"%s\"" s
|
||||
|
||||
let rec many_rec p st acc =
|
||||
if st.is_done () then List.rev acc
|
||||
else
|
||||
let i = st.pos () in
|
||||
try
|
||||
let x = p st in
|
||||
many_rec p st (x :: acc)
|
||||
with ParseError _ ->
|
||||
st.backtrack i;
|
||||
List.rev acc
|
||||
|
||||
let many p st = many_rec p st []
|
||||
|
||||
let many1 p st =
|
||||
let x = p st in
|
||||
many_rec p st [x]
|
||||
|
||||
let rec skip p st =
|
||||
let i = st.pos () in
|
||||
let matched =
|
||||
try
|
||||
let _ = p st in
|
||||
true
|
||||
with ParseError _ ->
|
||||
false
|
||||
in
|
||||
if matched then skip p st else st.backtrack i
|
||||
|
||||
let rec sep1 ~by p =
|
||||
p >>= fun x ->
|
||||
let cont = by *> sep ~by p >|= fun tl -> x :: tl in
|
||||
cont <|> return [x]
|
||||
and sep ~by p =
|
||||
sep1 ~by p <|> return []
|
||||
|
||||
let fix f =
|
||||
let rec p st = f p st in
|
||||
p
|
||||
|
||||
let parse_exn ~input p = p input
|
||||
|
||||
let parse ~input p =
|
||||
try `Ok (parse_exn ~input p)
|
||||
with ParseError (i, msg) ->
|
||||
`Error (Printf.sprintf "at position %d: error %s" i msg)
|
||||
|
||||
let parse_string s p = parse ~input:(input_of_string s) p
|
||||
let parse_string_exn s p = parse_exn ~input:(input_of_string s) p
|
||||
|
||||
module U = struct
|
||||
let sep_ = sep
|
||||
|
||||
let list ?(start="[") ?(stop="]") ?(sep=";") p =
|
||||
string start *> skip_space *>
|
||||
sep_ ~by:(skip_space *> string sep *> skip_space) p <*
|
||||
skip_space <* string stop
|
||||
|
||||
let int =
|
||||
chars1_if (is_num ||| (=) '-')
|
||||
>>= fun s ->
|
||||
try return (int_of_string s)
|
||||
with Failure _ -> fail "expected an int"
|
||||
|
||||
let map f x = x >|= f
|
||||
let map2 f x y = pure f <*> x <*> y
|
||||
let map3 f x y z = pure f <*> x <*> y <*> z
|
||||
|
||||
let prepend_str c s = String.make 1 c ^ s
|
||||
|
||||
let word =
|
||||
map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num)
|
||||
end
|
||||
148
src/string/parse.mli
Normal file
148
src/string/parse.mli
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2015, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(**
|
||||
{1 Very Simple Parser Combinators}
|
||||
|
||||
Examples:
|
||||
|
||||
{6 parse recursive structures}
|
||||
|
||||
{[
|
||||
open Containers_string.Parse;;
|
||||
|
||||
type tree = L of int | N of tree * tree;;
|
||||
|
||||
let mk_leaf x = L x
|
||||
let mk_node x y = N(x,y)
|
||||
|
||||
let ptree = fix @@ fun self ->
|
||||
(char '(' *> (pure mk_node <*> self <* skip_chars is_space <*> self) <* char ')')
|
||||
<|>
|
||||
(U.int >|= mk_leaf)
|
||||
;;
|
||||
|
||||
parse_string_exn "(1 (2 3))" ptree;;
|
||||
parse_string_exn "((1 2) (3 (4 5)))" ptree;;
|
||||
|
||||
]}
|
||||
|
||||
{6 Parse a list of words}
|
||||
|
||||
{[
|
||||
open Containers_string.Parse;;
|
||||
let p = U.list ~sep:"," U.word;;
|
||||
parse_string_exn "[abc , de, hello ,world ]" p;;
|
||||
]}
|
||||
|
||||
@since NEXT_RELEASE
|
||||
*)
|
||||
|
||||
type 'a or_error = [`Ok of 'a | `Error of string]
|
||||
exception ParseError of int * string (** position * message *)
|
||||
|
||||
(** {2 Input} *)
|
||||
|
||||
type input = {
|
||||
is_done : unit -> bool; (** End of input? *)
|
||||
cur : unit -> char; (** Current char *)
|
||||
next : unit -> char; (** if not {!is_done}, move to next char *)
|
||||
pos : unit -> int; (** Current pos *)
|
||||
backtrack : int -> unit; (** Restore to previous pos *)
|
||||
sub : int -> int -> string; (** [sub pos len] extracts slice from [pos] with [len] *)
|
||||
}
|
||||
|
||||
val input_of_string : string -> input
|
||||
|
||||
(** {2 Combinators} *)
|
||||
|
||||
type 'a t = input -> 'a (** @raise ParseError in case of failure *)
|
||||
|
||||
val return : 'a -> 'a t
|
||||
val pure : 'a -> 'a t (** synonym to {!return} *)
|
||||
val (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
||||
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
|
||||
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
||||
val (<* ) : 'a t -> _ t -> 'a t
|
||||
val ( *>) : _ t -> 'a t -> 'a t
|
||||
|
||||
val fail : string -> 'a t
|
||||
val eoi : unit t (** end of string *)
|
||||
val nop : unit t (** do nothing *)
|
||||
|
||||
val char : char -> char t
|
||||
val char_if : (char -> bool) -> char t
|
||||
val chars_if : (char -> bool) -> string t
|
||||
val chars1_if : (char -> bool) -> string t (** non empty *)
|
||||
val endline : char t
|
||||
val space : char t (** tab or space *)
|
||||
val white : char t (** tab or space or newline *)
|
||||
|
||||
val skip_chars : (char -> bool) -> unit t (** Skip 0 or more chars *)
|
||||
val skip_space : unit t
|
||||
val skip_white : unit t
|
||||
|
||||
val is_alpha : char -> bool
|
||||
val is_num : char -> bool
|
||||
val is_alpha_num : char -> bool
|
||||
val is_space : char -> bool
|
||||
val (~~~) : (char -> bool) -> char -> bool
|
||||
val (|||) : (char -> bool) -> (char -> bool) -> char -> bool
|
||||
val (&&&) : (char -> bool) -> (char -> bool) -> char -> bool
|
||||
|
||||
val (<|>) : 'a t -> 'a t -> 'a t (* succeeds if either succeeds *)
|
||||
|
||||
val string : string -> string t
|
||||
|
||||
val many : 'a t -> 'a list t
|
||||
val many1 : 'a t -> 'a list t (** non empty *)
|
||||
val skip : _ t -> unit t
|
||||
|
||||
val sep : by:_ t -> 'a t -> 'a list t
|
||||
val sep1 : by:_ t -> 'a t -> 'a list t (** non empty *)
|
||||
|
||||
val fix : ('a t -> 'a t) -> 'a t
|
||||
(** Fixpoint combinator *)
|
||||
|
||||
(** {2 Parse} *)
|
||||
|
||||
val parse : input:input -> 'a t -> 'a or_error
|
||||
val parse_exn : input:input -> 'a t -> 'a (** @raise ParseError if it fails *)
|
||||
|
||||
val parse_string : string -> 'a t -> 'a or_error
|
||||
val parse_string_exn : string -> 'a t -> 'a (** @raise ParseError if it fails *)
|
||||
|
||||
|
||||
(** {2 Utils} *)
|
||||
|
||||
module U : sig
|
||||
val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t
|
||||
val int : int t
|
||||
val word : string t (** alpha num, start with alpha *)
|
||||
val map : ('a -> 'b) -> 'a t -> 'b t
|
||||
val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t
|
||||
val map3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t
|
||||
end
|
||||
Loading…
Add table
Reference in a new issue