mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
wip: App_parse in containers_string, a draft of efficient applicative parser combinators
This commit is contained in:
parent
af4bf49156
commit
ee8c7c03f2
3 changed files with 837 additions and 1 deletions
2
_oasis
2
_oasis
|
|
@ -85,7 +85,7 @@ Library "containers_iter"
|
|||
Library "containers_string"
|
||||
Path: src/string
|
||||
Pack: true
|
||||
Modules: KMP, Levenshtein
|
||||
Modules: KMP, Levenshtein, App_parse
|
||||
FindlibName: string
|
||||
FindlibParent: containers
|
||||
|
||||
|
|
|
|||
617
src/string/app_parse.ml
Normal file
617
src/string/app_parse.ml
Normal file
|
|
@ -0,0 +1,617 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2015, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Applicative Parser Combinators} *)
|
||||
|
||||
type ('a,'b) result = [`Error of 'b | `Ok of 'a]
|
||||
|
||||
type multiplicity =
|
||||
| Star (* 0 or more *)
|
||||
| Plus (* 1 or more *)
|
||||
| Question (* 0 or 1 *)
|
||||
|
||||
let str fmt = Printf.sprintf fmt
|
||||
|
||||
module CharSet = Set.Make(Char)
|
||||
module CharMap = Map.Make(Char)
|
||||
|
||||
let print_char_set set =
|
||||
let l = CharSet.fold
|
||||
(fun c acc -> str "'%c'" c :: acc) set [] in
|
||||
String.concat ", " l
|
||||
|
||||
let domain_of_char_map m =
|
||||
CharMap.fold (fun c _ set -> CharSet.add c set) m CharSet.empty
|
||||
|
||||
let print_char_map map =
|
||||
let l = CharMap.fold
|
||||
(fun c _ acc -> str "'%c'" c :: acc) map [] in
|
||||
String.concat ", " l
|
||||
|
||||
(* function composition *)
|
||||
let compose f g x = f (g x)
|
||||
|
||||
let string_of_list l =
|
||||
let b = Bytes.make (List.length l) ' ' in
|
||||
List.iteri (fun i c -> Bytes.set b i c) l;
|
||||
Bytes.unsafe_to_string b
|
||||
|
||||
type _ t =
|
||||
| Return : 'a -> 'a t
|
||||
| Map : ('a -> 'b) * 'a t -> 'b t
|
||||
| Filter: ('a -> bool) * 'a t -> 'a t
|
||||
| App : ('a -> 'b) t * 'a t -> 'b t
|
||||
| AppLeft : 'a t * 'b t -> 'a t
|
||||
| AppRight : 'a t * 'b t -> 'b t
|
||||
| Fail : string -> 'a t
|
||||
| Int : int t
|
||||
| Float : float t
|
||||
| AnyOf : CharSet.t -> char t
|
||||
| Many : CharSet.t * 'a t * unit t * multiplicity -> 'a list t
|
||||
| Skip : CharSet.t * 'a t * multiplicity -> unit t (* same as Many, but ignores *)
|
||||
| SwitchC : 'a t CharMap.t * 'a t option -> 'a t
|
||||
| SwitchS :'a trie -> 'a t
|
||||
| Fix : ('a t -> 'a t) -> 'a t
|
||||
| Eof : unit t
|
||||
|
||||
(* a prefix trie *)
|
||||
and 'a trie =
|
||||
| TrieEmpty
|
||||
| TrieNode of 'a t option * 'a trie CharMap.t
|
||||
|
||||
let return x = Return x
|
||||
|
||||
let success = Return ()
|
||||
|
||||
let fail msg = Fail msg
|
||||
|
||||
let app f x = App (f, x)
|
||||
|
||||
let map f x = match x with
|
||||
| Map (g, y) -> Map (compose f g, y)
|
||||
| Return x -> Return (f x)
|
||||
| _ -> Map (f,x)
|
||||
|
||||
let filter f x = Filter (f, x)
|
||||
|
||||
let int = Int
|
||||
|
||||
let float = Float
|
||||
|
||||
let int_first_char =
|
||||
lazy (CharSet.of_list ['-'; '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'])
|
||||
|
||||
(* a set of characters that are valid as first characters of a parser *)
|
||||
type possible_first_chars =
|
||||
| Set of CharSet.t
|
||||
| AllChars
|
||||
| NoChar
|
||||
| IsFail of string
|
||||
|
||||
(* set of possibilities for the first char of a parser *)
|
||||
let rec possible_first_chars
|
||||
: type a. a t -> possible_first_chars
|
||||
= function
|
||||
| Return _ -> NoChar
|
||||
| Map (_, x) -> possible_first_chars x
|
||||
| Filter (_, x) -> possible_first_chars x
|
||||
| App (f, _) -> possible_first_chars f
|
||||
| AppLeft (a, _) -> possible_first_chars a
|
||||
| AppRight (a, _) -> possible_first_chars a
|
||||
| Fail e -> IsFail e
|
||||
| Int -> Set (Lazy.force int_first_char)
|
||||
| Float -> Set (Lazy.force int_first_char)
|
||||
| AnyOf set -> Set set
|
||||
| Many(set, _, _, _) -> Set set
|
||||
| Skip (set, _, _) -> Set set
|
||||
| SwitchC (map, None) -> Set (domain_of_char_map map)
|
||||
| SwitchC (_, Some _) -> AllChars
|
||||
| SwitchS TrieEmpty -> assert false
|
||||
| SwitchS (TrieNode (_, m)) -> Set (domain_of_char_map m)
|
||||
| Fix f ->
|
||||
let p = f (Fix f) in
|
||||
possible_first_chars p
|
||||
| Eof -> NoChar
|
||||
|
||||
let many ?(sep=success) t =
|
||||
match possible_first_chars t with
|
||||
| Set set -> Many (set, t, sep, Star)
|
||||
| IsFail msg -> Fail msg
|
||||
| AllChars -> invalid_arg (str "many: invalid parser (always succeeds)")
|
||||
| NoChar -> invalid_arg (str "many: invalid parser (does not consume input)")
|
||||
|
||||
let many1 ?(sep=success) t =
|
||||
match possible_first_chars t with
|
||||
| Set set -> Many (set, t, sep, Plus)
|
||||
| IsFail msg -> Fail msg
|
||||
| AllChars -> invalid_arg (str "many: invalid parser (always succeeds)")
|
||||
| NoChar -> invalid_arg (str "many: invalid parser (does not consume input)")
|
||||
|
||||
let skip t =
|
||||
match possible_first_chars t with
|
||||
| Set set -> Skip (set, t, Star)
|
||||
| IsFail msg -> Fail msg
|
||||
| AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)")
|
||||
| NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)")
|
||||
|
||||
let skip1 t =
|
||||
match possible_first_chars t with
|
||||
| Set set -> Skip (set, t, Plus)
|
||||
| IsFail msg -> Fail msg
|
||||
| AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)")
|
||||
| NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)")
|
||||
|
||||
let opt t =
|
||||
match possible_first_chars t with
|
||||
| Set set ->
|
||||
map
|
||||
(function
|
||||
| [x] -> Some x
|
||||
| [] -> None
|
||||
| _ -> assert false
|
||||
) (Many (set, t, success, Question))
|
||||
| IsFail msg -> Fail msg
|
||||
| AllChars -> map (fun x -> Some x) t (* always succeeds *)
|
||||
| NoChar -> invalid_arg (str "opt: invalid parser (does not consume input)")
|
||||
|
||||
let set_of_string s =
|
||||
let set = ref CharSet.empty in
|
||||
String.iter
|
||||
(fun c ->
|
||||
if CharSet.mem c !set
|
||||
then invalid_arg (str "any_of: duplicate char %c" c);
|
||||
set := CharSet.add c !set
|
||||
) s;
|
||||
!set
|
||||
|
||||
let any_of s = AnyOf (set_of_string s)
|
||||
|
||||
let char c = AnyOf (CharSet.singleton c)
|
||||
|
||||
let spaces = skip (any_of " \t")
|
||||
let spaces1 = skip1 (any_of " \t")
|
||||
|
||||
let white = skip (any_of " \t\n")
|
||||
let white1 = skip1 (any_of " \t\n")
|
||||
|
||||
let alpha_lower_ = set_of_string "abcdefghijklmonpqrstuvwxyz"
|
||||
let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ"
|
||||
let num_ = set_of_string "0123456789"
|
||||
let alpha_ = CharSet.union alpha_lower_ alpha_upper_
|
||||
|
||||
let alpha_lower = AnyOf alpha_lower_
|
||||
let alpha_upper = AnyOf alpha_upper_
|
||||
let num = AnyOf num_
|
||||
let alpha = AnyOf alpha_
|
||||
let alpha_num = AnyOf (CharSet.union num_ alpha_)
|
||||
|
||||
let eof = Eof
|
||||
|
||||
let switch_c ?default l =
|
||||
if l = [] then match default with
|
||||
| None -> invalid_arg "switch_c: empty list";
|
||||
| Some d -> d
|
||||
else
|
||||
let map = List.fold_left
|
||||
(fun map (c, t) ->
|
||||
if CharMap.mem c map
|
||||
then invalid_arg (str "switch_c: duplicate char %c" c);
|
||||
CharMap.add c t map
|
||||
) CharMap.empty l
|
||||
in
|
||||
SwitchC (map, default)
|
||||
|
||||
exception ExnIsFail of string
|
||||
|
||||
let choice l =
|
||||
if l = [] then invalid_arg "choice: empty list";
|
||||
(* build a switch by first char *)
|
||||
try
|
||||
(* a map and possibly a default parser *)
|
||||
let map, def = List.fold_left
|
||||
(fun (map, def) p ->
|
||||
match possible_first_chars p, def with
|
||||
| AllChars, Some _ ->
|
||||
invalid_arg "choice: ambiguous, several parsers accept any input"
|
||||
| AllChars, None -> map, Some p
|
||||
| NoChar, _ -> map, def
|
||||
| IsFail msg, _ -> raise (ExnIsFail msg)
|
||||
| Set set, def ->
|
||||
if CharSet.exists (fun c -> CharMap.mem c map) set
|
||||
then invalid_arg
|
||||
(str "choice: ambiguous parsers (overlap on {%s})"
|
||||
(print_char_set (CharSet.inter set (domain_of_char_map map))));
|
||||
let map = CharSet.fold
|
||||
(fun c map -> CharMap.add c p map)
|
||||
set map
|
||||
in map, def
|
||||
) (CharMap.empty, None) l
|
||||
in
|
||||
SwitchC (map, def)
|
||||
with ExnIsFail msg ->
|
||||
fail msg
|
||||
|
||||
(* build prefix trie *)
|
||||
let switch_s l =
|
||||
if l = [] then invalid_arg "switch_s: empty list";
|
||||
(* add parser p in trie [t], with key slice of [s] starting at [i] *)
|
||||
let rec add_trie t s i p =
|
||||
if i = String.length s
|
||||
then match t with
|
||||
| TrieEmpty -> TrieNode (Some p, CharMap.empty)
|
||||
| TrieNode (Some _, _) -> invalid_arg (str "duplicate key \"%s\"" s)
|
||||
| TrieNode (None, m) -> TrieNode (Some p, m)
|
||||
else
|
||||
let c = String.get s i in
|
||||
match t with
|
||||
| TrieEmpty ->
|
||||
let sub = add_trie TrieEmpty s (i+1) p in
|
||||
TrieNode (None, CharMap.singleton c sub)
|
||||
| TrieNode (opt, map) ->
|
||||
try
|
||||
let sub = CharMap.find c map in
|
||||
let sub = add_trie sub s (i+1) p in
|
||||
TrieNode (opt, CharMap.add c sub map)
|
||||
with Not_found ->
|
||||
let sub = add_trie TrieEmpty s (i+1) p in
|
||||
TrieNode (opt, CharMap.add c sub map)
|
||||
in
|
||||
let trie =
|
||||
List.fold_left
|
||||
(fun trie (s, p) ->
|
||||
if s = "" then invalid_arg "switch_s: empty string";
|
||||
add_trie trie s 0 p
|
||||
) TrieEmpty l
|
||||
in
|
||||
SwitchS trie
|
||||
|
||||
let bool =
|
||||
switch_s
|
||||
[ "true", Return true
|
||||
; "false", Return false
|
||||
]
|
||||
|
||||
let fix f = Fix f
|
||||
|
||||
module Infix = struct
|
||||
let (>|=) x f = map f x
|
||||
let (<*>) = app
|
||||
let (<<) a b = AppLeft (a, b) (* return (fun x y -> x) <*> a <*> b *)
|
||||
let (>>) a b = AppRight (a, b) (* return (fun x y -> y) <*> a <*> b *)
|
||||
let (<+>) a b = choice [a; b]
|
||||
end
|
||||
|
||||
include Infix
|
||||
|
||||
(* TODO: more efficient version, with buffer *)
|
||||
let word =
|
||||
return (fun c s -> string_of_list (c :: s)) <*> alpha <*> many alpha_num
|
||||
|
||||
(** {2 Signatures} *)
|
||||
|
||||
type error = {
|
||||
line: int;
|
||||
col: int;
|
||||
msg: string;
|
||||
}
|
||||
|
||||
let string_of_error e = str "at %d:%d; %s" e.line e.col e.msg
|
||||
|
||||
exception Error of error
|
||||
|
||||
module type S = sig
|
||||
type source
|
||||
(** Source of characters *)
|
||||
|
||||
val parse : source -> 'a t -> ('a, error) result
|
||||
(** Parse the given source using the parser, and returns the parsed value. *)
|
||||
|
||||
val parse': source -> 'a t -> ('a, string) result
|
||||
(** Same as {!parse}, but returns a user-friendly string in case of failure *)
|
||||
|
||||
val parse_exn : source -> 'a t -> 'a
|
||||
(** Unsafe version of {!parse}.
|
||||
@raise Error if parsing fails *)
|
||||
end
|
||||
|
||||
(** {2 Build a parser from a given Monadic Input} *)
|
||||
|
||||
module type INPUT = sig
|
||||
type t
|
||||
|
||||
val read : t -> Bytes.t -> int -> int -> int
|
||||
end
|
||||
|
||||
type token =
|
||||
| Yield of char
|
||||
| EOF
|
||||
|
||||
module type READER = sig
|
||||
type t
|
||||
type source
|
||||
|
||||
val create : source -> t
|
||||
val peek : t -> token (* peek; do not consume *)
|
||||
val next : t -> token (* read and consume *)
|
||||
val junk : t -> unit (* consume last token, obtained with junk *)
|
||||
val line : t -> int
|
||||
val col : t -> int
|
||||
end
|
||||
|
||||
module ReaderOfInput(I : INPUT) : READER with type source = I.t = struct
|
||||
type t = {
|
||||
mutable rline : int;
|
||||
mutable rcol : int;
|
||||
input : I.t;
|
||||
buf : Bytes.t;
|
||||
mutable i : int;
|
||||
mutable len : int;
|
||||
}
|
||||
type source = I.t
|
||||
|
||||
let line t = t.rline
|
||||
let col t = t.rcol
|
||||
|
||||
let create input = {
|
||||
rline=0;
|
||||
rcol=0;
|
||||
input;
|
||||
buf = Bytes.make 1024 ' ';
|
||||
i=1;
|
||||
len=1; (* trick for initialization *)
|
||||
}
|
||||
|
||||
let read_next t =
|
||||
let c = Bytes.get t.buf t.i in
|
||||
t.i <- t.i + 1;
|
||||
if c = '\n' then (
|
||||
t.rcol <- 0;
|
||||
t.rline <- t.rline + 1;
|
||||
) else (
|
||||
t.rcol <- t.rcol + 1
|
||||
);
|
||||
Yield c
|
||||
|
||||
let refill t =
|
||||
t.len <- I.read t.input t.buf 0 (Bytes.length t.buf);
|
||||
t.i <- 0;
|
||||
()
|
||||
|
||||
let next t =
|
||||
if t.len = 0 then EOF
|
||||
else if t.i = t.len
|
||||
then (
|
||||
refill t;
|
||||
if t.len = 0 then EOF else read_next t
|
||||
) else read_next t
|
||||
|
||||
let peek t =
|
||||
if t.i = t.len
|
||||
then refill t;
|
||||
Yield (Bytes.get t.buf t.i)
|
||||
|
||||
let junk t =
|
||||
assert (t.len > 0 && t.i < t.len);
|
||||
t.i <- t.i + 1
|
||||
end
|
||||
|
||||
module MakeFromReader(R : READER) : S with type source = R.source = struct
|
||||
type source = R.source
|
||||
|
||||
let error r msg =
|
||||
raise (Error {
|
||||
line = R.line r;
|
||||
col = R.col r;
|
||||
msg;
|
||||
})
|
||||
let errorf r fmt =
|
||||
Printf.ksprintf
|
||||
(fun msg -> error r msg)
|
||||
fmt
|
||||
|
||||
let is_int c = Char.code c >= Char.code '0' && Char.code c <= Char.code '9'
|
||||
let to_int c = Char.code c - Char.code '0'
|
||||
|
||||
let rec parse_int r sign i = match R.peek r with
|
||||
| EOF -> i
|
||||
| Yield c when is_int c ->
|
||||
R.junk r;
|
||||
parse_int r sign (10 * i + to_int c)
|
||||
| Yield '-' when i = 0 && sign ->
|
||||
(* switch sign: only on first char *)
|
||||
R.junk r;
|
||||
parse_int r false 0
|
||||
| _ -> if sign then i else -i
|
||||
|
||||
let parse_float _r _buf = assert false
|
||||
|
||||
let rec parse_rec : type a. R.t -> a t -> a =
|
||||
fun r p -> match p with
|
||||
| Return x -> x
|
||||
| Map (f, x) ->
|
||||
let y = parse_rec r x in
|
||||
f y
|
||||
| Filter (f, x) ->
|
||||
let y = parse_rec r x in
|
||||
if f y then y else errorf r "filter failed"
|
||||
| App (f, x) ->
|
||||
let f' = parse_rec r f in
|
||||
let x' = parse_rec r x in
|
||||
f' x'
|
||||
| AppLeft (a, b) ->
|
||||
let a' = parse_rec r a in
|
||||
let _ = parse_rec r b in
|
||||
a'
|
||||
| AppRight (a, b) ->
|
||||
let _ = parse_rec r a in
|
||||
let b' = parse_rec r b in
|
||||
b'
|
||||
| Fail msg -> error r msg
|
||||
| Int -> parse_int r true 0
|
||||
| Float -> parse_float r (Buffer.create 8)
|
||||
| AnyOf set ->
|
||||
begin match R.next r with
|
||||
| EOF -> errorf r "expected any of {%s}, got EOF" (print_char_set set)
|
||||
| Yield c ->
|
||||
if CharSet.mem c set then c
|
||||
else errorf r "expected any of {%s}, got %c" (print_char_set set) c
|
||||
end
|
||||
| Many (set, p, sep, mult) -> parse_many r ~set ~sep ~p ~mult []
|
||||
| Skip (set, p, mult) -> parse_skip r ~set ~p ~mult
|
||||
| SwitchC (map, def) ->
|
||||
begin match R.peek r with
|
||||
| EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map)
|
||||
| Yield c ->
|
||||
begin try
|
||||
let p' = CharMap.find c map in
|
||||
parse_rec r p'
|
||||
with Not_found ->
|
||||
match def with
|
||||
| None ->
|
||||
errorf r "expected any of {%s}, got %c" (print_char_map map) c
|
||||
| Some d -> parse_rec r d
|
||||
end
|
||||
end
|
||||
| SwitchS TrieEmpty -> assert false
|
||||
| SwitchS (TrieNode (Some p, _)) ->
|
||||
parse_rec r p
|
||||
| SwitchS (TrieNode (None, map)) ->
|
||||
begin match R.next r with
|
||||
| EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map)
|
||||
| Yield c ->
|
||||
begin try
|
||||
let trie = CharMap.find c map in
|
||||
parse_rec r (SwitchS trie) (* recurse in subtree *)
|
||||
with Not_found ->
|
||||
errorf r "expected any of {%s}, got %c" (print_char_map map) c
|
||||
end
|
||||
end
|
||||
| Fix f ->
|
||||
let p = f (Fix f) in
|
||||
parse_rec r p
|
||||
| Eof ->
|
||||
begin match R.next r with
|
||||
| EOF -> ()
|
||||
| Yield c -> errorf r "expected EOF, got %c" c
|
||||
end
|
||||
|
||||
and parse_many
|
||||
: type a. R.t -> set:CharSet.t -> p:a t -> sep:unit t ->
|
||||
mult:multiplicity -> a list -> a list
|
||||
= fun r ~set ~p ~sep ~mult acc ->
|
||||
match R.peek r with
|
||||
| EOF -> List.rev acc
|
||||
| Yield c ->
|
||||
if CharSet.mem c set
|
||||
then
|
||||
let x = parse_rec r p in
|
||||
match mult with
|
||||
| Question -> assert (acc = []); [x]
|
||||
| Plus | Star ->
|
||||
let _ = parse_rec r sep in (* separator *)
|
||||
parse_many r ~set ~p ~sep ~mult:Star (x::acc)
|
||||
else if mult = Plus
|
||||
then errorf r "expected {%s}, got %c" (print_char_set set) c
|
||||
else List.rev acc
|
||||
|
||||
and parse_skip
|
||||
: type a. R.t -> set:CharSet.t -> p:a t -> mult:multiplicity -> unit
|
||||
= fun r ~set ~p ~mult ->
|
||||
match R.peek r with
|
||||
| EOF -> ()
|
||||
| Yield c ->
|
||||
if CharSet.mem c set
|
||||
then
|
||||
let _ = parse_rec r p in
|
||||
match mult with
|
||||
| Question -> ()
|
||||
| Plus | Star -> parse_skip r ~set ~p ~mult:Star
|
||||
else if mult = Plus
|
||||
then errorf r "expected {%s}, got %c" (print_char_set set) c
|
||||
else ()
|
||||
|
||||
(* public functions *)
|
||||
let parse_exn src p =
|
||||
let r = R.create src in
|
||||
parse_rec r p
|
||||
|
||||
let parse src p =
|
||||
let r = R.create src in
|
||||
try
|
||||
`Ok (parse_rec r p)
|
||||
with Error e ->
|
||||
`Error e
|
||||
|
||||
let parse' src p = match parse src p with
|
||||
| `Ok x -> `Ok x
|
||||
| `Error e -> `Error (string_of_error e)
|
||||
end
|
||||
|
||||
module Make(I : INPUT) = struct
|
||||
module R = ReaderOfInput(I)
|
||||
include MakeFromReader(R)
|
||||
end
|
||||
|
||||
module Str = MakeFromReader(struct
|
||||
(* reader of string *)
|
||||
type t = {
|
||||
str : string;
|
||||
mutable i : int;
|
||||
mutable rcol : int;
|
||||
mutable rline : int;
|
||||
}
|
||||
type source = string
|
||||
|
||||
let create str = {
|
||||
str;
|
||||
i = 0;
|
||||
rcol = 1;
|
||||
rline = 1;
|
||||
}
|
||||
let line t = t.rline
|
||||
let col t = t.rcol
|
||||
let peek t =
|
||||
if t.i = String.length t.str then EOF else Yield (String.get t.str t.i)
|
||||
let junk t =
|
||||
assert (t.i < String.length t.str);
|
||||
t.i <- t.i + 1
|
||||
let next t =
|
||||
if t.i = String.length t.str then EOF
|
||||
else (
|
||||
let c = String.get t.str t.i in
|
||||
t.i <- t.i + 1;
|
||||
if c = '\n' then (
|
||||
t.rcol <- 1;
|
||||
t.rline <- t.rline + 1
|
||||
) else t.rcol <- t.rcol + 1;
|
||||
Yield c
|
||||
)
|
||||
end)
|
||||
|
||||
module Chan = Make(struct
|
||||
type t = in_channel
|
||||
let read = input
|
||||
end)
|
||||
219
src/string/app_parse.mli
Normal file
219
src/string/app_parse.mli
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2015, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Applicative Parser Combinators}
|
||||
|
||||
{b status: experimental}
|
||||
@since NEXT_RELEASE
|
||||
*)
|
||||
|
||||
type ('a,'b) result = [`Error of 'b | `Ok of 'a]
|
||||
|
||||
type 'a t
|
||||
(** Parser that yields an error or a value of type 'a *)
|
||||
|
||||
(** {6 Combinators} *)
|
||||
|
||||
val return : 'a -> 'a t
|
||||
(** Parser that succeeds with the given value *)
|
||||
|
||||
val fail : string -> 'a t
|
||||
(** [fail msg] fails with the given error message *)
|
||||
|
||||
(* TODO: a format version of fail *)
|
||||
|
||||
val app : ('a -> 'b) t -> 'a t -> 'b t
|
||||
(** Applicative *)
|
||||
|
||||
val map : ('a -> 'b) -> 'a t -> 'b t
|
||||
(** Map the parsed value *)
|
||||
|
||||
val int : int t
|
||||
(** Parse an integer *)
|
||||
|
||||
val float : float t
|
||||
(** Parse a floating point number *)
|
||||
|
||||
val bool : bool t
|
||||
(** Parse "true" or "false" *)
|
||||
|
||||
val char : char -> char t
|
||||
(** [char c] parses [c] and [c] only *)
|
||||
|
||||
val any_of : string -> char t
|
||||
(** Parse any of the chars present in the given string *)
|
||||
|
||||
val alpha_lower : char t
|
||||
|
||||
val alpha_upper : char t
|
||||
|
||||
val alpha : char t
|
||||
|
||||
val num : char t
|
||||
|
||||
val alpha_num : char t
|
||||
|
||||
val word : string t
|
||||
(** [word] parses any identifier not starting with an integer and
|
||||
not containing any whitespace nor delimiter
|
||||
TODO: specify *)
|
||||
|
||||
val spaces : unit t
|
||||
(** Parse a sequence of ['\t'] and [' '] *)
|
||||
|
||||
val spaces1 : unit t
|
||||
(** Same as {!spaces} but requires at least one space *)
|
||||
|
||||
val white : unit t
|
||||
(** Parse a sequence of ['\t'], ['\n'] and [' '] *)
|
||||
|
||||
val white1 : unit t
|
||||
|
||||
val eof : unit t
|
||||
(** Matches the end of input, fails otherwise *)
|
||||
|
||||
val many : ?sep:unit t -> 'a t -> 'a list t
|
||||
(** 0 or more parsed elements of the given type.
|
||||
@param sep separator between elements of the list (for instance, {!space}) *)
|
||||
|
||||
val many1 : ?sep:unit t -> 'a t -> 'a list t
|
||||
(** Same as {!many}, but needs at least one element *)
|
||||
|
||||
val skip : _ t -> unit t
|
||||
(** Skip 0 or more instances of the given parser *)
|
||||
|
||||
val skip1 : _ t -> unit t
|
||||
|
||||
val opt : 'a t -> 'a option t
|
||||
(** [opt x] tries to parse [x], and returns [None] otherwise *)
|
||||
|
||||
val filter : ('a -> bool) -> 'a t -> 'a t
|
||||
(** [filter f p] parses the same as [p], but fails if the returned value
|
||||
does not satisfy [f] *)
|
||||
|
||||
val switch_c : ?default:'a t -> (char * 'a t) list -> 'a t
|
||||
(** [switch_c l] matches the next char and uses the corresponding parser.
|
||||
Fails if the next char is not in the list, unless default is defined.
|
||||
@param default parser to use if no char matches
|
||||
@raise Invalid_argument if some char occurs several times in [l] *)
|
||||
|
||||
val switch_s : (string * 'a t) list -> 'a t
|
||||
(** [switch_s l] attempts to match matches any of the strings in [l].
|
||||
If one of those strings matches, the corresponding parser
|
||||
is used from now on.
|
||||
@raise Invalid_argument if some string is a prefix of another string,
|
||||
or is empty, or if the list is empty *)
|
||||
|
||||
val choice : 'a t list -> 'a t
|
||||
(** [choice l] chooses between the parsers, unambiguously
|
||||
@raise Invalid_argument if the list is empty, or if some parsers
|
||||
overlap, making the choice ambiguous *)
|
||||
|
||||
val fix : ('a t -> 'a t) -> 'a t
|
||||
(** [fix f] makes a fixpoint *)
|
||||
|
||||
module Infix : sig
|
||||
val (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
||||
|
||||
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
||||
(** Synonym to {!app} *)
|
||||
|
||||
val (>>) : _ t -> 'a t -> 'a t
|
||||
(** [a >> b] parses [a], ignores its result, then parses [b] *)
|
||||
|
||||
val (<<) : 'a t -> _ t -> 'a t
|
||||
(** [a << b] parses [a], then [b], and discards [b] to return [a] *)
|
||||
|
||||
val (<+>) : 'a t -> 'a t -> 'a t
|
||||
(** [a <+> b] is [choice [a;b]], a binary choice *)
|
||||
end
|
||||
|
||||
include module type of Infix
|
||||
|
||||
(** {2 Signatures} *)
|
||||
|
||||
(** {6 Parsing} *)
|
||||
|
||||
type error = {
|
||||
line: int;
|
||||
col: int;
|
||||
msg: string;
|
||||
}
|
||||
|
||||
val string_of_error : error -> string
|
||||
|
||||
exception Error of error
|
||||
|
||||
module type S = sig
|
||||
type source
|
||||
(** Source of characters *)
|
||||
|
||||
val parse : source -> 'a t -> ('a, error) result
|
||||
(** Parse the given source using the parser, and returns the parsed value. *)
|
||||
|
||||
val parse': source -> 'a t -> ('a, string) result
|
||||
(** Same as {!parse}, but returns a user-friendly string in case of failure *)
|
||||
|
||||
val parse_exn : source -> 'a t -> 'a
|
||||
(** Unsafe version of {!parse}.
|
||||
@raise Error if parsing fails *)
|
||||
end
|
||||
|
||||
(** {2 Parse} *)
|
||||
|
||||
module type INPUT = sig
|
||||
type t
|
||||
|
||||
val read : t -> Bytes.t -> int -> int -> int
|
||||
end
|
||||
|
||||
module Make(I : INPUT) : S with type source = I.t
|
||||
|
||||
(** {2 Low-level interface} *)
|
||||
|
||||
type token =
|
||||
| Yield of char
|
||||
| EOF
|
||||
|
||||
module type READER = sig
|
||||
type t
|
||||
type source (* underlying source *)
|
||||
|
||||
val create : source -> t
|
||||
val peek : t -> token (* peek; do not consume *)
|
||||
val next : t -> token (* read and consume *)
|
||||
val junk : t -> unit (* consume last token, obtained with junk *)
|
||||
val line : t -> int
|
||||
val col : t -> int
|
||||
end
|
||||
|
||||
module MakeFromReader(R : READER) : S with type source = R.source
|
||||
|
||||
(** {2 Defaults} *)
|
||||
|
||||
module Str : S with type source = string
|
||||
|
||||
module Chan : S with type source = in_channel
|
||||
Loading…
Add table
Reference in a new issue