mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
wip: App_parse in containers_string, a draft of efficient applicative parser combinators
This commit is contained in:
parent
af4bf49156
commit
ee8c7c03f2
3 changed files with 837 additions and 1 deletions
2
_oasis
2
_oasis
|
|
@ -85,7 +85,7 @@ Library "containers_iter"
|
||||||
Library "containers_string"
|
Library "containers_string"
|
||||||
Path: src/string
|
Path: src/string
|
||||||
Pack: true
|
Pack: true
|
||||||
Modules: KMP, Levenshtein
|
Modules: KMP, Levenshtein, App_parse
|
||||||
FindlibName: string
|
FindlibName: string
|
||||||
FindlibParent: containers
|
FindlibParent: containers
|
||||||
|
|
||||||
|
|
|
||||||
617
src/string/app_parse.ml
Normal file
617
src/string/app_parse.ml
Normal file
|
|
@ -0,0 +1,617 @@
|
||||||
|
|
||||||
|
(*
|
||||||
|
copyright (c) 2013-2015, simon cruanes
|
||||||
|
all rights reserved.
|
||||||
|
|
||||||
|
redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Applicative Parser Combinators} *)
|
||||||
|
|
||||||
|
type ('a,'b) result = [`Error of 'b | `Ok of 'a]
|
||||||
|
|
||||||
|
type multiplicity =
|
||||||
|
| Star (* 0 or more *)
|
||||||
|
| Plus (* 1 or more *)
|
||||||
|
| Question (* 0 or 1 *)
|
||||||
|
|
||||||
|
let str fmt = Printf.sprintf fmt
|
||||||
|
|
||||||
|
module CharSet = Set.Make(Char)
|
||||||
|
module CharMap = Map.Make(Char)
|
||||||
|
|
||||||
|
let print_char_set set =
|
||||||
|
let l = CharSet.fold
|
||||||
|
(fun c acc -> str "'%c'" c :: acc) set [] in
|
||||||
|
String.concat ", " l
|
||||||
|
|
||||||
|
let domain_of_char_map m =
|
||||||
|
CharMap.fold (fun c _ set -> CharSet.add c set) m CharSet.empty
|
||||||
|
|
||||||
|
let print_char_map map =
|
||||||
|
let l = CharMap.fold
|
||||||
|
(fun c _ acc -> str "'%c'" c :: acc) map [] in
|
||||||
|
String.concat ", " l
|
||||||
|
|
||||||
|
(* function composition *)
|
||||||
|
let compose f g x = f (g x)
|
||||||
|
|
||||||
|
let string_of_list l =
|
||||||
|
let b = Bytes.make (List.length l) ' ' in
|
||||||
|
List.iteri (fun i c -> Bytes.set b i c) l;
|
||||||
|
Bytes.unsafe_to_string b
|
||||||
|
|
||||||
|
type _ t =
|
||||||
|
| Return : 'a -> 'a t
|
||||||
|
| Map : ('a -> 'b) * 'a t -> 'b t
|
||||||
|
| Filter: ('a -> bool) * 'a t -> 'a t
|
||||||
|
| App : ('a -> 'b) t * 'a t -> 'b t
|
||||||
|
| AppLeft : 'a t * 'b t -> 'a t
|
||||||
|
| AppRight : 'a t * 'b t -> 'b t
|
||||||
|
| Fail : string -> 'a t
|
||||||
|
| Int : int t
|
||||||
|
| Float : float t
|
||||||
|
| AnyOf : CharSet.t -> char t
|
||||||
|
| Many : CharSet.t * 'a t * unit t * multiplicity -> 'a list t
|
||||||
|
| Skip : CharSet.t * 'a t * multiplicity -> unit t (* same as Many, but ignores *)
|
||||||
|
| SwitchC : 'a t CharMap.t * 'a t option -> 'a t
|
||||||
|
| SwitchS :'a trie -> 'a t
|
||||||
|
| Fix : ('a t -> 'a t) -> 'a t
|
||||||
|
| Eof : unit t
|
||||||
|
|
||||||
|
(* a prefix trie *)
|
||||||
|
and 'a trie =
|
||||||
|
| TrieEmpty
|
||||||
|
| TrieNode of 'a t option * 'a trie CharMap.t
|
||||||
|
|
||||||
|
let return x = Return x
|
||||||
|
|
||||||
|
let success = Return ()
|
||||||
|
|
||||||
|
let fail msg = Fail msg
|
||||||
|
|
||||||
|
let app f x = App (f, x)
|
||||||
|
|
||||||
|
let map f x = match x with
|
||||||
|
| Map (g, y) -> Map (compose f g, y)
|
||||||
|
| Return x -> Return (f x)
|
||||||
|
| _ -> Map (f,x)
|
||||||
|
|
||||||
|
let filter f x = Filter (f, x)
|
||||||
|
|
||||||
|
let int = Int
|
||||||
|
|
||||||
|
let float = Float
|
||||||
|
|
||||||
|
let int_first_char =
|
||||||
|
lazy (CharSet.of_list ['-'; '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'])
|
||||||
|
|
||||||
|
(* a set of characters that are valid as first characters of a parser *)
|
||||||
|
type possible_first_chars =
|
||||||
|
| Set of CharSet.t
|
||||||
|
| AllChars
|
||||||
|
| NoChar
|
||||||
|
| IsFail of string
|
||||||
|
|
||||||
|
(* set of possibilities for the first char of a parser *)
|
||||||
|
let rec possible_first_chars
|
||||||
|
: type a. a t -> possible_first_chars
|
||||||
|
= function
|
||||||
|
| Return _ -> NoChar
|
||||||
|
| Map (_, x) -> possible_first_chars x
|
||||||
|
| Filter (_, x) -> possible_first_chars x
|
||||||
|
| App (f, _) -> possible_first_chars f
|
||||||
|
| AppLeft (a, _) -> possible_first_chars a
|
||||||
|
| AppRight (a, _) -> possible_first_chars a
|
||||||
|
| Fail e -> IsFail e
|
||||||
|
| Int -> Set (Lazy.force int_first_char)
|
||||||
|
| Float -> Set (Lazy.force int_first_char)
|
||||||
|
| AnyOf set -> Set set
|
||||||
|
| Many(set, _, _, _) -> Set set
|
||||||
|
| Skip (set, _, _) -> Set set
|
||||||
|
| SwitchC (map, None) -> Set (domain_of_char_map map)
|
||||||
|
| SwitchC (_, Some _) -> AllChars
|
||||||
|
| SwitchS TrieEmpty -> assert false
|
||||||
|
| SwitchS (TrieNode (_, m)) -> Set (domain_of_char_map m)
|
||||||
|
| Fix f ->
|
||||||
|
let p = f (Fix f) in
|
||||||
|
possible_first_chars p
|
||||||
|
| Eof -> NoChar
|
||||||
|
|
||||||
|
let many ?(sep=success) t =
|
||||||
|
match possible_first_chars t with
|
||||||
|
| Set set -> Many (set, t, sep, Star)
|
||||||
|
| IsFail msg -> Fail msg
|
||||||
|
| AllChars -> invalid_arg (str "many: invalid parser (always succeeds)")
|
||||||
|
| NoChar -> invalid_arg (str "many: invalid parser (does not consume input)")
|
||||||
|
|
||||||
|
let many1 ?(sep=success) t =
|
||||||
|
match possible_first_chars t with
|
||||||
|
| Set set -> Many (set, t, sep, Plus)
|
||||||
|
| IsFail msg -> Fail msg
|
||||||
|
| AllChars -> invalid_arg (str "many: invalid parser (always succeeds)")
|
||||||
|
| NoChar -> invalid_arg (str "many: invalid parser (does not consume input)")
|
||||||
|
|
||||||
|
let skip t =
|
||||||
|
match possible_first_chars t with
|
||||||
|
| Set set -> Skip (set, t, Star)
|
||||||
|
| IsFail msg -> Fail msg
|
||||||
|
| AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)")
|
||||||
|
| NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)")
|
||||||
|
|
||||||
|
let skip1 t =
|
||||||
|
match possible_first_chars t with
|
||||||
|
| Set set -> Skip (set, t, Plus)
|
||||||
|
| IsFail msg -> Fail msg
|
||||||
|
| AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)")
|
||||||
|
| NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)")
|
||||||
|
|
||||||
|
let opt t =
|
||||||
|
match possible_first_chars t with
|
||||||
|
| Set set ->
|
||||||
|
map
|
||||||
|
(function
|
||||||
|
| [x] -> Some x
|
||||||
|
| [] -> None
|
||||||
|
| _ -> assert false
|
||||||
|
) (Many (set, t, success, Question))
|
||||||
|
| IsFail msg -> Fail msg
|
||||||
|
| AllChars -> map (fun x -> Some x) t (* always succeeds *)
|
||||||
|
| NoChar -> invalid_arg (str "opt: invalid parser (does not consume input)")
|
||||||
|
|
||||||
|
let set_of_string s =
|
||||||
|
let set = ref CharSet.empty in
|
||||||
|
String.iter
|
||||||
|
(fun c ->
|
||||||
|
if CharSet.mem c !set
|
||||||
|
then invalid_arg (str "any_of: duplicate char %c" c);
|
||||||
|
set := CharSet.add c !set
|
||||||
|
) s;
|
||||||
|
!set
|
||||||
|
|
||||||
|
let any_of s = AnyOf (set_of_string s)
|
||||||
|
|
||||||
|
let char c = AnyOf (CharSet.singleton c)
|
||||||
|
|
||||||
|
let spaces = skip (any_of " \t")
|
||||||
|
let spaces1 = skip1 (any_of " \t")
|
||||||
|
|
||||||
|
let white = skip (any_of " \t\n")
|
||||||
|
let white1 = skip1 (any_of " \t\n")
|
||||||
|
|
||||||
|
let alpha_lower_ = set_of_string "abcdefghijklmonpqrstuvwxyz"
|
||||||
|
let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ"
|
||||||
|
let num_ = set_of_string "0123456789"
|
||||||
|
let alpha_ = CharSet.union alpha_lower_ alpha_upper_
|
||||||
|
|
||||||
|
let alpha_lower = AnyOf alpha_lower_
|
||||||
|
let alpha_upper = AnyOf alpha_upper_
|
||||||
|
let num = AnyOf num_
|
||||||
|
let alpha = AnyOf alpha_
|
||||||
|
let alpha_num = AnyOf (CharSet.union num_ alpha_)
|
||||||
|
|
||||||
|
let eof = Eof
|
||||||
|
|
||||||
|
let switch_c ?default l =
|
||||||
|
if l = [] then match default with
|
||||||
|
| None -> invalid_arg "switch_c: empty list";
|
||||||
|
| Some d -> d
|
||||||
|
else
|
||||||
|
let map = List.fold_left
|
||||||
|
(fun map (c, t) ->
|
||||||
|
if CharMap.mem c map
|
||||||
|
then invalid_arg (str "switch_c: duplicate char %c" c);
|
||||||
|
CharMap.add c t map
|
||||||
|
) CharMap.empty l
|
||||||
|
in
|
||||||
|
SwitchC (map, default)
|
||||||
|
|
||||||
|
exception ExnIsFail of string
|
||||||
|
|
||||||
|
let choice l =
|
||||||
|
if l = [] then invalid_arg "choice: empty list";
|
||||||
|
(* build a switch by first char *)
|
||||||
|
try
|
||||||
|
(* a map and possibly a default parser *)
|
||||||
|
let map, def = List.fold_left
|
||||||
|
(fun (map, def) p ->
|
||||||
|
match possible_first_chars p, def with
|
||||||
|
| AllChars, Some _ ->
|
||||||
|
invalid_arg "choice: ambiguous, several parsers accept any input"
|
||||||
|
| AllChars, None -> map, Some p
|
||||||
|
| NoChar, _ -> map, def
|
||||||
|
| IsFail msg, _ -> raise (ExnIsFail msg)
|
||||||
|
| Set set, def ->
|
||||||
|
if CharSet.exists (fun c -> CharMap.mem c map) set
|
||||||
|
then invalid_arg
|
||||||
|
(str "choice: ambiguous parsers (overlap on {%s})"
|
||||||
|
(print_char_set (CharSet.inter set (domain_of_char_map map))));
|
||||||
|
let map = CharSet.fold
|
||||||
|
(fun c map -> CharMap.add c p map)
|
||||||
|
set map
|
||||||
|
in map, def
|
||||||
|
) (CharMap.empty, None) l
|
||||||
|
in
|
||||||
|
SwitchC (map, def)
|
||||||
|
with ExnIsFail msg ->
|
||||||
|
fail msg
|
||||||
|
|
||||||
|
(* build prefix trie *)
|
||||||
|
let switch_s l =
|
||||||
|
if l = [] then invalid_arg "switch_s: empty list";
|
||||||
|
(* add parser p in trie [t], with key slice of [s] starting at [i] *)
|
||||||
|
let rec add_trie t s i p =
|
||||||
|
if i = String.length s
|
||||||
|
then match t with
|
||||||
|
| TrieEmpty -> TrieNode (Some p, CharMap.empty)
|
||||||
|
| TrieNode (Some _, _) -> invalid_arg (str "duplicate key \"%s\"" s)
|
||||||
|
| TrieNode (None, m) -> TrieNode (Some p, m)
|
||||||
|
else
|
||||||
|
let c = String.get s i in
|
||||||
|
match t with
|
||||||
|
| TrieEmpty ->
|
||||||
|
let sub = add_trie TrieEmpty s (i+1) p in
|
||||||
|
TrieNode (None, CharMap.singleton c sub)
|
||||||
|
| TrieNode (opt, map) ->
|
||||||
|
try
|
||||||
|
let sub = CharMap.find c map in
|
||||||
|
let sub = add_trie sub s (i+1) p in
|
||||||
|
TrieNode (opt, CharMap.add c sub map)
|
||||||
|
with Not_found ->
|
||||||
|
let sub = add_trie TrieEmpty s (i+1) p in
|
||||||
|
TrieNode (opt, CharMap.add c sub map)
|
||||||
|
in
|
||||||
|
let trie =
|
||||||
|
List.fold_left
|
||||||
|
(fun trie (s, p) ->
|
||||||
|
if s = "" then invalid_arg "switch_s: empty string";
|
||||||
|
add_trie trie s 0 p
|
||||||
|
) TrieEmpty l
|
||||||
|
in
|
||||||
|
SwitchS trie
|
||||||
|
|
||||||
|
let bool =
|
||||||
|
switch_s
|
||||||
|
[ "true", Return true
|
||||||
|
; "false", Return false
|
||||||
|
]
|
||||||
|
|
||||||
|
let fix f = Fix f
|
||||||
|
|
||||||
|
module Infix = struct
|
||||||
|
let (>|=) x f = map f x
|
||||||
|
let (<*>) = app
|
||||||
|
let (<<) a b = AppLeft (a, b) (* return (fun x y -> x) <*> a <*> b *)
|
||||||
|
let (>>) a b = AppRight (a, b) (* return (fun x y -> y) <*> a <*> b *)
|
||||||
|
let (<+>) a b = choice [a; b]
|
||||||
|
end
|
||||||
|
|
||||||
|
include Infix
|
||||||
|
|
||||||
|
(* TODO: more efficient version, with buffer *)
|
||||||
|
let word =
|
||||||
|
return (fun c s -> string_of_list (c :: s)) <*> alpha <*> many alpha_num
|
||||||
|
|
||||||
|
(** {2 Signatures} *)
|
||||||
|
|
||||||
|
type error = {
|
||||||
|
line: int;
|
||||||
|
col: int;
|
||||||
|
msg: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
let string_of_error e = str "at %d:%d; %s" e.line e.col e.msg
|
||||||
|
|
||||||
|
exception Error of error
|
||||||
|
|
||||||
|
module type S = sig
|
||||||
|
type source
|
||||||
|
(** Source of characters *)
|
||||||
|
|
||||||
|
val parse : source -> 'a t -> ('a, error) result
|
||||||
|
(** Parse the given source using the parser, and returns the parsed value. *)
|
||||||
|
|
||||||
|
val parse': source -> 'a t -> ('a, string) result
|
||||||
|
(** Same as {!parse}, but returns a user-friendly string in case of failure *)
|
||||||
|
|
||||||
|
val parse_exn : source -> 'a t -> 'a
|
||||||
|
(** Unsafe version of {!parse}.
|
||||||
|
@raise Error if parsing fails *)
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Build a parser from a given Monadic Input} *)
|
||||||
|
|
||||||
|
module type INPUT = sig
|
||||||
|
type t
|
||||||
|
|
||||||
|
val read : t -> Bytes.t -> int -> int -> int
|
||||||
|
end
|
||||||
|
|
||||||
|
type token =
|
||||||
|
| Yield of char
|
||||||
|
| EOF
|
||||||
|
|
||||||
|
module type READER = sig
|
||||||
|
type t
|
||||||
|
type source
|
||||||
|
|
||||||
|
val create : source -> t
|
||||||
|
val peek : t -> token (* peek; do not consume *)
|
||||||
|
val next : t -> token (* read and consume *)
|
||||||
|
val junk : t -> unit (* consume last token, obtained with junk *)
|
||||||
|
val line : t -> int
|
||||||
|
val col : t -> int
|
||||||
|
end
|
||||||
|
|
||||||
|
module ReaderOfInput(I : INPUT) : READER with type source = I.t = struct
|
||||||
|
type t = {
|
||||||
|
mutable rline : int;
|
||||||
|
mutable rcol : int;
|
||||||
|
input : I.t;
|
||||||
|
buf : Bytes.t;
|
||||||
|
mutable i : int;
|
||||||
|
mutable len : int;
|
||||||
|
}
|
||||||
|
type source = I.t
|
||||||
|
|
||||||
|
let line t = t.rline
|
||||||
|
let col t = t.rcol
|
||||||
|
|
||||||
|
let create input = {
|
||||||
|
rline=0;
|
||||||
|
rcol=0;
|
||||||
|
input;
|
||||||
|
buf = Bytes.make 1024 ' ';
|
||||||
|
i=1;
|
||||||
|
len=1; (* trick for initialization *)
|
||||||
|
}
|
||||||
|
|
||||||
|
let read_next t =
|
||||||
|
let c = Bytes.get t.buf t.i in
|
||||||
|
t.i <- t.i + 1;
|
||||||
|
if c = '\n' then (
|
||||||
|
t.rcol <- 0;
|
||||||
|
t.rline <- t.rline + 1;
|
||||||
|
) else (
|
||||||
|
t.rcol <- t.rcol + 1
|
||||||
|
);
|
||||||
|
Yield c
|
||||||
|
|
||||||
|
let refill t =
|
||||||
|
t.len <- I.read t.input t.buf 0 (Bytes.length t.buf);
|
||||||
|
t.i <- 0;
|
||||||
|
()
|
||||||
|
|
||||||
|
let next t =
|
||||||
|
if t.len = 0 then EOF
|
||||||
|
else if t.i = t.len
|
||||||
|
then (
|
||||||
|
refill t;
|
||||||
|
if t.len = 0 then EOF else read_next t
|
||||||
|
) else read_next t
|
||||||
|
|
||||||
|
let peek t =
|
||||||
|
if t.i = t.len
|
||||||
|
then refill t;
|
||||||
|
Yield (Bytes.get t.buf t.i)
|
||||||
|
|
||||||
|
let junk t =
|
||||||
|
assert (t.len > 0 && t.i < t.len);
|
||||||
|
t.i <- t.i + 1
|
||||||
|
end
|
||||||
|
|
||||||
|
module MakeFromReader(R : READER) : S with type source = R.source = struct
|
||||||
|
type source = R.source
|
||||||
|
|
||||||
|
let error r msg =
|
||||||
|
raise (Error {
|
||||||
|
line = R.line r;
|
||||||
|
col = R.col r;
|
||||||
|
msg;
|
||||||
|
})
|
||||||
|
let errorf r fmt =
|
||||||
|
Printf.ksprintf
|
||||||
|
(fun msg -> error r msg)
|
||||||
|
fmt
|
||||||
|
|
||||||
|
let is_int c = Char.code c >= Char.code '0' && Char.code c <= Char.code '9'
|
||||||
|
let to_int c = Char.code c - Char.code '0'
|
||||||
|
|
||||||
|
let rec parse_int r sign i = match R.peek r with
|
||||||
|
| EOF -> i
|
||||||
|
| Yield c when is_int c ->
|
||||||
|
R.junk r;
|
||||||
|
parse_int r sign (10 * i + to_int c)
|
||||||
|
| Yield '-' when i = 0 && sign ->
|
||||||
|
(* switch sign: only on first char *)
|
||||||
|
R.junk r;
|
||||||
|
parse_int r false 0
|
||||||
|
| _ -> if sign then i else -i
|
||||||
|
|
||||||
|
let parse_float _r _buf = assert false
|
||||||
|
|
||||||
|
let rec parse_rec : type a. R.t -> a t -> a =
|
||||||
|
fun r p -> match p with
|
||||||
|
| Return x -> x
|
||||||
|
| Map (f, x) ->
|
||||||
|
let y = parse_rec r x in
|
||||||
|
f y
|
||||||
|
| Filter (f, x) ->
|
||||||
|
let y = parse_rec r x in
|
||||||
|
if f y then y else errorf r "filter failed"
|
||||||
|
| App (f, x) ->
|
||||||
|
let f' = parse_rec r f in
|
||||||
|
let x' = parse_rec r x in
|
||||||
|
f' x'
|
||||||
|
| AppLeft (a, b) ->
|
||||||
|
let a' = parse_rec r a in
|
||||||
|
let _ = parse_rec r b in
|
||||||
|
a'
|
||||||
|
| AppRight (a, b) ->
|
||||||
|
let _ = parse_rec r a in
|
||||||
|
let b' = parse_rec r b in
|
||||||
|
b'
|
||||||
|
| Fail msg -> error r msg
|
||||||
|
| Int -> parse_int r true 0
|
||||||
|
| Float -> parse_float r (Buffer.create 8)
|
||||||
|
| AnyOf set ->
|
||||||
|
begin match R.next r with
|
||||||
|
| EOF -> errorf r "expected any of {%s}, got EOF" (print_char_set set)
|
||||||
|
| Yield c ->
|
||||||
|
if CharSet.mem c set then c
|
||||||
|
else errorf r "expected any of {%s}, got %c" (print_char_set set) c
|
||||||
|
end
|
||||||
|
| Many (set, p, sep, mult) -> parse_many r ~set ~sep ~p ~mult []
|
||||||
|
| Skip (set, p, mult) -> parse_skip r ~set ~p ~mult
|
||||||
|
| SwitchC (map, def) ->
|
||||||
|
begin match R.peek r with
|
||||||
|
| EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map)
|
||||||
|
| Yield c ->
|
||||||
|
begin try
|
||||||
|
let p' = CharMap.find c map in
|
||||||
|
parse_rec r p'
|
||||||
|
with Not_found ->
|
||||||
|
match def with
|
||||||
|
| None ->
|
||||||
|
errorf r "expected any of {%s}, got %c" (print_char_map map) c
|
||||||
|
| Some d -> parse_rec r d
|
||||||
|
end
|
||||||
|
end
|
||||||
|
| SwitchS TrieEmpty -> assert false
|
||||||
|
| SwitchS (TrieNode (Some p, _)) ->
|
||||||
|
parse_rec r p
|
||||||
|
| SwitchS (TrieNode (None, map)) ->
|
||||||
|
begin match R.next r with
|
||||||
|
| EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map)
|
||||||
|
| Yield c ->
|
||||||
|
begin try
|
||||||
|
let trie = CharMap.find c map in
|
||||||
|
parse_rec r (SwitchS trie) (* recurse in subtree *)
|
||||||
|
with Not_found ->
|
||||||
|
errorf r "expected any of {%s}, got %c" (print_char_map map) c
|
||||||
|
end
|
||||||
|
end
|
||||||
|
| Fix f ->
|
||||||
|
let p = f (Fix f) in
|
||||||
|
parse_rec r p
|
||||||
|
| Eof ->
|
||||||
|
begin match R.next r with
|
||||||
|
| EOF -> ()
|
||||||
|
| Yield c -> errorf r "expected EOF, got %c" c
|
||||||
|
end
|
||||||
|
|
||||||
|
and parse_many
|
||||||
|
: type a. R.t -> set:CharSet.t -> p:a t -> sep:unit t ->
|
||||||
|
mult:multiplicity -> a list -> a list
|
||||||
|
= fun r ~set ~p ~sep ~mult acc ->
|
||||||
|
match R.peek r with
|
||||||
|
| EOF -> List.rev acc
|
||||||
|
| Yield c ->
|
||||||
|
if CharSet.mem c set
|
||||||
|
then
|
||||||
|
let x = parse_rec r p in
|
||||||
|
match mult with
|
||||||
|
| Question -> assert (acc = []); [x]
|
||||||
|
| Plus | Star ->
|
||||||
|
let _ = parse_rec r sep in (* separator *)
|
||||||
|
parse_many r ~set ~p ~sep ~mult:Star (x::acc)
|
||||||
|
else if mult = Plus
|
||||||
|
then errorf r "expected {%s}, got %c" (print_char_set set) c
|
||||||
|
else List.rev acc
|
||||||
|
|
||||||
|
and parse_skip
|
||||||
|
: type a. R.t -> set:CharSet.t -> p:a t -> mult:multiplicity -> unit
|
||||||
|
= fun r ~set ~p ~mult ->
|
||||||
|
match R.peek r with
|
||||||
|
| EOF -> ()
|
||||||
|
| Yield c ->
|
||||||
|
if CharSet.mem c set
|
||||||
|
then
|
||||||
|
let _ = parse_rec r p in
|
||||||
|
match mult with
|
||||||
|
| Question -> ()
|
||||||
|
| Plus | Star -> parse_skip r ~set ~p ~mult:Star
|
||||||
|
else if mult = Plus
|
||||||
|
then errorf r "expected {%s}, got %c" (print_char_set set) c
|
||||||
|
else ()
|
||||||
|
|
||||||
|
(* public functions *)
|
||||||
|
let parse_exn src p =
|
||||||
|
let r = R.create src in
|
||||||
|
parse_rec r p
|
||||||
|
|
||||||
|
let parse src p =
|
||||||
|
let r = R.create src in
|
||||||
|
try
|
||||||
|
`Ok (parse_rec r p)
|
||||||
|
with Error e ->
|
||||||
|
`Error e
|
||||||
|
|
||||||
|
let parse' src p = match parse src p with
|
||||||
|
| `Ok x -> `Ok x
|
||||||
|
| `Error e -> `Error (string_of_error e)
|
||||||
|
end
|
||||||
|
|
||||||
|
module Make(I : INPUT) = struct
|
||||||
|
module R = ReaderOfInput(I)
|
||||||
|
include MakeFromReader(R)
|
||||||
|
end
|
||||||
|
|
||||||
|
module Str = MakeFromReader(struct
|
||||||
|
(* reader of string *)
|
||||||
|
type t = {
|
||||||
|
str : string;
|
||||||
|
mutable i : int;
|
||||||
|
mutable rcol : int;
|
||||||
|
mutable rline : int;
|
||||||
|
}
|
||||||
|
type source = string
|
||||||
|
|
||||||
|
let create str = {
|
||||||
|
str;
|
||||||
|
i = 0;
|
||||||
|
rcol = 1;
|
||||||
|
rline = 1;
|
||||||
|
}
|
||||||
|
let line t = t.rline
|
||||||
|
let col t = t.rcol
|
||||||
|
let peek t =
|
||||||
|
if t.i = String.length t.str then EOF else Yield (String.get t.str t.i)
|
||||||
|
let junk t =
|
||||||
|
assert (t.i < String.length t.str);
|
||||||
|
t.i <- t.i + 1
|
||||||
|
let next t =
|
||||||
|
if t.i = String.length t.str then EOF
|
||||||
|
else (
|
||||||
|
let c = String.get t.str t.i in
|
||||||
|
t.i <- t.i + 1;
|
||||||
|
if c = '\n' then (
|
||||||
|
t.rcol <- 1;
|
||||||
|
t.rline <- t.rline + 1
|
||||||
|
) else t.rcol <- t.rcol + 1;
|
||||||
|
Yield c
|
||||||
|
)
|
||||||
|
end)
|
||||||
|
|
||||||
|
module Chan = Make(struct
|
||||||
|
type t = in_channel
|
||||||
|
let read = input
|
||||||
|
end)
|
||||||
219
src/string/app_parse.mli
Normal file
219
src/string/app_parse.mli
Normal file
|
|
@ -0,0 +1,219 @@
|
||||||
|
|
||||||
|
(*
|
||||||
|
copyright (c) 2013-2015, simon cruanes
|
||||||
|
all rights reserved.
|
||||||
|
|
||||||
|
redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Applicative Parser Combinators}
|
||||||
|
|
||||||
|
{b status: experimental}
|
||||||
|
@since NEXT_RELEASE
|
||||||
|
*)
|
||||||
|
|
||||||
|
type ('a,'b) result = [`Error of 'b | `Ok of 'a]
|
||||||
|
|
||||||
|
type 'a t
|
||||||
|
(** Parser that yields an error or a value of type 'a *)
|
||||||
|
|
||||||
|
(** {6 Combinators} *)
|
||||||
|
|
||||||
|
val return : 'a -> 'a t
|
||||||
|
(** Parser that succeeds with the given value *)
|
||||||
|
|
||||||
|
val fail : string -> 'a t
|
||||||
|
(** [fail msg] fails with the given error message *)
|
||||||
|
|
||||||
|
(* TODO: a format version of fail *)
|
||||||
|
|
||||||
|
val app : ('a -> 'b) t -> 'a t -> 'b t
|
||||||
|
(** Applicative *)
|
||||||
|
|
||||||
|
val map : ('a -> 'b) -> 'a t -> 'b t
|
||||||
|
(** Map the parsed value *)
|
||||||
|
|
||||||
|
val int : int t
|
||||||
|
(** Parse an integer *)
|
||||||
|
|
||||||
|
val float : float t
|
||||||
|
(** Parse a floating point number *)
|
||||||
|
|
||||||
|
val bool : bool t
|
||||||
|
(** Parse "true" or "false" *)
|
||||||
|
|
||||||
|
val char : char -> char t
|
||||||
|
(** [char c] parses [c] and [c] only *)
|
||||||
|
|
||||||
|
val any_of : string -> char t
|
||||||
|
(** Parse any of the chars present in the given string *)
|
||||||
|
|
||||||
|
val alpha_lower : char t
|
||||||
|
|
||||||
|
val alpha_upper : char t
|
||||||
|
|
||||||
|
val alpha : char t
|
||||||
|
|
||||||
|
val num : char t
|
||||||
|
|
||||||
|
val alpha_num : char t
|
||||||
|
|
||||||
|
val word : string t
|
||||||
|
(** [word] parses any identifier not starting with an integer and
|
||||||
|
not containing any whitespace nor delimiter
|
||||||
|
TODO: specify *)
|
||||||
|
|
||||||
|
val spaces : unit t
|
||||||
|
(** Parse a sequence of ['\t'] and [' '] *)
|
||||||
|
|
||||||
|
val spaces1 : unit t
|
||||||
|
(** Same as {!spaces} but requires at least one space *)
|
||||||
|
|
||||||
|
val white : unit t
|
||||||
|
(** Parse a sequence of ['\t'], ['\n'] and [' '] *)
|
||||||
|
|
||||||
|
val white1 : unit t
|
||||||
|
|
||||||
|
val eof : unit t
|
||||||
|
(** Matches the end of input, fails otherwise *)
|
||||||
|
|
||||||
|
val many : ?sep:unit t -> 'a t -> 'a list t
|
||||||
|
(** 0 or more parsed elements of the given type.
|
||||||
|
@param sep separator between elements of the list (for instance, {!space}) *)
|
||||||
|
|
||||||
|
val many1 : ?sep:unit t -> 'a t -> 'a list t
|
||||||
|
(** Same as {!many}, but needs at least one element *)
|
||||||
|
|
||||||
|
val skip : _ t -> unit t
|
||||||
|
(** Skip 0 or more instances of the given parser *)
|
||||||
|
|
||||||
|
val skip1 : _ t -> unit t
|
||||||
|
|
||||||
|
val opt : 'a t -> 'a option t
|
||||||
|
(** [opt x] tries to parse [x], and returns [None] otherwise *)
|
||||||
|
|
||||||
|
val filter : ('a -> bool) -> 'a t -> 'a t
|
||||||
|
(** [filter f p] parses the same as [p], but fails if the returned value
|
||||||
|
does not satisfy [f] *)
|
||||||
|
|
||||||
|
val switch_c : ?default:'a t -> (char * 'a t) list -> 'a t
|
||||||
|
(** [switch_c l] matches the next char and uses the corresponding parser.
|
||||||
|
Fails if the next char is not in the list, unless default is defined.
|
||||||
|
@param default parser to use if no char matches
|
||||||
|
@raise Invalid_argument if some char occurs several times in [l] *)
|
||||||
|
|
||||||
|
val switch_s : (string * 'a t) list -> 'a t
|
||||||
|
(** [switch_s l] attempts to match matches any of the strings in [l].
|
||||||
|
If one of those strings matches, the corresponding parser
|
||||||
|
is used from now on.
|
||||||
|
@raise Invalid_argument if some string is a prefix of another string,
|
||||||
|
or is empty, or if the list is empty *)
|
||||||
|
|
||||||
|
val choice : 'a t list -> 'a t
|
||||||
|
(** [choice l] chooses between the parsers, unambiguously
|
||||||
|
@raise Invalid_argument if the list is empty, or if some parsers
|
||||||
|
overlap, making the choice ambiguous *)
|
||||||
|
|
||||||
|
val fix : ('a t -> 'a t) -> 'a t
|
||||||
|
(** [fix f] makes a fixpoint *)
|
||||||
|
|
||||||
|
module Infix : sig
|
||||||
|
val (>|=) : 'a t -> ('a -> 'b) -> 'b t
|
||||||
|
|
||||||
|
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t
|
||||||
|
(** Synonym to {!app} *)
|
||||||
|
|
||||||
|
val (>>) : _ t -> 'a t -> 'a t
|
||||||
|
(** [a >> b] parses [a], ignores its result, then parses [b] *)
|
||||||
|
|
||||||
|
val (<<) : 'a t -> _ t -> 'a t
|
||||||
|
(** [a << b] parses [a], then [b], and discards [b] to return [a] *)
|
||||||
|
|
||||||
|
val (<+>) : 'a t -> 'a t -> 'a t
|
||||||
|
(** [a <+> b] is [choice [a;b]], a binary choice *)
|
||||||
|
end
|
||||||
|
|
||||||
|
include module type of Infix
|
||||||
|
|
||||||
|
(** {2 Signatures} *)
|
||||||
|
|
||||||
|
(** {6 Parsing} *)
|
||||||
|
|
||||||
|
type error = {
|
||||||
|
line: int;
|
||||||
|
col: int;
|
||||||
|
msg: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
val string_of_error : error -> string
|
||||||
|
|
||||||
|
exception Error of error
|
||||||
|
|
||||||
|
module type S = sig
|
||||||
|
type source
|
||||||
|
(** Source of characters *)
|
||||||
|
|
||||||
|
val parse : source -> 'a t -> ('a, error) result
|
||||||
|
(** Parse the given source using the parser, and returns the parsed value. *)
|
||||||
|
|
||||||
|
val parse': source -> 'a t -> ('a, string) result
|
||||||
|
(** Same as {!parse}, but returns a user-friendly string in case of failure *)
|
||||||
|
|
||||||
|
val parse_exn : source -> 'a t -> 'a
|
||||||
|
(** Unsafe version of {!parse}.
|
||||||
|
@raise Error if parsing fails *)
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Parse} *)
|
||||||
|
|
||||||
|
module type INPUT = sig
|
||||||
|
type t
|
||||||
|
|
||||||
|
val read : t -> Bytes.t -> int -> int -> int
|
||||||
|
end
|
||||||
|
|
||||||
|
module Make(I : INPUT) : S with type source = I.t
|
||||||
|
|
||||||
|
(** {2 Low-level interface} *)
|
||||||
|
|
||||||
|
type token =
|
||||||
|
| Yield of char
|
||||||
|
| EOF
|
||||||
|
|
||||||
|
module type READER = sig
|
||||||
|
type t
|
||||||
|
type source (* underlying source *)
|
||||||
|
|
||||||
|
val create : source -> t
|
||||||
|
val peek : t -> token (* peek; do not consume *)
|
||||||
|
val next : t -> token (* read and consume *)
|
||||||
|
val junk : t -> unit (* consume last token, obtained with junk *)
|
||||||
|
val line : t -> int
|
||||||
|
val col : t -> int
|
||||||
|
end
|
||||||
|
|
||||||
|
module MakeFromReader(R : READER) : S with type source = R.source
|
||||||
|
|
||||||
|
(** {2 Defaults} *)
|
||||||
|
|
||||||
|
module Str : S with type source = string
|
||||||
|
|
||||||
|
module Chan : S with type source = in_channel
|
||||||
Loading…
Add table
Reference in a new issue