From ee8c7c03f20bbe744137b9adc0a07796415984e6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 5 Mar 2015 01:55:43 +0100 Subject: [PATCH] wip: App_parse in containers_string, a draft of efficient applicative parser combinators --- _oasis | 2 +- src/string/app_parse.ml | 617 +++++++++++++++++++++++++++++++++++++++ src/string/app_parse.mli | 219 ++++++++++++++ 3 files changed, 837 insertions(+), 1 deletion(-) create mode 100644 src/string/app_parse.ml create mode 100644 src/string/app_parse.mli diff --git a/_oasis b/_oasis index 3ee93dd6..19eacd1d 100644 --- a/_oasis +++ b/_oasis @@ -85,7 +85,7 @@ Library "containers_iter" Library "containers_string" Path: src/string Pack: true - Modules: KMP, Levenshtein + Modules: KMP, Levenshtein, App_parse FindlibName: string FindlibParent: containers diff --git a/src/string/app_parse.ml b/src/string/app_parse.ml new file mode 100644 index 00000000..acd9035a --- /dev/null +++ b/src/string/app_parse.ml @@ -0,0 +1,617 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Applicative Parser Combinators} *) + +type ('a,'b) result = [`Error of 'b | `Ok of 'a] + +type multiplicity = + | Star (* 0 or more *) + | Plus (* 1 or more *) + | Question (* 0 or 1 *) + +let str fmt = Printf.sprintf fmt + +module CharSet = Set.Make(Char) +module CharMap = Map.Make(Char) + +let print_char_set set = + let l = CharSet.fold + (fun c acc -> str "'%c'" c :: acc) set [] in + String.concat ", " l + +let domain_of_char_map m = + CharMap.fold (fun c _ set -> CharSet.add c set) m CharSet.empty + +let print_char_map map = + let l = CharMap.fold + (fun c _ acc -> str "'%c'" c :: acc) map [] in + String.concat ", " l + +(* function composition *) +let compose f g x = f (g x) + +let string_of_list l = + let b = Bytes.make (List.length l) ' ' in + List.iteri (fun i c -> Bytes.set b i c) l; + Bytes.unsafe_to_string b + +type _ t = + | Return : 'a -> 'a t + | Map : ('a -> 'b) * 'a t -> 'b t + | Filter: ('a -> bool) * 'a t -> 'a t + | App : ('a -> 'b) t * 'a t -> 'b t + | AppLeft : 'a t * 'b t -> 'a t + | AppRight : 'a t * 'b t -> 'b t + | Fail : string -> 'a t + | Int : int t + | Float : float t + | AnyOf : CharSet.t -> char t + | Many : CharSet.t * 'a t * unit t * multiplicity -> 'a list t + | Skip : CharSet.t * 'a t * multiplicity -> unit t (* same as Many, but ignores *) + | SwitchC : 'a t CharMap.t * 'a t option -> 'a t + | SwitchS :'a trie -> 'a t + | Fix : ('a t -> 'a t) -> 'a t + | Eof : unit t + +(* a prefix trie *) +and 'a trie = + | TrieEmpty + | TrieNode of 'a t option * 'a trie CharMap.t + +let return x = Return x + +let success = Return () + +let fail msg = Fail msg + +let app f x = App (f, x) + +let map f x = match x with + | Map (g, y) -> Map (compose f g, y) + | Return x -> Return (f x) + | _ -> Map (f,x) + +let filter f x = Filter (f, x) + +let int = Int + +let float = Float + +let int_first_char = + lazy (CharSet.of_list ['-'; '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9']) + +(* a set of characters that are valid as first characters of a parser *) +type possible_first_chars = + | Set of CharSet.t + | AllChars + | NoChar + | IsFail of string + +(* set of possibilities for the first char of a parser *) +let rec possible_first_chars + : type a. a t -> possible_first_chars + = function + | Return _ -> NoChar + | Map (_, x) -> possible_first_chars x + | Filter (_, x) -> possible_first_chars x + | App (f, _) -> possible_first_chars f + | AppLeft (a, _) -> possible_first_chars a + | AppRight (a, _) -> possible_first_chars a + | Fail e -> IsFail e + | Int -> Set (Lazy.force int_first_char) + | Float -> Set (Lazy.force int_first_char) + | AnyOf set -> Set set + | Many(set, _, _, _) -> Set set + | Skip (set, _, _) -> Set set + | SwitchC (map, None) -> Set (domain_of_char_map map) + | SwitchC (_, Some _) -> AllChars + | SwitchS TrieEmpty -> assert false + | SwitchS (TrieNode (_, m)) -> Set (domain_of_char_map m) + | Fix f -> + let p = f (Fix f) in + possible_first_chars p + | Eof -> NoChar + +let many ?(sep=success) t = + match possible_first_chars t with + | Set set -> Many (set, t, sep, Star) + | IsFail msg -> Fail msg + | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") + +let many1 ?(sep=success) t = + match possible_first_chars t with + | Set set -> Many (set, t, sep, Plus) + | IsFail msg -> Fail msg + | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") + +let skip t = + match possible_first_chars t with + | Set set -> Skip (set, t, Star) + | IsFail msg -> Fail msg + | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") + +let skip1 t = + match possible_first_chars t with + | Set set -> Skip (set, t, Plus) + | IsFail msg -> Fail msg + | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") + +let opt t = + match possible_first_chars t with + | Set set -> + map + (function + | [x] -> Some x + | [] -> None + | _ -> assert false + ) (Many (set, t, success, Question)) + | IsFail msg -> Fail msg + | AllChars -> map (fun x -> Some x) t (* always succeeds *) + | NoChar -> invalid_arg (str "opt: invalid parser (does not consume input)") + +let set_of_string s = + let set = ref CharSet.empty in + String.iter + (fun c -> + if CharSet.mem c !set + then invalid_arg (str "any_of: duplicate char %c" c); + set := CharSet.add c !set + ) s; + !set + +let any_of s = AnyOf (set_of_string s) + +let char c = AnyOf (CharSet.singleton c) + +let spaces = skip (any_of " \t") +let spaces1 = skip1 (any_of " \t") + +let white = skip (any_of " \t\n") +let white1 = skip1 (any_of " \t\n") + +let alpha_lower_ = set_of_string "abcdefghijklmonpqrstuvwxyz" +let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ" +let num_ = set_of_string "0123456789" +let alpha_ = CharSet.union alpha_lower_ alpha_upper_ + +let alpha_lower = AnyOf alpha_lower_ +let alpha_upper = AnyOf alpha_upper_ +let num = AnyOf num_ +let alpha = AnyOf alpha_ +let alpha_num = AnyOf (CharSet.union num_ alpha_) + +let eof = Eof + +let switch_c ?default l = + if l = [] then match default with + | None -> invalid_arg "switch_c: empty list"; + | Some d -> d + else + let map = List.fold_left + (fun map (c, t) -> + if CharMap.mem c map + then invalid_arg (str "switch_c: duplicate char %c" c); + CharMap.add c t map + ) CharMap.empty l + in + SwitchC (map, default) + +exception ExnIsFail of string + +let choice l = + if l = [] then invalid_arg "choice: empty list"; + (* build a switch by first char *) + try + (* a map and possibly a default parser *) + let map, def = List.fold_left + (fun (map, def) p -> + match possible_first_chars p, def with + | AllChars, Some _ -> + invalid_arg "choice: ambiguous, several parsers accept any input" + | AllChars, None -> map, Some p + | NoChar, _ -> map, def + | IsFail msg, _ -> raise (ExnIsFail msg) + | Set set, def -> + if CharSet.exists (fun c -> CharMap.mem c map) set + then invalid_arg + (str "choice: ambiguous parsers (overlap on {%s})" + (print_char_set (CharSet.inter set (domain_of_char_map map)))); + let map = CharSet.fold + (fun c map -> CharMap.add c p map) + set map + in map, def + ) (CharMap.empty, None) l + in + SwitchC (map, def) + with ExnIsFail msg -> + fail msg + +(* build prefix trie *) +let switch_s l = + if l = [] then invalid_arg "switch_s: empty list"; + (* add parser p in trie [t], with key slice of [s] starting at [i] *) + let rec add_trie t s i p = + if i = String.length s + then match t with + | TrieEmpty -> TrieNode (Some p, CharMap.empty) + | TrieNode (Some _, _) -> invalid_arg (str "duplicate key \"%s\"" s) + | TrieNode (None, m) -> TrieNode (Some p, m) + else + let c = String.get s i in + match t with + | TrieEmpty -> + let sub = add_trie TrieEmpty s (i+1) p in + TrieNode (None, CharMap.singleton c sub) + | TrieNode (opt, map) -> + try + let sub = CharMap.find c map in + let sub = add_trie sub s (i+1) p in + TrieNode (opt, CharMap.add c sub map) + with Not_found -> + let sub = add_trie TrieEmpty s (i+1) p in + TrieNode (opt, CharMap.add c sub map) + in + let trie = + List.fold_left + (fun trie (s, p) -> + if s = "" then invalid_arg "switch_s: empty string"; + add_trie trie s 0 p + ) TrieEmpty l + in + SwitchS trie + +let bool = + switch_s + [ "true", Return true + ; "false", Return false + ] + +let fix f = Fix f + +module Infix = struct + let (>|=) x f = map f x + let (<*>) = app + let (<<) a b = AppLeft (a, b) (* return (fun x y -> x) <*> a <*> b *) + let (>>) a b = AppRight (a, b) (* return (fun x y -> y) <*> a <*> b *) + let (<+>) a b = choice [a; b] +end + +include Infix + +(* TODO: more efficient version, with buffer *) +let word = + return (fun c s -> string_of_list (c :: s)) <*> alpha <*> many alpha_num + +(** {2 Signatures} *) + +type error = { + line: int; + col: int; + msg: string; +} + +let string_of_error e = str "at %d:%d; %s" e.line e.col e.msg + +exception Error of error + +module type S = sig + type source + (** Source of characters *) + + val parse : source -> 'a t -> ('a, error) result + (** Parse the given source using the parser, and returns the parsed value. *) + + val parse': source -> 'a t -> ('a, string) result + (** Same as {!parse}, but returns a user-friendly string in case of failure *) + + val parse_exn : source -> 'a t -> 'a + (** Unsafe version of {!parse}. + @raise Error if parsing fails *) +end + +(** {2 Build a parser from a given Monadic Input} *) + +module type INPUT = sig + type t + + val read : t -> Bytes.t -> int -> int -> int +end + +type token = + | Yield of char + | EOF + +module type READER = sig + type t + type source + + val create : source -> t + val peek : t -> token (* peek; do not consume *) + val next : t -> token (* read and consume *) + val junk : t -> unit (* consume last token, obtained with junk *) + val line : t -> int + val col : t -> int +end + +module ReaderOfInput(I : INPUT) : READER with type source = I.t = struct + type t = { + mutable rline : int; + mutable rcol : int; + input : I.t; + buf : Bytes.t; + mutable i : int; + mutable len : int; + } + type source = I.t + + let line t = t.rline + let col t = t.rcol + + let create input = { + rline=0; + rcol=0; + input; + buf = Bytes.make 1024 ' '; + i=1; + len=1; (* trick for initialization *) + } + + let read_next t = + let c = Bytes.get t.buf t.i in + t.i <- t.i + 1; + if c = '\n' then ( + t.rcol <- 0; + t.rline <- t.rline + 1; + ) else ( + t.rcol <- t.rcol + 1 + ); + Yield c + + let refill t = + t.len <- I.read t.input t.buf 0 (Bytes.length t.buf); + t.i <- 0; + () + + let next t = + if t.len = 0 then EOF + else if t.i = t.len + then ( + refill t; + if t.len = 0 then EOF else read_next t + ) else read_next t + + let peek t = + if t.i = t.len + then refill t; + Yield (Bytes.get t.buf t.i) + + let junk t = + assert (t.len > 0 && t.i < t.len); + t.i <- t.i + 1 +end + +module MakeFromReader(R : READER) : S with type source = R.source = struct + type source = R.source + + let error r msg = + raise (Error { + line = R.line r; + col = R.col r; + msg; + }) + let errorf r fmt = + Printf.ksprintf + (fun msg -> error r msg) + fmt + + let is_int c = Char.code c >= Char.code '0' && Char.code c <= Char.code '9' + let to_int c = Char.code c - Char.code '0' + + let rec parse_int r sign i = match R.peek r with + | EOF -> i + | Yield c when is_int c -> + R.junk r; + parse_int r sign (10 * i + to_int c) + | Yield '-' when i = 0 && sign -> + (* switch sign: only on first char *) + R.junk r; + parse_int r false 0 + | _ -> if sign then i else -i + + let parse_float _r _buf = assert false + + let rec parse_rec : type a. R.t -> a t -> a = + fun r p -> match p with + | Return x -> x + | Map (f, x) -> + let y = parse_rec r x in + f y + | Filter (f, x) -> + let y = parse_rec r x in + if f y then y else errorf r "filter failed" + | App (f, x) -> + let f' = parse_rec r f in + let x' = parse_rec r x in + f' x' + | AppLeft (a, b) -> + let a' = parse_rec r a in + let _ = parse_rec r b in + a' + | AppRight (a, b) -> + let _ = parse_rec r a in + let b' = parse_rec r b in + b' + | Fail msg -> error r msg + | Int -> parse_int r true 0 + | Float -> parse_float r (Buffer.create 8) + | AnyOf set -> + begin match R.next r with + | EOF -> errorf r "expected any of {%s}, got EOF" (print_char_set set) + | Yield c -> + if CharSet.mem c set then c + else errorf r "expected any of {%s}, got %c" (print_char_set set) c + end + | Many (set, p, sep, mult) -> parse_many r ~set ~sep ~p ~mult [] + | Skip (set, p, mult) -> parse_skip r ~set ~p ~mult + | SwitchC (map, def) -> + begin match R.peek r with + | EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map) + | Yield c -> + begin try + let p' = CharMap.find c map in + parse_rec r p' + with Not_found -> + match def with + | None -> + errorf r "expected any of {%s}, got %c" (print_char_map map) c + | Some d -> parse_rec r d + end + end + | SwitchS TrieEmpty -> assert false + | SwitchS (TrieNode (Some p, _)) -> + parse_rec r p + | SwitchS (TrieNode (None, map)) -> + begin match R.next r with + | EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map) + | Yield c -> + begin try + let trie = CharMap.find c map in + parse_rec r (SwitchS trie) (* recurse in subtree *) + with Not_found -> + errorf r "expected any of {%s}, got %c" (print_char_map map) c + end + end + | Fix f -> + let p = f (Fix f) in + parse_rec r p + | Eof -> + begin match R.next r with + | EOF -> () + | Yield c -> errorf r "expected EOF, got %c" c + end + + and parse_many + : type a. R.t -> set:CharSet.t -> p:a t -> sep:unit t -> + mult:multiplicity -> a list -> a list + = fun r ~set ~p ~sep ~mult acc -> + match R.peek r with + | EOF -> List.rev acc + | Yield c -> + if CharSet.mem c set + then + let x = parse_rec r p in + match mult with + | Question -> assert (acc = []); [x] + | Plus | Star -> + let _ = parse_rec r sep in (* separator *) + parse_many r ~set ~p ~sep ~mult:Star (x::acc) + else if mult = Plus + then errorf r "expected {%s}, got %c" (print_char_set set) c + else List.rev acc + + and parse_skip + : type a. R.t -> set:CharSet.t -> p:a t -> mult:multiplicity -> unit + = fun r ~set ~p ~mult -> + match R.peek r with + | EOF -> () + | Yield c -> + if CharSet.mem c set + then + let _ = parse_rec r p in + match mult with + | Question -> () + | Plus | Star -> parse_skip r ~set ~p ~mult:Star + else if mult = Plus + then errorf r "expected {%s}, got %c" (print_char_set set) c + else () + + (* public functions *) + let parse_exn src p = + let r = R.create src in + parse_rec r p + + let parse src p = + let r = R.create src in + try + `Ok (parse_rec r p) + with Error e -> + `Error e + + let parse' src p = match parse src p with + | `Ok x -> `Ok x + | `Error e -> `Error (string_of_error e) +end + +module Make(I : INPUT) = struct + module R = ReaderOfInput(I) + include MakeFromReader(R) +end + +module Str = MakeFromReader(struct + (* reader of string *) + type t = { + str : string; + mutable i : int; + mutable rcol : int; + mutable rline : int; + } + type source = string + + let create str = { + str; + i = 0; + rcol = 1; + rline = 1; + } + let line t = t.rline + let col t = t.rcol + let peek t = + if t.i = String.length t.str then EOF else Yield (String.get t.str t.i) + let junk t = + assert (t.i < String.length t.str); + t.i <- t.i + 1 + let next t = + if t.i = String.length t.str then EOF + else ( + let c = String.get t.str t.i in + t.i <- t.i + 1; + if c = '\n' then ( + t.rcol <- 1; + t.rline <- t.rline + 1 + ) else t.rcol <- t.rcol + 1; + Yield c + ) +end) + +module Chan = Make(struct + type t = in_channel + let read = input +end) diff --git a/src/string/app_parse.mli b/src/string/app_parse.mli new file mode 100644 index 00000000..efe04756 --- /dev/null +++ b/src/string/app_parse.mli @@ -0,0 +1,219 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Applicative Parser Combinators} + +{b status: experimental} +@since NEXT_RELEASE +*) + +type ('a,'b) result = [`Error of 'b | `Ok of 'a] + +type 'a t +(** Parser that yields an error or a value of type 'a *) + +(** {6 Combinators} *) + +val return : 'a -> 'a t +(** Parser that succeeds with the given value *) + +val fail : string -> 'a t +(** [fail msg] fails with the given error message *) + +(* TODO: a format version of fail *) + +val app : ('a -> 'b) t -> 'a t -> 'b t +(** Applicative *) + +val map : ('a -> 'b) -> 'a t -> 'b t +(** Map the parsed value *) + +val int : int t +(** Parse an integer *) + +val float : float t +(** Parse a floating point number *) + +val bool : bool t +(** Parse "true" or "false" *) + +val char : char -> char t +(** [char c] parses [c] and [c] only *) + +val any_of : string -> char t +(** Parse any of the chars present in the given string *) + +val alpha_lower : char t + +val alpha_upper : char t + +val alpha : char t + +val num : char t + +val alpha_num : char t + +val word : string t +(** [word] parses any identifier not starting with an integer and + not containing any whitespace nor delimiter + TODO: specify *) + +val spaces : unit t +(** Parse a sequence of ['\t'] and [' '] *) + +val spaces1 : unit t +(** Same as {!spaces} but requires at least one space *) + +val white : unit t +(** Parse a sequence of ['\t'], ['\n'] and [' '] *) + +val white1 : unit t + +val eof : unit t +(** Matches the end of input, fails otherwise *) + +val many : ?sep:unit t -> 'a t -> 'a list t +(** 0 or more parsed elements of the given type. + @param sep separator between elements of the list (for instance, {!space}) *) + +val many1 : ?sep:unit t -> 'a t -> 'a list t +(** Same as {!many}, but needs at least one element *) + +val skip : _ t -> unit t +(** Skip 0 or more instances of the given parser *) + +val skip1 : _ t -> unit t + +val opt : 'a t -> 'a option t +(** [opt x] tries to parse [x], and returns [None] otherwise *) + +val filter : ('a -> bool) -> 'a t -> 'a t +(** [filter f p] parses the same as [p], but fails if the returned value + does not satisfy [f] *) + +val switch_c : ?default:'a t -> (char * 'a t) list -> 'a t +(** [switch_c l] matches the next char and uses the corresponding parser. + Fails if the next char is not in the list, unless default is defined. + @param default parser to use if no char matches + @raise Invalid_argument if some char occurs several times in [l] *) + +val switch_s : (string * 'a t) list -> 'a t +(** [switch_s l] attempts to match matches any of the strings in [l]. + If one of those strings matches, the corresponding parser + is used from now on. + @raise Invalid_argument if some string is a prefix of another string, + or is empty, or if the list is empty *) + +val choice : 'a t list -> 'a t +(** [choice l] chooses between the parsers, unambiguously + @raise Invalid_argument if the list is empty, or if some parsers + overlap, making the choice ambiguous *) + +val fix : ('a t -> 'a t) -> 'a t +(** [fix f] makes a fixpoint *) + +module Infix : sig + val (>|=) : 'a t -> ('a -> 'b) -> 'b t + + val (<*>) : ('a -> 'b) t -> 'a t -> 'b t + (** Synonym to {!app} *) + + val (>>) : _ t -> 'a t -> 'a t + (** [a >> b] parses [a], ignores its result, then parses [b] *) + + val (<<) : 'a t -> _ t -> 'a t + (** [a << b] parses [a], then [b], and discards [b] to return [a] *) + + val (<+>) : 'a t -> 'a t -> 'a t + (** [a <+> b] is [choice [a;b]], a binary choice *) +end + +include module type of Infix + +(** {2 Signatures} *) + +(** {6 Parsing} *) + +type error = { + line: int; + col: int; + msg: string; +} + +val string_of_error : error -> string + +exception Error of error + +module type S = sig + type source + (** Source of characters *) + + val parse : source -> 'a t -> ('a, error) result + (** Parse the given source using the parser, and returns the parsed value. *) + + val parse': source -> 'a t -> ('a, string) result + (** Same as {!parse}, but returns a user-friendly string in case of failure *) + + val parse_exn : source -> 'a t -> 'a + (** Unsafe version of {!parse}. + @raise Error if parsing fails *) +end + +(** {2 Parse} *) + +module type INPUT = sig + type t + + val read : t -> Bytes.t -> int -> int -> int +end + +module Make(I : INPUT) : S with type source = I.t + +(** {2 Low-level interface} *) + +type token = + | Yield of char + | EOF + +module type READER = sig + type t + type source (* underlying source *) + + val create : source -> t + val peek : t -> token (* peek; do not consume *) + val next : t -> token (* read and consume *) + val junk : t -> unit (* consume last token, obtained with junk *) + val line : t -> int + val col : t -> int +end + +module MakeFromReader(R : READER) : S with type source = R.source + +(** {2 Defaults} *) + +module Str : S with type source = string + +module Chan : S with type source = in_channel