diff --git a/src/string/app_parse.ml b/src/string/app_parse.ml index 5376f4ff..e841d10b 100644 --- a/src/string/app_parse.ml +++ b/src/string/app_parse.ml @@ -42,6 +42,7 @@ let print_char = function | '\t' -> "\\t" | '\n' -> "\\n" | '\r' -> "\\r" + | '"' -> "\\\"" | c -> str "%c" c let print_char_set set = @@ -332,10 +333,12 @@ let alpha_lower_ = set_of_string "abcdefghijklmonpqrstuvwxyz" let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ" let num_ = set_of_string "0123456789" let alpha_ = CharSet.union alpha_lower_ alpha_upper_ +let symbols_ = set_of_string "|!;$#@%&-_/=" let alpha_lower = any_of' alpha_lower_ let alpha_upper = any_of' alpha_upper_ let num = any_of' num_ +let symbols = any_of' symbols_ let alpha = any_of' alpha_ let alpha_num = any_of' (CharSet.union num_ alpha_) @@ -502,13 +505,14 @@ end include Infix -(* TODO: more efficient version, with buffer *) let word = pure (fun c s -> str_of_l (c :: s)) <*> alpha <*> many alpha_num -(* TODO *) let quoted = - make (Lazy (lazy (failwith "quoted: not implemented"))) + let q = char '"' in + let escaped = char '\\' >> char '"' in + let inner = choice [escaped; alpha_num; any_of "()' \t\n|!;$#@%&-_/=~.,:<>[]"] in + q >> (many inner >|= str_of_l) << q (** {2 Compilation} *) @@ -644,7 +648,7 @@ module ReaderOfInput(I : INPUT) : READER with type source = I.t = struct let col t = t.rcol let create input = { - rline=0; + rline=1; rcol=0; input; buf = Bytes.make 1024 ' '; @@ -746,7 +750,7 @@ module MakeFromReader(R : READER) : S with type source = R.source = struct | EOF -> errorf r "expected any of %s, got EOF" (print_char_set set) | Yield c -> if CharSet.mem c set then c - else errorf r "expected any of %s, got %c" (print_char_set set) c + else errorf r "expected any of %s, got '%s'" (print_char_set set) (print_char c) end | C_SwitchC (map, def) -> begin match R.peek r with diff --git a/src/string/app_parse.mli b/src/string/app_parse.mli index 7d1249bb..8e8cab4c 100644 --- a/src/string/app_parse.mli +++ b/src/string/app_parse.mli @@ -36,9 +36,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. let mkatom a = Atom a;; let mklist l = List l;; + let ident_char = alpha_num <+> any_of "|!;$#@%&-_/=*.:~+[]<>'" ;; + let ident = many1 ident_char >|= str_of_l ;; + let atom = (ident <+> quoted) >|= mkatom ;; + let sexp = fix (fun sexp -> - spaces >> - ((word >|= mkatom) <+> + white >> + (atom <+> ((char '(' >> many sexp << char ')') >|= mklist) ) );; @@ -99,6 +103,9 @@ val alpha_upper : char t val alpha : char t +val symbols : char t +(** symbols, such as "!-=_"... *) + val num : char t val alpha_num : char t @@ -147,6 +154,11 @@ val filter : ('a -> bool) -> 'a t -> 'a t (** [filter f p] parses the same as [p], but fails if the returned value does not satisfy [f] *) + +(* TODO: complement operator any_but (all but \, for instance) *) +(* TODO: a "if-then-else" combinator (assuming the test has a + set of possible first chars) *) + val switch_c : ?default:'a t -> (char * 'a t) list -> 'a t (** [switch_c l] matches the next char and uses the corresponding parser. Fails if the next char is not in the list, unless default is defined.