a few fixes, including implementing quoted

This commit is contained in:
Simon Cruanes 2015-03-12 18:51:26 +01:00
parent 0dc8b90d66
commit 0e62f9a345
2 changed files with 23 additions and 7 deletions

View file

@ -42,6 +42,7 @@ let print_char = function
| '\t' -> "\\t" | '\t' -> "\\t"
| '\n' -> "\\n" | '\n' -> "\\n"
| '\r' -> "\\r" | '\r' -> "\\r"
| '"' -> "\\\""
| c -> str "%c" c | c -> str "%c" c
let print_char_set set = let print_char_set set =
@ -332,10 +333,12 @@ let alpha_lower_ = set_of_string "abcdefghijklmonpqrstuvwxyz"
let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ" let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ"
let num_ = set_of_string "0123456789" let num_ = set_of_string "0123456789"
let alpha_ = CharSet.union alpha_lower_ alpha_upper_ let alpha_ = CharSet.union alpha_lower_ alpha_upper_
let symbols_ = set_of_string "|!;$#@%&-_/="
let alpha_lower = any_of' alpha_lower_ let alpha_lower = any_of' alpha_lower_
let alpha_upper = any_of' alpha_upper_ let alpha_upper = any_of' alpha_upper_
let num = any_of' num_ let num = any_of' num_
let symbols = any_of' symbols_
let alpha = any_of' alpha_ let alpha = any_of' alpha_
let alpha_num = any_of' (CharSet.union num_ alpha_) let alpha_num = any_of' (CharSet.union num_ alpha_)
@ -502,13 +505,14 @@ end
include Infix include Infix
(* TODO: more efficient version, with buffer *)
let word = let word =
pure (fun c s -> str_of_l (c :: s)) <*> alpha <*> many alpha_num pure (fun c s -> str_of_l (c :: s)) <*> alpha <*> many alpha_num
(* TODO *)
let quoted = let quoted =
make (Lazy (lazy (failwith "quoted: not implemented"))) let q = char '"' in
let escaped = char '\\' >> char '"' in
let inner = choice [escaped; alpha_num; any_of "()' \t\n|!;$#@%&-_/=~.,:<>[]"] in
q >> (many inner >|= str_of_l) << q
(** {2 Compilation} *) (** {2 Compilation} *)
@ -644,7 +648,7 @@ module ReaderOfInput(I : INPUT) : READER with type source = I.t = struct
let col t = t.rcol let col t = t.rcol
let create input = { let create input = {
rline=0; rline=1;
rcol=0; rcol=0;
input; input;
buf = Bytes.make 1024 ' '; buf = Bytes.make 1024 ' ';
@ -746,7 +750,7 @@ module MakeFromReader(R : READER) : S with type source = R.source = struct
| EOF -> errorf r "expected any of %s, got EOF" (print_char_set set) | EOF -> errorf r "expected any of %s, got EOF" (print_char_set set)
| Yield c -> | Yield c ->
if CharSet.mem c set then c if CharSet.mem c set then c
else errorf r "expected any of %s, got %c" (print_char_set set) c else errorf r "expected any of %s, got '%s'" (print_char_set set) (print_char c)
end end
| C_SwitchC (map, def) -> | C_SwitchC (map, def) ->
begin match R.peek r with begin match R.peek r with

View file

@ -36,9 +36,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
let mkatom a = Atom a;; let mkatom a = Atom a;;
let mklist l = List l;; let mklist l = List l;;
let ident_char = alpha_num <+> any_of "|!;$#@%&-_/=*.:~+[]<>'" ;;
let ident = many1 ident_char >|= str_of_l ;;
let atom = (ident <+> quoted) >|= mkatom ;;
let sexp = fix (fun sexp -> let sexp = fix (fun sexp ->
spaces >> white >>
((word >|= mkatom) <+> (atom <+>
((char '(' >> many sexp << char ')') >|= mklist) ((char '(' >> many sexp << char ')') >|= mklist)
) )
);; );;
@ -99,6 +103,9 @@ val alpha_upper : char t
val alpha : char t val alpha : char t
val symbols : char t
(** symbols, such as "!-=_"... *)
val num : char t val num : char t
val alpha_num : char t val alpha_num : char t
@ -147,6 +154,11 @@ val filter : ('a -> bool) -> 'a t -> 'a t
(** [filter f p] parses the same as [p], but fails if the returned value (** [filter f p] parses the same as [p], but fails if the returned value
does not satisfy [f] *) does not satisfy [f] *)
(* TODO: complement operator any_but (all but \, for instance) *)
(* TODO: a "if-then-else" combinator (assuming the test has a
set of possible first chars) *)
val switch_c : ?default:'a t -> (char * 'a t) list -> 'a t val switch_c : ?default:'a t -> (char * 'a t) list -> 'a t
(** [switch_c l] matches the next char and uses the corresponding parser. (** [switch_c l] matches the next char and uses the corresponding parser.
Fails if the next char is not in the list, unless default is defined. Fails if the next char is not in the list, unless default is defined.