mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-01-28 11:54:51 -05:00
wip: app_parse, now with both definition and compiled AST, lazy
compilation, printer
This commit is contained in:
parent
5c7cb55378
commit
3e769750b6
2 changed files with 297 additions and 179 deletions
|
|
@ -38,19 +38,39 @@ let str fmt = Printf.sprintf fmt
|
||||||
module CharSet = Set.Make(Char)
|
module CharSet = Set.Make(Char)
|
||||||
module CharMap = Map.Make(Char)
|
module CharMap = Map.Make(Char)
|
||||||
|
|
||||||
|
let print_char = function
|
||||||
|
| '\t' -> "\\t"
|
||||||
|
| '\n' -> "\\n"
|
||||||
|
| '\r' -> "\\r"
|
||||||
|
| c -> str "%c" c
|
||||||
|
|
||||||
let print_char_set set =
|
let print_char_set set =
|
||||||
let l = CharSet.fold
|
let buf = Buffer.create 32 in
|
||||||
(fun c acc -> str "'%c'" c :: acc) set [] in
|
Buffer.add_char buf '"';
|
||||||
String.concat ", " l
|
CharSet.iter (fun c -> Buffer.add_string buf (print_char c)) set;
|
||||||
|
Buffer.add_char buf '"';
|
||||||
|
Buffer.contents buf
|
||||||
|
|
||||||
let domain_of_char_map m =
|
let domain_of_char_map m =
|
||||||
CharMap.fold (fun c _ set -> CharSet.add c set) m CharSet.empty
|
CharMap.fold (fun c _ set -> CharSet.add c set) m CharSet.empty
|
||||||
|
|
||||||
let print_char_map map =
|
let print_char_map map =
|
||||||
let l = CharMap.fold
|
let l = CharMap.fold
|
||||||
(fun c _ acc -> str "'%c'" c :: acc) map [] in
|
(fun c _ acc -> print_char c :: acc) map [] in
|
||||||
String.concat ", " l
|
String.concat ", " l
|
||||||
|
|
||||||
|
let ppmap ?(sep=", ") pp_k pp_v fmt m =
|
||||||
|
let first = ref true in
|
||||||
|
CharMap.iter
|
||||||
|
(fun k v ->
|
||||||
|
if !first then first := false else Format.pp_print_string fmt sep;
|
||||||
|
pp_k fmt k;
|
||||||
|
Format.pp_print_string fmt " → ";
|
||||||
|
pp_v fmt v;
|
||||||
|
Format.pp_print_cut fmt ()
|
||||||
|
) m;
|
||||||
|
()
|
||||||
|
|
||||||
let set_of_string s =
|
let set_of_string s =
|
||||||
let set = ref CharSet.empty in
|
let set = ref CharSet.empty in
|
||||||
String.iter
|
String.iter
|
||||||
|
|
@ -75,53 +95,91 @@ let str_of_l l =
|
||||||
List.iteri (fun i c -> Bytes.set b i c) l;
|
List.iteri (fun i c -> Bytes.set b i c) l;
|
||||||
Bytes.unsafe_to_string b
|
Bytes.unsafe_to_string b
|
||||||
|
|
||||||
type _ t =
|
type 'a t = {
|
||||||
| Return : 'a -> 'a t
|
parse : 'a parse;
|
||||||
| Map : ('a -> 'b) * 'a t -> 'b t
|
mutable compiled : 'a compiled;
|
||||||
| Filter: ('a -> bool) * 'a t -> 'a t
|
}
|
||||||
| App : ('a -> 'b) t * 'a t -> 'b t
|
|
||||||
| AppLeft : 'a t * 'b t -> 'a t
|
|
||||||
| AppRight : 'a t * 'b t -> 'b t
|
|
||||||
| Fail : string -> 'a t
|
|
||||||
| Int : int t
|
|
||||||
| Float : float t
|
|
||||||
| AnyOf : CharSet.t -> char t
|
|
||||||
| Many : CharSet.t * 'a t * unit t * multiplicity -> 'a list t
|
|
||||||
| Skip : CharSet.t * 'a t * multiplicity -> unit t (* same as Many, but ignores *)
|
|
||||||
| SwitchC : 'a t CharMap.t * 'a t option -> 'a t
|
|
||||||
| Lazy : 'a t Lazy.t -> 'a t
|
|
||||||
| Eof : unit t
|
|
||||||
|
|
||||||
let return x = Return x
|
(* syntactic version *)
|
||||||
let pure = return
|
and _ parse =
|
||||||
|
| Return : 'a -> 'a parse
|
||||||
|
| Map : ('a -> 'b) * 'a t -> 'b parse
|
||||||
|
| Filter: ('a -> bool) * 'a t -> 'a parse
|
||||||
|
| App : ('a -> 'b) t * 'a t -> 'b parse
|
||||||
|
| AppLeft : 'a t * 'b t -> 'a parse
|
||||||
|
| AppRight : 'a t * 'b t -> 'b parse
|
||||||
|
| Fail : string -> 'a parse
|
||||||
|
| Int : int parse
|
||||||
|
| Float : float parse
|
||||||
|
| AnyOf : CharSet.t -> char parse
|
||||||
|
| Many : 'a t * unit t * multiplicity -> 'a list parse
|
||||||
|
| Skip : 'a t * multiplicity -> unit parse (* same as Many, but ignores *)
|
||||||
|
| SwitchC : 'a t CharMap.t * 'a t option -> 'a parse
|
||||||
|
| Lazy : 'a t lazy_t -> 'a parse
|
||||||
|
| Eof : unit parse
|
||||||
|
|
||||||
let success = Return ()
|
(* compiled version *)
|
||||||
|
and 'a compiled =
|
||||||
|
| C_Return : 'a -> 'a compiled
|
||||||
|
| C_Map : ('a -> 'b) * 'a t -> 'b compiled
|
||||||
|
| C_Filter: ('a -> bool) * 'a t -> 'a compiled
|
||||||
|
| C_App : ('a -> 'b) t * 'a t -> 'b compiled
|
||||||
|
| C_AppLeft : 'a t * 'b t -> 'a compiled
|
||||||
|
| C_AppRight : 'a t * 'b t -> 'b compiled
|
||||||
|
| C_Fail : string -> 'a compiled
|
||||||
|
| C_Int : int compiled
|
||||||
|
| C_Float : float compiled
|
||||||
|
| C_AnyOf : CharSet.t -> char compiled
|
||||||
|
| C_SwitchC : 'a t CharMap.t * 'a t option -> 'a compiled
|
||||||
|
| C_Eof : unit compiled
|
||||||
|
| C_NotCompiled : 'a compiled (* to be compiled *)
|
||||||
|
|
||||||
let fail msg = Fail msg
|
(** {2 Helpers} *)
|
||||||
|
|
||||||
let failf fmt = Printf.ksprintf (fun msg -> fail msg) fmt
|
(* build a new parser *)
|
||||||
|
let make parse = { parse; compiled=C_NotCompiled; }
|
||||||
|
|
||||||
let map f x = match x with
|
let ppmult fmt = function
|
||||||
| Map (g, y) -> Map (compose f g, y)
|
| Star -> Format.pp_print_string fmt "*"
|
||||||
| Return x -> Return (f x)
|
| Plus -> Format.pp_print_string fmt "+"
|
||||||
| _ -> Map (f,x)
|
| Question -> Format.pp_print_string fmt "?"
|
||||||
|
|
||||||
let app f x = match f with
|
let print fmt p =
|
||||||
| Return f -> map f x
|
let depth = ref 0 in
|
||||||
| _ -> App (f, x)
|
(* print up to a given limit into lazy values *)
|
||||||
|
let rec print_aux
|
||||||
let filter f x = match x with
|
: type a. Format.formatter -> a t -> unit
|
||||||
| Return y -> if f y then Return y else fail "filter failed"
|
= fun fmt p ->
|
||||||
| Filter (f', y) -> Filter ((fun x -> f' x && f x), y)
|
let ppstr = Format.pp_print_string
|
||||||
| _ -> Filter (f, x)
|
and ppf fmt x = Format.fprintf fmt x in
|
||||||
|
let ppc fmt c = ppf fmt "'%s'" (print_char c) in
|
||||||
let app_left a b = AppLeft (a, b) (* return (fun x y -> x) <*> a <*> b *)
|
match p.parse with
|
||||||
|
| Return _ -> ppstr fmt "<ret>"
|
||||||
let app_right a b = AppRight (a, b) (* return (fun x y -> y) <*> a <*> b *)
|
| Map (_, x) -> ppf fmt "@[(map@ %a)@]" print_aux x
|
||||||
|
| Filter (_, x) -> ppf fmt "@[(filter@ %a)@]" print_aux x
|
||||||
let int = Int
|
| App (f, x) -> ppf fmt "@[<2>@[%a@]@ <*>@ @[%a@]@]" print_aux f print_aux x
|
||||||
|
| AppLeft (a, b) -> ppf fmt "@[%a@ <<@ %a@]" print_aux a print_aux b
|
||||||
let float = Float
|
| AppRight (a, b) -> ppf fmt "@[%a@ >>@ %a@]" print_aux a print_aux b
|
||||||
|
| Fail _ -> ppf fmt "<fail>"
|
||||||
|
| Int -> ppstr fmt "<int>"
|
||||||
|
| Float -> ppstr fmt "<float>"
|
||||||
|
| AnyOf set -> ppf fmt "@[(any@ %s)@]" (print_char_set set)
|
||||||
|
| Many (p, sep, mult) ->
|
||||||
|
ppf fmt "@[<2>(@[%a@]@ sep:@[%a@])%a@]" print_aux p print_aux sep ppmult mult
|
||||||
|
| Skip (p, mult) ->
|
||||||
|
ppf fmt "@[<2>(skip @[%a@]%a)@]" print_aux p ppmult mult
|
||||||
|
| SwitchC (map, None) ->
|
||||||
|
ppf fmt "@[<hv2>(switch@ @[%a@])@]" (ppmap ppc print_aux) map
|
||||||
|
| SwitchC (map, Some o) ->
|
||||||
|
ppf fmt "@[<hv2>(switch@ @[%a@]@ or:%a)@]" (ppmap ppc print_aux) map print_aux o
|
||||||
|
| Lazy _ when !depth > 3 -> ppf fmt "<lazy>"
|
||||||
|
| Lazy (lazy p) ->
|
||||||
|
incr depth;
|
||||||
|
print_aux fmt p;
|
||||||
|
decr depth
|
||||||
|
| Eof -> ppstr fmt "<eof>"
|
||||||
|
in
|
||||||
|
print_aux fmt p
|
||||||
|
|
||||||
let int_first_char = lazy (set_of_string "-0123456789")
|
let int_first_char = lazy (set_of_string "-0123456789")
|
||||||
let float_first_char = lazy (set_of_string ".-0123456789")
|
let float_first_char = lazy (set_of_string ".-0123456789")
|
||||||
|
|
@ -136,56 +194,77 @@ type possible_first_chars =
|
||||||
let ret_set set =
|
let ret_set set =
|
||||||
if CharSet.cardinal set = 256 then AllChars else Set set
|
if CharSet.cardinal set = 256 then AllChars else Set set
|
||||||
|
|
||||||
|
(*
|
||||||
let union_possible_first_chars a b = match a, b with
|
let union_possible_first_chars a b = match a, b with
|
||||||
| Set a, Set b -> Set (CharSet.union a b)
|
| Set a, Set b -> Set (CharSet.union a b)
|
||||||
| IsFail e, _ | _, IsFail e -> IsFail e
|
| IsFail e, _ | _, IsFail e -> IsFail e
|
||||||
| AllChars, _ | _, AllChars -> AllChars
|
| AllChars, _ | _, AllChars -> AllChars
|
||||||
| NoChar, o | o, NoChar -> o
|
| NoChar, o | o, NoChar -> o
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* TODO: handle cases that can consume 0 or more chars (skip, many...) *)
|
||||||
|
|
||||||
(* set of possibilities for the first char of a parser *)
|
(* set of possibilities for the first char of a parser *)
|
||||||
let rec possible_first_chars
|
let rec possible_first_chars
|
||||||
: type a. a t -> possible_first_chars
|
: type a. a parse -> possible_first_chars
|
||||||
= function
|
= function
|
||||||
| Return _ -> NoChar
|
| Return _ -> NoChar
|
||||||
| Map (_, x) -> possible_first_chars x
|
| Map (_, x) -> possible_first_chars x.parse
|
||||||
| Filter (_, x) -> possible_first_chars x
|
| Filter (_, x) -> possible_first_chars x.parse
|
||||||
| App (f, _) -> possible_first_chars f
|
| App (f, _) -> possible_first_chars f.parse
|
||||||
| AppLeft (a, _) -> possible_first_chars a
|
| AppLeft (a, _) -> possible_first_chars a.parse (* TODO: handle NoChar *)
|
||||||
| AppRight (a, _) -> possible_first_chars a
|
| AppRight (a, _) -> possible_first_chars a.parse
|
||||||
| Fail e -> IsFail e
|
| Fail e -> IsFail e
|
||||||
| Int -> Set (Lazy.force int_first_char)
|
| Int -> Set (Lazy.force int_first_char)
|
||||||
| Float -> Set (Lazy.force float_first_char)
|
| Float -> Set (Lazy.force float_first_char)
|
||||||
| AnyOf set -> ret_set set
|
| AnyOf set -> ret_set set
|
||||||
| Many(set, p, _, (Question | Star)) ->
|
| Many (p, _, _) -> possible_first_chars p.parse
|
||||||
union_possible_first_chars (ret_set set) (possible_first_chars p)
|
| Skip (p, _) -> possible_first_chars p.parse
|
||||||
| Skip (set, p, (Question | Star)) ->
|
|
||||||
union_possible_first_chars (ret_set set) (possible_first_chars p)
|
|
||||||
| Many (set, _, _, Plus) -> Set set
|
|
||||||
| Skip (set, _, Plus) -> ret_set set
|
|
||||||
| SwitchC (map, None) -> ret_set (domain_of_char_map map)
|
| SwitchC (map, None) -> ret_set (domain_of_char_map map)
|
||||||
| SwitchC (_, Some _) -> AllChars
|
| SwitchC (_, Some _) -> AllChars
|
||||||
| Lazy (lazy p) -> possible_first_chars p
|
| Lazy (lazy p) -> possible_first_chars p.parse
|
||||||
| Eof -> NoChar
|
| Eof -> NoChar
|
||||||
|
|
||||||
let many_ ~sep ~mult ~p = match possible_first_chars p with
|
(** {2 Combinators} *)
|
||||||
| Set set -> Many (set, p, sep, mult)
|
|
||||||
| IsFail msg -> Fail msg
|
|
||||||
| AllChars -> invalid_arg (str "many: invalid parser (always succeeds)")
|
|
||||||
| NoChar -> invalid_arg (str "many: invalid parser (does not consume input)")
|
|
||||||
|
|
||||||
let many ?(sep=success) p = many_ ~sep ~mult:Star ~p
|
let return x = {parse=Return x; compiled=C_Return x}
|
||||||
|
let pure = return
|
||||||
|
|
||||||
let many1 ?(sep=success) p = many_ ~sep ~mult:Plus ~p
|
let success = pure ()
|
||||||
|
|
||||||
let skip_ ~mult ~p = match possible_first_chars p with
|
let fail msg = make (Fail msg)
|
||||||
| Set set -> Skip (set, p, mult)
|
|
||||||
| IsFail msg -> Fail msg
|
|
||||||
| AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)")
|
|
||||||
| NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)")
|
|
||||||
|
|
||||||
let skip p = skip_ ~mult:Star ~p
|
let failf fmt = Printf.ksprintf (fun msg -> fail msg) fmt
|
||||||
|
|
||||||
let skip1 p = skip_ ~mult:Plus ~p
|
let map f x = match x.parse with
|
||||||
|
| Map (g, y) -> make (Map (compose f g, y))
|
||||||
|
| Return x -> pure (f x)
|
||||||
|
| _ -> make (Map (f, x))
|
||||||
|
|
||||||
|
let app f x = match f.parse with
|
||||||
|
| Return f -> map f x
|
||||||
|
| _ -> make (App (f, x))
|
||||||
|
|
||||||
|
let filter f x = match x.parse with
|
||||||
|
| Return y -> if f y then return y else fail "filter failed"
|
||||||
|
| Filter (f', y) -> make (Filter ((fun x -> f' x && f x), y))
|
||||||
|
| _ -> make (Filter (f, x))
|
||||||
|
|
||||||
|
let app_left a b = make (AppLeft (a, b)) (* return (fun x y -> x) <*> a <*> b *)
|
||||||
|
|
||||||
|
let app_right a b = make (AppRight (a, b)) (* return (fun x y -> y) <*> a <*> b *)
|
||||||
|
|
||||||
|
let int = make Int
|
||||||
|
|
||||||
|
let float = make Float
|
||||||
|
|
||||||
|
let many ?(sep=success) p = make (Many (p, sep, Star))
|
||||||
|
|
||||||
|
let many1 ?(sep=success) p = make (Many (p, sep, Plus))
|
||||||
|
|
||||||
|
let skip p = make (Skip (p, Star))
|
||||||
|
|
||||||
|
let skip1 p = make (Skip (p, Plus))
|
||||||
|
|
||||||
let opt p =
|
let opt p =
|
||||||
map
|
map
|
||||||
|
|
@ -193,11 +272,12 @@ let opt p =
|
||||||
| [x] -> Some x
|
| [x] -> Some x
|
||||||
| [] -> None
|
| [] -> None
|
||||||
| _ -> assert false
|
| _ -> assert false
|
||||||
) (many_ ~sep:success ~mult:Question ~p)
|
) (make (Many (p, success, Question)))
|
||||||
|
|
||||||
let any_of s = AnyOf (set_of_string s)
|
let any_of' s = make (AnyOf s)
|
||||||
|
let any_of s = any_of' (set_of_string s)
|
||||||
|
|
||||||
let char c = AnyOf (CharSet.singleton c)
|
let char c = make (AnyOf (CharSet.singleton c))
|
||||||
|
|
||||||
let spaces = skip (any_of " \t")
|
let spaces = skip (any_of " \t")
|
||||||
let spaces1 = skip1 (any_of " \t")
|
let spaces1 = skip1 (any_of " \t")
|
||||||
|
|
@ -210,13 +290,13 @@ let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ"
|
||||||
let num_ = set_of_string "0123456789"
|
let num_ = set_of_string "0123456789"
|
||||||
let alpha_ = CharSet.union alpha_lower_ alpha_upper_
|
let alpha_ = CharSet.union alpha_lower_ alpha_upper_
|
||||||
|
|
||||||
let alpha_lower = AnyOf alpha_lower_
|
let alpha_lower = any_of' alpha_lower_
|
||||||
let alpha_upper = AnyOf alpha_upper_
|
let alpha_upper = any_of' alpha_upper_
|
||||||
let num = AnyOf num_
|
let num = any_of' num_
|
||||||
let alpha = AnyOf alpha_
|
let alpha = any_of' alpha_
|
||||||
let alpha_num = AnyOf (CharSet.union num_ alpha_)
|
let alpha_num = any_of' (CharSet.union num_ alpha_)
|
||||||
|
|
||||||
let eof = Eof
|
let eof = make Eof
|
||||||
|
|
||||||
let switch_c ?default l =
|
let switch_c ?default l =
|
||||||
if l = [] then match default with
|
if l = [] then match default with
|
||||||
|
|
@ -230,15 +310,15 @@ let switch_c ?default l =
|
||||||
CharMap.add c t map
|
CharMap.add c t map
|
||||||
) CharMap.empty l
|
) CharMap.empty l
|
||||||
in
|
in
|
||||||
SwitchC (map, default)
|
make (SwitchC (map, default))
|
||||||
|
|
||||||
exception ExnIsFail of string
|
exception ExnIsFail of string
|
||||||
|
|
||||||
(* binary choice *)
|
(* binary choice: compiled into decision tree *)
|
||||||
let rec merge a b =
|
let rec merge a b =
|
||||||
(* build a switch by first char *)
|
(* build a switch by first char *)
|
||||||
try
|
try
|
||||||
begin match a, b with
|
begin match a.parse, b.parse with
|
||||||
| SwitchC (map_a, def_a), SwitchC (map_b, def_b) ->
|
| SwitchC (map_a, def_a), SwitchC (map_b, def_b) ->
|
||||||
(* merge jump tables *)
|
(* merge jump tables *)
|
||||||
let def = match def_a, def_b with
|
let def = match def_a, def_b with
|
||||||
|
|
@ -256,13 +336,13 @@ let rec merge a b =
|
||||||
| None, None -> assert false
|
| None, None -> assert false
|
||||||
) map_a map_b
|
) map_a map_b
|
||||||
in
|
in
|
||||||
SwitchC (map, def)
|
make (SwitchC (map, def))
|
||||||
| SwitchC (map, def), other
|
| SwitchC (map, def), other
|
||||||
| other, SwitchC (map, def) ->
|
| other, SwitchC (map, def) ->
|
||||||
let map', def' = match possible_first_chars other, def with
|
let map', def' = match possible_first_chars other, def with
|
||||||
| AllChars, _ ->
|
| AllChars, _ ->
|
||||||
invalid_arg "choice: ambiguous, several parsers accept any input"
|
invalid_arg "choice: ambiguous, several parsers accept any input"
|
||||||
| NoChar, None -> map, Some other
|
| NoChar, None -> map, Some (make other)
|
||||||
| NoChar, Some _ ->
|
| NoChar, Some _ ->
|
||||||
invalid_arg "choice: ambiguous"
|
invalid_arg "choice: ambiguous"
|
||||||
| IsFail msg, _ -> raise (ExnIsFail msg)
|
| IsFail msg, _ -> raise (ExnIsFail msg)
|
||||||
|
|
@ -272,12 +352,12 @@ let rec merge a b =
|
||||||
(str "choice: ambiguous parsers (overlap on {%s})"
|
(str "choice: ambiguous parsers (overlap on {%s})"
|
||||||
(print_char_set (CharSet.inter set (domain_of_char_map map))));
|
(print_char_set (CharSet.inter set (domain_of_char_map map))));
|
||||||
(* else: merge jump tables *)
|
(* else: merge jump tables *)
|
||||||
let map = map_add_set map set other in
|
let map = map_add_set map set (make other) in
|
||||||
map, def
|
map, def
|
||||||
in
|
in
|
||||||
SwitchC (map', def')
|
make (SwitchC (map', def'))
|
||||||
| _ ->
|
| _ ->
|
||||||
begin match possible_first_chars a, possible_first_chars b with
|
begin match possible_first_chars a.parse, possible_first_chars b.parse with
|
||||||
| Set set1, Set set2 ->
|
| Set set1, Set set2 ->
|
||||||
if CharSet.exists (fun c -> CharSet.mem c set2) set1
|
if CharSet.exists (fun c -> CharSet.mem c set2) set1
|
||||||
then invalid_arg
|
then invalid_arg
|
||||||
|
|
@ -285,23 +365,22 @@ let rec merge a b =
|
||||||
(print_char_set (CharSet.inter set1 set2)));
|
(print_char_set (CharSet.inter set1 set2)));
|
||||||
let map = map_add_set CharMap.empty set1 a in
|
let map = map_add_set CharMap.empty set1 a in
|
||||||
let map = map_add_set map set2 b in
|
let map = map_add_set map set2 b in
|
||||||
SwitchC (map, None)
|
make (SwitchC (map, None))
|
||||||
| IsFail e, _ | _, IsFail e -> raise (ExnIsFail e)
|
| IsFail e, _ | _, IsFail e -> raise (ExnIsFail e)
|
||||||
| Set s, NoChar -> SwitchC (map_add_set CharMap.empty s a, Some b)
|
| Set s, NoChar -> make (SwitchC (map_add_set CharMap.empty s a, Some b))
|
||||||
| NoChar, Set s -> SwitchC (map_add_set CharMap.empty s b, Some a)
|
| NoChar, Set s -> make (SwitchC (map_add_set CharMap.empty s b, Some a))
|
||||||
| AllChars, _ | _, AllChars ->
|
| AllChars, _ | _, AllChars ->
|
||||||
invalid_arg "choice: ambiguous parsers (one accepts everything)"
|
invalid_arg "choice: ambiguous parsers (one accepts everything)"
|
||||||
| NoChar, NoChar ->
|
| NoChar, NoChar ->
|
||||||
invalid_arg "choice: ambiguous parsers (both accept nothing)"
|
invalid_arg "choice: ambiguous parsers (both accept nothing)"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
with ExnIsFail msg ->
|
with ExnIsFail msg -> make (Fail msg)
|
||||||
fail msg
|
|
||||||
|
|
||||||
let rec choice l = match l with
|
let rec choice = function
|
||||||
| [] -> invalid_arg "choice: empty list";
|
| [] -> invalid_arg "choice: empty list";
|
||||||
| [x] -> x
|
| [x] -> x
|
||||||
| a :: b :: tail -> choice (merge a b :: tail)
|
| a :: tl -> merge a (choice tl)
|
||||||
|
|
||||||
(* temporary structure for buildings switches *)
|
(* temporary structure for buildings switches *)
|
||||||
type 'a trie =
|
type 'a trie =
|
||||||
|
|
@ -312,7 +391,7 @@ let trie_empty = TrieNode CharMap.empty
|
||||||
|
|
||||||
let rec parser_of_trie : type a. a trie -> a t = function
|
let rec parser_of_trie : type a. a trie -> a t = function
|
||||||
| TrieLeaf p -> p
|
| TrieLeaf p -> p
|
||||||
| TrieNode m -> SwitchC (CharMap.map parser_of_trie m, None)
|
| TrieNode m -> make (SwitchC (CharMap.map parser_of_trie m, None))
|
||||||
|
|
||||||
(* build prefix trie *)
|
(* build prefix trie *)
|
||||||
let switch_s l =
|
let switch_s l =
|
||||||
|
|
@ -353,20 +432,20 @@ let bool =
|
||||||
; "false", return false
|
; "false", return false
|
||||||
]
|
]
|
||||||
|
|
||||||
let delay p = Lazy p
|
|
||||||
|
|
||||||
(* FIXME: does not work in practice. Must separate definition of combinators
|
|
||||||
from compilation to decision tree *)
|
|
||||||
let fix f =
|
let fix f =
|
||||||
let rec r = lazy (f r) in
|
(* outermost lazy needed for the recursive definition *)
|
||||||
Lazy.force r
|
let rec r = {
|
||||||
|
parse=Lazy (lazy (f r));
|
||||||
|
compiled=C_NotCompiled;
|
||||||
|
} in
|
||||||
|
r
|
||||||
|
|
||||||
module Infix = struct
|
module Infix = struct
|
||||||
let (>|=) x f = map f x
|
let (>|=) x f = map f x
|
||||||
let (<*>) = app
|
let (<*>) = app
|
||||||
let (<<) = app_left
|
let (<<) = app_left
|
||||||
let (>>) = app_right
|
let (>>) = app_right
|
||||||
let (<+>) = merge
|
let (<+>) a b = choice [a; b]
|
||||||
let (<::>) a b = pure (fun x l -> x::l) <*> a <*> b
|
let (<::>) a b = pure (fun x l -> x::l) <*> a <*> b
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -378,7 +457,82 @@ let word =
|
||||||
|
|
||||||
(* TODO *)
|
(* TODO *)
|
||||||
let quoted =
|
let quoted =
|
||||||
Lazy (lazy (failwith "quoted: not implemented"))
|
make (Lazy (lazy (failwith "quoted: not implemented")))
|
||||||
|
|
||||||
|
(** {2 Compilation} *)
|
||||||
|
|
||||||
|
let encode_cons x sep tl = pure (fun x _sep tl -> x :: tl) <*> x <*> sep <*> tl
|
||||||
|
|
||||||
|
let encode_many
|
||||||
|
: type a. set:CharSet.t -> p:a t -> self:a list t -> sep:unit t -> a list t
|
||||||
|
= fun ~set ~p ~self ~sep ->
|
||||||
|
let on_success = encode_cons p sep self
|
||||||
|
and on_fail = pure [] in
|
||||||
|
make (SwitchC (map_add_set CharMap.empty set on_success, Some on_fail))
|
||||||
|
|
||||||
|
let encode_opt ~set x =
|
||||||
|
let mk_one x = [x] in
|
||||||
|
let on_success = make (Map (mk_one, x))
|
||||||
|
and on_fail = pure [] in
|
||||||
|
make (SwitchC (map_add_set CharMap.empty set on_success, Some on_fail))
|
||||||
|
|
||||||
|
let encode_skip
|
||||||
|
: type a. set:CharSet.t -> p:a t -> self:unit t -> unit t
|
||||||
|
= fun ~set ~p ~self ->
|
||||||
|
let on_success = p >> self
|
||||||
|
and on_fail = pure () in
|
||||||
|
make (SwitchC (map_add_set CharMap.empty set on_success, Some on_fail))
|
||||||
|
|
||||||
|
let many_
|
||||||
|
: type a. sep:unit t -> mult:multiplicity -> p:a t -> a list t
|
||||||
|
= fun ~sep ~mult ~p -> match possible_first_chars p.parse with
|
||||||
|
| Set set ->
|
||||||
|
begin match mult with
|
||||||
|
| Star -> fix (fun self -> encode_many ~set ~sep ~p ~self)
|
||||||
|
| Plus -> encode_cons p sep (fix (fun self -> encode_many ~set ~sep ~p ~self))
|
||||||
|
| Question -> encode_opt ~set p
|
||||||
|
end
|
||||||
|
| IsFail msg -> make (Fail msg)
|
||||||
|
| AllChars -> invalid_arg (str "many: invalid parser (always succeeds)")
|
||||||
|
| NoChar -> invalid_arg (str "many: invalid parser (does not consume input)")
|
||||||
|
|
||||||
|
let skip_ : type a. mult:multiplicity -> p:a t -> unit t
|
||||||
|
= fun ~mult ~p -> match possible_first_chars p.parse with
|
||||||
|
| Set set ->
|
||||||
|
begin match mult with
|
||||||
|
| Star -> fix (fun self -> encode_skip ~set ~p ~self)
|
||||||
|
| Plus -> p >> fix (fun self -> encode_skip ~set ~p ~self)
|
||||||
|
| Question -> encode_opt ~set p >> pure ()
|
||||||
|
end
|
||||||
|
| IsFail msg -> make (Fail msg)
|
||||||
|
| AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)")
|
||||||
|
| NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)")
|
||||||
|
|
||||||
|
let rec compile
|
||||||
|
: type a. a t -> a compiled
|
||||||
|
= fun t -> match t.compiled with
|
||||||
|
| C_NotCompiled ->
|
||||||
|
let c = match t.parse with
|
||||||
|
| Return x -> C_Return x
|
||||||
|
| Map (f, x) -> C_Map (f, x)
|
||||||
|
| Filter (f, x) -> C_Filter (f, x)
|
||||||
|
| App (f, x) -> C_App (f, x)
|
||||||
|
| AppLeft (a, b) -> C_AppLeft (a, b)
|
||||||
|
| AppRight (a, b) -> C_AppRight (a, b)
|
||||||
|
| Fail msg -> C_Fail msg
|
||||||
|
| Int -> C_Int
|
||||||
|
| Float -> C_Float
|
||||||
|
| AnyOf set -> C_AnyOf set
|
||||||
|
| Many (p, sep, mult) -> compile (many_ ~sep ~mult ~p)
|
||||||
|
| Skip (p, mult) -> compile (skip_ ~mult ~p)
|
||||||
|
| SwitchC (map, None) -> C_SwitchC (map, None)
|
||||||
|
| SwitchC (map, Some o) -> C_SwitchC (map, Some o)
|
||||||
|
| Eof -> C_Eof
|
||||||
|
| Lazy (lazy p) -> compile p
|
||||||
|
in
|
||||||
|
t.compiled <- c;
|
||||||
|
c
|
||||||
|
| c -> c (* already compiled *)
|
||||||
|
|
||||||
(** {2 Signatures} *)
|
(** {2 Signatures} *)
|
||||||
|
|
||||||
|
|
@ -505,108 +659,70 @@ module MakeFromReader(R : READER) : S with type source = R.source = struct
|
||||||
let is_int c = Char.code c >= Char.code '0' && Char.code c <= Char.code '9'
|
let is_int c = Char.code c >= Char.code '0' && Char.code c <= Char.code '9'
|
||||||
let to_int c = Char.code c - Char.code '0'
|
let to_int c = Char.code c - Char.code '0'
|
||||||
|
|
||||||
let rec parse_int r sign i = match R.peek r with
|
let rec parse_int r ~sign i = match R.peek r with
|
||||||
| EOF -> i
|
| EOF -> i
|
||||||
| Yield c when is_int c ->
|
| Yield c when is_int c ->
|
||||||
R.junk r;
|
R.junk r;
|
||||||
parse_int r sign (10 * i + to_int c)
|
parse_int r ~sign (10 * i + to_int c)
|
||||||
| Yield '-' when i = 0 && sign ->
|
| Yield '-' when i = 0 && sign ->
|
||||||
(* switch sign: only on first char *)
|
(* switch sign: only on first char *)
|
||||||
R.junk r;
|
R.junk r;
|
||||||
parse_int r false 0
|
parse_int r ~sign:false 0
|
||||||
| _ -> if sign then i else -i
|
| _ -> if sign then i else -i
|
||||||
|
|
||||||
let parse_float _r _buf = assert false
|
let parse_float _r _buf = assert false
|
||||||
|
|
||||||
let rec parse_rec : type a. R.t -> a t -> a =
|
let rec parse_rec : type a. R.t -> a t -> a =
|
||||||
fun r p -> match p with
|
fun r p -> match compile p with
|
||||||
| Return x -> x
|
| C_Return x -> x
|
||||||
| Map (f, x) ->
|
| C_Map (f, x) ->
|
||||||
let y = parse_rec r x in
|
let y = parse_rec r x in
|
||||||
f y
|
f y
|
||||||
| Filter (f, x) ->
|
| C_Filter (f, x) ->
|
||||||
let y = parse_rec r x in
|
let y = parse_rec r x in
|
||||||
if f y then y else errorf r "filter failed"
|
if f y then y else errorf r "filter failed"
|
||||||
| App (f, x) ->
|
| C_App (f, x) ->
|
||||||
let f' = parse_rec r f in
|
let f' = parse_rec r f in
|
||||||
let x' = parse_rec r x in
|
let x' = parse_rec r x in
|
||||||
f' x'
|
f' x'
|
||||||
| AppLeft (a, b) ->
|
| C_AppLeft (a, b) ->
|
||||||
let a' = parse_rec r a in
|
let a' = parse_rec r a in
|
||||||
let _ = parse_rec r b in
|
let _ = parse_rec r b in
|
||||||
a'
|
a'
|
||||||
| AppRight (a, b) ->
|
| C_AppRight (a, b) ->
|
||||||
let _ = parse_rec r a in
|
let _ = parse_rec r a in
|
||||||
let b' = parse_rec r b in
|
let b' = parse_rec r b in
|
||||||
b'
|
b'
|
||||||
| Fail msg -> error r msg
|
| C_Fail msg -> error r msg
|
||||||
| Int -> parse_int r true 0
|
| C_Int -> parse_int r ~sign:true 0
|
||||||
| Float -> parse_float r (Buffer.create 8)
|
| C_Float -> parse_float r (Buffer.create 8)
|
||||||
| AnyOf set ->
|
| C_AnyOf set ->
|
||||||
begin match R.next r with
|
begin match R.next r with
|
||||||
| EOF -> errorf r "expected any of {%s}, got EOF" (print_char_set set)
|
| EOF -> errorf r "expected any of %s, got EOF" (print_char_set set)
|
||||||
| Yield c ->
|
| Yield c ->
|
||||||
if CharSet.mem c set then c
|
if CharSet.mem c set then c
|
||||||
else errorf r "expected any of {%s}, got %c" (print_char_set set) c
|
else errorf r "expected any of %s, got %c" (print_char_set set) c
|
||||||
end
|
end
|
||||||
| Many (set, p, sep, mult) -> parse_many r ~set ~sep ~p ~mult []
|
| C_SwitchC (map, def) ->
|
||||||
| Skip (set, p, mult) -> parse_skip r ~set ~p ~mult
|
|
||||||
| SwitchC (map, def) ->
|
|
||||||
begin match R.peek r with
|
begin match R.peek r with
|
||||||
| EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map)
|
| EOF -> errorf r "expected any of %s, got EOF" (print_char_map map)
|
||||||
| Yield c ->
|
| Yield c ->
|
||||||
begin try
|
begin try
|
||||||
let p' = CharMap.find c map in
|
let p' = CharMap.find c map in
|
||||||
parse_rec r p'
|
parse_rec r p'
|
||||||
with Not_found ->
|
with Not_found -> match def with
|
||||||
match def with
|
| None ->
|
||||||
| None ->
|
errorf r "expected any of %s, got %c" (print_char_map map) c
|
||||||
errorf r "expected any of {%s}, got %c" (print_char_map map) c
|
| Some d -> parse_rec r d
|
||||||
| Some d -> parse_rec r d
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
| Lazy (lazy p) -> parse_rec r p
|
| C_NotCompiled -> assert false
|
||||||
| Eof ->
|
| C_Eof ->
|
||||||
begin match R.next r with
|
begin match R.next r with
|
||||||
| EOF -> ()
|
| EOF -> ()
|
||||||
| Yield c -> errorf r "expected EOF, got %c" c
|
| Yield c -> errorf r "expected EOF, got %c" c
|
||||||
end
|
end
|
||||||
|
|
||||||
and parse_many
|
|
||||||
: type a. R.t -> set:CharSet.t -> p:a t -> sep:unit t ->
|
|
||||||
mult:multiplicity -> a list -> a list
|
|
||||||
= fun r ~set ~p ~sep ~mult acc ->
|
|
||||||
match R.peek r with
|
|
||||||
| EOF -> List.rev acc
|
|
||||||
| Yield c ->
|
|
||||||
if CharSet.mem c set
|
|
||||||
then
|
|
||||||
let x = parse_rec r p in
|
|
||||||
match mult with
|
|
||||||
| Question -> assert (acc = []); [x]
|
|
||||||
| Plus | Star ->
|
|
||||||
let _ = parse_rec r sep in (* separator *)
|
|
||||||
parse_many r ~set ~p ~sep ~mult:Star (x::acc)
|
|
||||||
else if mult = Plus
|
|
||||||
then errorf r "expected {%s}, got %c" (print_char_set set) c
|
|
||||||
else List.rev acc
|
|
||||||
|
|
||||||
and parse_skip
|
|
||||||
: type a. R.t -> set:CharSet.t -> p:a t -> mult:multiplicity -> unit
|
|
||||||
= fun r ~set ~p ~mult ->
|
|
||||||
match R.peek r with
|
|
||||||
| EOF -> ()
|
|
||||||
| Yield c ->
|
|
||||||
if CharSet.mem c set
|
|
||||||
then
|
|
||||||
let _ = parse_rec r p in
|
|
||||||
match mult with
|
|
||||||
| Question -> ()
|
|
||||||
| Plus | Star -> parse_skip r ~set ~p ~mult:Star
|
|
||||||
else if mult = Plus
|
|
||||||
then errorf r "expected {%s}, got %c" (print_char_set set) c
|
|
||||||
else ()
|
|
||||||
|
|
||||||
(* public functions *)
|
(* public functions *)
|
||||||
let parse_exn src p =
|
let parse_exn src p =
|
||||||
let r = R.create src in
|
let r = R.create src in
|
||||||
|
|
|
||||||
|
|
@ -37,11 +37,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
let mklist l = List l;;
|
let mklist l = List l;;
|
||||||
|
|
||||||
let sexp = fix (fun sexp ->
|
let sexp = fix (fun sexp ->
|
||||||
spaces >>
|
spaces >>
|
||||||
((word >|= mkatom) <+>
|
((word >|= mkatom) <+>
|
||||||
((char '(' >> many (delay sexp) << char ')') >|= mklist)
|
((char '(' >> many sexp << char ')') >|= mklist)
|
||||||
)
|
)
|
||||||
);;
|
);;
|
||||||
|
|
||||||
|
Str.parse_exn "(a (b c d) e)" sexp;;
|
||||||
|
|
||||||
]}
|
]}
|
||||||
|
|
||||||
|
|
@ -160,10 +162,7 @@ val choice : 'a t list -> 'a t
|
||||||
@raise Invalid_argument if the list is empty, or if some parsers
|
@raise Invalid_argument if the list is empty, or if some parsers
|
||||||
overlap, making the choice ambiguous *)
|
overlap, making the choice ambiguous *)
|
||||||
|
|
||||||
val delay : 'a t Lazy.t -> 'a t
|
val fix : ('a t -> 'a t) -> 'a t
|
||||||
(** delay evaluation. Useful in combination with {!fix} *)
|
|
||||||
|
|
||||||
val fix : ('a t Lazy.t -> 'a t) -> 'a t
|
|
||||||
(** [fix f] makes a fixpoint *)
|
(** [fix f] makes a fixpoint *)
|
||||||
|
|
||||||
module Infix : sig
|
module Infix : sig
|
||||||
|
|
@ -229,6 +228,9 @@ module Make(I : INPUT) : S with type source = I.t
|
||||||
|
|
||||||
(** {2 Low-level interface} *)
|
(** {2 Low-level interface} *)
|
||||||
|
|
||||||
|
val print : Format.formatter -> _ t -> unit
|
||||||
|
(** Print a parser structure, for debug purpose *)
|
||||||
|
|
||||||
type token =
|
type token =
|
||||||
| Yield of char
|
| Yield of char
|
||||||
| EOF
|
| EOF
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue