mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-08 04:05:30 -05:00
wip: rework CCParse
This commit is contained in:
parent
40a6c17548
commit
7318162c55
2 changed files with 73 additions and 14 deletions
|
|
@ -30,9 +30,16 @@ open CCShims_
|
||||||
| N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b)
|
| N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b)
|
||||||
| L x -> Printf.sprintf "L %d" x
|
| L x -> Printf.sprintf "L %d" x
|
||||||
|
|
||||||
let errpptree = function
|
let errpp pp = function
|
||||||
| Ok x -> "Ok " ^ pptree x
|
| Ok x -> "Ok " ^ pp x
|
||||||
| Error s -> "Error " ^ s
|
| Error s -> "Error " ^ s
|
||||||
|
|
||||||
|
let errpptree = errpp pptree
|
||||||
|
|
||||||
|
let erreq eq x y = match x, y with
|
||||||
|
| Ok x, Ok y -> eq x y
|
||||||
|
| Error _ , Error _ -> true
|
||||||
|
| _ -> false
|
||||||
*)
|
*)
|
||||||
|
|
||||||
(*$= & ~printer:errpptree
|
(*$= & ~printer:errpptree
|
||||||
|
|
@ -125,6 +132,8 @@ module MemoTbl = struct
|
||||||
| Ok of 'a
|
| Ok of 'a
|
||||||
end
|
end
|
||||||
|
|
||||||
|
(* TODO: [type position = {state: state; i: int}] and recompute line, col
|
||||||
|
on demand *)
|
||||||
type position = int * int * int (* pos, line, column *)
|
type position = int * int * int (* pos, line, column *)
|
||||||
|
|
||||||
type parse_branch = (line_num * col_num * string option) list
|
type parse_branch = (line_num * col_num * string option) list
|
||||||
|
|
@ -137,6 +146,7 @@ type state = {
|
||||||
mutable branch: parse_branch;
|
mutable branch: parse_branch;
|
||||||
memo : MemoTbl.t; (* Memoization table, if any *)
|
memo : MemoTbl.t; (* Memoization table, if any *)
|
||||||
}
|
}
|
||||||
|
(* TODO: remove lnum/cnum, recompute them lazily in errors *)
|
||||||
|
|
||||||
exception ParseError of parse_branch * (unit -> string)
|
exception ParseError of parse_branch * (unit -> string)
|
||||||
|
|
||||||
|
|
@ -202,7 +212,17 @@ let backtrack st (i',l',c') =
|
||||||
st.cnum <- c';
|
st.cnum <- c';
|
||||||
()
|
()
|
||||||
|
|
||||||
|
(* FIXME:
|
||||||
|
remove all backtracking stuff and instead, pass the state as parameter
|
||||||
|
to [ok] and [err], with an explicit offset that changes. *)
|
||||||
|
|
||||||
type 'a t = state -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
type 'a t = state -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
||||||
|
(** Takes the input and two continuations:
|
||||||
|
{ul
|
||||||
|
{- [ok] to call with the result when it's done}
|
||||||
|
{- [err] to call when the parser met an error}
|
||||||
|
}
|
||||||
|
*)
|
||||||
|
|
||||||
let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x
|
let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x
|
||||||
let pure = return
|
let pure = return
|
||||||
|
|
@ -500,23 +520,55 @@ module U = struct
|
||||||
sep_ ~by:(skip_white *> string sep *> skip_white) p <*
|
sep_ ~by:(skip_white *> string sep *> skip_white) p <*
|
||||||
skip_white <* string stop
|
skip_white <* string stop
|
||||||
|
|
||||||
|
|
||||||
let int =
|
let int =
|
||||||
|
skip_white *>
|
||||||
chars1_if (fun c -> is_num c || char_equal c '-')
|
chars1_if (fun c -> is_num c || char_equal c '-')
|
||||||
>>= fun s ->
|
>>= fun s ->
|
||||||
try return (int_of_string s)
|
try return (int_of_string s)
|
||||||
with Failure _ -> fail "expected an int"
|
with Failure _ -> fail "expected an int"
|
||||||
|
|
||||||
|
let hexa_int =
|
||||||
|
(try_ (string "0x") <|> return "") *>
|
||||||
|
begin
|
||||||
|
chars1_if (function '0' .. '9' | 'a'..'f' | 'A'..'F' -> true | _ -> false)
|
||||||
|
>|= fun s ->
|
||||||
|
let i = ref 0 in
|
||||||
|
String.iter
|
||||||
|
(fun c ->
|
||||||
|
let n = match c with
|
||||||
|
| '0' .. '9' -> Char.code c - Char.code '0'
|
||||||
|
| 'a' .. 'f' -> Char.code c - Char.code 'a' + 10
|
||||||
|
| 'A' .. 'F' -> Char.code c - Char.code 'A' + 10
|
||||||
|
| _ -> assert false
|
||||||
|
in
|
||||||
|
i := !i * 16 + n)
|
||||||
|
s;
|
||||||
|
!i
|
||||||
|
end
|
||||||
|
|
||||||
|
(*$= & ~printer:(errpp Q.Print.int) ~cmp:(erreq (=))
|
||||||
|
(Ok 16) (parse_string U.hexa_int "0x10")
|
||||||
|
(Ok 16) (parse_string U.hexa_int "10")
|
||||||
|
(Error "") (parse_string U.hexa_int "x10")
|
||||||
|
(Error "") (parse_string U.hexa_int "0xz")
|
||||||
|
*)
|
||||||
|
|
||||||
let prepend_str c s = String.make 1 c ^ s
|
let prepend_str c s = String.make 1 c ^ s
|
||||||
|
|
||||||
let word =
|
let word =
|
||||||
map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num)
|
map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num)
|
||||||
|
|
||||||
let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 =
|
let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 =
|
||||||
string start *> skip_white *>
|
skip_white *> string start *> skip_white *>
|
||||||
p1 >>= fun x1 ->
|
p1 >>= fun x1 ->
|
||||||
skip_white *> string sep *> skip_white *>
|
skip_white *> string sep *> skip_white *>
|
||||||
p2 >>= fun x2 ->
|
p2 >>= fun x2 ->
|
||||||
string stop *> return (x1,x2)
|
skip_white *> string stop *> return (x1,x2)
|
||||||
|
|
||||||
|
(*$= & ~printer:Q.Print.(errpp (pair int int))
|
||||||
|
(Ok(1,2)) U.(parse_string (pair int int) "(1 , 2 )")
|
||||||
|
*)
|
||||||
|
|
||||||
let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 =
|
let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 =
|
||||||
string start *> skip_white *>
|
string start *> skip_white *>
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@
|
||||||
|
|
||||||
(** {1 Very Simple Parser Combinators}
|
(** {1 Very Simple Parser Combinators}
|
||||||
|
|
||||||
|
These combinators can be used to write very simple parsers, for example
|
||||||
|
to extract data from a line-oriented file.
|
||||||
|
|
||||||
{[
|
{[
|
||||||
open CCParse;;
|
open CCParse;;
|
||||||
|
|
||||||
|
|
@ -70,13 +73,12 @@ val state_of_string : string -> state
|
||||||
|
|
||||||
(** {2 Combinators} *)
|
(** {2 Combinators} *)
|
||||||
|
|
||||||
type 'a t = state -> ok:('a -> unit) -> err:(exn -> unit) -> unit
|
type 'a t
|
||||||
(** Takes the input and two continuations:
|
(** The abstract type of parsers that return a value of type ['a] (or fail).
|
||||||
{ul
|
|
||||||
{- [ok] to call with the result when it's done}
|
@raise ParseError in case of failure.
|
||||||
{- [err] to call when the parser met an error}
|
@since NEXT_RELEASE the type is private.
|
||||||
}
|
*)
|
||||||
@raise ParseError in case of failure. *)
|
|
||||||
|
|
||||||
val return : 'a -> 'a t
|
val return : 'a -> 'a t
|
||||||
(** Always succeeds, without consuming its input. *)
|
(** Always succeeds, without consuming its input. *)
|
||||||
|
|
@ -299,8 +301,8 @@ end
|
||||||
|
|
||||||
(** {2 Utils}
|
(** {2 Utils}
|
||||||
|
|
||||||
This is useful to parse OCaml-like values in a simple way. *)
|
This is useful to parse OCaml-like values in a simple way.
|
||||||
|
All the parsers are whitespace-insensitive (they skip whitespace). *)
|
||||||
module U : sig
|
module U : sig
|
||||||
val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t
|
val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t
|
||||||
(** [list p] parses a list of [p], with the OCaml conventions for
|
(** [list p] parses a list of [p], with the OCaml conventions for
|
||||||
|
|
@ -308,7 +310,12 @@ module U : sig
|
||||||
Whitespace between items are skipped. *)
|
Whitespace between items are skipped. *)
|
||||||
|
|
||||||
val int : int t
|
val int : int t
|
||||||
(** Parse an int. *)
|
(** Parse an int in decimal representation. *)
|
||||||
|
|
||||||
|
val hexa_int : int t
|
||||||
|
(** Parse an int int hexadecimal format. Accepts an optional [0x] prefix,
|
||||||
|
and ignores capitalization.
|
||||||
|
@since NEXT_RELEASE *)
|
||||||
|
|
||||||
val word : string t
|
val word : string t
|
||||||
(** Non empty string of alpha num, start with alpha. *)
|
(** Non empty string of alpha num, start with alpha. *)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue