remove CCSexpM, use ocamllex for a much simpler CCSexp

This commit is contained in:
Simon Cruanes 2016-11-03 17:44:35 +01:00
parent f26b47ea5f
commit 941d74968e
7 changed files with 339 additions and 717 deletions

4
_oasis
View file

@ -58,8 +58,8 @@ Library "containers_unix"
Library "containers_sexp"
Path: src/sexp
Modules: CCSexp, CCSexpM
BuildDepends: bytes
Modules: CCSexp, CCSexp_lex
BuildDepends: bytes, result
FindlibParent: containers
FindlibName: sexp

View file

@ -1,18 +1,20 @@
open Result
let pp_sexp s = match s with
| `Ok l ->
| Ok l ->
List.iter
(fun s -> Format.printf "@[%a@]@." CCSexpM.print s)
(fun s -> Format.printf "@[%a@]@." CCSexp.pp s)
l
| `Error msg ->
| Error msg ->
Format.printf "error: %s@." msg
let () =
match Sys.argv with
| [| _ |] ->
let s = CCSexpM.parse_chan_list stdin in
let s = CCSexp.parse_chan_list stdin in
pp_sexp s
| [| _; file |] ->
let s = CCSexpM.parse_file_list file in
let s = CCSexp.parse_file_list file in
pp_sexp s
| _ -> failwith "usage: id_sexp [file]"

View file

@ -1,34 +1,17 @@
(*
Copyright (c) 2013, Simon Cruanes
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. Redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(* This file is free software, part of containers. See file "license" for more details. *)
(** {1 Simple S-expression parsing/printing} *)
type 'a or_error = ('a, string) Result.result
type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
type t = [
| `Atom of string
| `List of t list
]
type sexp = t
let equal a b = a = b
@ -52,122 +35,241 @@ let of_field name t = `List [`Atom name; t]
let of_record l =
`List (List.map (fun (n,x) -> of_field n x) l)
(** {6 Traversal of S-exp} *)
(** {2 Printing} *)
module Traverse = struct
type 'a conv = t -> 'a option
let _with_out filename f =
let oc = open_out filename in
try
let x = f oc in
close_out oc;
x
with e ->
close_out oc;
raise e
let return x = Some x
(* shall we escape the string because of one of its chars? *)
let _must_escape s =
try
for i = 0 to String.length s - 1 do
let c = String.unsafe_get s i in
match c with
| ' ' | ')' | '(' | '"' | ';' | '\\' | '\n' | '\t' | '\r' -> raise Exit
| _ when Char.code c > 127 -> raise Exit (* non-ascii *)
| _ -> ()
done;
false
with Exit -> true
let (>|=) e f = match e with
| None -> None
| Some x -> Some (f x)
let rec to_buf b t = match t with
| `Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s)
| `Atom s -> Buffer.add_string b s
| `List [] -> Buffer.add_string b "()"
| `List [x] -> Printf.bprintf b "(%a)" to_buf x
| `List l ->
Buffer.add_char b '(';
List.iteri
(fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t'))
l;
Buffer.add_char b ')'
let (>>=) e f = match e with
| None -> None
| Some x -> f x
let to_string t =
let b = Buffer.create 128 in
to_buf b t;
Buffer.contents b
let map_opt f l =
let rec recurse acc l = match l with
| [] -> Some (List.rev acc)
| x::l' ->
match f x with
| None -> None
| Some y -> recurse (y::acc) l'
in recurse [] l
let rec pp fmt t = match t with
| `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s)
| `Atom s -> Format.pp_print_string fmt s
| `List [] -> Format.pp_print_string fmt "()"
| `List [x] -> Format.fprintf fmt "@[<hov2>(%a)@]" pp x
| `List l ->
Format.fprintf fmt "@[<hov1>(";
List.iteri
(fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; pp fmt t'))
l;
Format.fprintf fmt ")@]"
let rec _list_any f l = match l with
| [] -> None
| x::tl ->
match f x with
| Some _ as res -> res
| None -> _list_any f tl
let rec pp_noindent fmt t = match t with
| `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s)
| `Atom s -> Format.pp_print_string fmt s
| `List [] -> Format.pp_print_string fmt "()"
| `List [x] -> Format.fprintf fmt "(%a)" pp_noindent x
| `List l ->
Format.pp_print_char fmt '(';
List.iteri
(fun i t' -> (if i > 0 then Format.pp_print_char fmt ' '; pp_noindent fmt t'))
l;
Format.pp_print_char fmt ')'
let list_any f e = match e with
| `Atom _ -> None
| `List l -> _list_any f l
let to_chan oc t =
let fmt = Format.formatter_of_out_channel oc in
pp fmt t;
Format.pp_print_flush fmt ()
let rec _list_all f acc l = match l with
| [] -> List.rev acc
| x::tl ->
match f x with
| Some y -> _list_all f (y::acc) tl
| None -> _list_all f acc tl
let to_file_seq filename seq =
_with_out filename
(fun oc ->
seq (fun t -> to_chan oc t; output_char oc '\n')
)
let list_all f e = match e with
| `Atom _ -> []
| `List l -> _list_all f [] l
let to_file filename t = to_file_seq filename (fun k -> k t)
let _try_atom e f = match e with
| `List _ -> None
| `Atom x -> try Some (f x) with _ -> None
(** {2 Parsing} *)
let to_int e = _try_atom e int_of_string
let to_bool e = _try_atom e bool_of_string
let to_float e = _try_atom e float_of_string
let to_string e = _try_atom e (fun x->x)
let _with_in filename f =
let ic = open_in filename in
try
let x = f ic in
close_in ic;
x
with e ->
close_in ic;
Result.Error (Printexc.to_string e)
let to_pair e = match e with
| `List [x;y] -> Some (x,y)
| _ -> None
(** A parser of ['a] can return [Yield x] when it parsed a value,
or [Fail e] when a parse error was encountered, or
[End] if the input was empty *)
type 'a parse_result =
| Yield of 'a
| Fail of string
| End
let to_pair_with f1 f2 e =
to_pair e >>= fun (x,y) ->
f1 x >>= fun x ->
f2 y >>= fun y ->
return (x,y)
module Decoder = struct
module L = CCSexp_lex
let to_triple e = match e with
| `List [x;y;z] -> Some (x,y,z)
| _ -> None
type t = {
buf: Lexing.lexbuf;
mutable cur_tok: L.token option; (* current token *)
}
let to_triple_with f1 f2 f3 e =
to_triple e >>= fun (x,y,z) ->
f1 x >>= fun x ->
f2 y >>= fun y ->
f3 z >>= fun z ->
return (x,y,z)
let cur (t:t): L.token = match t.cur_tok with
| Some L.EOI -> assert false
| Some t -> t
| None ->
(* fetch token *)
let tok = L.token t.buf in
t.cur_tok <- Some tok;
tok
let to_list e = match e with
| `List l -> Some l
| `Atom _ -> None
let junk t = t.cur_tok <- None
let to_list_with f (e:t) = match e with
| `List l -> map_opt f l
| `Atom _ -> None
let of_lexbuf buf = {
buf;
cur_tok=None;
}
let rec _get_field name l = match l with
| `List [`Atom n; x] :: _ when name=n -> Some x
| _ :: tl -> _get_field name tl
| [] -> None
exception E_end
exception E_error of int * int * string
let get_field name e = match e with
| `List l -> _get_field name l
| `Atom _ -> None
let error_ lexbuf msg =
let start = Lexing.lexeme_start_p lexbuf in
let line = start.Lexing.pos_lnum in
let col = start.Lexing.pos_cnum - start.Lexing.pos_bol in
raise (E_error (line,col,msg))
let field name f e =
get_field name e >>= f
let rec _get_field_list name l = match l with
| `List (`Atom n :: tl) :: _ when name=n -> Some tl
| _ :: tl -> _get_field_list name tl
| [] -> None
let field_list name f e = match e with
| `List l -> _get_field_list name l >>= f
| `Atom _ -> None
let rec _get_variant s args l = match l with
| [] -> None
| (s', f) :: _ when s=s' -> f args
| _ :: tl -> _get_variant s args tl
let get_variant l e = match e with
| `List (`Atom s :: args) -> _get_variant s args l
| `List _ -> None
| `Atom s -> _get_variant s [] l
let get_exn e = match e with
| None -> failwith "CCSexp.Traverse.get_exn"
| Some x -> x
let next (t:t) =
let rec expr () = match cur t with
| L.EOI -> raise E_end
| L.ATOM s -> junk t; `Atom s
| L.LIST_OPEN ->
junk t;
let l = lst [] in
begin match cur t with
| L.LIST_CLOSE -> junk t; `List l
| _ -> error_ t.buf "expected ')'"
end
| L.LIST_CLOSE -> error_ t.buf "expected expression"
and lst acc = match cur t with
| L.LIST_CLOSE -> List.rev acc
| L.LIST_OPEN | L.ATOM _ ->
let sub = expr () in
lst (sub::acc)
| L.EOI -> error_ t.buf "unexpected EOI"
in
try Yield (expr ())
with
| E_end -> End
| E_error (line,col,msg)
| CCSexp_lex.Error (line,col,msg) ->
Fail (Printf.sprintf "parse error at %d:%d: %s" line col msg)
end
let parse_string s : t or_error =
let buf = Lexing.from_string s in
let d = Decoder.of_lexbuf buf in
match Decoder.next d with
| End -> Result.Error "unexpected end of file"
| Yield x -> Result.Ok x
| Fail s -> Result.Error s
(*$T
CCResult.to_opt (parse_string "(abc d/e/f \"hello \\\" () world\" )") <> None
CCResult.to_opt (parse_string "(abc ( d e ffff ) \"hello/world\")") <> None
*)
(*$inject
let sexp_gen =
let mkatom a = `Atom a and mklist l = `List l in
let atom = Q.Gen.(map mkatom (string_size ~gen:printable (1 -- 30))) in
let gen = Q.Gen.(
sized (fix
(fun self n st -> match n with
| 0 -> atom st
| _ ->
frequency
[ 1, atom
; 2, map mklist (list_size (0 -- 10) (self (n/10)))
] st
)
)) in
let rec small = function
| `Atom s -> String.length s
| `List l -> List.fold_left (fun n x->n+small x) 0 l
and print = function
| `Atom s -> Printf.sprintf "`Atom \"%s\"" s
| `List l -> "`List " ^ Q.Print.list print l
and shrink = function
| `Atom s -> Q.Iter.map mkatom (Q.Shrink.string s)
| `List l -> Q.Iter.map mklist (Q.Shrink.list ~shrink l)
in
Q.make ~print ~small ~shrink gen
let rec sexp_valid = function
| `Atom "" -> false
| `Atom _ -> true
| `List l -> List.for_all sexp_valid l
*)
(*$Q & ~count:100
sexp_gen (fun s -> sexp_valid s ==> (to_string s |> parse_string = Result.Ok s))
*)
let parse_chan ic : sexp or_error =
let buf = Lexing.from_channel ic in
let d = Decoder.of_lexbuf buf in
match Decoder.next d with
| End -> Result.Error "unexpected end of file"
| Yield x -> Result.Ok x
| Fail e -> Result.Error e
let parse_chan_list ic =
let buf = Lexing.from_channel ic in
let d = Decoder.of_lexbuf buf in
let rec iter acc = match Decoder.next d with
| End -> Result.Ok (List.rev acc)
| Yield x -> iter (x::acc)
| Fail e -> Result.Error e
in
iter []
let parse_chan_gen ic =
let buf = Lexing.from_channel ic in
let d = Decoder.of_lexbuf buf in
fun () -> match Decoder.next d with
| End -> None
| Fail e -> Some (Result.Error e)
| Yield x -> Some (Result.Ok x)
let parse_file filename = _with_in filename parse_chan
let parse_file_list filename = _with_in filename parse_chan_list

View file

@ -1,35 +1,11 @@
(*
Copyright (c) 2013, Simon Cruanes
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
(* This file is free software, part of containers. See file "license" for more details. *)
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. Redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
(** {1 Handling S-expressions} *)
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Handling S-expressions}
@since 0.4
@since 0.7
Moved the streaming parser to CCSexpStream
*)
type 'a or_error = ('a, string) Result.result
type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
(** {2 Basics} *)
@ -37,6 +13,7 @@ type t = [
| `Atom of string
| `List of t list
]
type sexp = t
val equal : t -> t -> bool
val compare : t -> t -> int
@ -65,104 +42,61 @@ val of_field : string -> t -> t
val of_record : (string * t) list -> t
(** Represent a record by its named fields *)
(** {6 Traversal of S-exp}
(** {2 Printing} *)
Example: serializing 2D points
{[
type pt = {x:int; y:int };;
val to_buf : Buffer.t -> t -> unit
let pt_of_sexp e =
Sexp.Traverse.(
field "x" to_int e >>= fun x ->
field "y" to_int e >>= fun y ->
return {x;y}
);;
val to_string : t -> string
let sexp_of_pt pt = Sexp.(of_record ["x", of_int pt.x; "y", of_int pt.y]);;
val to_file : string -> t -> unit
let l = [{x=1;y=1}; {x=2;y=10}];;
val to_file_seq : string -> t sequence -> unit
(** Print the given sequence of expressions to a file *)
let sexp = Sexp.(of_list (List.map sexp_of_pt l));;
val to_chan : out_channel -> t -> unit
Sexp.Traverse.list_all pt_of_sexp sexp;;
]}
val pp : Format.formatter -> t -> unit
(** Pretty-printer nice on human eyes (including indentation) *)
*)
val pp_noindent : Format.formatter -> t -> unit
(** Raw, direct printing as compact as possible *)
module Traverse : sig
type 'a conv = t -> 'a option
(** A converter from S-expressions to 'a is a function [sexp -> 'a option].
@since 0.4.1 *)
(** {2 Parsing} *)
val map_opt : ('a -> 'b option) -> 'a list -> 'b list option
(** Map over a list, failing as soon as the function fails on any element
@since 0.4.1 *)
(** A parser of ['a] can return [Yield x] when it parsed a value,
or [Fail e] when a parse error was encountered, or
[End] if the input was empty *)
type 'a parse_result =
| Yield of 'a
| Fail of string
| End
val list_any : 'a conv -> t -> 'a option
(** [list_any f (List l)] tries [f x] for every element [x] in [List l],
and returns the first non-None result (if any). *)
module Decoder : sig
type t
(** Decoder *)
val list_all : 'a conv -> t -> 'a list
(** [list_all f (List l)] returns the list of all [y] such that [x] in [l]
and [f x = Some y] *)
val of_lexbuf : Lexing.lexbuf -> t
val to_int : int conv
(** Expect an integer *)
val to_string : string conv
(** Expect a string (an atom) *)
val to_bool : bool conv
(** Expect a boolean *)
val to_float : float conv
(** Expect a float *)
val to_list : t list conv
(** Expect a list *)
val to_list_with : (t -> 'a option) -> 'a list conv
(** Expect a list, applies [f] to all the elements of the list, and succeeds
only if [f] succeeded on every element
@since 0.4.1 *)
val to_pair : (t * t) conv
(** Expect a list of two elements *)
val to_pair_with : 'a conv -> 'b conv -> ('a * 'b) conv
(** Same as {!to_pair} but applies conversion functions
@since 0.4.1 *)
val to_triple : (t * t * t) conv
val to_triple_with : 'a conv -> 'b conv -> 'c conv -> ('a * 'b * 'c) conv
(* @since 0.4.1 *)
val get_field : string -> t conv
(** [get_field name e], when [e = List [(n1,x1); (n2,x2) ... ]], extracts
the [xi] such that [name = ni], if it can find it. *)
val field : string -> 'a conv -> 'a conv
(** Enriched version of {!get_field}, with a converter as argument *)
val get_variant : (string * (t list -> 'a option)) list -> 'a conv
(** [get_variant l e] checks whether [e = List (Atom s :: args)], and
if some pair of [l] is [s, f]. In this case, it calls [f args]
and returns its result, otherwise it returns None. *)
val field_list : string -> (t list -> 'a option) -> 'a conv
(** [field_list name f "(... (name a b c d) ...record)"]
will look for a field based on the given [name], and expect it to have a
list of arguments dealt with by [f] (here, "a b c d").
@since 0.4.1 *)
val (>>=) : 'a option -> ('a -> 'b option) -> 'b option
val (>|=) : 'a option -> ('a -> 'b) -> 'b option
val return : 'a -> 'a option
val get_exn : 'a option -> 'a
(** Unwrap an option, possibly failing.
@raise Invalid_argument if the argument is [None] *)
val next : t -> sexp parse_result
(** Parse the next S-expression or return an error if the input isn't
long enough or isn't a proper S-expression *)
end
val parse_string : string -> t or_error
(** Parse a string *)
val parse_chan : in_channel -> t or_error
(** Parse a S-expression from the given channel. Can read more data than
necessary, so don't use this if you need finer-grained control (e.g.
to read something else {b after} the S-exp) *)
val parse_chan_gen : in_channel -> t or_error gen
(** Parse a channel into a generator of S-expressions *)
val parse_chan_list : in_channel -> t list or_error
val parse_file : string -> t or_error
(** Open the file and read a S-exp from it *)
val parse_file_list : string -> t list or_error
(** Open the file and read a S-exp from it *)

View file

@ -1,379 +0,0 @@
(* This file is free software, part of containers. See file "license" for more details. *)
(** {1 Simple S-expression parsing/printing} *)
type 'a or_error = [ `Ok of 'a | `Error of string ]
type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
type t = [
| `Atom of string
| `List of t list
]
type sexp = t
let _with_in filename f =
let ic = open_in filename in
try
let x = f ic in
close_in ic;
x
with e ->
close_in ic;
`Error (Printexc.to_string e)
let _with_out filename f =
let oc = open_out filename in
try
let x = f oc in
close_out oc;
x
with e ->
close_out oc;
raise e
(** {2 Serialization (encoding)} *)
(* shall we escape the string because of one of its chars? *)
let _must_escape s =
try
for i = 0 to String.length s - 1 do
let c = String.unsafe_get s i in
match c with
| ' ' | ';' | ')' | '(' | '"' | '\\' | '\n' | '\t' | '\r' -> raise Exit
| _ when Char.code c > 127 -> raise Exit (* non-ascii *)
| _ -> ()
done;
false
with Exit -> true
let rec to_buf b t = match t with
| `Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s)
| `Atom s -> Buffer.add_string b s
| `List [] -> Buffer.add_string b "()"
| `List [x] -> Printf.bprintf b "(%a)" to_buf x
| `List l ->
Buffer.add_char b '(';
List.iteri
(fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t'))
l;
Buffer.add_char b ')'
let to_string t =
let b = Buffer.create 128 in
to_buf b t;
Buffer.contents b
let rec print fmt t = match t with
| `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s)
| `Atom s -> Format.pp_print_string fmt s
| `List [] -> Format.pp_print_string fmt "()"
| `List [x] -> Format.fprintf fmt "@[<hov2>(%a)@]" print x
| `List l ->
Format.fprintf fmt "@[<hov1>(";
List.iteri
(fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; print fmt t'))
l;
Format.fprintf fmt ")@]"
let rec print_noindent fmt t = match t with
| `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s)
| `Atom s -> Format.pp_print_string fmt s
| `List [] -> Format.pp_print_string fmt "()"
| `List [x] -> Format.fprintf fmt "(%a)" print_noindent x
| `List l ->
Format.pp_print_char fmt '(';
List.iteri
(fun i t' -> (if i > 0 then Format.pp_print_char fmt ' '; print_noindent fmt t'))
l;
Format.pp_print_char fmt ')'
let to_chan oc t =
let fmt = Format.formatter_of_out_channel oc in
print fmt t;
Format.pp_print_flush fmt ()
let to_file_seq filename seq =
_with_out filename
(fun oc ->
seq (fun t -> to_chan oc t; output_char oc '\n')
)
let to_file filename t = to_file_seq filename (fun k -> k t)
(** {2 Deserialization (decoding)} *)
module type MONAD = sig
type 'a t
val return : 'a -> 'a t
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
end
type 'a parse_result = ['a or_error | `End ]
module MakeDecode(M : MONAD) = struct
let (>>=) = M.(>>=)
type t = {
buf : Bytes.t;
refill : Bytes.t -> int -> int -> int M.t;
atom : Buffer.t;
mutable i : int; (* offset in [buf] *)
mutable len : int; (* how many bytes of [buf] are usable *)
mutable line : int;
mutable col : int;
}
let make ?(bufsize=1024) refill =
let bufsize = min (max bufsize 16) Sys.max_string_length in
{ buf=Bytes.create bufsize;
refill;
atom = Buffer.create 32;
i=0;
len=0;
line=1;
col=1;
}
let _is_digit c = Char.code '0' <= Char.code c && Char.code c <= Char.code '9'
let _digit2i c = Char.code c - Char.code '0'
(* refill buffer. If it works, call k_succ, otherwise call k_fail *)
let _refill t k_succ k_fail =
t.refill t.buf 0 (Bytes.length t.buf)
>>= fun n ->
t.i <- 0;
t.len <- n;
if n = 0 then k_fail t else k_succ t
(* get next char, assuming t.i < t.len *)
let _get t =
assert (t.i < t.len);
let c = Bytes.get t.buf t.i in
t.i <- t.i + 1;
if c = '\n' then (t.col <- 1; t.line <- t.line + 1) else t.col <- t.col + 1;
c
(* return an error *)
let _error t msg =
let b = Buffer.create 32 in
Printf.bprintf b "at %d, %d: " t.line t.col;
Printf.kbprintf
(fun b ->
let msg' = Buffer.contents b in
M.return (`Error msg')
) b msg
let _error_eof t = _error t "unexpected end of input"
(* The parsers all take a success continuation, and the decoder as
last arguments. The continuation is used to minimize the
number of calls to [>>=] and take two parameters, the next
char (if not consumed), and the returned expression itself *)
(* read expression *)
let rec expr k t =
if t.i = t.len then _refill t (expr k) _error_eof
else match _get t with
| ' ' | '\t' | '\r' | '\n' -> expr k t
| c -> expr_starting_with c k t
and expr_starting_with c k t = match c with
| ' ' | '\t' | '\r' | '\n' -> assert false
| ';' -> skip_comment (fun _ () -> expr k t) t
| '(' -> expr_list [] k t
| ')' -> _error t "unexpected ')'"
| '\\' -> _error t "unexpected '\\'"
| '"' -> quoted k t
| c ->
Buffer.add_char t.atom c;
atom k t
(* parse list *)
and expr_list acc k t =
if t.i = t.len then _refill t (expr_list acc k) _error_eof
else match _get t with
| ' ' | '\t' | '\r' | '\n' -> expr_list acc k t
| ')' -> k None (`List (List.rev acc))
| c ->
expr_starting_with c
(fun last e -> match last with
| Some '(' -> expr_list [] (fun _ l -> expr_list (l::acc) k t) t
| Some ')' -> k None (`List (List.rev (e::acc)))
| _ -> expr_list (e::acc) k t
) t
(* return the current atom (last char: c) *)
and _return_atom last k t =
let s = Buffer.contents t.atom in
Buffer.clear t.atom;
k last (`Atom s)
(* parse atom *)
and atom k t =
if t.i = t.len then _refill t (atom k) (_return_atom None k)
else match _get t with
| '\\' -> _error t "unexpected '\\' in non-quoted string"
| '"' -> _error t "unexpected '\"' in the middle of an atom"
| (' ' | '\r' | '\n' | '\t' | '(' | ')') as c ->
_return_atom (Some c) k t
| c ->
Buffer.add_char t.atom c;
atom k t
(* quoted string *)
and quoted k t =
if t.i = t.len then _refill t (quoted k) _error_eof
else match _get t with
| '\\' ->
(* read escaped char and continue *)
escaped
(fun c ->
Buffer.add_char t.atom c;
quoted k t
) t
| '"' -> _return_atom None k t
| c ->
Buffer.add_char t.atom c;
quoted k t
(* read escaped char *)
and escaped k t =
if t.i = t.len then _refill t (escaped k) _error_eof
else match _get t with
| 'n' -> k '\n'
| 't' -> k '\t'
| 'r' -> k '\r'
| 'b' -> k '\b'
| '\\' -> k '\\'
| '"' -> k '"'
| c when _is_digit c ->
read2int (_digit2i c) (fun n -> k (Char.chr n)) t
| c -> _error t "unexpected escaped char '%c'" c
and read2int i k t =
if t.i = t.len then _refill t (read2int i k) _error_eof
else match _get t with
| c when _is_digit c -> read1int (10 * i + _digit2i c) k t
| c -> _error t "unexpected char '%c' when reading byte" c
and read1int i k t =
if t.i = t.len then _refill t (read1int i k) _error_eof
else match _get t with
| c when _is_digit c -> k (10 * i + _digit2i c)
| c -> _error t "unexpected char '%c' when reading byte" c
(* skip until end of line, then call next() *)
and skip_comment k t =
if t.i = t.len
then _refill t (skip_comment k) _error_eof
else match _get t with
| '\n' -> k None ()
| _ -> skip_comment k t
(* top-level expression *)
let rec expr_or_end k t =
if t.i = t.len
then _refill t (expr_or_end k) (fun _ -> M.return `End)
else match _get t with
| ' ' | '\t' | '\r' | '\n' -> expr_or_end k t
| c -> expr_starting_with c k t
(* entry point *)
let next t : sexp parse_result M.t =
expr_or_end (fun _ x -> M.return (`Ok x)) t
end
module ID_MONAD = struct
type 'a t = 'a
let return x = x
let (>>=) x f = f x
end
module D = MakeDecode(ID_MONAD)
let parse_string s : t or_error =
let n = String.length s in
let stop = ref false in
let refill bytes i _len =
if !stop then 0
else (stop := true; Bytes.blit_string s 0 bytes i n; n)
in
let d = D.make ~bufsize:n refill in
match D.next d with
| `End -> `Error "unexpected end of file"
| (`Ok _ | `Error _) as res -> res
(*$T
CCError.to_opt (parse_string "(abc d/e/f \"hello \\\" () world\" )") <> None
CCError.to_opt (parse_string "(abc ( d e ffff ) \"hello/world\")") <> None
(parse_string "(abc\r\n ( d e \r\tffff ))") \
= `Ok (`List [`Atom "abc"; `List [`Atom "d"; `Atom "e"; `Atom "ffff"]])
*)
(*$inject
let sexp_gen =
let mkatom a = `Atom a and mklist l = `List l in
let atom = Q.Gen.(map mkatom (string_size ~gen:printable (1 -- 30))) in
let gen = Q.Gen.(
sized (fix
(fun self n st -> match n with
| 0 -> atom st
| _ ->
frequency
[ 1, atom
; 2, map mklist (list_size (0 -- 10) (self (n/10)))
] st
)
)) in
let rec small = function
| `Atom s -> String.length s
| `List l -> List.fold_left (fun n x->n+small x) 0 l
and print = function
| `Atom s -> Printf.sprintf "`Atom \"%s\"" s
| `List l -> "`List " ^ Q.Print.list print l
and shrink = function
| `Atom s -> Q.Iter.map mkatom (Q.Shrink.string s)
| `List l -> Q.Iter.map mklist (Q.Shrink.list ~shrink l)
in
Q.make ~print ~small ~shrink gen
let rec sexp_valid = function
| `Atom "" -> false
| `Atom _ -> true
| `List l -> List.for_all sexp_valid l
*)
(*$Q & ~count:100
sexp_gen (fun s -> sexp_valid s ==> (to_string s |> parse_string = `Ok s))
*)
let parse_chan ?bufsize ic =
let d = D.make ?bufsize (input ic) in
match D.next d with
| `End -> `Error "unexpected end of file"
| (`Ok _ | `Error _) as res -> res
let parse_chan_gen ?bufsize ic =
let d = D.make ?bufsize (input ic) in
fun () ->
match D.next d with
| `End -> None
| `Error _ as e -> Some e
| `Ok _ as res -> Some res
let parse_chan_list ?bufsize ic =
let d = D.make ?bufsize (input ic) in
let rec iter acc = match D.next d with
| `End -> `Ok (List.rev acc)
| `Ok x -> iter (x::acc)
| `Error _ as e -> e
in
iter []
let parse_file filename =
_with_in filename (fun ic -> parse_chan ic)
let parse_file_list filename =
_with_in filename (fun ic -> parse_chan_list ic)

View file

@ -1,93 +0,0 @@
(* This file is free software, part of containers. See file "license" for more details. *)
(** {1 Simple and efficient S-expression parsing/printing}
@since 0.7 *)
type 'a or_error = [ `Ok of 'a | `Error of string ]
type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
(** {2 Basics} *)
type t = [
| `Atom of string
| `List of t list
]
type sexp = t
(** {2 Serialization (encoding)} *)
val to_buf : Buffer.t -> t -> unit
val to_string : t -> string
val to_file : string -> t -> unit
val to_file_seq : string -> t sequence -> unit
(** Print the given sequence of expressions to a file *)
val to_chan : out_channel -> t -> unit
val print : Format.formatter -> t -> unit
(** Pretty-printer nice on human eyes (including indentation) *)
val print_noindent : Format.formatter -> t -> unit
(** Raw, direct printing as compact as possible *)
(** {2 Deserialization (decoding)} *)
module type MONAD = sig
type 'a t
val return : 'a -> 'a t
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
end
type 'a parse_result = ['a or_error | `End ]
(** A parser of ['a] can return [`Ok x] when it parsed a value,
or [`Error e] when a parse error was encountered, or
[`End] if the input was empty *)
module MakeDecode(M : MONAD) : sig
type t
(** Decoder *)
val make : ?bufsize:int -> (Bytes.t -> int -> int -> int M.t) -> t
(** Make a decoder with the given function used to refill an
internal buffer. The function might return [0] if the
input is exhausted.
@param bufsize size of internal buffer *)
val next : t -> sexp parse_result M.t
(** Parse the next S-expression or return an error if the input isn't
long enough or isn't a proper S-expression *)
end
module ID_MONAD : MONAD with type 'a t = 'a
(** The monad that just uses blocking calls as bind
@since 0.14
['a t = 'a] contraint is @since 0.16 *)
module D : module type of MakeDecode(ID_MONAD)
(** Decoder that just blocks when input is not available
@since 0.14 *)
val parse_string : string -> t or_error
(** Parse a string *)
val parse_chan : ?bufsize:int -> in_channel -> t or_error
(** Parse a S-expression from the given channel. Can read more data than
necessary, so don't use this if you need finer-grained control (e.g.
to read something else {b after} the S-exp) *)
val parse_chan_gen : ?bufsize:int -> in_channel -> t or_error gen
(** Parse a channel into a generator of S-expressions *)
val parse_chan_list : ?bufsize:int -> in_channel -> t list or_error
val parse_file : string -> t or_error
(** Open the file and read a S-exp from it *)
val parse_file_list : string -> t list or_error
(** Open the file and read a S-exp from it *)

56
src/sexp/CCSexp_lex.mll Normal file
View file

@ -0,0 +1,56 @@
{
type token =
| ATOM of string
| LIST_OPEN
| LIST_CLOSE
| EOI
(* location + message *)
exception Error of int * int * string
let error lexbuf msg =
let start = Lexing.lexeme_start_p lexbuf in
let line = start.Lexing.pos_lnum in
let col = start.Lexing.pos_cnum - start.Lexing.pos_bol in
raise (Error (line,col,msg))
(* remove quotes + unescape *)
let remove_quotes lexbuf s =
assert (s.[0] = '"' && s.[String.length s - 1] = '"');
let buf = Buffer.create (String.length s) in
let escaped = ref false in
for i = 1 to String.length s-2 do
match s.[i] with
| '\\' when !escaped -> Buffer.add_char buf '\\'; escaped := false
| '\\' -> escaped := true
| 'n' when !escaped -> Buffer.add_char buf '\n'; escaped := false
| 'r' when !escaped -> Buffer.add_char buf '\r'; escaped := false
| 't' when !escaped -> Buffer.add_char buf '\t'; escaped := false
| '"' when !escaped -> Buffer.add_char buf '"'; escaped := false
| c when !escaped -> error lexbuf (Printf.sprintf "wrong escape `%c`" c)
| c -> Buffer.add_char buf c;
done;
Buffer.contents buf
}
let newline = '\n' | "\r\n"
let white = [' ' '\r' '\t'] | newline
let comment_line = ';' [^ '\n']*
let printable_char = [^ '\n']
let id = [^ ')' '(' '"' ' ' '\t' '\r' '\n']+
let string = '"' ([^ '"' '\\'] | "\\\"" | "\\\\" | "\\n" | "\\t" | "\\r")* '"'
rule token = parse
| comment_line { token lexbuf }
| newline { Lexing.new_line lexbuf; token lexbuf }
| white { token lexbuf }
| eof { EOI }
| '(' { LIST_OPEN }
| ')' { LIST_CLOSE }
| id { ATOM (Lexing.lexeme lexbuf) }
| string { ATOM (remove_quotes lexbuf (Lexing.lexeme lexbuf)) }
| _ as c
{ error lexbuf (Printf.sprintf "lexing failed on char `%c`" c) }