ocaml-containers/misc/sexp.ml
2014-09-17 00:45:33 +02:00

394 lines
9.9 KiB
OCaml

(*
Copyright (c) 2013, Simon Cruanes
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. Redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Simple S-expression parsing/printing} *)
type 'a or_error = [ `Ok of 'a | `Error of string ]
type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
type t =
| Atom of string
| List of t list
let eq a b = a = b
let compare a b = Pervasives.compare a b
let hash a = Hashtbl.hash a
(** {2 Serialization (encoding)} *)
let _must_escape s =
try
for i = 0 to String.length s - 1 do
let c = String.unsafe_get s i in
match c with
| ' ' | ')' | '(' | '\n' | '\t' -> raise Exit
| _ -> ()
done;
false
with Exit -> true
let rec to_buf b t = match t with
| Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s)
| Atom s -> Buffer.add_string b s
| List [] -> Buffer.add_string b "()"
| List [x] -> Printf.bprintf b "(%a)" to_buf x
| List l ->
Buffer.add_char b '(';
List.iteri
(fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t'))
l;
Buffer.add_char b ')'
let to_string t =
let b = Buffer.create 128 in
to_buf b t;
Buffer.contents b
let rec print fmt t = match t with
| Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s)
| Atom s -> Format.pp_print_string fmt s
| List [] -> Format.pp_print_string fmt "()"
| List [x] -> Format.fprintf fmt "(%a)" print x
| List l ->
Format.pp_print_char fmt '(';
List.iteri
(fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; print fmt t'))
l;
Format.pp_print_char fmt ')'
(** {2 Deserialization (decoding)} *)
type 'a parse_result = ['a or_error | `End ]
type 'a partial_result = [ 'a parse_result | `Await ]
module Streaming = struct
type token =
| Open
| Close
| Atom of string
type decode_state =
| St_start
| St_atom
| St_quoted
| St_escaped
| St_yield of token
| St_error of string
| St_end
type decoder = {
mutable st : decode_state;
mutable i : int;
mutable line : int;
mutable col : int;
mutable stop : bool;
buf : Buffer.t;
atom : Buffer.t; (* atom being parsed *)
}
let mk_decoder () = {
i = 0;
st = St_start;
line = 0;
col = 0;
stop = false;
buf=Buffer.create 32;
atom = Buffer.create 32;
}
exception NeedMoar
exception Error of string
exception EOI
(* yield [x] with current state [st] *)
let _yield d st x =
d.st <- st;
x
(* read the next char *)
let _next_char d =
if d.i = Buffer.length d.buf
then (
(* need more input; reset buffer to put it in *)
Buffer.clear d.buf;
d.i <- 0;
raise NeedMoar
) else (
let c = Buffer.nth d.buf d.i in
d.i <- d.i + 1;
c
)
let _take_buffer b =
let s = Buffer.contents b in
Buffer.clear b;
s
let _newline d =
d.line <- d.line + 1;
d.col <- 0;
()
(* raise an error *)
let _error d msg =
let msg' = Printf.sprintf "at %d,%d: %s" d.line d.col msg in
d.st <- St_error msg';
raise (Error msg')
(* next token *)
let rec _next d st = match st with
| St_error msg -> raise (Error msg)
| St_end -> raise EOI
| St_yield x ->
(* yield the given token, then start a fresh one *)
_yield d St_start x
| St_start ->
(* start reading next token *)
let c = _next_char d in
begin match c with
| '\n' -> _newline d; _next d St_start
| ' ' | '\t' -> _next d St_start
| '(' -> _yield d St_start Open
| ')' -> _yield d St_start Close
| '"' -> _next d St_quoted
| _ -> (* read regular atom *)
Buffer.add_char d.atom c;
_next d St_atom
end
| St_atom when d.stop ->
let a = _take_buffer d.atom in
_yield d St_end (Atom a)
| St_atom ->
(* reading an unquoted atom *)
let c = _next_char d in
begin match c with
| ' ' | '\t' | '\n' ->
let a = _take_buffer d.atom in
_yield d St_start (Atom a)
| ')' ->
let a = _take_buffer d.atom in
_yield d (St_yield Close) (Atom a)
| '(' ->
let a = _take_buffer d.atom in
_yield d (St_yield Open) (Atom a)
| '\\' -> _error d "unexpected char"
| _ ->
Buffer.add_char d.atom c;
_next d St_atom
end
| St_quoted when d.stop ->
let a = _take_buffer d.atom in
_yield d St_end (Atom a)
| St_quoted ->
(* reading an unquoted atom *)
let c = _next_char d in
begin match c with
| '\\' -> _next d St_escaped
| '"' ->
let a = _take_buffer d.atom in
_yield d St_start (Atom a)
| _ ->
Buffer.add_char d.atom c;
_next d St_atom
end
| St_escaped ->
if d.stop
then _error d "unexpected end of input (escaping)";
let c = _next_char d in
Buffer.add_char d.atom
(match c with
| 'n' -> '\n'
| 't' -> '\t'
| 'r' -> '\r'
| '\\' -> '\\'
| _ -> _error d "unexpected escaped character"
);
_next d St_quoted
let feed d s i len =
if d.stop then failwith "Sexp.Streaming.feed: end of input reached";
Buffer.add_substring d.buf s i len
let reached_end d =
d.stop <- true
let next_exn d = _next d d.st
let next d =
try
`Ok (_next d d.st)
with
| NeedMoar -> `Await
| Error msg -> `Error msg
| EOI -> `End
end
module ParseGen = struct
type 'a t = unit -> 'a parse_result
let to_list g : 'a list or_error =
let rec aux acc = match g() with
| `Error e -> `Error e
| `Ok x -> aux (x::acc)
| `End -> `Ok (List.rev acc)
in
aux []
let head g = match g() with
| `End -> `Error "expected at least one element"
| #or_error as x -> x
let head_exn g = match g() with
| `Ok x -> x
| `Error msg -> failwith msg
| `End -> failwith "expected at least one element"
let take n g =
assert (n>=0);
let n = ref n in
fun () ->
if !n = 0 then `End
else (
decr n;
g()
)
end
(* hidden parser state *)
type parser_state = {
ps_d : Streaming.decoder;
mutable ps_stack : t list list;
}
let mk_ps () = {
ps_d = Streaming.mk_decoder ();
ps_stack = [];
}
let _error ps msg =
let msg' = Printf.sprintf "at %d,%d: %s"
ps.ps_d.Streaming.line ps.ps_d.Streaming.col msg in
`Error msg'
(* next token, or await *)
let rec _next ps : t partial_result = match Streaming.next ps.ps_d with
| `Ok (Streaming.Atom s) ->
_push ps (Atom s)
| `Ok Streaming.Open ->
ps.ps_stack <- [] :: ps.ps_stack;
_next ps
| `Ok Streaming.Close ->
begin match ps.ps_stack with
| [] -> _error ps "unbalanced ')'"
| l :: stack ->
ps.ps_stack <- stack;
_push ps (List (List.rev l))
end
| `Error msg -> `Error msg
| `Await -> `Await
| `End -> `End
(* push a S-expr on top of the parser stack *)
and _push ps e = match ps.ps_stack with
| [] ->
`Ok e
| l :: tl ->
ps.ps_stack <- (e :: l) :: tl;
_next ps
let parse_gen g : t ParseGen.t =
let ps = mk_ps() in
let rec next () = match _next ps with
| `Await ->
begin match g() with
| None -> Streaming.reached_end ps.ps_d
| Some s -> Streaming.feed ps.ps_d s 0 (String.length s)
end;
next()
| `Ok x -> `Ok x
| `Error e -> `Error e
| `End -> `End
in
next
(* singleton generator *)
let _gen1 x =
let first = ref true in
fun () ->
if !first then (first:=false; Some x) else None
let parse_string s =
parse_gen (_gen1 s)
let parse_chan ic =
let buf = Buffer.create 512 in
let gen () =
Buffer.clear buf;
Buffer.add_channel buf ic 512;
if Buffer.length buf = 0
then None
else Some (Buffer.contents buf)
in
parse_gen gen
(** {6 Blocking} *)
let parse1_chan ic =
ParseGen.head (parse_chan ic)
let parse1_string s =
ParseGen.head (parse_string s)
let parse_l_chan ic =
ParseGen.to_list (parse_chan ic)
let parse_l_string s =
ParseGen.to_list (parse_string s)
let parse_l_gen g =
ParseGen.to_list (parse_gen g)
let parse_l_seq seq =
let ps = mk_ps() in
let l = ref [] in
(* read as many expressions as possible *)
let rec _nexts () = match _next ps with
| `Ok x -> l := x :: !l; _nexts ()
| `Error e -> raise (Streaming.Error e)
| `End -> raise Streaming.EOI
| `Await -> ()
in
try
seq
(fun s -> Streaming.feed ps.ps_d s 0 (String.length s); _nexts ());
Streaming.reached_end ps.ps_d;
_nexts ();
`Ok (List.rev !l)
with
| Streaming.Error msg -> `Error msg
| Streaming.EOI -> `Ok (List.rev !l)