mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-07 11:45:31 -05:00
394 lines
9.9 KiB
OCaml
394 lines
9.9 KiB
OCaml
(*
|
|
Copyright (c) 2013, Simon Cruanes
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
Redistributions of source code must retain the above copyright notice, this
|
|
list of conditions and the following disclaimer. Redistributions in binary
|
|
form must reproduce the above copyright notice, this list of conditions and the
|
|
following disclaimer in the documentation and/or other materials provided with
|
|
the distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*)
|
|
|
|
(** {1 Simple S-expression parsing/printing} *)
|
|
|
|
type 'a or_error = [ `Ok of 'a | `Error of string ]
|
|
type 'a sequence = ('a -> unit) -> unit
|
|
type 'a gen = unit -> 'a option
|
|
|
|
type t =
|
|
| Atom of string
|
|
| List of t list
|
|
|
|
let eq a b = a = b
|
|
|
|
let compare a b = Pervasives.compare a b
|
|
|
|
let hash a = Hashtbl.hash a
|
|
|
|
(** {2 Serialization (encoding)} *)
|
|
|
|
let _must_escape s =
|
|
try
|
|
for i = 0 to String.length s - 1 do
|
|
let c = String.unsafe_get s i in
|
|
match c with
|
|
| ' ' | ')' | '(' | '\n' | '\t' -> raise Exit
|
|
| _ -> ()
|
|
done;
|
|
false
|
|
with Exit -> true
|
|
|
|
let rec to_buf b t = match t with
|
|
| Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s)
|
|
| Atom s -> Buffer.add_string b s
|
|
| List [] -> Buffer.add_string b "()"
|
|
| List [x] -> Printf.bprintf b "(%a)" to_buf x
|
|
| List l ->
|
|
Buffer.add_char b '(';
|
|
List.iteri
|
|
(fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t'))
|
|
l;
|
|
Buffer.add_char b ')'
|
|
|
|
let to_string t =
|
|
let b = Buffer.create 128 in
|
|
to_buf b t;
|
|
Buffer.contents b
|
|
|
|
let rec print fmt t = match t with
|
|
| Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s)
|
|
| Atom s -> Format.pp_print_string fmt s
|
|
| List [] -> Format.pp_print_string fmt "()"
|
|
| List [x] -> Format.fprintf fmt "(%a)" print x
|
|
| List l ->
|
|
Format.pp_print_char fmt '(';
|
|
List.iteri
|
|
(fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; print fmt t'))
|
|
l;
|
|
Format.pp_print_char fmt ')'
|
|
|
|
(** {2 Deserialization (decoding)} *)
|
|
|
|
type 'a parse_result = ['a or_error | `End ]
|
|
type 'a partial_result = [ 'a parse_result | `Await ]
|
|
|
|
module Streaming = struct
|
|
type token =
|
|
| Open
|
|
| Close
|
|
| Atom of string
|
|
|
|
type decode_state =
|
|
| St_start
|
|
| St_atom
|
|
| St_quoted
|
|
| St_escaped
|
|
| St_yield of token
|
|
| St_error of string
|
|
| St_end
|
|
|
|
type decoder = {
|
|
mutable st : decode_state;
|
|
mutable i : int;
|
|
mutable line : int;
|
|
mutable col : int;
|
|
mutable stop : bool;
|
|
buf : Buffer.t;
|
|
atom : Buffer.t; (* atom being parsed *)
|
|
}
|
|
|
|
let mk_decoder () = {
|
|
i = 0;
|
|
st = St_start;
|
|
line = 0;
|
|
col = 0;
|
|
stop = false;
|
|
buf=Buffer.create 32;
|
|
atom = Buffer.create 32;
|
|
}
|
|
|
|
exception NeedMoar
|
|
exception Error of string
|
|
exception EOI
|
|
|
|
(* yield [x] with current state [st] *)
|
|
let _yield d st x =
|
|
d.st <- st;
|
|
x
|
|
|
|
(* read the next char *)
|
|
let _next_char d =
|
|
if d.i = Buffer.length d.buf
|
|
then (
|
|
(* need more input; reset buffer to put it in *)
|
|
Buffer.clear d.buf;
|
|
d.i <- 0;
|
|
raise NeedMoar
|
|
) else (
|
|
let c = Buffer.nth d.buf d.i in
|
|
d.i <- d.i + 1;
|
|
c
|
|
)
|
|
|
|
let _take_buffer b =
|
|
let s = Buffer.contents b in
|
|
Buffer.clear b;
|
|
s
|
|
|
|
let _newline d =
|
|
d.line <- d.line + 1;
|
|
d.col <- 0;
|
|
()
|
|
|
|
(* raise an error *)
|
|
let _error d msg =
|
|
let msg' = Printf.sprintf "at %d,%d: %s" d.line d.col msg in
|
|
d.st <- St_error msg';
|
|
raise (Error msg')
|
|
|
|
(* next token *)
|
|
let rec _next d st = match st with
|
|
| St_error msg -> raise (Error msg)
|
|
| St_end -> raise EOI
|
|
| St_yield x ->
|
|
(* yield the given token, then start a fresh one *)
|
|
_yield d St_start x
|
|
| St_start ->
|
|
(* start reading next token *)
|
|
let c = _next_char d in
|
|
begin match c with
|
|
| '\n' -> _newline d; _next d St_start
|
|
| ' ' | '\t' -> _next d St_start
|
|
| '(' -> _yield d St_start Open
|
|
| ')' -> _yield d St_start Close
|
|
| '"' -> _next d St_quoted
|
|
| _ -> (* read regular atom *)
|
|
Buffer.add_char d.atom c;
|
|
_next d St_atom
|
|
end
|
|
| St_atom when d.stop ->
|
|
let a = _take_buffer d.atom in
|
|
_yield d St_end (Atom a)
|
|
| St_atom ->
|
|
(* reading an unquoted atom *)
|
|
let c = _next_char d in
|
|
begin match c with
|
|
| ' ' | '\t' | '\n' ->
|
|
let a = _take_buffer d.atom in
|
|
_yield d St_start (Atom a)
|
|
| ')' ->
|
|
let a = _take_buffer d.atom in
|
|
_yield d (St_yield Close) (Atom a)
|
|
| '(' ->
|
|
let a = _take_buffer d.atom in
|
|
_yield d (St_yield Open) (Atom a)
|
|
| '\\' -> _error d "unexpected char"
|
|
| _ ->
|
|
Buffer.add_char d.atom c;
|
|
_next d St_atom
|
|
end
|
|
| St_quoted when d.stop ->
|
|
let a = _take_buffer d.atom in
|
|
_yield d St_end (Atom a)
|
|
| St_quoted ->
|
|
(* reading an unquoted atom *)
|
|
let c = _next_char d in
|
|
begin match c with
|
|
| '\\' -> _next d St_escaped
|
|
| '"' ->
|
|
let a = _take_buffer d.atom in
|
|
_yield d St_start (Atom a)
|
|
| _ ->
|
|
Buffer.add_char d.atom c;
|
|
_next d St_atom
|
|
end
|
|
| St_escaped ->
|
|
if d.stop
|
|
then _error d "unexpected end of input (escaping)";
|
|
let c = _next_char d in
|
|
Buffer.add_char d.atom
|
|
(match c with
|
|
| 'n' -> '\n'
|
|
| 't' -> '\t'
|
|
| 'r' -> '\r'
|
|
| '\\' -> '\\'
|
|
| _ -> _error d "unexpected escaped character"
|
|
);
|
|
_next d St_quoted
|
|
|
|
let feed d s i len =
|
|
if d.stop then failwith "Sexp.Streaming.feed: end of input reached";
|
|
Buffer.add_substring d.buf s i len
|
|
|
|
let reached_end d =
|
|
d.stop <- true
|
|
|
|
let next_exn d = _next d d.st
|
|
|
|
let next d =
|
|
try
|
|
`Ok (_next d d.st)
|
|
with
|
|
| NeedMoar -> `Await
|
|
| Error msg -> `Error msg
|
|
| EOI -> `End
|
|
end
|
|
|
|
module ParseGen = struct
|
|
type 'a t = unit -> 'a parse_result
|
|
|
|
let to_list g : 'a list or_error =
|
|
let rec aux acc = match g() with
|
|
| `Error e -> `Error e
|
|
| `Ok x -> aux (x::acc)
|
|
| `End -> `Ok (List.rev acc)
|
|
in
|
|
aux []
|
|
|
|
let head g = match g() with
|
|
| `End -> `Error "expected at least one element"
|
|
| #or_error as x -> x
|
|
|
|
let head_exn g = match g() with
|
|
| `Ok x -> x
|
|
| `Error msg -> failwith msg
|
|
| `End -> failwith "expected at least one element"
|
|
|
|
let take n g =
|
|
assert (n>=0);
|
|
let n = ref n in
|
|
fun () ->
|
|
if !n = 0 then `End
|
|
else (
|
|
decr n;
|
|
g()
|
|
)
|
|
end
|
|
|
|
(* hidden parser state *)
|
|
type parser_state = {
|
|
ps_d : Streaming.decoder;
|
|
mutable ps_stack : t list list;
|
|
}
|
|
|
|
let mk_ps () = {
|
|
ps_d = Streaming.mk_decoder ();
|
|
ps_stack = [];
|
|
}
|
|
|
|
let _error ps msg =
|
|
let msg' = Printf.sprintf "at %d,%d: %s"
|
|
ps.ps_d.Streaming.line ps.ps_d.Streaming.col msg in
|
|
`Error msg'
|
|
|
|
(* next token, or await *)
|
|
let rec _next ps : t partial_result = match Streaming.next ps.ps_d with
|
|
| `Ok (Streaming.Atom s) ->
|
|
_push ps (Atom s)
|
|
| `Ok Streaming.Open ->
|
|
ps.ps_stack <- [] :: ps.ps_stack;
|
|
_next ps
|
|
| `Ok Streaming.Close ->
|
|
begin match ps.ps_stack with
|
|
| [] -> _error ps "unbalanced ')'"
|
|
| l :: stack ->
|
|
ps.ps_stack <- stack;
|
|
_push ps (List (List.rev l))
|
|
end
|
|
| `Error msg -> `Error msg
|
|
| `Await -> `Await
|
|
| `End -> `End
|
|
|
|
(* push a S-expr on top of the parser stack *)
|
|
and _push ps e = match ps.ps_stack with
|
|
| [] ->
|
|
`Ok e
|
|
| l :: tl ->
|
|
ps.ps_stack <- (e :: l) :: tl;
|
|
_next ps
|
|
|
|
let parse_gen g : t ParseGen.t =
|
|
let ps = mk_ps() in
|
|
let rec next () = match _next ps with
|
|
| `Await ->
|
|
begin match g() with
|
|
| None -> Streaming.reached_end ps.ps_d
|
|
| Some s -> Streaming.feed ps.ps_d s 0 (String.length s)
|
|
end;
|
|
next()
|
|
| `Ok x -> `Ok x
|
|
| `Error e -> `Error e
|
|
| `End -> `End
|
|
in
|
|
next
|
|
|
|
(* singleton generator *)
|
|
let _gen1 x =
|
|
let first = ref true in
|
|
fun () ->
|
|
if !first then (first:=false; Some x) else None
|
|
|
|
let parse_string s =
|
|
parse_gen (_gen1 s)
|
|
|
|
let parse_chan ic =
|
|
let buf = Buffer.create 512 in
|
|
let gen () =
|
|
Buffer.clear buf;
|
|
Buffer.add_channel buf ic 512;
|
|
if Buffer.length buf = 0
|
|
then None
|
|
else Some (Buffer.contents buf)
|
|
in
|
|
parse_gen gen
|
|
|
|
(** {6 Blocking} *)
|
|
|
|
let parse1_chan ic =
|
|
ParseGen.head (parse_chan ic)
|
|
|
|
let parse1_string s =
|
|
ParseGen.head (parse_string s)
|
|
|
|
let parse_l_chan ic =
|
|
ParseGen.to_list (parse_chan ic)
|
|
|
|
let parse_l_string s =
|
|
ParseGen.to_list (parse_string s)
|
|
|
|
let parse_l_gen g =
|
|
ParseGen.to_list (parse_gen g)
|
|
|
|
let parse_l_seq seq =
|
|
let ps = mk_ps() in
|
|
let l = ref [] in
|
|
(* read as many expressions as possible *)
|
|
let rec _nexts () = match _next ps with
|
|
| `Ok x -> l := x :: !l; _nexts ()
|
|
| `Error e -> raise (Streaming.Error e)
|
|
| `End -> raise Streaming.EOI
|
|
| `Await -> ()
|
|
in
|
|
try
|
|
seq
|
|
(fun s -> Streaming.feed ps.ps_d s 0 (String.length s); _nexts ());
|
|
Streaming.reached_end ps.ps_d;
|
|
_nexts ();
|
|
`Ok (List.rev !l)
|
|
with
|
|
| Streaming.Error msg -> `Error msg
|
|
| Streaming.EOI -> `Ok (List.rev !l)
|