(* Copyright (c) 2013, Simon Cruanes All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) (** {1 Simple S-expression parsing/printing} *) type 'a or_error = [ `Ok of 'a | `Error of string ] type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option type t = | Atom of string | List of t list let eq a b = a = b let compare a b = Pervasives.compare a b let hash a = Hashtbl.hash a (** {2 Serialization (encoding)} *) let _must_escape s = try for i = 0 to String.length s - 1 do let c = String.unsafe_get s i in match c with | ' ' | ')' | '(' | '\n' | '\t' -> raise Exit | _ -> () done; false with Exit -> true let rec to_buf b t = match t with | Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s) | Atom s -> Buffer.add_string b s | List [] -> Buffer.add_string b "()" | List [x] -> Printf.bprintf b "(%a)" to_buf x | List l -> Buffer.add_char b '('; List.iteri (fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t')) l; Buffer.add_char b ')' let to_string t = let b = Buffer.create 128 in to_buf b t; Buffer.contents b let rec print fmt t = match t with | Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) | Atom s -> Format.pp_print_string fmt s | List [] -> Format.pp_print_string fmt "()" | List [x] -> Format.fprintf fmt "(%a)" print x | List l -> Format.pp_print_char fmt '('; List.iteri (fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; print fmt t')) l; Format.pp_print_char fmt ')' (** {2 Deserialization (decoding)} *) type 'a parse_result = ['a or_error | `End ] type 'a partial_result = [ 'a parse_result | `Await ] module Streaming = struct type token = | Open | Close | Atom of string type decode_state = | St_start | St_atom | St_quoted | St_escaped | St_yield of token | St_error of string | St_end type decoder = { mutable st : decode_state; mutable i : int; mutable line : int; mutable col : int; mutable stop : bool; buf : Buffer.t; atom : Buffer.t; (* atom being parsed *) } let mk_decoder () = { i = 0; st = St_start; line = 0; col = 0; stop = false; buf=Buffer.create 32; atom = Buffer.create 32; } exception NeedMoar exception Error of string exception EOI (* yield [x] with current state [st] *) let _yield d st x = d.st <- st; x (* read the next char *) let _next_char d = if d.i = Buffer.length d.buf then ( (* need more input; reset buffer to put it in *) Buffer.clear d.buf; d.i <- 0; raise NeedMoar ) else ( let c = Buffer.nth d.buf d.i in d.i <- d.i + 1; c ) let _take_buffer b = let s = Buffer.contents b in Buffer.clear b; s let _newline d = d.line <- d.line + 1; d.col <- 0; () (* raise an error *) let _error d msg = let msg' = Printf.sprintf "at %d,%d: %s" d.line d.col msg in d.st <- St_error msg'; raise (Error msg') (* next token *) let rec _next d st = match st with | St_error msg -> raise (Error msg) | St_end -> raise EOI | St_yield x -> (* yield the given token, then start a fresh one *) _yield d St_start x | St_start -> (* start reading next token *) let c = _next_char d in begin match c with | '\n' -> _newline d; _next d St_start | ' ' | '\t' -> _next d St_start | '(' -> _yield d St_start Open | ')' -> _yield d St_start Close | '"' -> _next d St_quoted | _ -> (* read regular atom *) Buffer.add_char d.atom c; _next d St_atom end | St_atom when d.stop -> let a = _take_buffer d.atom in _yield d St_end (Atom a) | St_atom -> (* reading an unquoted atom *) let c = _next_char d in begin match c with | ' ' | '\t' | '\n' -> let a = _take_buffer d.atom in _yield d St_start (Atom a) | ')' -> let a = _take_buffer d.atom in _yield d (St_yield Close) (Atom a) | '(' -> let a = _take_buffer d.atom in _yield d (St_yield Open) (Atom a) | '\\' -> _error d "unexpected char" | _ -> Buffer.add_char d.atom c; _next d St_atom end | St_quoted when d.stop -> let a = _take_buffer d.atom in _yield d St_end (Atom a) | St_quoted -> (* reading an unquoted atom *) let c = _next_char d in begin match c with | '\\' -> _next d St_escaped | '"' -> let a = _take_buffer d.atom in _yield d St_start (Atom a) | _ -> Buffer.add_char d.atom c; _next d St_atom end | St_escaped -> if d.stop then _error d "unexpected end of input (escaping)"; let c = _next_char d in Buffer.add_char d.atom (match c with | 'n' -> '\n' | 't' -> '\t' | 'r' -> '\r' | '\\' -> '\\' | _ -> _error d "unexpected escaped character" ); _next d St_quoted let feed d s i len = if d.stop then failwith "Sexp.Streaming.feed: end of input reached"; Buffer.add_substring d.buf s i len let reached_end d = d.stop <- true let next_exn d = _next d d.st let next d = try `Ok (_next d d.st) with | NeedMoar -> `Await | Error msg -> `Error msg | EOI -> `End end module ParseGen = struct type 'a t = unit -> 'a parse_result let to_list g : 'a list or_error = let rec aux acc = match g() with | `Error e -> `Error e | `Ok x -> aux (x::acc) | `End -> `Ok (List.rev acc) in aux [] let head g = match g() with | `End -> `Error "expected at least one element" | #or_error as x -> x let head_exn g = match g() with | `Ok x -> x | `Error msg -> failwith msg | `End -> failwith "expected at least one element" let take n g = assert (n>=0); let n = ref n in fun () -> if !n = 0 then `End else ( decr n; g() ) end (* hidden parser state *) type parser_state = { ps_d : Streaming.decoder; mutable ps_stack : t list list; } let mk_ps () = { ps_d = Streaming.mk_decoder (); ps_stack = []; } let _error ps msg = let msg' = Printf.sprintf "at %d,%d: %s" ps.ps_d.Streaming.line ps.ps_d.Streaming.col msg in `Error msg' (* next token, or await *) let rec _next ps : t partial_result = match Streaming.next ps.ps_d with | `Ok (Streaming.Atom s) -> _push ps (Atom s) | `Ok Streaming.Open -> ps.ps_stack <- [] :: ps.ps_stack; _next ps | `Ok Streaming.Close -> begin match ps.ps_stack with | [] -> _error ps "unbalanced ')'" | l :: stack -> ps.ps_stack <- stack; _push ps (List (List.rev l)) end | `Error msg -> `Error msg | `Await -> `Await | `End -> `End (* push a S-expr on top of the parser stack *) and _push ps e = match ps.ps_stack with | [] -> `Ok e | l :: tl -> ps.ps_stack <- (e :: l) :: tl; _next ps let parse_gen g : t ParseGen.t = let ps = mk_ps() in let rec next () = match _next ps with | `Await -> begin match g() with | None -> Streaming.reached_end ps.ps_d | Some s -> Streaming.feed ps.ps_d s 0 (String.length s) end; next() | `Ok x -> `Ok x | `Error e -> `Error e | `End -> `End in next (* singleton generator *) let _gen1 x = let first = ref true in fun () -> if !first then (first:=false; Some x) else None let parse_string s = parse_gen (_gen1 s) let parse_chan ic = let buf = Buffer.create 512 in let gen () = Buffer.clear buf; Buffer.add_channel buf ic 512; if Buffer.length buf = 0 then None else Some (Buffer.contents buf) in parse_gen gen (** {6 Blocking} *) let parse1_chan ic = ParseGen.head (parse_chan ic) let parse1_string s = ParseGen.head (parse_string s) let parse_l_chan ic = ParseGen.to_list (parse_chan ic) let parse_l_string s = ParseGen.to_list (parse_string s) let parse_l_gen g = ParseGen.to_list (parse_gen g) let parse_l_seq seq = let ps = mk_ps() in let l = ref [] in (* read as many expressions as possible *) let rec _nexts () = match _next ps with | `Ok x -> l := x :: !l; _nexts () | `Error e -> raise (Streaming.Error e) | `End -> raise Streaming.EOI | `Await -> () in try seq (fun s -> Streaming.feed ps.ps_d s 0 (String.length s); _nexts ()); Streaming.reached_end ps.ps_d; _nexts (); `Ok (List.rev !l) with | Streaming.Error msg -> `Error msg | Streaming.EOI -> `Ok (List.rev !l)