diff --git a/.ocamlinit b/.ocamlinit index aeeaafee..2d7217dd 100644 --- a/.ocamlinit +++ b/.ocamlinit @@ -1,10 +1,16 @@ #use "topfind";; #thread -#directory "_build/core";; -#directory "_build/misc";; -#directory "_build/pervasives/";; -#directory "_build/string";; -#directory "_build/threads";; +#require "bigarray";; +#directory "_build/src/core";; +#directory "_build/src/misc";; +#directory "_build/src/pervasives/";; +#directory "_build/src/string";; +#directory "_build/src/io";; +#directory "_build/src/iter";; +#directory "_build/src/data";; +#directory "_build/src/sexp";; +#directory "_build/src/bigarray/";; +#directory "_build/src/threads";; #directory "_build/tests/";; #load "containers.cma";; #load "containers_iter.cma";; @@ -13,14 +19,11 @@ #load "containers_sexp.cma";; #load "containers_string.cma";; #load "containers_pervasives.cma";; +#load "containers_bigarray.cma";; #load "containers_misc.cma";; #thread;; #load "containers_thread.cma";; open Containers_misc;; #install_printer CCSexp.print;; -#require "CamlGI";; -#load "containers_cgi.cma";; -let pp_html fmt h = Format.pp_print_string fmt (ToWeb.HTML.render h);; -#install_printer pp_html;; (* vim:syntax=ocaml: *) diff --git a/_oasis b/_oasis index 5a5f3719..b6123d99 100644 --- a/_oasis +++ b/_oasis @@ -57,7 +57,7 @@ Library "containers_io" Library "containers_sexp" Path: src/sexp - Modules: CCSexp + Modules: CCSexp, CCSexpStream, CCSexpM BuildDepends: bytes FindlibParent: containers FindlibName: sexp @@ -236,6 +236,14 @@ Executable id_sexp Build$: flag(misc) BuildDepends: containers.sexp +Executable id_sexp2 + Path: examples/ + Install: false + CompiledObject: native + MainIs: id_sexp2.ml + Build$: flag(misc) + BuildDepends: containers.sexp + SourceRepository head Type: git Location: https://github.com/c-cube/ocaml-containers diff --git a/examples/id_sexp.ml b/examples/id_sexp.ml index a5d73e9b..1adf3080 100644 --- a/examples/id_sexp.ml +++ b/examples/id_sexp.ml @@ -3,11 +3,11 @@ let () = if Array.length Sys.argv <> 2 then failwith "usage: id_sexp file"; let f = Sys.argv.(1) in - let s = CCSexp.L.of_file f in + let s = CCSexpStream.L.of_file f in match s with | `Ok l -> List.iter - (fun s -> Format.printf "@[%a@]@." CCSexp.print s) + (fun s -> Format.printf "@[%a@]@." CCSexpStream.print s) l | `Error msg -> Format.printf "error: %s@." msg diff --git a/examples/id_sexp2.ml b/examples/id_sexp2.ml new file mode 100644 index 00000000..90e63c27 --- /dev/null +++ b/examples/id_sexp2.ml @@ -0,0 +1,13 @@ + + +let () = + if Array.length Sys.argv <> 2 then failwith "usage: id_sexp file"; + let f = Sys.argv.(1) in + let s = CCSexpM.parse_file_list f in + match s with + | `Ok l -> + List.iter + (fun s -> Format.printf "@[%a@]@." CCSexpM.print s) + l + | `Error msg -> + Format.printf "error: %s@." msg diff --git a/src/sexp/CCSexpM.ml b/src/sexp/CCSexpM.ml new file mode 100644 index 00000000..a6234a5f --- /dev/null +++ b/src/sexp/CCSexpM.ml @@ -0,0 +1,362 @@ +(* +Copyright (c) 2013, Simon Cruanes +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. Redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Simple S-expression parsing/printing} *) + +type 'a or_error = [ `Ok of 'a | `Error of string ] +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +type t = [ + | `Atom of string + | `List of t list + ] +type sexp = t + +let _with_in filename f = + let ic = open_in filename in + try + let x = f ic in + close_in ic; + x + with e -> + close_in ic; + `Error (Printexc.to_string e) + +let _with_out filename f = + let oc = open_out filename in + try + let x = f oc in + close_out oc; + x + with e -> + close_out oc; + raise e + +(** {2 Serialization (encoding)} *) + +(* shall we escape the string because of one of its chars? *) +let _must_escape s = + try + for i = 0 to String.length s - 1 do + let c = String.unsafe_get s i in + match c with + | ' ' | ';' | ')' | '(' | '"' | '\n' | '\t' -> raise Exit + | _ when Char.code c > 127 -> raise Exit (* non-ascii *) + | _ -> () + done; + false + with Exit -> true + +let rec to_buf b t = match t with + | `Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s) + | `Atom s -> Buffer.add_string b s + | `List [] -> Buffer.add_string b "()" + | `List [x] -> Printf.bprintf b "(%a)" to_buf x + | `List l -> + Buffer.add_char b '('; + List.iteri + (fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t')) + l; + Buffer.add_char b ')' + +let to_string t = + let b = Buffer.create 128 in + to_buf b t; + Buffer.contents b + +let rec print fmt t = match t with + | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) + | `Atom s -> Format.pp_print_string fmt s + | `List [] -> Format.pp_print_string fmt "()" + | `List [x] -> Format.fprintf fmt "@[(%a)@]" print x + | `List l -> + Format.open_hovbox 2; + Format.pp_print_char fmt '('; + List.iteri + (fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; print fmt t')) + l; + Format.pp_print_char fmt ')'; + Format.close_box () + +let rec print_noindent fmt t = match t with + | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) + | `Atom s -> Format.pp_print_string fmt s + | `List [] -> Format.pp_print_string fmt "()" + | `List [x] -> Format.fprintf fmt "(%a)" print_noindent x + | `List l -> + Format.pp_print_char fmt '('; + List.iteri + (fun i t' -> (if i > 0 then Format.pp_print_char fmt ' '; print_noindent fmt t')) + l; + Format.pp_print_char fmt ')' + +let to_chan oc t = + let fmt = Format.formatter_of_out_channel oc in + print fmt t; + Format.pp_print_flush fmt () + +let to_file_seq filename seq = + _with_out filename + (fun oc -> + seq (fun t -> to_chan oc t; output_char oc '\n') + ) + +let to_file filename t = to_file_seq filename (fun k -> k t) + +(** {2 Deserialization (decoding)} *) + +module type MONAD = sig + type 'a t + val return : 'a -> 'a t + val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +end + +type 'a parse_result = ['a or_error | `End ] + +module MakeDecode(M : MONAD) = struct + let (>>=) = M.(>>=) + + type t = { + buf : Bytes.t; + refill : Bytes.t -> int -> int -> int M.t; + atom : Buffer.t; + mutable i : int; (* offset in [buf] *) + mutable len : int; (* how many bytes of [buf] are usable *) + mutable line : int; + mutable col : int; + } + + let make ?(bufsize=1024) refill = + let bufsize = min (max bufsize 16) Sys.max_string_length in + { buf=Bytes.create bufsize; + refill; + atom = Buffer.create 32; + i=0; + len=0; + line=1; + col=1; + } + + let _is_digit c = Char.code '0' <= Char.code c && Char.code c <= Char.code '9' + let _digit2i c = Char.code c - Char.code '0' + + (* refill buffer. If it works, call k_succ, otherwise call k_fail *) + let _refill t k_succ k_fail = + t.refill t.buf 0 (Bytes.length t.buf) + >>= fun n -> + t.i <- 0; + t.len <- n; + if n = 0 then k_fail t else k_succ t + + (* get next char, assuming t.i < t.len *) + let _get t = + assert (t.i < t.len); + let c = Bytes.get t.buf t.i in + t.i <- t.i + 1; + if c = '\n' then (t.col <- 1; t.line <- t.line + 1) else t.col <- t.col + 1; + c + + (* return an error *) + let _error t msg = + let b = Buffer.create 32 in + Printf.bprintf b "at %d, %d: " t.line t.col; + Printf.kbprintf + (fun b -> + let msg' = Buffer.contents b in + M.return (`Error msg') + ) b msg + + let _error_eof t = _error t "unexpected end of input" + + (* The parsers all take a success continuation, and the decoder as + last arguments. The continuation is used to minimize the + number of calls to [>>=] and take two parameters, the next + char (if not consumed), and the returned expression itself *) + + (* read expression *) + let rec expr k t = + if t.i = t.len then _refill t (expr k) _error_eof + else match _get t with + | ' ' | '\t' | '\n' -> expr k t + | c -> expr_starting_with c k t + + and expr_starting_with c k t = match c with + | ' ' | '\t' | '\n' -> assert false + | ';' -> skip_comment (fun _ () -> expr k t) t + | '(' -> expr_list [] k t + | ')' -> _error t "unexpected ')'" + | '\\' -> _error t "unexpected '\\'" + | '"' -> quoted k t + | c -> + Buffer.add_char t.atom c; + atom k t + + (* parse list *) + and expr_list acc k t = + if t.i = t.len then _refill t (expr_list acc k) _error_eof + else match _get t with + | ' ' | '\t' | '\n' -> expr_list acc k t + | ')' -> k None (`List (List.rev acc)) + | c -> + expr_starting_with c + (fun last e -> match last with + | Some '(' -> expr_list [] (fun _ l -> expr_list (l::acc) k t) t + | Some ')' -> k None (`List (List.rev (e::acc))) + | _ -> expr_list (e::acc) k t + ) t + + (* return the current atom (last char: c) *) + and _return_atom last k t = + let s = Buffer.contents t.atom in + Buffer.clear t.atom; + k last (`Atom s) + + (* parse atom *) + and atom k t = + if t.i = t.len then _refill t (atom k) (_return_atom None k) + else match _get t with + | '\\' -> _error t "unexpected '\\' in non-quoted string" + | '"' -> _error t "unexpected '\"' in the middle of an atom" + | (' ' | '\n' | '\t' | '(' | ')') as c -> + _return_atom (Some c) k t + | c -> + Buffer.add_char t.atom c; + atom k t + + (* quoted string *) + and quoted k t = + if t.i = t.len then _refill t (quoted k) _error_eof + else match _get t with + | '\\' -> + (* read escaped char and continue *) + escaped + (fun c -> + Buffer.add_char t.atom c; + quoted k t + ) t + | '"' -> _return_atom None k t + | c -> + Buffer.add_char t.atom c; + quoted k t + + (* read escaped char *) + and escaped k t = + if t.i = t.len then _refill t (escaped k) _error_eof + else match _get t with + | 'n' -> k '\n' + | 't' -> k '\t' + | 'r' -> k '\r' + | 'b' -> k '\b' + | '\\' -> k '\\' + | '"' -> k '"' + | c when _is_digit c -> + read2int (_digit2i c) (fun n -> k (Char.chr n)) t + | c -> _error t "unexpected escaped char '%c'" c + + and read2int i k t = + if t.i = t.len then _refill t (read2int i k) _error_eof + else match _get t with + | c when _is_digit c -> read1int (10 * i + _digit2i c) k t + | c -> _error t "unexpected char '%c' when reading byte" c + + and read1int i k t = + if t.i = t.len then _refill t (read1int i k) _error_eof + else match _get t with + | c when _is_digit c -> k (10 * i + _digit2i c) + | c -> _error t "unexpected char '%c' when reading byte" c + + (* skip until end of line, then call next() *) + and skip_comment k t = + if t.i = t.len + then _refill t (skip_comment k) _error_eof + else match _get t with + | '\n' -> k None () + | _ -> skip_comment k t + + (* top-level expression *) + let rec expr_or_end k t = + if t.i = t.len + then _refill t (expr_or_end k) (fun _ -> M.return `End) + else match _get t with + | ' ' | '\t' | '\n' -> expr_or_end k t + | c -> expr_starting_with c k t + + (* entry point *) + let next t : sexp parse_result M.t = + expr_or_end (fun _ x -> M.return (`Ok x)) t +end + +module D = MakeDecode(struct + type 'a t = 'a + let return x = x + let (>>=) x f = f x +end) + +let parse_string s : t or_error = + let n = String.length s in + let stop = ref false in + let refill bytes i _len = + if !stop then 0 + else (stop := true; Bytes.blit_string s 0 bytes i n; n) + in + let d = D.make ~bufsize:n refill in + match D.next d with + | `End -> `Error "unexpected end of file" + | (`Ok _ | `Error _) as res -> res + +(*$T + CCError.to_opt (parse_string "(abc d/e/f \"hello \\\" () world\" )") <> None + CCError.to_opt (parse_string "(abc ( d e ffff ) \"hello/world\")") <> None +*) + +let parse_chan ?bufsize ic = + let d = D.make ?bufsize (input ic) in + match D.next d with + | `End -> `Error "unexpected end of file" + | (`Ok _ | `Error _) as res -> res + +let parse_chan_gen ?bufsize ic = + let d = D.make ?bufsize (input ic) in + fun () -> + match D.next d with + | `End -> None + | `Error _ as e -> Some e + | `Ok _ as res -> Some res + +let parse_chan_list ?bufsize ic = + let d = D.make ?bufsize (input ic) in + let rec iter acc = match D.next d with + | `End -> `Ok (List.rev acc) + | `Ok x -> iter (x::acc) + | `Error _ as e -> e + in + iter [] + +let parse_file filename = + _with_in filename (fun ic -> parse_chan ic) + +let parse_file_list filename = + _with_in filename (fun ic -> parse_chan_list ic) diff --git a/src/sexp/CCSexpM.mli b/src/sexp/CCSexpM.mli new file mode 100644 index 00000000..239792c1 --- /dev/null +++ b/src/sexp/CCSexpM.mli @@ -0,0 +1,106 @@ +(* +Copyright (c) 2013, Simon Cruanes +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. Redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Simple and efficient S-expression parsing/printing} + +@since NEXT_RELEASE *) + +type 'a or_error = [ `Ok of 'a | `Error of string ] +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +(** {2 Basics} *) + +type t = [ + | `Atom of string + | `List of t list + ] +type sexp = t + +(** {2 Serialization (encoding)} *) + +val to_buf : Buffer.t -> t -> unit + +val to_string : t -> string + +val to_file : string -> t -> unit + +val to_file_seq : string -> t sequence -> unit +(** Print the given sequence of expressions to a file *) + +val to_chan : out_channel -> t -> unit + +val print : Format.formatter -> t -> unit +(** Pretty-printer nice on human eyes (including indentation) *) + +val print_noindent : Format.formatter -> t -> unit +(** Raw, direct printing as compact as possible *) + +(** {2 Deserialization (decoding)} *) + +module type MONAD = sig + type 'a t + val return : 'a -> 'a t + val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +end + +type 'a parse_result = ['a or_error | `End ] +(** A parser of ['a] can return [`Ok x] when it parsed a value, + or [`Error e] when a parse error was encountered, or + [`End] if the input was empty *) + +module MakeDecode(M : MONAD) : sig + type t + (** Decoder *) + + val make : ?bufsize:int -> (Bytes.t -> int -> int -> int M.t) -> t + (** Make a decoder with the given function used to refill an + internal buffer. The function might return [0] if the + input is exhausted. + @param bufsize size of internal buffer *) + + val next : t -> sexp parse_result M.t + (** Parse the next S-expression or return an error if the input isn't + long enough or isn't a proper S-expression *) +end + +val parse_string : string -> t or_error +(** Parse a string *) + +val parse_chan : ?bufsize:int -> in_channel -> t or_error +(** Parse a S-expression from the given channel. Can read more data than + necessary, so don't use this if you need finer-grained control (e.g. + to read something else {b after} the S-exp) *) + +val parse_chan_gen : ?bufsize:int -> in_channel -> t or_error gen +(** Parse a channel into a generator of S-expressions *) + +val parse_chan_list : ?bufsize:int -> in_channel -> t list or_error + +val parse_file : string -> t or_error +(** Open the file and read a S-exp from it *) + +val parse_file_list : string -> t list or_error +(** Open the file and read a S-exp from it *)