mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
new interface for Sexp.Streaming, allowing to provide a 'source' (generator of tokens)
This commit is contained in:
parent
dcf134b1eb
commit
33d33ec6e4
2 changed files with 291 additions and 196 deletions
419
misc/sexp.ml
419
misc/sexp.ml
|
|
@ -118,7 +118,96 @@ let to_file filename t = seq_to_file filename (fun k -> k t)
|
||||||
type 'a parse_result = ['a or_error | `End ]
|
type 'a parse_result = ['a or_error | `End ]
|
||||||
type 'a partial_result = [ 'a parse_result | `Await ]
|
type 'a partial_result = [ 'a parse_result | `Await ]
|
||||||
|
|
||||||
module Streaming = struct
|
module Source = struct
|
||||||
|
type individual_char =
|
||||||
|
| NC_yield of char
|
||||||
|
| NC_end
|
||||||
|
| NC_await
|
||||||
|
|
||||||
|
type t = unit -> individual_char
|
||||||
|
type source = t
|
||||||
|
|
||||||
|
module Manual = struct
|
||||||
|
type t = {
|
||||||
|
mutable i : int; (* offset *)
|
||||||
|
mutable stop : bool;
|
||||||
|
buf : Buffer.t; (* accessible chunk of input *)
|
||||||
|
}
|
||||||
|
|
||||||
|
let make() = {
|
||||||
|
i = 0;
|
||||||
|
stop = false;
|
||||||
|
buf=Buffer.create 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
let to_src d () =
|
||||||
|
if d.i = Buffer.length d.buf
|
||||||
|
then
|
||||||
|
if d.stop then NC_end else NC_await
|
||||||
|
else (
|
||||||
|
let c = Buffer.nth d.buf d.i in
|
||||||
|
d.i <- d.i + 1;
|
||||||
|
NC_yield c
|
||||||
|
)
|
||||||
|
|
||||||
|
let feed d s i len =
|
||||||
|
if d.stop then failwith "Sexp.Streaming.Manual.feed: reached EOI";
|
||||||
|
Buffer.add_substring d.buf s i len
|
||||||
|
|
||||||
|
let reached_end d = d.stop <- true
|
||||||
|
end
|
||||||
|
|
||||||
|
let of_string s =
|
||||||
|
let i = ref 0 in
|
||||||
|
fun () ->
|
||||||
|
if !i=String.length s
|
||||||
|
then NC_end
|
||||||
|
else (
|
||||||
|
let c = String.get s !i in
|
||||||
|
incr i;
|
||||||
|
NC_yield c
|
||||||
|
)
|
||||||
|
|
||||||
|
let of_chan ?(bufsize=1024) ic =
|
||||||
|
let buf = String.make bufsize ' ' in
|
||||||
|
let i = ref 0 in
|
||||||
|
let n = ref 0 in
|
||||||
|
let stop = ref false in
|
||||||
|
let rec next() =
|
||||||
|
if !stop then NC_end
|
||||||
|
else if !i = !n
|
||||||
|
then ( (* refill *)
|
||||||
|
i := 0;
|
||||||
|
n := input ic buf 0 bufsize;
|
||||||
|
if !n = 0 then (stop := true; NC_end) else next()
|
||||||
|
) else ( (* yield *)
|
||||||
|
let c = String.get buf !i in
|
||||||
|
incr i;
|
||||||
|
NC_yield c
|
||||||
|
)
|
||||||
|
in next
|
||||||
|
|
||||||
|
let of_gen g =
|
||||||
|
let s = ref "" in
|
||||||
|
let i = ref 0 in
|
||||||
|
let stop = ref false in
|
||||||
|
let rec next() =
|
||||||
|
if !stop then NC_end
|
||||||
|
else if !i = String.length !s
|
||||||
|
then (
|
||||||
|
match g() with
|
||||||
|
| None -> stop := true; NC_end
|
||||||
|
| Some buf -> s := buf; i := 0; next ()
|
||||||
|
) else (
|
||||||
|
let c = String.get !s !i in
|
||||||
|
incr i;
|
||||||
|
NC_yield c
|
||||||
|
)
|
||||||
|
in next
|
||||||
|
end
|
||||||
|
|
||||||
|
module Lexer = struct
|
||||||
|
(** An individual character returned by a source *)
|
||||||
type token =
|
type token =
|
||||||
| Open
|
| Open
|
||||||
| Close
|
| Close
|
||||||
|
|
@ -135,60 +224,39 @@ module Streaming = struct
|
||||||
| St_error of string
|
| St_error of string
|
||||||
| St_end
|
| St_end
|
||||||
|
|
||||||
type decoder = {
|
type t = {
|
||||||
|
src : Source.t;
|
||||||
|
atom : Buffer.t; (* atom being parsed *)
|
||||||
mutable st : decode_state;
|
mutable st : decode_state;
|
||||||
mutable i : int;
|
|
||||||
mutable line : int;
|
mutable line : int;
|
||||||
mutable col : int;
|
mutable col : int;
|
||||||
mutable stop : bool;
|
|
||||||
buf : Buffer.t;
|
|
||||||
atom : Buffer.t; (* atom being parsed *)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let mk_decoder () = {
|
let make src = {
|
||||||
i = 0;
|
src;
|
||||||
st = St_start;
|
st = St_start;
|
||||||
line = 1;
|
line = 1;
|
||||||
col = 1;
|
col = 1;
|
||||||
stop = false;
|
|
||||||
buf=Buffer.create 32;
|
|
||||||
atom = Buffer.create 32;
|
atom = Buffer.create 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
exception NeedMoar
|
let of_string s = make (Source.of_string s)
|
||||||
exception Error of string
|
|
||||||
exception EOI
|
let of_chan ic = make (Source.of_chan ic)
|
||||||
|
|
||||||
|
let line t = t.line
|
||||||
|
let col t = t.col
|
||||||
|
|
||||||
(* yield [x] with current state [st] *)
|
(* yield [x] with current state [st] *)
|
||||||
let _yield d st x =
|
let _yield d st x =
|
||||||
d.st <- st;
|
d.st <- st;
|
||||||
x
|
`Ok x
|
||||||
|
|
||||||
(* read the next char *)
|
|
||||||
let _next_char d =
|
|
||||||
if d.i = Buffer.length d.buf
|
|
||||||
then (
|
|
||||||
(* need more input; reset buffer to put it in *)
|
|
||||||
Buffer.clear d.buf;
|
|
||||||
d.i <- 0;
|
|
||||||
raise NeedMoar
|
|
||||||
) else (
|
|
||||||
let c = Buffer.nth d.buf d.i in
|
|
||||||
d.i <- d.i + 1;
|
|
||||||
d.col <- d.col + 1;
|
|
||||||
c
|
|
||||||
)
|
|
||||||
|
|
||||||
let _take_buffer b =
|
let _take_buffer b =
|
||||||
let s = Buffer.contents b in
|
let s = Buffer.contents b in
|
||||||
Buffer.clear b;
|
Buffer.clear b;
|
||||||
s
|
s
|
||||||
|
|
||||||
let _newline d =
|
|
||||||
d.line <- d.line + 1;
|
|
||||||
d.col <- 0;
|
|
||||||
()
|
|
||||||
|
|
||||||
(* raise an error *)
|
(* raise an error *)
|
||||||
let _error d msg =
|
let _error d msg =
|
||||||
let b = Buffer.create 32 in
|
let b = Buffer.create 32 in
|
||||||
|
|
@ -197,121 +265,116 @@ module Streaming = struct
|
||||||
(fun b ->
|
(fun b ->
|
||||||
let msg' = Buffer.contents b in
|
let msg' = Buffer.contents b in
|
||||||
d.st <- St_error msg';
|
d.st <- St_error msg';
|
||||||
raise (Error msg'))
|
`Error msg')
|
||||||
b msg
|
b msg
|
||||||
|
|
||||||
let _end d =
|
let _end d =
|
||||||
d.st <- St_end;
|
d.st <- St_end;
|
||||||
raise EOI
|
`End
|
||||||
|
|
||||||
let _is_digit c = Char.code '0' <= Char.code c && Char.code c <= Char.code '9'
|
let _is_digit c = Char.code '0' <= Char.code c && Char.code c <= Char.code '9'
|
||||||
let _digit2i c = Char.code c - Char.code '0'
|
let _digit2i c = Char.code c - Char.code '0'
|
||||||
|
|
||||||
(* next token *)
|
(* next token *)
|
||||||
let rec _next d st =
|
let rec _next d st : token partial_result =
|
||||||
d.st <- st;
|
|
||||||
match st with
|
match st with
|
||||||
| St_error msg -> raise (Error msg)
|
| St_error msg -> `Error msg
|
||||||
| St_end -> _end d
|
| St_end -> _end d
|
||||||
| St_yield x ->
|
| St_yield x ->
|
||||||
(* yield the given token, then start a fresh one *)
|
(* yield the given token, then start a fresh one *)
|
||||||
_yield d St_start x
|
_yield d St_start x
|
||||||
| St_start when d.stop -> _end d
|
| _ ->
|
||||||
| St_start ->
|
d.st <- st;
|
||||||
(* start reading next token *)
|
_process_next d st
|
||||||
let c = _next_char d in
|
|
||||||
begin match c with
|
|
||||||
| '\n' -> _newline d; _next d St_start
|
|
||||||
| ' ' | '\t' -> _next d St_start
|
|
||||||
| '(' -> _yield d St_start Open
|
|
||||||
| ')' -> _yield d St_start Close
|
|
||||||
| '"' -> _next d St_quoted
|
|
||||||
| _ -> (* read regular atom *)
|
|
||||||
Buffer.add_char d.atom c;
|
|
||||||
_next d St_atom
|
|
||||||
end
|
|
||||||
| St_atom when d.stop ->
|
|
||||||
let a = _take_buffer d.atom in
|
|
||||||
_yield d St_end (Atom a)
|
|
||||||
| St_atom ->
|
|
||||||
(* reading an unquoted atom *)
|
|
||||||
let c = _next_char d in
|
|
||||||
begin match c with
|
|
||||||
| '\n' ->
|
|
||||||
_newline d;
|
|
||||||
let a = _take_buffer d.atom in
|
|
||||||
_yield d St_start (Atom a)
|
|
||||||
| ' ' | '\t' ->
|
|
||||||
let a = _take_buffer d.atom in
|
|
||||||
_yield d St_start (Atom a)
|
|
||||||
| ')' ->
|
|
||||||
let a = _take_buffer d.atom in
|
|
||||||
_yield d (St_yield Close) (Atom a)
|
|
||||||
| '(' ->
|
|
||||||
let a = _take_buffer d.atom in
|
|
||||||
_yield d (St_yield Open) (Atom a)
|
|
||||||
| '"' -> _error d "unexpected \" (parsing atom %s)" (Buffer.contents d.atom)
|
|
||||||
| '\\' -> _error d "unexpected \\"
|
|
||||||
| _ ->
|
|
||||||
Buffer.add_char d.atom c;
|
|
||||||
_next d St_atom
|
|
||||||
end
|
|
||||||
| St_quoted when d.stop ->
|
|
||||||
let a = _take_buffer d.atom in
|
|
||||||
_yield d St_end (Atom a)
|
|
||||||
| St_quoted ->
|
|
||||||
(* reading an unquoted atom *)
|
|
||||||
let c = _next_char d in
|
|
||||||
begin match c with
|
|
||||||
| '\\' -> _next d St_escaped
|
|
||||||
| '"' ->
|
|
||||||
let a = _take_buffer d.atom in
|
|
||||||
_yield d St_start (Atom a)
|
|
||||||
| _ ->
|
|
||||||
Buffer.add_char d.atom c;
|
|
||||||
_next d St_quoted
|
|
||||||
end
|
|
||||||
| (St_escaped | St_raw_char1 _ | St_raw_char2 _) when d.stop ->
|
|
||||||
_error d "unexpected end of input (escaping)"
|
|
||||||
| St_escaped ->
|
|
||||||
begin match _next_char d with
|
|
||||||
| 'n' -> Buffer.add_char d.atom '\n'; _next d St_quoted
|
|
||||||
| 't' -> Buffer.add_char d.atom '\t'; _next d St_quoted
|
|
||||||
| 'r' -> Buffer.add_char d.atom '\r'; _next d St_quoted
|
|
||||||
| 'b' -> Buffer.add_char d.atom '\b'; _next d St_quoted
|
|
||||||
| '"' -> Buffer.add_char d.atom '"'; _next d St_quoted
|
|
||||||
| '\\' -> Buffer.add_char d.atom '\\'; _next d St_quoted
|
|
||||||
| c when _is_digit c -> _next d (St_raw_char1 (_digit2i c))
|
|
||||||
| c -> _error d "unexpected escaped character %c" c
|
|
||||||
end
|
|
||||||
| St_raw_char1 i ->
|
|
||||||
begin match _next_char d with
|
|
||||||
| c when _is_digit c -> _next d (St_raw_char2 (i*10 + _digit2i c))
|
|
||||||
| c -> _error d "expected digit, got %c" c
|
|
||||||
end
|
|
||||||
| St_raw_char2 i ->
|
|
||||||
begin match _next_char d with
|
|
||||||
| c when _is_digit c ->
|
|
||||||
(* read an escaped char *)
|
|
||||||
Buffer.add_char d.atom (Char.chr (i*10+_digit2i c));
|
|
||||||
_next d St_quoted
|
|
||||||
| c -> _error d "expected digit, got %c" c
|
|
||||||
end
|
|
||||||
|
|
||||||
let feed d s i len =
|
(* read and proces the next character *)
|
||||||
if d.stop then failwith "Sexp.Streaming.feed: end of input reached";
|
and _process_next d st =
|
||||||
Buffer.add_substring d.buf s i len
|
match d.src () with
|
||||||
|
| Source.NC_end ->
|
||||||
|
begin match st with
|
||||||
|
| St_error _ | St_end | St_yield _ -> assert false
|
||||||
|
| St_start -> _end d
|
||||||
|
| St_atom ->
|
||||||
|
let a = _take_buffer d.atom in
|
||||||
|
_yield d St_end (Atom a)
|
||||||
|
| St_quoted ->
|
||||||
|
let a = _take_buffer d.atom in
|
||||||
|
_yield d St_end (Atom a)
|
||||||
|
| (St_escaped | St_raw_char1 _ | St_raw_char2 _) ->
|
||||||
|
_error d "unexpected end of input (escaping)"
|
||||||
|
end
|
||||||
|
| Source.NC_await -> `Await
|
||||||
|
| Source.NC_yield c ->
|
||||||
|
if c='\n'
|
||||||
|
then (d.col <- 1; d.line <- d.line + 1)
|
||||||
|
else (d.col <- d.col + 1);
|
||||||
|
(* use the next char *)
|
||||||
|
match st with
|
||||||
|
| St_error _ | St_end | St_yield _ -> assert false
|
||||||
|
| St_start ->
|
||||||
|
begin match c with
|
||||||
|
| ' ' | '\t' | '\n' -> _next d St_start
|
||||||
|
| '(' -> _yield d St_start Open
|
||||||
|
| ')' -> _yield d St_start Close
|
||||||
|
| '"' -> _next d St_quoted
|
||||||
|
| _ -> (* read regular atom *)
|
||||||
|
Buffer.add_char d.atom c;
|
||||||
|
_next d St_atom
|
||||||
|
end
|
||||||
|
| St_atom ->
|
||||||
|
begin match c with
|
||||||
|
| ' ' | '\t' | '\n' ->
|
||||||
|
let a = _take_buffer d.atom in
|
||||||
|
_yield d St_start (Atom a)
|
||||||
|
| ')' ->
|
||||||
|
let a = _take_buffer d.atom in
|
||||||
|
_yield d (St_yield Close) (Atom a)
|
||||||
|
| '(' ->
|
||||||
|
let a = _take_buffer d.atom in
|
||||||
|
_yield d (St_yield Open) (Atom a)
|
||||||
|
| '"' -> _error d "unexpected \" (parsing atom %s)" (Buffer.contents d.atom)
|
||||||
|
| '\\' -> _error d "unexpected \\"
|
||||||
|
| _ ->
|
||||||
|
Buffer.add_char d.atom c;
|
||||||
|
_next d St_atom
|
||||||
|
end
|
||||||
|
| St_quoted ->
|
||||||
|
(* reading an unquoted atom *)
|
||||||
|
begin match c with
|
||||||
|
| '\\' -> _next d St_escaped
|
||||||
|
| '"' ->
|
||||||
|
let a = _take_buffer d.atom in
|
||||||
|
_yield d St_start (Atom a)
|
||||||
|
| _ ->
|
||||||
|
Buffer.add_char d.atom c;
|
||||||
|
_next d St_quoted
|
||||||
|
end
|
||||||
|
| St_escaped ->
|
||||||
|
begin match c with
|
||||||
|
| 'n' -> Buffer.add_char d.atom '\n'; _next d St_quoted
|
||||||
|
| 't' -> Buffer.add_char d.atom '\t'; _next d St_quoted
|
||||||
|
| 'r' -> Buffer.add_char d.atom '\r'; _next d St_quoted
|
||||||
|
| 'b' -> Buffer.add_char d.atom '\b'; _next d St_quoted
|
||||||
|
| '"' -> Buffer.add_char d.atom '"'; _next d St_quoted
|
||||||
|
| '\\' -> Buffer.add_char d.atom '\\'; _next d St_quoted
|
||||||
|
| _ when _is_digit c -> _next d (St_raw_char1 (_digit2i c))
|
||||||
|
| _ -> _error d "unexpected escaped character %c" c
|
||||||
|
end
|
||||||
|
| St_raw_char1 i ->
|
||||||
|
begin match c with
|
||||||
|
| _ when _is_digit c -> _next d (St_raw_char2 (i*10 + _digit2i c))
|
||||||
|
| _ -> _error d "expected digit, got %c" c
|
||||||
|
end
|
||||||
|
| St_raw_char2 i ->
|
||||||
|
begin match c with
|
||||||
|
| c when _is_digit c ->
|
||||||
|
(* read an escaped char *)
|
||||||
|
Buffer.add_char d.atom (Char.chr (i*10+_digit2i c));
|
||||||
|
_next d St_quoted
|
||||||
|
| c -> _error d "expected digit, got %c" c
|
||||||
|
end
|
||||||
|
|
||||||
let reached_end d =
|
let next d = _next d d.st
|
||||||
d.stop <- true
|
|
||||||
|
|
||||||
let next d =
|
|
||||||
try
|
|
||||||
`Ok (_next d d.st)
|
|
||||||
with
|
|
||||||
| NeedMoar -> `Await
|
|
||||||
| Error msg -> `Error msg
|
|
||||||
| EOI -> `End
|
|
||||||
end
|
end
|
||||||
|
|
||||||
module ParseGen = struct
|
module ParseGen = struct
|
||||||
|
|
@ -347,28 +410,28 @@ end
|
||||||
|
|
||||||
(* hidden parser state *)
|
(* hidden parser state *)
|
||||||
type parser_state = {
|
type parser_state = {
|
||||||
ps_d : Streaming.decoder;
|
ps_d : Lexer.t;
|
||||||
mutable ps_stack : t list list;
|
mutable ps_stack : t list list;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mk_ps () = {
|
let mk_ps src = {
|
||||||
ps_d = Streaming.mk_decoder ();
|
ps_d = Lexer.make src;
|
||||||
ps_stack = [];
|
ps_stack = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
let _error ps msg =
|
let _error ps msg =
|
||||||
let msg' = Printf.sprintf "at %d,%d: %s"
|
let msg' = Printf.sprintf "at %d,%d: %s" (Lexer.line ps.ps_d) (Lexer.col ps.ps_d) msg in
|
||||||
ps.ps_d.Streaming.line ps.ps_d.Streaming.col msg in
|
|
||||||
`Error msg'
|
`Error msg'
|
||||||
|
|
||||||
(* next token, or await *)
|
(* next token, or await *)
|
||||||
let rec _next ps : t partial_result = match Streaming.next ps.ps_d with
|
let rec _next ps : t partial_result =
|
||||||
| `Ok (Streaming.Atom s) ->
|
match Lexer.next ps.ps_d with
|
||||||
|
| `Ok (Lexer.Atom s) ->
|
||||||
_push ps (Atom s)
|
_push ps (Atom s)
|
||||||
| `Ok Streaming.Open ->
|
| `Ok Lexer.Open ->
|
||||||
ps.ps_stack <- [] :: ps.ps_stack;
|
ps.ps_stack <- [] :: ps.ps_stack;
|
||||||
_next ps
|
_next ps
|
||||||
| `Ok Streaming.Close ->
|
| `Ok Lexer.Close ->
|
||||||
begin match ps.ps_stack with
|
begin match ps.ps_stack with
|
||||||
| [] -> _error ps "unbalanced ')'"
|
| [] -> _error ps "unbalanced ')'"
|
||||||
| l :: stack ->
|
| l :: stack ->
|
||||||
|
|
@ -387,49 +450,25 @@ and _push ps e = match ps.ps_stack with
|
||||||
ps.ps_stack <- (e :: l) :: tl;
|
ps.ps_stack <- (e :: l) :: tl;
|
||||||
_next ps
|
_next ps
|
||||||
|
|
||||||
|
(* assume [ps] never needs [`Await] *)
|
||||||
|
let _never_block ps () = match _next ps with
|
||||||
|
| `Await -> assert false
|
||||||
|
| `Ok x -> `Ok x
|
||||||
|
| `Error e -> `Error e
|
||||||
|
| `End -> `End
|
||||||
|
|
||||||
(* parse from a generator of string slices *)
|
(* parse from a generator of string slices *)
|
||||||
let _parse_gen g : t ParseGen.t =
|
let parse_gen g : t ParseGen.t =
|
||||||
let ps = mk_ps() in
|
let ps = mk_ps (Source.of_gen g) in
|
||||||
let rec next () = match _next ps with
|
_never_block ps
|
||||||
| `Await ->
|
|
||||||
begin match g() with
|
|
||||||
| None -> Streaming.reached_end ps.ps_d
|
|
||||||
| Some (s,i,len) -> Streaming.feed ps.ps_d s i len
|
|
||||||
end;
|
|
||||||
next()
|
|
||||||
| `Ok x -> `Ok x
|
|
||||||
| `Error e -> `Error e
|
|
||||||
| `End -> `End
|
|
||||||
in
|
|
||||||
next
|
|
||||||
|
|
||||||
let parse_gen g =
|
|
||||||
_parse_gen
|
|
||||||
(fun () ->
|
|
||||||
match g() with
|
|
||||||
| None -> None
|
|
||||||
| Some s -> Some (s,0,String.length s)
|
|
||||||
)
|
|
||||||
|
|
||||||
(* singleton generator *)
|
|
||||||
let _gen1 x =
|
|
||||||
let first = ref true in
|
|
||||||
fun () ->
|
|
||||||
if !first then (first:=false; Some x) else None
|
|
||||||
|
|
||||||
let parse_string s =
|
let parse_string s =
|
||||||
parse_gen (_gen1 s)
|
let ps = mk_ps (Source.of_string s) in
|
||||||
|
_never_block ps
|
||||||
|
|
||||||
let parse_chan ?(bufsize=1024) ic =
|
let parse_chan ?bufsize ic =
|
||||||
let buf = String.make bufsize ' ' in
|
let ps = mk_ps (Source.of_chan ?bufsize ic) in
|
||||||
let stop = ref false in
|
_never_block ps
|
||||||
let gen () =
|
|
||||||
if !stop then None
|
|
||||||
else
|
|
||||||
let n = input ic buf 0 bufsize in
|
|
||||||
if n=0 then (stop:=true; None) else Some (buf,0,n)
|
|
||||||
in
|
|
||||||
_parse_gen gen
|
|
||||||
|
|
||||||
(** {6 Blocking} *)
|
(** {6 Blocking} *)
|
||||||
|
|
||||||
|
|
@ -458,22 +497,26 @@ let parse_l_string s =
|
||||||
let parse_l_gen g =
|
let parse_l_gen g =
|
||||||
ParseGen.to_list (parse_gen g)
|
ParseGen.to_list (parse_gen g)
|
||||||
|
|
||||||
|
exception OhNoes of string
|
||||||
|
exception StopNaow
|
||||||
|
|
||||||
let parse_l_seq seq =
|
let parse_l_seq seq =
|
||||||
let ps = mk_ps() in
|
let src = Source.Manual.make () in
|
||||||
|
let ps = mk_ps (Source.Manual.to_src src) in
|
||||||
let l = ref [] in
|
let l = ref [] in
|
||||||
(* read as many expressions as possible *)
|
(* read as many expressions as possible *)
|
||||||
let rec _nexts () = match _next ps with
|
let rec _nexts () = match _next ps with
|
||||||
| `Ok x -> l := x :: !l; _nexts ()
|
| `Ok x -> l := x :: !l; _nexts ()
|
||||||
| `Error e -> raise (Streaming.Error e)
|
| `Error e -> raise (OhNoes e)
|
||||||
| `End -> raise Streaming.EOI
|
| `End -> raise StopNaow
|
||||||
| `Await -> ()
|
| `Await -> ()
|
||||||
in
|
in
|
||||||
try
|
try
|
||||||
seq
|
seq
|
||||||
(fun s -> Streaming.feed ps.ps_d s 0 (String.length s); _nexts ());
|
(fun s -> Source.Manual.feed src s 0 (String.length s); _nexts ());
|
||||||
Streaming.reached_end ps.ps_d;
|
Source.Manual.reached_end src;
|
||||||
_nexts ();
|
_nexts ();
|
||||||
`Ok (List.rev !l)
|
`Ok (List.rev !l)
|
||||||
with
|
with
|
||||||
| Streaming.Error msg -> `Error msg
|
| OhNoes msg -> `Error msg
|
||||||
| Streaming.EOI -> `Ok (List.rev !l)
|
| StopNaow -> `Ok (List.rev !l)
|
||||||
|
|
|
||||||
|
|
@ -47,10 +47,13 @@ val to_file : string -> t -> unit
|
||||||
val to_chan : out_channel -> t -> unit
|
val to_chan : out_channel -> t -> unit
|
||||||
|
|
||||||
val print : Format.formatter -> t -> unit
|
val print : Format.formatter -> t -> unit
|
||||||
|
(** Pretty-printer nice on human eyes (including indentation) *)
|
||||||
|
|
||||||
val print_noindent : Format.formatter -> t -> unit
|
val print_noindent : Format.formatter -> t -> unit
|
||||||
|
(** Raw, direct printing as compact as possible *)
|
||||||
|
|
||||||
val seq_to_file : string -> t sequence -> unit
|
val seq_to_file : string -> t sequence -> unit
|
||||||
|
(** Print the given sequence of expressions to a file *)
|
||||||
|
|
||||||
(** {2 Deserialization (decoding)} *)
|
(** {2 Deserialization (decoding)} *)
|
||||||
|
|
||||||
|
|
@ -59,16 +62,65 @@ type 'a partial_result = [ 'a parse_result | `Await ]
|
||||||
|
|
||||||
(** {6 Streaming Parsing} *)
|
(** {6 Streaming Parsing} *)
|
||||||
|
|
||||||
module Streaming : sig
|
module Source : sig
|
||||||
type decoder
|
type individual_char =
|
||||||
|
| NC_yield of char
|
||||||
|
| NC_end
|
||||||
|
| NC_await
|
||||||
|
(** An individual character returned by a source *)
|
||||||
|
|
||||||
val mk_decoder : unit -> decoder
|
type t = unit -> individual_char
|
||||||
|
(** A source of characters can yield them one by one, or signal the end,
|
||||||
|
or signal that some external intervention is needed *)
|
||||||
|
|
||||||
val feed : decoder -> string -> int -> int -> unit
|
type source = t
|
||||||
(** Feed a chunk of input to the decoder *)
|
|
||||||
|
|
||||||
val reached_end : decoder -> unit
|
(** A mnual source of individual characters. When it has exhausted its
|
||||||
(** Tell the decoder that end of input has been reached *)
|
data, it asked its caller to provide more, or signal that none remains
|
||||||
|
In particular, useful when the source of data is monadic IO *)
|
||||||
|
module Manual : sig
|
||||||
|
type t
|
||||||
|
|
||||||
|
val make : unit -> t
|
||||||
|
(** Make a new manual source. It needs to be fed input manually,
|
||||||
|
using {!feed} *)
|
||||||
|
|
||||||
|
val to_src : t -> source
|
||||||
|
(** The manual source contains a source! *)
|
||||||
|
|
||||||
|
val feed : t -> string -> int -> int -> unit
|
||||||
|
(** Feed a chunk of input to the manual source *)
|
||||||
|
|
||||||
|
val reached_end : t -> unit
|
||||||
|
(** Tell the decoder that end of input has been reached. From now
|
||||||
|
the source will only yield [NC_end] *)
|
||||||
|
end
|
||||||
|
|
||||||
|
val of_string : string -> t
|
||||||
|
(** Use a single string as the source *)
|
||||||
|
|
||||||
|
val of_chan : ?bufsize:int -> in_channel -> t
|
||||||
|
(** Use a channel as the source *)
|
||||||
|
|
||||||
|
val of_gen : string gen -> t
|
||||||
|
end
|
||||||
|
|
||||||
|
module Lexer : sig
|
||||||
|
type t
|
||||||
|
(** A streaming lexer, that parses atomic chunks of S-expressions (atoms
|
||||||
|
and delimiters) *)
|
||||||
|
|
||||||
|
val make : Source.t -> t
|
||||||
|
(** Create a lexer that uses the given source of characters as an input *)
|
||||||
|
|
||||||
|
val of_string : string -> t
|
||||||
|
|
||||||
|
val of_chan : in_channel -> t
|
||||||
|
|
||||||
|
val line : t -> int
|
||||||
|
val col : t -> int
|
||||||
|
|
||||||
|
(** Obtain next token *)
|
||||||
|
|
||||||
type token =
|
type token =
|
||||||
| Open
|
| Open
|
||||||
|
|
@ -76,7 +128,7 @@ module Streaming : sig
|
||||||
| Atom of string
|
| Atom of string
|
||||||
(** An individual S-exp token *)
|
(** An individual S-exp token *)
|
||||||
|
|
||||||
val next : decoder -> token partial_result
|
val next : t -> token partial_result
|
||||||
(** Obtain the next token, an error, or block/end stream *)
|
(** Obtain the next token, an error, or block/end stream *)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue