feat multipart: first ok implementation

This commit is contained in:
Simon Cruanes 2024-12-02 14:19:26 -05:00
parent e1bfe70991
commit c966d1839c
No known key found for this signature in database
GPG key ID: EBFFF6F283F3A2B4
2 changed files with 170 additions and 52 deletions

View file

@ -1,99 +1,209 @@
(* ported from https://github.com/cryptosense/multipart-form-data . *)
open Tiny_httpd
module Slice = Iostream.Slice
let spf = Printf.sprintf
(** Byte buffer with an explicit fill level: only the first [len] bytes of
    [bs] hold meaningful data. *)
type buf = { bs: bytes; mutable len: int }

(** [shift_left_ self n] discards the first [n] bytes of [self] and moves the
    bytes that follow them to the front of [self.bs].
    [n] must not exceed [self.len] (checked by an assertion). *)
let shift_left_ (self : buf) n =
  let remaining = self.len - n in
  if remaining = 0 then
    (* everything is consumed: nothing to move *)
    self.len <- 0
  else (
    assert (n < self.len);
    Bytes.blit self.bs n self.bs 0 remaining;
    self.len <- remaining
  )

(** [buf_full self] is [true] once the fill level has reached the capacity
    of [self.bs]. *)
let[@inline] buf_full (self : buf) : bool =
  not (self.len < Bytes.length self.bs)
(** Alias for the byte-slice type carried by [Read] events. *)
type slice = Iostream.Slice.t
(** Events returned by [next]: [Part headers] opens a new part and carries its
    headers, [Read slice] carries a piece of the current part, and
    [End_of_input] signals that there is nothing more to read. *)
type event = Part of Tiny_httpd.Headers.t | Read of slice | End_of_input
(** Progress of the event stream driven by [next]: [Begin] until the first
    boundary has been seen, [Inside_part] while a part is being returned,
    [Eof] once the end has been reached. *)
type out_state = Begin | Inside_part | Eof
(** Parser state. *)
(* NOTE(review): this record lists both [delim]/[buf_split]/[buf_len]/[eof]
   and [boundary]/[buf]/[eof_split]; it looks like the removed and the added
   lines of a diff were merged into one listing. Confirm which set is live. *)
type st = {
delim: string;
(* string compared against the input right after a CR LF and two dashes *)
boundary: string;
(* input stream the internal buffer is refilled from *)
ic: Iostream.In.t;
buf_split: bytes; (** Used to split on the delimiter *)
mutable buf_len: int;
buf_line: Buf.t;
mutable eof: bool;
buf: buf; (** Used to split on the boundary *)
(* set once the input returned no bytes while [buf] was empty *)
mutable eof_split: bool;
buf_out: buf; (** Used to return output slices *)
(* state of the event stream produced by [next] *)
mutable st_out: out_state;
}
(** [create ?buf_size ?out_buf_size ~boundary ic] builds a fresh parser state
    reading from [ic]. [buf_size] (default [64 * 1024]) is the size of the
    buffer in which the boundary is searched; [out_buf_size] (default
    [8 * 1024]) is the size of the buffer that backs the returned slices.
    The state starts in [Begin] with empty buffers. *)
(* NOTE(review): two [create] headers follow each other below (one taking
   [~delim], one taking [~boundary] and [?out_buf_size]); this looks like the
   old and new line of a diff kept side by side. Confirm before compiling. *)
let create ?(buf_size = 64 * 1024) ~delim ic : st =
let create ?(buf_size = 64 * 1024) ?(out_buf_size = 8 * 1024) ~boundary ic : st
=
(* upcast whatever input object was given to the plain input stream type *)
let ic = (ic : #Iostream.In.t :> Iostream.In.t) in
{
delim;
boundary;
ic;
buf_split = Bytes.create buf_size;
buf_len = 0;
buf_line = Buf.create ~size:256 ();
eof = false;
buf = { bs = Bytes.create buf_size; len = 0 };
eof_split = false;
buf_out = { bs = Bytes.create out_buf_size; len = 0 };
st_out = Begin;
}
(** Result of one low-level read: [Delim] when a boundary marker was consumed,
    [Eof] when the input is exhausted, [Read n] when [n] bytes were copied to
    the destination buffer. *)
type chunk = Delim | Eof | Read of int
(* NOTE(review): [min_len_] is defined twice in a row; the second definition
   shadows the first. Presumably the first is the pre-change line of a diff. *)
let[@inline] min_len_ (self : st) : int = 2 + String.length self.delim
(** Number of buffered bytes needed before a boundary marker can be
    recognised: 4 bytes (CR, LF and two dashes, as tested in [read_chunk_])
    plus the boundary string itself. *)
let[@inline] min_len_ (self : st) : int = 4 + String.length self.boundary
(** [shift_left_ self n] discards the first [n] bytes held in
    [self.buf_split] and moves the bytes that follow them to the front.
    [n] must not exceed [self.buf_len] (checked by an assertion). *)
(* NOTE(review): this reuses the name of the [buf]-based [shift_left_] defined
   earlier in the listing and shadows it from here on; confirm that this
   [st]-based version is still meant to exist. *)
let shift_left_ (self : st) n =
  let remaining = self.buf_len - n in
  if remaining = 0 then
    self.buf_len <- 0
  else (
    assert (n < self.buf_len);
    Bytes.blit self.buf_split n self.buf_split 0 remaining;
    self.buf_len <- remaining
  )
(* Local control-flow exceptions raised by the scanning loop of [read_chunk_]
   and caught in the same function; the payload is the offset of the match in
   the internal buffer. *)
(* NOTE(review): [Found_delim] appears to be the earlier name of
   [Found_boundary]; confirm whether both are still needed. *)
exception Found_delim of int
exception Found_boundary of int
(** [read_chunk_ self buf i_buf len] copies at most [len] bytes of part data
    into [buf] starting at offset [i_buf], stopping before any boundary
    marker (CR, LF, two dashes, then [self.boundary]).

    Returns [Eof] once the input yields no more bytes and nothing is
    buffered, [Delim] after consuming a boundary marker found at the very
    front of the internal buffer, and [Read n] after copying [n] bytes. *)
(* NOTE(review): the body below contains pairs of near-identical lines, one
   using [buf_split]/[buf_len]/[delim]/[eof]/[Found_delim] and one using
   [buf]/[boundary]/[eof_split]/[Found_boundary]. It looks like the removed
   and added lines of a diff interleaved; the comments describe the second
   variant of each pair. Confirm against the real file. *)
let rec read_chunk_ (self : st) buf i_buf len : chunk =
if self.eof then
if self.eof_split then
Eof
else if self.buf_len < min_len_ self then (
else if self.buf.len < min_len_ self then (
(* try to refill buffer *)
let n =
Iostream.In.input self.ic self.buf_split self.buf_len
(Bytes.length self.buf_split - self.buf_len)
Iostream.In.input self.ic self.buf.bs self.buf.len
(Bytes.length self.buf.bs - self.buf.len)
in
Printf.eprintf "refill n=%d\n%!" n;
if n = 0 && self.buf_len = 0 then (
self.eof <- true;
(* nothing read and nothing buffered: remember the end of input *)
if n = 0 && self.buf.len = 0 then (
self.eof_split <- true;
Eof
) else if n = 0 then (
(* nothing more to read, but fewer than [min_len_] bytes are still
   buffered: they cannot contain a boundary, so hand them out as data *)
let n_read = min len self.buf_len in
Bytes.blit self.buf_split 0 buf i_buf n_read;
shift_left_ self n_read;
let n_read = min len self.buf.len in
Bytes.blit self.buf.bs 0 buf i_buf n_read;
shift_left_ self.buf n_read;
Read n_read
) else (
(* got more bytes: account for them and try again *)
self.buf_len <- self.buf_len + n;
self.buf.len <- self.buf.len + n;
read_chunk_ self buf i_buf len
)
) else (
Printf.eprintf "normal path buflen=%d buf=%S\n%!" self.buf_len
(Bytes.sub_string self.buf_split 0 self.buf_len);
try
let i = ref 0 in
let end_pos = min len self.buf_len - 2 - String.length self.delim in
(* last offset at which a complete marker could start, also capped by the
   room [len] available in the destination *)
(* NOTE(review): when [len] is smaller than the marker length, [end_pos] is
   negative, the loop does not run and the result is [Read 0] with no state
   change; callers that retry on [Read 0] would not make progress. *)
let end_pos = min len self.buf.len - 4 - String.length self.boundary in
while !i <= end_pos do
Printf.eprintf "at %d\n%!" !i;
if
Bytes.unsafe_get self.buf_split !i = '-'
&& Bytes.unsafe_get self.buf_split (!i + 1) = '-'
Bytes.unsafe_get self.buf.bs !i = '\r'
&& Bytes.unsafe_get self.buf.bs (!i + 1) = '\n'
&& Bytes.unsafe_get self.buf.bs (!i + 2) = '-'
&& Bytes.unsafe_get self.buf.bs (!i + 3) = '-'
&& Utils_.string_eq
~a:(Bytes.unsafe_to_string self.buf_split)
~a_start:(!i + 2) ~b:self.delim ~len:(String.length self.delim)
~a:(Bytes.unsafe_to_string self.buf.bs)
~a_start:(!i + 4) ~b:self.boundary
~len:(String.length self.boundary)
then
raise_notrace (Found_delim !i);
raise_notrace (Found_boundary !i);
incr i
done;
(* no marker starts in the scanned range: the first [!i] bytes are data *)
let n_read = min !i len in
Bytes.blit self.buf_split 0 buf i_buf n_read;
shift_left_ self n_read;
Bytes.blit self.buf.bs 0 buf i_buf n_read;
shift_left_ self.buf n_read;
Read n_read
with
| Found_delim 0 ->
Printf.eprintf "found delim at 0\n%!";
shift_left_ self (2 + String.length self.delim);
| Found_boundary 0 ->
(* marker at the very front: drop it and report the boundary *)
shift_left_ self.buf (4 + String.length self.boundary);
Delim
| Found_delim n ->
Printf.eprintf "found delim at %d\n%!" n;
| Found_boundary n ->
(* marker further in: return the data before it; the marker itself stays
   buffered for a later call *)
let n_read = min n len in
Bytes.blit self.buf_split 0 buf i_buf n_read;
shift_left_ self n_read;
Bytes.blit self.buf.bs 0 buf i_buf n_read;
shift_left_ self.buf n_read;
Read n_read
)
exception Found of int

(** [find_crlf_exn buf] is the offset of the first CR LF pair within the
    first [buf.len] bytes of [buf.bs].
    @raise Not_found if there is no such pair. *)
let find_crlf_exn (buf : buf) : int =
  (* last offset at which a two-byte pair still fits in the valid data *)
  let last = buf.len - 2 in
  let pos = ref 0 in
  try
    while !pos <= last do
      let here = Bytes.unsafe_get buf.bs !pos in
      if here = '\r' && Bytes.unsafe_get buf.bs (!pos + 1) = '\n' then
        raise_notrace (Found !pos);
      incr pos
    done;
    raise Not_found
  with Found at -> at
(** [read_to_buf_out_ self] asks [read_chunk_] to write into the free tail of
    [self.buf_out]. The fill level of [self.buf_out] is not updated here; the
    caller does that from the returned chunk. The output buffer must not be
    full (checked by an assertion). *)
let[@inline] read_to_buf_out_ (self : st) =
  let out = self.buf_out in
  assert (not (buf_full out));
  let free = Bytes.length out.bs - out.len in
  read_chunk_ self out.bs out.len free
(** [read_data_or_fail_ self] appends more input to [self.buf_out] while the
    parser is reading the bytes that follow a boundary (the two-byte suffix
    and the header lines).
    @raise Failure if a boundary or the end of input is reached instead of
    data, or if no byte could be added to the output buffer. *)
let read_data_or_fail_ (self : st) : unit =
  match read_to_buf_out_ self with
  | Delim -> failwith "multipart: unexpected boundary while parsing headers"
  | Eof -> failwith "multipart: unexpected EOF while parsing headers"
  | Read 0 ->
    (* A zero-byte read leaves every buffer untouched, so the callers, which
       loop until enough data is present, would spin forever. It happens
       when the room left in the output buffer is smaller than a boundary
       marker, e.g. on an over-long header line. *)
    failwith "multipart: no room left in output buffer while parsing headers"
  | Read n -> self.buf_out.len <- self.buf_out.len + n
(** [next self] returns the next event of the multipart stream.

    - [Part headers] when a boundary followed by a header block was read;
    - [Read slice] for data of the current part; the slice points into the
      output buffer of [self], which is reused by later calls;
    - [End_of_input] once the end of the input or the closing boundary has
      been reached (and on every call after that).

    @raise Failure if the input does not start with a boundary, if the bytes
    after a boundary are malformed, or if the output buffer is too small to
    make progress. *)
let rec next (self : st) : event =
  match self.st_out with
  | Eof -> End_of_input
  | Inside_part when self.buf_out.len > 0 ->
    (* there's data to return *)
    let sl =
      { Slice.bytes = self.buf_out.bs; off = 0; len = self.buf_out.len }
    in
    self.buf_out.len <- 0;
    Read sl
  | Inside_part ->
    (* refill or reach boundary *)
    (match read_to_buf_out_ self with
     | Eof ->
       self.st_out <- Eof;
       End_of_input
     | Delim -> parse_after_boundary self
     | Read 0 ->
       (* A zero-byte read changes no state, so retrying would recurse
          forever. The output buffer is empty in this branch, so this means
          the whole buffer is smaller than a boundary marker. *)
       failwith "multipart: output buffer is too small to make progress"
     | Read n ->
       self.buf_out.len <- n;
       next self)
  | Begin ->
    (* the very first thing in the input must be a boundary *)
    (match read_to_buf_out_ self with
     | Delim -> parse_after_boundary self
     | Eof ->
       self.st_out <- Eof;
       End_of_input
     | Read _ -> failwith "multipart: expected boundary, got data")
(** [parse_after_boundary self] is called right after a boundary marker has
    been consumed. It reads the two bytes that follow it: two dashes mean the
    closing boundary (the stream ends), CR LF means a new part whose headers
    are then parsed and returned as [Part headers].
    @raise Failure if anything else follows the boundary, or if the headers
    cannot be read. *)
and parse_after_boundary (self : st) : event =
  (* make sure the two bytes following the boundary are available *)
  while self.buf_out.len < 2 do
    read_data_or_fail_ self
  done;
  let after_boundary = Bytes.sub_string self.buf_out.bs 0 2 in
  shift_left_ self.buf_out 2;
  match after_boundary with
  | "--" ->
    self.st_out <- Eof;
    End_of_input
  | "\r\n" ->
    let headers = parse_headers_rec self [] in
    self.st_out <- Inside_part;
    Part headers
  | s ->
    (* the backslashes are escaped so that the message shows the two-character
       sequences instead of embedding a raw CR and LF *)
    failwith
      (spf "multipart: expect '--' or '\\r\\n' after boundary, got %S" s)
(** [parse_headers_rec self acc] reads header lines terminated by CR LF from
    [self.buf_out], refilling it as needed, until an empty line is found.
    Header names are lowercased. [acc] holds the headers parsed so far in
    reverse order; the result is in input order.
    @raise Failure if a line does not fit in the output buffer, if a line is
    not a valid header, or if a boundary or the end of input occurs first. *)
and parse_headers_rec (self : st) acc : Headers.t =
  if self.buf_out.len = 0 then (
    read_data_or_fail_ self;
    parse_headers_rec self acc
  ) else (
    match find_crlf_exn self.buf_out with
    | exception Not_found ->
      (* no complete line yet: read more, unless there is no room left *)
      if buf_full self.buf_out then
        failwith "multipart: header line is too long"
      else (
        read_data_or_fail_ self;
        parse_headers_rec self acc
      )
    | i ->
      let line = Bytes.sub_string self.buf_out.bs 0 i in
      (* drop the line together with its CR LF terminator *)
      shift_left_ self.buf_out (i + 2);
      if line = "" then
        (* empty line: end of the header block *)
        List.rev acc
      else (
        match Tiny_httpd.Headers.parse_line_ line with
        | Ok (k, v) ->
          parse_headers_rec self ((String.lowercase_ascii k, v) :: acc)
        | Error msg ->
          failwith
            (spf "multipart: failed to parse header: %s\nline: %S" msg line)
      )
  )
module Private_ = struct
type nonrec chunk = chunk = Delim | Eof | Read of int

View file

@ -1,6 +1,14 @@
(** Parser for multipart/form-data *)
type st
(** Parser state *)
(* NOTE(review): two signatures for [create] follow each other; this looks
   like the old and the new line of a diff. The documentation below describes
   the second one. *)
val create : ?buf_size:int -> delim:string -> #Iostream.In.t -> st
val create :
?buf_size:int -> ?out_buf_size:int -> boundary:string -> #Iostream.In.t -> st
(** [create ?buf_size ?out_buf_size ~boundary ic] is a new parser reading
    from [ic] and splitting its content on [boundary].
    @param buf_size size of the internal buffer in which the boundary is
    searched.
    @param out_buf_size size of the buffer backing the slices returned by
    {!next}. *)
type slice = Iostream.Slice.t
(** Byte slice carried by [Read] events. *)

type event = Part of Tiny_httpd.Headers.t | Read of slice | End_of_input
(** Events produced by the parser: [Part headers] starts a new part and gives
    its headers (names are lowercased), [Read slice] gives a piece of the
    content of the current part, [End_of_input] means there is nothing left
    to read. *)

val next : st -> event
(** [next st] is the next event of the stream. The slice of a [Read] event
    points into a buffer owned by [st] that is reused by later calls to
    [next].
    @raise Failure if the input is malformed. *)
(**/**)
module Private_ : sig