wip: multipart

This commit is contained in:
Simon Cruanes 2024-12-02 11:46:40 -05:00
parent 2413a3028c
commit 2968031e5b
No known key found for this signature in database
GPG key ID: EBFFF6F283F3A2B4
7 changed files with 167 additions and 369 deletions

View file

@ -2,4 +2,4 @@
(name tiny_httpd_multipart_form_data) (name tiny_httpd_multipart_form_data)
(public_name tiny_httpd.multipart-form-data) (public_name tiny_httpd.multipart-form-data)
(synopsis "Port of multipart-form-data for tiny_httpd") (synopsis "Port of multipart-form-data for tiny_httpd")
(libraries iostream stringext)) (libraries iostream tiny_httpd))

View file

@ -1,337 +1,101 @@
(* ported from https://github.com/cryptosense/multipart-form-data . (* ported from https://github.com/cryptosense/multipart-form-data . *)
License: BSD-2 *)
open Utils_ open Tiny_httpd
(* TODO: redo some light form of lwt stream for porting purposes? *) type st = {
module Stream_ = struct delim: string;
type 'a t = { next: unit -> 'a option } [@@unboxed] ic: Iostream.In.t;
buf_split: bytes; (** Used to split on the delimiter *)
mutable buf_len: int;
buf_line: Buf.t;
mutable eof: bool;
}
let from next : _ t = { next } let create ?(buf_size = 64 * 1024) ~delim ic : st =
let[@inline] get (self : _ t) : _ option = self.next () let ic = (ic : #Iostream.In.t :> Iostream.In.t) in
{
delim;
ic;
buf_split = Bytes.create buf_size;
buf_len = 0;
buf_line = Buf.create ~size:256 ();
eof = false;
}
let filter_map f (self : _ t) : _ t = type chunk = Delim | Eof | Read of int
let rec next () =
match self.next () with
| None -> None
| Some x ->
(match f x with
| None -> next ()
| Some _ as r -> r)
in
{ next }
end
let split s boundary = let[@inline] min_len_ (self : st) : int = 2 + String.length self.delim
let r = ref None in
let push v =
match !r with
| None -> r := Some v
| Some _ -> assert false
in
let pop () =
let res = !r in
r := None;
res
in
let go c0 =
let c =
match pop () with
| Some x -> x ^ c0
| None -> c0
in
let string_to_process =
match find_common_idx c boundary with
| None -> c
| Some idx ->
let prefix = String.sub c 0 idx in
let suffix = String.sub c idx (String.length c - idx) in
push suffix;
prefix
in
split_and_process_string ~boundary string_to_process
in
let initial = List.map go s in
let final =
Stream_.flatten
@@ Stream_.from (fun () ->
option_map (split_and_process_string ~boundary) @@ pop ())
in
Stream_.append initial final
let until_next_delim s = let shift_left_ (self : st) n =
Stream_.from @@ fun () -> if n = self.buf_len then
let res = Stream_.get s in self.buf_len <- 0
match res with
| None | Some `Delim -> None
| Some (`Word w) -> Some w
let join s =
Stream_.filter_map
(function
(* | `Delim -> Some (until_next_delim @@ Lwt_stream.clone s) *)
| `Delim -> Some (until_next_delim s)
| `Word _ -> None)
s
let align stream boundary = join @@ split stream boundary
type header = string * string
let extract_boundary content_type =
Stringext.chop_prefix ~prefix:"multipart/form-data; boundary=" content_type
let unquote s = Scanf.sscanf s "%S" @@ fun x -> x
let parse_name s =
option_map unquote @@ Stringext.chop_prefix ~prefix:"form-data; name=" s
let parse_header s =
match Stringext.cut ~on:": " s with
| Some (key, value) -> key, value
| None -> invalid_arg "parse_header"
let non_empty st =
let%lwt r = Lwt_stream.to_list @@ Lwt_stream.clone st in
Lwt.return (String.concat "" r <> "")
let get_headers : string Lwt_stream.t Lwt_stream.t -> header list Lwt.t =
fun lines ->
let%lwt header_lines = Lwt_stream.get_while_s non_empty lines in
Lwt_list.map_s
(fun header_line_stream ->
let%lwt parts = Lwt_stream.to_list header_line_stream in
Lwt.return @@ parse_header @@ String.concat "" parts)
header_lines
type stream_part = { headers: header list; body: string Lwt_stream.t }
let parse_part chunk_stream =
let lines = align chunk_stream "\r\n" in
match%lwt get_headers lines with
| [] -> Lwt.return_none
| headers ->
let body = Lwt_stream.concat @@ Lwt_stream.clone lines in
Lwt.return_some { headers; body }
let parse_stream ~stream ~content_type =
match extract_boundary content_type with
| None -> Lwt.fail_with "Cannot parse content-type"
| Some boundary ->
let actual_boundary = "--" ^ boundary in
Lwt.return
@@ Lwt_stream.filter_map_s parse_part
@@ align stream actual_boundary
let s_part_body { body; _ } = body
let s_part_name { headers; _ } =
match parse_name @@ List.assoc "Content-Disposition" headers with
| Some x -> x
| None -> invalid_arg "s_part_name"
let parse_filename s =
let parts = split_on_string s ~pattern:"; " in
let f = function
| None -> None
| Some part ->
(match Stringext.cut part ~on:"=" with
| Some ("filename", quoted_string) -> Some (unquote quoted_string)
| _ -> None)
in
first_matching f parts
let s_part_filename { headers; _ } =
parse_filename @@ List.assoc "Content-Disposition" headers
type file = stream_part
let file_stream = s_part_body
let file_name = s_part_name
let file_content_type { headers; _ } = List.assoc "Content-Type" headers
let as_part part =
match s_part_filename part with
| Some _filename -> Lwt.return (`File part)
| None ->
let%lwt chunks = Lwt_stream.to_list part.body in
let body = String.concat "" chunks in
Lwt.return (`String body)
let get_parts s =
let go part m =
let name = s_part_name part in
let%lwt parsed_part = as_part part in
Lwt.return @@ StringMap.add name parsed_part m
in
Lwt_stream.fold_s go s StringMap.empty
let concat a b =
match a, b with
| _, "" -> a
| "", _ -> b
| _ -> a ^ b
module Reader = struct
type t = { mutable buffer: string; source: string Lwt_stream.t }
let make stream = { buffer = ""; source = stream }
let unread r s = r.buffer <- concat s r.buffer
let empty r =
if r.buffer = "" then
Lwt_stream.is_empty r.source
else
Lwt.return false
let read_next r =
let%lwt next_chunk = Lwt_stream.next r.source in
r.buffer <- concat r.buffer next_chunk;
Lwt.return_unit
let read_chunk r =
try%lwt
let%lwt () =
if r.buffer = "" then
read_next r
else
Lwt.return_unit
in
let res = r.buffer in
r.buffer <- "";
Lwt.return (Some res)
with Lwt_stream.Empty -> Lwt.return None
let buffer_contains r s =
match Stringext.cut r.buffer ~on:s with
| Some _ -> true
| None -> false
let rec read_until r cond =
if cond () then
Lwt.return_unit
else ( else (
let%lwt () = read_next r in assert (n < self.buf_len);
read_until r cond Bytes.blit self.buf_split n self.buf_split 0 (self.buf_len - n);
self.buf_len <- self.buf_len - n
) )
let read_line r = exception Found_delim of int
let delim = "\r\n" in
let%lwt () = read_until r (fun () -> buffer_contains r delim) in
match Stringext.cut r.buffer ~on:delim with
| None -> assert false
| Some (line, next) ->
r.buffer <- next;
Lwt.return (line ^ delim)
end
let read_headers reader = let rec read_chunk_ (self : st) buf i_buf len : chunk =
let rec go headers = if self.eof then
let%lwt line = Reader.read_line reader in Eof
if line = "\r\n" then else if self.buf_len < min_len_ self then (
Lwt.return headers (* try to refill buffer *)
else ( let n =
let header = parse_header line in Iostream.In.input self.ic self.buf_split self.buf_len
go (header :: headers) (Bytes.length self.buf_split - self.buf_len)
in
Printf.eprintf "refill n=%d\n%!" n;
if n = 0 && self.buf_len = 0 then (
self.eof <- true;
Eof
) else if n = 0 then (
let n_read = min len self.buf_len in
Bytes.blit self.buf_split 0 buf i_buf n_read;
shift_left_ self n_read;
Read n_read
) else (
self.buf_len <- self.buf_len + n;
read_chunk_ self buf i_buf len
)
) else (
Printf.eprintf "normal path buflen=%d buf=%S\n%!" self.buf_len
(Bytes.sub_string self.buf_split 0 self.buf_len);
try
let i = ref 0 in
let end_pos = min len self.buf_len - 2 - String.length self.delim in
while !i <= end_pos do
Printf.eprintf "at %d\n%!" !i;
if
Bytes.unsafe_get self.buf_split !i = '-'
&& Bytes.unsafe_get self.buf_split (!i + 1) = '-'
&& Utils_.string_eq
~a:(Bytes.unsafe_to_string self.buf_split)
~a_start:(!i + 2) ~b:self.delim ~len:(String.length self.delim)
then
raise_notrace (Found_delim !i);
incr i
done;
let n_read = min !i len in
Bytes.blit self.buf_split 0 buf i_buf n_read;
shift_left_ self n_read;
Read n_read
with
| Found_delim 0 ->
Printf.eprintf "found delim at 0\n%!";
shift_left_ self (2 + String.length self.delim);
Delim
| Found_delim n ->
Printf.eprintf "found delim at %d\n%!" n;
let n_read = min n len in
Bytes.blit self.buf_split 0 buf i_buf n_read;
shift_left_ self n_read;
Read n_read
) )
in
go []
let rec compute_case reader boundary = module Private_ = struct
match%lwt Reader.read_chunk reader with type nonrec chunk = chunk = Delim | Eof | Read of int
| None -> Lwt.return `Empty
| Some line ->
(match Stringext.cut line ~on:(boundary ^ "\r\n") with
| Some (pre, post) -> Lwt.return @@ `Boundary (pre, post)
| None ->
(match Stringext.cut line ~on:(boundary ^ "--\r\n") with
| Some (pre, post) -> Lwt.return @@ `Boundary (pre, post)
| None ->
(match find_common_idx line boundary with
| Some 0 ->
Reader.unread reader line;
let%lwt () = Reader.read_next reader in
compute_case reader boundary
| Some amb_idx ->
let unambiguous = String.sub line 0 amb_idx in
let ambiguous =
String.sub line amb_idx (String.length line - amb_idx)
in
Lwt.return @@ `May_end_with_boundary (unambiguous, ambiguous)
| None -> Lwt.return @@ `App_data line)))
let iter_part reader boundary callback = let read_chunk_ = read_chunk_
let fin = ref false in end
let last () =
fin := true;
Lwt.return_unit
in
let handle ~send ~unread ~finish =
let%lwt () = callback send in
Reader.unread reader unread;
if finish then
last ()
else
Lwt.return_unit
in
while%lwt not !fin do
let%lwt res = compute_case reader boundary in
match res with
| `Empty -> last ()
| `Boundary (pre, post) -> handle ~send:pre ~unread:post ~finish:true
| `May_end_with_boundary (unambiguous, ambiguous) ->
handle ~send:unambiguous ~unread:ambiguous ~finish:false
| `App_data line -> callback line
done
let read_file_part reader boundary callback = iter_part reader boundary callback
let strip_crlf s =
if ends_with ~suffix:"\r\n" ~suffix_length:2 s then
String.sub s 0 (String.length s - 2)
else
s
let read_string_part reader boundary =
let value = Buffer.create 0 in
let append_to_value line = Lwt.return (Buffer.add_string value line) in
let%lwt () = iter_part reader boundary append_to_value in
Lwt.return @@ strip_crlf (Buffer.contents value)
let read_part reader boundary callback fields =
let%lwt headers = read_headers reader in
let content_disposition = List.assoc "Content-Disposition" headers in
let name =
match parse_name content_disposition with
| Some x -> x
| None -> invalid_arg "handle_multipart"
in
match parse_filename content_disposition with
| Some filename -> read_file_part reader boundary (callback ~name ~filename)
| None ->
let%lwt value = read_string_part reader boundary in
fields := (name, value) :: !fields;
Lwt.return_unit
let handle_multipart reader boundary callback =
let fields = (ref [] : (string * string) list ref) in
let%lwt () =
let%lwt _dummyline = Reader.read_line reader in
let fin = ref false in
while%lwt not !fin do
if%lwt Reader.empty reader then
Lwt.return (fin := true)
else
read_part reader boundary callback fields
done
in
Lwt.return !fields
let parse ~stream ~content_type ~callback =
let reader = Reader.make stream in
let boundary =
match extract_boundary content_type with
| Some s -> "--" ^ s
| None -> invalid_arg "iter_multipart"
in
handle_multipart reader boundary callback

View file

@ -1,33 +1,11 @@
val align : string Lwt_stream.t -> string -> string Lwt_stream.t Lwt_stream.t type st
(**
Align a stream on a particular sequence and remove these boundaries.
*)
type stream_part val create : ?buf_size:int -> delim:string -> #Iostream.In.t -> st
val s_part_name : stream_part -> string (**/*)
val s_part_body : stream_part -> string Lwt_stream.t module Private_ : sig
val s_part_filename : stream_part -> string option type chunk = Delim | Eof | Read of int
val parse_stream : val read_chunk_ : st -> bytes -> int -> int -> chunk
stream:string Lwt_stream.t -> end
content_type:string -> (**/*)
stream_part Lwt_stream.t Lwt.t
type file
val file_name : file -> string
val file_content_type : file -> string
val file_stream : file -> string Lwt_stream.t
module StringMap : Map.S with type key = string
val get_parts :
stream_part Lwt_stream.t ->
[ `String of string | `File of file ] StringMap.t Lwt.t
val parse :
stream:string Lwt_stream.t ->
content_type:string ->
callback:(name:string -> filename:string -> string -> unit Lwt.t) ->
(string * string) list Lwt.t

View file

@ -1,13 +1,18 @@
module StringMap = Map.Make (String) (* module StringMap = Map.Make (String) *)
let string_eq ~a ~a_start ~b ~len = let string_eq ~a ~a_start ~b ~len : bool =
let r = ref true in assert (len <= String.length b);
if String.length a >= a_start + len then (
try
for i = 0 to len - 1 do for i = 0 to len - 1 do
let a_i = a_start + i in let a_i = a_start + i in
let b_i = i in if String.unsafe_get a a_i <> String.unsafe_get b i then
if a.[a_i] <> b.[b_i] then r := false raise_notrace Exit
done; done;
!r true
with Exit -> false
) else
false
let ends_with ~suffix ~suffix_length s = let ends_with ~suffix ~suffix_length s =
let s_length = String.length s in let s_length = String.length s in
@ -37,6 +42,7 @@ let find_common_idx a b =
in in
go (String.length b) go (String.length b)
(*
let[@inline] word = function let[@inline] word = function
| "" -> [] | "" -> []
| w -> [ Some w ] | w -> [ Some w ]
@ -60,3 +66,4 @@ let split_and_process_string ~boundary s =
| Some w -> `Word w | Some w -> `Word w
in in
List.map f @@ split_on_string ~pattern:boundary s List.map f @@ split_on_string ~pattern:boundary s
*)

View file

@ -0,0 +1,4 @@
(tests
(names t1)
(libraries tiny_httpd tiny_httpd.multipart-form-data))

View file

View file

@ -0,0 +1,45 @@
module MFD = Tiny_httpd_multipart_form_data
let pf = Printf.printf
let read_stream (st : MFD.st) : _ list =
let l = ref [] in
let buf = Bytes.create 12 in
let buffer = Buffer.create 32 in
let rec loop () =
match MFD.Private_.read_chunk_ st buf 0 (Bytes.length buf) with
| Delim ->
if Buffer.length buffer > 0 then l := `Str (Buffer.contents buffer) :: !l;
Buffer.clear buffer;
l := `Delim :: !l;
loop ()
| Read n ->
Buffer.add_subbytes buffer buf 0 n;
loop ()
| Eof ->
if Buffer.length buffer > 0 then l := `Str (Buffer.contents buffer) :: !l;
List.rev !l
in
loop ()
let test input_str =
let st =
MFD.create ~buf_size:16 ~delim:"YOLO" (Iostream.In.of_string input_str)
in
let chunks = read_stream st in
List.iter
(function
| `Delim -> pf "delim\n"
| `Str s -> pf "chunk %S\n" s)
chunks;
()
let () =
pf "T1\n";
test
{|hello--YOLO
world
what is the meaning of--YOLOthis??--YOLOok ok ok--YOLO|};
pf "T2\n";
test "--YOLO--YOLOah bon--YOLOaight--YOLO--YOLO";
()