From 2413a3028c53bdd87be7fa033ec9927a4e109a43 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 00:23:43 -0500 Subject: [PATCH] wip --- .../tiny_httpd_multipart_form_data.ml | 337 ++++++++++++++++++ .../tiny_httpd_multipart_form_data.mli | 33 ++ src/multipart_form/utils_.ml | 62 ++++ 3 files changed, 432 insertions(+) create mode 100644 src/multipart_form/tiny_httpd_multipart_form_data.ml create mode 100644 src/multipart_form/tiny_httpd_multipart_form_data.mli create mode 100644 src/multipart_form/utils_.ml diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.ml b/src/multipart_form/tiny_httpd_multipart_form_data.ml new file mode 100644 index 00000000..ce753566 --- /dev/null +++ b/src/multipart_form/tiny_httpd_multipart_form_data.ml @@ -0,0 +1,337 @@ +(* ported from https://github.com/cryptosense/multipart-form-data . + License: BSD-2 *) + +open Utils_ + +(* TODO: redo some light form of lwt stream for porting purposes? *) +module Stream_ = struct + type 'a t = { next: unit -> 'a option } [@@unboxed] + + let from next : _ t = { next } + let[@inline] get (self : _ t) : _ option = self.next () + + let filter_map f (self : _ t) : _ t = + let rec next () = + match self.next () with + | None -> None + | Some x -> + (match f x with + | None -> next () + | Some _ as r -> r) + in + { next } +end + +let split s boundary = + let r = ref None in + let push v = + match !r with + | None -> r := Some v + | Some _ -> assert false + in + let pop () = + let res = !r in + r := None; + res + in + let go c0 = + let c = + match pop () with + | Some x -> x ^ c0 + | None -> c0 + in + let string_to_process = + match find_common_idx c boundary with + | None -> c + | Some idx -> + let prefix = String.sub c 0 idx in + let suffix = String.sub c idx (String.length c - idx) in + push suffix; + prefix + in + split_and_process_string ~boundary string_to_process + in + let initial = List.map go s in + let final = + Stream_.flatten + @@ Stream_.from (fun () -> + option_map (split_and_process_string ~boundary) @@ pop ()) + in + Stream_.append initial final + +let until_next_delim s = + Stream_.from @@ fun () -> + let res = Stream_.get s in + match res with + | None | Some `Delim -> None + | Some (`Word w) -> Some w + +let join s = + Stream_.filter_map + (function + (* | `Delim -> Some (until_next_delim @@ Lwt_stream.clone s) *) + | `Delim -> Some (until_next_delim s) + | `Word _ -> None) + s + +let align stream boundary = join @@ split stream boundary + +type header = string * string + +let extract_boundary content_type = + Stringext.chop_prefix ~prefix:"multipart/form-data; boundary=" content_type + +let unquote s = Scanf.sscanf s "%S" @@ fun x -> x + +let parse_name s = + option_map unquote @@ Stringext.chop_prefix ~prefix:"form-data; name=" s + +let parse_header s = + match Stringext.cut ~on:": " s with + | Some (key, value) -> key, value + | None -> invalid_arg "parse_header" + +let non_empty st = + let%lwt r = Lwt_stream.to_list @@ Lwt_stream.clone st in + Lwt.return (String.concat "" r <> "") + +let get_headers : string Lwt_stream.t Lwt_stream.t -> header list Lwt.t = + fun lines -> + let%lwt header_lines = Lwt_stream.get_while_s non_empty lines in + Lwt_list.map_s + (fun header_line_stream -> + let%lwt parts = Lwt_stream.to_list header_line_stream in + Lwt.return @@ parse_header @@ String.concat "" parts) + header_lines + +type stream_part = { headers: header list; body: string Lwt_stream.t } + +let parse_part chunk_stream = + let lines = align chunk_stream "\r\n" in + match%lwt get_headers lines with + | [] -> Lwt.return_none + | headers -> + let body = Lwt_stream.concat @@ Lwt_stream.clone lines in + Lwt.return_some { headers; body } + +let parse_stream ~stream ~content_type = + match extract_boundary content_type with + | None -> Lwt.fail_with "Cannot parse content-type" + | Some boundary -> + let actual_boundary = "--" ^ boundary in + Lwt.return + @@ Lwt_stream.filter_map_s parse_part + @@ align stream actual_boundary + +let s_part_body { body; _ } = body + +let s_part_name { headers; _ } = + match parse_name @@ List.assoc "Content-Disposition" headers with + | Some x -> x + | None -> invalid_arg "s_part_name" + +let parse_filename s = + let parts = split_on_string s ~pattern:"; " in + let f = function + | None -> None + | Some part -> + (match Stringext.cut part ~on:"=" with + | Some ("filename", quoted_string) -> Some (unquote quoted_string) + | _ -> None) + in + first_matching f parts + +let s_part_filename { headers; _ } = + parse_filename @@ List.assoc "Content-Disposition" headers + +type file = stream_part + +let file_stream = s_part_body +let file_name = s_part_name +let file_content_type { headers; _ } = List.assoc "Content-Type" headers + +let as_part part = + match s_part_filename part with + | Some _filename -> Lwt.return (`File part) + | None -> + let%lwt chunks = Lwt_stream.to_list part.body in + let body = String.concat "" chunks in + Lwt.return (`String body) + +let get_parts s = + let go part m = + let name = s_part_name part in + let%lwt parsed_part = as_part part in + Lwt.return @@ StringMap.add name parsed_part m + in + Lwt_stream.fold_s go s StringMap.empty + +let concat a b = + match a, b with + | _, "" -> a + | "", _ -> b + | _ -> a ^ b + +module Reader = struct + type t = { mutable buffer: string; source: string Lwt_stream.t } + + let make stream = { buffer = ""; source = stream } + let unread r s = r.buffer <- concat s r.buffer + + let empty r = + if r.buffer = "" then + Lwt_stream.is_empty r.source + else + Lwt.return false + + let read_next r = + let%lwt next_chunk = Lwt_stream.next r.source in + r.buffer <- concat r.buffer next_chunk; + Lwt.return_unit + + let read_chunk r = + try%lwt + let%lwt () = + if r.buffer = "" then + read_next r + else + Lwt.return_unit + in + let res = r.buffer in + r.buffer <- ""; + Lwt.return (Some res) + with Lwt_stream.Empty -> Lwt.return None + + let buffer_contains r s = + match Stringext.cut r.buffer ~on:s with + | Some _ -> true + | None -> false + + let rec read_until r cond = + if cond () then + Lwt.return_unit + else ( + let%lwt () = read_next r in + read_until r cond + ) + + let read_line r = + let delim = "\r\n" in + let%lwt () = read_until r (fun () -> buffer_contains r delim) in + match Stringext.cut r.buffer ~on:delim with + | None -> assert false + | Some (line, next) -> + r.buffer <- next; + Lwt.return (line ^ delim) +end + +let read_headers reader = + let rec go headers = + let%lwt line = Reader.read_line reader in + if line = "\r\n" then + Lwt.return headers + else ( + let header = parse_header line in + go (header :: headers) + ) + in + go [] + +let rec compute_case reader boundary = + match%lwt Reader.read_chunk reader with + | None -> Lwt.return `Empty + | Some line -> + (match Stringext.cut line ~on:(boundary ^ "\r\n") with + | Some (pre, post) -> Lwt.return @@ `Boundary (pre, post) + | None -> + (match Stringext.cut line ~on:(boundary ^ "--\r\n") with + | Some (pre, post) -> Lwt.return @@ `Boundary (pre, post) + | None -> + (match find_common_idx line boundary with + | Some 0 -> + Reader.unread reader line; + let%lwt () = Reader.read_next reader in + compute_case reader boundary + | Some amb_idx -> + let unambiguous = String.sub line 0 amb_idx in + let ambiguous = + String.sub line amb_idx (String.length line - amb_idx) + in + Lwt.return @@ `May_end_with_boundary (unambiguous, ambiguous) + | None -> Lwt.return @@ `App_data line))) + +let iter_part reader boundary callback = + let fin = ref false in + let last () = + fin := true; + Lwt.return_unit + in + let handle ~send ~unread ~finish = + let%lwt () = callback send in + Reader.unread reader unread; + if finish then + last () + else + Lwt.return_unit + in + while%lwt not !fin do + let%lwt res = compute_case reader boundary in + match res with + | `Empty -> last () + | `Boundary (pre, post) -> handle ~send:pre ~unread:post ~finish:true + | `May_end_with_boundary (unambiguous, ambiguous) -> + handle ~send:unambiguous ~unread:ambiguous ~finish:false + | `App_data line -> callback line + done + +let read_file_part reader boundary callback = iter_part reader boundary callback + +let strip_crlf s = + if ends_with ~suffix:"\r\n" ~suffix_length:2 s then + String.sub s 0 (String.length s - 2) + else + s + +let read_string_part reader boundary = + let value = Buffer.create 0 in + let append_to_value line = Lwt.return (Buffer.add_string value line) in + let%lwt () = iter_part reader boundary append_to_value in + Lwt.return @@ strip_crlf (Buffer.contents value) + +let read_part reader boundary callback fields = + let%lwt headers = read_headers reader in + let content_disposition = List.assoc "Content-Disposition" headers in + let name = + match parse_name content_disposition with + | Some x -> x + | None -> invalid_arg "handle_multipart" + in + match parse_filename content_disposition with + | Some filename -> read_file_part reader boundary (callback ~name ~filename) + | None -> + let%lwt value = read_string_part reader boundary in + fields := (name, value) :: !fields; + Lwt.return_unit + +let handle_multipart reader boundary callback = + let fields = (ref [] : (string * string) list ref) in + let%lwt () = + let%lwt _dummyline = Reader.read_line reader in + let fin = ref false in + while%lwt not !fin do + if%lwt Reader.empty reader then + Lwt.return (fin := true) + else + read_part reader boundary callback fields + done + in + Lwt.return !fields + +let parse ~stream ~content_type ~callback = + let reader = Reader.make stream in + let boundary = + match extract_boundary content_type with + | Some s -> "--" ^ s + | None -> invalid_arg "iter_multipart" + in + handle_multipart reader boundary callback diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.mli b/src/multipart_form/tiny_httpd_multipart_form_data.mli new file mode 100644 index 00000000..557324e7 --- /dev/null +++ b/src/multipart_form/tiny_httpd_multipart_form_data.mli @@ -0,0 +1,33 @@ +val align : string Lwt_stream.t -> string -> string Lwt_stream.t Lwt_stream.t +(** + Align a stream on a particular sequence and remove these boundaries. + *) + +type stream_part + +val s_part_name : stream_part -> string +val s_part_body : stream_part -> string Lwt_stream.t +val s_part_filename : stream_part -> string option + +val parse_stream : + stream:string Lwt_stream.t -> + content_type:string -> + stream_part Lwt_stream.t Lwt.t + +type file + +val file_name : file -> string +val file_content_type : file -> string +val file_stream : file -> string Lwt_stream.t + +module StringMap : Map.S with type key = string + +val get_parts : + stream_part Lwt_stream.t -> + [ `String of string | `File of file ] StringMap.t Lwt.t + +val parse : + stream:string Lwt_stream.t -> + content_type:string -> + callback:(name:string -> filename:string -> string -> unit Lwt.t) -> + (string * string) list Lwt.t diff --git a/src/multipart_form/utils_.ml b/src/multipart_form/utils_.ml new file mode 100644 index 00000000..4d1c5f56 --- /dev/null +++ b/src/multipart_form/utils_.ml @@ -0,0 +1,62 @@ +module StringMap = Map.Make (String) + +let string_eq ~a ~a_start ~b ~len = + let r = ref true in + for i = 0 to len - 1 do + let a_i = a_start + i in + let b_i = i in + if a.[a_i] <> b.[b_i] then r := false + done; + !r + +let ends_with ~suffix ~suffix_length s = + let s_length = String.length s in + s_length >= suffix_length + && string_eq ~a:s ~a_start:(s_length - suffix_length) ~b:suffix + ~len:suffix_length + +let rec first_matching p = function + | [] -> None + | x :: xs -> + (match p x with + | Some y -> Some y + | None -> first_matching p xs) + +let[@inline] option_map f = function + | None -> None + | Some x -> Some (f x) + +let find_common_idx a b = + let rec go i = + if i <= 0 then + None + else if ends_with ~suffix:b ~suffix_length:i a then + Some (String.length a - i) + else + go (i - 1) + in + go (String.length b) + +let[@inline] word = function + | "" -> [] + | w -> [ Some w ] + +let split_on_string ~pattern s = + let pattern_length = String.length pattern in + let rec go start acc = + match Stringext.find_from ~start s ~pattern with + | Some match_start -> + let before = String.sub s start (match_start - start) in + let new_acc = (None :: word before) @ acc in + let new_start = match_start + pattern_length in + go new_start new_acc + | None -> word (Stringext.string_after s start) @ acc + in + List.rev (go 0 []) + +let split_and_process_string ~boundary s = + let f = function + | None -> `Delim + | Some w -> `Word w + in + List.map f @@ split_on_string ~pattern:boundary s