From 004d0d41fa8bc867cf6224e6db53a44c5efc1176 Mon Sep 17 00:00:00 2001 From: craff Date: Wed, 8 Dec 2021 15:57:45 -1000 Subject: [PATCH] Decode multipart post encoded form + thread safe interface to Str.search_forward --- src/Tiny_httpd_util.ml | 116 ++++++++++++++++++++++++++++++++++++++++ src/Tiny_httpd_util.mli | 27 ++++++++++ src/dune | 2 +- tiny_httpd.opam | 1 + 4 files changed, 145 insertions(+), 1 deletion(-) diff --git a/src/Tiny_httpd_util.ml b/src/Tiny_httpd_util.ml index 2e614c91..88a07365 100644 --- a/src/Tiny_httpd_util.ml +++ b/src/Tiny_httpd_util.ml @@ -161,3 +161,119 @@ let parse_query s : (_ list, string) result= (List.map (fun (x,y) -> percent_encode x ^"="^percent_encode y) l) in eq_sorted (Ok l) (parse_query s)) *) + +let str_mutex = Mutex.create () + +type (_,_) to_read = + | Nothing : ('a, 'a) to_read + | Begin : ('a, 'b) to_read -> (int -> 'a, 'b) to_read + | End : ('a, 'b) to_read -> (int -> 'a, 'b) to_read + | Grp : int * ('a, 'b) to_read -> (string -> 'a, 'b) to_read + | OptGrp : int * ('a, 'b) to_read -> (string option -> 'a, 'b) to_read + +let search_forward regexp_ str ?(from=0) groups cont = + let rec read_groups : type a b. (a,b) to_read -> a -> b = + let open Str in + fun groups cont -> + match groups,cont with + | Nothing , cont -> cont + | Begin r , cont -> read_groups r (cont (match_beginning ())) + | End r , cont -> read_groups r (cont (match_end ())) + | Grp(i,r), cont -> read_groups r (cont (matched_group i str)) + | OptGrp(i,r), cont -> + let str = try Some(matched_group i str) + with Not_found -> None + in + read_groups r (cont str) + in + let open Str in + Mutex.lock str_mutex; + let _ = search_forward regexp_ str from in + let cont = read_groups groups cont in + Mutex.unlock str_mutex; + cont + +let first_line str = + let pos = String.index str '\n' in + let pos = if str.[pos] = '\r' then pos-1 else pos in + String.sub str 0 (pos-1) + + +(* Decoding of multipart encoded post forms according to rfc7578 *) +type multipart_part = + { disposition : string + ; mime_type : string option + ; charset : string option + ; filename : string option + ; content : string } + +let content_disposition_regexp = + Str.regexp "Content-Disposition: \\([^;\r\n]*\\)\\(;[ ]*\\)?\\([^\r\n]*\\)" + +let content_type_regexp = + Str.regexp "Content-Type: \\([^\r\n]*\\)\\(;[ ]*charset=\\([^ \n\r]+\\)\\)" + +let empty_line_re = + Str.regexp "\r?\n\r?\n" + +let decode_parts str = + try + let groups = Begin (End Nothing) in + let (header_end, content_begin) = + search_forward empty_line_re str groups (fun b e -> (b,e)) + in + let header = String.sub str 0 header_end in + let len = String.length str in + let rm = if str.[len-1] = '\n' then + if str.[len-2] = '\r' then 2 else 1 else 0 + in + let len = len - content_begin - rm in + let content = String.sub str content_begin len in + let (disposition, values) = + search_forward content_disposition_regexp header + (Grp(1,Grp (3,Nothing))) (fun x y -> (x,y)) + in + let values = + match parse_query values with + | Ok l -> List.map (fun (k,v) -> + let open String in + let k = trim k in + let v = trim v in + let len = String.length v in + let v = + if v.[0] = '"' && v.[len-1] = '"' && len > 1 then + String.sub v 1 (len-2) + else v + in + (k,v)) l + | _ -> [] + in + let mime_type, charset = + try + search_forward content_type_regexp header (OptGrp (1,OptGrp (3,Nothing))) + (fun x y -> (x,y)) + with + Not_found -> (None, None) + in + let name = List.assoc "name" values in + let filename = try percent_decode (List.assoc "filename" values) + with Not_found -> None + in + Some(name,{disposition; mime_type; charset; filename; content}) + with Not_found -> None + +let decode_multipart str = + let sep = first_line str in + let parts = Str.(split (regexp (sep ^ "\\(--\\)?\r?\n")) str) in + let res = List.filter_map decode_parts parts in + let default_charset, res = + List.partition (fun (name,_) -> name = "_charset_") res + in + match default_charset with + | (_,{content=charset; _})::_ -> + List.map (function + (name, part as c) -> + if part.charset = None then + (name, {part with charset = Some charset}) + else c) res + | [] -> res diff --git a/src/Tiny_httpd_util.mli b/src/Tiny_httpd_util.mli index 025d6519..b7958110 100644 --- a/src/Tiny_httpd_util.mli +++ b/src/Tiny_httpd_util.mli @@ -34,3 +34,30 @@ val parse_query : string -> ((string*string) list, string) result The order might not be preserved. @since 0.3 *) + +(** Decoding of multipart encoded post forms according to rfc7578. + Notably: + - a part with name "_charset_" is take as a default charset + - the filename is percent decoded +*) +type multipart_part = + { disposition : string + ; mime_type : string option + ; charset : string option + ; filename : string option + ; content : string } + +(** decode the body of a multipart encoded POST request, give an association, + that associates the name of the field to the above record *) +val decode_multipart : string -> (string * multipart_part) list + +(** Thread safe interface to Str (protection by a mutex)*) +type (_,_) to_read = + | Nothing : ('a, 'a) to_read + | Begin : ('a, 'b) to_read -> (int -> 'a, 'b) to_read + | End : ('a, 'b) to_read -> (int -> 'a, 'b) to_read + | Grp : int * ('a, 'b) to_read -> (string -> 'a, 'b) to_read + | OptGrp : int * ('a, 'b) to_read -> (string option -> 'a, 'b) to_read + +val search_forward : Str.regexp -> string -> ?from:int + -> ('a,'b) to_read -> 'a -> 'b diff --git a/src/dune b/src/dune index 29a2fb12..d6f85ff0 100644 --- a/src/dune +++ b/src/dune @@ -2,6 +2,6 @@ (library (name tiny_httpd) (public_name tiny_httpd) - (libraries threads) + (libraries threads str) (flags :standard -safe-string -warn-error -a+8) (wrapped false)) diff --git a/tiny_httpd.opam b/tiny_httpd.opam index 07d0fa62..bc84236f 100644 --- a/tiny_httpd.opam +++ b/tiny_httpd.opam @@ -12,6 +12,7 @@ build: [ depends: [ "dune" { >= "2.0" } "base-threads" + "str" "ocaml" { >= "4.04.0" } "odoc" {with-doc} "qtest" { >= "2.9" & with-test}