From fbd71baa1945fd130d5c9c427ad4bae384ca72c9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 11 Mar 2024 14:58:42 -0400 Subject: [PATCH 1/3] =?UTF-8?q?wip:=20stream=20response=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/ezcurl_core.ml | 49 +++++++++++++++++++++++++++++++--------- src/core/ezcurl_core.mli | 40 +++++++++++++++++++++++++------- 2 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/core/ezcurl_core.ml b/src/core/ezcurl_core.ml index e0cb97d..e04e66e 100644 --- a/src/core/ezcurl_core.ml +++ b/src/core/ezcurl_core.ml @@ -159,22 +159,23 @@ let pp_response_info out r = let string_of_response_info s = Format.asprintf "%a" pp_response_info s -type response = { +type 'body response = { code: int; headers: (string * string) list; - body: string; + body: 'body; info: response_info; } -let pp_response out r = +let pp_response_with ppbody out r = let pp_header out (s1, s2) = Format.fprintf out "@[<2>%s:@ %s@]" s1 s2 in let pp_headers out l = Format.fprintf out "@[%a@]" (Format.pp_print_list pp_header) l in let { code; body; headers; info } = r in Format.fprintf out "{@[code=%d;@ headers=@[%a@];@ info=%a;@ body=@[%a@]@]}" - code pp_headers headers pp_response_info info Format.pp_print_text body + code pp_headers headers pp_response_info info ppbody body +let pp_response = pp_response_with Format.pp_print_text let string_of_response s = Format.asprintf "%a" pp_response s type meth = @@ -224,7 +225,7 @@ module type S = sig url:string -> meth:meth -> unit -> - (response, Curl.curlCode * string) result io + (string response, Curl.curlCode * string) result io (** General purpose HTTP call via cURL. @param url the URL to query @param meth which method to use (see {!meth}) @@ -245,6 +246,27 @@ module type S = sig @param headers headers of the query *) + type stream = { + on_close: unit -> unit io; + on_chunk: string -> int -> int -> unit io; + } + (** Push-based stream of bytes + @since NEXT_RELEASE *) + + val http_stream : + ?tries:int -> + ?client:t -> + ?config:Config.t -> + ?range:string -> + ?content:[ `String of string | `Write of bytes -> int -> int ] -> + ?headers:(string * string) list -> + url:string -> + meth:meth -> + unit -> + (stream response, Curl.curlCode * string) result io + (** HTTP call via cURL, with a streaming response body. + @since NEXT_RELEASE *) + val get : ?tries:int -> ?client:t -> @@ -253,7 +275,7 @@ module type S = sig ?headers:(string * string) list -> url:string -> unit -> - (response, Curl.curlCode * string) result io + (string response, Curl.curlCode * string) result io (** Shortcut for [http ~meth:GET] See {!http} for more info. *) @@ -266,7 +288,7 @@ module type S = sig url:string -> content:[ `String of string | `Write of bytes -> int -> int ] -> unit -> - (response, Curl.curlCode * string) result io + (string response, Curl.curlCode * string) result io (** Shortcut for [http ~meth:PUT] See {!http} for more info. *) @@ -280,7 +302,7 @@ module type S = sig params:Curl.curlHTTPPost list -> url:string -> unit -> - (response, Curl.curlCode * string) result io + (string response, Curl.curlCode * string) result io (** Shortcut for [http ~meth:(POST params)] See {!http} for more info. *) @@ -288,7 +310,7 @@ end exception Parse_error of Curl.curlCode * string -let mk_res (self : t) headers body : (response, _) result = +let mk_res (self : t) headers body : (_ response, _) result = let split_colon s = match String.index s ':' with | exception Not_found -> @@ -340,8 +362,13 @@ module Make (IO : IO) : S with type 'a io = 'a IO.t = struct | `String s -> Some (String.length s) | `Write _ -> None - let http ?(tries = 1) ?client ?(config = Config.default) ?range ?content - ?(headers = []) ~url ~meth () : _ result io = + type stream = { + on_close: unit -> unit io; + on_chunk: string -> int -> int -> unit io; + } + + let http_ ?(tries = 1) ?client ?(config = Config.default) ?range ?content + ?(headers = []) ~url ~meth () : (stream response, _) result io = let headers = ref headers in let do_cleanup, self = match client with diff --git a/src/core/ezcurl_core.mli b/src/core/ezcurl_core.mli index 721ceba..fdc9f03 100644 --- a/src/core/ezcurl_core.mli +++ b/src/core/ezcurl_core.mli @@ -78,17 +78,20 @@ type response_info = { val pp_response_info : Format.formatter -> response_info -> unit val string_of_response_info : response_info -> string -type response = { +type 'body response = { code: int; (** Response code. See https://developer.mozilla.org/en-US/docs/Web/HTTP/Status *) headers: (string * string) list; (** Response headers *) - body: string; (** Response body, or [""] *) + body: 'body; (** Response body, or [""] *) info: response_info; (** Information about the response *) } (** Response for a given request. *) -val pp_response : Format.formatter -> response -> unit -val string_of_response : response -> string +val pp_response_with : + (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a response -> unit + +val pp_response : Format.formatter -> string response -> unit +val string_of_response : string response -> string (** The {{: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods} HTTP method} to use *) @@ -131,7 +134,7 @@ module type S = sig url:string -> meth:meth -> unit -> - (response, Curl.curlCode * string) result io + (string response, Curl.curlCode * string) result io (** General purpose HTTP call via cURL. @param url the URL to query @param meth which method to use (see {!meth}) @@ -152,6 +155,27 @@ module type S = sig @param headers headers of the query *) + type stream = { + on_close: unit -> unit io; + on_chunk: string -> int -> int -> unit io; + } + (** Push-based stream of bytes + @since NEXT_RELEASE *) + + val http_stream : + ?tries:int -> + ?client:t -> + ?config:Config.t -> + ?range:string -> + ?content:[ `String of string | `Write of bytes -> int -> int ] -> + ?headers:(string * string) list -> + url:string -> + meth:meth -> + unit -> + (stream response, Curl.curlCode * string) result io + (** HTTP call via cURL, with a streaming response body. + @since NEXT_RELEASE *) + val get : ?tries:int -> ?client:t -> @@ -160,7 +184,7 @@ module type S = sig ?headers:(string * string) list -> url:string -> unit -> - (response, Curl.curlCode * string) result io + (string response, Curl.curlCode * string) result io (** Shortcut for [http ~meth:GET] See {!http} for more info. *) @@ -173,7 +197,7 @@ module type S = sig url:string -> content:[ `String of string | `Write of bytes -> int -> int ] -> unit -> - (response, Curl.curlCode * string) result io + (string response, Curl.curlCode * string) result io (** Shortcut for [http ~meth:PUT] See {!http} for more info. *) @@ -187,7 +211,7 @@ module type S = sig params:Curl.curlHTTPPost list -> url:string -> unit -> - (response, Curl.curlCode * string) result io + (string response, Curl.curlCode * string) result io (** Shortcut for [http ~meth:(POST params)] See {!http} for more info. *) From 49b265ce566f17ef7cbc09aa01fa7e02b6424e88 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 2 Oct 2024 12:46:43 -0400 Subject: [PATCH 2/3] implement `http_stream` --- src/core/ezcurl_core.ml | 107 +++++++++++++++++++++++++++++---------- src/core/ezcurl_core.mli | 14 +++-- 2 files changed, 90 insertions(+), 31 deletions(-) diff --git a/src/core/ezcurl_core.ml b/src/core/ezcurl_core.ml index e04e66e..a3ad4b5 100644 --- a/src/core/ezcurl_core.ml +++ b/src/core/ezcurl_core.ml @@ -69,6 +69,7 @@ module Config = struct end type t = { curl: Curl.t } [@@unboxed] +type client = t let _init = let initialized = ref false in @@ -246,12 +247,12 @@ module type S = sig @param headers headers of the query *) - type stream = { - on_close: unit -> unit io; - on_chunk: string -> int -> int -> unit io; - } - (** Push-based stream of bytes + (** Push-stream of bytes @since NEXT_RELEASE *) + class type input_stream = object + method on_close : unit -> unit + method on_input : bytes -> int -> int -> unit + end val http_stream : ?tries:int -> @@ -262,8 +263,9 @@ module type S = sig ?headers:(string * string) list -> url:string -> meth:meth -> + write_into:#input_stream -> unit -> - (stream response, Curl.curlCode * string) result io + (unit response, Curl.curlCode * string) result io (** HTTP call via cURL, with a streaming response body. @since NEXT_RELEASE *) @@ -362,13 +364,20 @@ module Make (IO : IO) : S with type 'a io = 'a IO.t = struct | `String s -> Some (String.length s) | `Write _ -> None - type stream = { - on_close: unit -> unit io; - on_chunk: string -> int -> int -> unit io; + class type input_stream = object + method on_close : unit -> unit + method on_input : bytes -> int -> int -> unit + end + + type http_state_ = { + client: client; + do_cleanup: bool; + mutable resp_headers: string list; + mutable resp_headers_done: bool; } - let http_ ?(tries = 1) ?client ?(config = Config.default) ?range ?content - ?(headers = []) ~url ~meth () : (stream response, _) result io = + let http_setup_ ?client ?(config = Config.default) ?range ?content + ?(headers = []) ~url ~meth () : http_state_ = let headers = ref headers in let do_cleanup, self = match client with @@ -390,11 +399,15 @@ module Make (IO : IO) : S with type 'a io = 'a IO.t = struct | Some size, _ -> Curl.set_infilesize self.curl size); (* local state *) - let tries = max tries 1 in - (* at least one attempt *) - let body = Buffer.create 64 in - let resp_headers = ref [] in - let resp_headers_done = ref false in + let st = + { + do_cleanup; + client = self; + resp_headers = []; + resp_headers_done = false; + } + in + (* once we get "\r\n" header line *) Curl.set_url self.curl url; (match meth with @@ -416,29 +429,71 @@ module Make (IO : IO) : S with type 'a io = 'a IO.t = struct let s = String.trim s0 in (* Printf.printf "got header %S\n%!" s0; *) if s0 = "\r\n" then - resp_headers_done := true + st.resp_headers_done <- true else ( (* redirection: drop previous headers *) - if !resp_headers_done then ( - resp_headers_done := false; - resp_headers := [] + if st.resp_headers_done then ( + st.resp_headers_done <- false; + st.resp_headers <- [] ); - resp_headers := s :: !resp_headers + st.resp_headers <- s :: st.resp_headers ); String.length s0); - Curl.set_writefunction self.curl (fun s -> + + st + + let http ?(tries = 1) ?client ?config ?range ?content ?headers ~url ~meth () : + (string response, _) result io = + (* at least one attempt *) + let tries = max tries 1 in + let st = + http_setup_ ?client ?config ?range ?content ?headers ~url ~meth () + in + + let body = Buffer.create 64 in + Curl.set_writefunction st.client.curl (fun s -> Buffer.add_string body s; String.length s); + let rec loop i = - IO.perform self.curl >>= function + IO.perform st.client.curl >>= function | Curl.CURLE_OK -> - let r = mk_res self (List.rev !resp_headers) (Buffer.contents body) in - if do_cleanup then Curl.cleanup self.curl; + let r = + mk_res st.client (List.rev st.resp_headers) (Buffer.contents body) + in + if st.do_cleanup then Curl.cleanup st.client.curl; return r | Curl.CURLE_AGAIN when i > 1 -> loop (i - 1) (* try again *) | c -> - if do_cleanup then Curl.cleanup self.curl; + if st.do_cleanup then Curl.cleanup st.client.curl; + return (Error (c, Curl.strerror c)) + in + loop tries + + let http_stream ?(tries = 1) ?client ?config ?range ?content ?headers ~url + ~meth ~(write_into : #input_stream) () : (unit response, _) result io = + let tries = max tries 1 in + let st = + http_setup_ ?client ?config ?range ?content ?headers ~url ~meth () + in + + Curl.set_writefunction st.client.curl (fun s -> + let n = String.length s in + write_into#on_input (Bytes.unsafe_of_string s) 0 n; + n); + + let rec loop i = + IO.perform st.client.curl >>= function + | Curl.CURLE_OK -> + let r = mk_res st.client (List.rev st.resp_headers) () in + write_into#on_close (); + if st.do_cleanup then Curl.cleanup st.client.curl; + return r + | Curl.CURLE_AGAIN when i > 1 -> loop (i - 1) (* try again *) + | c -> + write_into#on_close (); + if st.do_cleanup then Curl.cleanup st.client.curl; return (Error (c, Curl.strerror c)) in loop tries diff --git a/src/core/ezcurl_core.mli b/src/core/ezcurl_core.mli index fdc9f03..6a06538 100644 --- a/src/core/ezcurl_core.mli +++ b/src/core/ezcurl_core.mli @@ -155,12 +155,12 @@ module type S = sig @param headers headers of the query *) - type stream = { - on_close: unit -> unit io; - on_chunk: string -> int -> int -> unit io; - } (** Push-based stream of bytes @since NEXT_RELEASE *) + class type input_stream = object + method on_close : unit -> unit + method on_input : bytes -> int -> int -> unit + end val http_stream : ?tries:int -> @@ -171,9 +171,13 @@ module type S = sig ?headers:(string * string) list -> url:string -> meth:meth -> + write_into:#input_stream -> unit -> - (stream response, Curl.curlCode * string) result io + (unit response, Curl.curlCode * string) result io (** HTTP call via cURL, with a streaming response body. + The body is given to [write_into] by chunks, + then [write_into#on_close ()] is called + and the response is returned. @since NEXT_RELEASE *) val get : From 012016f9b40f652db5f429ee3e4b5dd6f4406976 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 3 Oct 2024 10:01:37 -0400 Subject: [PATCH 3/3] test: add a test for `http_stream` --- Makefile | 3 +++ README.md | 2 +- test/basic_test.expected | 39 +++++++++++++++++++++++++++++++++++++++ test/basic_test.ml | 28 ++++++++++++++++++++++++---- 4 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 test/basic_test.expected diff --git a/Makefile b/Makefile index 1e9dc66..f60c62e 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,9 @@ build: test: @dune runtest --no-buffer --force +test-autopromote: + @dune runtest --no-buffer --force --auto-promote + clean: @dune clean diff --git a/README.md b/README.md index bd64394..0964408 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ val url : string = "https://curl.haxx.se/" # let res = Ezcurl.get ~url ();; ... # let content = match res with Ok c -> c | Error (_,s) -> failwith s;; -val content : Ezcurl_core.response = +val content : string Ezcurl_core.response = ... # content.Ezcurl.code;; diff --git a/test/basic_test.expected b/test/basic_test.expected new file mode 100644 index 0000000..890dbfc --- /dev/null +++ b/test/basic_test.expected @@ -0,0 +1,39 @@ +get: OK +body=``` +version = 0.26.2 +profile=conventional +margin=80 +if-then-else=k-r +parens-ite=true +parens-tuple=multi-line-only +sequence-style=terminator +type-decl=sparse +break-cases=toplevel +cases-exp-indent=2 +field-space=tight-decl +leading-nested-match-parens=true +module-item-spacing=compact +quiet=true +ocaml-version=4.08.0 + +``` +streaming get: OK +body=``` +version = 0.26.2 +profile=conventional +margin=80 +if-then-else=k-r +parens-ite=true +parens-tuple=multi-line-only +sequence-style=terminator +type-decl=sparse +break-cases=toplevel +cases-exp-indent=2 +field-space=tight-decl +leading-nested-match-parens=true +module-item-spacing=compact +quiet=true +ocaml-version=4.08.0 + +``` +same buf? true diff --git a/test/basic_test.ml b/test/basic_test.ml index 2a060af..1c0a1e0 100644 --- a/test/basic_test.ml +++ b/test/basic_test.ml @@ -1,10 +1,30 @@ +let body = ref "" + +let url = + "https://raw.githubusercontent.com/c-cube/ezcurl/refs/heads/main/.ocamlformat" + let () = + match Ezcurl.get ~url () with + | Error (code, msg) -> + Format.eprintf "curl error: code `%s` (%s)@." (Curl.strerror code) msg + | Ok res -> + body := res.body; + Format.printf "get: OK@.body=```@.%s@.```@." !body + +let () = + let buf = Buffer.create 32 in match - Ezcurl.get - ~url: - "https://archive.softwareheritage.org/api/1/content/sha1_git:7bdf38d4468c114206c9b6ebd9cf1176e085d346/" + Ezcurl.http_stream ~meth:GET ~url + ~write_into: + (object + method on_input bs i len = Buffer.add_subbytes buf bs i len + method on_close () = () + end) () with | Error (code, msg) -> Format.eprintf "curl error: code `%s` (%s)@." (Curl.strerror code) msg - | Ok _response -> Format.printf "OK@." + | Ok _res -> + let new_body = Buffer.contents buf in + Format.printf "streaming get: OK@.body=```@.%s@.```@." new_body; + Format.printf "same buf? %b@." (new_body = !body)