mirror of
https://github.com/ocaml-tracing/ocaml-opentelemetry.git
synced 2026-03-08 03:47:59 -04:00
Add retry with exponential backoff to HTTP client
- Add retry_max_attempts, retry_initial_delay_ms, retry_max_delay_ms, retry_backoff_multiplier to config - Retry on network failures, 5xx errors, 429 (rate limit), 408 (timeout) - No retry on 4xx client errors or user interrupt (Sysbreak) - Default: 3 attempts, 100ms initial delay, 5s max delay, 2x multiplier - Uses existing IO.sleep_s from generic_io
This commit is contained in:
parent
a54593c39c
commit
e9485c97da
3 changed files with 68 additions and 4 deletions
|
|
@ -33,6 +33,10 @@ type t = {
|
|||
batch_timeout_ms: int;
|
||||
self_trace: bool;
|
||||
http_concurrency_level: int option;
|
||||
retry_max_attempts: int;
|
||||
retry_initial_delay_ms: float;
|
||||
retry_max_delay_ms: float;
|
||||
retry_backoff_multiplier: float;
|
||||
_rest: rest;
|
||||
}
|
||||
|
||||
|
|
@ -82,6 +86,10 @@ let pp out (self : t) : unit =
|
|||
batch_logs;
|
||||
batch_timeout_ms;
|
||||
http_concurrency_level;
|
||||
retry_max_attempts;
|
||||
retry_initial_delay_ms;
|
||||
retry_max_delay_ms;
|
||||
retry_backoff_multiplier;
|
||||
_rest = _;
|
||||
} =
|
||||
self
|
||||
|
|
@ -95,13 +103,16 @@ let pp out (self : t) : unit =
|
|||
%a@];@ protocol=%a;@ timeout_ms=%d;@ timeout_traces_ms=%d;@ \
|
||||
timeout_metrics_ms=%d;@ timeout_logs_ms=%d;@ batch_traces=%a;@ \
|
||||
batch_metrics=%a;@ batch_logs=%a;@ batch_timeout_ms=%d;@ \
|
||||
http_concurrency_level=%a @]}"
|
||||
http_concurrency_level=%a;@ retry_max_attempts=%d;@ \
|
||||
retry_initial_delay_ms=%.0f;@ retry_max_delay_ms=%.0f;@ \
|
||||
retry_backoff_multiplier=%.1f @]}"
|
||||
debug pp_log_level log_level sdk_disabled self_trace url_traces url_metrics
|
||||
url_logs ppheaders headers ppheaders headers_traces ppheaders
|
||||
headers_metrics ppheaders headers_logs pp_protocol protocol timeout_ms
|
||||
timeout_traces_ms timeout_metrics_ms timeout_logs_ms ppiopt batch_traces
|
||||
ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms ppiopt
|
||||
http_concurrency_level
|
||||
http_concurrency_level retry_max_attempts retry_initial_delay_ms
|
||||
retry_max_delay_ms retry_backoff_multiplier
|
||||
|
||||
let default_url = "http://localhost:4318"
|
||||
|
||||
|
|
@ -128,6 +139,10 @@ type 'k make =
|
|||
?batch_timeout_ms:int ->
|
||||
?self_trace:bool ->
|
||||
?http_concurrency_level:int ->
|
||||
?retry_max_attempts:int ->
|
||||
?retry_initial_delay_ms:float ->
|
||||
?retry_max_delay_ms:float ->
|
||||
?retry_backoff_multiplier:float ->
|
||||
'k
|
||||
|
||||
module type ENV = sig
|
||||
|
|
@ -234,7 +249,8 @@ module Env () : ENV = struct
|
|||
?(timeout_ms = get_timeout_from_env "OTEL_EXPORTER_OTLP_TIMEOUT" 10_000)
|
||||
?timeout_traces_ms ?timeout_metrics_ms ?timeout_logs_ms
|
||||
?(batch_timeout_ms = 2_000) ?(self_trace = false) ?http_concurrency_level
|
||||
=
|
||||
?(retry_max_attempts = 3) ?(retry_initial_delay_ms = 100.)
|
||||
?(retry_max_delay_ms = 5000.) ?(retry_backoff_multiplier = 2.0) =
|
||||
let url_traces, url_metrics, url_logs =
|
||||
let base_url =
|
||||
let base_url =
|
||||
|
|
@ -333,6 +349,10 @@ module Env () : ENV = struct
|
|||
batch_timeout_ms;
|
||||
self_trace;
|
||||
http_concurrency_level;
|
||||
retry_max_attempts;
|
||||
retry_initial_delay_ms;
|
||||
retry_max_delay_ms;
|
||||
retry_backoff_multiplier;
|
||||
_rest = ();
|
||||
}
|
||||
end
|
||||
|
|
|
|||
|
|
@ -88,6 +88,15 @@ type t = {
|
|||
be used to represent the size of a pool of workers where each worker
|
||||
gets a batch to send, send it, and repeats.
|
||||
@since NEXT_RELEASE *)
|
||||
retry_max_attempts: int;
|
||||
(** Maximum number of retry attempts for failed exports. 0 means no retry,
|
||||
1 means one retry after initial failure. Default 3. *)
|
||||
retry_initial_delay_ms: float;
|
||||
(** Initial delay in milliseconds before first retry. Default 100ms. *)
|
||||
retry_max_delay_ms: float;
|
||||
(** Maximum delay in milliseconds between retries. Default 5000ms. *)
|
||||
retry_backoff_multiplier: float;
|
||||
(** Multiplier for exponential backoff. Default 2.0. *)
|
||||
_rest: rest;
|
||||
}
|
||||
(** Configuration.
|
||||
|
|
@ -123,6 +132,10 @@ type 'k make =
|
|||
?batch_timeout_ms:int ->
|
||||
?self_trace:bool ->
|
||||
?http_concurrency_level:int ->
|
||||
?retry_max_attempts:int ->
|
||||
?retry_initial_delay_ms:float ->
|
||||
?retry_max_delay_ms:float ->
|
||||
?retry_backoff_multiplier:float ->
|
||||
'k
|
||||
(** A function that gathers all the values needed to construct a {!t}, and
|
||||
produces a ['k]. ['k] is typically a continuation used to construct a
|
||||
|
|
|
|||
|
|
@ -62,6 +62,13 @@ end = struct
|
|||
|
||||
let send (self : t) (sigs : OTEL.Any_signal_l.t) : (unit, error) result IO.t
|
||||
=
|
||||
let should_retry = function
|
||||
| `Failure _ -> true (* Network errors, connection issues *)
|
||||
| `Status (code, _) ->
|
||||
(* Retry on server errors, rate limits, timeouts *)
|
||||
code >= 500 || code = 429 || code = 408
|
||||
| `Sysbreak -> false (* User interrupt, don't retry *)
|
||||
in
|
||||
let res = Resource_signal.of_signal_l sigs in
|
||||
let url, signal_headers =
|
||||
match res with
|
||||
|
|
@ -78,7 +85,31 @@ end = struct
|
|||
in
|
||||
let headers = List.rev_append signal_headers filtered_general in
|
||||
let data = Resource_signal.Encode.any ~encoder:self.encoder res in
|
||||
Httpc.send self.http ~url ~headers ~decode:(`Ret ()) data
|
||||
|
||||
(* Retry loop with exponential backoff *)
|
||||
let rec retry_send attempt delay_ms =
|
||||
let open IO in
|
||||
let* result =
|
||||
Httpc.send self.http ~url ~headers ~decode:(`Ret ()) data
|
||||
in
|
||||
match result with
|
||||
| Ok x -> return (Ok x)
|
||||
| Error err
|
||||
when should_retry err && attempt < self.config.retry_max_attempts ->
|
||||
let delay_s = delay_ms /. 1000. in
|
||||
let* () = sleep_s delay_s in
|
||||
let next_delay =
|
||||
min self.config.retry_max_delay_ms
|
||||
(delay_ms *. self.config.retry_backoff_multiplier)
|
||||
in
|
||||
retry_send (attempt + 1) next_delay
|
||||
| Error _ as err -> return err
|
||||
in
|
||||
|
||||
if self.config.retry_max_attempts > 0 then
|
||||
retry_send 0 self.config.retry_initial_delay_ms
|
||||
else
|
||||
Httpc.send self.http ~url ~headers ~decode:(`Ret ()) data
|
||||
end
|
||||
|
||||
module C = Generic_consumer.Make (IO) (Notifier) (Sender)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue