mirror of
https://github.com/ocaml-tracing/ocaml-opentelemetry.git
synced 2026-03-07 18:37:56 -05:00
Add retry with exponential backoff to HTTP client
- Add retry_max_attempts, retry_initial_delay_ms, retry_max_delay_ms, retry_backoff_multiplier to config - Retry on network failures, 5xx errors, 429 (rate limit), 408 (timeout) - No retry on 4xx client errors or user interrupt (Sysbreak) - Default: 3 attempts, 100ms initial delay, 5s max delay, 2x multiplier - Uses existing IO.sleep_s from generic_io
This commit is contained in:
parent
a54593c39c
commit
e9485c97da
3 changed files with 68 additions and 4 deletions
|
|
@ -33,6 +33,10 @@ type t = {
|
||||||
batch_timeout_ms: int;
|
batch_timeout_ms: int;
|
||||||
self_trace: bool;
|
self_trace: bool;
|
||||||
http_concurrency_level: int option;
|
http_concurrency_level: int option;
|
||||||
|
retry_max_attempts: int;
|
||||||
|
retry_initial_delay_ms: float;
|
||||||
|
retry_max_delay_ms: float;
|
||||||
|
retry_backoff_multiplier: float;
|
||||||
_rest: rest;
|
_rest: rest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,6 +86,10 @@ let pp out (self : t) : unit =
|
||||||
batch_logs;
|
batch_logs;
|
||||||
batch_timeout_ms;
|
batch_timeout_ms;
|
||||||
http_concurrency_level;
|
http_concurrency_level;
|
||||||
|
retry_max_attempts;
|
||||||
|
retry_initial_delay_ms;
|
||||||
|
retry_max_delay_ms;
|
||||||
|
retry_backoff_multiplier;
|
||||||
_rest = _;
|
_rest = _;
|
||||||
} =
|
} =
|
||||||
self
|
self
|
||||||
|
|
@ -95,13 +103,16 @@ let pp out (self : t) : unit =
|
||||||
%a@];@ protocol=%a;@ timeout_ms=%d;@ timeout_traces_ms=%d;@ \
|
%a@];@ protocol=%a;@ timeout_ms=%d;@ timeout_traces_ms=%d;@ \
|
||||||
timeout_metrics_ms=%d;@ timeout_logs_ms=%d;@ batch_traces=%a;@ \
|
timeout_metrics_ms=%d;@ timeout_logs_ms=%d;@ batch_traces=%a;@ \
|
||||||
batch_metrics=%a;@ batch_logs=%a;@ batch_timeout_ms=%d;@ \
|
batch_metrics=%a;@ batch_logs=%a;@ batch_timeout_ms=%d;@ \
|
||||||
http_concurrency_level=%a @]}"
|
http_concurrency_level=%a;@ retry_max_attempts=%d;@ \
|
||||||
|
retry_initial_delay_ms=%.0f;@ retry_max_delay_ms=%.0f;@ \
|
||||||
|
retry_backoff_multiplier=%.1f @]}"
|
||||||
debug pp_log_level log_level sdk_disabled self_trace url_traces url_metrics
|
debug pp_log_level log_level sdk_disabled self_trace url_traces url_metrics
|
||||||
url_logs ppheaders headers ppheaders headers_traces ppheaders
|
url_logs ppheaders headers ppheaders headers_traces ppheaders
|
||||||
headers_metrics ppheaders headers_logs pp_protocol protocol timeout_ms
|
headers_metrics ppheaders headers_logs pp_protocol protocol timeout_ms
|
||||||
timeout_traces_ms timeout_metrics_ms timeout_logs_ms ppiopt batch_traces
|
timeout_traces_ms timeout_metrics_ms timeout_logs_ms ppiopt batch_traces
|
||||||
ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms ppiopt
|
ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms ppiopt
|
||||||
http_concurrency_level
|
http_concurrency_level retry_max_attempts retry_initial_delay_ms
|
||||||
|
retry_max_delay_ms retry_backoff_multiplier
|
||||||
|
|
||||||
let default_url = "http://localhost:4318"
|
let default_url = "http://localhost:4318"
|
||||||
|
|
||||||
|
|
@ -128,6 +139,10 @@ type 'k make =
|
||||||
?batch_timeout_ms:int ->
|
?batch_timeout_ms:int ->
|
||||||
?self_trace:bool ->
|
?self_trace:bool ->
|
||||||
?http_concurrency_level:int ->
|
?http_concurrency_level:int ->
|
||||||
|
?retry_max_attempts:int ->
|
||||||
|
?retry_initial_delay_ms:float ->
|
||||||
|
?retry_max_delay_ms:float ->
|
||||||
|
?retry_backoff_multiplier:float ->
|
||||||
'k
|
'k
|
||||||
|
|
||||||
module type ENV = sig
|
module type ENV = sig
|
||||||
|
|
@ -234,7 +249,8 @@ module Env () : ENV = struct
|
||||||
?(timeout_ms = get_timeout_from_env "OTEL_EXPORTER_OTLP_TIMEOUT" 10_000)
|
?(timeout_ms = get_timeout_from_env "OTEL_EXPORTER_OTLP_TIMEOUT" 10_000)
|
||||||
?timeout_traces_ms ?timeout_metrics_ms ?timeout_logs_ms
|
?timeout_traces_ms ?timeout_metrics_ms ?timeout_logs_ms
|
||||||
?(batch_timeout_ms = 2_000) ?(self_trace = false) ?http_concurrency_level
|
?(batch_timeout_ms = 2_000) ?(self_trace = false) ?http_concurrency_level
|
||||||
=
|
?(retry_max_attempts = 3) ?(retry_initial_delay_ms = 100.)
|
||||||
|
?(retry_max_delay_ms = 5000.) ?(retry_backoff_multiplier = 2.0) =
|
||||||
let url_traces, url_metrics, url_logs =
|
let url_traces, url_metrics, url_logs =
|
||||||
let base_url =
|
let base_url =
|
||||||
let base_url =
|
let base_url =
|
||||||
|
|
@ -333,6 +349,10 @@ module Env () : ENV = struct
|
||||||
batch_timeout_ms;
|
batch_timeout_ms;
|
||||||
self_trace;
|
self_trace;
|
||||||
http_concurrency_level;
|
http_concurrency_level;
|
||||||
|
retry_max_attempts;
|
||||||
|
retry_initial_delay_ms;
|
||||||
|
retry_max_delay_ms;
|
||||||
|
retry_backoff_multiplier;
|
||||||
_rest = ();
|
_rest = ();
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,15 @@ type t = {
|
||||||
be used to represent the size of a pool of workers where each worker
|
be used to represent the size of a pool of workers where each worker
|
||||||
gets a batch to send, send it, and repeats.
|
gets a batch to send, send it, and repeats.
|
||||||
@since NEXT_RELEASE *)
|
@since NEXT_RELEASE *)
|
||||||
|
retry_max_attempts: int;
|
||||||
|
(** Maximum number of retry attempts for failed exports. 0 means no retry,
|
||||||
|
1 means one retry after initial failure. Default 3. *)
|
||||||
|
retry_initial_delay_ms: float;
|
||||||
|
(** Initial delay in milliseconds before first retry. Default 100ms. *)
|
||||||
|
retry_max_delay_ms: float;
|
||||||
|
(** Maximum delay in milliseconds between retries. Default 5000ms. *)
|
||||||
|
retry_backoff_multiplier: float;
|
||||||
|
(** Multiplier for exponential backoff. Default 2.0. *)
|
||||||
_rest: rest;
|
_rest: rest;
|
||||||
}
|
}
|
||||||
(** Configuration.
|
(** Configuration.
|
||||||
|
|
@ -123,6 +132,10 @@ type 'k make =
|
||||||
?batch_timeout_ms:int ->
|
?batch_timeout_ms:int ->
|
||||||
?self_trace:bool ->
|
?self_trace:bool ->
|
||||||
?http_concurrency_level:int ->
|
?http_concurrency_level:int ->
|
||||||
|
?retry_max_attempts:int ->
|
||||||
|
?retry_initial_delay_ms:float ->
|
||||||
|
?retry_max_delay_ms:float ->
|
||||||
|
?retry_backoff_multiplier:float ->
|
||||||
'k
|
'k
|
||||||
(** A function that gathers all the values needed to construct a {!t}, and
|
(** A function that gathers all the values needed to construct a {!t}, and
|
||||||
produces a ['k]. ['k] is typically a continuation used to construct a
|
produces a ['k]. ['k] is typically a continuation used to construct a
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,13 @@ end = struct
|
||||||
|
|
||||||
let send (self : t) (sigs : OTEL.Any_signal_l.t) : (unit, error) result IO.t
|
let send (self : t) (sigs : OTEL.Any_signal_l.t) : (unit, error) result IO.t
|
||||||
=
|
=
|
||||||
|
let should_retry = function
|
||||||
|
| `Failure _ -> true (* Network errors, connection issues *)
|
||||||
|
| `Status (code, _) ->
|
||||||
|
(* Retry on server errors, rate limits, timeouts *)
|
||||||
|
code >= 500 || code = 429 || code = 408
|
||||||
|
| `Sysbreak -> false (* User interrupt, don't retry *)
|
||||||
|
in
|
||||||
let res = Resource_signal.of_signal_l sigs in
|
let res = Resource_signal.of_signal_l sigs in
|
||||||
let url, signal_headers =
|
let url, signal_headers =
|
||||||
match res with
|
match res with
|
||||||
|
|
@ -78,6 +85,30 @@ end = struct
|
||||||
in
|
in
|
||||||
let headers = List.rev_append signal_headers filtered_general in
|
let headers = List.rev_append signal_headers filtered_general in
|
||||||
let data = Resource_signal.Encode.any ~encoder:self.encoder res in
|
let data = Resource_signal.Encode.any ~encoder:self.encoder res in
|
||||||
|
|
||||||
|
(* Retry loop with exponential backoff *)
|
||||||
|
let rec retry_send attempt delay_ms =
|
||||||
|
let open IO in
|
||||||
|
let* result =
|
||||||
|
Httpc.send self.http ~url ~headers ~decode:(`Ret ()) data
|
||||||
|
in
|
||||||
|
match result with
|
||||||
|
| Ok x -> return (Ok x)
|
||||||
|
| Error err
|
||||||
|
when should_retry err && attempt < self.config.retry_max_attempts ->
|
||||||
|
let delay_s = delay_ms /. 1000. in
|
||||||
|
let* () = sleep_s delay_s in
|
||||||
|
let next_delay =
|
||||||
|
min self.config.retry_max_delay_ms
|
||||||
|
(delay_ms *. self.config.retry_backoff_multiplier)
|
||||||
|
in
|
||||||
|
retry_send (attempt + 1) next_delay
|
||||||
|
| Error _ as err -> return err
|
||||||
|
in
|
||||||
|
|
||||||
|
if self.config.retry_max_attempts > 0 then
|
||||||
|
retry_send 0 self.config.retry_initial_delay_ms
|
||||||
|
else
|
||||||
Httpc.send self.http ~url ~headers ~decode:(`Ret ()) data
|
Httpc.send self.http ~url ~headers ~decode:(`Ret ()) data
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue