Add retry with exponential backoff to HTTP client

- Add retry_max_attempts, retry_initial_delay_ms, retry_max_delay_ms, retry_backoff_multiplier to config
- Retry on network failures, 5xx errors, 429 (rate limit), 408 (timeout)
- No retry on 4xx client errors or user interrupt (Sysbreak)
- Default: 3 attempts, 100ms initial delay, 5s max delay, 2x multiplier
- Uses existing IO.sleep_s from generic_io
This commit is contained in:
Simon Cruanes 2026-02-14 13:27:01 +00:00
parent a54593c39c
commit e9485c97da
No known key found for this signature in database
GPG key ID: EBFFF6F283F3A2B4
3 changed files with 68 additions and 4 deletions

View file

@ -33,6 +33,10 @@ type t = {
batch_timeout_ms: int; batch_timeout_ms: int;
self_trace: bool; self_trace: bool;
http_concurrency_level: int option; http_concurrency_level: int option;
retry_max_attempts: int;
retry_initial_delay_ms: float;
retry_max_delay_ms: float;
retry_backoff_multiplier: float;
_rest: rest; _rest: rest;
} }
@ -82,6 +86,10 @@ let pp out (self : t) : unit =
batch_logs; batch_logs;
batch_timeout_ms; batch_timeout_ms;
http_concurrency_level; http_concurrency_level;
retry_max_attempts;
retry_initial_delay_ms;
retry_max_delay_ms;
retry_backoff_multiplier;
_rest = _; _rest = _;
} = } =
self self
@ -95,13 +103,16 @@ let pp out (self : t) : unit =
%a@];@ protocol=%a;@ timeout_ms=%d;@ timeout_traces_ms=%d;@ \ %a@];@ protocol=%a;@ timeout_ms=%d;@ timeout_traces_ms=%d;@ \
timeout_metrics_ms=%d;@ timeout_logs_ms=%d;@ batch_traces=%a;@ \ timeout_metrics_ms=%d;@ timeout_logs_ms=%d;@ batch_traces=%a;@ \
batch_metrics=%a;@ batch_logs=%a;@ batch_timeout_ms=%d;@ \ batch_metrics=%a;@ batch_logs=%a;@ batch_timeout_ms=%d;@ \
http_concurrency_level=%a @]}" http_concurrency_level=%a;@ retry_max_attempts=%d;@ \
retry_initial_delay_ms=%.0f;@ retry_max_delay_ms=%.0f;@ \
retry_backoff_multiplier=%.1f @]}"
debug pp_log_level log_level sdk_disabled self_trace url_traces url_metrics debug pp_log_level log_level sdk_disabled self_trace url_traces url_metrics
url_logs ppheaders headers ppheaders headers_traces ppheaders url_logs ppheaders headers ppheaders headers_traces ppheaders
headers_metrics ppheaders headers_logs pp_protocol protocol timeout_ms headers_metrics ppheaders headers_logs pp_protocol protocol timeout_ms
timeout_traces_ms timeout_metrics_ms timeout_logs_ms ppiopt batch_traces timeout_traces_ms timeout_metrics_ms timeout_logs_ms ppiopt batch_traces
ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms ppiopt ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms ppiopt
http_concurrency_level http_concurrency_level retry_max_attempts retry_initial_delay_ms
retry_max_delay_ms retry_backoff_multiplier
let default_url = "http://localhost:4318" let default_url = "http://localhost:4318"
@ -128,6 +139,10 @@ type 'k make =
?batch_timeout_ms:int -> ?batch_timeout_ms:int ->
?self_trace:bool -> ?self_trace:bool ->
?http_concurrency_level:int -> ?http_concurrency_level:int ->
?retry_max_attempts:int ->
?retry_initial_delay_ms:float ->
?retry_max_delay_ms:float ->
?retry_backoff_multiplier:float ->
'k 'k
module type ENV = sig module type ENV = sig
@ -234,7 +249,8 @@ module Env () : ENV = struct
?(timeout_ms = get_timeout_from_env "OTEL_EXPORTER_OTLP_TIMEOUT" 10_000) ?(timeout_ms = get_timeout_from_env "OTEL_EXPORTER_OTLP_TIMEOUT" 10_000)
?timeout_traces_ms ?timeout_metrics_ms ?timeout_logs_ms ?timeout_traces_ms ?timeout_metrics_ms ?timeout_logs_ms
?(batch_timeout_ms = 2_000) ?(self_trace = false) ?http_concurrency_level ?(batch_timeout_ms = 2_000) ?(self_trace = false) ?http_concurrency_level
= ?(retry_max_attempts = 3) ?(retry_initial_delay_ms = 100.)
?(retry_max_delay_ms = 5000.) ?(retry_backoff_multiplier = 2.0) =
let url_traces, url_metrics, url_logs = let url_traces, url_metrics, url_logs =
let base_url = let base_url =
let base_url = let base_url =
@ -333,6 +349,10 @@ module Env () : ENV = struct
batch_timeout_ms; batch_timeout_ms;
self_trace; self_trace;
http_concurrency_level; http_concurrency_level;
retry_max_attempts;
retry_initial_delay_ms;
retry_max_delay_ms;
retry_backoff_multiplier;
_rest = (); _rest = ();
} }
end end

View file

@ -88,6 +88,15 @@ type t = {
be used to represent the size of a pool of workers where each worker be used to represent the size of a pool of workers where each worker
gets a batch to send, send it, and repeats. gets a batch to send, send it, and repeats.
@since NEXT_RELEASE *) @since NEXT_RELEASE *)
retry_max_attempts: int;
(** Maximum number of retry attempts for failed exports. 0 means no retry,
1 means one retry after initial failure. Default 3. *)
retry_initial_delay_ms: float;
(** Initial delay in milliseconds before first retry. Default 100ms. *)
retry_max_delay_ms: float;
(** Maximum delay in milliseconds between retries. Default 5000ms. *)
retry_backoff_multiplier: float;
(** Multiplier for exponential backoff. Default 2.0. *)
_rest: rest; _rest: rest;
} }
(** Configuration. (** Configuration.
@ -123,6 +132,10 @@ type 'k make =
?batch_timeout_ms:int -> ?batch_timeout_ms:int ->
?self_trace:bool -> ?self_trace:bool ->
?http_concurrency_level:int -> ?http_concurrency_level:int ->
?retry_max_attempts:int ->
?retry_initial_delay_ms:float ->
?retry_max_delay_ms:float ->
?retry_backoff_multiplier:float ->
'k 'k
(** A function that gathers all the values needed to construct a {!t}, and (** A function that gathers all the values needed to construct a {!t}, and
produces a ['k]. ['k] is typically a continuation used to construct a produces a ['k]. ['k] is typically a continuation used to construct a

View file

@ -62,6 +62,13 @@ end = struct
let send (self : t) (sigs : OTEL.Any_signal_l.t) : (unit, error) result IO.t let send (self : t) (sigs : OTEL.Any_signal_l.t) : (unit, error) result IO.t
= =
let should_retry = function
| `Failure _ -> true (* Network errors, connection issues *)
| `Status (code, _) ->
(* Retry on server errors, rate limits, timeouts *)
code >= 500 || code = 429 || code = 408
| `Sysbreak -> false (* User interrupt, don't retry *)
in
let res = Resource_signal.of_signal_l sigs in let res = Resource_signal.of_signal_l sigs in
let url, signal_headers = let url, signal_headers =
match res with match res with
@ -78,7 +85,31 @@ end = struct
in in
let headers = List.rev_append signal_headers filtered_general in let headers = List.rev_append signal_headers filtered_general in
let data = Resource_signal.Encode.any ~encoder:self.encoder res in let data = Resource_signal.Encode.any ~encoder:self.encoder res in
Httpc.send self.http ~url ~headers ~decode:(`Ret ()) data
(* Retry loop with exponential backoff *)
let rec retry_send attempt delay_ms =
let open IO in
let* result =
Httpc.send self.http ~url ~headers ~decode:(`Ret ()) data
in
match result with
| Ok x -> return (Ok x)
| Error err
when should_retry err && attempt < self.config.retry_max_attempts ->
let delay_s = delay_ms /. 1000. in
let* () = sleep_s delay_s in
let next_delay =
min self.config.retry_max_delay_ms
(delay_ms *. self.config.retry_backoff_multiplier)
in
retry_send (attempt + 1) next_delay
| Error _ as err -> return err
in
if self.config.retry_max_attempts > 0 then
retry_send 0 self.config.retry_initial_delay_ms
else
Httpc.send self.http ~url ~headers ~decode:(`Ret ()) data
end end
module C = Generic_consumer.Make (IO) (Notifier) (Sender) module C = Generic_consumer.Make (IO) (Notifier) (Sender)