fix: make metric callbacks atomic

The use of a non-threadsafe mutable reference for the metrics callbacks
was resulting in a race condition that would sometimes produce
non-deterministic results in the integration tests.

This has not affected the Lwt-based collector, because of the
single-threaded concurrency Lwt enforces, but it began to show up in the
WIP Eio rewrite, which I am testing on cross-domain programs.

I suspect this may also have been affecting the ocurl collector, but we
don't have integration tests running on that yet.
This commit is contained in:
Shon Feder 2025-07-29 23:42:33 -04:00
parent d9dd7ce32c
commit 93803581b7
No known key found for this signature in database

View file

@ -1071,7 +1071,8 @@ end = struct
[
"exception.message", `String (Printexc.to_string exn);
"exception.type", `String (Printexc.exn_slot_name exn);
"exception.stacktrace", `String (Printexc.raw_backtrace_to_string bt);
( "exception.stacktrace",
`String (Printexc.raw_backtrace_to_string bt) );
]
in
scope.items <- Ev (ev, scope.items)
@ -1454,7 +1455,7 @@ end
alarms/intervals to emit them. *)
module Metrics_callbacks = struct
open struct
let cbs_ : (unit -> Metrics.t list) list ref = ref []
let cbs_ : (unit -> Metrics.t list) AList.t = AList.make ()
end
(** [register f] adds the callback [f] to the list.
@ -1463,12 +1464,12 @@ module Metrics_callbacks = struct
of metrics. It might be called regularly by the backend, in particular
(but not only) when {!Collector.tick} is called. *)
let register f : unit =
if !cbs_ = [] then
if AList.is_empty cbs_ then
(* make sure we call [f] (and others) at each tick *)
Collector.on_tick (fun () ->
let m = List.map (fun f -> f ()) !cbs_ |> List.flatten in
let m = List.map (fun f -> f ()) (AList.get cbs_) |> List.flatten in
Metrics.emit m);
cbs_ := f :: !cbs_
AList.add cbs_ f
end
(** {2 Logs} *)