mirror of
https://github.com/ocaml-tracing/ocaml-opentelemetry.git
synced 2026-03-07 18:37:56 -05:00
refactor eio client
This commit is contained in:
parent
6eeb0b9b08
commit
8de53d997b
2 changed files with 159 additions and 381 deletions
|
|
@ -5,13 +5,9 @@ open Eio.Std
|
||||||
https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md
|
https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md
|
||||||
*)
|
*)
|
||||||
|
|
||||||
module OT = Opentelemetry
|
|
||||||
module Config = Config
|
module Config = Config
|
||||||
module Signal = Opentelemetry_client.Signal
|
|
||||||
module Batch = Opentelemetry_client.Batch
|
|
||||||
open Opentelemetry
|
open Opentelemetry
|
||||||
|
open Opentelemetry_client
|
||||||
let ( let@ ) = ( @@ )
|
|
||||||
|
|
||||||
let spf = Printf.sprintf
|
let spf = Printf.sprintf
|
||||||
|
|
||||||
|
|
@ -19,58 +15,51 @@ let set_headers = Config.Env.set_headers
|
||||||
|
|
||||||
let get_headers = Config.Env.get_headers
|
let get_headers = Config.Env.get_headers
|
||||||
|
|
||||||
let needs_gc_metrics = Atomic.make false
|
module Make (CTX : sig
|
||||||
|
val sw : Eio.Switch.t
|
||||||
|
|
||||||
let last_gc_metrics = Atomic.make (Mtime_clock.now ())
|
val env : Eio_unix.Stdenv.base
|
||||||
|
end) =
|
||||||
|
struct
|
||||||
|
module IO : Generic_io.S_WITH_CONCURRENCY with type 'a t = 'a = struct
|
||||||
|
include Generic_io.Direct_style
|
||||||
|
|
||||||
let timeout_gc_metrics = Mtime.Span.(20 * s)
|
(* NOTE: This is only used in the main consumer thread, even though producers
|
||||||
|
might be in other domains *)
|
||||||
|
|
||||||
type error =
|
let sleep_s n = Eio.Time.sleep CTX.env#clock n
|
||||||
[ `Status of int * Opentelemetry.Proto.Status.status
|
|
||||||
| `Failure of string
|
|
||||||
| `Sysbreak
|
|
||||||
]
|
|
||||||
|
|
||||||
let n_errors = Atomic.make 0
|
let spawn f = Eio.Fiber.fork ~sw:CTX.sw f
|
||||||
|
end
|
||||||
|
|
||||||
let n_dropped = Atomic.make 0
|
module Notifier : Generic_notifier.S with module IO = IO = struct
|
||||||
|
module IO = IO
|
||||||
|
|
||||||
let report_err_ = function
|
type t = {
|
||||||
| `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!"
|
mutex: Eio.Mutex.t;
|
||||||
| `Failure msg ->
|
cond: Eio.Condition.t;
|
||||||
Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg
|
}
|
||||||
| `Status
|
|
||||||
( code,
|
|
||||||
{
|
|
||||||
Opentelemetry.Proto.Status.code = scode;
|
|
||||||
message;
|
|
||||||
details;
|
|
||||||
_presence = _;
|
|
||||||
} ) ->
|
|
||||||
let pp_details out l =
|
|
||||||
List.iter
|
|
||||||
(fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s))
|
|
||||||
l
|
|
||||||
in
|
|
||||||
Format.eprintf
|
|
||||||
"@[<2>opentelemetry: export failed with@ http code=%d@ status \
|
|
||||||
{@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@."
|
|
||||||
code scode
|
|
||||||
(Bytes.unsafe_to_string message)
|
|
||||||
pp_details details
|
|
||||||
|
|
||||||
module Httpc : sig
|
let create () : t =
|
||||||
type t
|
{ mutex = Eio.Mutex.create (); cond = Eio.Condition.create () }
|
||||||
|
|
||||||
val create : _ Eio.Net.t -> t
|
let trigger self =
|
||||||
|
(* FIXME: this might be triggered from other threads!! how do we
|
||||||
|
ensure it runs in the Eio thread? *)
|
||||||
|
Eio.Condition.broadcast self.cond
|
||||||
|
|
||||||
val send :
|
let delete = ignore
|
||||||
t ->
|
|
||||||
url:string ->
|
(** NOTE: the mutex must be acquired *)
|
||||||
decode:[ `Dec of Pbrt.Decoder.t -> 'a | `Ret of 'a ] ->
|
let wait self = Eio.Condition.await self.cond self.mutex
|
||||||
string ->
|
|
||||||
('a, error) result
|
(** Ensure we get signalled when the queue goes from empty to non-empty *)
|
||||||
end = struct
|
let register_bounded_queue (self : t) (bq : _ Bounded_queue.t) : unit =
|
||||||
|
Bounded_queue.on_non_empty bq (fun () -> trigger self)
|
||||||
|
end
|
||||||
|
|
||||||
|
module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct
|
||||||
|
module IO = IO
|
||||||
open Opentelemetry.Proto
|
open Opentelemetry.Proto
|
||||||
module Httpc = Cohttp_eio.Client
|
module Httpc = Cohttp_eio.Client
|
||||||
|
|
||||||
|
|
@ -95,10 +84,13 @@ end = struct
|
||||||
in
|
in
|
||||||
Tls_eio.client_of_flow ?host tls_config raw
|
Tls_eio.client_of_flow ?host tls_config raw
|
||||||
|
|
||||||
let create net = Httpc.make ~https:(Some (https ~authenticator)) net
|
let create () = Httpc.make ~https:(Some (https ~authenticator)) CTX.env#net
|
||||||
|
|
||||||
|
let cleanup = ignore
|
||||||
|
|
||||||
(* send the content to the remote endpoint/path *)
|
(* send the content to the remote endpoint/path *)
|
||||||
let send (client : t) ~url ~decode (body : string) : ('a, error) result =
|
let send (client : t) ~url ~decode (body : string) :
|
||||||
|
('a, Export_error.t) result =
|
||||||
Switch.run @@ fun sw ->
|
Switch.run @@ fun sw ->
|
||||||
let uri = Uri.of_string url in
|
let uri = Uri.of_string url in
|
||||||
|
|
||||||
|
|
@ -161,271 +153,40 @@ end = struct
|
||||||
in
|
in
|
||||||
r
|
r
|
||||||
)
|
)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
(** An emitter. This is used by {!Backend} below to forward traces/metrics/…
|
let create_consumer ?(stop = Atomic.make false) ?(config = Config.make ()) ~sw
|
||||||
from the program to whatever collector client we have. *)
|
~env () : Consumer.any_resource_builder =
|
||||||
module type EMITTER = sig
|
let module M = Make (struct
|
||||||
open Opentelemetry.Proto
|
let sw = sw
|
||||||
|
|
||||||
val push_trace : Trace.resource_spans list -> unit
|
let env = env
|
||||||
|
end) in
|
||||||
|
let module C = Generic_http_consumer.Make (M.IO) (M.Notifier) (M.Httpc) in
|
||||||
|
C.consumer ~ticker_task:(Some 0.5) ~stop ~config ()
|
||||||
|
|
||||||
val push_metrics : Metrics.resource_metrics list -> unit
|
let create_exporter ?stop ?(config = Config.make ()) ~sw ~env () =
|
||||||
|
let consumer = create_consumer ?stop ~config ~sw ~env () in
|
||||||
val push_logs : Logs.resource_logs list -> unit
|
let bq =
|
||||||
|
Bounded_queue_sync.create
|
||||||
val set_on_tick_callbacks : (unit -> unit) Alist.t -> unit
|
~high_watermark:Bounded_queue.Defaults.high_watermark ()
|
||||||
|
|
||||||
val tick : unit -> unit
|
|
||||||
|
|
||||||
val cleanup : on_done:(unit -> unit) -> unit -> unit
|
|
||||||
end
|
|
||||||
|
|
||||||
(* make an emitter.
|
|
||||||
|
|
||||||
exceptions inside should be caught, see
|
|
||||||
https://opentelemetry.io/docs/reference/specification/error-handling/ *)
|
|
||||||
let mk_emitter ~stop ~net (config : Config.t) : (module EMITTER) =
|
|
||||||
(* local helpers *)
|
|
||||||
let open struct
|
|
||||||
let client =
|
|
||||||
(* Prime RNG state for TLS *)
|
|
||||||
Mirage_crypto_rng_unix.use_default ();
|
|
||||||
Httpc.create net
|
|
||||||
|
|
||||||
let send_http ~url data : unit =
|
|
||||||
let r = Httpc.send client ~url ~decode:(`Ret ()) data in
|
|
||||||
match r with
|
|
||||||
| Ok () -> ()
|
|
||||||
| Error `Sysbreak ->
|
|
||||||
Printf.eprintf "ctrl-c captured, stopping\n%!";
|
|
||||||
Atomic.set stop true
|
|
||||||
| Error err ->
|
|
||||||
(* TODO: log error _via_ otel? *)
|
|
||||||
Atomic.incr n_errors;
|
|
||||||
report_err_ err;
|
|
||||||
(* avoid crazy error loop *)
|
|
||||||
Eio_unix.sleep 3.
|
|
||||||
|
|
||||||
let timeout =
|
|
||||||
if config.batch_timeout_ms > 0 then
|
|
||||||
Some Mtime.Span.(config.batch_timeout_ms * ms)
|
|
||||||
else
|
|
||||||
None
|
|
||||||
|
|
||||||
let batch_traces : Proto.Trace.resource_spans Batch.t =
|
|
||||||
Batch.make ?batch:config.batch_traces ?timeout ()
|
|
||||||
|
|
||||||
let batch_metrics : Proto.Metrics.resource_metrics Batch.t =
|
|
||||||
Batch.make ?batch:config.batch_metrics ?timeout ()
|
|
||||||
|
|
||||||
let batch_logs : Proto.Logs.resource_logs Batch.t =
|
|
||||||
Batch.make ?batch:config.batch_logs ?timeout ()
|
|
||||||
|
|
||||||
let push_to_batch b e =
|
|
||||||
match Batch.push b e with
|
|
||||||
| `Ok -> ()
|
|
||||||
| `Dropped -> Atomic.incr n_errors
|
|
||||||
|
|
||||||
let[@inline] guard_exn_ where f =
|
|
||||||
try f ()
|
|
||||||
with e ->
|
|
||||||
let bt = Printexc.get_backtrace () in
|
|
||||||
Printf.eprintf "opentelemetry-eio: uncaught exception in %s: %s\n%s\n%!"
|
|
||||||
where (Printexc.to_string e) bt
|
|
||||||
|
|
||||||
let push_traces x =
|
|
||||||
let@ () = guard_exn_ "push trace" in
|
|
||||||
push_to_batch batch_traces x
|
|
||||||
|
|
||||||
let push_metrics x =
|
|
||||||
let@ () = guard_exn_ "push metrics" in
|
|
||||||
push_to_batch batch_metrics x
|
|
||||||
|
|
||||||
let push_logs x =
|
|
||||||
let@ () = guard_exn_ "push logs" in
|
|
||||||
push_to_batch batch_logs x
|
|
||||||
|
|
||||||
let maybe_emit (batch : 'a Batch.t) url (f : 'a list -> string) ~now ~force
|
|
||||||
() : unit =
|
|
||||||
Batch.pop_if_ready ~force ~now batch
|
|
||||||
|> Option.iter (fun signals -> f signals |> send_http ~url)
|
|
||||||
|
|
||||||
let emit_traces_maybe =
|
|
||||||
maybe_emit batch_traces config.url_traces Signal.Encode.traces
|
|
||||||
|
|
||||||
let emit_metrics_maybe =
|
|
||||||
maybe_emit batch_metrics config.url_metrics (fun collected_metrics ->
|
|
||||||
collected_metrics |> Signal.Encode.metrics)
|
|
||||||
|
|
||||||
let emit_logs_maybe =
|
|
||||||
maybe_emit batch_logs config.url_logs Signal.Encode.logs
|
|
||||||
|
|
||||||
let emit_all ~force : unit =
|
|
||||||
Switch.run @@ fun sw ->
|
|
||||||
let now = Mtime_clock.now () in
|
|
||||||
Fiber.fork ~sw @@ emit_logs_maybe ~now ~force;
|
|
||||||
Fiber.fork ~sw @@ emit_metrics_maybe ~now ~force;
|
|
||||||
Fiber.fork ~sw @@ emit_traces_maybe ~now ~force
|
|
||||||
|
|
||||||
let on_tick_cbs_ = Atomic.make (Alist.make ())
|
|
||||||
|
|
||||||
let run_tick_callbacks () =
|
|
||||||
List.iter
|
|
||||||
(fun f ->
|
|
||||||
try f ()
|
|
||||||
with e ->
|
|
||||||
Printf.eprintf "on tick callback raised: %s\n"
|
|
||||||
(Printexc.to_string e))
|
|
||||||
(Alist.get @@ Atomic.get on_tick_cbs_)
|
|
||||||
end in
|
|
||||||
let module M = struct
|
|
||||||
let set_on_tick_callbacks = Atomic.set on_tick_cbs_
|
|
||||||
|
|
||||||
let push_trace e = push_traces e
|
|
||||||
|
|
||||||
let push_metrics e = push_metrics e
|
|
||||||
|
|
||||||
let push_logs e = push_logs e
|
|
||||||
|
|
||||||
let tick () =
|
|
||||||
if Config.Env.get_debug () then
|
|
||||||
Printf.eprintf "tick (from domain %d)\n%!" (Domain.self () :> int);
|
|
||||||
run_tick_callbacks ();
|
|
||||||
emit_all ~force:false
|
|
||||||
|
|
||||||
let cleanup ~on_done () =
|
|
||||||
if Config.Env.get_debug () then
|
|
||||||
Printf.eprintf "opentelemetry: exiting…\n%!";
|
|
||||||
Atomic.set stop true;
|
|
||||||
run_tick_callbacks ();
|
|
||||||
emit_all ~force:true;
|
|
||||||
on_done ()
|
|
||||||
end in
|
|
||||||
(module M : EMITTER)
|
|
||||||
|
|
||||||
module Backend (Emitter : EMITTER) : Opentelemetry.Exporter.t = struct
|
|
||||||
open Opentelemetry.Proto
|
|
||||||
open Opentelemetry.Collector
|
|
||||||
open Emitter
|
|
||||||
|
|
||||||
let send_trace : Trace.resource_spans list sender =
|
|
||||||
{
|
|
||||||
send =
|
|
||||||
(fun l ~ret ->
|
|
||||||
(if Config.Env.get_debug () then
|
|
||||||
let@ () = Lock.with_lock in
|
|
||||||
Format.eprintf "send spans %a@."
|
|
||||||
(Format.pp_print_list Trace.pp_resource_spans)
|
|
||||||
l);
|
|
||||||
push_trace l;
|
|
||||||
ret ());
|
|
||||||
}
|
|
||||||
|
|
||||||
let last_sent_metrics = Atomic.make (Mtime_clock.now ())
|
|
||||||
|
|
||||||
let timeout_sent_metrics = Mtime.Span.(5 * s)
|
|
||||||
(* send metrics from time to time *)
|
|
||||||
|
|
||||||
let signal_emit_gc_metrics () =
|
|
||||||
if Config.Env.get_debug () then
|
|
||||||
Printf.eprintf "opentelemetry: emit GC metrics requested\n%!";
|
|
||||||
Atomic.set needs_gc_metrics true
|
|
||||||
|
|
||||||
let additional_metrics () : Metrics.resource_metrics list =
|
|
||||||
(* add exporter metrics to the lot? *)
|
|
||||||
let last_emit = Atomic.get last_sent_metrics in
|
|
||||||
let now = Mtime_clock.now () in
|
|
||||||
let add_own_metrics =
|
|
||||||
let elapsed = Mtime.span last_emit now in
|
|
||||||
Mtime.Span.compare elapsed timeout_sent_metrics > 0
|
|
||||||
in
|
in
|
||||||
|
Exporter_queued.create ~q:bq ~consumer ()
|
||||||
|
|> Exporter_add_batching.add_batching ~config
|
||||||
|
|
||||||
(* there is a possible race condition here, as several threads might update
|
let create_backend = create_exporter
|
||||||
metrics at the same time. But that's harmless. *)
|
|
||||||
if add_own_metrics then (
|
|
||||||
Atomic.set last_sent_metrics now;
|
|
||||||
let open OT.Metrics in
|
|
||||||
[
|
|
||||||
make_resource_metrics
|
|
||||||
[
|
|
||||||
sum ~name:"otel.export.dropped" ~is_monotonic:true
|
|
||||||
[
|
|
||||||
int
|
|
||||||
~start_time_unix_nano:(Mtime.to_uint64_ns last_emit)
|
|
||||||
~now:(Mtime.to_uint64_ns now) (Atomic.get n_dropped);
|
|
||||||
];
|
|
||||||
sum ~name:"otel.export.errors" ~is_monotonic:true
|
|
||||||
[
|
|
||||||
int
|
|
||||||
~start_time_unix_nano:(Mtime.to_uint64_ns last_emit)
|
|
||||||
~now:(Mtime.to_uint64_ns now) (Atomic.get n_errors);
|
|
||||||
];
|
|
||||||
];
|
|
||||||
]
|
|
||||||
) else
|
|
||||||
[]
|
|
||||||
|
|
||||||
let send_metrics : Metrics.resource_metrics list sender =
|
|
||||||
{
|
|
||||||
send =
|
|
||||||
(fun m ~ret ->
|
|
||||||
(if Config.Env.get_debug () then
|
|
||||||
let@ () = Lock.with_lock in
|
|
||||||
Format.eprintf "send metrics %a@."
|
|
||||||
(Format.pp_print_list Metrics.pp_resource_metrics)
|
|
||||||
m);
|
|
||||||
|
|
||||||
let m = List.rev_append (additional_metrics ()) m in
|
|
||||||
push_metrics m;
|
|
||||||
ret ());
|
|
||||||
}
|
|
||||||
|
|
||||||
let send_logs : Logs.resource_logs list sender =
|
|
||||||
{
|
|
||||||
send =
|
|
||||||
(fun m ~ret ->
|
|
||||||
(if Config.Env.get_debug () then
|
|
||||||
let@ () = Lock.with_lock in
|
|
||||||
Format.eprintf "send logs %a@."
|
|
||||||
(Format.pp_print_list Logs.pp_resource_logs)
|
|
||||||
m);
|
|
||||||
push_logs m;
|
|
||||||
ret ());
|
|
||||||
}
|
|
||||||
|
|
||||||
let tick = Emitter.tick
|
|
||||||
|
|
||||||
let cleanup = Emitter.cleanup
|
|
||||||
|
|
||||||
let set_on_tick_callbacks = Emitter.set_on_tick_callbacks
|
|
||||||
end
|
|
||||||
|
|
||||||
let create_backend ~sw ?(stop = Atomic.make false) ?(config = Config.make ())
|
|
||||||
env : (module OT.Collector.BACKEND) =
|
|
||||||
let module E = (val mk_emitter ~stop ~net:env#net config) in
|
|
||||||
let module B = Backend (E) in
|
|
||||||
(* Run a background fiber to keep the backend ticking regularly.
|
|
||||||
|
|
||||||
NOTE: This cannot be located inside the [Backend], because switches
|
|
||||||
are not thread safe, and cannot be used across domains, but the
|
|
||||||
backend is accessed across domains. *)
|
|
||||||
Eio.Fiber.fork ~sw (fun () ->
|
|
||||||
while not @@ Atomic.get stop do
|
|
||||||
Eio.Time.sleep env#clock 0.5;
|
|
||||||
B.tick ()
|
|
||||||
done);
|
|
||||||
|
|
||||||
(module B)
|
|
||||||
|
|
||||||
let setup_ ~sw ?stop ?config env : unit =
|
let setup_ ~sw ?stop ?config env : unit =
|
||||||
let backend = create_backend ?stop ?config ~sw env in
|
let backend = create_backend ?stop ?config ~sw ~env () in
|
||||||
OT.Collector.set_backend backend
|
Main_exporter.set backend
|
||||||
|
|
||||||
let setup ?stop ?config ?(enable = true) ~sw env =
|
let setup ?stop ?config ?(enable = true) ~sw env =
|
||||||
if enable then setup_ ~sw ?stop ?config env
|
if enable then setup_ ~sw ?stop ?config env
|
||||||
|
|
||||||
let remove_backend () = OT.Collector.remove_backend ~on_done:ignore ()
|
let remove_exporter () = Main_exporter.remove ~on_done:ignore ()
|
||||||
|
|
||||||
|
let remove_backend = remove_exporter
|
||||||
|
|
||||||
let with_setup ?stop ?config ?(enable = true) f env =
|
let with_setup ?stop ?config ?(enable = true) f env =
|
||||||
if enable then
|
if enable then
|
||||||
|
|
|
||||||
|
|
@ -10,15 +10,32 @@ val set_headers : (string * string) list -> unit
|
||||||
|
|
||||||
module Config = Config
|
module Config = Config
|
||||||
|
|
||||||
val create_backend :
|
val create_consumer :
|
||||||
sw:Eio.Switch.t ->
|
|
||||||
?stop:bool Atomic.t ->
|
?stop:bool Atomic.t ->
|
||||||
?config:Config.t ->
|
?config:Config.t ->
|
||||||
Eio_unix.Stdenv.base ->
|
sw:Eio.Switch.t ->
|
||||||
(module Opentelemetry.Collector.BACKEND)
|
env:Eio_unix.Stdenv.base ->
|
||||||
(** Create a new backend using Cohttp_eio
|
unit ->
|
||||||
|
Opentelemetry_client.Consumer.any_resource_builder
|
||||||
|
(** Consumer that pulls from a queue *)
|
||||||
|
|
||||||
NOTE [after_cleanup] optional parameter removed @since 0.12 *)
|
val create_exporter :
|
||||||
|
?stop:bool Atomic.t ->
|
||||||
|
?config:Config.t ->
|
||||||
|
sw:Eio.Switch.t ->
|
||||||
|
env:Eio_unix.Stdenv.base ->
|
||||||
|
unit ->
|
||||||
|
Opentelemetry.Exporter.t
|
||||||
|
(** NOTE [after_cleanup] optional parameter removed @since 0.12 *)
|
||||||
|
|
||||||
|
val create_backend :
|
||||||
|
?stop:bool Atomic.t ->
|
||||||
|
?config:Config.t ->
|
||||||
|
sw:Eio.Switch.t ->
|
||||||
|
env:Eio_unix.Stdenv.base ->
|
||||||
|
unit ->
|
||||||
|
Opentelemetry.Exporter.t
|
||||||
|
[@@deprecated "use create_exporter"]
|
||||||
|
|
||||||
val setup :
|
val setup :
|
||||||
?stop:bool Atomic.t ->
|
?stop:bool Atomic.t ->
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue