From 5005c3aa9b76ec2d344d9186944b33a69f58b170 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 14:01:24 -0500 Subject: [PATCH 01/11] perf: when we get multiple messages, check batches only once --- .../opentelemetry_client_ocurl.ml | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 29e11fa4..9c01109d 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -266,39 +266,36 @@ end = struct (* read multiple events at once *) B_queue.pop_all self.q local_q; - let now = Mtime_clock.now () in + (* are we asked to flush all events? *) + let must_flush_all = ref false in (* how to process a single event *) let process_ev (ev : Event.t) : unit = match ev with - | Event.E_metric m -> - Batch.push batches.metrics m; - if should_send_batch_ ~config ~now batches.metrics then - send_metrics () - | Event.E_trace tr -> - Batch.push batches.traces tr; - if should_send_batch_ ~config ~now batches.traces then - send_traces () - | Event.E_logs logs -> - Batch.push batches.logs logs; - if should_send_batch_ ~config ~now batches.logs then send_logs () - | Event.E_tick -> - (* check for batches whose timeout expired *) - if should_send_batch_ ~config ~now batches.metrics then - send_metrics (); - if should_send_batch_ ~config ~now batches.logs then send_logs (); - if should_send_batch_ ~config ~now batches.traces then - send_traces () - | Event.E_flush_all -> - if Batch.len batches.metrics > 0 then send_metrics (); - if Batch.len batches.logs > 0 then send_logs (); - if Batch.len batches.traces > 0 then send_traces () + | Event.E_metric m -> Batch.push batches.metrics m + | Event.E_trace tr -> Batch.push batches.traces tr + | Event.E_logs logs -> Batch.push batches.logs logs + | Event.E_tick -> () + | Event.E_flush_all -> must_flush_all := true in while not (Queue.is_empty local_q) do let ev = Queue.pop local_q in process_ev ev - done + done; + + if !must_flush_all then ( + if Batch.len batches.metrics > 0 then send_metrics (); + if Batch.len batches.logs > 0 then send_logs (); + if Batch.len batches.traces > 0 then send_traces () + ) else ( + let now = Mtime_clock.now () in + if should_send_batch_ ~config ~now batches.metrics then + send_metrics (); + + if should_send_batch_ ~config ~now batches.traces then send_traces (); + if should_send_batch_ ~config ~now batches.logs then send_logs () + ) done with B_queue.Closed -> () From f0750cdfb543faf77a8d9c58b7d67def21db6131 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 14:05:18 -0500 Subject: [PATCH 02/11] otel-ocurl: some self-tracing --- src/client-ocurl/opentelemetry_client_ocurl.ml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 9c01109d..47466f23 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -5,6 +5,7 @@ module OT = Opentelemetry module Config = Config +module Trace' = Opentelemetry.Trace open Opentelemetry include Common_ @@ -120,9 +121,17 @@ end = struct let send_http_ ~stop ~config (client : Curl.t) encoder ~path ~encode x : unit = - Pbrt.Encoder.reset encoder; - encode x encoder; - let data = Pbrt.Encoder.to_string encoder in + let@ _sc = + Trace'.with_ ~kind:Span.Span_kind_producer "otel-ocurl.send-http" + in + + let data = + let@ _sc = Trace'.with_ ~kind:Span.Span_kind_internal "encode-proto" in + Pbrt.Encoder.reset encoder; + encode x encoder; + Pbrt.Encoder.to_string encoder + in + let url = let url = config.Config.url in if url <> "" && String.get url (String.length url - 1) = '/' then From 3d16de634f5244e33222f9d89616b154a09d6a61 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 14:26:07 -0500 Subject: [PATCH 03/11] refine seld tracing --- src/client-ocurl/opentelemetry_client_ocurl.ml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 47466f23..70a8fa22 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -147,11 +147,18 @@ end = struct ("Content-Type", "application/x-protobuf") :: config.headers in match + let@ _sc = + Trace'.with_ ~kind:Span.Span_kind_internal "curl.post" + ~attrs:[ "sz", `Int (String.length data) ] + in Ezcurl.post ~headers ~client ~params:[] ~url ~content:(`String data) () with | Ok { code; _ } when code >= 200 && code < 300 -> () | Ok { code; body; headers = _; info = _ } -> Atomic.incr n_errors; + Trace'.add_event _sc (fun () -> + Opentelemetry.Event.make "error" ~attrs:[ "code", `Int code ]); + if !debug_ || config.debug then ( let dec = Pbrt.Decoder.of_string body in let body = From 2ac799f10d917ddfd067b2760f4a6fd92bdc9416 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 15:45:39 -0500 Subject: [PATCH 04/11] move AList to core --- src/{client-ocurl => core}/AList.ml | 2 ++ src/{client-ocurl => core}/AList.mli | 3 +++ src/core/opentelemetry.ml | 4 ++++ 3 files changed, 9 insertions(+) rename src/{client-ocurl => core}/AList.ml (94%) rename src/{client-ocurl => core}/AList.mli (72%) diff --git a/src/client-ocurl/AList.ml b/src/core/AList.ml similarity index 94% rename from src/client-ocurl/AList.ml rename to src/core/AList.ml index 9b7d47df..faec8000 100644 --- a/src/client-ocurl/AList.ml +++ b/src/core/AList.ml @@ -4,6 +4,8 @@ type 'a t = 'a list Atomic.t let make () = Atomic.make [] +let get = Atomic.get + let add self x = while let old = Atomic.get self in diff --git a/src/client-ocurl/AList.mli b/src/core/AList.mli similarity index 72% rename from src/client-ocurl/AList.mli rename to src/core/AList.mli index b4c718dd..4799dd6b 100644 --- a/src/client-ocurl/AList.mli +++ b/src/core/AList.mli @@ -2,6 +2,9 @@ type 'a t +val get : 'a t -> 'a list +(** Snapshot *) + val make : unit -> 'a t val add : 'a t -> 'a -> unit diff --git a/src/core/opentelemetry.ml b/src/core/opentelemetry.ml index cc5186e2..964d0183 100644 --- a/src/core/opentelemetry.ml +++ b/src/core/opentelemetry.ml @@ -6,6 +6,10 @@ module Lock = Lock module Rand_bytes = Rand_bytes (** Generation of random identifiers. *) +module AList = AList +(** Atomic list, for internal usage + @since NEXT_RELEASE *) + open struct let[@inline] result_bind x f = match x with From bd8b483e813e694fc7dbdf189d18e4663866b770 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 15:46:39 -0500 Subject: [PATCH 05/11] feat: use AList for tick callbacks; emit GC events on tick it's cleaner to emit GC events on ticks rather than on GC, because it avoids both spamming if the GC is very active, and emitting nothing when there are few allocations. --- .../opentelemetry_client_cohttp_lwt.ml | 6 +++--- src/client-ocurl/opentelemetry_client_ocurl.ml | 5 ++--- src/core/opentelemetry.ml | 15 ++++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml index 5b4f0453..b078c5b2 100644 --- a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml +++ b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml @@ -257,7 +257,7 @@ module type EMITTER = sig val push_logs : Logs.resource_logs list -> unit - val set_on_tick_callbacks : (unit -> unit) list ref -> unit + val set_on_tick_callbacks : (unit -> unit) AList.t -> unit val tick : unit -> unit @@ -288,7 +288,7 @@ let mk_emitter ~stop ~(config : Config.t) () : (module EMITTER) = let batch_logs : Logs.resource_logs list Batch.t = Batch.make ?batch:config.batch_logs ?timeout () - let on_tick_cbs_ = Atomic.make (ref []) + let on_tick_cbs_ = Atomic.make (AList.make ()) let set_on_tick_callbacks = Atomic.set on_tick_cbs_ @@ -384,7 +384,7 @@ let mk_emitter ~stop ~(config : Config.t) () : (module EMITTER) = with e -> Printf.eprintf "on tick callback raised: %s\n" (Printexc.to_string e)) - !(Atomic.get on_tick_cbs_); + (AList.get @@ Atomic.get on_tick_cbs_); () (* thread that calls [tick()] regularly, to help enforce timeouts *) diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 70a8fa22..41029e8b 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -431,15 +431,14 @@ let create_backend ?(stop = Atomic.make false) ret ()); } - let on_tick_cbs_ = Atomic.make (ref []) + let on_tick_cbs_ = Atomic.make (AList.make ()) let set_on_tick_callbacks = Atomic.set on_tick_cbs_ let tick () = sample_gc_metrics_if_needed (); Backend_impl.send_event backend Event.E_tick; - let l = Atomic.get on_tick_cbs_ in - List.iter (fun f -> f ()) !l + List.iter (fun f -> f ()) (AList.get @@ Atomic.get on_tick_cbs_) let cleanup () = Backend_impl.shutdown backend end in diff --git a/src/core/opentelemetry.ml b/src/core/opentelemetry.ml index 964d0183..bd83b63d 100644 --- a/src/core/opentelemetry.ml +++ b/src/core/opentelemetry.ml @@ -86,7 +86,7 @@ module Collector = struct (** Should be called regularly for background processing, timeout checks, etc. *) - val set_on_tick_callbacks : (unit -> unit) list ref -> unit + val set_on_tick_callbacks : (unit -> unit) AList.t -> unit (** Give the collector the list of callbacks to be executed when [tick()] is called. Each such callback should be short and reentrant. Depending on the collector's implementation, it might be @@ -162,7 +162,7 @@ module Collector = struct (* hidden *) open struct - let on_tick_cbs_ = ref [] + let on_tick_cbs_ = AList.make () let backend : backend option ref = ref None end @@ -198,7 +198,7 @@ module Collector = struct let[@inline] rand_bytes_8 () = !Rand_bytes.rand_bytes_8 () - let on_tick f = on_tick_cbs_ := f :: !on_tick_cbs_ + let[@inline] on_tick f = AList.add on_tick_cbs_ f (** Do background work. Call this regularly if the collector doesn't already have a ticker thread or internal timer. *) @@ -1138,7 +1138,8 @@ end These metrics are emitted after each GC collection. *) module GC_metrics : sig val basic_setup : unit -> unit - (** Setup a hook that will emit GC statistics regularly *) + (** Setup a hook that will emit GC statistics on every tick (assuming + a ticker thread) *) val get_runtime_attributes : unit -> Span.key_value list (** Get OCaml name and version runtime attributes *) @@ -1158,13 +1159,13 @@ end = struct let get_runtime_attributes () = Lazy.force runtime_attributes let basic_setup () = - (* emit metrics when GC is called *) - let on_gc () = + let on_tick () = match Collector.get_backend () with | None -> () | Some (module C) -> C.signal_emit_gc_metrics () in - ignore (Gc.create_alarm on_gc : Gc.alarm) + + Collector.on_tick on_tick let bytes_per_word = Sys.word_size / 8 From d4186f64f4321600b0fc2087e3833a58eefc18a5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 15:47:37 -0500 Subject: [PATCH 06/11] client ocurl: do self-tracing in cheaper way we don't go through OTEL for self tracing as it can create regular span emission where normally there would be none (emitting a self-tracing span might create a batch, which then has to be sent after the batch timeout, and sending that one creates a new span, etc.) --- src/client-ocurl/config.ml | 27 ++++- src/client-ocurl/config.mli | 4 + .../opentelemetry_client_ocurl.ml | 102 ++++++++++++++++-- 3 files changed, 118 insertions(+), 15 deletions(-) diff --git a/src/client-ocurl/config.ml b/src/client-ocurl/config.ml index 0abca7e1..9ae54fc3 100644 --- a/src/client-ocurl/config.ml +++ b/src/client-ocurl/config.ml @@ -7,20 +7,39 @@ type t = { batch_timeout_ms: int; bg_threads: int; ticker_thread: bool; + self_trace: bool; } let pp out self = let pp_header ppf (a, b) = Format.fprintf ppf "@[%s: @,%s@]@." a b in let ppheaders = Format.pp_print_list pp_header in - let { debug; url; headers; batch_timeout_ms; bg_threads; ticker_thread } = + let { + debug; + url; + headers; + batch_timeout_ms; + bg_threads; + ticker_thread; + self_trace; + } = self in Format.fprintf out "{@[ debug=%B;@ url=%S;@ headers=%a;@ batch_timeout_ms=%d; bg_threads=%d;@ \ - ticker_thread=%B @]}" + ticker_thread=%B;@ self_trace=%B @]}" debug url ppheaders headers batch_timeout_ms bg_threads ticker_thread + self_trace let make ?(debug = !debug_) ?(url = get_url ()) ?(headers = get_headers ()) - ?(batch_timeout_ms = 500) ?(bg_threads = 4) ?(ticker_thread = true) () : t = + ?(batch_timeout_ms = 500) ?(bg_threads = 4) ?(ticker_thread = true) + ?(self_trace = true) () : t = let bg_threads = max 2 (min bg_threads 32) in - { debug; url; headers; batch_timeout_ms; bg_threads; ticker_thread } + { + debug; + url; + headers; + batch_timeout_ms; + bg_threads; + ticker_thread; + self_trace; + } diff --git a/src/client-ocurl/config.mli b/src/client-ocurl/config.mli index 17a7711e..91f6508d 100644 --- a/src/client-ocurl/config.mli +++ b/src/client-ocurl/config.mli @@ -18,6 +18,9 @@ type t = private { ticker_thread: bool; (** If true, start a thread that regularly checks if signals should be sent to the collector. Default [true] *) + self_trace: bool; + (** If true, the OTEL library will also emit its own spans. + @since NEXT_RELEASE *) } (** Configuration. @@ -31,6 +34,7 @@ val make : ?batch_timeout_ms:int -> ?bg_threads:int -> ?ticker_thread:bool -> + ?self_trace:bool -> unit -> t (** Make a configuration. diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 41029e8b..de3fc819 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -5,7 +5,6 @@ module OT = Opentelemetry module Config = Config -module Trace' = Opentelemetry.Trace open Opentelemetry include Common_ @@ -18,6 +17,61 @@ let timeout_gc_metrics = Mtime.Span.(20 * s) (** side channel for GC, appended to metrics batch data *) let gc_metrics = AList.make () +(** Side channel for self-tracing *) +let self_spans = AList.make () + +(** Mini tracing module that doesn't go through the + collector (and thus doesn't create new batches, etc.) *) +module Self_trace = struct + let enabled = Atomic.make true + + let add_event (scope : Scope.t) ev = scope.events <- ev :: scope.events + + let dummy_trace_id_ = Trace_id.create () + + let dummy_span_id = Span_id.create () + + let with_ ?kind ?(attrs = []) name f = + if Atomic.get enabled then ( + let scope = Scope.get_ambient_scope () in + let parent, trace_id = + match scope with + | None -> None, Trace_id.create () + | Some sc -> Some sc.span_id, sc.trace_id + in + let span_id = Span_id.create () in + let start_time = Timestamp_ns.now_unix_ns () in + + let scope = { Scope.trace_id; span_id; events = []; attrs } in + + let finally () = + let span, _ = + (* TODO: should the attrs passed to with_ go on the Span + (in Span.create) or on the ResourceSpan (in emit)? + (question also applies to Opentelemetry_lwt.Trace.with) *) + Span.create ~trace_id ?parent ?kind ~attrs:scope.attrs ~id:span_id + ~start_time + ~end_time:(Timestamp_ns.now_unix_ns ()) + name + in + AList.add self_spans span + in + let@ () = Scope.with_ambient_scope scope in + Fun.protect ~finally (fun () -> f scope) + ) else ( + (* do nothing *) + let scope = + { + Scope.trace_id = dummy_trace_id_; + span_id = dummy_span_id; + attrs = []; + events = []; + } + in + f scope + ) +end + (** capture current GC metrics if {!needs_gc_metrics} is true or it has been a long time since the last GC metrics collection, and push them into {!gc_metrics} for later collection *) @@ -122,11 +176,13 @@ end = struct let send_http_ ~stop ~config (client : Curl.t) encoder ~path ~encode x : unit = let@ _sc = - Trace'.with_ ~kind:Span.Span_kind_producer "otel-ocurl.send-http" + Self_trace.with_ ~kind:Span.Span_kind_producer "otel-ocurl.send-http" in let data = - let@ _sc = Trace'.with_ ~kind:Span.Span_kind_internal "encode-proto" in + let@ _sc = + Self_trace.with_ ~kind:Span.Span_kind_internal "encode-proto" + in Pbrt.Encoder.reset encoder; encode x encoder; Pbrt.Encoder.to_string encoder @@ -148,16 +204,16 @@ end = struct in match let@ _sc = - Trace'.with_ ~kind:Span.Span_kind_internal "curl.post" - ~attrs:[ "sz", `Int (String.length data) ] + Self_trace.with_ ~kind:Span.Span_kind_internal "curl.post" + ~attrs:[ "sz", `Int (String.length data); "url", `String url ] in Ezcurl.post ~headers ~client ~params:[] ~url ~content:(`String data) () with | Ok { code; _ } when code >= 200 && code < 300 -> () | Ok { code; body; headers = _; info = _ } -> Atomic.incr n_errors; - Trace'.add_event _sc (fun () -> - Opentelemetry.Event.make "error" ~attrs:[ "code", `Int code ]); + Self_trace.add_event _sc + @@ Opentelemetry.Event.make "error" ~attrs:[ "code", `Int code ]; if !debug_ || config.debug then ( let dec = Pbrt.Decoder.of_string body in @@ -187,6 +243,11 @@ end = struct let send_logs_http ~stop ~config (client : Curl.t) encoder (l : Logs.resource_logs list list) : unit = let l = List.fold_left (fun acc l -> List.rev_append l acc) [] l in + let@ _sp = + Self_trace.with_ ~kind:Span_kind_producer "send-logs" + ~attrs:[ "n", `Int (List.length l) ] + in + let x = Logs_service.default_export_logs_service_request ~resource_logs:l () in @@ -196,6 +257,11 @@ end = struct let send_metrics_http ~stop ~config curl encoder (l : Metrics.resource_metrics list list) : unit = let l = List.fold_left (fun acc l -> List.rev_append l acc) [] l in + let@ _sp = + Self_trace.with_ ~kind:Span_kind_producer "send-metrics" + ~attrs:[ "n", `Int (List.length l) ] + in + let x = Metrics_service.default_export_metrics_service_request ~resource_metrics:l () @@ -206,6 +272,11 @@ end = struct let send_traces_http ~stop ~config curl encoder (l : Trace.resource_spans list list) : unit = let l = List.fold_left (fun acc l -> List.rev_append l acc) [] l in + let@ _sp = + Self_trace.with_ ~kind:Span_kind_producer "send-traces" + ~attrs:[ "n", `Int (List.length l) ] + in + let x = Trace_service.default_export_trace_service_request ~resource_spans:l () in @@ -264,8 +335,8 @@ end = struct in let send_metrics () = - B_queue.push self.send_q - (To_send.Send_metric (Batch.pop_all batches.metrics)) + let metrics = AList.pop_all gc_metrics :: Batch.pop_all batches.metrics in + B_queue.push self.send_q (To_send.Send_metric metrics) in let send_logs () = @@ -273,8 +344,16 @@ end = struct in let send_traces () = - B_queue.push self.send_q - (To_send.Send_trace (Batch.pop_all batches.traces)) + let traces = Batch.pop_all batches.traces in + let self_spans = AList.pop_all self_spans in + let traces = + if self_spans != [] then ( + let resource = Opentelemetry.Trace.make_resource_spans self_spans in + [ resource ] :: traces + ) else + traces + in + B_queue.push self.send_q (To_send.Send_trace traces) in try @@ -463,6 +542,7 @@ let setup_ ?(stop = Atomic.make false) ?(config : Config.t = Config.make ()) () Opentelemetry.Collector.set_backend backend; if config.url <> get_url () then set_url config.url; + Atomic.set Self_trace.enabled config.self_trace; if config.ticker_thread then ( let sleep_ms = min 5_000 (max 2 config.batch_timeout_ms) in From f0310530a31dc0e15b07c2a8b053b211b6da4771 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 15:57:53 -0500 Subject: [PATCH 07/11] ocurl: add ticker_interval_ms config ticker interval will affect how often metrics are emitted, this doesn't need be related to batch timeouts. --- src/client-ocurl/config.ml | 11 +++++++---- src/client-ocurl/config.mli | 9 ++++++++- src/client-ocurl/opentelemetry_client_ocurl.ml | 13 +++++++------ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/client-ocurl/config.ml b/src/client-ocurl/config.ml index 9ae54fc3..fb86c0d2 100644 --- a/src/client-ocurl/config.ml +++ b/src/client-ocurl/config.ml @@ -7,6 +7,7 @@ type t = { batch_timeout_ms: int; bg_threads: int; ticker_thread: bool; + ticker_interval_ms: int; self_trace: bool; } @@ -20,19 +21,20 @@ let pp out self = batch_timeout_ms; bg_threads; ticker_thread; + ticker_interval_ms; self_trace; } = self in Format.fprintf out "{@[ debug=%B;@ url=%S;@ headers=%a;@ batch_timeout_ms=%d; bg_threads=%d;@ \ - ticker_thread=%B;@ self_trace=%B @]}" + ticker_thread=%B;@ ticker_interval_ms=%d;@ self_trace=%B @]}" debug url ppheaders headers batch_timeout_ms bg_threads ticker_thread - self_trace + ticker_interval_ms self_trace let make ?(debug = !debug_) ?(url = get_url ()) ?(headers = get_headers ()) - ?(batch_timeout_ms = 500) ?(bg_threads = 4) ?(ticker_thread = true) - ?(self_trace = true) () : t = + ?(batch_timeout_ms = 2_000) ?(bg_threads = 4) ?(ticker_thread = true) + ?(ticker_interval_ms = 500) ?(self_trace = true) () : t = let bg_threads = max 2 (min bg_threads 32) in { debug; @@ -41,5 +43,6 @@ let make ?(debug = !debug_) ?(url = get_url ()) ?(headers = get_headers ()) batch_timeout_ms; bg_threads; ticker_thread; + ticker_interval_ms; self_trace; } diff --git a/src/client-ocurl/config.mli b/src/client-ocurl/config.mli index 91f6508d..270aa2d2 100644 --- a/src/client-ocurl/config.mli +++ b/src/client-ocurl/config.mli @@ -12,12 +12,18 @@ type t = private { (** Number of milliseconds after which we will emit a batch, even incomplete. Note that the batch might take longer than that, because this is - only checked when a new event occurs. Default 500. *) + only checked when a new event occurs or when a tick + is emitted. Default 2_000. *) bg_threads: int; (** Are there background threads, and how many? Default [4] *) ticker_thread: bool; (** If true, start a thread that regularly checks if signals should be sent to the collector. Default [true] *) + ticker_interval_ms: int; + (** Interval for ticker thread, in milliseconds. This is + only useful if [ticker_thread] is [true]. + Default 500. + @since NEXT_RELEASE *) self_trace: bool; (** If true, the OTEL library will also emit its own spans. @since NEXT_RELEASE *) @@ -34,6 +40,7 @@ val make : ?batch_timeout_ms:int -> ?bg_threads:int -> ?ticker_thread:bool -> + ?ticker_interval_ms:int -> ?self_trace:bool -> unit -> t diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index de3fc819..b9b1ba2c 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -370,14 +370,14 @@ end = struct | Event.E_metric m -> Batch.push batches.metrics m | Event.E_trace tr -> Batch.push batches.traces tr | Event.E_logs logs -> Batch.push batches.logs logs - | Event.E_tick -> () + | Event.E_tick -> + (* the only impact of "tick" is that it wakes us up regularly *) + () | Event.E_flush_all -> must_flush_all := true in - while not (Queue.is_empty local_q) do - let ev = Queue.pop local_q in - process_ev ev - done; + Queue.iter process_ev local_q; + Queue.clear local_q; if !must_flush_all then ( if Batch.len batches.metrics > 0 then send_metrics (); @@ -545,7 +545,8 @@ let setup_ ?(stop = Atomic.make false) ?(config : Config.t = Config.make ()) () Atomic.set Self_trace.enabled config.self_trace; if config.ticker_thread then ( - let sleep_ms = min 5_000 (max 2 config.batch_timeout_ms) in + (* at most a minute *) + let sleep_ms = min 60_000 (max 2 config.ticker_interval_ms) in ignore (setup_ticker_thread ~stop ~sleep_ms backend () : Thread.t) ); From 9c2b885f95be3d2fe3213a9ec193e2d9fd7f3449 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 15:58:33 -0500 Subject: [PATCH 08/11] detail --- src/core/opentelemetry.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/opentelemetry.ml b/src/core/opentelemetry.ml index bd83b63d..e8568863 100644 --- a/src/core/opentelemetry.ml +++ b/src/core/opentelemetry.ml @@ -1164,7 +1164,6 @@ end = struct | None -> () | Some (module C) -> C.signal_emit_gc_metrics () in - Collector.on_tick on_tick let bytes_per_word = Sys.word_size / 8 From 133b6ad9910483802b7cbaa1974850a6e4deab05 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 16:05:25 -0500 Subject: [PATCH 09/11] revert: use collector again for self-tracing spans it's less dangerous to emit spans for self-tracing, even if nothing else is going on, than risk having them accumulate in case the program emits no spans but emits a lot of metrics and logs (which create self-tracing spans) --- .../opentelemetry_client_ocurl.ml | 50 +++---------------- 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index b9b1ba2c..813eec55 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -17,11 +17,7 @@ let timeout_gc_metrics = Mtime.Span.(20 * s) (** side channel for GC, appended to metrics batch data *) let gc_metrics = AList.make () -(** Side channel for self-tracing *) -let self_spans = AList.make () - -(** Mini tracing module that doesn't go through the - collector (and thus doesn't create new batches, etc.) *) +(** Mini tracing module (disabled if [config.self_trace=false]) *) module Self_trace = struct let enabled = Atomic.make true @@ -31,34 +27,10 @@ module Self_trace = struct let dummy_span_id = Span_id.create () - let with_ ?kind ?(attrs = []) name f = - if Atomic.get enabled then ( - let scope = Scope.get_ambient_scope () in - let parent, trace_id = - match scope with - | None -> None, Trace_id.create () - | Some sc -> Some sc.span_id, sc.trace_id - in - let span_id = Span_id.create () in - let start_time = Timestamp_ns.now_unix_ns () in - - let scope = { Scope.trace_id; span_id; events = []; attrs } in - - let finally () = - let span, _ = - (* TODO: should the attrs passed to with_ go on the Span - (in Span.create) or on the ResourceSpan (in emit)? - (question also applies to Opentelemetry_lwt.Trace.with) *) - Span.create ~trace_id ?parent ?kind ~attrs:scope.attrs ~id:span_id - ~start_time - ~end_time:(Timestamp_ns.now_unix_ns ()) - name - in - AList.add self_spans span - in - let@ () = Scope.with_ambient_scope scope in - Fun.protect ~finally (fun () -> f scope) - ) else ( + let with_ ?kind ?attrs name f = + if Atomic.get enabled then + Opentelemetry.Trace.with_ ?kind ?attrs name f + else ( (* do nothing *) let scope = { @@ -344,16 +316,8 @@ end = struct in let send_traces () = - let traces = Batch.pop_all batches.traces in - let self_spans = AList.pop_all self_spans in - let traces = - if self_spans != [] then ( - let resource = Opentelemetry.Trace.make_resource_spans self_spans in - [ resource ] :: traces - ) else - traces - in - B_queue.push self.send_q (To_send.Send_trace traces) + B_queue.push self.send_q + (To_send.Send_trace (Batch.pop_all batches.traces)) in try From b830c3d1be8a5c3d20e93a4c4bf0095b7aabecf3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 20 Dec 2023 16:07:29 -0500 Subject: [PATCH 10/11] config: self-trace is disabled by default for now --- src/client-ocurl/config.ml | 2 +- src/client-ocurl/config.mli | 2 +- tests/bin/emit1.ml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/client-ocurl/config.ml b/src/client-ocurl/config.ml index fb86c0d2..bda563e4 100644 --- a/src/client-ocurl/config.ml +++ b/src/client-ocurl/config.ml @@ -34,7 +34,7 @@ let pp out self = let make ?(debug = !debug_) ?(url = get_url ()) ?(headers = get_headers ()) ?(batch_timeout_ms = 2_000) ?(bg_threads = 4) ?(ticker_thread = true) - ?(ticker_interval_ms = 500) ?(self_trace = true) () : t = + ?(ticker_interval_ms = 500) ?(self_trace = false) () : t = let bg_threads = max 2 (min bg_threads 32) in { debug; diff --git a/src/client-ocurl/config.mli b/src/client-ocurl/config.mli index 270aa2d2..d5dddf30 100644 --- a/src/client-ocurl/config.mli +++ b/src/client-ocurl/config.mli @@ -25,7 +25,7 @@ type t = private { Default 500. @since NEXT_RELEASE *) self_trace: bool; - (** If true, the OTEL library will also emit its own spans. + (** If true, the OTEL library will also emit its own spans. Default [false]. @since NEXT_RELEASE *) } (** Configuration. diff --git a/tests/bin/emit1.ml b/tests/bin/emit1.ml index 179ddc78..cf40e2ef 100644 --- a/tests/bin/emit1.ml +++ b/tests/bin/emit1.ml @@ -124,7 +124,7 @@ let () = Arg.parse opts (fun _ -> ()) "emit1 [opt]*"; let config = - Opentelemetry_client_ocurl.Config.make ~debug:!debug + Opentelemetry_client_ocurl.Config.make ~debug:!debug ~self_trace:true ?bg_threads: (let n = !n_bg_threads in if n = 0 then From 031b7bfc38bf449af93e935a009a23afff13935b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 21 Dec 2023 08:28:07 -0500 Subject: [PATCH 11/11] more doc --- src/client-ocurl/config.ml | 2 +- src/client-ocurl/config.mli | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/client-ocurl/config.ml b/src/client-ocurl/config.ml index bda563e4..2ea68fb7 100644 --- a/src/client-ocurl/config.ml +++ b/src/client-ocurl/config.ml @@ -35,7 +35,7 @@ let pp out self = let make ?(debug = !debug_) ?(url = get_url ()) ?(headers = get_headers ()) ?(batch_timeout_ms = 2_000) ?(bg_threads = 4) ?(ticker_thread = true) ?(ticker_interval_ms = 500) ?(self_trace = false) () : t = - let bg_threads = max 2 (min bg_threads 32) in + let bg_threads = max 1 (min bg_threads 32) in { debug; url; diff --git a/src/client-ocurl/config.mli b/src/client-ocurl/config.mli index d5dddf30..dfc35c08 100644 --- a/src/client-ocurl/config.mli +++ b/src/client-ocurl/config.mli @@ -15,13 +15,16 @@ type t = private { only checked when a new event occurs or when a tick is emitted. Default 2_000. *) bg_threads: int; - (** Are there background threads, and how many? Default [4] *) + (** Are there background threads, and how many? Default [4]. + This will be adjusted to be at least [1] and at most [32]. *) ticker_thread: bool; (** If true, start a thread that regularly checks if signals should be sent to the collector. Default [true] *) ticker_interval_ms: int; (** Interval for ticker thread, in milliseconds. This is only useful if [ticker_thread] is [true]. + This will be clamped between [2 ms] and some longer + interval (maximum [60s] currently). Default 500. @since NEXT_RELEASE *) self_trace: bool;