From c1b27980f28885a72bbe004f27b0e8621073d3c1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 13:23:58 -0500 Subject: [PATCH 01/94] large refactor: split core library into many modules; change API design follow more closely the official OTEL recommendations, and also try to reduce global state. - use a class type for `Exporter.t` (instead of 1st class module `backend`) - have tracer, logger, metrics_emitter as explicit objects - keep a `Main_exporter` to make migration easier, but discouraged - add stdout_exporter and debug_exporter to opentelemetry.client --- src/core/common_.ml | 5 + src/core/context.ml | 17 + src/core/conventions.ml | 130 +++ src/core/event.ml | 9 + src/core/event.mli | 12 + src/core/exporter.ml | 128 +++ src/core/gc_metrics.ml | 49 + src/core/gc_metrics.mli | 17 + src/core/globals.ml | 102 ++ src/core/key_value.ml | 8 + src/core/lock.ml | 17 - src/core/lock.mli | 9 - src/core/log_record.ml | 76 ++ src/core/logger.ml | 37 + src/core/metrics.ml | 80 ++ src/core/metrics_callbacks.ml | 37 + src/core/metrics_callbacks.mli | 25 + src/core/metrics_emitter.ml | 32 + src/core/opentelemetry.ml | 1656 +------------------------------- src/core/rand_bytes.ml | 27 +- src/core/rand_bytes.mli | 12 +- src/core/scope.ml | 131 +++ src/core/scope.mli | 89 ++ src/core/span.ml | 38 + src/core/span.mli | 46 + src/core/span_ctx.ml | 91 ++ src/core/span_ctx.mli | 42 + src/core/span_id.ml | 33 + src/core/span_id.mli | 23 + src/core/span_kind.ml | 13 + src/core/span_link.ml | 20 + src/core/span_link.mli | 27 + src/core/span_status.ml | 15 + src/core/span_status.mli | 15 + src/core/tick_callbacks.ml | 9 + src/core/tick_callbacks.mli | 9 + src/core/timestamp_ns.ml | 29 + src/core/trace_context.ml | 34 + src/core/trace_id.ml | 35 + src/core/trace_id.mli | 30 + src/core/tracer.ml | 165 ++++ src/core/util_bytes_.ml | 47 + src/core/value.ml | 19 + 43 files changed, 1782 insertions(+), 1663 deletions(-) create mode 100644 src/core/common_.ml create mode 100644 src/core/context.ml create mode 100644 src/core/conventions.ml create mode 100644 src/core/event.ml create mode 100644 src/core/event.mli create mode 100644 src/core/exporter.ml create mode 100644 src/core/gc_metrics.ml create mode 100644 src/core/gc_metrics.mli create mode 100644 src/core/globals.ml create mode 100644 src/core/key_value.ml delete mode 100644 src/core/lock.ml delete mode 100644 src/core/lock.mli create mode 100644 src/core/log_record.ml create mode 100644 src/core/logger.ml create mode 100644 src/core/metrics.ml create mode 100644 src/core/metrics_callbacks.ml create mode 100644 src/core/metrics_callbacks.mli create mode 100644 src/core/metrics_emitter.ml create mode 100644 src/core/scope.ml create mode 100644 src/core/scope.mli create mode 100644 src/core/span.ml create mode 100644 src/core/span.mli create mode 100644 src/core/span_ctx.ml create mode 100644 src/core/span_ctx.mli create mode 100644 src/core/span_id.ml create mode 100644 src/core/span_id.mli create mode 100644 src/core/span_kind.ml create mode 100644 src/core/span_link.ml create mode 100644 src/core/span_link.mli create mode 100644 src/core/span_status.ml create mode 100644 src/core/span_status.mli create mode 100644 src/core/tick_callbacks.ml create mode 100644 src/core/tick_callbacks.mli create mode 100644 src/core/timestamp_ns.ml create mode 100644 src/core/trace_context.ml create mode 100644 src/core/trace_id.ml create mode 100644 src/core/trace_id.mli create mode 100644 src/core/tracer.ml create mode 100644 src/core/util_bytes_.ml create mode 100644 src/core/value.ml diff --git a/src/core/common_.ml b/src/core/common_.ml new file mode 100644 index 00000000..30bb39d2 --- /dev/null +++ b/src/core/common_.ml @@ -0,0 +1,5 @@ +let spf = Printf.sprintf + +module Proto = Opentelemetry_proto +module Atomic = Opentelemetry_atomic.Atomic +module Ambient_context = Opentelemetry_ambient_context diff --git a/src/core/context.ml b/src/core/context.ml new file mode 100644 index 00000000..883f646e --- /dev/null +++ b/src/core/context.ml @@ -0,0 +1,17 @@ +(** The context used in OTEL operations, to carry the current trace, etc. + + https://opentelemetry.io/docs/specs/otel/context/ *) + +type t = Hmap.t +(** The context type. We use [Hmap.t] as it's standard and widely used. *) + +type 'a key = 'a Hmap.key + +let set = Hmap.add + +(** @raise Invalid_argument if not present *) +let get_exn : 'a key -> t -> 'a = Hmap.get + +let get : 'a key -> t -> 'a option = Hmap.find + +let[@inline] new_key () : 'a key = Hmap.Key.create () diff --git a/src/core/conventions.ml b/src/core/conventions.ml new file mode 100644 index 00000000..c4002cb1 --- /dev/null +++ b/src/core/conventions.ml @@ -0,0 +1,130 @@ +(** Semantic conventions. + + {{:https://opentelemetry.io/docs/specs/semconv/} + https://opentelemetry.io/docs/specs/semconv/} *) + +module Attributes = struct + module Process = struct + module Runtime = struct + let name = "process.runtime.name" + + let version = "process.runtime.version" + + let description = "process.runtime.description" + end + end + + (** https://opentelemetry.io/docs/specs/semconv/attributes-registry/code/ *) + module Code = struct + (** Int *) + let column = "code.column" + + let filepath = "code.filepath" + + let function_ = "code.function" + + (** int *) + let line = "code.lineno" + + let namespace = "code.namespace" + + let stacktrace = "code.stacktrace" + end + + module Service = struct + let name = "service.name" + + let namespace = "service.namespace" + + let instance_id = "service.instance.id" + + let version = "service.version" + end + + module HTTP = struct + let error_type = "error.type" + + let request_method = "http.request.method" + + let route = "http.route" + + let url_full = "url.full" + + (** HTTP status code, int *) + let response_status_code = "http.response.status_code" + + let server_address = "server.address" + + let server_port = "server.port" + + (** http or https *) + let url_scheme = "url.scheme" + end + + (** https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/host.md + *) + module Host = struct + let id = "host.id" + + let name = "host.name" + + let type_ = "host.type" + + let arch = "host.arch" + + let ip = "host.ip" + + let mac = "host.mac" + + let image_id = "host.image.id" + + let image_name = "host.image.name" + + let image_version = "host.image.version" + end +end + +module Metrics = struct + module Process = struct + module Runtime = struct + module Ocaml = struct + module GC = struct + let compactions = "process.runtime.ocaml.gc.compactions" + + let major_collections = "process.runtime.ocaml.gc.major_collections" + + let major_heap = "process.runtime.ocaml.gc.major_heap" + + let minor_allocated = "process.runtime.ocaml.gc.minor_allocated" + + let minor_collections = "process.runtime.ocaml.gc.minor_collections" + end + end + end + end + + (** https://opentelemetry.io/docs/specs/semconv/http/ *) + module HTTP = struct + module Server = struct + let request_duration = "http.server.request.duration" + + let active_requests = "http.server.active_requests" + + (** Histogram *) + let request_body_size = "http.server.request.body.size" + + (** Histogram *) + let response_body_size = "http.server.response.body.size" + end + + module Client = struct + let request_duration = "http.client.request.duration" + + (** Histogram *) + let request_body_size = "http.client.request.body.size" + + (** Histogram *) + let response_body_size = "http.client.response.body.size" + end + end +end diff --git a/src/core/event.ml b/src/core/event.ml new file mode 100644 index 00000000..3d632a4d --- /dev/null +++ b/src/core/event.ml @@ -0,0 +1,9 @@ +open Common_ +open Proto.Trace + +type t = span_event + +let make ?(time_unix_nano = Timestamp_ns.now_unix_ns ()) ?(attrs = []) + (name : string) : t = + let attrs = List.map Key_value.conv attrs in + make_span_event ~time_unix_nano ~name ~attributes:attrs () diff --git a/src/core/event.mli b/src/core/event.mli new file mode 100644 index 00000000..8b90f641 --- /dev/null +++ b/src/core/event.mli @@ -0,0 +1,12 @@ +(** Events. + + Events occur at a given time and can carry attributes. They always belong in + a span. *) + +open Common_ +open Proto.Trace + +type t = span_event + +val make : + ?time_unix_nano:Timestamp_ns.t -> ?attrs:Key_value.t list -> string -> t diff --git a/src/core/exporter.ml b/src/core/exporter.ml new file mode 100644 index 00000000..7ccdc823 --- /dev/null +++ b/src/core/exporter.ml @@ -0,0 +1,128 @@ +(** Exporter. + + This is the pluggable component that actually sends signals to a OTEL + collector, or prints them, or saves them somewhere. + + This is part of the SDK, not just the API, so most real implementations live + in their own library. *) + +open Common_ + +open struct + module Proto = Opentelemetry_proto +end + +(** Main exporter interface *) +class type t = object + method send_trace : Proto.Trace.span list -> unit + + method send_metrics : Proto.Metrics.metric list -> unit + + method send_logs : Proto.Logs.log_record list -> unit + + method tick : unit -> unit + (** Should be called regularly for background processing, timeout checks, etc. + *) + + method add_on_tick_callback : (unit -> unit) -> unit + (** Add the given of callback to the exporter when [tick()] is called. The + callback should be short and reentrant. Depending on the exporter's + implementation, it might be called from a thread that is not the one that + called [on_tick]. *) + + method cleanup : on_done:(unit -> unit) -> unit -> unit + (** [cleanup ~on_done ()] is called when the exporter is shut down, and is + responsible for sending remaining batches, flushing sockets, etc. + @param on_done + callback invoked after the cleanup is done. @since 0.12 *) +end + +(** Dummy exporter, does nothing *) +let dummy : t = + let ticker = Tick_callbacks.create () in + object + method send_trace = ignore + + method send_metrics = ignore + + method send_logs = ignore + + method tick () = Tick_callbacks.tick ticker + + method add_on_tick_callback cb = Tick_callbacks.on_tick ticker cb + + method cleanup ~on_done () = on_done () + end + +let[@inline] send_trace (self : #t) (l : Proto.Trace.span list) = + self#send_trace l + +let[@inline] send_metrics (self : #t) (l : Proto.Metrics.metric list) = + self#send_metrics l + +let[@inline] send_logs (self : #t) (l : Proto.Logs.log_record list) = + self#send_logs l + +let[@inline] on_tick (self : #t) f = self#add_on_tick_callback f + +(** Do background work. Call this regularly if the collector doesn't already + have a ticker thread or internal timer. *) +let[@inline] tick (self : #t) = self#tick () + +let[@inline] cleanup (self : #t) ~on_done : unit = self#cleanup ~on_done () + +(** Main exporter, used by the main tracing functions. + + It is better to pass an explicit exporter when possible. *) +module Main_exporter = struct + (* hidden *) + open struct + (* a list of callbacks automatically added to the main exporter *) + let on_tick_cbs_ = AList.make () + + let exporter : t option Atomic.t = Atomic.make None + end + + (** Set the global exporter *) + let set (exp : t) : unit = + List.iter exp#add_on_tick_callback (AList.get on_tick_cbs_); + Atomic.set exporter (Some exp) + + (** Remove current exporter, if any. + @param on_done see {!t#cleanup}, @since 0.12 *) + let remove ~on_done () : unit = + match Atomic.exchange exporter None with + | None -> () + | Some exp -> + exp#tick (); + cleanup exp ~on_done + + (** Is there a configured exporter? *) + let present () : bool = Option.is_some (Atomic.get exporter) + + (** Current exporter, if any *) + let[@inline] get () : t option = Atomic.get exporter + + let add_on_tick_callback f = + AList.add on_tick_cbs_ f; + Option.iter (fun exp -> exp#add_on_tick_callback f) (get ()) +end + +let set_backend = Main_exporter.set [@@deprecated "use `Main_exporter.set`"] + +let remove_backend = Main_exporter.remove +[@@deprecated "use `Main_exporter.remove`"] + +let has_backend = Main_exporter.present +[@@deprecated "use `Main_exporter.present`"] + +let get_backend = Main_exporter.get [@@deprecated "use `Main_exporter.ge"] + +let with_setup_debug_backend ?(on_done = ignore) (exp : #t) ?(enable = true) () + f = + let exp = (exp :> t) in + if enable then ( + set_backend exp; + Fun.protect ~finally:(fun () -> cleanup exp ~on_done) f + ) else + f () diff --git a/src/core/gc_metrics.ml b/src/core/gc_metrics.ml new file mode 100644 index 00000000..c48b6734 --- /dev/null +++ b/src/core/gc_metrics.ml @@ -0,0 +1,49 @@ +open Common_ + +open struct + let[@inline] bytes_per_word = Sys.word_size / 8 + + let[@inline] word_to_bytes n = n * bytes_per_word + + let[@inline] word_to_bytes_f n = n *. float bytes_per_word +end + +let get_metrics () : Metrics.t list = + let gc = Gc.quick_stat () in + let now = Timestamp_ns.now_unix_ns () in + let open Metrics in + let open Conventions.Metrics in + [ + gauge ~name:Process.Runtime.Ocaml.GC.major_heap ~unit_:"B" + [ int ~now (word_to_bytes gc.Gc.heap_words) ]; + sum ~name:Process.Runtime.Ocaml.GC.minor_allocated + ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative + ~is_monotonic:true ~unit_:"B" + [ float ~now (word_to_bytes_f gc.Gc.minor_words) ]; + sum ~name:Process.Runtime.Ocaml.GC.minor_collections + ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative + ~is_monotonic:true + [ int ~now gc.Gc.minor_collections ]; + sum ~name:Process.Runtime.Ocaml.GC.major_collections + ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative + ~is_monotonic:true + [ int ~now gc.Gc.major_collections ]; + sum ~name:Process.Runtime.Ocaml.GC.compactions + ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative + ~is_monotonic:true + [ int ~now gc.Gc.compactions ]; + ] + +let setup (exp : #Exporter.t) = + let on_tick () = + let m = get_metrics () in + exp#send_metrics m + in + Exporter.on_tick exp on_tick + +let setup_on_main_exporter () = + match Exporter.Main_exporter.get () with + | None -> () + | Some exp -> setup exp + +let basic_setup = setup_on_main_exporter diff --git a/src/core/gc_metrics.mli b/src/core/gc_metrics.mli new file mode 100644 index 00000000..817967d2 --- /dev/null +++ b/src/core/gc_metrics.mli @@ -0,0 +1,17 @@ +(** Export GC metrics. + + These metrics are emitted regularly. *) + +val get_metrics : unit -> Metrics.t list +(** Get a few metrics from the current state of the GC. *) + +val setup : #Exporter.t -> unit +(** Setup a hook that will emit GC statistics on every tick. It does assume that + [tick] is called regularly on the exporter. For example, if we ensure the + exporter's [tick] function is called every 5s, we'll get GC metrics every + 5s. *) + +val setup_on_main_exporter : unit -> unit +(** Setup the hook on the main exporter. *) + +val basic_setup : unit -> unit [@@deprecated "use setup_on_main_exporter"] diff --git a/src/core/globals.ml b/src/core/globals.ml new file mode 100644 index 00000000..36e3e975 --- /dev/null +++ b/src/core/globals.ml @@ -0,0 +1,102 @@ +(** Process-wide metadata, environment variables, etc. *) + +open Common_ +open Proto.Common + +(** Main service name metadata *) +let service_name = ref "unknown_service" + +(** Namespace for the service *) +let service_namespace = ref None + +(** Unique identifier for the service *) +let service_instance_id = ref None + +(** Version for the service + @since 0.12 *) +let service_version = ref None + +let instrumentation_library = + make_instrumentation_scope ~version:"%%VERSION_NUM%%" ~name:"ocaml-otel" () + +(** Global attributes, initially set via OTEL_RESOURCE_ATTRIBUTES and modifiable + by the user code. They will be attached to each outgoing metrics/traces. *) +let global_attributes : key_value list ref = + let parse_pair s = + match String.split_on_char '=' s with + | [ a; b ] -> make_key_value ~key:a ~value:(String_value b) () + | _ -> failwith (Printf.sprintf "invalid attribute: %S" s) + in + ref + @@ + try + Sys.getenv "OTEL_RESOURCE_ATTRIBUTES" + |> String.split_on_char ',' |> List.map parse_pair + with _ -> [] + +(** Add a global attribute *) +let add_global_attribute (key : string) (v : Value.t) : unit = + global_attributes := Key_value.conv (key, v) :: !global_attributes + +(* add global attributes to this list *) +let merge_global_attributes_ into : _ list = + let open Key_value in + let not_redundant kv = List.for_all (fun kv' -> kv.key <> kv'.key) into in + List.rev_append (List.filter not_redundant !global_attributes) into + +(** Default span kind in {!Span.create}. This will be used in all spans that do + not specify [~kind] explicitly; it is set to "internal", following + directions from the [.proto] file. It can be convenient to set "client" or + "server" uniformly in here. + @since 0.4 *) +let default_span_kind = ref Proto.Trace.Span_kind_internal + +open struct + let runtime_attributes = + Conventions.Attributes. + [ + Process.Runtime.name, `String "ocaml"; + Process.Runtime.version, `String Sys.ocaml_version; + ] + + let runtime_attributes_converted = List.map Key_value.conv runtime_attributes +end + +(** Attributes about the OCaml runtime. See + https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/process.md#process-runtimes +*) +let[@inline] get_runtime_attributes () = runtime_attributes + +let mk_attributes ?(service_name = !service_name) ?(attrs = []) () : _ list = + let l = List.rev_map Key_value.conv attrs in + let l = List.rev_append runtime_attributes_converted l in + let l = + make_key_value ~key:Conventions.Attributes.Service.name + ~value:(String_value service_name) () + :: l + in + let l = + match !service_instance_id with + | None -> l + | Some v -> + make_key_value ~key:Conventions.Attributes.Service.instance_id + ~value:(String_value v) () + :: l + in + let l = + match !service_namespace with + | None -> l + | Some v -> + make_key_value ~key:Conventions.Attributes.Service.namespace + ~value:(String_value v) () + :: l + in + let l = + match !service_version with + | None -> l + | Some v -> + make_key_value ~key:Conventions.Attributes.Service.version + ~value:(String_value v) () + :: l + in + l |> merge_global_attributes_ diff --git a/src/core/key_value.ml b/src/core/key_value.ml new file mode 100644 index 00000000..6760c340 --- /dev/null +++ b/src/core/key_value.ml @@ -0,0 +1,8 @@ +open Common_ + +type t = string * Value.t + +let conv (k, v) = + let open Proto.Common in + let value = Value.conv v in + make_key_value ~key:k ?value () diff --git a/src/core/lock.ml b/src/core/lock.ml deleted file mode 100644 index 6ce295bb..00000000 --- a/src/core/lock.ml +++ /dev/null @@ -1,17 +0,0 @@ -let lock_ : (unit -> unit) ref = ref ignore - -let unlock_ : (unit -> unit) ref = ref ignore - -let set_mutex ~lock ~unlock : unit = - lock_ := lock; - unlock_ := unlock - -let[@inline] with_lock f = - !lock_ (); - match f () with - | x -> - !unlock_ (); - x - | exception e -> - !unlock_ (); - Printexc.raise_with_backtrace e (Printexc.get_raw_backtrace ()) diff --git a/src/core/lock.mli b/src/core/lock.mli deleted file mode 100644 index 2040bd1b..00000000 --- a/src/core/lock.mli +++ /dev/null @@ -1,9 +0,0 @@ -(** A global lock, modifiable by the user *) - -val set_mutex : lock:(unit -> unit) -> unlock:(unit -> unit) -> unit -(** Set a pair of lock/unlock functions that are used to protect access to - global state, if needed. By default these do nothing. *) - -val with_lock : (unit -> 'a) -> 'a -(** Call [f()] while holding the mutex defined {!set_mutex}, then release the - mutex. *) diff --git a/src/core/log_record.ml b/src/core/log_record.ml new file mode 100644 index 00000000..9212a8e5 --- /dev/null +++ b/src/core/log_record.ml @@ -0,0 +1,76 @@ +(** Logs. + + See + {{:https://opentelemetry.io/docs/reference/specification/overview/#log-signal} + the spec} *) + +open Common_ +open Proto.Logs + +type t = Proto.Logs.log_record + +(** Severity level of a log event *) +type severity = Proto.Logs.severity_number = + | Severity_number_unspecified + | Severity_number_trace + | Severity_number_trace2 + | Severity_number_trace3 + | Severity_number_trace4 + | Severity_number_debug + | Severity_number_debug2 + | Severity_number_debug3 + | Severity_number_debug4 + | Severity_number_info + | Severity_number_info2 + | Severity_number_info3 + | Severity_number_info4 + | Severity_number_warn + | Severity_number_warn2 + | Severity_number_warn3 + | Severity_number_warn4 + | Severity_number_error + | Severity_number_error2 + | Severity_number_error3 + | Severity_number_error4 + | Severity_number_fatal + | Severity_number_fatal2 + | Severity_number_fatal3 + | Severity_number_fatal4 + +let pp_severity = pp_severity_number + +type flags = Proto.Logs.log_record_flags = + | Log_record_flags_do_not_use + | Log_record_flags_trace_flags_mask + +let pp_flags = Proto.Logs.pp_log_record_flags + +(** Make a single log entry *) +let make ?time ?(observed_time_unix_nano = Timestamp_ns.now_unix_ns ()) + ?severity ?log_level ?flags ?trace_id ?span_id (body : Value.t) : t = + let time_unix_nano = + match time with + | None -> observed_time_unix_nano + | Some t -> t + in + let trace_id = Option.map Trace_id.to_bytes trace_id in + let span_id = Option.map Span_id.to_bytes span_id in + let body = Value.conv body in + make_log_record ~time_unix_nano ~observed_time_unix_nano + ?severity_number:severity ?severity_text:log_level ?flags ?trace_id ?span_id + ?body () + +(** Make a log entry whose body is a string *) +let make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags + ?trace_id ?span_id (body : string) : t = + make ?time ?observed_time_unix_nano ?severity ?log_level ?flags ?trace_id + ?span_id (`String body) + +(** Make a log entry with format *) +let make_strf ?time ?observed_time_unix_nano ?severity ?log_level ?flags + ?trace_id ?span_id fmt = + Format.kasprintf + (fun bod -> + make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags + ?trace_id ?span_id bod) + fmt diff --git a/src/core/logger.ml b/src/core/logger.ml new file mode 100644 index 00000000..33b890e2 --- /dev/null +++ b/src/core/logger.ml @@ -0,0 +1,37 @@ +(** Logs. + + See + {{:https://opentelemetry.io/docs/reference/specification/overview/#log-signal} + the spec} *) + +open Common_ + +(** A logger object *) +class type t = object + method is_enabled : Log_record.severity -> bool + + method emit : Log_record.t list -> unit +end + +(** Dummy logger, always disabled *) +let dummy : t = + object + method is_enabled _ = false + + method emit _ = () + end + +class simple (exp : #Exporter.t) : t = + object + method is_enabled _ = true + + method emit logs = if logs <> [] then exp#send_logs logs + end + +let emit ?service_name:_ ?attrs:_ (l : Log_record.t list) : unit = + match Exporter.Main_exporter.get () with + | None -> () + | Some e -> e#send_logs l +[@@deprecated "use an explicit Logger"] + +let k_logger : t Context.key = Context.new_key () diff --git a/src/core/metrics.ml b/src/core/metrics.ml new file mode 100644 index 00000000..f91538f8 --- /dev/null +++ b/src/core/metrics.ml @@ -0,0 +1,80 @@ +(** Metrics. + + See + {{:https://opentelemetry.io/docs/reference/specification/overview/#metric-signal} + the spec} *) + +open Common_ +open Proto +open Proto.Metrics + +type t = Metrics.metric +(** A single metric, measuring some time-varying quantity or statistical + distribution. It is composed of one or more data points that have precise + values and time stamps. Each distinct metric should have a distinct name. *) + +open struct + let _program_start = Timestamp_ns.now_unix_ns () +end + +(** Number data point, as a float *) +let float ?(start_time_unix_nano = _program_start) + ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) (d : float) : + number_data_point = + let attributes = attrs |> List.map Key_value.conv in + make_number_data_point ~start_time_unix_nano ~time_unix_nano:now ~attributes + ~value:(As_double d) () + +(** Number data point, as an int *) +let int ?(start_time_unix_nano = _program_start) + ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) (i : int) : + number_data_point = + let attributes = attrs |> List.map Key_value.conv in + make_number_data_point ~start_time_unix_nano ~time_unix_nano:now ~attributes + ~value:(As_int (Int64.of_int i)) + () + +(** Aggregation of a scalar metric, always with the current value *) +let gauge ~name ?description ?unit_ (l : number_data_point list) : t = + let data = Gauge (make_gauge ~data_points:l ()) in + make_metric ~name ?description ?unit_ ~data () + +type aggregation_temporality = Metrics.aggregation_temporality = + | Aggregation_temporality_unspecified + | Aggregation_temporality_delta + | Aggregation_temporality_cumulative + +(** Sum of all reported measurements over a time interval *) +let sum ~name ?description ?unit_ + ?(aggregation_temporality = Aggregation_temporality_cumulative) + ?is_monotonic (l : number_data_point list) : t = + let data = + Sum (make_sum ~data_points:l ?is_monotonic ~aggregation_temporality ()) + in + make_metric ~name ?description ?unit_ ~data () + +(** Histogram data + @param count number of values in population (non negative) + @param sum sum of values in population (0 if count is 0) + @param bucket_counts + count value of histogram for each bucket. Sum of the counts must be equal + to [count]. length must be [1+length explicit_bounds] + @param explicit_bounds strictly increasing list of bounds for the buckets *) +let histogram_data_point ?(start_time_unix_nano = _program_start) + ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) ?(exemplars = []) + ?(explicit_bounds = []) ?sum ~bucket_counts ~count () : histogram_data_point + = + let attributes = attrs |> List.map Key_value.conv in + make_histogram_data_point ~start_time_unix_nano ~time_unix_nano:now + ~attributes ~exemplars ~bucket_counts ~explicit_bounds ~count ?sum () + +let histogram ~name ?description ?unit_ ?aggregation_temporality + (l : histogram_data_point list) : t = + let data = + Histogram (make_histogram ~data_points:l ?aggregation_temporality ()) + in + make_metric ~name ?description ?unit_ ~data () + +(* TODO: exponential history *) +(* TODO: summary *) +(* TODO: exemplar *) diff --git a/src/core/metrics_callbacks.ml b/src/core/metrics_callbacks.ml new file mode 100644 index 00000000..c404c0bc --- /dev/null +++ b/src/core/metrics_callbacks.ml @@ -0,0 +1,37 @@ +open Common_ + +type t = { cbs: (unit -> Metrics.t list) AList.t } [@@unboxed] + +let create () : t = { cbs = AList.make () } + +let[@inline] add_metrics_cb (self : t) f = AList.add self.cbs f + +let add_to_exporter (exp : #Exporter.t) (self : t) = + let on_tick () = + (* collect all metrics *) + let res = ref [] in + List.iter + (fun f -> + let f_metrics = f () in + res := List.rev_append f_metrics !res) + (AList.get self.cbs); + let metrics = !res in + + (* emit the metrics *) + Exporter.send_metrics exp metrics + in + Exporter.on_tick exp on_tick + +module Main_set = struct + let cur_set_ : t option Atomic.t = Atomic.make None + + let rec get () = + match Atomic.get cur_set_ with + | Some s -> s + | None -> + let s = create () in + if Atomic.compare_and_set cur_set_ None (Some s) then + s + else + get () +end diff --git a/src/core/metrics_callbacks.mli b/src/core/metrics_callbacks.mli new file mode 100644 index 00000000..040d668f --- /dev/null +++ b/src/core/metrics_callbacks.mli @@ -0,0 +1,25 @@ +(** A set of callbacks that produce metrics when called. The metrics are + automatically called regularly. + + This allows applications to register metrics callbacks from various points + in the program (or even in libraries), and not worry about setting + alarms/intervals to emit them. *) + +type t + +val create : unit -> t + +val add_metrics_cb : t -> (unit -> Metrics.t list) -> unit +(** [register set f] adds the callback [f] to the [set]. + + [f] will be called at unspecified times and is expected to return a list of + metrics. It might be called regularly by the backend, in particular (but not + only) when {!Exporter.tick} is called. *) + +val add_to_exporter : #Exporter.t -> t -> unit +(** Make sure we export metrics at every [tick] of the exporter *) + +module Main_set : sig + val get : unit -> t + (** The global set *) +end diff --git a/src/core/metrics_emitter.ml b/src/core/metrics_emitter.ml new file mode 100644 index 00000000..4a075f4f --- /dev/null +++ b/src/core/metrics_emitter.ml @@ -0,0 +1,32 @@ +open Common_ + +class type t = object + method is_enabled : unit -> bool + + method emit : Metrics.t list -> unit +end + +class dummy : t = + object + method is_enabled () = false + + method emit _ = () + end + +class simple (exp : #Exporter.t) : t = + object + method is_enabled () = true + + method emit l = if l <> [] then exp#send_metrics l + end + +(** Emit some metrics to the collector (sync). This blocks until the backend has + pushed the metrics into some internal queue, or discarded them. + + {b NOTE} be careful not to call this inside a Gc alarm, as it can cause + deadlocks. *) +let emit ?attrs:_ (l : Metrics.t list) : unit = + match Exporter.Main_exporter.get () with + | None -> () + | Some exp -> exp#send_metrics l +[@@deprecated "use an explicit Metrics_emitter.t"] diff --git a/src/core/opentelemetry.ml b/src/core/opentelemetry.ml index 4b189819..b5f518c0 100644 --- a/src/core/opentelemetry.ml +++ b/src/core/opentelemetry.ml @@ -1,14 +1,6 @@ (** Opentelemetry types and instrumentation *) -open struct - let spf = Printf.sprintf - - module Atomic = Opentelemetry_atomic.Atomic - module Ambient_context = Opentelemetry_ambient_context -end - -module Lock = Lock -(** Global lock. *) +open Common_ module Rand_bytes = Rand_bytes (** Generation of random identifiers. *) @@ -30,1653 +22,75 @@ module Proto = Opentelemetry_proto (** {2 Timestamps} *) -(** Unix timestamp. +module Timestamp_ns = Timestamp_ns - These timestamps measure time since the Unix epoch (jan 1, 1970) UTC in - nanoseconds. *) -module Timestamp_ns = struct - type t = int64 +(** {2 Export signals to some external collector.} *) - let ns_in_a_day = Int64.(mul 1_000_000_000L (of_int (24 * 3600))) +module Exporter = Exporter +module Collector = Exporter [@@deprecated "Use 'Exporter' instead"] - (** Current unix timestamp in nanoseconds *) - let[@inline] now_unix_ns () : t = - let span = Ptime_clock.now () |> Ptime.to_span in - let d, ps = Ptime.Span.to_d_ps span in - let d = Int64.(mul (of_int d) ns_in_a_day) in - let ns = Int64.(div ps 1_000L) in - Int64.(add d ns) -end - -(** {2 Interface to data collector} *) - -(** Collector types - - These types are used by backend implementations, to send events to - collectors such as Jaeger. - - Note: most users will not need to touch this module *) -module Collector = struct - open Opentelemetry_proto - - type 'msg sender = { send: 'a. 'msg -> ret:(unit -> 'a) -> 'a } - (** Sender interface for a message of type [msg]. Inspired from Logs' reporter - (see - {{:https://erratique.ch/software/logs/doc/Logs/index.html#sync} its doc}) - but without [over] as it doesn't make much sense in presence of batching. - - The [ret] callback is used to return the desired type (unit, or a Lwt - promise, or anything else) once the event has been transferred to the - backend. It doesn't mean the event has been collected yet, it could sit in - a batch queue for a little while. *) - - (** Collector client interface. *) - module type BACKEND = sig - val send_trace : Trace.resource_spans list sender - - val send_metrics : Metrics.resource_metrics list sender - - val send_logs : Logs.resource_logs list sender - - val signal_emit_gc_metrics : unit -> unit - (** Signal the backend that it should emit GC metrics when it has the - chance. This should be installed in a GC alarm or another form of - regular trigger. *) - - val tick : unit -> unit - (** Should be called regularly for background processing, timeout checks, - etc. *) - - val set_on_tick_callbacks : (unit -> unit) AList.t -> unit - (** Give the collector the list of callbacks to be executed when [tick()] is - called. Each such callback should be short and reentrant. Depending on - the collector's implementation, it might be called from a thread that is - not the one that called [on_tick]. *) - - val cleanup : on_done:(unit -> unit) -> unit -> unit - (** [cleanup ~on_done ()] is called when the collector is shut down, and is - responsible for sending remaining batches, flushing sockets, etc. - @param on_done - callback invoked after the cleanup is done. @since 0.12 *) - end - - type backend = (module BACKEND) - - module Noop_backend : BACKEND = struct - let noop_sender _ ~ret = ret () - - let send_trace : Trace.resource_spans list sender = { send = noop_sender } - - let send_metrics : Metrics.resource_metrics list sender = - { send = noop_sender } - - let send_logs : Logs.resource_logs list sender = { send = noop_sender } - - let signal_emit_gc_metrics () = () - - let tick () = () - - let set_on_tick_callbacks _cbs = () - - let cleanup ~on_done () = - on_done (); - () - end - - module Debug_backend (B : BACKEND) : BACKEND = struct - open Proto - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - Format.eprintf "SPANS: %a@." - (Format.pp_print_list Trace.pp_resource_spans) - l; - B.send_trace.send l ~ret); - } - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun l ~ret -> - Format.eprintf "METRICS: %a@." - (Format.pp_print_list Metrics.pp_resource_metrics) - l; - B.send_metrics.send l ~ret); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun l ~ret -> - Format.eprintf "LOGS: %a@." - (Format.pp_print_list Logs.pp_resource_logs) - l; - B.send_logs.send l ~ret); - } - - let signal_emit_gc_metrics () = B.signal_emit_gc_metrics () - - let tick () = B.tick () - - let set_on_tick_callbacks cbs = B.set_on_tick_callbacks cbs - - let cleanup ~on_done () = B.cleanup ~on_done () - end - - let debug_backend : backend = (module Debug_backend (Noop_backend)) - - (* hidden *) - open struct - let on_tick_cbs_ = AList.make () - - let backend : backend option Atomic.t = Atomic.make None - end - - (** Set collector backend *) - let set_backend (b : backend) : unit = - let (module B) = b in - B.set_on_tick_callbacks on_tick_cbs_; - Atomic.set backend (Some b) - - (** Remove current backend, if any. - @since 0.11 - @param on_done see {!BACKEND.cleanup}, @since 0.12 *) - let remove_backend ~on_done () : unit = - match Atomic.exchange backend None with - | None -> () - | Some (module B) -> - B.tick (); - B.cleanup ~on_done () - - (** Is there a configured backend? *) - let[@inline] has_backend () : bool = Atomic.get backend != None - - (** Current backend, if any *) - let[@inline] get_backend () : backend option = Atomic.get backend - - let send_trace (l : Trace.resource_spans list) ~ret = - match Atomic.get backend with - | None -> ret () - | Some (module B) -> B.send_trace.send l ~ret - - let send_metrics (l : Metrics.resource_metrics list) ~ret = - match Atomic.get backend with - | None -> ret () - | Some (module B) -> B.send_metrics.send l ~ret - - let send_logs (l : Logs.resource_logs list) ~ret = - match Atomic.get backend with - | None -> ret () - | Some (module B) -> B.send_logs.send l ~ret - - let[@inline] rand_bytes_16 () = !Rand_bytes.rand_bytes_16 () - - let[@inline] rand_bytes_8 () = !Rand_bytes.rand_bytes_8 () - - let[@inline] on_tick f = AList.add on_tick_cbs_ f - - (** Do background work. Call this regularly if the collector doesn't already - have a ticker thread or internal timer. *) - let tick () = - match Atomic.get backend with - | None -> () - | Some (module B) -> B.tick () - - let with_setup_debug_backend ?(on_done = ignore) b ?(enable = true) () f = - let (module B : BACKEND) = b in - if enable then ( - set_backend b; - Fun.protect ~finally:(B.cleanup ~on_done) f - ) else - f () -end - -(**/**) - -module Util_ = struct - let int_to_hex (i : int) = - if i < 10 then - Char.chr (i + Char.code '0') - else - Char.chr (i - 10 + Char.code 'a') - - let bytes_to_hex_into b res off : unit = - for i = 0 to Bytes.length b - 1 do - let n = Char.code (Bytes.get b i) in - Bytes.set res ((2 * i) + off) (int_to_hex ((n land 0xf0) lsr 4)); - Bytes.set res ((2 * i) + 1 + off) (int_to_hex (n land 0x0f)) - done - - let bytes_to_hex (b : bytes) : string = - let res = Bytes.create (2 * Bytes.length b) in - bytes_to_hex_into b res 0; - Bytes.unsafe_to_string res - - let int_of_hex = function - | '0' .. '9' as c -> Char.code c - Char.code '0' - | 'a' .. 'f' as c -> 10 + Char.code c - Char.code 'a' - | c -> raise (Invalid_argument (spf "invalid hex char: %C" c)) - - let bytes_of_hex_substring (s : string) off len = - if len mod 2 <> 0 then - raise (Invalid_argument "hex sequence must be of even length"); - let res = Bytes.make (len / 2) '\x00' in - for i = 0 to (len / 2) - 1 do - let n1 = int_of_hex (String.get s (off + (2 * i))) in - let n2 = int_of_hex (String.get s (off + (2 * i) + 1)) in - let n = (n1 lsl 4) lor n2 in - Bytes.set res i (Char.chr n) - done; - res - - let bytes_of_hex (s : string) : bytes = - bytes_of_hex_substring s 0 (String.length s) - - let bytes_non_zero (self : bytes) : bool = - try - for i = 0 to Bytes.length self - 1 do - if Char.code (Bytes.unsafe_get self i) <> 0 then raise_notrace Exit - done; - false - with Exit -> true -end - -(**/**) +module Tick_callbacks = Tick_callbacks +(** Helper to implement part of the exporter *) (** {2 Identifiers} *) -(** Trace ID. +module Trace_id = Trace_id - This 16 bytes identifier is shared by all spans in one trace. *) -module Trace_id : sig - type t +let k_trace_id = Trace_id.k_trace_id - val create : unit -> t +module Span_id = Span_id +module Span_ctx = Span_ctx - val dummy : t - - val pp : Format.formatter -> t -> unit - - val is_valid : t -> bool - - val to_bytes : t -> bytes - - val of_bytes : bytes -> t - - val to_hex : t -> string - - val to_hex_into : t -> bytes -> int -> unit - - val of_hex : string -> t - - val of_hex_substring : string -> int -> t -end = struct - type t = bytes - - let[@inline] to_bytes self = self - - let dummy : t = Bytes.make 16 '\x00' - - let create () : t = - let b = Collector.rand_bytes_16 () in - assert (Bytes.length b = 16); - (* make sure the identifier is not all 0, which is a dummy identifier. *) - Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); - b - - let[@inline] of_bytes b = - if Bytes.length b = 16 then - b - else - raise (Invalid_argument "trace ID must be 16 bytes in length") - - let is_valid = Util_.bytes_non_zero - - let to_hex = Util_.bytes_to_hex - - let to_hex_into = Util_.bytes_to_hex_into - - let[@inline] of_hex s = of_bytes (Util_.bytes_of_hex s) - - let[@inline] of_hex_substring s off = - of_bytes (Util_.bytes_of_hex_substring s off 32) - - let pp fmt t = Format.fprintf fmt "%s" (to_hex t) -end - -(** Hmap key to carry around a {!Trace_id.t}, to remember what the current trace - is. - @since 0.8 *) -let k_trace_id : Trace_id.t Hmap.key = Hmap.Key.create () - -(** Unique ID of a span. *) -module Span_id : sig - type t - - val create : unit -> t - - val dummy : t - - val pp : Format.formatter -> t -> unit - - val is_valid : t -> bool - - val to_bytes : t -> bytes - - val of_bytes : bytes -> t - - val to_hex : t -> string - - val to_hex_into : t -> bytes -> int -> unit - - val of_hex : string -> t - - val of_hex_substring : string -> int -> t -end = struct - type t = bytes - - let[@inline] to_bytes self = self - - let dummy : t = Bytes.make 8 '\x00' - - let create () : t = - let b = Collector.rand_bytes_8 () in - assert (Bytes.length b = 8); - (* make sure the identifier is not all 0, which is a dummy identifier. *) - Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); - b - - let is_valid = Util_.bytes_non_zero - - let of_bytes b = - if Bytes.length b = 8 then - b - else - raise (Invalid_argument "span IDs must be 8 bytes in length") - - let to_hex = Util_.bytes_to_hex - - let to_hex_into = Util_.bytes_to_hex_into - - let[@inline] of_hex s = of_bytes (Util_.bytes_of_hex s) - - let[@inline] of_hex_substring s off = - of_bytes (Util_.bytes_of_hex_substring s off 16) - - let pp fmt t = Format.fprintf fmt "%s" (to_hex t) -end - -(** Span context. This bundles up a trace ID and parent ID. - - {{:https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext} - https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext} - @since 0.7 *) -module Span_ctx : sig - type t - - val make : - ?sampled:bool -> trace_id:Trace_id.t -> parent_id:Span_id.t -> unit -> t - - val dummy : t - (** Invalid span context, to be used as a placeholder *) - - val is_valid : t -> bool - - val trace_id : t -> Trace_id.t - - val parent_id : t -> Span_id.t - - val sampled : t -> bool - - val to_w3c_trace_context : t -> bytes - - val of_w3c_trace_context : bytes -> (t, string) result - - val of_w3c_trace_context_exn : bytes -> t - (** @raise Invalid_argument if parsing failed *) -end = struct - (* TODO: trace state *) - - type t = { - trace_id: Trace_id.t; - parent_id: Span_id.t; - sampled: bool; - } - - let dummy = - { trace_id = Trace_id.dummy; parent_id = Span_id.dummy; sampled = false } - - let make ?(sampled = false) ~trace_id ~parent_id () : t = - { trace_id; parent_id; sampled } - - let[@inline] is_valid self = - Trace_id.is_valid self.trace_id && Span_id.is_valid self.parent_id - - let[@inline] sampled self = self.sampled - - let[@inline] trace_id self = self.trace_id - - let[@inline] parent_id self = self.parent_id - - let to_w3c_trace_context (self : t) : bytes = - let bs = Bytes.create 55 in - Bytes.set bs 0 '0'; - Bytes.set bs 1 '0'; - Bytes.set bs 2 '-'; - Trace_id.to_hex_into self.trace_id bs 3; - (* +32 *) - Bytes.set bs (3 + 32) '-'; - Span_id.to_hex_into self.parent_id bs 36; - (* +16 *) - Bytes.set bs 52 '-'; - Bytes.set bs 53 '0'; - Bytes.set bs 54 - (if self.sampled then - '1' - else - '0'); - bs - - let of_w3c_trace_context bs : _ result = - try - if Bytes.length bs <> 55 then invalid_arg "trace context must be 55 bytes"; - (match int_of_string_opt (Bytes.sub_string bs 0 2) with - | Some 0 -> () - | Some n -> invalid_arg @@ spf "version is %d, expected 0" n - | None -> invalid_arg "expected 2-digit version"); - if Bytes.get bs 2 <> '-' then invalid_arg "expected '-' before trace_id"; - let trace_id = - try Trace_id.of_hex_substring (Bytes.unsafe_to_string bs) 3 - with Invalid_argument msg -> invalid_arg (spf "in trace id: %s" msg) - in - if Bytes.get bs (3 + 32) <> '-' then - invalid_arg "expected '-' before parent_id"; - let parent_id = - try Span_id.of_hex_substring (Bytes.unsafe_to_string bs) 36 - with Invalid_argument msg -> invalid_arg (spf "in span id: %s" msg) - in - if Bytes.get bs 52 <> '-' then invalid_arg "expected '-' after parent_id"; - let sampled = int_of_string_opt (Bytes.sub_string bs 53 2) = Some 1 in - - (* ignore flags *) - Ok { trace_id; parent_id; sampled } - with Invalid_argument msg -> Error msg - - let of_w3c_trace_context_exn bs = - match of_w3c_trace_context bs with - | Ok t -> t - | Error msg -> invalid_arg @@ spf "invalid w3c trace context: %s" msg -end - -(** Hmap key to carry around a {!Span_ctx.t}, e.g. to remember what the current - parent span is. - @since 0.8 *) -let k_span_ctx : Span_ctx.t Hmap.key = Hmap.Key.create () +let k_span_ctx = Span_ctx.k_span_ctx (** {2 Attributes and conventions} *) -(** Semantic conventions +module Conventions = Conventions - {{:https://opentelemetry.io/docs/specs/semconv/} - https://opentelemetry.io/docs/specs/semconv/} *) -module Conventions = struct - module Attributes = struct - module Process = struct - module Runtime = struct - let name = "process.runtime.name" - - let version = "process.runtime.version" - - let description = "process.runtime.description" - end - end - - (** https://opentelemetry.io/docs/specs/semconv/attributes-registry/code/ *) - module Code = struct - (** Int *) - let column = "code.column" - - let filepath = "code.filepath" - - let function_ = "code.function" - - (** int *) - let line = "code.lineno" - - let namespace = "code.namespace" - - let stacktrace = "code.stacktrace" - end - - module Service = struct - let name = "service.name" - - let namespace = "service.namespace" - - let instance_id = "service.instance.id" - - let version = "service.version" - end - - module HTTP = struct - let error_type = "error.type" - - let request_method = "http.request.method" - - let route = "http.route" - - let url_full = "url.full" - - (** HTTP status code, int *) - let response_status_code = "http.response.status_code" - - let server_address = "server.address" - - let server_port = "server.port" - - (** http or https *) - let url_scheme = "url.scheme" - end - - (** https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/host.md - *) - module Host = struct - let id = "host.id" - - let name = "host.name" - - let type_ = "host.type" - - let arch = "host.arch" - - let ip = "host.ip" - - let mac = "host.mac" - - let image_id = "host.image.id" - - let image_name = "host.image.name" - - let image_version = "host.image.version" - end - end - - module Metrics = struct - module Process = struct - module Runtime = struct - module Ocaml = struct - module GC = struct - let compactions = "process.runtime.ocaml.gc.compactions" - - let major_collections = "process.runtime.ocaml.gc.major_collections" - - let major_heap = "process.runtime.ocaml.gc.major_heap" - - let minor_allocated = "process.runtime.ocaml.gc.minor_allocated" - - let minor_collections = "process.runtime.ocaml.gc.minor_collections" - end - end - end - end - - (** https://opentelemetry.io/docs/specs/semconv/http/ *) - module HTTP = struct - module Server = struct - let request_duration = "http.server.request.duration" - - let active_requests = "http.server.active_requests" - - (** Histogram *) - let request_body_size = "http.server.request.body.size" - - (** Histogram *) - let response_body_size = "http.server.response.body.size" - end - - module Client = struct - let request_duration = "http.client.request.duration" - - (** Histogram *) - let request_body_size = "http.client.request.body.size" - - (** Histogram *) - let response_body_size = "http.client.response.body.size" - end - end - end -end - -type value = - [ `Int of int - | `String of string - | `Bool of bool - | `Float of float - | `None - ] +type value = Value.t (** A value in a key/value attribute *) -type key_value = string * value - -open struct - let _conv_value = - let open Proto.Common in - function - | `Int i -> Some (Int_value (Int64.of_int i)) - | `String s -> Some (String_value s) - | `Bool b -> Some (Bool_value b) - | `Float f -> Some (Double_value f) - | `None -> None - - let _conv_key_value (k, v) = - let open Proto.Common in - let value = _conv_value v in - make_key_value ~key:k ?value () -end +type key_value = Key_value.t (** {2 Global settings} *) -(** Process-wide metadata, environment variables, etc. *) -module Globals = struct - open Proto.Common - - (** Main service name metadata *) - let service_name = ref "unknown_service" - - (** Namespace for the service *) - let service_namespace = ref None - - (** Unique identifier for the service *) - let service_instance_id = ref None - - (** Version for the service - @since 0.12 *) - let service_version = ref None - - let instrumentation_library : instrumentation_scope = - make_instrumentation_scope ~version:"%%VERSION_NUM%%" ~name:"ocaml-otel" () - - (** Global attributes, initially set via OTEL_RESOURCE_ATTRIBUTES and - modifiable by the user code. They will be attached to each outgoing - metrics/traces. *) - let global_attributes : key_value list ref = - let parse_pair s = - match String.split_on_char '=' s with - | [ a; b ] -> make_key_value ~key:a ~value:(String_value b) () - | _ -> failwith (Printf.sprintf "invalid attribute: %S" s) - in - ref - @@ - try - Sys.getenv "OTEL_RESOURCE_ATTRIBUTES" - |> String.split_on_char ',' |> List.map parse_pair - with _ -> [] - - (** Add a global attribute *) - let add_global_attribute (key : string) (v : value) : unit = - global_attributes := _conv_key_value (key, v) :: !global_attributes - - (* add global attributes to this list *) - let merge_global_attributes_ into : _ list = - let not_redundant kv = List.for_all (fun kv' -> kv.key <> kv'.key) into in - List.rev_append (List.filter not_redundant !global_attributes) into - - (** Default span kind in {!Span.create}. This will be used in all spans that - do not specify [~kind] explicitly; it is set to "internal", following - directions from the [.proto] file. It can be convenient to set "client" or - "server" uniformly in here. - @since 0.4 *) - let default_span_kind = ref Proto.Trace.Span_kind_internal - - let mk_attributes ?(service_name = !service_name) ?(attrs = []) () : _ list = - let l = List.map _conv_key_value attrs in - let l = - make_key_value ~key:Conventions.Attributes.Service.name - ~value:(String_value service_name) () - :: l - in - let l = - match !service_instance_id with - | None -> l - | Some v -> - make_key_value ~key:Conventions.Attributes.Service.instance_id - ~value:(String_value v) () - :: l - in - let l = - match !service_namespace with - | None -> l - | Some v -> - make_key_value ~key:Conventions.Attributes.Service.namespace - ~value:(String_value v) () - :: l - in - let l = - match !service_version with - | None -> l - | Some v -> - make_key_value ~key:Conventions.Attributes.Service.version - ~value:(String_value v) () - :: l - in - l |> merge_global_attributes_ -end +module Globals = Globals (** {2 Traces and Spans} *) -(** Events. - - Events occur at a given time and can carry attributes. They always belong in - a span. *) -module Event : sig - open Proto.Trace - - type t = span_event - - val make : - ?time_unix_nano:Timestamp_ns.t -> ?attrs:key_value list -> string -> t -end = struct - open Proto.Trace - - type t = span_event - - let make ?(time_unix_nano = Timestamp_ns.now_unix_ns ()) ?(attrs = []) - (name : string) : t = - let attrs = List.map _conv_key_value attrs in - make_span_event ~time_unix_nano ~name ~attributes:attrs () -end - -(** Span Link - - A pointer from the current span to another span in the same trace or in a - different trace. For example, this can be used in batching operations, where - a single batch handler processes multiple requests from different traces or - when the handler receives a request from a different project. *) -module Span_link : sig - open Proto.Trace - - type t = span_link - - val make : - trace_id:Trace_id.t -> - span_id:Span_id.t -> - ?trace_state:string -> - ?attrs:key_value list -> - ?dropped_attributes_count:int -> - unit -> - t - - val of_span_ctx : - ?trace_state:string -> - ?attrs:key_value list -> - ?dropped_attributes_count:int -> - Span_ctx.t -> - t -end = struct - open Proto.Trace - - type t = span_link - - let make ~trace_id ~span_id ?trace_state ?(attrs = []) - ?dropped_attributes_count () : t = - let attributes = List.map _conv_key_value attrs in - let dropped_attributes_count = - Option.map Int32.of_int dropped_attributes_count - in - make_span_link - ~trace_id:(Trace_id.to_bytes trace_id) - ~span_id:(Span_id.to_bytes span_id) ?trace_state ~attributes - ?dropped_attributes_count () - - let[@inline] of_span_ctx ?trace_state ?attrs ?dropped_attributes_count - (ctx : Span_ctx.t) : t = - make ~trace_id:(Span_ctx.trace_id ctx) ~span_id:(Span_ctx.parent_id ctx) - ?trace_state ?attrs ?dropped_attributes_count () -end - -module Span_status : sig - open Proto.Trace - - type t = status = private { - mutable _presence: Pbrt.Bitfield.t; - mutable message: string; - mutable code: status_status_code; - } - - type code = status_status_code = - | Status_code_unset - | Status_code_ok - | Status_code_error - - val make : message:string -> code:code -> t -end = struct - open Proto.Trace - - type t = status = private { - mutable _presence: Pbrt.Bitfield.t; - mutable message: string; - mutable code: status_status_code; - } - - type code = status_status_code = - | Status_code_unset - | Status_code_ok - | Status_code_error - - let[@inline] make ~message ~code : t = make_status ~message ~code () -end - -(** @since 0.11 *) -module Span_kind : sig - open Proto.Trace - - type t = span_span_kind = - | Span_kind_unspecified - | Span_kind_internal - | Span_kind_server - | Span_kind_client - | Span_kind_producer - | Span_kind_consumer -end = struct - open Proto.Trace - - type t = span_span_kind = - | Span_kind_unspecified - | Span_kind_internal - | Span_kind_server - | Span_kind_client - | Span_kind_producer - | Span_kind_consumer -end +module Event = Event +module Span_link = Span_link +module Span_status = Span_status +module Span_kind = Span_kind (** {2 Scopes} *) -(** Scopes. - - A scope is a trace ID and the span ID of the currently active span. *) -module Scope : sig - type item_list - - type t = { - trace_id: Trace_id.t; - span_id: Span_id.t; - mutable items: item_list; - } - - val attrs : t -> key_value list - - val events : t -> Event.t list - - val links : t -> Span_link.t list - - val status : t -> Span_status.t option - - val kind : t -> Span_kind.t option - - val make : - trace_id:Trace_id.t -> - span_id:Span_id.t -> - ?events:Event.t list -> - ?attrs:key_value list -> - ?links:Span_link.t list -> - ?status:Span_status.t -> - unit -> - t - - val to_span_link : - ?trace_state:string -> - ?attrs:key_value list -> - ?dropped_attributes_count:int -> - t -> - Span_link.t - (** Turn the scope into a span link *) - - val to_span_ctx : t -> Span_ctx.t - (** Turn the scope into a span context *) - - val add_event : t -> (unit -> Event.t) -> unit - (** Add an event to the scope. It will be aggregated into the span. - - Note that this takes a function that produces an event, and will only call - it if there is an instrumentation backend. *) - - val record_exception : t -> exn -> Printexc.raw_backtrace -> unit - - val add_attrs : t -> (unit -> key_value list) -> unit - (** Add attributes to the scope. It will be aggregated into the span. - - Note that this takes a function that produces attributes, and will only - call it if there is an instrumentation backend. *) - - val add_links : t -> (unit -> Span_link.t list) -> unit - (** Add links to the scope. It will be aggregated into the span. - - Note that this takes a function that produces links, and will only call it - if there is an instrumentation backend. *) - - val set_status : t -> Span_status.t -> unit - (** set the span status. - - Note that this function will be called only if there is an instrumentation - backend. *) - - val set_kind : t -> Span_kind.t -> unit - (** Set the span's kind. - @since 0.11 *) - - val ambient_scope_key : t Ambient_context.key - (** The opaque key necessary to access/set the ambient scope with - {!Ambient_context}. *) - - val get_ambient_scope : ?scope:t -> unit -> t option - (** Obtain current scope from {!Ambient_context}, if available. *) - - val with_ambient_scope : t -> (unit -> 'a) -> 'a - (** [with_ambient_scope sc thunk] calls [thunk()] in a context where [sc] is - the (thread|continuation)-local scope, then reverts to the previous local - scope, if any. - - @see - ambient-context docs *) -end = struct - type item_list = - | Nil - | Ev of Event.t * item_list - | Attr of key_value * item_list - | Span_link of Span_link.t * item_list - | Span_status of Span_status.t * item_list - | Span_kind of Span_kind.t * item_list - - type t = { - trace_id: Trace_id.t; - span_id: Span_id.t; - mutable items: item_list; - } - - let attrs scope = - let rec loop acc = function - | Nil -> acc - | Attr (attr, l) -> loop (attr :: acc) l - | Ev (_, l) | Span_kind (_, l) | Span_link (_, l) | Span_status (_, l) -> - loop acc l - in - loop [] scope.items - - let events scope = - let rec loop acc = function - | Nil -> acc - | Ev (event, l) -> loop (event :: acc) l - | Attr (_, l) | Span_kind (_, l) | Span_link (_, l) | Span_status (_, l) - -> - loop acc l - in - loop [] scope.items - - let links scope = - let rec loop acc = function - | Nil -> acc - | Span_link (span_link, l) -> loop (span_link :: acc) l - | Ev (_, l) | Span_kind (_, l) | Attr (_, l) | Span_status (_, l) -> - loop acc l - in - loop [] scope.items - - let status scope = - let rec loop = function - | Nil -> None - | Span_status (status, _) -> Some status - | Ev (_, l) | Attr (_, l) | Span_kind (_, l) | Span_link (_, l) -> loop l - in - loop scope.items - - let kind scope = - let rec loop = function - | Nil -> None - | Span_kind (k, _) -> Some k - | Ev (_, l) | Span_status (_, l) | Attr (_, l) | Span_link (_, l) -> - loop l - in - loop scope.items - - let make ~trace_id ~span_id ?(events = []) ?(attrs = []) ?(links = []) ?status - () : t = - let items = - let items = - match status with - | None -> Nil - | Some status -> Span_status (status, Nil) - in - let items = List.fold_left (fun acc ev -> Ev (ev, acc)) items events in - let items = - List.fold_left (fun acc attr -> Attr (attr, acc)) items attrs - in - List.fold_left (fun acc link -> Span_link (link, acc)) items links - in - { trace_id; span_id; items } - - let[@inline] to_span_link ?trace_state ?attrs ?dropped_attributes_count - (self : t) : Span_link.t = - Span_link.make ?trace_state ?attrs ?dropped_attributes_count - ~trace_id:self.trace_id ~span_id:self.span_id () - - let[@inline] to_span_ctx (self : t) : Span_ctx.t = - Span_ctx.make ~trace_id:self.trace_id ~parent_id:self.span_id () - - let[@inline] add_event (scope : t) (ev : unit -> Event.t) : unit = - if Collector.has_backend () then scope.items <- Ev (ev (), scope.items) - - let[@inline] record_exception (scope : t) (exn : exn) - (bt : Printexc.raw_backtrace) : unit = - if Collector.has_backend () then ( - let ev = - Event.make "exception" - ~attrs: - [ - "exception.message", `String (Printexc.to_string exn); - "exception.type", `String (Printexc.exn_slot_name exn); - ( "exception.stacktrace", - `String (Printexc.raw_backtrace_to_string bt) ); - ] - in - scope.items <- Ev (ev, scope.items) - ) - - let[@inline] add_attrs (scope : t) (attrs : unit -> key_value list) : unit = - if Collector.has_backend () then - scope.items <- - List.fold_left (fun acc attr -> Attr (attr, acc)) scope.items (attrs ()) - - let[@inline] add_links (scope : t) (links : unit -> Span_link.t list) : unit = - if Collector.has_backend () then - scope.items <- - List.fold_left - (fun acc link -> Span_link (link, acc)) - scope.items (links ()) - - let set_status (scope : t) (status : Span_status.t) : unit = - if Collector.has_backend () then - scope.items <- Span_status (status, scope.items) - - let set_kind (scope : t) (k : Span_kind.t) : unit = - if Collector.has_backend () then scope.items <- Span_kind (k, scope.items) - - let ambient_scope_key : t Ambient_context.key = Ambient_context.create_key () - - let get_ambient_scope ?scope () : t option = - match scope with - | Some _ -> scope - | None -> Ambient_context.get ambient_scope_key - - let[@inline] with_ambient_scope (sc : t) (f : unit -> 'a) : 'a = - Ambient_context.with_binding ambient_scope_key sc (fun _ -> f ()) -end +module Scope = Scope (** {2 Traces} *) -(** Spans. - - A Span is the workhorse of traces, it indicates an operation that took place - over a given span of time (indicated by start_time and end_time) as part of - a hierarchical trace. All spans in a given trace are bound by the use of the - same {!Trace_id.t}. *) -module Span : sig - open Proto.Trace - - type t = span - - type id = Span_id.t - - type kind = Span_kind.t = - | Span_kind_unspecified - | Span_kind_internal - | Span_kind_server - | Span_kind_client - | Span_kind_producer - | Span_kind_consumer - - val id : t -> Span_id.t - - type key_value = - string - * [ `Int of int - | `String of string - | `Bool of bool - | `Float of float - | `None - ] - - val create : - ?kind:kind -> - ?id:id -> - ?trace_state:string -> - ?attrs:key_value list -> - ?events:Event.t list -> - ?status:status -> - trace_id:Trace_id.t -> - ?parent:id -> - ?links:Span_link.t list -> - start_time:Timestamp_ns.t -> - end_time:Timestamp_ns.t -> - string -> - t * id - (** [create ~trace_id name] creates a new span with its unique ID. - @param trace_id the trace this belongs to - @param parent parent span, if any - @param links - list of links to other spans, each with their trace state (see - {{:https://www.w3.org/TR/trace-context/#tracestate-header} w3.org}) *) -end = struct - open Proto.Trace - - type t = span - - type id = Span_id.t - - type kind = Span_kind.t = - | Span_kind_unspecified - | Span_kind_internal - | Span_kind_server - | Span_kind_client - | Span_kind_producer - | Span_kind_consumer - - type key_value = - string - * [ `Int of int - | `String of string - | `Bool of bool - | `Float of float - | `None - ] - - let id self = Span_id.of_bytes self.span_id - - let create ?(kind = !Globals.default_span_kind) ?(id = Span_id.create ()) - ?trace_state ?(attrs = []) ?(events = []) ?status ~trace_id ?parent - ?(links = []) ~start_time ~end_time name : t * id = - let trace_id = Trace_id.to_bytes trace_id in - let parent_span_id = Option.map Span_id.to_bytes parent in - let attributes = List.map _conv_key_value attrs in - let span = - make_span ~trace_id ?parent_span_id ~span_id:(Span_id.to_bytes id) - ~attributes ~events ?trace_state ?status ~kind ~name ~links - ~start_time_unix_nano:start_time ~end_time_unix_nano:end_time () - in - span, id -end - -(** Traces. - - See - {{:https://opentelemetry.io/docs/reference/specification/overview/#tracing-signal} - the spec} *) -module Trace = struct - open Proto.Trace - - type span = Span.t - - let make_resource_spans ?service_name ?attrs spans : resource_spans = - let ils = - make_scope_spans ~scope:Globals.instrumentation_library ~spans () - in - let attributes = Globals.mk_attributes ?service_name ?attrs () in - let resource = Proto.Resource.make_resource ~attributes () in - make_resource_spans ~resource ~scope_spans:[ ils ] () - - (** Sync emitter. - - This instructs the collector to forward the spans to some backend at a - later point. - - {b NOTE} be careful not to call this inside a Gc alarm, as it can cause - deadlocks. *) - let emit ?service_name ?attrs (spans : span list) : unit = - let rs = make_resource_spans ?service_name ?attrs spans in - Collector.send_trace [ rs ] ~ret:(fun () -> ()) - - type scope = Scope.t = { - trace_id: Trace_id.t; - span_id: Span_id.t; - mutable items: Scope.item_list; - } - [@@deprecated "use Scope.t"] - - let (add_event [@deprecated "use Scope.add_event"]) = Scope.add_event - - let (add_attrs [@deprecated "use Scope.add_attrs"]) = Scope.add_attrs - - let with_' ?(force_new_trace_id = false) ?trace_state ?service_name - ?(attrs : (string * [< value ]) list = []) ?kind ?trace_id ?parent ?scope - ?(links = []) name cb = - let scope = - if force_new_trace_id then - None - else - Scope.get_ambient_scope ?scope () - in - let trace_id = - match trace_id, scope with - | _ when force_new_trace_id -> Trace_id.create () - | Some trace_id, _ -> trace_id - | None, Some scope -> scope.trace_id - | None, None -> Trace_id.create () - in - let parent = - match parent, scope with - | _ when force_new_trace_id -> None - | Some span_id, _ -> Some span_id - | None, Some scope -> Some scope.span_id - | None, None -> None - in - let start_time = Timestamp_ns.now_unix_ns () in - let span_id = Span_id.create () in - let scope = Scope.make ~trace_id ~span_id ~attrs ~links () in - (* called once we're done, to emit a span *) - let finally res = - let status = - match Scope.status scope with - | Some status -> Some status - | None -> - (match res with - | Ok () -> - (* By default, all spans are Unset, which means a span completed without error. - The Ok status is reserved for when you need to explicitly mark a span as successful - rather than stick with the default of Unset (i.e., “without error”). - - https://opentelemetry.io/docs/languages/go/instrumentation/#set-span-status *) - None - | Error (e, bt) -> - Scope.record_exception scope e bt; - Some - (make_status ~code:Status_code_error - ~message:(Printexc.to_string e) ())) - in - let span, _ = - (* TODO: should the attrs passed to with_ go on the Span - (in Span.create) or on the ResourceSpan (in emit)? - (question also applies to Opentelemetry_lwt.Trace.with) *) - Span.create ?kind ~trace_id ?parent ~links:(Scope.links scope) - ~id:span_id ?trace_state ~attrs:(Scope.attrs scope) - ~events:(Scope.events scope) ~start_time - ~end_time:(Timestamp_ns.now_unix_ns ()) - ?status name - in - emit ?service_name [ span ] - in - let thunk () = - (* set global scope in this thread *) - Scope.with_ambient_scope scope @@ fun () -> cb scope - in - thunk, finally - - (** Sync span guard. - - Notably, this includes {e implicit} scope-tracking: if called without a - [~scope] argument (or [~parent]/[~trace_id]), it will check in the - {!Ambient_context} for a surrounding environment, and use that as the - scope. Similarly, it uses {!Scope.with_ambient_scope} to {e set} a new - scope in the ambient context, so that any logically-nested calls to - {!with_} will use this span as their parent. - - {b NOTE} be careful not to call this inside a Gc alarm, as it can cause - deadlocks. - - @param force_new_trace_id - if true (default false), the span will not use a ambient scope, the - [~scope] argument, nor [~trace_id], but will instead always create fresh - identifiers for this span *) - - let with_ ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind - ?trace_id ?parent ?scope ?links name (cb : Scope.t -> 'a) : 'a = - let thunk, finally = - with_' ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind - ?trace_id ?parent ?scope ?links name cb - in - - try - let rv = thunk () in - finally (Ok ()); - rv - with e -> - let bt = Printexc.get_raw_backtrace () in - finally (Error (e, bt)); - raise e -end +module Span = Span +module Tracer = Tracer +module Trace = Tracer [@@deprecated "use Tracer instead"] (** {2 Metrics} *) -(** Metrics. - - See - {{:https://opentelemetry.io/docs/reference/specification/overview/#metric-signal} - the spec} *) -module Metrics = struct - open Proto - open Proto.Metrics - - type t = Metrics.metric - (** A single metric, measuring some time-varying quantity or statistical - distribution. It is composed of one or more data points that have precise - values and time stamps. Each distinct metric should have a distinct name. - *) - - open struct - let _program_start = Timestamp_ns.now_unix_ns () - end - - (** Number data point, as a float *) - let float ?(start_time_unix_nano = _program_start) - ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) (d : float) : - number_data_point = - let attributes = attrs |> List.map _conv_key_value in - make_number_data_point ~start_time_unix_nano ~time_unix_nano:now ~attributes - ~value:(As_double d) () - - (** Number data point, as an int *) - let int ?(start_time_unix_nano = _program_start) - ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) (i : int) : - number_data_point = - let attributes = attrs |> List.map _conv_key_value in - make_number_data_point ~start_time_unix_nano ~time_unix_nano:now ~attributes - ~value:(As_int (Int64.of_int i)) - () - - (** Aggregation of a scalar metric, always with the current value *) - let gauge ~name ?description ?unit_ (l : number_data_point list) : t = - let data = Gauge (make_gauge ~data_points:l ()) in - make_metric ~name ?description ?unit_ ~data () - - type aggregation_temporality = Metrics.aggregation_temporality = - | Aggregation_temporality_unspecified - | Aggregation_temporality_delta - | Aggregation_temporality_cumulative - - (** Sum of all reported measurements over a time interval *) - let sum ~name ?description ?unit_ - ?(aggregation_temporality = Aggregation_temporality_cumulative) - ?is_monotonic (l : number_data_point list) : t = - let data = - Sum (make_sum ~data_points:l ?is_monotonic ~aggregation_temporality ()) - in - make_metric ~name ?description ?unit_ ~data () - - (** Histogram data - @param count number of values in population (non negative) - @param sum sum of values in population (0 if count is 0) - @param bucket_counts - count value of histogram for each bucket. Sum of the counts must be - equal to [count]. length must be [1+length explicit_bounds] - @param explicit_bounds strictly increasing list of bounds for the buckets - *) - let histogram_data_point ?(start_time_unix_nano = _program_start) - ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) ?(exemplars = []) - ?(explicit_bounds = []) ?sum ~bucket_counts ~count () : - histogram_data_point = - let attributes = attrs |> List.map _conv_key_value in - make_histogram_data_point ~start_time_unix_nano ~time_unix_nano:now - ~attributes ~exemplars ~bucket_counts ~explicit_bounds ~count ?sum () - - let histogram ~name ?description ?unit_ ?aggregation_temporality - (l : histogram_data_point list) : t = - let data = - Histogram (make_histogram ~data_points:l ?aggregation_temporality ()) - in - make_metric ~name ?description ?unit_ ~data () - - (* TODO: exponential history *) - (* TODO: summary *) - (* TODO: exemplar *) - - (** Aggregate metrics into a {!Proto.Metrics.resource_metrics} *) - let make_resource_metrics ?service_name ?attrs (l : t list) : resource_metrics - = - let lm = - make_scope_metrics ~scope:Globals.instrumentation_library ~metrics:l () - in - let attributes = Globals.mk_attributes ?service_name ?attrs () in - let resource = Proto.Resource.make_resource ~attributes () in - make_resource_metrics ~scope_metrics:[ lm ] ~resource () - - (** Emit some metrics to the collector (sync). This blocks until the backend - has pushed the metrics into some internal queue, or discarded them. - - {b NOTE} be careful not to call this inside a Gc alarm, as it can cause - deadlocks. *) - let emit ?attrs (l : t list) : unit = - let rm = make_resource_metrics ?attrs l in - Collector.send_metrics [ rm ] ~ret:ignore -end - -(** A set of callbacks that produce metrics when called. The metrics are - automatically called regularly. - - This allows applications to register metrics callbacks from various points - in the program (or even in libraries), and not worry about setting - alarms/intervals to emit them. *) -module Metrics_callbacks = struct - open struct - (* [true] iff the initial list of metric callbacks has already been registered - with `on_tick`. This registration must only happen once, after which, - [registered_with_on_tick] will forever be [false]. *) - let registered_with_on_tick : bool Atomic.t = Atomic.make false - - let cbs_ : (unit -> Metrics.t list) AList.t = AList.make () - end - - (** [register f] adds the callback [f] to the list. - - [f] will be called at unspecified times and is expected to return a list - of metrics. It might be called regularly by the backend, in particular - (but not only) when {!Collector.tick} is called. *) - let register f : unit = - (* sets [registered_with_on_tick] to [true] atomically, iff it is currently - [false]. *) - if not (Atomic.exchange registered_with_on_tick true) then - (* make sure we call [f] (and others) at each tick *) - Collector.on_tick (fun () -> - let m = List.map (fun f -> f ()) (AList.get cbs_) |> List.flatten in - Metrics.emit m); - AList.add cbs_ f -end +module Metrics = Metrics +module Metrics_callbacks = Metrics_callbacks +module Metrics_emitter = Metrics_emitter (** {2 Logs} *) -(** Logs. - - See - {{:https://opentelemetry.io/docs/reference/specification/overview/#log-signal} - the spec} *) -module Logs = struct - open Opentelemetry_proto - open Logs - - type t = log_record - - (** Severity level of a log event *) - type severity = Logs.severity_number = - | Severity_number_unspecified - | Severity_number_trace - | Severity_number_trace2 - | Severity_number_trace3 - | Severity_number_trace4 - | Severity_number_debug - | Severity_number_debug2 - | Severity_number_debug3 - | Severity_number_debug4 - | Severity_number_info - | Severity_number_info2 - | Severity_number_info3 - | Severity_number_info4 - | Severity_number_warn - | Severity_number_warn2 - | Severity_number_warn3 - | Severity_number_warn4 - | Severity_number_error - | Severity_number_error2 - | Severity_number_error3 - | Severity_number_error4 - | Severity_number_fatal - | Severity_number_fatal2 - | Severity_number_fatal3 - | Severity_number_fatal4 - - let pp_severity = Logs.pp_severity_number - - type flags = Logs.log_record_flags = - | Log_record_flags_do_not_use - | Log_record_flags_trace_flags_mask - - let pp_flags = Logs.pp_log_record_flags - - (** Make a single log entry *) - let make ?time ?(observed_time_unix_nano = Timestamp_ns.now_unix_ns ()) - ?severity ?log_level ?flags ?trace_id ?span_id (body : value) : t = - let time_unix_nano = - match time with - | None -> observed_time_unix_nano - | Some t -> t - in - let trace_id = Option.map Trace_id.to_bytes trace_id in - let span_id = Option.map Span_id.to_bytes span_id in - let body = _conv_value body in - make_log_record ~time_unix_nano ~observed_time_unix_nano - ?severity_number:severity ?severity_text:log_level ?flags ?trace_id - ?span_id ?body () - - (** Make a log entry whose body is a string *) - let make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags - ?trace_id ?span_id (body : string) : t = - make ?time ?observed_time_unix_nano ?severity ?log_level ?flags ?trace_id - ?span_id (`String body) - - (** Make a log entry with format *) - let make_strf ?time ?observed_time_unix_nano ?severity ?log_level ?flags - ?trace_id ?span_id fmt = - Format.kasprintf - (fun bod -> - make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags - ?trace_id ?span_id bod) - fmt - - (** Emit logs. - - This instructs the collector to send the logs to some backend at a later - date. {b NOTE} be careful not to call this inside a Gc alarm, as it can - cause deadlocks. *) - let emit ?service_name ?attrs (l : t list) : unit = - let attributes = Globals.mk_attributes ?service_name ?attrs () in - let resource = Proto.Resource.make_resource ~attributes () in - let ll = - make_scope_logs ~scope:Globals.instrumentation_library ~log_records:l () - in - let rl = make_resource_logs ~resource ~scope_logs:[ ll ] () in - Collector.send_logs [ rl ] ~ret:ignore -end +module Log_record = Log_record +module Logger = Logger +module Logs = Logger [@@deprecated "use Logger"] (** {2 Utils} *) -(** Implementation of the W3C Trace Context spec +module Trace_context = Trace_context +module Gc_metrics = Gc_metrics - https://www.w3.org/TR/trace-context/ *) -module Trace_context = struct - (** The traceparent header - https://www.w3.org/TR/trace-context/#traceparent-header *) - module Traceparent = struct - let name = "traceparent" +(* *) - (** Parse the value of the traceparent header. - - The values are of the form: - - {[ - { version } - { trace_id } - { parent_id } - { flags } - ]} - - For example: - - {[ - 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01 - ]} - - [{flags}] are currently ignored. *) - let of_value str : (Trace_id.t * Span_id.t, string) result = - match Span_ctx.of_w3c_trace_context (Bytes.unsafe_of_string str) with - | Ok sp -> Ok (Span_ctx.trace_id sp, Span_ctx.parent_id sp) - | Error _ as e -> e - - let to_value ?(sampled : bool option) ~(trace_id : Trace_id.t) - ~(parent_id : Span_id.t) () : string = - let span_ctx = Span_ctx.make ?sampled ~trace_id ~parent_id () in - Bytes.unsafe_to_string @@ Span_ctx.to_w3c_trace_context span_ctx - end -end - -(** Export GC metrics. - - These metrics are emitted after each GC collection. *) -module GC_metrics : sig - val basic_setup : unit -> unit - (** Setup a hook that will emit GC statistics on every tick (assuming a ticker - thread) *) - - val get_runtime_attributes : unit -> Span.key_value list - (** Get OCaml name and version runtime attributes *) - - val get_metrics : unit -> Metrics.t list - (** Get a few metrics from the current state of the GC *) -end = struct - (** See - https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/process.md#process-runtimes - *) - let runtime_attributes = - lazy - Conventions.Attributes. - [ - Process.Runtime.name, `String "ocaml"; - Process.Runtime.version, `String Sys.ocaml_version; - ] - - let get_runtime_attributes () = Lazy.force runtime_attributes - - let basic_setup () = - let on_tick () = - match Collector.get_backend () with - | None -> () - | Some (module C) -> C.signal_emit_gc_metrics () - in - Collector.on_tick on_tick - - let bytes_per_word = Sys.word_size / 8 - - let word_to_bytes n = n * bytes_per_word - - let word_to_bytes_f n = n *. float bytes_per_word - - let get_metrics () : Metrics.t list = - let gc = Gc.quick_stat () in - let now = Timestamp_ns.now_unix_ns () in - let open Metrics in - let open Conventions.Metrics in - [ - gauge ~name:Process.Runtime.Ocaml.GC.major_heap ~unit_:"B" - [ int ~now (word_to_bytes gc.Gc.heap_words) ]; - sum ~name:Process.Runtime.Ocaml.GC.minor_allocated - ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative - ~is_monotonic:true ~unit_:"B" - [ float ~now (word_to_bytes_f gc.Gc.minor_words) ]; - sum ~name:Process.Runtime.Ocaml.GC.minor_collections - ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative - ~is_monotonic:true - [ int ~now gc.Gc.minor_collections ]; - sum ~name:Process.Runtime.Ocaml.GC.major_collections - ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative - ~is_monotonic:true - [ int ~now gc.Gc.major_collections ]; - sum ~name:Process.Runtime.Ocaml.GC.compactions - ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative - ~is_monotonic:true - [ int ~now gc.Gc.compactions ]; - ] -end +module GC_metrics = Gc_metrics +[@@deprecated "use Gc_metrics (beware capitalization)"] diff --git a/src/core/rand_bytes.ml b/src/core/rand_bytes.ml index 18cf7dc8..336020a5 100644 --- a/src/core/rand_bytes.ml +++ b/src/core/rand_bytes.ml @@ -1,38 +1,39 @@ -(* generate random IDs *) -let rand_ = Random.State.make_self_init () - -let ( let@ ) = ( @@ ) - let default_rand_bytes_8 () : bytes = - let@ () = Lock.with_lock in let b = Bytes.create 8 in for i = 0 to 1 do - let r = Random.State.bits rand_ in + (* rely on the stdlib's [Random] being thread-or-domain safe *) + let r = Random.bits () in (* 30 bits, of which we use 24 *) Bytes.set b (i * 3) (Char.chr (r land 0xff)); Bytes.set b ((i * 3) + 1) (Char.chr ((r lsr 8) land 0xff)); Bytes.set b ((i * 3) + 2) (Char.chr ((r lsr 16) land 0xff)) done; - let r = Random.State.bits rand_ in + let r = Random.bits () in Bytes.set b 6 (Char.chr (r land 0xff)); Bytes.set b 7 (Char.chr ((r lsr 8) land 0xff)); b let default_rand_bytes_16 () : bytes = - let@ () = Lock.with_lock in let b = Bytes.create 16 in for i = 0 to 4 do - let r = Random.State.bits rand_ in + (* rely on the stdlib's [Random] being thread-or-domain safe *) + let r = Random.bits () in (* 30 bits, of which we use 24 *) Bytes.set b (i * 3) (Char.chr (r land 0xff)); Bytes.set b ((i * 3) + 1) (Char.chr ((r lsr 8) land 0xff)); Bytes.set b ((i * 3) + 2) (Char.chr ((r lsr 16) land 0xff)) done; - let r = Random.State.bits rand_ in + let r = Random.bits () in Bytes.set b 15 (Char.chr (r land 0xff)); (* last byte *) b -let rand_bytes_16 = ref default_rand_bytes_16 +let rand_bytes_16_ref = ref default_rand_bytes_16 -let rand_bytes_8 = ref default_rand_bytes_8 +let rand_bytes_8_ref = ref default_rand_bytes_8 + +(** Generate a 16B identifier *) +let[@inline] rand_bytes_16 () = !rand_bytes_16_ref () + +(** Generate an 8B identifier *) +let[@inline] rand_bytes_8 () = !rand_bytes_8_ref () diff --git a/src/core/rand_bytes.mli b/src/core/rand_bytes.mli index 7c42ea35..4b015b27 100644 --- a/src/core/rand_bytes.mli +++ b/src/core/rand_bytes.mli @@ -2,12 +2,12 @@ We need random identifiers for trace IDs and span IDs. *) -val rand_bytes_16 : (unit -> bytes) ref +val rand_bytes_16_ref : (unit -> bytes) ref (** Generate 16 bytes of random data. The implementation can be swapped to use any random generator. *) -val rand_bytes_8 : (unit -> bytes) ref -(** Generate 16 bytes of random data. The implementation can be swapped to use +val rand_bytes_8_ref : (unit -> bytes) ref +(** Generate 8 bytes of random data. The implementation can be swapped to use any random generator. *) val default_rand_bytes_8 : unit -> bytes @@ -15,3 +15,9 @@ val default_rand_bytes_8 : unit -> bytes val default_rand_bytes_16 : unit -> bytes (** Default implementation using {!Random} *) + +val rand_bytes_16 : unit -> bytes +(** Call the current {!rand_bytes_16_ref} *) + +val rand_bytes_8 : unit -> bytes +(** Call the current {!rand_bytes_8_ref} *) diff --git a/src/core/scope.ml b/src/core/scope.ml new file mode 100644 index 00000000..aa5cb19a --- /dev/null +++ b/src/core/scope.ml @@ -0,0 +1,131 @@ +open Common_ + +type item_list = + | Nil + | Ev of Event.t * item_list + | Attr of Key_value.t * item_list + | Span_link of Span_link.t * item_list + | Span_status of Span_status.t * item_list + | Span_kind of Span_kind.t * item_list + +type t = { + trace_id: Trace_id.t; + span_id: Span_id.t; + mutable items: item_list; +} + +let attrs scope = + let rec loop acc = function + | Nil -> acc + | Attr (attr, l) -> loop (attr :: acc) l + | Ev (_, l) | Span_kind (_, l) | Span_link (_, l) | Span_status (_, l) -> + loop acc l + in + loop [] scope.items + +let events scope = + let rec loop acc = function + | Nil -> acc + | Ev (event, l) -> loop (event :: acc) l + | Attr (_, l) | Span_kind (_, l) | Span_link (_, l) | Span_status (_, l) -> + loop acc l + in + loop [] scope.items + +let links scope = + let rec loop acc = function + | Nil -> acc + | Span_link (span_link, l) -> loop (span_link :: acc) l + | Ev (_, l) | Span_kind (_, l) | Attr (_, l) | Span_status (_, l) -> + loop acc l + in + loop [] scope.items + +let status scope = + let rec loop = function + | Nil -> None + | Span_status (status, _) -> Some status + | Ev (_, l) | Attr (_, l) | Span_kind (_, l) | Span_link (_, l) -> loop l + in + loop scope.items + +let kind scope = + let rec loop = function + | Nil -> None + | Span_kind (k, _) -> Some k + | Ev (_, l) | Span_status (_, l) | Attr (_, l) | Span_link (_, l) -> loop l + in + loop scope.items + +let make ~trace_id ~span_id ?(events = []) ?(attrs = []) ?(links = []) ?status + () : t = + let items = + let items = + match status with + | None -> Nil + | Some status -> Span_status (status, Nil) + in + let items = List.fold_left (fun acc ev -> Ev (ev, acc)) items events in + let items = List.fold_left (fun acc attr -> Attr (attr, acc)) items attrs in + List.fold_left (fun acc link -> Span_link (link, acc)) items links + in + { trace_id; span_id; items } + +let[@inline] to_span_link ?trace_state ?attrs ?dropped_attributes_count + (self : t) : Span_link.t = + Span_link.make ?trace_state ?attrs ?dropped_attributes_count + ~trace_id:self.trace_id ~span_id:self.span_id () + +let[@inline] to_span_ctx (self : t) : Span_ctx.t = + Span_ctx.make ~trace_id:self.trace_id ~parent_id:self.span_id () + +open struct + let[@inline] is_not_dummy (self : t) : bool = Span_id.is_valid self.span_id +end + +let[@inline] add_event (self : t) (ev : unit -> Event.t) : unit = + if is_not_dummy self then self.items <- Ev (ev (), self.items) + +let[@inline] record_exception (self : t) (exn : exn) + (bt : Printexc.raw_backtrace) : unit = + if is_not_dummy self then ( + let ev = + Event.make "exception" + ~attrs: + [ + "exception.message", `String (Printexc.to_string exn); + "exception.type", `String (Printexc.exn_slot_name exn); + ( "exception.stacktrace", + `String (Printexc.raw_backtrace_to_string bt) ); + ] + in + self.items <- Ev (ev, self.items) + ) + +let[@inline] add_attrs (self : t) (attrs : unit -> Key_value.t list) : unit = + if is_not_dummy self then + self.items <- + List.fold_left (fun acc attr -> Attr (attr, acc)) self.items (attrs ()) + +let[@inline] add_links (self : t) (links : unit -> Span_link.t list) : unit = + if is_not_dummy self then + self.items <- + List.fold_left + (fun acc link -> Span_link (link, acc)) + self.items (links ()) + +let set_status (self : t) (status : Span_status.t) : unit = + if is_not_dummy self then self.items <- Span_status (status, self.items) + +let set_kind (self : t) (k : Span_kind.t) : unit = + if is_not_dummy self then self.items <- Span_kind (k, self.items) + +let ambient_scope_key : t Ambient_context.key = Ambient_context.create_key () + +let get_ambient_scope ?scope () : t option = + match scope with + | Some _ -> scope + | None -> Ambient_context.get ambient_scope_key + +let[@inline] with_ambient_scope (sc : t) (f : unit -> 'a) : 'a = + Ambient_context.with_binding ambient_scope_key sc (fun _ -> f ()) diff --git a/src/core/scope.mli b/src/core/scope.mli new file mode 100644 index 00000000..9ba60d0e --- /dev/null +++ b/src/core/scope.mli @@ -0,0 +1,89 @@ +(** Scopes. + + A scope is a trace ID and the span ID of the currently active span. *) + +open Common_ + +type item_list + +type t = { + trace_id: Trace_id.t; + span_id: Span_id.t; + mutable items: item_list; +} + +val attrs : t -> Key_value.t list + +val events : t -> Event.t list + +val links : t -> Span_link.t list + +val status : t -> Span_status.t option + +val kind : t -> Span_kind.t option + +val make : + trace_id:Trace_id.t -> + span_id:Span_id.t -> + ?events:Event.t list -> + ?attrs:Key_value.t list -> + ?links:Span_link.t list -> + ?status:Span_status.t -> + unit -> + t + +val to_span_link : + ?trace_state:string -> + ?attrs:Key_value.t list -> + ?dropped_attributes_count:int -> + t -> + Span_link.t +(** Turn the scope into a span link *) + +val to_span_ctx : t -> Span_ctx.t +(** Turn the scope into a span context *) + +val add_event : t -> (unit -> Event.t) -> unit +(** Add an event to the scope. It will be aggregated into the span. + + Note that this takes a function that produces an event, and will only call + it if there is an instrumentation backend. *) + +val record_exception : t -> exn -> Printexc.raw_backtrace -> unit + +val add_attrs : t -> (unit -> Key_value.t list) -> unit +(** Add attributes to the scope. It will be aggregated into the span. + + Note that this takes a function that produces attributes, and will only call + it if there is an instrumentation backend. *) + +val add_links : t -> (unit -> Span_link.t list) -> unit +(** Add links to the scope. It will be aggregated into the span. + + Note that this takes a function that produces links, and will only call it + if there is an instrumentation backend. *) + +val set_status : t -> Span_status.t -> unit +(** set the span status. + + Note that this function will be called only if there is an instrumentation + backend. *) + +val set_kind : t -> Span_kind.t -> unit +(** Set the span's kind. + @since 0.11 *) + +val ambient_scope_key : t Ambient_context.key +(** The opaque key necessary to access/set the ambient scope with + {!Ambient_context}. *) + +val get_ambient_scope : ?scope:t -> unit -> t option +(** Obtain current scope from {!Ambient_context}, if available. *) + +val with_ambient_scope : t -> (unit -> 'a) -> 'a +(** [with_ambient_scope sc thunk] calls [thunk()] in a context where [sc] is the + (thread|continuation)-local scope, then reverts to the previous local scope, + if any. + + @see + ambient-context docs *) diff --git a/src/core/span.ml b/src/core/span.ml new file mode 100644 index 00000000..1ea8cb0b --- /dev/null +++ b/src/core/span.ml @@ -0,0 +1,38 @@ +open Common_ +open Proto.Trace + +type t = span + +type id = Span_id.t + +type kind = Span_kind.t = + | Span_kind_unspecified + | Span_kind_internal + | Span_kind_server + | Span_kind_client + | Span_kind_producer + | Span_kind_consumer + +type key_value = + string + * [ `Int of int + | `String of string + | `Bool of bool + | `Float of float + | `None + ] + +let id self = Span_id.of_bytes self.span_id + +let create ?(kind = !Globals.default_span_kind) ?(id = Span_id.create ()) + ?trace_state ?(attrs = []) ?(events = []) ?status ~trace_id ?parent + ?(links = []) ~start_time ~end_time name : t * id = + let trace_id = Trace_id.to_bytes trace_id in + let parent_span_id = Option.map Span_id.to_bytes parent in + let attributes = List.map Key_value.conv attrs in + let span = + make_span ~trace_id ?parent_span_id ~span_id:(Span_id.to_bytes id) + ~attributes ~events ?trace_state ?status ~kind ~name ~links + ~start_time_unix_nano:start_time ~end_time_unix_nano:end_time () + in + span, id diff --git a/src/core/span.mli b/src/core/span.mli new file mode 100644 index 00000000..cfb9a2de --- /dev/null +++ b/src/core/span.mli @@ -0,0 +1,46 @@ +(** Spans. + + A Span is the workhorse of traces, it indicates an operation that took place + over a given span of time (indicated by start_time and end_time) as part of + a hierarchical trace. All spans in a given trace are bound by the use of the + same {!Trace_id.t}. *) + +open Common_ +open Proto.Trace + +type t = span + +type id = Span_id.t + +type kind = Span_kind.t = + | Span_kind_unspecified + | Span_kind_internal + | Span_kind_server + | Span_kind_client + | Span_kind_producer + | Span_kind_consumer + +val id : t -> Span_id.t + +type key_value = Key_value.t + +val create : + ?kind:kind -> + ?id:id -> + ?trace_state:string -> + ?attrs:key_value list -> + ?events:Event.t list -> + ?status:status -> + trace_id:Trace_id.t -> + ?parent:id -> + ?links:Span_link.t list -> + start_time:Timestamp_ns.t -> + end_time:Timestamp_ns.t -> + string -> + t * id +(** [create ~trace_id name] creates a new span with its unique ID. + @param trace_id the trace this belongs to + @param parent parent span, if any + @param links + list of links to other spans, each with their trace state (see + {{:https://www.w3.org/TR/trace-context/#tracestate-header} w3.org}) *) diff --git a/src/core/span_ctx.ml b/src/core/span_ctx.ml new file mode 100644 index 00000000..1be170f2 --- /dev/null +++ b/src/core/span_ctx.ml @@ -0,0 +1,91 @@ +open Common_ + +(* see: https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext *) + +(* TODO: trace state *) + +external int_of_bool : bool -> int = "%identity" + +module Flags = struct + let sampled = 1 + + let remote = 2 +end + +type t = { + trace_id: Trace_id.t; + parent_id: Span_id.t; + flags: int; +} + +let dummy = { trace_id = Trace_id.dummy; parent_id = Span_id.dummy; flags = 0 } + +let make ?(remote = false) ?(sampled = false) ~trace_id ~parent_id () : t = + let flags = + 0 + lor (int_of_bool remote lsl Flags.remote) + lor (int_of_bool sampled lsl Flags.sampled) + in + { trace_id; parent_id; flags } + +let[@inline] is_valid self = + Trace_id.is_valid self.trace_id && Span_id.is_valid self.parent_id + +let[@inline] sampled self = self.flags land (1 lsl Flags.sampled) != 0 + +let[@inline] is_remote self = self.flags land (1 lsl Flags.remote) != 0 + +let[@inline] trace_id self = self.trace_id + +let[@inline] parent_id self = self.parent_id + +let to_w3c_trace_context (self : t) : bytes = + let bs = Bytes.create 55 in + Bytes.set bs 0 '0'; + Bytes.set bs 1 '0'; + Bytes.set bs 2 '-'; + Trace_id.to_hex_into self.trace_id bs 3; + (* +32 *) + Bytes.set bs (3 + 32) '-'; + Span_id.to_hex_into self.parent_id bs 36; + (* +16 *) + Bytes.set bs 52 '-'; + Bytes.set bs 53 '0'; + Bytes.set bs 54 + (if sampled self then + '1' + else + '0'); + bs + +let of_w3c_trace_context bs : _ result = + try + if Bytes.length bs <> 55 then invalid_arg "trace context must be 55 bytes"; + (match int_of_string_opt (Bytes.sub_string bs 0 2) with + | Some 0 -> () + | Some n -> invalid_arg @@ spf "version is %d, expected 0" n + | None -> invalid_arg "expected 2-digit version"); + if Bytes.get bs 2 <> '-' then invalid_arg "expected '-' before trace_id"; + let trace_id = + try Trace_id.of_hex_substring (Bytes.unsafe_to_string bs) 3 + with Invalid_argument msg -> invalid_arg (spf "in trace id: %s" msg) + in + if Bytes.get bs (3 + 32) <> '-' then + invalid_arg "expected '-' before parent_id"; + let parent_id = + try Span_id.of_hex_substring (Bytes.unsafe_to_string bs) 36 + with Invalid_argument msg -> invalid_arg (spf "in span id: %s" msg) + in + if Bytes.get bs 52 <> '-' then invalid_arg "expected '-' after parent_id"; + let sampled = int_of_string_opt (Bytes.sub_string bs 53 2) = Some 1 in + + (* ignore other flags *) + Ok (make ~remote:true ~sampled ~trace_id ~parent_id ()) + with Invalid_argument msg -> Error msg + +let of_w3c_trace_context_exn bs = + match of_w3c_trace_context bs with + | Ok t -> t + | Error msg -> invalid_arg @@ spf "invalid w3c trace context: %s" msg + +let k_span_ctx : t Hmap.key = Hmap.Key.create () diff --git a/src/core/span_ctx.mli b/src/core/span_ctx.mli new file mode 100644 index 00000000..fb72046b --- /dev/null +++ b/src/core/span_ctx.mli @@ -0,0 +1,42 @@ +(** Span context. This bundles up a trace ID and parent ID. + + {{:https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext} + https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext} + @since 0.7 *) + +type t + +val make : + ?remote:bool -> + ?sampled:bool -> + trace_id:Trace_id.t -> + parent_id:Span_id.t -> + unit -> + t + +val dummy : t +(** Invalid span context, to be used as a placeholder *) + +val is_remote : t -> bool +(** Does this come from a remote parent? *) + +val is_valid : t -> bool +(** Are the span ID and trace ID valid (ie non-zero)? *) + +val trace_id : t -> Trace_id.t + +val parent_id : t -> Span_id.t + +val sampled : t -> bool + +val to_w3c_trace_context : t -> bytes + +val of_w3c_trace_context : bytes -> (t, string) result + +val of_w3c_trace_context_exn : bytes -> t +(** @raise Invalid_argument if parsing failed *) + +val k_span_ctx : t Hmap.key +(** Hmap key to carry around a {!Span_ctx.t}, e.g. to remember what the current + parent span is. + @since 0.8 *) diff --git a/src/core/span_id.ml b/src/core/span_id.ml new file mode 100644 index 00000000..bf9e7731 --- /dev/null +++ b/src/core/span_id.ml @@ -0,0 +1,33 @@ +open Common_ + +type t = bytes + +let[@inline] to_bytes self = self + +let dummy : t = Bytes.make 8 '\x00' + +let create () : t = + let b = Rand_bytes.rand_bytes_8 () in + assert (Bytes.length b = 8); + (* make sure the identifier is not all 0, which is a dummy identifier. *) + Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); + b + +let is_valid = Util_bytes_.bytes_non_zero + +let[@inline] of_bytes b = + if Bytes.length b = 8 then + b + else + invalid_arg "span IDs must be 8 bytes in length" + +let to_hex = Util_bytes_.bytes_to_hex + +let to_hex_into = Util_bytes_.bytes_to_hex_into + +let[@inline] of_hex s = of_bytes (Util_bytes_.bytes_of_hex s) + +let[@inline] of_hex_substring s off = + of_bytes (Util_bytes_.bytes_of_hex_substring s off 16) + +let pp fmt t = Format.fprintf fmt "%s" (to_hex t) diff --git a/src/core/span_id.mli b/src/core/span_id.mli new file mode 100644 index 00000000..db51d475 --- /dev/null +++ b/src/core/span_id.mli @@ -0,0 +1,23 @@ +(** Unique ID of a span. *) + +type t + +val create : unit -> t + +val dummy : t + +val pp : Format.formatter -> t -> unit + +val is_valid : t -> bool + +val to_bytes : t -> bytes + +val of_bytes : bytes -> t + +val to_hex : t -> string + +val to_hex_into : t -> bytes -> int -> unit + +val of_hex : string -> t + +val of_hex_substring : string -> int -> t diff --git a/src/core/span_kind.ml b/src/core/span_kind.ml new file mode 100644 index 00000000..d3ddace4 --- /dev/null +++ b/src/core/span_kind.ml @@ -0,0 +1,13 @@ +(** Span kind. + @since 0.11 *) + +open Common_ +open Proto.Trace + +type t = span_span_kind = + | Span_kind_unspecified + | Span_kind_internal + | Span_kind_server + | Span_kind_client + | Span_kind_producer + | Span_kind_consumer diff --git a/src/core/span_link.ml b/src/core/span_link.ml new file mode 100644 index 00000000..308d3598 --- /dev/null +++ b/src/core/span_link.ml @@ -0,0 +1,20 @@ +open Common_ +open Proto.Trace + +type t = span_link + +let make ~trace_id ~span_id ?trace_state ?(attrs = []) ?dropped_attributes_count + () : t = + let attributes = List.map Key_value.conv attrs in + let dropped_attributes_count = + Option.map Int32.of_int dropped_attributes_count + in + make_span_link + ~trace_id:(Trace_id.to_bytes trace_id) + ~span_id:(Span_id.to_bytes span_id) ?trace_state ~attributes + ?dropped_attributes_count () + +let[@inline] of_span_ctx ?trace_state ?attrs ?dropped_attributes_count + (ctx : Span_ctx.t) : t = + make ~trace_id:(Span_ctx.trace_id ctx) ~span_id:(Span_ctx.parent_id ctx) + ?trace_state ?attrs ?dropped_attributes_count () diff --git a/src/core/span_link.mli b/src/core/span_link.mli new file mode 100644 index 00000000..402ff0ec --- /dev/null +++ b/src/core/span_link.mli @@ -0,0 +1,27 @@ +(** Span Link + + A pointer from the current span to another span in the same trace or in a + different trace. For example, this can be used in batching operations, where + a single batch handler processes multiple requests from different traces or + when the handler receives a request from a different project. *) + +open Common_ +open Proto.Trace + +type t = span_link + +val make : + trace_id:Trace_id.t -> + span_id:Span_id.t -> + ?trace_state:string -> + ?attrs:Key_value.t list -> + ?dropped_attributes_count:int -> + unit -> + t + +val of_span_ctx : + ?trace_state:string -> + ?attrs:Key_value.t list -> + ?dropped_attributes_count:int -> + Span_ctx.t -> + t diff --git a/src/core/span_status.ml b/src/core/span_status.ml new file mode 100644 index 00000000..388d20ac --- /dev/null +++ b/src/core/span_status.ml @@ -0,0 +1,15 @@ +open Common_ +open Proto.Trace + +type t = Proto.Trace.status = private { + mutable _presence: Pbrt.Bitfield.t; + mutable message: string; + mutable code: status_status_code; +} + +type code = status_status_code = + | Status_code_unset + | Status_code_ok + | Status_code_error + +let[@inline] make ~message ~code : t = make_status ~message ~code () diff --git a/src/core/span_status.mli b/src/core/span_status.mli new file mode 100644 index 00000000..da2e11b9 --- /dev/null +++ b/src/core/span_status.mli @@ -0,0 +1,15 @@ +open Common_ +open Proto.Trace + +type t = Proto.Trace.status = private { + mutable _presence: Pbrt.Bitfield.t; + mutable message: string; + mutable code: status_status_code; +} + +type code = status_status_code = + | Status_code_unset + | Status_code_ok + | Status_code_error + +val make : message:string -> code:code -> t diff --git a/src/core/tick_callbacks.ml b/src/core/tick_callbacks.ml new file mode 100644 index 00000000..d81a3a0f --- /dev/null +++ b/src/core/tick_callbacks.ml @@ -0,0 +1,9 @@ +type cb = unit -> unit + +type t = { cbs: cb AList.t } [@@unboxed] + +let create () : t = { cbs = AList.make () } + +let[@inline] on_tick self f = AList.add self.cbs f + +let[@inline] tick self = List.iter (fun f -> f ()) (AList.get self.cbs) diff --git a/src/core/tick_callbacks.mli b/src/core/tick_callbacks.mli new file mode 100644 index 00000000..ad7ff5bb --- /dev/null +++ b/src/core/tick_callbacks.mli @@ -0,0 +1,9 @@ +(** A collection of callbacks that are regularly called. *) + +type t + +val create : unit -> t + +val on_tick : t -> (unit -> unit) -> unit + +val tick : t -> unit diff --git a/src/core/timestamp_ns.ml b/src/core/timestamp_ns.ml new file mode 100644 index 00000000..52f7cfa2 --- /dev/null +++ b/src/core/timestamp_ns.ml @@ -0,0 +1,29 @@ +(** Unix timestamp. + + These timestamps measure time since the Unix epoch (jan 1, 1970) UTC in + nanoseconds. *) + +type t = int64 + +open struct + let ns_in_a_day = Int64.(mul 1_000_000_000L (of_int (24 * 3600))) +end + +(** Current unix timestamp in nanoseconds *) +let[@inline] now_unix_ns () : t = + let span = Ptime_clock.now () |> Ptime.to_span in + let d, ps = Ptime.Span.to_d_ps span in + let d = Int64.(mul (of_int d) ns_in_a_day) in + let ns = Int64.(div ps 1_000L) in + Int64.(add d ns) + +let pp_debug out (self : t) = + let d = Int64.(to_int (div self ns_in_a_day)) in + let ns = Int64.(rem self ns_in_a_day) in + let ps = Int64.(mul ns 1_000L) in + match Ptime.Span.of_d_ps (d, ps) with + | None -> Format.fprintf out "ts: <%Ld ns>" self + | Some span -> + (match Ptime.add_span Ptime.epoch span with + | None -> Format.fprintf out "ts: <%Ld ns>" self + | Some ptime -> Ptime.pp_human () out ptime) diff --git a/src/core/trace_context.ml b/src/core/trace_context.ml new file mode 100644 index 00000000..9c8b141d --- /dev/null +++ b/src/core/trace_context.ml @@ -0,0 +1,34 @@ +(** Implementation of the W3C Trace Context spec + + https://www.w3.org/TR/trace-context/ *) + +(** The traceparent header + https://www.w3.org/TR/trace-context/#traceparent-header *) +module Traceparent = struct + let name = "traceparent" + + (** Parse the value of the traceparent header. + + The values are of the form: + + {[ + { version } - { trace_id } - { parent_id } - { flags } + ]} + + For example: + + {[ + 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01 + ]} + + [{flags}] are currently ignored. *) + let of_value str : (Trace_id.t * Span_id.t, string) result = + match Span_ctx.of_w3c_trace_context (Bytes.unsafe_of_string str) with + | Ok sp -> Ok (Span_ctx.trace_id sp, Span_ctx.parent_id sp) + | Error _ as e -> e + + let to_value ?(sampled : bool option) ~(trace_id : Trace_id.t) + ~(parent_id : Span_id.t) () : string = + let span_ctx = Span_ctx.make ?sampled ~trace_id ~parent_id () in + Bytes.unsafe_to_string @@ Span_ctx.to_w3c_trace_context span_ctx +end diff --git a/src/core/trace_id.ml b/src/core/trace_id.ml new file mode 100644 index 00000000..8893a757 --- /dev/null +++ b/src/core/trace_id.ml @@ -0,0 +1,35 @@ +open Common_ + +type t = bytes + +let[@inline] to_bytes self = self + +let dummy : t = Bytes.make 16 '\x00' + +let create () : t = + let b = Rand_bytes.rand_bytes_16 () in + assert (Bytes.length b = 16); + (* make sure the identifier is not all 0, which is a dummy identifier. *) + Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); + b + +let[@inline] of_bytes b = + if Bytes.length b = 16 then + b + else + invalid_arg "trace ID must be 16 bytes in length" + +let is_valid = Util_bytes_.bytes_non_zero + +let to_hex = Util_bytes_.bytes_to_hex + +let to_hex_into = Util_bytes_.bytes_to_hex_into + +let[@inline] of_hex s = of_bytes (Util_bytes_.bytes_of_hex s) + +let[@inline] of_hex_substring s off = + of_bytes (Util_bytes_.bytes_of_hex_substring s off 32) + +let pp fmt t = Format.fprintf fmt "%s" (to_hex t) + +let k_trace_id : t Hmap.key = Hmap.Key.create () diff --git a/src/core/trace_id.mli b/src/core/trace_id.mli new file mode 100644 index 00000000..487c901b --- /dev/null +++ b/src/core/trace_id.mli @@ -0,0 +1,30 @@ +(** Trace ID. + + This 16 bytes identifier is shared by all spans in one trace. *) + +type t + +val create : unit -> t + +val dummy : t + +val pp : Format.formatter -> t -> unit + +val is_valid : t -> bool + +val to_bytes : t -> bytes + +val of_bytes : bytes -> t + +val to_hex : t -> string + +val to_hex_into : t -> bytes -> int -> unit + +val of_hex : string -> t + +val of_hex_substring : string -> int -> t + +val k_trace_id : t Hmap.key +(** Hmap key to carry around a {!Trace_id.t}, to remember what the current trace + is. + @since 0.8 *) diff --git a/src/core/tracer.ml b/src/core/tracer.ml new file mode 100644 index 00000000..6045df9d --- /dev/null +++ b/src/core/tracer.ml @@ -0,0 +1,165 @@ +(** Traces. + + See + {{:https://opentelemetry.io/docs/reference/specification/overview/#tracing-signal} + the spec} *) + +open Common_ +open Proto.Trace + +type span = Span.t + +(** A tracer. + + https://opentelemetry.io/docs/specs/otel/trace/api/#tracer *) +class type t = object + method is_enabled : unit -> bool + + method emit : span list -> unit +end + +(** Dummy tracer, always disabled *) +let dummy : t = + object + method is_enabled () = false + + method emit _ = () + end + +(** A simple exporter that directly calls the exporter. *) +class simple (exp : #Exporter.t) : t = + object + method is_enabled () = true + + method emit spans = if spans <> [] then Exporter.send_trace exp spans + end + +(** A tracer that uses {!Exporter.Main_exporter} *) +let simple_main_exporter : t = + object + method is_enabled () = Exporter.Main_exporter.present () + + method emit spans = + match Exporter.Main_exporter.get () with + | None -> () + | Some exp -> exp#send_trace spans + end + +(** Directly emit to the main exporter. + + {b NOTE} be careful not to call this inside a Gc alarm, as it can cause + deadlocks. *) +let emit ?service_name:_ ?attrs:_ (spans : span list) : unit = + match Exporter.Main_exporter.get () with + | None -> () + | Some exp -> exp#send_trace spans +[@@deprecated "use an explicit tracer"] + +(* TODO: remove scope, use span directly *) +type scope = Scope.t = { + trace_id: Trace_id.t; + span_id: Span_id.t; + mutable items: Scope.item_list; +} +[@@deprecated "use Scope.t"] + +let (add_event [@deprecated "use Scope.add_event"]) = Scope.add_event + +let (add_attrs [@deprecated "use Scope.add_attrs"]) = Scope.add_attrs + +let with_' ?(tracer = simple_main_exporter) ?(force_new_trace_id = false) + ?trace_state ?(attrs : (string * [< Value.t ]) list = []) ?kind ?trace_id + ?parent ?scope ?(links = []) name cb = + let scope = + if force_new_trace_id then + None + else + Scope.get_ambient_scope ?scope () + in + let trace_id = + match trace_id, scope with + | _ when force_new_trace_id -> Trace_id.create () + | Some trace_id, _ -> trace_id + | None, Some scope -> scope.trace_id + | None, None -> Trace_id.create () + in + let parent = + match parent, scope with + | _ when force_new_trace_id -> None + | Some span_id, _ -> Some span_id + | None, Some scope -> Some scope.span_id + | None, None -> None + in + let start_time = Timestamp_ns.now_unix_ns () in + let span_id = Span_id.create () in + let scope = Scope.make ~trace_id ~span_id ~attrs ~links () in + (* called once we're done, to emit a span *) + let finally res = + let status = + match Scope.status scope with + | Some status -> Some status + | None -> + (match res with + | Ok () -> + (* By default, all spans are Unset, which means a span completed without error. + The Ok status is reserved for when you need to explicitly mark a span as successful + rather than stick with the default of Unset (i.e., “without error”). + + https://opentelemetry.io/docs/languages/go/instrumentation/#set-span-status *) + None + | Error (e, bt) -> + Scope.record_exception scope e bt; + Some + (make_status ~code:Status_code_error ~message:(Printexc.to_string e) + ())) + in + let span, _ = + (* TODO: should the attrs passed to with_ go on the Span + (in Span.create) or on the ResourceSpan (in emit)? + (question also applies to Opentelemetry_lwt.Trace.with) *) + Span.create ?kind ~trace_id ?parent ~links:(Scope.links scope) ~id:span_id + ?trace_state ~attrs:(Scope.attrs scope) ~events:(Scope.events scope) + ~start_time + ~end_time:(Timestamp_ns.now_unix_ns ()) + ?status name + in + + tracer#emit [ span ] + in + let thunk () = + (* set global scope in this thread *) + Scope.with_ambient_scope scope @@ fun () -> cb scope + in + thunk, finally + +(** Sync span guard. + + Notably, this includes {e implicit} scope-tracking: if called without a + [~scope] argument (or [~parent]/[~trace_id]), it will check in the + {!Ambient_context} for a surrounding environment, and use that as the scope. + Similarly, it uses {!Scope.with_ambient_scope} to {e set} a new scope in the + ambient context, so that any logically-nested calls to {!with_} will use + this span as their parent. + + {b NOTE} be careful not to call this inside a Gc alarm, as it can cause + deadlocks. + + @param force_new_trace_id + if true (default false), the span will not use a ambient scope, the + [~scope] argument, nor [~trace_id], but will instead always create fresh + identifiers for this span *) +let with_ ?tracer ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id + ?parent ?scope ?links name (cb : Scope.t -> 'a) : 'a = + let thunk, finally = + with_' ?tracer ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id + ?parent ?scope ?links name cb + in + + try + let rv = thunk () in + finally (Ok ()); + rv + with e -> + let bt = Printexc.get_raw_backtrace () in + finally (Error (e, bt)); + raise e diff --git a/src/core/util_bytes_.ml b/src/core/util_bytes_.ml new file mode 100644 index 00000000..9326991a --- /dev/null +++ b/src/core/util_bytes_.ml @@ -0,0 +1,47 @@ +open Common_ + +let int_to_hex (i : int) = + if i < 10 then + Char.chr (i + Char.code '0') + else + Char.chr (i - 10 + Char.code 'a') + +let bytes_to_hex_into b res off : unit = + for i = 0 to Bytes.length b - 1 do + let n = Char.code (Bytes.get b i) in + Bytes.set res ((2 * i) + off) (int_to_hex ((n land 0xf0) lsr 4)); + Bytes.set res ((2 * i) + 1 + off) (int_to_hex (n land 0x0f)) + done + +let bytes_to_hex (b : bytes) : string = + let res = Bytes.create (2 * Bytes.length b) in + bytes_to_hex_into b res 0; + Bytes.unsafe_to_string res + +let int_of_hex = function + | '0' .. '9' as c -> Char.code c - Char.code '0' + | 'a' .. 'f' as c -> 10 + Char.code c - Char.code 'a' + | c -> raise (Invalid_argument (spf "invalid hex char: %C" c)) + +let bytes_of_hex_substring (s : string) off len = + if len mod 2 <> 0 then + raise (Invalid_argument "hex sequence must be of even length"); + let res = Bytes.make (len / 2) '\x00' in + for i = 0 to (len / 2) - 1 do + let n1 = int_of_hex (String.get s (off + (2 * i))) in + let n2 = int_of_hex (String.get s (off + (2 * i) + 1)) in + let n = (n1 lsl 4) lor n2 in + Bytes.set res i (Char.chr n) + done; + res + +let bytes_of_hex (s : string) : bytes = + bytes_of_hex_substring s 0 (String.length s) + +let bytes_non_zero (self : bytes) : bool = + try + for i = 0 to Bytes.length self - 1 do + if Char.code (Bytes.unsafe_get self i) <> 0 then raise_notrace Exit + done; + false + with Exit -> true diff --git a/src/core/value.ml b/src/core/value.ml new file mode 100644 index 00000000..97fc0503 --- /dev/null +++ b/src/core/value.ml @@ -0,0 +1,19 @@ +open Common_ + +type t = + [ `Int of int + | `String of string + | `Bool of bool + | `Float of float + | `None + ] +(** A value in a key/value attribute *) + +let conv = + let open Proto.Common in + function + | `Int i -> Some (Int_value (Int64.of_int i)) + | `String s -> Some (String_value s) + | `Bool b -> Some (Bool_value b) + | `Float f -> Some (Double_value f) + | `None -> None From 4c69b28d03661411e39b4be3617aa979c5365db2 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 13:28:12 -0500 Subject: [PATCH 02/94] gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 811fc6e9..85d4d798 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ _opam .merlin *.install *.exe +*.tmp From 5d91e92ffa207b600dd3395fd9670a9051bcab45 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 13:28:25 -0500 Subject: [PATCH 03/94] update opentelemetry_trace just a bit --- src/trace/dune | 6 ++- src/trace/opentelemetry_trace.ml | 1 + src/trace/opentelemetry_trace.mli | 61 ++++++++++++++++++++----------- 3 files changed, 46 insertions(+), 22 deletions(-) diff --git a/src/trace/dune b/src/trace/dune index 9606e2a5..0c815c5d 100644 --- a/src/trace/dune +++ b/src/trace/dune @@ -3,4 +3,8 @@ (public_name opentelemetry.trace) (synopsis "Use opentelemetry as a collector for trace") (optional) - (libraries opentelemetry.ambient-context trace.core opentelemetry)) + (libraries + opentelemetry.ambient-context + trace.core + trace.subscriber + opentelemetry)) diff --git a/src/trace/opentelemetry_trace.ml b/src/trace/opentelemetry_trace.ml index ead41826..1d2f7f34 100644 --- a/src/trace/opentelemetry_trace.ml +++ b/src/trace/opentelemetry_trace.ml @@ -1,6 +1,7 @@ module Otel = Opentelemetry module Otrace = Trace_core (* ocaml-trace *) module TLS = Thread_local_storage +module TSub = Trace_subscriber.Subscriber open struct let spf = Printf.sprintf diff --git a/src/trace/opentelemetry_trace.mli b/src/trace/opentelemetry_trace.mli index 060f4f13..eb5591fa 100644 --- a/src/trace/opentelemetry_trace.mli +++ b/src/trace/opentelemetry_trace.mli @@ -1,21 +1,3 @@ -module Otel := Opentelemetry -module Otrace := Trace_core -module TLS := Thread_local_storage - -module Conv : sig - val trace_id_of_otel : Otel.Trace_id.t -> string - - val trace_id_to_otel : string -> Otel.Trace_id.t - - val span_id_of_otel : Otel.Span_id.t -> int64 - - val span_id_to_otel : int64 -> Otel.Span_id.t - - val ctx_to_otel : Otrace.explicit_span_ctx -> Otel.Span_ctx.t - - val ctx_of_otel : Otel.Span_ctx.t -> Otrace.explicit_span_ctx -end - (** [opentelemetry.trace] implements a {!Trace_core.Collector} for {{:https://v3.ocaml.org/p/trace} ocaml-trace}. @@ -43,21 +25,58 @@ end {!Internal.spankind_of_string} for the list of supported values.) {[ - ocaml let describe () = [ Opentelemetry_trace.(spankind_key, client) ] in Trace_core.with_span ~__FILE__ ~__LINE__ ~data:describe "my-span" @@ fun _ -> (* ... *) ]} *) +module Otel := Opentelemetry +module Otrace := Trace_core +module TLS := Thread_local_storage + +(** Conversions between [Opentelemetry] and [Trace_core] types *) +module Conv : sig + val trace_id_of_otel : Otel.Trace_id.t -> string + + val trace_id_to_otel : string -> Otel.Trace_id.t + + val span_id_of_otel : Otel.Span_id.t -> int64 + + val span_id_to_otel : int64 -> Otel.Span_id.t + + val ctx_to_otel : Otrace.explicit_span_ctx -> Otel.Span_ctx.t + + val ctx_of_otel : Otel.Span_ctx.t -> Otrace.explicit_span_ctx +end + +(** The extension events for {!Trace_core}. *) +module Extensions : sig + type Otrace.extension_event += + | Ev_link_span of Otrace.explicit_span * Otrace.explicit_span_ctx + (** Link the given span to the given context. The context isn't the + parent, but the link can be used to correlate both spans. *) + | Ev_record_exn of { + sp: Otrace.span; + exn: exn; + bt: Printexc.raw_backtrace; + error: bool; (** Is this an actual internal error? *) + } + (** Record exception and potentially turn span to an error *) + | Ev_set_span_kind of Otrace.span * Otel.Span_kind.t +end + val on_internal_error : (string -> unit) ref (** Callback to print errors in the library itself (ie bugs) *) val setup : unit -> unit (** Install the OTEL backend as a Trace collector *) -val setup_with_otel_backend : Opentelemetry.Collector.backend -> unit -(** Same as {!setup}, but also install the given backend as OTEL backend *) +val setup_with_otel_exporter : #Opentelemetry.Exporter.t -> unit +(** Same as {!setup}, but using the given exporter *) + +val setup_with_otel_backend : #Opentelemetry.Exporter.t -> unit +[@@deprecated "use setup_with_otel_exporter"] val collector : unit -> Trace_core.collector (** Make a Trace collector that uses the OTEL backend to send spans and logs *) From 59be0f625b3f421b6965952d8df85633a81b2ca9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 13:28:50 -0500 Subject: [PATCH 04/94] client: add debug_exporter, stdout_exporter, resource helpers --- src/client/common_.ml | 4 +++ src/client/debug_exporter.ml | 36 ++++++++++++++++++++ src/client/dune | 5 +-- src/client/signal_resource_builder.ml | 34 +++++++++++++++++++ src/client/stdout_exporter.ml | 47 +++++++++++++++++++++++++++ 5 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 src/client/common_.ml create mode 100644 src/client/debug_exporter.ml create mode 100644 src/client/signal_resource_builder.ml create mode 100644 src/client/stdout_exporter.ml diff --git a/src/client/common_.ml b/src/client/common_.ml new file mode 100644 index 00000000..9ee9cf28 --- /dev/null +++ b/src/client/common_.ml @@ -0,0 +1,4 @@ +module OTEL = Opentelemetry +module Proto = Opentelemetry_proto + +let ( let@ ) = ( @@ ) diff --git a/src/client/debug_exporter.ml b/src/client/debug_exporter.ml new file mode 100644 index 00000000..cc969902 --- /dev/null +++ b/src/client/debug_exporter.ml @@ -0,0 +1,36 @@ +open Common_ + +(** [debug exporter] behaves like [exporter], but will print signals on [stderr] + before passing them to [exporter] *) +class debug ?(out = Format.err_formatter) (exp : #OTEL.Exporter.t) : + OTEL.Exporter.t = + let open Proto in + object + method send_trace l = + Format.fprintf out "SPANS: %a@." (Format.pp_print_list Trace.pp_span) l; + exp#send_trace l + + method send_metrics l = + Format.fprintf out "METRICS: %a@." + (Format.pp_print_list Metrics.pp_metric) + l; + exp#send_metrics l + + method send_logs l = + Format.fprintf out "LOGS: %a@." + (Format.pp_print_list Logs.pp_log_record) + l; + exp#send_logs l + + method tick () = exp#tick () + + method add_on_tick_callback cb = exp#add_on_tick_callback cb + + method cleanup ~on_done () = + Format.fprintf out "CLEANUP@."; + exp#cleanup ~on_done () + end + +(** Exporter that simply debugs on [stderr] *) +let debug_only : OTEL.Exporter.t = + new debug ~out:Format.err_formatter OTEL.Exporter.dummy diff --git a/src/client/dune b/src/client/dune index 095f71fa..36f6ee5c 100644 --- a/src/client/dune +++ b/src/client/dune @@ -1,5 +1,6 @@ (library (name opentelemetry_client) (public_name opentelemetry.client) - (libraries opentelemetry pbrt mtime mtime.clock.os) - (synopsis "Common types and logic shared between client implementations")) + (libraries opentelemetry opentelemetry.proto pbrt mtime mtime.clock.os) + (synopsis + "Basic exporters, as well as Common types and logic shared between exporters")) diff --git a/src/client/signal_resource_builder.ml b/src/client/signal_resource_builder.ml new file mode 100644 index 00000000..bb20fab1 --- /dev/null +++ b/src/client/signal_resource_builder.ml @@ -0,0 +1,34 @@ +(** Group signals into [resource_xxx] objects *) + +open Common_ + +let make_resource_logs (logs : Proto.Logs.log_record list) : + Proto.Logs.resource_logs = + let attributes = OTEL.Globals.mk_attributes () in + let resource = Proto.Resource.make_resource ~attributes () in + let ll = + Proto.Logs.make_scope_logs ~scope:OTEL.Globals.instrumentation_library + ~log_records:logs () + in + Proto.Logs.make_resource_logs ~resource ~scope_logs:[ ll ] () + +let make_resource_spans ?service_name ?attrs spans : Proto.Trace.resource_spans + = + let ils = + Proto.Trace.make_scope_spans ~scope:OTEL.Globals.instrumentation_library + ~spans () + in + let attributes = OTEL.Globals.mk_attributes ?service_name ?attrs () in + let resource = Proto.Resource.make_resource ~attributes () in + Proto.Trace.make_resource_spans ~resource ~scope_spans:[ ils ] () + +(** Aggregate metrics into a {!Proto.Metrics.resource_metrics} *) +let make_resource_metrics ?service_name ?attrs (l : OTEL.Metrics.t list) : + Proto.Metrics.resource_metrics = + let open Proto.Metrics in + let lm = + make_scope_metrics ~scope:OTEL.Globals.instrumentation_library ~metrics:l () + in + let attributes = OTEL.Globals.mk_attributes ?service_name ?attrs () in + let resource = Proto.Resource.make_resource ~attributes () in + Proto.Metrics.make_resource_metrics ~scope_metrics:[ lm ] ~resource () diff --git a/src/client/stdout_exporter.ml b/src/client/stdout_exporter.ml new file mode 100644 index 00000000..ac0b0af9 --- /dev/null +++ b/src/client/stdout_exporter.ml @@ -0,0 +1,47 @@ +(** A simple exporter that prints on stdout *) + +open Common_ +open OTEL + +open struct + let pp_span out (sp : Span.t) = + Format.fprintf out + "@[<2>SPAN@ trace_id: %a@ span_id: %a@ name: %S@ start: %a@ end: %a@]@." + Trace_id.pp + (Trace_id.of_bytes sp.trace_id) + Span_id.pp + (Span_id.of_bytes sp.span_id) + sp.name Timestamp_ns.pp_debug sp.start_time_unix_nano + Timestamp_ns.pp_debug sp.end_time_unix_nano + + let pp_vlist mutex pp out l = + if l != [] then ( + let@ () = Util_mutex.protect mutex in + Format.fprintf out "@["; + List.iteri + (fun i x -> + if i > 0 then Format.fprintf out "@,"; + pp out x) + l; + Format.fprintf out "@]@." + ) +end + +class stdout : OTEL.Exporter.t = + let out = Format.std_formatter in + let mutex = Mutex.create () in + + let ticker = Tick_callbacks.create () in + object + method send_trace l = pp_vlist mutex pp_span out l + + method send_metrics l = pp_vlist mutex Proto.Metrics.pp_metric out l + + method send_logs l = pp_vlist mutex Proto.Logs.pp_log_record out l + + method tick () = Tick_callbacks.tick ticker + + method add_on_tick_callback cb = Tick_callbacks.on_tick ticker cb + + method cleanup ~on_done () = on_done () + end From 29fa482b799cfc6724445e84d488548a889686bb Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 13:29:12 -0500 Subject: [PATCH 05/94] feat integration/logs: update paths --- src/integrations/logs/opentelemetry_logs.ml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/integrations/logs/opentelemetry_logs.ml b/src/integrations/logs/opentelemetry_logs.ml index a06cf117..51aa0818 100644 --- a/src/integrations/logs/opentelemetry_logs.ml +++ b/src/integrations/logs/opentelemetry_logs.ml @@ -11,13 +11,13 @@ module Otel = Opentelemetry (* Levels *) (*****************************************************************************) (* Convert log level to Otel severity *) -let log_level_to_severity (level : Logs.level) : Otel.Logs.severity = +let log_level_to_severity (level : Logs.level) : Otel.Log_record.severity = match level with - | Logs.App -> Otel.Logs.Severity_number_info (* like info, but less severe *) - | Logs.Info -> Otel.Logs.Severity_number_info2 - | Logs.Error -> Otel.Logs.Severity_number_error - | Logs.Warning -> Otel.Logs.Severity_number_warn - | Logs.Debug -> Otel.Logs.Severity_number_debug + | Logs.App -> Severity_number_info (* like info, but less severe *) + | Logs.Info -> Severity_number_info2 + | Logs.Error -> Severity_number_error + | Logs.Warning -> Severity_number_warn + | Logs.Debug -> Severity_number_debug (*****************************************************************************) (* Logs Util *) @@ -44,9 +44,12 @@ let log ?service_name ?(attrs = []) ?(scope = Otel.Scope.get_ambient_scope ()) Option.map (fun (scope : Otel.Scope.t) -> scope.trace_id) scope in let severity = log_level_to_severity level in - let log = Otel.Logs.make_str ~severity ~log_level ?trace_id ?span_id msg in + let log = + Otel.Log_record.make_str ~severity ~log_level ?trace_id ?span_id msg + in (* Noop if no backend is set *) - Otel.Logs.emit ?service_name ~attrs [ log ] + (* TODO: be more explicit *) + Otel.Logger.emit ?service_name ~attrs [ log ] let otel_reporter ?service_name ?(attributes = []) () : Logs.reporter = let report src level ~over k msgf = From 2752eb710d73d0c62ecffab2386f36059968bc9e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 13:59:00 -0500 Subject: [PATCH 06/94] feat: opentelemetry.util with various utilities --- src/util/alist.ml | 28 +++++++++++++++++ src/util/alist.mli | 14 +++++++++ src/util/dune | 6 ++++ src/util/interval_limiter.ml | 18 +++++++++++ src/util/interval_limiter.mli | 9 ++++++ src/util/rpool.ml | 59 +++++++++++++++++++++++++++++++++++ src/util/rpool.mli | 27 ++++++++++++++++ src/util/util_bytes_.ml | 49 +++++++++++++++++++++++++++++ src/util/util_mutex.ml | 12 +++++++ src/util/util_mutex.mli | 1 + 10 files changed, 223 insertions(+) create mode 100644 src/util/alist.ml create mode 100644 src/util/alist.mli create mode 100644 src/util/dune create mode 100644 src/util/interval_limiter.ml create mode 100644 src/util/interval_limiter.mli create mode 100644 src/util/rpool.ml create mode 100644 src/util/rpool.mli create mode 100644 src/util/util_bytes_.ml create mode 100644 src/util/util_mutex.ml create mode 100644 src/util/util_mutex.mli diff --git a/src/util/alist.ml b/src/util/alist.ml new file mode 100644 index 00000000..356f2630 --- /dev/null +++ b/src/util/alist.ml @@ -0,0 +1,28 @@ +module Atomic = Opentelemetry_atomic.Atomic + +type 'a t = 'a list Atomic.t + +let make () = Atomic.make [] + +let[@inline] is_empty self : bool = + match Atomic.get self with + | [] -> true + | _ :: _ -> false + +let get = Atomic.get + +let add self x = + while + let old = Atomic.get self in + let l' = x :: old in + not (Atomic.compare_and_set self old l') + do + () + done + +let rec pop_all self = + let l = Atomic.get self in + if Atomic.compare_and_set self l [] then + l + else + pop_all self diff --git a/src/util/alist.mli b/src/util/alist.mli new file mode 100644 index 00000000..832e3c2e --- /dev/null +++ b/src/util/alist.mli @@ -0,0 +1,14 @@ +(** Atomic list *) + +type 'a t + +val get : 'a t -> 'a list +(** Snapshot *) + +val is_empty : _ t -> bool + +val make : unit -> 'a t + +val add : 'a t -> 'a -> unit + +val pop_all : 'a t -> 'a list diff --git a/src/util/dune b/src/util/dune new file mode 100644 index 00000000..1348fe68 --- /dev/null +++ b/src/util/dune @@ -0,0 +1,6 @@ +(library + (name opentelemetry_util) + (public_name opentelemetry.util) + (flags :standard -open Opentelemetry_atomic) + (libraries opentelemetry.atomic mtime mtime.clock.os) + (synopsis "Utilities for opentelemetry")) diff --git a/src/util/interval_limiter.ml b/src/util/interval_limiter.ml new file mode 100644 index 00000000..456de0f6 --- /dev/null +++ b/src/util/interval_limiter.ml @@ -0,0 +1,18 @@ +type t = { + min_interval: Mtime.span; + last: Mtime.t Atomic.t; +} + +let create ~min_interval () : t = + { min_interval; last = Atomic.make Mtime.min_stamp } + +let make_attempt (self : t) : bool = + let now = Mtime_clock.now () in + let last = Atomic.get self.last in + let elapsed = Mtime.span last now in + if Mtime.Span.compare elapsed self.min_interval >= 0 then + (* attempts succeeds, unless another thread updated [self.last] + in the mean time *) + Atomic.compare_and_set self.last last now + else + false diff --git a/src/util/interval_limiter.mli b/src/util/interval_limiter.mli new file mode 100644 index 00000000..b07f7c68 --- /dev/null +++ b/src/util/interval_limiter.mli @@ -0,0 +1,9 @@ +type t + +val create : min_interval:Mtime.span -> unit -> t + +val make_attempt : t -> bool +(** [make_attempt lim] returns [true] if the last successful attempt was more + than [min_interval] ago, as measured by mtime. If so, this counts as the new + latest attempt; otherwise [false] is returned and the state is not updated. +*) diff --git a/src/util/rpool.ml b/src/util/rpool.ml new file mode 100644 index 00000000..833ccaef --- /dev/null +++ b/src/util/rpool.ml @@ -0,0 +1,59 @@ +module A = Atomic + +type 'a list_ = + | Nil + | Cons of int * 'a * 'a list_ + +type 'a t = { + mk_item: unit -> 'a; + clear: 'a -> unit; + max_size: int; (** Max number of items *) + items: 'a list_ A.t; +} + +let create ?(clear = ignore) ~mk_item ?(max_size = 512) () : _ t = + { mk_item; clear; max_size; items = A.make Nil } + +let rec acquire self = + match A.get self.items with + | Nil -> self.mk_item () + | Cons (_, x, tl) as l -> + if A.compare_and_set self.items l tl then + x + else + acquire self + +let[@inline] size_ = function + | Cons (sz, _, _) -> sz + | Nil -> 0 + +let release self x : unit = + let rec loop () = + match A.get self.items with + | Cons (sz, _, _) when sz >= self.max_size -> + (* forget the item *) + () + | l -> + if not (A.compare_and_set self.items l (Cons (size_ l + 1, x, l))) then + loop () + in + + self.clear x; + loop () + +let with_resource (self : _ t) f = + let x = acquire self in + try + let res = f x in + release self x; + res + with e -> + let bt = Printexc.get_raw_backtrace () in + release self x; + Printexc.raise_with_backtrace e bt + +module Raw = struct + let release = release + + let acquire = acquire +end diff --git a/src/util/rpool.mli b/src/util/rpool.mli new file mode 100644 index 00000000..4a80e115 --- /dev/null +++ b/src/util/rpool.mli @@ -0,0 +1,27 @@ +(** Simple resource pool. + + This is intended for buffers, protobuf encoders, etc. *) + +type 'a t +(** Pool of values of type ['a] *) + +val create : + ?clear:('a -> unit) -> mk_item:(unit -> 'a) -> ?max_size:int -> unit -> 'a t +(** Create a new pool. + @param mk_item produce a new item in case the pool is empty + @param max_size + maximum number of item in the pool before we start dropping resources on + the floor. This controls resource consumption. + @param clear a function called on items before recycling them. *) + +val with_resource : 'a t -> ('a -> 'b) -> 'b +(** [with_resource pool f] runs [f x] with [x] a resource; when [f] fails or + returns, [x] is returned to the pool for future reuse. *) + +(** Low level control over the pool. This is easier to get wrong (e.g. releasing + the same resource twice) so use with caution. *) +module Raw : sig + val acquire : 'a t -> 'a + + val release : 'a t -> 'a -> unit +end diff --git a/src/util/util_bytes_.ml b/src/util/util_bytes_.ml new file mode 100644 index 00000000..91e65a52 --- /dev/null +++ b/src/util/util_bytes_.ml @@ -0,0 +1,49 @@ +open struct + let spf = Printf.sprintf +end + +let int_to_hex (i : int) = + if i < 10 then + Char.chr (i + Char.code '0') + else + Char.chr (i - 10 + Char.code 'a') + +let bytes_to_hex_into b res off : unit = + for i = 0 to Bytes.length b - 1 do + let n = Char.code (Bytes.get b i) in + Bytes.set res ((2 * i) + off) (int_to_hex ((n land 0xf0) lsr 4)); + Bytes.set res ((2 * i) + 1 + off) (int_to_hex (n land 0x0f)) + done + +let bytes_to_hex (b : bytes) : string = + let res = Bytes.create (2 * Bytes.length b) in + bytes_to_hex_into b res 0; + Bytes.unsafe_to_string res + +let int_of_hex = function + | '0' .. '9' as c -> Char.code c - Char.code '0' + | 'a' .. 'f' as c -> 10 + Char.code c - Char.code 'a' + | c -> raise (Invalid_argument (spf "invalid hex char: %C" c)) + +let bytes_of_hex_substring (s : string) off len = + if len mod 2 <> 0 then + raise (Invalid_argument "hex sequence must be of even length"); + let res = Bytes.make (len / 2) '\x00' in + for i = 0 to (len / 2) - 1 do + let n1 = int_of_hex (String.get s (off + (2 * i))) in + let n2 = int_of_hex (String.get s (off + (2 * i) + 1)) in + let n = (n1 lsl 4) lor n2 in + Bytes.set res i (Char.chr n) + done; + res + +let bytes_of_hex (s : string) : bytes = + bytes_of_hex_substring s 0 (String.length s) + +let bytes_non_zero (self : bytes) : bool = + try + for i = 0 to Bytes.length self - 1 do + if Char.code (Bytes.unsafe_get self i) <> 0 then raise_notrace Exit + done; + false + with Exit -> true diff --git a/src/util/util_mutex.ml b/src/util/util_mutex.ml new file mode 100644 index 00000000..ab7e48a7 --- /dev/null +++ b/src/util/util_mutex.ml @@ -0,0 +1,12 @@ +(* Mutex.protect was added in OCaml 5.1, but we want support back to 4.08 *) +(* cannot inline, otherwise flambda might move code around. (as per Stdlib) *) +let[@inline never] protect m f = + Mutex.lock m; + match f () with + | x -> + Mutex.unlock m; + x + | exception e -> + (* NOTE: [unlock] does not poll for asynchronous exceptions *) + Mutex.unlock m; + Printexc.raise_with_backtrace e (Printexc.get_raw_backtrace ()) diff --git a/src/util/util_mutex.mli b/src/util/util_mutex.mli new file mode 100644 index 00000000..feccf59f --- /dev/null +++ b/src/util/util_mutex.mli @@ -0,0 +1 @@ +val protect : Mutex.t -> (unit -> 'a) -> 'a From 15a5243b6b6178a42434d38f5b2f2158b919f7c9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 15:08:06 -0500 Subject: [PATCH 07/94] more utils --- src/util/cb_set.ml | 9 +++++++ src/util/cb_set.mli | 9 +++++++ src/util/rpool.ml | 59 --------------------------------------------- src/util/rpool.mli | 27 --------------------- 4 files changed, 18 insertions(+), 86 deletions(-) create mode 100644 src/util/cb_set.ml create mode 100644 src/util/cb_set.mli delete mode 100644 src/util/rpool.ml delete mode 100644 src/util/rpool.mli diff --git a/src/util/cb_set.ml b/src/util/cb_set.ml new file mode 100644 index 00000000..78190855 --- /dev/null +++ b/src/util/cb_set.ml @@ -0,0 +1,9 @@ +type cb = unit -> unit + +type t = { cbs: cb Alist.t } [@@unboxed] + +let create () : t = { cbs = Alist.make () } + +let[@inline] register self f = Alist.add self.cbs f + +let[@inline] trigger self = List.iter (fun f -> f ()) (Alist.get self.cbs) diff --git a/src/util/cb_set.mli b/src/util/cb_set.mli new file mode 100644 index 00000000..3a6e2a0c --- /dev/null +++ b/src/util/cb_set.mli @@ -0,0 +1,9 @@ +(** A collection of callbacks. thread-safe. *) + +type t + +val create : unit -> t + +val register : t -> (unit -> unit) -> unit + +val trigger : t -> unit diff --git a/src/util/rpool.ml b/src/util/rpool.ml deleted file mode 100644 index 833ccaef..00000000 --- a/src/util/rpool.ml +++ /dev/null @@ -1,59 +0,0 @@ -module A = Atomic - -type 'a list_ = - | Nil - | Cons of int * 'a * 'a list_ - -type 'a t = { - mk_item: unit -> 'a; - clear: 'a -> unit; - max_size: int; (** Max number of items *) - items: 'a list_ A.t; -} - -let create ?(clear = ignore) ~mk_item ?(max_size = 512) () : _ t = - { mk_item; clear; max_size; items = A.make Nil } - -let rec acquire self = - match A.get self.items with - | Nil -> self.mk_item () - | Cons (_, x, tl) as l -> - if A.compare_and_set self.items l tl then - x - else - acquire self - -let[@inline] size_ = function - | Cons (sz, _, _) -> sz - | Nil -> 0 - -let release self x : unit = - let rec loop () = - match A.get self.items with - | Cons (sz, _, _) when sz >= self.max_size -> - (* forget the item *) - () - | l -> - if not (A.compare_and_set self.items l (Cons (size_ l + 1, x, l))) then - loop () - in - - self.clear x; - loop () - -let with_resource (self : _ t) f = - let x = acquire self in - try - let res = f x in - release self x; - res - with e -> - let bt = Printexc.get_raw_backtrace () in - release self x; - Printexc.raise_with_backtrace e bt - -module Raw = struct - let release = release - - let acquire = acquire -end diff --git a/src/util/rpool.mli b/src/util/rpool.mli deleted file mode 100644 index 4a80e115..00000000 --- a/src/util/rpool.mli +++ /dev/null @@ -1,27 +0,0 @@ -(** Simple resource pool. - - This is intended for buffers, protobuf encoders, etc. *) - -type 'a t -(** Pool of values of type ['a] *) - -val create : - ?clear:('a -> unit) -> mk_item:(unit -> 'a) -> ?max_size:int -> unit -> 'a t -(** Create a new pool. - @param mk_item produce a new item in case the pool is empty - @param max_size - maximum number of item in the pool before we start dropping resources on - the floor. This controls resource consumption. - @param clear a function called on items before recycling them. *) - -val with_resource : 'a t -> ('a -> 'b) -> 'b -(** [with_resource pool f] runs [f x] with [x] a resource; when [f] fails or - returns, [x] is returned to the pool for future reuse. *) - -(** Low level control over the pool. This is easier to get wrong (e.g. releasing - the same resource twice) so use with caution. *) -module Raw : sig - val acquire : 'a t -> 'a - - val release : 'a t -> 'a -> unit -end From 05034d978a52bf72f02b841e2f8446f3ea6f2ed8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 15:08:14 -0500 Subject: [PATCH 08/94] feat OTEL: move some stuff to client or util; rate limit GC metrics --- src/core/AList.ml | 28 --------------------- src/core/AList.mli | 14 ----------- src/core/dune | 3 ++- src/core/exporter.ml | 29 ++++++++++----------- src/core/gc_metrics.ml | 21 +++++++++++----- src/core/gc_metrics.mli | 10 +++++--- src/core/metrics_callbacks.ml | 8 +++--- src/core/opentelemetry.ml | 2 +- src/core/tick_callbacks.ml | 9 ------- src/core/tick_callbacks.mli | 9 ------- src/core/util_bytes_.ml | 47 ----------------------------------- src/core/util_mutex.ml | 12 --------- src/core/util_mutex.mli | 1 - 13 files changed, 44 insertions(+), 149 deletions(-) delete mode 100644 src/core/AList.ml delete mode 100644 src/core/AList.mli delete mode 100644 src/core/tick_callbacks.ml delete mode 100644 src/core/tick_callbacks.mli delete mode 100644 src/core/util_bytes_.ml delete mode 100644 src/core/util_mutex.ml delete mode 100644 src/core/util_mutex.mli diff --git a/src/core/AList.ml b/src/core/AList.ml deleted file mode 100644 index 356f2630..00000000 --- a/src/core/AList.ml +++ /dev/null @@ -1,28 +0,0 @@ -module Atomic = Opentelemetry_atomic.Atomic - -type 'a t = 'a list Atomic.t - -let make () = Atomic.make [] - -let[@inline] is_empty self : bool = - match Atomic.get self with - | [] -> true - | _ :: _ -> false - -let get = Atomic.get - -let add self x = - while - let old = Atomic.get self in - let l' = x :: old in - not (Atomic.compare_and_set self old l') - do - () - done - -let rec pop_all self = - let l = Atomic.get self in - if Atomic.compare_and_set self l [] then - l - else - pop_all self diff --git a/src/core/AList.mli b/src/core/AList.mli deleted file mode 100644 index 832e3c2e..00000000 --- a/src/core/AList.mli +++ /dev/null @@ -1,14 +0,0 @@ -(** Atomic list *) - -type 'a t - -val get : 'a t -> 'a list -(** Snapshot *) - -val is_empty : _ t -> bool - -val make : unit -> 'a t - -val add : 'a t -> 'a -> unit - -val pop_all : 'a t -> 'a list diff --git a/src/core/dune b/src/core/dune index 248e3aff..debbd5c0 100644 --- a/src/core/dune +++ b/src/core/dune @@ -1,9 +1,10 @@ (library (name opentelemetry) (synopsis "API for opentelemetry instrumentation") - (flags :standard -warn-error -a+8) + (flags :standard -warn-error -a+8 -open Opentelemetry_util) (libraries opentelemetry.proto + opentelemetry.util opentelemetry.ambient-context ptime ptime.clock.os diff --git a/src/core/exporter.ml b/src/core/exporter.ml index 7ccdc823..7c08f588 100644 --- a/src/core/exporter.ml +++ b/src/core/exporter.ml @@ -39,7 +39,7 @@ end (** Dummy exporter, does nothing *) let dummy : t = - let ticker = Tick_callbacks.create () in + let tick_cbs = Cb_set.create () in object method send_trace = ignore @@ -47,9 +47,9 @@ let dummy : t = method send_logs = ignore - method tick () = Tick_callbacks.tick ticker + method tick () = Cb_set.trigger tick_cbs - method add_on_tick_callback cb = Tick_callbacks.on_tick ticker cb + method add_on_tick_callback cb = Cb_set.register tick_cbs cb method cleanup ~on_done () = on_done () end @@ -78,14 +78,15 @@ module Main_exporter = struct (* hidden *) open struct (* a list of callbacks automatically added to the main exporter *) - let on_tick_cbs_ = AList.make () + let on_tick_cbs_ = Alist.make () let exporter : t option Atomic.t = Atomic.make None end (** Set the global exporter *) - let set (exp : t) : unit = - List.iter exp#add_on_tick_callback (AList.get on_tick_cbs_); + let set (exp : #t) : unit = + let exp = (exp :> t) in + List.iter exp#add_on_tick_callback (Alist.get on_tick_cbs_); Atomic.set exporter (Some exp) (** Remove current exporter, if any. @@ -104,25 +105,25 @@ module Main_exporter = struct let[@inline] get () : t option = Atomic.get exporter let add_on_tick_callback f = - AList.add on_tick_cbs_ f; + Alist.add on_tick_cbs_ f; Option.iter (fun exp -> exp#add_on_tick_callback f) (get ()) end -let set_backend = Main_exporter.set [@@deprecated "use `Main_exporter.set`"] +let (set_backend [@deprecated "use `Main_exporter.set`"]) = Main_exporter.set -let remove_backend = Main_exporter.remove -[@@deprecated "use `Main_exporter.remove`"] +let (remove_backend [@deprecated "use `Main_exporter.remove`"]) = + Main_exporter.remove -let has_backend = Main_exporter.present -[@@deprecated "use `Main_exporter.present`"] +let (has_backend [@deprecated "use `Main_exporter.present`"]) = + Main_exporter.present -let get_backend = Main_exporter.get [@@deprecated "use `Main_exporter.ge"] +let (get_backend [@deprecated "use `Main_exporter.ge"]) = Main_exporter.get let with_setup_debug_backend ?(on_done = ignore) (exp : #t) ?(enable = true) () f = let exp = (exp :> t) in if enable then ( - set_backend exp; + Main_exporter.set exp; Fun.protect ~finally:(fun () -> cleanup exp ~on_done) f ) else f () diff --git a/src/core/gc_metrics.ml b/src/core/gc_metrics.ml index c48b6734..e565eb86 100644 --- a/src/core/gc_metrics.ml +++ b/src/core/gc_metrics.ml @@ -6,6 +6,8 @@ open struct let[@inline] word_to_bytes n = n * bytes_per_word let[@inline] word_to_bytes_f n = n *. float bytes_per_word + + let default_interval_s = 20 end let get_metrics () : Metrics.t list = @@ -34,16 +36,23 @@ let get_metrics () : Metrics.t list = [ int ~now gc.Gc.compactions ]; ] -let setup (exp : #Exporter.t) = +let setup ?(min_interval_s = default_interval_s) (exp : #Exporter.t) = + (* limit rate *) + let min_interval_s = max 5 min_interval_s in + let min_interval = Mtime.Span.(min_interval_s * s) in + let limiter = Interval_limiter.create ~min_interval () in + let on_tick () = - let m = get_metrics () in - exp#send_metrics m + if Interval_limiter.make_attempt limiter then ( + let m = get_metrics () in + exp#send_metrics m + ) in Exporter.on_tick exp on_tick -let setup_on_main_exporter () = +let setup_on_main_exporter ?min_interval_s () = match Exporter.Main_exporter.get () with | None -> () - | Some exp -> setup exp + | Some exp -> setup ?min_interval_s exp -let basic_setup = setup_on_main_exporter +let basic_setup () = setup_on_main_exporter () diff --git a/src/core/gc_metrics.mli b/src/core/gc_metrics.mli index 817967d2..e0c01883 100644 --- a/src/core/gc_metrics.mli +++ b/src/core/gc_metrics.mli @@ -5,13 +5,17 @@ val get_metrics : unit -> Metrics.t list (** Get a few metrics from the current state of the GC. *) -val setup : #Exporter.t -> unit +val setup : ?min_interval_s:int -> #Exporter.t -> unit (** Setup a hook that will emit GC statistics on every tick. It does assume that [tick] is called regularly on the exporter. For example, if we ensure the exporter's [tick] function is called every 5s, we'll get GC metrics every - 5s. *) + 5s. -val setup_on_main_exporter : unit -> unit + @param min_interval_s + if provided, GC metrics will be emitted at most every [min_interval_s] + seconds. This prevents flooding. Default value is 20s. *) + +val setup_on_main_exporter : ?min_interval_s:int -> unit -> unit (** Setup the hook on the main exporter. *) val basic_setup : unit -> unit [@@deprecated "use setup_on_main_exporter"] diff --git a/src/core/metrics_callbacks.ml b/src/core/metrics_callbacks.ml index c404c0bc..4fd78c4a 100644 --- a/src/core/metrics_callbacks.ml +++ b/src/core/metrics_callbacks.ml @@ -1,10 +1,10 @@ open Common_ -type t = { cbs: (unit -> Metrics.t list) AList.t } [@@unboxed] +type t = { cbs: (unit -> Metrics.t list) Alist.t } [@@unboxed] -let create () : t = { cbs = AList.make () } +let create () : t = { cbs = Alist.make () } -let[@inline] add_metrics_cb (self : t) f = AList.add self.cbs f +let[@inline] add_metrics_cb (self : t) f = Alist.add self.cbs f let add_to_exporter (exp : #Exporter.t) (self : t) = let on_tick () = @@ -14,7 +14,7 @@ let add_to_exporter (exp : #Exporter.t) (self : t) = (fun f -> let f_metrics = f () in res := List.rev_append f_metrics !res) - (AList.get self.cbs); + (Alist.get self.cbs); let metrics = !res in (* emit the metrics *) diff --git a/src/core/opentelemetry.ml b/src/core/opentelemetry.ml index b5f518c0..ec84d0e4 100644 --- a/src/core/opentelemetry.ml +++ b/src/core/opentelemetry.ml @@ -5,7 +5,7 @@ open Common_ module Rand_bytes = Rand_bytes (** Generation of random identifiers. *) -module AList = AList +module Alist = Alist (** Atomic list, for internal usage @since 0.7 *) diff --git a/src/core/tick_callbacks.ml b/src/core/tick_callbacks.ml deleted file mode 100644 index d81a3a0f..00000000 --- a/src/core/tick_callbacks.ml +++ /dev/null @@ -1,9 +0,0 @@ -type cb = unit -> unit - -type t = { cbs: cb AList.t } [@@unboxed] - -let create () : t = { cbs = AList.make () } - -let[@inline] on_tick self f = AList.add self.cbs f - -let[@inline] tick self = List.iter (fun f -> f ()) (AList.get self.cbs) diff --git a/src/core/tick_callbacks.mli b/src/core/tick_callbacks.mli deleted file mode 100644 index ad7ff5bb..00000000 --- a/src/core/tick_callbacks.mli +++ /dev/null @@ -1,9 +0,0 @@ -(** A collection of callbacks that are regularly called. *) - -type t - -val create : unit -> t - -val on_tick : t -> (unit -> unit) -> unit - -val tick : t -> unit diff --git a/src/core/util_bytes_.ml b/src/core/util_bytes_.ml deleted file mode 100644 index 9326991a..00000000 --- a/src/core/util_bytes_.ml +++ /dev/null @@ -1,47 +0,0 @@ -open Common_ - -let int_to_hex (i : int) = - if i < 10 then - Char.chr (i + Char.code '0') - else - Char.chr (i - 10 + Char.code 'a') - -let bytes_to_hex_into b res off : unit = - for i = 0 to Bytes.length b - 1 do - let n = Char.code (Bytes.get b i) in - Bytes.set res ((2 * i) + off) (int_to_hex ((n land 0xf0) lsr 4)); - Bytes.set res ((2 * i) + 1 + off) (int_to_hex (n land 0x0f)) - done - -let bytes_to_hex (b : bytes) : string = - let res = Bytes.create (2 * Bytes.length b) in - bytes_to_hex_into b res 0; - Bytes.unsafe_to_string res - -let int_of_hex = function - | '0' .. '9' as c -> Char.code c - Char.code '0' - | 'a' .. 'f' as c -> 10 + Char.code c - Char.code 'a' - | c -> raise (Invalid_argument (spf "invalid hex char: %C" c)) - -let bytes_of_hex_substring (s : string) off len = - if len mod 2 <> 0 then - raise (Invalid_argument "hex sequence must be of even length"); - let res = Bytes.make (len / 2) '\x00' in - for i = 0 to (len / 2) - 1 do - let n1 = int_of_hex (String.get s (off + (2 * i))) in - let n2 = int_of_hex (String.get s (off + (2 * i) + 1)) in - let n = (n1 lsl 4) lor n2 in - Bytes.set res i (Char.chr n) - done; - res - -let bytes_of_hex (s : string) : bytes = - bytes_of_hex_substring s 0 (String.length s) - -let bytes_non_zero (self : bytes) : bool = - try - for i = 0 to Bytes.length self - 1 do - if Char.code (Bytes.unsafe_get self i) <> 0 then raise_notrace Exit - done; - false - with Exit -> true diff --git a/src/core/util_mutex.ml b/src/core/util_mutex.ml deleted file mode 100644 index ab7e48a7..00000000 --- a/src/core/util_mutex.ml +++ /dev/null @@ -1,12 +0,0 @@ -(* Mutex.protect was added in OCaml 5.1, but we want support back to 4.08 *) -(* cannot inline, otherwise flambda might move code around. (as per Stdlib) *) -let[@inline never] protect m f = - Mutex.lock m; - match f () with - | x -> - Mutex.unlock m; - x - | exception e -> - (* NOTE: [unlock] does not poll for asynchronous exceptions *) - Mutex.unlock m; - Printexc.raise_with_backtrace e (Printexc.get_raw_backtrace ()) diff --git a/src/core/util_mutex.mli b/src/core/util_mutex.mli deleted file mode 100644 index feccf59f..00000000 --- a/src/core/util_mutex.mli +++ /dev/null @@ -1 +0,0 @@ -val protect : Mutex.t -> (unit -> 'a) -> 'a From 5c3c03cdd87057510f837f599bdef6351d69d71d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 15:08:40 -0500 Subject: [PATCH 09/94] feat client: various changes --- src/client/batch.ml | 2 + src/client/batch.mli | 3 + src/client/client.ml | 8 --- src/client/{config.ml => client_config.ml} | 0 src/client/{config.mli => client_config.mli} | 0 src/client/common_.ml | 2 + src/client/rpool.ml | 59 +++++++++++++++++++ src/client/rpool.mli | 27 +++++++++ src/client/self_trace.ml | 3 +- src/client/stdout_exporter.ml | 7 ++- src/client/sync_queue.ml | 59 +++++++++++++++++++ src/client/sync_queue.mli | 24 ++++++++ ..._resource_builder.ml => util_resources.ml} | 0 13 files changed, 182 insertions(+), 12 deletions(-) delete mode 100644 src/client/client.ml rename src/client/{config.ml => client_config.ml} (100%) rename src/client/{config.mli => client_config.mli} (100%) create mode 100644 src/client/rpool.ml create mode 100644 src/client/rpool.mli create mode 100644 src/client/sync_queue.ml create mode 100644 src/client/sync_queue.mli rename src/client/{signal_resource_builder.ml => util_resources.ml} (100%) diff --git a/src/client/batch.ml b/src/client/batch.ml index e508c09f..1fc4aaa9 100644 --- a/src/client/batch.ml +++ b/src/client/batch.ml @@ -85,3 +85,5 @@ let push (self : _ t) elems : [ `Dropped | `Ok ] = push_unprotected self ~elems; `Ok ) + +let[@inline] push' self elems = ignore (push self elems : [ `Dropped | `Ok ]) diff --git a/src/client/batch.mli b/src/client/batch.mli index a7ed2aa9..c3b6f7e1 100644 --- a/src/client/batch.mli +++ b/src/client/batch.mli @@ -50,3 +50,6 @@ val push : 'a t -> 'a list -> [ `Dropped | `Ok ] (** [push b xs] is [`Ok] if it succeeds in pushing the values in [xs] into the batch [b], or [`Dropped] if the current size of the batch has exceeded the high water mark determined by the [batch] argument to [{!make}]. ) *) + +val push' : 'a t -> 'a list -> unit +(** Like {!push} but ignores the result *) diff --git a/src/client/client.ml b/src/client/client.ml deleted file mode 100644 index fa69c983..00000000 --- a/src/client/client.ml +++ /dev/null @@ -1,8 +0,0 @@ -(** Utilities for writing clients - - These are used for implementing e.g., the [opentelemetry-client-cohttp-lwt] - and [opentelemetry-client-ocurl] packages package. *) - -module Config = Config -module Signal = Signal -module Self_trace = Self_trace diff --git a/src/client/config.ml b/src/client/client_config.ml similarity index 100% rename from src/client/config.ml rename to src/client/client_config.ml diff --git a/src/client/config.mli b/src/client/client_config.mli similarity index 100% rename from src/client/config.mli rename to src/client/client_config.mli diff --git a/src/client/common_.ml b/src/client/common_.ml index 9ee9cf28..b1872cd8 100644 --- a/src/client/common_.ml +++ b/src/client/common_.ml @@ -1,4 +1,6 @@ module OTEL = Opentelemetry module Proto = Opentelemetry_proto +let spf = Printf.sprintf + let ( let@ ) = ( @@ ) diff --git a/src/client/rpool.ml b/src/client/rpool.ml new file mode 100644 index 00000000..833ccaef --- /dev/null +++ b/src/client/rpool.ml @@ -0,0 +1,59 @@ +module A = Atomic + +type 'a list_ = + | Nil + | Cons of int * 'a * 'a list_ + +type 'a t = { + mk_item: unit -> 'a; + clear: 'a -> unit; + max_size: int; (** Max number of items *) + items: 'a list_ A.t; +} + +let create ?(clear = ignore) ~mk_item ?(max_size = 512) () : _ t = + { mk_item; clear; max_size; items = A.make Nil } + +let rec acquire self = + match A.get self.items with + | Nil -> self.mk_item () + | Cons (_, x, tl) as l -> + if A.compare_and_set self.items l tl then + x + else + acquire self + +let[@inline] size_ = function + | Cons (sz, _, _) -> sz + | Nil -> 0 + +let release self x : unit = + let rec loop () = + match A.get self.items with + | Cons (sz, _, _) when sz >= self.max_size -> + (* forget the item *) + () + | l -> + if not (A.compare_and_set self.items l (Cons (size_ l + 1, x, l))) then + loop () + in + + self.clear x; + loop () + +let with_resource (self : _ t) f = + let x = acquire self in + try + let res = f x in + release self x; + res + with e -> + let bt = Printexc.get_raw_backtrace () in + release self x; + Printexc.raise_with_backtrace e bt + +module Raw = struct + let release = release + + let acquire = acquire +end diff --git a/src/client/rpool.mli b/src/client/rpool.mli new file mode 100644 index 00000000..4a80e115 --- /dev/null +++ b/src/client/rpool.mli @@ -0,0 +1,27 @@ +(** Simple resource pool. + + This is intended for buffers, protobuf encoders, etc. *) + +type 'a t +(** Pool of values of type ['a] *) + +val create : + ?clear:('a -> unit) -> mk_item:(unit -> 'a) -> ?max_size:int -> unit -> 'a t +(** Create a new pool. + @param mk_item produce a new item in case the pool is empty + @param max_size + maximum number of item in the pool before we start dropping resources on + the floor. This controls resource consumption. + @param clear a function called on items before recycling them. *) + +val with_resource : 'a t -> ('a -> 'b) -> 'b +(** [with_resource pool f] runs [f x] with [x] a resource; when [f] fails or + returns, [x] is returned to the pool for future reuse. *) + +(** Low level control over the pool. This is easier to get wrong (e.g. releasing + the same resource twice) so use with caution. *) +module Raw : sig + val acquire : 'a t -> 'a + + val release : 'a t -> 'a -> unit +end diff --git a/src/client/self_trace.ml b/src/client/self_trace.ml index 46757302..52d8b2f4 100644 --- a/src/client/self_trace.ml +++ b/src/client/self_trace.ml @@ -8,9 +8,10 @@ let dummy_trace_id_ = OT.Trace_id.dummy let dummy_span_id = OT.Span_id.dummy +(* FIXME: get an explicit tracer instead *) let with_ ?kind ?attrs name f = if Atomic.get enabled then - OT.Trace.with_ ?kind ?attrs name f + OT.Tracer.with_ ?kind ?attrs name f else ( (* A new scope is needed here because it might be modified *) let scope = diff --git a/src/client/stdout_exporter.ml b/src/client/stdout_exporter.ml index ac0b0af9..05122369 100644 --- a/src/client/stdout_exporter.ml +++ b/src/client/stdout_exporter.ml @@ -28,10 +28,11 @@ open struct end class stdout : OTEL.Exporter.t = + let open Opentelemetry_util in let out = Format.std_formatter in let mutex = Mutex.create () in - let ticker = Tick_callbacks.create () in + let tick_cbs = Cb_set.create () in object method send_trace l = pp_vlist mutex pp_span out l @@ -39,9 +40,9 @@ class stdout : OTEL.Exporter.t = method send_logs l = pp_vlist mutex Proto.Logs.pp_log_record out l - method tick () = Tick_callbacks.tick ticker + method tick () = Cb_set.trigger tick_cbs - method add_on_tick_callback cb = Tick_callbacks.on_tick ticker cb + method add_on_tick_callback cb = Cb_set.register tick_cbs cb method cleanup ~on_done () = on_done () end diff --git a/src/client/sync_queue.ml b/src/client/sync_queue.ml new file mode 100644 index 00000000..10983b2f --- /dev/null +++ b/src/client/sync_queue.ml @@ -0,0 +1,59 @@ +module UM = Opentelemetry.Util_mutex + +type 'a t = { + mutex: Mutex.t; + cond: Condition.t; + q: 'a Queue.t; + mutable closed: bool; +} + +exception Closed + +let create () : _ t = + { + mutex = Mutex.create (); + cond = Condition.create (); + q = Queue.create (); + closed = false; + } + +let close (self : _ t) = + UM.protect self.mutex @@ fun () -> + if not self.closed then ( + self.closed <- true; + Condition.broadcast self.cond (* awake waiters so they fail *) + ) + +let push (self : _ t) x : unit = + UM.protect self.mutex @@ fun () -> + if self.closed then + raise Closed + else ( + Queue.push x self.q; + Condition.signal self.cond + ) + +let pop (self : 'a t) : 'a = + let rec loop () = + if self.closed then + raise Closed + else if Queue.is_empty self.q then ( + Condition.wait self.cond self.mutex; + (loop [@tailcall]) () + ) else ( + let x = Queue.pop self.q in + x + ) + in + UM.protect self.mutex loop + +let pop_all (self : 'a t) into : unit = + let rec loop () = + if Queue.is_empty self.q then ( + if self.closed then raise Closed; + Condition.wait self.cond self.mutex; + (loop [@tailcall]) () + ) else + Queue.transfer self.q into + in + UM.protect self.mutex loop diff --git a/src/client/sync_queue.mli b/src/client/sync_queue.mli new file mode 100644 index 00000000..d64296d7 --- /dev/null +++ b/src/client/sync_queue.mli @@ -0,0 +1,24 @@ +(** Simple blocking queue *) + +type 'a t + +val create : unit -> _ t + +exception Closed + +val push : 'a t -> 'a -> unit +(** [push q x] pushes [x] into [q], and returns [()]. + @raise Closed if [close q] was previously called.*) + +val pop : 'a t -> 'a +(** [pop q] pops the next element in [q]. It might block until an element comes. + @raise Closed if the queue was closed before a new element was available. *) + +val pop_all : 'a t -> 'a Queue.t -> unit +(** [pop_all q into] pops all the elements of [q] and moves them into [into]. if + no element is available, it will block until it successfully transfers at + least one item to [into]. + @raise Closed if the queue was closed before a new element was available. *) + +val close : _ t -> unit +(** Close the queue, meaning there won't be any more [push] allowed. *) diff --git a/src/client/signal_resource_builder.ml b/src/client/util_resources.ml similarity index 100% rename from src/client/signal_resource_builder.ml rename to src/client/util_resources.ml From 7020148d44e792cb8b584b31b4aaccd7b3925d8c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 15:09:10 -0500 Subject: [PATCH 10/94] feat client-ocurl: use common batch and queue; remove layer of queueing now we modify batches on the fly when we send signals; but there still is a thread pool to send signals via HTTP. --- src/client-ocurl/b_queue.ml | 59 --- src/client-ocurl/b_queue.mli | 23 - src/client-ocurl/batch.ml | 24 -- src/client-ocurl/batch.mli | 14 - src/client-ocurl/common_.ml | 4 +- src/client-ocurl/config.ml | 9 +- src/client-ocurl/config.mli | 6 +- .../opentelemetry_client_ocurl.ml | 401 ++++++------------ .../opentelemetry_client_ocurl.mli | 11 +- 9 files changed, 141 insertions(+), 410 deletions(-) delete mode 100644 src/client-ocurl/b_queue.ml delete mode 100644 src/client-ocurl/b_queue.mli delete mode 100644 src/client-ocurl/batch.ml delete mode 100644 src/client-ocurl/batch.mli diff --git a/src/client-ocurl/b_queue.ml b/src/client-ocurl/b_queue.ml deleted file mode 100644 index 98f43876..00000000 --- a/src/client-ocurl/b_queue.ml +++ /dev/null @@ -1,59 +0,0 @@ -open Opentelemetry.Util_mutex - -type 'a t = { - mutex: Mutex.t; - cond: Condition.t; - q: 'a Queue.t; - mutable closed: bool; -} - -exception Closed - -let create () : _ t = - { - mutex = Mutex.create (); - cond = Condition.create (); - q = Queue.create (); - closed = false; - } - -let close (self : _ t) = - protect self.mutex @@ fun () -> - if not self.closed then ( - self.closed <- true; - Condition.broadcast self.cond (* awake waiters so they fail *) - ) - -let push (self : _ t) x : unit = - protect self.mutex @@ fun () -> - if self.closed then - raise Closed - else ( - Queue.push x self.q; - Condition.signal self.cond - ) - -let pop (self : 'a t) : 'a = - let rec loop () = - if self.closed then - raise Closed - else if Queue.is_empty self.q then ( - Condition.wait self.cond self.mutex; - (loop [@tailcall]) () - ) else ( - let x = Queue.pop self.q in - x - ) - in - protect self.mutex loop - -let pop_all (self : 'a t) into : unit = - let rec loop () = - if Queue.is_empty self.q then ( - if self.closed then raise Closed; - Condition.wait self.cond self.mutex; - (loop [@tailcall]) () - ) else - Queue.transfer self.q into - in - protect self.mutex loop diff --git a/src/client-ocurl/b_queue.mli b/src/client-ocurl/b_queue.mli deleted file mode 100644 index d020dfb3..00000000 --- a/src/client-ocurl/b_queue.mli +++ /dev/null @@ -1,23 +0,0 @@ -(** Basic Blocking Queue *) - -type 'a t - -val create : unit -> _ t - -exception Closed - -val push : 'a t -> 'a -> unit -(** [push q x] pushes [x] into [q], and returns [()]. - @raise Closed if [close q] was previously called.*) - -val pop : 'a t -> 'a -(** [pop q] pops the next element in [q]. It might block until an element comes. - @raise Closed if the queue was closed before a new element was available. *) - -val pop_all : 'a t -> 'a Queue.t -> unit -(** [pop_all q into] pops all the elements of [q] and moves them into [into]. It - might block until an element comes. - @raise Closed if the queue was closed before a new element was available. *) - -val close : _ t -> unit -(** Close the queue, meaning there won't be any more [push] allowed. *) diff --git a/src/client-ocurl/batch.ml b/src/client-ocurl/batch.ml deleted file mode 100644 index 0be8b1b0..00000000 --- a/src/client-ocurl/batch.ml +++ /dev/null @@ -1,24 +0,0 @@ -type 'a t = { - mutable len: int; - mutable l: 'a list list; - mutable started: Mtime.t; -} - -let create () = { len = 0; l = []; started = Mtime_clock.now () } - -let push self l = - if l != [] then ( - if self.l == [] then self.started <- Mtime_clock.now (); - self.l <- l :: self.l; - self.len <- self.len + List.length l - ) - -let[@inline] len self = self.len - -let[@inline] time_started self = self.started - -let pop_all self = - let l = self.l in - self.l <- []; - self.len <- 0; - l diff --git a/src/client-ocurl/batch.mli b/src/client-ocurl/batch.mli deleted file mode 100644 index 2b867b88..00000000 --- a/src/client-ocurl/batch.mli +++ /dev/null @@ -1,14 +0,0 @@ -(** List of lists with length *) - -type 'a t - -val create : unit -> 'a t - -val push : 'a t -> 'a list -> unit - -val len : _ t -> int - -val time_started : _ t -> Mtime.t -(** Time at which the batch most recently became non-empty *) - -val pop_all : 'a t -> 'a list list diff --git a/src/client-ocurl/common_.ml b/src/client-ocurl/common_.ml index 10df0c1d..1ec6de25 100644 --- a/src/client-ocurl/common_.ml +++ b/src/client-ocurl/common_.ml @@ -1,8 +1,8 @@ module Atomic = Opentelemetry_atomic.Atomic -include Opentelemetry.Lock +module Proto = Opentelemetry_proto let spf = Printf.sprintf let ( let@ ) = ( @@ ) -let tid () = Thread.id @@ Thread.self () +let[@inline] tid () = Thread.id @@ Thread.self () diff --git a/src/client-ocurl/config.ml b/src/client-ocurl/config.ml index 0954fbe6..e06ebf7e 100644 --- a/src/client-ocurl/config.ml +++ b/src/client-ocurl/config.ml @@ -1,3 +1,5 @@ +open Opentelemetry_client + type t = { bg_threads: int; (** Are there background threads, and how many? Default [4]. This will be @@ -10,7 +12,7 @@ type t = { [ticker_thread] is [true]. This will be clamped between [2 ms] and some longer interval (maximum [60s] currently). Default 500. @since 0.7 *) - common: Opentelemetry_client.Config.t; + common: Client_config.t; (** Common configuration options @since 0.12*) } @@ -20,10 +22,9 @@ let pp out self = Format.fprintf out "{@[ bg_threads=%d;@ ticker_thread=%B;@ ticker_interval_ms=%d;@ common=%a \ @]}" - bg_threads ticker_thread ticker_interval_ms Opentelemetry_client.Config.pp - common + bg_threads ticker_thread ticker_interval_ms Client_config.pp common -module Env = Opentelemetry_client.Config.Env () +module Env = Client_config.Env () let make = Env.make diff --git a/src/client-ocurl/config.mli b/src/client-ocurl/config.mli index 514ecb3e..7726de12 100644 --- a/src/client-ocurl/config.mli +++ b/src/client-ocurl/config.mli @@ -12,7 +12,7 @@ type t = { [ticker_thread] is [true]. This will be clamped between [2 ms] and some longer interval (maximum [60s] currently). Default 500. @since 0.7 *) - common: Opentelemetry_client.Config.t; + common: Opentelemetry_client.Client_config.t; (** Common configuration options @since 0.12*) } @@ -29,7 +29,7 @@ val make : ?ticker_interval_ms:int -> unit -> t) - Opentelemetry_client.Config.make + Opentelemetry_client.Client_config.make (** Make a configuration {!t}. *) -module Env : Opentelemetry_client.Config.ENV +module Env : Opentelemetry_client.Client_config.ENV diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index a58f78b7..507bc845 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -3,70 +3,27 @@ https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md *) +open Opentelemetry_client +open Common_ module OT = Opentelemetry module Config = Config -module Self_trace = Opentelemetry_client.Self_trace -module Signal = Opentelemetry_client.Signal -open Opentelemetry -include Common_ let get_headers = Config.Env.get_headers let set_headers = Config.Env.set_headers -let needs_gc_metrics = Atomic.make false - -let last_gc_metrics = Atomic.make (Mtime_clock.now ()) - -let timeout_gc_metrics = Mtime.Span.(20 * s) - -(** side channel for GC, appended to metrics batch data *) -let gc_metrics = AList.make () - -(** capture current GC metrics if {!needs_gc_metrics} is true or it has been a - long time since the last GC metrics collection, and push them into - {!gc_metrics} for later collection *) -let sample_gc_metrics_if_needed () = - let now = Mtime_clock.now () in - let alarm = Atomic.exchange needs_gc_metrics false in - let timeout () = - let elapsed = Mtime.span now (Atomic.get last_gc_metrics) in - Mtime.Span.compare elapsed timeout_gc_metrics > 0 - in - if alarm || timeout () then ( - Atomic.set last_gc_metrics now; - let l = - OT.Metrics.make_resource_metrics - ~attrs:(Opentelemetry.GC_metrics.get_runtime_attributes ()) - @@ Opentelemetry.GC_metrics.get_metrics () - in - AList.add gc_metrics l - ) - let n_errors = Atomic.make 0 let n_dropped = Atomic.make 0 -(** Something sent to the collector *) -module Event = struct - open Opentelemetry.Proto - - type t = - | E_metric of Metrics.resource_metrics list - | E_trace of Trace.resource_spans list - | E_logs of Logs.resource_logs list - | E_tick - | E_flush_all (** Flush all batches *) -end - (** Something to be sent via HTTP *) module To_send = struct open Opentelemetry.Proto type t = - | Send_metric of Metrics.resource_metrics list list - | Send_trace of Trace.resource_spans list list - | Send_logs of Logs.resource_logs list list + | Send_metric of Metrics.resource_metrics list + | Send_trace of Trace.resource_spans list + | Send_logs of Logs.resource_logs list end (** start a thread in the background, running [f()] *) @@ -110,32 +67,50 @@ let str_to_hex (s : string) : string = done; Bytes.unsafe_to_string res -module Backend_impl : sig - type t +module Exporter_impl : sig + val n_bytes_sent : int Atomic.t + + class type t = object + inherit OT.Exporter.t + + method shutdown : on_done:(unit -> unit) -> unit -> unit + end val create : stop:bool Atomic.t -> config:Config.t -> unit -> t - val send_event : t -> Event.t -> unit - - val n_bytes_sent : unit -> int - val shutdown : t -> on_done:(unit -> unit) -> unit end = struct open Opentelemetry.Proto - type t = { + let n_bytes_sent : int Atomic.t = Atomic.make 0 + + class type t = object + inherit OT.Exporter.t + + method shutdown : on_done:(unit -> unit) -> unit -> unit + end + + type state = { stop: bool Atomic.t; cleaned: bool Atomic.t; (** True when we cleaned up after closing *) config: Config.t; - q: Event.t B_queue.t; (** Queue to receive data from the user's code *) - mutable main_th: Thread.t option; (** Thread that listens on [q] *) - send_q: To_send.t B_queue.t; (** Queue for the send worker threads *) + send_q: To_send.t Sync_queue.t; (** Queue for the send worker threads *) + traces: Proto.Trace.span Batch.t; + logs: Proto.Logs.log_record Batch.t; + metrics: Proto.Metrics.metric Batch.t; mutable send_threads: Thread.t array; (** Threads that send data via http *) } + let send_batch_ (self : state) ~force ~mk_to_send (b : _ Batch.t) : unit = + match Batch.pop_if_ready ~force ~now:(Mtime_clock.now ()) b with + | None -> () + | Some l -> + let to_send = mk_to_send l in + Sync_queue.push self.send_q to_send + let send_http_ ~stop ~(config : Config.t) (client : Curl.t) ~url data : unit = let@ _sc = - Self_trace.with_ ~kind:Span.Span_kind_producer "otel-ocurl.send-http" + Self_trace.with_ ~kind:Span_kind_producer "otel-ocurl.send-http" in if Config.Env.get_debug () then @@ -146,7 +121,7 @@ end = struct in match let@ _sc = - Self_trace.with_ ~kind:Span.Span_kind_internal "curl.post" + Self_trace.with_ ~kind:Span_kind_internal "curl.post" ~attrs:[ "sz", `Int (String.length data); "url", `String url ] in Ezcurl.post ~headers ~client ~params:[] ~url ~content:(`String data) () @@ -187,31 +162,24 @@ end = struct (* avoid crazy error loop *) Thread.delay 3. - let[@inline] send_event (self : t) ev : unit = B_queue.push self.q ev - - let n_bytes_sent_ = Atomic.make 0 - - let[@inline] n_bytes_sent () = Atomic.get n_bytes_sent_ - (** Thread that, in a loop, reads from [q] to get the next message to send via http *) - let bg_thread_loop (self : t) : unit = + let bg_thread_loop (self : state) : unit = Ezcurl.with_client ?set_opts:None @@ fun client -> let config = self.config in let stop = self.stop in - let send ~name ~url ~conv signals = - let l = List.fold_left (fun acc l -> List.rev_append l acc) [] signals in + let send ~name ~url ~conv (signals : _ list) = let@ _sp = Self_trace.with_ ~kind:Span_kind_producer name - ~attrs:[ "n", `Int (List.length l) ] + ~attrs:[ "n", `Int (List.length signals) ] in - let msg = conv l in - ignore (Atomic.fetch_and_add n_bytes_sent_ (String.length msg) : int); + let msg = conv signals in + ignore (Atomic.fetch_and_add n_bytes_sent (String.length msg) : int); send_http_ ~stop ~config ~url client msg in try while not (Atomic.get stop) do - let msg = B_queue.pop self.send_q in + let msg = Sync_queue.pop self.send_q in match msg with | To_send.Send_trace tr -> send ~name:"send-traces" ~conv:Signal.Encode.traces @@ -223,252 +191,135 @@ end = struct send ~name:"send-logs" ~conv:Signal.Encode.logs ~url:config.common.url_logs logs done - with B_queue.Closed -> () - - type batches = { - traces: Proto.Trace.resource_spans Batch.t; - logs: Proto.Logs.resource_logs Batch.t; - metrics: Proto.Metrics.resource_metrics Batch.t; - } + with Sync_queue.Closed -> () let batch_max_size_ = 200 - let should_send_batch_ ?(side = []) ~config ~now (b : _ Batch.t) : bool = - (Batch.len b > 0 || side != []) - && (Batch.len b >= batch_max_size_ - || - let timeout = Mtime.Span.(config.Config.common.batch_timeout_ms * ms) in - let elapsed = Mtime.span now (Batch.time_started b) in - Mtime.Span.compare elapsed timeout >= 0) + let batch_timeout_ = Mtime.Span.(20 * s) - let main_thread_loop (self : t) : unit = - let local_q = Queue.create () in - let config = self.config in - - (* keep track of batches *) - let batches = - { - traces = Batch.create (); - logs = Batch.create (); - metrics = Batch.create (); - } - in - - let send_metrics () = - let metrics = AList.pop_all gc_metrics :: Batch.pop_all batches.metrics in - B_queue.push self.send_q (To_send.Send_metric metrics) - in - - let send_logs () = - B_queue.push self.send_q (To_send.Send_logs (Batch.pop_all batches.logs)) - in - - let send_traces () = - B_queue.push self.send_q - (To_send.Send_trace (Batch.pop_all batches.traces)) - in - - try - while not (Atomic.get self.stop) do - (* read multiple events at once *) - B_queue.pop_all self.q local_q; - - (* are we asked to flush all events? *) - let must_flush_all = ref false in - - (* how to process a single event *) - let process_ev (ev : Event.t) : unit = - match ev with - | Event.E_metric m -> Batch.push batches.metrics m - | Event.E_trace tr -> Batch.push batches.traces tr - | Event.E_logs logs -> Batch.push batches.logs logs - | Event.E_tick -> - (* the only impact of "tick" is that it wakes us up regularly *) - () - | Event.E_flush_all -> must_flush_all := true - in - - Queue.iter process_ev local_q; - Queue.clear local_q; - - if !must_flush_all then ( - if Batch.len batches.metrics > 0 || not (AList.is_empty gc_metrics) - then - send_metrics (); - if Batch.len batches.logs > 0 then send_logs (); - if Batch.len batches.traces > 0 then send_traces () - ) else ( - let now = Mtime_clock.now () in - if - should_send_batch_ ~config ~now batches.metrics - ~side:(AList.get gc_metrics) - then - send_metrics (); - - if should_send_batch_ ~config ~now batches.traces then send_traces (); - if should_send_batch_ ~config ~now batches.logs then send_logs () - ) - done - with B_queue.Closed -> () - - let create ~stop ~config () : t = + let create_state ~stop ~config () : state = let n_send_threads = max 2 config.Config.bg_threads in let self = { stop; config; - q = B_queue.create (); send_threads = [||]; - send_q = B_queue.create (); + send_q = Sync_queue.create (); cleaned = Atomic.make false; - main_th = None; + traces = Batch.make ~batch:batch_max_size_ ~timeout:batch_timeout_ (); + logs = Batch.make ~batch:batch_max_size_ ~timeout:batch_timeout_ (); + metrics = Batch.make ~batch:batch_max_size_ ~timeout:batch_timeout_ (); } in - let main_th = start_bg_thread (fun () -> main_thread_loop self) in - self.main_th <- Some main_th; - self.send_threads <- Array.init n_send_threads (fun _i -> start_bg_thread (fun () -> bg_thread_loop self)); self - let shutdown self ~on_done : unit = - Atomic.set self.stop true; - if not (Atomic.exchange self.cleaned true) then ( - (* empty batches *) - send_event self Event.E_flush_all; - (* close the incoming queue, wait for the thread to finish - before we start cutting off the background threads, so that they - have time to receive the final batches *) - B_queue.close self.q; - Option.iter Thread.join self.main_th; - (* close send queues, then wait for all threads *) - B_queue.close self.send_q; - Array.iter Thread.join self.send_threads - ); - on_done () + let maybe_send_metrics ~force (self : state) = + send_batch_ self ~force self.metrics ~mk_to_send:(fun metrics -> + let metrics = + Opentelemetry_client.Util_resources.make_resource_metrics metrics + in + To_send.Send_metric [ metrics ]) + + let maybe_send_logs ~force (self : state) = + send_batch_ self ~force self.logs ~mk_to_send:(fun logs -> + let logs = + Opentelemetry_client.Util_resources.make_resource_logs logs + in + To_send.Send_logs [ logs ]) + + let maybe_send_traces ~force (self : state) = + send_batch_ self ~force self.traces ~mk_to_send:(fun spans -> + let traces = + Opentelemetry_client.Util_resources.make_resource_spans spans + in + To_send.Send_trace [ traces ]) + + let create ~stop ~config () : #t = + let open Opentelemetry_util in + let st = create_state ~stop ~config () in + let ticker = Cb_set.create () in + object (self : #t) + method send_trace spans = + Batch.push' st.traces spans; + maybe_send_traces st ~force:false + + method send_metrics m = + Batch.push' st.metrics m; + maybe_send_metrics st ~force:false + + method send_logs m = + Batch.push' st.logs m; + maybe_send_logs st ~force:false + + method add_on_tick_callback cb = Cb_set.register ticker cb + + method tick () = Cb_set.trigger ticker + + method cleanup ~on_done () : unit = + if not (Atomic.exchange st.cleaned true) then ( + (* flush all signals *) + maybe_send_logs ~force:true st; + maybe_send_metrics ~force:true st; + maybe_send_traces ~force:true st; + + (* close send queues, then wait for all threads *) + Sync_queue.close st.send_q; + Array.iter Thread.join st.send_threads + ); + on_done () + + method shutdown ~on_done () = + Atomic.set st.stop true; + self#cleanup ~on_done () + end + + let shutdown (self : #t) ~on_done : unit = self#shutdown ~on_done () end -let create_backend ?(stop = Atomic.make false) - ?(config : Config.t = Config.make ()) () : (module Collector.BACKEND) = - let module M = struct - open Opentelemetry.Proto - open Opentelemetry.Collector - - let backend = Backend_impl.create ~stop ~config () - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - Backend_impl.send_event backend (Event.E_trace l); - ret ()); - } - - let last_sent_metrics = Atomic.make (Mtime_clock.now ()) - - (* send metrics from time to time *) - let timeout_sent_metrics = Mtime.Span.(5 * s) - - let signal_emit_gc_metrics () = - if config.common.debug then - Printf.eprintf "opentelemetry: emit GC metrics requested\n%!"; - Atomic.set needs_gc_metrics true - - let additional_metrics () : Metrics.resource_metrics list = - (* add exporter metrics to the lot? *) - let last_emit = Atomic.get last_sent_metrics in - let now = Mtime_clock.now () in - let add_own_metrics = - let elapsed = Mtime.span last_emit now in - Mtime.Span.compare elapsed timeout_sent_metrics > 0 - in - - (* there is a possible race condition here, as several threads might update - metrics at the same time. But that's harmless. *) - if add_own_metrics then ( - Atomic.set last_sent_metrics now; - let open OT.Metrics in - let now_unix = OT.Timestamp_ns.now_unix_ns () in - [ - make_resource_metrics - [ - sum ~name:"otel.export.dropped" ~is_monotonic:true - [ - int ~start_time_unix_nano:now_unix ~now:now_unix - (Atomic.get n_dropped); - ]; - sum ~name:"otel.export.errors" ~is_monotonic:true - [ - int ~start_time_unix_nano:now_unix ~now:now_unix - (Atomic.get n_errors); - ]; - ]; - ] - ) else - [] - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun m ~ret -> - let m = List.rev_append (additional_metrics ()) m in - Backend_impl.send_event backend (Event.E_metric m); - ret ()); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun m ~ret -> - Backend_impl.send_event backend (Event.E_logs m); - ret ()); - } - - let on_tick_cbs_ = Atomic.make (AList.make ()) - - let set_on_tick_callbacks = Atomic.set on_tick_cbs_ - - let tick () = - sample_gc_metrics_if_needed (); - Backend_impl.send_event backend Event.E_tick; - List.iter (fun f -> f ()) (AList.get @@ Atomic.get on_tick_cbs_) - - let cleanup ~on_done () = Backend_impl.shutdown backend ~on_done - end in - (module M) +let create_exporter ?(stop = Atomic.make false) + ?(config : Config.t = Config.make ()) () : #OT.Exporter.t = + let backend = Exporter_impl.create ~stop ~config () in + (backend :> OT.Exporter.t) (** thread that calls [tick()] regularly, to help enforce timeouts *) -let setup_ticker_thread ~stop ~sleep_ms (module B : Collector.BACKEND) () = +let setup_ticker_thread ~stop ~sleep_ms (exp : #OT.Exporter.t) () = let sleep_s = float sleep_ms /. 1000. in let tick_loop () = try while not @@ Atomic.get stop do Thread.delay sleep_s; - B.tick () + exp#tick () done - with B_queue.Closed -> () + with + | Sync_queue.Closed -> () + | exn -> + (* print and ignore *) + Printf.eprintf "otel-ocurl: ticker thread: uncaught exn:\n%s\n%!" + (Printexc.to_string exn) in start_bg_thread tick_loop let setup_ ?(stop = Atomic.make false) ?(config : Config.t = Config.make ()) () : unit = - let backend = create_backend ~stop ~config () in - Opentelemetry.Collector.set_backend backend; + let exporter = Exporter_impl.create ~stop ~config () in + OT.Exporter.Main_exporter.set exporter; Self_trace.set_enabled config.common.self_trace; if config.ticker_thread then ( (* at most a minute *) let sleep_ms = min 60_000 (max 2 config.ticker_interval_ms) in - ignore (setup_ticker_thread ~stop ~sleep_ms backend () : Thread.t) + ignore (setup_ticker_thread ~stop ~sleep_ms exporter () : Thread.t) ) let remove_backend () : unit = (* we don't need the callback, this runs in the same thread *) - OT.Collector.remove_backend () ~on_done:ignore + OT.Exporter.Main_exporter.remove () ~on_done:ignore let setup ?stop ?config ?(enable = true) () = if enable then setup_ ?stop ?config () @@ -480,4 +331,4 @@ let with_setup ?stop ?config ?(enable = true) () f = ) else f () -let n_bytes_sent = Backend_impl.n_bytes_sent +let[@inline] n_bytes_sent () = Atomic.get Exporter_impl.n_bytes_sent diff --git a/src/client-ocurl/opentelemetry_client_ocurl.mli b/src/client-ocurl/opentelemetry_client_ocurl.mli index 6d3918dc..77b8ea34 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.mli +++ b/src/client-ocurl/opentelemetry_client_ocurl.mli @@ -3,22 +3,21 @@ https://opentelemetry.io/docs/reference/specification/protocol/exporter/ *) +open Opentelemetry_atomic +open Opentelemetry_util + val get_headers : unit -> (string * string) list val set_headers : (string * string) list -> unit (** Set http headers that are sent on every http query to the collector. *) -module Atomic = Opentelemetry_atomic.Atomic module Config = Config val n_bytes_sent : unit -> int (** Global counter of bytes sent (or attempted to be sent) *) -val create_backend : - ?stop:bool Atomic.t -> - ?config:Config.t -> - unit -> - (module Opentelemetry.Collector.BACKEND) +val create_exporter : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t val setup : ?stop:bool Atomic.t -> ?config:Config.t -> ?enable:bool -> unit -> unit From 15cda06ea0ab2c66e2e941b98c48ed84864ba469 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 15:36:10 -0500 Subject: [PATCH 11/94] feat: use a pbrt encoder pool in client-ocurl --- .../opentelemetry_client_ocurl.ml | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 507bc845..3e9be01e 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -94,6 +94,7 @@ end = struct stop: bool Atomic.t; cleaned: bool Atomic.t; (** True when we cleaned up after closing *) config: Config.t; + encoder_pool: Pbrt.Encoder.t Rpool.t; send_q: To_send.t Sync_queue.t; (** Queue for the send worker threads *) traces: Proto.Trace.span Batch.t; logs: Proto.Logs.log_record Batch.t; @@ -173,7 +174,11 @@ end = struct Self_trace.with_ ~kind:Span_kind_producer name ~attrs:[ "n", `Int (List.length signals) ] in - let msg = conv signals in + let msg : string = + (* borrow encoder from buffer pool and turn [signals] into bytes *) + let@ encoder = Rpool.with_resource self.encoder_pool in + conv ?encoder:(Some encoder) signals + in ignore (Atomic.fetch_and_add n_bytes_sent (String.length msg) : int); send_http_ ~stop ~config ~url client msg in @@ -199,12 +204,19 @@ end = struct let create_state ~stop ~config () : state = let n_send_threads = max 2 config.Config.bg_threads in + let encoder_pool = + Rpool.create + ~mk_item:(fun () -> Pbrt.Encoder.create ~size:1024 ()) + ~max_size:32 ~clear:Pbrt.Encoder.reset () + in + let self = { stop; config; send_threads = [||]; send_q = Sync_queue.create (); + encoder_pool; cleaned = Atomic.make false; traces = Batch.make ~batch:batch_max_size_ ~timeout:batch_timeout_ (); logs = Batch.make ~batch:batch_max_size_ ~timeout:batch_timeout_ (); @@ -239,21 +251,28 @@ end = struct in To_send.Send_trace [ traces ]) + let[@inline] push_to_batch b e = + if e <> [] then ( + match Batch.push b e with + | `Ok -> () + | `Dropped -> Atomic.incr n_dropped + ) + let create ~stop ~config () : #t = let open Opentelemetry_util in let st = create_state ~stop ~config () in let ticker = Cb_set.create () in object (self : #t) method send_trace spans = - Batch.push' st.traces spans; + push_to_batch st.traces spans; maybe_send_traces st ~force:false method send_metrics m = - Batch.push' st.metrics m; + push_to_batch st.metrics m; maybe_send_metrics st ~force:false method send_logs m = - Batch.push' st.logs m; + push_to_batch st.logs m; maybe_send_logs st ~force:false method add_on_tick_callback cb = Cb_set.register ticker cb From d856e1d33d355e34ab2f74f98fb4e50dff629aa6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 15:36:29 -0500 Subject: [PATCH 12/94] fix client-ocurl-lwt to use the new exporter interface --- src/client-ocurl-lwt/config.ml | 6 +- src/client-ocurl-lwt/config.mli | 6 +- .../opentelemetry_client_ocurl_lwt.ml | 498 +++++++----------- .../opentelemetry_client_ocurl_lwt.mli | 5 +- 4 files changed, 196 insertions(+), 319 deletions(-) diff --git a/src/client-ocurl-lwt/config.ml b/src/client-ocurl-lwt/config.ml index 930881ff..4f3677de 100644 --- a/src/client-ocurl-lwt/config.ml +++ b/src/client-ocurl-lwt/config.ml @@ -1,7 +1,7 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t -module Env = Opentelemetry_client.Config.Env () +module Env = Opentelemetry_client.Client_config.Env () -let pp = Opentelemetry_client.Config.pp +let pp = Opentelemetry_client.Client_config.pp let make = Env.make (fun common () -> common) diff --git a/src/client-ocurl-lwt/config.mli b/src/client-ocurl-lwt/config.mli index 100bb696..dff28732 100644 --- a/src/client-ocurl-lwt/config.mli +++ b/src/client-ocurl-lwt/config.mli @@ -1,4 +1,4 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t (** Configuration. To build one, use {!make} below. This might be extended with more fields in @@ -6,7 +6,7 @@ type t = Opentelemetry_client.Config.t val pp : Format.formatter -> t -> unit -val make : (unit -> t) Opentelemetry_client.Config.make +val make : (unit -> t) Opentelemetry_client.Client_config.make (** Make a configuration {!t}. *) -module Env : Opentelemetry_client.Config.ENV +module Env : Opentelemetry_client.Client_config.ENV diff --git a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml index 8502db39..0041e2af 100644 --- a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml +++ b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml @@ -4,10 +4,9 @@ *) module OT = Opentelemetry -module Config = Config -module Signal = Opentelemetry_client.Signal -module Batch = Opentelemetry_client.Batch open Opentelemetry +open Opentelemetry_util +open Opentelemetry_client open Common_ let set_headers = Config.Env.set_headers @@ -18,35 +17,6 @@ external reraise : exn -> 'a = "%reraise" (** This is equivalent to [Lwt.reraise]. We inline it here so we don't force to use Lwt's latest version *) -let needs_gc_metrics = Atomic.make false - -let last_gc_metrics = Atomic.make (Mtime_clock.now ()) - -let timeout_gc_metrics = Mtime.Span.(20 * s) - -let gc_metrics = ref [] -(* side channel for GC, appended to {!E_metrics}'s data *) - -(* capture current GC metrics if {!needs_gc_metrics} is true, - or it has been a long time since the last GC metrics collection, - and push them into {!gc_metrics} for later collection *) -let sample_gc_metrics_if_needed () = - let now = Mtime_clock.now () in - let alarm = Atomic.compare_and_set needs_gc_metrics true false in - let timeout () = - let elapsed = Mtime.span now (Atomic.get last_gc_metrics) in - Mtime.Span.compare elapsed timeout_gc_metrics > 0 - in - if alarm || timeout () then ( - Atomic.set last_gc_metrics now; - let l = - OT.Metrics.make_resource_metrics - ~attrs:(Opentelemetry.GC_metrics.get_runtime_attributes ()) - @@ Opentelemetry.GC_metrics.get_metrics () - in - gc_metrics := l :: !gc_metrics - ) - type error = [ `Status of int * Opentelemetry.Proto.Status.status | `Failure of string @@ -61,8 +31,14 @@ let report_err_ = function | `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!" | `Failure msg -> Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg - | `Status (code, { Opentelemetry.Proto.Status.code = scode; message; details }) - -> + | `Status + ( code, + { + Opentelemetry.Proto.Status.code = scode; + message; + details; + _presence = _; + } ) -> let pp_details out l = List.iter (fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s)) @@ -98,6 +74,9 @@ end = struct let cleanup _self = () + (* FIXME: absolutely need some rate limiting somewhere, ideally as early + as possible so we can measure how many resources we drop *) + (* send the content to the remote endpoint/path *) let send (_self : t) ~url ~decode (bod : string) : ('a, error) result Lwt.t = let* r = @@ -156,315 +135,214 @@ end = struct Lwt.return r end -(** An emitter. This is used by {!Backend} below to forward traces/metrics/… - from the program to whatever collector client we have. *) -module type EMITTER = sig - open Opentelemetry.Proto +module Exporter_impl = struct + open Lwt.Syntax - val push_trace : Trace.resource_spans list -> unit + let[@inline] push_to_batch b e = + if e <> [] then ( + match Batch.push b e with + | `Ok -> () + | `Dropped -> Atomic.incr n_dropped + ) - val push_metrics : Metrics.resource_metrics list -> unit + type state = { + stop: bool Atomic.t; + cleaned: bool Atomic.t; (** True when we cleaned up after closing *) + config: Config.t; + encoder_pool: Pbrt.Encoder.t Rpool.t; + traces: Proto.Trace.span Batch.t; + logs: Proto.Logs.log_record Batch.t; + metrics: Proto.Metrics.metric Batch.t; + } - val push_logs : Logs.resource_logs list -> unit + let send_http_ (st : state) (httpc : Httpc.t) ~url data : unit Lwt.t = + let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in + match r with + | Ok () -> Lwt.return () + | Error `Sysbreak -> + Printf.eprintf "ctrl-c captured, stopping\n%!"; + Atomic.set st.stop true; + Lwt.return () + | Error err -> + (* TODO: log error _via_ otel? *) + Atomic.incr n_errors; + report_err_ err; + (* avoid crazy error loop *) + Lwt_unix.sleep 3. - val set_on_tick_callbacks : (unit -> unit) AList.t -> unit + let send_metrics_http (st : state) client + (l : Proto.Metrics.resource_metrics list) = + let msg = + let@ encoder = Rpool.with_resource st.encoder_pool in + Signal.Encode.metrics ~encoder l + in + send_http_ st client msg ~url:st.config.url_metrics - val tick : unit -> unit + let send_traces_http st client (l : Proto.Trace.resource_spans list) = + let msg = + let@ encoder = Rpool.with_resource st.encoder_pool in + Signal.Encode.traces ~encoder l + in + send_http_ st client msg ~url:st.config.url_traces - val cleanup : on_done:(unit -> unit) -> unit -> unit -end + let send_logs_http st client (l : Proto.Logs.resource_logs list) = + let msg = + let@ encoder = Rpool.with_resource st.encoder_pool in + Signal.Encode.logs ~encoder l + in + send_http_ st client msg ~url:st.config.url_logs -(* make an emitter. + (* emit metrics, if the batch is full or timeout lapsed *) + let emit_metrics_maybe (st : state) ~now ?force httpc : bool Lwt.t = + match Batch.pop_if_ready ?force ~now st.metrics with + | None -> Lwt.return false + | Some l -> + let res = Util_resources.make_resource_metrics l in + let+ () = send_metrics_http st httpc [ res ] in + true + + let emit_traces_maybe st ~now ?force httpc : bool Lwt.t = + match Batch.pop_if_ready ?force ~now st.traces with + | None -> Lwt.return false + | Some l -> + let res = Util_resources.make_resource_spans l in + let+ () = send_traces_http st httpc [ res ] in + true + + let emit_logs_maybe st ~now ?force httpc : bool Lwt.t = + match Batch.pop_if_ready ?force ~now st.logs with + | None -> Lwt.return false + | Some l -> + let res = Util_resources.make_resource_logs l in + let+ () = send_logs_http st httpc [ res ] in + true + + let emit_all_force st (httpc : Httpc.t) : unit Lwt.t = + let now = Mtime_clock.now () in + let+ (_ : bool) = emit_traces_maybe st ~now ~force:true httpc + and+ (_ : bool) = emit_logs_maybe st ~now ~force:true httpc + and+ (_ : bool) = emit_metrics_maybe st ~now ~force:true httpc in + () + + let[@inline] guard_exn_ where f = + try f () + with e -> + let bt = Printexc.get_backtrace () in + Printf.eprintf + "opentelemetry-ocurl-lwt: uncaught exception in %s: %s\n%s\n%!" where + (Printexc.to_string e) bt + + (* Lwt task that calls [tick()] regularly, to help enforce timeouts *) + let setup_ticker_ st ~tick ~finally () = + let rec tick_loop () = + if Atomic.get st.stop then ( + finally (); + Lwt.return () + ) else + let* () = Lwt_unix.sleep 0.5 in + let* () = tick () in + tick_loop () + in + Lwt.async tick_loop + + (* make an emitter. exceptions inside should be caught, see https://opentelemetry.io/docs/reference/specification/error-handling/ *) -let mk_emitter ~stop ~(config : Config.t) () : (module EMITTER) = - let open Proto in - let open Lwt.Syntax in - (* local helpers *) - let open struct + let create ~stop ~(config : Config.t) () : OT.Exporter.t = + let open Proto in + let encoder_pool = + Rpool.create + ~mk_item:(fun () -> Pbrt.Encoder.create ~size:1024 ()) + ~max_size:32 ~clear:Pbrt.Encoder.reset () + in + + (* local helpers *) let timeout = if config.batch_timeout_ms > 0 then Some Mtime.Span.(config.batch_timeout_ms * ms) else None + in - let batch_traces : Trace.resource_spans Batch.t = - Batch.make ?batch:config.batch_traces ?timeout () + let st = + { + stop; + config; + cleaned = Atomic.make false; + encoder_pool; + traces = Batch.make ?batch:config.batch_traces ?timeout (); + metrics = Batch.make ?batch:config.batch_metrics ?timeout (); + logs = Batch.make ?batch:config.batch_logs ?timeout (); + } + in + let httpc = Httpc.create () in + let ticker = Cb_set.create () in - let batch_metrics : Metrics.resource_metrics Batch.t = - Batch.make ?batch:config.batch_metrics ?timeout () - - let batch_logs : Logs.resource_logs Batch.t = - Batch.make ?batch:config.batch_logs ?timeout () - - let on_tick_cbs_ = Atomic.make (AList.make ()) - - let set_on_tick_callbacks = Atomic.set on_tick_cbs_ - - let send_http_ (httpc : Httpc.t) ~url data : unit Lwt.t = - let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in - match r with - | Ok () -> Lwt.return () - | Error `Sysbreak -> - Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set stop true; - Lwt.return () - | Error err -> - (* TODO: log error _via_ otel? *) - Atomic.incr n_errors; - report_err_ err; - (* avoid crazy error loop *) - Lwt_unix.sleep 3. - - let send_metrics_http client (l : Metrics.resource_metrics list) = - Signal.Encode.metrics l |> send_http_ client ~url:config.url_metrics - - let send_traces_http client (l : Trace.resource_spans list) = - Signal.Encode.traces l |> send_http_ client ~url:config.url_traces - - let send_logs_http client (l : Logs.resource_logs list) = - Signal.Encode.logs l |> send_http_ client ~url:config.url_logs - - (* emit metrics, if the batch is full or timeout lapsed *) - let emit_metrics_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_metrics with - | None -> Lwt.return false - | Some l -> - let batch = !gc_metrics @ l in - gc_metrics := []; - let+ () = send_metrics_http httpc batch in - true - - let emit_traces_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_traces with - | None -> Lwt.return false - | Some l -> - let+ () = send_traces_http httpc l in - true - - let emit_logs_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_logs with - | None -> Lwt.return false - | Some l -> - let+ () = send_logs_http httpc l in - true - - let[@inline] guard_exn_ where f = - try f () - with e -> - let bt = Printexc.get_backtrace () in - Printf.eprintf - "opentelemetry-ocurl-lwt: uncaught exception in %s: %s\n%s\n%!" where - (Printexc.to_string e) bt - - let emit_all_force (httpc : Httpc.t) : unit Lwt.t = + let tick_ () = + if Config.Env.get_debug () then + Printf.eprintf "tick (from %d)\n%!" (Thread.id @@ Thread.self ()); + Cb_set.trigger ticker; let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe ~now ~force:true httpc - and+ (_ : bool) = emit_logs_maybe ~now ~force:true httpc - and+ (_ : bool) = emit_metrics_maybe ~now ~force:true httpc in + let+ (_ : bool) = emit_traces_maybe st ~now httpc + and+ (_ : bool) = emit_logs_maybe st ~now httpc + and+ (_ : bool) = emit_metrics_maybe st ~now httpc in () + in - (* thread that calls [tick()] regularly, to help enforce timeouts *) - let setup_ticker_thread ~tick ~finally () = - let rec tick_thread () = - if Atomic.get stop then ( - finally (); - Lwt.return () - ) else - let* () = Lwt_unix.sleep 0.5 in - let* () = tick () in - tick_thread () - in - Lwt.async tick_thread - end in - let httpc = Httpc.create () in + setup_ticker_ st ~tick:tick_ ~finally:ignore (); - let module M = struct (* we make sure that this is thread-safe, even though we don't have a background thread. There can still be a ticker thread, and there can also be several user threads that produce spans and call the emit functions. *) + object + method send_trace e = + let@ () = guard_exn_ "push trace" in + push_to_batch st.traces e; + let now = Mtime_clock.now () in + Lwt.async (fun () -> + let+ (_ : bool) = emit_traces_maybe st ~now httpc in + ()) - let push_to_batch b e = - match Batch.push b e with - | `Ok -> () - | `Dropped -> Atomic.incr n_dropped + method send_metrics e = + let@ () = guard_exn_ "push metrics" in + push_to_batch st.metrics e; + let now = Mtime_clock.now () in + Lwt.async (fun () -> + let+ (_ : bool) = emit_metrics_maybe st ~now httpc in + ()) - let push_trace e = - let@ () = guard_exn_ "push trace" in - push_to_batch batch_traces e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_traces_maybe ~now httpc in - ()) + method send_logs e = + let@ () = guard_exn_ "push logs" in + push_to_batch st.logs e; + let now = Mtime_clock.now () in + Lwt.async (fun () -> + let+ (_ : bool) = emit_logs_maybe st ~now httpc in + ()) - let push_metrics e = - let@ () = guard_exn_ "push metrics" in - sample_gc_metrics_if_needed (); - push_to_batch batch_metrics e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_metrics_maybe ~now httpc in - ()) + method add_on_tick_callback f = Cb_set.register ticker f - let push_logs e = - let@ () = guard_exn_ "push logs" in - push_to_batch batch_logs e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_logs_maybe ~now httpc in - ()) + (* if called in a blocking context: work in the background *) + method tick () = Lwt.async tick_ - let set_on_tick_callbacks = set_on_tick_callbacks - - let tick_ () = - if Config.Env.get_debug () then - Printf.eprintf "tick (from %d)\n%!" (tid ()); - sample_gc_metrics_if_needed (); - List.iter - (fun f -> - try f () - with e -> - Printf.eprintf "on tick callback raised: %s\n" - (Printexc.to_string e)) - (AList.get @@ Atomic.get on_tick_cbs_); - let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe ~now httpc - and+ (_ : bool) = emit_logs_maybe ~now httpc - and+ (_ : bool) = emit_metrics_maybe ~now httpc in - () - - let () = setup_ticker_thread ~tick:tick_ ~finally:ignore () - - (* if called in a blocking context: work in the background *) - let tick () = Lwt.async tick_ - - let cleanup ~on_done () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: exiting…\n%!"; - Lwt.async (fun () -> - let* () = emit_all_force httpc in - Httpc.cleanup httpc; - on_done (); - Lwt.return ()) - end in - (module M) - -module Backend - (Arg : sig - val stop : bool Atomic.t - - val config : Config.t - end) - () : Opentelemetry.Collector.BACKEND = struct - include (val mk_emitter ~stop:Arg.stop ~config:Arg.config ()) - - open Opentelemetry.Proto - open Opentelemetry.Collector - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send spans %a@." - (Format.pp_print_list Trace.pp_resource_spans) - l); - push_trace l; - ret ()); - } - - let last_sent_metrics = Atomic.make (Mtime_clock.now ()) - - let timeout_sent_metrics = Mtime.Span.(5 * s) - (* send metrics from time to time *) - - let signal_emit_gc_metrics () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: emit GC metrics requested\n%!"; - Atomic.set needs_gc_metrics true - - let additional_metrics () : Metrics.resource_metrics list = - (* add exporter metrics to the lot? *) - let last_emit = Atomic.get last_sent_metrics in - let now = Mtime_clock.now () in - let add_own_metrics = - let elapsed = Mtime.span last_emit now in - Mtime.Span.compare elapsed timeout_sent_metrics > 0 - in - - (* there is a possible race condition here, as several threads might update - metrics at the same time. But that's harmless. *) - if add_own_metrics then ( - Atomic.set last_sent_metrics now; - let open OT.Metrics in - [ - make_resource_metrics - [ - sum ~name:"otel.export.dropped" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_dropped); - ]; - sum ~name:"otel.export.errors" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_errors); - ]; - ]; - ] - ) else - [] - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send metrics %a@." - (Format.pp_print_list Metrics.pp_resource_metrics) - m); - - let m = List.rev_append (additional_metrics ()) m in - push_metrics m; - ret ()); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send logs %a@." - (Format.pp_print_list Logs.pp_resource_logs) - m); - - push_logs m; - ret ()); - } + method cleanup ~on_done () = + if Config.Env.get_debug () then + Printf.eprintf "opentelemetry: exiting…\n%!"; + Lwt.async (fun () -> + let* () = emit_all_force st httpc in + Httpc.cleanup httpc; + on_done (); + Lwt.return ()) + end end let create_backend ?(stop = Atomic.make false) ?(config = Config.make ()) () = - let module B = - Backend - (struct - let stop = stop - - let config = config - end) - () - in - (module B : OT.Collector.BACKEND) + Exporter_impl.create ~stop ~config () let setup_ ?stop ?config () : unit = - let backend = create_backend ?stop ?config () in - OT.Collector.set_backend backend; + let exp = create_backend ?stop ?config () in + OT.Exporter.Main_exporter.set exp; () let setup ?stop ?config ?(enable = true) () = @@ -472,7 +350,9 @@ let setup ?stop ?config ?(enable = true) () = let remove_backend () : unit Lwt.t = let done_fut, done_u = Lwt.wait () in - OT.Collector.remove_backend ~on_done:(fun () -> Lwt.wakeup_later done_u ()) (); + OT.Exporter.Main_exporter.remove + ~on_done:(fun () -> Lwt.wakeup_later done_u ()) + (); done_fut let with_setup ?stop ?(config = Config.make ()) ?(enable = true) () f : _ Lwt.t diff --git a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli index b20d37d3..0e02d495 100644 --- a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli +++ b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli @@ -13,10 +13,7 @@ val set_headers : (string * string) list -> unit module Config = Config val create_backend : - ?stop:bool Atomic.t -> - ?config:Config.t -> - unit -> - (module Opentelemetry.Collector.BACKEND) + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t (** Create a new backend using lwt and ezcurl-lwt *) val setup : From 9eaf31ec9003f8670df597d2ebbde03722fd1c6c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 16:06:20 -0500 Subject: [PATCH 13/94] refator core OTEL: remove Scope, directly use `Span` as builder now that fields are mutable, it's cheaper and easier --- src/core/key_value.ml | 3 + src/core/opentelemetry.ml | 9 --- src/core/scope.ml | 131 -------------------------------------- src/core/scope.mli | 89 -------------------------- src/core/span.ml | 95 +++++++++++++++++++++++++-- src/core/span.mli | 79 ++++++++++++++++++++--- src/core/tracer.ml | 8 +-- src/core/value.ml | 8 +++ 8 files changed, 175 insertions(+), 247 deletions(-) delete mode 100644 src/core/scope.ml delete mode 100644 src/core/scope.mli diff --git a/src/core/key_value.ml b/src/core/key_value.ml index 6760c340..36ee087c 100644 --- a/src/core/key_value.ml +++ b/src/core/key_value.ml @@ -6,3 +6,6 @@ let conv (k, v) = let open Proto.Common in let value = Value.conv v in make_key_value ~key:k ?value () + +let of_otel (kv : Proto.Common.key_value) : t = + kv.key, Value.of_otel_opt kv.value diff --git a/src/core/opentelemetry.ml b/src/core/opentelemetry.ml index ec84d0e4..73e34600 100644 --- a/src/core/opentelemetry.ml +++ b/src/core/opentelemetry.ml @@ -1,7 +1,5 @@ (** Opentelemetry types and instrumentation *) -open Common_ - module Rand_bytes = Rand_bytes (** Generation of random identifiers. *) @@ -29,9 +27,6 @@ module Timestamp_ns = Timestamp_ns module Exporter = Exporter module Collector = Exporter [@@deprecated "Use 'Exporter' instead"] -module Tick_callbacks = Tick_callbacks -(** Helper to implement part of the exporter *) - (** {2 Identifiers} *) module Trace_id = Trace_id @@ -63,10 +58,6 @@ module Span_link = Span_link module Span_status = Span_status module Span_kind = Span_kind -(** {2 Scopes} *) - -module Scope = Scope - (** {2 Traces} *) module Span = Span diff --git a/src/core/scope.ml b/src/core/scope.ml deleted file mode 100644 index aa5cb19a..00000000 --- a/src/core/scope.ml +++ /dev/null @@ -1,131 +0,0 @@ -open Common_ - -type item_list = - | Nil - | Ev of Event.t * item_list - | Attr of Key_value.t * item_list - | Span_link of Span_link.t * item_list - | Span_status of Span_status.t * item_list - | Span_kind of Span_kind.t * item_list - -type t = { - trace_id: Trace_id.t; - span_id: Span_id.t; - mutable items: item_list; -} - -let attrs scope = - let rec loop acc = function - | Nil -> acc - | Attr (attr, l) -> loop (attr :: acc) l - | Ev (_, l) | Span_kind (_, l) | Span_link (_, l) | Span_status (_, l) -> - loop acc l - in - loop [] scope.items - -let events scope = - let rec loop acc = function - | Nil -> acc - | Ev (event, l) -> loop (event :: acc) l - | Attr (_, l) | Span_kind (_, l) | Span_link (_, l) | Span_status (_, l) -> - loop acc l - in - loop [] scope.items - -let links scope = - let rec loop acc = function - | Nil -> acc - | Span_link (span_link, l) -> loop (span_link :: acc) l - | Ev (_, l) | Span_kind (_, l) | Attr (_, l) | Span_status (_, l) -> - loop acc l - in - loop [] scope.items - -let status scope = - let rec loop = function - | Nil -> None - | Span_status (status, _) -> Some status - | Ev (_, l) | Attr (_, l) | Span_kind (_, l) | Span_link (_, l) -> loop l - in - loop scope.items - -let kind scope = - let rec loop = function - | Nil -> None - | Span_kind (k, _) -> Some k - | Ev (_, l) | Span_status (_, l) | Attr (_, l) | Span_link (_, l) -> loop l - in - loop scope.items - -let make ~trace_id ~span_id ?(events = []) ?(attrs = []) ?(links = []) ?status - () : t = - let items = - let items = - match status with - | None -> Nil - | Some status -> Span_status (status, Nil) - in - let items = List.fold_left (fun acc ev -> Ev (ev, acc)) items events in - let items = List.fold_left (fun acc attr -> Attr (attr, acc)) items attrs in - List.fold_left (fun acc link -> Span_link (link, acc)) items links - in - { trace_id; span_id; items } - -let[@inline] to_span_link ?trace_state ?attrs ?dropped_attributes_count - (self : t) : Span_link.t = - Span_link.make ?trace_state ?attrs ?dropped_attributes_count - ~trace_id:self.trace_id ~span_id:self.span_id () - -let[@inline] to_span_ctx (self : t) : Span_ctx.t = - Span_ctx.make ~trace_id:self.trace_id ~parent_id:self.span_id () - -open struct - let[@inline] is_not_dummy (self : t) : bool = Span_id.is_valid self.span_id -end - -let[@inline] add_event (self : t) (ev : unit -> Event.t) : unit = - if is_not_dummy self then self.items <- Ev (ev (), self.items) - -let[@inline] record_exception (self : t) (exn : exn) - (bt : Printexc.raw_backtrace) : unit = - if is_not_dummy self then ( - let ev = - Event.make "exception" - ~attrs: - [ - "exception.message", `String (Printexc.to_string exn); - "exception.type", `String (Printexc.exn_slot_name exn); - ( "exception.stacktrace", - `String (Printexc.raw_backtrace_to_string bt) ); - ] - in - self.items <- Ev (ev, self.items) - ) - -let[@inline] add_attrs (self : t) (attrs : unit -> Key_value.t list) : unit = - if is_not_dummy self then - self.items <- - List.fold_left (fun acc attr -> Attr (attr, acc)) self.items (attrs ()) - -let[@inline] add_links (self : t) (links : unit -> Span_link.t list) : unit = - if is_not_dummy self then - self.items <- - List.fold_left - (fun acc link -> Span_link (link, acc)) - self.items (links ()) - -let set_status (self : t) (status : Span_status.t) : unit = - if is_not_dummy self then self.items <- Span_status (status, self.items) - -let set_kind (self : t) (k : Span_kind.t) : unit = - if is_not_dummy self then self.items <- Span_kind (k, self.items) - -let ambient_scope_key : t Ambient_context.key = Ambient_context.create_key () - -let get_ambient_scope ?scope () : t option = - match scope with - | Some _ -> scope - | None -> Ambient_context.get ambient_scope_key - -let[@inline] with_ambient_scope (sc : t) (f : unit -> 'a) : 'a = - Ambient_context.with_binding ambient_scope_key sc (fun _ -> f ()) diff --git a/src/core/scope.mli b/src/core/scope.mli deleted file mode 100644 index 9ba60d0e..00000000 --- a/src/core/scope.mli +++ /dev/null @@ -1,89 +0,0 @@ -(** Scopes. - - A scope is a trace ID and the span ID of the currently active span. *) - -open Common_ - -type item_list - -type t = { - trace_id: Trace_id.t; - span_id: Span_id.t; - mutable items: item_list; -} - -val attrs : t -> Key_value.t list - -val events : t -> Event.t list - -val links : t -> Span_link.t list - -val status : t -> Span_status.t option - -val kind : t -> Span_kind.t option - -val make : - trace_id:Trace_id.t -> - span_id:Span_id.t -> - ?events:Event.t list -> - ?attrs:Key_value.t list -> - ?links:Span_link.t list -> - ?status:Span_status.t -> - unit -> - t - -val to_span_link : - ?trace_state:string -> - ?attrs:Key_value.t list -> - ?dropped_attributes_count:int -> - t -> - Span_link.t -(** Turn the scope into a span link *) - -val to_span_ctx : t -> Span_ctx.t -(** Turn the scope into a span context *) - -val add_event : t -> (unit -> Event.t) -> unit -(** Add an event to the scope. It will be aggregated into the span. - - Note that this takes a function that produces an event, and will only call - it if there is an instrumentation backend. *) - -val record_exception : t -> exn -> Printexc.raw_backtrace -> unit - -val add_attrs : t -> (unit -> Key_value.t list) -> unit -(** Add attributes to the scope. It will be aggregated into the span. - - Note that this takes a function that produces attributes, and will only call - it if there is an instrumentation backend. *) - -val add_links : t -> (unit -> Span_link.t list) -> unit -(** Add links to the scope. It will be aggregated into the span. - - Note that this takes a function that produces links, and will only call it - if there is an instrumentation backend. *) - -val set_status : t -> Span_status.t -> unit -(** set the span status. - - Note that this function will be called only if there is an instrumentation - backend. *) - -val set_kind : t -> Span_kind.t -> unit -(** Set the span's kind. - @since 0.11 *) - -val ambient_scope_key : t Ambient_context.key -(** The opaque key necessary to access/set the ambient scope with - {!Ambient_context}. *) - -val get_ambient_scope : ?scope:t -> unit -> t option -(** Obtain current scope from {!Ambient_context}, if available. *) - -val with_ambient_scope : t -> (unit -> 'a) -> 'a -(** [with_ambient_scope sc thunk] calls [thunk()] in a context where [sc] is the - (thread|continuation)-local scope, then reverts to the previous local scope, - if any. - - @see - ambient-context docs *) diff --git a/src/core/span.ml b/src/core/span.ml index 1ea8cb0b..f0135b40 100644 --- a/src/core/span.ml +++ b/src/core/span.ml @@ -22,11 +22,15 @@ type key_value = | `None ] -let id self = Span_id.of_bytes self.span_id +let[@inline] id self = Span_id.of_bytes self.span_id -let create ?(kind = !Globals.default_span_kind) ?(id = Span_id.create ()) - ?trace_state ?(attrs = []) ?(events = []) ?status ~trace_id ?parent - ?(links = []) ~start_time ~end_time name : t * id = +let[@inline] trace_id self = Trace_id.of_bytes self.trace_id + +let[@inline] is_not_dummy self = Span_id.is_valid (id self) + +let make ?(kind = !Globals.default_span_kind) ?trace_state ?(attrs = []) + ?(events = []) ?status ~trace_id ~id ?parent ?(links = []) ~start_time + ~end_time name : t = let trace_id = Trace_id.to_bytes trace_id in let parent_span_id = Option.map Span_id.to_bytes parent in let attributes = List.map Key_value.conv attrs in @@ -35,4 +39,85 @@ let create ?(kind = !Globals.default_span_kind) ?(id = Span_id.create ()) ~attributes ~events ?trace_state ?status ~kind ~name ~links ~start_time_unix_nano:start_time ~end_time_unix_nano:end_time () in - span, id + span + +let create_new ?kind ?(id = Span_id.create ()) ?trace_state ?attrs ?events + ?status ~trace_id ?parent ?links ~start_time ~end_time name : t = + make ?kind ~id ~trace_id ?trace_state ?attrs ?events ?status ?parent ?links + ~start_time ~end_time name + +let attrs self = self.attributes |> List.rev_map Key_value.of_otel + +let events self = self.events + +let links self : Span_link.t list = self.links + +let status self = self.status + +let kind self = + let k = self.kind in + if k = Span_kind_unspecified then + None + else + Some k + +let to_span_link (self : t) : Span_link.t = + make_span_link ~attributes:self.attributes + ?dropped_attributes_count: + (if span_has_dropped_attributes_count self then + Some self.dropped_attributes_count + else + None) + ?trace_state: + (if span_has_trace_state self then + Some self.trace_state + else + None) + ~trace_id:self.trace_id ~span_id:self.span_id () + +let[@inline] to_span_ctx (self : t) : Span_ctx.t = + Span_ctx.make ~trace_id:(trace_id self) ~parent_id:(id self) () + +let[@inline] add_event self ev : unit = + if is_not_dummy self then span_set_events self (ev :: self.events) + +let add_event' self ev : unit = if is_not_dummy self then add_event self (ev ()) + +let record_exception (self : t) (exn : exn) (bt : Printexc.raw_backtrace) : unit + = + if is_not_dummy self then ( + let ev = + Event.make "exception" + ~attrs: + [ + "exception.message", `String (Printexc.to_string exn); + "exception.type", `String (Printexc.exn_slot_name exn); + ( "exception.stacktrace", + `String (Printexc.raw_backtrace_to_string bt) ); + ] + in + add_event self ev + ) + +let[@inline] add_attrs (self : t) (attrs : unit -> Key_value.t list) : unit = + if is_not_dummy self then ( + let attrs = List.rev_map Key_value.conv (attrs ()) in + let attrs = List.rev_append attrs self.attributes in + span_set_attributes self attrs + ) + +let add_links (self : t) (links : Span_link.t list) : unit = + if links <> [] then ( + let links = List.rev_append links self.links in + span_set_links self links + ) + +let add_links' (self : t) (links : unit -> Span_link.t list) : unit = + if is_not_dummy self then ( + let links = List.rev_append (links ()) self.links in + span_set_links self links + ) + +let set_status = span_set_status + +let set_kind = span_set_kind diff --git a/src/core/span.mli b/src/core/span.mli index cfb9a2de..673654a9 100644 --- a/src/core/span.mli +++ b/src/core/span.mli @@ -24,9 +24,30 @@ val id : t -> Span_id.t type key_value = Key_value.t -val create : +val make : ?kind:kind -> - ?id:id -> + ?trace_state:string -> + ?attrs:key_value list -> + ?events:Event.t list -> + ?status:status -> + trace_id:Trace_id.t -> + id:Span_id.t -> + ?parent:id -> + ?links:Span_link.t list -> + start_time:Timestamp_ns.t -> + end_time:Timestamp_ns.t -> + string -> + t +(** [make ~trace_id ~id name] creates a new span + @param trace_id the trace this belongs to + @param parent parent span, if any + @param links + list of links to other spans, each with their trace state (see + {{:https://www.w3.org/TR/trace-context/#tracestate-header} w3.org}) *) + +val create_new : + ?kind:kind -> + ?id:Span_id.t -> ?trace_state:string -> ?attrs:key_value list -> ?events:Event.t list -> @@ -37,10 +58,50 @@ val create : start_time:Timestamp_ns.t -> end_time:Timestamp_ns.t -> string -> - t * id -(** [create ~trace_id name] creates a new span with its unique ID. - @param trace_id the trace this belongs to - @param parent parent span, if any - @param links - list of links to other spans, each with their trace state (see - {{:https://www.w3.org/TR/trace-context/#tracestate-header} w3.org}) *) + t + +val attrs : t -> Key_value.t list + +val events : t -> Event.t list + +val links : t -> Span_link.t list + +val status : t -> Span_status.t option + +val kind : t -> Span_kind.t option + +val to_span_link : t -> Span_link.t +(** Turn the scope into a span link *) + +val to_span_ctx : t -> Span_ctx.t +(** Turn the scope into a span context *) + +val add_event : t -> Event.t -> unit + +val add_event' : t -> (unit -> Event.t) -> unit +(** Add an event to the scope. It will be aggregated into the span. + + Note that this takes a function that produces an event, and will only call + it if there is an instrumentation backend. *) + +val record_exception : t -> exn -> Printexc.raw_backtrace -> unit + +val add_links : t -> Span_link.t list -> unit + +val add_links' : t -> (unit -> Span_link.t list) -> unit +(** Add links to the scope. It will be aggregated into the span. + + Note that this takes a function that produces links, and will only call it + if there is an instrumentation backend. *) + +val add_attrs : t -> (unit -> Key_value.t list) -> unit + +val set_status : t -> Span_status.t -> unit +(** set the span status. + + Note that this function will be called only if there is an instrumentation + backend. *) + +val set_kind : t -> Span_kind.t -> unit +(** Set the span's kind. + @since 0.11 *) diff --git a/src/core/tracer.ml b/src/core/tracer.ml index 6045df9d..f15c6985 100644 --- a/src/core/tracer.ml +++ b/src/core/tracer.ml @@ -49,11 +49,11 @@ let simple_main_exporter : t = {b NOTE} be careful not to call this inside a Gc alarm, as it can cause deadlocks. *) -let emit ?service_name:_ ?attrs:_ (spans : span list) : unit = +let (emit [@deprecated "use an explicit tracer"]) = + fun ?service_name:_ ?attrs:_ (spans : span list) : unit -> match Exporter.Main_exporter.get () with | None -> () | Some exp -> exp#send_trace spans -[@@deprecated "use an explicit tracer"] (* TODO: remove scope, use span directly *) type scope = Scope.t = { @@ -113,11 +113,11 @@ let with_' ?(tracer = simple_main_exporter) ?(force_new_trace_id = false) (make_status ~code:Status_code_error ~message:(Printexc.to_string e) ())) in - let span, _ = + let span = (* TODO: should the attrs passed to with_ go on the Span (in Span.create) or on the ResourceSpan (in emit)? (question also applies to Opentelemetry_lwt.Trace.with) *) - Span.create ?kind ~trace_id ?parent ~links:(Scope.links scope) ~id:span_id + Span.make ?kind ~trace_id ?parent ~links:(Scope.links scope) ~id:span_id ?trace_state ~attrs:(Scope.attrs scope) ~events:(Scope.events scope) ~start_time ~end_time:(Timestamp_ns.now_unix_ns ()) diff --git a/src/core/value.ml b/src/core/value.ml index 97fc0503..d3d07c0e 100644 --- a/src/core/value.ml +++ b/src/core/value.ml @@ -17,3 +17,11 @@ let conv = | `Bool b -> Some (Bool_value b) | `Float f -> Some (Double_value f) | `None -> None + +let of_otel_opt (v : Proto.Common.any_value option) : t = + match v with + | Some (Int_value i) -> `Int (Int64.to_int i) + | Some (String_value s) -> `String s + | Some (Bool_value b) -> `Bool b + | Some (Double_value f) -> `Float f + | Some (Array_value _ | Kvlist_value _ | Bytes_value _) | None -> `None From 0054671b0b06569eba45fe4797c9abe85f796457 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 16:06:43 -0500 Subject: [PATCH 14/94] wip: trace --- src/trace/opentelemetry_trace.ml | 72 +++++--------- src/trace/opentelemetry_trace.mli | 157 ++---------------------------- 2 files changed, 33 insertions(+), 196 deletions(-) diff --git a/src/trace/opentelemetry_trace.ml b/src/trace/opentelemetry_trace.ml index 1d2f7f34..ba3c8462 100644 --- a/src/trace/opentelemetry_trace.ml +++ b/src/trace/opentelemetry_trace.ml @@ -50,57 +50,30 @@ end open Conv -module Well_known = struct - let spankind_key = "otrace.spankind" - - let internal = `String "INTERNAL" - - let server = `String "SERVER" - - let client = `String "CLIENT" - - let producer = `String "PRODUCER" - - let consumer = `String "CONSUMER" - - let spankind_of_string = - let open Otel.Span in - function - | "INTERNAL" -> Span_kind_internal - | "SERVER" -> Span_kind_server - | "CLIENT" -> Span_kind_client - | "PRODUCER" -> Span_kind_producer - | "CONSUMER" -> Span_kind_consumer - | _ -> Span_kind_unspecified - - let otel_attrs_of_otrace_data data = - let kind : Otel.Span.kind ref = ref Otel.Span.Span_kind_unspecified in - let data = - List.filter_map - (function - | name, `String v when name = "otrace.spankind" -> - kind := spankind_of_string v; - None - | x -> Some x) - data - in - !kind, data - - (** Key to store an error [Otel.Span.status] with the message. Set - ["otrace.error" = "mymsg"] in a span data to set the span's status to - [{message="mymsg"; code=Error}]. *) - let status_error_key = "otrace.error" -end - -open Well_known - let on_internal_error = ref (fun msg -> Printf.eprintf "error in Opentelemetry_trace: %s\n%!" msg) -type Otrace.extension_event += - | Ev_link_span of Otrace.explicit_span * Otrace.explicit_span - | Ev_set_span_kind of Otrace.explicit_span * Otel.Span_kind.t - | Ev_record_exn of Otrace.explicit_span * exn * Printexc.raw_backtrace +module Extensions = struct + type Otrace.extension_event += + | Ev_link_span of Otrace.explicit_span * Otrace.explicit_span + | Ev_record_exn of Otrace.explicit_span * exn * Printexc.raw_backtrace + | Ev_set_span_kind of Otrace.explicit_span * Otel.Span_kind.t +end + +open Extensions +module Span_tbl = Trace_subscriber.Span_tbl + +(* TODO: subscriber +type state = { + foo: unit (* TODO: *) +} + +module Callbacks +*) + +let subscriber_of_exporter _ = assert false + +let collector_of_exporter _ = assert false module Internal = struct type span_begin = { @@ -190,7 +163,8 @@ module Internal = struct { start_time; name; __FILE__; __LINE__; __FUNCTION__; scope; parent } = let open Otel in let end_time = Timestamp_ns.now_unix_ns () in - let kind, attrs = otel_attrs_of_otrace_data (Scope.attrs scope) in + let kind = Scope.kind scope in + let attrs = Scope.attrs scope in let status : Span_status.t = match List.assoc_opt Well_known.status_error_key attrs with diff --git a/src/trace/opentelemetry_trace.mli b/src/trace/opentelemetry_trace.mli index eb5591fa..a8d511f6 100644 --- a/src/trace/opentelemetry_trace.mli +++ b/src/trace/opentelemetry_trace.mli @@ -9,26 +9,13 @@ and implicit scope (in {!Internal.M.with_span}, via {!Ambient_context}) are supported; see the detailed notes on {!Internal.M.enter_manual_span}. - {1:wellknown Well-known identifiers} - - Because [ocaml-trace]'s API is a subset of OpenTelemetry functionality, this - interface allows for a few 'well-known' identifiers to be used in - [Trace]-instrumented libraries that wish to further support OpenTelemetry - usage. - - (These strings will not change in subsequent versions of this library, so - you do not need to depend on [opentelemetry.trace] to use them.) - - - If a key of exactly ["otrace.spankind"] is included in the - {!Trace_core.user_data} passed to [with_span] et al., it will be used as - the {!Opentelemetry.Span.kind} of the emitted span. (See - {!Internal.spankind_of_string} for the list of supported values.) + We use [Trace_core.extension_event] to add more features on top of the + common tracing interface. For example to set the "span kind": {[ - let describe () = [ Opentelemetry_trace.(spankind_key, client) ] in - Trace_core.with_span ~__FILE__ ~__LINE__ ~data:describe "my-span" - @@ fun _ -> - (* ... *) + let@ span = Trace_core.with_span ~__FILE__ ~__LINE__ "my-span" in + Opentelemetry_trace.set_span_kind span Span_kind_client + (* ... *) ]} *) module Otel := Opentelemetry @@ -78,7 +65,12 @@ val setup_with_otel_exporter : #Opentelemetry.Exporter.t -> unit val setup_with_otel_backend : #Opentelemetry.Exporter.t -> unit [@@deprecated "use setup_with_otel_exporter"] +val subscriber_of_exporter : #Otel.Exporter.t -> Trace_subscriber.t + +val collector_of_exporter : #Otel.Exporter.t -> Trace_core.collector + val collector : unit -> Trace_core.collector +[@@deprecated "use collector_of_exporter, avoid global state"] (** Make a Trace collector that uses the OTEL backend to send spans and logs *) val link_spans : Otrace.explicit_span -> Otrace.explicit_span -> unit @@ -115,132 +107,3 @@ module Well_known : sig Otel.Span.kind * Otel.Span.key_value list end [@@deprecated "use the regular functions for this"] - -(**/**) - -(** Internal implementation details; do not consider these stable. *) -module Internal : sig - module M : sig - val with_span : - __FUNCTION__:string option -> - __FILE__:string -> - __LINE__:int -> - data:(string * Otrace.user_data) list -> - string (* span name *) -> - (Otrace.span -> 'a) -> - 'a - (** Implements {!Trace_core.Collector.S.with_span}, with the OpenTelemetry - collector as the backend. Invoked via {!Trace_core.with_span}. - - Notably, this has the same implicit-scope semantics as - {!Opentelemetry.Trace.with_}, and requires configuration of - {!Ambient_context}. - - @see - ambient-context docs *) - - val enter_manual_span : - parent:Otrace.explicit_span_ctx option -> - flavor:'a -> - __FUNCTION__:string option -> - __FILE__:string -> - __LINE__:int -> - data:(string * Otrace.user_data) list -> - string (* span name *) -> - Otrace.explicit_span - (** Implements {!Trace_core.Collector.S.enter_manual_span}, with the - OpenTelemetry collector as the backend. Invoked at - {!Trace_core.enter_manual_toplevel_span} and - {!Trace_core.enter_manual_sub_span}; requires an eventual call to - {!Trace_core.exit_manual_span}. - - These 'manual span' functions {e do not} implement the same implicit- - scope semantics of {!with_span}; and thus don't need to wrap a single - stack-frame / callback; you can freely enter a span at any point, store - the returned {!Trace_core.explicit_span}, and exit it at any later point - with {!Trace_core.exit_manual_span}. - - However, for that same reason, they also cannot update the - {!Ambient_context} — that is, when you invoke the various [manual] - functions, if you then invoke other functions that use - {!Trace_core.with_span}, those callees {e will not} see the span you - entered manually as their [parent]. - - Generally, the best practice is to only use these [manual] functions at - the 'leaves' of your callstack: that is, don't invoke user callbacks - from within them; or if you do, make sure to pass the [explicit_span] - you recieve from this function onwards to the user callback, so they can - create further child-spans. *) - - val exit_manual_span : Otrace.explicit_span -> unit - (** Implements {!Trace_core.Collector.S.exit_manual_span}, with the - OpenTelemetry collector as the backend. Invoked at - {!Trace_core.exit_manual_span}. Expects the [explicit_span] returned - from an earlier call to {!Trace_core.enter_manual_toplevel_span} or - {!Trace_core.enter_manual_sub_span}. - - (See the notes at {!enter_manual_span} about {!Ambient_context}.) *) - - val add_data_to_span : - Otrace.span -> (string * Otrace.user_data) list -> unit - - val add_data_to_manual_span : - Otrace.explicit_span -> (string * Otrace.user_data) list -> unit - - val message : - ?span:Otrace.span -> - data:(string * Otrace.user_data) list -> - string -> - unit - - val shutdown : unit -> unit - - val name_process : string -> unit - - val name_thread : string -> unit - - val counter_int : - data:(string * Otrace.user_data) list -> string -> int -> unit - - val counter_float : - data:(string * Otrace.user_data) list -> string -> float -> unit - end - - type span_begin = { - start_time: int64; - name: string; - __FILE__: string; - __LINE__: int; - __FUNCTION__: string option; - scope: Otel.Scope.t; - parent: Otel.Span_ctx.t option; - } - - module Active_span_tbl : Hashtbl.S with type key = Otrace.span - - (** Table indexed by ocaml-trace spans. *) - module Active_spans : sig - type t = private { tbl: span_begin Active_span_tbl.t } [@@unboxed] - - val create : unit -> t - - val k_tls : t TLS.t - - val get : unit -> t - end - - val otrace_of_otel : Otel.Span_id.t -> Otrace.span - - val enter_span' : - ?explicit_parent:Otrace.explicit_span_ctx -> - __FUNCTION__:string option -> - __FILE__:string -> - __LINE__:int -> - data:(string * Otrace.user_data) list -> - string -> - Otrace.span * span_begin - - val exit_span' : Otrace.span -> span_begin -> Otel.Span.t -end - -(**/**) From f0cd1ad3754adaa61a2c76b973efed2e6781070f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 16:06:50 -0500 Subject: [PATCH 15/94] perf: avoid building closures in Signal --- src/client/signal.ml | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/client/signal.ml b/src/client/signal.ml index d3fafcfa..e3337c60 100644 --- a/src/client/signal.ml +++ b/src/client/signal.ml @@ -37,7 +37,7 @@ let is_logs = function | _ -> false module Encode = struct - let resource_to_string ~encoder ~ctor ~enc resource = + let resource_to_string ~encoder ~ctor ~enc resource : string = let encoder = match encoder with | Some e -> @@ -51,26 +51,23 @@ module Encode = struct Pbrt.Encoder.to_string encoder let logs ?encoder resource_logs = - resource_logs - |> resource_to_string ~encoder - ~ctor:(fun r -> - Logs_service.make_export_logs_service_request ~resource_logs:r ()) - ~enc:Logs_service.encode_pb_export_logs_service_request + resource_to_string ~encoder resource_logs + ~ctor:(fun r -> + Logs_service.make_export_logs_service_request ~resource_logs:r ()) + ~enc:Logs_service.encode_pb_export_logs_service_request let metrics ?encoder resource_metrics = - resource_metrics - |> resource_to_string ~encoder - ~ctor:(fun r -> - Metrics_service.make_export_metrics_service_request - ~resource_metrics:r ()) - ~enc:Metrics_service.encode_pb_export_metrics_service_request + resource_to_string ~encoder resource_metrics + ~ctor:(fun r -> + Metrics_service.make_export_metrics_service_request ~resource_metrics:r + ()) + ~enc:Metrics_service.encode_pb_export_metrics_service_request let traces ?encoder resource_spans = - resource_spans - |> resource_to_string ~encoder - ~ctor:(fun r -> - Trace_service.make_export_trace_service_request ~resource_spans:r ()) - ~enc:Trace_service.encode_pb_export_trace_service_request + resource_to_string ~encoder resource_spans + ~ctor:(fun r -> + Trace_service.make_export_trace_service_request ~resource_spans:r ()) + ~enc:Trace_service.encode_pb_export_trace_service_request end module Decode = struct From d52345c3a82d05d64d140dd13f45127808809a9c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 3 Dec 2025 16:07:14 -0500 Subject: [PATCH 16/94] wip: refactor --- .../opentelemetry_client_cohttp_eio.ml | 6 +++--- .../opentelemetry_client_cohttp_lwt.ml | 6 +++--- src/lwt/opentelemetry_lwt.ml | 21 +++++++++++-------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml index 9f71d52f..67b7b169 100644 --- a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml +++ b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml @@ -216,7 +216,7 @@ module type EMITTER = sig val push_logs : Logs.resource_logs list -> unit - val set_on_tick_callbacks : (unit -> unit) AList.t -> unit + val set_on_tick_callbacks : (unit -> unit) Alist.t -> unit val tick : unit -> unit @@ -312,7 +312,7 @@ let mk_emitter ~stop ~net (config : Config.t) : (module EMITTER) = Fiber.fork ~sw @@ emit_metrics_maybe ~now ~force; Fiber.fork ~sw @@ emit_traces_maybe ~now ~force - let on_tick_cbs_ = Atomic.make (AList.make ()) + let on_tick_cbs_ = Atomic.make (Alist.make ()) let run_tick_callbacks () = List.iter @@ -321,7 +321,7 @@ let mk_emitter ~stop ~net (config : Config.t) : (module EMITTER) = with e -> Printf.eprintf "on tick callback raised: %s\n" (Printexc.to_string e)) - (AList.get @@ Atomic.get on_tick_cbs_) + (Alist.get @@ Atomic.get on_tick_cbs_) end in let module M = struct let set_on_tick_callbacks = Atomic.set on_tick_cbs_ diff --git a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml index 53786ed2..0f823fbc 100644 --- a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml +++ b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml @@ -187,7 +187,7 @@ module type EMITTER = sig val push_logs : Logs.resource_logs list -> unit - val set_on_tick_callbacks : (unit -> unit) AList.t -> unit + val set_on_tick_callbacks : (unit -> unit) Alist.t -> unit val tick : unit -> unit @@ -218,7 +218,7 @@ let mk_emitter ~stop ~(config : Config.t) () : (module EMITTER) = let batch_logs : Logs.resource_logs Batch.t = Batch.make ?batch:config.batch_logs ?timeout () - let on_tick_cbs_ = Atomic.make (AList.make ()) + let on_tick_cbs_ = Atomic.make (Alist.make ()) let set_on_tick_callbacks = Atomic.set on_tick_cbs_ @@ -348,7 +348,7 @@ let mk_emitter ~stop ~(config : Config.t) () : (module EMITTER) = with e -> Printf.eprintf "on tick callback raised: %s\n" (Printexc.to_string e)) - (AList.get @@ Atomic.get on_tick_cbs_); + (Alist.get @@ Atomic.get on_tick_cbs_); let now = Mtime_clock.now () in let+ (_ : bool) = emit_traces_maybe ~now httpc and+ (_ : bool) = emit_logs_maybe ~now httpc diff --git a/src/lwt/opentelemetry_lwt.ml b/src/lwt/opentelemetry_lwt.ml index 7b443c20..2f3cfe02 100644 --- a/src/lwt/opentelemetry_lwt.ml +++ b/src/lwt/opentelemetry_lwt.ml @@ -7,24 +7,24 @@ module Span = Span module Span_link = Span_link module Globals = Globals module Timestamp_ns = Timestamp_ns -module GC_metrics = GC_metrics +module Gc_metrics = Gc_metrics module Metrics_callbacks = Metrics_callbacks module Trace_context = Trace_context +module GC_metrics = Gc_metrics [@@depecated "use Gc_metrics"] external reraise : exn -> 'a = "%reraise" (** This is equivalent to [Lwt.reraise]. We inline it here so we don't force to use Lwt's latest version *) -module Trace = struct - include Trace +module Tracer = struct + include Tracer (** Sync span guard *) - let with_ ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind - ?trace_id ?parent ?scope ?links name (cb : Scope.t -> 'a Lwt.t) : 'a Lwt.t - = + let with_ ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id ?parent + ?scope ?links name (cb : Scope.t -> 'a Lwt.t) : 'a Lwt.t = let thunk, finally = - with_' ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind - ?trace_id ?parent ?scope ?links name cb + with_' ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id ?parent + ?scope ?links name cb in try%lwt @@ -37,11 +37,14 @@ module Trace = struct reraise e end +module Trace = Tracer [@@deprecated "use Tracer"] + module Metrics = struct include Metrics end module Logs = struct include Proto.Logs - include Logs + include Log_record + include Logger end From c5b2269aabb40420d2b9224bcdc271251247ec58 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 00:23:23 -0500 Subject: [PATCH 17/94] refactor thoroughly ambient-context we have a new explicit `Storage.t` interface, that can be used to get a `Context.t` (a hmap) and to locally swap it; then we have multiple implementations of the Storage; and then we have a singleton atomic containing the "main" storage. --- src/ambient-context/basic_map.ml | 55 +++++++ src/ambient-context/core/context.ml | 7 + src/ambient-context/core/dune | 5 + src/ambient-context/core/storage.ml | 44 +++++ src/ambient-context/default_.map.ml | 1 + src/ambient-context/default_.mli | 2 + src/ambient-context/default_.tls.ml | 1 + src/ambient-context/dls.ml.tmp | 46 ++++++ src/ambient-context/dune | 18 ++- src/ambient-context/eio/dune | 2 +- .../eio/opentelemetry_ambient_context_eio.ml | 44 ++--- .../eio/opentelemetry_ambient_context_eio.mli | 2 - src/ambient-context/hmap_key_.new.ml | 1 - src/ambient-context/lwt/dune | 2 +- .../lwt/opentelemetry_ambient_context_lwt.ml | 46 ++---- .../lwt/opentelemetry_ambient_context_lwt.mli | 2 - .../opentelemetry_ambient_context.ml | 151 +++++------------- .../opentelemetry_ambient_context.mli | 55 ------- src/ambient-context/tls/dune | 6 + .../tls/opentelemetry_ambient_context_tls.ml | 23 +++ src/ambient-context/types/dune | 4 - .../opentelemetry_ambient_context_types.ml | 19 --- .../opentelemetry_ambient_context_types.mli | 32 ---- 23 files changed, 264 insertions(+), 304 deletions(-) create mode 100644 src/ambient-context/basic_map.ml create mode 100644 src/ambient-context/core/context.ml create mode 100644 src/ambient-context/core/dune create mode 100644 src/ambient-context/core/storage.ml create mode 100644 src/ambient-context/default_.map.ml create mode 100644 src/ambient-context/default_.mli create mode 100644 src/ambient-context/default_.tls.ml create mode 100644 src/ambient-context/dls.ml.tmp delete mode 100644 src/ambient-context/eio/opentelemetry_ambient_context_eio.mli delete mode 100644 src/ambient-context/hmap_key_.new.ml delete mode 100644 src/ambient-context/lwt/opentelemetry_ambient_context_lwt.mli delete mode 100644 src/ambient-context/opentelemetry_ambient_context.mli create mode 100644 src/ambient-context/tls/dune create mode 100644 src/ambient-context/tls/opentelemetry_ambient_context_tls.ml delete mode 100644 src/ambient-context/types/dune delete mode 100644 src/ambient-context/types/opentelemetry_ambient_context_types.ml delete mode 100644 src/ambient-context/types/opentelemetry_ambient_context_types.mli diff --git a/src/ambient-context/basic_map.ml b/src/ambient-context/basic_map.ml new file mode 100644 index 00000000..3575835a --- /dev/null +++ b/src/ambient-context/basic_map.ml @@ -0,0 +1,55 @@ +(** Extremely basic storage using a map from thread id to context *) + +open Opentelemetry_ambient_context_core + +open struct + module Atomic = Opentelemetry_atomic.Atomic + + module Int_map = Map.Make (struct + type t = int + + let compare : t -> t -> int = Stdlib.compare + end) + + type st = { m: Context.t ref Int_map.t Atomic.t } [@@unboxed] + + let get (self : st) : Context.t = + let tid = Thread.id @@ Thread.self () in + match Int_map.find tid (Atomic.get self.m) with + | exception Not_found -> Context.empty + | ctx_ref -> !ctx_ref + + let with_context (self : st) ctx f = + let tid = Thread.id @@ Thread.self () in + + let ctx_ref = + try Int_map.find tid (Atomic.get self.m) + with Not_found -> + let r = ref Context.empty in + while + let m = Atomic.get self.m in + let m' = Int_map.add tid r m in + not (Atomic.compare_and_set self.m m m') + do + () + done; + r + in + + let old_ctx = !ctx_ref in + ctx_ref := ctx; + + let finally () = ctx_ref := old_ctx in + Fun.protect ~finally f +end + +let create_storage () : Storage.t = + let st = { m = Atomic.make Int_map.empty } in + { + name = "basic-map"; + get_context = (fun () -> get st); + with_context = (fun ctx f -> with_context st ctx f); + } + +(** Default storage *) +let storage : Storage.t = create_storage () diff --git a/src/ambient-context/core/context.ml b/src/ambient-context/core/context.ml new file mode 100644 index 00000000..658a83e6 --- /dev/null +++ b/src/ambient-context/core/context.ml @@ -0,0 +1,7 @@ +type t = Hmap.t + +type 'a key = 'a Hmap.key + +let empty : t = Hmap.empty + +let[@inline] new_key () : _ key = Hmap.Key.create () diff --git a/src/ambient-context/core/dune b/src/ambient-context/core/dune new file mode 100644 index 00000000..97961205 --- /dev/null +++ b/src/ambient-context/core/dune @@ -0,0 +1,5 @@ +(library + (name opentelemetry_ambient_context_core) + (public_name opentelemetry.ambient-context.core) + (synopsis "Core definitions for ambient-context") + (libraries hmap)) diff --git a/src/ambient-context/core/storage.ml b/src/ambient-context/core/storage.ml new file mode 100644 index 00000000..89bc3931 --- /dev/null +++ b/src/ambient-context/core/storage.ml @@ -0,0 +1,44 @@ +(** Storage implementation. + + There is a singleton storage for a given program, responsible for providing + ambient context to the rest of the program. *) + +type t = { + name: string; + get_context: unit -> Context.t; + with_context: 'a. Context.t -> (unit -> 'a) -> 'a; +} +(** Storage type *) + +(** Name of the storage implementation. *) +let[@inline] name self = self.name + +(** Get the context from the current storage, or [Hmap.empty] if there is no + ambient context. *) +let[@inline] get_context self = self.get_context () + +(** [with_context storage ctx f] calls [f()] in an ambient context in which + [get_context()] will return [ctx]. Once [f()] returns, the storage is reset + to its previous value. *) +let[@inline] with_context self ctx f = self.with_context ctx f + +(** Get the ambient context and then look up [k] in it *) +let[@inline] get self (k : 'a Context.key) : 'a option = + Hmap.find k (get_context self) + +(** [with_key_bound_to storage k v f] calls [f()] in a context updated to have + [k] map to [v]. *) +let with_key_bound_to self k v f = + let ctx = get_context self in + let new_ctx = Hmap.add k v ctx in + self.with_context new_ctx f + +(** [with_key_unbound storage k f] calls [f()] in a context updated to have [k] + bound to no value. *) +let with_key_unbound self k f = + let ctx = get_context self in + if Hmap.mem k ctx then ( + let new_ctx = Hmap.rem k ctx in + self.with_context new_ctx f + ) else + f () diff --git a/src/ambient-context/default_.map.ml b/src/ambient-context/default_.map.ml new file mode 100644 index 00000000..aecceb56 --- /dev/null +++ b/src/ambient-context/default_.map.ml @@ -0,0 +1 @@ +let storage = Basic_map.storage diff --git a/src/ambient-context/default_.mli b/src/ambient-context/default_.mli new file mode 100644 index 00000000..9f14c9bd --- /dev/null +++ b/src/ambient-context/default_.mli @@ -0,0 +1,2 @@ +val storage : Storage.t +(** Default storage. *) diff --git a/src/ambient-context/default_.tls.ml b/src/ambient-context/default_.tls.ml new file mode 100644 index 00000000..7411b452 --- /dev/null +++ b/src/ambient-context/default_.tls.ml @@ -0,0 +1 @@ +let storage = Opentelemetry_ambient_context_tls.storage diff --git a/src/ambient-context/dls.ml.tmp b/src/ambient-context/dls.ml.tmp new file mode 100644 index 00000000..dee2dc49 --- /dev/null +++ b/src/ambient-context/dls.ml.tmp @@ -0,0 +1,46 @@ +(* TODO: conditional compilation, and use Basic_map in each DLS *) + +(** Storage using DLS. *) + +open Opentelemetry_ambient_context_core + +open struct + module DLS = Domain.DLS + + module Int_map = Map.Make (struct + type t = int + + let compare : t -> t -> int = Stdlib.compare + end) + + (* key used to access the context *) + let dls_k_context : Context.t ref Int_map.t DLS.key = + DLS.new_key + ~split_from_parent:(fun _ -> Int_map.empty) + (fun _ -> Int_map.empty) + + let dls_get () = + let tid = Thread.id @@ Thread.self () in + let map_ref = DLS.get dls_k_context in + try !(Int_map.find tid map_ref) with Not_found -> Hmap.empty + + let dls_with ctx f = + let tid = Thread.id @@ Thread.self () in + let map = DLS.get dls_k_context in + let ctx_ref = + try Int_map.find tid map + with Not_found -> + let r = ref Context.empty in + DLS.set dls_k_context (Int_map.add tid r map); + r + in + + let old_ctx = !ctx_ref in + ctx_ref := ctx; + + let finally () = ctx_ref := old_ctx in + Fun.protect ~finally f +end + +let storage : Storage.t = + { name = "dls-int-map"; get_context = dls_get; with_context = dls_with } diff --git a/src/ambient-context/dune b/src/ambient-context/dune index 68e7acf5..de1f5b26 100644 --- a/src/ambient-context/dune +++ b/src/ambient-context/dune @@ -3,13 +3,19 @@ (public_name opentelemetry.ambient-context) (synopsis "Abstraction over thread-local storage and fiber-local storage mechanisms") - (private_modules hmap_key_) + (flags + :standard + -open + Opentelemetry_ambient_context_core + -open + Opentelemetry_atomic) (libraries - thread-local-storage - threads + hmap atomic - opentelemetry.ambient-context.types + opentelemetry.ambient-context.core + opentelemetry.atomic (select - hmap_key_.ml + default_.ml from - (-> hmap_key_.new.ml)))) + (opentelemetry.ambient-context.tls -> default_.tls.ml) + (-> default_.map.ml)))) diff --git a/src/ambient-context/eio/dune b/src/ambient-context/eio/dune index f3f76be7..fa56edd8 100644 --- a/src/ambient-context/eio/dune +++ b/src/ambient-context/eio/dune @@ -4,4 +4,4 @@ (synopsis "Storage backend for ambient-context using Eio's fibre-local storage") (optional) ; eio - (libraries eio hmap opentelemetry.ambient-context thread-local-storage)) + (libraries eio hmap opentelemetry.ambient-context.core)) diff --git a/src/ambient-context/eio/opentelemetry_ambient_context_eio.ml b/src/ambient-context/eio/opentelemetry_ambient_context_eio.ml index 56451a1a..1da61600 100644 --- a/src/ambient-context/eio/opentelemetry_ambient_context_eio.ml +++ b/src/ambient-context/eio/opentelemetry_ambient_context_eio.ml @@ -1,39 +1,15 @@ +open Opentelemetry_ambient_context_core module Fiber = Eio.Fiber open struct - let _internal_key : Hmap.t Fiber.key = Fiber.create_key () - - let ( let* ) = Option.bind + let fiber_context_key : Context.t Fiber.key = Fiber.create_key () end -module M = struct - let name = "Storage_eio" - - let[@inline] get_map () = Fiber.get _internal_key - - let[@inline] with_map m cb = Fiber.with_binding _internal_key m cb - - let create_key = Hmap.Key.create - - let get k = - let* context = get_map () in - Hmap.find k context - - let with_binding k v cb = - let new_context = - match get_map () with - | None -> Hmap.singleton k v - | Some old_context -> Hmap.add k v old_context - in - with_map new_context cb - - let without_binding k cb = - let new_context = - match get_map () with - | None -> Hmap.empty - | Some old_context -> Hmap.rem k old_context - in - with_map new_context cb -end - -let storage () : Opentelemetry_ambient_context.storage = (module M) +let storage : Storage.t = + { + name = "eio"; + get_context = + (fun () -> + Fiber.get fiber_context_key |> Option.value ~default:Hmap.empty); + with_context = (fun ctx f -> Fiber.with_binding fiber_context_key ctx f); + } diff --git a/src/ambient-context/eio/opentelemetry_ambient_context_eio.mli b/src/ambient-context/eio/opentelemetry_ambient_context_eio.mli deleted file mode 100644 index ac5cf8ba..00000000 --- a/src/ambient-context/eio/opentelemetry_ambient_context_eio.mli +++ /dev/null @@ -1,2 +0,0 @@ -val storage : unit -> Opentelemetry_ambient_context.storage -(** Storage using Eio's fibers local storage *) diff --git a/src/ambient-context/hmap_key_.new.ml b/src/ambient-context/hmap_key_.new.ml deleted file mode 100644 index 1925b70e..00000000 --- a/src/ambient-context/hmap_key_.new.ml +++ /dev/null @@ -1 +0,0 @@ -let key : Hmap.t Thread_local_storage.t = Thread_local_storage.create () diff --git a/src/ambient-context/lwt/dune b/src/ambient-context/lwt/dune index 68a9de15..fb7398fb 100644 --- a/src/ambient-context/lwt/dune +++ b/src/ambient-context/lwt/dune @@ -4,4 +4,4 @@ (optional) ; lwt (synopsis "Storage backend for ambient-context using Lwt's sequence-associated storage") - (libraries lwt opentelemetry.ambient-context thread-local-storage)) + (libraries lwt opentelemetry.ambient-context.core)) diff --git a/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.ml b/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.ml index b75105f2..d7187670 100644 --- a/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.ml +++ b/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.ml @@ -1,37 +1,15 @@ +(** Storage using Lwt keys *) + +open Opentelemetry_ambient_context_core + open struct - let _internal_key : Hmap.t Lwt.key = Lwt.new_key () - - let ( let* ) = Option.bind + let lwt_context_key : Context.t Lwt.key = Lwt.new_key () end -module M = struct - let name = "Storage_lwt" - - let[@inline] get_map () = Lwt.get _internal_key - - let[@inline] with_map m cb = Lwt.with_value _internal_key (Some m) cb - - let create_key = Hmap.Key.create - - let get k = - let* context = get_map () in - Hmap.find k context - - let with_binding k v cb = - let new_context = - match get_map () with - | None -> Hmap.singleton k v - | Some old_context -> Hmap.add k v old_context - in - with_map new_context cb - - let without_binding k cb = - let new_context = - match get_map () with - | None -> Hmap.empty - | Some old_context -> Hmap.rem k old_context - in - with_map new_context cb -end - -let storage () : Opentelemetry_ambient_context.storage = (module M) +let storage : Storage.t = + { + name = "lwt"; + get_context = + (fun () -> Lwt.get lwt_context_key |> Option.value ~default:Hmap.empty); + with_context = (fun ctx f -> Lwt.with_value lwt_context_key (Some ctx) f); + } diff --git a/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.mli b/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.mli deleted file mode 100644 index 3c462a8d..00000000 --- a/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.mli +++ /dev/null @@ -1,2 +0,0 @@ -val storage : unit -> Opentelemetry_ambient_context.storage -(** Storage using Lwt keys *) diff --git a/src/ambient-context/opentelemetry_ambient_context.ml b/src/ambient-context/opentelemetry_ambient_context.ml index 7c622eb7..d8a18c57 100644 --- a/src/ambient-context/opentelemetry_ambient_context.ml +++ b/src/ambient-context/opentelemetry_ambient_context.ml @@ -1,124 +1,49 @@ -module TLS = Thread_local_storage -include Opentelemetry_ambient_context_types +include Opentelemetry_ambient_context_core -type 'a key = int * 'a Hmap.key +let default_storage = Default_.storage -let debug = - match Sys.getenv_opt "OCAML_AMBIENT_CONTEXT_DEBUG" with - | Some ("1" | "true") -> true - | _ -> false - -let _debug_id_ = Atomic.make 0 - -let[@inline] generate_debug_id () = Atomic.fetch_and_add _debug_id_ 1 - -let compare_key : int -> int -> int = Stdlib.compare - -module Storage_tls_hmap = struct - let[@inline] ( let* ) o f = - match o with - | None -> None - | Some x -> f x - - let key : Hmap.t TLS.t = Hmap_key_.key - - let name = "Storage_tls" - - let[@inline] get_map () = TLS.get_opt key - - let[@inline] with_map m cb = - let old = TLS.get_opt key |> Option.value ~default:Hmap.empty in - TLS.set key m; - Fun.protect ~finally:(fun () -> TLS.set key old) cb - - let create_key = Hmap.Key.create - - let get k = - let* context = get_map () in - Hmap.find k context - - let with_binding k v cb = - let new_context = - match get_map () with - | None -> Hmap.singleton k v - | Some old_context -> Hmap.add k v old_context - in - with_map new_context @@ fun _context -> cb () - - let without_binding k cb = - match get_map () with - | None -> cb () - | Some old_context -> - let new_context = Hmap.rem k old_context in - with_map new_context @@ fun _context -> cb () +open struct + (** The current ambient-context storage. *) + let cur_storage : Storage.t Atomic.t = Atomic.make Default_.storage end -let default_storage : storage = (module Storage_tls_hmap) +let[@inline] get_current_storage () = Atomic.get cur_storage -let k_current_storage : storage TLS.t = TLS.create () +(* NOTE: we can't really "map" each local context from the old + to the new. Maybe the old storage is TLS based and the new one + is per-lwt-task. *) +let set_current_storage (storage : Storage.t) = Atomic.set cur_storage storage -let get_current_storage () = - match TLS.get_exn k_current_storage with - | v -> v - | exception TLS.Not_set -> - let v = default_storage in - TLS.set k_current_storage v; - v +(** {2 Functions operating with the current storage} *) -let create_key () = - let (module Store : STORAGE) = get_current_storage () in - if not debug then - 0, Store.create_key () - else ( - let id = generate_debug_id () in - Printf.printf "%s: create_key %i\n%!" Store.name id; - id, Store.create_key () - ) +(** Get the context from the current storage, or [Hmap.empty] if there is no + ambient context. *) +let[@inline] get_context () = Storage.get_context (Atomic.get cur_storage) -let get (id, k) = - let (module Store : STORAGE) = get_current_storage () in - if not debug then - Store.get k - else ( - let rv = Store.get k in - (match rv with - | Some _ -> Printf.printf "%s: get %i -> Some\n%!" Store.name id - | None -> Printf.printf "%s: get %i -> None\n%!" Store.name id); - rv - ) +(** [with_context ctx f] calls [f()] in an ambient context in which + [get_context()] will return [ctx]. Once [f()] returns, the storage is reset + to its previous value. *) +let[@inline] with_context ctx f = + Storage.with_context (Atomic.get cur_storage) ctx f -let with_binding : 'a key -> 'a -> (unit -> 'r) -> 'r = - fun (id, k) v cb -> - let (module Store : STORAGE) = get_current_storage () in - if not debug then - Store.with_binding k v cb - else ( - Printf.printf "%s: with_binding %i enter\n%!" Store.name id; - let rv = Store.with_binding k v cb in - Printf.printf "%s: with_binding %i exit\n%!" Store.name id; - rv - ) +(** Get the ambient context and then look up [k] in it *) +let[@inline] get (k : 'a Context.key) : 'a option = Hmap.find k (get_context ()) -let without_binding (id, k) cb = - let (module Store : STORAGE) = get_current_storage () in - if not debug then - Store.without_binding k cb - else ( - Printf.printf "%s: without_binding %i enter\n%!" Store.name id; - let rv = Store.without_binding k cb in - Printf.printf "%s: without_binding %i exit\n%!" Store.name id; - rv - ) +(** [with_key_bound_to storage k v f] calls [f()] in a context updated to have + [k] map to [v]. *) +let with_key_bound_to k v f = + let storage = get_current_storage () in + let ctx = Storage.get_context storage in + let new_ctx = Hmap.add k v ctx in + Storage.with_context storage new_ctx f -let set_storage_provider store_new = - let store_before = get_current_storage () in - if store_new == store_before then - () - else - TLS.set k_current_storage store_new; - if debug then ( - let (module Store_before : STORAGE) = store_before in - let (module Store_new : STORAGE) = store_new in - Printf.printf "set_storage_provider %s (previously %s)\n%!" Store_new.name - Store_before.name - ) +(** [with_key_unbound k f] calls [f()] in a context updated to have [k] bound to + no value. *) +let with_key_unbound k f = + let storage = Atomic.get cur_storage in + let ctx = Storage.get_context storage in + if Hmap.mem k ctx then ( + let new_ctx = Hmap.rem k ctx in + Storage.with_context storage new_ctx f + ) else + f () diff --git a/src/ambient-context/opentelemetry_ambient_context.mli b/src/ambient-context/opentelemetry_ambient_context.mli deleted file mode 100644 index 8f19ff0c..00000000 --- a/src/ambient-context/opentelemetry_ambient_context.mli +++ /dev/null @@ -1,55 +0,0 @@ -(** Ambient context. - - The ambient context, like the Matrix, is everywhere around you. - - It is responsible for keeping track of that context in a manner that's - consistent with the program's choice of control flow paradigm: - - - for synchronous/threaded/direct style code, {b TLS} ("thread local - storage") keeps track of a global variable per thread. Each thread has its - own copy of the variable and updates it independently of other threads. - - - for Lwt, any ['a Lwt.t] created inside the [with_binding k v (fun _ -> …)] - will inherit the [k := v] assignment. - - - for Eio, fibers created inside [with_binding k v (fun () -> …)] will - inherit the [k := v] assignment. This is consistent with the structured - concurrency approach of Eio. - - The only data stored by this storage is a {!Hmap.t}, ie a heterogeneous map. - Various users (libraries, user code, etc.) can create their own {!key} to - store what they are interested in, without affecting other parts of the - storage. *) - -module Types := Opentelemetry_ambient_context_types - -module type STORAGE = Types.STORAGE - -type storage = (module STORAGE) - -val default_storage : storage - -val get_current_storage : unit -> storage - -val set_storage_provider : storage -> unit - -type 'a key -(** A key that can be mapped to values of type ['a] in the ambient context. *) - -val compare_key : int -> int -> int -(** Total order on keys *) - -val create_key : unit -> 'a key -(** Create a new fresh key, distinct from any previously created key. *) - -val get : 'a key -> 'a option -(** Get the current value for a given key, or [None] if no value was associated - with the key in the ambient context. *) - -val with_binding : 'a key -> 'a -> (unit -> 'r) -> 'r -(** [with_binding k v cb] calls [cb()] in a context in which [k] is bound to - [v]. This does not affect storage outside of [cb()]. *) - -val without_binding : 'a key -> (unit -> 'b) -> 'b -(** [without_binding k cb] calls [cb()] in a context where [k] has no binding - (possibly shadowing the current ambient binding of [k] if it exists). *) diff --git a/src/ambient-context/tls/dune b/src/ambient-context/tls/dune new file mode 100644 index 00000000..c0a285bc --- /dev/null +++ b/src/ambient-context/tls/dune @@ -0,0 +1,6 @@ +(library + (name opentelemetry_ambient_context_tls) + (public_name opentelemetry.ambient-context.tls) + (synopsis "Implementation of ambient-context from thread-local-storage") + (optional) ; TLS + (libraries opentelemetry.ambient-context.core thread-local-storage)) diff --git a/src/ambient-context/tls/opentelemetry_ambient_context_tls.ml b/src/ambient-context/tls/opentelemetry_ambient_context_tls.ml new file mode 100644 index 00000000..7d40387c --- /dev/null +++ b/src/ambient-context/tls/opentelemetry_ambient_context_tls.ml @@ -0,0 +1,23 @@ +open Opentelemetry_ambient_context_core + +open struct + module TLS = Thread_local_storage + + (* key used to access the context *) + let tls_k_context : Context.t TLS.t = TLS.create () +end + +let storage : Storage.t = + { + name = "tls"; + get_context = + (fun () -> try TLS.get_exn tls_k_context with TLS.Not_set -> Hmap.empty); + with_context = + (fun ctx f -> + let old = + try TLS.get_exn tls_k_context with TLS.Not_set -> Hmap.empty + in + let finally () = TLS.set tls_k_context old in + TLS.set tls_k_context ctx; + Fun.protect ~finally f); + } diff --git a/src/ambient-context/types/dune b/src/ambient-context/types/dune deleted file mode 100644 index b9e4146c..00000000 --- a/src/ambient-context/types/dune +++ /dev/null @@ -1,4 +0,0 @@ -(library - (name opentelemetry_ambient_context_types) - (public_name opentelemetry.ambient-context.types) - (libraries hmap thread-local-storage)) diff --git a/src/ambient-context/types/opentelemetry_ambient_context_types.ml b/src/ambient-context/types/opentelemetry_ambient_context_types.ml deleted file mode 100644 index 829f7789..00000000 --- a/src/ambient-context/types/opentelemetry_ambient_context_types.ml +++ /dev/null @@ -1,19 +0,0 @@ -type 'a key = 'a Hmap.key - -module type STORAGE = sig - val name : string - - val get_map : unit -> Hmap.t option - - val with_map : Hmap.t -> (unit -> 'b) -> 'b - - val create_key : unit -> 'a key - - val get : 'a key -> 'a option - - val with_binding : 'a key -> 'a -> (unit -> 'b) -> 'b - - val without_binding : 'a key -> (unit -> 'b) -> 'b -end - -type storage = (module STORAGE) diff --git a/src/ambient-context/types/opentelemetry_ambient_context_types.mli b/src/ambient-context/types/opentelemetry_ambient_context_types.mli deleted file mode 100644 index 738b7520..00000000 --- a/src/ambient-context/types/opentelemetry_ambient_context_types.mli +++ /dev/null @@ -1,32 +0,0 @@ -(** Storage implementation. - - There is a singleton storage for a given program, responsible for providing - ambient context to the rest of the program. *) - -type 'a key = 'a Hmap.key - -module type STORAGE = sig - val name : string - (** Name of the storage implementation. *) - - val get_map : unit -> Hmap.t option - (** Get the hmap from the current ambient context, or [None] if there is no - ambient context. *) - - val with_map : Hmap.t -> (unit -> 'b) -> 'b - (** [with_hmap h cb] calls [cb()] in an ambient context in which [get_map()] - will return [h]. Once [cb()] returns, the storage is reset to its previous - value. *) - - val create_key : unit -> 'a key - (** Create a new storage key, guaranteed to be distinct from any previously - created key. *) - - val get : 'a key -> 'a option - - val with_binding : 'a key -> 'a -> (unit -> 'b) -> 'b - - val without_binding : 'a key -> (unit -> 'b) -> 'b -end - -type storage = (module STORAGE) From c05c7e2bfc82cebbdd578b911428190cf466c56f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 00:24:42 -0500 Subject: [PATCH 18/94] feat span: ambient span --- src/core/span.ml | 10 ++++++++++ src/core/span.mli | 7 +++++++ src/core/tracer.ml | 12 ++---------- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/core/span.ml b/src/core/span.ml index f0135b40..5b771d58 100644 --- a/src/core/span.ml +++ b/src/core/span.ml @@ -121,3 +121,13 @@ let add_links' (self : t) (links : unit -> Span_link.t list) : unit = let set_status = span_set_status let set_kind = span_set_kind + +let k_context : t Context.key = Context.new_key () + +(** Find current span from ambient-context *) +let get_ambient () : t option = Ambient_context.get k_context + +(** [with_ambient span f] runs [f()] with the current ambient span being set to + [span] *) +let[@inline] with_ambient (span : t) (f : unit -> 'a) : 'a = + Ambient_context.with_key_bound_to k_context span (fun _ -> f ()) diff --git a/src/core/span.mli b/src/core/span.mli index 673654a9..3fcaf541 100644 --- a/src/core/span.mli +++ b/src/core/span.mli @@ -105,3 +105,10 @@ val set_status : t -> Span_status.t -> unit val set_kind : t -> Span_kind.t -> unit (** Set the span's kind. @since 0.11 *) + +val get_ambient : unit -> t option +(** Find current span from ambient-context *) + +val with_ambient : t -> (unit -> 'a) -> 'a +(** [with_ambient span f] runs [f()] with the current ambient span being set to + [span] *) diff --git a/src/core/tracer.ml b/src/core/tracer.ml index f15c6985..b0dfbc2a 100644 --- a/src/core/tracer.ml +++ b/src/core/tracer.ml @@ -55,17 +55,9 @@ let (emit [@deprecated "use an explicit tracer"]) = | None -> () | Some exp -> exp#send_trace spans -(* TODO: remove scope, use span directly *) -type scope = Scope.t = { - trace_id: Trace_id.t; - span_id: Span_id.t; - mutable items: Scope.item_list; -} -[@@deprecated "use Scope.t"] +let (add_event [@deprecated "use Span.add_event"]) = Span.add_event -let (add_event [@deprecated "use Scope.add_event"]) = Scope.add_event - -let (add_attrs [@deprecated "use Scope.add_attrs"]) = Scope.add_attrs +let (add_attrs [@deprecated "use Span.add_attrs"]) = Span.add_attrs let with_' ?(tracer = simple_main_exporter) ?(force_new_trace_id = false) ?trace_state ?(attrs : (string * [< Value.t ]) list = []) ?kind ?trace_id From f8ec859f8b048239986379d755d9569deadc3cd0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 01:06:42 -0500 Subject: [PATCH 19/94] refactor core --- src/core/span.mli | 6 ++-- src/core/tracer.ml | 86 ++++++++++++++++++++-------------------------- 2 files changed, 41 insertions(+), 51 deletions(-) diff --git a/src/core/span.mli b/src/core/span.mli index 3fcaf541..138d60e1 100644 --- a/src/core/span.mli +++ b/src/core/span.mli @@ -20,8 +20,6 @@ type kind = Span_kind.t = | Span_kind_producer | Span_kind_consumer -val id : t -> Span_id.t - type key_value = Key_value.t val make : @@ -45,6 +43,10 @@ val make : list of links to other spans, each with their trace state (see {{:https://www.w3.org/TR/trace-context/#tracestate-header} w3.org}) *) +val id : t -> Span_id.t + +val trace_id : t -> Trace_id.t + val create_new : ?kind:kind -> ?id:Span_id.t -> diff --git a/src/core/tracer.ml b/src/core/tracer.ml index b0dfbc2a..c7c81752 100644 --- a/src/core/tracer.ml +++ b/src/core/tracer.ml @@ -59,69 +59,57 @@ let (add_event [@deprecated "use Span.add_event"]) = Span.add_event let (add_attrs [@deprecated "use Span.add_attrs"]) = Span.add_attrs -let with_' ?(tracer = simple_main_exporter) ?(force_new_trace_id = false) - ?trace_state ?(attrs : (string * [< Value.t ]) list = []) ?kind ?trace_id - ?parent ?scope ?(links = []) name cb = - let scope = - if force_new_trace_id then - None - else - Scope.get_ambient_scope ?scope () +let with_thunk_and_finally ?(tracer = simple_main_exporter) + ?(force_new_trace_id = false) ?trace_state + ?(attrs : (string * [< Value.t ]) list = []) ?kind ?trace_id ?parent ?links + name cb = + let parent = + match parent with + | Some _ -> parent + | None -> Span.get_ambient () in let trace_id = - match trace_id, scope with + match trace_id, parent with | _ when force_new_trace_id -> Trace_id.create () | Some trace_id, _ -> trace_id - | None, Some scope -> scope.trace_id + | None, Some p -> Span.trace_id p | None, None -> Trace_id.create () in - let parent = - match parent, scope with - | _ when force_new_trace_id -> None - | Some span_id, _ -> Some span_id - | None, Some scope -> Some scope.span_id - | None, None -> None - in let start_time = Timestamp_ns.now_unix_ns () in let span_id = Span_id.create () in - let scope = Scope.make ~trace_id ~span_id ~attrs ~links () in + + let parent_id = Option.map Span.id parent in + + let span : Span.t = + Span.make ?trace_state ?kind ?parent:parent_id ~trace_id ~id:span_id ~attrs + ?links ~start_time ~end_time:start_time name + in (* called once we're done, to emit a span *) let finally res = - let status = - match Scope.status scope with - | Some status -> Some status - | None -> - (match res with - | Ok () -> - (* By default, all spans are Unset, which means a span completed without error. + let end_time = Timestamp_ns.now_unix_ns () in + Proto.Trace.span_set_end_time_unix_nano span end_time; + + (match Span.status span with + | Some _ -> () + | None -> + (match res with + | Ok () -> + (* By default, all spans are Unset, which means a span completed without error. The Ok status is reserved for when you need to explicitly mark a span as successful rather than stick with the default of Unset (i.e., “without error”). https://opentelemetry.io/docs/languages/go/instrumentation/#set-span-status *) - None - | Error (e, bt) -> - Scope.record_exception scope e bt; - Some - (make_status ~code:Status_code_error ~message:(Printexc.to_string e) - ())) - in - let span = - (* TODO: should the attrs passed to with_ go on the Span - (in Span.create) or on the ResourceSpan (in emit)? - (question also applies to Opentelemetry_lwt.Trace.with) *) - Span.make ?kind ~trace_id ?parent ~links:(Scope.links scope) ~id:span_id - ?trace_state ~attrs:(Scope.attrs scope) ~events:(Scope.events scope) - ~start_time - ~end_time:(Timestamp_ns.now_unix_ns ()) - ?status name - in + () + | Error (e, bt) -> + Span.record_exception span e bt; + let status = + make_status ~code:Status_code_error ~message:(Printexc.to_string e) () + in + Span.set_status span status)); tracer#emit [ span ] in - let thunk () = - (* set global scope in this thread *) - Scope.with_ambient_scope scope @@ fun () -> cb scope - in + let thunk () = Span.with_ambient span (fun () -> cb span) in thunk, finally (** Sync span guard. @@ -141,10 +129,10 @@ let with_' ?(tracer = simple_main_exporter) ?(force_new_trace_id = false) [~scope] argument, nor [~trace_id], but will instead always create fresh identifiers for this span *) let with_ ?tracer ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id - ?parent ?scope ?links name (cb : Scope.t -> 'a) : 'a = + ?parent ?links name (cb : Span.t -> 'a) : 'a = let thunk, finally = - with_' ?tracer ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id - ?parent ?scope ?links name cb + with_thunk_and_finally ?tracer ?force_new_trace_id ?trace_state ?attrs ?kind + ?trace_id ?parent ?links name cb in try From 5bc7365e565e22f817e2af763d020476f52b268e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 01:06:52 -0500 Subject: [PATCH 20/94] wip: opentelemetry.emitter with same time a bit like a buffered writer for any data --- src/emitter/dune | 5 +++++ src/emitter/emitter.ml | 30 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 src/emitter/dune create mode 100644 src/emitter/emitter.ml diff --git a/src/emitter/dune b/src/emitter/dune new file mode 100644 index 00000000..76f79dee --- /dev/null +++ b/src/emitter/dune @@ -0,0 +1,5 @@ +(library + (name opentelemetry_emitter) + (public_name opentelemetry.emitter) + (libraries mtime mtime.clock.os) + (synopsis "Modular emitters for a single signal at a time")) diff --git a/src/emitter/emitter.ml b/src/emitter/emitter.ml new file mode 100644 index 00000000..54999bd4 --- /dev/null +++ b/src/emitter/emitter.ml @@ -0,0 +1,30 @@ +(** Emitters *) + +exception Closed + +type 'a t = { + emit: 'a list -> unit; + (** Emit signals. @raise Closed if the emitter is closed. *) + tick: now:Mtime.t -> unit; + (** Call regularly to ensure background work is done *) + closed: unit -> bool; (** True if the emitter was closed *) + flush_and_close: unit -> unit; + (** Flush internal buffered signals, then close *) +} +(** An emitter for values of type ['a]. *) + +let[@inline] emit (self : _ t) l : unit = if l <> [] then self.emit l + +let[@inline] tick (self : _ t) ~now : unit = self.tick ~now + +let[@inline] closed self : bool = self.closed () + +let[@inline] flush_and_close (self : _ t) : unit = self.flush_and_close () + +let map (f : 'a -> 'b) (self : 'b t) : 'a t = + { self with emit = (fun l -> self.emit (List.map f l)) } + +(* TODO: batching, either regular or sharded to reduce contention *) +(* TODO: sampling *) + +(* TODO: use in Opentelemetry, and also for Tracer, Logger, etc. *) From c009b1d20a2ed53a387e54d281bc55efb2a72f8f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 01:07:26 -0500 Subject: [PATCH 21/94] WIP trace (hiiii) --- src/trace/common_.ml | 7 ++ src/trace/conv.ml | 40 +++++++ src/trace/opentelemetry_trace.ml | 50 +-------- src/trace/subscriber.ml | 176 +++++++++++++++++++++++++++++++ 4 files changed, 224 insertions(+), 49 deletions(-) create mode 100644 src/trace/common_.ml create mode 100644 src/trace/conv.ml create mode 100644 src/trace/subscriber.ml diff --git a/src/trace/common_.ml b/src/trace/common_.ml new file mode 100644 index 00000000..74f053cf --- /dev/null +++ b/src/trace/common_.ml @@ -0,0 +1,7 @@ +module Otel = Opentelemetry +module Otrace = Trace_core (* ocaml-trace *) +module TSub = Trace_subscriber.Subscriber + +let ( let@ ) = ( @@ ) + +let spf = Printf.sprintf diff --git a/src/trace/conv.ml b/src/trace/conv.ml new file mode 100644 index 00000000..eca660bb --- /dev/null +++ b/src/trace/conv.ml @@ -0,0 +1,40 @@ +open Common_ + +let[@inline] trace_id_of_otel (id : Otel.Trace_id.t) : Otrace.trace_id = + if id == Otel.Trace_id.dummy then + Otrace.Collector.dummy_trace_id + else + Bytes.unsafe_to_string (Otel.Trace_id.to_bytes id) + +let[@inline] trace_id_to_otel (id : Otrace.trace_id) : Otel.Trace_id.t = + if id == Otrace.Collector.dummy_trace_id then + Otel.Trace_id.dummy + else + Otel.Trace_id.of_bytes @@ Bytes.unsafe_of_string id + +let[@inline] span_id_of_otel (id : Otel.Span_id.t) : Otrace.span = + if id == Otel.Span_id.dummy then + Otrace.Collector.dummy_span + else + Bytes.get_int64_le (Otel.Span_id.to_bytes id) 0 + +let[@inline] span_id_to_otel (id : Otrace.span) : Otel.Span_id.t = + if id == Otrace.Collector.dummy_span then + Otel.Span_id.dummy + else ( + let b = Bytes.create 8 in + Bytes.set_int64_le b 0 id; + Otel.Span_id.of_bytes b + ) + +let[@inline] ctx_to_otel (self : Otrace.explicit_span_ctx) : Otel.Span_ctx.t = + Otel.Span_ctx.make + ~trace_id:(trace_id_to_otel self.trace_id) + ~parent_id:(span_id_to_otel self.span) + () + +let[@inline] ctx_of_otel (ctx : Otel.Span_ctx.t) : Otrace.explicit_span_ctx = + { + trace_id = trace_id_of_otel (Otel.Span_ctx.trace_id ctx); + span = span_id_of_otel (Otel.Span_ctx.parent_id ctx); + } diff --git a/src/trace/opentelemetry_trace.ml b/src/trace/opentelemetry_trace.ml index ba3c8462..4dda635f 100644 --- a/src/trace/opentelemetry_trace.ml +++ b/src/trace/opentelemetry_trace.ml @@ -1,53 +1,5 @@ -module Otel = Opentelemetry -module Otrace = Trace_core (* ocaml-trace *) +open Common_ module TLS = Thread_local_storage -module TSub = Trace_subscriber.Subscriber - -open struct - let spf = Printf.sprintf -end - -module Conv = struct - let[@inline] trace_id_of_otel (id : Otel.Trace_id.t) : Otrace.trace_id = - if id == Otel.Trace_id.dummy then - Otrace.Collector.dummy_trace_id - else - Bytes.unsafe_to_string (Otel.Trace_id.to_bytes id) - - let[@inline] trace_id_to_otel (id : Otrace.trace_id) : Otel.Trace_id.t = - if id == Otrace.Collector.dummy_trace_id then - Otel.Trace_id.dummy - else - Otel.Trace_id.of_bytes @@ Bytes.unsafe_of_string id - - let[@inline] span_id_of_otel (id : Otel.Span_id.t) : Otrace.span = - if id == Otel.Span_id.dummy then - Otrace.Collector.dummy_span - else - Bytes.get_int64_le (Otel.Span_id.to_bytes id) 0 - - let[@inline] span_id_to_otel (id : Otrace.span) : Otel.Span_id.t = - if id == Otrace.Collector.dummy_span then - Otel.Span_id.dummy - else ( - let b = Bytes.create 8 in - Bytes.set_int64_le b 0 id; - Otel.Span_id.of_bytes b - ) - - let[@inline] ctx_to_otel (self : Otrace.explicit_span_ctx) : Otel.Span_ctx.t = - Otel.Span_ctx.make - ~trace_id:(trace_id_to_otel self.trace_id) - ~parent_id:(span_id_to_otel self.span) - () - - let[@inline] ctx_of_otel (ctx : Otel.Span_ctx.t) : Otrace.explicit_span_ctx = - { - trace_id = trace_id_of_otel (Otel.Span_ctx.trace_id ctx); - span = span_id_of_otel (Otel.Span_ctx.parent_id ctx); - } -end - open Conv let on_internal_error = diff --git a/src/trace/subscriber.ml b/src/trace/subscriber.ml new file mode 100644 index 00000000..9b6e2c15 --- /dev/null +++ b/src/trace/subscriber.ml @@ -0,0 +1,176 @@ +open Common_ +open Trace_core +module Span_tbl = Trace_subscriber.Span_tbl + +module Buf_pool = struct + type t = Buffer.t Rpool.t + + let create ?(max_size = 32) ?(buf_size = 256) () : t = + Rpool.create ~max_size ~clear:Buffer.reset + ~create:(fun () -> Buffer.create buf_size) + () +end + +open struct + let[@inline] time_us_of_time_ns (t : int64) : float = + Int64.div t 1_000L |> Int64.to_float + + let[@inline] int64_of_trace_id_ (id : Trace_core.trace_id) : int64 = + if id == Trace_core.Collector.dummy_trace_id then + 0L + else + Bytes.get_int64_le (Bytes.unsafe_of_string id) 0 +end + +let on_tracing_error = ref (fun s -> Printf.eprintf "%s\n%!" s) + +type span_info = { + tid: int; + name: string; + start_us: float; + mutable data: (string * Sub.user_data) list; + (* NOTE: thread safety: this is supposed to only be modified by the thread +that's running this (synchronous, stack-abiding) span. *) +} +(** Information we store about a span begin event, to emit a complete event when + we meet the corresponding span end event *) + +type t = { + active: bool A.t; + pid: int; + spans: span_info Span_tbl.t; + buf_pool: Buf_pool.t; + exporter: Exporter.t; +} +(** Subscriber state *) + +open struct + let print_non_closed_spans_warning spans = + let module Str_set = Set.Make (String) in + let spans = Span_tbl.to_list spans in + if spans <> [] then ( + !on_tracing_error + @@ Printf.sprintf "trace-tef: warning: %d spans were not closed" + (List.length spans); + let names = + List.fold_left + (fun set (_, span) -> Str_set.add span.name set) + Str_set.empty spans + in + Str_set.iter + (fun name -> + !on_tracing_error @@ Printf.sprintf " span %S was not closed" name) + names; + flush stderr + ) +end + +let close (self : t) : unit = + if A.exchange self.active false then ( + print_non_closed_spans_warning self.spans; + self.exporter.close () + ) + +let[@inline] active self = A.get self.active + +let[@inline] flush (self : t) : unit = self.exporter.flush () + +let create ?(buf_pool = Buf_pool.create ()) ~pid ~exporter () : t = + { active = A.make true; exporter; buf_pool; pid; spans = Span_tbl.create () } + +module Callbacks = struct + type st = t + + let on_init _ ~time_ns:_ = () + + let on_shutdown (self : st) ~time_ns:_ = close self + + let on_name_process (self : st) ~time_ns:_ ~tid:_ ~name : unit = + let@ buf = Rpool.with_ self.buf_pool in + Writer.emit_name_process ~pid:self.pid ~name buf; + self.exporter.on_json buf + + let on_name_thread (self : st) ~time_ns:_ ~tid ~name : unit = + let@ buf = Rpool.with_ self.buf_pool in + Writer.emit_name_thread buf ~pid:self.pid ~tid ~name; + self.exporter.on_json buf + + (* add function name, if provided, to the metadata *) + let add_fun_name_ fun_name data : _ list = + match fun_name with + | None -> data + | Some f -> ("function", Sub.U_string f) :: data + + let[@inline] on_enter_span (self : st) ~__FUNCTION__:fun_name ~__FILE__:_ + ~__LINE__:_ ~time_ns ~tid ~data ~name span : unit = + let time_us = time_us_of_time_ns @@ time_ns in + let data = add_fun_name_ fun_name data in + let info = { tid; name; start_us = time_us; data } in + (* save the span so we find it at exit *) + Span_tbl.add self.spans span info + + let on_exit_span (self : st) ~time_ns ~tid:_ span : unit = + let time_us = time_us_of_time_ns @@ time_ns in + + match Span_tbl.find_exn self.spans span with + | exception Not_found -> + !on_tracing_error + (Printf.sprintf "trace-tef: error: cannot find span %Ld" span) + | { tid; name; start_us; data } -> + Span_tbl.remove self.spans span; + let@ buf = Rpool.with_ self.buf_pool in + Writer.emit_duration_event buf ~pid:self.pid ~tid ~name ~start:start_us + ~end_:time_us ~args:data; + + self.exporter.on_json buf + + let on_add_data (self : st) ~data span = + if data <> [] then ( + try + let info = Span_tbl.find_exn self.spans span in + info.data <- List.rev_append data info.data + with Not_found -> + !on_tracing_error + (Printf.sprintf "trace-tef: error: cannot find span %Ld" span) + ) + + let on_message (self : st) ~time_ns ~tid ~span:_ ~data msg : unit = + let time_us = time_us_of_time_ns @@ time_ns in + let@ buf = Rpool.with_ self.buf_pool in + Writer.emit_instant_event buf ~pid:self.pid ~tid ~name:msg ~ts:time_us + ~args:data; + self.exporter.on_json buf + + let on_counter (self : st) ~time_ns ~tid ~data:_ ~name n : unit = + let time_us = time_us_of_time_ns @@ time_ns in + let@ buf = Rpool.with_ self.buf_pool in + Writer.emit_counter buf ~pid:self.pid ~name ~tid ~ts:time_us n; + self.exporter.on_json buf + + let on_enter_manual_span (self : st) ~__FUNCTION__:fun_name ~__FILE__:_ + ~__LINE__:_ ~time_ns ~tid ~parent:_ ~data ~name ~flavor ~trace_id _span : + unit = + let time_us = time_us_of_time_ns @@ time_ns in + + let data = add_fun_name_ fun_name data in + let@ buf = Rpool.with_ self.buf_pool in + Writer.emit_manual_begin buf ~pid:self.pid ~tid ~name + ~id:(int64_of_trace_id_ trace_id) + ~ts:time_us ~args:data ~flavor; + self.exporter.on_json buf + + let on_exit_manual_span (self : st) ~time_ns ~tid ~name ~data ~flavor + ~trace_id (_ : span) : unit = + let time_us = time_us_of_time_ns @@ time_ns in + + let@ buf = Rpool.with_ self.buf_pool in + Writer.emit_manual_end buf ~pid:self.pid ~tid ~name + ~id:(int64_of_trace_id_ trace_id) + ~ts:time_us ~flavor ~args:data; + self.exporter.on_json buf + + let on_extension_event _ ~time_ns:_ ~tid:_ _ev = () +end + +let subscriber (self : t) : Sub.t = + Sub.Subscriber.Sub { st = self; callbacks = (module Callbacks) } From e29ec44561ea133eacf05ef31a03939cffa3c03a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 01:07:41 -0500 Subject: [PATCH 22/94] fix self_trace --- src/client/self_trace.ml | 9 +++++---- src/client/self_trace.mli | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/client/self_trace.ml b/src/client/self_trace.ml index 52d8b2f4..28baaf5d 100644 --- a/src/client/self_trace.ml +++ b/src/client/self_trace.ml @@ -2,7 +2,7 @@ module OT = Opentelemetry let enabled = Atomic.make false -let add_event (scope : OT.Scope.t) ev = OT.Scope.add_event scope (fun () -> ev) +let[@inline] add_event (scope : OT.Span.t) ev = OT.Span.add_event scope ev let dummy_trace_id_ = OT.Trace_id.dummy @@ -14,10 +14,11 @@ let with_ ?kind ?attrs name f = OT.Tracer.with_ ?kind ?attrs name f else ( (* A new scope is needed here because it might be modified *) - let scope = - OT.Scope.make ~trace_id:dummy_trace_id_ ~span_id:dummy_span_id () + let span : OT.Span.t = + OT.Span.make ~trace_id:dummy_trace_id_ ~id:dummy_span_id ~start_time:0L + ~end_time:0L name in - f scope + f span ) let set_enabled b = Atomic.set enabled b diff --git a/src/client/self_trace.mli b/src/client/self_trace.mli index d0690f02..26e7b3e9 100644 --- a/src/client/self_trace.mli +++ b/src/client/self_trace.mli @@ -1,12 +1,12 @@ (** Mini tracing module (disabled if [config.self_trace=false]) *) -val add_event : Opentelemetry.Scope.t -> Opentelemetry.Event.t -> unit +val add_event : Opentelemetry.Span.t -> Opentelemetry.Event.t -> unit val with_ : ?kind:Opentelemetry.Span_kind.t -> ?attrs:(string * Opentelemetry.value) list -> string -> - (Opentelemetry.Scope.t -> 'a) -> + (Opentelemetry.Span.t -> 'a) -> 'a val set_enabled : bool -> unit From 2dc836b7b48d946a8e3ca13c22a0be9534a889c4 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 01:07:50 -0500 Subject: [PATCH 23/94] fixes after we removed Scope --- .../cohttp/opentelemetry_cohttp_lwt.ml | 71 ++++++++----------- src/lwt/opentelemetry_lwt.ml | 6 +- 2 files changed, 34 insertions(+), 43 deletions(-) diff --git a/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml b/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml index ad670b5c..2ac0ed3d 100644 --- a/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml +++ b/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml @@ -43,11 +43,11 @@ module Server : sig convenience. *) val get_trace_context : - ?from:[ `Internal | `External ] -> Request.t -> Otel.Scope.t option + ?from:[ `Internal | `External ] -> Request.t -> Otel.Span.t option (** Get the tracing scope from the custom [x-ocaml-otel-traceparent] header added by [trace] and [with_]. *) - val set_trace_context : Otel.Scope.t -> Request.t -> Request.t + val set_trace_context : Otel.Span.t -> Request.t -> Request.t (** Set the tracing scope in the custom [x-ocaml-otel-traceparent] header used by [trace] and [with_]. *) @@ -83,16 +83,16 @@ end = struct let header_x_ocaml_otel_traceparent = "x-ocaml-otel-traceparent" - let set_trace_context (scope : Otel.Scope.t) req = + let set_trace_context (span : Otel.Span.t) req = let module Traceparent = Otel.Trace_context.Traceparent in let headers = Header.add (Request.headers req) header_x_ocaml_otel_traceparent - (Traceparent.to_value ~trace_id:scope.trace_id ~parent_id:scope.span_id - ()) + (Traceparent.to_value ~trace_id:(Otel.Span.trace_id span) + ~parent_id:(Otel.Span.id span) ()) in { req with headers } - let get_trace_context ?(from = `Internal) req = + let get_trace_context ?(from = `Internal) req : Otel.Span.t option = let module Traceparent = Otel.Trace_context.Traceparent in let name = match from with @@ -104,7 +104,9 @@ end = struct | Some v -> (match Traceparent.of_value v with | Ok (trace_id, parent_id) -> - Some (Otel.Scope.make ~trace_id ~span_id:parent_id ()) + (* TODO: we need a span_ctx here actually *) + Some + (Otel.Span.make ~trace_id ~id:parent_id ~start_time:0L ~end_time:0L "") | Error _ -> None) let remove_trace_context req = @@ -115,31 +117,28 @@ end = struct let trace ?service_name ?(attrs = []) callback conn req body = let scope = get_trace_context ~from:`External req in - Otel_lwt.Trace.with_ ?service_name "request" ~kind:Span_kind_server - ?trace_id:(Option.map (fun scope -> scope.Otel.Trace.trace_id) scope) - ?parent:(Option.map (fun scope -> scope.Otel.Trace.span_id) scope) + Otel_lwt.Tracer.with_ "request" ~kind:Span_kind_server + ?trace_id:(Option.map Otel.Span.trace_id parent) + ?parent:(Option.map Otel.Span.id parent) ~attrs:(attrs @ attrs_of_request req) (fun scope -> let open Lwt.Syntax in let req = set_trace_context scope req in let* res, body = callback conn req body in - Otel.Scope.add_attrs scope (fun () -> attrs_of_response res); + Otel.Span.add_attrs scope (fun () -> attrs_of_response res); Lwt.return (res, body)) - let with_ ?trace_state ?service_name ?attrs - ?(kind = Otel.Span.Span_kind_internal) ?links name req - (f : Request.t -> 'a Lwt.t) = - let scope = get_trace_context ~from:`Internal req in - Otel_lwt.Trace.with_ ?trace_state ?service_name ?attrs ~kind - ?trace_id:(Option.map (fun scope -> scope.Otel.Trace.trace_id) scope) - ?parent:(Option.map (fun scope -> scope.Otel.Trace.span_id) scope) - ?links name - (fun scope -> - let req = set_trace_context scope req in + let with_ ?trace_state ?attrs ?(kind = Otel.Span.Span_kind_internal) ?links + name req (f : Request.t -> 'a Lwt.t) = + let span = get_trace_context ~from:`Internal req in + Otel_lwt.Trace.with_ ?trace_state ?attrs ~kind + ?trace_id:(Option.map Otel.Span.trace_id span) ?parent:span ?links name + (fun span -> + let req = set_trace_context span in f req) end -let client ?(scope : Otel.Scope.t option) (module C : Cohttp_lwt.S.Client) = +let client ?(span : Otel.Span.t option) (module C : Cohttp_lwt.S.Client) = let module Traced = struct open Lwt.Syntax @@ -168,20 +167,12 @@ let client ?(scope : Otel.Scope.t option) (module C : Cohttp_lwt.S.Client) = ] let context_for ~uri ~meth = - let trace_id = - match scope with - | Some scope -> Some scope.trace_id - | None -> None - in - let parent = - match scope with - | Some scope -> Some scope.span_id - | None -> None - in + let trace_id = Option.map Otel.Span.trace_id span in + let parent = Option.map Otel.Span.id span in let attrs = attrs_for ~uri ~meth () in trace_id, parent, attrs - let add_traceparent (scope : Otel.Scope.t) headers = + let add_traceparent (span : Otel.Span.t) headers = let module Traceparent = Otel.Trace_context.Traceparent in let headers = match headers with @@ -189,17 +180,17 @@ let client ?(scope : Otel.Scope.t option) (module C : Cohttp_lwt.S.Client) = | Some headers -> headers in Header.add headers Traceparent.name - (Traceparent.to_value ~trace_id:scope.trace_id ~parent_id:scope.span_id - ()) + (Traceparent.to_value ~trace_id:(Otel.Span.trace_id span) + ~parent_id:(Otel.Span.id span) ()) let call ?ctx ?headers ?body ?chunked meth (uri : Uri.t) : (Response.t * Cohttp_lwt.Body.t) Lwt.t = let trace_id, parent, attrs = context_for ~uri ~meth in Otel_lwt.Trace.with_ "request" ~kind:Span_kind_client ?trace_id ?parent - ~attrs (fun scope -> - let headers = add_traceparent scope headers in + ~attrs (fun span -> + let headers = add_traceparent span headers in let* res, body = C.call ?ctx ~headers ?body ?chunked meth uri in - Otel.Scope.add_attrs scope (fun () -> + Otel.Span.add_attrs span (fun () -> let code = Response.status res in let code = Code.code_of_status code in [ "http.status_code", `Int code ]); @@ -226,10 +217,10 @@ let client ?(scope : Otel.Scope.t option) (module C : Cohttp_lwt.S.Client) = let post_form ?ctx ?headers ~params uri = let trace_id, parent, attrs = context_for ~uri ~meth:`POST in Otel_lwt.Trace.with_ "request" ~kind:Span_kind_client ?trace_id ?parent - ~attrs (fun scope -> + ~attrs (fun span -> let headers = add_traceparent scope headers in let* res, body = C.post_form ?ctx ~headers ~params uri in - Otel.Scope.add_attrs scope (fun () -> + Otel.Span.add_attrs span (fun () -> let code = Response.status res in let code = Code.code_of_status code in [ "http.status_code", `Int code ]); diff --git a/src/lwt/opentelemetry_lwt.ml b/src/lwt/opentelemetry_lwt.ml index 2f3cfe02..2ce4e12c 100644 --- a/src/lwt/opentelemetry_lwt.ml +++ b/src/lwt/opentelemetry_lwt.ml @@ -21,10 +21,10 @@ module Tracer = struct (** Sync span guard *) let with_ ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id ?parent - ?scope ?links name (cb : Scope.t -> 'a Lwt.t) : 'a Lwt.t = + ?links name (cb : Span.t -> 'a Lwt.t) : 'a Lwt.t = let thunk, finally = - with_' ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id ?parent - ?scope ?links name cb + with_thunk_and_finally ?force_new_trace_id ?trace_state ?attrs ?kind + ?trace_id ?parent ?links name cb in try%lwt From a56cd5c0737ec6eaccf91e7dd3e716e332e9cb5d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 01:08:09 -0500 Subject: [PATCH 24/94] chore: makefile --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 4291a68f..f6d2cc6d 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,9 @@ clean: protoc-gen: FORCE_GENPROTO=true dune build @lint +update-submodules: + git submodule update --init + format: @dune build @fmt --auto-promote From e9fafd431a623bf7dd27ab738297af542725f962 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 09:31:06 -0500 Subject: [PATCH 25/94] split core library into opentelemetry.core and opentelemetry --- src/client/batch.ml | 6 +++--- src/client/self_trace.ml | 14 +++++++------- src/client/self_trace.mli | 10 ++++++---- src/client/stdout_exporter.ml | 5 +++-- src/client/sync_queue.ml | 2 +- src/core/common_.ml | 1 - src/core/dune | 9 ++++----- src/core/gc_metrics.ml | 2 +- src/core/globals.ml | 7 ++++--- src/core/span.ml | 8 -------- src/core/span.mli | 7 +------ src/core/span_id.ml | 10 +++++++--- src/core/trace_id.ml | 11 ++++++++--- src/lib/ambient_span.ml | 9 +++++++++ src/lib/ambient_span.mli | 6 ++++++ src/lib/dune | 25 +++++++++++++++++++++++++ src/{core => lib}/logger.ml | 0 src/{core => lib}/metrics_callbacks.ml | 0 src/{core => lib}/metrics_callbacks.mli | 0 src/{core => lib}/metrics_emitter.ml | 0 src/{core => lib}/opentelemetry.ml | 10 +++------- src/{core => lib}/tracer.ml | 4 ++-- 22 files changed, 90 insertions(+), 56 deletions(-) create mode 100644 src/lib/ambient_span.ml create mode 100644 src/lib/ambient_span.mli create mode 100644 src/lib/dune rename src/{core => lib}/logger.ml (100%) rename src/{core => lib}/metrics_callbacks.ml (100%) rename src/{core => lib}/metrics_callbacks.mli (100%) rename src/{core => lib}/metrics_emitter.ml (100%) rename src/{core => lib}/opentelemetry.ml (87%) rename src/{core => lib}/tracer.ml (97%) diff --git a/src/client/batch.ml b/src/client/batch.ml index 1fc4aaa9..d26cb04f 100644 --- a/src/client/batch.ml +++ b/src/client/batch.ml @@ -1,4 +1,4 @@ -module Otel = Opentelemetry +open Opentelemetry_util type 'a t = { mutable size: int; @@ -47,7 +47,7 @@ let ready_to_pop ~force ~now self = let pop_if_ready ?(force = false) ~now (self : _ t) : _ list option = let rev_batch_opt = - Otel.Util_mutex.protect self.mutex @@ fun () -> + Util_mutex.protect self.mutex @@ fun () -> if ready_to_pop ~force ~now self then ( assert (self.q <> []); let batch = self.q in @@ -72,7 +72,7 @@ let rec push_unprotected (self : _ t) ~(elems : _ list) : unit = push_unprotected self ~elems:xs let push (self : _ t) elems : [ `Dropped | `Ok ] = - Otel.Util_mutex.protect self.mutex @@ fun () -> + Util_mutex.protect self.mutex @@ fun () -> if self.size >= self.high_watermark then (* drop this to prevent queue from growing too fast *) `Dropped diff --git a/src/client/self_trace.ml b/src/client/self_trace.ml index 28baaf5d..23dd258b 100644 --- a/src/client/self_trace.ml +++ b/src/client/self_trace.ml @@ -1,21 +1,21 @@ -module OT = Opentelemetry +open Common_ let enabled = Atomic.make false -let[@inline] add_event (scope : OT.Span.t) ev = OT.Span.add_event scope ev +let[@inline] add_event (scope : OTEL.Span.t) ev = OTEL.Span.add_event scope ev -let dummy_trace_id_ = OT.Trace_id.dummy +let dummy_trace_id_ = OTEL.Trace_id.dummy -let dummy_span_id = OT.Span_id.dummy +let dummy_span_id = OTEL.Span_id.dummy (* FIXME: get an explicit tracer instead *) let with_ ?kind ?attrs name f = if Atomic.get enabled then - OT.Tracer.with_ ?kind ?attrs name f + OTEL.Tracer.with_ ?kind ?attrs name f else ( (* A new scope is needed here because it might be modified *) - let span : OT.Span.t = - OT.Span.make ~trace_id:dummy_trace_id_ ~id:dummy_span_id ~start_time:0L + let span : OTEL.Span.t = + OTEL.Span.make ~trace_id:dummy_trace_id_ ~id:dummy_span_id ~start_time:0L ~end_time:0L name in f span diff --git a/src/client/self_trace.mli b/src/client/self_trace.mli index 26e7b3e9..b6b371ef 100644 --- a/src/client/self_trace.mli +++ b/src/client/self_trace.mli @@ -1,12 +1,14 @@ (** Mini tracing module (disabled if [config.self_trace=false]) *) -val add_event : Opentelemetry.Span.t -> Opentelemetry.Event.t -> unit +open Common_ + +val add_event : OTEL.Span.t -> OTEL.Event.t -> unit val with_ : - ?kind:Opentelemetry.Span_kind.t -> - ?attrs:(string * Opentelemetry.value) list -> + ?kind:OTEL.Span_kind.t -> + ?attrs:(string * OTEL.value) list -> string -> - (Opentelemetry.Span.t -> 'a) -> + (OTEL.Span.t -> 'a) -> 'a val set_enabled : bool -> unit diff --git a/src/client/stdout_exporter.ml b/src/client/stdout_exporter.ml index 05122369..71c8e2d1 100644 --- a/src/client/stdout_exporter.ml +++ b/src/client/stdout_exporter.ml @@ -1,10 +1,11 @@ (** A simple exporter that prints on stdout *) open Common_ -open OTEL +open Opentelemetry_util open struct - let pp_span out (sp : Span.t) = + let pp_span out (sp : OTEL.Span.t) = + let open OTEL in Format.fprintf out "@[<2>SPAN@ trace_id: %a@ span_id: %a@ name: %S@ start: %a@ end: %a@]@." Trace_id.pp diff --git a/src/client/sync_queue.ml b/src/client/sync_queue.ml index 10983b2f..46d46af1 100644 --- a/src/client/sync_queue.ml +++ b/src/client/sync_queue.ml @@ -1,4 +1,4 @@ -module UM = Opentelemetry.Util_mutex +module UM = Opentelemetry_util.Util_mutex type 'a t = { mutex: Mutex.t; diff --git a/src/core/common_.ml b/src/core/common_.ml index 30bb39d2..c6544d5d 100644 --- a/src/core/common_.ml +++ b/src/core/common_.ml @@ -2,4 +2,3 @@ let spf = Printf.sprintf module Proto = Opentelemetry_proto module Atomic = Opentelemetry_atomic.Atomic -module Ambient_context = Opentelemetry_ambient_context diff --git a/src/core/dune b/src/core/dune index debbd5c0..dce03edd 100644 --- a/src/core/dune +++ b/src/core/dune @@ -1,15 +1,14 @@ (library - (name opentelemetry) - (synopsis "API for opentelemetry instrumentation") + (name opentelemetry_core) + (public_name opentelemetry.core) + (synopsis "Core types and definitions for opentelemetry") (flags :standard -warn-error -a+8 -open Opentelemetry_util) (libraries opentelemetry.proto opentelemetry.util - opentelemetry.ambient-context ptime ptime.clock.os pbrt threads opentelemetry.atomic - hmap) - (public_name opentelemetry)) + hmap)) diff --git a/src/core/gc_metrics.ml b/src/core/gc_metrics.ml index e565eb86..9e3c17cf 100644 --- a/src/core/gc_metrics.ml +++ b/src/core/gc_metrics.ml @@ -1,7 +1,7 @@ open Common_ open struct - let[@inline] bytes_per_word = Sys.word_size / 8 + let bytes_per_word = Sys.word_size / 8 let[@inline] word_to_bytes n = n * bytes_per_word diff --git a/src/core/globals.ml b/src/core/globals.ml index 36e3e975..0e5ccfb6 100644 --- a/src/core/globals.ml +++ b/src/core/globals.ml @@ -40,9 +40,10 @@ let add_global_attribute (key : string) (v : Value.t) : unit = (* add global attributes to this list *) let merge_global_attributes_ into : _ list = - let open Key_value in - let not_redundant kv = List.for_all (fun kv' -> kv.key <> kv'.key) into in - List.rev_append (List.filter not_redundant !global_attributes) into + let not_redundant kv = + List.for_all Key_value.(fun kv' -> kv.key <> kv'.key) into + in + List.rev_append Key_value.(List.filter not_redundant !global_attributes) into (** Default span kind in {!Span.create}. This will be used in all spans that do not specify [~kind] explicitly; it is set to "internal", following diff --git a/src/core/span.ml b/src/core/span.ml index 5b771d58..c4c0c8cd 100644 --- a/src/core/span.ml +++ b/src/core/span.ml @@ -123,11 +123,3 @@ let set_status = span_set_status let set_kind = span_set_kind let k_context : t Context.key = Context.new_key () - -(** Find current span from ambient-context *) -let get_ambient () : t option = Ambient_context.get k_context - -(** [with_ambient span f] runs [f()] with the current ambient span being set to - [span] *) -let[@inline] with_ambient (span : t) (f : unit -> 'a) : 'a = - Ambient_context.with_key_bound_to k_context span (fun _ -> f ()) diff --git a/src/core/span.mli b/src/core/span.mli index 138d60e1..34a4e858 100644 --- a/src/core/span.mli +++ b/src/core/span.mli @@ -108,9 +108,4 @@ val set_kind : t -> Span_kind.t -> unit (** Set the span's kind. @since 0.11 *) -val get_ambient : unit -> t option -(** Find current span from ambient-context *) - -val with_ambient : t -> (unit -> 'a) -> 'a -(** [with_ambient span f] runs [f()] with the current ambient span being set to - [span] *) +val k_context : t Context.key diff --git a/src/core/span_id.ml b/src/core/span_id.ml index bf9e7731..f7d926fc 100644 --- a/src/core/span_id.ml +++ b/src/core/span_id.ml @@ -1,5 +1,3 @@ -open Common_ - type t = bytes let[@inline] to_bytes self = self @@ -13,7 +11,13 @@ let create () : t = Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); b -let is_valid = Util_bytes_.bytes_non_zero +let[@inline] is_zero (self : t) : bool = + (* try to reduce branches *) + assert (Bytes.length self = 8); + let n1 = Bytes.get_int64_ne self 0 in + n1 = 0L + +let[@inline] is_valid self = not (is_zero self) let[@inline] of_bytes b = if Bytes.length b = 8 then diff --git a/src/core/trace_id.ml b/src/core/trace_id.ml index 8893a757..999eb3af 100644 --- a/src/core/trace_id.ml +++ b/src/core/trace_id.ml @@ -1,5 +1,3 @@ -open Common_ - type t = bytes let[@inline] to_bytes self = self @@ -19,7 +17,14 @@ let[@inline] of_bytes b = else invalid_arg "trace ID must be 16 bytes in length" -let is_valid = Util_bytes_.bytes_non_zero +let[@inline] is_zero (self : t) : bool = + (* try to reduce branches *) + assert (Bytes.length self = 1); + let n1 = Bytes.get_int64_ne self 0 in + let n2 = Bytes.get_int64_ne self 8 in + n1 = 0L && n2 = 0L + +let[@inline] is_valid self = not (is_zero self) let to_hex = Util_bytes_.bytes_to_hex diff --git a/src/lib/ambient_span.ml b/src/lib/ambient_span.ml new file mode 100644 index 00000000..b8cc84e6 --- /dev/null +++ b/src/lib/ambient_span.ml @@ -0,0 +1,9 @@ +(** Find current span from ambient-context *) +let[@inline] get () : Span.t option = + Opentelemetry_ambient_context.get Span.k_context + +(** [with_ambient span f] runs [f()] with the current ambient span being set to + [span] *) +let[@inline] with_ambient (span : Span.t) (f : unit -> 'a) : 'a = + Opentelemetry_ambient_context.with_key_bound_to Span.k_context span (fun _ -> + f ()) diff --git a/src/lib/ambient_span.mli b/src/lib/ambient_span.mli new file mode 100644 index 00000000..ecde51ea --- /dev/null +++ b/src/lib/ambient_span.mli @@ -0,0 +1,6 @@ +val get : unit -> Span.t option +(** Find current span from ambient-context *) + +val with_ambient : Span.t -> (unit -> 'a) -> 'a +(** [with_ambient span f] runs [f()] with the current ambient span being set to + [span] *) diff --git a/src/lib/dune b/src/lib/dune new file mode 100644 index 00000000..4e68dbcb --- /dev/null +++ b/src/lib/dune @@ -0,0 +1,25 @@ +(library + (name opentelemetry) + (public_name opentelemetry) + (synopsis "API for opentelemetry instrumentation") + (flags + :standard + -warn-error + -a+8 + -open + Opentelemetry_util + -open + Opentelemetry_core + -open + Opentelemetry_core.Common_) + (libraries + opentelemetry.core + opentelemetry.proto + opentelemetry.util + opentelemetry.ambient-context + opentelemetry.atomic + ptime + ptime.clock.os + pbrt + threads + hmap)) diff --git a/src/core/logger.ml b/src/lib/logger.ml similarity index 100% rename from src/core/logger.ml rename to src/lib/logger.ml diff --git a/src/core/metrics_callbacks.ml b/src/lib/metrics_callbacks.ml similarity index 100% rename from src/core/metrics_callbacks.ml rename to src/lib/metrics_callbacks.ml diff --git a/src/core/metrics_callbacks.mli b/src/lib/metrics_callbacks.mli similarity index 100% rename from src/core/metrics_callbacks.mli rename to src/lib/metrics_callbacks.mli diff --git a/src/core/metrics_emitter.ml b/src/lib/metrics_emitter.ml similarity index 100% rename from src/core/metrics_emitter.ml rename to src/lib/metrics_emitter.ml diff --git a/src/core/opentelemetry.ml b/src/lib/opentelemetry.ml similarity index 87% rename from src/core/opentelemetry.ml rename to src/lib/opentelemetry.ml index 73e34600..8fe30707 100644 --- a/src/core/opentelemetry.ml +++ b/src/lib/opentelemetry.ml @@ -1,16 +1,12 @@ -(** Opentelemetry types and instrumentation *) +(** Main Opentelemetry API for libraries and user code. *) -module Rand_bytes = Rand_bytes -(** Generation of random identifiers. *) +module Core = Opentelemetry_core +(** Core types and definitions *) module Alist = Alist (** Atomic list, for internal usage @since 0.7 *) -module Util_mutex = Util_mutex -(** Utilities for internal usage. - @since NEXT_RELEASE *) - (** {2 Wire format} *) module Proto = Opentelemetry_proto diff --git a/src/core/tracer.ml b/src/lib/tracer.ml similarity index 97% rename from src/core/tracer.ml rename to src/lib/tracer.ml index c7c81752..15ab1e97 100644 --- a/src/core/tracer.ml +++ b/src/lib/tracer.ml @@ -66,7 +66,7 @@ let with_thunk_and_finally ?(tracer = simple_main_exporter) let parent = match parent with | Some _ -> parent - | None -> Span.get_ambient () + | None -> Ambient_span.get () in let trace_id = match trace_id, parent with @@ -109,7 +109,7 @@ let with_thunk_and_finally ?(tracer = simple_main_exporter) tracer#emit [ span ] in - let thunk () = Span.with_ambient span (fun () -> cb span) in + let thunk () = Ambient_span.with_ambient span (fun () -> cb span) in thunk, finally (** Sync span guard. From 755e24a1e88305a8648b88fde0de00492dcdecc4 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:07:29 -0500 Subject: [PATCH 26/94] feat emitter: better docs, add a `to_list` emitter --- src/emitter/dune | 3 ++- src/emitter/emitter.ml | 17 +++++++++++++---- src/emitter/to_list.ml | 10 ++++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 src/emitter/to_list.ml diff --git a/src/emitter/dune b/src/emitter/dune index 76f79dee..3844e185 100644 --- a/src/emitter/dune +++ b/src/emitter/dune @@ -1,5 +1,6 @@ (library (name opentelemetry_emitter) (public_name opentelemetry.emitter) - (libraries mtime mtime.clock.os) + (libraries mtime mtime.clock.os opentelemetry.atomic) + (flags :standard -open Opentelemetry_atomic) (synopsis "Modular emitters for a single signal at a time")) diff --git a/src/emitter/emitter.ml b/src/emitter/emitter.ml index 54999bd4..02d8822e 100644 --- a/src/emitter/emitter.ml +++ b/src/emitter/emitter.ml @@ -1,4 +1,9 @@ -(** Emitters *) +(** Emitters. + + This is the composable abstraction we use to represent how signals are + emitted, from their origin point (a site in user code or library code that + was instrumented, and just created a span or log record or metric), down to + the actual SDK exporter installed in the application. *) exception Closed @@ -6,10 +11,12 @@ type 'a t = { emit: 'a list -> unit; (** Emit signals. @raise Closed if the emitter is closed. *) tick: now:Mtime.t -> unit; - (** Call regularly to ensure background work is done *) - closed: unit -> bool; (** True if the emitter was closed *) + (** Call regularly to ensure background work is done. The current + timestamp is passed to improve testability. *) + closed: unit -> bool; + (** True if the emitter is already closed. Beware TOCTOU bugs. *) flush_and_close: unit -> unit; - (** Flush internal buffered signals, then close *) + (** Flush internally buffered signals, then close. *) } (** An emitter for values of type ['a]. *) @@ -21,6 +28,8 @@ let[@inline] closed self : bool = self.closed () let[@inline] flush_and_close (self : _ t) : unit = self.flush_and_close () +(** [map f emitter] returns a new emitter that applies [f] to signals before + passing them to [emitter] *) let map (f : 'a -> 'b) (self : 'b t) : 'a t = { self with emit = (fun l -> self.emit (List.map f l)) } diff --git a/src/emitter/to_list.ml b/src/emitter/to_list.ml new file mode 100644 index 00000000..61a42ab8 --- /dev/null +++ b/src/emitter/to_list.ml @@ -0,0 +1,10 @@ +(** Emitter that stores signals into a list, in reverse order (most recent + signals first). *) +let to_list (l : 'a list ref) : 'a Emitter.t = + let closed = Atomic.make false in + { + emit = (fun sigs -> l := List.rev_append sigs !l); + tick = (fun ~now:_ -> ()); + closed = (fun () -> Atomic.get closed); + flush_and_close = (fun () -> Atomic.set closed true); + } From bb6d83483c873ef682b51ff32c03d9c4e8a9d82b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:16:55 -0500 Subject: [PATCH 27/94] client: add sampler; batch and sampler are now emitter transformers --- src/client/batch.ml | 43 +++++++++++++++++++++++++++++++++++++++ src/client/batch.mli | 6 ++++++ src/client/dune | 8 +++++++- src/client/sampler.ml | 46 ++++++++++++++++++++++++++++++++++++++++++ src/client/sampler.mli | 25 +++++++++++++++++++++++ 5 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 src/client/sampler.ml create mode 100644 src/client/sampler.mli diff --git a/src/client/batch.ml b/src/client/batch.ml index d26cb04f..62cd8703 100644 --- a/src/client/batch.ml +++ b/src/client/batch.ml @@ -87,3 +87,46 @@ let push (self : _ t) elems : [ `Dropped | `Ok ] = ) let[@inline] push' self elems = ignore (push self elems : [ `Dropped | `Ok ]) + +open Opentelemetry_emitter + +let wrap_emitter (self : _ t) (e : _ Emitter.t) : _ Emitter.t = + let closed () = e.closed () in + let flush_and_close () = + (* FIXME: we need to close the batch first, to prevent + further pushes; then write the content to [e]; then + flusn and close [e]. In this order. *) + (match pop_if_ready self ~force:true ~now:Mtime.max_stamp with + | None -> () + | Some l -> Emitter.emit e l); + + Emitter.flush_and_close e + in + + let maybe_emit ~now = + match pop_if_ready self ~force:false ~now with + | None -> () + | Some l -> Emitter.emit e l + in + + let tick ~now = + (* first, check if batch has timed out *) + maybe_emit ~now; + + (* only then, tick the underlying emitter *) + Emitter.tick e ~now + in + + let emit l = + if l <> [] then ( + push' self l; + + (* TODO: it'd be nice if we checked only for size here, not + for timeout. The [tick] function is enough for timeouts, + whereas [emit] is in the hot path of every single span/metric/log *) + let now = Mtime_clock.now () in + maybe_emit ~now + ) + in + + { Emitter.closed; flush_and_close; tick; emit } diff --git a/src/client/batch.mli b/src/client/batch.mli index c3b6f7e1..f50e1675 100644 --- a/src/client/batch.mli +++ b/src/client/batch.mli @@ -53,3 +53,9 @@ val push : 'a t -> 'a list -> [ `Dropped | `Ok ] val push' : 'a t -> 'a list -> unit (** Like {!push} but ignores the result *) + +open Opentelemetry_emitter + +val wrap_emitter : 'a t -> 'a Emitter.t -> 'a Emitter.t +(** [batch_emitter batch e] is an emitter that uses batch [batch] to gather + signals into larger lists before passing them to [e]. *) diff --git a/src/client/dune b/src/client/dune index 36f6ee5c..48e836d7 100644 --- a/src/client/dune +++ b/src/client/dune @@ -1,6 +1,12 @@ (library (name opentelemetry_client) (public_name opentelemetry.client) - (libraries opentelemetry opentelemetry.proto pbrt mtime mtime.clock.os) + (libraries + opentelemetry + opentelemetry.emitter + opentelemetry.proto + pbrt + mtime + mtime.clock.os) (synopsis "Basic exporters, as well as Common types and logic shared between exporters")) diff --git a/src/client/sampler.ml b/src/client/sampler.ml new file mode 100644 index 00000000..ecaece0a --- /dev/null +++ b/src/client/sampler.ml @@ -0,0 +1,46 @@ +type t = { + proba_accept: float; + n_seen: int Atomic.t; + n_accepted: int Atomic.t; +} + +let create ~proba_accept () : t = + (* FIXME: either czzry a random state and protect it, or make sure + we Random.self_init() in the current domain?? *) + if proba_accept < 0. || proba_accept > 1. then + invalid_arg "sampler: proba_accept must be in [0., 1.]"; + { proba_accept; n_seen = Atomic.make 0; n_accepted = Atomic.make 0 } + +let[@inline] proba_accept self = self.proba_accept + +let actual_rate (self : t) : float = + let accept = Atomic.get self.n_accepted in + let total = Atomic.get self.n_seen in + + if total = 0 then + 1. + else + float accept /. float total + +let accept (self : t) : bool = + Atomic.incr self.n_seen; + + let n = Random.float 1. in + let res = n < self.proba_accept in + + if res then Atomic.incr self.n_accepted; + res + +open Opentelemetry_emitter + +let wrap_emitter (self : t) (e : _ Emitter.t) : _ Emitter.t = + let closed () = Emitter.closed e in + let flush_and_close () = Emitter.flush_and_close e in + let tick ~now = Emitter.tick e ~now in + + let emit l = + let accepted = List.filter (fun _x -> accept self) l in + if accepted <> [] then Emitter.emit e accepted + in + + { Emitter.closed; flush_and_close; tick; emit } diff --git a/src/client/sampler.mli b/src/client/sampler.mli new file mode 100644 index 00000000..784c8346 --- /dev/null +++ b/src/client/sampler.mli @@ -0,0 +1,25 @@ +(** Basic random sampling *) + +type t + +val create : proba_accept:float -> unit -> t +(** [create ~proba_accept:n ()] makes a new sampler. + + The sampler will accept signals with probability [n] (must be between 0 and + 1). + @raise Invalid_argument if [n] is not between 0 and 1. *) + +val accept : t -> bool +(** Do we accept a sample? This returns [true] with probability [proba_accept]. +*) + +val proba_accept : t -> float + +val actual_rate : t -> float +(** The ratio of signals we actually accepted so far *) + +open Opentelemetry_emitter + +val wrap_emitter : t -> 'a Emitter.t -> 'a Emitter.t +(** [wrap_emitter sampler e] is a new emitter that uses the [sampler] on each + individual signal before passing them to [e]. *) From aa96be2e0f5cfd0b316b802938f078aec318e243 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:17:20 -0500 Subject: [PATCH 28/94] fix rand_bytes: init at least the local domain's Rand state --- src/core/rand_bytes.ml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/core/rand_bytes.ml b/src/core/rand_bytes.ml index 336020a5..c90c1a72 100644 --- a/src/core/rand_bytes.ml +++ b/src/core/rand_bytes.ml @@ -1,4 +1,12 @@ +let initialized_ = Atomic.make false + +let[@inline never] actually_init () = Random.self_init () + +let[@inline] maybe_init () = + if not (Atomic.exchange initialized_ true) then actually_init () + let default_rand_bytes_8 () : bytes = + maybe_init (); let b = Bytes.create 8 in for i = 0 to 1 do (* rely on the stdlib's [Random] being thread-or-domain safe *) @@ -14,6 +22,7 @@ let default_rand_bytes_8 () : bytes = b let default_rand_bytes_16 () : bytes = + maybe_init (); let b = Bytes.create 16 in for i = 0 to 4 do (* rely on the stdlib's [Random] being thread-or-domain safe *) From 242370f94d6d509269ab0f0de8507c3e53b76cdb Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:18:03 -0500 Subject: [PATCH 29/94] wip: trace --- src/trace/dune | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/trace/dune b/src/trace/dune index 0c815c5d..adcd12df 100644 --- a/src/trace/dune +++ b/src/trace/dune @@ -2,9 +2,11 @@ (name opentelemetry_trace) (public_name opentelemetry.trace) (synopsis "Use opentelemetry as a collector for trace") - (optional) + (optional) ; trace + (flags :standard -open Opentelemetry_util) (libraries opentelemetry.ambient-context + opentelemetry.util + opentelemetry.core trace.core - trace.subscriber - opentelemetry)) + trace.subscriber)) From 07e17d9682cac71844c4f4b6112d86235579e9ff Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:18:07 -0500 Subject: [PATCH 30/94] feat lib: expose Ambient_span --- src/lib/ambient_span.mli | 2 ++ src/lib/opentelemetry.ml | 1 + 2 files changed, 3 insertions(+) diff --git a/src/lib/ambient_span.mli b/src/lib/ambient_span.mli index ecde51ea..70997ac4 100644 --- a/src/lib/ambient_span.mli +++ b/src/lib/ambient_span.mli @@ -1,3 +1,5 @@ +(** Storing the current span in ambient context. *) + val get : unit -> Span.t option (** Find current span from ambient-context *) diff --git a/src/lib/opentelemetry.ml b/src/lib/opentelemetry.ml index 8fe30707..5e6cafb0 100644 --- a/src/lib/opentelemetry.ml +++ b/src/lib/opentelemetry.ml @@ -57,6 +57,7 @@ module Span_kind = Span_kind (** {2 Traces} *) module Span = Span +module Ambient_span = Ambient_span module Tracer = Tracer module Trace = Tracer [@@deprecated "use Tracer instead"] From 2a798098c5ea3b64f6eee46c12118284b774ba9b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:18:18 -0500 Subject: [PATCH 31/94] wip: various fixes --- src/client-cohttp-eio/config.ml | 6 +-- src/client-cohttp-eio/config.mli | 6 +-- .../opentelemetry_client_cohttp_eio.ml | 50 +------------------ src/integrations/logs/opentelemetry_logs.ml | 12 ++--- 4 files changed, 12 insertions(+), 62 deletions(-) diff --git a/src/client-cohttp-eio/config.ml b/src/client-cohttp-eio/config.ml index 930881ff..4f3677de 100644 --- a/src/client-cohttp-eio/config.ml +++ b/src/client-cohttp-eio/config.ml @@ -1,7 +1,7 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t -module Env = Opentelemetry_client.Config.Env () +module Env = Opentelemetry_client.Client_config.Env () -let pp = Opentelemetry_client.Config.pp +let pp = Opentelemetry_client.Client_config.pp let make = Env.make (fun common () -> common) diff --git a/src/client-cohttp-eio/config.mli b/src/client-cohttp-eio/config.mli index 100bb696..dff28732 100644 --- a/src/client-cohttp-eio/config.mli +++ b/src/client-cohttp-eio/config.mli @@ -1,4 +1,4 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t (** Configuration. To build one, use {!make} below. This might be extended with more fields in @@ -6,7 +6,7 @@ type t = Opentelemetry_client.Config.t val pp : Format.formatter -> t -> unit -val make : (unit -> t) Opentelemetry_client.Config.make +val make : (unit -> t) Opentelemetry_client.Client_config.make (** Make a configuration {!t}. *) -module Env : Opentelemetry_client.Config.ENV +module Env : Opentelemetry_client.Client_config.ENV diff --git a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml index 67b7b169..6a182963 100644 --- a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml +++ b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml @@ -25,48 +25,6 @@ let last_gc_metrics = Atomic.make (Mtime_clock.now ()) let timeout_gc_metrics = Mtime.Span.(20 * s) -(* Cross-domain, thread-safe storage for GC metrics gathered from different fibres. *) -module GC_metrics : sig - val add : Proto.Metrics.resource_metrics -> unit - - val drain : unit -> Proto.Metrics.resource_metrics list -end = struct - (* Used to prevent data races across domains *) - let mutex = Eio.Mutex.create () - - let gc_metrics = ref [] - - let add m = - Eio.Mutex.use_rw ~protect:true mutex (fun () -> - gc_metrics := m :: !gc_metrics) - - let drain () = - Eio.Mutex.use_rw ~protect:true mutex (fun () -> - let metrics = !gc_metrics in - gc_metrics := []; - metrics) -end - -(* capture current GC metrics if {!needs_gc_metrics} is true, - or it has been a long time since the last GC metrics collection, - and push them into {!gc_metrics} for later collection *) -let sample_gc_metrics_if_needed () = - let now = Mtime_clock.now () in - let alarm = Atomic.compare_and_set needs_gc_metrics true false in - let timeout () = - let elapsed = Mtime.span now (Atomic.get last_gc_metrics) in - Mtime.Span.compare elapsed timeout_gc_metrics > 0 - in - if alarm || timeout () then ( - Atomic.set last_gc_metrics now; - let l = - OT.Metrics.make_resource_metrics - ~attrs:(Opentelemetry.GC_metrics.get_runtime_attributes ()) - @@ Opentelemetry.GC_metrics.get_metrics () - in - GC_metrics.add l - ) - type error = [ `Status of int * Opentelemetry.Proto.Status.status | `Failure of string @@ -282,7 +240,6 @@ let mk_emitter ~stop ~net (config : Config.t) : (module EMITTER) = let push_metrics x = let@ () = guard_exn_ "push metrics" in - sample_gc_metrics_if_needed (); push_to_batch batch_metrics x let push_logs x = @@ -299,8 +256,7 @@ let mk_emitter ~stop ~net (config : Config.t) : (module EMITTER) = let emit_metrics_maybe = maybe_emit batch_metrics config.url_metrics (fun collected_metrics -> - let gc_metrics = GC_metrics.drain () in - gc_metrics @ collected_metrics |> Signal.Encode.metrics) + collected_metrics |> Signal.Encode.metrics) let emit_logs_maybe = maybe_emit batch_logs config.url_logs Signal.Encode.logs @@ -336,7 +292,6 @@ let mk_emitter ~stop ~net (config : Config.t) : (module EMITTER) = if Config.Env.get_debug () then Printf.eprintf "tick (from domain %d)\n%!" (Domain.self () :> int); run_tick_callbacks (); - sample_gc_metrics_if_needed (); emit_all ~force:false let cleanup ~on_done () = @@ -344,13 +299,12 @@ let mk_emitter ~stop ~net (config : Config.t) : (module EMITTER) = Printf.eprintf "opentelemetry: exiting…\n%!"; Atomic.set stop true; run_tick_callbacks (); - sample_gc_metrics_if_needed (); emit_all ~force:true; on_done () end in (module M : EMITTER) -module Backend (Emitter : EMITTER) : Opentelemetry.Collector.BACKEND = struct +module Backend (Emitter : EMITTER) : Opentelemetry.Exporter.t = struct open Opentelemetry.Proto open Opentelemetry.Collector open Emitter diff --git a/src/integrations/logs/opentelemetry_logs.ml b/src/integrations/logs/opentelemetry_logs.ml index 51aa0818..a91f2e44 100644 --- a/src/integrations/logs/opentelemetry_logs.ml +++ b/src/integrations/logs/opentelemetry_logs.ml @@ -34,15 +34,11 @@ let emit_telemetry do_emit = Logs.Tag.(empty |> add emit_telemetry_tag do_emit) (*****************************************************************************) (* Log a message to otel with some attrs *) -let log ?service_name ?(attrs = []) ?(scope = Otel.Scope.get_ambient_scope ()) - ~level msg = +let log ?service_name ?(attrs = []) ?(scope = Otel.Ambient_span.get ()) ~level + msg = let log_level = Logs.level_to_string (Some level) in - let span_id = - Option.map (fun (scope : Otel.Scope.t) -> scope.span_id) scope - in - let trace_id = - Option.map (fun (scope : Otel.Scope.t) -> scope.trace_id) scope - in + let span_id = Option.map Otel.Span.id scope in + let trace_id = Option.map Otel.Span.trace_id scope in let severity = log_level_to_severity level in let log = Otel.Log_record.make_str ~severity ~log_level ?trace_id ?span_id msg From 57b790d2d29a4ed885aa66f3ae0f9a921dfecc7f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:56:04 -0500 Subject: [PATCH 32/94] emitter: add `enabled()` field, and `tap` --- src/emitter/emitter.ml | 26 ++++++++++++++++++++++---- src/emitter/to_list.ml | 6 +++++- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/emitter/emitter.ml b/src/emitter/emitter.ml index 02d8822e..d3aa0e5e 100644 --- a/src/emitter/emitter.ml +++ b/src/emitter/emitter.ml @@ -7,7 +7,10 @@ exception Closed -type 'a t = { +type -'a t = { + enabled: unit -> bool; + (** Return [true] if [emit] has a chance of doing something with the + signals it's given. *) emit: 'a list -> unit; (** Emit signals. @raise Closed if the emitter is closed. *) tick: now:Mtime.t -> unit; @@ -20,6 +23,8 @@ type 'a t = { } (** An emitter for values of type ['a]. *) +let[@inline] enabled self : bool = self.enabled () + let[@inline] emit (self : _ t) l : unit = if l <> [] then self.emit l let[@inline] tick (self : _ t) ~now : unit = self.tick ~now @@ -33,7 +38,20 @@ let[@inline] flush_and_close (self : _ t) : unit = self.flush_and_close () let map (f : 'a -> 'b) (self : 'b t) : 'a t = { self with emit = (fun l -> self.emit (List.map f l)) } -(* TODO: batching, either regular or sharded to reduce contention *) -(* TODO: sampling *) +(** [tap f e] is like [e], but every signal is passed to [f] *) +let tap (f : 'a -> unit) (self : 'a t) : 'a t = + let emit l = + List.iter f l; + self.emit l + in + { self with emit } -(* TODO: use in Opentelemetry, and also for Tracer, Logger, etc. *) +let dummy () : _ t = + let closed = Atomic.make false in + { + enabled = (fun () -> false); + emit = ignore; + tick = (fun ~now:_ -> ()); + closed = (fun () -> Atomic.get closed); + flush_and_close = (fun () -> Atomic.set closed true); + } diff --git a/src/emitter/to_list.ml b/src/emitter/to_list.ml index 61a42ab8..04e228ca 100644 --- a/src/emitter/to_list.ml +++ b/src/emitter/to_list.ml @@ -3,7 +3,11 @@ let to_list (l : 'a list ref) : 'a Emitter.t = let closed = Atomic.make false in { - emit = (fun sigs -> l := List.rev_append sigs !l); + enabled = (fun () -> not (Atomic.get closed)); + emit = + (fun sigs -> + if Atomic.get closed then raise Emitter.Closed; + l := List.rev_append sigs !l); tick = (fun ~now:_ -> ()); closed = (fun () -> Atomic.get closed); flush_and_close = (fun () -> Atomic.set closed true); From cded07d90aa51451b3b64dd8620b6e56db805597 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:56:30 -0500 Subject: [PATCH 33/94] feat core: add `Any_signal.t`; make Exporter a record of emitters --- src/core/any_signal.ml | 13 ++++++ src/core/dune | 3 +- src/core/exporter.ml | 96 ++++++++++++++++++----------------------- src/core/gc_metrics.ml | 4 +- src/core/gc_metrics.mli | 2 +- 5 files changed, 61 insertions(+), 57 deletions(-) create mode 100644 src/core/any_signal.ml diff --git a/src/core/any_signal.ml b/src/core/any_signal.ml new file mode 100644 index 00000000..4b3ec102 --- /dev/null +++ b/src/core/any_signal.ml @@ -0,0 +1,13 @@ +(** Any kind of signal *) + +open Common_ + +type t = + | Span of Span.t + | Metric of Metrics.t + | Log of Log_record.t + +let pp out = function + | Span s -> Proto.Trace.pp_span out s + | Metric m -> Proto.Metrics.pp_metric out m + | Log l -> Proto.Logs.pp_log_record out l diff --git a/src/core/dune b/src/core/dune index dce03edd..c33b4edc 100644 --- a/src/core/dune +++ b/src/core/dune @@ -6,9 +6,10 @@ (libraries opentelemetry.proto opentelemetry.util + opentelemetry.atomic + opentelemetry.emitter ptime ptime.clock.os pbrt threads - opentelemetry.atomic hmap)) diff --git a/src/core/exporter.ml b/src/core/exporter.ml index 7c08f588..c077cfe7 100644 --- a/src/core/exporter.ml +++ b/src/core/exporter.ml @@ -7,69 +7,61 @@ in their own library. *) open Common_ +open Opentelemetry_emitter open struct module Proto = Opentelemetry_proto end -(** Main exporter interface *) -class type t = object - method send_trace : Proto.Trace.span list -> unit - - method send_metrics : Proto.Metrics.metric list -> unit - - method send_logs : Proto.Logs.log_record list -> unit - - method tick : unit -> unit - (** Should be called regularly for background processing, timeout checks, etc. - *) - - method add_on_tick_callback : (unit -> unit) -> unit - (** Add the given of callback to the exporter when [tick()] is called. The - callback should be short and reentrant. Depending on the exporter's - implementation, it might be called from a thread that is not the one that - called [on_tick]. *) - - method cleanup : on_done:(unit -> unit) -> unit -> unit - (** [cleanup ~on_done ()] is called when the exporter is shut down, and is +type t = { + emit_spans: Proto.Trace.span Emitter.t; + emit_metrics: Proto.Metrics.metric Emitter.t; + emit_logs: Proto.Logs.log_record Emitter.t; + on_tick: Cb_set.t; + (** Set of callbacks for "on tick". Should be triggered regularly for + background processing, timeout checks, etc. *) + cleanup: on_done:(unit -> unit) -> unit -> unit; + (** [cleanup ~on_done ()] is called when the exporter is shut down, and is responsible for sending remaining batches, flushing sockets, etc. @param on_done callback invoked after the cleanup is done. @since 0.12 *) -end +} +(** Main exporter interface. *) (** Dummy exporter, does nothing *) -let dummy : t = - let tick_cbs = Cb_set.create () in - object - method send_trace = ignore +let dummy () : t = + let on_tick = Cb_set.create () in + { + emit_spans = Emitter.dummy (); + emit_metrics = Emitter.dummy (); + emit_logs = Emitter.dummy (); + on_tick; + cleanup = (fun ~on_done () -> on_done ()); + } - method send_metrics = ignore +let[@inline] send_trace (self : t) (l : Proto.Trace.span list) = + Emitter.emit self.emit_spans l - method send_logs = ignore +let[@inline] send_metrics (self : t) (l : Proto.Metrics.metric list) = + Emitter.emit self.emit_metrics l - method tick () = Cb_set.trigger tick_cbs +let[@inline] send_logs (self : t) (l : Proto.Logs.log_record list) = + Emitter.emit self.emit_logs l - method add_on_tick_callback cb = Cb_set.register tick_cbs cb - - method cleanup ~on_done () = on_done () - end - -let[@inline] send_trace (self : #t) (l : Proto.Trace.span list) = - self#send_trace l - -let[@inline] send_metrics (self : #t) (l : Proto.Metrics.metric list) = - self#send_metrics l - -let[@inline] send_logs (self : #t) (l : Proto.Logs.log_record list) = - self#send_logs l - -let[@inline] on_tick (self : #t) f = self#add_on_tick_callback f +let on_tick (self : t) f = Cb_set.register self.on_tick f (** Do background work. Call this regularly if the collector doesn't already have a ticker thread or internal timer. *) -let[@inline] tick (self : #t) = self#tick () +let tick (self : t) = + Cb_set.trigger self.on_tick; + (* also tick each emitter! *) + let now = Mtime_clock.now () in + Emitter.tick ~now self.emit_spans; + Emitter.tick ~now self.emit_metrics; + Emitter.tick ~now self.emit_logs; + () -let[@inline] cleanup (self : #t) ~on_done : unit = self#cleanup ~on_done () +let[@inline] cleanup (self : t) ~on_done : unit = self.cleanup ~on_done () (** Main exporter, used by the main tracing functions. @@ -84,9 +76,8 @@ module Main_exporter = struct end (** Set the global exporter *) - let set (exp : #t) : unit = - let exp = (exp :> t) in - List.iter exp#add_on_tick_callback (Alist.get on_tick_cbs_); + let set (exp : t) : unit = + List.iter (on_tick exp) (Alist.get on_tick_cbs_); Atomic.set exporter (Some exp) (** Remove current exporter, if any. @@ -95,7 +86,7 @@ module Main_exporter = struct match Atomic.exchange exporter None with | None -> () | Some exp -> - exp#tick (); + tick exp; cleanup exp ~on_done (** Is there a configured exporter? *) @@ -106,7 +97,7 @@ module Main_exporter = struct let add_on_tick_callback f = Alist.add on_tick_cbs_ f; - Option.iter (fun exp -> exp#add_on_tick_callback f) (get ()) + Option.iter (fun exp -> on_tick exp f) (get ()) end let (set_backend [@deprecated "use `Main_exporter.set`"]) = Main_exporter.set @@ -119,9 +110,8 @@ let (has_backend [@deprecated "use `Main_exporter.present`"]) = let (get_backend [@deprecated "use `Main_exporter.ge"]) = Main_exporter.get -let with_setup_debug_backend ?(on_done = ignore) (exp : #t) ?(enable = true) () - f = - let exp = (exp :> t) in +let with_setup_debug_backend ?(on_done = ignore) (exp : t) ?(enable = true) () f + = if enable then ( Main_exporter.set exp; Fun.protect ~finally:(fun () -> cleanup exp ~on_done) f diff --git a/src/core/gc_metrics.ml b/src/core/gc_metrics.ml index 9e3c17cf..e2ade0dd 100644 --- a/src/core/gc_metrics.ml +++ b/src/core/gc_metrics.ml @@ -36,7 +36,7 @@ let get_metrics () : Metrics.t list = [ int ~now gc.Gc.compactions ]; ] -let setup ?(min_interval_s = default_interval_s) (exp : #Exporter.t) = +let setup ?(min_interval_s = default_interval_s) (exp : Exporter.t) = (* limit rate *) let min_interval_s = max 5 min_interval_s in let min_interval = Mtime.Span.(min_interval_s * s) in @@ -45,7 +45,7 @@ let setup ?(min_interval_s = default_interval_s) (exp : #Exporter.t) = let on_tick () = if Interval_limiter.make_attempt limiter then ( let m = get_metrics () in - exp#send_metrics m + Exporter.send_metrics exp m ) in Exporter.on_tick exp on_tick diff --git a/src/core/gc_metrics.mli b/src/core/gc_metrics.mli index e0c01883..d17496aa 100644 --- a/src/core/gc_metrics.mli +++ b/src/core/gc_metrics.mli @@ -5,7 +5,7 @@ val get_metrics : unit -> Metrics.t list (** Get a few metrics from the current state of the GC. *) -val setup : ?min_interval_s:int -> #Exporter.t -> unit +val setup : ?min_interval_s:int -> Exporter.t -> unit (** Setup a hook that will emit GC statistics on every tick. It does assume that [tick] is called regularly on the exporter. For example, if we ensure the exporter's [tick] function is called every 5s, we'll get GC metrics every From 2a08d62c13deb5f2ec550fad87725426da0dc134 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:57:02 -0500 Subject: [PATCH 34/94] update exporters and emitter combinators in client --- src/client/batch.ml | 5 ++-- src/client/debug_exporter.ml | 51 ++++++++++++++++------------------- src/client/sampler.ml | 9 ++++--- src/client/stdout_exporter.ml | 49 --------------------------------- 4 files changed, 32 insertions(+), 82 deletions(-) delete mode 100644 src/client/stdout_exporter.ml diff --git a/src/client/batch.ml b/src/client/batch.ml index 62cd8703..20343630 100644 --- a/src/client/batch.ml +++ b/src/client/batch.ml @@ -91,6 +91,7 @@ let[@inline] push' self elems = ignore (push self elems : [ `Dropped | `Ok ]) open Opentelemetry_emitter let wrap_emitter (self : _ t) (e : _ Emitter.t) : _ Emitter.t = + let enabled () = e.enabled () in let closed () = e.closed () in let flush_and_close () = (* FIXME: we need to close the batch first, to prevent @@ -118,7 +119,7 @@ let wrap_emitter (self : _ t) (e : _ Emitter.t) : _ Emitter.t = in let emit l = - if l <> [] then ( + if l <> [] && e.enabled () then ( push' self l; (* TODO: it'd be nice if we checked only for size here, not @@ -129,4 +130,4 @@ let wrap_emitter (self : _ t) (e : _ Emitter.t) : _ Emitter.t = ) in - { Emitter.closed; flush_and_close; tick; emit } + { Emitter.closed; enabled; flush_and_close; tick; emit } diff --git a/src/client/debug_exporter.ml b/src/client/debug_exporter.ml index cc969902..291067bd 100644 --- a/src/client/debug_exporter.ml +++ b/src/client/debug_exporter.ml @@ -1,36 +1,31 @@ open Common_ +open Opentelemetry_emitter (** [debug exporter] behaves like [exporter], but will print signals on [stderr] before passing them to [exporter] *) -class debug ?(out = Format.err_formatter) (exp : #OTEL.Exporter.t) : - OTEL.Exporter.t = +let debug ?(out = Format.err_formatter) (exp : OTEL.Exporter.t) : + OTEL.Exporter.t = let open Proto in - object - method send_trace l = - Format.fprintf out "SPANS: %a@." (Format.pp_print_list Trace.pp_span) l; - exp#send_trace l - - method send_metrics l = - Format.fprintf out "METRICS: %a@." - (Format.pp_print_list Metrics.pp_metric) - l; - exp#send_metrics l - - method send_logs l = - Format.fprintf out "LOGS: %a@." - (Format.pp_print_list Logs.pp_log_record) - l; - exp#send_logs l - - method tick () = exp#tick () - - method add_on_tick_callback cb = exp#add_on_tick_callback cb - - method cleanup ~on_done () = - Format.fprintf out "CLEANUP@."; - exp#cleanup ~on_done () - end + { + emit_spans = + Emitter.tap + (fun sp -> Format.fprintf out "SPAN: %a@." Trace.pp_span sp) + exp.emit_spans; + emit_logs = + Emitter.tap + (fun log -> Format.fprintf out "LOG: %a@." Proto.Logs.pp_log_record log) + exp.emit_logs; + emit_metrics = + Emitter.tap + (fun m -> Format.fprintf out "METRIC: %a@." Metrics.pp_metric m) + exp.emit_metrics; + on_tick = exp.on_tick; + cleanup = + (fun ~on_done () -> + Format.fprintf out "CLEANUP@."; + exp.cleanup ~on_done ()); + } (** Exporter that simply debugs on [stderr] *) let debug_only : OTEL.Exporter.t = - new debug ~out:Format.err_formatter OTEL.Exporter.dummy + debug ~out:Format.err_formatter @@ OTEL.Exporter.dummy () diff --git a/src/client/sampler.ml b/src/client/sampler.ml index ecaece0a..9ae63e0d 100644 --- a/src/client/sampler.ml +++ b/src/client/sampler.ml @@ -34,13 +34,16 @@ let accept (self : t) : bool = open Opentelemetry_emitter let wrap_emitter (self : t) (e : _ Emitter.t) : _ Emitter.t = + let enabled () = e.enabled () in let closed () = Emitter.closed e in let flush_and_close () = Emitter.flush_and_close e in let tick ~now = Emitter.tick e ~now in let emit l = - let accepted = List.filter (fun _x -> accept self) l in - if accepted <> [] then Emitter.emit e accepted + if l <> [] && e.enabled () then ( + let accepted = List.filter (fun _x -> accept self) l in + if accepted <> [] then Emitter.emit e accepted + ) in - { Emitter.closed; flush_and_close; tick; emit } + { Emitter.closed; enabled; flush_and_close; tick; emit } diff --git a/src/client/stdout_exporter.ml b/src/client/stdout_exporter.ml deleted file mode 100644 index 71c8e2d1..00000000 --- a/src/client/stdout_exporter.ml +++ /dev/null @@ -1,49 +0,0 @@ -(** A simple exporter that prints on stdout *) - -open Common_ -open Opentelemetry_util - -open struct - let pp_span out (sp : OTEL.Span.t) = - let open OTEL in - Format.fprintf out - "@[<2>SPAN@ trace_id: %a@ span_id: %a@ name: %S@ start: %a@ end: %a@]@." - Trace_id.pp - (Trace_id.of_bytes sp.trace_id) - Span_id.pp - (Span_id.of_bytes sp.span_id) - sp.name Timestamp_ns.pp_debug sp.start_time_unix_nano - Timestamp_ns.pp_debug sp.end_time_unix_nano - - let pp_vlist mutex pp out l = - if l != [] then ( - let@ () = Util_mutex.protect mutex in - Format.fprintf out "@["; - List.iteri - (fun i x -> - if i > 0 then Format.fprintf out "@,"; - pp out x) - l; - Format.fprintf out "@]@." - ) -end - -class stdout : OTEL.Exporter.t = - let open Opentelemetry_util in - let out = Format.std_formatter in - let mutex = Mutex.create () in - - let tick_cbs = Cb_set.create () in - object - method send_trace l = pp_vlist mutex pp_span out l - - method send_metrics l = pp_vlist mutex Proto.Metrics.pp_metric out l - - method send_logs l = pp_vlist mutex Proto.Logs.pp_log_record out l - - method tick () = Cb_set.trigger tick_cbs - - method add_on_tick_callback cb = Cb_set.register tick_cbs cb - - method cleanup ~on_done () = on_done () - end From 6c59585227b34267e30ab137bc16527c8c8ed5a4 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 10:57:32 -0500 Subject: [PATCH 35/94] feat: tracer, logger, etc are regular emitters now --- src/lib/logger.ml | 32 +++++------------------- src/lib/metrics_callbacks.ml | 2 +- src/lib/metrics_callbacks.mli | 2 +- src/lib/metrics_emitter.ml | 29 ++++++---------------- src/lib/opentelemetry.ml | 1 + src/lib/tracer.ml | 46 +++++++++++++++-------------------- 6 files changed, 36 insertions(+), 76 deletions(-) diff --git a/src/lib/logger.ml b/src/lib/logger.ml index 33b890e2..a3617562 100644 --- a/src/lib/logger.ml +++ b/src/lib/logger.ml @@ -4,34 +4,14 @@ {{:https://opentelemetry.io/docs/reference/specification/overview/#log-signal} the spec} *) -open Common_ +open Opentelemetry_emitter -(** A logger object *) -class type t = object - method is_enabled : Log_record.severity -> bool +type t = Log_record.t Emitter.t - method emit : Log_record.t list -> unit -end +let dummy () : t = Emitter.dummy () -(** Dummy logger, always disabled *) -let dummy : t = - object - method is_enabled _ = false +let enabled = Emitter.enabled - method emit _ = () - end +let emit = Emitter.emit -class simple (exp : #Exporter.t) : t = - object - method is_enabled _ = true - - method emit logs = if logs <> [] then exp#send_logs logs - end - -let emit ?service_name:_ ?attrs:_ (l : Log_record.t list) : unit = - match Exporter.Main_exporter.get () with - | None -> () - | Some e -> e#send_logs l -[@@deprecated "use an explicit Logger"] - -let k_logger : t Context.key = Context.new_key () +let of_exporter (exp : Exporter.t) : t = exp.emit_logs diff --git a/src/lib/metrics_callbacks.ml b/src/lib/metrics_callbacks.ml index 4fd78c4a..ef4e1ba9 100644 --- a/src/lib/metrics_callbacks.ml +++ b/src/lib/metrics_callbacks.ml @@ -6,7 +6,7 @@ let create () : t = { cbs = Alist.make () } let[@inline] add_metrics_cb (self : t) f = Alist.add self.cbs f -let add_to_exporter (exp : #Exporter.t) (self : t) = +let add_to_exporter (exp : Exporter.t) (self : t) = let on_tick () = (* collect all metrics *) let res = ref [] in diff --git a/src/lib/metrics_callbacks.mli b/src/lib/metrics_callbacks.mli index 040d668f..b9db7b66 100644 --- a/src/lib/metrics_callbacks.mli +++ b/src/lib/metrics_callbacks.mli @@ -16,7 +16,7 @@ val add_metrics_cb : t -> (unit -> Metrics.t list) -> unit metrics. It might be called regularly by the backend, in particular (but not only) when {!Exporter.tick} is called. *) -val add_to_exporter : #Exporter.t -> t -> unit +val add_to_exporter : Exporter.t -> t -> unit (** Make sure we export metrics at every [tick] of the exporter *) module Main_set : sig diff --git a/src/lib/metrics_emitter.ml b/src/lib/metrics_emitter.ml index 4a075f4f..2203ba86 100644 --- a/src/lib/metrics_emitter.ml +++ b/src/lib/metrics_emitter.ml @@ -1,32 +1,19 @@ -open Common_ +open Opentelemetry_emitter -class type t = object - method is_enabled : unit -> bool +type t = Metrics.t Emitter.t - method emit : Metrics.t list -> unit -end +let dummy () : t = Emitter.dummy () -class dummy : t = - object - method is_enabled () = false +let enabled = Emitter.enabled - method emit _ = () - end +let emit = Emitter.emit -class simple (exp : #Exporter.t) : t = - object - method is_enabled () = true - - method emit l = if l <> [] then exp#send_metrics l - end +let of_exporter (exp : Exporter.t) : t = exp.emit_metrics (** Emit some metrics to the collector (sync). This blocks until the backend has - pushed the metrics into some internal queue, or discarded them. - - {b NOTE} be careful not to call this inside a Gc alarm, as it can cause - deadlocks. *) + pushed the metrics into some internal queue, or discarded them. *) let emit ?attrs:_ (l : Metrics.t list) : unit = match Exporter.Main_exporter.get () with | None -> () - | Some exp -> exp#send_metrics l + | Some exp -> Exporter.send_metrics exp l [@@deprecated "use an explicit Metrics_emitter.t"] diff --git a/src/lib/opentelemetry.ml b/src/lib/opentelemetry.ml index 5e6cafb0..2e3e32d3 100644 --- a/src/lib/opentelemetry.ml +++ b/src/lib/opentelemetry.ml @@ -75,6 +75,7 @@ module Logs = Logger [@@deprecated "use Logger"] (** {2 Utils} *) +module Any_signal = Any_signal module Trace_context = Trace_context module Gc_metrics = Gc_metrics diff --git a/src/lib/tracer.ml b/src/lib/tracer.ml index 15ab1e97..4e42c7dd 100644 --- a/src/lib/tracer.ml +++ b/src/lib/tracer.ml @@ -6,44 +6,36 @@ open Common_ open Proto.Trace +open Opentelemetry_emitter type span = Span.t +type t = Span.t Emitter.t (** A tracer. https://opentelemetry.io/docs/specs/otel/trace/api/#tracer *) -class type t = object - method is_enabled : unit -> bool - - method emit : span list -> unit -end (** Dummy tracer, always disabled *) -let dummy : t = - object - method is_enabled () = false - - method emit _ = () - end - -(** A simple exporter that directly calls the exporter. *) -class simple (exp : #Exporter.t) : t = - object - method is_enabled () = true - - method emit spans = if spans <> [] then Exporter.send_trace exp spans - end +let dummy () : t = Emitter.dummy () (** A tracer that uses {!Exporter.Main_exporter} *) let simple_main_exporter : t = - object - method is_enabled () = Exporter.Main_exporter.present () - - method emit spans = + let enabled () = Exporter.Main_exporter.present () in + let closed () = not (enabled ()) in + let flush_and_close () = () in + let tick ~now:_ = + match Exporter.Main_exporter.get () with + | None -> () + | Some exp -> Exporter.tick exp + in + let emit spans = + if spans <> [] then ( match Exporter.Main_exporter.get () with | None -> () - | Some exp -> exp#send_trace spans - end + | Some exp -> Exporter.send_trace exp spans + ) + in + { Emitter.enabled; closed; emit; tick; flush_and_close } (** Directly emit to the main exporter. @@ -53,7 +45,7 @@ let (emit [@deprecated "use an explicit tracer"]) = fun ?service_name:_ ?attrs:_ (spans : span list) : unit -> match Exporter.Main_exporter.get () with | None -> () - | Some exp -> exp#send_trace spans + | Some exp -> Exporter.send_trace exp spans let (add_event [@deprecated "use Span.add_event"]) = Span.add_event @@ -107,7 +99,7 @@ let with_thunk_and_finally ?(tracer = simple_main_exporter) in Span.set_status span status)); - tracer#emit [ span ] + Emitter.emit tracer [ span ] in let thunk () = Ambient_span.with_ambient span (fun () -> cb span) in thunk, finally From baf25c07ee60d8db21e50e645795a2755f0d73a5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 11:30:38 -0500 Subject: [PATCH 36/94] move interval_limiter to src/client --- src/{util => client}/interval_limiter.ml | 0 src/{util => client}/interval_limiter.mli | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/{util => client}/interval_limiter.ml (100%) rename src/{util => client}/interval_limiter.mli (100%) diff --git a/src/util/interval_limiter.ml b/src/client/interval_limiter.ml similarity index 100% rename from src/util/interval_limiter.ml rename to src/client/interval_limiter.ml diff --git a/src/util/interval_limiter.mli b/src/client/interval_limiter.mli similarity index 100% rename from src/util/interval_limiter.mli rename to src/client/interval_limiter.mli From ab4246d68610b752172a1628bdb496d377fdf942 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 11:30:49 -0500 Subject: [PATCH 37/94] feat client: add exporter_stdout --- src/client/exporter_stdout.ml | 62 +++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 src/client/exporter_stdout.ml diff --git a/src/client/exporter_stdout.ml b/src/client/exporter_stdout.ml new file mode 100644 index 00000000..4e8ec5c2 --- /dev/null +++ b/src/client/exporter_stdout.ml @@ -0,0 +1,62 @@ +(** A simple exporter that prints on stdout *) + +open Common_ +open Opentelemetry_util +open Opentelemetry_emitter + +open struct + let pp_span out (sp : OTEL.Span.t) = + let open OTEL in + Format.fprintf out + "@[<2>SPAN@ trace_id: %a@ span_id: %a@ name: %S@ start: %a@ end: %a@]@." + Trace_id.pp + (Trace_id.of_bytes sp.trace_id) + Span_id.pp + (Span_id.of_bytes sp.span_id) + sp.name Timestamp_ns.pp_debug sp.start_time_unix_nano + Timestamp_ns.pp_debug sp.end_time_unix_nano + + let pp_vlist mutex pp out l = + if l != [] then ( + let@ () = Util_mutex.protect mutex in + Format.fprintf out "@["; + List.iteri + (fun i x -> + if i > 0 then Format.fprintf out "@,"; + pp out x) + l; + Format.fprintf out "@]@." + ) +end + +let stdout : OTEL.Exporter.t = + let open Opentelemetry_util in + let out = Format.std_formatter in + let mutex = Mutex.create () in + + let closed = Atomic.make false in + + let mk_emitter pp_signal = + let emit l = + if Atomic.get closed then raise Emitter.Closed; + pp_vlist mutex pp_signal out l + in + let enabled () = not (Atomic.get closed) in + let tick ~now:_ = () in + let flush_and_close () = + if not (Atomic.exchange closed true) then + let@ () = Util_mutex.protect mutex in + Format.pp_print_flush out () + in + let closed () = Atomic.get closed in + + { Emitter.emit; closed; enabled; tick; flush_and_close } + in + + { + emit_spans = mk_emitter pp_span; + emit_logs = mk_emitter Proto.Logs.pp_log_record; + emit_metrics = mk_emitter Proto.Metrics.pp_metric; + on_tick = Cb_set.create (); + cleanup = (fun ~on_done () -> on_done ()); + } From 8b6c0a2a70345e5b6a15f0a0d88f97999e2b69b6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 12:07:52 -0500 Subject: [PATCH 38/94] fix warning --- src/core/globals.ml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/core/globals.ml b/src/core/globals.ml index 0e5ccfb6..74f7433a 100644 --- a/src/core/globals.ml +++ b/src/core/globals.ml @@ -40,10 +40,8 @@ let add_global_attribute (key : string) (v : Value.t) : unit = (* add global attributes to this list *) let merge_global_attributes_ into : _ list = - let not_redundant kv = - List.for_all Key_value.(fun kv' -> kv.key <> kv'.key) into - in - List.rev_append Key_value.(List.filter not_redundant !global_attributes) into + let not_redundant kv = List.for_all (fun kv' -> kv.key <> kv'.key) into in + List.rev_append (List.filter not_redundant !global_attributes) into (** Default span kind in {!Span.create}. This will be used in all spans that do not specify [~kind] explicitly; it is set to "internal", following From 1ff56c60bf2895d1cebe8dc2d9e60f9b6cc51307 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 12:08:01 -0500 Subject: [PATCH 39/94] perf: optimize `{Trace,Span}_id.is_zero` --- src/core/span_id.ml | 5 ++++- src/core/trace_id.ml | 17 ++++++++++++++--- src/core/trace_id.mli | 4 ++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/core/span_id.ml b/src/core/span_id.ml index f7d926fc..6285fe94 100644 --- a/src/core/span_id.ml +++ b/src/core/span_id.ml @@ -11,10 +11,13 @@ let create () : t = Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); b +(* dark magic, woo. We have an [assert] below to do the bound checks once *) +external unsafe_b_get64 : bytes -> int -> int64 = "%caml_bytes_get64u" + let[@inline] is_zero (self : t) : bool = (* try to reduce branches *) assert (Bytes.length self = 8); - let n1 = Bytes.get_int64_ne self 0 in + let n1 = unsafe_b_get64 self 0 in n1 = 0L let[@inline] is_valid self = not (is_zero self) diff --git a/src/core/trace_id.ml b/src/core/trace_id.ml index 999eb3af..e82539c5 100644 --- a/src/core/trace_id.ml +++ b/src/core/trace_id.ml @@ -17,11 +17,14 @@ let[@inline] of_bytes b = else invalid_arg "trace ID must be 16 bytes in length" +(* dark magic, woo. We have an [assert] below to do the bound checks once *) +external unsafe_b_get64 : bytes -> int -> int64 = "%caml_bytes_get64u" + let[@inline] is_zero (self : t) : bool = (* try to reduce branches *) - assert (Bytes.length self = 1); - let n1 = Bytes.get_int64_ne self 0 in - let n2 = Bytes.get_int64_ne self 8 in + assert (Bytes.length self = 16); + let n1 = unsafe_b_get64 self 0 in + let n2 = unsafe_b_get64 self 8 in n1 = 0L && n2 = 0L let[@inline] is_valid self = not (is_zero self) @@ -37,4 +40,12 @@ let[@inline] of_hex_substring s off = let pp fmt t = Format.fprintf fmt "%s" (to_hex t) +let compare = Bytes.compare + +module Map = Map.Make (struct + type nonrec t = t + + let compare = compare +end) + let k_trace_id : t Hmap.key = Hmap.Key.create () diff --git a/src/core/trace_id.mli b/src/core/trace_id.mli index 487c901b..d3c45f9b 100644 --- a/src/core/trace_id.mli +++ b/src/core/trace_id.mli @@ -8,6 +8,8 @@ val create : unit -> t val dummy : t +val compare : t -> t -> int + val pp : Format.formatter -> t -> unit val is_valid : t -> bool @@ -24,6 +26,8 @@ val of_hex : string -> t val of_hex_substring : string -> int -> t +module Map : Map.S with type key = t + val k_trace_id : t Hmap.key (** Hmap key to carry around a {!Trace_id.t}, to remember what the current trace is. From ab0f3a98ba0f1e3bc7a6794f1206b09b80828e47 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 14:16:08 -0500 Subject: [PATCH 40/94] feat exporter: split tick/on_tick again --- src/core/exporter.ml | 79 +++++++------------------------------------- src/core/span.mli | 6 +++- 2 files changed, 17 insertions(+), 68 deletions(-) diff --git a/src/core/exporter.ml b/src/core/exporter.ml index c077cfe7..8bd96b7f 100644 --- a/src/core/exporter.ml +++ b/src/core/exporter.ml @@ -9,17 +9,14 @@ open Common_ open Opentelemetry_emitter -open struct - module Proto = Opentelemetry_proto -end - type t = { emit_spans: Proto.Trace.span Emitter.t; emit_metrics: Proto.Metrics.metric Emitter.t; emit_logs: Proto.Logs.log_record Emitter.t; - on_tick: Cb_set.t; - (** Set of callbacks for "on tick". Should be triggered regularly for - background processing, timeout checks, etc. *) + on_tick: (unit -> unit) -> unit; + tick: unit -> unit; + (** Call all the callbacks registered with [on_tick]. Should be triggered + regularly for background processing, timeout checks, etc. *) cleanup: on_done:(unit -> unit) -> unit -> unit; (** [cleanup ~on_done ()] is called when the exporter is shut down, and is responsible for sending remaining batches, flushing sockets, etc. @@ -30,12 +27,13 @@ type t = { (** Dummy exporter, does nothing *) let dummy () : t = - let on_tick = Cb_set.create () in + let ticker = Cb_set.create () in { emit_spans = Emitter.dummy (); emit_metrics = Emitter.dummy (); emit_logs = Emitter.dummy (); - on_tick; + on_tick = Cb_set.register ticker; + tick = (fun () -> Cb_set.trigger ticker); cleanup = (fun ~on_done () -> on_done ()); } @@ -48,72 +46,19 @@ let[@inline] send_metrics (self : t) (l : Proto.Metrics.metric list) = let[@inline] send_logs (self : t) (l : Proto.Logs.log_record list) = Emitter.emit self.emit_logs l -let on_tick (self : t) f = Cb_set.register self.on_tick f +let[@inline] on_tick (self : t) f = self.on_tick f (** Do background work. Call this regularly if the collector doesn't already have a ticker thread or internal timer. *) let tick (self : t) = - Cb_set.trigger self.on_tick; - (* also tick each emitter! *) + (* make sure emitters get the chance to check timeouts, flush, etc. *) let now = Mtime_clock.now () in Emitter.tick ~now self.emit_spans; Emitter.tick ~now self.emit_metrics; Emitter.tick ~now self.emit_logs; + + (* call the callbacks *) + self.tick (); () let[@inline] cleanup (self : t) ~on_done : unit = self.cleanup ~on_done () - -(** Main exporter, used by the main tracing functions. - - It is better to pass an explicit exporter when possible. *) -module Main_exporter = struct - (* hidden *) - open struct - (* a list of callbacks automatically added to the main exporter *) - let on_tick_cbs_ = Alist.make () - - let exporter : t option Atomic.t = Atomic.make None - end - - (** Set the global exporter *) - let set (exp : t) : unit = - List.iter (on_tick exp) (Alist.get on_tick_cbs_); - Atomic.set exporter (Some exp) - - (** Remove current exporter, if any. - @param on_done see {!t#cleanup}, @since 0.12 *) - let remove ~on_done () : unit = - match Atomic.exchange exporter None with - | None -> () - | Some exp -> - tick exp; - cleanup exp ~on_done - - (** Is there a configured exporter? *) - let present () : bool = Option.is_some (Atomic.get exporter) - - (** Current exporter, if any *) - let[@inline] get () : t option = Atomic.get exporter - - let add_on_tick_callback f = - Alist.add on_tick_cbs_ f; - Option.iter (fun exp -> on_tick exp f) (get ()) -end - -let (set_backend [@deprecated "use `Main_exporter.set`"]) = Main_exporter.set - -let (remove_backend [@deprecated "use `Main_exporter.remove`"]) = - Main_exporter.remove - -let (has_backend [@deprecated "use `Main_exporter.present`"]) = - Main_exporter.present - -let (get_backend [@deprecated "use `Main_exporter.ge"]) = Main_exporter.get - -let with_setup_debug_backend ?(on_done = ignore) (exp : t) ?(enable = true) () f - = - if enable then ( - Main_exporter.set exp; - Fun.protect ~finally:(fun () -> cleanup exp ~on_done) f - ) else - f () diff --git a/src/core/span.mli b/src/core/span.mli index 34a4e858..d4722edc 100644 --- a/src/core/span.mli +++ b/src/core/span.mli @@ -47,6 +47,8 @@ val id : t -> Span_id.t val trace_id : t -> Trace_id.t +val is_not_dummy : t -> bool + val create_new : ?kind:kind -> ?id:Span_id.t -> @@ -96,7 +98,9 @@ val add_links' : t -> (unit -> Span_link.t list) -> unit Note that this takes a function that produces links, and will only call it if there is an instrumentation backend. *) -val add_attrs : t -> (unit -> Key_value.t list) -> unit +val add_attrs : t -> Key_value.t list -> unit + +val add_attrs' : t -> (unit -> Key_value.t list) -> unit val set_status : t -> Span_status.t -> unit (** set the span status. From 0b73b2fac914ae4f00ce81c9a1c4241281a6a574 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 14:16:32 -0500 Subject: [PATCH 41/94] feat lib: dynamic forward to main; improve Main_exporter --- src/lib/logger.ml | 13 +++- src/lib/main_exporter.ml | 115 +++++++++++++++++++++++++++++++++++ src/lib/metrics_emitter.ml | 13 ++-- src/lib/opentelemetry.ml | 8 ++- src/lib/tracer.ml | 121 ++----------------------------------- 5 files changed, 145 insertions(+), 125 deletions(-) create mode 100644 src/lib/main_exporter.ml diff --git a/src/lib/logger.ml b/src/lib/logger.ml index a3617562..9c2681fd 100644 --- a/src/lib/logger.ml +++ b/src/lib/logger.ml @@ -12,6 +12,15 @@ let dummy () : t = Emitter.dummy () let enabled = Emitter.enabled -let emit = Emitter.emit - let of_exporter (exp : Exporter.t) : t = exp.emit_logs + +let emit ?attrs:_ (logs : Log_record.t list) : unit = + match Main_exporter.get () with + | None -> () + | Some exp -> Exporter.send_logs exp logs +[@@deprecated "use an explicit Logger.t"] + +(** An emitter that uses the current {!Main_exporter} *) +let dynamic_forward_to_main_exporter : t = + Main_exporter.Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_logs) diff --git a/src/lib/main_exporter.ml b/src/lib/main_exporter.ml new file mode 100644 index 00000000..6a5284bc --- /dev/null +++ b/src/lib/main_exporter.ml @@ -0,0 +1,115 @@ +(** Main exporter, used by the main tracing functions. + + It is better to pass an explicit exporter when possible. *) + +open Exporter + +(* hidden *) +open struct + (* a list of callbacks automatically added to the main exporter *) + let on_tick_cbs_ = Alist.make () + + let exporter : t option Atomic.t = Atomic.make None +end + +(** Remove current exporter, if any. + @param on_done see {!t#cleanup}, @since 0.12 *) +let remove ~on_done () : unit = + match Atomic.exchange exporter None with + | None -> () + | Some exp -> + tick exp; + cleanup exp ~on_done + +(** Is there a configured exporter? *) +let present () : bool = Option.is_some (Atomic.get exporter) + +(** Current exporter, if any *) +let[@inline] get () : t option = Atomic.get exporter + +let add_on_tick_callback f = + Alist.add on_tick_cbs_ f; + Option.iter (fun exp -> on_tick exp f) (get ()) + +module Util = struct + open Opentelemetry_emitter + + (** An emitter that uses the current main *) + let dynamic_forward_to_main_exporter ~get_emitter () : _ Emitter.t = + let enabled () = present () in + let closed () = not (enabled ()) in + let flush_and_close () = () in + let tick ~now:_ = + match get () with + | None -> () + | Some exp -> Exporter.tick exp + in + let emit signals = + if signals <> [] then ( + match get () with + | None -> () + | Some exp -> + let emitter = get_emitter exp in + Emitter.emit emitter signals + ) + in + { Emitter.enabled; closed; emit; tick; flush_and_close } +end + +(** This exporter uses the current "main exporter" using [get()] at every + invocation. It is useful as a fallback or to port existing applications that + expect a global singleton backend^W exporter. + @since NEXT_RELEASE *) +let dynamic_forward_to_main_exporter : Exporter.t = + let open Exporter in + let emit_logs = + Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> e.emit_logs) + in + let emit_metrics = + Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_metrics) + in + let emit_spans = + Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_spans) + in + let on_tick f = + match get () with + | None -> () + | Some exp -> Exporter.on_tick exp f + in + let tick () = + match get () with + | None -> () + | Some exp -> exp.tick () + in + let cleanup ~on_done () = on_done () in + { Exporter.emit_metrics; emit_spans; emit_logs; on_tick; tick; cleanup } + +(** Set the global exporter *) +let set (exp : t) : unit = + (* sanity check! this specific exporter would just call itself, leading to + stack overflow. *) + if exp == dynamic_forward_to_main_exporter then + failwith + "cannot set Main_exporter.dynamic_forward_to_main_exporter as main \ + exporter!"; + + List.iter (on_tick exp) (Alist.get on_tick_cbs_); + Atomic.set exporter (Some exp) + +let (set_backend [@deprecated "use `Main_exporter.set`"]) = set + +let (remove_backend [@deprecated "use `Main_exporter.remove`"]) = remove + +let (has_backend [@deprecated "use `Main_exporter.present`"]) = present + +let (get_backend [@deprecated "use `Main_exporter.get"]) = get + +let with_setup_debug_backend ?(on_done = ignore) (exp : t) ?(enable = true) () f + = + if enable then ( + set exp; + Fun.protect ~finally:(fun () -> cleanup exp ~on_done) f + ) else + f () diff --git a/src/lib/metrics_emitter.ml b/src/lib/metrics_emitter.ml index 2203ba86..9c9c530b 100644 --- a/src/lib/metrics_emitter.ml +++ b/src/lib/metrics_emitter.ml @@ -6,14 +6,17 @@ let dummy () : t = Emitter.dummy () let enabled = Emitter.enabled -let emit = Emitter.emit - let of_exporter (exp : Exporter.t) : t = exp.emit_metrics (** Emit some metrics to the collector (sync). This blocks until the backend has pushed the metrics into some internal queue, or discarded them. *) -let emit ?attrs:_ (l : Metrics.t list) : unit = - match Exporter.Main_exporter.get () with +let (emit [@deprecated "use an explicit Metrics_emitter.t"]) = + fun ?attrs:_ (l : Metrics.t list) : unit -> + match Main_exporter.get () with | None -> () | Some exp -> Exporter.send_metrics exp l -[@@deprecated "use an explicit Metrics_emitter.t"] + +(** An emitter that uses the current {!Main_exporter} *) +let dynamic_forward_to_main_exporter : t = + Main_exporter.Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_metrics) diff --git a/src/lib/opentelemetry.ml b/src/lib/opentelemetry.ml index 2e3e32d3..42808709 100644 --- a/src/lib/opentelemetry.ml +++ b/src/lib/opentelemetry.ml @@ -21,7 +21,13 @@ module Timestamp_ns = Timestamp_ns (** {2 Export signals to some external collector.} *) module Exporter = Exporter -module Collector = Exporter [@@deprecated "Use 'Exporter' instead"] +module Main_exporter = Main_exporter + +module Collector = struct + include Exporter + include Main_exporter +end +[@@deprecated "Use 'Exporter' instead"] (** {2 Identifiers} *) diff --git a/src/lib/tracer.ml b/src/lib/tracer.ml index 4e42c7dd..4b6009c3 100644 --- a/src/lib/tracer.ml +++ b/src/lib/tracer.ml @@ -18,120 +18,7 @@ type t = Span.t Emitter.t (** Dummy tracer, always disabled *) let dummy () : t = Emitter.dummy () -(** A tracer that uses {!Exporter.Main_exporter} *) -let simple_main_exporter : t = - let enabled () = Exporter.Main_exporter.present () in - let closed () = not (enabled ()) in - let flush_and_close () = () in - let tick ~now:_ = - match Exporter.Main_exporter.get () with - | None -> () - | Some exp -> Exporter.tick exp - in - let emit spans = - if spans <> [] then ( - match Exporter.Main_exporter.get () with - | None -> () - | Some exp -> Exporter.send_trace exp spans - ) - in - { Emitter.enabled; closed; emit; tick; flush_and_close } - -(** Directly emit to the main exporter. - - {b NOTE} be careful not to call this inside a Gc alarm, as it can cause - deadlocks. *) -let (emit [@deprecated "use an explicit tracer"]) = - fun ?service_name:_ ?attrs:_ (spans : span list) : unit -> - match Exporter.Main_exporter.get () with - | None -> () - | Some exp -> Exporter.send_trace exp spans - -let (add_event [@deprecated "use Span.add_event"]) = Span.add_event - -let (add_attrs [@deprecated "use Span.add_attrs"]) = Span.add_attrs - -let with_thunk_and_finally ?(tracer = simple_main_exporter) - ?(force_new_trace_id = false) ?trace_state - ?(attrs : (string * [< Value.t ]) list = []) ?kind ?trace_id ?parent ?links - name cb = - let parent = - match parent with - | Some _ -> parent - | None -> Ambient_span.get () - in - let trace_id = - match trace_id, parent with - | _ when force_new_trace_id -> Trace_id.create () - | Some trace_id, _ -> trace_id - | None, Some p -> Span.trace_id p - | None, None -> Trace_id.create () - in - let start_time = Timestamp_ns.now_unix_ns () in - let span_id = Span_id.create () in - - let parent_id = Option.map Span.id parent in - - let span : Span.t = - Span.make ?trace_state ?kind ?parent:parent_id ~trace_id ~id:span_id ~attrs - ?links ~start_time ~end_time:start_time name - in - (* called once we're done, to emit a span *) - let finally res = - let end_time = Timestamp_ns.now_unix_ns () in - Proto.Trace.span_set_end_time_unix_nano span end_time; - - (match Span.status span with - | Some _ -> () - | None -> - (match res with - | Ok () -> - (* By default, all spans are Unset, which means a span completed without error. - The Ok status is reserved for when you need to explicitly mark a span as successful - rather than stick with the default of Unset (i.e., “without error”). - - https://opentelemetry.io/docs/languages/go/instrumentation/#set-span-status *) - () - | Error (e, bt) -> - Span.record_exception span e bt; - let status = - make_status ~code:Status_code_error ~message:(Printexc.to_string e) () - in - Span.set_status span status)); - - Emitter.emit tracer [ span ] - in - let thunk () = Ambient_span.with_ambient span (fun () -> cb span) in - thunk, finally - -(** Sync span guard. - - Notably, this includes {e implicit} scope-tracking: if called without a - [~scope] argument (or [~parent]/[~trace_id]), it will check in the - {!Ambient_context} for a surrounding environment, and use that as the scope. - Similarly, it uses {!Scope.with_ambient_scope} to {e set} a new scope in the - ambient context, so that any logically-nested calls to {!with_} will use - this span as their parent. - - {b NOTE} be careful not to call this inside a Gc alarm, as it can cause - deadlocks. - - @param force_new_trace_id - if true (default false), the span will not use a ambient scope, the - [~scope] argument, nor [~trace_id], but will instead always create fresh - identifiers for this span *) -let with_ ?tracer ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id - ?parent ?links name (cb : Span.t -> 'a) : 'a = - let thunk, finally = - with_thunk_and_finally ?tracer ?force_new_trace_id ?trace_state ?attrs ?kind - ?trace_id ?parent ?links name cb - in - - try - let rv = thunk () in - finally (Ok ()); - rv - with e -> - let bt = Printexc.get_raw_backtrace () in - finally (Error (e, bt)); - raise e +(** A tracer that uses the current {!Main_exporter} *) +let dynamic_forward_to_main_exporter : t = + Main_exporter.Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_spans) From 0a69040bae6f969288cf47aaa35b0efb8db82679 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 14:17:03 -0500 Subject: [PATCH 42/94] feat trace: make it compile again, no TLS, no magic strings, pass exporter --- src/trace/common_.ml | 3 +- src/trace/conv.ml | 34 +- src/trace/dune | 5 +- src/trace/opentelemetry_trace.ml | 499 +++++++++++++++--------------- src/trace/opentelemetry_trace.mli | 69 ++--- src/trace/subscriber.ml | 176 ----------- 6 files changed, 293 insertions(+), 493 deletions(-) delete mode 100644 src/trace/subscriber.ml diff --git a/src/trace/common_.ml b/src/trace/common_.ml index 74f053cf..8c12f62a 100644 --- a/src/trace/common_.ml +++ b/src/trace/common_.ml @@ -1,6 +1,5 @@ -module Otel = Opentelemetry +module OTEL = Opentelemetry module Otrace = Trace_core (* ocaml-trace *) -module TSub = Trace_subscriber.Subscriber let ( let@ ) = ( @@ ) diff --git a/src/trace/conv.ml b/src/trace/conv.ml index eca660bb..aa54bd79 100644 --- a/src/trace/conv.ml +++ b/src/trace/conv.ml @@ -1,40 +1,40 @@ open Common_ -let[@inline] trace_id_of_otel (id : Otel.Trace_id.t) : Otrace.trace_id = - if id == Otel.Trace_id.dummy then +let[@inline] trace_id_of_otel (id : OTEL.Trace_id.t) : Otrace.trace_id = + if id == OTEL.Trace_id.dummy then Otrace.Collector.dummy_trace_id else - Bytes.unsafe_to_string (Otel.Trace_id.to_bytes id) + Bytes.unsafe_to_string (OTEL.Trace_id.to_bytes id) -let[@inline] trace_id_to_otel (id : Otrace.trace_id) : Otel.Trace_id.t = +let[@inline] trace_id_to_otel (id : Otrace.trace_id) : OTEL.Trace_id.t = if id == Otrace.Collector.dummy_trace_id then - Otel.Trace_id.dummy + OTEL.Trace_id.dummy else - Otel.Trace_id.of_bytes @@ Bytes.unsafe_of_string id + OTEL.Trace_id.of_bytes @@ Bytes.unsafe_of_string id -let[@inline] span_id_of_otel (id : Otel.Span_id.t) : Otrace.span = - if id == Otel.Span_id.dummy then +let[@inline] span_id_of_otel (id : OTEL.Span_id.t) : Otrace.span = + if id == OTEL.Span_id.dummy then Otrace.Collector.dummy_span else - Bytes.get_int64_le (Otel.Span_id.to_bytes id) 0 + Bytes.get_int64_le (OTEL.Span_id.to_bytes id) 0 -let[@inline] span_id_to_otel (id : Otrace.span) : Otel.Span_id.t = +let[@inline] span_id_to_otel (id : Otrace.span) : OTEL.Span_id.t = if id == Otrace.Collector.dummy_span then - Otel.Span_id.dummy + OTEL.Span_id.dummy else ( let b = Bytes.create 8 in Bytes.set_int64_le b 0 id; - Otel.Span_id.of_bytes b + OTEL.Span_id.of_bytes b ) -let[@inline] ctx_to_otel (self : Otrace.explicit_span_ctx) : Otel.Span_ctx.t = - Otel.Span_ctx.make +let[@inline] ctx_to_otel (self : Otrace.explicit_span_ctx) : OTEL.Span_ctx.t = + OTEL.Span_ctx.make ~trace_id:(trace_id_to_otel self.trace_id) ~parent_id:(span_id_to_otel self.span) () -let[@inline] ctx_of_otel (ctx : Otel.Span_ctx.t) : Otrace.explicit_span_ctx = +let[@inline] ctx_of_otel (ctx : OTEL.Span_ctx.t) : Otrace.explicit_span_ctx = { - trace_id = trace_id_of_otel (Otel.Span_ctx.trace_id ctx); - span = span_id_of_otel (Otel.Span_ctx.parent_id ctx); + trace_id = trace_id_of_otel (OTEL.Span_ctx.trace_id ctx); + span = span_id_of_otel (OTEL.Span_ctx.parent_id ctx); } diff --git a/src/trace/dune b/src/trace/dune index adcd12df..da752802 100644 --- a/src/trace/dune +++ b/src/trace/dune @@ -3,10 +3,11 @@ (public_name opentelemetry.trace) (synopsis "Use opentelemetry as a collector for trace") (optional) ; trace - (flags :standard -open Opentelemetry_util) + (flags :standard -open Opentelemetry_util -open Opentelemetry_atomic) (libraries opentelemetry.ambient-context opentelemetry.util - opentelemetry.core + opentelemetry.atomic + opentelemetry trace.core trace.subscriber)) diff --git a/src/trace/opentelemetry_trace.ml b/src/trace/opentelemetry_trace.ml index 4dda635f..31f87c5c 100644 --- a/src/trace/opentelemetry_trace.ml +++ b/src/trace/opentelemetry_trace.ml @@ -1,5 +1,5 @@ open Common_ -module TLS = Thread_local_storage +module Conv = Conv open Conv let on_internal_error = @@ -7,305 +7,288 @@ let on_internal_error = module Extensions = struct type Otrace.extension_event += - | Ev_link_span of Otrace.explicit_span * Otrace.explicit_span - | Ev_record_exn of Otrace.explicit_span * exn * Printexc.raw_backtrace - | Ev_set_span_kind of Otrace.explicit_span * Otel.Span_kind.t + | Ev_link_span of Otrace.explicit_span * OTEL.Span_ctx.t + | Ev_record_exn of { + sp: Otrace.explicit_span; + exn: exn; + bt: Printexc.raw_backtrace; + } + | Ev_set_span_kind of Otrace.explicit_span * OTEL.Span_kind.t end open Extensions + +(* use the fast, thread safe span table that relies on picos. *) module Span_tbl = Trace_subscriber.Span_tbl -(* TODO: subscriber -type state = { - foo: unit (* TODO: *) -} - -module Callbacks -*) - -let subscriber_of_exporter _ = assert false - -let collector_of_exporter _ = assert false - module Internal = struct - type span_begin = { - start_time: int64; - name: string; - __FILE__: string; - __LINE__: int; - __FUNCTION__: string option; - scope: Otel.Scope.t; - parent: Otel.Span_ctx.t option; - } + type span_begin = { span: OTEL.Span.t } [@@unboxed] - module Active_span_tbl = Hashtbl.Make (struct - include Int64 + module Active_span_tbl = Span_tbl - let hash : t -> int = Hashtbl.hash - end) + type state = { tbl: span_begin Active_span_tbl.t } [@@unboxed] - (** key to access a OTEL scope from an explicit span *) - let k_explicit_scope : Otel.Scope.t Otrace.Meta_map.key = + let create_state () : state = { tbl = Active_span_tbl.create () } + + (** key to access a OTEL span (the current span) from a Trace_core + explicit_span *) + let k_explicit_span : OTEL.Span.t Otrace.Meta_map.key = Otrace.Meta_map.Key.create () - (** Per-thread set of active spans. *) - module Active_spans = struct - type t = { tbl: span_begin Active_span_tbl.t } [@@unboxed] - - let create () : t = { tbl = Active_span_tbl.create 32 } - - let k_tls : t TLS.t = TLS.create () - - let[@inline] get () : t = - try TLS.get_exn k_tls - with TLS.Not_set -> - let self = create () in - TLS.set k_tls self; - self - end - - let otrace_of_otel (id : Otel.Span_id.t) : int64 = - let bs = Otel.Span_id.to_bytes id in + let otrace_of_otel (id : OTEL.Span_id.t) : int64 = + let bs = OTEL.Span_id.to_bytes id in (* lucky that it coincides! *) assert (Bytes.length bs = 8); Bytes.get_int64_le bs 0 - let enter_span' ?(explicit_parent : Otrace.explicit_span_ctx option) - ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name = - let open Otel in + let enter_span' (self : state) + ?(explicit_parent : Otrace.explicit_span_ctx option) ~__FUNCTION__ + ~__FILE__ ~__LINE__ ~data name = + let open OTEL in let otel_id = Span_id.create () in let otrace_id = otrace_of_otel otel_id in - let parent_scope = Scope.get_ambient_scope () in - let trace_id = - match parent_scope with - | Some sc -> sc.trace_id - | None -> Trace_id.create () - in - let parent = - match explicit_parent, parent_scope with + let implicit_parent = OTEL.Ambient_span.get () in + + let trace_id, parent_id = + match explicit_parent, implicit_parent with | Some p, _ -> - Some - (Otel.Span_ctx.make ~trace_id ~parent_id:(span_id_to_otel p.span) ()) - | None, Some parent -> Some (Otel.Scope.to_span_ctx parent) - | None, None -> None - in - - let new_scope = Otel.Scope.make ~trace_id ~span_id:otel_id ~attrs:data () in - - let start_time = Timestamp_ns.now_unix_ns () in - let sb = - { - start_time; - name; - __FILE__; - __LINE__; - __FUNCTION__; - scope = new_scope; - parent; - } - in - - let active_spans = Active_spans.get () in - Active_span_tbl.add active_spans.tbl otrace_id sb; - - otrace_id, sb - - let exit_span_ - { start_time; name; __FILE__; __LINE__; __FUNCTION__; scope; parent } = - let open Otel in - let end_time = Timestamp_ns.now_unix_ns () in - let kind = Scope.kind scope in - let attrs = Scope.attrs scope in - - let status : Span_status.t = - match List.assoc_opt Well_known.status_error_key attrs with - | Some (`String message) -> - Span_status.make ~message ~code:Status_code_error - | _ -> Span_status.make ~message:"" ~code:Status_code_ok + let trace_id = p.trace_id |> Conv.trace_id_to_otel in + let parent_id = + try + let sb = Active_span_tbl.find_exn self.tbl p.span in + Some (OTEL.Span.id sb.span) + with Not_found -> None + in + trace_id, parent_id + | None, Some p -> Span.trace_id p, Some (Span.id p) + | None, None -> Trace_id.create (), None in let attrs = - match __FUNCTION__ with - | None -> - [ "code.filepath", `String __FILE__; "code.lineno", `Int __LINE__ ] - @ attrs - | Some __FUNCTION__ -> - let last_dot = String.rindex __FUNCTION__ '.' in - let module_path = String.sub __FUNCTION__ 0 last_dot in - let function_name = - String.sub __FUNCTION__ (last_dot + 1) - (String.length __FUNCTION__ - last_dot - 1) - in + ("code.filepath", `String __FILE__) + :: ("code.lineno", `Int __LINE__) + :: data + in + + let start_time = Timestamp_ns.now_unix_ns () in + let span : OTEL.Span.t = + OTEL.Span.make ?parent:parent_id ~trace_id ~id:otel_id ~attrs name + ~start_time ~end_time:start_time + in + + let sb = { span } in + + (match __FUNCTION__ with + | Some __FUNCTION__ when OTEL.Span.is_not_dummy span -> + let last_dot = String.rindex __FUNCTION__ '.' in + let module_path = String.sub __FUNCTION__ 0 last_dot in + let function_name = + String.sub __FUNCTION__ (last_dot + 1) + (String.length __FUNCTION__ - last_dot - 1) + in + Span.add_attrs span [ - "code.filepath", `String __FILE__; - "code.lineno", `Int __LINE__; "code.function", `String function_name; "code.namespace", `String module_path; ] - @ attrs - in + | _ -> ()); - let parent_id = Option.map Otel.Span_ctx.parent_id parent in - Span.create ~kind ~trace_id:scope.trace_id ?parent:parent_id ~status - ~id:scope.span_id ~start_time ~end_time ~attrs - ~events:(Scope.events scope) name - |> fst + Active_span_tbl.add self.tbl otrace_id sb; - let exit_span' otrace_id otel_span_begin = - let active_spans = Active_spans.get () in - Active_span_tbl.remove active_spans.tbl otrace_id; + otrace_id, sb + + let exit_span_ { span } : OTEL.Span.t = + let open OTEL in + let end_time = Timestamp_ns.now_unix_ns () in + Proto.Trace.span_set_end_time_unix_nano span end_time; + span + + let exit_span' (self : state) otrace_id otel_span_begin = + Active_span_tbl.remove self.tbl otrace_id; exit_span_ otel_span_begin - let exit_span_from_id otrace_id = - let active_spans = Active_spans.get () in - match Active_span_tbl.find_opt active_spans.tbl otrace_id with - | None -> None - | Some otel_span_begin -> - Active_span_tbl.remove active_spans.tbl otrace_id; + (** Find the OTEL span corresponding to this Trace span *) + let exit_span_from_id (self : state) otrace_id = + match Active_span_tbl.find_exn self.tbl otrace_id with + | exception Not_found -> None + | otel_span_begin -> + Active_span_tbl.remove self.tbl otrace_id; Some (exit_span_ otel_span_begin) - let[@inline] get_scope (span : Otrace.explicit_span) : Otel.Scope.t option = - Otrace.Meta_map.find k_explicit_scope span.meta - - module M = struct - let with_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name cb = - let otrace_id, sb = - enter_span' ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name - in - - Otel.Scope.with_ambient_scope sb.scope @@ fun () -> - match cb otrace_id with - | res -> - let otel_span = exit_span' otrace_id sb in - Otel.Trace.emit [ otel_span ]; - res - | exception e -> - let bt = Printexc.get_raw_backtrace () in - - Otel.Scope.record_exception sb.scope e bt; - let otel_span = exit_span' otrace_id sb in - Otel.Trace.emit [ otel_span ]; - - Printexc.raise_with_backtrace e bt - - let enter_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : - Trace_core.span = - let otrace_id, _sb = - enter_span' ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name - in - (* NOTE: we cannot enter ambient scope in a disjoint way - with the exit, because we only have [Ambient_context.with_binding], - no [set_binding] *) - otrace_id - - let exit_span otrace_id = - match exit_span_from_id otrace_id with - | None -> () - | Some otel_span -> Otel.Trace.emit [ otel_span ] - - let enter_manual_span ~(parent : Otrace.explicit_span_ctx option) ~flavor:_ - ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Otrace.explicit_span = - let otrace_id, sb = - match parent with - | None -> enter_span' ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name - | Some parent -> - enter_span' ~explicit_parent:parent ~__FUNCTION__ ~__FILE__ ~__LINE__ - ~data name - in - - let active_spans = Active_spans.get () in - Active_span_tbl.add active_spans.tbl otrace_id sb; - - Otrace. - { - span = otrace_id; - trace_id = trace_id_of_otel sb.scope.trace_id; - meta = Meta_map.(empty |> add k_explicit_scope sb.scope); - } - - let exit_manual_span Otrace.{ span = otrace_id; _ } = - let active_spans = Active_spans.get () in - match Active_span_tbl.find_opt active_spans.tbl otrace_id with - | None -> !on_internal_error (spf "no active span with ID %Ld" otrace_id) - | Some sb -> - let otel_span = exit_span' otrace_id sb in - Otel.Trace.emit [ otel_span ] - - let add_data_to_span otrace_id data = - let active_spans = Active_spans.get () in - match Active_span_tbl.find_opt active_spans.tbl otrace_id with - | None -> !on_internal_error (spf "no active span with ID %Ld" otrace_id) - | Some sb -> Otel.Scope.add_attrs sb.scope (fun () -> data) - - let add_data_to_manual_span (span : Otrace.explicit_span) data : unit = - match get_scope span with - | None -> - !on_internal_error (spf "manual span does not a contain an OTEL scope") - | Some scope -> Otel.Scope.add_attrs scope (fun () -> data) - - let message ?span ~data:_ msg : unit = - (* gather information from context *) - let old_scope = Otel.Scope.get_ambient_scope () in - let trace_id = Option.map (fun sc -> sc.Otel.Scope.trace_id) old_scope in - - let span_id = - match span with - | Some id -> Some (span_id_to_otel id) - | None -> Option.map (fun sc -> sc.Otel.Scope.span_id) old_scope - in - - let log = Otel.Logs.make_str ?trace_id ?span_id msg in - Otel.Logs.emit [ log ] - - let shutdown () = () - - let name_process _name = () - - let name_thread _name = () - - let counter_int ~data name cur_val : unit = - let _kind, attrs = otel_attrs_of_otrace_data data in - let m = Otel.Metrics.(gauge ~name [ int ~attrs cur_val ]) in - Otel.Metrics.emit [ m ] - - let counter_float ~data name cur_val : unit = - let _kind, attrs = otel_attrs_of_otrace_data data in - let m = Otel.Metrics.(gauge ~name [ float ~attrs cur_val ]) in - Otel.Metrics.emit [ m ] - - let extension_event = function - | Ev_link_span (sp1, sp2) -> - (match get_scope sp1, get_scope sp2 with - | Some sc1, Some sc2 -> - Otel.Scope.add_links sc1 (fun () -> [ Otel.Scope.to_span_link sc2 ]) - | _ -> !on_internal_error "could not find scope for OTEL span") - | Ev_set_span_kind (sp, k) -> - (match get_scope sp with - | None -> !on_internal_error "could not find scope for OTEL span" - | Some sc -> Otel.Scope.set_kind sc k) - | Ev_record_exn (sp, exn, bt) -> - (match get_scope sp with - | None -> !on_internal_error "could not find scope for OTEL span" - | Some sc -> Otel.Scope.record_exception sc exn bt) - | _ -> () - end + let[@inline] get_span_ (span : Otrace.explicit_span) : OTEL.Span.t option = + Otrace.Meta_map.find k_explicit_span span.meta end +module type COLLECTOR_ARG = sig + val exporter : OTEL.Exporter.t +end + +module Make_collector (A : COLLECTOR_ARG) = struct + open Internal + + let exporter = A.exporter + + let state = create_state () + + let with_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name cb = + let otrace_id, sb = + enter_span' state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name + in + + match + let@ () = OTEL.Ambient_span.with_ambient sb.span in + cb otrace_id + with + | res -> + let otel_span = exit_span' state otrace_id sb in + OTEL.Exporter.send_trace exporter [ otel_span ]; + res + | exception e -> + let bt = Printexc.get_raw_backtrace () in + + OTEL.Span.record_exception sb.span e bt; + let otel_span = exit_span' state otrace_id sb in + OTEL.Exporter.send_trace exporter [ otel_span ]; + + Printexc.raise_with_backtrace e bt + + let enter_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Trace_core.span + = + let otrace_id, _sb = + enter_span' state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name + in + (* NOTE: we cannot enter ambient scope in a disjoint way + with the exit, because we only have [Ambient_context.with_binding], + no [set_binding] *) + otrace_id + + let exit_span otrace_id = + match exit_span_from_id state otrace_id with + | None -> () + | Some otel_span -> OTEL.Exporter.send_trace exporter [ otel_span ] + + let enter_manual_span ~(parent : Otrace.explicit_span_ctx option) ~flavor:_ + ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Otrace.explicit_span = + let otrace_id, sb = + match parent with + | None -> enter_span' state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name + | Some parent -> + enter_span' state ~explicit_parent:parent ~__FUNCTION__ ~__FILE__ + ~__LINE__ ~data name + in + + Active_span_tbl.add state.tbl otrace_id sb; + + { + Otrace.span = otrace_id; + trace_id = trace_id_of_otel (OTEL.Span.trace_id sb.span); + meta = Otrace.Meta_map.(empty |> add k_explicit_span sb.span); + } + + let exit_manual_span { Otrace.span = otrace_id; _ } = + match Active_span_tbl.find_exn state.tbl otrace_id with + | exception Not_found -> + !on_internal_error (spf "no active span with ID %Ld" otrace_id) + | sb -> + let otel_span = exit_span' state otrace_id sb in + OTEL.Exporter.send_trace exporter [ otel_span ] + + let add_data_to_span otrace_id data = + match Active_span_tbl.find_exn state.tbl otrace_id with + | exception Not_found -> + !on_internal_error (spf "no active span with ID %Ld" otrace_id) + | sb -> OTEL.Span.add_attrs sb.span data + + let add_data_to_manual_span (span : Otrace.explicit_span) data : unit = + match get_span_ span with + | None -> + !on_internal_error (spf "manual span does not a contain an OTEL scope") + | Some span -> OTEL.Span.add_attrs span data + + let message ?span ~data:_ msg : unit = + (* gather information from context *) + let old_span = OTEL.Ambient_span.get () in + let trace_id = Option.map OTEL.Span.trace_id old_span in + + let span_id = + match span with + | Some id -> Some (span_id_to_otel id) + | None -> Option.map OTEL.Span.id old_span + in + + let log = OTEL.Log_record.make_str ?trace_id ?span_id msg in + OTEL.Exporter.send_logs exporter [ log ] + + let shutdown () = () + + let name_process _name = () + + let name_thread _name = () + + let counter_int ~data:attrs name cur_val : unit = + let m = OTEL.Metrics.(gauge ~name [ int ~attrs cur_val ]) in + OTEL.Exporter.send_metrics exporter [ m ] + + let counter_float ~data:attrs name cur_val : unit = + let m = OTEL.Metrics.(gauge ~name [ float ~attrs cur_val ]) in + OTEL.Exporter.send_metrics exporter [ m ] + + let extension_event = function + | Ev_link_span (sp1, sc2) -> + (match get_span_ sp1 with + | Some sc1 -> OTEL.Span.add_links sc1 [ OTEL.Span_link.of_span_ctx sc2 ] + | _ -> !on_internal_error "could not find scope for OTEL span") + | Ev_set_span_kind (sp, k) -> + (match get_span_ sp with + | None -> !on_internal_error "could not find scope for OTEL span" + | Some sc -> OTEL.Span.set_kind sc k) + | Ev_record_exn { sp; exn; bt } -> + (match get_span_ sp with + | None -> !on_internal_error "could not find scope for OTEL span" + | Some sc -> OTEL.Span.record_exception sc exn bt) + | _ -> () +end + +let collector_of_exporter (exp : OTEL.Exporter.t) : Trace_core.collector = + let module M = Make_collector (struct + let exporter = exp + end) in + (module M : Trace_core.Collector.S) + +let link_span_to_otel_ctx (sp1 : Otrace.explicit_span) (sp2 : OTEL.Span_ctx.t) : + unit = + if Otrace.enabled () then Otrace.extension_event @@ Ev_link_span (sp1, sp2) + +(* let link_spans (sp1 : Otrace.explicit_span) (sp2 : Otrace.explicit_span) : unit = if Otrace.enabled () then Otrace.extension_event @@ Ev_link_span (sp1, sp2) + *) let set_span_kind sp k : unit = if Otrace.enabled () then Otrace.extension_event @@ Ev_set_span_kind (sp, k) let record_exception sp exn bt : unit = - if Otrace.enabled () then Otrace.extension_event @@ Ev_record_exn (sp, exn, bt) + if Otrace.enabled () then + Otrace.extension_event @@ Ev_record_exn { sp; exn; bt } -let collector () : Otrace.collector = (module Internal.M) +(** Collector that forwards to the {b currently installed} OTEL exporter. *) +let collector_main_otel_exporter () : Otrace.collector = + collector_of_exporter OTEL.Main_exporter.dynamic_forward_to_main_exporter -let setup () = Otrace.setup_collector @@ collector () +let (collector + [@deprecated "use collector_of_exporter or collector_main_otel_exporter"]) + = + collector_main_otel_exporter -let setup_with_otel_backend b : unit = - Otel.Collector.set_backend b; - setup () +let setup () = Otrace.setup_collector @@ collector_main_otel_exporter () + +let setup_with_otel_exporter exp : unit = + let coll = collector_of_exporter exp in + OTEL.Main_exporter.set exp; + Otrace.setup_collector coll + +let setup_with_otel_backend = setup_with_otel_exporter + +module Well_known = struct end diff --git a/src/trace/opentelemetry_trace.mli b/src/trace/opentelemetry_trace.mli index a8d511f6..f4045f79 100644 --- a/src/trace/opentelemetry_trace.mli +++ b/src/trace/opentelemetry_trace.mli @@ -18,39 +18,37 @@ (* ... *) ]} *) -module Otel := Opentelemetry +module OTEL := Opentelemetry_core module Otrace := Trace_core -module TLS := Thread_local_storage (** Conversions between [Opentelemetry] and [Trace_core] types *) module Conv : sig - val trace_id_of_otel : Otel.Trace_id.t -> string + val trace_id_of_otel : OTEL.Trace_id.t -> string - val trace_id_to_otel : string -> Otel.Trace_id.t + val trace_id_to_otel : string -> OTEL.Trace_id.t - val span_id_of_otel : Otel.Span_id.t -> int64 + val span_id_of_otel : OTEL.Span_id.t -> int64 - val span_id_to_otel : int64 -> Otel.Span_id.t + val span_id_to_otel : int64 -> OTEL.Span_id.t - val ctx_to_otel : Otrace.explicit_span_ctx -> Otel.Span_ctx.t + val ctx_to_otel : Otrace.explicit_span_ctx -> OTEL.Span_ctx.t - val ctx_of_otel : Otel.Span_ctx.t -> Otrace.explicit_span_ctx + val ctx_of_otel : OTEL.Span_ctx.t -> Otrace.explicit_span_ctx end (** The extension events for {!Trace_core}. *) module Extensions : sig type Otrace.extension_event += - | Ev_link_span of Otrace.explicit_span * Otrace.explicit_span_ctx + | Ev_link_span of Otrace.explicit_span * OTEL.Span_ctx.t (** Link the given span to the given context. The context isn't the parent, but the link can be used to correlate both spans. *) | Ev_record_exn of { - sp: Otrace.span; + sp: Otrace.explicit_span; exn: exn; bt: Printexc.raw_backtrace; - error: bool; (** Is this an actual internal error? *) } (** Record exception and potentially turn span to an error *) - | Ev_set_span_kind of Otrace.span * Otel.Span_kind.t + | Ev_set_span_kind of Otrace.explicit_span * OTEL.Span_kind.t end val on_internal_error : (string -> unit) ref @@ -59,25 +57,38 @@ val on_internal_error : (string -> unit) ref val setup : unit -> unit (** Install the OTEL backend as a Trace collector *) -val setup_with_otel_exporter : #Opentelemetry.Exporter.t -> unit +val setup_with_otel_exporter : OTEL.Exporter.t -> unit (** Same as {!setup}, but using the given exporter *) -val setup_with_otel_backend : #Opentelemetry.Exporter.t -> unit +val setup_with_otel_backend : OTEL.Exporter.t -> unit [@@deprecated "use setup_with_otel_exporter"] -val subscriber_of_exporter : #Otel.Exporter.t -> Trace_subscriber.t +(* TODO: subscriber, with the next gen of Trace_subscriber + that allows us to provide [new_trace_id] so we can produce 16B trace IDs. +val subscriber_of_exporter : OTEL.Exporter.t -> Trace_subscriber.t +*) -val collector_of_exporter : #Otel.Exporter.t -> Trace_core.collector +val collector_of_exporter : OTEL.Exporter.t -> Trace_core.collector val collector : unit -> Trace_core.collector [@@deprecated "use collector_of_exporter, avoid global state"] (** Make a Trace collector that uses the OTEL backend to send spans and logs *) +(* NOTE: we cannot be sure that [sc2] is still alive and findable + in the active spans table. We could provide this operation under + the explicit precondition that it is? + val link_spans : Otrace.explicit_span -> Otrace.explicit_span -> unit (** [link_spans sp1 sp2] modifies [sp1] by adding a span link to [sp2]. @since 0.11 *) +*) -val set_span_kind : Otrace.explicit_span -> Otel.Span.kind -> unit +val link_span_to_otel_ctx : Otrace.explicit_span -> OTEL.Span_ctx.t -> unit +(** [link_spans sp1 sp_ctx2] modifies [sp1] by adding a span link to [sp_ctx2]. + It must be the case that [sp1] is a currently active span. + @since NEXT_RELEASE *) + +val set_span_kind : Otrace.explicit_span -> OTEL.Span.kind -> unit (** [set_span_kind sp k] sets the span's kind. @since 0.11 *) @@ -86,24 +97,6 @@ val record_exception : (** Record exception in the current span. @since 0.11 *) -(** Static references for well-known identifiers; see {!label-wellknown}. *) -module Well_known : sig - val spankind_key : string - - val internal : Otrace.user_data - - val server : Otrace.user_data - - val client : Otrace.user_data - - val producer : Otrace.user_data - - val consumer : Otrace.user_data - - val spankind_of_string : string -> Otel.Span.kind - - val otel_attrs_of_otrace_data : - (string * Otrace.user_data) list -> - Otel.Span.kind * Otel.Span.key_value list -end -[@@deprecated "use the regular functions for this"] +module Well_known : sig end +[@@deprecated + "use the regular functions such as `link_spans` or `set_span_kind` for this"] diff --git a/src/trace/subscriber.ml b/src/trace/subscriber.ml deleted file mode 100644 index 9b6e2c15..00000000 --- a/src/trace/subscriber.ml +++ /dev/null @@ -1,176 +0,0 @@ -open Common_ -open Trace_core -module Span_tbl = Trace_subscriber.Span_tbl - -module Buf_pool = struct - type t = Buffer.t Rpool.t - - let create ?(max_size = 32) ?(buf_size = 256) () : t = - Rpool.create ~max_size ~clear:Buffer.reset - ~create:(fun () -> Buffer.create buf_size) - () -end - -open struct - let[@inline] time_us_of_time_ns (t : int64) : float = - Int64.div t 1_000L |> Int64.to_float - - let[@inline] int64_of_trace_id_ (id : Trace_core.trace_id) : int64 = - if id == Trace_core.Collector.dummy_trace_id then - 0L - else - Bytes.get_int64_le (Bytes.unsafe_of_string id) 0 -end - -let on_tracing_error = ref (fun s -> Printf.eprintf "%s\n%!" s) - -type span_info = { - tid: int; - name: string; - start_us: float; - mutable data: (string * Sub.user_data) list; - (* NOTE: thread safety: this is supposed to only be modified by the thread -that's running this (synchronous, stack-abiding) span. *) -} -(** Information we store about a span begin event, to emit a complete event when - we meet the corresponding span end event *) - -type t = { - active: bool A.t; - pid: int; - spans: span_info Span_tbl.t; - buf_pool: Buf_pool.t; - exporter: Exporter.t; -} -(** Subscriber state *) - -open struct - let print_non_closed_spans_warning spans = - let module Str_set = Set.Make (String) in - let spans = Span_tbl.to_list spans in - if spans <> [] then ( - !on_tracing_error - @@ Printf.sprintf "trace-tef: warning: %d spans were not closed" - (List.length spans); - let names = - List.fold_left - (fun set (_, span) -> Str_set.add span.name set) - Str_set.empty spans - in - Str_set.iter - (fun name -> - !on_tracing_error @@ Printf.sprintf " span %S was not closed" name) - names; - flush stderr - ) -end - -let close (self : t) : unit = - if A.exchange self.active false then ( - print_non_closed_spans_warning self.spans; - self.exporter.close () - ) - -let[@inline] active self = A.get self.active - -let[@inline] flush (self : t) : unit = self.exporter.flush () - -let create ?(buf_pool = Buf_pool.create ()) ~pid ~exporter () : t = - { active = A.make true; exporter; buf_pool; pid; spans = Span_tbl.create () } - -module Callbacks = struct - type st = t - - let on_init _ ~time_ns:_ = () - - let on_shutdown (self : st) ~time_ns:_ = close self - - let on_name_process (self : st) ~time_ns:_ ~tid:_ ~name : unit = - let@ buf = Rpool.with_ self.buf_pool in - Writer.emit_name_process ~pid:self.pid ~name buf; - self.exporter.on_json buf - - let on_name_thread (self : st) ~time_ns:_ ~tid ~name : unit = - let@ buf = Rpool.with_ self.buf_pool in - Writer.emit_name_thread buf ~pid:self.pid ~tid ~name; - self.exporter.on_json buf - - (* add function name, if provided, to the metadata *) - let add_fun_name_ fun_name data : _ list = - match fun_name with - | None -> data - | Some f -> ("function", Sub.U_string f) :: data - - let[@inline] on_enter_span (self : st) ~__FUNCTION__:fun_name ~__FILE__:_ - ~__LINE__:_ ~time_ns ~tid ~data ~name span : unit = - let time_us = time_us_of_time_ns @@ time_ns in - let data = add_fun_name_ fun_name data in - let info = { tid; name; start_us = time_us; data } in - (* save the span so we find it at exit *) - Span_tbl.add self.spans span info - - let on_exit_span (self : st) ~time_ns ~tid:_ span : unit = - let time_us = time_us_of_time_ns @@ time_ns in - - match Span_tbl.find_exn self.spans span with - | exception Not_found -> - !on_tracing_error - (Printf.sprintf "trace-tef: error: cannot find span %Ld" span) - | { tid; name; start_us; data } -> - Span_tbl.remove self.spans span; - let@ buf = Rpool.with_ self.buf_pool in - Writer.emit_duration_event buf ~pid:self.pid ~tid ~name ~start:start_us - ~end_:time_us ~args:data; - - self.exporter.on_json buf - - let on_add_data (self : st) ~data span = - if data <> [] then ( - try - let info = Span_tbl.find_exn self.spans span in - info.data <- List.rev_append data info.data - with Not_found -> - !on_tracing_error - (Printf.sprintf "trace-tef: error: cannot find span %Ld" span) - ) - - let on_message (self : st) ~time_ns ~tid ~span:_ ~data msg : unit = - let time_us = time_us_of_time_ns @@ time_ns in - let@ buf = Rpool.with_ self.buf_pool in - Writer.emit_instant_event buf ~pid:self.pid ~tid ~name:msg ~ts:time_us - ~args:data; - self.exporter.on_json buf - - let on_counter (self : st) ~time_ns ~tid ~data:_ ~name n : unit = - let time_us = time_us_of_time_ns @@ time_ns in - let@ buf = Rpool.with_ self.buf_pool in - Writer.emit_counter buf ~pid:self.pid ~name ~tid ~ts:time_us n; - self.exporter.on_json buf - - let on_enter_manual_span (self : st) ~__FUNCTION__:fun_name ~__FILE__:_ - ~__LINE__:_ ~time_ns ~tid ~parent:_ ~data ~name ~flavor ~trace_id _span : - unit = - let time_us = time_us_of_time_ns @@ time_ns in - - let data = add_fun_name_ fun_name data in - let@ buf = Rpool.with_ self.buf_pool in - Writer.emit_manual_begin buf ~pid:self.pid ~tid ~name - ~id:(int64_of_trace_id_ trace_id) - ~ts:time_us ~args:data ~flavor; - self.exporter.on_json buf - - let on_exit_manual_span (self : st) ~time_ns ~tid ~name ~data ~flavor - ~trace_id (_ : span) : unit = - let time_us = time_us_of_time_ns @@ time_ns in - - let@ buf = Rpool.with_ self.buf_pool in - Writer.emit_manual_end buf ~pid:self.pid ~tid ~name - ~id:(int64_of_trace_id_ trace_id) - ~ts:time_us ~flavor ~args:data; - self.exporter.on_json buf - - let on_extension_event _ ~time_ns:_ ~tid:_ _ev = () -end - -let subscriber (self : t) : Sub.t = - Sub.Subscriber.Sub { st = self; callbacks = (module Callbacks) } From 8d9f21da2c9303e18a6ebb921e5160442a8a9b10 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 14:25:27 -0500 Subject: [PATCH 43/94] migrate a few more things to lib/ from core/ --- src/core/span.ml | 16 ++++++++++++---- src/core/span.mli | 7 +++++++ src/{core => lib}/conventions.ml | 0 src/{core => lib}/gc_metrics.ml | 2 +- src/{core => lib}/gc_metrics.mli | 0 src/{core => lib}/globals.ml | 7 +------ src/{client => lib}/interval_limiter.ml | 0 src/{client => lib}/interval_limiter.mli | 0 src/lib/opentelemetry.ml | 4 ++++ 9 files changed, 25 insertions(+), 11 deletions(-) rename src/{core => lib}/conventions.ml (100%) rename src/{core => lib}/gc_metrics.ml (97%) rename src/{core => lib}/gc_metrics.mli (100%) rename src/{core => lib}/globals.ml (89%) rename src/{client => lib}/interval_limiter.ml (100%) rename src/{client => lib}/interval_limiter.mli (100%) diff --git a/src/core/span.ml b/src/core/span.ml index c4c0c8cd..7d2c3920 100644 --- a/src/core/span.ml +++ b/src/core/span.ml @@ -28,9 +28,10 @@ let[@inline] trace_id self = Trace_id.of_bytes self.trace_id let[@inline] is_not_dummy self = Span_id.is_valid (id self) -let make ?(kind = !Globals.default_span_kind) ?trace_state ?(attrs = []) - ?(events = []) ?status ~trace_id ~id ?parent ?(links = []) ~start_time - ~end_time name : t = +let default_kind = ref Proto.Trace.Span_kind_unspecified + +let make ?(kind = !default_kind) ?trace_state ?(attrs = []) ?(events = []) + ?status ~trace_id ~id ?parent ?(links = []) ~start_time ~end_time name : t = let trace_id = Trace_id.to_bytes trace_id in let parent_span_id = Option.map Span_id.to_bytes parent in let attributes = List.map Key_value.conv attrs in @@ -99,7 +100,14 @@ let record_exception (self : t) (exn : exn) (bt : Printexc.raw_backtrace) : unit add_event self ev ) -let[@inline] add_attrs (self : t) (attrs : unit -> Key_value.t list) : unit = +let add_attrs (self : t) (attrs : Key_value.t list) : unit = + if is_not_dummy self then ( + let attrs = List.rev_map Key_value.conv attrs in + let attrs = List.rev_append attrs self.attributes in + span_set_attributes self attrs + ) + +let add_attrs' (self : t) (attrs : unit -> Key_value.t list) : unit = if is_not_dummy self then ( let attrs = List.rev_map Key_value.conv (attrs ()) in let attrs = List.rev_append attrs self.attributes in diff --git a/src/core/span.mli b/src/core/span.mli index d4722edc..8d5da0e5 100644 --- a/src/core/span.mli +++ b/src/core/span.mli @@ -112,4 +112,11 @@ val set_kind : t -> Span_kind.t -> unit (** Set the span's kind. @since 0.11 *) +val default_kind : Span_kind.t ref +(** Default span kind in {!make} and {!create_new}. + + This will be used in all spans that do not specify [~kind] explicitly; it is + set to "internal", following directions from the [.proto] file. It can be + convenient to set "client" or "server" uniformly in here. *) + val k_context : t Context.key diff --git a/src/core/conventions.ml b/src/lib/conventions.ml similarity index 100% rename from src/core/conventions.ml rename to src/lib/conventions.ml diff --git a/src/core/gc_metrics.ml b/src/lib/gc_metrics.ml similarity index 97% rename from src/core/gc_metrics.ml rename to src/lib/gc_metrics.ml index e2ade0dd..dfc3e41d 100644 --- a/src/core/gc_metrics.ml +++ b/src/lib/gc_metrics.ml @@ -51,7 +51,7 @@ let setup ?(min_interval_s = default_interval_s) (exp : Exporter.t) = Exporter.on_tick exp on_tick let setup_on_main_exporter ?min_interval_s () = - match Exporter.Main_exporter.get () with + match Main_exporter.get () with | None -> () | Some exp -> setup ?min_interval_s exp diff --git a/src/core/gc_metrics.mli b/src/lib/gc_metrics.mli similarity index 100% rename from src/core/gc_metrics.mli rename to src/lib/gc_metrics.mli diff --git a/src/core/globals.ml b/src/lib/globals.ml similarity index 89% rename from src/core/globals.ml rename to src/lib/globals.ml index 74f7433a..c4d3c55b 100644 --- a/src/core/globals.ml +++ b/src/lib/globals.ml @@ -43,12 +43,7 @@ let merge_global_attributes_ into : _ list = let not_redundant kv = List.for_all (fun kv' -> kv.key <> kv'.key) into in List.rev_append (List.filter not_redundant !global_attributes) into -(** Default span kind in {!Span.create}. This will be used in all spans that do - not specify [~kind] explicitly; it is set to "internal", following - directions from the [.proto] file. It can be convenient to set "client" or - "server" uniformly in here. - @since 0.4 *) -let default_span_kind = ref Proto.Trace.Span_kind_internal +let default_span_kind = Span.default_kind open struct let runtime_attributes = diff --git a/src/client/interval_limiter.ml b/src/lib/interval_limiter.ml similarity index 100% rename from src/client/interval_limiter.ml rename to src/lib/interval_limiter.ml diff --git a/src/client/interval_limiter.mli b/src/lib/interval_limiter.mli similarity index 100% rename from src/client/interval_limiter.mli rename to src/lib/interval_limiter.mli diff --git a/src/lib/opentelemetry.ml b/src/lib/opentelemetry.ml index 42808709..5316afc2 100644 --- a/src/lib/opentelemetry.ml +++ b/src/lib/opentelemetry.ml @@ -7,6 +7,10 @@ module Alist = Alist (** Atomic list, for internal usage @since 0.7 *) +module Interval_limiter = Interval_limiter +(** Utility to limit the frequency of some event + @since NEXT_RELEASE *) + (** {2 Wire format} *) module Proto = Opentelemetry_proto From 281e8b3ea717cb08ed8289a4007b37b4376e39a7 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 14:33:39 -0500 Subject: [PATCH 44/94] feat lib/tracer: restore `with_` and `with_thunk_and_finally` !! --- src/lib/tracer.ml | 88 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/src/lib/tracer.ml b/src/lib/tracer.ml index 4b6009c3..4e507943 100644 --- a/src/lib/tracer.ml +++ b/src/lib/tracer.ml @@ -22,3 +22,91 @@ let dummy () : t = Emitter.dummy () let dynamic_forward_to_main_exporter : t = Main_exporter.Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> e.emit_spans) + +let (add_event [@deprecated "use Span.add_event"]) = Span.add_event + +let (add_attrs [@deprecated "use Span.add_attrs"]) = Span.add_attrs + +let with_thunk_and_finally (self : t) ?(force_new_trace_id = false) ?trace_state + ?(attrs : (string * [< Value.t ]) list = []) ?kind ?trace_id ?parent ?links + name cb = + let parent = + match parent with + | Some _ -> parent + | None -> Ambient_span.get () + in + let trace_id = + match trace_id, parent with + | _ when force_new_trace_id -> Trace_id.create () + | Some trace_id, _ -> trace_id + | None, Some p -> Span.trace_id p + | None, None -> Trace_id.create () + in + let start_time = Timestamp_ns.now_unix_ns () in + let span_id = Span_id.create () in + + let parent_id = Option.map Span.id parent in + + let span : Span.t = + Span.make ?trace_state ?kind ?parent:parent_id ~trace_id ~id:span_id ~attrs + ?links ~start_time ~end_time:start_time name + in + (* called once we're done, to emit a span *) + let finally res = + let end_time = Timestamp_ns.now_unix_ns () in + Proto.Trace.span_set_end_time_unix_nano span end_time; + + (match Span.status span with + | Some _ -> () + | None -> + (match res with + | Ok () -> + (* By default, all spans are Unset, which means a span completed without error. + The Ok status is reserved for when you need to explicitly mark a span as successful + rather than stick with the default of Unset (i.e., “without error”). + + https://opentelemetry.io/docs/languages/go/instrumentation/#set-span-status *) + () + | Error (e, bt) -> + Span.record_exception span e bt; + let status = + make_status ~code:Status_code_error ~message:(Printexc.to_string e) () + in + Span.set_status span status)); + + Emitter.emit self [ span ] + in + let thunk () = Ambient_span.with_ambient span (fun () -> cb span) in + thunk, finally + +(** Sync span guard. + + Notably, this includes {e implicit} scope-tracking: if called without a + [~scope] argument (or [~parent]/[~trace_id]), it will check in the + {!Ambient_context} for a surrounding environment, and use that as the scope. + Similarly, it uses {!Scope.with_ambient_scope} to {e set} a new scope in the + ambient context, so that any logically-nested calls to {!with_} will use + this span as their parent. + + {b NOTE} be careful not to call this inside a Gc alarm, as it can cause + deadlocks. + + @param force_new_trace_id + if true (default false), the span will not use a ambient scope, the + [~scope] argument, nor [~trace_id], but will instead always create fresh + identifiers for this span *) +let with_ (self : t) ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id + ?parent ?links name (cb : Span.t -> 'a) : 'a = + let thunk, finally = + with_thunk_and_finally self ?force_new_trace_id ?trace_state ?attrs ?kind + ?trace_id ?parent ?links name cb + in + + try + let rv = thunk () in + finally (Ok ()); + rv + with e -> + let bt = Printexc.get_raw_backtrace () in + finally (Error (e, bt)); + raise e From 76a54ac6e1db96b33c770d84ba7b5d173dc83b3d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 14:37:20 -0500 Subject: [PATCH 45/94] fix client, allow to set the self-tracing tracer. --- src/client/debug_exporter.ml | 1 + src/client/dune | 2 ++ src/client/exporter_stdout.ml | 5 ++++- src/client/self_trace.ml | 12 ++++++++---- src/client/self_trace.mli | 7 +++++++ 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/client/debug_exporter.ml b/src/client/debug_exporter.ml index 291067bd..295becac 100644 --- a/src/client/debug_exporter.ml +++ b/src/client/debug_exporter.ml @@ -20,6 +20,7 @@ let debug ?(out = Format.err_formatter) (exp : OTEL.Exporter.t) : (fun m -> Format.fprintf out "METRIC: %a@." Metrics.pp_metric m) exp.emit_metrics; on_tick = exp.on_tick; + tick = exp.tick; cleanup = (fun ~on_done () -> Format.fprintf out "CLEANUP@."; diff --git a/src/client/dune b/src/client/dune index 48e836d7..037f4d6b 100644 --- a/src/client/dune +++ b/src/client/dune @@ -1,8 +1,10 @@ (library (name opentelemetry_client) (public_name opentelemetry.client) + (flags :standard -open Opentelemetry_util) (libraries opentelemetry + opentelemetry.util opentelemetry.emitter opentelemetry.proto pbrt diff --git a/src/client/exporter_stdout.ml b/src/client/exporter_stdout.ml index 4e8ec5c2..76dbf760 100644 --- a/src/client/exporter_stdout.ml +++ b/src/client/exporter_stdout.ml @@ -33,8 +33,10 @@ let stdout : OTEL.Exporter.t = let open Opentelemetry_util in let out = Format.std_formatter in let mutex = Mutex.create () in + let ticker = Cb_set.create () in let closed = Atomic.make false in + let tick () = Cb_set.trigger ticker in let mk_emitter pp_signal = let emit l = @@ -57,6 +59,7 @@ let stdout : OTEL.Exporter.t = emit_spans = mk_emitter pp_span; emit_logs = mk_emitter Proto.Logs.pp_log_record; emit_metrics = mk_emitter Proto.Metrics.pp_metric; - on_tick = Cb_set.create (); + on_tick = Cb_set.register ticker; + tick; cleanup = (fun ~on_done () -> on_done ()); } diff --git a/src/client/self_trace.ml b/src/client/self_trace.ml index 23dd258b..cf5adcb5 100644 --- a/src/client/self_trace.ml +++ b/src/client/self_trace.ml @@ -2,17 +2,21 @@ open Common_ let enabled = Atomic.make false +let tracer = Atomic.make OTEL.Tracer.dynamic_forward_to_main_exporter + let[@inline] add_event (scope : OTEL.Span.t) ev = OTEL.Span.add_event scope ev +let set_tracer tr = Atomic.set tracer tr + let dummy_trace_id_ = OTEL.Trace_id.dummy let dummy_span_id = OTEL.Span_id.dummy -(* FIXME: get an explicit tracer instead *) let with_ ?kind ?attrs name f = - if Atomic.get enabled then - OTEL.Tracer.with_ ?kind ?attrs name f - else ( + if Atomic.get enabled then ( + let tracer = Atomic.get tracer in + OTEL.Tracer.with_ tracer ?kind ?attrs name f + ) else ( (* A new scope is needed here because it might be modified *) let span : OTEL.Span.t = OTEL.Span.make ~trace_id:dummy_trace_id_ ~id:dummy_span_id ~start_time:0L diff --git a/src/client/self_trace.mli b/src/client/self_trace.mli index b6b371ef..07cf9369 100644 --- a/src/client/self_trace.mli +++ b/src/client/self_trace.mli @@ -10,5 +10,12 @@ val with_ : string -> (OTEL.Span.t -> 'a) -> 'a +(** A simple way to create spans to instrument parts of the OTEL SDK itself. *) + +val set_tracer : OTEL.Tracer.t -> unit +(** Set the tracer to use for self-tracing. We need to make sure it will not + lead to infinite loops (if the tracer itself is self-tracing, it might + invoke itself recursively, and so on). *) val set_enabled : bool -> unit +(** Enable self tracing. A tracer must also be set. *) From b9a05737d702d41b71a8fc2790365ade0cbd6cdf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 14:41:17 -0500 Subject: [PATCH 46/94] feat lib: easily access the main tracer, logger, etc --- src/core/exporter.ml | 6 +++--- src/emitter/emitter.ml | 7 +++---- src/lib/logger.ml | 7 ++++++- src/lib/metrics_emitter.ml | 7 ++++++- src/lib/tracer.ml | 8 +++++++- 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/core/exporter.ml b/src/core/exporter.ml index 8bd96b7f..621ea5b9 100644 --- a/src/core/exporter.ml +++ b/src/core/exporter.ml @@ -29,9 +29,9 @@ type t = { let dummy () : t = let ticker = Cb_set.create () in { - emit_spans = Emitter.dummy (); - emit_metrics = Emitter.dummy (); - emit_logs = Emitter.dummy (); + emit_spans = Emitter.dummy; + emit_metrics = Emitter.dummy; + emit_logs = Emitter.dummy; on_tick = Cb_set.register ticker; tick = (fun () -> Cb_set.trigger ticker); cleanup = (fun ~on_done () -> on_done ()); diff --git a/src/emitter/emitter.ml b/src/emitter/emitter.ml index d3aa0e5e..fb2f9285 100644 --- a/src/emitter/emitter.ml +++ b/src/emitter/emitter.ml @@ -46,12 +46,11 @@ let tap (f : 'a -> unit) (self : 'a t) : 'a t = in { self with emit } -let dummy () : _ t = - let closed = Atomic.make false in +let dummy : _ t = { enabled = (fun () -> false); emit = ignore; tick = (fun ~now:_ -> ()); - closed = (fun () -> Atomic.get closed); - flush_and_close = (fun () -> Atomic.set closed true); + closed = (fun () -> true); + flush_and_close = ignore; } diff --git a/src/lib/logger.ml b/src/lib/logger.ml index 9c2681fd..88d70ce0 100644 --- a/src/lib/logger.ml +++ b/src/lib/logger.ml @@ -8,12 +8,17 @@ open Opentelemetry_emitter type t = Log_record.t Emitter.t -let dummy () : t = Emitter.dummy () +let dummy : t = Emitter.dummy let enabled = Emitter.enabled let of_exporter (exp : Exporter.t) : t = exp.emit_logs +let get_main () : t = + match Main_exporter.get () with + | None -> dummy + | Some e -> e.emit_logs + let emit ?attrs:_ (logs : Log_record.t list) : unit = match Main_exporter.get () with | None -> () diff --git a/src/lib/metrics_emitter.ml b/src/lib/metrics_emitter.ml index 9c9c530b..f26fc224 100644 --- a/src/lib/metrics_emitter.ml +++ b/src/lib/metrics_emitter.ml @@ -2,7 +2,7 @@ open Opentelemetry_emitter type t = Metrics.t Emitter.t -let dummy () : t = Emitter.dummy () +let dummy : t = Emitter.dummy let enabled = Emitter.enabled @@ -16,6 +16,11 @@ let (emit [@deprecated "use an explicit Metrics_emitter.t"]) = | None -> () | Some exp -> Exporter.send_metrics exp l +let get_main () : t = + match Main_exporter.get () with + | None -> dummy + | Some e -> e.emit_metrics + (** An emitter that uses the current {!Main_exporter} *) let dynamic_forward_to_main_exporter : t = Main_exporter.Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> diff --git a/src/lib/tracer.ml b/src/lib/tracer.ml index 4e507943..1abfaade 100644 --- a/src/lib/tracer.ml +++ b/src/lib/tracer.ml @@ -16,13 +16,19 @@ type t = Span.t Emitter.t https://opentelemetry.io/docs/specs/otel/trace/api/#tracer *) (** Dummy tracer, always disabled *) -let dummy () : t = Emitter.dummy () +let dummy : t = Emitter.dummy (** A tracer that uses the current {!Main_exporter} *) let dynamic_forward_to_main_exporter : t = Main_exporter.Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> e.emit_spans) +(** Get tracer using the main exporter in {!Main_exporter} *) +let get_main () : t = + match Main_exporter.get () with + | None -> dummy + | Some e -> e.emit_spans + let (add_event [@deprecated "use Span.add_event"]) = Span.add_event let (add_attrs [@deprecated "use Span.add_attrs"]) = Span.add_attrs From 1ee89d7d9b391807f09bb5fb29f24ea7d32a2dea Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 15:30:14 -0500 Subject: [PATCH 47/94] fix integrations --- .../cohttp/opentelemetry_cohttp_lwt.ml | 66 +++++++++---------- src/integrations/logs/opentelemetry_logs.ml | 16 ++--- src/integrations/logs/opentelemetry_logs.mli | 19 ++---- 3 files changed, 47 insertions(+), 54 deletions(-) diff --git a/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml b/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml index 2ac0ed3d..13d21438 100644 --- a/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml +++ b/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml @@ -2,9 +2,16 @@ module Otel = Opentelemetry module Otel_lwt = Opentelemetry_lwt open Cohttp +open struct + let attrs_of_response (res : Response.t) = + let code = Response.status res in + let code = Code.code_of_status code in + [ "http.status_code", `Int code ] +end + module Server : sig val trace : - ?service_name:string -> + ?tracer:Otel.Tracer.t -> ?attrs:Otel.Span.key_value list -> ('conn -> Request.t -> 'body -> (Response.t * 'body) Lwt.t) -> 'conn -> @@ -27,8 +34,8 @@ module Server : sig ]} *) val with_ : + ?tracer:Otel.Tracer.t -> ?trace_state:string -> - ?service_name:string -> ?attrs:Otel.Span.key_value list -> ?kind:Otel.Span.kind -> ?links:Otel.Span_link.t list -> @@ -76,11 +83,6 @@ end = struct | Some r -> [ "http.request.header.referer", `String r ]); ] - let attrs_of_response (res : Response.t) = - let code = Response.status res in - let code = Code.code_of_status code in - [ "http.status_code", `Int code ] - let header_x_ocaml_otel_traceparent = "x-ocaml-otel-traceparent" let set_trace_context (span : Otel.Span.t) req = @@ -115,30 +117,33 @@ end = struct in { req with headers } - let trace ?service_name ?(attrs = []) callback conn req body = - let scope = get_trace_context ~from:`External req in - Otel_lwt.Tracer.with_ "request" ~kind:Span_kind_server + let trace ?(tracer = Otel.Tracer.get_main ()) ?(attrs = []) callback conn req + body = + let parent = get_trace_context ~from:`External req in + Otel_lwt.Tracer.with_ tracer "request" ~kind:Span_kind_server ?trace_id:(Option.map Otel.Span.trace_id parent) - ?parent:(Option.map Otel.Span.id parent) + ?parent ~attrs:(attrs @ attrs_of_request req) - (fun scope -> + (fun span -> let open Lwt.Syntax in - let req = set_trace_context scope req in + let req = set_trace_context span req in let* res, body = callback conn req body in - Otel.Span.add_attrs scope (fun () -> attrs_of_response res); + Otel.Span.add_attrs span (attrs_of_response res); Lwt.return (res, body)) - let with_ ?trace_state ?attrs ?(kind = Otel.Span.Span_kind_internal) ?links - name req (f : Request.t -> 'a Lwt.t) = + let with_ ?(tracer = Otel.Tracer.get_main ()) ?trace_state ?attrs + ?(kind = Otel.Span.Span_kind_internal) ?links name req + (f : Request.t -> 'a Lwt.t) = let span = get_trace_context ~from:`Internal req in - Otel_lwt.Trace.with_ ?trace_state ?attrs ~kind + Otel_lwt.Tracer.with_ tracer ?trace_state ?attrs ~kind ?trace_id:(Option.map Otel.Span.trace_id span) ?parent:span ?links name (fun span -> - let req = set_trace_context span in + let req = set_trace_context span req in f req) end -let client ?(span : Otel.Span.t option) (module C : Cohttp_lwt.S.Client) = +let client ?(tracer = Otel.Tracer.get_main ()) ?(span : Otel.Span.t option) + (module C : Cohttp_lwt.S.Client) = let module Traced = struct open Lwt.Syntax @@ -168,9 +173,8 @@ let client ?(span : Otel.Span.t option) (module C : Cohttp_lwt.S.Client) = let context_for ~uri ~meth = let trace_id = Option.map Otel.Span.trace_id span in - let parent = Option.map Otel.Span.id span in let attrs = attrs_for ~uri ~meth () in - trace_id, parent, attrs + trace_id, span, attrs let add_traceparent (span : Otel.Span.t) headers = let module Traceparent = Otel.Trace_context.Traceparent in @@ -186,14 +190,11 @@ let client ?(span : Otel.Span.t option) (module C : Cohttp_lwt.S.Client) = let call ?ctx ?headers ?body ?chunked meth (uri : Uri.t) : (Response.t * Cohttp_lwt.Body.t) Lwt.t = let trace_id, parent, attrs = context_for ~uri ~meth in - Otel_lwt.Trace.with_ "request" ~kind:Span_kind_client ?trace_id ?parent - ~attrs (fun span -> + Otel_lwt.Tracer.with_ tracer "request" ~kind:Span_kind_client ?trace_id + ?parent ~attrs (fun span -> let headers = add_traceparent span headers in let* res, body = C.call ?ctx ~headers ?body ?chunked meth uri in - Otel.Span.add_attrs span (fun () -> - let code = Response.status res in - let code = Code.code_of_status code in - [ "http.status_code", `Int code ]); + Otel.Span.add_attrs span (attrs_of_response res); Lwt.return (res, body)) let head ?ctx ?headers uri = @@ -216,14 +217,11 @@ let client ?(span : Otel.Span.t option) (module C : Cohttp_lwt.S.Client) = let post_form ?ctx ?headers ~params uri = let trace_id, parent, attrs = context_for ~uri ~meth:`POST in - Otel_lwt.Trace.with_ "request" ~kind:Span_kind_client ?trace_id ?parent - ~attrs (fun span -> - let headers = add_traceparent scope headers in + Otel_lwt.Tracer.with_ tracer "request" ~kind:Span_kind_client ?trace_id + ?parent ~attrs (fun span -> + let headers = add_traceparent span headers in let* res, body = C.post_form ?ctx ~headers ~params uri in - Otel.Span.add_attrs span (fun () -> - let code = Response.status res in - let code = Code.code_of_status code in - [ "http.status_code", `Int code ]); + Otel.Span.add_attrs span (attrs_of_response res); Lwt.return (res, body)) let callv = C.callv (* TODO *) diff --git a/src/integrations/logs/opentelemetry_logs.ml b/src/integrations/logs/opentelemetry_logs.ml index a91f2e44..16b1b513 100644 --- a/src/integrations/logs/opentelemetry_logs.ml +++ b/src/integrations/logs/opentelemetry_logs.ml @@ -34,20 +34,20 @@ let emit_telemetry do_emit = Logs.Tag.(empty |> add emit_telemetry_tag do_emit) (*****************************************************************************) (* Log a message to otel with some attrs *) -let log ?service_name ?(attrs = []) ?(scope = Otel.Ambient_span.get ()) ~level - msg = +let log ?(logger = Otel.Logger.get_main ()) ?attrs + ?(scope = Otel.Ambient_span.get ()) ~level msg = let log_level = Logs.level_to_string (Some level) in let span_id = Option.map Otel.Span.id scope in let trace_id = Option.map Otel.Span.trace_id scope in let severity = log_level_to_severity level in let log = - Otel.Log_record.make_str ~severity ~log_level ?trace_id ?span_id msg + Otel.Log_record.make_str ~severity ~log_level ?attrs ?trace_id ?span_id msg in (* Noop if no backend is set *) (* TODO: be more explicit *) - Otel.Logger.emit ?service_name ~attrs [ log ] + Otel.Emitter.emit logger [ log ] -let otel_reporter ?service_name ?(attributes = []) () : Logs.reporter = +let otel_reporter ?(attributes = []) () : Logs.reporter = let report src level ~over k msgf = msgf (fun ?header ?(tags : Logs.Tag.set option) fmt -> let k _ = @@ -91,13 +91,13 @@ let otel_reporter ?service_name ?(attributes = []) () : Logs.reporter = let do_emit = Option.value ~default:true (Logs.Tag.find emit_telemetry_tag tags) in - if do_emit then log ?service_name ~attrs ~level msg; + if do_emit then log ~attrs ~level msg; k ()) fmt) in { Logs.report } -let attach_otel_reporter ?service_name ?attributes reporter = +let attach_otel_reporter ?attributes reporter = (* Copied directly from the Logs.mli docs. Just calls a bunch of reporters in a row *) let combine r1 r2 = @@ -107,5 +107,5 @@ let attach_otel_reporter ?service_name ?attributes reporter = in { Logs.report } in - let otel_reporter = otel_reporter ?service_name ?attributes () in + let otel_reporter = otel_reporter ?attributes () in combine reporter otel_reporter diff --git a/src/integrations/logs/opentelemetry_logs.mli b/src/integrations/logs/opentelemetry_logs.mli index 7ac4e594..43fadb8f 100644 --- a/src/integrations/logs/opentelemetry_logs.mli +++ b/src/integrations/logs/opentelemetry_logs.mli @@ -24,11 +24,8 @@ val emit_telemetry : bool -> Logs.Tag.set {!emit_telemetry_tag} as its only member *) val otel_reporter : - ?service_name:string -> - ?attributes:(string * Opentelemetry.value) list -> - unit -> - Logs.reporter -(** [otel_reporter ?service_name ?tag_value_pp_buffer_size ?attrs ()] creates a + ?attributes:(string * Opentelemetry.value) list -> unit -> Logs.reporter +(** [otel_reporter ?tag_value_pp_buffer_size ?attrs ()] creates a [Logs.reporter] that will create and emit an OTel log with the following info: {ul @@ -61,19 +58,17 @@ val otel_reporter : Example use: [Logs.set_reporter (Opentelemetery_logs.otel_reporter ())] *) val attach_otel_reporter : - ?service_name:string -> ?attributes:(string * Opentelemetry.value) list -> Logs.reporter -> Logs.reporter -(** [attach_otel_reporter ?service_name ?attributes reporter] will create a - reporter that first calls the reporter passed as an argument, then an otel - report created via {!otel_reporter}, for every log. This is useful for if - you want to emit logs to stderr and to OTel at the same time. +(** [attach_otel_reporter ?attributes reporter] will create a reporter that + first calls the reporter passed as an argument, then an otel report created + via {!otel_reporter}, for every log. This is useful for if you want to emit + logs to stderr and to OTel at the same time. Example: {[ let reporter = Logs_fmt.reporter () in Logs.set_reporter - (Opentelemetry_logs.attach_otel_reporter ?service_name ?attributes - reporter) + (Opentelemetry_logs.attach_otel_reporter ?attributes reporter) ]} *) From 22298495e2f2b22f5e7c7f14bd59bd3b66051ac8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:10:21 -0500 Subject: [PATCH 48/94] mor efixes --- dune-project | 51 +++++++++++++++---- src/ambient-context/dune | 1 - src/client-cohttp-eio/dune | 2 +- src/client-cohttp-lwt/dune | 2 +- src/client-ocurl-lwt/dune | 2 +- src/client-ocurl/dune | 2 +- .../opentelemetry_client_ocurl.ml | 31 ++++------- src/client/dune | 5 +- src/core/log_record.ml | 14 ++--- src/lib/opentelemetry.ml | 1 + src/lwt/opentelemetry_lwt.ml | 6 +-- 11 files changed, 68 insertions(+), 49 deletions(-) diff --git a/dune-project b/dune-project index 41ba5911..3d962e1f 100644 --- a/dune-project +++ b/dune-project @@ -28,11 +28,6 @@ (>= "4.08")) ptime hmap - atomic - (thread-local-storage - (and - (>= 0.2) - (< 0.3))) (odoc :with-doc) (alcotest :with-test) (pbrt @@ -46,14 +41,33 @@ (>= 0.27) (< 0.28))) (mtime - (>= "1.4"))) - (depopts trace lwt eio) + (>= "1.4"))) + (depopts atomic trace thread-local-storage lwt eio) (conflicts (trace (< 0.10))) (tags (instrumentation tracing opentelemetry datadog jaeger))) +(package + (name opentelemetry-client) + (synopsis "Client SDK for https://opentelemetry.io") + (depends + (opentelemetry + (= :version)) + (odoc :with-doc) + (alcotest :with-test) + (saturn + (and + (>= 1.0) + (< 2.0))) + (thread-local-storage + (and + (>= 0.2) + (< 0.3)))) + (tags + (tracing opentelemetry sdk))) + (package (name opentelemetry-lwt) (synopsis "Lwt-compatible instrumentation for https://opentelemetry.io") @@ -83,6 +97,8 @@ ; atomic ; vendored (opentelemetry (= :version)) + (opentelemetry-client + (= :version)) (odoc :with-doc) (ezcurl (>= 0.2.3)) @@ -99,6 +115,8 @@ (>= "1.4")) (opentelemetry (= :version)) + (opentelemetry-client + (= :version)) (odoc :with-doc) (ezcurl-lwt (>= 0.2.3)) @@ -124,10 +142,14 @@ (containers :with-test) (cohttp-lwt-unix :with-test) (opentelemetry-client-cohttp-lwt - (and :with-test (= :version))) + (and + :with-test + (= :version))) (opentelemetry-cohttp-lwt - (and :with-test (= :version)))) - (synopsis "Opentelemetry tracing for Cohttp HTTP servers")) + (and + :with-test + (= :version)))) + (synopsis "Opentelemetry-based reporter for Logs")) (package (name opentelemetry-cohttp-lwt) @@ -156,6 +178,8 @@ ; for spans (opentelemetry (= :version)) + (opentelemetry-client + (= :version)) (odoc :with-doc) (lwt (>= "5.3")) @@ -165,7 +189,10 @@ cohttp-lwt-unix (alcotest :with-test) (containers :with-test) - (opentelemetry-lwt (and :with-test (= :version)))) + (opentelemetry-lwt + (and + :with-test + (= :version)))) (synopsis "Collector client for opentelemetry, using cohttp + lwt")) (package @@ -179,6 +206,8 @@ mirage-crypto-rng-eio (opentelemetry (= :version)) + (opentelemetry-client + (= :version)) (odoc :with-doc) (cohttp-eio (>= 6.1.0)) diff --git a/src/ambient-context/dune b/src/ambient-context/dune index de1f5b26..65425271 100644 --- a/src/ambient-context/dune +++ b/src/ambient-context/dune @@ -11,7 +11,6 @@ Opentelemetry_atomic) (libraries hmap - atomic opentelemetry.ambient-context.core opentelemetry.atomic (select diff --git a/src/client-cohttp-eio/dune b/src/client-cohttp-eio/dune index 79cf9393..2bed9063 100644 --- a/src/client-cohttp-eio/dune +++ b/src/client-cohttp-eio/dune @@ -6,7 +6,7 @@ (>= %{ocaml_version} 5.0)) (libraries opentelemetry - opentelemetry.client + opentelemetry-client eio eio.unix cohttp diff --git a/src/client-cohttp-lwt/dune b/src/client-cohttp-lwt/dune index 480fb4a8..23c36d3a 100644 --- a/src/client-cohttp-lwt/dune +++ b/src/client-cohttp-lwt/dune @@ -6,7 +6,7 @@ (pps lwt_ppx)) (libraries opentelemetry - opentelemetry.client + opentelemetry-client lwt cohttp-lwt cohttp-lwt-unix diff --git a/src/client-ocurl-lwt/dune b/src/client-ocurl-lwt/dune index c4e60769..64fb6217 100644 --- a/src/client-ocurl-lwt/dune +++ b/src/client-ocurl-lwt/dune @@ -7,7 +7,7 @@ (libraries opentelemetry opentelemetry.atomic - opentelemetry.client + opentelemetry-client pbrt mtime mtime.clock.os diff --git a/src/client-ocurl/dune b/src/client-ocurl/dune index 9823b5bc..befb9396 100644 --- a/src/client-ocurl/dune +++ b/src/client-ocurl/dune @@ -4,7 +4,7 @@ (libraries opentelemetry opentelemetry.atomic - opentelemetry.client + opentelemetry-client curl pbrt threads diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 3e9be01e..2efc50fe 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -5,7 +5,7 @@ open Opentelemetry_client open Common_ -module OT = Opentelemetry +module OTEL = Opentelemetry module Config = Config let get_headers = Config.Env.get_headers @@ -52,26 +52,13 @@ let start_bg_thread (f : unit -> unit) : Thread.t = Thread.create run () let str_to_hex (s : string) : string = - let i_to_hex (i : int) = - if i < 10 then - Char.chr (i + Char.code '0') - else - Char.chr (i - 10 + Char.code 'a') - in - - let res = Bytes.create (2 * String.length s) in - for i = 0 to String.length s - 1 do - let n = Char.code (String.get s i) in - Bytes.set res (2 * i) (i_to_hex ((n land 0xf0) lsr 4)); - Bytes.set res ((2 * i) + 1) (i_to_hex (n land 0x0f)) - done; - Bytes.unsafe_to_string res + Opentelemetry_util.Util_bytes_.bytes_to_hex (Bytes.unsafe_of_string s) module Exporter_impl : sig val n_bytes_sent : int Atomic.t class type t = object - inherit OT.Exporter.t + inherit OTEL.Exporter.t method shutdown : on_done:(unit -> unit) -> unit -> unit end @@ -85,7 +72,7 @@ end = struct let n_bytes_sent : int Atomic.t = Atomic.make 0 class type t = object - inherit OT.Exporter.t + inherit OTEL.Exporter.t method shutdown : on_done:(unit -> unit) -> unit -> unit end @@ -301,12 +288,12 @@ end = struct end let create_exporter ?(stop = Atomic.make false) - ?(config : Config.t = Config.make ()) () : #OT.Exporter.t = + ?(config : Config.t = Config.make ()) () : #OTEL.Exporter.t = let backend = Exporter_impl.create ~stop ~config () in - (backend :> OT.Exporter.t) + (backend :> OTEL.Exporter.t) (** thread that calls [tick()] regularly, to help enforce timeouts *) -let setup_ticker_thread ~stop ~sleep_ms (exp : #OT.Exporter.t) () = +let setup_ticker_thread ~stop ~sleep_ms (exp : #OTEL.Exporter.t) () = let sleep_s = float sleep_ms /. 1000. in let tick_loop () = try @@ -326,7 +313,7 @@ let setup_ticker_thread ~stop ~sleep_ms (exp : #OT.Exporter.t) () = let setup_ ?(stop = Atomic.make false) ?(config : Config.t = Config.make ()) () : unit = let exporter = Exporter_impl.create ~stop ~config () in - OT.Exporter.Main_exporter.set exporter; + OTEL.Exporter.Main_exporter.set exporter; Self_trace.set_enabled config.common.self_trace; @@ -338,7 +325,7 @@ let setup_ ?(stop = Atomic.make false) ?(config : Config.t = Config.make ()) () let remove_backend () : unit = (* we don't need the callback, this runs in the same thread *) - OT.Exporter.Main_exporter.remove () ~on_done:ignore + OTEL.Exporter.Main_exporter.remove () ~on_done:ignore let setup ?stop ?config ?(enable = true) () = if enable then setup_ ?stop ?config () diff --git a/src/client/dune b/src/client/dune index 037f4d6b..29ad29d7 100644 --- a/src/client/dune +++ b/src/client/dune @@ -1,6 +1,6 @@ (library (name opentelemetry_client) - (public_name opentelemetry.client) + (public_name opentelemetry-client) (flags :standard -open Opentelemetry_util) (libraries opentelemetry @@ -8,7 +8,8 @@ opentelemetry.emitter opentelemetry.proto pbrt + saturn mtime mtime.clock.os) (synopsis - "Basic exporters, as well as Common types and logic shared between exporters")) + "Basic exporters, as well as common types and logic shared between exporters")) diff --git a/src/core/log_record.ml b/src/core/log_record.ml index 9212a8e5..0de8ef18 100644 --- a/src/core/log_record.ml +++ b/src/core/log_record.ml @@ -47,7 +47,8 @@ let pp_flags = Proto.Logs.pp_log_record_flags (** Make a single log entry *) let make ?time ?(observed_time_unix_nano = Timestamp_ns.now_unix_ns ()) - ?severity ?log_level ?flags ?trace_id ?span_id (body : Value.t) : t = + ?severity ?log_level ?flags ?trace_id ?span_id ?(attrs = []) + (body : Value.t) : t = let time_unix_nano = match time with | None -> observed_time_unix_nano @@ -56,21 +57,22 @@ let make ?time ?(observed_time_unix_nano = Timestamp_ns.now_unix_ns ()) let trace_id = Option.map Trace_id.to_bytes trace_id in let span_id = Option.map Span_id.to_bytes span_id in let body = Value.conv body in + let attributes = List.map Key_value.conv attrs in make_log_record ~time_unix_nano ~observed_time_unix_nano ?severity_number:severity ?severity_text:log_level ?flags ?trace_id ?span_id - ?body () + ~attributes ?body () (** Make a log entry whose body is a string *) let make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags - ?trace_id ?span_id (body : string) : t = + ?trace_id ?span_id ?attrs (body : string) : t = make ?time ?observed_time_unix_nano ?severity ?log_level ?flags ?trace_id - ?span_id (`String body) + ?span_id ?attrs (`String body) (** Make a log entry with format *) let make_strf ?time ?observed_time_unix_nano ?severity ?log_level ?flags - ?trace_id ?span_id fmt = + ?trace_id ?span_id ?attrs fmt = Format.kasprintf (fun bod -> make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags - ?trace_id ?span_id bod) + ?trace_id ?span_id ?attrs bod) fmt diff --git a/src/lib/opentelemetry.ml b/src/lib/opentelemetry.ml index 5316afc2..adef99e0 100644 --- a/src/lib/opentelemetry.ml +++ b/src/lib/opentelemetry.ml @@ -24,6 +24,7 @@ module Timestamp_ns = Timestamp_ns (** {2 Export signals to some external collector.} *) +module Emitter = Opentelemetry_emitter.Emitter module Exporter = Exporter module Main_exporter = Main_exporter diff --git a/src/lwt/opentelemetry_lwt.ml b/src/lwt/opentelemetry_lwt.ml index 2ce4e12c..d6a4f641 100644 --- a/src/lwt/opentelemetry_lwt.ml +++ b/src/lwt/opentelemetry_lwt.ml @@ -20,10 +20,10 @@ module Tracer = struct include Tracer (** Sync span guard *) - let with_ ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id ?parent - ?links name (cb : Span.t -> 'a Lwt.t) : 'a Lwt.t = + let with_ (self : t) ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id + ?parent ?links name (cb : Span.t -> 'a Lwt.t) : 'a Lwt.t = let thunk, finally = - with_thunk_and_finally ?force_new_trace_id ?trace_state ?attrs ?kind + with_thunk_and_finally self ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id ?parent ?links name cb in From 70b435f8ec487cbf7bba7c21ccd1590b93394543 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:11:17 -0500 Subject: [PATCH 49/94] doc --- src/client/batch.mli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/batch.mli b/src/client/batch.mli index f50e1675..fa64083b 100644 --- a/src/client/batch.mli +++ b/src/client/batch.mli @@ -57,5 +57,5 @@ val push' : 'a t -> 'a list -> unit open Opentelemetry_emitter val wrap_emitter : 'a t -> 'a Emitter.t -> 'a Emitter.t -(** [batch_emitter batch e] is an emitter that uses batch [batch] to gather +(** [wrap_emitter batch e] is an emitter that uses batch [batch] to gather signals into larger lists before passing them to [e]. *) From e267c83be7695523265e80599b5bbddac598c9a0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:11:22 -0500 Subject: [PATCH 50/94] feat emitter: add `flat_map` --- src/emitter/emitter.ml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/emitter/emitter.ml b/src/emitter/emitter.ml index fb2f9285..7b8ffbfb 100644 --- a/src/emitter/emitter.ml +++ b/src/emitter/emitter.ml @@ -33,11 +33,21 @@ let[@inline] closed self : bool = self.closed () let[@inline] flush_and_close (self : _ t) : unit = self.flush_and_close () -(** [map f emitter] returns a new emitter that applies [f] to signals before - passing them to [emitter] *) +(** [map f emitter] returns a new emitter that applies [f] to signals item-wise + before passing them to [emitter] *) let map (f : 'a -> 'b) (self : 'b t) : 'a t = { self with emit = (fun l -> self.emit (List.map f l)) } +(** [map_l f emitter] applies [f] to incoming lists of signals, and emits the + resulting list (if non empty) *) +let flat_map (f : 'a list -> 'b list) (self : 'b t) : 'a t = + let emit l = + match f l with + | [] -> () + | fl -> self.emit fl + in + { self with emit } + (** [tap f e] is like [e], but every signal is passed to [f] *) let tap (f : 'a -> unit) (self : 'a t) : 'a t = let emit l = From 500c9a8ba8c24975cc1913a076343af95605eb2c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:11:32 -0500 Subject: [PATCH 51/94] sync_queue: more operations, including a batch push --- src/client/sync_queue.ml | 32 ++++++++++++++++++++++++++++++++ src/client/sync_queue.mli | 14 ++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/client/sync_queue.ml b/src/client/sync_queue.ml index 46d46af1..44d71a06 100644 --- a/src/client/sync_queue.ml +++ b/src/client/sync_queue.ml @@ -17,6 +17,10 @@ let create () : _ t = closed = false; } +(* NOTE: the race condition here is benign, assuming no tearing of + a value of type [bool] which OCaml's memory model should guarantee. *) +let[@inline] closed self = self.closed + let close (self : _ t) = UM.protect self.mutex @@ fun () -> if not self.closed then ( @@ -47,6 +51,11 @@ let pop (self : 'a t) : 'a = in UM.protect self.mutex loop +let try_pop (self : 'a t) : 'a option = + UM.protect self.mutex @@ fun () -> + if self.closed then raise Closed; + try Some (Queue.pop self.q) with Queue.Empty -> None + let pop_all (self : 'a t) into : unit = let rec loop () = if Queue.is_empty self.q then ( @@ -57,3 +66,26 @@ let pop_all (self : 'a t) into : unit = Queue.transfer self.q into in UM.protect self.mutex loop + +let push_while_not_full ~high_watermark (self : 'a t) (xs : 'a list) : int * int + = + UM.protect self.mutex @@ fun () -> + if self.closed then raise Closed; + + let old_size = Queue.length self.q in + let xs = ref xs in + + let continue = ref true in + while !continue && Queue.length self.q < high_watermark do + match !xs with + | [] -> continue := false + | x :: tl_xs -> + xs := tl_xs; + Queue.push x self.q + done; + + (* pushed at least one item *) + if Queue.length self.q <> old_size then Condition.broadcast self.cond; + + let n_discarded = List.length !xs in + n_discarded, old_size diff --git a/src/client/sync_queue.mli b/src/client/sync_queue.mli index d64296d7..b1ebb345 100644 --- a/src/client/sync_queue.mli +++ b/src/client/sync_queue.mli @@ -14,11 +14,25 @@ val pop : 'a t -> 'a (** [pop q] pops the next element in [q]. It might block until an element comes. @raise Closed if the queue was closed before a new element was available. *) +val try_pop : 'a t -> 'a option + val pop_all : 'a t -> 'a Queue.t -> unit (** [pop_all q into] pops all the elements of [q] and moves them into [into]. if no element is available, it will block until it successfully transfers at least one item to [into]. @raise Closed if the queue was closed before a new element was available. *) +val closed : _ t -> bool + val close : _ t -> unit (** Close the queue, meaning there won't be any more [push] allowed. *) + +val push_while_not_full : high_watermark:int -> 'a t -> 'a list -> int * int +(** [push_while_not_full q ~high_watermark xs] tries to push each item of [x] + into [q]. + + An item is not pushed if the queue is "full" (size >= high_watermark). + + This returns a pair [num_discarded, old_size] where [num_discarded] is the + number of items that could not be pushed, and [old_size] is the size before + anything was pushed. *) From 8a45f7169857599fac2b0b3784b00af9ca7d97fd Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:12:06 -0500 Subject: [PATCH 52/94] rename --- src/client/{debug_exporter.ml => exporter_debug.ml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/client/{debug_exporter.ml => exporter_debug.ml} (100%) diff --git a/src/client/debug_exporter.ml b/src/client/exporter_debug.ml similarity index 100% rename from src/client/debug_exporter.ml rename to src/client/exporter_debug.ml From 6f2134f18911f50d385bab76ebd362a0e0d64a7d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:12:16 -0500 Subject: [PATCH 53/94] add client.Any_resource --- src/client/any_resource.ml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/client/any_resource.ml diff --git a/src/client/any_resource.ml b/src/client/any_resource.ml new file mode 100644 index 00000000..77592121 --- /dev/null +++ b/src/client/any_resource.ml @@ -0,0 +1,27 @@ +open Opentelemetry.Proto + +(** A resource *) +type t = + | R_metrics of Metrics.resource_metrics list + | R_spans of Trace.resource_spans list + | R_logs of Logs.resource_logs list + +let of_logs logs : t = R_logs [ Util_resources.make_resource_logs logs ] + +open struct + let of_x_or_empty ~f l = + if l = [] then + [] + else + [ f l ] +end + +let of_logs_or_empty logs = of_x_or_empty ~f:of_logs logs + +let of_spans spans : t = R_spans [ Util_resources.make_resource_spans spans ] + +let of_spans_or_empty spans = of_x_or_empty ~f:of_spans spans + +let of_metrics m : t = R_metrics [ Util_resources.make_resource_metrics m ] + +let of_metrics_or_empty ms = of_x_or_empty ~f:of_metrics ms From 90d59b40d91924baf853f0de2017dedcdf9fe5b4 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:12:28 -0500 Subject: [PATCH 54/94] feat client: add bounded queue interface and sync-queue based implem --- src/client/bounded_queue.ml | 61 +++++++++++++++++++++++++++++++ src/client/bounded_queue_sync.ml | 50 +++++++++++++++++++++++++ src/client/bounded_queue_sync.mli | 5 +++ 3 files changed, 116 insertions(+) create mode 100644 src/client/bounded_queue.ml create mode 100644 src/client/bounded_queue_sync.ml create mode 100644 src/client/bounded_queue_sync.mli diff --git a/src/client/bounded_queue.ml b/src/client/bounded_queue.ml new file mode 100644 index 00000000..b69a60a4 --- /dev/null +++ b/src/client/bounded_queue.ml @@ -0,0 +1,61 @@ +(** Interface for a thread-safe, bounded queue. + + After the high watermark is reached, pushing items into the queue will + instead discard them. *) + +open Common_ + +exception Closed +(** Raised when pushing into a closed queue *) + +type 'a pop_result = + [ `Empty + | `Closed + | `Item of 'a + ] + +type 'a t = { + push: 'a list -> unit; + (** Push items. This might discard some of them. + @raise Closed if the queue is closed. *) + num_discarded: unit -> int; (** How many items were discarded? *) + on_non_empty: (unit -> unit) -> unit; + (** [on_non_empty f] registers [f] to be called whenever the queue + transitions from empty to non-empty. *) + try_pop: unit -> 'a pop_result; + (** Try to pop an item right now. @raise Closed if the *) + close: unit -> unit; + closed: unit -> bool; +} + +let[@inline] push (self : _ t) x : unit = self.push x + +let[@inline] num_discarded self = self.num_discarded () + +let[@inline] try_pop (self : _ t) : _ pop_result = self.try_pop () + +let[@inline] on_non_empty (self : _ t) f = self.on_non_empty f + +let[@inline] close (self : _ t) : unit = self.close () + +let[@inline] closed (self : _ t) : bool = self.closed () + +let to_emitter (self : 'a t) : 'a Opentelemetry_emitter.Emitter.t = + let closed () = self.closed () in + let enabled () = not (closed ()) in + let emit x = if x <> [] then push self x in + let tick ~now:_ = () in + let flush_and_close () = close self in + { closed; enabled; emit; tick; flush_and_close } + +let logs_emitter (self : Any_resource.t t) : OTEL.Logger.t = + to_emitter self + |> Opentelemetry_emitter.Emitter.flat_map Any_resource.of_logs_or_empty + +let spans_emitter (self : Any_resource.t t) : OTEL.Tracer.t = + to_emitter self + |> Opentelemetry_emitter.Emitter.flat_map Any_resource.of_spans_or_empty + +let metrics_emitter (self : Any_resource.t t) : OTEL.Metrics_emitter.t = + to_emitter self + |> Opentelemetry_emitter.Emitter.flat_map Any_resource.of_metrics_or_empty diff --git a/src/client/bounded_queue_sync.ml b/src/client/bounded_queue_sync.ml new file mode 100644 index 00000000..506bd214 --- /dev/null +++ b/src/client/bounded_queue_sync.ml @@ -0,0 +1,50 @@ +module BQ = Bounded_queue + +type 'a state = { + n_discarded: int Atomic.t; + high_watermark: int; + q: 'a Sync_queue.t; + on_non_empty: Cb_set.t; +} + +let push (self : _ state) x = + let discarded, old_size = + try + Sync_queue.push_while_not_full self.q ~high_watermark:self.high_watermark + x + with Sync_queue.Closed -> raise BQ.Closed + in + + if discarded > 0 then + ignore (Atomic.fetch_and_add self.n_discarded discarded : int); + + (* wake up lagards if the queue was empty *) + if old_size = 0 then Cb_set.trigger self.on_non_empty; + () + +let try_pop (self : _ state) : _ BQ.pop_result = + match Sync_queue.try_pop self.q with + | Some x -> `Item x + | None -> `Empty + | exception Sync_queue.Closed -> `Closed + +let to_bounded_queue (self : 'a state) : 'a BQ.t = + let closed () = Sync_queue.closed self.q in + let num_discarded () = Atomic.get self.n_discarded in + let push x = push self x in + let on_non_empty = Cb_set.register self.on_non_empty in + let try_pop () = try_pop self in + let close () = Sync_queue.close self.q in + { BQ.push; num_discarded; try_pop; on_non_empty; close; closed } + +let create ~high_watermark () : _ BQ.t = + let st = + { + high_watermark; + q = Sync_queue.create (); + n_discarded = Atomic.make 0; + on_non_empty = Cb_set.create (); + } + in + to_bounded_queue st + diff --git a/src/client/bounded_queue_sync.mli b/src/client/bounded_queue_sync.mli new file mode 100644 index 00000000..abdb710c --- /dev/null +++ b/src/client/bounded_queue_sync.mli @@ -0,0 +1,5 @@ +(** Bounded queue based on {!Sync_queue} *) + +val create : high_watermark:int -> unit -> 'a Bounded_queue.t +(** [create ~high_watermark ()] creates a new bounded queue based on + {!Sync_queue} *) From 15daf8aff579b432af4c3b47cc854a96066b7290 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:27:53 -0500 Subject: [PATCH 55/94] warning --- src/lib/gc_metrics.ml | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib/gc_metrics.ml b/src/lib/gc_metrics.ml index dfc3e41d..42dacac6 100644 --- a/src/lib/gc_metrics.ml +++ b/src/lib/gc_metrics.ml @@ -1,5 +1,3 @@ -open Common_ - open struct let bytes_per_word = Sys.word_size / 8 From 9e4b32ce97592ada6f2cfe1b6660d000010959bb Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:28:04 -0500 Subject: [PATCH 56/94] doc --- src/emitter/emitter.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/emitter/emitter.ml b/src/emitter/emitter.ml index 7b8ffbfb..a940fc43 100644 --- a/src/emitter/emitter.ml +++ b/src/emitter/emitter.ml @@ -56,6 +56,7 @@ let tap (f : 'a -> unit) (self : 'a t) : 'a t = in { self with emit } +(** Dummy emitter, doesn't accept or emit anything. *) let dummy : _ t = { enabled = (fun () -> false); From dff5c9668ec7492a6561ca16503d2f51d1ae2fe1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:28:15 -0500 Subject: [PATCH 57/94] remove unused argument for Logger --- src/lib/logger.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/logger.ml b/src/lib/logger.ml index 88d70ce0..83286d57 100644 --- a/src/lib/logger.ml +++ b/src/lib/logger.ml @@ -19,11 +19,11 @@ let get_main () : t = | None -> dummy | Some e -> e.emit_logs -let emit ?attrs:_ (logs : Log_record.t list) : unit = +let (emit [@deprecated "use an explicit Logger.t"]) = + fun (logs : Log_record.t list) : unit -> match Main_exporter.get () with | None -> () | Some exp -> Exporter.send_logs exp logs -[@@deprecated "use an explicit Logger.t"] (** An emitter that uses the current {!Main_exporter} *) let dynamic_forward_to_main_exporter : t = From 135dc327b33d5692148efd226368645d8a6248b2 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:33:10 -0500 Subject: [PATCH 58/94] carry service_name and attrs when building resources objects --- src/client/any_resource.ml | 24 +++++++++++++++--------- src/client/util_resources.ml | 6 +++--- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/client/any_resource.ml b/src/client/any_resource.ml index 77592121..a85d425c 100644 --- a/src/client/any_resource.ml +++ b/src/client/any_resource.ml @@ -6,22 +6,28 @@ type t = | R_spans of Trace.resource_spans list | R_logs of Logs.resource_logs list -let of_logs logs : t = R_logs [ Util_resources.make_resource_logs logs ] - open struct - let of_x_or_empty ~f l = + let of_x_or_empty ?service_name ?attrs ~f l = if l = [] then [] else - [ f l ] + [ f ?service_name ?attrs l ] end -let of_logs_or_empty logs = of_x_or_empty ~f:of_logs logs +let of_logs ?service_name ?attrs logs : t = + R_logs [ Util_resources.make_resource_logs ?service_name ?attrs logs ] -let of_spans spans : t = R_spans [ Util_resources.make_resource_spans spans ] +let of_logs_or_empty ?service_name ?attrs logs = + of_x_or_empty ?service_name ?attrs ~f:of_logs logs -let of_spans_or_empty spans = of_x_or_empty ~f:of_spans spans +let of_spans ?service_name ?attrs spans : t = + R_spans [ Util_resources.make_resource_spans ?service_name ?attrs spans ] -let of_metrics m : t = R_metrics [ Util_resources.make_resource_metrics m ] +let of_spans_or_empty ?service_name ?attrs spans = + of_x_or_empty ?service_name ?attrs ~f:of_spans spans -let of_metrics_or_empty ms = of_x_or_empty ~f:of_metrics ms +let of_metrics ?service_name ?attrs m : t = + R_metrics [ Util_resources.make_resource_metrics ?service_name ?attrs m ] + +let of_metrics_or_empty ?service_name ?attrs ms = + of_x_or_empty ?service_name ?attrs ~f:of_metrics ms diff --git a/src/client/util_resources.ml b/src/client/util_resources.ml index bb20fab1..7ca16496 100644 --- a/src/client/util_resources.ml +++ b/src/client/util_resources.ml @@ -2,9 +2,9 @@ open Common_ -let make_resource_logs (logs : Proto.Logs.log_record list) : - Proto.Logs.resource_logs = - let attributes = OTEL.Globals.mk_attributes () in +let make_resource_logs ?service_name ?attrs (logs : Proto.Logs.log_record list) + : Proto.Logs.resource_logs = + let attributes = OTEL.Globals.mk_attributes ?service_name ?attrs () in let resource = Proto.Resource.make_resource ~attributes () in let ll = Proto.Logs.make_scope_logs ~scope:OTEL.Globals.instrumentation_library From 4fb824eba203b0fc6d76b2a7bca81af6c8b577ef Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:33:33 -0500 Subject: [PATCH 59/94] wip: exporter_queued, a queue + a consumer --- src/client/bounded_queue.ml | 12 -------- src/client/consumer.ml | 20 +++++++++++++ src/client/exporter_queued.ml | 55 +++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 12 deletions(-) create mode 100644 src/client/consumer.ml create mode 100644 src/client/exporter_queued.ml diff --git a/src/client/bounded_queue.ml b/src/client/bounded_queue.ml index b69a60a4..a8616aaa 100644 --- a/src/client/bounded_queue.ml +++ b/src/client/bounded_queue.ml @@ -47,15 +47,3 @@ let to_emitter (self : 'a t) : 'a Opentelemetry_emitter.Emitter.t = let tick ~now:_ = () in let flush_and_close () = close self in { closed; enabled; emit; tick; flush_and_close } - -let logs_emitter (self : Any_resource.t t) : OTEL.Logger.t = - to_emitter self - |> Opentelemetry_emitter.Emitter.flat_map Any_resource.of_logs_or_empty - -let spans_emitter (self : Any_resource.t t) : OTEL.Tracer.t = - to_emitter self - |> Opentelemetry_emitter.Emitter.flat_map Any_resource.of_spans_or_empty - -let metrics_emitter (self : Any_resource.t t) : OTEL.Metrics_emitter.t = - to_emitter self - |> Opentelemetry_emitter.Emitter.flat_map Any_resource.of_metrics_or_empty diff --git a/src/client/consumer.ml b/src/client/consumer.ml new file mode 100644 index 00000000..3cf92658 --- /dev/null +++ b/src/client/consumer.ml @@ -0,0 +1,20 @@ +(** Consumer that accepts items from a bounded queue *) + +(** A consumer for signals of type ['a] *) +class type ['a] t = object + method register : 'a Bounded_queue.t -> unit + + method active : unit -> bool + + method start : on_done:(unit -> unit) -> unit + + method shutdown : on_done:(unit -> unit) -> unit +end + +let register (self : _ #t) q = self#register q + +let active (self : _ #t) = self#active () + +let start (self : _ #t) ~on_done = self#start ~on_done + +let shutdown (self : _ #t) ~on_done = self#shutdown ~on_done diff --git a/src/client/exporter_queued.ml b/src/client/exporter_queued.ml new file mode 100644 index 00000000..6ee9483e --- /dev/null +++ b/src/client/exporter_queued.ml @@ -0,0 +1,55 @@ +(** Build an exporter from a queue and a consumer *) + +open Common_ +module BQ = Bounded_queue + +module BQ_emitters = struct + let logs_emitter_of_bq ?service_name ?attrs + (q : Any_resource.t Bounded_queue.t) : OTEL.Logger.t = + Bounded_queue.to_emitter q + |> Opentelemetry_emitter.Emitter.flat_map + (Any_resource.of_logs_or_empty ?service_name ?attrs) + + let spans_emitter_of_bq ?service_name ?attrs + (q : Any_resource.t Bounded_queue.t) : OTEL.Tracer.t = + Bounded_queue.to_emitter q + |> Opentelemetry_emitter.Emitter.flat_map + (Any_resource.of_spans_or_empty ?service_name ?attrs) + + let metrics_emitter_of_bq ?service_name ?attrs + (q : Any_resource.t Bounded_queue.t) : OTEL.Metrics_emitter.t = + Bounded_queue.to_emitter q + |> Opentelemetry_emitter.Emitter.flat_map + (Any_resource.of_metrics_or_empty ?service_name ?attrs) +end + +(** Pair a queue with a consumer to build an exporter. + + The resulting exporter will emit logs, spans, and traces directly into the + bounded queue; while the consumer takes them from the queue to forward them + somewhere else, store them, etc. + @param resource_attributes attributes added to every "resource" batch *) +let create ?(resource_attributes = []) ~(q : Any_resource.t Bounded_queue.t) + ~(consumer : Any_resource.t Consumer.t) () : OTEL.Exporter.t = + let emit_spans = + BQ_emitters.spans_emitter_of_bq ~attrs:resource_attributes q + in + let emit_logs = BQ_emitters.logs_emitter_of_bq ~attrs:resource_attributes q in + let emit_metrics = + BQ_emitters.metrics_emitter_of_bq ~attrs:resource_attributes q + in + + let tick_set = Cb_set.create () in + let tick () = Cb_set.trigger tick_set in + let on_tick f = Cb_set.register tick_set f in + + let closed = Atomic.make false in + + let cleanup ~on_done () = + if not (Atomic.exchange closed true) then ( + Bounded_queue.close q; + Consumer.shutdown consumer ~on_done + ) else + on_done () + in + { emit_logs; emit_metrics; emit_spans; tick; on_tick; cleanup } From ad5ef8e99c5dfd9641c62541e7e50c749a333a24 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 4 Dec 2025 21:33:53 -0500 Subject: [PATCH 60/94] config fix --- src/client-cohttp-lwt/config.ml | 6 +++--- src/client-cohttp-lwt/config.mli | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/client-cohttp-lwt/config.ml b/src/client-cohttp-lwt/config.ml index 930881ff..4f3677de 100644 --- a/src/client-cohttp-lwt/config.ml +++ b/src/client-cohttp-lwt/config.ml @@ -1,7 +1,7 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t -module Env = Opentelemetry_client.Config.Env () +module Env = Opentelemetry_client.Client_config.Env () -let pp = Opentelemetry_client.Config.pp +let pp = Opentelemetry_client.Client_config.pp let make = Env.make (fun common () -> common) diff --git a/src/client-cohttp-lwt/config.mli b/src/client-cohttp-lwt/config.mli index 100bb696..dff28732 100644 --- a/src/client-cohttp-lwt/config.mli +++ b/src/client-cohttp-lwt/config.mli @@ -1,4 +1,4 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t (** Configuration. To build one, use {!make} below. This might be extended with more fields in @@ -6,7 +6,7 @@ type t = Opentelemetry_client.Config.t val pp : Format.formatter -> t -> unit -val make : (unit -> t) Opentelemetry_client.Config.make +val make : (unit -> t) Opentelemetry_client.Client_config.make (** Make a configuration {!t}. *) -module Env : Opentelemetry_client.Config.ENV +module Env : Opentelemetry_client.Client_config.ENV From 741de6cece479ccb3c7d405bcedec503ae20439f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 09:04:00 -0500 Subject: [PATCH 61/94] custom queue in Bounded_queue_sync, remove bespoke stuff from Sync_queue --- src/client/bounded_queue.ml | 19 ++++++- src/client/bounded_queue_sync.ml | 89 +++++++++++++++++++++++++++---- src/client/bounded_queue_sync.mli | 4 +- src/client/sync_queue.ml | 32 ----------- src/client/sync_queue.mli | 14 ----- 5 files changed, 100 insertions(+), 58 deletions(-) diff --git a/src/client/bounded_queue.ml b/src/client/bounded_queue.ml index a8616aaa..174c624e 100644 --- a/src/client/bounded_queue.ml +++ b/src/client/bounded_queue.ml @@ -22,11 +22,22 @@ type 'a t = { on_non_empty: (unit -> unit) -> unit; (** [on_non_empty f] registers [f] to be called whenever the queue transitions from empty to non-empty. *) - try_pop: unit -> 'a pop_result; - (** Try to pop an item right now. @raise Closed if the *) + try_pop: unit -> 'a pop_result; (** Try to pop an item right now. *) close: unit -> unit; + (** Close the queue. Items currently in the queue will still be accessible + to consumers until the queue is emptied out. Idempotent. *) closed: unit -> bool; + (** Is the queue closed {b for writing}. Consumers should only use + [try_pop] because a queue that's closed-for-writing might still + contain straggler items that need to be consumed. + + This should be as fast and cheap as possible. *) } +(** A bounded queue, with multiple producers and potentially multiple consumers. + + All functions must be thread-safe except for [try_pop] which might not have + to be depending on the context (e.g. a Lwt-specific queue implementation + will consume only from the Lwt thread). *) let[@inline] push (self : _ t) x : unit = self.push x @@ -40,10 +51,14 @@ let[@inline] close (self : _ t) : unit = self.close () let[@inline] closed (self : _ t) : bool = self.closed () +(** Turn the writing end of the queue into an emitter. *) let to_emitter (self : 'a t) : 'a Opentelemetry_emitter.Emitter.t = let closed () = self.closed () in let enabled () = not (closed ()) in let emit x = if x <> [] then push self x in let tick ~now:_ = () in + + (* NOTE: we cannot actually flush, only close. Emptying the queue is + fundamentally asynchronous because it's done by consumers *) let flush_and_close () = close self in { closed; enabled; emit; tick; flush_and_close } diff --git a/src/client/bounded_queue_sync.ml b/src/client/bounded_queue_sync.ml index 506bd214..b4d80f7b 100644 --- a/src/client/bounded_queue_sync.ml +++ b/src/client/bounded_queue_sync.ml @@ -1,17 +1,89 @@ module BQ = Bounded_queue +exception Closed = Bounded_queue.Closed + +(* a variant of {!Sync_queue} with more bespoke pushing behavior *) +module Q : sig + type 'a t + + val create : unit -> 'a t + + val close : _ t -> unit + + val closed : _ t -> bool + + val try_pop : 'a t -> 'a option + + val push_while_not_full : high_watermark:int -> 'a t -> 'a list -> int * int + (** [push_while_not_full q ~high_watermark xs] tries to push each item of [x] + into [q]. + + An item is not pushed if the queue is "full" (size >= high_watermark). + + This returns a pair [num_discarded, old_size] where [num_discarded] is the + number of items that could not be pushed, and [old_size] is the size + before anything was pushed. *) +end = struct + module UM = Opentelemetry_util.Util_mutex + + type 'a t = { + mutex: Mutex.t; + q: 'a Queue.t; + mutable closed: bool; + } + + let create () : _ t = + { mutex = Mutex.create (); q = Queue.create (); closed = false } + + (* NOTE: the race condition here is benign, assuming no tearing of + a value of type [bool] which OCaml's memory model should guarantee. *) + let[@inline] closed self = self.closed + + let close (self : _ t) = + UM.protect self.mutex @@ fun () -> + if not self.closed then self.closed <- true + + let push (self : _ t) x : unit = + UM.protect self.mutex @@ fun () -> + if self.closed then raise Closed; + Queue.push x self.q + + let try_pop (self : 'a t) : 'a option = + UM.protect self.mutex @@ fun () -> + if self.closed then raise Closed; + try Some (Queue.pop self.q) with Queue.Empty -> None + + let push_while_not_full ~high_watermark (self : 'a t) (xs : 'a list) : + int * int = + UM.protect self.mutex @@ fun () -> + if self.closed then raise Closed; + + let old_size = Queue.length self.q in + let xs = ref xs in + + let continue = ref true in + while !continue && Queue.length self.q < high_watermark do + match !xs with + | [] -> continue := false + | x :: tl_xs -> + xs := tl_xs; + Queue.push x self.q + done; + + let n_discarded = List.length !xs in + n_discarded, old_size +end + type 'a state = { n_discarded: int Atomic.t; high_watermark: int; - q: 'a Sync_queue.t; + q: 'a Q.t; on_non_empty: Cb_set.t; } let push (self : _ state) x = let discarded, old_size = - try - Sync_queue.push_while_not_full self.q ~high_watermark:self.high_watermark - x + try Q.push_while_not_full self.q ~high_watermark:self.high_watermark x with Sync_queue.Closed -> raise BQ.Closed in @@ -23,28 +95,27 @@ let push (self : _ state) x = () let try_pop (self : _ state) : _ BQ.pop_result = - match Sync_queue.try_pop self.q with + match Q.try_pop self.q with | Some x -> `Item x | None -> `Empty | exception Sync_queue.Closed -> `Closed let to_bounded_queue (self : 'a state) : 'a BQ.t = - let closed () = Sync_queue.closed self.q in + let closed () = Q.closed self.q in let num_discarded () = Atomic.get self.n_discarded in let push x = push self x in let on_non_empty = Cb_set.register self.on_non_empty in let try_pop () = try_pop self in - let close () = Sync_queue.close self.q in + let close () = Q.close self.q in { BQ.push; num_discarded; try_pop; on_non_empty; close; closed } let create ~high_watermark () : _ BQ.t = let st = { high_watermark; - q = Sync_queue.create (); + q = Q.create (); n_discarded = Atomic.make 0; on_non_empty = Cb_set.create (); } in to_bounded_queue st - diff --git a/src/client/bounded_queue_sync.mli b/src/client/bounded_queue_sync.mli index abdb710c..d3cf6347 100644 --- a/src/client/bounded_queue_sync.mli +++ b/src/client/bounded_queue_sync.mli @@ -1,4 +1,6 @@ -(** Bounded queue based on {!Sync_queue} *) +(** Bounded queue based on simple synchronization primitives. + + This is not the fastest queue but it should be versatile. *) val create : high_watermark:int -> unit -> 'a Bounded_queue.t (** [create ~high_watermark ()] creates a new bounded queue based on diff --git a/src/client/sync_queue.ml b/src/client/sync_queue.ml index 44d71a06..46d46af1 100644 --- a/src/client/sync_queue.ml +++ b/src/client/sync_queue.ml @@ -17,10 +17,6 @@ let create () : _ t = closed = false; } -(* NOTE: the race condition here is benign, assuming no tearing of - a value of type [bool] which OCaml's memory model should guarantee. *) -let[@inline] closed self = self.closed - let close (self : _ t) = UM.protect self.mutex @@ fun () -> if not self.closed then ( @@ -51,11 +47,6 @@ let pop (self : 'a t) : 'a = in UM.protect self.mutex loop -let try_pop (self : 'a t) : 'a option = - UM.protect self.mutex @@ fun () -> - if self.closed then raise Closed; - try Some (Queue.pop self.q) with Queue.Empty -> None - let pop_all (self : 'a t) into : unit = let rec loop () = if Queue.is_empty self.q then ( @@ -66,26 +57,3 @@ let pop_all (self : 'a t) into : unit = Queue.transfer self.q into in UM.protect self.mutex loop - -let push_while_not_full ~high_watermark (self : 'a t) (xs : 'a list) : int * int - = - UM.protect self.mutex @@ fun () -> - if self.closed then raise Closed; - - let old_size = Queue.length self.q in - let xs = ref xs in - - let continue = ref true in - while !continue && Queue.length self.q < high_watermark do - match !xs with - | [] -> continue := false - | x :: tl_xs -> - xs := tl_xs; - Queue.push x self.q - done; - - (* pushed at least one item *) - if Queue.length self.q <> old_size then Condition.broadcast self.cond; - - let n_discarded = List.length !xs in - n_discarded, old_size diff --git a/src/client/sync_queue.mli b/src/client/sync_queue.mli index b1ebb345..d64296d7 100644 --- a/src/client/sync_queue.mli +++ b/src/client/sync_queue.mli @@ -14,25 +14,11 @@ val pop : 'a t -> 'a (** [pop q] pops the next element in [q]. It might block until an element comes. @raise Closed if the queue was closed before a new element was available. *) -val try_pop : 'a t -> 'a option - val pop_all : 'a t -> 'a Queue.t -> unit (** [pop_all q into] pops all the elements of [q] and moves them into [into]. if no element is available, it will block until it successfully transfers at least one item to [into]. @raise Closed if the queue was closed before a new element was available. *) -val closed : _ t -> bool - val close : _ t -> unit (** Close the queue, meaning there won't be any more [push] allowed. *) - -val push_while_not_full : high_watermark:int -> 'a t -> 'a list -> int * int -(** [push_while_not_full q ~high_watermark xs] tries to push each item of [x] - into [q]. - - An item is not pushed if the queue is "full" (size >= high_watermark). - - This returns a pair [num_discarded, old_size] where [num_discarded] is the - number of items that could not be pushed, and [old_size] is the size before - anything was pushed. *) From 45c5860fe45c4efa9440f444ea6f32148be0be30 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 09:10:10 -0500 Subject: [PATCH 62/94] refactor consumer --- src/client/consumer.ml | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/client/consumer.ml b/src/client/consumer.ml index 3cf92658..89288080 100644 --- a/src/client/consumer.ml +++ b/src/client/consumer.ml @@ -1,20 +1,27 @@ (** Consumer that accepts items from a bounded queue *) +type 'a t = { + active: unit -> bool; (** Still running? Must be fast and thread-safe *) + tick: unit -> unit; + (** Regularly called, eg to emit metrics, check timeouts, etc. Must be + thread safe. *) + shutdown: on_done:(unit -> unit) -> unit; + (** Shutdown the consumer as soon as possible, call [on_done()] once it's + done. *) +} (** A consumer for signals of type ['a] *) -class type ['a] t = object - method register : 'a Bounded_queue.t -> unit - method active : unit -> bool +type 'a consumer = 'a t - method start : on_done:(unit -> unit) -> unit +let[@inline] active (self : _ t) = self.active () - method shutdown : on_done:(unit -> unit) -> unit +let[@inline] shutdown (self : _ t) ~on_done = self.shutdown ~on_done + +module Builder = struct + type 'a t = { start_consuming: 'a Bounded_queue.t -> 'a consumer } + (** A builder that will create a consumer for a given queue, start the + consumer so it starts consuming from the queue, and return the consumer. + *) + + let start_consuming (self : _ t) bq = self.start_consuming bq end - -let register (self : _ #t) q = self#register q - -let active (self : _ #t) = self#active () - -let start (self : _ #t) ~on_done = self#start ~on_done - -let shutdown (self : _ #t) ~on_done = self#shutdown ~on_done From 7eef6913bc03c75657589411d4f07cd012cb6167 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 09:25:03 -0500 Subject: [PATCH 63/94] feat client: add `Exporter_add_batching` --- src/client/client_config.mli | 4 ++-- src/client/exporter_add_batching.ml | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 src/client/exporter_add_batching.ml diff --git a/src/client/client_config.mli b/src/client/client_config.mli index b8d0238f..8af6a844 100644 --- a/src/client/client_config.mli +++ b/src/client/client_config.mli @@ -19,8 +19,8 @@ type t = private { (** Batch metrics? If [Some i], then this produces batches of (at most) [i] items. If [None], there is no batching. - Note that traces and metrics are batched separately. Default [None]. - *) + Note that traces and metrics are batched separately. Default + [Some 20]. *) batch_logs: int option; (** Batch logs? See {!batch_metrics} for details. Default [Some 400] *) batch_timeout_ms: int; diff --git a/src/client/exporter_add_batching.ml b/src/client/exporter_add_batching.ml new file mode 100644 index 00000000..0858d5b8 --- /dev/null +++ b/src/client/exporter_add_batching.ml @@ -0,0 +1,25 @@ +(** Add batching to emitter based on client config *) + +open Common_ + +open struct + let add_batch ~timeout batch (emitter : 'a OTEL.Emitter.t) : 'a OTEL.Emitter.t + = + let b = Batch.make ~batch ~timeout () in + Batch.wrap_emitter b emitter +end + +let add_batching ~(config : Client_config.t) (exp : OTEL.Exporter.t) : + OTEL.Exporter.t = + let timeout = Mtime.Span.(config.batch_timeout_ms * ms) in + let add_batch_opt (b : int option) e = + match b with + | None -> e + | Some b -> add_batch ~timeout b e + in + + let emit_spans = add_batch_opt config.batch_traces exp.emit_spans in + let emit_metrics = add_batch_opt config.batch_metrics exp.emit_metrics in + let emit_logs = add_batch_opt config.batch_logs exp.emit_logs in + + { exp with emit_spans; emit_metrics; emit_logs } From 3a0a17e2e9f16c5881cab56cd7ce71dbec860a0e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 15:02:49 -0500 Subject: [PATCH 64/94] client config: add http_concurrency_level option --- src/client/client_config.ml | 12 +++++++++--- src/client/client_config.mli | 4 ++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/client/client_config.ml b/src/client/client_config.ml index 6a832291..6dedbeeb 100644 --- a/src/client/client_config.ml +++ b/src/client/client_config.ml @@ -9,6 +9,7 @@ type t = { batch_logs: int option; batch_timeout_ms: int; self_trace: bool; + http_concurrency_level: int option; } let pp out (self : t) : unit = @@ -26,15 +27,17 @@ let pp out (self : t) : unit = batch_metrics; batch_logs; batch_timeout_ms; + http_concurrency_level; } = self in Format.fprintf out "{@[ debug=%B;@ self_trace=%B; url_traces=%S;@ url_metrics=%S;@ \ url_logs=%S;@ headers=%a;@ batch_traces=%a;@ batch_metrics=%a;@ \ - batch_logs=%a;@ batch_timeout_ms=%d @]}" + batch_logs=%a;@ batch_timeout_ms=%d;@ http_concurrency_level=%a @]}" debug self_trace url_traces url_metrics url_logs ppheaders headers ppiopt - batch_traces ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms + batch_traces ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms ppiopt + http_concurrency_level let default_url = "http://localhost:4318" @@ -50,6 +53,7 @@ type 'k make = ?headers:(string * string) list -> ?batch_timeout_ms:int -> ?self_trace:bool -> + ?http_concurrency_level:int -> 'k module type ENV = sig @@ -123,7 +127,8 @@ module Env () : ENV = struct let make k ?(debug = get_debug ()) ?url ?url_traces ?url_metrics ?url_logs ?(batch_traces = Some 400) ?(batch_metrics = Some 20) ?(batch_logs = Some 400) ?(headers = get_headers ()) - ?(batch_timeout_ms = 2_000) ?(self_trace = false) = + ?(batch_timeout_ms = 2_000) ?(self_trace = false) ?http_concurrency_level + = (* Ensure the state is synced, in case these values are passed in explicitly *) set_debug debug; set_headers headers; @@ -165,5 +170,6 @@ module Env () : ENV = struct batch_logs; batch_timeout_ms; self_trace; + http_concurrency_level; } end diff --git a/src/client/client_config.mli b/src/client/client_config.mli index 8af6a844..6aedb128 100644 --- a/src/client/client_config.mli +++ b/src/client/client_config.mli @@ -32,6 +32,9 @@ type t = private { (** If true, the OTEL library will also emit its own spans. Default [false]. @since 0.7 *) + http_concurrency_level: int option; + (** How many HTTP requests can be done simultaneously (at most)? + @since NEXT_RELEASE *) } (** Configuration. @@ -55,6 +58,7 @@ type 'k make = ?headers:(string * string) list -> ?batch_timeout_ms:int -> ?self_trace:bool -> + ?http_concurrency_level:int -> 'k (** A function that gathers all the values needed to construct a {!t}, and produces a ['k]. ['k] is typically a continuation used to construct a From 7d0cfb75004ea1fb0613f6e3f0bbaa129019e0b0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 15:50:09 -0500 Subject: [PATCH 65/94] client: lwt helpers, error helpers, thread utils, lwt notifier --- src/client/bounded_queue.ml | 5 +++ src/client/consumer.ml | 3 ++ src/client/export_error.ml | 50 +++++++++++++++++++++++++++ src/client/exporter_queued.ml | 4 ++- src/client/lwt/common_.ml | 1 + src/client/lwt/dune | 21 ++++++++++++ src/client/lwt/notifier.ml | 31 +++++++++++++++++ src/client/lwt/util_ticker.ml | 18 ++++++++++ src/client/util_thread.ml | 64 +++++++++++++++++++++++++++++++++++ 9 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 src/client/export_error.ml create mode 100644 src/client/lwt/common_.ml create mode 100644 src/client/lwt/dune create mode 100644 src/client/lwt/notifier.ml create mode 100644 src/client/lwt/util_ticker.ml create mode 100644 src/client/util_thread.ml diff --git a/src/client/bounded_queue.ml b/src/client/bounded_queue.ml index 174c624e..63d79802 100644 --- a/src/client/bounded_queue.ml +++ b/src/client/bounded_queue.ml @@ -62,3 +62,8 @@ let to_emitter (self : 'a t) : 'a Opentelemetry_emitter.Emitter.t = fundamentally asynchronous because it's done by consumers *) let flush_and_close () = close self in { closed; enabled; emit; tick; flush_and_close } + +module Defaults = struct + (** The default high watermark *) + let high_watermark : int = 2048 +end diff --git a/src/client/consumer.ml b/src/client/consumer.ml index 89288080..42bcf35d 100644 --- a/src/client/consumer.ml +++ b/src/client/consumer.ml @@ -25,3 +25,6 @@ module Builder = struct let start_consuming (self : _ t) bq = self.start_consuming bq end + +type any_resource_builder = Any_resource.t Builder.t +(** The type that's useful for OTEL backends *) diff --git a/src/client/export_error.ml b/src/client/export_error.ml new file mode 100644 index 00000000..b78447cb --- /dev/null +++ b/src/client/export_error.ml @@ -0,0 +1,50 @@ +open Common_ + +type t = + [ `Status of int * Opentelemetry.Proto.Status.status + | `Failure of string + | `Sysbreak + ] + +let str_to_hex (s : string) : string = + Opentelemetry_util.Util_bytes_.bytes_to_hex (Bytes.unsafe_of_string s) + +(** Report the error on stderr. *) +let report_err : t -> unit = function + | `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!" + | `Failure msg -> + Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg + | `Status + ( code, + { + Opentelemetry.Proto.Status.code = scode; + message; + details; + _presence = _; + } ) -> + let pp_details out l = + List.iter + (fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s)) + l + in + Format.eprintf + "@[<2>opentelemetry: export failed with@ http code=%d@ status \ + {@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@." + code scode + (Bytes.unsafe_to_string message) + pp_details details + +let decode_invalid_http_response ~code ~url (body : string) : t = + try + let dec = Pbrt.Decoder.of_string body in + let status = Opentelemetry.Proto.Status.decode_pb_status dec in + `Status (code, status) + with e -> + let bt = Printexc.get_backtrace () in + `Failure + (Printf.sprintf + "httpc: decoding of status (url=%S, code=%d) failed with:\n\ + %s\n\ + HTTP body: %s\n\ + %s" + url code (Printexc.to_string e) (str_to_hex body) bt) diff --git a/src/client/exporter_queued.ml b/src/client/exporter_queued.ml index 6ee9483e..55304959 100644 --- a/src/client/exporter_queued.ml +++ b/src/client/exporter_queued.ml @@ -30,7 +30,7 @@ end somewhere else, store them, etc. @param resource_attributes attributes added to every "resource" batch *) let create ?(resource_attributes = []) ~(q : Any_resource.t Bounded_queue.t) - ~(consumer : Any_resource.t Consumer.t) () : OTEL.Exporter.t = + ~(consumer : Consumer.any_resource_builder) () : OTEL.Exporter.t = let emit_spans = BQ_emitters.spans_emitter_of_bq ~attrs:resource_attributes q in @@ -45,6 +45,8 @@ let create ?(resource_attributes = []) ~(q : Any_resource.t Bounded_queue.t) let closed = Atomic.make false in + let consumer = consumer.start_consuming q in + let cleanup ~on_done () = if not (Atomic.exchange closed true) then ( Bounded_queue.close q; diff --git a/src/client/lwt/common_.ml b/src/client/lwt/common_.ml new file mode 100644 index 00000000..6a337b5c --- /dev/null +++ b/src/client/lwt/common_.ml @@ -0,0 +1 @@ +module OTEL = Opentelemetry diff --git a/src/client/lwt/dune b/src/client/lwt/dune new file mode 100644 index 00000000..2b7d082b --- /dev/null +++ b/src/client/lwt/dune @@ -0,0 +1,21 @@ +(library + (name opentelemetry_client_lwt) + (public_name opentelemetry-client.lwt) + (flags + :standard + -open + Opentelemetry_util + -open + Opentelemetry_client + -open + Opentelemetry_atomic) + (optional) ; lwt + (libraries + opentelemetry.core + opentelemetry.util + opentelemetry.atomic + opentelemetry.emitter + opentelemetry-client + lwt + lwt.unix) + (synopsis "Lwt-specific helpers for opentelemetry-client")) diff --git a/src/client/lwt/notifier.ml b/src/client/lwt/notifier.ml new file mode 100644 index 00000000..4b924183 --- /dev/null +++ b/src/client/lwt/notifier.ml @@ -0,0 +1,31 @@ +(** Notification that can be used on the consumer side of a bounded queue *) + +type t = { + notified: bool Atomic.t; + cond: unit Lwt_condition.t; + notification: int; + deleted: bool Atomic.t; +} + +let create () : t = + let notified = Atomic.make false in + let cond = Lwt_condition.create () in + let notification = + Lwt_unix.make_notification (fun () -> + Atomic.set notified false; + Lwt_condition.broadcast cond ()) + in + { notified; notification; cond; deleted = Atomic.make false } + +let delete self : unit = + if not (Atomic.exchange self.deleted true) then + Lwt_unix.stop_notification self.notification + +let trigger (self : t) : unit = + if not (Atomic.exchange self.notified true) then + Lwt_unix.send_notification self.notification + +let wait (self : t) : unit Lwt.t = Lwt_condition.wait self.cond + +let register_bounded_queue (self : t) (q : _ Bounded_queue.t) : unit = + Bounded_queue.on_non_empty q (fun () -> trigger self) diff --git a/src/client/lwt/util_ticker.ml b/src/client/lwt/util_ticker.ml new file mode 100644 index 00000000..64c26da8 --- /dev/null +++ b/src/client/lwt/util_ticker.ml @@ -0,0 +1,18 @@ +open Common_ +open Lwt.Syntax + +(** Lwt task that calls [Exporter.tick] regularly, to help enforce timeouts. + @param frequency_s how often in seconds does the tick tock? *) +let start_ticker_thread ?(finally = ignore) ~(stop : bool Atomic.t) + ~(frequency_s : float) (exp : OTEL.Exporter.t) : unit = + let frequency_s = max frequency_s 0.5 in + let rec tick_loop () = + if Atomic.get stop then ( + finally (); + Lwt.return () + ) else + let* () = Lwt_unix.sleep frequency_s in + OTEL.Exporter.tick exp; + tick_loop () + in + Lwt.async tick_loop diff --git a/src/client/util_thread.ml b/src/client/util_thread.ml new file mode 100644 index 00000000..37764ac3 --- /dev/null +++ b/src/client/util_thread.ml @@ -0,0 +1,64 @@ +open Common_ + +(** start a thread in the background, running [f()], blocking signals *) +let start_bg_thread (f : unit -> unit) : Thread.t = + let unix_run () = + let signals = + [ + Sys.sigusr1; + Sys.sigusr2; + Sys.sigterm; + Sys.sigpipe; + Sys.sigalrm; + Sys.sigstop; + ] + in + ignore (Thread.sigmask Unix.SIG_BLOCK signals : _ list); + f () + in + (* no signals on Windows *) + let run () = + if Sys.win32 then + f () + else + unix_run () + in + Thread.create run () + +(** thread that calls [tick()] regularly, to help enforce timeouts *) +let setup_ticker_thread ~stop ~sleep_ms (exp : OTEL.Exporter.t) () = + let sleep_s = float sleep_ms /. 1000. in + let tick_loop () = + try + while not @@ Atomic.get stop do + Thread.delay sleep_s; + OTEL.Exporter.tick exp + done + with + | Sync_queue.Closed -> () + | exn -> + (* print and ignore *) + Printf.eprintf "otel-ocurl: ticker thread: uncaught exn:\n%s\n%!" + (Printexc.to_string exn) + in + start_bg_thread tick_loop + +module MCond = struct + type t = { + mutex: Mutex.t; + cond: Condition.t; + } + + let create () : t = { mutex = Mutex.create (); cond = Condition.create () } + + let signal self = Condition.signal self.cond + + let[@inline] protect self f = Util_mutex.protect self.mutex f + + (** NOTE: the mutex must be acquired *) + let wait self = Condition.wait self.cond self.mutex + + (** Ensure we get signalled when the queue goes from empty to non-empty *) + let wakeup_from_bq (self : t) (bq : _ Bounded_queue.t) : unit = + Bounded_queue.on_non_empty bq (fun () -> signal self) +end From a95037d7e22a3ccb5b981fca88fcbb96b50ad19b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 15:50:27 -0500 Subject: [PATCH 66/94] feat client-ocurl: port to just being a consumer the rest is reusable components from opentelemetry-client --- .../opentelemetry_client_ocurl.ml | 316 +++++------------- .../opentelemetry_client_ocurl.mli | 12 +- 2 files changed, 100 insertions(+), 228 deletions(-) diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 2efc50fe..bee44a6f 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -14,89 +14,25 @@ let set_headers = Config.Env.set_headers let n_errors = Atomic.make 0 -let n_dropped = Atomic.make 0 - -(** Something to be sent via HTTP *) -module To_send = struct - open Opentelemetry.Proto - - type t = - | Send_metric of Metrics.resource_metrics list - | Send_trace of Trace.resource_spans list - | Send_logs of Logs.resource_logs list -end - -(** start a thread in the background, running [f()] *) -let start_bg_thread (f : unit -> unit) : Thread.t = - let unix_run () = - let signals = - [ - Sys.sigusr1; - Sys.sigusr2; - Sys.sigterm; - Sys.sigpipe; - Sys.sigalrm; - Sys.sigstop; - ] - in - ignore (Thread.sigmask Unix.SIG_BLOCK signals : _ list); - f () - in - (* no signals on Windows *) - let run () = - if Sys.win32 then - f () - else - unix_run () - in - Thread.create run () - -let str_to_hex (s : string) : string = - Opentelemetry_util.Util_bytes_.bytes_to_hex (Bytes.unsafe_of_string s) - -module Exporter_impl : sig - val n_bytes_sent : int Atomic.t - - class type t = object - inherit OTEL.Exporter.t - - method shutdown : on_done:(unit -> unit) -> unit -> unit - end - - val create : stop:bool Atomic.t -> config:Config.t -> unit -> t - - val shutdown : t -> on_done:(unit -> unit) -> unit -end = struct - open Opentelemetry.Proto - - let n_bytes_sent : int Atomic.t = Atomic.make 0 - - class type t = object - inherit OTEL.Exporter.t - - method shutdown : on_done:(unit -> unit) -> unit -> unit - end +let n_bytes_sent : int Atomic.t = Atomic.make 0 +module Consumer_impl = struct type state = { + bq: Any_resource.t Bounded_queue.t; (** Queue of incoming workload *) stop: bool Atomic.t; - cleaned: bool Atomic.t; (** True when we cleaned up after closing *) config: Config.t; - encoder_pool: Pbrt.Encoder.t Rpool.t; - send_q: To_send.t Sync_queue.t; (** Queue for the send worker threads *) - traces: Proto.Trace.span Batch.t; - logs: Proto.Logs.log_record Batch.t; - metrics: Proto.Metrics.metric Batch.t; - mutable send_threads: Thread.t array; (** Threads that send data via http *) + mutable send_threads: Thread.t array; + (** Threads that send data via http *) + cleaned: bool Atomic.t; (** True when we cleaned up after closing *) + mcond: Util_thread.MCond.t; (** how to wait for the queue *) } - let send_batch_ (self : state) ~force ~mk_to_send (b : _ Batch.t) : unit = - match Batch.pop_if_ready ~force ~now:(Mtime_clock.now ()) b with - | None -> () - | Some l -> - let to_send = mk_to_send l in - Sync_queue.push self.send_q to_send + let shutdown self : unit = + Atomic.set self.stop true; + (* wakeup sleepers *) + Util_thread.MCond.signal self.mcond - let send_http_ ~stop ~(config : Config.t) (client : Curl.t) ~url data : unit = + let send_http_ (self : state) (client : Curl.t) ~url (data : string) : unit = let@ _sc = Self_trace.with_ ~kind:Span_kind_producer "otel-ocurl.send-http" in @@ -105,12 +41,12 @@ end = struct Printf.eprintf "opentelemetry: send http POST to %s (%dB)\n%!" url (String.length data); let headers = - ("Content-Type", "application/x-protobuf") :: config.common.headers + ("Content-Type", "application/x-protobuf") :: self.config.common.headers in match let@ _sc = Self_trace.with_ ~kind:Span_kind_internal "curl.post" - ~attrs:[ "sz", `Int (String.length data); "url", `String url ] + ~attrs:[ "size", `Int (String.length data); "url", `String url ] in Ezcurl.post ~headers ~client ~params:[] ~url ~content:(`String data) () with @@ -123,22 +59,13 @@ end = struct @@ Opentelemetry.Event.make "error" ~attrs:[ "code", `Int code ]; if Config.Env.get_debug () then ( - let dec = Pbrt.Decoder.of_string body in - let body = - try - let status = Status.decode_pb_status dec in - Format.asprintf "%a" Status.pp_status status - with _ -> - spf "(could not decode status)\nraw bytes: %s" (str_to_hex body) - in - Printf.eprintf - "opentelemetry: error while sending data to %s:\n code=%d\n %s\n%!" - url code body - ); - () + let err = Export_error.decode_invalid_http_response ~url ~code body in + Export_error.report_err err; + () + ) | exception Sys.Break -> Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set stop true + shutdown self | Error (code, msg) -> (* TODO: log error _via_ otel? *) Atomic.incr n_errors; @@ -150,182 +77,117 @@ end = struct (* avoid crazy error loop *) Thread.delay 3. - (** Thread that, in a loop, reads from [q] to get the next message to send via - http *) + (** The main loop of a thread that, reads from [bq] to get the next message to + send via http *) let bg_thread_loop (self : state) : unit = Ezcurl.with_client ?set_opts:None @@ fun client -> - let config = self.config in - let stop = self.stop in - let send ~name ~url ~conv (signals : _ list) = + (* we need exactly one encoder per thread *) + let encoder = Pbrt.Encoder.create ~size:2048 () in + + let send ~name ~url ~conv (signals : _ list) : unit = let@ _sp = Self_trace.with_ ~kind:Span_kind_producer name ~attrs:[ "n", `Int (List.length signals) ] in - let msg : string = - (* borrow encoder from buffer pool and turn [signals] into bytes *) - let@ encoder = Rpool.with_resource self.encoder_pool in - conv ?encoder:(Some encoder) signals - in + + let msg : string = conv ?encoder:(Some encoder) signals in + Pbrt.Encoder.reset encoder; + ignore (Atomic.fetch_and_add n_bytes_sent (String.length msg) : int); - send_http_ ~stop ~config ~url client msg + send_http_ self client msg ~url; + () in - try - while not (Atomic.get stop) do - let msg = Sync_queue.pop self.send_q in - match msg with - | To_send.Send_trace tr -> - send ~name:"send-traces" ~conv:Signal.Encode.traces - ~url:config.common.url_traces tr - | To_send.Send_metric ms -> - send ~name:"send-metrics" ~conv:Signal.Encode.metrics - ~url:config.common.url_metrics ms - | To_send.Send_logs logs -> - send ~name:"send-logs" ~conv:Signal.Encode.logs - ~url:config.common.url_logs logs - done - with Sync_queue.Closed -> () + while not (Atomic.get self.stop) do + match Bounded_queue.try_pop self.bq with + | `Closed -> shutdown self + | `Empty -> Util_thread.MCond.wait self.mcond + | `Item (Any_resource.R_spans tr) -> + send ~name:"send-traces" ~conv:Signal.Encode.traces + ~url:self.config.common.url_traces tr + | `Item (Any_resource.R_metrics ms) -> + send ~name:"send-metrics" ~conv:Signal.Encode.metrics + ~url:self.config.common.url_metrics ms + | `Item (Any_resource.R_logs logs) -> + send ~name:"send-logs" ~conv:Signal.Encode.logs + ~url:self.config.common.url_logs logs + done - let batch_max_size_ = 200 - - let batch_timeout_ = Mtime.Span.(20 * s) - - let create_state ~stop ~config () : state = - let n_send_threads = max 2 config.Config.bg_threads in - let encoder_pool = - Rpool.create - ~mk_item:(fun () -> Pbrt.Encoder.create ~size:1024 ()) - ~max_size:32 ~clear:Pbrt.Encoder.reset () + let to_consumer (self : state) : _ Consumer.t = + let active () = not (Atomic.get self.stop) in + let tick () = + (* make sure to poll from time to time *) + Util_thread.MCond.signal self.mcond in + let shutdown ~on_done = + shutdown self; + on_done () + in + { tick; active; shutdown } + + let create_state ~stop ~(config : Config.t) ~q () : state = + let n_send_threads = min 100 @@ max 2 config.bg_threads in let self = { stop; config; send_threads = [||]; - send_q = Sync_queue.create (); - encoder_pool; + bq = q; cleaned = Atomic.make false; - traces = Batch.make ~batch:batch_max_size_ ~timeout:batch_timeout_ (); - logs = Batch.make ~batch:batch_max_size_ ~timeout:batch_timeout_ (); - metrics = Batch.make ~batch:batch_max_size_ ~timeout:batch_timeout_ (); + mcond = Util_thread.MCond.create (); } in + Util_thread.MCond.wakeup_from_bq self.mcond q; + self.send_threads <- Array.init n_send_threads (fun _i -> - start_bg_thread (fun () -> bg_thread_loop self)); + Util_thread.start_bg_thread (fun () -> bg_thread_loop self)); self - let maybe_send_metrics ~force (self : state) = - send_batch_ self ~force self.metrics ~mk_to_send:(fun metrics -> - let metrics = - Opentelemetry_client.Util_resources.make_resource_metrics metrics - in - To_send.Send_metric [ metrics ]) - - let maybe_send_logs ~force (self : state) = - send_batch_ self ~force self.logs ~mk_to_send:(fun logs -> - let logs = - Opentelemetry_client.Util_resources.make_resource_logs logs - in - To_send.Send_logs [ logs ]) - - let maybe_send_traces ~force (self : state) = - send_batch_ self ~force self.traces ~mk_to_send:(fun spans -> - let traces = - Opentelemetry_client.Util_resources.make_resource_spans spans - in - To_send.Send_trace [ traces ]) - - let[@inline] push_to_batch b e = - if e <> [] then ( - match Batch.push b e with - | `Ok -> () - | `Dropped -> Atomic.incr n_dropped - ) - - let create ~stop ~config () : #t = - let open Opentelemetry_util in - let st = create_state ~stop ~config () in - let ticker = Cb_set.create () in - object (self : #t) - method send_trace spans = - push_to_batch st.traces spans; - maybe_send_traces st ~force:false - - method send_metrics m = - push_to_batch st.metrics m; - maybe_send_metrics st ~force:false - - method send_logs m = - push_to_batch st.logs m; - maybe_send_logs st ~force:false - - method add_on_tick_callback cb = Cb_set.register ticker cb - - method tick () = Cb_set.trigger ticker - - method cleanup ~on_done () : unit = - if not (Atomic.exchange st.cleaned true) then ( - (* flush all signals *) - maybe_send_logs ~force:true st; - maybe_send_metrics ~force:true st; - maybe_send_traces ~force:true st; - - (* close send queues, then wait for all threads *) - Sync_queue.close st.send_q; - Array.iter Thread.join st.send_threads - ); - on_done () - - method shutdown ~on_done () = - Atomic.set st.stop true; - self#cleanup ~on_done () - end - - let shutdown (self : #t) ~on_done : unit = self#shutdown ~on_done () + let create ~stop ~config () : Consumer.any_resource_builder = + { + start_consuming = + (fun q -> + let st = create_state ~stop ~config ~q () in + to_consumer st); + } end -let create_exporter ?(stop = Atomic.make false) - ?(config : Config.t = Config.make ()) () : #OTEL.Exporter.t = - let backend = Exporter_impl.create ~stop ~config () in - (backend :> OTEL.Exporter.t) +let consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () : + Opentelemetry_client.Consumer.any_resource_builder = + Consumer_impl.create ~stop ~config () -(** thread that calls [tick()] regularly, to help enforce timeouts *) -let setup_ticker_thread ~stop ~sleep_ms (exp : #OTEL.Exporter.t) () = - let sleep_s = float sleep_ms /. 1000. in - let tick_loop () = - try - while not @@ Atomic.get stop do - Thread.delay sleep_s; - exp#tick () - done - with - | Sync_queue.Closed -> () - | exn -> - (* print and ignore *) - Printf.eprintf "otel-ocurl: ticker thread: uncaught exn:\n%s\n%!" - (Printexc.to_string exn) +let create_exporter ?stop ?(config = Config.make ()) () : OTEL.Exporter.t = + let consumer = consumer ?stop ~config () in + let bq = + Bounded_queue_sync.create + ~high_watermark:Bounded_queue.Defaults.high_watermark () in - start_bg_thread tick_loop + + Exporter_queued.create ~q:bq ~consumer () + |> Exporter_add_batching.add_batching ~config:config.common + +let create_backend = create_exporter let setup_ ?(stop = Atomic.make false) ?(config : Config.t = Config.make ()) () : unit = - let exporter = Exporter_impl.create ~stop ~config () in - OTEL.Exporter.Main_exporter.set exporter; + let exporter = create_exporter ~stop ~config () in + OTEL.Main_exporter.set exporter; Self_trace.set_enabled config.common.self_trace; if config.ticker_thread then ( (* at most a minute *) let sleep_ms = min 60_000 (max 2 config.ticker_interval_ms) in - ignore (setup_ticker_thread ~stop ~sleep_ms exporter () : Thread.t) + ignore + (Util_thread.setup_ticker_thread ~stop ~sleep_ms exporter () : Thread.t) ) let remove_backend () : unit = (* we don't need the callback, this runs in the same thread *) - OTEL.Exporter.Main_exporter.remove () ~on_done:ignore + OTEL.Main_exporter.remove () ~on_done:ignore let setup ?stop ?config ?(enable = true) () = if enable then setup_ ?stop ?config () @@ -337,4 +199,4 @@ let with_setup ?stop ?config ?(enable = true) () f = ) else f () -let[@inline] n_bytes_sent () = Atomic.get Exporter_impl.n_bytes_sent +let[@inline] n_bytes_sent () = Atomic.get n_bytes_sent diff --git a/src/client-ocurl/opentelemetry_client_ocurl.mli b/src/client-ocurl/opentelemetry_client_ocurl.mli index 77b8ea34..cece89df 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.mli +++ b/src/client-ocurl/opentelemetry_client_ocurl.mli @@ -4,7 +4,6 @@ *) open Opentelemetry_atomic -open Opentelemetry_util val get_headers : unit -> (string * string) list @@ -16,9 +15,20 @@ module Config = Config val n_bytes_sent : unit -> int (** Global counter of bytes sent (or attempted to be sent) *) +val consumer : + ?stop:bool Atomic.t -> + ?config:Config.t -> + unit -> + Opentelemetry_client.Consumer.any_resource_builder +(** Consumer that pulls from a queue *) + val create_exporter : ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t +val create_backend : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t +[@@deprecated "use create_exporter"] + val setup : ?stop:bool Atomic.t -> ?config:Config.t -> ?enable:bool -> unit -> unit (** Setup endpoint. This modifies {!Opentelemetry.Collector.backend}. From 2a850b0329024267972162402c86fafae39eea0d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 15:50:56 -0500 Subject: [PATCH 67/94] port opentelemetry_client_ocurl_lwt to the consumer framework --- src/client-ocurl-lwt/dune | 1 + .../opentelemetry_client_ocurl_lwt.ml | 324 ++++++------------ .../opentelemetry_client_ocurl_lwt.mli | 13 +- 3 files changed, 118 insertions(+), 220 deletions(-) diff --git a/src/client-ocurl-lwt/dune b/src/client-ocurl-lwt/dune index 64fb6217..41dd39a9 100644 --- a/src/client-ocurl-lwt/dune +++ b/src/client-ocurl-lwt/dune @@ -8,6 +8,7 @@ opentelemetry opentelemetry.atomic opentelemetry-client + opentelemetry-client.lwt pbrt mtime mtime.clock.os diff --git a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml index 0041e2af..b31160ed 100644 --- a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml +++ b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml @@ -3,9 +3,8 @@ https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md *) -module OT = Opentelemetry +module Config = Config open Opentelemetry -open Opentelemetry_util open Opentelemetry_client open Common_ @@ -17,40 +16,14 @@ external reraise : exn -> 'a = "%reraise" (** This is equivalent to [Lwt.reraise]. We inline it here so we don't force to use Lwt's latest version *) -type error = - [ `Status of int * Opentelemetry.Proto.Status.status - | `Failure of string - | `Sysbreak - ] +type error = Export_error.t +(* TODO: emit this in a metric in [tick()] if self tracing is enabled? *) let n_errors = Atomic.make 0 -let n_dropped = Atomic.make 0 - -let report_err_ = function - | `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!" - | `Failure msg -> - Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg - | `Status - ( code, - { - Opentelemetry.Proto.Status.code = scode; - message; - details; - _presence = _; - } ) -> - let pp_details out l = - List.iter - (fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s)) - l - in - Format.eprintf - "@[<2>opentelemetry: export failed with@ http code=%d@ status \ - {@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@." - code scode - (Bytes.unsafe_to_string message) - pp_details details +let report_err_ = Export_error.report_err +(** HTTP client *) module Httpc : sig type t @@ -68,24 +41,22 @@ end = struct open Opentelemetry.Proto open Lwt.Syntax - type t = unit + type t = Curl.t - let create () : t = () + let create () : t = Ezcurl_core.make () - let cleanup _self = () - - (* FIXME: absolutely need some rate limiting somewhere, ideally as early - as possible so we can measure how many resources we drop *) + let cleanup self = Ezcurl_core.delete self (* send the content to the remote endpoint/path *) - let send (_self : t) ~url ~decode (bod : string) : ('a, error) result Lwt.t = + let send (self : t) ~url ~decode (bod : string) : ('a, error) result Lwt.t = let* r = let headers = ("Content-Type", "application/x-protobuf") :: ("Accept", "application/x-protobuf") :: Config.Env.get_headers () in - Ezcurl_lwt.post ~headers ~params:[] ~url ~content:(`String bod) () + Ezcurl_lwt.post ~client:self ~headers ~params:[] ~url + ~content:(`String bod) () in match r with | Error (code, msg) -> @@ -115,53 +86,36 @@ end = struct in Lwt.return r) | Ok { code; body; _ } -> - let dec = Pbrt.Decoder.of_string body in - - let r = - try - let status = Status.decode_pb_status dec in - Error (`Status (code, status)) - with e -> - let bt = Printexc.get_backtrace () in - Error - (`Failure - (spf - "httpc: decoding of status (url=%S, code=%d) failed with:\n\ - %s\n\ - status: %S\n\ - %s" - url code (Printexc.to_string e) body bt)) - in - Lwt.return r + let err = Export_error.decode_invalid_http_response ~url ~code body in + Lwt.return (Error err) end -module Exporter_impl = struct +module Consumer_impl = struct + module CNotifier = Opentelemetry_client_lwt.Notifier open Lwt.Syntax - let[@inline] push_to_batch b e = - if e <> [] then ( - match Batch.push b e with - | `Ok -> () - | `Dropped -> Atomic.incr n_dropped - ) - type state = { stop: bool Atomic.t; cleaned: bool Atomic.t; (** True when we cleaned up after closing *) config: Config.t; - encoder_pool: Pbrt.Encoder.t Rpool.t; - traces: Proto.Trace.span Batch.t; - logs: Proto.Logs.log_record Batch.t; - metrics: Proto.Metrics.metric Batch.t; + q: Any_resource.t Bounded_queue.t; + notify: CNotifier.t; } - let send_http_ (st : state) (httpc : Httpc.t) ~url data : unit Lwt.t = + let shutdown self = + if not (Atomic.exchange self.stop true) then ( + CNotifier.trigger self.notify; + CNotifier.delete self.notify + ) + + let send_http_ (self : state) (httpc : Httpc.t) ~url (data : string) : + unit Lwt.t = let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in match r with | Ok () -> Lwt.return () | Error `Sysbreak -> Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set st.stop true; + Atomic.set self.stop true; Lwt.return () | Error err -> (* TODO: log error _via_ otel? *) @@ -170,179 +124,113 @@ module Exporter_impl = struct (* avoid crazy error loop *) Lwt_unix.sleep 3. - let send_metrics_http (st : state) client + let send_metrics_http (st : state) client ~encoder (l : Proto.Metrics.resource_metrics list) = - let msg = - let@ encoder = Rpool.with_resource st.encoder_pool in - Signal.Encode.metrics ~encoder l - in + let msg = Signal.Encode.metrics ~encoder l in + Pbrt.Encoder.reset encoder; send_http_ st client msg ~url:st.config.url_metrics - let send_traces_http st client (l : Proto.Trace.resource_spans list) = - let msg = - let@ encoder = Rpool.with_resource st.encoder_pool in - Signal.Encode.traces ~encoder l - in + let send_traces_http st client ~encoder (l : Proto.Trace.resource_spans list) + = + let msg = Signal.Encode.traces ~encoder l in + Pbrt.Encoder.reset encoder; send_http_ st client msg ~url:st.config.url_traces - let send_logs_http st client (l : Proto.Logs.resource_logs list) = - let msg = - let@ encoder = Rpool.with_resource st.encoder_pool in - Signal.Encode.logs ~encoder l - in + let send_logs_http st client ~encoder (l : Proto.Logs.resource_logs list) = + let msg = Signal.Encode.logs ~encoder l in + Pbrt.Encoder.reset encoder; send_http_ st client msg ~url:st.config.url_logs - (* emit metrics, if the batch is full or timeout lapsed *) - let emit_metrics_maybe (st : state) ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now st.metrics with - | None -> Lwt.return false - | Some l -> - let res = Util_resources.make_resource_metrics l in - let+ () = send_metrics_http st httpc [ res ] in - true + let tick (self : state) = CNotifier.trigger self.notify - let emit_traces_maybe st ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now st.traces with - | None -> Lwt.return false - | Some l -> - let res = Util_resources.make_resource_spans l in - let+ () = send_traces_http st httpc [ res ] in - true + let start_worker (self : state) : unit = + let client = Httpc.create () in + let encoder = Pbrt.Encoder.create () in - let emit_logs_maybe st ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now st.logs with - | None -> Lwt.return false - | Some l -> - let res = Util_resources.make_resource_logs l in - let+ () = send_logs_http st httpc [ res ] in - true - - let emit_all_force st (httpc : Httpc.t) : unit Lwt.t = - let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe st ~now ~force:true httpc - and+ (_ : bool) = emit_logs_maybe st ~now ~force:true httpc - and+ (_ : bool) = emit_metrics_maybe st ~now ~force:true httpc in - () - - let[@inline] guard_exn_ where f = - try f () - with e -> - let bt = Printexc.get_backtrace () in - Printf.eprintf - "opentelemetry-ocurl-lwt: uncaught exception in %s: %s\n%s\n%!" where - (Printexc.to_string e) bt - - (* Lwt task that calls [tick()] regularly, to help enforce timeouts *) - let setup_ticker_ st ~tick ~finally () = - let rec tick_loop () = - if Atomic.get st.stop then ( - finally (); + (* loop on [q] *) + let rec loop () : unit Lwt.t = + if Atomic.get self.stop then Lwt.return () - ) else - let* () = Lwt_unix.sleep 0.5 in - let* () = tick () in - tick_loop () - in - Lwt.async tick_loop - - (* make an emitter. - - exceptions inside should be caught, see - https://opentelemetry.io/docs/reference/specification/error-handling/ *) - let create ~stop ~(config : Config.t) () : OT.Exporter.t = - let open Proto in - let encoder_pool = - Rpool.create - ~mk_item:(fun () -> Pbrt.Encoder.create ~size:1024 ()) - ~max_size:32 ~clear:Pbrt.Encoder.reset () - in - - (* local helpers *) - let timeout = - if config.batch_timeout_ms > 0 then - Some Mtime.Span.(config.batch_timeout_ms * ms) else - None + let* () = + match Bounded_queue.try_pop self.q with + | `Closed -> + shutdown self; + Lwt.return () + | `Empty -> CNotifier.wait self.notify + | `Item (R_logs logs) -> send_logs_http self client ~encoder logs + | `Item (R_metrics ms) -> send_metrics_http self client ~encoder ms + | `Item (R_spans spans) -> send_traces_http self client ~encoder spans + in + loop () in - let st = + Lwt.async (fun () -> + Lwt.finalize loop (fun () -> + Httpc.cleanup client; + Lwt.return ())) + + let default_n_workers = 50 + + let create_state ~stop ~config ~q () : state = + let self = { stop; config; + q; cleaned = Atomic.make false; - encoder_pool; - traces = Batch.make ?batch:config.batch_traces ?timeout (); - metrics = Batch.make ?batch:config.batch_metrics ?timeout (); - logs = Batch.make ?batch:config.batch_logs ?timeout (); + notify = CNotifier.create (); } in - let httpc = Httpc.create () in - let ticker = Cb_set.create () in - let tick_ () = - if Config.Env.get_debug () then - Printf.eprintf "tick (from %d)\n%!" (Thread.id @@ Thread.self ()); - Cb_set.trigger ticker; - let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe st ~now httpc - and+ (_ : bool) = emit_logs_maybe st ~now httpc - and+ (_ : bool) = emit_metrics_maybe st ~now httpc in - () + (* start workers *) + let n_workers = + min 2 + (max 500 + (Option.value ~default:default_n_workers + config.http_concurrency_level)) in + for _i = 1 to n_workers do + start_worker self + done; - setup_ticker_ st ~tick:tick_ ~finally:ignore (); + self - (* we make sure that this is thread-safe, even though we don't have a - background thread. There can still be a ticker thread, and there - can also be several user threads that produce spans and call - the emit functions. *) - object - method send_trace e = - let@ () = guard_exn_ "push trace" in - push_to_batch st.traces e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_traces_maybe st ~now httpc in - ()) + let to_consumer (self : state) : Any_resource.t Consumer.t = + let active () = not (Atomic.get self.stop) in + let shutdown ~on_done = + shutdown self; + on_done () + in + let tick () = tick self in + { active; tick; shutdown } - method send_metrics e = - let@ () = guard_exn_ "push metrics" in - push_to_batch st.metrics e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_metrics_maybe st ~now httpc in - ()) - - method send_logs e = - let@ () = guard_exn_ "push logs" in - push_to_batch st.logs e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_logs_maybe st ~now httpc in - ()) - - method add_on_tick_callback f = Cb_set.register ticker f - - (* if called in a blocking context: work in the background *) - method tick () = Lwt.async tick_ - - method cleanup ~on_done () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: exiting…\n%!"; - Lwt.async (fun () -> - let* () = emit_all_force st httpc in - Httpc.cleanup httpc; - on_done (); - Lwt.return ()) - end + let consumer ~stop ~config () : Consumer.any_resource_builder = + { + start_consuming = + (fun q -> + let st = create_state ~stop ~config ~q () in + to_consumer st); + } end -let create_backend ?(stop = Atomic.make false) ?(config = Config.make ()) () = - Exporter_impl.create ~stop ~config () +let create_consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () = + Consumer_impl.consumer ~stop ~config () + +let create_exporter ?stop ?(config = Config.make ()) () = + let consumer = create_consumer ?stop ~config () in + let bq = + Bounded_queue_sync.create + ~high_watermark:Bounded_queue.Defaults.high_watermark () + in + Exporter_queued.create ~q:bq ~consumer () + |> Exporter_add_batching.add_batching ~config + +let create_backend = create_exporter let setup_ ?stop ?config () : unit = let exp = create_backend ?stop ?config () in - OT.Exporter.Main_exporter.set exp; + Main_exporter.set exp; () let setup ?stop ?config ?(enable = true) () = @@ -350,9 +238,7 @@ let setup ?stop ?config ?(enable = true) () = let remove_backend () : unit Lwt.t = let done_fut, done_u = Lwt.wait () in - OT.Exporter.Main_exporter.remove - ~on_done:(fun () -> Lwt.wakeup_later done_u ()) - (); + Main_exporter.remove ~on_done:(fun () -> Lwt.wakeup_later done_u ()) (); done_fut let with_setup ?stop ?(config = Config.make ()) ?(enable = true) () f : _ Lwt.t diff --git a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli index 0e02d495..713ea70a 100644 --- a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli +++ b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli @@ -12,10 +12,21 @@ val set_headers : (string * string) list -> unit module Config = Config -val create_backend : +val create_consumer : + ?stop:bool Atomic.t -> + ?config:Config.t -> + unit -> + Opentelemetry_client.Consumer.any_resource_builder +(** Consumer that pulls from a queue *) + +val create_exporter : ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t (** Create a new backend using lwt and ezcurl-lwt *) +val create_backend : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t +[@@deprecated "use create_exporter"] + val setup : ?stop:bool Atomic.t -> ?config:Config.t -> ?enable:bool -> unit -> unit (** Setup endpoint. This modifies {!Opentelemetry.Collector.backend}. From 534b3537f8bc8cb3a111be077591b46f0786f9d1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 16:08:16 -0500 Subject: [PATCH 68/94] detail --- src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml | 3 ++- src/client-ocurl/opentelemetry_client_ocurl.ml | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml index b31160ed..720f2dd7 100644 --- a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml +++ b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml @@ -103,7 +103,8 @@ module Consumer_impl = struct } let shutdown self = - if not (Atomic.exchange self.stop true) then ( + Atomic.set self.stop true; + if not (Atomic.exchange self.cleaned true) then ( CNotifier.trigger self.notify; CNotifier.delete self.notify ) diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index bee44a6f..5db14538 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -23,7 +23,6 @@ module Consumer_impl = struct config: Config.t; mutable send_threads: Thread.t array; (** Threads that send data via http *) - cleaned: bool Atomic.t; (** True when we cleaned up after closing *) mcond: Util_thread.MCond.t; (** how to wait for the queue *) } @@ -133,7 +132,6 @@ module Consumer_impl = struct config; send_threads = [||]; bq = q; - cleaned = Atomic.make false; mcond = Util_thread.MCond.create (); } in From 3053b20676f995f14925bbe8b11f0d642e5e978d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 22:00:17 -0500 Subject: [PATCH 69/94] feat: for exponential backoff --- src/client/util_backoff.ml | 13 +++++++++++++ src/client/util_backoff.mli | 12 ++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 src/client/util_backoff.ml create mode 100644 src/client/util_backoff.mli diff --git a/src/client/util_backoff.ml b/src/client/util_backoff.ml new file mode 100644 index 00000000..4bc50ab2 --- /dev/null +++ b/src/client/util_backoff.ml @@ -0,0 +1,13 @@ +type t = { + mutable delay_s: float; + min_delay_s: float; + max_delay_s: float; +} + +let create () = { delay_s = 0.001; min_delay_s = 0.001; max_delay_s = 20. } + +let on_success self = self.delay_s <- max self.min_delay_s (self.delay_s /. 10.) + +let on_error self = self.delay_s <- min self.max_delay_s (self.delay_s *. 2.) + +let[@inline] cur_duration_s self = self.delay_s diff --git a/src/client/util_backoff.mli b/src/client/util_backoff.mli new file mode 100644 index 00000000..f097f8ae --- /dev/null +++ b/src/client/util_backoff.mli @@ -0,0 +1,12 @@ +(** Backoff behavior in case of errors *) + +type t +(** Backoff state. Not thread safe *) + +val create : unit -> t + +val on_success : t -> unit + +val on_error : t -> unit + +val cur_duration_s : t -> float From 583372dddaa66f8e07e3b16a73e3198f6f908109 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 22:05:30 -0500 Subject: [PATCH 70/94] use backoff in ocurl clients --- .../opentelemetry_client_ocurl_lwt.ml | 30 ++++++++++--------- .../opentelemetry_client_ocurl.ml | 30 ++++++++++++------- 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml index 720f2dd7..4617754d 100644 --- a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml +++ b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml @@ -12,10 +12,6 @@ let set_headers = Config.Env.set_headers let get_headers = Config.Env.get_headers -external reraise : exn -> 'a = "%reraise" -(** This is equivalent to [Lwt.reraise]. We inline it here so we don't force to - use Lwt's latest version *) - type error = Export_error.t (* TODO: emit this in a metric in [tick()] if self tracing is enabled? *) @@ -109,21 +105,23 @@ module Consumer_impl = struct CNotifier.delete self.notify ) - let send_http_ (self : state) (httpc : Httpc.t) ~url (data : string) : - unit Lwt.t = + let send_http_ (self : state) ~backoff (httpc : Httpc.t) ~url (data : string) + : unit Lwt.t = let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in match r with - | Ok () -> Lwt.return () + | Ok () -> + Util_backoff.on_success backoff; + Lwt.return () | Error `Sysbreak -> Printf.eprintf "ctrl-c captured, stopping\n%!"; Atomic.set self.stop true; Lwt.return () | Error err -> - (* TODO: log error _via_ otel? *) Atomic.incr n_errors; + let dur_s = Util_backoff.cur_duration_s backoff in + Util_backoff.on_error backoff; report_err_ err; - (* avoid crazy error loop *) - Lwt_unix.sleep 3. + Lwt_unix.sleep (dur_s +. Random.float (dur_s /. 10.)) let send_metrics_http (st : state) client ~encoder (l : Proto.Metrics.resource_metrics list) = @@ -147,6 +145,7 @@ module Consumer_impl = struct let start_worker (self : state) : unit = let client = Httpc.create () in let encoder = Pbrt.Encoder.create () in + let backoff = Util_backoff.create () in (* loop on [q] *) let rec loop () : unit Lwt.t = @@ -159,9 +158,12 @@ module Consumer_impl = struct shutdown self; Lwt.return () | `Empty -> CNotifier.wait self.notify - | `Item (R_logs logs) -> send_logs_http self client ~encoder logs - | `Item (R_metrics ms) -> send_metrics_http self client ~encoder ms - | `Item (R_spans spans) -> send_traces_http self client ~encoder spans + | `Item (R_logs logs) -> + send_logs_http self client ~backoff ~encoder logs + | `Item (R_metrics ms) -> + send_metrics_http self client ~encoder ~backoff ms + | `Item (R_spans spans) -> + send_traces_http self client ~encoder ~backoff spans in loop () in @@ -255,6 +257,6 @@ let with_setup ?stop ?(config = Config.make ()) ?(enable = true) () f : _ Lwt.t res) (fun exn -> let* () = remove_backend () in - reraise exn) + Lwt.reraise exn) ) else f () diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 5db14538..edba7a0a 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -31,11 +31,19 @@ module Consumer_impl = struct (* wakeup sleepers *) Util_thread.MCond.signal self.mcond - let send_http_ (self : state) (client : Curl.t) ~url (data : string) : unit = + let send_http_ (self : state) (client : Curl.t) ~backoff ~url (data : string) + : unit = let@ _sc = Self_trace.with_ ~kind:Span_kind_producer "otel-ocurl.send-http" in + (* avoid crazy error loop *) + let sleep_with_backoff () = + let dur_s = Util_backoff.cur_duration_s backoff in + Util_backoff.on_error backoff; + Thread.delay (dur_s +. Random.float (dur_s /. 10.)) + in + if Config.Env.get_debug () then Printf.eprintf "opentelemetry: send http POST to %s (%dB)\n%!" url (String.length data); @@ -50,6 +58,7 @@ module Consumer_impl = struct Ezcurl.post ~headers ~client ~params:[] ~url ~content:(`String data) () with | Ok { code; _ } when code >= 200 && code < 300 -> + Util_backoff.on_success backoff; if Config.Env.get_debug () then Printf.eprintf "opentelemetry: got response code=%d\n%!" code | Ok { code; body; headers = _; info = _ } -> @@ -61,20 +70,20 @@ module Consumer_impl = struct let err = Export_error.decode_invalid_http_response ~url ~code body in Export_error.report_err err; () - ) + ); + + sleep_with_backoff () | exception Sys.Break -> Printf.eprintf "ctrl-c captured, stopping\n%!"; shutdown self | Error (code, msg) -> - (* TODO: log error _via_ otel? *) Atomic.incr n_errors; Printf.eprintf "opentelemetry: export failed:\n %s\n curl code: %s\n url: %s\n%!" msg (Curl.strerror code) url; - (* avoid crazy error loop *) - Thread.delay 3. + sleep_with_backoff () (** The main loop of a thread that, reads from [bq] to get the next message to send via http *) @@ -82,8 +91,9 @@ module Consumer_impl = struct Ezcurl.with_client ?set_opts:None @@ fun client -> (* we need exactly one encoder per thread *) let encoder = Pbrt.Encoder.create ~size:2048 () in + let backoff = Util_backoff.create () in - let send ~name ~url ~conv (signals : _ list) : unit = + let send ~name ~url ~conv ~backoff (signals : _ list) : unit = let@ _sp = Self_trace.with_ ~kind:Span_kind_producer name ~attrs:[ "n", `Int (List.length signals) ] @@ -93,7 +103,7 @@ module Consumer_impl = struct Pbrt.Encoder.reset encoder; ignore (Atomic.fetch_and_add n_bytes_sent (String.length msg) : int); - send_http_ self client msg ~url; + send_http_ self client msg ~backoff ~url; () in while not (Atomic.get self.stop) do @@ -101,13 +111,13 @@ module Consumer_impl = struct | `Closed -> shutdown self | `Empty -> Util_thread.MCond.wait self.mcond | `Item (Any_resource.R_spans tr) -> - send ~name:"send-traces" ~conv:Signal.Encode.traces + send ~name:"send-traces" ~conv:Signal.Encode.traces ~backoff ~url:self.config.common.url_traces tr | `Item (Any_resource.R_metrics ms) -> - send ~name:"send-metrics" ~conv:Signal.Encode.metrics + send ~name:"send-metrics" ~conv:Signal.Encode.metrics ~backoff ~url:self.config.common.url_metrics ms | `Item (Any_resource.R_logs logs) -> - send ~name:"send-logs" ~conv:Signal.Encode.logs + send ~name:"send-logs" ~conv:Signal.Encode.logs ~backoff ~url:self.config.common.url_logs logs done From 4e4e2d724dff993cc71c80166c1654627e31a297 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 22:57:23 -0500 Subject: [PATCH 71/94] feat client: generic consumer, notifier, etc. --- src/client/generic_http_consumer.ml | 206 ++++++++++++++++++ src/client/generic_io.ml | 28 +++ src/client/generic_notifier.ml | 17 ++ src/client/lwt/io_lwt.ml | 11 + src/client/lwt/io_lwt.mli | 1 + .../lwt/{notifier.ml => notifier_lwt.ml} | 2 + src/client/lwt/notifier_lwt.mli | 1 + src/client/notifier_sync.ml | 8 + src/client/notifier_sync.mli | 1 + src/client/signal.ml | 4 +- 10 files changed, 278 insertions(+), 1 deletion(-) create mode 100644 src/client/generic_http_consumer.ml create mode 100644 src/client/generic_io.ml create mode 100644 src/client/generic_notifier.ml create mode 100644 src/client/lwt/io_lwt.ml create mode 100644 src/client/lwt/io_lwt.mli rename src/client/lwt/{notifier.ml => notifier_lwt.ml} (97%) create mode 100644 src/client/lwt/notifier_lwt.mli create mode 100644 src/client/notifier_sync.ml create mode 100644 src/client/notifier_sync.mli diff --git a/src/client/generic_http_consumer.ml b/src/client/generic_http_consumer.ml new file mode 100644 index 00000000..f9c3ba84 --- /dev/null +++ b/src/client/generic_http_consumer.ml @@ -0,0 +1,206 @@ +type error = Export_error.t + +(* TODO: emit this in a metric in [tick()] if self tracing is enabled? *) + +(** Number of errors met during export *) +let n_errors = Atomic.make 0 + +module type IO = Generic_io.S_WITH_CONCURRENCY + +module type HTTPC = sig + module IO : IO + + type t + + val create : unit -> t + + val send : + t -> + url:string -> + decode:[ `Dec of Pbrt.Decoder.t -> 'a | `Ret of 'a ] -> + string -> + ('a, error) result IO.t + + val cleanup : t -> unit +end + +module Make + (IO : IO) + (Notifier : Generic_notifier.S with type 'a IO.t = 'a IO.t) + (Httpc : HTTPC with type 'a IO.t = 'a IO.t) : sig + val consumer : + ?override_n_workers:int -> + ticker_task:float option -> + stop:bool Atomic.t -> + config:Client_config.t -> + unit -> + Consumer.any_resource_builder + (** Create a consumer. + @param stop + shared stop variable, set to true to stop this (and maybe other tasks) + @param ticker_task + controls whether we start a task to call [tick] at the given interval in + seconds, or [None] to not start such a task at all. *) +end = struct + module Proto = Opentelemetry_proto + open IO + + type other_config = { + override_n_workers: int option; + ticker_task: float option; + } + + type state = { + stop: bool Atomic.t; + cleaned: bool Atomic.t; (** True when we cleaned up after closing *) + config: Client_config.t; + other_config: other_config; + q: Any_resource.t Bounded_queue.t; + notify: Notifier.t; + } + + let shutdown self = + Atomic.set self.stop true; + if not (Atomic.exchange self.cleaned true) then ( + Notifier.trigger self.notify; + Notifier.delete self.notify + ) + + let send_http_ (self : state) (httpc : Httpc.t) ~backoff ~url (data : string) + : unit IO.t = + let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in + match r with + | Ok () -> + Util_backoff.on_success backoff; + IO.return () + | Error `Sysbreak -> + Printf.eprintf "ctrl-c captured, stopping\n%!"; + Atomic.set self.stop true; + IO.return () + | Error err -> + Atomic.incr n_errors; + Export_error.report_err err; + (* avoid crazy error loop *) + let dur_s = Util_backoff.cur_duration_s backoff in + Util_backoff.on_error backoff; + IO.sleep_s (dur_s +. Random.float (dur_s /. 10.)) + + let send_metrics_http (st : state) client ~encoder ~backoff + (l : Proto.Metrics.resource_metrics list) = + let msg = Signal.Encode.metrics ~encoder l in + send_http_ st client msg ~backoff ~url:st.config.url_metrics + + let send_traces_http st client ~encoder ~backoff + (l : Proto.Trace.resource_spans list) = + let msg = Signal.Encode.traces ~encoder l in + send_http_ st client msg ~backoff ~url:st.config.url_traces + + let send_logs_http st client ~encoder ~backoff + (l : Proto.Logs.resource_logs list) = + let msg = Signal.Encode.logs ~encoder l in + send_http_ st client msg ~backoff ~url:st.config.url_logs + + let tick (self : state) = Notifier.trigger self.notify + + let start_worker (self : state) : unit = + let client = Httpc.create () in + let encoder = Pbrt.Encoder.create () in + let backoff = Util_backoff.create () in + + (* loop on [q] *) + let rec loop () : unit IO.t = + if Atomic.get self.stop then + IO.return () + else + let* () = + match Bounded_queue.try_pop self.q with + | `Closed -> + shutdown self; + IO.return () + | `Empty -> Notifier.wait self.notify + | `Item (R_logs logs) -> + send_logs_http self client ~encoder ~backoff logs + | `Item (R_metrics ms) -> + send_metrics_http self client ~encoder ~backoff ms + | `Item (R_spans spans) -> + send_traces_http self client ~encoder ~backoff spans + in + loop () + in + + IO.spawn (fun () -> + IO.protect loop ~finally:(fun () -> + Httpc.cleanup client; + IO.return ())) + + let start_ticker (self : state) ~(interval_s : float) : unit = + let rec loop () : unit IO.t = + if Atomic.get self.stop then + IO.return () + else + let* () = IO.sleep_s interval_s in + tick self; + loop () + in + IO.spawn loop + + let default_n_workers = 50 + + let create_state ?override_n_workers ~ticker_task ~stop ~config ~q () : state + = + let other_config = { override_n_workers; ticker_task } in + let self = + { + stop; + config; + other_config; + q; + cleaned = Atomic.make false; + notify = Notifier.create (); + } + in + + (* start workers *) + let n_workers = + min 2 + (max 500 + (match + ( self.other_config.override_n_workers, + self.config.http_concurrency_level ) + with + | Some n, _ -> n + | None, Some n -> n + | None, None -> default_n_workers)) + in + + for _i = 1 to n_workers do + start_worker self + done; + + (* start ticker *) + (match self.other_config.ticker_task with + | None -> () + | Some interval_s -> start_ticker self ~interval_s); + + self + + let to_consumer (self : state) : Any_resource.t Consumer.t = + let active () = not (Atomic.get self.stop) in + let shutdown ~on_done = + shutdown self; + on_done () + in + let tick () = tick self in + { active; tick; shutdown } + + let consumer ?override_n_workers ~ticker_task ~stop ~config () : + Consumer.any_resource_builder = + { + start_consuming = + (fun q -> + let st = + create_state ?override_n_workers ~ticker_task ~stop ~config ~q () + in + to_consumer st); + } +end diff --git a/src/client/generic_io.ml b/src/client/generic_io.ml new file mode 100644 index 00000000..9e297026 --- /dev/null +++ b/src/client/generic_io.ml @@ -0,0 +1,28 @@ +(** Generic IO *) +module type S = sig + type 'a t + + val return : 'a -> 'a t + + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + + val protect : finally:(unit -> unit t) -> (unit -> 'a t) -> 'a t +end + +module type S_WITH_CONCURRENCY = sig + include S + + val sleep_s : float -> unit t + + val spawn : (unit -> unit t) -> unit +end + +module Direct_style : S with type 'a t = 'a = struct + type 'a t = 'a + + let[@inline] return x = x + + let[@inline] ( let* ) x f = f x + + let protect = Fun.protect +end diff --git a/src/client/generic_notifier.ml b/src/client/generic_notifier.ml new file mode 100644 index 00000000..0d3ea1d3 --- /dev/null +++ b/src/client/generic_notifier.ml @@ -0,0 +1,17 @@ +module type IO = Generic_io.S + +module type S = sig + module IO : IO + + type t + + val create : unit -> t + + val delete : t -> unit + + val trigger : t -> unit + + val wait : t -> unit IO.t + + val register_bounded_queue : t -> _ Bounded_queue.t -> unit +end diff --git a/src/client/lwt/io_lwt.ml b/src/client/lwt/io_lwt.ml new file mode 100644 index 00000000..d8dcece9 --- /dev/null +++ b/src/client/lwt/io_lwt.ml @@ -0,0 +1,11 @@ +type 'a t = 'a Lwt.t + +let return = Lwt.return + +let ( let* ) = Lwt.Syntax.( let* ) + +let sleep_s = Lwt_unix.sleep + +let spawn = Lwt.async + +let[@inline] protect ~finally f = Lwt.finalize f finally diff --git a/src/client/lwt/io_lwt.mli b/src/client/lwt/io_lwt.mli new file mode 100644 index 00000000..ec083176 --- /dev/null +++ b/src/client/lwt/io_lwt.mli @@ -0,0 +1 @@ +include Generic_io.S_WITH_CONCURRENCY with type 'a t = 'a Lwt.t diff --git a/src/client/lwt/notifier.ml b/src/client/lwt/notifier_lwt.ml similarity index 97% rename from src/client/lwt/notifier.ml rename to src/client/lwt/notifier_lwt.ml index 4b924183..88fd366e 100644 --- a/src/client/lwt/notifier.ml +++ b/src/client/lwt/notifier_lwt.ml @@ -1,5 +1,7 @@ (** Notification that can be used on the consumer side of a bounded queue *) +module IO = Io_lwt + type t = { notified: bool Atomic.t; cond: unit Lwt_condition.t; diff --git a/src/client/lwt/notifier_lwt.mli b/src/client/lwt/notifier_lwt.mli new file mode 100644 index 00000000..c16ae992 --- /dev/null +++ b/src/client/lwt/notifier_lwt.mli @@ -0,0 +1 @@ +include Generic_notifier.S with module IO = Io_lwt diff --git a/src/client/notifier_sync.ml b/src/client/notifier_sync.ml new file mode 100644 index 00000000..4ce44bb8 --- /dev/null +++ b/src/client/notifier_sync.ml @@ -0,0 +1,8 @@ +include Util_thread.MCond +module IO = Generic_io.Direct_style + +let delete = ignore + +let trigger = signal + +let register_bounded_queue = wakeup_from_bq diff --git a/src/client/notifier_sync.mli b/src/client/notifier_sync.mli new file mode 100644 index 00000000..f896ccb2 --- /dev/null +++ b/src/client/notifier_sync.mli @@ -0,0 +1 @@ +include Generic_notifier.S with type 'a IO.t = 'a diff --git a/src/client/signal.ml b/src/client/signal.ml index e3337c60..cde963de 100644 --- a/src/client/signal.ml +++ b/src/client/signal.ml @@ -48,7 +48,9 @@ module Encode = struct let x = ctor resource in let@ _sc = Self_trace.with_ ~kind:Span.Span_kind_internal "encode-proto" in enc x encoder; - Pbrt.Encoder.to_string encoder + let data = Pbrt.Encoder.to_string encoder in + Pbrt.Encoder.reset encoder; + data let logs ?encoder resource_logs = resource_to_string ~encoder resource_logs From 311776344af7db14fb6fa99754d87413e2fdabf3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 22:59:11 -0500 Subject: [PATCH 72/94] refactor ocurl, ocurl_lwt, cohttp_lwt clients to use generic consumer --- src/client-cohttp-lwt/dune | 1 + .../opentelemetry_client_cohttp_lwt.ml | 419 ++---------------- .../opentelemetry_client_cohttp_lwt.mli | 14 +- .../opentelemetry_client_ocurl_lwt.ml | 161 +------ .../opentelemetry_client_ocurl.ml | 211 +++------ 5 files changed, 117 insertions(+), 689 deletions(-) diff --git a/src/client-cohttp-lwt/dune b/src/client-cohttp-lwt/dune index 23c36d3a..56b0b4f8 100644 --- a/src/client-cohttp-lwt/dune +++ b/src/client-cohttp-lwt/dune @@ -7,6 +7,7 @@ (libraries opentelemetry opentelemetry-client + opentelemetry-client.lwt lwt cohttp-lwt cohttp-lwt-unix diff --git a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml index 0f823fbc..53cf515b 100644 --- a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml +++ b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml @@ -3,10 +3,8 @@ https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md *) -module OT = Opentelemetry module Config = Config -module Signal = Opentelemetry_client.Signal -module Batch = Opentelemetry_client.Batch +open Opentelemetry_client open Opentelemetry open Common_ @@ -14,87 +12,14 @@ let set_headers = Config.Env.set_headers let get_headers = Config.Env.get_headers -external reraise : exn -> 'a = "%reraise" -(** This is equivalent to [Lwt.reraise]. We inline it here so we don't force to - use Lwt's latest version *) +type error = Export_error.t -let needs_gc_metrics = Atomic.make false +open struct + module IO = Opentelemetry_client_lwt.Io_lwt +end -let last_gc_metrics = Atomic.make (Mtime_clock.now ()) - -let timeout_gc_metrics = Mtime.Span.(20 * s) - -let gc_metrics = ref [] -(* side channel for GC, appended to {!E_metrics}'s data *) - -(* capture current GC metrics if {!needs_gc_metrics} is true, - or it has been a long time since the last GC metrics collection, - and push them into {!gc_metrics} for later collection *) -let sample_gc_metrics_if_needed () = - let now = Mtime_clock.now () in - let alarm = Atomic.compare_and_set needs_gc_metrics true false in - let timeout () = - let elapsed = Mtime.span now (Atomic.get last_gc_metrics) in - Mtime.Span.compare elapsed timeout_gc_metrics > 0 - in - if alarm || timeout () then ( - Atomic.set last_gc_metrics now; - let l = - OT.Metrics.make_resource_metrics - ~attrs:(Opentelemetry.GC_metrics.get_runtime_attributes ()) - @@ Opentelemetry.GC_metrics.get_metrics () - in - gc_metrics := l :: !gc_metrics - ) - -type error = - [ `Status of int * Opentelemetry.Proto.Status.status - | `Failure of string - | `Sysbreak - ] - -let n_errors = Atomic.make 0 - -let n_dropped = Atomic.make 0 - -let report_err_ = function - | `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!" - | `Failure msg -> - Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg - | `Status - ( code, - { - Opentelemetry.Proto.Status.code = scode; - message; - details; - _presence = _; - } ) -> - let pp_details out l = - List.iter - (fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s)) - l - in - Format.eprintf - "@[<2>opentelemetry: export failed with@ http code=%d@ status \ - {@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@." - code scode - (Bytes.unsafe_to_string message) - pp_details details - -module Httpc : sig - type t - - val create : unit -> t - - val send : - t -> - url:string -> - decode:[ `Dec of Pbrt.Decoder.t -> 'a | `Ret of 'a ] -> - string -> - ('a, error) result Lwt.t - - val cleanup : t -> unit -end = struct +module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct + module IO = IO open Opentelemetry.Proto open Lwt.Syntax module Httpc = Cohttp_lwt_unix.Client @@ -176,325 +101,39 @@ end = struct ) end -(** An emitter. This is used by {!Backend} below to forward traces/metrics/… - from the program to whatever collector client we have. *) -module type EMITTER = sig - open Opentelemetry.Proto +module Consumer_impl = + Generic_http_consumer.Make (IO) (Opentelemetry_client_lwt.Notifier_lwt) + (Httpc) - val push_trace : Trace.resource_spans list -> unit +let create_consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () = + Consumer_impl.consumer ~ticker_task:(Some 0.5) ~stop ~config () - val push_metrics : Metrics.resource_metrics list -> unit - - val push_logs : Logs.resource_logs list -> unit - - val set_on_tick_callbacks : (unit -> unit) Alist.t -> unit - - val tick : unit -> unit - - val cleanup : on_done:(unit -> unit) -> unit -> unit -end - -(* make an emitter. - - exceptions inside should be caught, see - https://opentelemetry.io/docs/reference/specification/error-handling/ *) -let mk_emitter ~stop ~(config : Config.t) () : (module EMITTER) = - let open Proto in - let open Lwt.Syntax in - (* local helpers *) - let open struct - let timeout = - if config.batch_timeout_ms > 0 then - Some Mtime.Span.(config.batch_timeout_ms * ms) - else - None - - let batch_traces : Trace.resource_spans Batch.t = - Batch.make ?batch:config.batch_traces ?timeout () - - let batch_metrics : Metrics.resource_metrics Batch.t = - Batch.make ?batch:config.batch_metrics ?timeout () - - let batch_logs : Logs.resource_logs Batch.t = - Batch.make ?batch:config.batch_logs ?timeout () - - let on_tick_cbs_ = Atomic.make (Alist.make ()) - - let set_on_tick_callbacks = Atomic.set on_tick_cbs_ - - let send_http_ (httpc : Httpc.t) ~url data : unit Lwt.t = - let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in - match r with - | Ok () -> Lwt.return () - | Error `Sysbreak -> - Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set stop true; - Lwt.return () - | Error err -> - (* TODO: log error _via_ otel? *) - Atomic.incr n_errors; - report_err_ err; - (* avoid crazy error loop *) - Lwt_unix.sleep 3. - - let send_metrics_http client (l : Metrics.resource_metrics list) = - Signal.Encode.metrics l |> send_http_ client ~url:config.url_metrics - - let send_traces_http client (l : Trace.resource_spans list) = - Signal.Encode.traces l |> send_http_ client ~url:config.url_traces - - let send_logs_http client (l : Logs.resource_logs list) = - Signal.Encode.logs l |> send_http_ client ~url:config.url_logs - - (* emit metrics, if the batch is full or timeout lapsed *) - let emit_metrics_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_metrics with - | None -> Lwt.return false - | Some l -> - let batch = !gc_metrics @ l in - gc_metrics := []; - let+ () = send_metrics_http httpc batch in - true - - let emit_traces_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_traces with - | None -> Lwt.return false - | Some l -> - let+ () = send_traces_http httpc l in - true - - let emit_logs_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_logs with - | None -> Lwt.return false - | Some l -> - let+ () = send_logs_http httpc l in - true - - let[@inline] guard_exn_ where f = - try f () - with e -> - let bt = Printexc.get_backtrace () in - Printf.eprintf - "opentelemetry-cohttp-lwt: uncaught exception in %s: %s\n%s\n%!" where - (Printexc.to_string e) bt - - let emit_all_force (httpc : Httpc.t) : unit Lwt.t = - let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe ~now ~force:true httpc - and+ (_ : bool) = emit_logs_maybe ~now ~force:true httpc - and+ (_ : bool) = emit_metrics_maybe ~now ~force:true httpc in - () - - (* thread that calls [tick()] regularly, to help enforce timeouts *) - let setup_ticker_thread ~tick ~finally () = - let rec tick_thread () = - if Atomic.get stop then ( - finally (); - Lwt.return () - ) else - let* () = Lwt_unix.sleep 0.5 in - let* () = tick () in - tick_thread () - in - Lwt.async tick_thread - end in - let httpc = Httpc.create () in - - let module M = struct - (* we make sure that this is thread-safe, even though we don't have a - background thread. There can still be a ticker thread, and there - can also be several user threads that produce spans and call - the emit functions. *) - - let push_to_batch b e = - match Batch.push b e with - | `Ok -> () - | `Dropped -> Atomic.incr n_errors - - let push_trace e = - let@ () = guard_exn_ "push trace" in - push_to_batch batch_traces e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_traces_maybe ~now httpc in - ()) - - let push_metrics e = - let@ () = guard_exn_ "push metrics" in - sample_gc_metrics_if_needed (); - push_to_batch batch_metrics e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_metrics_maybe ~now httpc in - ()) - - let push_logs e = - let@ () = guard_exn_ "push logs" in - push_to_batch batch_logs e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_logs_maybe ~now httpc in - ()) - - let set_on_tick_callbacks = set_on_tick_callbacks - - let tick_ () = - if Config.Env.get_debug () then - Printf.eprintf "tick (from %d)\n%!" (tid ()); - sample_gc_metrics_if_needed (); - List.iter - (fun f -> - try f () - with e -> - Printf.eprintf "on tick callback raised: %s\n" - (Printexc.to_string e)) - (Alist.get @@ Atomic.get on_tick_cbs_); - let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe ~now httpc - and+ (_ : bool) = emit_logs_maybe ~now httpc - and+ (_ : bool) = emit_metrics_maybe ~now httpc in - () - - let () = setup_ticker_thread ~tick:tick_ ~finally:ignore () - - (* if called in a blocking context: work in the background *) - let tick () = Lwt.async tick_ - - let cleanup ~on_done () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: exiting…\n%!"; - Lwt.async (fun () -> - let* () = emit_all_force httpc in - Httpc.cleanup httpc; - on_done (); - Lwt.return ()) - end in - (module M) - -module Backend - (Arg : sig - val stop : bool Atomic.t - - val config : Config.t - end) - () : Opentelemetry.Collector.BACKEND = struct - include (val mk_emitter ~stop:Arg.stop ~config:Arg.config ()) - - open Opentelemetry.Proto - open Opentelemetry.Collector - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send spans %a@." - (Format.pp_print_list Trace.pp_resource_spans) - l); - push_trace l; - ret ()); - } - - let last_sent_metrics = Atomic.make (Mtime_clock.now ()) - - let timeout_sent_metrics = Mtime.Span.(5 * s) - (* send metrics from time to time *) - - let signal_emit_gc_metrics () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: emit GC metrics requested\n%!"; - Atomic.set needs_gc_metrics true - - let additional_metrics () : Metrics.resource_metrics list = - (* add exporter metrics to the lot? *) - let last_emit = Atomic.get last_sent_metrics in - let now = Mtime_clock.now () in - let add_own_metrics = - let elapsed = Mtime.span last_emit now in - Mtime.Span.compare elapsed timeout_sent_metrics > 0 - in - - (* there is a possible race condition here, as several threads might update - metrics at the same time. But that's harmless. *) - if add_own_metrics then ( - Atomic.set last_sent_metrics now; - let open OT.Metrics in - [ - make_resource_metrics - [ - sum ~name:"otel.export.dropped" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_dropped); - ]; - sum ~name:"otel.export.errors" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_errors); - ]; - ]; - ] - ) else - [] - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send metrics %a@." - (Format.pp_print_list Metrics.pp_resource_metrics) - m); - - let m = List.rev_append (additional_metrics ()) m in - push_metrics m; - ret ()); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send logs %a@." - (Format.pp_print_list Logs.pp_resource_logs) - m); - - push_logs m; - ret ()); - } -end - -let create_backend ?(stop = Atomic.make false) ?(config = Config.make ()) () = - let module B = - Backend - (struct - let stop = stop - - let config = config - end) - () +let create_exporter ?stop ?(config = Config.make ()) () = + let consumer = create_consumer ?stop ~config () in + let bq = + Bounded_queue_sync.create + ~high_watermark:Bounded_queue.Defaults.high_watermark () in - (module B : OT.Collector.BACKEND) + Exporter_queued.create ~q:bq ~consumer () + |> Exporter_add_batching.add_batching ~config + +let create_backend = create_exporter let setup_ ?stop ?config () : unit = let backend = create_backend ?stop ?config () in - OT.Collector.set_backend backend; + Main_exporter.set backend; () let setup ?stop ?config ?(enable = true) () = if enable then setup_ ?stop ?config () -let remove_backend () : unit Lwt.t = +let remove_exporter () : unit Lwt.t = let done_fut, done_u = Lwt.wait () in - OT.Collector.remove_backend ~on_done:(fun () -> Lwt.wakeup_later done_u ()) (); + Main_exporter.remove ~on_done:(fun () -> Lwt.wakeup_later done_u ()) (); done_fut +let remove_backend = remove_exporter + let with_setup ?stop ?(config = Config.make ()) ?(enable = true) () f : _ Lwt.t = if enable then ( @@ -504,10 +143,10 @@ let with_setup ?stop ?(config = Config.make ()) ?(enable = true) () f : _ Lwt.t Lwt.catch (fun () -> let* res = f () in - let+ () = remove_backend () in + let+ () = remove_exporter () in res) (fun exn -> - let* () = remove_backend () in - reraise exn) + let* () = remove_exporter () in + Lwt.reraise exn) ) else f () diff --git a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.mli b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.mli index c57d9653..2f12121f 100644 --- a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.mli +++ b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.mli @@ -12,14 +12,20 @@ val set_headers : (string * string) list -> unit module Config = Config -val create_backend : +val create_consumer : ?stop:bool Atomic.t -> ?config:Config.t -> unit -> - (module Opentelemetry.Collector.BACKEND) -(** Create a new backend using lwt and cohttp + Opentelemetry_client.Consumer.any_resource_builder +(** Consumer that pulls from a queue *) - NOTE [after_cleanup] optional parameter removed @since 0.12 *) +val create_exporter : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t +(** Create a new backend using lwt and ezcurl-lwt *) + +val create_backend : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t +[@@deprecated "use create_exporter"] val setup : ?stop:bool Atomic.t -> ?config:Config.t -> ?enable:bool -> unit -> unit diff --git a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml index 4617754d..50cd834b 100644 --- a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml +++ b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml @@ -14,27 +14,13 @@ let get_headers = Config.Env.get_headers type error = Export_error.t -(* TODO: emit this in a metric in [tick()] if self tracing is enabled? *) -let n_errors = Atomic.make 0 - -let report_err_ = Export_error.report_err +open struct + module IO = Opentelemetry_client_lwt.Io_lwt +end (** HTTP client *) -module Httpc : sig - type t - - val create : unit -> t - - val send : - t -> - url:string -> - decode:[ `Dec of Pbrt.Decoder.t -> 'a | `Ret of 'a ] -> - string -> - ('a, error) result Lwt.t - - val cleanup : t -> unit -end = struct - open Opentelemetry.Proto +module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct + module IO = IO open Lwt.Syntax type t = Curl.t @@ -43,7 +29,7 @@ end = struct let cleanup self = Ezcurl_core.delete self - (* send the content to the remote endpoint/path *) + (** send the content to the remote endpoint/path *) let send (self : t) ~url ~decode (bod : string) : ('a, error) result Lwt.t = let* r = let headers = @@ -86,139 +72,12 @@ end = struct Lwt.return (Error err) end -module Consumer_impl = struct - module CNotifier = Opentelemetry_client_lwt.Notifier - open Lwt.Syntax - - type state = { - stop: bool Atomic.t; - cleaned: bool Atomic.t; (** True when we cleaned up after closing *) - config: Config.t; - q: Any_resource.t Bounded_queue.t; - notify: CNotifier.t; - } - - let shutdown self = - Atomic.set self.stop true; - if not (Atomic.exchange self.cleaned true) then ( - CNotifier.trigger self.notify; - CNotifier.delete self.notify - ) - - let send_http_ (self : state) ~backoff (httpc : Httpc.t) ~url (data : string) - : unit Lwt.t = - let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in - match r with - | Ok () -> - Util_backoff.on_success backoff; - Lwt.return () - | Error `Sysbreak -> - Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set self.stop true; - Lwt.return () - | Error err -> - Atomic.incr n_errors; - let dur_s = Util_backoff.cur_duration_s backoff in - Util_backoff.on_error backoff; - report_err_ err; - Lwt_unix.sleep (dur_s +. Random.float (dur_s /. 10.)) - - let send_metrics_http (st : state) client ~encoder - (l : Proto.Metrics.resource_metrics list) = - let msg = Signal.Encode.metrics ~encoder l in - Pbrt.Encoder.reset encoder; - send_http_ st client msg ~url:st.config.url_metrics - - let send_traces_http st client ~encoder (l : Proto.Trace.resource_spans list) - = - let msg = Signal.Encode.traces ~encoder l in - Pbrt.Encoder.reset encoder; - send_http_ st client msg ~url:st.config.url_traces - - let send_logs_http st client ~encoder (l : Proto.Logs.resource_logs list) = - let msg = Signal.Encode.logs ~encoder l in - Pbrt.Encoder.reset encoder; - send_http_ st client msg ~url:st.config.url_logs - - let tick (self : state) = CNotifier.trigger self.notify - - let start_worker (self : state) : unit = - let client = Httpc.create () in - let encoder = Pbrt.Encoder.create () in - let backoff = Util_backoff.create () in - - (* loop on [q] *) - let rec loop () : unit Lwt.t = - if Atomic.get self.stop then - Lwt.return () - else - let* () = - match Bounded_queue.try_pop self.q with - | `Closed -> - shutdown self; - Lwt.return () - | `Empty -> CNotifier.wait self.notify - | `Item (R_logs logs) -> - send_logs_http self client ~backoff ~encoder logs - | `Item (R_metrics ms) -> - send_metrics_http self client ~encoder ~backoff ms - | `Item (R_spans spans) -> - send_traces_http self client ~encoder ~backoff spans - in - loop () - in - - Lwt.async (fun () -> - Lwt.finalize loop (fun () -> - Httpc.cleanup client; - Lwt.return ())) - - let default_n_workers = 50 - - let create_state ~stop ~config ~q () : state = - let self = - { - stop; - config; - q; - cleaned = Atomic.make false; - notify = CNotifier.create (); - } - in - - (* start workers *) - let n_workers = - min 2 - (max 500 - (Option.value ~default:default_n_workers - config.http_concurrency_level)) - in - for _i = 1 to n_workers do - start_worker self - done; - - self - - let to_consumer (self : state) : Any_resource.t Consumer.t = - let active () = not (Atomic.get self.stop) in - let shutdown ~on_done = - shutdown self; - on_done () - in - let tick () = tick self in - { active; tick; shutdown } - - let consumer ~stop ~config () : Consumer.any_resource_builder = - { - start_consuming = - (fun q -> - let st = create_state ~stop ~config ~q () in - to_consumer st); - } -end +module Consumer_impl = + Generic_http_consumer.Make (IO) (Opentelemetry_client_lwt.Notifier_lwt) + (Httpc) let create_consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () = - Consumer_impl.consumer ~stop ~config () + Consumer_impl.consumer ~ticker_task:(Some 0.5) ~stop ~config () let create_exporter ?stop ?(config = Config.make ()) () = let consumer = create_consumer ?stop ~config () in diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index edba7a0a..5906ace0 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -12,160 +12,83 @@ let get_headers = Config.Env.get_headers let set_headers = Config.Env.set_headers -let n_errors = Atomic.make 0 - let n_bytes_sent : int Atomic.t = Atomic.make 0 -module Consumer_impl = struct - type state = { - bq: Any_resource.t Bounded_queue.t; (** Queue of incoming workload *) - stop: bool Atomic.t; - config: Config.t; - mutable send_threads: Thread.t array; - (** Threads that send data via http *) - mcond: Util_thread.MCond.t; (** how to wait for the queue *) - } +type error = Export_error.t - let shutdown self : unit = - Atomic.set self.stop true; - (* wakeup sleepers *) - Util_thread.MCond.signal self.mcond +open struct + module Notifier = Notifier_sync - let send_http_ (self : state) (client : Curl.t) ~backoff ~url (data : string) - : unit = - let@ _sc = - Self_trace.with_ ~kind:Span_kind_producer "otel-ocurl.send-http" - in + module IO : Generic_io.S_WITH_CONCURRENCY with type 'a t = 'a = struct + include Generic_io.Direct_style - (* avoid crazy error loop *) - let sleep_with_backoff () = - let dur_s = Util_backoff.cur_duration_s backoff in - Util_backoff.on_error backoff; - Thread.delay (dur_s +. Random.float (dur_s /. 10.)) - in + let sleep_s = Thread.delay - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: send http POST to %s (%dB)\n%!" url - (String.length data); - let headers = - ("Content-Type", "application/x-protobuf") :: self.config.common.headers - in - match - let@ _sc = - Self_trace.with_ ~kind:Span_kind_internal "curl.post" - ~attrs:[ "size", `Int (String.length data); "url", `String url ] - in - Ezcurl.post ~headers ~client ~params:[] ~url ~content:(`String data) () - with - | Ok { code; _ } when code >= 200 && code < 300 -> - Util_backoff.on_success backoff; - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: got response code=%d\n%!" code - | Ok { code; body; headers = _; info = _ } -> - Atomic.incr n_errors; - Self_trace.add_event _sc - @@ Opentelemetry.Event.make "error" ~attrs:[ "code", `Int code ]; - - if Config.Env.get_debug () then ( - let err = Export_error.decode_invalid_http_response ~url ~code body in - Export_error.report_err err; - () - ); - - sleep_with_backoff () - | exception Sys.Break -> - Printf.eprintf "ctrl-c captured, stopping\n%!"; - shutdown self - | Error (code, msg) -> - Atomic.incr n_errors; - - Printf.eprintf - "opentelemetry: export failed:\n %s\n curl code: %s\n url: %s\n%!" - msg (Curl.strerror code) url; - - sleep_with_backoff () - - (** The main loop of a thread that, reads from [bq] to get the next message to - send via http *) - let bg_thread_loop (self : state) : unit = - Ezcurl.with_client ?set_opts:None @@ fun client -> - (* we need exactly one encoder per thread *) - let encoder = Pbrt.Encoder.create ~size:2048 () in - let backoff = Util_backoff.create () in - - let send ~name ~url ~conv ~backoff (signals : _ list) : unit = - let@ _sp = - Self_trace.with_ ~kind:Span_kind_producer name - ~attrs:[ "n", `Int (List.length signals) ] - in - - let msg : string = conv ?encoder:(Some encoder) signals in - Pbrt.Encoder.reset encoder; - - ignore (Atomic.fetch_and_add n_bytes_sent (String.length msg) : int); - send_http_ self client msg ~backoff ~url; - () - in - while not (Atomic.get self.stop) do - match Bounded_queue.try_pop self.bq with - | `Closed -> shutdown self - | `Empty -> Util_thread.MCond.wait self.mcond - | `Item (Any_resource.R_spans tr) -> - send ~name:"send-traces" ~conv:Signal.Encode.traces ~backoff - ~url:self.config.common.url_traces tr - | `Item (Any_resource.R_metrics ms) -> - send ~name:"send-metrics" ~conv:Signal.Encode.metrics ~backoff - ~url:self.config.common.url_metrics ms - | `Item (Any_resource.R_logs logs) -> - send ~name:"send-logs" ~conv:Signal.Encode.logs ~backoff - ~url:self.config.common.url_logs logs - done - - let to_consumer (self : state) : _ Consumer.t = - let active () = not (Atomic.get self.stop) in - let tick () = - (* make sure to poll from time to time *) - Util_thread.MCond.signal self.mcond - in - let shutdown ~on_done = - shutdown self; - on_done () - in - { tick; active; shutdown } - - let create_state ~stop ~(config : Config.t) ~q () : state = - let n_send_threads = min 100 @@ max 2 config.bg_threads in - - let self = - { - stop; - config; - send_threads = [||]; - bq = q; - mcond = Util_thread.MCond.create (); - } - in - - Util_thread.MCond.wakeup_from_bq self.mcond q; - - self.send_threads <- - Array.init n_send_threads (fun _i -> - Util_thread.start_bg_thread (fun () -> bg_thread_loop self)); - - self - - let create ~stop ~config () : Consumer.any_resource_builder = - { - start_consuming = - (fun q -> - let st = create_state ~stop ~config ~q () in - to_consumer st); - } + let[@inline] spawn f = ignore (Util_thread.start_bg_thread f : Thread.t) + end end +module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct + module IO = IO + + type t = Curl.t + + let create () = Ezcurl.make () + + let cleanup = Ezcurl.delete + + let send (self : t) ~url ~decode (bod : string) : ('a, error) result = + let r = + let headers = + ("Content-Type", "application/x-protobuf") + :: ("Accept", "application/x-protobuf") + :: Config.Env.get_headers () + in + Ezcurl.post ~client:self ~headers ~params:[] ~url ~content:(`String bod) + () + in + match r with + | Error (code, msg) -> + let err = + `Failure + (spf + "sending signals via http POST failed:\n\ + \ %s\n\ + \ curl code: %s\n\ + \ url: %s\n\ + %!" + msg (Curl.strerror code) url) + in + Error err + | Ok { code; body; _ } when code >= 200 && code < 300 -> + (match decode with + | `Ret x -> Ok x + | `Dec f -> + let dec = Pbrt.Decoder.of_string body in + (try Ok (f dec) + with e -> + let bt = Printexc.get_backtrace () in + Error + (`Failure + (spf "decoding failed with:\n%s\n%s" (Printexc.to_string e) bt)))) + | Ok { code; body; _ } -> + let err = Export_error.decode_invalid_http_response ~url ~code body in + Error err +end + +module Consumer_impl = Generic_http_consumer.Make (IO) (Notifier) (Httpc) + let consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () : Opentelemetry_client.Consumer.any_resource_builder = - Consumer_impl.create ~stop ~config () + let n_workers = max 2 (min 32 config.bg_threads) in + let ticker_task = + if config.ticker_thread then + Some (float config.ticker_interval_ms /. 1000.) + else + None + in + Consumer_impl.consumer ~override_n_workers:n_workers ~ticker_task ~stop + ~config:config.common () let create_exporter ?stop ?(config = Config.make ()) () : OTEL.Exporter.t = let consumer = consumer ?stop ~config () in From 9e839befa45199f01e024218108dcb0c8a65bdbe Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 23:30:44 -0500 Subject: [PATCH 73/94] improve notifier_lwt --- src/client/lwt/notifier_lwt.ml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/client/lwt/notifier_lwt.ml b/src/client/lwt/notifier_lwt.ml index 88fd366e..83d8dfbe 100644 --- a/src/client/lwt/notifier_lwt.ml +++ b/src/client/lwt/notifier_lwt.ml @@ -6,6 +6,7 @@ type t = { notified: bool Atomic.t; cond: unit Lwt_condition.t; notification: int; + lwt_tid: int; (** thread ID where lwt runs *) deleted: bool Atomic.t; } @@ -17,14 +18,20 @@ let create () : t = Atomic.set notified false; Lwt_condition.broadcast cond ()) in - { notified; notification; cond; deleted = Atomic.make false } + let lwt_tid = Thread.id @@ Thread.self () in + { notified; notification; cond; lwt_tid; deleted = Atomic.make false } let delete self : unit = if not (Atomic.exchange self.deleted true) then Lwt_unix.stop_notification self.notification let trigger (self : t) : unit = - if not (Atomic.exchange self.notified true) then + let tid = Thread.id @@ Thread.self () in + + if tid = self.lwt_tid then + (* in lwt thread, directly use the condition *) + Lwt_condition.broadcast self.cond () + else if not (Atomic.exchange self.notified true) then Lwt_unix.send_notification self.notification let wait (self : t) : unit Lwt.t = Lwt_condition.wait self.cond From 97011b56933847e6b7d986e9ddeaa36298380fa2 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 23:31:15 -0500 Subject: [PATCH 74/94] move Util_thread.MCond to Notifier_sync --- src/client/notifier_sync.ml | 19 ++++++++++++++++--- src/client/util_thread.ml | 20 -------------------- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/client/notifier_sync.ml b/src/client/notifier_sync.ml index 4ce44bb8..d418cb59 100644 --- a/src/client/notifier_sync.ml +++ b/src/client/notifier_sync.ml @@ -1,8 +1,21 @@ -include Util_thread.MCond module IO = Generic_io.Direct_style +type t = { + mutex: Mutex.t; + cond: Condition.t; +} + +let create () : t = { mutex = Mutex.create (); cond = Condition.create () } + +let trigger self = Condition.signal self.cond + let delete = ignore -let trigger = signal +let[@inline] protect self f = Util_mutex.protect self.mutex f -let register_bounded_queue = wakeup_from_bq +(** NOTE: the mutex must be acquired *) +let wait self = Condition.wait self.cond self.mutex + +(** Ensure we get signalled when the queue goes from empty to non-empty *) +let register_bounded_queue (self : t) (bq : _ Bounded_queue.t) : unit = + Bounded_queue.on_non_empty bq (fun () -> trigger self) diff --git a/src/client/util_thread.ml b/src/client/util_thread.ml index 37764ac3..75479688 100644 --- a/src/client/util_thread.ml +++ b/src/client/util_thread.ml @@ -42,23 +42,3 @@ let setup_ticker_thread ~stop ~sleep_ms (exp : OTEL.Exporter.t) () = (Printexc.to_string exn) in start_bg_thread tick_loop - -module MCond = struct - type t = { - mutex: Mutex.t; - cond: Condition.t; - } - - let create () : t = { mutex = Mutex.create (); cond = Condition.create () } - - let signal self = Condition.signal self.cond - - let[@inline] protect self f = Util_mutex.protect self.mutex f - - (** NOTE: the mutex must be acquired *) - let wait self = Condition.wait self.cond self.mutex - - (** Ensure we get signalled when the queue goes from empty to non-empty *) - let wakeup_from_bq (self : t) (bq : _ Bounded_queue.t) : unit = - Bounded_queue.on_non_empty bq (fun () -> signal self) -end From ad158b15da5f5b24473580c97703980034fdce7e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 23:33:32 -0500 Subject: [PATCH 75/94] todo --- src/client/generic_http_consumer.ml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/client/generic_http_consumer.ml b/src/client/generic_http_consumer.ml index f9c3ba84..76823810 100644 --- a/src/client/generic_http_consumer.ml +++ b/src/client/generic_http_consumer.ml @@ -5,6 +5,34 @@ type error = Export_error.t (** Number of errors met during export *) let n_errors = Atomic.make 0 +(* TODO: put this somewhere with an interval limiter to 30s + + (* there is a possible race condition here, as several threads might update + metrics at the same time. But that's harmless. *) + if add_own_metrics then ( + Atomic.set last_sent_metrics now; + let open OT.Metrics in + [ + make_resource_metrics + [ + sum ~name:"otel.export.dropped" ~is_monotonic:true + [ + int + ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) + ~now:(Mtime.to_uint64_ns now) (Atomic.get n_dropped); + ]; + sum ~name:"otel.export.errors" ~is_monotonic:true + [ + int + ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) + ~now:(Mtime.to_uint64_ns now) (Atomic.get n_errors); + ]; + ]; + ] + ) else + [] +*) + module type IO = Generic_io.S_WITH_CONCURRENCY module type HTTPC = sig From eb651f11973150e55957766aa8a01865647d4539 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 23:33:34 -0500 Subject: [PATCH 76/94] refactor eio client --- .../opentelemetry_client_cohttp_eio.ml | 511 +++++------------- .../opentelemetry_client_cohttp_eio.mli | 29 +- 2 files changed, 159 insertions(+), 381 deletions(-) diff --git a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml index 6a182963..14e904b6 100644 --- a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml +++ b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml @@ -5,13 +5,9 @@ open Eio.Std https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md *) -module OT = Opentelemetry module Config = Config -module Signal = Opentelemetry_client.Signal -module Batch = Opentelemetry_client.Batch open Opentelemetry - -let ( let@ ) = ( @@ ) +open Opentelemetry_client let spf = Printf.sprintf @@ -19,413 +15,178 @@ let set_headers = Config.Env.set_headers let get_headers = Config.Env.get_headers -let needs_gc_metrics = Atomic.make false +module Make (CTX : sig + val sw : Eio.Switch.t -let last_gc_metrics = Atomic.make (Mtime_clock.now ()) + val env : Eio_unix.Stdenv.base +end) = +struct + module IO : Generic_io.S_WITH_CONCURRENCY with type 'a t = 'a = struct + include Generic_io.Direct_style -let timeout_gc_metrics = Mtime.Span.(20 * s) + (* NOTE: This is only used in the main consumer thread, even though producers + might be in other domains *) -type error = - [ `Status of int * Opentelemetry.Proto.Status.status - | `Failure of string - | `Sysbreak - ] + let sleep_s n = Eio.Time.sleep CTX.env#clock n -let n_errors = Atomic.make 0 + let spawn f = Eio.Fiber.fork ~sw:CTX.sw f + end -let n_dropped = Atomic.make 0 + module Notifier : Generic_notifier.S with module IO = IO = struct + module IO = IO -let report_err_ = function - | `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!" - | `Failure msg -> - Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg - | `Status - ( code, - { - Opentelemetry.Proto.Status.code = scode; - message; - details; - _presence = _; - } ) -> - let pp_details out l = - List.iter - (fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s)) - l - in - Format.eprintf - "@[<2>opentelemetry: export failed with@ http code=%d@ status \ - {@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@." - code scode - (Bytes.unsafe_to_string message) - pp_details details + type t = { + mutex: Eio.Mutex.t; + cond: Eio.Condition.t; + } -module Httpc : sig - type t + let create () : t = + { mutex = Eio.Mutex.create (); cond = Eio.Condition.create () } - val create : _ Eio.Net.t -> t + let trigger self = + (* FIXME: this might be triggered from other threads!! how do we + ensure it runs in the Eio thread? *) + Eio.Condition.broadcast self.cond - val send : - t -> - url:string -> - decode:[ `Dec of Pbrt.Decoder.t -> 'a | `Ret of 'a ] -> - string -> - ('a, error) result -end = struct - open Opentelemetry.Proto - module Httpc = Cohttp_eio.Client + let delete = ignore - type t = Httpc.t + (** NOTE: the mutex must be acquired *) + let wait self = Eio.Condition.await self.cond self.mutex - let authenticator = - match Ca_certs.authenticator () with - | Ok x -> x - | Error (`Msg m) -> - Fmt.failwith "Failed to create system store X509 authenticator: %s" m + (** Ensure we get signalled when the queue goes from empty to non-empty *) + let register_bounded_queue (self : t) (bq : _ Bounded_queue.t) : unit = + Bounded_queue.on_non_empty bq (fun () -> trigger self) + end - let https ~authenticator = - let tls_config = - match Tls.Config.client ~authenticator () with - | Error (`Msg msg) -> failwith ("tls configuration problem: " ^ msg) - | Ok tls_config -> tls_config - in - fun uri raw -> - let host = - Uri.host uri - |> Option.map (fun x -> Domain_name.(host_exn (of_string_exn x))) + module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct + module IO = IO + open Opentelemetry.Proto + module Httpc = Cohttp_eio.Client + + type t = Httpc.t + + let authenticator = + match Ca_certs.authenticator () with + | Ok x -> x + | Error (`Msg m) -> + Fmt.failwith "Failed to create system store X509 authenticator: %s" m + + let https ~authenticator = + let tls_config = + match Tls.Config.client ~authenticator () with + | Error (`Msg msg) -> failwith ("tls configuration problem: " ^ msg) + | Ok tls_config -> tls_config in - Tls_eio.client_of_flow ?host tls_config raw + fun uri raw -> + let host = + Uri.host uri + |> Option.map (fun x -> Domain_name.(host_exn (of_string_exn x))) + in + Tls_eio.client_of_flow ?host tls_config raw - let create net = Httpc.make ~https:(Some (https ~authenticator)) net + let create () = Httpc.make ~https:(Some (https ~authenticator)) CTX.env#net - (* send the content to the remote endpoint/path *) - let send (client : t) ~url ~decode (body : string) : ('a, error) result = - Switch.run @@ fun sw -> - let uri = Uri.of_string url in + let cleanup = ignore - let open Cohttp in - let headers = Header.(add_list (init ()) (Config.Env.get_headers ())) in - let headers = - Header.(add headers "Content-Type" "application/x-protobuf") - in + (* send the content to the remote endpoint/path *) + let send (client : t) ~url ~decode (body : string) : + ('a, Export_error.t) result = + Switch.run @@ fun sw -> + let uri = Uri.of_string url in - let body = Cohttp_eio.Body.of_string body in - let r = - try - let r = Httpc.post client ~sw ~headers ~body uri in - Ok r - with e -> Error e - in - match r with - | Error e -> - let err = - `Failure - (spf "sending signals via http POST to %S\nfailed with:\n%s" url - (Printexc.to_string e)) + let open Cohttp in + let headers = Header.(add_list (init ()) (Config.Env.get_headers ())) in + let headers = + Header.(add headers "Content-Type" "application/x-protobuf") in - Error err - | Ok (resp, body) -> - let body = Eio.Buf_read.(parse_exn take_all) body ~max_size:max_int in - let code = Response.status resp |> Code.code_of_status in - if not (Code.is_error code) then ( - match decode with - | `Ret x -> Ok x - | `Dec f -> + + let body = Cohttp_eio.Body.of_string body in + let r = + try + let r = Httpc.post client ~sw ~headers ~body uri in + Ok r + with e -> Error e + in + match r with + | Error e -> + let err = + `Failure + (spf "sending signals via http POST to %S\nfailed with:\n%s" url + (Printexc.to_string e)) + in + Error err + | Ok (resp, body) -> + let body = Eio.Buf_read.(parse_exn take_all) body ~max_size:max_int in + let code = Response.status resp |> Code.code_of_status in + if not (Code.is_error code) then ( + match decode with + | `Ret x -> Ok x + | `Dec f -> + let dec = Pbrt.Decoder.of_string body in + let r = + try Ok (f dec) + with e -> + let bt = Printexc.get_backtrace () in + Error + (`Failure + (spf "decoding failed with:\n%s\n%s" (Printexc.to_string e) + bt)) + in + r + ) else ( let dec = Pbrt.Decoder.of_string body in + let r = - try Ok (f dec) + try + let status = Status.decode_pb_status dec in + Error (`Status (code, status)) with e -> let bt = Printexc.get_backtrace () in Error (`Failure - (spf "decoding failed with:\n%s\n%s" (Printexc.to_string e) - bt)) + (spf + "httpc: decoding of status (url=%S, code=%d) failed with:\n\ + %s\n\ + status: %S\n\ + %s" + url code (Printexc.to_string e) body bt)) in r - ) else ( - let dec = Pbrt.Decoder.of_string body in - - let r = - try - let status = Status.decode_pb_status dec in - Error (`Status (code, status)) - with e -> - let bt = Printexc.get_backtrace () in - Error - (`Failure - (spf - "httpc: decoding of status (url=%S, code=%d) failed with:\n\ - %s\n\ - status: %S\n\ - %s" - url code (Printexc.to_string e) body bt)) - in - r - ) + ) + end end -(** An emitter. This is used by {!Backend} below to forward traces/metrics/… - from the program to whatever collector client we have. *) -module type EMITTER = sig - open Opentelemetry.Proto +let create_consumer ?(stop = Atomic.make false) ?(config = Config.make ()) ~sw + ~env () : Consumer.any_resource_builder = + let module M = Make (struct + let sw = sw - val push_trace : Trace.resource_spans list -> unit + let env = env + end) in + let module C = Generic_http_consumer.Make (M.IO) (M.Notifier) (M.Httpc) in + C.consumer ~ticker_task:(Some 0.5) ~stop ~config () - val push_metrics : Metrics.resource_metrics list -> unit +let create_exporter ?stop ?(config = Config.make ()) ~sw ~env () = + let consumer = create_consumer ?stop ~config ~sw ~env () in + let bq = + Bounded_queue_sync.create + ~high_watermark:Bounded_queue.Defaults.high_watermark () + in + Exporter_queued.create ~q:bq ~consumer () + |> Exporter_add_batching.add_batching ~config - val push_logs : Logs.resource_logs list -> unit - - val set_on_tick_callbacks : (unit -> unit) Alist.t -> unit - - val tick : unit -> unit - - val cleanup : on_done:(unit -> unit) -> unit -> unit -end - -(* make an emitter. - - exceptions inside should be caught, see - https://opentelemetry.io/docs/reference/specification/error-handling/ *) -let mk_emitter ~stop ~net (config : Config.t) : (module EMITTER) = - (* local helpers *) - let open struct - let client = - (* Prime RNG state for TLS *) - Mirage_crypto_rng_unix.use_default (); - Httpc.create net - - let send_http ~url data : unit = - let r = Httpc.send client ~url ~decode:(`Ret ()) data in - match r with - | Ok () -> () - | Error `Sysbreak -> - Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set stop true - | Error err -> - (* TODO: log error _via_ otel? *) - Atomic.incr n_errors; - report_err_ err; - (* avoid crazy error loop *) - Eio_unix.sleep 3. - - let timeout = - if config.batch_timeout_ms > 0 then - Some Mtime.Span.(config.batch_timeout_ms * ms) - else - None - - let batch_traces : Proto.Trace.resource_spans Batch.t = - Batch.make ?batch:config.batch_traces ?timeout () - - let batch_metrics : Proto.Metrics.resource_metrics Batch.t = - Batch.make ?batch:config.batch_metrics ?timeout () - - let batch_logs : Proto.Logs.resource_logs Batch.t = - Batch.make ?batch:config.batch_logs ?timeout () - - let push_to_batch b e = - match Batch.push b e with - | `Ok -> () - | `Dropped -> Atomic.incr n_errors - - let[@inline] guard_exn_ where f = - try f () - with e -> - let bt = Printexc.get_backtrace () in - Printf.eprintf "opentelemetry-eio: uncaught exception in %s: %s\n%s\n%!" - where (Printexc.to_string e) bt - - let push_traces x = - let@ () = guard_exn_ "push trace" in - push_to_batch batch_traces x - - let push_metrics x = - let@ () = guard_exn_ "push metrics" in - push_to_batch batch_metrics x - - let push_logs x = - let@ () = guard_exn_ "push logs" in - push_to_batch batch_logs x - - let maybe_emit (batch : 'a Batch.t) url (f : 'a list -> string) ~now ~force - () : unit = - Batch.pop_if_ready ~force ~now batch - |> Option.iter (fun signals -> f signals |> send_http ~url) - - let emit_traces_maybe = - maybe_emit batch_traces config.url_traces Signal.Encode.traces - - let emit_metrics_maybe = - maybe_emit batch_metrics config.url_metrics (fun collected_metrics -> - collected_metrics |> Signal.Encode.metrics) - - let emit_logs_maybe = - maybe_emit batch_logs config.url_logs Signal.Encode.logs - - let emit_all ~force : unit = - Switch.run @@ fun sw -> - let now = Mtime_clock.now () in - Fiber.fork ~sw @@ emit_logs_maybe ~now ~force; - Fiber.fork ~sw @@ emit_metrics_maybe ~now ~force; - Fiber.fork ~sw @@ emit_traces_maybe ~now ~force - - let on_tick_cbs_ = Atomic.make (Alist.make ()) - - let run_tick_callbacks () = - List.iter - (fun f -> - try f () - with e -> - Printf.eprintf "on tick callback raised: %s\n" - (Printexc.to_string e)) - (Alist.get @@ Atomic.get on_tick_cbs_) - end in - let module M = struct - let set_on_tick_callbacks = Atomic.set on_tick_cbs_ - - let push_trace e = push_traces e - - let push_metrics e = push_metrics e - - let push_logs e = push_logs e - - let tick () = - if Config.Env.get_debug () then - Printf.eprintf "tick (from domain %d)\n%!" (Domain.self () :> int); - run_tick_callbacks (); - emit_all ~force:false - - let cleanup ~on_done () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: exiting…\n%!"; - Atomic.set stop true; - run_tick_callbacks (); - emit_all ~force:true; - on_done () - end in - (module M : EMITTER) - -module Backend (Emitter : EMITTER) : Opentelemetry.Exporter.t = struct - open Opentelemetry.Proto - open Opentelemetry.Collector - open Emitter - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send spans %a@." - (Format.pp_print_list Trace.pp_resource_spans) - l); - push_trace l; - ret ()); - } - - let last_sent_metrics = Atomic.make (Mtime_clock.now ()) - - let timeout_sent_metrics = Mtime.Span.(5 * s) - (* send metrics from time to time *) - - let signal_emit_gc_metrics () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: emit GC metrics requested\n%!"; - Atomic.set needs_gc_metrics true - - let additional_metrics () : Metrics.resource_metrics list = - (* add exporter metrics to the lot? *) - let last_emit = Atomic.get last_sent_metrics in - let now = Mtime_clock.now () in - let add_own_metrics = - let elapsed = Mtime.span last_emit now in - Mtime.Span.compare elapsed timeout_sent_metrics > 0 - in - - (* there is a possible race condition here, as several threads might update - metrics at the same time. But that's harmless. *) - if add_own_metrics then ( - Atomic.set last_sent_metrics now; - let open OT.Metrics in - [ - make_resource_metrics - [ - sum ~name:"otel.export.dropped" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_dropped); - ]; - sum ~name:"otel.export.errors" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_errors); - ]; - ]; - ] - ) else - [] - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send metrics %a@." - (Format.pp_print_list Metrics.pp_resource_metrics) - m); - - let m = List.rev_append (additional_metrics ()) m in - push_metrics m; - ret ()); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send logs %a@." - (Format.pp_print_list Logs.pp_resource_logs) - m); - push_logs m; - ret ()); - } - - let tick = Emitter.tick - - let cleanup = Emitter.cleanup - - let set_on_tick_callbacks = Emitter.set_on_tick_callbacks -end - -let create_backend ~sw ?(stop = Atomic.make false) ?(config = Config.make ()) - env : (module OT.Collector.BACKEND) = - let module E = (val mk_emitter ~stop ~net:env#net config) in - let module B = Backend (E) in - (* Run a background fiber to keep the backend ticking regularly. - - NOTE: This cannot be located inside the [Backend], because switches - are not thread safe, and cannot be used accross domains, but the - backend is accessed across domains. *) - Eio.Fiber.fork ~sw (fun () -> - while not @@ Atomic.get stop do - Eio.Time.sleep env#clock 0.5; - B.tick () - done); - - (module B) +let create_backend = create_exporter let setup_ ~sw ?stop ?config env : unit = - let backend = create_backend ?stop ?config ~sw env in - OT.Collector.set_backend backend + let backend = create_backend ?stop ?config ~sw ~env () in + Main_exporter.set backend let setup ?stop ?config ?(enable = true) ~sw env = if enable then setup_ ~sw ?stop ?config env -let remove_backend () = OT.Collector.remove_backend ~on_done:ignore () +let remove_exporter () = Main_exporter.remove ~on_done:ignore () + +let remove_backend = remove_exporter let with_setup ?stop ?config ?(enable = true) f env = if enable then diff --git a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.mli b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.mli index f88ddef5..e3ccbe4e 100644 --- a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.mli +++ b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.mli @@ -10,15 +10,32 @@ val set_headers : (string * string) list -> unit module Config = Config -val create_backend : - sw:Eio.Switch.t -> +val create_consumer : ?stop:bool Atomic.t -> ?config:Config.t -> - Eio_unix.Stdenv.base -> - (module Opentelemetry.Collector.BACKEND) -(** Create a new backend using Cohttp_eio + sw:Eio.Switch.t -> + env:Eio_unix.Stdenv.base -> + unit -> + Opentelemetry_client.Consumer.any_resource_builder +(** Consumer that pulls from a queue *) - NOTE [after_cleanup] optional parameter removed @since 0.12 *) +val create_exporter : + ?stop:bool Atomic.t -> + ?config:Config.t -> + sw:Eio.Switch.t -> + env:Eio_unix.Stdenv.base -> + unit -> + Opentelemetry.Exporter.t +(** NOTE [after_cleanup] optional parameter removed @since 0.12 *) + +val create_backend : + ?stop:bool Atomic.t -> + ?config:Config.t -> + sw:Eio.Switch.t -> + env:Eio_unix.Stdenv.base -> + unit -> + Opentelemetry.Exporter.t +[@@deprecated "use create_exporter"] val setup : ?stop:bool Atomic.t -> From 40c6bec30caf20b6807c5e9564231a89c5000f0e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 23:34:52 -0500 Subject: [PATCH 77/94] update opam files --- opentelemetry-client-cohttp-eio.opam | 1 + opentelemetry-client-cohttp-lwt.opam | 1 + opentelemetry-client-ocurl-lwt.opam | 1 + opentelemetry-client-ocurl.opam | 1 + opentelemetry-client.opam | 39 ++++++++++++++++++++++++++++ opentelemetry-logs.opam | 2 +- opentelemetry.opam | 4 +-- 7 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 opentelemetry-client.opam diff --git a/opentelemetry-client-cohttp-eio.opam b/opentelemetry-client-cohttp-eio.opam index b19ae6b5..bc651d2c 100644 --- a/opentelemetry-client-cohttp-eio.opam +++ b/opentelemetry-client-cohttp-eio.opam @@ -18,6 +18,7 @@ depends: [ "ca-certs" "mirage-crypto-rng-eio" "opentelemetry" {= version} + "opentelemetry-client" {= version} "odoc" {with-doc} "cohttp-eio" {>= "6.1.0"} "eio_main" {with-test} diff --git a/opentelemetry-client-cohttp-lwt.opam b/opentelemetry-client-cohttp-lwt.opam index a486137f..7c9b7894 100644 --- a/opentelemetry-client-cohttp-lwt.opam +++ b/opentelemetry-client-cohttp-lwt.opam @@ -16,6 +16,7 @@ depends: [ "ocaml" {>= "4.08"} "mtime" {>= "1.4"} "opentelemetry" {= version} + "opentelemetry-client" {= version} "odoc" {with-doc} "lwt" {>= "5.3"} "lwt_ppx" {>= "2.0"} diff --git a/opentelemetry-client-ocurl-lwt.opam b/opentelemetry-client-ocurl-lwt.opam index 263582b0..1abeacf5 100644 --- a/opentelemetry-client-ocurl-lwt.opam +++ b/opentelemetry-client-ocurl-lwt.opam @@ -16,6 +16,7 @@ depends: [ "ocaml" {>= "4.08"} "mtime" {>= "1.4"} "opentelemetry" {= version} + "opentelemetry-client" {= version} "odoc" {with-doc} "ezcurl-lwt" {>= "0.2.3"} "ocurl" diff --git a/opentelemetry-client-ocurl.opam b/opentelemetry-client-ocurl.opam index de605919..12b01e4e 100644 --- a/opentelemetry-client-ocurl.opam +++ b/opentelemetry-client-ocurl.opam @@ -16,6 +16,7 @@ depends: [ "ocaml" {>= "4.08"} "mtime" {>= "1.4"} "opentelemetry" {= version} + "opentelemetry-client" {= version} "odoc" {with-doc} "ezcurl" {>= "0.2.3"} "ocurl" diff --git a/opentelemetry-client.opam b/opentelemetry-client.opam new file mode 100644 index 00000000..c646e1b9 --- /dev/null +++ b/opentelemetry-client.opam @@ -0,0 +1,39 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "0.12" +synopsis: "Client SDK for https://opentelemetry.io" +maintainer: [ + "Simon Cruanes " + "Matt Bray " + "ELLIOTTCABLE " +] +authors: ["the Imandra team and contributors"] +license: "MIT" +tags: ["tracing" "opentelemetry" "sdk"] +homepage: "https://github.com/imandra-ai/ocaml-opentelemetry" +bug-reports: "https://github.com/imandra-ai/ocaml-opentelemetry/issues" +depends: [ + "dune" {>= "2.9"} + "opentelemetry" {= version} + "odoc" {with-doc} + "alcotest" {with-test} + "saturn" {>= "1.0" & < "2.0"} + "thread-local-storage" {>= "0.2" & < "0.3"} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "--promote-install-files=false" + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] + ["dune" "install" "-p" name "--create-install-files" name] +] +dev-repo: "git+https://github.com/imandra-ai/ocaml-opentelemetry.git" diff --git a/opentelemetry-logs.opam b/opentelemetry-logs.opam index f3a5c4ee..90a192fa 100644 --- a/opentelemetry-logs.opam +++ b/opentelemetry-logs.opam @@ -1,7 +1,7 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" version: "0.12" -synopsis: "Opentelemetry tracing for Cohttp HTTP servers" +synopsis: "Opentelemetry-based reporter for Logs" maintainer: [ "Simon Cruanes " "Matt Bray " diff --git a/opentelemetry.opam b/opentelemetry.opam index 7e3128d9..5d2d150c 100644 --- a/opentelemetry.opam +++ b/opentelemetry.opam @@ -17,8 +17,6 @@ depends: [ "ocaml" {>= "4.08"} "ptime" "hmap" - "atomic" - "thread-local-storage" {>= "0.2" & < "0.3"} "odoc" {with-doc} "alcotest" {with-test} "pbrt" {>= "4.0" & < "5.0"} @@ -26,7 +24,7 @@ depends: [ "ocamlformat" {with-dev-setup & >= "0.27" & < "0.28"} "mtime" {>= "1.4"} ] -depopts: ["trace" "lwt" "eio"] +depopts: ["atomic" "trace" "thread-local-storage" "lwt" "eio"] conflicts: [ "trace" {< "0.10"} ] From 86d44416ff487ee3570b3bc6e3d3b1673337cee7 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 23:40:47 -0500 Subject: [PATCH 78/94] wip: fix tests --- tests/bin/dune | 6 +++--- tests/bin/emit1_eio.ml | 23 +++++++++++++---------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/bin/dune b/tests/bin/dune index 475f7c04..0c211cb0 100644 --- a/tests/bin/dune +++ b/tests/bin/dune @@ -4,7 +4,7 @@ (libraries unix opentelemetry - opentelemetry.client + opentelemetry-client opentelemetry-client-ocurl)) (executable @@ -16,7 +16,7 @@ unix opentelemetry opentelemetry-lwt - opentelemetry.client + opentelemetry-client opentelemetry-client-cohttp-lwt lwt.unix)) @@ -32,7 +32,7 @@ logs.fmt logs.threaded opentelemetry - opentelemetry.client + opentelemetry-client opentelemetry-client-cohttp-eio)) (executable diff --git a/tests/bin/emit1_eio.ml b/tests/bin/emit1_eio.ml index 9990b227..2cba3203 100644 --- a/tests/bin/emit1_eio.ml +++ b/tests/bin/emit1_eio.ml @@ -23,9 +23,10 @@ let num_tr = Atomic.make 0 let i = Atomic.make 0 let run_job clock _job_id iterations : unit = + let tracer = OT.Tracer.get_main () in let@ scope = Atomic.incr num_tr; - OT.Trace.with_ ~kind:OT.Span.Span_kind_producer "loop.outer" + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_producer "loop.outer" ~attrs:[ "i", `Int (Atomic.get i) ] in @@ -37,7 +38,7 @@ let run_job clock _job_id iterations : unit = (* parent scope is found via thread local storage *) let@ scope = Atomic.incr num_tr; - OT.Trace.with_ ~scope ~kind:OT.Span.Span_kind_internal + OT.Tracer.with_ tracer ~parent:scope ~kind:OT.Span.Span_kind_internal ~attrs:[ "j", `Int j ] "loop.inner" in @@ -45,11 +46,13 @@ let run_job clock _job_id iterations : unit = let () = Eio.Time.sleep clock !sleep_outer in Atomic.incr num_sleep; - OT.Logs.( - emit + OT.Logger.( + let logger = OT.Logger.get_main () in + OT.Emitter.emit logger [ - make_strf ~trace_id:scope.trace_id ~span_id:scope.span_id - ~severity:Severity_number_info "inner at %d" j; + OT.Log_record.make_strf ~trace_id:(OT.Span.trace_id scope) + ~span_id:(OT.Span.id scope) ~severity:Severity_number_info + "inner at %d" j; ]); Atomic.incr i; @@ -57,7 +60,8 @@ let run_job clock _job_id iterations : unit = try Atomic.incr num_tr; let@ scope = - OT.Trace.with_ ~kind:OT.Span.Span_kind_internal ~scope "alloc" + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_internal ~parent:scope + "alloc" in (* allocate some stuff *) if !stress_alloc_ then ( @@ -71,13 +75,12 @@ let run_job clock _job_id iterations : unit = if j = 4 && Atomic.get i mod 13 = 0 then failwith "oh no"; (* simulate a failure *) - Opentelemetry.Scope.add_event scope (fun () -> - OT.Event.make "done with alloc") + OT.Span.add_event scope (OT.Event.make "done with alloc") with Failure _ -> () done let run env proc iterations () : unit = - OT.GC_metrics.basic_setup (); + OT.Gc_metrics.setup_on_main_exporter (); OT.Metrics_callbacks.register (fun () -> OT.Metrics. From fe416cfbdd9f519cdffa1a283a1b833295f2e722 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 23:51:52 -0500 Subject: [PATCH 79/94] fix notifier_sync --- src/client/notifier_sync.ml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/client/notifier_sync.ml b/src/client/notifier_sync.ml index d418cb59..0f7b6220 100644 --- a/src/client/notifier_sync.ml +++ b/src/client/notifier_sync.ml @@ -13,8 +13,10 @@ let delete = ignore let[@inline] protect self f = Util_mutex.protect self.mutex f -(** NOTE: the mutex must be acquired *) -let wait self = Condition.wait self.cond self.mutex +let wait self = + Mutex.lock self.mutex; + Condition.wait self.cond self.mutex; + Mutex.unlock self.mutex (** Ensure we get signalled when the queue goes from empty to non-empty *) let register_bounded_queue (self : t) (bq : _ Bounded_queue.t) : unit = From 68761faadb420f10d022fccb1403ce897fe845cb Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 23:51:59 -0500 Subject: [PATCH 80/94] fix metrics_callbacks' API to make it easier --- src/lib/metrics_callbacks.ml | 12 +++++++++++- src/lib/metrics_callbacks.mli | 8 ++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/lib/metrics_callbacks.ml b/src/lib/metrics_callbacks.ml index ef4e1ba9..81aa2055 100644 --- a/src/lib/metrics_callbacks.ml +++ b/src/lib/metrics_callbacks.ml @@ -22,10 +22,20 @@ let add_to_exporter (exp : Exporter.t) (self : t) = in Exporter.on_tick exp on_tick +let with_set_added_to_exporter (exp : Exporter.t) (f : t -> 'a) : 'a = + let set = create () in + add_to_exporter exp set; + f set + +let with_set_added_to_main_exporter (f : t -> unit) : unit = + match Main_exporter.get () with + | None -> () + | Some exp -> with_set_added_to_exporter exp f + module Main_set = struct let cur_set_ : t option Atomic.t = Atomic.make None - let rec get () = + let rec get () : t = match Atomic.get cur_set_ with | Some s -> s | None -> diff --git a/src/lib/metrics_callbacks.mli b/src/lib/metrics_callbacks.mli index b9db7b66..d66388ad 100644 --- a/src/lib/metrics_callbacks.mli +++ b/src/lib/metrics_callbacks.mli @@ -19,6 +19,14 @@ val add_metrics_cb : t -> (unit -> Metrics.t list) -> unit val add_to_exporter : Exporter.t -> t -> unit (** Make sure we export metrics at every [tick] of the exporter *) +val with_set_added_to_exporter : Exporter.t -> (t -> 'a) -> 'a +(** [with_set_added_to_exporter exp f] creates a set, adds it to the exporter, + and calls [f] on it *) + +val with_set_added_to_main_exporter : (t -> unit) -> unit +(** If there is a main exporter, add a set to it and call [f set], else do not + call [f] at all *) + module Main_set : sig val get : unit -> t (** The global set *) From af585e3f3dbd6f393fddaa11964cdbe52dbe9c07 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 23:52:08 -0500 Subject: [PATCH 81/94] wip: fix tests --- tests/bin/cohttp_client.ml | 13 +++++----- tests/bin/emit1.ml | 51 +++++++++++++++++++++----------------- tests/bin/emit1_eio.ml | 16 ++++++------ 3 files changed, 44 insertions(+), 36 deletions(-) diff --git a/tests/bin/cohttp_client.ml b/tests/bin/cohttp_client.ml index a4523847..f1314cf9 100644 --- a/tests/bin/cohttp_client.ml +++ b/tests/bin/cohttp_client.ml @@ -1,4 +1,4 @@ -module T = Opentelemetry +module OT = Opentelemetry module Otel_lwt = Opentelemetry_lwt let spf = Printf.sprintf @@ -10,19 +10,20 @@ let sleep_inner = ref 0.1 let sleep_outer = ref 2.0 let mk_client ~scope = - Opentelemetry_cohttp_lwt.client ~scope (module Cohttp_lwt_unix.Client) + Opentelemetry_cohttp_lwt.client ~span:scope (module Cohttp_lwt_unix.Client) let run () = let open Lwt.Syntax in + let tracer = OT.Tracer.get_main () in let rec go () = let@ scope = - Otel_lwt.Trace.with_ ~kind:T.Span.Span_kind_producer "loop.outer" + Otel_lwt.Tracer.with_ tracer ~kind:OT.Span.Span_kind_producer "loop.outer" in let* () = Lwt_unix.sleep !sleep_outer in let module C = (val mk_client ~scope) in (* Using the same default server O *) let* _res, body = - C.get (Uri.of_string Opentelemetry_client.Config.default_url) + C.get (Uri.of_string Opentelemetry_client.Client_config.default_url) in let* () = Cohttp_lwt.Body.drain_body body in go () @@ -31,8 +32,8 @@ let run () = let () = Sys.catch_break true; - T.Globals.service_name := "ocaml-otel-cohttp-client"; - T.Globals.service_namespace := Some "ocaml-otel.test"; + OT.Globals.service_name := "ocaml-otel-cohttp-client"; + OT.Globals.service_namespace := Some "ocaml-otel.test"; let debug = ref false in let batch_traces = ref 400 in diff --git a/tests/bin/emit1.ml b/tests/bin/emit1.ml index fdcdbc06..edaa63f8 100644 --- a/tests/bin/emit1.ml +++ b/tests/bin/emit1.ml @@ -1,4 +1,4 @@ -module T = Opentelemetry +module OT = Opentelemetry module Atomic = Opentelemetry_atomic.Atomic let spf = Printf.sprintf @@ -23,13 +23,14 @@ let num_tr = Atomic.make 0 let run_job () = let@ () = Fun.protect ~finally:(fun () -> Atomic.set stop true) in + let tracer = OT.Tracer.get_main () in let i = ref 0 in let cnt = ref 0 in while (not @@ Atomic.get stop) && !cnt < !n do let@ _scope = Atomic.incr num_tr; - T.Trace.with_ ~kind:T.Span.Span_kind_producer "loop.outer" + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_producer "loop.outer" ~attrs:[ "i", `Int !i ] in @@ -40,7 +41,7 @@ let run_job () = (* parent scope is found via thread local storage *) let@ scope = Atomic.incr num_tr; - T.Trace.with_ ~kind:T.Span.Span_kind_internal + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_internal ~attrs:[ "j", `Int j ] "loop.inner" in @@ -48,18 +49,22 @@ let run_job () = Unix.sleepf !sleep_outer; Atomic.incr num_sleep; - T.Logs.( - emit - [ - make_strf ~trace_id:scope.trace_id ~span_id:scope.span_id - ~severity:Severity_number_info "inner at %d" j; - ]); + let logger = OT.Logger.get_main () in + OT.Emitter.emit logger + [ + OT.Log_record.make_strf ~trace_id:(OT.Span.trace_id scope) + ~span_id:(OT.Span.id scope) ~severity:Severity_number_info + "inner at %d" j; + ]; incr i; try Atomic.incr num_tr; - let@ _ = T.Trace.with_ ~kind:T.Span.Span_kind_internal ~scope "alloc" in + let@ _ = + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_internal ~parent:scope + "alloc" + in (* allocate some stuff *) if !stress_alloc_ then ( let _arr = Sys.opaque_identity @@ Array.make (25 * 25551) 42.0 in @@ -72,23 +77,23 @@ let run_job () = if j = 4 && !i mod 13 = 0 then failwith "oh no"; (* simulate a failure *) - Opentelemetry.Scope.add_event scope (fun () -> - T.Event.make "done with alloc") + OT.Span.add_event scope (OT.Event.make "done with alloc") with Failure _ -> () done done let run () = - T.GC_metrics.basic_setup (); + OT.Gc_metrics.setup_on_main_exporter (); - T.Metrics_callbacks.register (fun () -> - T.Metrics. - [ - sum ~name:"num-sleep" ~is_monotonic:true - [ int (Atomic.get num_sleep) ]; - sum ~name:"otel.bytes-sent" ~is_monotonic:true ~unit_:"B" - [ int (Opentelemetry_client_ocurl.n_bytes_sent ()) ]; - ]); + OT.Metrics_callbacks.with_set_added_to_main_exporter (fun set -> + OT.Metrics_callbacks.add_metrics_cb set (fun () -> + OT.Metrics. + [ + sum ~name:"num-sleep" ~is_monotonic:true + [ int (Atomic.get num_sleep) ]; + sum ~name:"otel.bytes-sent" ~is_monotonic:true ~unit_:"B" + [ int (Opentelemetry_client_ocurl.n_bytes_sent ()) ]; + ])); let n_jobs = max 1 !n_jobs in Printf.printf "run %d jobs\n%!" n_jobs; @@ -101,8 +106,8 @@ let run () = Array.iter Thread.join jobs let () = - T.Globals.service_name := "t1"; - T.Globals.service_namespace := Some "ocaml-otel.test"; + OT.Globals.service_name := "t1"; + OT.Globals.service_namespace := Some "ocaml-otel.test"; let ts_start = Unix.gettimeofday () in let debug = ref false in diff --git a/tests/bin/emit1_eio.ml b/tests/bin/emit1_eio.ml index 2cba3203..f199ac23 100644 --- a/tests/bin/emit1_eio.ml +++ b/tests/bin/emit1_eio.ml @@ -82,12 +82,14 @@ let run_job clock _job_id iterations : unit = let run env proc iterations () : unit = OT.Gc_metrics.setup_on_main_exporter (); - OT.Metrics_callbacks.register (fun () -> - OT.Metrics. - [ - sum ~name:"num-sleep" ~is_monotonic:true - [ int (Atomic.get num_sleep) ]; - ]); + OT.Metrics_callbacks.( + with_set_added_to_main_exporter (fun set -> + add_metrics_cb set (fun () -> + OT.Metrics. + [ + sum ~name:"num-sleep" ~is_monotonic:true + [ int (Atomic.get num_sleep) ]; + ]))); let n_jobs = max 1 !n_jobs in Printf.printf "run %d jobs in proc %d\n%!" n_jobs proc; @@ -172,4 +174,4 @@ let () = Eio.Fiber.fork ~sw @@ fun () -> Eio.Domain_manager.run dm (run env proc !n_iterations) done)); - Opentelemetry.Collector.remove_backend () ~on_done:ignore + Opentelemetry.Main_exporter.remove () ~on_done:ignore From 5a32d747470a7d0556624fcc740c574d213817ab Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 6 Dec 2025 16:26:55 -0500 Subject: [PATCH 82/94] fix dune files --- tests/client/dune | 2 +- tests/client_e2e/dune | 6 +++--- tests/core/dune | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/client/dune b/tests/client/dune index a8c9c961..3eb23bf1 100644 --- a/tests/client/dune +++ b/tests/client/dune @@ -1,4 +1,4 @@ (tests (names test_client_lib) (package opentelemetry) - (libraries alcotest opentelemetry.client)) + (libraries alcotest opentelemetry-client)) diff --git a/tests/client_e2e/dune b/tests/client_e2e/dune index 8952b1a7..1e6415a0 100644 --- a/tests/client_e2e/dune +++ b/tests/client_e2e/dune @@ -20,7 +20,7 @@ containers logs.fmt logs.threaded - opentelemetry.client)) + opentelemetry-client)) (library (name clients_e2e_lib) @@ -34,7 +34,7 @@ (enabled_if (>= %{ocaml_version} 5.0)) (deps %{bin:emit1_cohttp}) - (libraries clients_e2e_lib alcotest opentelemetry opentelemetry.client)) + (libraries clients_e2e_lib alcotest opentelemetry opentelemetry-client)) (tests (names test_cottp_eio_client_e2e) @@ -43,7 +43,7 @@ (deps %{bin:emit1_eio}) (enabled_if (>= %{ocaml_version} 5.0)) - (libraries clients_e2e_lib alcotest opentelemetry opentelemetry.client)) + (libraries clients_e2e_lib alcotest opentelemetry opentelemetry-client)) (executable (name signal_reporter_server) diff --git a/tests/core/dune b/tests/core/dune index 77d2b02f..cbdd974f 100644 --- a/tests/core/dune +++ b/tests/core/dune @@ -1,4 +1,4 @@ (tests (names test_trace_context t_size) (package opentelemetry) - (libraries opentelemetry opentelemetry.client)) + (libraries opentelemetry opentelemetry-client)) From 50728fd9440fb19c4a44ed51362f30fe4952a47b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 6 Dec 2025 16:27:05 -0500 Subject: [PATCH 83/94] fix test_implicit_scope_sync --- .../sync/test_implicit_scope_sync.ml | 52 +++++++------------ 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/tests/implicit_scope/sync/test_implicit_scope_sync.ml b/tests/implicit_scope/sync/test_implicit_scope_sync.ml index d8bf632b..b4ba1503 100644 --- a/tests/implicit_scope/sync/test_implicit_scope_sync.ml +++ b/tests/implicit_scope/sync/test_implicit_scope_sync.ml @@ -1,57 +1,43 @@ open Alcotest module Otel = Opentelemetry -let spans_emitted : Otel.Proto.Trace.resource_spans list ref = ref [] +let spans_emitted : Otel.Span.t list ref = ref [] -module Test_backend = struct - open Otel.Collector - open Otel.Proto - include Noop_backend +let test_exporter : Otel.Exporter.t = + let open Otel.Exporter in + { + (dummy ()) with + emit_spans = Opentelemetry_emitter.To_list.to_list spans_emitted; + } - let record_emitted_spans (l : Trace.resource_spans list) ~ret = - spans_emitted := l @ !spans_emitted; - ret () - - let send_trace : Trace.resource_spans list sender = - { send = record_emitted_spans } -end - -let with_test_backend f = +let with_test_exporter f = (* uncomment for eprintf debugging: *) - (* let module Debug_and_test_backend = Otel.Collector.Debug_backend (Test_backend) in - let backend = (module Debug_and_test_backend : Otel.Collector.BACKEND) in *) - let backend = (module Test_backend : Otel.Collector.BACKEND) in - Otel.Collector.with_setup_debug_backend backend () f + (* let test_exporter = Opentelemetry_client.Exporter_debug.debug test_exporter in*) + Otel.Main_exporter.with_setup_debug_backend test_exporter () f -let bytes_to_hex = Otel.Util_.bytes_to_hex +let bytes_to_hex = Opentelemetry_util.Util_bytes_.bytes_to_hex let test_stack_based_implicit_scope () = let run () = - Otel.Trace.with_ "first trace" @@ fun _scope -> + let tracer = Otel.Tracer.get_main () in + Otel.Tracer.with_ tracer "first trace" @@ fun _scope -> Thread.delay 0.2; - Otel.Trace.with_ "second trace" @@ fun _scope -> + Otel.Tracer.with_ tracer "second trace" @@ fun _scope -> Thread.delay 0.2; - Otel.Trace.with_ "third trace" @@ fun _scope -> + Otel.Tracer.with_ tracer "third trace" @@ fun _scope -> Thread.delay 0.2; () in - with_test_backend @@ fun () -> + with_test_exporter @@ fun () -> (* start *) run (); check' int ~msg:"count of spans emitted" ~actual:(List.length !spans_emitted) ~expected:3; let open Otel.Proto.Trace in - let f prev_span_id { scope_spans; _ } = - Format.printf "\n%a@\n" (Format.pp_print_list pp_scope_spans) scope_spans; - check' int ~msg:"count of scope_spans in emitted span" - ~actual:(List.length scope_spans) ~expected:1; - let { scope; spans; _ } = List.hd scope_spans in - check' bool ~msg:"scope exists in emitted span" - ~actual:(Option.is_some scope) ~expected:true; - check' int ~msg:"count of spans in scope_span" ~actual:(List.length spans) - ~expected:1; - let { name; trace_id; span_id; parent_span_id; _ } = List.hd spans in + let f prev_span_id (sp : Otel.Span.t) = + Format.printf "%a@." pp_span sp; + let { name; trace_id; span_id; parent_span_id; _ } = sp in Printf.printf "name='%s' trace_id='%s' span_id='%s' parent_span_id='%s' \ prev_span_id='%s'\n" From be34faee10755226dd1c8e5496114fff6afb2171 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 7 Dec 2025 22:11:40 -0500 Subject: [PATCH 84/94] client eio: fixes --- src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml index 14e904b6..33e05830 100644 --- a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml +++ b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml @@ -50,8 +50,10 @@ struct let delete = ignore - (** NOTE: the mutex must be acquired *) - let wait self = Eio.Condition.await self.cond self.mutex + let wait self = + Eio.Mutex.lock self.mutex; + Eio.Condition.await self.cond self.mutex; + Eio.Mutex.unlock self.mutex (** Ensure we get signalled when the queue goes from empty to non-empty *) let register_bounded_queue (self : t) (bq : _ Bounded_queue.t) : unit = From 860b278b1c8ff1afa13392baf2b82d2277104c18 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 7 Dec 2025 22:11:47 -0500 Subject: [PATCH 85/94] export more from opentelemetry_lwt --- src/lwt/opentelemetry_lwt.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lwt/opentelemetry_lwt.ml b/src/lwt/opentelemetry_lwt.ml index d6a4f641..d89d7e3c 100644 --- a/src/lwt/opentelemetry_lwt.ml +++ b/src/lwt/opentelemetry_lwt.ml @@ -11,6 +11,9 @@ module Gc_metrics = Gc_metrics module Metrics_callbacks = Metrics_callbacks module Trace_context = Trace_context module GC_metrics = Gc_metrics [@@depecated "use Gc_metrics"] +module Metrics_emitter = Metrics_emitter +module Logger = Logger +module Log_record = Log_record external reraise : exn -> 'a = "%reraise" (** This is equivalent to [Lwt.reraise]. We inline it here so we don't force to From daeafc9b4b42aedea3989fcf5983e81628766b5a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 7 Dec 2025 22:11:54 -0500 Subject: [PATCH 86/94] client config: improve printer --- src/client/client_config.ml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/client/client_config.ml b/src/client/client_config.ml index 6dedbeeb..655ebcd7 100644 --- a/src/client/client_config.ml +++ b/src/client/client_config.ml @@ -13,9 +13,15 @@ type t = { } let pp out (self : t) : unit = - let ppiopt = Format.pp_print_option Format.pp_print_int in + let ppiopt out i = + match i with + | None -> Format.fprintf out "None" + | Some i -> Format.fprintf out "%d" i + in let pp_header ppf (a, b) = Format.fprintf ppf "@[%s: @,%s@]@." a b in - let ppheaders = Format.pp_print_list pp_header in + let ppheaders out l = + Format.fprintf out "[@[%a@]]" (Format.pp_print_list pp_header) l + in let { debug; self_trace; From 576ce9637b0ef1ce9020dfc452da8e5801d8f8df Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 7 Dec 2025 22:12:07 -0500 Subject: [PATCH 87/94] refactor --- .../opentelemetry_client_ocurl.ml | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index 5906ace0..8f06dbd0 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -3,10 +3,10 @@ https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md *) -open Opentelemetry_client +module Config = Config +module OTELC = Opentelemetry_client open Common_ module OTEL = Opentelemetry -module Config = Config let get_headers = Config.Env.get_headers @@ -14,21 +14,22 @@ let set_headers = Config.Env.set_headers let n_bytes_sent : int Atomic.t = Atomic.make 0 -type error = Export_error.t +type error = OTELC.Export_error.t open struct - module Notifier = Notifier_sync + module Notifier = OTELC.Notifier_sync - module IO : Generic_io.S_WITH_CONCURRENCY with type 'a t = 'a = struct - include Generic_io.Direct_style + module IO : OTELC.Generic_io.S_WITH_CONCURRENCY with type 'a t = 'a = struct + include OTELC.Generic_io.Direct_style let sleep_s = Thread.delay - let[@inline] spawn f = ignore (Util_thread.start_bg_thread f : Thread.t) + let[@inline] spawn f = + ignore (OTELC.Util_thread.start_bg_thread f : Thread.t) end end -module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct +module Httpc : OTELC.Generic_http_consumer.HTTPC with module IO = IO = struct module IO = IO type t = Curl.t @@ -72,11 +73,13 @@ module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct (`Failure (spf "decoding failed with:\n%s\n%s" (Printexc.to_string e) bt)))) | Ok { code; body; _ } -> - let err = Export_error.decode_invalid_http_response ~url ~code body in + let err = + OTELC.Export_error.decode_invalid_http_response ~url ~code body + in Error err end -module Consumer_impl = Generic_http_consumer.Make (IO) (Notifier) (Httpc) +module Consumer_impl = OTELC.Generic_http_consumer.Make (IO) (Notifier) (Httpc) let consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () : Opentelemetry_client.Consumer.any_resource_builder = @@ -93,12 +96,12 @@ let consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () : let create_exporter ?stop ?(config = Config.make ()) () : OTEL.Exporter.t = let consumer = consumer ?stop ~config () in let bq = - Bounded_queue_sync.create - ~high_watermark:Bounded_queue.Defaults.high_watermark () + OTELC.Bounded_queue_sync.create + ~high_watermark:OTELC.Bounded_queue.Defaults.high_watermark () in - Exporter_queued.create ~q:bq ~consumer () - |> Exporter_add_batching.add_batching ~config:config.common + OTELC.Exporter_queued.create ~q:bq ~consumer () + |> OTELC.Exporter_add_batching.add_batching ~config:config.common let create_backend = create_exporter @@ -107,13 +110,14 @@ let setup_ ?(stop = Atomic.make false) ?(config : Config.t = Config.make ()) () let exporter = create_exporter ~stop ~config () in OTEL.Main_exporter.set exporter; - Self_trace.set_enabled config.common.self_trace; + OTELC.Self_trace.set_enabled config.common.self_trace; if config.ticker_thread then ( (* at most a minute *) let sleep_ms = min 60_000 (max 2 config.ticker_interval_ms) in ignore - (Util_thread.setup_ticker_thread ~stop ~sleep_ms exporter () : Thread.t) + (OTELC.Util_thread.setup_ticker_thread ~stop ~sleep_ms exporter () + : Thread.t) ) let remove_backend () : unit = From 0a32049b4c80d1b9c84df6794fd29ba384a05b75 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 7 Dec 2025 22:12:12 -0500 Subject: [PATCH 88/94] wip: fix tests --- tests/bin/emit1_cohttp.ml | 41 ++++++++++++++++------------- tests/bin/emit_logs_cohttp.ml | 10 ++++--- tests/client/test_client_lib.ml | 13 ++++----- tests/client_e2e/signal_gatherer.ml | 2 +- 4 files changed, 37 insertions(+), 29 deletions(-) diff --git a/tests/bin/emit1_cohttp.ml b/tests/bin/emit1_cohttp.ml index 14f657cd..bb0559ff 100644 --- a/tests/bin/emit1_cohttp.ml +++ b/tests/bin/emit1_cohttp.ml @@ -27,9 +27,10 @@ let i = ref 0 let run_job job_id : unit Lwt.t = while%lwt not @@ Atomic.get stop do + let tracer = T.Tracer.get_main () in let@ scope = Atomic.incr num_tr; - T.Trace.with_ ~kind:T.Span.Span_kind_producer "loop.outer" + T.Tracer.with_ tracer ~kind:T.Span.Span_kind_producer "loop.outer" ~attrs:[ "i", `Int job_id ] in @@ -39,9 +40,9 @@ let run_job job_id : unit Lwt.t = Lwt.return @@ Atomic.set stop true else (* parent scope is found via thread local storage *) - let@ scope = + let@ span = Atomic.incr num_tr; - T.Trace.with_ ~scope ~kind:T.Span.Span_kind_internal + T.Tracer.with_ tracer ~parent:scope ~kind:T.Span.Span_kind_internal ~attrs:[ "j", `Int j ] "loop.inner" in @@ -49,19 +50,20 @@ let run_job job_id : unit Lwt.t = let* () = Lwt_unix.sleep !sleep_outer in Atomic.incr num_sleep; - T.Logs.( - emit - [ - make_strf ~trace_id:scope.trace_id ~span_id:scope.span_id - ~severity:Severity_number_info "inner at %d" j; - ]); + Opentelemetry_emitter.Emitter.emit (T.Logger.get_main ()) + [ + T.Log_record.make_strf ~trace_id:(T.Span.trace_id span) + ~span_id:(T.Span.id span) ~severity:Severity_number_info + "inner at %d" j; + ]; incr i; try%lwt Atomic.incr num_tr; let@ scope = - T.Trace.with_ ~kind:T.Span.Span_kind_internal ~scope "alloc" + T.Tracer.with_ tracer ~kind:T.Span.Span_kind_internal ~parent:span + "alloc" in (* allocate some stuff *) if !stress_alloc_ then ( @@ -75,22 +77,23 @@ let run_job job_id : unit Lwt.t = (* simulate a failure *) if j = 4 && !i mod 13 = 0 then failwith "oh no"; - Opentelemetry.Scope.add_event scope (fun () -> - T.Event.make "done with alloc"); + T.Span.add_event scope (T.Event.make "done with alloc"); Lwt.return () with Failure _ -> Lwt.return () done done let run () : unit Lwt.t = - T.GC_metrics.basic_setup (); + T.Gc_metrics.setup_on_main_exporter (); - T.Metrics_callbacks.register (fun () -> - T.Metrics. - [ - sum ~name:"num-sleep" ~is_monotonic:true - [ int (Atomic.get num_sleep) ]; - ]); + T.Metrics_callbacks.( + with_set_added_to_main_exporter (fun set -> + add_metrics_cb set (fun () -> + T.Metrics. + [ + sum ~name:"num-sleep" ~is_monotonic:true + [ int (Atomic.get num_sleep) ]; + ]))); let n_jobs = max 1 !n_jobs in Printf.printf "run %d jobs\n%!" n_jobs; diff --git a/tests/bin/emit_logs_cohttp.ml b/tests/bin/emit_logs_cohttp.ml index acd846e3..6f0393e3 100644 --- a/tests/bin/emit_logs_cohttp.ml +++ b/tests/bin/emit_logs_cohttp.ml @@ -22,8 +22,9 @@ let varied_tag_set = |> add string_list_tag [ "foo"; "bar"; "baz" ]) let run () = + Opentelemetry.Globals.service_name := "emit_logs"; let otel_reporter = - Opentelemetry_logs.otel_reporter ~service_name:"emit_logs" + Opentelemetry_logs.otel_reporter ~attributes:[ "my_reporter_attr", `String "foo" ] () in @@ -35,7 +36,9 @@ let run () = Logs.err (fun m -> m "emit_logs: error log"); Logs.app (fun m -> m "emit_logs: app log"); let%lwt () = - T.Trace.with_ ~kind:T.Span.Span_kind_producer "my_scope" (fun _scope -> + let tracer = T.Tracer.get_main () in + T.Tracer.with_ tracer ~kind:T.Span.Span_kind_producer "my_scope" + (fun _scope -> Logs.info (fun m -> m ~tags:varied_tag_set "emit_logs: this log is emitted with varied tags from a span"); @@ -50,7 +53,8 @@ let run () = let fmt_logger = Logs_fmt.reporter ~dst:Format.err_formatter () in let combined_logger = - Opentelemetry_logs.attach_otel_reporter ~service_name:"emit_logs_fmt" + Opentelemetry_logs.attach_otel_reporter + (* FIXME ~service_name:"emit_logs_fmt" *) ~attributes:[ "my_fmt_attr", `String "bar" ] fmt_logger in diff --git a/tests/client/test_client_lib.ml b/tests/client/test_client_lib.ml index c3f8a360..d3010f76 100644 --- a/tests/client/test_client_lib.ml +++ b/tests/client/test_client_lib.ml @@ -1,5 +1,5 @@ open Alcotest -module Config = Opentelemetry_client.Config +module Config = Opentelemetry_client.Client_config let test_config_printing () = let module Env = Config.Env () in @@ -7,11 +7,12 @@ let test_config_printing () = Format.asprintf "%a" Config.pp @@ Env.make (fun common () -> common) () in let expected = - {|{ debug=false; - self_trace=false; url_traces="http://localhost:4318/v1/traces"; - url_metrics="http://localhost:4318/v1/metrics"; - url_logs="http://localhost:4318/v1/logs"; headers=; batch_traces=400; - batch_metrics=20; batch_logs=400; batch_timeout_ms=2000 }|} + "{ debug=false;\n\ + \ self_trace=false; url_traces=\"http://localhost:4318/v1/traces\";\n\ + \ url_metrics=\"http://localhost:4318/v1/metrics\";\n\ + \ url_logs=\"http://localhost:4318/v1/logs\"; headers=[]; batch_traces=400;\n\ + \ batch_metrics=20; batch_logs=400; batch_timeout_ms=2000;\n\ + \ http_concurrency_level=None }" in check' string ~msg:"is rendered correctly" ~actual ~expected diff --git a/tests/client_e2e/signal_gatherer.ml b/tests/client_e2e/signal_gatherer.ml index 234feaf2..ffb47485 100644 --- a/tests/client_e2e/signal_gatherer.ml +++ b/tests/client_e2e/signal_gatherer.ml @@ -111,7 +111,7 @@ module Tested_program = struct end let default_port = - String.split_on_char ':' Client.Config.default_url |> function + String.split_on_char ':' Client.Client_config.default_url |> function (* Extracting the port from 'http://foo:' *) | [ _; _; port ] -> int_of_string port | _ -> failwith "unexpected format in Client.Config.default_url" From 0400c597d0774ea7b65fe1d9f3ff0c0bd03618ef Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 8 Dec 2025 08:57:41 -0500 Subject: [PATCH 89/94] fix warnings --- src/client/bounded_queue.ml | 2 -- src/client/bounded_queue_sync.ml | 5 ----- src/client/export_error.ml | 2 -- src/client/notifier_sync.ml | 2 -- 4 files changed, 11 deletions(-) diff --git a/src/client/bounded_queue.ml b/src/client/bounded_queue.ml index 63d79802..b1cc1e81 100644 --- a/src/client/bounded_queue.ml +++ b/src/client/bounded_queue.ml @@ -3,8 +3,6 @@ After the high watermark is reached, pushing items into the queue will instead discard them. *) -open Common_ - exception Closed (** Raised when pushing into a closed queue *) diff --git a/src/client/bounded_queue_sync.ml b/src/client/bounded_queue_sync.ml index b4d80f7b..487ea4f2 100644 --- a/src/client/bounded_queue_sync.ml +++ b/src/client/bounded_queue_sync.ml @@ -43,11 +43,6 @@ end = struct UM.protect self.mutex @@ fun () -> if not self.closed then self.closed <- true - let push (self : _ t) x : unit = - UM.protect self.mutex @@ fun () -> - if self.closed then raise Closed; - Queue.push x self.q - let try_pop (self : 'a t) : 'a option = UM.protect self.mutex @@ fun () -> if self.closed then raise Closed; diff --git a/src/client/export_error.ml b/src/client/export_error.ml index b78447cb..32f5e541 100644 --- a/src/client/export_error.ml +++ b/src/client/export_error.ml @@ -1,5 +1,3 @@ -open Common_ - type t = [ `Status of int * Opentelemetry.Proto.Status.status | `Failure of string diff --git a/src/client/notifier_sync.ml b/src/client/notifier_sync.ml index 0f7b6220..e1fd501d 100644 --- a/src/client/notifier_sync.ml +++ b/src/client/notifier_sync.ml @@ -11,8 +11,6 @@ let trigger self = Condition.signal self.cond let delete = ignore -let[@inline] protect self f = Util_mutex.protect self.mutex f - let wait self = Mutex.lock self.mutex; Condition.wait self.cond self.mutex; From c77f9c83f33441ca14aca4fc234bee983cc5bffc Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 09:36:28 -0500 Subject: [PATCH 90/94] CI --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e2ac881a..118047f1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -42,7 +42,7 @@ jobs: allow-prerelease-opam: true - run: | - opam pin https://github.com/mransan/ocaml-protoc.git#5510694deffde13283742b8ad116fab61b65dfbc -y -n + opam pin pbrt 4.0 -y -n opam install pbrt -y # We cannot install packages that need eio on ocaml versions before 5 From 8b06ed208b82fbb4f1099cf90c0bca9e2c63710e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Dec 2025 09:40:23 -0500 Subject: [PATCH 91/94] fix nix --- flake.lock | 12 ++++++------ flake.nix | 7 ------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/flake.lock b/flake.lock index eb585b9b..de3aced1 100644 --- a/flake.lock +++ b/flake.lock @@ -95,11 +95,11 @@ "opam2json": "opam2json" }, "locked": { - "lastModified": 1753007101, - "narHash": "sha256-YYdS644zHwmyBY0RYdrhO05uT3xauqJ5Ww1KtN9Q3Z4=", + "lastModified": 1762273592, + "narHash": "sha256-dXex1fPdmzj4xKWEWrcvbgin/iLFaxrt9vi305m6nUc=", "owner": "tweag", "repo": "opam-nix", - "rev": "03dd8b2577c05c42dc9e319d290f2dbfc67ab38b", + "rev": "98ca8f4401e996aeac38b6f14bf3a82d85b7add7", "type": "github" }, "original": { @@ -127,11 +127,11 @@ "opam-repository": { "flake": false, "locked": { - "lastModified": 1751808506, - "narHash": "sha256-H0WN/VhgaI6GLYmLAThoRcsf4XwnMNEBsz/w8FbLSrU=", + "lastModified": 1759971927, + "narHash": "sha256-aUZWd0KOpEnioBwqlwRU40rUFAqT3RTlojXt2oI3omY=", "owner": "ocaml", "repo": "opam-repository", - "rev": "bd82a8dde3f816d8b45ecbe005ac1f8e7f25c207", + "rev": "551314ad1550478ec6be39bb0eaadd2569190464", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 6f294061..9a63f604 100644 --- a/flake.nix +++ b/flake.nix @@ -23,13 +23,6 @@ overlay = final: prev: { # You can add overrides here - pbrt = prev.pbrt.overrideAttrs (oldAttrs: { - src = pkgs.fetchgit { - url = "https://github.com/mransan/ocaml-protoc.git"; - rev = "5510694deffde13283742b8ad116fab61b65dfbc"; - sha256 = "sha256-0eQEaAZMs/OydNLsEKxdbdwx0/Ots6fLEpYg89VxK3k="; - }; - }); }; scope' = scope.overrideScope overlay; # Packages from devPackagesQuery From a602a5c714c07ab9817bbac8bb9e96dddde7b454 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 1 Dec 2025 21:02:42 -0500 Subject: [PATCH 92/94] feat batch: get rid of `Mutex` this should result in lower overhead for single threaded situations such as lwt or eio. --- src/client/batch.ml | 103 ++++++++++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 38 deletions(-) diff --git a/src/client/batch.ml b/src/client/batch.ml index 20343630..f57f34e6 100644 --- a/src/client/batch.ml +++ b/src/client/batch.ml @@ -1,14 +1,18 @@ open Opentelemetry_util +module Otel = Opentelemetry +module A = Opentelemetry_atomic.Atomic + +type 'a state = { + start: Mtime.t; + size: int; + q: 'a list; (** The queue is a FIFO represented as a list in reverse order *) +} type 'a t = { - mutable size: int; - mutable q: 'a list; - (** The queue is a FIFO represented as a list in reverse order *) + st: 'a state A.t; batch: int; (** Minimum size to batch before popping *) high_watermark: int; (** Size above which we start dropping signals *) timeout: Mtime.span option; - mutable start: Mtime.t; - mutex: Mutex.t; } let default_high_watermark batch_size = @@ -17,6 +21,8 @@ let default_high_watermark batch_size = else batch_size * 10 +let _dummy_start = Mtime.min_stamp + let make ?(batch = 1) ?high_watermark ?now ?timeout () : _ t = let high_watermark = match high_watermark with @@ -26,36 +32,48 @@ let make ?(batch = 1) ?high_watermark ?now ?timeout () : _ t = let start = match now with | Some x -> x - | None -> Mtime_clock.now () + | None -> _dummy_start in - let mutex = Mutex.create () in assert (batch > 0); - { size = 0; q = []; start; batch; timeout; high_watermark; mutex } + { st = A.make { size = 0; q = []; start }; batch; timeout; high_watermark } -let timeout_expired_ ~now self : bool = - match self.timeout with +let timeout_expired_ ~now ~timeout (self : _ state) : bool = + match timeout with | Some t -> let elapsed = Mtime.span now self.start in Mtime.Span.compare elapsed t >= 0 | None -> false (* Big enough to send a batch *) -let is_full_ self : bool = self.size >= self.batch +let[@inline] is_full_ ~batch (self : _ state) : bool = self.size >= batch -let ready_to_pop ~force ~now self = - self.size > 0 && (force || is_full_ self || timeout_expired_ ~now self) +let[@inline] ready_to_pop_ ~force ~now ~batch ~timeout (self : _ state) = + self.size > 0 + && (force || is_full_ ~batch self || timeout_expired_ ~now ~timeout self) + +let[@inline] atomic_update_loop_ (type res) (self : _ t) + (f : 'a state -> 'a state * res) : res = + let exception Return of res in + try + while true do + let st = A.get self.st in + let new_st, res = f st in + if A.compare_and_set self.st st new_st then raise_notrace (Return res) + done + with Return res -> res let pop_if_ready ?(force = false) ~now (self : _ t) : _ list option = let rev_batch_opt = - Util_mutex.protect self.mutex @@ fun () -> - if ready_to_pop ~force ~now self then ( - assert (self.q <> []); - let batch = self.q in - self.q <- []; - self.size <- 0; - Some batch + atomic_update_loop_ self @@ fun state -> + let timeout = self.timeout in + let batch = self.batch in + if ready_to_pop_ ~force ~now ~batch ~timeout state then ( + assert (state.q <> []); + let batch = state.q in + let new_st = { q = []; size = 0; start = _dummy_start } in + new_st, Some batch ) else - None + state, None in match rev_batch_opt with | None -> None @@ -63,27 +81,36 @@ let pop_if_ready ?(force = false) ~now (self : _ t) : _ list option = (* Reverse the list to retrieve the FIFO order. *) Some (List.rev batch) -let rec push_unprotected (self : _ t) ~(elems : _ list) : unit = - match elems with - | [] -> () - | x :: xs -> - self.q <- x :: self.q; - self.size <- 1 + self.size; - push_unprotected self ~elems:xs +let[@inline] push_unprotected_ (self : _ state) (elems : _ list) : _ state = + { + self with + size = self.size + List.length elems; + q = List.rev_append elems self.q; + } let push (self : _ t) elems : [ `Dropped | `Ok ] = - Util_mutex.protect self.mutex @@ fun () -> - if self.size >= self.high_watermark then - (* drop this to prevent queue from growing too fast *) - `Dropped + if elems = [] then + `Ok `Ok else ( - if self.size = 0 && Option.is_some self.timeout then - (* current batch starts now *) - self.start <- Mtime_clock.now (); + let now = lazy (Mtime_clock.now ()) in + atomic_update_loop_ self @@ fun state -> + if state.size >= self.high_watermark then + (* drop this to prevent queue from growing too fast *) + state, `Dropped + else ( + let state = + if state.size = 0 && Option.is_some self.timeout then + (* current batch starts now *) + { state with start = Lazy.force now } + else + state + in - (* add to queue *) - push_unprotected self ~elems; - `Ok + (* add to queue *) + let state = push_unprotected_ state elems in + + state, `Ok + ) ) let[@inline] push' self elems = ignore (push self elems : [ `Dropped | `Ok ]) From 69bd89ebab6ab9a1b77edcebf1f2ed98ace5febf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 1 Dec 2025 21:21:57 -0500 Subject: [PATCH 93/94] feat: opentelemetry.domain shim --- src/domain/dune | 14 ++++++++++++++ src/domain/gen.ml | 26 ++++++++++++++++++++++++++ src/domain/opentelemetry_domain.mli | 4 ++++ 3 files changed, 44 insertions(+) create mode 100644 src/domain/dune create mode 100644 src/domain/gen.ml create mode 100644 src/domain/opentelemetry_domain.mli diff --git a/src/domain/dune b/src/domain/dune new file mode 100644 index 00000000..c75ee3c4 --- /dev/null +++ b/src/domain/dune @@ -0,0 +1,14 @@ +(library + (name opentelemetry_domain) + (synopsis "Compatibility package for the Domain module for opentelemetry") + (public_name opentelemetry.domain) + (modules opentelemetry_domain)) + +(executable + (modules gen) + (name gen)) + +(rule + (targets opentelemetry_domain.ml) + (action + (run ./gen.exe))) diff --git a/src/domain/gen.ml b/src/domain/gen.ml new file mode 100644 index 00000000..f1cf82e6 --- /dev/null +++ b/src/domain/gen.ml @@ -0,0 +1,26 @@ +let domain_4 = + {| +let cpu_relax = ignore +let relax_loop : int -> unit = ignore + |} + +let domain_5 = + {| +let cpu_relax = Domain.cpu_relax +let relax_loop i = + for _j = 1 to i do cpu_relax () done +|} + +let write_file file s = + let oc = open_out file in + output_string oc s; + close_out oc + +let () = + let version = Scanf.sscanf Sys.ocaml_version "%d.%d.%s" (fun x y _ -> x, y) in + write_file "opentelemetry_domain.ml" + (if version >= (5, 0) then + domain_5 + else + domain_4); + () diff --git a/src/domain/opentelemetry_domain.mli b/src/domain/opentelemetry_domain.mli new file mode 100644 index 00000000..36f5929e --- /dev/null +++ b/src/domain/opentelemetry_domain.mli @@ -0,0 +1,4 @@ +val cpu_relax : unit -> unit + +val relax_loop : int -> unit +(** Call {!cpu_relax} n times *) From 107e173bde314e97fcefa98dc9e645c2327537bf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 1 Dec 2025 21:22:07 -0500 Subject: [PATCH 94/94] perf batch: proper backoff strategy --- src/client/batch.ml | 53 +++++++++++++++++++++++++++------------------ src/client/dune | 1 + 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/src/client/batch.ml b/src/client/batch.ml index f57f34e6..ba22fe1f 100644 --- a/src/client/batch.ml +++ b/src/client/batch.ml @@ -1,6 +1,7 @@ open Opentelemetry_util module Otel = Opentelemetry module A = Opentelemetry_atomic.Atomic +module Domain = Opentelemetry_domain type 'a state = { start: Mtime.t; @@ -23,6 +24,8 @@ let default_high_watermark batch_size = let _dummy_start = Mtime.min_stamp +let _empty_state : _ state = { q = []; size = 0; start = _dummy_start } + let make ?(batch = 1) ?high_watermark ?now ?timeout () : _ t = let high_watermark = match high_watermark with @@ -47,30 +50,40 @@ let timeout_expired_ ~now ~timeout (self : _ state) : bool = (* Big enough to send a batch *) let[@inline] is_full_ ~batch (self : _ state) : bool = self.size >= batch -let[@inline] ready_to_pop_ ~force ~now ~batch ~timeout (self : _ state) = - self.size > 0 - && (force || is_full_ ~batch self || timeout_expired_ ~now ~timeout self) - let[@inline] atomic_update_loop_ (type res) (self : _ t) (f : 'a state -> 'a state * res) : res = let exception Return of res in try + let backoff = ref 1 in while true do let st = A.get self.st in let new_st, res = f st in - if A.compare_and_set self.st st new_st then raise_notrace (Return res) + if A.compare_and_set self.st st new_st then raise_notrace (Return res); + + (* poor man's backoff strategy *) + Domain.relax_loop !backoff; + backoff := min 128 (2 * !backoff) done with Return res -> res let pop_if_ready ?(force = false) ~now (self : _ t) : _ list option = let rev_batch_opt = + (* update state. When uncontended this runs only once. *) atomic_update_loop_ self @@ fun state -> - let timeout = self.timeout in - let batch = self.batch in - if ready_to_pop_ ~force ~now ~batch ~timeout state then ( + (* *) + + (* check if the batch is ready *) + let ready_to_pop = + state.size > 0 + && (force + || is_full_ ~batch:self.batch state + || timeout_expired_ ~now ~timeout:self.timeout state) + in + + if ready_to_pop then ( assert (state.q <> []); let batch = state.q in - let new_st = { q = []; size = 0; start = _dummy_start } in + let new_st = _empty_state in new_st, Some batch ) else state, None @@ -81,13 +94,6 @@ let pop_if_ready ?(force = false) ~now (self : _ t) : _ list option = (* Reverse the list to retrieve the FIFO order. *) Some (List.rev batch) -let[@inline] push_unprotected_ (self : _ state) (elems : _ list) : _ state = - { - self with - size = self.size + List.length elems; - q = List.rev_append elems self.q; - } - let push (self : _ t) elems : [ `Dropped | `Ok ] = if elems = [] then `Ok `Ok @@ -98,16 +104,21 @@ let push (self : _ t) elems : [ `Dropped | `Ok ] = (* drop this to prevent queue from growing too fast *) state, `Dropped else ( - let state = + let start = if state.size = 0 && Option.is_some self.timeout then - (* current batch starts now *) - { state with start = Lazy.force now } + Lazy.force now else - state + state.start in (* add to queue *) - let state = push_unprotected_ state elems in + let state = + { + size = state.size + List.length elems; + q = List.rev_append elems state.q; + start; + } + in state, `Ok ) diff --git a/src/client/dune b/src/client/dune index 29ad29d7..5b3f6fde 100644 --- a/src/client/dune +++ b/src/client/dune @@ -7,6 +7,7 @@ opentelemetry.util opentelemetry.emitter opentelemetry.proto + opentelemetry.domain pbrt saturn mtime