refactor opentelemetry.trace to cleanly separate trace spans from OTEL spans

- trace spans: counter based, local, meant to be handles
- OTEL spans: 8B random blobs, meant to be used in a distributed context
This commit is contained in:
Simon Cruanes 2025-12-16 13:56:29 -05:00
parent b4c8803931
commit a954deb46d
No known key found for this signature in database
GPG key ID: EBFFF6F283F3A2B4
3 changed files with 133 additions and 121 deletions

View file

@ -1,40 +0,0 @@
open Common_
let[@inline] trace_id_of_otel (id : OTEL.Trace_id.t) : Otrace.trace_id =
if id == OTEL.Trace_id.dummy then
Otrace.Collector.dummy_trace_id
else
Bytes.unsafe_to_string (OTEL.Trace_id.to_bytes id)
let[@inline] trace_id_to_otel (id : Otrace.trace_id) : OTEL.Trace_id.t =
if id == Otrace.Collector.dummy_trace_id then
OTEL.Trace_id.dummy
else
OTEL.Trace_id.of_bytes @@ Bytes.unsafe_of_string id
let[@inline] span_id_of_otel (id : OTEL.Span_id.t) : Otrace.span =
if id == OTEL.Span_id.dummy then
Otrace.Collector.dummy_span
else
Bytes.get_int64_le (OTEL.Span_id.to_bytes id) 0
let[@inline] span_id_to_otel (id : Otrace.span) : OTEL.Span_id.t =
if id == Otrace.Collector.dummy_span then
OTEL.Span_id.dummy
else (
let b = Bytes.create 8 in
Bytes.set_int64_le b 0 id;
OTEL.Span_id.of_bytes b
)
let[@inline] ctx_to_otel (self : Otrace.explicit_span_ctx) : OTEL.Span_ctx.t =
OTEL.Span_ctx.make
~trace_id:(trace_id_to_otel self.trace_id)
~parent_id:(span_id_to_otel self.span)
()
let[@inline] ctx_of_otel (ctx : OTEL.Span_ctx.t) : Otrace.explicit_span_ctx =
{
trace_id = trace_id_of_otel (OTEL.Span_ctx.trace_id ctx);
span = span_id_of_otel (OTEL.Span_ctx.parent_id ctx);
}

View file

@ -1,10 +1,25 @@
open Opentelemetry_atomic
open Common_ open Common_
module Conv = Conv
open Conv
let on_internal_error = let on_internal_error =
ref (fun msg -> Printf.eprintf "error in Opentelemetry_trace: %s\n%!" msg) ref (fun msg -> Printf.eprintf "error in Opentelemetry_trace: %s\n%!" msg)
open struct
module Opt_syntax = struct
let[@inline] ( let+ ) o f =
match o with
| None -> None
| Some x -> Some (f x)
let ( >|= ) = ( let+ )
let[@inline] ( <?> ) a b =
match a, b with
| Some _, _ -> a
| None, _ -> b
end
end
module Extensions = struct module Extensions = struct
type Otrace.extension_event += type Otrace.extension_event +=
| Ev_link_span of Otrace.span * OTEL.Span_ctx.t | Ev_link_span of Otrace.span * OTEL.Span_ctx.t
@ -25,53 +40,76 @@ module Internal = struct
(* use the fast, thread safe span table that relies on picos. *) (* use the fast, thread safe span table that relies on picos. *)
module Active_span_tbl = Trace_subscriber.Span_tbl module Active_span_tbl = Trace_subscriber.Span_tbl
type state = { tbl: span_begin Active_span_tbl.t } [@@unboxed] type state = {
tbl: span_begin Active_span_tbl.t;
span_gen: int Atomic.t;
}
let create_state () : state = { tbl = Active_span_tbl.create () } let create_state () : state =
{ tbl = Active_span_tbl.create (); span_gen = Atomic.make 0 }
(* sanity check: otrace meta-map must be the same as hmap *) (* sanity check: otrace meta-map must be the same as hmap *)
let () = ignore (fun (k : _ Hmap.key) : _ Otrace.Meta_map.key -> k) let () = ignore (fun (k : _ Hmap.key) : _ Otrace.Meta_map.key -> k)
let[@inline] get_span_ (self : state) (span : Otrace.span) :
OTEL.Span.t option =
match Active_span_tbl.find_exn self.tbl span with
| exception Not_found -> None
| { span } -> Some span
(** key to access a OTEL span (the current span) from an (** key to access a OTEL span (the current span) from an
[Otrace.explicit_span] *) [Otrace.explicit_span]. We can reuse the context key because we know that
let k_explicit_span : OTEL.Span.t Otrace.Meta_map.key = [Otrace.Meta_map == Hmap]. *)
Otrace.Meta_map.Key.create () let k_span_otrace : OTEL.Span.t Otrace.Meta_map.key = OTEL.Span.k_context
let otrace_of_otel (id : OTEL.Span_id.t) : int64 = let[@inline] get_span_explicit_ (span : Otrace.explicit_span) :
let bs = OTEL.Span_id.to_bytes id in OTEL.Span.t option =
(* lucky that it coincides! *) Otrace.Meta_map.find k_span_otrace span.meta
assert (Bytes.length bs = 8);
Bytes.get_int64_le bs 0
let enter_span' (self : state) let enter_span_ (self : state)
?(explicit_parent : Otrace.explicit_span_ctx option) ~__FUNCTION__ ?(explicit_parent : Otrace.explicit_span_ctx option) ~__FUNCTION__
~__FILE__ ~__LINE__ ~data name : Otrace.span * span_begin = ~__FILE__ ~__LINE__ ~data ~(otrace_span : Otrace.span) name : span_begin =
let open OTEL in let open OTEL in
(* we create a random span ID here, it's not related in any way to (* we create a random span ID here, it's not related in any way to
the [Otrace.span] which is sequential. The [Otrace.span] has strong the [Otrace.span] which is sequential. The [Otrace.span] has strong
guarantees of uniqueness and thus we {i can} use it as an index guarantees of uniqueness and thus we {i can} use it as an index
in [Span_tbl], whereas an 8B OTEL span ID might be prone to collisions in [Span_tbl], whereas an 8 bytes OTEL span ID might be prone to
over time. *) collisions over time. *)
let otel_id = Span_id.create () in let otel_id = Span_id.create () in
let otrace_id = otrace_of_otel otel_id in
let trace_id, parent_id = (* get data from parent *)
let trace_id_from_parent, parent_id_from_parent =
let open Opt_syntax in
match explicit_parent with match explicit_parent with
| Some p -> | Some p ->
let trace_id = p.trace_id |> Conv.trace_id_to_otel in let trace_id = Otrace.Meta_map.find k_trace_id p.meta in
let parent_id = let span_id =
try Otrace.Meta_map.find k_span_otrace p.meta >|= OTEL.Span.id
let sb = Active_span_tbl.find_exn self.tbl p.span in
Some (OTEL.Span.id sb.span)
with Not_found -> None
in in
trace_id, parent_id let span_ctx = Otrace.Meta_map.find k_span_ctx p.meta in
| None -> ( trace_id <?> (span_ctx >|= OTEL.Span_ctx.trace_id),
(* look in ambient context *) span_id <?> (span_ctx >|= OTEL.Span_ctx.parent_id) )
| None -> None, None
in
(* get data from implicit context *)
let trace_id_from_ambient, parent_id_from_ambient =
if Option.is_none trace_id_from_parent then
let open Opt_syntax in
let implicit_parent = OTEL.Ambient_span.get () in let implicit_parent = OTEL.Ambient_span.get () in
(match implicit_parent with implicit_parent >|= OTEL.Span.trace_id, implicit_parent >|= OTEL.Span.id
| Some p -> Span.trace_id p, Some (Span.id p) else
| None -> Trace_id.create (), None) None, None
in
let trace_id =
match trace_id_from_parent, trace_id_from_ambient with
| Some t, _ | None, Some t -> t
| None, None -> Trace_id.create ()
in
let parent_id =
Opt_syntax.(parent_id_from_parent <?> parent_id_from_ambient)
in in
let attrs = let attrs =
@ -103,8 +141,8 @@ module Internal = struct
] ]
| _ -> ()); | _ -> ());
Active_span_tbl.add self.tbl otrace_id sb; Active_span_tbl.add self.tbl otrace_span sb;
otrace_id, sb sb
let exit_span_ { span } : OTEL.Span.t = let exit_span_ { span } : OTEL.Span.t =
let open OTEL in let open OTEL in
@ -125,16 +163,6 @@ module Internal = struct
| otel_span_begin -> | otel_span_begin ->
Active_span_tbl.remove self.tbl otrace_id; Active_span_tbl.remove self.tbl otrace_id;
Some (exit_span_ otel_span_begin) Some (exit_span_ otel_span_begin)
let[@inline] get_span_ (self : state) (span : Otrace.span) :
OTEL.Span.t option =
match Active_span_tbl.find_exn self.tbl span with
| exception Not_found -> None
| { span } -> Some span
let[@inline] get_span_explicit_ (span : Otrace.explicit_span) :
OTEL.Span.t option =
Otrace.Meta_map.find k_explicit_span span.meta
end end
module type COLLECTOR_ARG = sig module type COLLECTOR_ARG = sig
@ -148,37 +176,55 @@ module Make_collector (A : COLLECTOR_ARG) = struct
let state = create_state () let state = create_state ()
(* NOTE: perf: it would be interesting to keep the "current (OTEL) span" in
local storage/ambient-context, to accelerate most span-modifying
operations. They'd first look in local storage, and if the span isn't the
expected one, then look in the main span tbl. *)
let with_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name cb = let with_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name cb =
let (otrace_id, sb) : Otrace.span * span_begin = let otrace_span : Otrace.span =
enter_span' state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name Int64.of_int (Atomic.fetch_and_add state.span_gen 1)
in
let sb : span_begin =
enter_span_ state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name
~otrace_span
in in
match match
let@ () = OTEL.Ambient_span.with_ambient sb.span in let@ () = OTEL.Ambient_span.with_ambient sb.span in
cb otrace_id cb otrace_span
with with
| res -> | res ->
let otel_span = exit_span' state otrace_id sb in let otel_span = exit_span' state otrace_span sb in
OTEL.Exporter.send_trace exporter [ otel_span ]; OTEL.Exporter.send_trace exporter [ otel_span ];
res res
| exception e -> | exception e ->
let bt = Printexc.get_raw_backtrace () in let bt = Printexc.get_raw_backtrace () in
let otrace_span : Otrace.span =
Int64.of_int (Atomic.fetch_and_add state.span_gen 1)
in
OTEL.Span.record_exception sb.span e bt; OTEL.Span.record_exception sb.span e bt;
let otel_span = exit_span' state otrace_id sb in let otel_span = exit_span' state otrace_span sb in
OTEL.Exporter.send_trace exporter [ otel_span ]; OTEL.Exporter.send_trace exporter [ otel_span ];
Printexc.raise_with_backtrace e bt Printexc.raise_with_backtrace e bt
let enter_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Trace_core.span let enter_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Trace_core.span
= =
let otrace_id, _sb = let otrace_span : Otrace.span =
enter_span' state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name Int64.of_int (Atomic.fetch_and_add state.span_gen 1)
in in
let _sb =
enter_span_ state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data ~otrace_span
name
in
(* NOTE: we cannot enter ambient scope in a disjoint way (* NOTE: we cannot enter ambient scope in a disjoint way
with the exit, because we only have [Ambient_context.with_binding], with the exit, because we only have [Ambient_context.with_binding],
no [set_binding]. This is what {!with_parent_span} is for! *) no [set_binding]. This is what {!with_parent_span} is for! *)
otrace_id otrace_span
let exit_span otrace_id = let exit_span otrace_id =
match exit_span_from_id state otrace_id with match exit_span_from_id state otrace_id with
@ -187,20 +233,24 @@ module Make_collector (A : COLLECTOR_ARG) = struct
let enter_manual_span ~(parent : Otrace.explicit_span_ctx option) ~flavor:_ let enter_manual_span ~(parent : Otrace.explicit_span_ctx option) ~flavor:_
~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Otrace.explicit_span = ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Otrace.explicit_span =
let otrace_id, sb = let otrace_span : Otrace.span =
Int64.of_int (Atomic.fetch_and_add state.span_gen 1)
in
let sb =
match parent with match parent with
| None -> enter_span' state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name | None ->
enter_span_ state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data ~otrace_span
name
| Some parent -> | Some parent ->
enter_span' state ~explicit_parent:parent ~__FUNCTION__ ~__FILE__ enter_span_ state ~explicit_parent:parent ~__FUNCTION__ ~__FILE__
~__LINE__ ~data name ~__LINE__ ~data ~otrace_span name
in in
Active_span_tbl.add state.tbl otrace_id sb; Active_span_tbl.add state.tbl otrace_span sb;
{ {
Otrace.span = otrace_id; Otrace.span = otrace_span;
trace_id = trace_id_of_otel (OTEL.Span.trace_id sb.span); meta = Otrace.Meta_map.(empty |> add k_span_otrace sb.span);
meta = Otrace.Meta_map.(empty |> add k_explicit_span sb.span);
} }
let exit_manual_span { Otrace.span = otrace_id; _ } = let exit_manual_span { Otrace.span = otrace_id; _ } =
@ -223,17 +273,34 @@ module Make_collector (A : COLLECTOR_ARG) = struct
!on_internal_error (spf "manual span does not a contain an OTEL scope") !on_internal_error (spf "manual span does not a contain an OTEL scope")
| Some span -> OTEL.Span.add_attrs span data | Some span -> OTEL.Span.add_attrs span data
let message ?span ~data:_ msg : unit = let message ?(span : Otrace.span option) ~data:_ msg : unit =
(* gather information from context *) let trace_id_from_parent, span_id_from_parent =
let old_span = OTEL.Ambient_span.get () in let open Opt_syntax in
let trace_id = Option.map OTEL.Span.trace_id old_span in
let span_id =
match span with match span with
| Some id -> Some (span_id_to_otel id) | Some p ->
| None -> Option.map OTEL.Span.id old_span let sp = get_span_ state p in
( (let+ sp = sp in
OTEL.Span.trace_id sp),
let+ sp = sp in
OTEL.Span.id sp )
| None -> None, None
in in
(* get data from implicit context *)
let trace_id_from_ambient, span_id_from_ambient =
if Option.is_none trace_id_from_parent then
let open Opt_syntax in
let implicit_parent = OTEL.Ambient_span.get () in
implicit_parent >|= OTEL.Span.trace_id, implicit_parent >|= OTEL.Span.id
else
None, None
in
let trace_id =
Opt_syntax.(trace_id_from_parent <?> trace_id_from_ambient)
in
let span_id = Opt_syntax.(span_id_from_parent <?> span_id_from_ambient) in
let log = OTEL.Log_record.make_str ?trace_id ?span_id msg in let log = OTEL.Log_record.make_str ?trace_id ?span_id msg in
OTEL.Exporter.send_logs exporter [ log ] OTEL.Exporter.send_logs exporter [ log ]

View file

@ -21,21 +21,6 @@
module OTEL := Opentelemetry_core module OTEL := Opentelemetry_core
module Otrace := Trace_core module Otrace := Trace_core
(** Conversions between [Opentelemetry] and [Trace_core] types *)
module Conv : sig
val trace_id_of_otel : OTEL.Trace_id.t -> string
val trace_id_to_otel : string -> OTEL.Trace_id.t
val span_id_of_otel : OTEL.Span_id.t -> int64
val span_id_to_otel : int64 -> OTEL.Span_id.t
val ctx_to_otel : Otrace.explicit_span_ctx -> OTEL.Span_ctx.t
val ctx_of_otel : OTEL.Span_ctx.t -> Otrace.explicit_span_ctx
end
(** The extension events for {!Trace_core}. *) (** The extension events for {!Trace_core}. *)
module Extensions : sig module Extensions : sig
type Otrace.extension_event += type Otrace.extension_event +=