diff --git a/.gitignore b/.gitignore index 811fc6e9..85d4d798 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ _opam .merlin *.install *.exe +*.tmp diff --git a/Makefile b/Makefile index 4291a68f..f6d2cc6d 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,9 @@ clean: protoc-gen: FORCE_GENPROTO=true dune build @lint +update-submodules: + git submodule update --init + format: @dune build @fmt --auto-promote diff --git a/dune-project b/dune-project index 41ba5911..3d962e1f 100644 --- a/dune-project +++ b/dune-project @@ -28,11 +28,6 @@ (>= "4.08")) ptime hmap - atomic - (thread-local-storage - (and - (>= 0.2) - (< 0.3))) (odoc :with-doc) (alcotest :with-test) (pbrt @@ -46,14 +41,33 @@ (>= 0.27) (< 0.28))) (mtime - (>= "1.4"))) - (depopts trace lwt eio) + (>= "1.4"))) + (depopts atomic trace thread-local-storage lwt eio) (conflicts (trace (< 0.10))) (tags (instrumentation tracing opentelemetry datadog jaeger))) +(package + (name opentelemetry-client) + (synopsis "Client SDK for https://opentelemetry.io") + (depends + (opentelemetry + (= :version)) + (odoc :with-doc) + (alcotest :with-test) + (saturn + (and + (>= 1.0) + (< 2.0))) + (thread-local-storage + (and + (>= 0.2) + (< 0.3)))) + (tags + (tracing opentelemetry sdk))) + (package (name opentelemetry-lwt) (synopsis "Lwt-compatible instrumentation for https://opentelemetry.io") @@ -83,6 +97,8 @@ ; atomic ; vendored (opentelemetry (= :version)) + (opentelemetry-client + (= :version)) (odoc :with-doc) (ezcurl (>= 0.2.3)) @@ -99,6 +115,8 @@ (>= "1.4")) (opentelemetry (= :version)) + (opentelemetry-client + (= :version)) (odoc :with-doc) (ezcurl-lwt (>= 0.2.3)) @@ -124,10 +142,14 @@ (containers :with-test) (cohttp-lwt-unix :with-test) (opentelemetry-client-cohttp-lwt - (and :with-test (= :version))) + (and + :with-test + (= :version))) (opentelemetry-cohttp-lwt - (and :with-test (= :version)))) - (synopsis "Opentelemetry tracing for Cohttp HTTP servers")) + (and + :with-test + (= :version)))) + (synopsis "Opentelemetry-based reporter for Logs")) (package (name opentelemetry-cohttp-lwt) @@ -156,6 +178,8 @@ ; for spans (opentelemetry (= :version)) + (opentelemetry-client + (= :version)) (odoc :with-doc) (lwt (>= "5.3")) @@ -165,7 +189,10 @@ cohttp-lwt-unix (alcotest :with-test) (containers :with-test) - (opentelemetry-lwt (and :with-test (= :version)))) + (opentelemetry-lwt + (and + :with-test + (= :version)))) (synopsis "Collector client for opentelemetry, using cohttp + lwt")) (package @@ -179,6 +206,8 @@ mirage-crypto-rng-eio (opentelemetry (= :version)) + (opentelemetry-client + (= :version)) (odoc :with-doc) (cohttp-eio (>= 6.1.0)) diff --git a/opentelemetry-client-cohttp-eio.opam b/opentelemetry-client-cohttp-eio.opam index b19ae6b5..bc651d2c 100644 --- a/opentelemetry-client-cohttp-eio.opam +++ b/opentelemetry-client-cohttp-eio.opam @@ -18,6 +18,7 @@ depends: [ "ca-certs" "mirage-crypto-rng-eio" "opentelemetry" {= version} + "opentelemetry-client" {= version} "odoc" {with-doc} "cohttp-eio" {>= "6.1.0"} "eio_main" {with-test} diff --git a/opentelemetry-client-cohttp-lwt.opam b/opentelemetry-client-cohttp-lwt.opam index a486137f..7c9b7894 100644 --- a/opentelemetry-client-cohttp-lwt.opam +++ b/opentelemetry-client-cohttp-lwt.opam @@ -16,6 +16,7 @@ depends: [ "ocaml" {>= "4.08"} "mtime" {>= "1.4"} "opentelemetry" {= version} + "opentelemetry-client" {= version} "odoc" {with-doc} "lwt" {>= "5.3"} "lwt_ppx" {>= "2.0"} diff --git a/opentelemetry-client-ocurl-lwt.opam b/opentelemetry-client-ocurl-lwt.opam index 263582b0..1abeacf5 100644 --- a/opentelemetry-client-ocurl-lwt.opam +++ b/opentelemetry-client-ocurl-lwt.opam @@ -16,6 +16,7 @@ depends: [ "ocaml" {>= "4.08"} "mtime" {>= "1.4"} "opentelemetry" {= version} + "opentelemetry-client" {= version} "odoc" {with-doc} "ezcurl-lwt" {>= "0.2.3"} "ocurl" diff --git a/opentelemetry-client-ocurl.opam b/opentelemetry-client-ocurl.opam index de605919..12b01e4e 100644 --- a/opentelemetry-client-ocurl.opam +++ b/opentelemetry-client-ocurl.opam @@ -16,6 +16,7 @@ depends: [ "ocaml" {>= "4.08"} "mtime" {>= "1.4"} "opentelemetry" {= version} + "opentelemetry-client" {= version} "odoc" {with-doc} "ezcurl" {>= "0.2.3"} "ocurl" diff --git a/opentelemetry-client.opam b/opentelemetry-client.opam new file mode 100644 index 00000000..c646e1b9 --- /dev/null +++ b/opentelemetry-client.opam @@ -0,0 +1,39 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "0.12" +synopsis: "Client SDK for https://opentelemetry.io" +maintainer: [ + "Simon Cruanes " + "Matt Bray " + "ELLIOTTCABLE " +] +authors: ["the Imandra team and contributors"] +license: "MIT" +tags: ["tracing" "opentelemetry" "sdk"] +homepage: "https://github.com/imandra-ai/ocaml-opentelemetry" +bug-reports: "https://github.com/imandra-ai/ocaml-opentelemetry/issues" +depends: [ + "dune" {>= "2.9"} + "opentelemetry" {= version} + "odoc" {with-doc} + "alcotest" {with-test} + "saturn" {>= "1.0" & < "2.0"} + "thread-local-storage" {>= "0.2" & < "0.3"} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "--promote-install-files=false" + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] + ["dune" "install" "-p" name "--create-install-files" name] +] +dev-repo: "git+https://github.com/imandra-ai/ocaml-opentelemetry.git" diff --git a/opentelemetry-logs.opam b/opentelemetry-logs.opam index f3a5c4ee..90a192fa 100644 --- a/opentelemetry-logs.opam +++ b/opentelemetry-logs.opam @@ -1,7 +1,7 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" version: "0.12" -synopsis: "Opentelemetry tracing for Cohttp HTTP servers" +synopsis: "Opentelemetry-based reporter for Logs" maintainer: [ "Simon Cruanes " "Matt Bray " diff --git a/opentelemetry.opam b/opentelemetry.opam index 7e3128d9..5d2d150c 100644 --- a/opentelemetry.opam +++ b/opentelemetry.opam @@ -17,8 +17,6 @@ depends: [ "ocaml" {>= "4.08"} "ptime" "hmap" - "atomic" - "thread-local-storage" {>= "0.2" & < "0.3"} "odoc" {with-doc} "alcotest" {with-test} "pbrt" {>= "4.0" & < "5.0"} @@ -26,7 +24,7 @@ depends: [ "ocamlformat" {with-dev-setup & >= "0.27" & < "0.28"} "mtime" {>= "1.4"} ] -depopts: ["trace" "lwt" "eio"] +depopts: ["atomic" "trace" "thread-local-storage" "lwt" "eio"] conflicts: [ "trace" {< "0.10"} ] diff --git a/src/ambient-context/basic_map.ml b/src/ambient-context/basic_map.ml new file mode 100644 index 00000000..3575835a --- /dev/null +++ b/src/ambient-context/basic_map.ml @@ -0,0 +1,55 @@ +(** Extremely basic storage using a map from thread id to context *) + +open Opentelemetry_ambient_context_core + +open struct + module Atomic = Opentelemetry_atomic.Atomic + + module Int_map = Map.Make (struct + type t = int + + let compare : t -> t -> int = Stdlib.compare + end) + + type st = { m: Context.t ref Int_map.t Atomic.t } [@@unboxed] + + let get (self : st) : Context.t = + let tid = Thread.id @@ Thread.self () in + match Int_map.find tid (Atomic.get self.m) with + | exception Not_found -> Context.empty + | ctx_ref -> !ctx_ref + + let with_context (self : st) ctx f = + let tid = Thread.id @@ Thread.self () in + + let ctx_ref = + try Int_map.find tid (Atomic.get self.m) + with Not_found -> + let r = ref Context.empty in + while + let m = Atomic.get self.m in + let m' = Int_map.add tid r m in + not (Atomic.compare_and_set self.m m m') + do + () + done; + r + in + + let old_ctx = !ctx_ref in + ctx_ref := ctx; + + let finally () = ctx_ref := old_ctx in + Fun.protect ~finally f +end + +let create_storage () : Storage.t = + let st = { m = Atomic.make Int_map.empty } in + { + name = "basic-map"; + get_context = (fun () -> get st); + with_context = (fun ctx f -> with_context st ctx f); + } + +(** Default storage *) +let storage : Storage.t = create_storage () diff --git a/src/ambient-context/core/context.ml b/src/ambient-context/core/context.ml new file mode 100644 index 00000000..658a83e6 --- /dev/null +++ b/src/ambient-context/core/context.ml @@ -0,0 +1,7 @@ +type t = Hmap.t + +type 'a key = 'a Hmap.key + +let empty : t = Hmap.empty + +let[@inline] new_key () : _ key = Hmap.Key.create () diff --git a/src/ambient-context/core/dune b/src/ambient-context/core/dune new file mode 100644 index 00000000..97961205 --- /dev/null +++ b/src/ambient-context/core/dune @@ -0,0 +1,5 @@ +(library + (name opentelemetry_ambient_context_core) + (public_name opentelemetry.ambient-context.core) + (synopsis "Core definitions for ambient-context") + (libraries hmap)) diff --git a/src/ambient-context/core/storage.ml b/src/ambient-context/core/storage.ml new file mode 100644 index 00000000..89bc3931 --- /dev/null +++ b/src/ambient-context/core/storage.ml @@ -0,0 +1,44 @@ +(** Storage implementation. + + There is a singleton storage for a given program, responsible for providing + ambient context to the rest of the program. *) + +type t = { + name: string; + get_context: unit -> Context.t; + with_context: 'a. Context.t -> (unit -> 'a) -> 'a; +} +(** Storage type *) + +(** Name of the storage implementation. *) +let[@inline] name self = self.name + +(** Get the context from the current storage, or [Hmap.empty] if there is no + ambient context. *) +let[@inline] get_context self = self.get_context () + +(** [with_context storage ctx f] calls [f()] in an ambient context in which + [get_context()] will return [ctx]. Once [f()] returns, the storage is reset + to its previous value. *) +let[@inline] with_context self ctx f = self.with_context ctx f + +(** Get the ambient context and then look up [k] in it *) +let[@inline] get self (k : 'a Context.key) : 'a option = + Hmap.find k (get_context self) + +(** [with_key_bound_to storage k v f] calls [f()] in a context updated to have + [k] map to [v]. *) +let with_key_bound_to self k v f = + let ctx = get_context self in + let new_ctx = Hmap.add k v ctx in + self.with_context new_ctx f + +(** [with_key_unbound storage k f] calls [f()] in a context updated to have [k] + bound to no value. *) +let with_key_unbound self k f = + let ctx = get_context self in + if Hmap.mem k ctx then ( + let new_ctx = Hmap.rem k ctx in + self.with_context new_ctx f + ) else + f () diff --git a/src/ambient-context/default_.map.ml b/src/ambient-context/default_.map.ml new file mode 100644 index 00000000..aecceb56 --- /dev/null +++ b/src/ambient-context/default_.map.ml @@ -0,0 +1 @@ +let storage = Basic_map.storage diff --git a/src/ambient-context/default_.mli b/src/ambient-context/default_.mli new file mode 100644 index 00000000..9f14c9bd --- /dev/null +++ b/src/ambient-context/default_.mli @@ -0,0 +1,2 @@ +val storage : Storage.t +(** Default storage. *) diff --git a/src/ambient-context/default_.tls.ml b/src/ambient-context/default_.tls.ml new file mode 100644 index 00000000..7411b452 --- /dev/null +++ b/src/ambient-context/default_.tls.ml @@ -0,0 +1 @@ +let storage = Opentelemetry_ambient_context_tls.storage diff --git a/src/ambient-context/dls.ml.tmp b/src/ambient-context/dls.ml.tmp new file mode 100644 index 00000000..dee2dc49 --- /dev/null +++ b/src/ambient-context/dls.ml.tmp @@ -0,0 +1,46 @@ +(* TODO: conditional compilation, and use Basic_map in each DLS *) + +(** Storage using DLS. *) + +open Opentelemetry_ambient_context_core + +open struct + module DLS = Domain.DLS + + module Int_map = Map.Make (struct + type t = int + + let compare : t -> t -> int = Stdlib.compare + end) + + (* key used to access the context *) + let dls_k_context : Context.t ref Int_map.t DLS.key = + DLS.new_key + ~split_from_parent:(fun _ -> Int_map.empty) + (fun _ -> Int_map.empty) + + let dls_get () = + let tid = Thread.id @@ Thread.self () in + let map_ref = DLS.get dls_k_context in + try !(Int_map.find tid map_ref) with Not_found -> Hmap.empty + + let dls_with ctx f = + let tid = Thread.id @@ Thread.self () in + let map = DLS.get dls_k_context in + let ctx_ref = + try Int_map.find tid map + with Not_found -> + let r = ref Context.empty in + DLS.set dls_k_context (Int_map.add tid r map); + r + in + + let old_ctx = !ctx_ref in + ctx_ref := ctx; + + let finally () = ctx_ref := old_ctx in + Fun.protect ~finally f +end + +let storage : Storage.t = + { name = "dls-int-map"; get_context = dls_get; with_context = dls_with } diff --git a/src/ambient-context/dune b/src/ambient-context/dune index 68e7acf5..65425271 100644 --- a/src/ambient-context/dune +++ b/src/ambient-context/dune @@ -3,13 +3,18 @@ (public_name opentelemetry.ambient-context) (synopsis "Abstraction over thread-local storage and fiber-local storage mechanisms") - (private_modules hmap_key_) + (flags + :standard + -open + Opentelemetry_ambient_context_core + -open + Opentelemetry_atomic) (libraries - thread-local-storage - threads - atomic - opentelemetry.ambient-context.types + hmap + opentelemetry.ambient-context.core + opentelemetry.atomic (select - hmap_key_.ml + default_.ml from - (-> hmap_key_.new.ml)))) + (opentelemetry.ambient-context.tls -> default_.tls.ml) + (-> default_.map.ml)))) diff --git a/src/ambient-context/eio/dune b/src/ambient-context/eio/dune index f3f76be7..fa56edd8 100644 --- a/src/ambient-context/eio/dune +++ b/src/ambient-context/eio/dune @@ -4,4 +4,4 @@ (synopsis "Storage backend for ambient-context using Eio's fibre-local storage") (optional) ; eio - (libraries eio hmap opentelemetry.ambient-context thread-local-storage)) + (libraries eio hmap opentelemetry.ambient-context.core)) diff --git a/src/ambient-context/eio/opentelemetry_ambient_context_eio.ml b/src/ambient-context/eio/opentelemetry_ambient_context_eio.ml index 56451a1a..1da61600 100644 --- a/src/ambient-context/eio/opentelemetry_ambient_context_eio.ml +++ b/src/ambient-context/eio/opentelemetry_ambient_context_eio.ml @@ -1,39 +1,15 @@ +open Opentelemetry_ambient_context_core module Fiber = Eio.Fiber open struct - let _internal_key : Hmap.t Fiber.key = Fiber.create_key () - - let ( let* ) = Option.bind + let fiber_context_key : Context.t Fiber.key = Fiber.create_key () end -module M = struct - let name = "Storage_eio" - - let[@inline] get_map () = Fiber.get _internal_key - - let[@inline] with_map m cb = Fiber.with_binding _internal_key m cb - - let create_key = Hmap.Key.create - - let get k = - let* context = get_map () in - Hmap.find k context - - let with_binding k v cb = - let new_context = - match get_map () with - | None -> Hmap.singleton k v - | Some old_context -> Hmap.add k v old_context - in - with_map new_context cb - - let without_binding k cb = - let new_context = - match get_map () with - | None -> Hmap.empty - | Some old_context -> Hmap.rem k old_context - in - with_map new_context cb -end - -let storage () : Opentelemetry_ambient_context.storage = (module M) +let storage : Storage.t = + { + name = "eio"; + get_context = + (fun () -> + Fiber.get fiber_context_key |> Option.value ~default:Hmap.empty); + with_context = (fun ctx f -> Fiber.with_binding fiber_context_key ctx f); + } diff --git a/src/ambient-context/eio/opentelemetry_ambient_context_eio.mli b/src/ambient-context/eio/opentelemetry_ambient_context_eio.mli deleted file mode 100644 index ac5cf8ba..00000000 --- a/src/ambient-context/eio/opentelemetry_ambient_context_eio.mli +++ /dev/null @@ -1,2 +0,0 @@ -val storage : unit -> Opentelemetry_ambient_context.storage -(** Storage using Eio's fibers local storage *) diff --git a/src/ambient-context/hmap_key_.new.ml b/src/ambient-context/hmap_key_.new.ml deleted file mode 100644 index 1925b70e..00000000 --- a/src/ambient-context/hmap_key_.new.ml +++ /dev/null @@ -1 +0,0 @@ -let key : Hmap.t Thread_local_storage.t = Thread_local_storage.create () diff --git a/src/ambient-context/lwt/dune b/src/ambient-context/lwt/dune index 68a9de15..fb7398fb 100644 --- a/src/ambient-context/lwt/dune +++ b/src/ambient-context/lwt/dune @@ -4,4 +4,4 @@ (optional) ; lwt (synopsis "Storage backend for ambient-context using Lwt's sequence-associated storage") - (libraries lwt opentelemetry.ambient-context thread-local-storage)) + (libraries lwt opentelemetry.ambient-context.core)) diff --git a/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.ml b/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.ml index b75105f2..d7187670 100644 --- a/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.ml +++ b/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.ml @@ -1,37 +1,15 @@ +(** Storage using Lwt keys *) + +open Opentelemetry_ambient_context_core + open struct - let _internal_key : Hmap.t Lwt.key = Lwt.new_key () - - let ( let* ) = Option.bind + let lwt_context_key : Context.t Lwt.key = Lwt.new_key () end -module M = struct - let name = "Storage_lwt" - - let[@inline] get_map () = Lwt.get _internal_key - - let[@inline] with_map m cb = Lwt.with_value _internal_key (Some m) cb - - let create_key = Hmap.Key.create - - let get k = - let* context = get_map () in - Hmap.find k context - - let with_binding k v cb = - let new_context = - match get_map () with - | None -> Hmap.singleton k v - | Some old_context -> Hmap.add k v old_context - in - with_map new_context cb - - let without_binding k cb = - let new_context = - match get_map () with - | None -> Hmap.empty - | Some old_context -> Hmap.rem k old_context - in - with_map new_context cb -end - -let storage () : Opentelemetry_ambient_context.storage = (module M) +let storage : Storage.t = + { + name = "lwt"; + get_context = + (fun () -> Lwt.get lwt_context_key |> Option.value ~default:Hmap.empty); + with_context = (fun ctx f -> Lwt.with_value lwt_context_key (Some ctx) f); + } diff --git a/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.mli b/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.mli deleted file mode 100644 index 3c462a8d..00000000 --- a/src/ambient-context/lwt/opentelemetry_ambient_context_lwt.mli +++ /dev/null @@ -1,2 +0,0 @@ -val storage : unit -> Opentelemetry_ambient_context.storage -(** Storage using Lwt keys *) diff --git a/src/ambient-context/opentelemetry_ambient_context.ml b/src/ambient-context/opentelemetry_ambient_context.ml index 7c622eb7..d8a18c57 100644 --- a/src/ambient-context/opentelemetry_ambient_context.ml +++ b/src/ambient-context/opentelemetry_ambient_context.ml @@ -1,124 +1,49 @@ -module TLS = Thread_local_storage -include Opentelemetry_ambient_context_types +include Opentelemetry_ambient_context_core -type 'a key = int * 'a Hmap.key +let default_storage = Default_.storage -let debug = - match Sys.getenv_opt "OCAML_AMBIENT_CONTEXT_DEBUG" with - | Some ("1" | "true") -> true - | _ -> false - -let _debug_id_ = Atomic.make 0 - -let[@inline] generate_debug_id () = Atomic.fetch_and_add _debug_id_ 1 - -let compare_key : int -> int -> int = Stdlib.compare - -module Storage_tls_hmap = struct - let[@inline] ( let* ) o f = - match o with - | None -> None - | Some x -> f x - - let key : Hmap.t TLS.t = Hmap_key_.key - - let name = "Storage_tls" - - let[@inline] get_map () = TLS.get_opt key - - let[@inline] with_map m cb = - let old = TLS.get_opt key |> Option.value ~default:Hmap.empty in - TLS.set key m; - Fun.protect ~finally:(fun () -> TLS.set key old) cb - - let create_key = Hmap.Key.create - - let get k = - let* context = get_map () in - Hmap.find k context - - let with_binding k v cb = - let new_context = - match get_map () with - | None -> Hmap.singleton k v - | Some old_context -> Hmap.add k v old_context - in - with_map new_context @@ fun _context -> cb () - - let without_binding k cb = - match get_map () with - | None -> cb () - | Some old_context -> - let new_context = Hmap.rem k old_context in - with_map new_context @@ fun _context -> cb () +open struct + (** The current ambient-context storage. *) + let cur_storage : Storage.t Atomic.t = Atomic.make Default_.storage end -let default_storage : storage = (module Storage_tls_hmap) +let[@inline] get_current_storage () = Atomic.get cur_storage -let k_current_storage : storage TLS.t = TLS.create () +(* NOTE: we can't really "map" each local context from the old + to the new. Maybe the old storage is TLS based and the new one + is per-lwt-task. *) +let set_current_storage (storage : Storage.t) = Atomic.set cur_storage storage -let get_current_storage () = - match TLS.get_exn k_current_storage with - | v -> v - | exception TLS.Not_set -> - let v = default_storage in - TLS.set k_current_storage v; - v +(** {2 Functions operating with the current storage} *) -let create_key () = - let (module Store : STORAGE) = get_current_storage () in - if not debug then - 0, Store.create_key () - else ( - let id = generate_debug_id () in - Printf.printf "%s: create_key %i\n%!" Store.name id; - id, Store.create_key () - ) +(** Get the context from the current storage, or [Hmap.empty] if there is no + ambient context. *) +let[@inline] get_context () = Storage.get_context (Atomic.get cur_storage) -let get (id, k) = - let (module Store : STORAGE) = get_current_storage () in - if not debug then - Store.get k - else ( - let rv = Store.get k in - (match rv with - | Some _ -> Printf.printf "%s: get %i -> Some\n%!" Store.name id - | None -> Printf.printf "%s: get %i -> None\n%!" Store.name id); - rv - ) +(** [with_context ctx f] calls [f()] in an ambient context in which + [get_context()] will return [ctx]. Once [f()] returns, the storage is reset + to its previous value. *) +let[@inline] with_context ctx f = + Storage.with_context (Atomic.get cur_storage) ctx f -let with_binding : 'a key -> 'a -> (unit -> 'r) -> 'r = - fun (id, k) v cb -> - let (module Store : STORAGE) = get_current_storage () in - if not debug then - Store.with_binding k v cb - else ( - Printf.printf "%s: with_binding %i enter\n%!" Store.name id; - let rv = Store.with_binding k v cb in - Printf.printf "%s: with_binding %i exit\n%!" Store.name id; - rv - ) +(** Get the ambient context and then look up [k] in it *) +let[@inline] get (k : 'a Context.key) : 'a option = Hmap.find k (get_context ()) -let without_binding (id, k) cb = - let (module Store : STORAGE) = get_current_storage () in - if not debug then - Store.without_binding k cb - else ( - Printf.printf "%s: without_binding %i enter\n%!" Store.name id; - let rv = Store.without_binding k cb in - Printf.printf "%s: without_binding %i exit\n%!" Store.name id; - rv - ) +(** [with_key_bound_to storage k v f] calls [f()] in a context updated to have + [k] map to [v]. *) +let with_key_bound_to k v f = + let storage = get_current_storage () in + let ctx = Storage.get_context storage in + let new_ctx = Hmap.add k v ctx in + Storage.with_context storage new_ctx f -let set_storage_provider store_new = - let store_before = get_current_storage () in - if store_new == store_before then - () - else - TLS.set k_current_storage store_new; - if debug then ( - let (module Store_before : STORAGE) = store_before in - let (module Store_new : STORAGE) = store_new in - Printf.printf "set_storage_provider %s (previously %s)\n%!" Store_new.name - Store_before.name - ) +(** [with_key_unbound k f] calls [f()] in a context updated to have [k] bound to + no value. *) +let with_key_unbound k f = + let storage = Atomic.get cur_storage in + let ctx = Storage.get_context storage in + if Hmap.mem k ctx then ( + let new_ctx = Hmap.rem k ctx in + Storage.with_context storage new_ctx f + ) else + f () diff --git a/src/ambient-context/opentelemetry_ambient_context.mli b/src/ambient-context/opentelemetry_ambient_context.mli deleted file mode 100644 index 8f19ff0c..00000000 --- a/src/ambient-context/opentelemetry_ambient_context.mli +++ /dev/null @@ -1,55 +0,0 @@ -(** Ambient context. - - The ambient context, like the Matrix, is everywhere around you. - - It is responsible for keeping track of that context in a manner that's - consistent with the program's choice of control flow paradigm: - - - for synchronous/threaded/direct style code, {b TLS} ("thread local - storage") keeps track of a global variable per thread. Each thread has its - own copy of the variable and updates it independently of other threads. - - - for Lwt, any ['a Lwt.t] created inside the [with_binding k v (fun _ -> …)] - will inherit the [k := v] assignment. - - - for Eio, fibers created inside [with_binding k v (fun () -> …)] will - inherit the [k := v] assignment. This is consistent with the structured - concurrency approach of Eio. - - The only data stored by this storage is a {!Hmap.t}, ie a heterogeneous map. - Various users (libraries, user code, etc.) can create their own {!key} to - store what they are interested in, without affecting other parts of the - storage. *) - -module Types := Opentelemetry_ambient_context_types - -module type STORAGE = Types.STORAGE - -type storage = (module STORAGE) - -val default_storage : storage - -val get_current_storage : unit -> storage - -val set_storage_provider : storage -> unit - -type 'a key -(** A key that can be mapped to values of type ['a] in the ambient context. *) - -val compare_key : int -> int -> int -(** Total order on keys *) - -val create_key : unit -> 'a key -(** Create a new fresh key, distinct from any previously created key. *) - -val get : 'a key -> 'a option -(** Get the current value for a given key, or [None] if no value was associated - with the key in the ambient context. *) - -val with_binding : 'a key -> 'a -> (unit -> 'r) -> 'r -(** [with_binding k v cb] calls [cb()] in a context in which [k] is bound to - [v]. This does not affect storage outside of [cb()]. *) - -val without_binding : 'a key -> (unit -> 'b) -> 'b -(** [without_binding k cb] calls [cb()] in a context where [k] has no binding - (possibly shadowing the current ambient binding of [k] if it exists). *) diff --git a/src/ambient-context/tls/dune b/src/ambient-context/tls/dune new file mode 100644 index 00000000..c0a285bc --- /dev/null +++ b/src/ambient-context/tls/dune @@ -0,0 +1,6 @@ +(library + (name opentelemetry_ambient_context_tls) + (public_name opentelemetry.ambient-context.tls) + (synopsis "Implementation of ambient-context from thread-local-storage") + (optional) ; TLS + (libraries opentelemetry.ambient-context.core thread-local-storage)) diff --git a/src/ambient-context/tls/opentelemetry_ambient_context_tls.ml b/src/ambient-context/tls/opentelemetry_ambient_context_tls.ml new file mode 100644 index 00000000..7d40387c --- /dev/null +++ b/src/ambient-context/tls/opentelemetry_ambient_context_tls.ml @@ -0,0 +1,23 @@ +open Opentelemetry_ambient_context_core + +open struct + module TLS = Thread_local_storage + + (* key used to access the context *) + let tls_k_context : Context.t TLS.t = TLS.create () +end + +let storage : Storage.t = + { + name = "tls"; + get_context = + (fun () -> try TLS.get_exn tls_k_context with TLS.Not_set -> Hmap.empty); + with_context = + (fun ctx f -> + let old = + try TLS.get_exn tls_k_context with TLS.Not_set -> Hmap.empty + in + let finally () = TLS.set tls_k_context old in + TLS.set tls_k_context ctx; + Fun.protect ~finally f); + } diff --git a/src/ambient-context/types/dune b/src/ambient-context/types/dune deleted file mode 100644 index b9e4146c..00000000 --- a/src/ambient-context/types/dune +++ /dev/null @@ -1,4 +0,0 @@ -(library - (name opentelemetry_ambient_context_types) - (public_name opentelemetry.ambient-context.types) - (libraries hmap thread-local-storage)) diff --git a/src/ambient-context/types/opentelemetry_ambient_context_types.ml b/src/ambient-context/types/opentelemetry_ambient_context_types.ml deleted file mode 100644 index 829f7789..00000000 --- a/src/ambient-context/types/opentelemetry_ambient_context_types.ml +++ /dev/null @@ -1,19 +0,0 @@ -type 'a key = 'a Hmap.key - -module type STORAGE = sig - val name : string - - val get_map : unit -> Hmap.t option - - val with_map : Hmap.t -> (unit -> 'b) -> 'b - - val create_key : unit -> 'a key - - val get : 'a key -> 'a option - - val with_binding : 'a key -> 'a -> (unit -> 'b) -> 'b - - val without_binding : 'a key -> (unit -> 'b) -> 'b -end - -type storage = (module STORAGE) diff --git a/src/ambient-context/types/opentelemetry_ambient_context_types.mli b/src/ambient-context/types/opentelemetry_ambient_context_types.mli deleted file mode 100644 index 738b7520..00000000 --- a/src/ambient-context/types/opentelemetry_ambient_context_types.mli +++ /dev/null @@ -1,32 +0,0 @@ -(** Storage implementation. - - There is a singleton storage for a given program, responsible for providing - ambient context to the rest of the program. *) - -type 'a key = 'a Hmap.key - -module type STORAGE = sig - val name : string - (** Name of the storage implementation. *) - - val get_map : unit -> Hmap.t option - (** Get the hmap from the current ambient context, or [None] if there is no - ambient context. *) - - val with_map : Hmap.t -> (unit -> 'b) -> 'b - (** [with_hmap h cb] calls [cb()] in an ambient context in which [get_map()] - will return [h]. Once [cb()] returns, the storage is reset to its previous - value. *) - - val create_key : unit -> 'a key - (** Create a new storage key, guaranteed to be distinct from any previously - created key. *) - - val get : 'a key -> 'a option - - val with_binding : 'a key -> 'a -> (unit -> 'b) -> 'b - - val without_binding : 'a key -> (unit -> 'b) -> 'b -end - -type storage = (module STORAGE) diff --git a/src/client-cohttp-eio/config.ml b/src/client-cohttp-eio/config.ml index 930881ff..4f3677de 100644 --- a/src/client-cohttp-eio/config.ml +++ b/src/client-cohttp-eio/config.ml @@ -1,7 +1,7 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t -module Env = Opentelemetry_client.Config.Env () +module Env = Opentelemetry_client.Client_config.Env () -let pp = Opentelemetry_client.Config.pp +let pp = Opentelemetry_client.Client_config.pp let make = Env.make (fun common () -> common) diff --git a/src/client-cohttp-eio/config.mli b/src/client-cohttp-eio/config.mli index 100bb696..dff28732 100644 --- a/src/client-cohttp-eio/config.mli +++ b/src/client-cohttp-eio/config.mli @@ -1,4 +1,4 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t (** Configuration. To build one, use {!make} below. This might be extended with more fields in @@ -6,7 +6,7 @@ type t = Opentelemetry_client.Config.t val pp : Format.formatter -> t -> unit -val make : (unit -> t) Opentelemetry_client.Config.make +val make : (unit -> t) Opentelemetry_client.Client_config.make (** Make a configuration {!t}. *) -module Env : Opentelemetry_client.Config.ENV +module Env : Opentelemetry_client.Client_config.ENV diff --git a/src/client-cohttp-eio/dune b/src/client-cohttp-eio/dune index 79cf9393..2bed9063 100644 --- a/src/client-cohttp-eio/dune +++ b/src/client-cohttp-eio/dune @@ -6,7 +6,7 @@ (>= %{ocaml_version} 5.0)) (libraries opentelemetry - opentelemetry.client + opentelemetry-client eio eio.unix cohttp diff --git a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml index 9f71d52f..33e05830 100644 --- a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml +++ b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.ml @@ -5,13 +5,9 @@ open Eio.Std https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md *) -module OT = Opentelemetry module Config = Config -module Signal = Opentelemetry_client.Signal -module Batch = Opentelemetry_client.Batch open Opentelemetry - -let ( let@ ) = ( @@ ) +open Opentelemetry_client let spf = Printf.sprintf @@ -19,459 +15,180 @@ let set_headers = Config.Env.set_headers let get_headers = Config.Env.get_headers -let needs_gc_metrics = Atomic.make false +module Make (CTX : sig + val sw : Eio.Switch.t -let last_gc_metrics = Atomic.make (Mtime_clock.now ()) + val env : Eio_unix.Stdenv.base +end) = +struct + module IO : Generic_io.S_WITH_CONCURRENCY with type 'a t = 'a = struct + include Generic_io.Direct_style -let timeout_gc_metrics = Mtime.Span.(20 * s) + (* NOTE: This is only used in the main consumer thread, even though producers + might be in other domains *) -(* Cross-domain, thread-safe storage for GC metrics gathered from different fibres. *) -module GC_metrics : sig - val add : Proto.Metrics.resource_metrics -> unit + let sleep_s n = Eio.Time.sleep CTX.env#clock n - val drain : unit -> Proto.Metrics.resource_metrics list -end = struct - (* Used to prevent data races across domains *) - let mutex = Eio.Mutex.create () + let spawn f = Eio.Fiber.fork ~sw:CTX.sw f + end - let gc_metrics = ref [] + module Notifier : Generic_notifier.S with module IO = IO = struct + module IO = IO - let add m = - Eio.Mutex.use_rw ~protect:true mutex (fun () -> - gc_metrics := m :: !gc_metrics) + type t = { + mutex: Eio.Mutex.t; + cond: Eio.Condition.t; + } - let drain () = - Eio.Mutex.use_rw ~protect:true mutex (fun () -> - let metrics = !gc_metrics in - gc_metrics := []; - metrics) -end + let create () : t = + { mutex = Eio.Mutex.create (); cond = Eio.Condition.create () } -(* capture current GC metrics if {!needs_gc_metrics} is true, - or it has been a long time since the last GC metrics collection, - and push them into {!gc_metrics} for later collection *) -let sample_gc_metrics_if_needed () = - let now = Mtime_clock.now () in - let alarm = Atomic.compare_and_set needs_gc_metrics true false in - let timeout () = - let elapsed = Mtime.span now (Atomic.get last_gc_metrics) in - Mtime.Span.compare elapsed timeout_gc_metrics > 0 - in - if alarm || timeout () then ( - Atomic.set last_gc_metrics now; - let l = - OT.Metrics.make_resource_metrics - ~attrs:(Opentelemetry.GC_metrics.get_runtime_attributes ()) - @@ Opentelemetry.GC_metrics.get_metrics () - in - GC_metrics.add l - ) + let trigger self = + (* FIXME: this might be triggered from other threads!! how do we + ensure it runs in the Eio thread? *) + Eio.Condition.broadcast self.cond -type error = - [ `Status of int * Opentelemetry.Proto.Status.status - | `Failure of string - | `Sysbreak - ] + let delete = ignore -let n_errors = Atomic.make 0 + let wait self = + Eio.Mutex.lock self.mutex; + Eio.Condition.await self.cond self.mutex; + Eio.Mutex.unlock self.mutex -let n_dropped = Atomic.make 0 + (** Ensure we get signalled when the queue goes from empty to non-empty *) + let register_bounded_queue (self : t) (bq : _ Bounded_queue.t) : unit = + Bounded_queue.on_non_empty bq (fun () -> trigger self) + end -let report_err_ = function - | `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!" - | `Failure msg -> - Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg - | `Status - ( code, - { - Opentelemetry.Proto.Status.code = scode; - message; - details; - _presence = _; - } ) -> - let pp_details out l = - List.iter - (fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s)) - l - in - Format.eprintf - "@[<2>opentelemetry: export failed with@ http code=%d@ status \ - {@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@." - code scode - (Bytes.unsafe_to_string message) - pp_details details + module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct + module IO = IO + open Opentelemetry.Proto + module Httpc = Cohttp_eio.Client -module Httpc : sig - type t + type t = Httpc.t - val create : _ Eio.Net.t -> t + let authenticator = + match Ca_certs.authenticator () with + | Ok x -> x + | Error (`Msg m) -> + Fmt.failwith "Failed to create system store X509 authenticator: %s" m - val send : - t -> - url:string -> - decode:[ `Dec of Pbrt.Decoder.t -> 'a | `Ret of 'a ] -> - string -> - ('a, error) result -end = struct - open Opentelemetry.Proto - module Httpc = Cohttp_eio.Client - - type t = Httpc.t - - let authenticator = - match Ca_certs.authenticator () with - | Ok x -> x - | Error (`Msg m) -> - Fmt.failwith "Failed to create system store X509 authenticator: %s" m - - let https ~authenticator = - let tls_config = - match Tls.Config.client ~authenticator () with - | Error (`Msg msg) -> failwith ("tls configuration problem: " ^ msg) - | Ok tls_config -> tls_config - in - fun uri raw -> - let host = - Uri.host uri - |> Option.map (fun x -> Domain_name.(host_exn (of_string_exn x))) + let https ~authenticator = + let tls_config = + match Tls.Config.client ~authenticator () with + | Error (`Msg msg) -> failwith ("tls configuration problem: " ^ msg) + | Ok tls_config -> tls_config in - Tls_eio.client_of_flow ?host tls_config raw + fun uri raw -> + let host = + Uri.host uri + |> Option.map (fun x -> Domain_name.(host_exn (of_string_exn x))) + in + Tls_eio.client_of_flow ?host tls_config raw - let create net = Httpc.make ~https:(Some (https ~authenticator)) net + let create () = Httpc.make ~https:(Some (https ~authenticator)) CTX.env#net - (* send the content to the remote endpoint/path *) - let send (client : t) ~url ~decode (body : string) : ('a, error) result = - Switch.run @@ fun sw -> - let uri = Uri.of_string url in + let cleanup = ignore - let open Cohttp in - let headers = Header.(add_list (init ()) (Config.Env.get_headers ())) in - let headers = - Header.(add headers "Content-Type" "application/x-protobuf") - in + (* send the content to the remote endpoint/path *) + let send (client : t) ~url ~decode (body : string) : + ('a, Export_error.t) result = + Switch.run @@ fun sw -> + let uri = Uri.of_string url in - let body = Cohttp_eio.Body.of_string body in - let r = - try - let r = Httpc.post client ~sw ~headers ~body uri in - Ok r - with e -> Error e - in - match r with - | Error e -> - let err = - `Failure - (spf "sending signals via http POST to %S\nfailed with:\n%s" url - (Printexc.to_string e)) + let open Cohttp in + let headers = Header.(add_list (init ()) (Config.Env.get_headers ())) in + let headers = + Header.(add headers "Content-Type" "application/x-protobuf") in - Error err - | Ok (resp, body) -> - let body = Eio.Buf_read.(parse_exn take_all) body ~max_size:max_int in - let code = Response.status resp |> Code.code_of_status in - if not (Code.is_error code) then ( - match decode with - | `Ret x -> Ok x - | `Dec f -> + + let body = Cohttp_eio.Body.of_string body in + let r = + try + let r = Httpc.post client ~sw ~headers ~body uri in + Ok r + with e -> Error e + in + match r with + | Error e -> + let err = + `Failure + (spf "sending signals via http POST to %S\nfailed with:\n%s" url + (Printexc.to_string e)) + in + Error err + | Ok (resp, body) -> + let body = Eio.Buf_read.(parse_exn take_all) body ~max_size:max_int in + let code = Response.status resp |> Code.code_of_status in + if not (Code.is_error code) then ( + match decode with + | `Ret x -> Ok x + | `Dec f -> + let dec = Pbrt.Decoder.of_string body in + let r = + try Ok (f dec) + with e -> + let bt = Printexc.get_backtrace () in + Error + (`Failure + (spf "decoding failed with:\n%s\n%s" (Printexc.to_string e) + bt)) + in + r + ) else ( let dec = Pbrt.Decoder.of_string body in + let r = - try Ok (f dec) + try + let status = Status.decode_pb_status dec in + Error (`Status (code, status)) with e -> let bt = Printexc.get_backtrace () in Error (`Failure - (spf "decoding failed with:\n%s\n%s" (Printexc.to_string e) - bt)) + (spf + "httpc: decoding of status (url=%S, code=%d) failed with:\n\ + %s\n\ + status: %S\n\ + %s" + url code (Printexc.to_string e) body bt)) in r - ) else ( - let dec = Pbrt.Decoder.of_string body in - - let r = - try - let status = Status.decode_pb_status dec in - Error (`Status (code, status)) - with e -> - let bt = Printexc.get_backtrace () in - Error - (`Failure - (spf - "httpc: decoding of status (url=%S, code=%d) failed with:\n\ - %s\n\ - status: %S\n\ - %s" - url code (Printexc.to_string e) body bt)) - in - r - ) + ) + end end -(** An emitter. This is used by {!Backend} below to forward traces/metrics/… - from the program to whatever collector client we have. *) -module type EMITTER = sig - open Opentelemetry.Proto +let create_consumer ?(stop = Atomic.make false) ?(config = Config.make ()) ~sw + ~env () : Consumer.any_resource_builder = + let module M = Make (struct + let sw = sw - val push_trace : Trace.resource_spans list -> unit + let env = env + end) in + let module C = Generic_http_consumer.Make (M.IO) (M.Notifier) (M.Httpc) in + C.consumer ~ticker_task:(Some 0.5) ~stop ~config () - val push_metrics : Metrics.resource_metrics list -> unit +let create_exporter ?stop ?(config = Config.make ()) ~sw ~env () = + let consumer = create_consumer ?stop ~config ~sw ~env () in + let bq = + Bounded_queue_sync.create + ~high_watermark:Bounded_queue.Defaults.high_watermark () + in + Exporter_queued.create ~q:bq ~consumer () + |> Exporter_add_batching.add_batching ~config - val push_logs : Logs.resource_logs list -> unit - - val set_on_tick_callbacks : (unit -> unit) AList.t -> unit - - val tick : unit -> unit - - val cleanup : on_done:(unit -> unit) -> unit -> unit -end - -(* make an emitter. - - exceptions inside should be caught, see - https://opentelemetry.io/docs/reference/specification/error-handling/ *) -let mk_emitter ~stop ~net (config : Config.t) : (module EMITTER) = - (* local helpers *) - let open struct - let client = - (* Prime RNG state for TLS *) - Mirage_crypto_rng_unix.use_default (); - Httpc.create net - - let send_http ~url data : unit = - let r = Httpc.send client ~url ~decode:(`Ret ()) data in - match r with - | Ok () -> () - | Error `Sysbreak -> - Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set stop true - | Error err -> - (* TODO: log error _via_ otel? *) - Atomic.incr n_errors; - report_err_ err; - (* avoid crazy error loop *) - Eio_unix.sleep 3. - - let timeout = - if config.batch_timeout_ms > 0 then - Some Mtime.Span.(config.batch_timeout_ms * ms) - else - None - - let batch_traces : Proto.Trace.resource_spans Batch.t = - Batch.make ?batch:config.batch_traces ?timeout () - - let batch_metrics : Proto.Metrics.resource_metrics Batch.t = - Batch.make ?batch:config.batch_metrics ?timeout () - - let batch_logs : Proto.Logs.resource_logs Batch.t = - Batch.make ?batch:config.batch_logs ?timeout () - - let push_to_batch b e = - match Batch.push b e with - | `Ok -> () - | `Dropped -> Atomic.incr n_errors - - let[@inline] guard_exn_ where f = - try f () - with e -> - let bt = Printexc.get_backtrace () in - Printf.eprintf "opentelemetry-eio: uncaught exception in %s: %s\n%s\n%!" - where (Printexc.to_string e) bt - - let push_traces x = - let@ () = guard_exn_ "push trace" in - push_to_batch batch_traces x - - let push_metrics x = - let@ () = guard_exn_ "push metrics" in - sample_gc_metrics_if_needed (); - push_to_batch batch_metrics x - - let push_logs x = - let@ () = guard_exn_ "push logs" in - push_to_batch batch_logs x - - let maybe_emit (batch : 'a Batch.t) url (f : 'a list -> string) ~now ~force - () : unit = - Batch.pop_if_ready ~force ~now batch - |> Option.iter (fun signals -> f signals |> send_http ~url) - - let emit_traces_maybe = - maybe_emit batch_traces config.url_traces Signal.Encode.traces - - let emit_metrics_maybe = - maybe_emit batch_metrics config.url_metrics (fun collected_metrics -> - let gc_metrics = GC_metrics.drain () in - gc_metrics @ collected_metrics |> Signal.Encode.metrics) - - let emit_logs_maybe = - maybe_emit batch_logs config.url_logs Signal.Encode.logs - - let emit_all ~force : unit = - Switch.run @@ fun sw -> - let now = Mtime_clock.now () in - Fiber.fork ~sw @@ emit_logs_maybe ~now ~force; - Fiber.fork ~sw @@ emit_metrics_maybe ~now ~force; - Fiber.fork ~sw @@ emit_traces_maybe ~now ~force - - let on_tick_cbs_ = Atomic.make (AList.make ()) - - let run_tick_callbacks () = - List.iter - (fun f -> - try f () - with e -> - Printf.eprintf "on tick callback raised: %s\n" - (Printexc.to_string e)) - (AList.get @@ Atomic.get on_tick_cbs_) - end in - let module M = struct - let set_on_tick_callbacks = Atomic.set on_tick_cbs_ - - let push_trace e = push_traces e - - let push_metrics e = push_metrics e - - let push_logs e = push_logs e - - let tick () = - if Config.Env.get_debug () then - Printf.eprintf "tick (from domain %d)\n%!" (Domain.self () :> int); - run_tick_callbacks (); - sample_gc_metrics_if_needed (); - emit_all ~force:false - - let cleanup ~on_done () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: exiting…\n%!"; - Atomic.set stop true; - run_tick_callbacks (); - sample_gc_metrics_if_needed (); - emit_all ~force:true; - on_done () - end in - (module M : EMITTER) - -module Backend (Emitter : EMITTER) : Opentelemetry.Collector.BACKEND = struct - open Opentelemetry.Proto - open Opentelemetry.Collector - open Emitter - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send spans %a@." - (Format.pp_print_list Trace.pp_resource_spans) - l); - push_trace l; - ret ()); - } - - let last_sent_metrics = Atomic.make (Mtime_clock.now ()) - - let timeout_sent_metrics = Mtime.Span.(5 * s) - (* send metrics from time to time *) - - let signal_emit_gc_metrics () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: emit GC metrics requested\n%!"; - Atomic.set needs_gc_metrics true - - let additional_metrics () : Metrics.resource_metrics list = - (* add exporter metrics to the lot? *) - let last_emit = Atomic.get last_sent_metrics in - let now = Mtime_clock.now () in - let add_own_metrics = - let elapsed = Mtime.span last_emit now in - Mtime.Span.compare elapsed timeout_sent_metrics > 0 - in - - (* there is a possible race condition here, as several threads might update - metrics at the same time. But that's harmless. *) - if add_own_metrics then ( - Atomic.set last_sent_metrics now; - let open OT.Metrics in - [ - make_resource_metrics - [ - sum ~name:"otel.export.dropped" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_dropped); - ]; - sum ~name:"otel.export.errors" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_errors); - ]; - ]; - ] - ) else - [] - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send metrics %a@." - (Format.pp_print_list Metrics.pp_resource_metrics) - m); - - let m = List.rev_append (additional_metrics ()) m in - push_metrics m; - ret ()); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send logs %a@." - (Format.pp_print_list Logs.pp_resource_logs) - m); - push_logs m; - ret ()); - } - - let tick = Emitter.tick - - let cleanup = Emitter.cleanup - - let set_on_tick_callbacks = Emitter.set_on_tick_callbacks -end - -let create_backend ~sw ?(stop = Atomic.make false) ?(config = Config.make ()) - env : (module OT.Collector.BACKEND) = - let module E = (val mk_emitter ~stop ~net:env#net config) in - let module B = Backend (E) in - (* Run a background fiber to keep the backend ticking regularly. - - NOTE: This cannot be located inside the [Backend], because switches - are not thread safe, and cannot be used accross domains, but the - backend is accessed across domains. *) - Eio.Fiber.fork ~sw (fun () -> - while not @@ Atomic.get stop do - Eio.Time.sleep env#clock 0.5; - B.tick () - done); - - (module B) +let create_backend = create_exporter let setup_ ~sw ?stop ?config env : unit = - let backend = create_backend ?stop ?config ~sw env in - OT.Collector.set_backend backend + let backend = create_backend ?stop ?config ~sw ~env () in + Main_exporter.set backend let setup ?stop ?config ?(enable = true) ~sw env = if enable then setup_ ~sw ?stop ?config env -let remove_backend () = OT.Collector.remove_backend ~on_done:ignore () +let remove_exporter () = Main_exporter.remove ~on_done:ignore () + +let remove_backend = remove_exporter let with_setup ?stop ?config ?(enable = true) f env = if enable then diff --git a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.mli b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.mli index f88ddef5..e3ccbe4e 100644 --- a/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.mli +++ b/src/client-cohttp-eio/opentelemetry_client_cohttp_eio.mli @@ -10,15 +10,32 @@ val set_headers : (string * string) list -> unit module Config = Config -val create_backend : - sw:Eio.Switch.t -> +val create_consumer : ?stop:bool Atomic.t -> ?config:Config.t -> - Eio_unix.Stdenv.base -> - (module Opentelemetry.Collector.BACKEND) -(** Create a new backend using Cohttp_eio + sw:Eio.Switch.t -> + env:Eio_unix.Stdenv.base -> + unit -> + Opentelemetry_client.Consumer.any_resource_builder +(** Consumer that pulls from a queue *) - NOTE [after_cleanup] optional parameter removed @since 0.12 *) +val create_exporter : + ?stop:bool Atomic.t -> + ?config:Config.t -> + sw:Eio.Switch.t -> + env:Eio_unix.Stdenv.base -> + unit -> + Opentelemetry.Exporter.t +(** NOTE [after_cleanup] optional parameter removed @since 0.12 *) + +val create_backend : + ?stop:bool Atomic.t -> + ?config:Config.t -> + sw:Eio.Switch.t -> + env:Eio_unix.Stdenv.base -> + unit -> + Opentelemetry.Exporter.t +[@@deprecated "use create_exporter"] val setup : ?stop:bool Atomic.t -> diff --git a/src/client-cohttp-lwt/config.ml b/src/client-cohttp-lwt/config.ml index 930881ff..4f3677de 100644 --- a/src/client-cohttp-lwt/config.ml +++ b/src/client-cohttp-lwt/config.ml @@ -1,7 +1,7 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t -module Env = Opentelemetry_client.Config.Env () +module Env = Opentelemetry_client.Client_config.Env () -let pp = Opentelemetry_client.Config.pp +let pp = Opentelemetry_client.Client_config.pp let make = Env.make (fun common () -> common) diff --git a/src/client-cohttp-lwt/config.mli b/src/client-cohttp-lwt/config.mli index 100bb696..dff28732 100644 --- a/src/client-cohttp-lwt/config.mli +++ b/src/client-cohttp-lwt/config.mli @@ -1,4 +1,4 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t (** Configuration. To build one, use {!make} below. This might be extended with more fields in @@ -6,7 +6,7 @@ type t = Opentelemetry_client.Config.t val pp : Format.formatter -> t -> unit -val make : (unit -> t) Opentelemetry_client.Config.make +val make : (unit -> t) Opentelemetry_client.Client_config.make (** Make a configuration {!t}. *) -module Env : Opentelemetry_client.Config.ENV +module Env : Opentelemetry_client.Client_config.ENV diff --git a/src/client-cohttp-lwt/dune b/src/client-cohttp-lwt/dune index 480fb4a8..56b0b4f8 100644 --- a/src/client-cohttp-lwt/dune +++ b/src/client-cohttp-lwt/dune @@ -6,7 +6,8 @@ (pps lwt_ppx)) (libraries opentelemetry - opentelemetry.client + opentelemetry-client + opentelemetry-client.lwt lwt cohttp-lwt cohttp-lwt-unix diff --git a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml index 53786ed2..53cf515b 100644 --- a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml +++ b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.ml @@ -3,10 +3,8 @@ https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md *) -module OT = Opentelemetry module Config = Config -module Signal = Opentelemetry_client.Signal -module Batch = Opentelemetry_client.Batch +open Opentelemetry_client open Opentelemetry open Common_ @@ -14,87 +12,14 @@ let set_headers = Config.Env.set_headers let get_headers = Config.Env.get_headers -external reraise : exn -> 'a = "%reraise" -(** This is equivalent to [Lwt.reraise]. We inline it here so we don't force to - use Lwt's latest version *) +type error = Export_error.t -let needs_gc_metrics = Atomic.make false +open struct + module IO = Opentelemetry_client_lwt.Io_lwt +end -let last_gc_metrics = Atomic.make (Mtime_clock.now ()) - -let timeout_gc_metrics = Mtime.Span.(20 * s) - -let gc_metrics = ref [] -(* side channel for GC, appended to {!E_metrics}'s data *) - -(* capture current GC metrics if {!needs_gc_metrics} is true, - or it has been a long time since the last GC metrics collection, - and push them into {!gc_metrics} for later collection *) -let sample_gc_metrics_if_needed () = - let now = Mtime_clock.now () in - let alarm = Atomic.compare_and_set needs_gc_metrics true false in - let timeout () = - let elapsed = Mtime.span now (Atomic.get last_gc_metrics) in - Mtime.Span.compare elapsed timeout_gc_metrics > 0 - in - if alarm || timeout () then ( - Atomic.set last_gc_metrics now; - let l = - OT.Metrics.make_resource_metrics - ~attrs:(Opentelemetry.GC_metrics.get_runtime_attributes ()) - @@ Opentelemetry.GC_metrics.get_metrics () - in - gc_metrics := l :: !gc_metrics - ) - -type error = - [ `Status of int * Opentelemetry.Proto.Status.status - | `Failure of string - | `Sysbreak - ] - -let n_errors = Atomic.make 0 - -let n_dropped = Atomic.make 0 - -let report_err_ = function - | `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!" - | `Failure msg -> - Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg - | `Status - ( code, - { - Opentelemetry.Proto.Status.code = scode; - message; - details; - _presence = _; - } ) -> - let pp_details out l = - List.iter - (fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s)) - l - in - Format.eprintf - "@[<2>opentelemetry: export failed with@ http code=%d@ status \ - {@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@." - code scode - (Bytes.unsafe_to_string message) - pp_details details - -module Httpc : sig - type t - - val create : unit -> t - - val send : - t -> - url:string -> - decode:[ `Dec of Pbrt.Decoder.t -> 'a | `Ret of 'a ] -> - string -> - ('a, error) result Lwt.t - - val cleanup : t -> unit -end = struct +module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct + module IO = IO open Opentelemetry.Proto open Lwt.Syntax module Httpc = Cohttp_lwt_unix.Client @@ -176,325 +101,39 @@ end = struct ) end -(** An emitter. This is used by {!Backend} below to forward traces/metrics/… - from the program to whatever collector client we have. *) -module type EMITTER = sig - open Opentelemetry.Proto +module Consumer_impl = + Generic_http_consumer.Make (IO) (Opentelemetry_client_lwt.Notifier_lwt) + (Httpc) - val push_trace : Trace.resource_spans list -> unit +let create_consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () = + Consumer_impl.consumer ~ticker_task:(Some 0.5) ~stop ~config () - val push_metrics : Metrics.resource_metrics list -> unit - - val push_logs : Logs.resource_logs list -> unit - - val set_on_tick_callbacks : (unit -> unit) AList.t -> unit - - val tick : unit -> unit - - val cleanup : on_done:(unit -> unit) -> unit -> unit -end - -(* make an emitter. - - exceptions inside should be caught, see - https://opentelemetry.io/docs/reference/specification/error-handling/ *) -let mk_emitter ~stop ~(config : Config.t) () : (module EMITTER) = - let open Proto in - let open Lwt.Syntax in - (* local helpers *) - let open struct - let timeout = - if config.batch_timeout_ms > 0 then - Some Mtime.Span.(config.batch_timeout_ms * ms) - else - None - - let batch_traces : Trace.resource_spans Batch.t = - Batch.make ?batch:config.batch_traces ?timeout () - - let batch_metrics : Metrics.resource_metrics Batch.t = - Batch.make ?batch:config.batch_metrics ?timeout () - - let batch_logs : Logs.resource_logs Batch.t = - Batch.make ?batch:config.batch_logs ?timeout () - - let on_tick_cbs_ = Atomic.make (AList.make ()) - - let set_on_tick_callbacks = Atomic.set on_tick_cbs_ - - let send_http_ (httpc : Httpc.t) ~url data : unit Lwt.t = - let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in - match r with - | Ok () -> Lwt.return () - | Error `Sysbreak -> - Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set stop true; - Lwt.return () - | Error err -> - (* TODO: log error _via_ otel? *) - Atomic.incr n_errors; - report_err_ err; - (* avoid crazy error loop *) - Lwt_unix.sleep 3. - - let send_metrics_http client (l : Metrics.resource_metrics list) = - Signal.Encode.metrics l |> send_http_ client ~url:config.url_metrics - - let send_traces_http client (l : Trace.resource_spans list) = - Signal.Encode.traces l |> send_http_ client ~url:config.url_traces - - let send_logs_http client (l : Logs.resource_logs list) = - Signal.Encode.logs l |> send_http_ client ~url:config.url_logs - - (* emit metrics, if the batch is full or timeout lapsed *) - let emit_metrics_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_metrics with - | None -> Lwt.return false - | Some l -> - let batch = !gc_metrics @ l in - gc_metrics := []; - let+ () = send_metrics_http httpc batch in - true - - let emit_traces_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_traces with - | None -> Lwt.return false - | Some l -> - let+ () = send_traces_http httpc l in - true - - let emit_logs_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_logs with - | None -> Lwt.return false - | Some l -> - let+ () = send_logs_http httpc l in - true - - let[@inline] guard_exn_ where f = - try f () - with e -> - let bt = Printexc.get_backtrace () in - Printf.eprintf - "opentelemetry-cohttp-lwt: uncaught exception in %s: %s\n%s\n%!" where - (Printexc.to_string e) bt - - let emit_all_force (httpc : Httpc.t) : unit Lwt.t = - let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe ~now ~force:true httpc - and+ (_ : bool) = emit_logs_maybe ~now ~force:true httpc - and+ (_ : bool) = emit_metrics_maybe ~now ~force:true httpc in - () - - (* thread that calls [tick()] regularly, to help enforce timeouts *) - let setup_ticker_thread ~tick ~finally () = - let rec tick_thread () = - if Atomic.get stop then ( - finally (); - Lwt.return () - ) else - let* () = Lwt_unix.sleep 0.5 in - let* () = tick () in - tick_thread () - in - Lwt.async tick_thread - end in - let httpc = Httpc.create () in - - let module M = struct - (* we make sure that this is thread-safe, even though we don't have a - background thread. There can still be a ticker thread, and there - can also be several user threads that produce spans and call - the emit functions. *) - - let push_to_batch b e = - match Batch.push b e with - | `Ok -> () - | `Dropped -> Atomic.incr n_errors - - let push_trace e = - let@ () = guard_exn_ "push trace" in - push_to_batch batch_traces e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_traces_maybe ~now httpc in - ()) - - let push_metrics e = - let@ () = guard_exn_ "push metrics" in - sample_gc_metrics_if_needed (); - push_to_batch batch_metrics e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_metrics_maybe ~now httpc in - ()) - - let push_logs e = - let@ () = guard_exn_ "push logs" in - push_to_batch batch_logs e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_logs_maybe ~now httpc in - ()) - - let set_on_tick_callbacks = set_on_tick_callbacks - - let tick_ () = - if Config.Env.get_debug () then - Printf.eprintf "tick (from %d)\n%!" (tid ()); - sample_gc_metrics_if_needed (); - List.iter - (fun f -> - try f () - with e -> - Printf.eprintf "on tick callback raised: %s\n" - (Printexc.to_string e)) - (AList.get @@ Atomic.get on_tick_cbs_); - let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe ~now httpc - and+ (_ : bool) = emit_logs_maybe ~now httpc - and+ (_ : bool) = emit_metrics_maybe ~now httpc in - () - - let () = setup_ticker_thread ~tick:tick_ ~finally:ignore () - - (* if called in a blocking context: work in the background *) - let tick () = Lwt.async tick_ - - let cleanup ~on_done () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: exiting…\n%!"; - Lwt.async (fun () -> - let* () = emit_all_force httpc in - Httpc.cleanup httpc; - on_done (); - Lwt.return ()) - end in - (module M) - -module Backend - (Arg : sig - val stop : bool Atomic.t - - val config : Config.t - end) - () : Opentelemetry.Collector.BACKEND = struct - include (val mk_emitter ~stop:Arg.stop ~config:Arg.config ()) - - open Opentelemetry.Proto - open Opentelemetry.Collector - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send spans %a@." - (Format.pp_print_list Trace.pp_resource_spans) - l); - push_trace l; - ret ()); - } - - let last_sent_metrics = Atomic.make (Mtime_clock.now ()) - - let timeout_sent_metrics = Mtime.Span.(5 * s) - (* send metrics from time to time *) - - let signal_emit_gc_metrics () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: emit GC metrics requested\n%!"; - Atomic.set needs_gc_metrics true - - let additional_metrics () : Metrics.resource_metrics list = - (* add exporter metrics to the lot? *) - let last_emit = Atomic.get last_sent_metrics in - let now = Mtime_clock.now () in - let add_own_metrics = - let elapsed = Mtime.span last_emit now in - Mtime.Span.compare elapsed timeout_sent_metrics > 0 - in - - (* there is a possible race condition here, as several threads might update - metrics at the same time. But that's harmless. *) - if add_own_metrics then ( - Atomic.set last_sent_metrics now; - let open OT.Metrics in - [ - make_resource_metrics - [ - sum ~name:"otel.export.dropped" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_dropped); - ]; - sum ~name:"otel.export.errors" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_errors); - ]; - ]; - ] - ) else - [] - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send metrics %a@." - (Format.pp_print_list Metrics.pp_resource_metrics) - m); - - let m = List.rev_append (additional_metrics ()) m in - push_metrics m; - ret ()); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send logs %a@." - (Format.pp_print_list Logs.pp_resource_logs) - m); - - push_logs m; - ret ()); - } -end - -let create_backend ?(stop = Atomic.make false) ?(config = Config.make ()) () = - let module B = - Backend - (struct - let stop = stop - - let config = config - end) - () +let create_exporter ?stop ?(config = Config.make ()) () = + let consumer = create_consumer ?stop ~config () in + let bq = + Bounded_queue_sync.create + ~high_watermark:Bounded_queue.Defaults.high_watermark () in - (module B : OT.Collector.BACKEND) + Exporter_queued.create ~q:bq ~consumer () + |> Exporter_add_batching.add_batching ~config + +let create_backend = create_exporter let setup_ ?stop ?config () : unit = let backend = create_backend ?stop ?config () in - OT.Collector.set_backend backend; + Main_exporter.set backend; () let setup ?stop ?config ?(enable = true) () = if enable then setup_ ?stop ?config () -let remove_backend () : unit Lwt.t = +let remove_exporter () : unit Lwt.t = let done_fut, done_u = Lwt.wait () in - OT.Collector.remove_backend ~on_done:(fun () -> Lwt.wakeup_later done_u ()) (); + Main_exporter.remove ~on_done:(fun () -> Lwt.wakeup_later done_u ()) (); done_fut +let remove_backend = remove_exporter + let with_setup ?stop ?(config = Config.make ()) ?(enable = true) () f : _ Lwt.t = if enable then ( @@ -504,10 +143,10 @@ let with_setup ?stop ?(config = Config.make ()) ?(enable = true) () f : _ Lwt.t Lwt.catch (fun () -> let* res = f () in - let+ () = remove_backend () in + let+ () = remove_exporter () in res) (fun exn -> - let* () = remove_backend () in - reraise exn) + let* () = remove_exporter () in + Lwt.reraise exn) ) else f () diff --git a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.mli b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.mli index c57d9653..2f12121f 100644 --- a/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.mli +++ b/src/client-cohttp-lwt/opentelemetry_client_cohttp_lwt.mli @@ -12,14 +12,20 @@ val set_headers : (string * string) list -> unit module Config = Config -val create_backend : +val create_consumer : ?stop:bool Atomic.t -> ?config:Config.t -> unit -> - (module Opentelemetry.Collector.BACKEND) -(** Create a new backend using lwt and cohttp + Opentelemetry_client.Consumer.any_resource_builder +(** Consumer that pulls from a queue *) - NOTE [after_cleanup] optional parameter removed @since 0.12 *) +val create_exporter : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t +(** Create a new backend using lwt and ezcurl-lwt *) + +val create_backend : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t +[@@deprecated "use create_exporter"] val setup : ?stop:bool Atomic.t -> ?config:Config.t -> ?enable:bool -> unit -> unit diff --git a/src/client-ocurl-lwt/config.ml b/src/client-ocurl-lwt/config.ml index 930881ff..4f3677de 100644 --- a/src/client-ocurl-lwt/config.ml +++ b/src/client-ocurl-lwt/config.ml @@ -1,7 +1,7 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t -module Env = Opentelemetry_client.Config.Env () +module Env = Opentelemetry_client.Client_config.Env () -let pp = Opentelemetry_client.Config.pp +let pp = Opentelemetry_client.Client_config.pp let make = Env.make (fun common () -> common) diff --git a/src/client-ocurl-lwt/config.mli b/src/client-ocurl-lwt/config.mli index 100bb696..dff28732 100644 --- a/src/client-ocurl-lwt/config.mli +++ b/src/client-ocurl-lwt/config.mli @@ -1,4 +1,4 @@ -type t = Opentelemetry_client.Config.t +type t = Opentelemetry_client.Client_config.t (** Configuration. To build one, use {!make} below. This might be extended with more fields in @@ -6,7 +6,7 @@ type t = Opentelemetry_client.Config.t val pp : Format.formatter -> t -> unit -val make : (unit -> t) Opentelemetry_client.Config.make +val make : (unit -> t) Opentelemetry_client.Client_config.make (** Make a configuration {!t}. *) -module Env : Opentelemetry_client.Config.ENV +module Env : Opentelemetry_client.Client_config.ENV diff --git a/src/client-ocurl-lwt/dune b/src/client-ocurl-lwt/dune index c4e60769..41dd39a9 100644 --- a/src/client-ocurl-lwt/dune +++ b/src/client-ocurl-lwt/dune @@ -7,7 +7,8 @@ (libraries opentelemetry opentelemetry.atomic - opentelemetry.client + opentelemetry-client + opentelemetry-client.lwt pbrt mtime mtime.clock.os diff --git a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml index 8502db39..50cd834b 100644 --- a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml +++ b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.ml @@ -3,110 +3,42 @@ https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md *) -module OT = Opentelemetry module Config = Config -module Signal = Opentelemetry_client.Signal -module Batch = Opentelemetry_client.Batch open Opentelemetry +open Opentelemetry_client open Common_ let set_headers = Config.Env.set_headers let get_headers = Config.Env.get_headers -external reraise : exn -> 'a = "%reraise" -(** This is equivalent to [Lwt.reraise]. We inline it here so we don't force to - use Lwt's latest version *) +type error = Export_error.t -let needs_gc_metrics = Atomic.make false +open struct + module IO = Opentelemetry_client_lwt.Io_lwt +end -let last_gc_metrics = Atomic.make (Mtime_clock.now ()) - -let timeout_gc_metrics = Mtime.Span.(20 * s) - -let gc_metrics = ref [] -(* side channel for GC, appended to {!E_metrics}'s data *) - -(* capture current GC metrics if {!needs_gc_metrics} is true, - or it has been a long time since the last GC metrics collection, - and push them into {!gc_metrics} for later collection *) -let sample_gc_metrics_if_needed () = - let now = Mtime_clock.now () in - let alarm = Atomic.compare_and_set needs_gc_metrics true false in - let timeout () = - let elapsed = Mtime.span now (Atomic.get last_gc_metrics) in - Mtime.Span.compare elapsed timeout_gc_metrics > 0 - in - if alarm || timeout () then ( - Atomic.set last_gc_metrics now; - let l = - OT.Metrics.make_resource_metrics - ~attrs:(Opentelemetry.GC_metrics.get_runtime_attributes ()) - @@ Opentelemetry.GC_metrics.get_metrics () - in - gc_metrics := l :: !gc_metrics - ) - -type error = - [ `Status of int * Opentelemetry.Proto.Status.status - | `Failure of string - | `Sysbreak - ] - -let n_errors = Atomic.make 0 - -let n_dropped = Atomic.make 0 - -let report_err_ = function - | `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!" - | `Failure msg -> - Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg - | `Status (code, { Opentelemetry.Proto.Status.code = scode; message; details }) - -> - let pp_details out l = - List.iter - (fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s)) - l - in - Format.eprintf - "@[<2>opentelemetry: export failed with@ http code=%d@ status \ - {@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@." - code scode - (Bytes.unsafe_to_string message) - pp_details details - -module Httpc : sig - type t - - val create : unit -> t - - val send : - t -> - url:string -> - decode:[ `Dec of Pbrt.Decoder.t -> 'a | `Ret of 'a ] -> - string -> - ('a, error) result Lwt.t - - val cleanup : t -> unit -end = struct - open Opentelemetry.Proto +(** HTTP client *) +module Httpc : Generic_http_consumer.HTTPC with module IO = IO = struct + module IO = IO open Lwt.Syntax - type t = unit + type t = Curl.t - let create () : t = () + let create () : t = Ezcurl_core.make () - let cleanup _self = () + let cleanup self = Ezcurl_core.delete self - (* send the content to the remote endpoint/path *) - let send (_self : t) ~url ~decode (bod : string) : ('a, error) result Lwt.t = + (** send the content to the remote endpoint/path *) + let send (self : t) ~url ~decode (bod : string) : ('a, error) result Lwt.t = let* r = let headers = ("Content-Type", "application/x-protobuf") :: ("Accept", "application/x-protobuf") :: Config.Env.get_headers () in - Ezcurl_lwt.post ~headers ~params:[] ~url ~content:(`String bod) () + Ezcurl_lwt.post ~client:self ~headers ~params:[] ~url + ~content:(`String bod) () in match r with | Error (code, msg) -> @@ -136,335 +68,31 @@ end = struct in Lwt.return r) | Ok { code; body; _ } -> - let dec = Pbrt.Decoder.of_string body in - - let r = - try - let status = Status.decode_pb_status dec in - Error (`Status (code, status)) - with e -> - let bt = Printexc.get_backtrace () in - Error - (`Failure - (spf - "httpc: decoding of status (url=%S, code=%d) failed with:\n\ - %s\n\ - status: %S\n\ - %s" - url code (Printexc.to_string e) body bt)) - in - Lwt.return r + let err = Export_error.decode_invalid_http_response ~url ~code body in + Lwt.return (Error err) end -(** An emitter. This is used by {!Backend} below to forward traces/metrics/… - from the program to whatever collector client we have. *) -module type EMITTER = sig - open Opentelemetry.Proto +module Consumer_impl = + Generic_http_consumer.Make (IO) (Opentelemetry_client_lwt.Notifier_lwt) + (Httpc) - val push_trace : Trace.resource_spans list -> unit +let create_consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () = + Consumer_impl.consumer ~ticker_task:(Some 0.5) ~stop ~config () - val push_metrics : Metrics.resource_metrics list -> unit - - val push_logs : Logs.resource_logs list -> unit - - val set_on_tick_callbacks : (unit -> unit) AList.t -> unit - - val tick : unit -> unit - - val cleanup : on_done:(unit -> unit) -> unit -> unit -end - -(* make an emitter. - - exceptions inside should be caught, see - https://opentelemetry.io/docs/reference/specification/error-handling/ *) -let mk_emitter ~stop ~(config : Config.t) () : (module EMITTER) = - let open Proto in - let open Lwt.Syntax in - (* local helpers *) - let open struct - let timeout = - if config.batch_timeout_ms > 0 then - Some Mtime.Span.(config.batch_timeout_ms * ms) - else - None - - let batch_traces : Trace.resource_spans Batch.t = - Batch.make ?batch:config.batch_traces ?timeout () - - let batch_metrics : Metrics.resource_metrics Batch.t = - Batch.make ?batch:config.batch_metrics ?timeout () - - let batch_logs : Logs.resource_logs Batch.t = - Batch.make ?batch:config.batch_logs ?timeout () - - let on_tick_cbs_ = Atomic.make (AList.make ()) - - let set_on_tick_callbacks = Atomic.set on_tick_cbs_ - - let send_http_ (httpc : Httpc.t) ~url data : unit Lwt.t = - let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in - match r with - | Ok () -> Lwt.return () - | Error `Sysbreak -> - Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set stop true; - Lwt.return () - | Error err -> - (* TODO: log error _via_ otel? *) - Atomic.incr n_errors; - report_err_ err; - (* avoid crazy error loop *) - Lwt_unix.sleep 3. - - let send_metrics_http client (l : Metrics.resource_metrics list) = - Signal.Encode.metrics l |> send_http_ client ~url:config.url_metrics - - let send_traces_http client (l : Trace.resource_spans list) = - Signal.Encode.traces l |> send_http_ client ~url:config.url_traces - - let send_logs_http client (l : Logs.resource_logs list) = - Signal.Encode.logs l |> send_http_ client ~url:config.url_logs - - (* emit metrics, if the batch is full or timeout lapsed *) - let emit_metrics_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_metrics with - | None -> Lwt.return false - | Some l -> - let batch = !gc_metrics @ l in - gc_metrics := []; - let+ () = send_metrics_http httpc batch in - true - - let emit_traces_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_traces with - | None -> Lwt.return false - | Some l -> - let+ () = send_traces_http httpc l in - true - - let emit_logs_maybe ~now ?force httpc : bool Lwt.t = - match Batch.pop_if_ready ?force ~now batch_logs with - | None -> Lwt.return false - | Some l -> - let+ () = send_logs_http httpc l in - true - - let[@inline] guard_exn_ where f = - try f () - with e -> - let bt = Printexc.get_backtrace () in - Printf.eprintf - "opentelemetry-ocurl-lwt: uncaught exception in %s: %s\n%s\n%!" where - (Printexc.to_string e) bt - - let emit_all_force (httpc : Httpc.t) : unit Lwt.t = - let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe ~now ~force:true httpc - and+ (_ : bool) = emit_logs_maybe ~now ~force:true httpc - and+ (_ : bool) = emit_metrics_maybe ~now ~force:true httpc in - () - - (* thread that calls [tick()] regularly, to help enforce timeouts *) - let setup_ticker_thread ~tick ~finally () = - let rec tick_thread () = - if Atomic.get stop then ( - finally (); - Lwt.return () - ) else - let* () = Lwt_unix.sleep 0.5 in - let* () = tick () in - tick_thread () - in - Lwt.async tick_thread - end in - let httpc = Httpc.create () in - - let module M = struct - (* we make sure that this is thread-safe, even though we don't have a - background thread. There can still be a ticker thread, and there - can also be several user threads that produce spans and call - the emit functions. *) - - let push_to_batch b e = - match Batch.push b e with - | `Ok -> () - | `Dropped -> Atomic.incr n_dropped - - let push_trace e = - let@ () = guard_exn_ "push trace" in - push_to_batch batch_traces e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_traces_maybe ~now httpc in - ()) - - let push_metrics e = - let@ () = guard_exn_ "push metrics" in - sample_gc_metrics_if_needed (); - push_to_batch batch_metrics e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_metrics_maybe ~now httpc in - ()) - - let push_logs e = - let@ () = guard_exn_ "push logs" in - push_to_batch batch_logs e; - let now = Mtime_clock.now () in - Lwt.async (fun () -> - let+ (_ : bool) = emit_logs_maybe ~now httpc in - ()) - - let set_on_tick_callbacks = set_on_tick_callbacks - - let tick_ () = - if Config.Env.get_debug () then - Printf.eprintf "tick (from %d)\n%!" (tid ()); - sample_gc_metrics_if_needed (); - List.iter - (fun f -> - try f () - with e -> - Printf.eprintf "on tick callback raised: %s\n" - (Printexc.to_string e)) - (AList.get @@ Atomic.get on_tick_cbs_); - let now = Mtime_clock.now () in - let+ (_ : bool) = emit_traces_maybe ~now httpc - and+ (_ : bool) = emit_logs_maybe ~now httpc - and+ (_ : bool) = emit_metrics_maybe ~now httpc in - () - - let () = setup_ticker_thread ~tick:tick_ ~finally:ignore () - - (* if called in a blocking context: work in the background *) - let tick () = Lwt.async tick_ - - let cleanup ~on_done () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: exiting…\n%!"; - Lwt.async (fun () -> - let* () = emit_all_force httpc in - Httpc.cleanup httpc; - on_done (); - Lwt.return ()) - end in - (module M) - -module Backend - (Arg : sig - val stop : bool Atomic.t - - val config : Config.t - end) - () : Opentelemetry.Collector.BACKEND = struct - include (val mk_emitter ~stop:Arg.stop ~config:Arg.config ()) - - open Opentelemetry.Proto - open Opentelemetry.Collector - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send spans %a@." - (Format.pp_print_list Trace.pp_resource_spans) - l); - push_trace l; - ret ()); - } - - let last_sent_metrics = Atomic.make (Mtime_clock.now ()) - - let timeout_sent_metrics = Mtime.Span.(5 * s) - (* send metrics from time to time *) - - let signal_emit_gc_metrics () = - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: emit GC metrics requested\n%!"; - Atomic.set needs_gc_metrics true - - let additional_metrics () : Metrics.resource_metrics list = - (* add exporter metrics to the lot? *) - let last_emit = Atomic.get last_sent_metrics in - let now = Mtime_clock.now () in - let add_own_metrics = - let elapsed = Mtime.span last_emit now in - Mtime.Span.compare elapsed timeout_sent_metrics > 0 - in - - (* there is a possible race condition here, as several threads might update - metrics at the same time. But that's harmless. *) - if add_own_metrics then ( - Atomic.set last_sent_metrics now; - let open OT.Metrics in - [ - make_resource_metrics - [ - sum ~name:"otel.export.dropped" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_dropped); - ]; - sum ~name:"otel.export.errors" ~is_monotonic:true - [ - int - ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) - ~now:(Mtime.to_uint64_ns now) (Atomic.get n_errors); - ]; - ]; - ] - ) else - [] - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send metrics %a@." - (Format.pp_print_list Metrics.pp_resource_metrics) - m); - - let m = List.rev_append (additional_metrics ()) m in - push_metrics m; - ret ()); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun m ~ret -> - (if Config.Env.get_debug () then - let@ () = Lock.with_lock in - Format.eprintf "send logs %a@." - (Format.pp_print_list Logs.pp_resource_logs) - m); - - push_logs m; - ret ()); - } -end - -let create_backend ?(stop = Atomic.make false) ?(config = Config.make ()) () = - let module B = - Backend - (struct - let stop = stop - - let config = config - end) - () +let create_exporter ?stop ?(config = Config.make ()) () = + let consumer = create_consumer ?stop ~config () in + let bq = + Bounded_queue_sync.create + ~high_watermark:Bounded_queue.Defaults.high_watermark () in - (module B : OT.Collector.BACKEND) + Exporter_queued.create ~q:bq ~consumer () + |> Exporter_add_batching.add_batching ~config + +let create_backend = create_exporter let setup_ ?stop ?config () : unit = - let backend = create_backend ?stop ?config () in - OT.Collector.set_backend backend; + let exp = create_backend ?stop ?config () in + Main_exporter.set exp; () let setup ?stop ?config ?(enable = true) () = @@ -472,7 +100,7 @@ let setup ?stop ?config ?(enable = true) () = let remove_backend () : unit Lwt.t = let done_fut, done_u = Lwt.wait () in - OT.Collector.remove_backend ~on_done:(fun () -> Lwt.wakeup_later done_u ()) (); + Main_exporter.remove ~on_done:(fun () -> Lwt.wakeup_later done_u ()) (); done_fut let with_setup ?stop ?(config = Config.make ()) ?(enable = true) () f : _ Lwt.t @@ -488,6 +116,6 @@ let with_setup ?stop ?(config = Config.make ()) ?(enable = true) () f : _ Lwt.t res) (fun exn -> let* () = remove_backend () in - reraise exn) + Lwt.reraise exn) ) else f () diff --git a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli index b20d37d3..713ea70a 100644 --- a/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli +++ b/src/client-ocurl-lwt/opentelemetry_client_ocurl_lwt.mli @@ -12,13 +12,21 @@ val set_headers : (string * string) list -> unit module Config = Config -val create_backend : +val create_consumer : ?stop:bool Atomic.t -> ?config:Config.t -> unit -> - (module Opentelemetry.Collector.BACKEND) + Opentelemetry_client.Consumer.any_resource_builder +(** Consumer that pulls from a queue *) + +val create_exporter : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t (** Create a new backend using lwt and ezcurl-lwt *) +val create_backend : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t +[@@deprecated "use create_exporter"] + val setup : ?stop:bool Atomic.t -> ?config:Config.t -> ?enable:bool -> unit -> unit (** Setup endpoint. This modifies {!Opentelemetry.Collector.backend}. diff --git a/src/client-ocurl/batch.ml b/src/client-ocurl/batch.ml deleted file mode 100644 index 0be8b1b0..00000000 --- a/src/client-ocurl/batch.ml +++ /dev/null @@ -1,24 +0,0 @@ -type 'a t = { - mutable len: int; - mutable l: 'a list list; - mutable started: Mtime.t; -} - -let create () = { len = 0; l = []; started = Mtime_clock.now () } - -let push self l = - if l != [] then ( - if self.l == [] then self.started <- Mtime_clock.now (); - self.l <- l :: self.l; - self.len <- self.len + List.length l - ) - -let[@inline] len self = self.len - -let[@inline] time_started self = self.started - -let pop_all self = - let l = self.l in - self.l <- []; - self.len <- 0; - l diff --git a/src/client-ocurl/batch.mli b/src/client-ocurl/batch.mli deleted file mode 100644 index 2b867b88..00000000 --- a/src/client-ocurl/batch.mli +++ /dev/null @@ -1,14 +0,0 @@ -(** List of lists with length *) - -type 'a t - -val create : unit -> 'a t - -val push : 'a t -> 'a list -> unit - -val len : _ t -> int - -val time_started : _ t -> Mtime.t -(** Time at which the batch most recently became non-empty *) - -val pop_all : 'a t -> 'a list list diff --git a/src/client-ocurl/common_.ml b/src/client-ocurl/common_.ml index 10df0c1d..1ec6de25 100644 --- a/src/client-ocurl/common_.ml +++ b/src/client-ocurl/common_.ml @@ -1,8 +1,8 @@ module Atomic = Opentelemetry_atomic.Atomic -include Opentelemetry.Lock +module Proto = Opentelemetry_proto let spf = Printf.sprintf let ( let@ ) = ( @@ ) -let tid () = Thread.id @@ Thread.self () +let[@inline] tid () = Thread.id @@ Thread.self () diff --git a/src/client-ocurl/config.ml b/src/client-ocurl/config.ml index 0954fbe6..e06ebf7e 100644 --- a/src/client-ocurl/config.ml +++ b/src/client-ocurl/config.ml @@ -1,3 +1,5 @@ +open Opentelemetry_client + type t = { bg_threads: int; (** Are there background threads, and how many? Default [4]. This will be @@ -10,7 +12,7 @@ type t = { [ticker_thread] is [true]. This will be clamped between [2 ms] and some longer interval (maximum [60s] currently). Default 500. @since 0.7 *) - common: Opentelemetry_client.Config.t; + common: Client_config.t; (** Common configuration options @since 0.12*) } @@ -20,10 +22,9 @@ let pp out self = Format.fprintf out "{@[ bg_threads=%d;@ ticker_thread=%B;@ ticker_interval_ms=%d;@ common=%a \ @]}" - bg_threads ticker_thread ticker_interval_ms Opentelemetry_client.Config.pp - common + bg_threads ticker_thread ticker_interval_ms Client_config.pp common -module Env = Opentelemetry_client.Config.Env () +module Env = Client_config.Env () let make = Env.make diff --git a/src/client-ocurl/config.mli b/src/client-ocurl/config.mli index 514ecb3e..7726de12 100644 --- a/src/client-ocurl/config.mli +++ b/src/client-ocurl/config.mli @@ -12,7 +12,7 @@ type t = { [ticker_thread] is [true]. This will be clamped between [2 ms] and some longer interval (maximum [60s] currently). Default 500. @since 0.7 *) - common: Opentelemetry_client.Config.t; + common: Opentelemetry_client.Client_config.t; (** Common configuration options @since 0.12*) } @@ -29,7 +29,7 @@ val make : ?ticker_interval_ms:int -> unit -> t) - Opentelemetry_client.Config.make + Opentelemetry_client.Client_config.make (** Make a configuration {!t}. *) -module Env : Opentelemetry_client.Config.ENV +module Env : Opentelemetry_client.Client_config.ENV diff --git a/src/client-ocurl/dune b/src/client-ocurl/dune index 9823b5bc..befb9396 100644 --- a/src/client-ocurl/dune +++ b/src/client-ocurl/dune @@ -4,7 +4,7 @@ (libraries opentelemetry opentelemetry.atomic - opentelemetry.client + opentelemetry-client curl pbrt threads diff --git a/src/client-ocurl/opentelemetry_client_ocurl.ml b/src/client-ocurl/opentelemetry_client_ocurl.ml index a58f78b7..8f06dbd0 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.ml +++ b/src/client-ocurl/opentelemetry_client_ocurl.ml @@ -3,472 +3,126 @@ https://github.com/open-telemetry/oteps/blob/main/text/0099-otlp-http.md *) -module OT = Opentelemetry module Config = Config -module Self_trace = Opentelemetry_client.Self_trace -module Signal = Opentelemetry_client.Signal -open Opentelemetry -include Common_ +module OTELC = Opentelemetry_client +open Common_ +module OTEL = Opentelemetry let get_headers = Config.Env.get_headers let set_headers = Config.Env.set_headers -let needs_gc_metrics = Atomic.make false +let n_bytes_sent : int Atomic.t = Atomic.make 0 -let last_gc_metrics = Atomic.make (Mtime_clock.now ()) +type error = OTELC.Export_error.t -let timeout_gc_metrics = Mtime.Span.(20 * s) +open struct + module Notifier = OTELC.Notifier_sync -(** side channel for GC, appended to metrics batch data *) -let gc_metrics = AList.make () + module IO : OTELC.Generic_io.S_WITH_CONCURRENCY with type 'a t = 'a = struct + include OTELC.Generic_io.Direct_style -(** capture current GC metrics if {!needs_gc_metrics} is true or it has been a - long time since the last GC metrics collection, and push them into - {!gc_metrics} for later collection *) -let sample_gc_metrics_if_needed () = - let now = Mtime_clock.now () in - let alarm = Atomic.exchange needs_gc_metrics false in - let timeout () = - let elapsed = Mtime.span now (Atomic.get last_gc_metrics) in - Mtime.Span.compare elapsed timeout_gc_metrics > 0 - in - if alarm || timeout () then ( - Atomic.set last_gc_metrics now; - let l = - OT.Metrics.make_resource_metrics - ~attrs:(Opentelemetry.GC_metrics.get_runtime_attributes ()) - @@ Opentelemetry.GC_metrics.get_metrics () - in - AList.add gc_metrics l - ) + let sleep_s = Thread.delay -let n_errors = Atomic.make 0 - -let n_dropped = Atomic.make 0 - -(** Something sent to the collector *) -module Event = struct - open Opentelemetry.Proto - - type t = - | E_metric of Metrics.resource_metrics list - | E_trace of Trace.resource_spans list - | E_logs of Logs.resource_logs list - | E_tick - | E_flush_all (** Flush all batches *) + let[@inline] spawn f = + ignore (OTELC.Util_thread.start_bg_thread f : Thread.t) + end end -(** Something to be sent via HTTP *) -module To_send = struct - open Opentelemetry.Proto +module Httpc : OTELC.Generic_http_consumer.HTTPC with module IO = IO = struct + module IO = IO - type t = - | Send_metric of Metrics.resource_metrics list list - | Send_trace of Trace.resource_spans list list - | Send_logs of Logs.resource_logs list list -end + type t = Curl.t -(** start a thread in the background, running [f()] *) -let start_bg_thread (f : unit -> unit) : Thread.t = - let unix_run () = - let signals = - [ - Sys.sigusr1; - Sys.sigusr2; - Sys.sigterm; - Sys.sigpipe; - Sys.sigalrm; - Sys.sigstop; - ] - in - ignore (Thread.sigmask Unix.SIG_BLOCK signals : _ list); - f () - in - (* no signals on Windows *) - let run () = - if Sys.win32 then - f () - else - unix_run () - in - Thread.create run () + let create () = Ezcurl.make () -let str_to_hex (s : string) : string = - let i_to_hex (i : int) = - if i < 10 then - Char.chr (i + Char.code '0') - else - Char.chr (i - 10 + Char.code 'a') - in + let cleanup = Ezcurl.delete - let res = Bytes.create (2 * String.length s) in - for i = 0 to String.length s - 1 do - let n = Char.code (String.get s i) in - Bytes.set res (2 * i) (i_to_hex ((n land 0xf0) lsr 4)); - Bytes.set res ((2 * i) + 1) (i_to_hex (n land 0x0f)) - done; - Bytes.unsafe_to_string res - -module Backend_impl : sig - type t - - val create : stop:bool Atomic.t -> config:Config.t -> unit -> t - - val send_event : t -> Event.t -> unit - - val n_bytes_sent : unit -> int - - val shutdown : t -> on_done:(unit -> unit) -> unit -end = struct - open Opentelemetry.Proto - - type t = { - stop: bool Atomic.t; - cleaned: bool Atomic.t; (** True when we cleaned up after closing *) - config: Config.t; - q: Event.t B_queue.t; (** Queue to receive data from the user's code *) - mutable main_th: Thread.t option; (** Thread that listens on [q] *) - send_q: To_send.t B_queue.t; (** Queue for the send worker threads *) - mutable send_threads: Thread.t array; (** Threads that send data via http *) - } - - let send_http_ ~stop ~(config : Config.t) (client : Curl.t) ~url data : unit = - let@ _sc = - Self_trace.with_ ~kind:Span.Span_kind_producer "otel-ocurl.send-http" - in - - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: send http POST to %s (%dB)\n%!" url - (String.length data); - let headers = - ("Content-Type", "application/x-protobuf") :: config.common.headers - in - match - let@ _sc = - Self_trace.with_ ~kind:Span.Span_kind_internal "curl.post" - ~attrs:[ "sz", `Int (String.length data); "url", `String url ] + let send (self : t) ~url ~decode (bod : string) : ('a, error) result = + let r = + let headers = + ("Content-Type", "application/x-protobuf") + :: ("Accept", "application/x-protobuf") + :: Config.Env.get_headers () in - Ezcurl.post ~headers ~client ~params:[] ~url ~content:(`String data) () - with - | Ok { code; _ } when code >= 200 && code < 300 -> - if Config.Env.get_debug () then - Printf.eprintf "opentelemetry: got response code=%d\n%!" code - | Ok { code; body; headers = _; info = _ } -> - Atomic.incr n_errors; - Self_trace.add_event _sc - @@ Opentelemetry.Event.make "error" ~attrs:[ "code", `Int code ]; - - if Config.Env.get_debug () then ( - let dec = Pbrt.Decoder.of_string body in - let body = - try - let status = Status.decode_pb_status dec in - Format.asprintf "%a" Status.pp_status status - with _ -> - spf "(could not decode status)\nraw bytes: %s" (str_to_hex body) - in - Printf.eprintf - "opentelemetry: error while sending data to %s:\n code=%d\n %s\n%!" - url code body - ); - () - | exception Sys.Break -> - Printf.eprintf "ctrl-c captured, stopping\n%!"; - Atomic.set stop true + Ezcurl.post ~client:self ~headers ~params:[] ~url ~content:(`String bod) + () + in + match r with | Error (code, msg) -> - (* TODO: log error _via_ otel? *) - Atomic.incr n_errors; - - Printf.eprintf - "opentelemetry: export failed:\n %s\n curl code: %s\n url: %s\n%!" - msg (Curl.strerror code) url; - - (* avoid crazy error loop *) - Thread.delay 3. - - let[@inline] send_event (self : t) ev : unit = B_queue.push self.q ev - - let n_bytes_sent_ = Atomic.make 0 - - let[@inline] n_bytes_sent () = Atomic.get n_bytes_sent_ - - (** Thread that, in a loop, reads from [q] to get the next message to send via - http *) - let bg_thread_loop (self : t) : unit = - Ezcurl.with_client ?set_opts:None @@ fun client -> - let config = self.config in - let stop = self.stop in - let send ~name ~url ~conv signals = - let l = List.fold_left (fun acc l -> List.rev_append l acc) [] signals in - let@ _sp = - Self_trace.with_ ~kind:Span_kind_producer name - ~attrs:[ "n", `Int (List.length l) ] + let err = + `Failure + (spf + "sending signals via http POST failed:\n\ + \ %s\n\ + \ curl code: %s\n\ + \ url: %s\n\ + %!" + msg (Curl.strerror code) url) in - let msg = conv l in - ignore (Atomic.fetch_and_add n_bytes_sent_ (String.length msg) : int); - send_http_ ~stop ~config ~url client msg - in - try - while not (Atomic.get stop) do - let msg = B_queue.pop self.send_q in - match msg with - | To_send.Send_trace tr -> - send ~name:"send-traces" ~conv:Signal.Encode.traces - ~url:config.common.url_traces tr - | To_send.Send_metric ms -> - send ~name:"send-metrics" ~conv:Signal.Encode.metrics - ~url:config.common.url_metrics ms - | To_send.Send_logs logs -> - send ~name:"send-logs" ~conv:Signal.Encode.logs - ~url:config.common.url_logs logs - done - with B_queue.Closed -> () - - type batches = { - traces: Proto.Trace.resource_spans Batch.t; - logs: Proto.Logs.resource_logs Batch.t; - metrics: Proto.Metrics.resource_metrics Batch.t; - } - - let batch_max_size_ = 200 - - let should_send_batch_ ?(side = []) ~config ~now (b : _ Batch.t) : bool = - (Batch.len b > 0 || side != []) - && (Batch.len b >= batch_max_size_ - || - let timeout = Mtime.Span.(config.Config.common.batch_timeout_ms * ms) in - let elapsed = Mtime.span now (Batch.time_started b) in - Mtime.Span.compare elapsed timeout >= 0) - - let main_thread_loop (self : t) : unit = - let local_q = Queue.create () in - let config = self.config in - - (* keep track of batches *) - let batches = - { - traces = Batch.create (); - logs = Batch.create (); - metrics = Batch.create (); - } - in - - let send_metrics () = - let metrics = AList.pop_all gc_metrics :: Batch.pop_all batches.metrics in - B_queue.push self.send_q (To_send.Send_metric metrics) - in - - let send_logs () = - B_queue.push self.send_q (To_send.Send_logs (Batch.pop_all batches.logs)) - in - - let send_traces () = - B_queue.push self.send_q - (To_send.Send_trace (Batch.pop_all batches.traces)) - in - - try - while not (Atomic.get self.stop) do - (* read multiple events at once *) - B_queue.pop_all self.q local_q; - - (* are we asked to flush all events? *) - let must_flush_all = ref false in - - (* how to process a single event *) - let process_ev (ev : Event.t) : unit = - match ev with - | Event.E_metric m -> Batch.push batches.metrics m - | Event.E_trace tr -> Batch.push batches.traces tr - | Event.E_logs logs -> Batch.push batches.logs logs - | Event.E_tick -> - (* the only impact of "tick" is that it wakes us up regularly *) - () - | Event.E_flush_all -> must_flush_all := true - in - - Queue.iter process_ev local_q; - Queue.clear local_q; - - if !must_flush_all then ( - if Batch.len batches.metrics > 0 || not (AList.is_empty gc_metrics) - then - send_metrics (); - if Batch.len batches.logs > 0 then send_logs (); - if Batch.len batches.traces > 0 then send_traces () - ) else ( - let now = Mtime_clock.now () in - if - should_send_batch_ ~config ~now batches.metrics - ~side:(AList.get gc_metrics) - then - send_metrics (); - - if should_send_batch_ ~config ~now batches.traces then send_traces (); - if should_send_batch_ ~config ~now batches.logs then send_logs () - ) - done - with B_queue.Closed -> () - - let create ~stop ~config () : t = - let n_send_threads = max 2 config.Config.bg_threads in - let self = - { - stop; - config; - q = B_queue.create (); - send_threads = [||]; - send_q = B_queue.create (); - cleaned = Atomic.make false; - main_th = None; - } - in - - let main_th = start_bg_thread (fun () -> main_thread_loop self) in - self.main_th <- Some main_th; - - self.send_threads <- - Array.init n_send_threads (fun _i -> - start_bg_thread (fun () -> bg_thread_loop self)); - - self - - let shutdown self ~on_done : unit = - Atomic.set self.stop true; - if not (Atomic.exchange self.cleaned true) then ( - (* empty batches *) - send_event self Event.E_flush_all; - (* close the incoming queue, wait for the thread to finish - before we start cutting off the background threads, so that they - have time to receive the final batches *) - B_queue.close self.q; - Option.iter Thread.join self.main_th; - (* close send queues, then wait for all threads *) - B_queue.close self.send_q; - Array.iter Thread.join self.send_threads - ); - on_done () + Error err + | Ok { code; body; _ } when code >= 200 && code < 300 -> + (match decode with + | `Ret x -> Ok x + | `Dec f -> + let dec = Pbrt.Decoder.of_string body in + (try Ok (f dec) + with e -> + let bt = Printexc.get_backtrace () in + Error + (`Failure + (spf "decoding failed with:\n%s\n%s" (Printexc.to_string e) bt)))) + | Ok { code; body; _ } -> + let err = + OTELC.Export_error.decode_invalid_http_response ~url ~code body + in + Error err end -let create_backend ?(stop = Atomic.make false) - ?(config : Config.t = Config.make ()) () : (module Collector.BACKEND) = - let module M = struct - open Opentelemetry.Proto - open Opentelemetry.Collector +module Consumer_impl = OTELC.Generic_http_consumer.Make (IO) (Notifier) (Httpc) - let backend = Backend_impl.create ~stop ~config () - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - Backend_impl.send_event backend (Event.E_trace l); - ret ()); - } - - let last_sent_metrics = Atomic.make (Mtime_clock.now ()) - - (* send metrics from time to time *) - let timeout_sent_metrics = Mtime.Span.(5 * s) - - let signal_emit_gc_metrics () = - if config.common.debug then - Printf.eprintf "opentelemetry: emit GC metrics requested\n%!"; - Atomic.set needs_gc_metrics true - - let additional_metrics () : Metrics.resource_metrics list = - (* add exporter metrics to the lot? *) - let last_emit = Atomic.get last_sent_metrics in - let now = Mtime_clock.now () in - let add_own_metrics = - let elapsed = Mtime.span last_emit now in - Mtime.Span.compare elapsed timeout_sent_metrics > 0 - in - - (* there is a possible race condition here, as several threads might update - metrics at the same time. But that's harmless. *) - if add_own_metrics then ( - Atomic.set last_sent_metrics now; - let open OT.Metrics in - let now_unix = OT.Timestamp_ns.now_unix_ns () in - [ - make_resource_metrics - [ - sum ~name:"otel.export.dropped" ~is_monotonic:true - [ - int ~start_time_unix_nano:now_unix ~now:now_unix - (Atomic.get n_dropped); - ]; - sum ~name:"otel.export.errors" ~is_monotonic:true - [ - int ~start_time_unix_nano:now_unix ~now:now_unix - (Atomic.get n_errors); - ]; - ]; - ] - ) else - [] - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun m ~ret -> - let m = List.rev_append (additional_metrics ()) m in - Backend_impl.send_event backend (Event.E_metric m); - ret ()); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun m ~ret -> - Backend_impl.send_event backend (Event.E_logs m); - ret ()); - } - - let on_tick_cbs_ = Atomic.make (AList.make ()) - - let set_on_tick_callbacks = Atomic.set on_tick_cbs_ - - let tick () = - sample_gc_metrics_if_needed (); - Backend_impl.send_event backend Event.E_tick; - List.iter (fun f -> f ()) (AList.get @@ Atomic.get on_tick_cbs_) - - let cleanup ~on_done () = Backend_impl.shutdown backend ~on_done - end in - (module M) - -(** thread that calls [tick()] regularly, to help enforce timeouts *) -let setup_ticker_thread ~stop ~sleep_ms (module B : Collector.BACKEND) () = - let sleep_s = float sleep_ms /. 1000. in - let tick_loop () = - try - while not @@ Atomic.get stop do - Thread.delay sleep_s; - B.tick () - done - with B_queue.Closed -> () +let consumer ?(stop = Atomic.make false) ?(config = Config.make ()) () : + Opentelemetry_client.Consumer.any_resource_builder = + let n_workers = max 2 (min 32 config.bg_threads) in + let ticker_task = + if config.ticker_thread then + Some (float config.ticker_interval_ms /. 1000.) + else + None in - start_bg_thread tick_loop + Consumer_impl.consumer ~override_n_workers:n_workers ~ticker_task ~stop + ~config:config.common () + +let create_exporter ?stop ?(config = Config.make ()) () : OTEL.Exporter.t = + let consumer = consumer ?stop ~config () in + let bq = + OTELC.Bounded_queue_sync.create + ~high_watermark:OTELC.Bounded_queue.Defaults.high_watermark () + in + + OTELC.Exporter_queued.create ~q:bq ~consumer () + |> OTELC.Exporter_add_batching.add_batching ~config:config.common + +let create_backend = create_exporter let setup_ ?(stop = Atomic.make false) ?(config : Config.t = Config.make ()) () : unit = - let backend = create_backend ~stop ~config () in - Opentelemetry.Collector.set_backend backend; + let exporter = create_exporter ~stop ~config () in + OTEL.Main_exporter.set exporter; - Self_trace.set_enabled config.common.self_trace; + OTELC.Self_trace.set_enabled config.common.self_trace; if config.ticker_thread then ( (* at most a minute *) let sleep_ms = min 60_000 (max 2 config.ticker_interval_ms) in - ignore (setup_ticker_thread ~stop ~sleep_ms backend () : Thread.t) + ignore + (OTELC.Util_thread.setup_ticker_thread ~stop ~sleep_ms exporter () + : Thread.t) ) let remove_backend () : unit = (* we don't need the callback, this runs in the same thread *) - OT.Collector.remove_backend () ~on_done:ignore + OTEL.Main_exporter.remove () ~on_done:ignore let setup ?stop ?config ?(enable = true) () = if enable then setup_ ?stop ?config () @@ -480,4 +134,4 @@ let with_setup ?stop ?config ?(enable = true) () f = ) else f () -let n_bytes_sent = Backend_impl.n_bytes_sent +let[@inline] n_bytes_sent () = Atomic.get n_bytes_sent diff --git a/src/client-ocurl/opentelemetry_client_ocurl.mli b/src/client-ocurl/opentelemetry_client_ocurl.mli index 6d3918dc..cece89df 100644 --- a/src/client-ocurl/opentelemetry_client_ocurl.mli +++ b/src/client-ocurl/opentelemetry_client_ocurl.mli @@ -3,22 +3,31 @@ https://opentelemetry.io/docs/reference/specification/protocol/exporter/ *) +open Opentelemetry_atomic + val get_headers : unit -> (string * string) list val set_headers : (string * string) list -> unit (** Set http headers that are sent on every http query to the collector. *) -module Atomic = Opentelemetry_atomic.Atomic module Config = Config val n_bytes_sent : unit -> int (** Global counter of bytes sent (or attempted to be sent) *) -val create_backend : +val consumer : ?stop:bool Atomic.t -> ?config:Config.t -> unit -> - (module Opentelemetry.Collector.BACKEND) + Opentelemetry_client.Consumer.any_resource_builder +(** Consumer that pulls from a queue *) + +val create_exporter : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t + +val create_backend : + ?stop:bool Atomic.t -> ?config:Config.t -> unit -> Opentelemetry.Exporter.t +[@@deprecated "use create_exporter"] val setup : ?stop:bool Atomic.t -> ?config:Config.t -> ?enable:bool -> unit -> unit diff --git a/src/client/any_resource.ml b/src/client/any_resource.ml new file mode 100644 index 00000000..a85d425c --- /dev/null +++ b/src/client/any_resource.ml @@ -0,0 +1,33 @@ +open Opentelemetry.Proto + +(** A resource *) +type t = + | R_metrics of Metrics.resource_metrics list + | R_spans of Trace.resource_spans list + | R_logs of Logs.resource_logs list + +open struct + let of_x_or_empty ?service_name ?attrs ~f l = + if l = [] then + [] + else + [ f ?service_name ?attrs l ] +end + +let of_logs ?service_name ?attrs logs : t = + R_logs [ Util_resources.make_resource_logs ?service_name ?attrs logs ] + +let of_logs_or_empty ?service_name ?attrs logs = + of_x_or_empty ?service_name ?attrs ~f:of_logs logs + +let of_spans ?service_name ?attrs spans : t = + R_spans [ Util_resources.make_resource_spans ?service_name ?attrs spans ] + +let of_spans_or_empty ?service_name ?attrs spans = + of_x_or_empty ?service_name ?attrs ~f:of_spans spans + +let of_metrics ?service_name ?attrs m : t = + R_metrics [ Util_resources.make_resource_metrics ?service_name ?attrs m ] + +let of_metrics_or_empty ?service_name ?attrs ms = + of_x_or_empty ?service_name ?attrs ~f:of_metrics ms diff --git a/src/client/batch.ml b/src/client/batch.ml index e508c09f..ba22fe1f 100644 --- a/src/client/batch.ml +++ b/src/client/batch.ml @@ -1,14 +1,19 @@ +open Opentelemetry_util module Otel = Opentelemetry +module A = Opentelemetry_atomic.Atomic +module Domain = Opentelemetry_domain + +type 'a state = { + start: Mtime.t; + size: int; + q: 'a list; (** The queue is a FIFO represented as a list in reverse order *) +} type 'a t = { - mutable size: int; - mutable q: 'a list; - (** The queue is a FIFO represented as a list in reverse order *) + st: 'a state A.t; batch: int; (** Minimum size to batch before popping *) high_watermark: int; (** Size above which we start dropping signals *) timeout: Mtime.span option; - mutable start: Mtime.t; - mutex: Mutex.t; } let default_high_watermark batch_size = @@ -17,6 +22,10 @@ let default_high_watermark batch_size = else batch_size * 10 +let _dummy_start = Mtime.min_stamp + +let _empty_state : _ state = { q = []; size = 0; start = _dummy_start } + let make ?(batch = 1) ?high_watermark ?now ?timeout () : _ t = let high_watermark = match high_watermark with @@ -26,36 +35,58 @@ let make ?(batch = 1) ?high_watermark ?now ?timeout () : _ t = let start = match now with | Some x -> x - | None -> Mtime_clock.now () + | None -> _dummy_start in - let mutex = Mutex.create () in assert (batch > 0); - { size = 0; q = []; start; batch; timeout; high_watermark; mutex } + { st = A.make { size = 0; q = []; start }; batch; timeout; high_watermark } -let timeout_expired_ ~now self : bool = - match self.timeout with +let timeout_expired_ ~now ~timeout (self : _ state) : bool = + match timeout with | Some t -> let elapsed = Mtime.span now self.start in Mtime.Span.compare elapsed t >= 0 | None -> false (* Big enough to send a batch *) -let is_full_ self : bool = self.size >= self.batch +let[@inline] is_full_ ~batch (self : _ state) : bool = self.size >= batch -let ready_to_pop ~force ~now self = - self.size > 0 && (force || is_full_ self || timeout_expired_ ~now self) +let[@inline] atomic_update_loop_ (type res) (self : _ t) + (f : 'a state -> 'a state * res) : res = + let exception Return of res in + try + let backoff = ref 1 in + while true do + let st = A.get self.st in + let new_st, res = f st in + if A.compare_and_set self.st st new_st then raise_notrace (Return res); + + (* poor man's backoff strategy *) + Domain.relax_loop !backoff; + backoff := min 128 (2 * !backoff) + done + with Return res -> res let pop_if_ready ?(force = false) ~now (self : _ t) : _ list option = let rev_batch_opt = - Otel.Util_mutex.protect self.mutex @@ fun () -> - if ready_to_pop ~force ~now self then ( - assert (self.q <> []); - let batch = self.q in - self.q <- []; - self.size <- 0; - Some batch + (* update state. When uncontended this runs only once. *) + atomic_update_loop_ self @@ fun state -> + (* *) + + (* check if the batch is ready *) + let ready_to_pop = + state.size > 0 + && (force + || is_full_ ~batch:self.batch state + || timeout_expired_ ~now ~timeout:self.timeout state) + in + + if ready_to_pop then ( + assert (state.q <> []); + let batch = state.q in + let new_st = _empty_state in + new_st, Some batch ) else - None + state, None in match rev_batch_opt with | None -> None @@ -63,25 +94,78 @@ let pop_if_ready ?(force = false) ~now (self : _ t) : _ list option = (* Reverse the list to retrieve the FIFO order. *) Some (List.rev batch) -let rec push_unprotected (self : _ t) ~(elems : _ list) : unit = - match elems with - | [] -> () - | x :: xs -> - self.q <- x :: self.q; - self.size <- 1 + self.size; - push_unprotected self ~elems:xs - let push (self : _ t) elems : [ `Dropped | `Ok ] = - Otel.Util_mutex.protect self.mutex @@ fun () -> - if self.size >= self.high_watermark then - (* drop this to prevent queue from growing too fast *) - `Dropped + if elems = [] then + `Ok `Ok else ( - if self.size = 0 && Option.is_some self.timeout then - (* current batch starts now *) - self.start <- Mtime_clock.now (); + let now = lazy (Mtime_clock.now ()) in + atomic_update_loop_ self @@ fun state -> + if state.size >= self.high_watermark then + (* drop this to prevent queue from growing too fast *) + state, `Dropped + else ( + let start = + if state.size = 0 && Option.is_some self.timeout then + Lazy.force now + else + state.start + in - (* add to queue *) - push_unprotected self ~elems; - `Ok + (* add to queue *) + let state = + { + size = state.size + List.length elems; + q = List.rev_append elems state.q; + start; + } + in + + state, `Ok + ) ) + +let[@inline] push' self elems = ignore (push self elems : [ `Dropped | `Ok ]) + +open Opentelemetry_emitter + +let wrap_emitter (self : _ t) (e : _ Emitter.t) : _ Emitter.t = + let enabled () = e.enabled () in + let closed () = e.closed () in + let flush_and_close () = + (* FIXME: we need to close the batch first, to prevent + further pushes; then write the content to [e]; then + flusn and close [e]. In this order. *) + (match pop_if_ready self ~force:true ~now:Mtime.max_stamp with + | None -> () + | Some l -> Emitter.emit e l); + + Emitter.flush_and_close e + in + + let maybe_emit ~now = + match pop_if_ready self ~force:false ~now with + | None -> () + | Some l -> Emitter.emit e l + in + + let tick ~now = + (* first, check if batch has timed out *) + maybe_emit ~now; + + (* only then, tick the underlying emitter *) + Emitter.tick e ~now + in + + let emit l = + if l <> [] && e.enabled () then ( + push' self l; + + (* TODO: it'd be nice if we checked only for size here, not + for timeout. The [tick] function is enough for timeouts, + whereas [emit] is in the hot path of every single span/metric/log *) + let now = Mtime_clock.now () in + maybe_emit ~now + ) + in + + { Emitter.closed; enabled; flush_and_close; tick; emit } diff --git a/src/client/batch.mli b/src/client/batch.mli index a7ed2aa9..fa64083b 100644 --- a/src/client/batch.mli +++ b/src/client/batch.mli @@ -50,3 +50,12 @@ val push : 'a t -> 'a list -> [ `Dropped | `Ok ] (** [push b xs] is [`Ok] if it succeeds in pushing the values in [xs] into the batch [b], or [`Dropped] if the current size of the batch has exceeded the high water mark determined by the [batch] argument to [{!make}]. ) *) + +val push' : 'a t -> 'a list -> unit +(** Like {!push} but ignores the result *) + +open Opentelemetry_emitter + +val wrap_emitter : 'a t -> 'a Emitter.t -> 'a Emitter.t +(** [wrap_emitter batch e] is an emitter that uses batch [batch] to gather + signals into larger lists before passing them to [e]. *) diff --git a/src/client/bounded_queue.ml b/src/client/bounded_queue.ml new file mode 100644 index 00000000..b1cc1e81 --- /dev/null +++ b/src/client/bounded_queue.ml @@ -0,0 +1,67 @@ +(** Interface for a thread-safe, bounded queue. + + After the high watermark is reached, pushing items into the queue will + instead discard them. *) + +exception Closed +(** Raised when pushing into a closed queue *) + +type 'a pop_result = + [ `Empty + | `Closed + | `Item of 'a + ] + +type 'a t = { + push: 'a list -> unit; + (** Push items. This might discard some of them. + @raise Closed if the queue is closed. *) + num_discarded: unit -> int; (** How many items were discarded? *) + on_non_empty: (unit -> unit) -> unit; + (** [on_non_empty f] registers [f] to be called whenever the queue + transitions from empty to non-empty. *) + try_pop: unit -> 'a pop_result; (** Try to pop an item right now. *) + close: unit -> unit; + (** Close the queue. Items currently in the queue will still be accessible + to consumers until the queue is emptied out. Idempotent. *) + closed: unit -> bool; + (** Is the queue closed {b for writing}. Consumers should only use + [try_pop] because a queue that's closed-for-writing might still + contain straggler items that need to be consumed. + + This should be as fast and cheap as possible. *) +} +(** A bounded queue, with multiple producers and potentially multiple consumers. + + All functions must be thread-safe except for [try_pop] which might not have + to be depending on the context (e.g. a Lwt-specific queue implementation + will consume only from the Lwt thread). *) + +let[@inline] push (self : _ t) x : unit = self.push x + +let[@inline] num_discarded self = self.num_discarded () + +let[@inline] try_pop (self : _ t) : _ pop_result = self.try_pop () + +let[@inline] on_non_empty (self : _ t) f = self.on_non_empty f + +let[@inline] close (self : _ t) : unit = self.close () + +let[@inline] closed (self : _ t) : bool = self.closed () + +(** Turn the writing end of the queue into an emitter. *) +let to_emitter (self : 'a t) : 'a Opentelemetry_emitter.Emitter.t = + let closed () = self.closed () in + let enabled () = not (closed ()) in + let emit x = if x <> [] then push self x in + let tick ~now:_ = () in + + (* NOTE: we cannot actually flush, only close. Emptying the queue is + fundamentally asynchronous because it's done by consumers *) + let flush_and_close () = close self in + { closed; enabled; emit; tick; flush_and_close } + +module Defaults = struct + (** The default high watermark *) + let high_watermark : int = 2048 +end diff --git a/src/client/bounded_queue_sync.ml b/src/client/bounded_queue_sync.ml new file mode 100644 index 00000000..487ea4f2 --- /dev/null +++ b/src/client/bounded_queue_sync.ml @@ -0,0 +1,116 @@ +module BQ = Bounded_queue + +exception Closed = Bounded_queue.Closed + +(* a variant of {!Sync_queue} with more bespoke pushing behavior *) +module Q : sig + type 'a t + + val create : unit -> 'a t + + val close : _ t -> unit + + val closed : _ t -> bool + + val try_pop : 'a t -> 'a option + + val push_while_not_full : high_watermark:int -> 'a t -> 'a list -> int * int + (** [push_while_not_full q ~high_watermark xs] tries to push each item of [x] + into [q]. + + An item is not pushed if the queue is "full" (size >= high_watermark). + + This returns a pair [num_discarded, old_size] where [num_discarded] is the + number of items that could not be pushed, and [old_size] is the size + before anything was pushed. *) +end = struct + module UM = Opentelemetry_util.Util_mutex + + type 'a t = { + mutex: Mutex.t; + q: 'a Queue.t; + mutable closed: bool; + } + + let create () : _ t = + { mutex = Mutex.create (); q = Queue.create (); closed = false } + + (* NOTE: the race condition here is benign, assuming no tearing of + a value of type [bool] which OCaml's memory model should guarantee. *) + let[@inline] closed self = self.closed + + let close (self : _ t) = + UM.protect self.mutex @@ fun () -> + if not self.closed then self.closed <- true + + let try_pop (self : 'a t) : 'a option = + UM.protect self.mutex @@ fun () -> + if self.closed then raise Closed; + try Some (Queue.pop self.q) with Queue.Empty -> None + + let push_while_not_full ~high_watermark (self : 'a t) (xs : 'a list) : + int * int = + UM.protect self.mutex @@ fun () -> + if self.closed then raise Closed; + + let old_size = Queue.length self.q in + let xs = ref xs in + + let continue = ref true in + while !continue && Queue.length self.q < high_watermark do + match !xs with + | [] -> continue := false + | x :: tl_xs -> + xs := tl_xs; + Queue.push x self.q + done; + + let n_discarded = List.length !xs in + n_discarded, old_size +end + +type 'a state = { + n_discarded: int Atomic.t; + high_watermark: int; + q: 'a Q.t; + on_non_empty: Cb_set.t; +} + +let push (self : _ state) x = + let discarded, old_size = + try Q.push_while_not_full self.q ~high_watermark:self.high_watermark x + with Sync_queue.Closed -> raise BQ.Closed + in + + if discarded > 0 then + ignore (Atomic.fetch_and_add self.n_discarded discarded : int); + + (* wake up lagards if the queue was empty *) + if old_size = 0 then Cb_set.trigger self.on_non_empty; + () + +let try_pop (self : _ state) : _ BQ.pop_result = + match Q.try_pop self.q with + | Some x -> `Item x + | None -> `Empty + | exception Sync_queue.Closed -> `Closed + +let to_bounded_queue (self : 'a state) : 'a BQ.t = + let closed () = Q.closed self.q in + let num_discarded () = Atomic.get self.n_discarded in + let push x = push self x in + let on_non_empty = Cb_set.register self.on_non_empty in + let try_pop () = try_pop self in + let close () = Q.close self.q in + { BQ.push; num_discarded; try_pop; on_non_empty; close; closed } + +let create ~high_watermark () : _ BQ.t = + let st = + { + high_watermark; + q = Q.create (); + n_discarded = Atomic.make 0; + on_non_empty = Cb_set.create (); + } + in + to_bounded_queue st diff --git a/src/client/bounded_queue_sync.mli b/src/client/bounded_queue_sync.mli new file mode 100644 index 00000000..d3cf6347 --- /dev/null +++ b/src/client/bounded_queue_sync.mli @@ -0,0 +1,7 @@ +(** Bounded queue based on simple synchronization primitives. + + This is not the fastest queue but it should be versatile. *) + +val create : high_watermark:int -> unit -> 'a Bounded_queue.t +(** [create ~high_watermark ()] creates a new bounded queue based on + {!Sync_queue} *) diff --git a/src/client/client.ml b/src/client/client.ml deleted file mode 100644 index fa69c983..00000000 --- a/src/client/client.ml +++ /dev/null @@ -1,8 +0,0 @@ -(** Utilities for writing clients - - These are used for implementing e.g., the [opentelemetry-client-cohttp-lwt] - and [opentelemetry-client-ocurl] packages package. *) - -module Config = Config -module Signal = Signal -module Self_trace = Self_trace diff --git a/src/client/config.ml b/src/client/client_config.ml similarity index 88% rename from src/client/config.ml rename to src/client/client_config.ml index 6a832291..655ebcd7 100644 --- a/src/client/config.ml +++ b/src/client/client_config.ml @@ -9,12 +9,19 @@ type t = { batch_logs: int option; batch_timeout_ms: int; self_trace: bool; + http_concurrency_level: int option; } let pp out (self : t) : unit = - let ppiopt = Format.pp_print_option Format.pp_print_int in + let ppiopt out i = + match i with + | None -> Format.fprintf out "None" + | Some i -> Format.fprintf out "%d" i + in let pp_header ppf (a, b) = Format.fprintf ppf "@[%s: @,%s@]@." a b in - let ppheaders = Format.pp_print_list pp_header in + let ppheaders out l = + Format.fprintf out "[@[%a@]]" (Format.pp_print_list pp_header) l + in let { debug; self_trace; @@ -26,15 +33,17 @@ let pp out (self : t) : unit = batch_metrics; batch_logs; batch_timeout_ms; + http_concurrency_level; } = self in Format.fprintf out "{@[ debug=%B;@ self_trace=%B; url_traces=%S;@ url_metrics=%S;@ \ url_logs=%S;@ headers=%a;@ batch_traces=%a;@ batch_metrics=%a;@ \ - batch_logs=%a;@ batch_timeout_ms=%d @]}" + batch_logs=%a;@ batch_timeout_ms=%d;@ http_concurrency_level=%a @]}" debug self_trace url_traces url_metrics url_logs ppheaders headers ppiopt - batch_traces ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms + batch_traces ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms ppiopt + http_concurrency_level let default_url = "http://localhost:4318" @@ -50,6 +59,7 @@ type 'k make = ?headers:(string * string) list -> ?batch_timeout_ms:int -> ?self_trace:bool -> + ?http_concurrency_level:int -> 'k module type ENV = sig @@ -123,7 +133,8 @@ module Env () : ENV = struct let make k ?(debug = get_debug ()) ?url ?url_traces ?url_metrics ?url_logs ?(batch_traces = Some 400) ?(batch_metrics = Some 20) ?(batch_logs = Some 400) ?(headers = get_headers ()) - ?(batch_timeout_ms = 2_000) ?(self_trace = false) = + ?(batch_timeout_ms = 2_000) ?(self_trace = false) ?http_concurrency_level + = (* Ensure the state is synced, in case these values are passed in explicitly *) set_debug debug; set_headers headers; @@ -165,5 +176,6 @@ module Env () : ENV = struct batch_logs; batch_timeout_ms; self_trace; + http_concurrency_level; } end diff --git a/src/client/config.mli b/src/client/client_config.mli similarity index 95% rename from src/client/config.mli rename to src/client/client_config.mli index b8d0238f..6aedb128 100644 --- a/src/client/config.mli +++ b/src/client/client_config.mli @@ -19,8 +19,8 @@ type t = private { (** Batch metrics? If [Some i], then this produces batches of (at most) [i] items. If [None], there is no batching. - Note that traces and metrics are batched separately. Default [None]. - *) + Note that traces and metrics are batched separately. Default + [Some 20]. *) batch_logs: int option; (** Batch logs? See {!batch_metrics} for details. Default [Some 400] *) batch_timeout_ms: int; @@ -32,6 +32,9 @@ type t = private { (** If true, the OTEL library will also emit its own spans. Default [false]. @since 0.7 *) + http_concurrency_level: int option; + (** How many HTTP requests can be done simultaneously (at most)? + @since NEXT_RELEASE *) } (** Configuration. @@ -55,6 +58,7 @@ type 'k make = ?headers:(string * string) list -> ?batch_timeout_ms:int -> ?self_trace:bool -> + ?http_concurrency_level:int -> 'k (** A function that gathers all the values needed to construct a {!t}, and produces a ['k]. ['k] is typically a continuation used to construct a diff --git a/src/client/common_.ml b/src/client/common_.ml new file mode 100644 index 00000000..b1872cd8 --- /dev/null +++ b/src/client/common_.ml @@ -0,0 +1,6 @@ +module OTEL = Opentelemetry +module Proto = Opentelemetry_proto + +let spf = Printf.sprintf + +let ( let@ ) = ( @@ ) diff --git a/src/client/consumer.ml b/src/client/consumer.ml new file mode 100644 index 00000000..42bcf35d --- /dev/null +++ b/src/client/consumer.ml @@ -0,0 +1,30 @@ +(** Consumer that accepts items from a bounded queue *) + +type 'a t = { + active: unit -> bool; (** Still running? Must be fast and thread-safe *) + tick: unit -> unit; + (** Regularly called, eg to emit metrics, check timeouts, etc. Must be + thread safe. *) + shutdown: on_done:(unit -> unit) -> unit; + (** Shutdown the consumer as soon as possible, call [on_done()] once it's + done. *) +} +(** A consumer for signals of type ['a] *) + +type 'a consumer = 'a t + +let[@inline] active (self : _ t) = self.active () + +let[@inline] shutdown (self : _ t) ~on_done = self.shutdown ~on_done + +module Builder = struct + type 'a t = { start_consuming: 'a Bounded_queue.t -> 'a consumer } + (** A builder that will create a consumer for a given queue, start the + consumer so it starts consuming from the queue, and return the consumer. + *) + + let start_consuming (self : _ t) bq = self.start_consuming bq +end + +type any_resource_builder = Any_resource.t Builder.t +(** The type that's useful for OTEL backends *) diff --git a/src/client/dune b/src/client/dune index 095f71fa..5b3f6fde 100644 --- a/src/client/dune +++ b/src/client/dune @@ -1,5 +1,16 @@ (library (name opentelemetry_client) - (public_name opentelemetry.client) - (libraries opentelemetry pbrt mtime mtime.clock.os) - (synopsis "Common types and logic shared between client implementations")) + (public_name opentelemetry-client) + (flags :standard -open Opentelemetry_util) + (libraries + opentelemetry + opentelemetry.util + opentelemetry.emitter + opentelemetry.proto + opentelemetry.domain + pbrt + saturn + mtime + mtime.clock.os) + (synopsis + "Basic exporters, as well as common types and logic shared between exporters")) diff --git a/src/client/export_error.ml b/src/client/export_error.ml new file mode 100644 index 00000000..32f5e541 --- /dev/null +++ b/src/client/export_error.ml @@ -0,0 +1,48 @@ +type t = + [ `Status of int * Opentelemetry.Proto.Status.status + | `Failure of string + | `Sysbreak + ] + +let str_to_hex (s : string) : string = + Opentelemetry_util.Util_bytes_.bytes_to_hex (Bytes.unsafe_of_string s) + +(** Report the error on stderr. *) +let report_err : t -> unit = function + | `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!" + | `Failure msg -> + Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg + | `Status + ( code, + { + Opentelemetry.Proto.Status.code = scode; + message; + details; + _presence = _; + } ) -> + let pp_details out l = + List.iter + (fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s)) + l + in + Format.eprintf + "@[<2>opentelemetry: export failed with@ http code=%d@ status \ + {@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@." + code scode + (Bytes.unsafe_to_string message) + pp_details details + +let decode_invalid_http_response ~code ~url (body : string) : t = + try + let dec = Pbrt.Decoder.of_string body in + let status = Opentelemetry.Proto.Status.decode_pb_status dec in + `Status (code, status) + with e -> + let bt = Printexc.get_backtrace () in + `Failure + (Printf.sprintf + "httpc: decoding of status (url=%S, code=%d) failed with:\n\ + %s\n\ + HTTP body: %s\n\ + %s" + url code (Printexc.to_string e) (str_to_hex body) bt) diff --git a/src/client/exporter_add_batching.ml b/src/client/exporter_add_batching.ml new file mode 100644 index 00000000..0858d5b8 --- /dev/null +++ b/src/client/exporter_add_batching.ml @@ -0,0 +1,25 @@ +(** Add batching to emitter based on client config *) + +open Common_ + +open struct + let add_batch ~timeout batch (emitter : 'a OTEL.Emitter.t) : 'a OTEL.Emitter.t + = + let b = Batch.make ~batch ~timeout () in + Batch.wrap_emitter b emitter +end + +let add_batching ~(config : Client_config.t) (exp : OTEL.Exporter.t) : + OTEL.Exporter.t = + let timeout = Mtime.Span.(config.batch_timeout_ms * ms) in + let add_batch_opt (b : int option) e = + match b with + | None -> e + | Some b -> add_batch ~timeout b e + in + + let emit_spans = add_batch_opt config.batch_traces exp.emit_spans in + let emit_metrics = add_batch_opt config.batch_metrics exp.emit_metrics in + let emit_logs = add_batch_opt config.batch_logs exp.emit_logs in + + { exp with emit_spans; emit_metrics; emit_logs } diff --git a/src/client/exporter_debug.ml b/src/client/exporter_debug.ml new file mode 100644 index 00000000..295becac --- /dev/null +++ b/src/client/exporter_debug.ml @@ -0,0 +1,32 @@ +open Common_ +open Opentelemetry_emitter + +(** [debug exporter] behaves like [exporter], but will print signals on [stderr] + before passing them to [exporter] *) +let debug ?(out = Format.err_formatter) (exp : OTEL.Exporter.t) : + OTEL.Exporter.t = + let open Proto in + { + emit_spans = + Emitter.tap + (fun sp -> Format.fprintf out "SPAN: %a@." Trace.pp_span sp) + exp.emit_spans; + emit_logs = + Emitter.tap + (fun log -> Format.fprintf out "LOG: %a@." Proto.Logs.pp_log_record log) + exp.emit_logs; + emit_metrics = + Emitter.tap + (fun m -> Format.fprintf out "METRIC: %a@." Metrics.pp_metric m) + exp.emit_metrics; + on_tick = exp.on_tick; + tick = exp.tick; + cleanup = + (fun ~on_done () -> + Format.fprintf out "CLEANUP@."; + exp.cleanup ~on_done ()); + } + +(** Exporter that simply debugs on [stderr] *) +let debug_only : OTEL.Exporter.t = + debug ~out:Format.err_formatter @@ OTEL.Exporter.dummy () diff --git a/src/client/exporter_queued.ml b/src/client/exporter_queued.ml new file mode 100644 index 00000000..55304959 --- /dev/null +++ b/src/client/exporter_queued.ml @@ -0,0 +1,57 @@ +(** Build an exporter from a queue and a consumer *) + +open Common_ +module BQ = Bounded_queue + +module BQ_emitters = struct + let logs_emitter_of_bq ?service_name ?attrs + (q : Any_resource.t Bounded_queue.t) : OTEL.Logger.t = + Bounded_queue.to_emitter q + |> Opentelemetry_emitter.Emitter.flat_map + (Any_resource.of_logs_or_empty ?service_name ?attrs) + + let spans_emitter_of_bq ?service_name ?attrs + (q : Any_resource.t Bounded_queue.t) : OTEL.Tracer.t = + Bounded_queue.to_emitter q + |> Opentelemetry_emitter.Emitter.flat_map + (Any_resource.of_spans_or_empty ?service_name ?attrs) + + let metrics_emitter_of_bq ?service_name ?attrs + (q : Any_resource.t Bounded_queue.t) : OTEL.Metrics_emitter.t = + Bounded_queue.to_emitter q + |> Opentelemetry_emitter.Emitter.flat_map + (Any_resource.of_metrics_or_empty ?service_name ?attrs) +end + +(** Pair a queue with a consumer to build an exporter. + + The resulting exporter will emit logs, spans, and traces directly into the + bounded queue; while the consumer takes them from the queue to forward them + somewhere else, store them, etc. + @param resource_attributes attributes added to every "resource" batch *) +let create ?(resource_attributes = []) ~(q : Any_resource.t Bounded_queue.t) + ~(consumer : Consumer.any_resource_builder) () : OTEL.Exporter.t = + let emit_spans = + BQ_emitters.spans_emitter_of_bq ~attrs:resource_attributes q + in + let emit_logs = BQ_emitters.logs_emitter_of_bq ~attrs:resource_attributes q in + let emit_metrics = + BQ_emitters.metrics_emitter_of_bq ~attrs:resource_attributes q + in + + let tick_set = Cb_set.create () in + let tick () = Cb_set.trigger tick_set in + let on_tick f = Cb_set.register tick_set f in + + let closed = Atomic.make false in + + let consumer = consumer.start_consuming q in + + let cleanup ~on_done () = + if not (Atomic.exchange closed true) then ( + Bounded_queue.close q; + Consumer.shutdown consumer ~on_done + ) else + on_done () + in + { emit_logs; emit_metrics; emit_spans; tick; on_tick; cleanup } diff --git a/src/client/exporter_stdout.ml b/src/client/exporter_stdout.ml new file mode 100644 index 00000000..76dbf760 --- /dev/null +++ b/src/client/exporter_stdout.ml @@ -0,0 +1,65 @@ +(** A simple exporter that prints on stdout *) + +open Common_ +open Opentelemetry_util +open Opentelemetry_emitter + +open struct + let pp_span out (sp : OTEL.Span.t) = + let open OTEL in + Format.fprintf out + "@[<2>SPAN@ trace_id: %a@ span_id: %a@ name: %S@ start: %a@ end: %a@]@." + Trace_id.pp + (Trace_id.of_bytes sp.trace_id) + Span_id.pp + (Span_id.of_bytes sp.span_id) + sp.name Timestamp_ns.pp_debug sp.start_time_unix_nano + Timestamp_ns.pp_debug sp.end_time_unix_nano + + let pp_vlist mutex pp out l = + if l != [] then ( + let@ () = Util_mutex.protect mutex in + Format.fprintf out "@["; + List.iteri + (fun i x -> + if i > 0 then Format.fprintf out "@,"; + pp out x) + l; + Format.fprintf out "@]@." + ) +end + +let stdout : OTEL.Exporter.t = + let open Opentelemetry_util in + let out = Format.std_formatter in + let mutex = Mutex.create () in + let ticker = Cb_set.create () in + + let closed = Atomic.make false in + let tick () = Cb_set.trigger ticker in + + let mk_emitter pp_signal = + let emit l = + if Atomic.get closed then raise Emitter.Closed; + pp_vlist mutex pp_signal out l + in + let enabled () = not (Atomic.get closed) in + let tick ~now:_ = () in + let flush_and_close () = + if not (Atomic.exchange closed true) then + let@ () = Util_mutex.protect mutex in + Format.pp_print_flush out () + in + let closed () = Atomic.get closed in + + { Emitter.emit; closed; enabled; tick; flush_and_close } + in + + { + emit_spans = mk_emitter pp_span; + emit_logs = mk_emitter Proto.Logs.pp_log_record; + emit_metrics = mk_emitter Proto.Metrics.pp_metric; + on_tick = Cb_set.register ticker; + tick; + cleanup = (fun ~on_done () -> on_done ()); + } diff --git a/src/client/generic_http_consumer.ml b/src/client/generic_http_consumer.ml new file mode 100644 index 00000000..76823810 --- /dev/null +++ b/src/client/generic_http_consumer.ml @@ -0,0 +1,234 @@ +type error = Export_error.t + +(* TODO: emit this in a metric in [tick()] if self tracing is enabled? *) + +(** Number of errors met during export *) +let n_errors = Atomic.make 0 + +(* TODO: put this somewhere with an interval limiter to 30s + + (* there is a possible race condition here, as several threads might update + metrics at the same time. But that's harmless. *) + if add_own_metrics then ( + Atomic.set last_sent_metrics now; + let open OT.Metrics in + [ + make_resource_metrics + [ + sum ~name:"otel.export.dropped" ~is_monotonic:true + [ + int + ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) + ~now:(Mtime.to_uint64_ns now) (Atomic.get n_dropped); + ]; + sum ~name:"otel.export.errors" ~is_monotonic:true + [ + int + ~start_time_unix_nano:(Mtime.to_uint64_ns last_emit) + ~now:(Mtime.to_uint64_ns now) (Atomic.get n_errors); + ]; + ]; + ] + ) else + [] +*) + +module type IO = Generic_io.S_WITH_CONCURRENCY + +module type HTTPC = sig + module IO : IO + + type t + + val create : unit -> t + + val send : + t -> + url:string -> + decode:[ `Dec of Pbrt.Decoder.t -> 'a | `Ret of 'a ] -> + string -> + ('a, error) result IO.t + + val cleanup : t -> unit +end + +module Make + (IO : IO) + (Notifier : Generic_notifier.S with type 'a IO.t = 'a IO.t) + (Httpc : HTTPC with type 'a IO.t = 'a IO.t) : sig + val consumer : + ?override_n_workers:int -> + ticker_task:float option -> + stop:bool Atomic.t -> + config:Client_config.t -> + unit -> + Consumer.any_resource_builder + (** Create a consumer. + @param stop + shared stop variable, set to true to stop this (and maybe other tasks) + @param ticker_task + controls whether we start a task to call [tick] at the given interval in + seconds, or [None] to not start such a task at all. *) +end = struct + module Proto = Opentelemetry_proto + open IO + + type other_config = { + override_n_workers: int option; + ticker_task: float option; + } + + type state = { + stop: bool Atomic.t; + cleaned: bool Atomic.t; (** True when we cleaned up after closing *) + config: Client_config.t; + other_config: other_config; + q: Any_resource.t Bounded_queue.t; + notify: Notifier.t; + } + + let shutdown self = + Atomic.set self.stop true; + if not (Atomic.exchange self.cleaned true) then ( + Notifier.trigger self.notify; + Notifier.delete self.notify + ) + + let send_http_ (self : state) (httpc : Httpc.t) ~backoff ~url (data : string) + : unit IO.t = + let* r = Httpc.send httpc ~url ~decode:(`Ret ()) data in + match r with + | Ok () -> + Util_backoff.on_success backoff; + IO.return () + | Error `Sysbreak -> + Printf.eprintf "ctrl-c captured, stopping\n%!"; + Atomic.set self.stop true; + IO.return () + | Error err -> + Atomic.incr n_errors; + Export_error.report_err err; + (* avoid crazy error loop *) + let dur_s = Util_backoff.cur_duration_s backoff in + Util_backoff.on_error backoff; + IO.sleep_s (dur_s +. Random.float (dur_s /. 10.)) + + let send_metrics_http (st : state) client ~encoder ~backoff + (l : Proto.Metrics.resource_metrics list) = + let msg = Signal.Encode.metrics ~encoder l in + send_http_ st client msg ~backoff ~url:st.config.url_metrics + + let send_traces_http st client ~encoder ~backoff + (l : Proto.Trace.resource_spans list) = + let msg = Signal.Encode.traces ~encoder l in + send_http_ st client msg ~backoff ~url:st.config.url_traces + + let send_logs_http st client ~encoder ~backoff + (l : Proto.Logs.resource_logs list) = + let msg = Signal.Encode.logs ~encoder l in + send_http_ st client msg ~backoff ~url:st.config.url_logs + + let tick (self : state) = Notifier.trigger self.notify + + let start_worker (self : state) : unit = + let client = Httpc.create () in + let encoder = Pbrt.Encoder.create () in + let backoff = Util_backoff.create () in + + (* loop on [q] *) + let rec loop () : unit IO.t = + if Atomic.get self.stop then + IO.return () + else + let* () = + match Bounded_queue.try_pop self.q with + | `Closed -> + shutdown self; + IO.return () + | `Empty -> Notifier.wait self.notify + | `Item (R_logs logs) -> + send_logs_http self client ~encoder ~backoff logs + | `Item (R_metrics ms) -> + send_metrics_http self client ~encoder ~backoff ms + | `Item (R_spans spans) -> + send_traces_http self client ~encoder ~backoff spans + in + loop () + in + + IO.spawn (fun () -> + IO.protect loop ~finally:(fun () -> + Httpc.cleanup client; + IO.return ())) + + let start_ticker (self : state) ~(interval_s : float) : unit = + let rec loop () : unit IO.t = + if Atomic.get self.stop then + IO.return () + else + let* () = IO.sleep_s interval_s in + tick self; + loop () + in + IO.spawn loop + + let default_n_workers = 50 + + let create_state ?override_n_workers ~ticker_task ~stop ~config ~q () : state + = + let other_config = { override_n_workers; ticker_task } in + let self = + { + stop; + config; + other_config; + q; + cleaned = Atomic.make false; + notify = Notifier.create (); + } + in + + (* start workers *) + let n_workers = + min 2 + (max 500 + (match + ( self.other_config.override_n_workers, + self.config.http_concurrency_level ) + with + | Some n, _ -> n + | None, Some n -> n + | None, None -> default_n_workers)) + in + + for _i = 1 to n_workers do + start_worker self + done; + + (* start ticker *) + (match self.other_config.ticker_task with + | None -> () + | Some interval_s -> start_ticker self ~interval_s); + + self + + let to_consumer (self : state) : Any_resource.t Consumer.t = + let active () = not (Atomic.get self.stop) in + let shutdown ~on_done = + shutdown self; + on_done () + in + let tick () = tick self in + { active; tick; shutdown } + + let consumer ?override_n_workers ~ticker_task ~stop ~config () : + Consumer.any_resource_builder = + { + start_consuming = + (fun q -> + let st = + create_state ?override_n_workers ~ticker_task ~stop ~config ~q () + in + to_consumer st); + } +end diff --git a/src/client/generic_io.ml b/src/client/generic_io.ml new file mode 100644 index 00000000..9e297026 --- /dev/null +++ b/src/client/generic_io.ml @@ -0,0 +1,28 @@ +(** Generic IO *) +module type S = sig + type 'a t + + val return : 'a -> 'a t + + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + + val protect : finally:(unit -> unit t) -> (unit -> 'a t) -> 'a t +end + +module type S_WITH_CONCURRENCY = sig + include S + + val sleep_s : float -> unit t + + val spawn : (unit -> unit t) -> unit +end + +module Direct_style : S with type 'a t = 'a = struct + type 'a t = 'a + + let[@inline] return x = x + + let[@inline] ( let* ) x f = f x + + let protect = Fun.protect +end diff --git a/src/client/generic_notifier.ml b/src/client/generic_notifier.ml new file mode 100644 index 00000000..0d3ea1d3 --- /dev/null +++ b/src/client/generic_notifier.ml @@ -0,0 +1,17 @@ +module type IO = Generic_io.S + +module type S = sig + module IO : IO + + type t + + val create : unit -> t + + val delete : t -> unit + + val trigger : t -> unit + + val wait : t -> unit IO.t + + val register_bounded_queue : t -> _ Bounded_queue.t -> unit +end diff --git a/src/client/lwt/common_.ml b/src/client/lwt/common_.ml new file mode 100644 index 00000000..6a337b5c --- /dev/null +++ b/src/client/lwt/common_.ml @@ -0,0 +1 @@ +module OTEL = Opentelemetry diff --git a/src/client/lwt/dune b/src/client/lwt/dune new file mode 100644 index 00000000..2b7d082b --- /dev/null +++ b/src/client/lwt/dune @@ -0,0 +1,21 @@ +(library + (name opentelemetry_client_lwt) + (public_name opentelemetry-client.lwt) + (flags + :standard + -open + Opentelemetry_util + -open + Opentelemetry_client + -open + Opentelemetry_atomic) + (optional) ; lwt + (libraries + opentelemetry.core + opentelemetry.util + opentelemetry.atomic + opentelemetry.emitter + opentelemetry-client + lwt + lwt.unix) + (synopsis "Lwt-specific helpers for opentelemetry-client")) diff --git a/src/client/lwt/io_lwt.ml b/src/client/lwt/io_lwt.ml new file mode 100644 index 00000000..d8dcece9 --- /dev/null +++ b/src/client/lwt/io_lwt.ml @@ -0,0 +1,11 @@ +type 'a t = 'a Lwt.t + +let return = Lwt.return + +let ( let* ) = Lwt.Syntax.( let* ) + +let sleep_s = Lwt_unix.sleep + +let spawn = Lwt.async + +let[@inline] protect ~finally f = Lwt.finalize f finally diff --git a/src/client/lwt/io_lwt.mli b/src/client/lwt/io_lwt.mli new file mode 100644 index 00000000..ec083176 --- /dev/null +++ b/src/client/lwt/io_lwt.mli @@ -0,0 +1 @@ +include Generic_io.S_WITH_CONCURRENCY with type 'a t = 'a Lwt.t diff --git a/src/client/lwt/notifier_lwt.ml b/src/client/lwt/notifier_lwt.ml new file mode 100644 index 00000000..83d8dfbe --- /dev/null +++ b/src/client/lwt/notifier_lwt.ml @@ -0,0 +1,40 @@ +(** Notification that can be used on the consumer side of a bounded queue *) + +module IO = Io_lwt + +type t = { + notified: bool Atomic.t; + cond: unit Lwt_condition.t; + notification: int; + lwt_tid: int; (** thread ID where lwt runs *) + deleted: bool Atomic.t; +} + +let create () : t = + let notified = Atomic.make false in + let cond = Lwt_condition.create () in + let notification = + Lwt_unix.make_notification (fun () -> + Atomic.set notified false; + Lwt_condition.broadcast cond ()) + in + let lwt_tid = Thread.id @@ Thread.self () in + { notified; notification; cond; lwt_tid; deleted = Atomic.make false } + +let delete self : unit = + if not (Atomic.exchange self.deleted true) then + Lwt_unix.stop_notification self.notification + +let trigger (self : t) : unit = + let tid = Thread.id @@ Thread.self () in + + if tid = self.lwt_tid then + (* in lwt thread, directly use the condition *) + Lwt_condition.broadcast self.cond () + else if not (Atomic.exchange self.notified true) then + Lwt_unix.send_notification self.notification + +let wait (self : t) : unit Lwt.t = Lwt_condition.wait self.cond + +let register_bounded_queue (self : t) (q : _ Bounded_queue.t) : unit = + Bounded_queue.on_non_empty q (fun () -> trigger self) diff --git a/src/client/lwt/notifier_lwt.mli b/src/client/lwt/notifier_lwt.mli new file mode 100644 index 00000000..c16ae992 --- /dev/null +++ b/src/client/lwt/notifier_lwt.mli @@ -0,0 +1 @@ +include Generic_notifier.S with module IO = Io_lwt diff --git a/src/client/lwt/util_ticker.ml b/src/client/lwt/util_ticker.ml new file mode 100644 index 00000000..64c26da8 --- /dev/null +++ b/src/client/lwt/util_ticker.ml @@ -0,0 +1,18 @@ +open Common_ +open Lwt.Syntax + +(** Lwt task that calls [Exporter.tick] regularly, to help enforce timeouts. + @param frequency_s how often in seconds does the tick tock? *) +let start_ticker_thread ?(finally = ignore) ~(stop : bool Atomic.t) + ~(frequency_s : float) (exp : OTEL.Exporter.t) : unit = + let frequency_s = max frequency_s 0.5 in + let rec tick_loop () = + if Atomic.get stop then ( + finally (); + Lwt.return () + ) else + let* () = Lwt_unix.sleep frequency_s in + OTEL.Exporter.tick exp; + tick_loop () + in + Lwt.async tick_loop diff --git a/src/client/notifier_sync.ml b/src/client/notifier_sync.ml new file mode 100644 index 00000000..e1fd501d --- /dev/null +++ b/src/client/notifier_sync.ml @@ -0,0 +1,21 @@ +module IO = Generic_io.Direct_style + +type t = { + mutex: Mutex.t; + cond: Condition.t; +} + +let create () : t = { mutex = Mutex.create (); cond = Condition.create () } + +let trigger self = Condition.signal self.cond + +let delete = ignore + +let wait self = + Mutex.lock self.mutex; + Condition.wait self.cond self.mutex; + Mutex.unlock self.mutex + +(** Ensure we get signalled when the queue goes from empty to non-empty *) +let register_bounded_queue (self : t) (bq : _ Bounded_queue.t) : unit = + Bounded_queue.on_non_empty bq (fun () -> trigger self) diff --git a/src/client/notifier_sync.mli b/src/client/notifier_sync.mli new file mode 100644 index 00000000..f896ccb2 --- /dev/null +++ b/src/client/notifier_sync.mli @@ -0,0 +1 @@ +include Generic_notifier.S with type 'a IO.t = 'a diff --git a/src/client/rpool.ml b/src/client/rpool.ml new file mode 100644 index 00000000..833ccaef --- /dev/null +++ b/src/client/rpool.ml @@ -0,0 +1,59 @@ +module A = Atomic + +type 'a list_ = + | Nil + | Cons of int * 'a * 'a list_ + +type 'a t = { + mk_item: unit -> 'a; + clear: 'a -> unit; + max_size: int; (** Max number of items *) + items: 'a list_ A.t; +} + +let create ?(clear = ignore) ~mk_item ?(max_size = 512) () : _ t = + { mk_item; clear; max_size; items = A.make Nil } + +let rec acquire self = + match A.get self.items with + | Nil -> self.mk_item () + | Cons (_, x, tl) as l -> + if A.compare_and_set self.items l tl then + x + else + acquire self + +let[@inline] size_ = function + | Cons (sz, _, _) -> sz + | Nil -> 0 + +let release self x : unit = + let rec loop () = + match A.get self.items with + | Cons (sz, _, _) when sz >= self.max_size -> + (* forget the item *) + () + | l -> + if not (A.compare_and_set self.items l (Cons (size_ l + 1, x, l))) then + loop () + in + + self.clear x; + loop () + +let with_resource (self : _ t) f = + let x = acquire self in + try + let res = f x in + release self x; + res + with e -> + let bt = Printexc.get_raw_backtrace () in + release self x; + Printexc.raise_with_backtrace e bt + +module Raw = struct + let release = release + + let acquire = acquire +end diff --git a/src/client/rpool.mli b/src/client/rpool.mli new file mode 100644 index 00000000..4a80e115 --- /dev/null +++ b/src/client/rpool.mli @@ -0,0 +1,27 @@ +(** Simple resource pool. + + This is intended for buffers, protobuf encoders, etc. *) + +type 'a t +(** Pool of values of type ['a] *) + +val create : + ?clear:('a -> unit) -> mk_item:(unit -> 'a) -> ?max_size:int -> unit -> 'a t +(** Create a new pool. + @param mk_item produce a new item in case the pool is empty + @param max_size + maximum number of item in the pool before we start dropping resources on + the floor. This controls resource consumption. + @param clear a function called on items before recycling them. *) + +val with_resource : 'a t -> ('a -> 'b) -> 'b +(** [with_resource pool f] runs [f x] with [x] a resource; when [f] fails or + returns, [x] is returned to the pool for future reuse. *) + +(** Low level control over the pool. This is easier to get wrong (e.g. releasing + the same resource twice) so use with caution. *) +module Raw : sig + val acquire : 'a t -> 'a + + val release : 'a t -> 'a -> unit +end diff --git a/src/client/sampler.ml b/src/client/sampler.ml new file mode 100644 index 00000000..9ae63e0d --- /dev/null +++ b/src/client/sampler.ml @@ -0,0 +1,49 @@ +type t = { + proba_accept: float; + n_seen: int Atomic.t; + n_accepted: int Atomic.t; +} + +let create ~proba_accept () : t = + (* FIXME: either czzry a random state and protect it, or make sure + we Random.self_init() in the current domain?? *) + if proba_accept < 0. || proba_accept > 1. then + invalid_arg "sampler: proba_accept must be in [0., 1.]"; + { proba_accept; n_seen = Atomic.make 0; n_accepted = Atomic.make 0 } + +let[@inline] proba_accept self = self.proba_accept + +let actual_rate (self : t) : float = + let accept = Atomic.get self.n_accepted in + let total = Atomic.get self.n_seen in + + if total = 0 then + 1. + else + float accept /. float total + +let accept (self : t) : bool = + Atomic.incr self.n_seen; + + let n = Random.float 1. in + let res = n < self.proba_accept in + + if res then Atomic.incr self.n_accepted; + res + +open Opentelemetry_emitter + +let wrap_emitter (self : t) (e : _ Emitter.t) : _ Emitter.t = + let enabled () = e.enabled () in + let closed () = Emitter.closed e in + let flush_and_close () = Emitter.flush_and_close e in + let tick ~now = Emitter.tick e ~now in + + let emit l = + if l <> [] && e.enabled () then ( + let accepted = List.filter (fun _x -> accept self) l in + if accepted <> [] then Emitter.emit e accepted + ) + in + + { Emitter.closed; enabled; flush_and_close; tick; emit } diff --git a/src/client/sampler.mli b/src/client/sampler.mli new file mode 100644 index 00000000..784c8346 --- /dev/null +++ b/src/client/sampler.mli @@ -0,0 +1,25 @@ +(** Basic random sampling *) + +type t + +val create : proba_accept:float -> unit -> t +(** [create ~proba_accept:n ()] makes a new sampler. + + The sampler will accept signals with probability [n] (must be between 0 and + 1). + @raise Invalid_argument if [n] is not between 0 and 1. *) + +val accept : t -> bool +(** Do we accept a sample? This returns [true] with probability [proba_accept]. +*) + +val proba_accept : t -> float + +val actual_rate : t -> float +(** The ratio of signals we actually accepted so far *) + +open Opentelemetry_emitter + +val wrap_emitter : t -> 'a Emitter.t -> 'a Emitter.t +(** [wrap_emitter sampler e] is a new emitter that uses the [sampler] on each + individual signal before passing them to [e]. *) diff --git a/src/client/self_trace.ml b/src/client/self_trace.ml index 46757302..cf5adcb5 100644 --- a/src/client/self_trace.ml +++ b/src/client/self_trace.ml @@ -1,22 +1,28 @@ -module OT = Opentelemetry +open Common_ let enabled = Atomic.make false -let add_event (scope : OT.Scope.t) ev = OT.Scope.add_event scope (fun () -> ev) +let tracer = Atomic.make OTEL.Tracer.dynamic_forward_to_main_exporter -let dummy_trace_id_ = OT.Trace_id.dummy +let[@inline] add_event (scope : OTEL.Span.t) ev = OTEL.Span.add_event scope ev -let dummy_span_id = OT.Span_id.dummy +let set_tracer tr = Atomic.set tracer tr + +let dummy_trace_id_ = OTEL.Trace_id.dummy + +let dummy_span_id = OTEL.Span_id.dummy let with_ ?kind ?attrs name f = - if Atomic.get enabled then - OT.Trace.with_ ?kind ?attrs name f - else ( + if Atomic.get enabled then ( + let tracer = Atomic.get tracer in + OTEL.Tracer.with_ tracer ?kind ?attrs name f + ) else ( (* A new scope is needed here because it might be modified *) - let scope = - OT.Scope.make ~trace_id:dummy_trace_id_ ~span_id:dummy_span_id () + let span : OTEL.Span.t = + OTEL.Span.make ~trace_id:dummy_trace_id_ ~id:dummy_span_id ~start_time:0L + ~end_time:0L name in - f scope + f span ) let set_enabled b = Atomic.set enabled b diff --git a/src/client/self_trace.mli b/src/client/self_trace.mli index d0690f02..07cf9369 100644 --- a/src/client/self_trace.mli +++ b/src/client/self_trace.mli @@ -1,12 +1,21 @@ (** Mini tracing module (disabled if [config.self_trace=false]) *) -val add_event : Opentelemetry.Scope.t -> Opentelemetry.Event.t -> unit +open Common_ + +val add_event : OTEL.Span.t -> OTEL.Event.t -> unit val with_ : - ?kind:Opentelemetry.Span_kind.t -> - ?attrs:(string * Opentelemetry.value) list -> + ?kind:OTEL.Span_kind.t -> + ?attrs:(string * OTEL.value) list -> string -> - (Opentelemetry.Scope.t -> 'a) -> + (OTEL.Span.t -> 'a) -> 'a +(** A simple way to create spans to instrument parts of the OTEL SDK itself. *) + +val set_tracer : OTEL.Tracer.t -> unit +(** Set the tracer to use for self-tracing. We need to make sure it will not + lead to infinite loops (if the tracer itself is self-tracing, it might + invoke itself recursively, and so on). *) val set_enabled : bool -> unit +(** Enable self tracing. A tracer must also be set. *) diff --git a/src/client/signal.ml b/src/client/signal.ml index d3fafcfa..cde963de 100644 --- a/src/client/signal.ml +++ b/src/client/signal.ml @@ -37,7 +37,7 @@ let is_logs = function | _ -> false module Encode = struct - let resource_to_string ~encoder ~ctor ~enc resource = + let resource_to_string ~encoder ~ctor ~enc resource : string = let encoder = match encoder with | Some e -> @@ -48,29 +48,28 @@ module Encode = struct let x = ctor resource in let@ _sc = Self_trace.with_ ~kind:Span.Span_kind_internal "encode-proto" in enc x encoder; - Pbrt.Encoder.to_string encoder + let data = Pbrt.Encoder.to_string encoder in + Pbrt.Encoder.reset encoder; + data let logs ?encoder resource_logs = - resource_logs - |> resource_to_string ~encoder - ~ctor:(fun r -> - Logs_service.make_export_logs_service_request ~resource_logs:r ()) - ~enc:Logs_service.encode_pb_export_logs_service_request + resource_to_string ~encoder resource_logs + ~ctor:(fun r -> + Logs_service.make_export_logs_service_request ~resource_logs:r ()) + ~enc:Logs_service.encode_pb_export_logs_service_request let metrics ?encoder resource_metrics = - resource_metrics - |> resource_to_string ~encoder - ~ctor:(fun r -> - Metrics_service.make_export_metrics_service_request - ~resource_metrics:r ()) - ~enc:Metrics_service.encode_pb_export_metrics_service_request + resource_to_string ~encoder resource_metrics + ~ctor:(fun r -> + Metrics_service.make_export_metrics_service_request ~resource_metrics:r + ()) + ~enc:Metrics_service.encode_pb_export_metrics_service_request let traces ?encoder resource_spans = - resource_spans - |> resource_to_string ~encoder - ~ctor:(fun r -> - Trace_service.make_export_trace_service_request ~resource_spans:r ()) - ~enc:Trace_service.encode_pb_export_trace_service_request + resource_to_string ~encoder resource_spans + ~ctor:(fun r -> + Trace_service.make_export_trace_service_request ~resource_spans:r ()) + ~enc:Trace_service.encode_pb_export_trace_service_request end module Decode = struct diff --git a/src/client-ocurl/b_queue.ml b/src/client/sync_queue.ml similarity index 86% rename from src/client-ocurl/b_queue.ml rename to src/client/sync_queue.ml index 98f43876..46d46af1 100644 --- a/src/client-ocurl/b_queue.ml +++ b/src/client/sync_queue.ml @@ -1,4 +1,4 @@ -open Opentelemetry.Util_mutex +module UM = Opentelemetry_util.Util_mutex type 'a t = { mutex: Mutex.t; @@ -18,14 +18,14 @@ let create () : _ t = } let close (self : _ t) = - protect self.mutex @@ fun () -> + UM.protect self.mutex @@ fun () -> if not self.closed then ( self.closed <- true; Condition.broadcast self.cond (* awake waiters so they fail *) ) let push (self : _ t) x : unit = - protect self.mutex @@ fun () -> + UM.protect self.mutex @@ fun () -> if self.closed then raise Closed else ( @@ -45,7 +45,7 @@ let pop (self : 'a t) : 'a = x ) in - protect self.mutex loop + UM.protect self.mutex loop let pop_all (self : 'a t) into : unit = let rec loop () = @@ -56,4 +56,4 @@ let pop_all (self : 'a t) into : unit = ) else Queue.transfer self.q into in - protect self.mutex loop + UM.protect self.mutex loop diff --git a/src/client-ocurl/b_queue.mli b/src/client/sync_queue.mli similarity index 81% rename from src/client-ocurl/b_queue.mli rename to src/client/sync_queue.mli index d020dfb3..d64296d7 100644 --- a/src/client-ocurl/b_queue.mli +++ b/src/client/sync_queue.mli @@ -1,4 +1,4 @@ -(** Basic Blocking Queue *) +(** Simple blocking queue *) type 'a t @@ -15,8 +15,9 @@ val pop : 'a t -> 'a @raise Closed if the queue was closed before a new element was available. *) val pop_all : 'a t -> 'a Queue.t -> unit -(** [pop_all q into] pops all the elements of [q] and moves them into [into]. It - might block until an element comes. +(** [pop_all q into] pops all the elements of [q] and moves them into [into]. if + no element is available, it will block until it successfully transfers at + least one item to [into]. @raise Closed if the queue was closed before a new element was available. *) val close : _ t -> unit diff --git a/src/client/util_backoff.ml b/src/client/util_backoff.ml new file mode 100644 index 00000000..4bc50ab2 --- /dev/null +++ b/src/client/util_backoff.ml @@ -0,0 +1,13 @@ +type t = { + mutable delay_s: float; + min_delay_s: float; + max_delay_s: float; +} + +let create () = { delay_s = 0.001; min_delay_s = 0.001; max_delay_s = 20. } + +let on_success self = self.delay_s <- max self.min_delay_s (self.delay_s /. 10.) + +let on_error self = self.delay_s <- min self.max_delay_s (self.delay_s *. 2.) + +let[@inline] cur_duration_s self = self.delay_s diff --git a/src/client/util_backoff.mli b/src/client/util_backoff.mli new file mode 100644 index 00000000..f097f8ae --- /dev/null +++ b/src/client/util_backoff.mli @@ -0,0 +1,12 @@ +(** Backoff behavior in case of errors *) + +type t +(** Backoff state. Not thread safe *) + +val create : unit -> t + +val on_success : t -> unit + +val on_error : t -> unit + +val cur_duration_s : t -> float diff --git a/src/client/util_resources.ml b/src/client/util_resources.ml new file mode 100644 index 00000000..7ca16496 --- /dev/null +++ b/src/client/util_resources.ml @@ -0,0 +1,34 @@ +(** Group signals into [resource_xxx] objects *) + +open Common_ + +let make_resource_logs ?service_name ?attrs (logs : Proto.Logs.log_record list) + : Proto.Logs.resource_logs = + let attributes = OTEL.Globals.mk_attributes ?service_name ?attrs () in + let resource = Proto.Resource.make_resource ~attributes () in + let ll = + Proto.Logs.make_scope_logs ~scope:OTEL.Globals.instrumentation_library + ~log_records:logs () + in + Proto.Logs.make_resource_logs ~resource ~scope_logs:[ ll ] () + +let make_resource_spans ?service_name ?attrs spans : Proto.Trace.resource_spans + = + let ils = + Proto.Trace.make_scope_spans ~scope:OTEL.Globals.instrumentation_library + ~spans () + in + let attributes = OTEL.Globals.mk_attributes ?service_name ?attrs () in + let resource = Proto.Resource.make_resource ~attributes () in + Proto.Trace.make_resource_spans ~resource ~scope_spans:[ ils ] () + +(** Aggregate metrics into a {!Proto.Metrics.resource_metrics} *) +let make_resource_metrics ?service_name ?attrs (l : OTEL.Metrics.t list) : + Proto.Metrics.resource_metrics = + let open Proto.Metrics in + let lm = + make_scope_metrics ~scope:OTEL.Globals.instrumentation_library ~metrics:l () + in + let attributes = OTEL.Globals.mk_attributes ?service_name ?attrs () in + let resource = Proto.Resource.make_resource ~attributes () in + Proto.Metrics.make_resource_metrics ~scope_metrics:[ lm ] ~resource () diff --git a/src/client/util_thread.ml b/src/client/util_thread.ml new file mode 100644 index 00000000..75479688 --- /dev/null +++ b/src/client/util_thread.ml @@ -0,0 +1,44 @@ +open Common_ + +(** start a thread in the background, running [f()], blocking signals *) +let start_bg_thread (f : unit -> unit) : Thread.t = + let unix_run () = + let signals = + [ + Sys.sigusr1; + Sys.sigusr2; + Sys.sigterm; + Sys.sigpipe; + Sys.sigalrm; + Sys.sigstop; + ] + in + ignore (Thread.sigmask Unix.SIG_BLOCK signals : _ list); + f () + in + (* no signals on Windows *) + let run () = + if Sys.win32 then + f () + else + unix_run () + in + Thread.create run () + +(** thread that calls [tick()] regularly, to help enforce timeouts *) +let setup_ticker_thread ~stop ~sleep_ms (exp : OTEL.Exporter.t) () = + let sleep_s = float sleep_ms /. 1000. in + let tick_loop () = + try + while not @@ Atomic.get stop do + Thread.delay sleep_s; + OTEL.Exporter.tick exp + done + with + | Sync_queue.Closed -> () + | exn -> + (* print and ignore *) + Printf.eprintf "otel-ocurl: ticker thread: uncaught exn:\n%s\n%!" + (Printexc.to_string exn) + in + start_bg_thread tick_loop diff --git a/src/core/any_signal.ml b/src/core/any_signal.ml new file mode 100644 index 00000000..4b3ec102 --- /dev/null +++ b/src/core/any_signal.ml @@ -0,0 +1,13 @@ +(** Any kind of signal *) + +open Common_ + +type t = + | Span of Span.t + | Metric of Metrics.t + | Log of Log_record.t + +let pp out = function + | Span s -> Proto.Trace.pp_span out s + | Metric m -> Proto.Metrics.pp_metric out m + | Log l -> Proto.Logs.pp_log_record out l diff --git a/src/core/common_.ml b/src/core/common_.ml new file mode 100644 index 00000000..c6544d5d --- /dev/null +++ b/src/core/common_.ml @@ -0,0 +1,4 @@ +let spf = Printf.sprintf + +module Proto = Opentelemetry_proto +module Atomic = Opentelemetry_atomic.Atomic diff --git a/src/core/context.ml b/src/core/context.ml new file mode 100644 index 00000000..883f646e --- /dev/null +++ b/src/core/context.ml @@ -0,0 +1,17 @@ +(** The context used in OTEL operations, to carry the current trace, etc. + + https://opentelemetry.io/docs/specs/otel/context/ *) + +type t = Hmap.t +(** The context type. We use [Hmap.t] as it's standard and widely used. *) + +type 'a key = 'a Hmap.key + +let set = Hmap.add + +(** @raise Invalid_argument if not present *) +let get_exn : 'a key -> t -> 'a = Hmap.get + +let get : 'a key -> t -> 'a option = Hmap.find + +let[@inline] new_key () : 'a key = Hmap.Key.create () diff --git a/src/core/dune b/src/core/dune index 248e3aff..c33b4edc 100644 --- a/src/core/dune +++ b/src/core/dune @@ -1,14 +1,15 @@ (library - (name opentelemetry) - (synopsis "API for opentelemetry instrumentation") - (flags :standard -warn-error -a+8) + (name opentelemetry_core) + (public_name opentelemetry.core) + (synopsis "Core types and definitions for opentelemetry") + (flags :standard -warn-error -a+8 -open Opentelemetry_util) (libraries opentelemetry.proto - opentelemetry.ambient-context + opentelemetry.util + opentelemetry.atomic + opentelemetry.emitter ptime ptime.clock.os pbrt threads - opentelemetry.atomic - hmap) - (public_name opentelemetry)) + hmap)) diff --git a/src/core/event.ml b/src/core/event.ml new file mode 100644 index 00000000..3d632a4d --- /dev/null +++ b/src/core/event.ml @@ -0,0 +1,9 @@ +open Common_ +open Proto.Trace + +type t = span_event + +let make ?(time_unix_nano = Timestamp_ns.now_unix_ns ()) ?(attrs = []) + (name : string) : t = + let attrs = List.map Key_value.conv attrs in + make_span_event ~time_unix_nano ~name ~attributes:attrs () diff --git a/src/core/event.mli b/src/core/event.mli new file mode 100644 index 00000000..8b90f641 --- /dev/null +++ b/src/core/event.mli @@ -0,0 +1,12 @@ +(** Events. + + Events occur at a given time and can carry attributes. They always belong in + a span. *) + +open Common_ +open Proto.Trace + +type t = span_event + +val make : + ?time_unix_nano:Timestamp_ns.t -> ?attrs:Key_value.t list -> string -> t diff --git a/src/core/exporter.ml b/src/core/exporter.ml new file mode 100644 index 00000000..621ea5b9 --- /dev/null +++ b/src/core/exporter.ml @@ -0,0 +1,64 @@ +(** Exporter. + + This is the pluggable component that actually sends signals to a OTEL + collector, or prints them, or saves them somewhere. + + This is part of the SDK, not just the API, so most real implementations live + in their own library. *) + +open Common_ +open Opentelemetry_emitter + +type t = { + emit_spans: Proto.Trace.span Emitter.t; + emit_metrics: Proto.Metrics.metric Emitter.t; + emit_logs: Proto.Logs.log_record Emitter.t; + on_tick: (unit -> unit) -> unit; + tick: unit -> unit; + (** Call all the callbacks registered with [on_tick]. Should be triggered + regularly for background processing, timeout checks, etc. *) + cleanup: on_done:(unit -> unit) -> unit -> unit; + (** [cleanup ~on_done ()] is called when the exporter is shut down, and is + responsible for sending remaining batches, flushing sockets, etc. + @param on_done + callback invoked after the cleanup is done. @since 0.12 *) +} +(** Main exporter interface. *) + +(** Dummy exporter, does nothing *) +let dummy () : t = + let ticker = Cb_set.create () in + { + emit_spans = Emitter.dummy; + emit_metrics = Emitter.dummy; + emit_logs = Emitter.dummy; + on_tick = Cb_set.register ticker; + tick = (fun () -> Cb_set.trigger ticker); + cleanup = (fun ~on_done () -> on_done ()); + } + +let[@inline] send_trace (self : t) (l : Proto.Trace.span list) = + Emitter.emit self.emit_spans l + +let[@inline] send_metrics (self : t) (l : Proto.Metrics.metric list) = + Emitter.emit self.emit_metrics l + +let[@inline] send_logs (self : t) (l : Proto.Logs.log_record list) = + Emitter.emit self.emit_logs l + +let[@inline] on_tick (self : t) f = self.on_tick f + +(** Do background work. Call this regularly if the collector doesn't already + have a ticker thread or internal timer. *) +let tick (self : t) = + (* make sure emitters get the chance to check timeouts, flush, etc. *) + let now = Mtime_clock.now () in + Emitter.tick ~now self.emit_spans; + Emitter.tick ~now self.emit_metrics; + Emitter.tick ~now self.emit_logs; + + (* call the callbacks *) + self.tick (); + () + +let[@inline] cleanup (self : t) ~on_done : unit = self.cleanup ~on_done () diff --git a/src/core/key_value.ml b/src/core/key_value.ml new file mode 100644 index 00000000..36ee087c --- /dev/null +++ b/src/core/key_value.ml @@ -0,0 +1,11 @@ +open Common_ + +type t = string * Value.t + +let conv (k, v) = + let open Proto.Common in + let value = Value.conv v in + make_key_value ~key:k ?value () + +let of_otel (kv : Proto.Common.key_value) : t = + kv.key, Value.of_otel_opt kv.value diff --git a/src/core/lock.ml b/src/core/lock.ml deleted file mode 100644 index 6ce295bb..00000000 --- a/src/core/lock.ml +++ /dev/null @@ -1,17 +0,0 @@ -let lock_ : (unit -> unit) ref = ref ignore - -let unlock_ : (unit -> unit) ref = ref ignore - -let set_mutex ~lock ~unlock : unit = - lock_ := lock; - unlock_ := unlock - -let[@inline] with_lock f = - !lock_ (); - match f () with - | x -> - !unlock_ (); - x - | exception e -> - !unlock_ (); - Printexc.raise_with_backtrace e (Printexc.get_raw_backtrace ()) diff --git a/src/core/lock.mli b/src/core/lock.mli deleted file mode 100644 index 2040bd1b..00000000 --- a/src/core/lock.mli +++ /dev/null @@ -1,9 +0,0 @@ -(** A global lock, modifiable by the user *) - -val set_mutex : lock:(unit -> unit) -> unlock:(unit -> unit) -> unit -(** Set a pair of lock/unlock functions that are used to protect access to - global state, if needed. By default these do nothing. *) - -val with_lock : (unit -> 'a) -> 'a -(** Call [f()] while holding the mutex defined {!set_mutex}, then release the - mutex. *) diff --git a/src/core/log_record.ml b/src/core/log_record.ml new file mode 100644 index 00000000..0de8ef18 --- /dev/null +++ b/src/core/log_record.ml @@ -0,0 +1,78 @@ +(** Logs. + + See + {{:https://opentelemetry.io/docs/reference/specification/overview/#log-signal} + the spec} *) + +open Common_ +open Proto.Logs + +type t = Proto.Logs.log_record + +(** Severity level of a log event *) +type severity = Proto.Logs.severity_number = + | Severity_number_unspecified + | Severity_number_trace + | Severity_number_trace2 + | Severity_number_trace3 + | Severity_number_trace4 + | Severity_number_debug + | Severity_number_debug2 + | Severity_number_debug3 + | Severity_number_debug4 + | Severity_number_info + | Severity_number_info2 + | Severity_number_info3 + | Severity_number_info4 + | Severity_number_warn + | Severity_number_warn2 + | Severity_number_warn3 + | Severity_number_warn4 + | Severity_number_error + | Severity_number_error2 + | Severity_number_error3 + | Severity_number_error4 + | Severity_number_fatal + | Severity_number_fatal2 + | Severity_number_fatal3 + | Severity_number_fatal4 + +let pp_severity = pp_severity_number + +type flags = Proto.Logs.log_record_flags = + | Log_record_flags_do_not_use + | Log_record_flags_trace_flags_mask + +let pp_flags = Proto.Logs.pp_log_record_flags + +(** Make a single log entry *) +let make ?time ?(observed_time_unix_nano = Timestamp_ns.now_unix_ns ()) + ?severity ?log_level ?flags ?trace_id ?span_id ?(attrs = []) + (body : Value.t) : t = + let time_unix_nano = + match time with + | None -> observed_time_unix_nano + | Some t -> t + in + let trace_id = Option.map Trace_id.to_bytes trace_id in + let span_id = Option.map Span_id.to_bytes span_id in + let body = Value.conv body in + let attributes = List.map Key_value.conv attrs in + make_log_record ~time_unix_nano ~observed_time_unix_nano + ?severity_number:severity ?severity_text:log_level ?flags ?trace_id ?span_id + ~attributes ?body () + +(** Make a log entry whose body is a string *) +let make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags + ?trace_id ?span_id ?attrs (body : string) : t = + make ?time ?observed_time_unix_nano ?severity ?log_level ?flags ?trace_id + ?span_id ?attrs (`String body) + +(** Make a log entry with format *) +let make_strf ?time ?observed_time_unix_nano ?severity ?log_level ?flags + ?trace_id ?span_id ?attrs fmt = + Format.kasprintf + (fun bod -> + make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags + ?trace_id ?span_id ?attrs bod) + fmt diff --git a/src/core/metrics.ml b/src/core/metrics.ml new file mode 100644 index 00000000..f91538f8 --- /dev/null +++ b/src/core/metrics.ml @@ -0,0 +1,80 @@ +(** Metrics. + + See + {{:https://opentelemetry.io/docs/reference/specification/overview/#metric-signal} + the spec} *) + +open Common_ +open Proto +open Proto.Metrics + +type t = Metrics.metric +(** A single metric, measuring some time-varying quantity or statistical + distribution. It is composed of one or more data points that have precise + values and time stamps. Each distinct metric should have a distinct name. *) + +open struct + let _program_start = Timestamp_ns.now_unix_ns () +end + +(** Number data point, as a float *) +let float ?(start_time_unix_nano = _program_start) + ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) (d : float) : + number_data_point = + let attributes = attrs |> List.map Key_value.conv in + make_number_data_point ~start_time_unix_nano ~time_unix_nano:now ~attributes + ~value:(As_double d) () + +(** Number data point, as an int *) +let int ?(start_time_unix_nano = _program_start) + ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) (i : int) : + number_data_point = + let attributes = attrs |> List.map Key_value.conv in + make_number_data_point ~start_time_unix_nano ~time_unix_nano:now ~attributes + ~value:(As_int (Int64.of_int i)) + () + +(** Aggregation of a scalar metric, always with the current value *) +let gauge ~name ?description ?unit_ (l : number_data_point list) : t = + let data = Gauge (make_gauge ~data_points:l ()) in + make_metric ~name ?description ?unit_ ~data () + +type aggregation_temporality = Metrics.aggregation_temporality = + | Aggregation_temporality_unspecified + | Aggregation_temporality_delta + | Aggregation_temporality_cumulative + +(** Sum of all reported measurements over a time interval *) +let sum ~name ?description ?unit_ + ?(aggregation_temporality = Aggregation_temporality_cumulative) + ?is_monotonic (l : number_data_point list) : t = + let data = + Sum (make_sum ~data_points:l ?is_monotonic ~aggregation_temporality ()) + in + make_metric ~name ?description ?unit_ ~data () + +(** Histogram data + @param count number of values in population (non negative) + @param sum sum of values in population (0 if count is 0) + @param bucket_counts + count value of histogram for each bucket. Sum of the counts must be equal + to [count]. length must be [1+length explicit_bounds] + @param explicit_bounds strictly increasing list of bounds for the buckets *) +let histogram_data_point ?(start_time_unix_nano = _program_start) + ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) ?(exemplars = []) + ?(explicit_bounds = []) ?sum ~bucket_counts ~count () : histogram_data_point + = + let attributes = attrs |> List.map Key_value.conv in + make_histogram_data_point ~start_time_unix_nano ~time_unix_nano:now + ~attributes ~exemplars ~bucket_counts ~explicit_bounds ~count ?sum () + +let histogram ~name ?description ?unit_ ?aggregation_temporality + (l : histogram_data_point list) : t = + let data = + Histogram (make_histogram ~data_points:l ?aggregation_temporality ()) + in + make_metric ~name ?description ?unit_ ~data () + +(* TODO: exponential history *) +(* TODO: summary *) +(* TODO: exemplar *) diff --git a/src/core/opentelemetry.ml b/src/core/opentelemetry.ml deleted file mode 100644 index 4b189819..00000000 --- a/src/core/opentelemetry.ml +++ /dev/null @@ -1,1682 +0,0 @@ -(** Opentelemetry types and instrumentation *) - -open struct - let spf = Printf.sprintf - - module Atomic = Opentelemetry_atomic.Atomic - module Ambient_context = Opentelemetry_ambient_context -end - -module Lock = Lock -(** Global lock. *) - -module Rand_bytes = Rand_bytes -(** Generation of random identifiers. *) - -module AList = AList -(** Atomic list, for internal usage - @since 0.7 *) - -module Util_mutex = Util_mutex -(** Utilities for internal usage. - @since NEXT_RELEASE *) - -(** {2 Wire format} *) - -module Proto = Opentelemetry_proto -(** Protobuf types. - - This is mostly useful internally. Users should not need to touch it. *) - -(** {2 Timestamps} *) - -(** Unix timestamp. - - These timestamps measure time since the Unix epoch (jan 1, 1970) UTC in - nanoseconds. *) -module Timestamp_ns = struct - type t = int64 - - let ns_in_a_day = Int64.(mul 1_000_000_000L (of_int (24 * 3600))) - - (** Current unix timestamp in nanoseconds *) - let[@inline] now_unix_ns () : t = - let span = Ptime_clock.now () |> Ptime.to_span in - let d, ps = Ptime.Span.to_d_ps span in - let d = Int64.(mul (of_int d) ns_in_a_day) in - let ns = Int64.(div ps 1_000L) in - Int64.(add d ns) -end - -(** {2 Interface to data collector} *) - -(** Collector types - - These types are used by backend implementations, to send events to - collectors such as Jaeger. - - Note: most users will not need to touch this module *) -module Collector = struct - open Opentelemetry_proto - - type 'msg sender = { send: 'a. 'msg -> ret:(unit -> 'a) -> 'a } - (** Sender interface for a message of type [msg]. Inspired from Logs' reporter - (see - {{:https://erratique.ch/software/logs/doc/Logs/index.html#sync} its doc}) - but without [over] as it doesn't make much sense in presence of batching. - - The [ret] callback is used to return the desired type (unit, or a Lwt - promise, or anything else) once the event has been transferred to the - backend. It doesn't mean the event has been collected yet, it could sit in - a batch queue for a little while. *) - - (** Collector client interface. *) - module type BACKEND = sig - val send_trace : Trace.resource_spans list sender - - val send_metrics : Metrics.resource_metrics list sender - - val send_logs : Logs.resource_logs list sender - - val signal_emit_gc_metrics : unit -> unit - (** Signal the backend that it should emit GC metrics when it has the - chance. This should be installed in a GC alarm or another form of - regular trigger. *) - - val tick : unit -> unit - (** Should be called regularly for background processing, timeout checks, - etc. *) - - val set_on_tick_callbacks : (unit -> unit) AList.t -> unit - (** Give the collector the list of callbacks to be executed when [tick()] is - called. Each such callback should be short and reentrant. Depending on - the collector's implementation, it might be called from a thread that is - not the one that called [on_tick]. *) - - val cleanup : on_done:(unit -> unit) -> unit -> unit - (** [cleanup ~on_done ()] is called when the collector is shut down, and is - responsible for sending remaining batches, flushing sockets, etc. - @param on_done - callback invoked after the cleanup is done. @since 0.12 *) - end - - type backend = (module BACKEND) - - module Noop_backend : BACKEND = struct - let noop_sender _ ~ret = ret () - - let send_trace : Trace.resource_spans list sender = { send = noop_sender } - - let send_metrics : Metrics.resource_metrics list sender = - { send = noop_sender } - - let send_logs : Logs.resource_logs list sender = { send = noop_sender } - - let signal_emit_gc_metrics () = () - - let tick () = () - - let set_on_tick_callbacks _cbs = () - - let cleanup ~on_done () = - on_done (); - () - end - - module Debug_backend (B : BACKEND) : BACKEND = struct - open Proto - - let send_trace : Trace.resource_spans list sender = - { - send = - (fun l ~ret -> - Format.eprintf "SPANS: %a@." - (Format.pp_print_list Trace.pp_resource_spans) - l; - B.send_trace.send l ~ret); - } - - let send_metrics : Metrics.resource_metrics list sender = - { - send = - (fun l ~ret -> - Format.eprintf "METRICS: %a@." - (Format.pp_print_list Metrics.pp_resource_metrics) - l; - B.send_metrics.send l ~ret); - } - - let send_logs : Logs.resource_logs list sender = - { - send = - (fun l ~ret -> - Format.eprintf "LOGS: %a@." - (Format.pp_print_list Logs.pp_resource_logs) - l; - B.send_logs.send l ~ret); - } - - let signal_emit_gc_metrics () = B.signal_emit_gc_metrics () - - let tick () = B.tick () - - let set_on_tick_callbacks cbs = B.set_on_tick_callbacks cbs - - let cleanup ~on_done () = B.cleanup ~on_done () - end - - let debug_backend : backend = (module Debug_backend (Noop_backend)) - - (* hidden *) - open struct - let on_tick_cbs_ = AList.make () - - let backend : backend option Atomic.t = Atomic.make None - end - - (** Set collector backend *) - let set_backend (b : backend) : unit = - let (module B) = b in - B.set_on_tick_callbacks on_tick_cbs_; - Atomic.set backend (Some b) - - (** Remove current backend, if any. - @since 0.11 - @param on_done see {!BACKEND.cleanup}, @since 0.12 *) - let remove_backend ~on_done () : unit = - match Atomic.exchange backend None with - | None -> () - | Some (module B) -> - B.tick (); - B.cleanup ~on_done () - - (** Is there a configured backend? *) - let[@inline] has_backend () : bool = Atomic.get backend != None - - (** Current backend, if any *) - let[@inline] get_backend () : backend option = Atomic.get backend - - let send_trace (l : Trace.resource_spans list) ~ret = - match Atomic.get backend with - | None -> ret () - | Some (module B) -> B.send_trace.send l ~ret - - let send_metrics (l : Metrics.resource_metrics list) ~ret = - match Atomic.get backend with - | None -> ret () - | Some (module B) -> B.send_metrics.send l ~ret - - let send_logs (l : Logs.resource_logs list) ~ret = - match Atomic.get backend with - | None -> ret () - | Some (module B) -> B.send_logs.send l ~ret - - let[@inline] rand_bytes_16 () = !Rand_bytes.rand_bytes_16 () - - let[@inline] rand_bytes_8 () = !Rand_bytes.rand_bytes_8 () - - let[@inline] on_tick f = AList.add on_tick_cbs_ f - - (** Do background work. Call this regularly if the collector doesn't already - have a ticker thread or internal timer. *) - let tick () = - match Atomic.get backend with - | None -> () - | Some (module B) -> B.tick () - - let with_setup_debug_backend ?(on_done = ignore) b ?(enable = true) () f = - let (module B : BACKEND) = b in - if enable then ( - set_backend b; - Fun.protect ~finally:(B.cleanup ~on_done) f - ) else - f () -end - -(**/**) - -module Util_ = struct - let int_to_hex (i : int) = - if i < 10 then - Char.chr (i + Char.code '0') - else - Char.chr (i - 10 + Char.code 'a') - - let bytes_to_hex_into b res off : unit = - for i = 0 to Bytes.length b - 1 do - let n = Char.code (Bytes.get b i) in - Bytes.set res ((2 * i) + off) (int_to_hex ((n land 0xf0) lsr 4)); - Bytes.set res ((2 * i) + 1 + off) (int_to_hex (n land 0x0f)) - done - - let bytes_to_hex (b : bytes) : string = - let res = Bytes.create (2 * Bytes.length b) in - bytes_to_hex_into b res 0; - Bytes.unsafe_to_string res - - let int_of_hex = function - | '0' .. '9' as c -> Char.code c - Char.code '0' - | 'a' .. 'f' as c -> 10 + Char.code c - Char.code 'a' - | c -> raise (Invalid_argument (spf "invalid hex char: %C" c)) - - let bytes_of_hex_substring (s : string) off len = - if len mod 2 <> 0 then - raise (Invalid_argument "hex sequence must be of even length"); - let res = Bytes.make (len / 2) '\x00' in - for i = 0 to (len / 2) - 1 do - let n1 = int_of_hex (String.get s (off + (2 * i))) in - let n2 = int_of_hex (String.get s (off + (2 * i) + 1)) in - let n = (n1 lsl 4) lor n2 in - Bytes.set res i (Char.chr n) - done; - res - - let bytes_of_hex (s : string) : bytes = - bytes_of_hex_substring s 0 (String.length s) - - let bytes_non_zero (self : bytes) : bool = - try - for i = 0 to Bytes.length self - 1 do - if Char.code (Bytes.unsafe_get self i) <> 0 then raise_notrace Exit - done; - false - with Exit -> true -end - -(**/**) - -(** {2 Identifiers} *) - -(** Trace ID. - - This 16 bytes identifier is shared by all spans in one trace. *) -module Trace_id : sig - type t - - val create : unit -> t - - val dummy : t - - val pp : Format.formatter -> t -> unit - - val is_valid : t -> bool - - val to_bytes : t -> bytes - - val of_bytes : bytes -> t - - val to_hex : t -> string - - val to_hex_into : t -> bytes -> int -> unit - - val of_hex : string -> t - - val of_hex_substring : string -> int -> t -end = struct - type t = bytes - - let[@inline] to_bytes self = self - - let dummy : t = Bytes.make 16 '\x00' - - let create () : t = - let b = Collector.rand_bytes_16 () in - assert (Bytes.length b = 16); - (* make sure the identifier is not all 0, which is a dummy identifier. *) - Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); - b - - let[@inline] of_bytes b = - if Bytes.length b = 16 then - b - else - raise (Invalid_argument "trace ID must be 16 bytes in length") - - let is_valid = Util_.bytes_non_zero - - let to_hex = Util_.bytes_to_hex - - let to_hex_into = Util_.bytes_to_hex_into - - let[@inline] of_hex s = of_bytes (Util_.bytes_of_hex s) - - let[@inline] of_hex_substring s off = - of_bytes (Util_.bytes_of_hex_substring s off 32) - - let pp fmt t = Format.fprintf fmt "%s" (to_hex t) -end - -(** Hmap key to carry around a {!Trace_id.t}, to remember what the current trace - is. - @since 0.8 *) -let k_trace_id : Trace_id.t Hmap.key = Hmap.Key.create () - -(** Unique ID of a span. *) -module Span_id : sig - type t - - val create : unit -> t - - val dummy : t - - val pp : Format.formatter -> t -> unit - - val is_valid : t -> bool - - val to_bytes : t -> bytes - - val of_bytes : bytes -> t - - val to_hex : t -> string - - val to_hex_into : t -> bytes -> int -> unit - - val of_hex : string -> t - - val of_hex_substring : string -> int -> t -end = struct - type t = bytes - - let[@inline] to_bytes self = self - - let dummy : t = Bytes.make 8 '\x00' - - let create () : t = - let b = Collector.rand_bytes_8 () in - assert (Bytes.length b = 8); - (* make sure the identifier is not all 0, which is a dummy identifier. *) - Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); - b - - let is_valid = Util_.bytes_non_zero - - let of_bytes b = - if Bytes.length b = 8 then - b - else - raise (Invalid_argument "span IDs must be 8 bytes in length") - - let to_hex = Util_.bytes_to_hex - - let to_hex_into = Util_.bytes_to_hex_into - - let[@inline] of_hex s = of_bytes (Util_.bytes_of_hex s) - - let[@inline] of_hex_substring s off = - of_bytes (Util_.bytes_of_hex_substring s off 16) - - let pp fmt t = Format.fprintf fmt "%s" (to_hex t) -end - -(** Span context. This bundles up a trace ID and parent ID. - - {{:https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext} - https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext} - @since 0.7 *) -module Span_ctx : sig - type t - - val make : - ?sampled:bool -> trace_id:Trace_id.t -> parent_id:Span_id.t -> unit -> t - - val dummy : t - (** Invalid span context, to be used as a placeholder *) - - val is_valid : t -> bool - - val trace_id : t -> Trace_id.t - - val parent_id : t -> Span_id.t - - val sampled : t -> bool - - val to_w3c_trace_context : t -> bytes - - val of_w3c_trace_context : bytes -> (t, string) result - - val of_w3c_trace_context_exn : bytes -> t - (** @raise Invalid_argument if parsing failed *) -end = struct - (* TODO: trace state *) - - type t = { - trace_id: Trace_id.t; - parent_id: Span_id.t; - sampled: bool; - } - - let dummy = - { trace_id = Trace_id.dummy; parent_id = Span_id.dummy; sampled = false } - - let make ?(sampled = false) ~trace_id ~parent_id () : t = - { trace_id; parent_id; sampled } - - let[@inline] is_valid self = - Trace_id.is_valid self.trace_id && Span_id.is_valid self.parent_id - - let[@inline] sampled self = self.sampled - - let[@inline] trace_id self = self.trace_id - - let[@inline] parent_id self = self.parent_id - - let to_w3c_trace_context (self : t) : bytes = - let bs = Bytes.create 55 in - Bytes.set bs 0 '0'; - Bytes.set bs 1 '0'; - Bytes.set bs 2 '-'; - Trace_id.to_hex_into self.trace_id bs 3; - (* +32 *) - Bytes.set bs (3 + 32) '-'; - Span_id.to_hex_into self.parent_id bs 36; - (* +16 *) - Bytes.set bs 52 '-'; - Bytes.set bs 53 '0'; - Bytes.set bs 54 - (if self.sampled then - '1' - else - '0'); - bs - - let of_w3c_trace_context bs : _ result = - try - if Bytes.length bs <> 55 then invalid_arg "trace context must be 55 bytes"; - (match int_of_string_opt (Bytes.sub_string bs 0 2) with - | Some 0 -> () - | Some n -> invalid_arg @@ spf "version is %d, expected 0" n - | None -> invalid_arg "expected 2-digit version"); - if Bytes.get bs 2 <> '-' then invalid_arg "expected '-' before trace_id"; - let trace_id = - try Trace_id.of_hex_substring (Bytes.unsafe_to_string bs) 3 - with Invalid_argument msg -> invalid_arg (spf "in trace id: %s" msg) - in - if Bytes.get bs (3 + 32) <> '-' then - invalid_arg "expected '-' before parent_id"; - let parent_id = - try Span_id.of_hex_substring (Bytes.unsafe_to_string bs) 36 - with Invalid_argument msg -> invalid_arg (spf "in span id: %s" msg) - in - if Bytes.get bs 52 <> '-' then invalid_arg "expected '-' after parent_id"; - let sampled = int_of_string_opt (Bytes.sub_string bs 53 2) = Some 1 in - - (* ignore flags *) - Ok { trace_id; parent_id; sampled } - with Invalid_argument msg -> Error msg - - let of_w3c_trace_context_exn bs = - match of_w3c_trace_context bs with - | Ok t -> t - | Error msg -> invalid_arg @@ spf "invalid w3c trace context: %s" msg -end - -(** Hmap key to carry around a {!Span_ctx.t}, e.g. to remember what the current - parent span is. - @since 0.8 *) -let k_span_ctx : Span_ctx.t Hmap.key = Hmap.Key.create () - -(** {2 Attributes and conventions} *) - -(** Semantic conventions - - {{:https://opentelemetry.io/docs/specs/semconv/} - https://opentelemetry.io/docs/specs/semconv/} *) -module Conventions = struct - module Attributes = struct - module Process = struct - module Runtime = struct - let name = "process.runtime.name" - - let version = "process.runtime.version" - - let description = "process.runtime.description" - end - end - - (** https://opentelemetry.io/docs/specs/semconv/attributes-registry/code/ *) - module Code = struct - (** Int *) - let column = "code.column" - - let filepath = "code.filepath" - - let function_ = "code.function" - - (** int *) - let line = "code.lineno" - - let namespace = "code.namespace" - - let stacktrace = "code.stacktrace" - end - - module Service = struct - let name = "service.name" - - let namespace = "service.namespace" - - let instance_id = "service.instance.id" - - let version = "service.version" - end - - module HTTP = struct - let error_type = "error.type" - - let request_method = "http.request.method" - - let route = "http.route" - - let url_full = "url.full" - - (** HTTP status code, int *) - let response_status_code = "http.response.status_code" - - let server_address = "server.address" - - let server_port = "server.port" - - (** http or https *) - let url_scheme = "url.scheme" - end - - (** https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/host.md - *) - module Host = struct - let id = "host.id" - - let name = "host.name" - - let type_ = "host.type" - - let arch = "host.arch" - - let ip = "host.ip" - - let mac = "host.mac" - - let image_id = "host.image.id" - - let image_name = "host.image.name" - - let image_version = "host.image.version" - end - end - - module Metrics = struct - module Process = struct - module Runtime = struct - module Ocaml = struct - module GC = struct - let compactions = "process.runtime.ocaml.gc.compactions" - - let major_collections = "process.runtime.ocaml.gc.major_collections" - - let major_heap = "process.runtime.ocaml.gc.major_heap" - - let minor_allocated = "process.runtime.ocaml.gc.minor_allocated" - - let minor_collections = "process.runtime.ocaml.gc.minor_collections" - end - end - end - end - - (** https://opentelemetry.io/docs/specs/semconv/http/ *) - module HTTP = struct - module Server = struct - let request_duration = "http.server.request.duration" - - let active_requests = "http.server.active_requests" - - (** Histogram *) - let request_body_size = "http.server.request.body.size" - - (** Histogram *) - let response_body_size = "http.server.response.body.size" - end - - module Client = struct - let request_duration = "http.client.request.duration" - - (** Histogram *) - let request_body_size = "http.client.request.body.size" - - (** Histogram *) - let response_body_size = "http.client.response.body.size" - end - end - end -end - -type value = - [ `Int of int - | `String of string - | `Bool of bool - | `Float of float - | `None - ] -(** A value in a key/value attribute *) - -type key_value = string * value - -open struct - let _conv_value = - let open Proto.Common in - function - | `Int i -> Some (Int_value (Int64.of_int i)) - | `String s -> Some (String_value s) - | `Bool b -> Some (Bool_value b) - | `Float f -> Some (Double_value f) - | `None -> None - - let _conv_key_value (k, v) = - let open Proto.Common in - let value = _conv_value v in - make_key_value ~key:k ?value () -end - -(** {2 Global settings} *) - -(** Process-wide metadata, environment variables, etc. *) -module Globals = struct - open Proto.Common - - (** Main service name metadata *) - let service_name = ref "unknown_service" - - (** Namespace for the service *) - let service_namespace = ref None - - (** Unique identifier for the service *) - let service_instance_id = ref None - - (** Version for the service - @since 0.12 *) - let service_version = ref None - - let instrumentation_library : instrumentation_scope = - make_instrumentation_scope ~version:"%%VERSION_NUM%%" ~name:"ocaml-otel" () - - (** Global attributes, initially set via OTEL_RESOURCE_ATTRIBUTES and - modifiable by the user code. They will be attached to each outgoing - metrics/traces. *) - let global_attributes : key_value list ref = - let parse_pair s = - match String.split_on_char '=' s with - | [ a; b ] -> make_key_value ~key:a ~value:(String_value b) () - | _ -> failwith (Printf.sprintf "invalid attribute: %S" s) - in - ref - @@ - try - Sys.getenv "OTEL_RESOURCE_ATTRIBUTES" - |> String.split_on_char ',' |> List.map parse_pair - with _ -> [] - - (** Add a global attribute *) - let add_global_attribute (key : string) (v : value) : unit = - global_attributes := _conv_key_value (key, v) :: !global_attributes - - (* add global attributes to this list *) - let merge_global_attributes_ into : _ list = - let not_redundant kv = List.for_all (fun kv' -> kv.key <> kv'.key) into in - List.rev_append (List.filter not_redundant !global_attributes) into - - (** Default span kind in {!Span.create}. This will be used in all spans that - do not specify [~kind] explicitly; it is set to "internal", following - directions from the [.proto] file. It can be convenient to set "client" or - "server" uniformly in here. - @since 0.4 *) - let default_span_kind = ref Proto.Trace.Span_kind_internal - - let mk_attributes ?(service_name = !service_name) ?(attrs = []) () : _ list = - let l = List.map _conv_key_value attrs in - let l = - make_key_value ~key:Conventions.Attributes.Service.name - ~value:(String_value service_name) () - :: l - in - let l = - match !service_instance_id with - | None -> l - | Some v -> - make_key_value ~key:Conventions.Attributes.Service.instance_id - ~value:(String_value v) () - :: l - in - let l = - match !service_namespace with - | None -> l - | Some v -> - make_key_value ~key:Conventions.Attributes.Service.namespace - ~value:(String_value v) () - :: l - in - let l = - match !service_version with - | None -> l - | Some v -> - make_key_value ~key:Conventions.Attributes.Service.version - ~value:(String_value v) () - :: l - in - l |> merge_global_attributes_ -end - -(** {2 Traces and Spans} *) - -(** Events. - - Events occur at a given time and can carry attributes. They always belong in - a span. *) -module Event : sig - open Proto.Trace - - type t = span_event - - val make : - ?time_unix_nano:Timestamp_ns.t -> ?attrs:key_value list -> string -> t -end = struct - open Proto.Trace - - type t = span_event - - let make ?(time_unix_nano = Timestamp_ns.now_unix_ns ()) ?(attrs = []) - (name : string) : t = - let attrs = List.map _conv_key_value attrs in - make_span_event ~time_unix_nano ~name ~attributes:attrs () -end - -(** Span Link - - A pointer from the current span to another span in the same trace or in a - different trace. For example, this can be used in batching operations, where - a single batch handler processes multiple requests from different traces or - when the handler receives a request from a different project. *) -module Span_link : sig - open Proto.Trace - - type t = span_link - - val make : - trace_id:Trace_id.t -> - span_id:Span_id.t -> - ?trace_state:string -> - ?attrs:key_value list -> - ?dropped_attributes_count:int -> - unit -> - t - - val of_span_ctx : - ?trace_state:string -> - ?attrs:key_value list -> - ?dropped_attributes_count:int -> - Span_ctx.t -> - t -end = struct - open Proto.Trace - - type t = span_link - - let make ~trace_id ~span_id ?trace_state ?(attrs = []) - ?dropped_attributes_count () : t = - let attributes = List.map _conv_key_value attrs in - let dropped_attributes_count = - Option.map Int32.of_int dropped_attributes_count - in - make_span_link - ~trace_id:(Trace_id.to_bytes trace_id) - ~span_id:(Span_id.to_bytes span_id) ?trace_state ~attributes - ?dropped_attributes_count () - - let[@inline] of_span_ctx ?trace_state ?attrs ?dropped_attributes_count - (ctx : Span_ctx.t) : t = - make ~trace_id:(Span_ctx.trace_id ctx) ~span_id:(Span_ctx.parent_id ctx) - ?trace_state ?attrs ?dropped_attributes_count () -end - -module Span_status : sig - open Proto.Trace - - type t = status = private { - mutable _presence: Pbrt.Bitfield.t; - mutable message: string; - mutable code: status_status_code; - } - - type code = status_status_code = - | Status_code_unset - | Status_code_ok - | Status_code_error - - val make : message:string -> code:code -> t -end = struct - open Proto.Trace - - type t = status = private { - mutable _presence: Pbrt.Bitfield.t; - mutable message: string; - mutable code: status_status_code; - } - - type code = status_status_code = - | Status_code_unset - | Status_code_ok - | Status_code_error - - let[@inline] make ~message ~code : t = make_status ~message ~code () -end - -(** @since 0.11 *) -module Span_kind : sig - open Proto.Trace - - type t = span_span_kind = - | Span_kind_unspecified - | Span_kind_internal - | Span_kind_server - | Span_kind_client - | Span_kind_producer - | Span_kind_consumer -end = struct - open Proto.Trace - - type t = span_span_kind = - | Span_kind_unspecified - | Span_kind_internal - | Span_kind_server - | Span_kind_client - | Span_kind_producer - | Span_kind_consumer -end - -(** {2 Scopes} *) - -(** Scopes. - - A scope is a trace ID and the span ID of the currently active span. *) -module Scope : sig - type item_list - - type t = { - trace_id: Trace_id.t; - span_id: Span_id.t; - mutable items: item_list; - } - - val attrs : t -> key_value list - - val events : t -> Event.t list - - val links : t -> Span_link.t list - - val status : t -> Span_status.t option - - val kind : t -> Span_kind.t option - - val make : - trace_id:Trace_id.t -> - span_id:Span_id.t -> - ?events:Event.t list -> - ?attrs:key_value list -> - ?links:Span_link.t list -> - ?status:Span_status.t -> - unit -> - t - - val to_span_link : - ?trace_state:string -> - ?attrs:key_value list -> - ?dropped_attributes_count:int -> - t -> - Span_link.t - (** Turn the scope into a span link *) - - val to_span_ctx : t -> Span_ctx.t - (** Turn the scope into a span context *) - - val add_event : t -> (unit -> Event.t) -> unit - (** Add an event to the scope. It will be aggregated into the span. - - Note that this takes a function that produces an event, and will only call - it if there is an instrumentation backend. *) - - val record_exception : t -> exn -> Printexc.raw_backtrace -> unit - - val add_attrs : t -> (unit -> key_value list) -> unit - (** Add attributes to the scope. It will be aggregated into the span. - - Note that this takes a function that produces attributes, and will only - call it if there is an instrumentation backend. *) - - val add_links : t -> (unit -> Span_link.t list) -> unit - (** Add links to the scope. It will be aggregated into the span. - - Note that this takes a function that produces links, and will only call it - if there is an instrumentation backend. *) - - val set_status : t -> Span_status.t -> unit - (** set the span status. - - Note that this function will be called only if there is an instrumentation - backend. *) - - val set_kind : t -> Span_kind.t -> unit - (** Set the span's kind. - @since 0.11 *) - - val ambient_scope_key : t Ambient_context.key - (** The opaque key necessary to access/set the ambient scope with - {!Ambient_context}. *) - - val get_ambient_scope : ?scope:t -> unit -> t option - (** Obtain current scope from {!Ambient_context}, if available. *) - - val with_ambient_scope : t -> (unit -> 'a) -> 'a - (** [with_ambient_scope sc thunk] calls [thunk()] in a context where [sc] is - the (thread|continuation)-local scope, then reverts to the previous local - scope, if any. - - @see - ambient-context docs *) -end = struct - type item_list = - | Nil - | Ev of Event.t * item_list - | Attr of key_value * item_list - | Span_link of Span_link.t * item_list - | Span_status of Span_status.t * item_list - | Span_kind of Span_kind.t * item_list - - type t = { - trace_id: Trace_id.t; - span_id: Span_id.t; - mutable items: item_list; - } - - let attrs scope = - let rec loop acc = function - | Nil -> acc - | Attr (attr, l) -> loop (attr :: acc) l - | Ev (_, l) | Span_kind (_, l) | Span_link (_, l) | Span_status (_, l) -> - loop acc l - in - loop [] scope.items - - let events scope = - let rec loop acc = function - | Nil -> acc - | Ev (event, l) -> loop (event :: acc) l - | Attr (_, l) | Span_kind (_, l) | Span_link (_, l) | Span_status (_, l) - -> - loop acc l - in - loop [] scope.items - - let links scope = - let rec loop acc = function - | Nil -> acc - | Span_link (span_link, l) -> loop (span_link :: acc) l - | Ev (_, l) | Span_kind (_, l) | Attr (_, l) | Span_status (_, l) -> - loop acc l - in - loop [] scope.items - - let status scope = - let rec loop = function - | Nil -> None - | Span_status (status, _) -> Some status - | Ev (_, l) | Attr (_, l) | Span_kind (_, l) | Span_link (_, l) -> loop l - in - loop scope.items - - let kind scope = - let rec loop = function - | Nil -> None - | Span_kind (k, _) -> Some k - | Ev (_, l) | Span_status (_, l) | Attr (_, l) | Span_link (_, l) -> - loop l - in - loop scope.items - - let make ~trace_id ~span_id ?(events = []) ?(attrs = []) ?(links = []) ?status - () : t = - let items = - let items = - match status with - | None -> Nil - | Some status -> Span_status (status, Nil) - in - let items = List.fold_left (fun acc ev -> Ev (ev, acc)) items events in - let items = - List.fold_left (fun acc attr -> Attr (attr, acc)) items attrs - in - List.fold_left (fun acc link -> Span_link (link, acc)) items links - in - { trace_id; span_id; items } - - let[@inline] to_span_link ?trace_state ?attrs ?dropped_attributes_count - (self : t) : Span_link.t = - Span_link.make ?trace_state ?attrs ?dropped_attributes_count - ~trace_id:self.trace_id ~span_id:self.span_id () - - let[@inline] to_span_ctx (self : t) : Span_ctx.t = - Span_ctx.make ~trace_id:self.trace_id ~parent_id:self.span_id () - - let[@inline] add_event (scope : t) (ev : unit -> Event.t) : unit = - if Collector.has_backend () then scope.items <- Ev (ev (), scope.items) - - let[@inline] record_exception (scope : t) (exn : exn) - (bt : Printexc.raw_backtrace) : unit = - if Collector.has_backend () then ( - let ev = - Event.make "exception" - ~attrs: - [ - "exception.message", `String (Printexc.to_string exn); - "exception.type", `String (Printexc.exn_slot_name exn); - ( "exception.stacktrace", - `String (Printexc.raw_backtrace_to_string bt) ); - ] - in - scope.items <- Ev (ev, scope.items) - ) - - let[@inline] add_attrs (scope : t) (attrs : unit -> key_value list) : unit = - if Collector.has_backend () then - scope.items <- - List.fold_left (fun acc attr -> Attr (attr, acc)) scope.items (attrs ()) - - let[@inline] add_links (scope : t) (links : unit -> Span_link.t list) : unit = - if Collector.has_backend () then - scope.items <- - List.fold_left - (fun acc link -> Span_link (link, acc)) - scope.items (links ()) - - let set_status (scope : t) (status : Span_status.t) : unit = - if Collector.has_backend () then - scope.items <- Span_status (status, scope.items) - - let set_kind (scope : t) (k : Span_kind.t) : unit = - if Collector.has_backend () then scope.items <- Span_kind (k, scope.items) - - let ambient_scope_key : t Ambient_context.key = Ambient_context.create_key () - - let get_ambient_scope ?scope () : t option = - match scope with - | Some _ -> scope - | None -> Ambient_context.get ambient_scope_key - - let[@inline] with_ambient_scope (sc : t) (f : unit -> 'a) : 'a = - Ambient_context.with_binding ambient_scope_key sc (fun _ -> f ()) -end - -(** {2 Traces} *) - -(** Spans. - - A Span is the workhorse of traces, it indicates an operation that took place - over a given span of time (indicated by start_time and end_time) as part of - a hierarchical trace. All spans in a given trace are bound by the use of the - same {!Trace_id.t}. *) -module Span : sig - open Proto.Trace - - type t = span - - type id = Span_id.t - - type kind = Span_kind.t = - | Span_kind_unspecified - | Span_kind_internal - | Span_kind_server - | Span_kind_client - | Span_kind_producer - | Span_kind_consumer - - val id : t -> Span_id.t - - type key_value = - string - * [ `Int of int - | `String of string - | `Bool of bool - | `Float of float - | `None - ] - - val create : - ?kind:kind -> - ?id:id -> - ?trace_state:string -> - ?attrs:key_value list -> - ?events:Event.t list -> - ?status:status -> - trace_id:Trace_id.t -> - ?parent:id -> - ?links:Span_link.t list -> - start_time:Timestamp_ns.t -> - end_time:Timestamp_ns.t -> - string -> - t * id - (** [create ~trace_id name] creates a new span with its unique ID. - @param trace_id the trace this belongs to - @param parent parent span, if any - @param links - list of links to other spans, each with their trace state (see - {{:https://www.w3.org/TR/trace-context/#tracestate-header} w3.org}) *) -end = struct - open Proto.Trace - - type t = span - - type id = Span_id.t - - type kind = Span_kind.t = - | Span_kind_unspecified - | Span_kind_internal - | Span_kind_server - | Span_kind_client - | Span_kind_producer - | Span_kind_consumer - - type key_value = - string - * [ `Int of int - | `String of string - | `Bool of bool - | `Float of float - | `None - ] - - let id self = Span_id.of_bytes self.span_id - - let create ?(kind = !Globals.default_span_kind) ?(id = Span_id.create ()) - ?trace_state ?(attrs = []) ?(events = []) ?status ~trace_id ?parent - ?(links = []) ~start_time ~end_time name : t * id = - let trace_id = Trace_id.to_bytes trace_id in - let parent_span_id = Option.map Span_id.to_bytes parent in - let attributes = List.map _conv_key_value attrs in - let span = - make_span ~trace_id ?parent_span_id ~span_id:(Span_id.to_bytes id) - ~attributes ~events ?trace_state ?status ~kind ~name ~links - ~start_time_unix_nano:start_time ~end_time_unix_nano:end_time () - in - span, id -end - -(** Traces. - - See - {{:https://opentelemetry.io/docs/reference/specification/overview/#tracing-signal} - the spec} *) -module Trace = struct - open Proto.Trace - - type span = Span.t - - let make_resource_spans ?service_name ?attrs spans : resource_spans = - let ils = - make_scope_spans ~scope:Globals.instrumentation_library ~spans () - in - let attributes = Globals.mk_attributes ?service_name ?attrs () in - let resource = Proto.Resource.make_resource ~attributes () in - make_resource_spans ~resource ~scope_spans:[ ils ] () - - (** Sync emitter. - - This instructs the collector to forward the spans to some backend at a - later point. - - {b NOTE} be careful not to call this inside a Gc alarm, as it can cause - deadlocks. *) - let emit ?service_name ?attrs (spans : span list) : unit = - let rs = make_resource_spans ?service_name ?attrs spans in - Collector.send_trace [ rs ] ~ret:(fun () -> ()) - - type scope = Scope.t = { - trace_id: Trace_id.t; - span_id: Span_id.t; - mutable items: Scope.item_list; - } - [@@deprecated "use Scope.t"] - - let (add_event [@deprecated "use Scope.add_event"]) = Scope.add_event - - let (add_attrs [@deprecated "use Scope.add_attrs"]) = Scope.add_attrs - - let with_' ?(force_new_trace_id = false) ?trace_state ?service_name - ?(attrs : (string * [< value ]) list = []) ?kind ?trace_id ?parent ?scope - ?(links = []) name cb = - let scope = - if force_new_trace_id then - None - else - Scope.get_ambient_scope ?scope () - in - let trace_id = - match trace_id, scope with - | _ when force_new_trace_id -> Trace_id.create () - | Some trace_id, _ -> trace_id - | None, Some scope -> scope.trace_id - | None, None -> Trace_id.create () - in - let parent = - match parent, scope with - | _ when force_new_trace_id -> None - | Some span_id, _ -> Some span_id - | None, Some scope -> Some scope.span_id - | None, None -> None - in - let start_time = Timestamp_ns.now_unix_ns () in - let span_id = Span_id.create () in - let scope = Scope.make ~trace_id ~span_id ~attrs ~links () in - (* called once we're done, to emit a span *) - let finally res = - let status = - match Scope.status scope with - | Some status -> Some status - | None -> - (match res with - | Ok () -> - (* By default, all spans are Unset, which means a span completed without error. - The Ok status is reserved for when you need to explicitly mark a span as successful - rather than stick with the default of Unset (i.e., “without error”). - - https://opentelemetry.io/docs/languages/go/instrumentation/#set-span-status *) - None - | Error (e, bt) -> - Scope.record_exception scope e bt; - Some - (make_status ~code:Status_code_error - ~message:(Printexc.to_string e) ())) - in - let span, _ = - (* TODO: should the attrs passed to with_ go on the Span - (in Span.create) or on the ResourceSpan (in emit)? - (question also applies to Opentelemetry_lwt.Trace.with) *) - Span.create ?kind ~trace_id ?parent ~links:(Scope.links scope) - ~id:span_id ?trace_state ~attrs:(Scope.attrs scope) - ~events:(Scope.events scope) ~start_time - ~end_time:(Timestamp_ns.now_unix_ns ()) - ?status name - in - emit ?service_name [ span ] - in - let thunk () = - (* set global scope in this thread *) - Scope.with_ambient_scope scope @@ fun () -> cb scope - in - thunk, finally - - (** Sync span guard. - - Notably, this includes {e implicit} scope-tracking: if called without a - [~scope] argument (or [~parent]/[~trace_id]), it will check in the - {!Ambient_context} for a surrounding environment, and use that as the - scope. Similarly, it uses {!Scope.with_ambient_scope} to {e set} a new - scope in the ambient context, so that any logically-nested calls to - {!with_} will use this span as their parent. - - {b NOTE} be careful not to call this inside a Gc alarm, as it can cause - deadlocks. - - @param force_new_trace_id - if true (default false), the span will not use a ambient scope, the - [~scope] argument, nor [~trace_id], but will instead always create fresh - identifiers for this span *) - - let with_ ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind - ?trace_id ?parent ?scope ?links name (cb : Scope.t -> 'a) : 'a = - let thunk, finally = - with_' ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind - ?trace_id ?parent ?scope ?links name cb - in - - try - let rv = thunk () in - finally (Ok ()); - rv - with e -> - let bt = Printexc.get_raw_backtrace () in - finally (Error (e, bt)); - raise e -end - -(** {2 Metrics} *) - -(** Metrics. - - See - {{:https://opentelemetry.io/docs/reference/specification/overview/#metric-signal} - the spec} *) -module Metrics = struct - open Proto - open Proto.Metrics - - type t = Metrics.metric - (** A single metric, measuring some time-varying quantity or statistical - distribution. It is composed of one or more data points that have precise - values and time stamps. Each distinct metric should have a distinct name. - *) - - open struct - let _program_start = Timestamp_ns.now_unix_ns () - end - - (** Number data point, as a float *) - let float ?(start_time_unix_nano = _program_start) - ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) (d : float) : - number_data_point = - let attributes = attrs |> List.map _conv_key_value in - make_number_data_point ~start_time_unix_nano ~time_unix_nano:now ~attributes - ~value:(As_double d) () - - (** Number data point, as an int *) - let int ?(start_time_unix_nano = _program_start) - ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) (i : int) : - number_data_point = - let attributes = attrs |> List.map _conv_key_value in - make_number_data_point ~start_time_unix_nano ~time_unix_nano:now ~attributes - ~value:(As_int (Int64.of_int i)) - () - - (** Aggregation of a scalar metric, always with the current value *) - let gauge ~name ?description ?unit_ (l : number_data_point list) : t = - let data = Gauge (make_gauge ~data_points:l ()) in - make_metric ~name ?description ?unit_ ~data () - - type aggregation_temporality = Metrics.aggregation_temporality = - | Aggregation_temporality_unspecified - | Aggregation_temporality_delta - | Aggregation_temporality_cumulative - - (** Sum of all reported measurements over a time interval *) - let sum ~name ?description ?unit_ - ?(aggregation_temporality = Aggregation_temporality_cumulative) - ?is_monotonic (l : number_data_point list) : t = - let data = - Sum (make_sum ~data_points:l ?is_monotonic ~aggregation_temporality ()) - in - make_metric ~name ?description ?unit_ ~data () - - (** Histogram data - @param count number of values in population (non negative) - @param sum sum of values in population (0 if count is 0) - @param bucket_counts - count value of histogram for each bucket. Sum of the counts must be - equal to [count]. length must be [1+length explicit_bounds] - @param explicit_bounds strictly increasing list of bounds for the buckets - *) - let histogram_data_point ?(start_time_unix_nano = _program_start) - ?(now = Timestamp_ns.now_unix_ns ()) ?(attrs = []) ?(exemplars = []) - ?(explicit_bounds = []) ?sum ~bucket_counts ~count () : - histogram_data_point = - let attributes = attrs |> List.map _conv_key_value in - make_histogram_data_point ~start_time_unix_nano ~time_unix_nano:now - ~attributes ~exemplars ~bucket_counts ~explicit_bounds ~count ?sum () - - let histogram ~name ?description ?unit_ ?aggregation_temporality - (l : histogram_data_point list) : t = - let data = - Histogram (make_histogram ~data_points:l ?aggregation_temporality ()) - in - make_metric ~name ?description ?unit_ ~data () - - (* TODO: exponential history *) - (* TODO: summary *) - (* TODO: exemplar *) - - (** Aggregate metrics into a {!Proto.Metrics.resource_metrics} *) - let make_resource_metrics ?service_name ?attrs (l : t list) : resource_metrics - = - let lm = - make_scope_metrics ~scope:Globals.instrumentation_library ~metrics:l () - in - let attributes = Globals.mk_attributes ?service_name ?attrs () in - let resource = Proto.Resource.make_resource ~attributes () in - make_resource_metrics ~scope_metrics:[ lm ] ~resource () - - (** Emit some metrics to the collector (sync). This blocks until the backend - has pushed the metrics into some internal queue, or discarded them. - - {b NOTE} be careful not to call this inside a Gc alarm, as it can cause - deadlocks. *) - let emit ?attrs (l : t list) : unit = - let rm = make_resource_metrics ?attrs l in - Collector.send_metrics [ rm ] ~ret:ignore -end - -(** A set of callbacks that produce metrics when called. The metrics are - automatically called regularly. - - This allows applications to register metrics callbacks from various points - in the program (or even in libraries), and not worry about setting - alarms/intervals to emit them. *) -module Metrics_callbacks = struct - open struct - (* [true] iff the initial list of metric callbacks has already been registered - with `on_tick`. This registration must only happen once, after which, - [registered_with_on_tick] will forever be [false]. *) - let registered_with_on_tick : bool Atomic.t = Atomic.make false - - let cbs_ : (unit -> Metrics.t list) AList.t = AList.make () - end - - (** [register f] adds the callback [f] to the list. - - [f] will be called at unspecified times and is expected to return a list - of metrics. It might be called regularly by the backend, in particular - (but not only) when {!Collector.tick} is called. *) - let register f : unit = - (* sets [registered_with_on_tick] to [true] atomically, iff it is currently - [false]. *) - if not (Atomic.exchange registered_with_on_tick true) then - (* make sure we call [f] (and others) at each tick *) - Collector.on_tick (fun () -> - let m = List.map (fun f -> f ()) (AList.get cbs_) |> List.flatten in - Metrics.emit m); - AList.add cbs_ f -end - -(** {2 Logs} *) - -(** Logs. - - See - {{:https://opentelemetry.io/docs/reference/specification/overview/#log-signal} - the spec} *) -module Logs = struct - open Opentelemetry_proto - open Logs - - type t = log_record - - (** Severity level of a log event *) - type severity = Logs.severity_number = - | Severity_number_unspecified - | Severity_number_trace - | Severity_number_trace2 - | Severity_number_trace3 - | Severity_number_trace4 - | Severity_number_debug - | Severity_number_debug2 - | Severity_number_debug3 - | Severity_number_debug4 - | Severity_number_info - | Severity_number_info2 - | Severity_number_info3 - | Severity_number_info4 - | Severity_number_warn - | Severity_number_warn2 - | Severity_number_warn3 - | Severity_number_warn4 - | Severity_number_error - | Severity_number_error2 - | Severity_number_error3 - | Severity_number_error4 - | Severity_number_fatal - | Severity_number_fatal2 - | Severity_number_fatal3 - | Severity_number_fatal4 - - let pp_severity = Logs.pp_severity_number - - type flags = Logs.log_record_flags = - | Log_record_flags_do_not_use - | Log_record_flags_trace_flags_mask - - let pp_flags = Logs.pp_log_record_flags - - (** Make a single log entry *) - let make ?time ?(observed_time_unix_nano = Timestamp_ns.now_unix_ns ()) - ?severity ?log_level ?flags ?trace_id ?span_id (body : value) : t = - let time_unix_nano = - match time with - | None -> observed_time_unix_nano - | Some t -> t - in - let trace_id = Option.map Trace_id.to_bytes trace_id in - let span_id = Option.map Span_id.to_bytes span_id in - let body = _conv_value body in - make_log_record ~time_unix_nano ~observed_time_unix_nano - ?severity_number:severity ?severity_text:log_level ?flags ?trace_id - ?span_id ?body () - - (** Make a log entry whose body is a string *) - let make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags - ?trace_id ?span_id (body : string) : t = - make ?time ?observed_time_unix_nano ?severity ?log_level ?flags ?trace_id - ?span_id (`String body) - - (** Make a log entry with format *) - let make_strf ?time ?observed_time_unix_nano ?severity ?log_level ?flags - ?trace_id ?span_id fmt = - Format.kasprintf - (fun bod -> - make_str ?time ?observed_time_unix_nano ?severity ?log_level ?flags - ?trace_id ?span_id bod) - fmt - - (** Emit logs. - - This instructs the collector to send the logs to some backend at a later - date. {b NOTE} be careful not to call this inside a Gc alarm, as it can - cause deadlocks. *) - let emit ?service_name ?attrs (l : t list) : unit = - let attributes = Globals.mk_attributes ?service_name ?attrs () in - let resource = Proto.Resource.make_resource ~attributes () in - let ll = - make_scope_logs ~scope:Globals.instrumentation_library ~log_records:l () - in - let rl = make_resource_logs ~resource ~scope_logs:[ ll ] () in - Collector.send_logs [ rl ] ~ret:ignore -end - -(** {2 Utils} *) - -(** Implementation of the W3C Trace Context spec - - https://www.w3.org/TR/trace-context/ *) -module Trace_context = struct - (** The traceparent header - https://www.w3.org/TR/trace-context/#traceparent-header *) - module Traceparent = struct - let name = "traceparent" - - (** Parse the value of the traceparent header. - - The values are of the form: - - {[ - { version } - { trace_id } - { parent_id } - { flags } - ]} - - For example: - - {[ - 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01 - ]} - - [{flags}] are currently ignored. *) - let of_value str : (Trace_id.t * Span_id.t, string) result = - match Span_ctx.of_w3c_trace_context (Bytes.unsafe_of_string str) with - | Ok sp -> Ok (Span_ctx.trace_id sp, Span_ctx.parent_id sp) - | Error _ as e -> e - - let to_value ?(sampled : bool option) ~(trace_id : Trace_id.t) - ~(parent_id : Span_id.t) () : string = - let span_ctx = Span_ctx.make ?sampled ~trace_id ~parent_id () in - Bytes.unsafe_to_string @@ Span_ctx.to_w3c_trace_context span_ctx - end -end - -(** Export GC metrics. - - These metrics are emitted after each GC collection. *) -module GC_metrics : sig - val basic_setup : unit -> unit - (** Setup a hook that will emit GC statistics on every tick (assuming a ticker - thread) *) - - val get_runtime_attributes : unit -> Span.key_value list - (** Get OCaml name and version runtime attributes *) - - val get_metrics : unit -> Metrics.t list - (** Get a few metrics from the current state of the GC *) -end = struct - (** See - https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/process.md#process-runtimes - *) - let runtime_attributes = - lazy - Conventions.Attributes. - [ - Process.Runtime.name, `String "ocaml"; - Process.Runtime.version, `String Sys.ocaml_version; - ] - - let get_runtime_attributes () = Lazy.force runtime_attributes - - let basic_setup () = - let on_tick () = - match Collector.get_backend () with - | None -> () - | Some (module C) -> C.signal_emit_gc_metrics () - in - Collector.on_tick on_tick - - let bytes_per_word = Sys.word_size / 8 - - let word_to_bytes n = n * bytes_per_word - - let word_to_bytes_f n = n *. float bytes_per_word - - let get_metrics () : Metrics.t list = - let gc = Gc.quick_stat () in - let now = Timestamp_ns.now_unix_ns () in - let open Metrics in - let open Conventions.Metrics in - [ - gauge ~name:Process.Runtime.Ocaml.GC.major_heap ~unit_:"B" - [ int ~now (word_to_bytes gc.Gc.heap_words) ]; - sum ~name:Process.Runtime.Ocaml.GC.minor_allocated - ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative - ~is_monotonic:true ~unit_:"B" - [ float ~now (word_to_bytes_f gc.Gc.minor_words) ]; - sum ~name:Process.Runtime.Ocaml.GC.minor_collections - ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative - ~is_monotonic:true - [ int ~now gc.Gc.minor_collections ]; - sum ~name:Process.Runtime.Ocaml.GC.major_collections - ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative - ~is_monotonic:true - [ int ~now gc.Gc.major_collections ]; - sum ~name:Process.Runtime.Ocaml.GC.compactions - ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative - ~is_monotonic:true - [ int ~now gc.Gc.compactions ]; - ] -end diff --git a/src/core/rand_bytes.ml b/src/core/rand_bytes.ml index 18cf7dc8..c90c1a72 100644 --- a/src/core/rand_bytes.ml +++ b/src/core/rand_bytes.ml @@ -1,38 +1,48 @@ -(* generate random IDs *) -let rand_ = Random.State.make_self_init () +let initialized_ = Atomic.make false -let ( let@ ) = ( @@ ) +let[@inline never] actually_init () = Random.self_init () + +let[@inline] maybe_init () = + if not (Atomic.exchange initialized_ true) then actually_init () let default_rand_bytes_8 () : bytes = - let@ () = Lock.with_lock in + maybe_init (); let b = Bytes.create 8 in for i = 0 to 1 do - let r = Random.State.bits rand_ in + (* rely on the stdlib's [Random] being thread-or-domain safe *) + let r = Random.bits () in (* 30 bits, of which we use 24 *) Bytes.set b (i * 3) (Char.chr (r land 0xff)); Bytes.set b ((i * 3) + 1) (Char.chr ((r lsr 8) land 0xff)); Bytes.set b ((i * 3) + 2) (Char.chr ((r lsr 16) land 0xff)) done; - let r = Random.State.bits rand_ in + let r = Random.bits () in Bytes.set b 6 (Char.chr (r land 0xff)); Bytes.set b 7 (Char.chr ((r lsr 8) land 0xff)); b let default_rand_bytes_16 () : bytes = - let@ () = Lock.with_lock in + maybe_init (); let b = Bytes.create 16 in for i = 0 to 4 do - let r = Random.State.bits rand_ in + (* rely on the stdlib's [Random] being thread-or-domain safe *) + let r = Random.bits () in (* 30 bits, of which we use 24 *) Bytes.set b (i * 3) (Char.chr (r land 0xff)); Bytes.set b ((i * 3) + 1) (Char.chr ((r lsr 8) land 0xff)); Bytes.set b ((i * 3) + 2) (Char.chr ((r lsr 16) land 0xff)) done; - let r = Random.State.bits rand_ in + let r = Random.bits () in Bytes.set b 15 (Char.chr (r land 0xff)); (* last byte *) b -let rand_bytes_16 = ref default_rand_bytes_16 +let rand_bytes_16_ref = ref default_rand_bytes_16 -let rand_bytes_8 = ref default_rand_bytes_8 +let rand_bytes_8_ref = ref default_rand_bytes_8 + +(** Generate a 16B identifier *) +let[@inline] rand_bytes_16 () = !rand_bytes_16_ref () + +(** Generate an 8B identifier *) +let[@inline] rand_bytes_8 () = !rand_bytes_8_ref () diff --git a/src/core/rand_bytes.mli b/src/core/rand_bytes.mli index 7c42ea35..4b015b27 100644 --- a/src/core/rand_bytes.mli +++ b/src/core/rand_bytes.mli @@ -2,12 +2,12 @@ We need random identifiers for trace IDs and span IDs. *) -val rand_bytes_16 : (unit -> bytes) ref +val rand_bytes_16_ref : (unit -> bytes) ref (** Generate 16 bytes of random data. The implementation can be swapped to use any random generator. *) -val rand_bytes_8 : (unit -> bytes) ref -(** Generate 16 bytes of random data. The implementation can be swapped to use +val rand_bytes_8_ref : (unit -> bytes) ref +(** Generate 8 bytes of random data. The implementation can be swapped to use any random generator. *) val default_rand_bytes_8 : unit -> bytes @@ -15,3 +15,9 @@ val default_rand_bytes_8 : unit -> bytes val default_rand_bytes_16 : unit -> bytes (** Default implementation using {!Random} *) + +val rand_bytes_16 : unit -> bytes +(** Call the current {!rand_bytes_16_ref} *) + +val rand_bytes_8 : unit -> bytes +(** Call the current {!rand_bytes_8_ref} *) diff --git a/src/core/span.ml b/src/core/span.ml new file mode 100644 index 00000000..7d2c3920 --- /dev/null +++ b/src/core/span.ml @@ -0,0 +1,133 @@ +open Common_ +open Proto.Trace + +type t = span + +type id = Span_id.t + +type kind = Span_kind.t = + | Span_kind_unspecified + | Span_kind_internal + | Span_kind_server + | Span_kind_client + | Span_kind_producer + | Span_kind_consumer + +type key_value = + string + * [ `Int of int + | `String of string + | `Bool of bool + | `Float of float + | `None + ] + +let[@inline] id self = Span_id.of_bytes self.span_id + +let[@inline] trace_id self = Trace_id.of_bytes self.trace_id + +let[@inline] is_not_dummy self = Span_id.is_valid (id self) + +let default_kind = ref Proto.Trace.Span_kind_unspecified + +let make ?(kind = !default_kind) ?trace_state ?(attrs = []) ?(events = []) + ?status ~trace_id ~id ?parent ?(links = []) ~start_time ~end_time name : t = + let trace_id = Trace_id.to_bytes trace_id in + let parent_span_id = Option.map Span_id.to_bytes parent in + let attributes = List.map Key_value.conv attrs in + let span = + make_span ~trace_id ?parent_span_id ~span_id:(Span_id.to_bytes id) + ~attributes ~events ?trace_state ?status ~kind ~name ~links + ~start_time_unix_nano:start_time ~end_time_unix_nano:end_time () + in + span + +let create_new ?kind ?(id = Span_id.create ()) ?trace_state ?attrs ?events + ?status ~trace_id ?parent ?links ~start_time ~end_time name : t = + make ?kind ~id ~trace_id ?trace_state ?attrs ?events ?status ?parent ?links + ~start_time ~end_time name + +let attrs self = self.attributes |> List.rev_map Key_value.of_otel + +let events self = self.events + +let links self : Span_link.t list = self.links + +let status self = self.status + +let kind self = + let k = self.kind in + if k = Span_kind_unspecified then + None + else + Some k + +let to_span_link (self : t) : Span_link.t = + make_span_link ~attributes:self.attributes + ?dropped_attributes_count: + (if span_has_dropped_attributes_count self then + Some self.dropped_attributes_count + else + None) + ?trace_state: + (if span_has_trace_state self then + Some self.trace_state + else + None) + ~trace_id:self.trace_id ~span_id:self.span_id () + +let[@inline] to_span_ctx (self : t) : Span_ctx.t = + Span_ctx.make ~trace_id:(trace_id self) ~parent_id:(id self) () + +let[@inline] add_event self ev : unit = + if is_not_dummy self then span_set_events self (ev :: self.events) + +let add_event' self ev : unit = if is_not_dummy self then add_event self (ev ()) + +let record_exception (self : t) (exn : exn) (bt : Printexc.raw_backtrace) : unit + = + if is_not_dummy self then ( + let ev = + Event.make "exception" + ~attrs: + [ + "exception.message", `String (Printexc.to_string exn); + "exception.type", `String (Printexc.exn_slot_name exn); + ( "exception.stacktrace", + `String (Printexc.raw_backtrace_to_string bt) ); + ] + in + add_event self ev + ) + +let add_attrs (self : t) (attrs : Key_value.t list) : unit = + if is_not_dummy self then ( + let attrs = List.rev_map Key_value.conv attrs in + let attrs = List.rev_append attrs self.attributes in + span_set_attributes self attrs + ) + +let add_attrs' (self : t) (attrs : unit -> Key_value.t list) : unit = + if is_not_dummy self then ( + let attrs = List.rev_map Key_value.conv (attrs ()) in + let attrs = List.rev_append attrs self.attributes in + span_set_attributes self attrs + ) + +let add_links (self : t) (links : Span_link.t list) : unit = + if links <> [] then ( + let links = List.rev_append links self.links in + span_set_links self links + ) + +let add_links' (self : t) (links : unit -> Span_link.t list) : unit = + if is_not_dummy self then ( + let links = List.rev_append (links ()) self.links in + span_set_links self links + ) + +let set_status = span_set_status + +let set_kind = span_set_kind + +let k_context : t Context.key = Context.new_key () diff --git a/src/core/span.mli b/src/core/span.mli new file mode 100644 index 00000000..8d5da0e5 --- /dev/null +++ b/src/core/span.mli @@ -0,0 +1,122 @@ +(** Spans. + + A Span is the workhorse of traces, it indicates an operation that took place + over a given span of time (indicated by start_time and end_time) as part of + a hierarchical trace. All spans in a given trace are bound by the use of the + same {!Trace_id.t}. *) + +open Common_ +open Proto.Trace + +type t = span + +type id = Span_id.t + +type kind = Span_kind.t = + | Span_kind_unspecified + | Span_kind_internal + | Span_kind_server + | Span_kind_client + | Span_kind_producer + | Span_kind_consumer + +type key_value = Key_value.t + +val make : + ?kind:kind -> + ?trace_state:string -> + ?attrs:key_value list -> + ?events:Event.t list -> + ?status:status -> + trace_id:Trace_id.t -> + id:Span_id.t -> + ?parent:id -> + ?links:Span_link.t list -> + start_time:Timestamp_ns.t -> + end_time:Timestamp_ns.t -> + string -> + t +(** [make ~trace_id ~id name] creates a new span + @param trace_id the trace this belongs to + @param parent parent span, if any + @param links + list of links to other spans, each with their trace state (see + {{:https://www.w3.org/TR/trace-context/#tracestate-header} w3.org}) *) + +val id : t -> Span_id.t + +val trace_id : t -> Trace_id.t + +val is_not_dummy : t -> bool + +val create_new : + ?kind:kind -> + ?id:Span_id.t -> + ?trace_state:string -> + ?attrs:key_value list -> + ?events:Event.t list -> + ?status:status -> + trace_id:Trace_id.t -> + ?parent:id -> + ?links:Span_link.t list -> + start_time:Timestamp_ns.t -> + end_time:Timestamp_ns.t -> + string -> + t + +val attrs : t -> Key_value.t list + +val events : t -> Event.t list + +val links : t -> Span_link.t list + +val status : t -> Span_status.t option + +val kind : t -> Span_kind.t option + +val to_span_link : t -> Span_link.t +(** Turn the scope into a span link *) + +val to_span_ctx : t -> Span_ctx.t +(** Turn the scope into a span context *) + +val add_event : t -> Event.t -> unit + +val add_event' : t -> (unit -> Event.t) -> unit +(** Add an event to the scope. It will be aggregated into the span. + + Note that this takes a function that produces an event, and will only call + it if there is an instrumentation backend. *) + +val record_exception : t -> exn -> Printexc.raw_backtrace -> unit + +val add_links : t -> Span_link.t list -> unit + +val add_links' : t -> (unit -> Span_link.t list) -> unit +(** Add links to the scope. It will be aggregated into the span. + + Note that this takes a function that produces links, and will only call it + if there is an instrumentation backend. *) + +val add_attrs : t -> Key_value.t list -> unit + +val add_attrs' : t -> (unit -> Key_value.t list) -> unit + +val set_status : t -> Span_status.t -> unit +(** set the span status. + + Note that this function will be called only if there is an instrumentation + backend. *) + +val set_kind : t -> Span_kind.t -> unit +(** Set the span's kind. + @since 0.11 *) + +val default_kind : Span_kind.t ref +(** Default span kind in {!make} and {!create_new}. + + This will be used in all spans that do not specify [~kind] explicitly; it is + set to "internal", following directions from the [.proto] file. It can be + convenient to set "client" or "server" uniformly in here. *) + +val k_context : t Context.key diff --git a/src/core/span_ctx.ml b/src/core/span_ctx.ml new file mode 100644 index 00000000..1be170f2 --- /dev/null +++ b/src/core/span_ctx.ml @@ -0,0 +1,91 @@ +open Common_ + +(* see: https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext *) + +(* TODO: trace state *) + +external int_of_bool : bool -> int = "%identity" + +module Flags = struct + let sampled = 1 + + let remote = 2 +end + +type t = { + trace_id: Trace_id.t; + parent_id: Span_id.t; + flags: int; +} + +let dummy = { trace_id = Trace_id.dummy; parent_id = Span_id.dummy; flags = 0 } + +let make ?(remote = false) ?(sampled = false) ~trace_id ~parent_id () : t = + let flags = + 0 + lor (int_of_bool remote lsl Flags.remote) + lor (int_of_bool sampled lsl Flags.sampled) + in + { trace_id; parent_id; flags } + +let[@inline] is_valid self = + Trace_id.is_valid self.trace_id && Span_id.is_valid self.parent_id + +let[@inline] sampled self = self.flags land (1 lsl Flags.sampled) != 0 + +let[@inline] is_remote self = self.flags land (1 lsl Flags.remote) != 0 + +let[@inline] trace_id self = self.trace_id + +let[@inline] parent_id self = self.parent_id + +let to_w3c_trace_context (self : t) : bytes = + let bs = Bytes.create 55 in + Bytes.set bs 0 '0'; + Bytes.set bs 1 '0'; + Bytes.set bs 2 '-'; + Trace_id.to_hex_into self.trace_id bs 3; + (* +32 *) + Bytes.set bs (3 + 32) '-'; + Span_id.to_hex_into self.parent_id bs 36; + (* +16 *) + Bytes.set bs 52 '-'; + Bytes.set bs 53 '0'; + Bytes.set bs 54 + (if sampled self then + '1' + else + '0'); + bs + +let of_w3c_trace_context bs : _ result = + try + if Bytes.length bs <> 55 then invalid_arg "trace context must be 55 bytes"; + (match int_of_string_opt (Bytes.sub_string bs 0 2) with + | Some 0 -> () + | Some n -> invalid_arg @@ spf "version is %d, expected 0" n + | None -> invalid_arg "expected 2-digit version"); + if Bytes.get bs 2 <> '-' then invalid_arg "expected '-' before trace_id"; + let trace_id = + try Trace_id.of_hex_substring (Bytes.unsafe_to_string bs) 3 + with Invalid_argument msg -> invalid_arg (spf "in trace id: %s" msg) + in + if Bytes.get bs (3 + 32) <> '-' then + invalid_arg "expected '-' before parent_id"; + let parent_id = + try Span_id.of_hex_substring (Bytes.unsafe_to_string bs) 36 + with Invalid_argument msg -> invalid_arg (spf "in span id: %s" msg) + in + if Bytes.get bs 52 <> '-' then invalid_arg "expected '-' after parent_id"; + let sampled = int_of_string_opt (Bytes.sub_string bs 53 2) = Some 1 in + + (* ignore other flags *) + Ok (make ~remote:true ~sampled ~trace_id ~parent_id ()) + with Invalid_argument msg -> Error msg + +let of_w3c_trace_context_exn bs = + match of_w3c_trace_context bs with + | Ok t -> t + | Error msg -> invalid_arg @@ spf "invalid w3c trace context: %s" msg + +let k_span_ctx : t Hmap.key = Hmap.Key.create () diff --git a/src/core/span_ctx.mli b/src/core/span_ctx.mli new file mode 100644 index 00000000..fb72046b --- /dev/null +++ b/src/core/span_ctx.mli @@ -0,0 +1,42 @@ +(** Span context. This bundles up a trace ID and parent ID. + + {{:https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext} + https://opentelemetry.io/docs/specs/otel/trace/api/#spancontext} + @since 0.7 *) + +type t + +val make : + ?remote:bool -> + ?sampled:bool -> + trace_id:Trace_id.t -> + parent_id:Span_id.t -> + unit -> + t + +val dummy : t +(** Invalid span context, to be used as a placeholder *) + +val is_remote : t -> bool +(** Does this come from a remote parent? *) + +val is_valid : t -> bool +(** Are the span ID and trace ID valid (ie non-zero)? *) + +val trace_id : t -> Trace_id.t + +val parent_id : t -> Span_id.t + +val sampled : t -> bool + +val to_w3c_trace_context : t -> bytes + +val of_w3c_trace_context : bytes -> (t, string) result + +val of_w3c_trace_context_exn : bytes -> t +(** @raise Invalid_argument if parsing failed *) + +val k_span_ctx : t Hmap.key +(** Hmap key to carry around a {!Span_ctx.t}, e.g. to remember what the current + parent span is. + @since 0.8 *) diff --git a/src/core/span_id.ml b/src/core/span_id.ml new file mode 100644 index 00000000..6285fe94 --- /dev/null +++ b/src/core/span_id.ml @@ -0,0 +1,40 @@ +type t = bytes + +let[@inline] to_bytes self = self + +let dummy : t = Bytes.make 8 '\x00' + +let create () : t = + let b = Rand_bytes.rand_bytes_8 () in + assert (Bytes.length b = 8); + (* make sure the identifier is not all 0, which is a dummy identifier. *) + Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); + b + +(* dark magic, woo. We have an [assert] below to do the bound checks once *) +external unsafe_b_get64 : bytes -> int -> int64 = "%caml_bytes_get64u" + +let[@inline] is_zero (self : t) : bool = + (* try to reduce branches *) + assert (Bytes.length self = 8); + let n1 = unsafe_b_get64 self 0 in + n1 = 0L + +let[@inline] is_valid self = not (is_zero self) + +let[@inline] of_bytes b = + if Bytes.length b = 8 then + b + else + invalid_arg "span IDs must be 8 bytes in length" + +let to_hex = Util_bytes_.bytes_to_hex + +let to_hex_into = Util_bytes_.bytes_to_hex_into + +let[@inline] of_hex s = of_bytes (Util_bytes_.bytes_of_hex s) + +let[@inline] of_hex_substring s off = + of_bytes (Util_bytes_.bytes_of_hex_substring s off 16) + +let pp fmt t = Format.fprintf fmt "%s" (to_hex t) diff --git a/src/core/span_id.mli b/src/core/span_id.mli new file mode 100644 index 00000000..db51d475 --- /dev/null +++ b/src/core/span_id.mli @@ -0,0 +1,23 @@ +(** Unique ID of a span. *) + +type t + +val create : unit -> t + +val dummy : t + +val pp : Format.formatter -> t -> unit + +val is_valid : t -> bool + +val to_bytes : t -> bytes + +val of_bytes : bytes -> t + +val to_hex : t -> string + +val to_hex_into : t -> bytes -> int -> unit + +val of_hex : string -> t + +val of_hex_substring : string -> int -> t diff --git a/src/core/span_kind.ml b/src/core/span_kind.ml new file mode 100644 index 00000000..d3ddace4 --- /dev/null +++ b/src/core/span_kind.ml @@ -0,0 +1,13 @@ +(** Span kind. + @since 0.11 *) + +open Common_ +open Proto.Trace + +type t = span_span_kind = + | Span_kind_unspecified + | Span_kind_internal + | Span_kind_server + | Span_kind_client + | Span_kind_producer + | Span_kind_consumer diff --git a/src/core/span_link.ml b/src/core/span_link.ml new file mode 100644 index 00000000..308d3598 --- /dev/null +++ b/src/core/span_link.ml @@ -0,0 +1,20 @@ +open Common_ +open Proto.Trace + +type t = span_link + +let make ~trace_id ~span_id ?trace_state ?(attrs = []) ?dropped_attributes_count + () : t = + let attributes = List.map Key_value.conv attrs in + let dropped_attributes_count = + Option.map Int32.of_int dropped_attributes_count + in + make_span_link + ~trace_id:(Trace_id.to_bytes trace_id) + ~span_id:(Span_id.to_bytes span_id) ?trace_state ~attributes + ?dropped_attributes_count () + +let[@inline] of_span_ctx ?trace_state ?attrs ?dropped_attributes_count + (ctx : Span_ctx.t) : t = + make ~trace_id:(Span_ctx.trace_id ctx) ~span_id:(Span_ctx.parent_id ctx) + ?trace_state ?attrs ?dropped_attributes_count () diff --git a/src/core/span_link.mli b/src/core/span_link.mli new file mode 100644 index 00000000..402ff0ec --- /dev/null +++ b/src/core/span_link.mli @@ -0,0 +1,27 @@ +(** Span Link + + A pointer from the current span to another span in the same trace or in a + different trace. For example, this can be used in batching operations, where + a single batch handler processes multiple requests from different traces or + when the handler receives a request from a different project. *) + +open Common_ +open Proto.Trace + +type t = span_link + +val make : + trace_id:Trace_id.t -> + span_id:Span_id.t -> + ?trace_state:string -> + ?attrs:Key_value.t list -> + ?dropped_attributes_count:int -> + unit -> + t + +val of_span_ctx : + ?trace_state:string -> + ?attrs:Key_value.t list -> + ?dropped_attributes_count:int -> + Span_ctx.t -> + t diff --git a/src/core/span_status.ml b/src/core/span_status.ml new file mode 100644 index 00000000..388d20ac --- /dev/null +++ b/src/core/span_status.ml @@ -0,0 +1,15 @@ +open Common_ +open Proto.Trace + +type t = Proto.Trace.status = private { + mutable _presence: Pbrt.Bitfield.t; + mutable message: string; + mutable code: status_status_code; +} + +type code = status_status_code = + | Status_code_unset + | Status_code_ok + | Status_code_error + +let[@inline] make ~message ~code : t = make_status ~message ~code () diff --git a/src/core/span_status.mli b/src/core/span_status.mli new file mode 100644 index 00000000..da2e11b9 --- /dev/null +++ b/src/core/span_status.mli @@ -0,0 +1,15 @@ +open Common_ +open Proto.Trace + +type t = Proto.Trace.status = private { + mutable _presence: Pbrt.Bitfield.t; + mutable message: string; + mutable code: status_status_code; +} + +type code = status_status_code = + | Status_code_unset + | Status_code_ok + | Status_code_error + +val make : message:string -> code:code -> t diff --git a/src/core/timestamp_ns.ml b/src/core/timestamp_ns.ml new file mode 100644 index 00000000..52f7cfa2 --- /dev/null +++ b/src/core/timestamp_ns.ml @@ -0,0 +1,29 @@ +(** Unix timestamp. + + These timestamps measure time since the Unix epoch (jan 1, 1970) UTC in + nanoseconds. *) + +type t = int64 + +open struct + let ns_in_a_day = Int64.(mul 1_000_000_000L (of_int (24 * 3600))) +end + +(** Current unix timestamp in nanoseconds *) +let[@inline] now_unix_ns () : t = + let span = Ptime_clock.now () |> Ptime.to_span in + let d, ps = Ptime.Span.to_d_ps span in + let d = Int64.(mul (of_int d) ns_in_a_day) in + let ns = Int64.(div ps 1_000L) in + Int64.(add d ns) + +let pp_debug out (self : t) = + let d = Int64.(to_int (div self ns_in_a_day)) in + let ns = Int64.(rem self ns_in_a_day) in + let ps = Int64.(mul ns 1_000L) in + match Ptime.Span.of_d_ps (d, ps) with + | None -> Format.fprintf out "ts: <%Ld ns>" self + | Some span -> + (match Ptime.add_span Ptime.epoch span with + | None -> Format.fprintf out "ts: <%Ld ns>" self + | Some ptime -> Ptime.pp_human () out ptime) diff --git a/src/core/trace_context.ml b/src/core/trace_context.ml new file mode 100644 index 00000000..9c8b141d --- /dev/null +++ b/src/core/trace_context.ml @@ -0,0 +1,34 @@ +(** Implementation of the W3C Trace Context spec + + https://www.w3.org/TR/trace-context/ *) + +(** The traceparent header + https://www.w3.org/TR/trace-context/#traceparent-header *) +module Traceparent = struct + let name = "traceparent" + + (** Parse the value of the traceparent header. + + The values are of the form: + + {[ + { version } - { trace_id } - { parent_id } - { flags } + ]} + + For example: + + {[ + 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01 + ]} + + [{flags}] are currently ignored. *) + let of_value str : (Trace_id.t * Span_id.t, string) result = + match Span_ctx.of_w3c_trace_context (Bytes.unsafe_of_string str) with + | Ok sp -> Ok (Span_ctx.trace_id sp, Span_ctx.parent_id sp) + | Error _ as e -> e + + let to_value ?(sampled : bool option) ~(trace_id : Trace_id.t) + ~(parent_id : Span_id.t) () : string = + let span_ctx = Span_ctx.make ?sampled ~trace_id ~parent_id () in + Bytes.unsafe_to_string @@ Span_ctx.to_w3c_trace_context span_ctx +end diff --git a/src/core/trace_id.ml b/src/core/trace_id.ml new file mode 100644 index 00000000..e82539c5 --- /dev/null +++ b/src/core/trace_id.ml @@ -0,0 +1,51 @@ +type t = bytes + +let[@inline] to_bytes self = self + +let dummy : t = Bytes.make 16 '\x00' + +let create () : t = + let b = Rand_bytes.rand_bytes_16 () in + assert (Bytes.length b = 16); + (* make sure the identifier is not all 0, which is a dummy identifier. *) + Bytes.set b 0 (Char.unsafe_chr (Char.code (Bytes.get b 0) lor 1)); + b + +let[@inline] of_bytes b = + if Bytes.length b = 16 then + b + else + invalid_arg "trace ID must be 16 bytes in length" + +(* dark magic, woo. We have an [assert] below to do the bound checks once *) +external unsafe_b_get64 : bytes -> int -> int64 = "%caml_bytes_get64u" + +let[@inline] is_zero (self : t) : bool = + (* try to reduce branches *) + assert (Bytes.length self = 16); + let n1 = unsafe_b_get64 self 0 in + let n2 = unsafe_b_get64 self 8 in + n1 = 0L && n2 = 0L + +let[@inline] is_valid self = not (is_zero self) + +let to_hex = Util_bytes_.bytes_to_hex + +let to_hex_into = Util_bytes_.bytes_to_hex_into + +let[@inline] of_hex s = of_bytes (Util_bytes_.bytes_of_hex s) + +let[@inline] of_hex_substring s off = + of_bytes (Util_bytes_.bytes_of_hex_substring s off 32) + +let pp fmt t = Format.fprintf fmt "%s" (to_hex t) + +let compare = Bytes.compare + +module Map = Map.Make (struct + type nonrec t = t + + let compare = compare +end) + +let k_trace_id : t Hmap.key = Hmap.Key.create () diff --git a/src/core/trace_id.mli b/src/core/trace_id.mli new file mode 100644 index 00000000..d3c45f9b --- /dev/null +++ b/src/core/trace_id.mli @@ -0,0 +1,34 @@ +(** Trace ID. + + This 16 bytes identifier is shared by all spans in one trace. *) + +type t + +val create : unit -> t + +val dummy : t + +val compare : t -> t -> int + +val pp : Format.formatter -> t -> unit + +val is_valid : t -> bool + +val to_bytes : t -> bytes + +val of_bytes : bytes -> t + +val to_hex : t -> string + +val to_hex_into : t -> bytes -> int -> unit + +val of_hex : string -> t + +val of_hex_substring : string -> int -> t + +module Map : Map.S with type key = t + +val k_trace_id : t Hmap.key +(** Hmap key to carry around a {!Trace_id.t}, to remember what the current trace + is. + @since 0.8 *) diff --git a/src/core/value.ml b/src/core/value.ml new file mode 100644 index 00000000..d3d07c0e --- /dev/null +++ b/src/core/value.ml @@ -0,0 +1,27 @@ +open Common_ + +type t = + [ `Int of int + | `String of string + | `Bool of bool + | `Float of float + | `None + ] +(** A value in a key/value attribute *) + +let conv = + let open Proto.Common in + function + | `Int i -> Some (Int_value (Int64.of_int i)) + | `String s -> Some (String_value s) + | `Bool b -> Some (Bool_value b) + | `Float f -> Some (Double_value f) + | `None -> None + +let of_otel_opt (v : Proto.Common.any_value option) : t = + match v with + | Some (Int_value i) -> `Int (Int64.to_int i) + | Some (String_value s) -> `String s + | Some (Bool_value b) -> `Bool b + | Some (Double_value f) -> `Float f + | Some (Array_value _ | Kvlist_value _ | Bytes_value _) | None -> `None diff --git a/src/domain/dune b/src/domain/dune new file mode 100644 index 00000000..c75ee3c4 --- /dev/null +++ b/src/domain/dune @@ -0,0 +1,14 @@ +(library + (name opentelemetry_domain) + (synopsis "Compatibility package for the Domain module for opentelemetry") + (public_name opentelemetry.domain) + (modules opentelemetry_domain)) + +(executable + (modules gen) + (name gen)) + +(rule + (targets opentelemetry_domain.ml) + (action + (run ./gen.exe))) diff --git a/src/domain/gen.ml b/src/domain/gen.ml new file mode 100644 index 00000000..f1cf82e6 --- /dev/null +++ b/src/domain/gen.ml @@ -0,0 +1,26 @@ +let domain_4 = + {| +let cpu_relax = ignore +let relax_loop : int -> unit = ignore + |} + +let domain_5 = + {| +let cpu_relax = Domain.cpu_relax +let relax_loop i = + for _j = 1 to i do cpu_relax () done +|} + +let write_file file s = + let oc = open_out file in + output_string oc s; + close_out oc + +let () = + let version = Scanf.sscanf Sys.ocaml_version "%d.%d.%s" (fun x y _ -> x, y) in + write_file "opentelemetry_domain.ml" + (if version >= (5, 0) then + domain_5 + else + domain_4); + () diff --git a/src/domain/opentelemetry_domain.mli b/src/domain/opentelemetry_domain.mli new file mode 100644 index 00000000..36f5929e --- /dev/null +++ b/src/domain/opentelemetry_domain.mli @@ -0,0 +1,4 @@ +val cpu_relax : unit -> unit + +val relax_loop : int -> unit +(** Call {!cpu_relax} n times *) diff --git a/src/emitter/dune b/src/emitter/dune new file mode 100644 index 00000000..3844e185 --- /dev/null +++ b/src/emitter/dune @@ -0,0 +1,6 @@ +(library + (name opentelemetry_emitter) + (public_name opentelemetry.emitter) + (libraries mtime mtime.clock.os opentelemetry.atomic) + (flags :standard -open Opentelemetry_atomic) + (synopsis "Modular emitters for a single signal at a time")) diff --git a/src/emitter/emitter.ml b/src/emitter/emitter.ml new file mode 100644 index 00000000..a940fc43 --- /dev/null +++ b/src/emitter/emitter.ml @@ -0,0 +1,67 @@ +(** Emitters. + + This is the composable abstraction we use to represent how signals are + emitted, from their origin point (a site in user code or library code that + was instrumented, and just created a span or log record or metric), down to + the actual SDK exporter installed in the application. *) + +exception Closed + +type -'a t = { + enabled: unit -> bool; + (** Return [true] if [emit] has a chance of doing something with the + signals it's given. *) + emit: 'a list -> unit; + (** Emit signals. @raise Closed if the emitter is closed. *) + tick: now:Mtime.t -> unit; + (** Call regularly to ensure background work is done. The current + timestamp is passed to improve testability. *) + closed: unit -> bool; + (** True if the emitter is already closed. Beware TOCTOU bugs. *) + flush_and_close: unit -> unit; + (** Flush internally buffered signals, then close. *) +} +(** An emitter for values of type ['a]. *) + +let[@inline] enabled self : bool = self.enabled () + +let[@inline] emit (self : _ t) l : unit = if l <> [] then self.emit l + +let[@inline] tick (self : _ t) ~now : unit = self.tick ~now + +let[@inline] closed self : bool = self.closed () + +let[@inline] flush_and_close (self : _ t) : unit = self.flush_and_close () + +(** [map f emitter] returns a new emitter that applies [f] to signals item-wise + before passing them to [emitter] *) +let map (f : 'a -> 'b) (self : 'b t) : 'a t = + { self with emit = (fun l -> self.emit (List.map f l)) } + +(** [map_l f emitter] applies [f] to incoming lists of signals, and emits the + resulting list (if non empty) *) +let flat_map (f : 'a list -> 'b list) (self : 'b t) : 'a t = + let emit l = + match f l with + | [] -> () + | fl -> self.emit fl + in + { self with emit } + +(** [tap f e] is like [e], but every signal is passed to [f] *) +let tap (f : 'a -> unit) (self : 'a t) : 'a t = + let emit l = + List.iter f l; + self.emit l + in + { self with emit } + +(** Dummy emitter, doesn't accept or emit anything. *) +let dummy : _ t = + { + enabled = (fun () -> false); + emit = ignore; + tick = (fun ~now:_ -> ()); + closed = (fun () -> true); + flush_and_close = ignore; + } diff --git a/src/emitter/to_list.ml b/src/emitter/to_list.ml new file mode 100644 index 00000000..04e228ca --- /dev/null +++ b/src/emitter/to_list.ml @@ -0,0 +1,14 @@ +(** Emitter that stores signals into a list, in reverse order (most recent + signals first). *) +let to_list (l : 'a list ref) : 'a Emitter.t = + let closed = Atomic.make false in + { + enabled = (fun () -> not (Atomic.get closed)); + emit = + (fun sigs -> + if Atomic.get closed then raise Emitter.Closed; + l := List.rev_append sigs !l); + tick = (fun ~now:_ -> ()); + closed = (fun () -> Atomic.get closed); + flush_and_close = (fun () -> Atomic.set closed true); + } diff --git a/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml b/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml index ad670b5c..13d21438 100644 --- a/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml +++ b/src/integrations/cohttp/opentelemetry_cohttp_lwt.ml @@ -2,9 +2,16 @@ module Otel = Opentelemetry module Otel_lwt = Opentelemetry_lwt open Cohttp +open struct + let attrs_of_response (res : Response.t) = + let code = Response.status res in + let code = Code.code_of_status code in + [ "http.status_code", `Int code ] +end + module Server : sig val trace : - ?service_name:string -> + ?tracer:Otel.Tracer.t -> ?attrs:Otel.Span.key_value list -> ('conn -> Request.t -> 'body -> (Response.t * 'body) Lwt.t) -> 'conn -> @@ -27,8 +34,8 @@ module Server : sig ]} *) val with_ : + ?tracer:Otel.Tracer.t -> ?trace_state:string -> - ?service_name:string -> ?attrs:Otel.Span.key_value list -> ?kind:Otel.Span.kind -> ?links:Otel.Span_link.t list -> @@ -43,11 +50,11 @@ module Server : sig convenience. *) val get_trace_context : - ?from:[ `Internal | `External ] -> Request.t -> Otel.Scope.t option + ?from:[ `Internal | `External ] -> Request.t -> Otel.Span.t option (** Get the tracing scope from the custom [x-ocaml-otel-traceparent] header added by [trace] and [with_]. *) - val set_trace_context : Otel.Scope.t -> Request.t -> Request.t + val set_trace_context : Otel.Span.t -> Request.t -> Request.t (** Set the tracing scope in the custom [x-ocaml-otel-traceparent] header used by [trace] and [with_]. *) @@ -76,23 +83,18 @@ end = struct | Some r -> [ "http.request.header.referer", `String r ]); ] - let attrs_of_response (res : Response.t) = - let code = Response.status res in - let code = Code.code_of_status code in - [ "http.status_code", `Int code ] - let header_x_ocaml_otel_traceparent = "x-ocaml-otel-traceparent" - let set_trace_context (scope : Otel.Scope.t) req = + let set_trace_context (span : Otel.Span.t) req = let module Traceparent = Otel.Trace_context.Traceparent in let headers = Header.add (Request.headers req) header_x_ocaml_otel_traceparent - (Traceparent.to_value ~trace_id:scope.trace_id ~parent_id:scope.span_id - ()) + (Traceparent.to_value ~trace_id:(Otel.Span.trace_id span) + ~parent_id:(Otel.Span.id span) ()) in { req with headers } - let get_trace_context ?(from = `Internal) req = + let get_trace_context ?(from = `Internal) req : Otel.Span.t option = let module Traceparent = Otel.Trace_context.Traceparent in let name = match from with @@ -104,7 +106,9 @@ end = struct | Some v -> (match Traceparent.of_value v with | Ok (trace_id, parent_id) -> - Some (Otel.Scope.make ~trace_id ~span_id:parent_id ()) + (* TODO: we need a span_ctx here actually *) + Some + (Otel.Span.make ~trace_id ~id:parent_id ~start_time:0L ~end_time:0L "") | Error _ -> None) let remove_trace_context req = @@ -113,33 +117,33 @@ end = struct in { req with headers } - let trace ?service_name ?(attrs = []) callback conn req body = - let scope = get_trace_context ~from:`External req in - Otel_lwt.Trace.with_ ?service_name "request" ~kind:Span_kind_server - ?trace_id:(Option.map (fun scope -> scope.Otel.Trace.trace_id) scope) - ?parent:(Option.map (fun scope -> scope.Otel.Trace.span_id) scope) + let trace ?(tracer = Otel.Tracer.get_main ()) ?(attrs = []) callback conn req + body = + let parent = get_trace_context ~from:`External req in + Otel_lwt.Tracer.with_ tracer "request" ~kind:Span_kind_server + ?trace_id:(Option.map Otel.Span.trace_id parent) + ?parent ~attrs:(attrs @ attrs_of_request req) - (fun scope -> + (fun span -> let open Lwt.Syntax in - let req = set_trace_context scope req in + let req = set_trace_context span req in let* res, body = callback conn req body in - Otel.Scope.add_attrs scope (fun () -> attrs_of_response res); + Otel.Span.add_attrs span (attrs_of_response res); Lwt.return (res, body)) - let with_ ?trace_state ?service_name ?attrs + let with_ ?(tracer = Otel.Tracer.get_main ()) ?trace_state ?attrs ?(kind = Otel.Span.Span_kind_internal) ?links name req (f : Request.t -> 'a Lwt.t) = - let scope = get_trace_context ~from:`Internal req in - Otel_lwt.Trace.with_ ?trace_state ?service_name ?attrs ~kind - ?trace_id:(Option.map (fun scope -> scope.Otel.Trace.trace_id) scope) - ?parent:(Option.map (fun scope -> scope.Otel.Trace.span_id) scope) - ?links name - (fun scope -> - let req = set_trace_context scope req in + let span = get_trace_context ~from:`Internal req in + Otel_lwt.Tracer.with_ tracer ?trace_state ?attrs ~kind + ?trace_id:(Option.map Otel.Span.trace_id span) ?parent:span ?links name + (fun span -> + let req = set_trace_context span req in f req) end -let client ?(scope : Otel.Scope.t option) (module C : Cohttp_lwt.S.Client) = +let client ?(tracer = Otel.Tracer.get_main ()) ?(span : Otel.Span.t option) + (module C : Cohttp_lwt.S.Client) = let module Traced = struct open Lwt.Syntax @@ -168,20 +172,11 @@ let client ?(scope : Otel.Scope.t option) (module C : Cohttp_lwt.S.Client) = ] let context_for ~uri ~meth = - let trace_id = - match scope with - | Some scope -> Some scope.trace_id - | None -> None - in - let parent = - match scope with - | Some scope -> Some scope.span_id - | None -> None - in + let trace_id = Option.map Otel.Span.trace_id span in let attrs = attrs_for ~uri ~meth () in - trace_id, parent, attrs + trace_id, span, attrs - let add_traceparent (scope : Otel.Scope.t) headers = + let add_traceparent (span : Otel.Span.t) headers = let module Traceparent = Otel.Trace_context.Traceparent in let headers = match headers with @@ -189,20 +184,17 @@ let client ?(scope : Otel.Scope.t option) (module C : Cohttp_lwt.S.Client) = | Some headers -> headers in Header.add headers Traceparent.name - (Traceparent.to_value ~trace_id:scope.trace_id ~parent_id:scope.span_id - ()) + (Traceparent.to_value ~trace_id:(Otel.Span.trace_id span) + ~parent_id:(Otel.Span.id span) ()) let call ?ctx ?headers ?body ?chunked meth (uri : Uri.t) : (Response.t * Cohttp_lwt.Body.t) Lwt.t = let trace_id, parent, attrs = context_for ~uri ~meth in - Otel_lwt.Trace.with_ "request" ~kind:Span_kind_client ?trace_id ?parent - ~attrs (fun scope -> - let headers = add_traceparent scope headers in + Otel_lwt.Tracer.with_ tracer "request" ~kind:Span_kind_client ?trace_id + ?parent ~attrs (fun span -> + let headers = add_traceparent span headers in let* res, body = C.call ?ctx ~headers ?body ?chunked meth uri in - Otel.Scope.add_attrs scope (fun () -> - let code = Response.status res in - let code = Code.code_of_status code in - [ "http.status_code", `Int code ]); + Otel.Span.add_attrs span (attrs_of_response res); Lwt.return (res, body)) let head ?ctx ?headers uri = @@ -225,14 +217,11 @@ let client ?(scope : Otel.Scope.t option) (module C : Cohttp_lwt.S.Client) = let post_form ?ctx ?headers ~params uri = let trace_id, parent, attrs = context_for ~uri ~meth:`POST in - Otel_lwt.Trace.with_ "request" ~kind:Span_kind_client ?trace_id ?parent - ~attrs (fun scope -> - let headers = add_traceparent scope headers in + Otel_lwt.Tracer.with_ tracer "request" ~kind:Span_kind_client ?trace_id + ?parent ~attrs (fun span -> + let headers = add_traceparent span headers in let* res, body = C.post_form ?ctx ~headers ~params uri in - Otel.Scope.add_attrs scope (fun () -> - let code = Response.status res in - let code = Code.code_of_status code in - [ "http.status_code", `Int code ]); + Otel.Span.add_attrs span (attrs_of_response res); Lwt.return (res, body)) let callv = C.callv (* TODO *) diff --git a/src/integrations/logs/opentelemetry_logs.ml b/src/integrations/logs/opentelemetry_logs.ml index a06cf117..16b1b513 100644 --- a/src/integrations/logs/opentelemetry_logs.ml +++ b/src/integrations/logs/opentelemetry_logs.ml @@ -11,13 +11,13 @@ module Otel = Opentelemetry (* Levels *) (*****************************************************************************) (* Convert log level to Otel severity *) -let log_level_to_severity (level : Logs.level) : Otel.Logs.severity = +let log_level_to_severity (level : Logs.level) : Otel.Log_record.severity = match level with - | Logs.App -> Otel.Logs.Severity_number_info (* like info, but less severe *) - | Logs.Info -> Otel.Logs.Severity_number_info2 - | Logs.Error -> Otel.Logs.Severity_number_error - | Logs.Warning -> Otel.Logs.Severity_number_warn - | Logs.Debug -> Otel.Logs.Severity_number_debug + | Logs.App -> Severity_number_info (* like info, but less severe *) + | Logs.Info -> Severity_number_info2 + | Logs.Error -> Severity_number_error + | Logs.Warning -> Severity_number_warn + | Logs.Debug -> Severity_number_debug (*****************************************************************************) (* Logs Util *) @@ -34,21 +34,20 @@ let emit_telemetry do_emit = Logs.Tag.(empty |> add emit_telemetry_tag do_emit) (*****************************************************************************) (* Log a message to otel with some attrs *) -let log ?service_name ?(attrs = []) ?(scope = Otel.Scope.get_ambient_scope ()) - ~level msg = +let log ?(logger = Otel.Logger.get_main ()) ?attrs + ?(scope = Otel.Ambient_span.get ()) ~level msg = let log_level = Logs.level_to_string (Some level) in - let span_id = - Option.map (fun (scope : Otel.Scope.t) -> scope.span_id) scope - in - let trace_id = - Option.map (fun (scope : Otel.Scope.t) -> scope.trace_id) scope - in + let span_id = Option.map Otel.Span.id scope in + let trace_id = Option.map Otel.Span.trace_id scope in let severity = log_level_to_severity level in - let log = Otel.Logs.make_str ~severity ~log_level ?trace_id ?span_id msg in + let log = + Otel.Log_record.make_str ~severity ~log_level ?attrs ?trace_id ?span_id msg + in (* Noop if no backend is set *) - Otel.Logs.emit ?service_name ~attrs [ log ] + (* TODO: be more explicit *) + Otel.Emitter.emit logger [ log ] -let otel_reporter ?service_name ?(attributes = []) () : Logs.reporter = +let otel_reporter ?(attributes = []) () : Logs.reporter = let report src level ~over k msgf = msgf (fun ?header ?(tags : Logs.Tag.set option) fmt -> let k _ = @@ -92,13 +91,13 @@ let otel_reporter ?service_name ?(attributes = []) () : Logs.reporter = let do_emit = Option.value ~default:true (Logs.Tag.find emit_telemetry_tag tags) in - if do_emit then log ?service_name ~attrs ~level msg; + if do_emit then log ~attrs ~level msg; k ()) fmt) in { Logs.report } -let attach_otel_reporter ?service_name ?attributes reporter = +let attach_otel_reporter ?attributes reporter = (* Copied directly from the Logs.mli docs. Just calls a bunch of reporters in a row *) let combine r1 r2 = @@ -108,5 +107,5 @@ let attach_otel_reporter ?service_name ?attributes reporter = in { Logs.report } in - let otel_reporter = otel_reporter ?service_name ?attributes () in + let otel_reporter = otel_reporter ?attributes () in combine reporter otel_reporter diff --git a/src/integrations/logs/opentelemetry_logs.mli b/src/integrations/logs/opentelemetry_logs.mli index 7ac4e594..43fadb8f 100644 --- a/src/integrations/logs/opentelemetry_logs.mli +++ b/src/integrations/logs/opentelemetry_logs.mli @@ -24,11 +24,8 @@ val emit_telemetry : bool -> Logs.Tag.set {!emit_telemetry_tag} as its only member *) val otel_reporter : - ?service_name:string -> - ?attributes:(string * Opentelemetry.value) list -> - unit -> - Logs.reporter -(** [otel_reporter ?service_name ?tag_value_pp_buffer_size ?attrs ()] creates a + ?attributes:(string * Opentelemetry.value) list -> unit -> Logs.reporter +(** [otel_reporter ?tag_value_pp_buffer_size ?attrs ()] creates a [Logs.reporter] that will create and emit an OTel log with the following info: {ul @@ -61,19 +58,17 @@ val otel_reporter : Example use: [Logs.set_reporter (Opentelemetery_logs.otel_reporter ())] *) val attach_otel_reporter : - ?service_name:string -> ?attributes:(string * Opentelemetry.value) list -> Logs.reporter -> Logs.reporter -(** [attach_otel_reporter ?service_name ?attributes reporter] will create a - reporter that first calls the reporter passed as an argument, then an otel - report created via {!otel_reporter}, for every log. This is useful for if - you want to emit logs to stderr and to OTel at the same time. +(** [attach_otel_reporter ?attributes reporter] will create a reporter that + first calls the reporter passed as an argument, then an otel report created + via {!otel_reporter}, for every log. This is useful for if you want to emit + logs to stderr and to OTel at the same time. Example: {[ let reporter = Logs_fmt.reporter () in Logs.set_reporter - (Opentelemetry_logs.attach_otel_reporter ?service_name ?attributes - reporter) + (Opentelemetry_logs.attach_otel_reporter ?attributes reporter) ]} *) diff --git a/src/lib/ambient_span.ml b/src/lib/ambient_span.ml new file mode 100644 index 00000000..b8cc84e6 --- /dev/null +++ b/src/lib/ambient_span.ml @@ -0,0 +1,9 @@ +(** Find current span from ambient-context *) +let[@inline] get () : Span.t option = + Opentelemetry_ambient_context.get Span.k_context + +(** [with_ambient span f] runs [f()] with the current ambient span being set to + [span] *) +let[@inline] with_ambient (span : Span.t) (f : unit -> 'a) : 'a = + Opentelemetry_ambient_context.with_key_bound_to Span.k_context span (fun _ -> + f ()) diff --git a/src/lib/ambient_span.mli b/src/lib/ambient_span.mli new file mode 100644 index 00000000..70997ac4 --- /dev/null +++ b/src/lib/ambient_span.mli @@ -0,0 +1,8 @@ +(** Storing the current span in ambient context. *) + +val get : unit -> Span.t option +(** Find current span from ambient-context *) + +val with_ambient : Span.t -> (unit -> 'a) -> 'a +(** [with_ambient span f] runs [f()] with the current ambient span being set to + [span] *) diff --git a/src/lib/conventions.ml b/src/lib/conventions.ml new file mode 100644 index 00000000..c4002cb1 --- /dev/null +++ b/src/lib/conventions.ml @@ -0,0 +1,130 @@ +(** Semantic conventions. + + {{:https://opentelemetry.io/docs/specs/semconv/} + https://opentelemetry.io/docs/specs/semconv/} *) + +module Attributes = struct + module Process = struct + module Runtime = struct + let name = "process.runtime.name" + + let version = "process.runtime.version" + + let description = "process.runtime.description" + end + end + + (** https://opentelemetry.io/docs/specs/semconv/attributes-registry/code/ *) + module Code = struct + (** Int *) + let column = "code.column" + + let filepath = "code.filepath" + + let function_ = "code.function" + + (** int *) + let line = "code.lineno" + + let namespace = "code.namespace" + + let stacktrace = "code.stacktrace" + end + + module Service = struct + let name = "service.name" + + let namespace = "service.namespace" + + let instance_id = "service.instance.id" + + let version = "service.version" + end + + module HTTP = struct + let error_type = "error.type" + + let request_method = "http.request.method" + + let route = "http.route" + + let url_full = "url.full" + + (** HTTP status code, int *) + let response_status_code = "http.response.status_code" + + let server_address = "server.address" + + let server_port = "server.port" + + (** http or https *) + let url_scheme = "url.scheme" + end + + (** https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/host.md + *) + module Host = struct + let id = "host.id" + + let name = "host.name" + + let type_ = "host.type" + + let arch = "host.arch" + + let ip = "host.ip" + + let mac = "host.mac" + + let image_id = "host.image.id" + + let image_name = "host.image.name" + + let image_version = "host.image.version" + end +end + +module Metrics = struct + module Process = struct + module Runtime = struct + module Ocaml = struct + module GC = struct + let compactions = "process.runtime.ocaml.gc.compactions" + + let major_collections = "process.runtime.ocaml.gc.major_collections" + + let major_heap = "process.runtime.ocaml.gc.major_heap" + + let minor_allocated = "process.runtime.ocaml.gc.minor_allocated" + + let minor_collections = "process.runtime.ocaml.gc.minor_collections" + end + end + end + end + + (** https://opentelemetry.io/docs/specs/semconv/http/ *) + module HTTP = struct + module Server = struct + let request_duration = "http.server.request.duration" + + let active_requests = "http.server.active_requests" + + (** Histogram *) + let request_body_size = "http.server.request.body.size" + + (** Histogram *) + let response_body_size = "http.server.response.body.size" + end + + module Client = struct + let request_duration = "http.client.request.duration" + + (** Histogram *) + let request_body_size = "http.client.request.body.size" + + (** Histogram *) + let response_body_size = "http.client.response.body.size" + end + end +end diff --git a/src/lib/dune b/src/lib/dune new file mode 100644 index 00000000..4e68dbcb --- /dev/null +++ b/src/lib/dune @@ -0,0 +1,25 @@ +(library + (name opentelemetry) + (public_name opentelemetry) + (synopsis "API for opentelemetry instrumentation") + (flags + :standard + -warn-error + -a+8 + -open + Opentelemetry_util + -open + Opentelemetry_core + -open + Opentelemetry_core.Common_) + (libraries + opentelemetry.core + opentelemetry.proto + opentelemetry.util + opentelemetry.ambient-context + opentelemetry.atomic + ptime + ptime.clock.os + pbrt + threads + hmap)) diff --git a/src/lib/gc_metrics.ml b/src/lib/gc_metrics.ml new file mode 100644 index 00000000..42dacac6 --- /dev/null +++ b/src/lib/gc_metrics.ml @@ -0,0 +1,56 @@ +open struct + let bytes_per_word = Sys.word_size / 8 + + let[@inline] word_to_bytes n = n * bytes_per_word + + let[@inline] word_to_bytes_f n = n *. float bytes_per_word + + let default_interval_s = 20 +end + +let get_metrics () : Metrics.t list = + let gc = Gc.quick_stat () in + let now = Timestamp_ns.now_unix_ns () in + let open Metrics in + let open Conventions.Metrics in + [ + gauge ~name:Process.Runtime.Ocaml.GC.major_heap ~unit_:"B" + [ int ~now (word_to_bytes gc.Gc.heap_words) ]; + sum ~name:Process.Runtime.Ocaml.GC.minor_allocated + ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative + ~is_monotonic:true ~unit_:"B" + [ float ~now (word_to_bytes_f gc.Gc.minor_words) ]; + sum ~name:Process.Runtime.Ocaml.GC.minor_collections + ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative + ~is_monotonic:true + [ int ~now gc.Gc.minor_collections ]; + sum ~name:Process.Runtime.Ocaml.GC.major_collections + ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative + ~is_monotonic:true + [ int ~now gc.Gc.major_collections ]; + sum ~name:Process.Runtime.Ocaml.GC.compactions + ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative + ~is_monotonic:true + [ int ~now gc.Gc.compactions ]; + ] + +let setup ?(min_interval_s = default_interval_s) (exp : Exporter.t) = + (* limit rate *) + let min_interval_s = max 5 min_interval_s in + let min_interval = Mtime.Span.(min_interval_s * s) in + let limiter = Interval_limiter.create ~min_interval () in + + let on_tick () = + if Interval_limiter.make_attempt limiter then ( + let m = get_metrics () in + Exporter.send_metrics exp m + ) + in + Exporter.on_tick exp on_tick + +let setup_on_main_exporter ?min_interval_s () = + match Main_exporter.get () with + | None -> () + | Some exp -> setup ?min_interval_s exp + +let basic_setup () = setup_on_main_exporter () diff --git a/src/lib/gc_metrics.mli b/src/lib/gc_metrics.mli new file mode 100644 index 00000000..d17496aa --- /dev/null +++ b/src/lib/gc_metrics.mli @@ -0,0 +1,21 @@ +(** Export GC metrics. + + These metrics are emitted regularly. *) + +val get_metrics : unit -> Metrics.t list +(** Get a few metrics from the current state of the GC. *) + +val setup : ?min_interval_s:int -> Exporter.t -> unit +(** Setup a hook that will emit GC statistics on every tick. It does assume that + [tick] is called regularly on the exporter. For example, if we ensure the + exporter's [tick] function is called every 5s, we'll get GC metrics every + 5s. + + @param min_interval_s + if provided, GC metrics will be emitted at most every [min_interval_s] + seconds. This prevents flooding. Default value is 20s. *) + +val setup_on_main_exporter : ?min_interval_s:int -> unit -> unit +(** Setup the hook on the main exporter. *) + +val basic_setup : unit -> unit [@@deprecated "use setup_on_main_exporter"] diff --git a/src/lib/globals.ml b/src/lib/globals.ml new file mode 100644 index 00000000..c4d3c55b --- /dev/null +++ b/src/lib/globals.ml @@ -0,0 +1,96 @@ +(** Process-wide metadata, environment variables, etc. *) + +open Common_ +open Proto.Common + +(** Main service name metadata *) +let service_name = ref "unknown_service" + +(** Namespace for the service *) +let service_namespace = ref None + +(** Unique identifier for the service *) +let service_instance_id = ref None + +(** Version for the service + @since 0.12 *) +let service_version = ref None + +let instrumentation_library = + make_instrumentation_scope ~version:"%%VERSION_NUM%%" ~name:"ocaml-otel" () + +(** Global attributes, initially set via OTEL_RESOURCE_ATTRIBUTES and modifiable + by the user code. They will be attached to each outgoing metrics/traces. *) +let global_attributes : key_value list ref = + let parse_pair s = + match String.split_on_char '=' s with + | [ a; b ] -> make_key_value ~key:a ~value:(String_value b) () + | _ -> failwith (Printf.sprintf "invalid attribute: %S" s) + in + ref + @@ + try + Sys.getenv "OTEL_RESOURCE_ATTRIBUTES" + |> String.split_on_char ',' |> List.map parse_pair + with _ -> [] + +(** Add a global attribute *) +let add_global_attribute (key : string) (v : Value.t) : unit = + global_attributes := Key_value.conv (key, v) :: !global_attributes + +(* add global attributes to this list *) +let merge_global_attributes_ into : _ list = + let not_redundant kv = List.for_all (fun kv' -> kv.key <> kv'.key) into in + List.rev_append (List.filter not_redundant !global_attributes) into + +let default_span_kind = Span.default_kind + +open struct + let runtime_attributes = + Conventions.Attributes. + [ + Process.Runtime.name, `String "ocaml"; + Process.Runtime.version, `String Sys.ocaml_version; + ] + + let runtime_attributes_converted = List.map Key_value.conv runtime_attributes +end + +(** Attributes about the OCaml runtime. See + https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/process.md#process-runtimes +*) +let[@inline] get_runtime_attributes () = runtime_attributes + +let mk_attributes ?(service_name = !service_name) ?(attrs = []) () : _ list = + let l = List.rev_map Key_value.conv attrs in + let l = List.rev_append runtime_attributes_converted l in + let l = + make_key_value ~key:Conventions.Attributes.Service.name + ~value:(String_value service_name) () + :: l + in + let l = + match !service_instance_id with + | None -> l + | Some v -> + make_key_value ~key:Conventions.Attributes.Service.instance_id + ~value:(String_value v) () + :: l + in + let l = + match !service_namespace with + | None -> l + | Some v -> + make_key_value ~key:Conventions.Attributes.Service.namespace + ~value:(String_value v) () + :: l + in + let l = + match !service_version with + | None -> l + | Some v -> + make_key_value ~key:Conventions.Attributes.Service.version + ~value:(String_value v) () + :: l + in + l |> merge_global_attributes_ diff --git a/src/lib/interval_limiter.ml b/src/lib/interval_limiter.ml new file mode 100644 index 00000000..456de0f6 --- /dev/null +++ b/src/lib/interval_limiter.ml @@ -0,0 +1,18 @@ +type t = { + min_interval: Mtime.span; + last: Mtime.t Atomic.t; +} + +let create ~min_interval () : t = + { min_interval; last = Atomic.make Mtime.min_stamp } + +let make_attempt (self : t) : bool = + let now = Mtime_clock.now () in + let last = Atomic.get self.last in + let elapsed = Mtime.span last now in + if Mtime.Span.compare elapsed self.min_interval >= 0 then + (* attempts succeeds, unless another thread updated [self.last] + in the mean time *) + Atomic.compare_and_set self.last last now + else + false diff --git a/src/lib/interval_limiter.mli b/src/lib/interval_limiter.mli new file mode 100644 index 00000000..b07f7c68 --- /dev/null +++ b/src/lib/interval_limiter.mli @@ -0,0 +1,9 @@ +type t + +val create : min_interval:Mtime.span -> unit -> t + +val make_attempt : t -> bool +(** [make_attempt lim] returns [true] if the last successful attempt was more + than [min_interval] ago, as measured by mtime. If so, this counts as the new + latest attempt; otherwise [false] is returned and the state is not updated. +*) diff --git a/src/lib/logger.ml b/src/lib/logger.ml new file mode 100644 index 00000000..83286d57 --- /dev/null +++ b/src/lib/logger.ml @@ -0,0 +1,31 @@ +(** Logs. + + See + {{:https://opentelemetry.io/docs/reference/specification/overview/#log-signal} + the spec} *) + +open Opentelemetry_emitter + +type t = Log_record.t Emitter.t + +let dummy : t = Emitter.dummy + +let enabled = Emitter.enabled + +let of_exporter (exp : Exporter.t) : t = exp.emit_logs + +let get_main () : t = + match Main_exporter.get () with + | None -> dummy + | Some e -> e.emit_logs + +let (emit [@deprecated "use an explicit Logger.t"]) = + fun (logs : Log_record.t list) : unit -> + match Main_exporter.get () with + | None -> () + | Some exp -> Exporter.send_logs exp logs + +(** An emitter that uses the current {!Main_exporter} *) +let dynamic_forward_to_main_exporter : t = + Main_exporter.Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_logs) diff --git a/src/lib/main_exporter.ml b/src/lib/main_exporter.ml new file mode 100644 index 00000000..6a5284bc --- /dev/null +++ b/src/lib/main_exporter.ml @@ -0,0 +1,115 @@ +(** Main exporter, used by the main tracing functions. + + It is better to pass an explicit exporter when possible. *) + +open Exporter + +(* hidden *) +open struct + (* a list of callbacks automatically added to the main exporter *) + let on_tick_cbs_ = Alist.make () + + let exporter : t option Atomic.t = Atomic.make None +end + +(** Remove current exporter, if any. + @param on_done see {!t#cleanup}, @since 0.12 *) +let remove ~on_done () : unit = + match Atomic.exchange exporter None with + | None -> () + | Some exp -> + tick exp; + cleanup exp ~on_done + +(** Is there a configured exporter? *) +let present () : bool = Option.is_some (Atomic.get exporter) + +(** Current exporter, if any *) +let[@inline] get () : t option = Atomic.get exporter + +let add_on_tick_callback f = + Alist.add on_tick_cbs_ f; + Option.iter (fun exp -> on_tick exp f) (get ()) + +module Util = struct + open Opentelemetry_emitter + + (** An emitter that uses the current main *) + let dynamic_forward_to_main_exporter ~get_emitter () : _ Emitter.t = + let enabled () = present () in + let closed () = not (enabled ()) in + let flush_and_close () = () in + let tick ~now:_ = + match get () with + | None -> () + | Some exp -> Exporter.tick exp + in + let emit signals = + if signals <> [] then ( + match get () with + | None -> () + | Some exp -> + let emitter = get_emitter exp in + Emitter.emit emitter signals + ) + in + { Emitter.enabled; closed; emit; tick; flush_and_close } +end + +(** This exporter uses the current "main exporter" using [get()] at every + invocation. It is useful as a fallback or to port existing applications that + expect a global singleton backend^W exporter. + @since NEXT_RELEASE *) +let dynamic_forward_to_main_exporter : Exporter.t = + let open Exporter in + let emit_logs = + Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> e.emit_logs) + in + let emit_metrics = + Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_metrics) + in + let emit_spans = + Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_spans) + in + let on_tick f = + match get () with + | None -> () + | Some exp -> Exporter.on_tick exp f + in + let tick () = + match get () with + | None -> () + | Some exp -> exp.tick () + in + let cleanup ~on_done () = on_done () in + { Exporter.emit_metrics; emit_spans; emit_logs; on_tick; tick; cleanup } + +(** Set the global exporter *) +let set (exp : t) : unit = + (* sanity check! this specific exporter would just call itself, leading to + stack overflow. *) + if exp == dynamic_forward_to_main_exporter then + failwith + "cannot set Main_exporter.dynamic_forward_to_main_exporter as main \ + exporter!"; + + List.iter (on_tick exp) (Alist.get on_tick_cbs_); + Atomic.set exporter (Some exp) + +let (set_backend [@deprecated "use `Main_exporter.set`"]) = set + +let (remove_backend [@deprecated "use `Main_exporter.remove`"]) = remove + +let (has_backend [@deprecated "use `Main_exporter.present`"]) = present + +let (get_backend [@deprecated "use `Main_exporter.get"]) = get + +let with_setup_debug_backend ?(on_done = ignore) (exp : t) ?(enable = true) () f + = + if enable then ( + set exp; + Fun.protect ~finally:(fun () -> cleanup exp ~on_done) f + ) else + f () diff --git a/src/lib/metrics_callbacks.ml b/src/lib/metrics_callbacks.ml new file mode 100644 index 00000000..81aa2055 --- /dev/null +++ b/src/lib/metrics_callbacks.ml @@ -0,0 +1,47 @@ +open Common_ + +type t = { cbs: (unit -> Metrics.t list) Alist.t } [@@unboxed] + +let create () : t = { cbs = Alist.make () } + +let[@inline] add_metrics_cb (self : t) f = Alist.add self.cbs f + +let add_to_exporter (exp : Exporter.t) (self : t) = + let on_tick () = + (* collect all metrics *) + let res = ref [] in + List.iter + (fun f -> + let f_metrics = f () in + res := List.rev_append f_metrics !res) + (Alist.get self.cbs); + let metrics = !res in + + (* emit the metrics *) + Exporter.send_metrics exp metrics + in + Exporter.on_tick exp on_tick + +let with_set_added_to_exporter (exp : Exporter.t) (f : t -> 'a) : 'a = + let set = create () in + add_to_exporter exp set; + f set + +let with_set_added_to_main_exporter (f : t -> unit) : unit = + match Main_exporter.get () with + | None -> () + | Some exp -> with_set_added_to_exporter exp f + +module Main_set = struct + let cur_set_ : t option Atomic.t = Atomic.make None + + let rec get () : t = + match Atomic.get cur_set_ with + | Some s -> s + | None -> + let s = create () in + if Atomic.compare_and_set cur_set_ None (Some s) then + s + else + get () +end diff --git a/src/lib/metrics_callbacks.mli b/src/lib/metrics_callbacks.mli new file mode 100644 index 00000000..d66388ad --- /dev/null +++ b/src/lib/metrics_callbacks.mli @@ -0,0 +1,33 @@ +(** A set of callbacks that produce metrics when called. The metrics are + automatically called regularly. + + This allows applications to register metrics callbacks from various points + in the program (or even in libraries), and not worry about setting + alarms/intervals to emit them. *) + +type t + +val create : unit -> t + +val add_metrics_cb : t -> (unit -> Metrics.t list) -> unit +(** [register set f] adds the callback [f] to the [set]. + + [f] will be called at unspecified times and is expected to return a list of + metrics. It might be called regularly by the backend, in particular (but not + only) when {!Exporter.tick} is called. *) + +val add_to_exporter : Exporter.t -> t -> unit +(** Make sure we export metrics at every [tick] of the exporter *) + +val with_set_added_to_exporter : Exporter.t -> (t -> 'a) -> 'a +(** [with_set_added_to_exporter exp f] creates a set, adds it to the exporter, + and calls [f] on it *) + +val with_set_added_to_main_exporter : (t -> unit) -> unit +(** If there is a main exporter, add a set to it and call [f set], else do not + call [f] at all *) + +module Main_set : sig + val get : unit -> t + (** The global set *) +end diff --git a/src/lib/metrics_emitter.ml b/src/lib/metrics_emitter.ml new file mode 100644 index 00000000..f26fc224 --- /dev/null +++ b/src/lib/metrics_emitter.ml @@ -0,0 +1,27 @@ +open Opentelemetry_emitter + +type t = Metrics.t Emitter.t + +let dummy : t = Emitter.dummy + +let enabled = Emitter.enabled + +let of_exporter (exp : Exporter.t) : t = exp.emit_metrics + +(** Emit some metrics to the collector (sync). This blocks until the backend has + pushed the metrics into some internal queue, or discarded them. *) +let (emit [@deprecated "use an explicit Metrics_emitter.t"]) = + fun ?attrs:_ (l : Metrics.t list) : unit -> + match Main_exporter.get () with + | None -> () + | Some exp -> Exporter.send_metrics exp l + +let get_main () : t = + match Main_exporter.get () with + | None -> dummy + | Some e -> e.emit_metrics + +(** An emitter that uses the current {!Main_exporter} *) +let dynamic_forward_to_main_exporter : t = + Main_exporter.Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_metrics) diff --git a/src/lib/opentelemetry.ml b/src/lib/opentelemetry.ml new file mode 100644 index 00000000..adef99e0 --- /dev/null +++ b/src/lib/opentelemetry.ml @@ -0,0 +1,96 @@ +(** Main Opentelemetry API for libraries and user code. *) + +module Core = Opentelemetry_core +(** Core types and definitions *) + +module Alist = Alist +(** Atomic list, for internal usage + @since 0.7 *) + +module Interval_limiter = Interval_limiter +(** Utility to limit the frequency of some event + @since NEXT_RELEASE *) + +(** {2 Wire format} *) + +module Proto = Opentelemetry_proto +(** Protobuf types. + + This is mostly useful internally. Users should not need to touch it. *) + +(** {2 Timestamps} *) + +module Timestamp_ns = Timestamp_ns + +(** {2 Export signals to some external collector.} *) + +module Emitter = Opentelemetry_emitter.Emitter +module Exporter = Exporter +module Main_exporter = Main_exporter + +module Collector = struct + include Exporter + include Main_exporter +end +[@@deprecated "Use 'Exporter' instead"] + +(** {2 Identifiers} *) + +module Trace_id = Trace_id + +let k_trace_id = Trace_id.k_trace_id + +module Span_id = Span_id +module Span_ctx = Span_ctx + +let k_span_ctx = Span_ctx.k_span_ctx + +(** {2 Attributes and conventions} *) + +module Conventions = Conventions + +type value = Value.t +(** A value in a key/value attribute *) + +type key_value = Key_value.t + +(** {2 Global settings} *) + +module Globals = Globals + +(** {2 Traces and Spans} *) + +module Event = Event +module Span_link = Span_link +module Span_status = Span_status +module Span_kind = Span_kind + +(** {2 Traces} *) + +module Span = Span +module Ambient_span = Ambient_span +module Tracer = Tracer +module Trace = Tracer [@@deprecated "use Tracer instead"] + +(** {2 Metrics} *) + +module Metrics = Metrics +module Metrics_callbacks = Metrics_callbacks +module Metrics_emitter = Metrics_emitter + +(** {2 Logs} *) + +module Log_record = Log_record +module Logger = Logger +module Logs = Logger [@@deprecated "use Logger"] + +(** {2 Utils} *) + +module Any_signal = Any_signal +module Trace_context = Trace_context +module Gc_metrics = Gc_metrics + +(* *) + +module GC_metrics = Gc_metrics +[@@deprecated "use Gc_metrics (beware capitalization)"] diff --git a/src/lib/tracer.ml b/src/lib/tracer.ml new file mode 100644 index 00000000..1abfaade --- /dev/null +++ b/src/lib/tracer.ml @@ -0,0 +1,118 @@ +(** Traces. + + See + {{:https://opentelemetry.io/docs/reference/specification/overview/#tracing-signal} + the spec} *) + +open Common_ +open Proto.Trace +open Opentelemetry_emitter + +type span = Span.t + +type t = Span.t Emitter.t +(** A tracer. + + https://opentelemetry.io/docs/specs/otel/trace/api/#tracer *) + +(** Dummy tracer, always disabled *) +let dummy : t = Emitter.dummy + +(** A tracer that uses the current {!Main_exporter} *) +let dynamic_forward_to_main_exporter : t = + Main_exporter.Util.dynamic_forward_to_main_exporter () ~get_emitter:(fun e -> + e.emit_spans) + +(** Get tracer using the main exporter in {!Main_exporter} *) +let get_main () : t = + match Main_exporter.get () with + | None -> dummy + | Some e -> e.emit_spans + +let (add_event [@deprecated "use Span.add_event"]) = Span.add_event + +let (add_attrs [@deprecated "use Span.add_attrs"]) = Span.add_attrs + +let with_thunk_and_finally (self : t) ?(force_new_trace_id = false) ?trace_state + ?(attrs : (string * [< Value.t ]) list = []) ?kind ?trace_id ?parent ?links + name cb = + let parent = + match parent with + | Some _ -> parent + | None -> Ambient_span.get () + in + let trace_id = + match trace_id, parent with + | _ when force_new_trace_id -> Trace_id.create () + | Some trace_id, _ -> trace_id + | None, Some p -> Span.trace_id p + | None, None -> Trace_id.create () + in + let start_time = Timestamp_ns.now_unix_ns () in + let span_id = Span_id.create () in + + let parent_id = Option.map Span.id parent in + + let span : Span.t = + Span.make ?trace_state ?kind ?parent:parent_id ~trace_id ~id:span_id ~attrs + ?links ~start_time ~end_time:start_time name + in + (* called once we're done, to emit a span *) + let finally res = + let end_time = Timestamp_ns.now_unix_ns () in + Proto.Trace.span_set_end_time_unix_nano span end_time; + + (match Span.status span with + | Some _ -> () + | None -> + (match res with + | Ok () -> + (* By default, all spans are Unset, which means a span completed without error. + The Ok status is reserved for when you need to explicitly mark a span as successful + rather than stick with the default of Unset (i.e., “without error”). + + https://opentelemetry.io/docs/languages/go/instrumentation/#set-span-status *) + () + | Error (e, bt) -> + Span.record_exception span e bt; + let status = + make_status ~code:Status_code_error ~message:(Printexc.to_string e) () + in + Span.set_status span status)); + + Emitter.emit self [ span ] + in + let thunk () = Ambient_span.with_ambient span (fun () -> cb span) in + thunk, finally + +(** Sync span guard. + + Notably, this includes {e implicit} scope-tracking: if called without a + [~scope] argument (or [~parent]/[~trace_id]), it will check in the + {!Ambient_context} for a surrounding environment, and use that as the scope. + Similarly, it uses {!Scope.with_ambient_scope} to {e set} a new scope in the + ambient context, so that any logically-nested calls to {!with_} will use + this span as their parent. + + {b NOTE} be careful not to call this inside a Gc alarm, as it can cause + deadlocks. + + @param force_new_trace_id + if true (default false), the span will not use a ambient scope, the + [~scope] argument, nor [~trace_id], but will instead always create fresh + identifiers for this span *) +let with_ (self : t) ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id + ?parent ?links name (cb : Span.t -> 'a) : 'a = + let thunk, finally = + with_thunk_and_finally self ?force_new_trace_id ?trace_state ?attrs ?kind + ?trace_id ?parent ?links name cb + in + + try + let rv = thunk () in + finally (Ok ()); + rv + with e -> + let bt = Printexc.get_raw_backtrace () in + finally (Error (e, bt)); + raise e diff --git a/src/lwt/opentelemetry_lwt.ml b/src/lwt/opentelemetry_lwt.ml index 7b443c20..d89d7e3c 100644 --- a/src/lwt/opentelemetry_lwt.ml +++ b/src/lwt/opentelemetry_lwt.ml @@ -7,24 +7,27 @@ module Span = Span module Span_link = Span_link module Globals = Globals module Timestamp_ns = Timestamp_ns -module GC_metrics = GC_metrics +module Gc_metrics = Gc_metrics module Metrics_callbacks = Metrics_callbacks module Trace_context = Trace_context +module GC_metrics = Gc_metrics [@@depecated "use Gc_metrics"] +module Metrics_emitter = Metrics_emitter +module Logger = Logger +module Log_record = Log_record external reraise : exn -> 'a = "%reraise" (** This is equivalent to [Lwt.reraise]. We inline it here so we don't force to use Lwt's latest version *) -module Trace = struct - include Trace +module Tracer = struct + include Tracer (** Sync span guard *) - let with_ ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind - ?trace_id ?parent ?scope ?links name (cb : Scope.t -> 'a Lwt.t) : 'a Lwt.t - = + let with_ (self : t) ?force_new_trace_id ?trace_state ?attrs ?kind ?trace_id + ?parent ?links name (cb : Span.t -> 'a Lwt.t) : 'a Lwt.t = let thunk, finally = - with_' ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind - ?trace_id ?parent ?scope ?links name cb + with_thunk_and_finally self ?force_new_trace_id ?trace_state ?attrs ?kind + ?trace_id ?parent ?links name cb in try%lwt @@ -37,11 +40,14 @@ module Trace = struct reraise e end +module Trace = Tracer [@@deprecated "use Tracer"] + module Metrics = struct include Metrics end module Logs = struct include Proto.Logs - include Logs + include Log_record + include Logger end diff --git a/src/trace/common_.ml b/src/trace/common_.ml new file mode 100644 index 00000000..8c12f62a --- /dev/null +++ b/src/trace/common_.ml @@ -0,0 +1,6 @@ +module OTEL = Opentelemetry +module Otrace = Trace_core (* ocaml-trace *) + +let ( let@ ) = ( @@ ) + +let spf = Printf.sprintf diff --git a/src/trace/conv.ml b/src/trace/conv.ml new file mode 100644 index 00000000..aa54bd79 --- /dev/null +++ b/src/trace/conv.ml @@ -0,0 +1,40 @@ +open Common_ + +let[@inline] trace_id_of_otel (id : OTEL.Trace_id.t) : Otrace.trace_id = + if id == OTEL.Trace_id.dummy then + Otrace.Collector.dummy_trace_id + else + Bytes.unsafe_to_string (OTEL.Trace_id.to_bytes id) + +let[@inline] trace_id_to_otel (id : Otrace.trace_id) : OTEL.Trace_id.t = + if id == Otrace.Collector.dummy_trace_id then + OTEL.Trace_id.dummy + else + OTEL.Trace_id.of_bytes @@ Bytes.unsafe_of_string id + +let[@inline] span_id_of_otel (id : OTEL.Span_id.t) : Otrace.span = + if id == OTEL.Span_id.dummy then + Otrace.Collector.dummy_span + else + Bytes.get_int64_le (OTEL.Span_id.to_bytes id) 0 + +let[@inline] span_id_to_otel (id : Otrace.span) : OTEL.Span_id.t = + if id == Otrace.Collector.dummy_span then + OTEL.Span_id.dummy + else ( + let b = Bytes.create 8 in + Bytes.set_int64_le b 0 id; + OTEL.Span_id.of_bytes b + ) + +let[@inline] ctx_to_otel (self : Otrace.explicit_span_ctx) : OTEL.Span_ctx.t = + OTEL.Span_ctx.make + ~trace_id:(trace_id_to_otel self.trace_id) + ~parent_id:(span_id_to_otel self.span) + () + +let[@inline] ctx_of_otel (ctx : OTEL.Span_ctx.t) : Otrace.explicit_span_ctx = + { + trace_id = trace_id_of_otel (OTEL.Span_ctx.trace_id ctx); + span = span_id_of_otel (OTEL.Span_ctx.parent_id ctx); + } diff --git a/src/trace/dune b/src/trace/dune index 9606e2a5..da752802 100644 --- a/src/trace/dune +++ b/src/trace/dune @@ -2,5 +2,12 @@ (name opentelemetry_trace) (public_name opentelemetry.trace) (synopsis "Use opentelemetry as a collector for trace") - (optional) - (libraries opentelemetry.ambient-context trace.core opentelemetry)) + (optional) ; trace + (flags :standard -open Opentelemetry_util -open Opentelemetry_atomic) + (libraries + opentelemetry.ambient-context + opentelemetry.util + opentelemetry.atomic + opentelemetry + trace.core + trace.subscriber)) diff --git a/src/trace/opentelemetry_trace.ml b/src/trace/opentelemetry_trace.ml index ead41826..31f87c5c 100644 --- a/src/trace/opentelemetry_trace.ml +++ b/src/trace/opentelemetry_trace.ml @@ -1,384 +1,294 @@ -module Otel = Opentelemetry -module Otrace = Trace_core (* ocaml-trace *) -module TLS = Thread_local_storage - -open struct - let spf = Printf.sprintf -end - -module Conv = struct - let[@inline] trace_id_of_otel (id : Otel.Trace_id.t) : Otrace.trace_id = - if id == Otel.Trace_id.dummy then - Otrace.Collector.dummy_trace_id - else - Bytes.unsafe_to_string (Otel.Trace_id.to_bytes id) - - let[@inline] trace_id_to_otel (id : Otrace.trace_id) : Otel.Trace_id.t = - if id == Otrace.Collector.dummy_trace_id then - Otel.Trace_id.dummy - else - Otel.Trace_id.of_bytes @@ Bytes.unsafe_of_string id - - let[@inline] span_id_of_otel (id : Otel.Span_id.t) : Otrace.span = - if id == Otel.Span_id.dummy then - Otrace.Collector.dummy_span - else - Bytes.get_int64_le (Otel.Span_id.to_bytes id) 0 - - let[@inline] span_id_to_otel (id : Otrace.span) : Otel.Span_id.t = - if id == Otrace.Collector.dummy_span then - Otel.Span_id.dummy - else ( - let b = Bytes.create 8 in - Bytes.set_int64_le b 0 id; - Otel.Span_id.of_bytes b - ) - - let[@inline] ctx_to_otel (self : Otrace.explicit_span_ctx) : Otel.Span_ctx.t = - Otel.Span_ctx.make - ~trace_id:(trace_id_to_otel self.trace_id) - ~parent_id:(span_id_to_otel self.span) - () - - let[@inline] ctx_of_otel (ctx : Otel.Span_ctx.t) : Otrace.explicit_span_ctx = - { - trace_id = trace_id_of_otel (Otel.Span_ctx.trace_id ctx); - span = span_id_of_otel (Otel.Span_ctx.parent_id ctx); - } -end - +open Common_ +module Conv = Conv open Conv -module Well_known = struct - let spankind_key = "otrace.spankind" - - let internal = `String "INTERNAL" - - let server = `String "SERVER" - - let client = `String "CLIENT" - - let producer = `String "PRODUCER" - - let consumer = `String "CONSUMER" - - let spankind_of_string = - let open Otel.Span in - function - | "INTERNAL" -> Span_kind_internal - | "SERVER" -> Span_kind_server - | "CLIENT" -> Span_kind_client - | "PRODUCER" -> Span_kind_producer - | "CONSUMER" -> Span_kind_consumer - | _ -> Span_kind_unspecified - - let otel_attrs_of_otrace_data data = - let kind : Otel.Span.kind ref = ref Otel.Span.Span_kind_unspecified in - let data = - List.filter_map - (function - | name, `String v when name = "otrace.spankind" -> - kind := spankind_of_string v; - None - | x -> Some x) - data - in - !kind, data - - (** Key to store an error [Otel.Span.status] with the message. Set - ["otrace.error" = "mymsg"] in a span data to set the span's status to - [{message="mymsg"; code=Error}]. *) - let status_error_key = "otrace.error" -end - -open Well_known - let on_internal_error = ref (fun msg -> Printf.eprintf "error in Opentelemetry_trace: %s\n%!" msg) -type Otrace.extension_event += - | Ev_link_span of Otrace.explicit_span * Otrace.explicit_span - | Ev_set_span_kind of Otrace.explicit_span * Otel.Span_kind.t - | Ev_record_exn of Otrace.explicit_span * exn * Printexc.raw_backtrace +module Extensions = struct + type Otrace.extension_event += + | Ev_link_span of Otrace.explicit_span * OTEL.Span_ctx.t + | Ev_record_exn of { + sp: Otrace.explicit_span; + exn: exn; + bt: Printexc.raw_backtrace; + } + | Ev_set_span_kind of Otrace.explicit_span * OTEL.Span_kind.t +end + +open Extensions + +(* use the fast, thread safe span table that relies on picos. *) +module Span_tbl = Trace_subscriber.Span_tbl module Internal = struct - type span_begin = { - start_time: int64; - name: string; - __FILE__: string; - __LINE__: int; - __FUNCTION__: string option; - scope: Otel.Scope.t; - parent: Otel.Span_ctx.t option; - } + type span_begin = { span: OTEL.Span.t } [@@unboxed] - module Active_span_tbl = Hashtbl.Make (struct - include Int64 + module Active_span_tbl = Span_tbl - let hash : t -> int = Hashtbl.hash - end) + type state = { tbl: span_begin Active_span_tbl.t } [@@unboxed] - (** key to access a OTEL scope from an explicit span *) - let k_explicit_scope : Otel.Scope.t Otrace.Meta_map.key = + let create_state () : state = { tbl = Active_span_tbl.create () } + + (** key to access a OTEL span (the current span) from a Trace_core + explicit_span *) + let k_explicit_span : OTEL.Span.t Otrace.Meta_map.key = Otrace.Meta_map.Key.create () - (** Per-thread set of active spans. *) - module Active_spans = struct - type t = { tbl: span_begin Active_span_tbl.t } [@@unboxed] - - let create () : t = { tbl = Active_span_tbl.create 32 } - - let k_tls : t TLS.t = TLS.create () - - let[@inline] get () : t = - try TLS.get_exn k_tls - with TLS.Not_set -> - let self = create () in - TLS.set k_tls self; - self - end - - let otrace_of_otel (id : Otel.Span_id.t) : int64 = - let bs = Otel.Span_id.to_bytes id in + let otrace_of_otel (id : OTEL.Span_id.t) : int64 = + let bs = OTEL.Span_id.to_bytes id in (* lucky that it coincides! *) assert (Bytes.length bs = 8); Bytes.get_int64_le bs 0 - let enter_span' ?(explicit_parent : Otrace.explicit_span_ctx option) - ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name = - let open Otel in + let enter_span' (self : state) + ?(explicit_parent : Otrace.explicit_span_ctx option) ~__FUNCTION__ + ~__FILE__ ~__LINE__ ~data name = + let open OTEL in let otel_id = Span_id.create () in let otrace_id = otrace_of_otel otel_id in - let parent_scope = Scope.get_ambient_scope () in - let trace_id = - match parent_scope with - | Some sc -> sc.trace_id - | None -> Trace_id.create () - in - let parent = - match explicit_parent, parent_scope with + let implicit_parent = OTEL.Ambient_span.get () in + + let trace_id, parent_id = + match explicit_parent, implicit_parent with | Some p, _ -> - Some - (Otel.Span_ctx.make ~trace_id ~parent_id:(span_id_to_otel p.span) ()) - | None, Some parent -> Some (Otel.Scope.to_span_ctx parent) - | None, None -> None - in - - let new_scope = Otel.Scope.make ~trace_id ~span_id:otel_id ~attrs:data () in - - let start_time = Timestamp_ns.now_unix_ns () in - let sb = - { - start_time; - name; - __FILE__; - __LINE__; - __FUNCTION__; - scope = new_scope; - parent; - } - in - - let active_spans = Active_spans.get () in - Active_span_tbl.add active_spans.tbl otrace_id sb; - - otrace_id, sb - - let exit_span_ - { start_time; name; __FILE__; __LINE__; __FUNCTION__; scope; parent } = - let open Otel in - let end_time = Timestamp_ns.now_unix_ns () in - let kind, attrs = otel_attrs_of_otrace_data (Scope.attrs scope) in - - let status : Span_status.t = - match List.assoc_opt Well_known.status_error_key attrs with - | Some (`String message) -> - Span_status.make ~message ~code:Status_code_error - | _ -> Span_status.make ~message:"" ~code:Status_code_ok + let trace_id = p.trace_id |> Conv.trace_id_to_otel in + let parent_id = + try + let sb = Active_span_tbl.find_exn self.tbl p.span in + Some (OTEL.Span.id sb.span) + with Not_found -> None + in + trace_id, parent_id + | None, Some p -> Span.trace_id p, Some (Span.id p) + | None, None -> Trace_id.create (), None in let attrs = - match __FUNCTION__ with - | None -> - [ "code.filepath", `String __FILE__; "code.lineno", `Int __LINE__ ] - @ attrs - | Some __FUNCTION__ -> - let last_dot = String.rindex __FUNCTION__ '.' in - let module_path = String.sub __FUNCTION__ 0 last_dot in - let function_name = - String.sub __FUNCTION__ (last_dot + 1) - (String.length __FUNCTION__ - last_dot - 1) - in + ("code.filepath", `String __FILE__) + :: ("code.lineno", `Int __LINE__) + :: data + in + + let start_time = Timestamp_ns.now_unix_ns () in + let span : OTEL.Span.t = + OTEL.Span.make ?parent:parent_id ~trace_id ~id:otel_id ~attrs name + ~start_time ~end_time:start_time + in + + let sb = { span } in + + (match __FUNCTION__ with + | Some __FUNCTION__ when OTEL.Span.is_not_dummy span -> + let last_dot = String.rindex __FUNCTION__ '.' in + let module_path = String.sub __FUNCTION__ 0 last_dot in + let function_name = + String.sub __FUNCTION__ (last_dot + 1) + (String.length __FUNCTION__ - last_dot - 1) + in + Span.add_attrs span [ - "code.filepath", `String __FILE__; - "code.lineno", `Int __LINE__; "code.function", `String function_name; "code.namespace", `String module_path; ] - @ attrs - in + | _ -> ()); - let parent_id = Option.map Otel.Span_ctx.parent_id parent in - Span.create ~kind ~trace_id:scope.trace_id ?parent:parent_id ~status - ~id:scope.span_id ~start_time ~end_time ~attrs - ~events:(Scope.events scope) name - |> fst + Active_span_tbl.add self.tbl otrace_id sb; - let exit_span' otrace_id otel_span_begin = - let active_spans = Active_spans.get () in - Active_span_tbl.remove active_spans.tbl otrace_id; + otrace_id, sb + + let exit_span_ { span } : OTEL.Span.t = + let open OTEL in + let end_time = Timestamp_ns.now_unix_ns () in + Proto.Trace.span_set_end_time_unix_nano span end_time; + span + + let exit_span' (self : state) otrace_id otel_span_begin = + Active_span_tbl.remove self.tbl otrace_id; exit_span_ otel_span_begin - let exit_span_from_id otrace_id = - let active_spans = Active_spans.get () in - match Active_span_tbl.find_opt active_spans.tbl otrace_id with - | None -> None - | Some otel_span_begin -> - Active_span_tbl.remove active_spans.tbl otrace_id; + (** Find the OTEL span corresponding to this Trace span *) + let exit_span_from_id (self : state) otrace_id = + match Active_span_tbl.find_exn self.tbl otrace_id with + | exception Not_found -> None + | otel_span_begin -> + Active_span_tbl.remove self.tbl otrace_id; Some (exit_span_ otel_span_begin) - let[@inline] get_scope (span : Otrace.explicit_span) : Otel.Scope.t option = - Otrace.Meta_map.find k_explicit_scope span.meta - - module M = struct - let with_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name cb = - let otrace_id, sb = - enter_span' ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name - in - - Otel.Scope.with_ambient_scope sb.scope @@ fun () -> - match cb otrace_id with - | res -> - let otel_span = exit_span' otrace_id sb in - Otel.Trace.emit [ otel_span ]; - res - | exception e -> - let bt = Printexc.get_raw_backtrace () in - - Otel.Scope.record_exception sb.scope e bt; - let otel_span = exit_span' otrace_id sb in - Otel.Trace.emit [ otel_span ]; - - Printexc.raise_with_backtrace e bt - - let enter_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : - Trace_core.span = - let otrace_id, _sb = - enter_span' ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name - in - (* NOTE: we cannot enter ambient scope in a disjoint way - with the exit, because we only have [Ambient_context.with_binding], - no [set_binding] *) - otrace_id - - let exit_span otrace_id = - match exit_span_from_id otrace_id with - | None -> () - | Some otel_span -> Otel.Trace.emit [ otel_span ] - - let enter_manual_span ~(parent : Otrace.explicit_span_ctx option) ~flavor:_ - ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Otrace.explicit_span = - let otrace_id, sb = - match parent with - | None -> enter_span' ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name - | Some parent -> - enter_span' ~explicit_parent:parent ~__FUNCTION__ ~__FILE__ ~__LINE__ - ~data name - in - - let active_spans = Active_spans.get () in - Active_span_tbl.add active_spans.tbl otrace_id sb; - - Otrace. - { - span = otrace_id; - trace_id = trace_id_of_otel sb.scope.trace_id; - meta = Meta_map.(empty |> add k_explicit_scope sb.scope); - } - - let exit_manual_span Otrace.{ span = otrace_id; _ } = - let active_spans = Active_spans.get () in - match Active_span_tbl.find_opt active_spans.tbl otrace_id with - | None -> !on_internal_error (spf "no active span with ID %Ld" otrace_id) - | Some sb -> - let otel_span = exit_span' otrace_id sb in - Otel.Trace.emit [ otel_span ] - - let add_data_to_span otrace_id data = - let active_spans = Active_spans.get () in - match Active_span_tbl.find_opt active_spans.tbl otrace_id with - | None -> !on_internal_error (spf "no active span with ID %Ld" otrace_id) - | Some sb -> Otel.Scope.add_attrs sb.scope (fun () -> data) - - let add_data_to_manual_span (span : Otrace.explicit_span) data : unit = - match get_scope span with - | None -> - !on_internal_error (spf "manual span does not a contain an OTEL scope") - | Some scope -> Otel.Scope.add_attrs scope (fun () -> data) - - let message ?span ~data:_ msg : unit = - (* gather information from context *) - let old_scope = Otel.Scope.get_ambient_scope () in - let trace_id = Option.map (fun sc -> sc.Otel.Scope.trace_id) old_scope in - - let span_id = - match span with - | Some id -> Some (span_id_to_otel id) - | None -> Option.map (fun sc -> sc.Otel.Scope.span_id) old_scope - in - - let log = Otel.Logs.make_str ?trace_id ?span_id msg in - Otel.Logs.emit [ log ] - - let shutdown () = () - - let name_process _name = () - - let name_thread _name = () - - let counter_int ~data name cur_val : unit = - let _kind, attrs = otel_attrs_of_otrace_data data in - let m = Otel.Metrics.(gauge ~name [ int ~attrs cur_val ]) in - Otel.Metrics.emit [ m ] - - let counter_float ~data name cur_val : unit = - let _kind, attrs = otel_attrs_of_otrace_data data in - let m = Otel.Metrics.(gauge ~name [ float ~attrs cur_val ]) in - Otel.Metrics.emit [ m ] - - let extension_event = function - | Ev_link_span (sp1, sp2) -> - (match get_scope sp1, get_scope sp2 with - | Some sc1, Some sc2 -> - Otel.Scope.add_links sc1 (fun () -> [ Otel.Scope.to_span_link sc2 ]) - | _ -> !on_internal_error "could not find scope for OTEL span") - | Ev_set_span_kind (sp, k) -> - (match get_scope sp with - | None -> !on_internal_error "could not find scope for OTEL span" - | Some sc -> Otel.Scope.set_kind sc k) - | Ev_record_exn (sp, exn, bt) -> - (match get_scope sp with - | None -> !on_internal_error "could not find scope for OTEL span" - | Some sc -> Otel.Scope.record_exception sc exn bt) - | _ -> () - end + let[@inline] get_span_ (span : Otrace.explicit_span) : OTEL.Span.t option = + Otrace.Meta_map.find k_explicit_span span.meta end +module type COLLECTOR_ARG = sig + val exporter : OTEL.Exporter.t +end + +module Make_collector (A : COLLECTOR_ARG) = struct + open Internal + + let exporter = A.exporter + + let state = create_state () + + let with_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name cb = + let otrace_id, sb = + enter_span' state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name + in + + match + let@ () = OTEL.Ambient_span.with_ambient sb.span in + cb otrace_id + with + | res -> + let otel_span = exit_span' state otrace_id sb in + OTEL.Exporter.send_trace exporter [ otel_span ]; + res + | exception e -> + let bt = Printexc.get_raw_backtrace () in + + OTEL.Span.record_exception sb.span e bt; + let otel_span = exit_span' state otrace_id sb in + OTEL.Exporter.send_trace exporter [ otel_span ]; + + Printexc.raise_with_backtrace e bt + + let enter_span ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Trace_core.span + = + let otrace_id, _sb = + enter_span' state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name + in + (* NOTE: we cannot enter ambient scope in a disjoint way + with the exit, because we only have [Ambient_context.with_binding], + no [set_binding] *) + otrace_id + + let exit_span otrace_id = + match exit_span_from_id state otrace_id with + | None -> () + | Some otel_span -> OTEL.Exporter.send_trace exporter [ otel_span ] + + let enter_manual_span ~(parent : Otrace.explicit_span_ctx option) ~flavor:_ + ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name : Otrace.explicit_span = + let otrace_id, sb = + match parent with + | None -> enter_span' state ~__FUNCTION__ ~__FILE__ ~__LINE__ ~data name + | Some parent -> + enter_span' state ~explicit_parent:parent ~__FUNCTION__ ~__FILE__ + ~__LINE__ ~data name + in + + Active_span_tbl.add state.tbl otrace_id sb; + + { + Otrace.span = otrace_id; + trace_id = trace_id_of_otel (OTEL.Span.trace_id sb.span); + meta = Otrace.Meta_map.(empty |> add k_explicit_span sb.span); + } + + let exit_manual_span { Otrace.span = otrace_id; _ } = + match Active_span_tbl.find_exn state.tbl otrace_id with + | exception Not_found -> + !on_internal_error (spf "no active span with ID %Ld" otrace_id) + | sb -> + let otel_span = exit_span' state otrace_id sb in + OTEL.Exporter.send_trace exporter [ otel_span ] + + let add_data_to_span otrace_id data = + match Active_span_tbl.find_exn state.tbl otrace_id with + | exception Not_found -> + !on_internal_error (spf "no active span with ID %Ld" otrace_id) + | sb -> OTEL.Span.add_attrs sb.span data + + let add_data_to_manual_span (span : Otrace.explicit_span) data : unit = + match get_span_ span with + | None -> + !on_internal_error (spf "manual span does not a contain an OTEL scope") + | Some span -> OTEL.Span.add_attrs span data + + let message ?span ~data:_ msg : unit = + (* gather information from context *) + let old_span = OTEL.Ambient_span.get () in + let trace_id = Option.map OTEL.Span.trace_id old_span in + + let span_id = + match span with + | Some id -> Some (span_id_to_otel id) + | None -> Option.map OTEL.Span.id old_span + in + + let log = OTEL.Log_record.make_str ?trace_id ?span_id msg in + OTEL.Exporter.send_logs exporter [ log ] + + let shutdown () = () + + let name_process _name = () + + let name_thread _name = () + + let counter_int ~data:attrs name cur_val : unit = + let m = OTEL.Metrics.(gauge ~name [ int ~attrs cur_val ]) in + OTEL.Exporter.send_metrics exporter [ m ] + + let counter_float ~data:attrs name cur_val : unit = + let m = OTEL.Metrics.(gauge ~name [ float ~attrs cur_val ]) in + OTEL.Exporter.send_metrics exporter [ m ] + + let extension_event = function + | Ev_link_span (sp1, sc2) -> + (match get_span_ sp1 with + | Some sc1 -> OTEL.Span.add_links sc1 [ OTEL.Span_link.of_span_ctx sc2 ] + | _ -> !on_internal_error "could not find scope for OTEL span") + | Ev_set_span_kind (sp, k) -> + (match get_span_ sp with + | None -> !on_internal_error "could not find scope for OTEL span" + | Some sc -> OTEL.Span.set_kind sc k) + | Ev_record_exn { sp; exn; bt } -> + (match get_span_ sp with + | None -> !on_internal_error "could not find scope for OTEL span" + | Some sc -> OTEL.Span.record_exception sc exn bt) + | _ -> () +end + +let collector_of_exporter (exp : OTEL.Exporter.t) : Trace_core.collector = + let module M = Make_collector (struct + let exporter = exp + end) in + (module M : Trace_core.Collector.S) + +let link_span_to_otel_ctx (sp1 : Otrace.explicit_span) (sp2 : OTEL.Span_ctx.t) : + unit = + if Otrace.enabled () then Otrace.extension_event @@ Ev_link_span (sp1, sp2) + +(* let link_spans (sp1 : Otrace.explicit_span) (sp2 : Otrace.explicit_span) : unit = if Otrace.enabled () then Otrace.extension_event @@ Ev_link_span (sp1, sp2) + *) let set_span_kind sp k : unit = if Otrace.enabled () then Otrace.extension_event @@ Ev_set_span_kind (sp, k) let record_exception sp exn bt : unit = - if Otrace.enabled () then Otrace.extension_event @@ Ev_record_exn (sp, exn, bt) + if Otrace.enabled () then + Otrace.extension_event @@ Ev_record_exn { sp; exn; bt } -let collector () : Otrace.collector = (module Internal.M) +(** Collector that forwards to the {b currently installed} OTEL exporter. *) +let collector_main_otel_exporter () : Otrace.collector = + collector_of_exporter OTEL.Main_exporter.dynamic_forward_to_main_exporter -let setup () = Otrace.setup_collector @@ collector () +let (collector + [@deprecated "use collector_of_exporter or collector_main_otel_exporter"]) + = + collector_main_otel_exporter -let setup_with_otel_backend b : unit = - Otel.Collector.set_backend b; - setup () +let setup () = Otrace.setup_collector @@ collector_main_otel_exporter () + +let setup_with_otel_exporter exp : unit = + let coll = collector_of_exporter exp in + OTEL.Main_exporter.set exp; + Otrace.setup_collector coll + +let setup_with_otel_backend = setup_with_otel_exporter + +module Well_known = struct end diff --git a/src/trace/opentelemetry_trace.mli b/src/trace/opentelemetry_trace.mli index 060f4f13..f4045f79 100644 --- a/src/trace/opentelemetry_trace.mli +++ b/src/trace/opentelemetry_trace.mli @@ -1,21 +1,3 @@ -module Otel := Opentelemetry -module Otrace := Trace_core -module TLS := Thread_local_storage - -module Conv : sig - val trace_id_of_otel : Otel.Trace_id.t -> string - - val trace_id_to_otel : string -> Otel.Trace_id.t - - val span_id_of_otel : Otel.Span_id.t -> int64 - - val span_id_to_otel : int64 -> Otel.Span_id.t - - val ctx_to_otel : Otrace.explicit_span_ctx -> Otel.Span_ctx.t - - val ctx_of_otel : Otel.Span_ctx.t -> Otrace.explicit_span_ctx -end - (** [opentelemetry.trace] implements a {!Trace_core.Collector} for {{:https://v3.ocaml.org/p/trace} ocaml-trace}. @@ -27,46 +9,86 @@ end and implicit scope (in {!Internal.M.with_span}, via {!Ambient_context}) are supported; see the detailed notes on {!Internal.M.enter_manual_span}. - {1:wellknown Well-known identifiers} - - Because [ocaml-trace]'s API is a subset of OpenTelemetry functionality, this - interface allows for a few 'well-known' identifiers to be used in - [Trace]-instrumented libraries that wish to further support OpenTelemetry - usage. - - (These strings will not change in subsequent versions of this library, so - you do not need to depend on [opentelemetry.trace] to use them.) - - - If a key of exactly ["otrace.spankind"] is included in the - {!Trace_core.user_data} passed to [with_span] et al., it will be used as - the {!Opentelemetry.Span.kind} of the emitted span. (See - {!Internal.spankind_of_string} for the list of supported values.) + We use [Trace_core.extension_event] to add more features on top of the + common tracing interface. For example to set the "span kind": {[ - ocaml - let describe () = [ Opentelemetry_trace.(spankind_key, client) ] in - Trace_core.with_span ~__FILE__ ~__LINE__ ~data:describe "my-span" - @@ fun _ -> - (* ... *) + let@ span = Trace_core.with_span ~__FILE__ ~__LINE__ "my-span" in + Opentelemetry_trace.set_span_kind span Span_kind_client + (* ... *) ]} *) +module OTEL := Opentelemetry_core +module Otrace := Trace_core + +(** Conversions between [Opentelemetry] and [Trace_core] types *) +module Conv : sig + val trace_id_of_otel : OTEL.Trace_id.t -> string + + val trace_id_to_otel : string -> OTEL.Trace_id.t + + val span_id_of_otel : OTEL.Span_id.t -> int64 + + val span_id_to_otel : int64 -> OTEL.Span_id.t + + val ctx_to_otel : Otrace.explicit_span_ctx -> OTEL.Span_ctx.t + + val ctx_of_otel : OTEL.Span_ctx.t -> Otrace.explicit_span_ctx +end + +(** The extension events for {!Trace_core}. *) +module Extensions : sig + type Otrace.extension_event += + | Ev_link_span of Otrace.explicit_span * OTEL.Span_ctx.t + (** Link the given span to the given context. The context isn't the + parent, but the link can be used to correlate both spans. *) + | Ev_record_exn of { + sp: Otrace.explicit_span; + exn: exn; + bt: Printexc.raw_backtrace; + } + (** Record exception and potentially turn span to an error *) + | Ev_set_span_kind of Otrace.explicit_span * OTEL.Span_kind.t +end + val on_internal_error : (string -> unit) ref (** Callback to print errors in the library itself (ie bugs) *) val setup : unit -> unit (** Install the OTEL backend as a Trace collector *) -val setup_with_otel_backend : Opentelemetry.Collector.backend -> unit -(** Same as {!setup}, but also install the given backend as OTEL backend *) +val setup_with_otel_exporter : OTEL.Exporter.t -> unit +(** Same as {!setup}, but using the given exporter *) + +val setup_with_otel_backend : OTEL.Exporter.t -> unit +[@@deprecated "use setup_with_otel_exporter"] + +(* TODO: subscriber, with the next gen of Trace_subscriber + that allows us to provide [new_trace_id] so we can produce 16B trace IDs. +val subscriber_of_exporter : OTEL.Exporter.t -> Trace_subscriber.t +*) + +val collector_of_exporter : OTEL.Exporter.t -> Trace_core.collector val collector : unit -> Trace_core.collector +[@@deprecated "use collector_of_exporter, avoid global state"] (** Make a Trace collector that uses the OTEL backend to send spans and logs *) +(* NOTE: we cannot be sure that [sc2] is still alive and findable + in the active spans table. We could provide this operation under + the explicit precondition that it is? + val link_spans : Otrace.explicit_span -> Otrace.explicit_span -> unit (** [link_spans sp1 sp2] modifies [sp1] by adding a span link to [sp2]. @since 0.11 *) +*) -val set_span_kind : Otrace.explicit_span -> Otel.Span.kind -> unit +val link_span_to_otel_ctx : Otrace.explicit_span -> OTEL.Span_ctx.t -> unit +(** [link_spans sp1 sp_ctx2] modifies [sp1] by adding a span link to [sp_ctx2]. + It must be the case that [sp1] is a currently active span. + @since NEXT_RELEASE *) + +val set_span_kind : Otrace.explicit_span -> OTEL.Span.kind -> unit (** [set_span_kind sp k] sets the span's kind. @since 0.11 *) @@ -75,153 +97,6 @@ val record_exception : (** Record exception in the current span. @since 0.11 *) -(** Static references for well-known identifiers; see {!label-wellknown}. *) -module Well_known : sig - val spankind_key : string - - val internal : Otrace.user_data - - val server : Otrace.user_data - - val client : Otrace.user_data - - val producer : Otrace.user_data - - val consumer : Otrace.user_data - - val spankind_of_string : string -> Otel.Span.kind - - val otel_attrs_of_otrace_data : - (string * Otrace.user_data) list -> - Otel.Span.kind * Otel.Span.key_value list -end -[@@deprecated "use the regular functions for this"] - -(**/**) - -(** Internal implementation details; do not consider these stable. *) -module Internal : sig - module M : sig - val with_span : - __FUNCTION__:string option -> - __FILE__:string -> - __LINE__:int -> - data:(string * Otrace.user_data) list -> - string (* span name *) -> - (Otrace.span -> 'a) -> - 'a - (** Implements {!Trace_core.Collector.S.with_span}, with the OpenTelemetry - collector as the backend. Invoked via {!Trace_core.with_span}. - - Notably, this has the same implicit-scope semantics as - {!Opentelemetry.Trace.with_}, and requires configuration of - {!Ambient_context}. - - @see - ambient-context docs *) - - val enter_manual_span : - parent:Otrace.explicit_span_ctx option -> - flavor:'a -> - __FUNCTION__:string option -> - __FILE__:string -> - __LINE__:int -> - data:(string * Otrace.user_data) list -> - string (* span name *) -> - Otrace.explicit_span - (** Implements {!Trace_core.Collector.S.enter_manual_span}, with the - OpenTelemetry collector as the backend. Invoked at - {!Trace_core.enter_manual_toplevel_span} and - {!Trace_core.enter_manual_sub_span}; requires an eventual call to - {!Trace_core.exit_manual_span}. - - These 'manual span' functions {e do not} implement the same implicit- - scope semantics of {!with_span}; and thus don't need to wrap a single - stack-frame / callback; you can freely enter a span at any point, store - the returned {!Trace_core.explicit_span}, and exit it at any later point - with {!Trace_core.exit_manual_span}. - - However, for that same reason, they also cannot update the - {!Ambient_context} — that is, when you invoke the various [manual] - functions, if you then invoke other functions that use - {!Trace_core.with_span}, those callees {e will not} see the span you - entered manually as their [parent]. - - Generally, the best practice is to only use these [manual] functions at - the 'leaves' of your callstack: that is, don't invoke user callbacks - from within them; or if you do, make sure to pass the [explicit_span] - you recieve from this function onwards to the user callback, so they can - create further child-spans. *) - - val exit_manual_span : Otrace.explicit_span -> unit - (** Implements {!Trace_core.Collector.S.exit_manual_span}, with the - OpenTelemetry collector as the backend. Invoked at - {!Trace_core.exit_manual_span}. Expects the [explicit_span] returned - from an earlier call to {!Trace_core.enter_manual_toplevel_span} or - {!Trace_core.enter_manual_sub_span}. - - (See the notes at {!enter_manual_span} about {!Ambient_context}.) *) - - val add_data_to_span : - Otrace.span -> (string * Otrace.user_data) list -> unit - - val add_data_to_manual_span : - Otrace.explicit_span -> (string * Otrace.user_data) list -> unit - - val message : - ?span:Otrace.span -> - data:(string * Otrace.user_data) list -> - string -> - unit - - val shutdown : unit -> unit - - val name_process : string -> unit - - val name_thread : string -> unit - - val counter_int : - data:(string * Otrace.user_data) list -> string -> int -> unit - - val counter_float : - data:(string * Otrace.user_data) list -> string -> float -> unit - end - - type span_begin = { - start_time: int64; - name: string; - __FILE__: string; - __LINE__: int; - __FUNCTION__: string option; - scope: Otel.Scope.t; - parent: Otel.Span_ctx.t option; - } - - module Active_span_tbl : Hashtbl.S with type key = Otrace.span - - (** Table indexed by ocaml-trace spans. *) - module Active_spans : sig - type t = private { tbl: span_begin Active_span_tbl.t } [@@unboxed] - - val create : unit -> t - - val k_tls : t TLS.t - - val get : unit -> t - end - - val otrace_of_otel : Otel.Span_id.t -> Otrace.span - - val enter_span' : - ?explicit_parent:Otrace.explicit_span_ctx -> - __FUNCTION__:string option -> - __FILE__:string -> - __LINE__:int -> - data:(string * Otrace.user_data) list -> - string -> - Otrace.span * span_begin - - val exit_span' : Otrace.span -> span_begin -> Otel.Span.t -end - -(**/**) +module Well_known : sig end +[@@deprecated + "use the regular functions such as `link_spans` or `set_span_kind` for this"] diff --git a/src/core/AList.ml b/src/util/alist.ml similarity index 100% rename from src/core/AList.ml rename to src/util/alist.ml diff --git a/src/core/AList.mli b/src/util/alist.mli similarity index 100% rename from src/core/AList.mli rename to src/util/alist.mli diff --git a/src/util/cb_set.ml b/src/util/cb_set.ml new file mode 100644 index 00000000..78190855 --- /dev/null +++ b/src/util/cb_set.ml @@ -0,0 +1,9 @@ +type cb = unit -> unit + +type t = { cbs: cb Alist.t } [@@unboxed] + +let create () : t = { cbs = Alist.make () } + +let[@inline] register self f = Alist.add self.cbs f + +let[@inline] trigger self = List.iter (fun f -> f ()) (Alist.get self.cbs) diff --git a/src/util/cb_set.mli b/src/util/cb_set.mli new file mode 100644 index 00000000..3a6e2a0c --- /dev/null +++ b/src/util/cb_set.mli @@ -0,0 +1,9 @@ +(** A collection of callbacks. thread-safe. *) + +type t + +val create : unit -> t + +val register : t -> (unit -> unit) -> unit + +val trigger : t -> unit diff --git a/src/util/dune b/src/util/dune new file mode 100644 index 00000000..1348fe68 --- /dev/null +++ b/src/util/dune @@ -0,0 +1,6 @@ +(library + (name opentelemetry_util) + (public_name opentelemetry.util) + (flags :standard -open Opentelemetry_atomic) + (libraries opentelemetry.atomic mtime mtime.clock.os) + (synopsis "Utilities for opentelemetry")) diff --git a/src/util/util_bytes_.ml b/src/util/util_bytes_.ml new file mode 100644 index 00000000..91e65a52 --- /dev/null +++ b/src/util/util_bytes_.ml @@ -0,0 +1,49 @@ +open struct + let spf = Printf.sprintf +end + +let int_to_hex (i : int) = + if i < 10 then + Char.chr (i + Char.code '0') + else + Char.chr (i - 10 + Char.code 'a') + +let bytes_to_hex_into b res off : unit = + for i = 0 to Bytes.length b - 1 do + let n = Char.code (Bytes.get b i) in + Bytes.set res ((2 * i) + off) (int_to_hex ((n land 0xf0) lsr 4)); + Bytes.set res ((2 * i) + 1 + off) (int_to_hex (n land 0x0f)) + done + +let bytes_to_hex (b : bytes) : string = + let res = Bytes.create (2 * Bytes.length b) in + bytes_to_hex_into b res 0; + Bytes.unsafe_to_string res + +let int_of_hex = function + | '0' .. '9' as c -> Char.code c - Char.code '0' + | 'a' .. 'f' as c -> 10 + Char.code c - Char.code 'a' + | c -> raise (Invalid_argument (spf "invalid hex char: %C" c)) + +let bytes_of_hex_substring (s : string) off len = + if len mod 2 <> 0 then + raise (Invalid_argument "hex sequence must be of even length"); + let res = Bytes.make (len / 2) '\x00' in + for i = 0 to (len / 2) - 1 do + let n1 = int_of_hex (String.get s (off + (2 * i))) in + let n2 = int_of_hex (String.get s (off + (2 * i) + 1)) in + let n = (n1 lsl 4) lor n2 in + Bytes.set res i (Char.chr n) + done; + res + +let bytes_of_hex (s : string) : bytes = + bytes_of_hex_substring s 0 (String.length s) + +let bytes_non_zero (self : bytes) : bool = + try + for i = 0 to Bytes.length self - 1 do + if Char.code (Bytes.unsafe_get self i) <> 0 then raise_notrace Exit + done; + false + with Exit -> true diff --git a/src/core/util_mutex.ml b/src/util/util_mutex.ml similarity index 100% rename from src/core/util_mutex.ml rename to src/util/util_mutex.ml diff --git a/src/core/util_mutex.mli b/src/util/util_mutex.mli similarity index 100% rename from src/core/util_mutex.mli rename to src/util/util_mutex.mli diff --git a/tests/bin/cohttp_client.ml b/tests/bin/cohttp_client.ml index a4523847..f1314cf9 100644 --- a/tests/bin/cohttp_client.ml +++ b/tests/bin/cohttp_client.ml @@ -1,4 +1,4 @@ -module T = Opentelemetry +module OT = Opentelemetry module Otel_lwt = Opentelemetry_lwt let spf = Printf.sprintf @@ -10,19 +10,20 @@ let sleep_inner = ref 0.1 let sleep_outer = ref 2.0 let mk_client ~scope = - Opentelemetry_cohttp_lwt.client ~scope (module Cohttp_lwt_unix.Client) + Opentelemetry_cohttp_lwt.client ~span:scope (module Cohttp_lwt_unix.Client) let run () = let open Lwt.Syntax in + let tracer = OT.Tracer.get_main () in let rec go () = let@ scope = - Otel_lwt.Trace.with_ ~kind:T.Span.Span_kind_producer "loop.outer" + Otel_lwt.Tracer.with_ tracer ~kind:OT.Span.Span_kind_producer "loop.outer" in let* () = Lwt_unix.sleep !sleep_outer in let module C = (val mk_client ~scope) in (* Using the same default server O *) let* _res, body = - C.get (Uri.of_string Opentelemetry_client.Config.default_url) + C.get (Uri.of_string Opentelemetry_client.Client_config.default_url) in let* () = Cohttp_lwt.Body.drain_body body in go () @@ -31,8 +32,8 @@ let run () = let () = Sys.catch_break true; - T.Globals.service_name := "ocaml-otel-cohttp-client"; - T.Globals.service_namespace := Some "ocaml-otel.test"; + OT.Globals.service_name := "ocaml-otel-cohttp-client"; + OT.Globals.service_namespace := Some "ocaml-otel.test"; let debug = ref false in let batch_traces = ref 400 in diff --git a/tests/bin/dune b/tests/bin/dune index 475f7c04..0c211cb0 100644 --- a/tests/bin/dune +++ b/tests/bin/dune @@ -4,7 +4,7 @@ (libraries unix opentelemetry - opentelemetry.client + opentelemetry-client opentelemetry-client-ocurl)) (executable @@ -16,7 +16,7 @@ unix opentelemetry opentelemetry-lwt - opentelemetry.client + opentelemetry-client opentelemetry-client-cohttp-lwt lwt.unix)) @@ -32,7 +32,7 @@ logs.fmt logs.threaded opentelemetry - opentelemetry.client + opentelemetry-client opentelemetry-client-cohttp-eio)) (executable diff --git a/tests/bin/emit1.ml b/tests/bin/emit1.ml index fdcdbc06..edaa63f8 100644 --- a/tests/bin/emit1.ml +++ b/tests/bin/emit1.ml @@ -1,4 +1,4 @@ -module T = Opentelemetry +module OT = Opentelemetry module Atomic = Opentelemetry_atomic.Atomic let spf = Printf.sprintf @@ -23,13 +23,14 @@ let num_tr = Atomic.make 0 let run_job () = let@ () = Fun.protect ~finally:(fun () -> Atomic.set stop true) in + let tracer = OT.Tracer.get_main () in let i = ref 0 in let cnt = ref 0 in while (not @@ Atomic.get stop) && !cnt < !n do let@ _scope = Atomic.incr num_tr; - T.Trace.with_ ~kind:T.Span.Span_kind_producer "loop.outer" + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_producer "loop.outer" ~attrs:[ "i", `Int !i ] in @@ -40,7 +41,7 @@ let run_job () = (* parent scope is found via thread local storage *) let@ scope = Atomic.incr num_tr; - T.Trace.with_ ~kind:T.Span.Span_kind_internal + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_internal ~attrs:[ "j", `Int j ] "loop.inner" in @@ -48,18 +49,22 @@ let run_job () = Unix.sleepf !sleep_outer; Atomic.incr num_sleep; - T.Logs.( - emit - [ - make_strf ~trace_id:scope.trace_id ~span_id:scope.span_id - ~severity:Severity_number_info "inner at %d" j; - ]); + let logger = OT.Logger.get_main () in + OT.Emitter.emit logger + [ + OT.Log_record.make_strf ~trace_id:(OT.Span.trace_id scope) + ~span_id:(OT.Span.id scope) ~severity:Severity_number_info + "inner at %d" j; + ]; incr i; try Atomic.incr num_tr; - let@ _ = T.Trace.with_ ~kind:T.Span.Span_kind_internal ~scope "alloc" in + let@ _ = + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_internal ~parent:scope + "alloc" + in (* allocate some stuff *) if !stress_alloc_ then ( let _arr = Sys.opaque_identity @@ Array.make (25 * 25551) 42.0 in @@ -72,23 +77,23 @@ let run_job () = if j = 4 && !i mod 13 = 0 then failwith "oh no"; (* simulate a failure *) - Opentelemetry.Scope.add_event scope (fun () -> - T.Event.make "done with alloc") + OT.Span.add_event scope (OT.Event.make "done with alloc") with Failure _ -> () done done let run () = - T.GC_metrics.basic_setup (); + OT.Gc_metrics.setup_on_main_exporter (); - T.Metrics_callbacks.register (fun () -> - T.Metrics. - [ - sum ~name:"num-sleep" ~is_monotonic:true - [ int (Atomic.get num_sleep) ]; - sum ~name:"otel.bytes-sent" ~is_monotonic:true ~unit_:"B" - [ int (Opentelemetry_client_ocurl.n_bytes_sent ()) ]; - ]); + OT.Metrics_callbacks.with_set_added_to_main_exporter (fun set -> + OT.Metrics_callbacks.add_metrics_cb set (fun () -> + OT.Metrics. + [ + sum ~name:"num-sleep" ~is_monotonic:true + [ int (Atomic.get num_sleep) ]; + sum ~name:"otel.bytes-sent" ~is_monotonic:true ~unit_:"B" + [ int (Opentelemetry_client_ocurl.n_bytes_sent ()) ]; + ])); let n_jobs = max 1 !n_jobs in Printf.printf "run %d jobs\n%!" n_jobs; @@ -101,8 +106,8 @@ let run () = Array.iter Thread.join jobs let () = - T.Globals.service_name := "t1"; - T.Globals.service_namespace := Some "ocaml-otel.test"; + OT.Globals.service_name := "t1"; + OT.Globals.service_namespace := Some "ocaml-otel.test"; let ts_start = Unix.gettimeofday () in let debug = ref false in diff --git a/tests/bin/emit1_cohttp.ml b/tests/bin/emit1_cohttp.ml index 14f657cd..bb0559ff 100644 --- a/tests/bin/emit1_cohttp.ml +++ b/tests/bin/emit1_cohttp.ml @@ -27,9 +27,10 @@ let i = ref 0 let run_job job_id : unit Lwt.t = while%lwt not @@ Atomic.get stop do + let tracer = T.Tracer.get_main () in let@ scope = Atomic.incr num_tr; - T.Trace.with_ ~kind:T.Span.Span_kind_producer "loop.outer" + T.Tracer.with_ tracer ~kind:T.Span.Span_kind_producer "loop.outer" ~attrs:[ "i", `Int job_id ] in @@ -39,9 +40,9 @@ let run_job job_id : unit Lwt.t = Lwt.return @@ Atomic.set stop true else (* parent scope is found via thread local storage *) - let@ scope = + let@ span = Atomic.incr num_tr; - T.Trace.with_ ~scope ~kind:T.Span.Span_kind_internal + T.Tracer.with_ tracer ~parent:scope ~kind:T.Span.Span_kind_internal ~attrs:[ "j", `Int j ] "loop.inner" in @@ -49,19 +50,20 @@ let run_job job_id : unit Lwt.t = let* () = Lwt_unix.sleep !sleep_outer in Atomic.incr num_sleep; - T.Logs.( - emit - [ - make_strf ~trace_id:scope.trace_id ~span_id:scope.span_id - ~severity:Severity_number_info "inner at %d" j; - ]); + Opentelemetry_emitter.Emitter.emit (T.Logger.get_main ()) + [ + T.Log_record.make_strf ~trace_id:(T.Span.trace_id span) + ~span_id:(T.Span.id span) ~severity:Severity_number_info + "inner at %d" j; + ]; incr i; try%lwt Atomic.incr num_tr; let@ scope = - T.Trace.with_ ~kind:T.Span.Span_kind_internal ~scope "alloc" + T.Tracer.with_ tracer ~kind:T.Span.Span_kind_internal ~parent:span + "alloc" in (* allocate some stuff *) if !stress_alloc_ then ( @@ -75,22 +77,23 @@ let run_job job_id : unit Lwt.t = (* simulate a failure *) if j = 4 && !i mod 13 = 0 then failwith "oh no"; - Opentelemetry.Scope.add_event scope (fun () -> - T.Event.make "done with alloc"); + T.Span.add_event scope (T.Event.make "done with alloc"); Lwt.return () with Failure _ -> Lwt.return () done done let run () : unit Lwt.t = - T.GC_metrics.basic_setup (); + T.Gc_metrics.setup_on_main_exporter (); - T.Metrics_callbacks.register (fun () -> - T.Metrics. - [ - sum ~name:"num-sleep" ~is_monotonic:true - [ int (Atomic.get num_sleep) ]; - ]); + T.Metrics_callbacks.( + with_set_added_to_main_exporter (fun set -> + add_metrics_cb set (fun () -> + T.Metrics. + [ + sum ~name:"num-sleep" ~is_monotonic:true + [ int (Atomic.get num_sleep) ]; + ]))); let n_jobs = max 1 !n_jobs in Printf.printf "run %d jobs\n%!" n_jobs; diff --git a/tests/bin/emit1_eio.ml b/tests/bin/emit1_eio.ml index 9990b227..f199ac23 100644 --- a/tests/bin/emit1_eio.ml +++ b/tests/bin/emit1_eio.ml @@ -23,9 +23,10 @@ let num_tr = Atomic.make 0 let i = Atomic.make 0 let run_job clock _job_id iterations : unit = + let tracer = OT.Tracer.get_main () in let@ scope = Atomic.incr num_tr; - OT.Trace.with_ ~kind:OT.Span.Span_kind_producer "loop.outer" + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_producer "loop.outer" ~attrs:[ "i", `Int (Atomic.get i) ] in @@ -37,7 +38,7 @@ let run_job clock _job_id iterations : unit = (* parent scope is found via thread local storage *) let@ scope = Atomic.incr num_tr; - OT.Trace.with_ ~scope ~kind:OT.Span.Span_kind_internal + OT.Tracer.with_ tracer ~parent:scope ~kind:OT.Span.Span_kind_internal ~attrs:[ "j", `Int j ] "loop.inner" in @@ -45,11 +46,13 @@ let run_job clock _job_id iterations : unit = let () = Eio.Time.sleep clock !sleep_outer in Atomic.incr num_sleep; - OT.Logs.( - emit + OT.Logger.( + let logger = OT.Logger.get_main () in + OT.Emitter.emit logger [ - make_strf ~trace_id:scope.trace_id ~span_id:scope.span_id - ~severity:Severity_number_info "inner at %d" j; + OT.Log_record.make_strf ~trace_id:(OT.Span.trace_id scope) + ~span_id:(OT.Span.id scope) ~severity:Severity_number_info + "inner at %d" j; ]); Atomic.incr i; @@ -57,7 +60,8 @@ let run_job clock _job_id iterations : unit = try Atomic.incr num_tr; let@ scope = - OT.Trace.with_ ~kind:OT.Span.Span_kind_internal ~scope "alloc" + OT.Tracer.with_ tracer ~kind:OT.Span.Span_kind_internal ~parent:scope + "alloc" in (* allocate some stuff *) if !stress_alloc_ then ( @@ -71,20 +75,21 @@ let run_job clock _job_id iterations : unit = if j = 4 && Atomic.get i mod 13 = 0 then failwith "oh no"; (* simulate a failure *) - Opentelemetry.Scope.add_event scope (fun () -> - OT.Event.make "done with alloc") + OT.Span.add_event scope (OT.Event.make "done with alloc") with Failure _ -> () done let run env proc iterations () : unit = - OT.GC_metrics.basic_setup (); + OT.Gc_metrics.setup_on_main_exporter (); - OT.Metrics_callbacks.register (fun () -> - OT.Metrics. - [ - sum ~name:"num-sleep" ~is_monotonic:true - [ int (Atomic.get num_sleep) ]; - ]); + OT.Metrics_callbacks.( + with_set_added_to_main_exporter (fun set -> + add_metrics_cb set (fun () -> + OT.Metrics. + [ + sum ~name:"num-sleep" ~is_monotonic:true + [ int (Atomic.get num_sleep) ]; + ]))); let n_jobs = max 1 !n_jobs in Printf.printf "run %d jobs in proc %d\n%!" n_jobs proc; @@ -169,4 +174,4 @@ let () = Eio.Fiber.fork ~sw @@ fun () -> Eio.Domain_manager.run dm (run env proc !n_iterations) done)); - Opentelemetry.Collector.remove_backend () ~on_done:ignore + Opentelemetry.Main_exporter.remove () ~on_done:ignore diff --git a/tests/bin/emit_logs_cohttp.ml b/tests/bin/emit_logs_cohttp.ml index acd846e3..6f0393e3 100644 --- a/tests/bin/emit_logs_cohttp.ml +++ b/tests/bin/emit_logs_cohttp.ml @@ -22,8 +22,9 @@ let varied_tag_set = |> add string_list_tag [ "foo"; "bar"; "baz" ]) let run () = + Opentelemetry.Globals.service_name := "emit_logs"; let otel_reporter = - Opentelemetry_logs.otel_reporter ~service_name:"emit_logs" + Opentelemetry_logs.otel_reporter ~attributes:[ "my_reporter_attr", `String "foo" ] () in @@ -35,7 +36,9 @@ let run () = Logs.err (fun m -> m "emit_logs: error log"); Logs.app (fun m -> m "emit_logs: app log"); let%lwt () = - T.Trace.with_ ~kind:T.Span.Span_kind_producer "my_scope" (fun _scope -> + let tracer = T.Tracer.get_main () in + T.Tracer.with_ tracer ~kind:T.Span.Span_kind_producer "my_scope" + (fun _scope -> Logs.info (fun m -> m ~tags:varied_tag_set "emit_logs: this log is emitted with varied tags from a span"); @@ -50,7 +53,8 @@ let run () = let fmt_logger = Logs_fmt.reporter ~dst:Format.err_formatter () in let combined_logger = - Opentelemetry_logs.attach_otel_reporter ~service_name:"emit_logs_fmt" + Opentelemetry_logs.attach_otel_reporter + (* FIXME ~service_name:"emit_logs_fmt" *) ~attributes:[ "my_fmt_attr", `String "bar" ] fmt_logger in diff --git a/tests/client/dune b/tests/client/dune index a8c9c961..3eb23bf1 100644 --- a/tests/client/dune +++ b/tests/client/dune @@ -1,4 +1,4 @@ (tests (names test_client_lib) (package opentelemetry) - (libraries alcotest opentelemetry.client)) + (libraries alcotest opentelemetry-client)) diff --git a/tests/client/test_client_lib.ml b/tests/client/test_client_lib.ml index c3f8a360..d3010f76 100644 --- a/tests/client/test_client_lib.ml +++ b/tests/client/test_client_lib.ml @@ -1,5 +1,5 @@ open Alcotest -module Config = Opentelemetry_client.Config +module Config = Opentelemetry_client.Client_config let test_config_printing () = let module Env = Config.Env () in @@ -7,11 +7,12 @@ let test_config_printing () = Format.asprintf "%a" Config.pp @@ Env.make (fun common () -> common) () in let expected = - {|{ debug=false; - self_trace=false; url_traces="http://localhost:4318/v1/traces"; - url_metrics="http://localhost:4318/v1/metrics"; - url_logs="http://localhost:4318/v1/logs"; headers=; batch_traces=400; - batch_metrics=20; batch_logs=400; batch_timeout_ms=2000 }|} + "{ debug=false;\n\ + \ self_trace=false; url_traces=\"http://localhost:4318/v1/traces\";\n\ + \ url_metrics=\"http://localhost:4318/v1/metrics\";\n\ + \ url_logs=\"http://localhost:4318/v1/logs\"; headers=[]; batch_traces=400;\n\ + \ batch_metrics=20; batch_logs=400; batch_timeout_ms=2000;\n\ + \ http_concurrency_level=None }" in check' string ~msg:"is rendered correctly" ~actual ~expected diff --git a/tests/client_e2e/dune b/tests/client_e2e/dune index 8952b1a7..1e6415a0 100644 --- a/tests/client_e2e/dune +++ b/tests/client_e2e/dune @@ -20,7 +20,7 @@ containers logs.fmt logs.threaded - opentelemetry.client)) + opentelemetry-client)) (library (name clients_e2e_lib) @@ -34,7 +34,7 @@ (enabled_if (>= %{ocaml_version} 5.0)) (deps %{bin:emit1_cohttp}) - (libraries clients_e2e_lib alcotest opentelemetry opentelemetry.client)) + (libraries clients_e2e_lib alcotest opentelemetry opentelemetry-client)) (tests (names test_cottp_eio_client_e2e) @@ -43,7 +43,7 @@ (deps %{bin:emit1_eio}) (enabled_if (>= %{ocaml_version} 5.0)) - (libraries clients_e2e_lib alcotest opentelemetry opentelemetry.client)) + (libraries clients_e2e_lib alcotest opentelemetry opentelemetry-client)) (executable (name signal_reporter_server) diff --git a/tests/client_e2e/signal_gatherer.ml b/tests/client_e2e/signal_gatherer.ml index 234feaf2..ffb47485 100644 --- a/tests/client_e2e/signal_gatherer.ml +++ b/tests/client_e2e/signal_gatherer.ml @@ -111,7 +111,7 @@ module Tested_program = struct end let default_port = - String.split_on_char ':' Client.Config.default_url |> function + String.split_on_char ':' Client.Client_config.default_url |> function (* Extracting the port from 'http://foo:' *) | [ _; _; port ] -> int_of_string port | _ -> failwith "unexpected format in Client.Config.default_url" diff --git a/tests/core/dune b/tests/core/dune index 77d2b02f..cbdd974f 100644 --- a/tests/core/dune +++ b/tests/core/dune @@ -1,4 +1,4 @@ (tests (names test_trace_context t_size) (package opentelemetry) - (libraries opentelemetry opentelemetry.client)) + (libraries opentelemetry opentelemetry-client)) diff --git a/tests/implicit_scope/sync/test_implicit_scope_sync.ml b/tests/implicit_scope/sync/test_implicit_scope_sync.ml index d8bf632b..b4ba1503 100644 --- a/tests/implicit_scope/sync/test_implicit_scope_sync.ml +++ b/tests/implicit_scope/sync/test_implicit_scope_sync.ml @@ -1,57 +1,43 @@ open Alcotest module Otel = Opentelemetry -let spans_emitted : Otel.Proto.Trace.resource_spans list ref = ref [] +let spans_emitted : Otel.Span.t list ref = ref [] -module Test_backend = struct - open Otel.Collector - open Otel.Proto - include Noop_backend +let test_exporter : Otel.Exporter.t = + let open Otel.Exporter in + { + (dummy ()) with + emit_spans = Opentelemetry_emitter.To_list.to_list spans_emitted; + } - let record_emitted_spans (l : Trace.resource_spans list) ~ret = - spans_emitted := l @ !spans_emitted; - ret () - - let send_trace : Trace.resource_spans list sender = - { send = record_emitted_spans } -end - -let with_test_backend f = +let with_test_exporter f = (* uncomment for eprintf debugging: *) - (* let module Debug_and_test_backend = Otel.Collector.Debug_backend (Test_backend) in - let backend = (module Debug_and_test_backend : Otel.Collector.BACKEND) in *) - let backend = (module Test_backend : Otel.Collector.BACKEND) in - Otel.Collector.with_setup_debug_backend backend () f + (* let test_exporter = Opentelemetry_client.Exporter_debug.debug test_exporter in*) + Otel.Main_exporter.with_setup_debug_backend test_exporter () f -let bytes_to_hex = Otel.Util_.bytes_to_hex +let bytes_to_hex = Opentelemetry_util.Util_bytes_.bytes_to_hex let test_stack_based_implicit_scope () = let run () = - Otel.Trace.with_ "first trace" @@ fun _scope -> + let tracer = Otel.Tracer.get_main () in + Otel.Tracer.with_ tracer "first trace" @@ fun _scope -> Thread.delay 0.2; - Otel.Trace.with_ "second trace" @@ fun _scope -> + Otel.Tracer.with_ tracer "second trace" @@ fun _scope -> Thread.delay 0.2; - Otel.Trace.with_ "third trace" @@ fun _scope -> + Otel.Tracer.with_ tracer "third trace" @@ fun _scope -> Thread.delay 0.2; () in - with_test_backend @@ fun () -> + with_test_exporter @@ fun () -> (* start *) run (); check' int ~msg:"count of spans emitted" ~actual:(List.length !spans_emitted) ~expected:3; let open Otel.Proto.Trace in - let f prev_span_id { scope_spans; _ } = - Format.printf "\n%a@\n" (Format.pp_print_list pp_scope_spans) scope_spans; - check' int ~msg:"count of scope_spans in emitted span" - ~actual:(List.length scope_spans) ~expected:1; - let { scope; spans; _ } = List.hd scope_spans in - check' bool ~msg:"scope exists in emitted span" - ~actual:(Option.is_some scope) ~expected:true; - check' int ~msg:"count of spans in scope_span" ~actual:(List.length spans) - ~expected:1; - let { name; trace_id; span_id; parent_span_id; _ } = List.hd spans in + let f prev_span_id (sp : Otel.Span.t) = + Format.printf "%a@." pp_span sp; + let { name; trace_id; span_id; parent_span_id; _ } = sp in Printf.printf "name='%s' trace_id='%s' span_id='%s' parent_span_id='%s' \ prev_span_id='%s'\n"