mirror of
https://github.com/ocaml-tracing/ocaml-opentelemetry.git
synced 2026-03-07 18:37:56 -05:00
client: use self-debug and new provider config
This commit is contained in:
parent
df643c9af6
commit
f1f379d2e1
4 changed files with 121 additions and 96 deletions
|
|
@ -11,9 +11,12 @@ let str_to_hex (s : string) : string =
|
||||||
|
|
||||||
(** Report the error on stderr. *)
|
(** Report the error on stderr. *)
|
||||||
let report_err : t -> unit = function
|
let report_err : t -> unit = function
|
||||||
| `Sysbreak -> Printf.eprintf "opentelemetry: ctrl-c captured, stopping\n%!"
|
| `Sysbreak ->
|
||||||
|
Opentelemetry.Self_debug.log Opentelemetry.Self_debug.Info (fun () ->
|
||||||
|
"opentelemetry: ctrl-c captured, stopping")
|
||||||
| `Failure msg ->
|
| `Failure msg ->
|
||||||
Format.eprintf "@[<2>opentelemetry: export failed: %s@]@." msg
|
Opentelemetry.Self_debug.log Opentelemetry.Self_debug.Error (fun () ->
|
||||||
|
Printf.sprintf "opentelemetry: export failed: %s" msg)
|
||||||
| `Status
|
| `Status
|
||||||
( code,
|
( code,
|
||||||
{
|
{
|
||||||
|
|
@ -22,17 +25,18 @@ let report_err : t -> unit = function
|
||||||
details;
|
details;
|
||||||
_presence = _;
|
_presence = _;
|
||||||
} ) ->
|
} ) ->
|
||||||
let pp_details out l =
|
Opentelemetry.Self_debug.log Opentelemetry.Self_debug.Error (fun () ->
|
||||||
List.iter
|
let pp_details out l =
|
||||||
(fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s))
|
List.iter
|
||||||
l
|
(fun s -> Format.fprintf out "%S;@ " (Bytes.unsafe_to_string s))
|
||||||
in
|
l
|
||||||
Format.eprintf
|
in
|
||||||
"@[<2>opentelemetry: export failed with@ http code=%d@ status \
|
Format.asprintf
|
||||||
{@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]@."
|
"@[<2>opentelemetry: export failed with@ http code=%d@ status \
|
||||||
code scode
|
{@[code=%ld;@ message=%S;@ details=[@[%a@]]@]}@]"
|
||||||
(Bytes.unsafe_to_string message)
|
code scode
|
||||||
pp_details details
|
(Bytes.unsafe_to_string message)
|
||||||
|
pp_details details)
|
||||||
|
|
||||||
let decode_invalid_http_response ~code ~url (body : string) : t =
|
let decode_invalid_http_response ~code ~url (body : string) : t =
|
||||||
try
|
try
|
||||||
|
|
|
||||||
|
|
@ -2,12 +2,7 @@ type protocol =
|
||||||
| Http_protobuf
|
| Http_protobuf
|
||||||
| Http_json
|
| Http_json
|
||||||
|
|
||||||
type log_level =
|
type log_level = Opentelemetry.Self_debug.level option
|
||||||
| Log_level_none
|
|
||||||
| Log_level_error
|
|
||||||
| Log_level_warn
|
|
||||||
| Log_level_info
|
|
||||||
| Log_level_debug
|
|
||||||
|
|
||||||
type rest = unit
|
type rest = unit
|
||||||
|
|
||||||
|
|
@ -27,10 +22,9 @@ type t = {
|
||||||
timeout_traces_ms: int;
|
timeout_traces_ms: int;
|
||||||
timeout_metrics_ms: int;
|
timeout_metrics_ms: int;
|
||||||
timeout_logs_ms: int;
|
timeout_logs_ms: int;
|
||||||
batch_traces: int option;
|
traces: Opentelemetry.Provider_config.t;
|
||||||
batch_metrics: int option;
|
metrics: Opentelemetry.Provider_config.t;
|
||||||
batch_logs: int option;
|
logs: Opentelemetry.Provider_config.t;
|
||||||
batch_timeout_ms: int;
|
|
||||||
self_trace: bool;
|
self_trace: bool;
|
||||||
http_concurrency_level: int option;
|
http_concurrency_level: int option;
|
||||||
retry_max_attempts: int;
|
retry_max_attempts: int;
|
||||||
|
|
@ -56,11 +50,13 @@ open struct
|
||||||
| Http_json -> Format.fprintf out "http/json"
|
| Http_json -> Format.fprintf out "http/json"
|
||||||
|
|
||||||
let pp_log_level out = function
|
let pp_log_level out = function
|
||||||
| Log_level_none -> Format.fprintf out "none"
|
| None -> Format.fprintf out "none"
|
||||||
| Log_level_error -> Format.fprintf out "error"
|
| Some level ->
|
||||||
| Log_level_warn -> Format.fprintf out "warn"
|
Format.fprintf out "%s" (Opentelemetry.Self_debug.string_of_level level)
|
||||||
| Log_level_info -> Format.fprintf out "info"
|
|
||||||
| Log_level_debug -> Format.fprintf out "debug"
|
let pp_provider_config out (c : Opentelemetry.Provider_config.t) =
|
||||||
|
Format.fprintf out "{batch=%a;@ timeout=%a}" ppiopt c.batch Mtime.Span.pp
|
||||||
|
c.timeout
|
||||||
end
|
end
|
||||||
|
|
||||||
let pp out (self : t) : unit =
|
let pp out (self : t) : unit =
|
||||||
|
|
@ -81,10 +77,9 @@ let pp out (self : t) : unit =
|
||||||
timeout_traces_ms;
|
timeout_traces_ms;
|
||||||
timeout_metrics_ms;
|
timeout_metrics_ms;
|
||||||
timeout_logs_ms;
|
timeout_logs_ms;
|
||||||
batch_traces;
|
traces;
|
||||||
batch_metrics;
|
metrics;
|
||||||
batch_logs;
|
logs;
|
||||||
batch_timeout_ms;
|
|
||||||
http_concurrency_level;
|
http_concurrency_level;
|
||||||
retry_max_attempts;
|
retry_max_attempts;
|
||||||
retry_initial_delay_ms;
|
retry_initial_delay_ms;
|
||||||
|
|
@ -101,16 +96,15 @@ let pp out (self : t) : unit =
|
||||||
%a@];@ @[<2>headers_metrics=@,\
|
%a@];@ @[<2>headers_metrics=@,\
|
||||||
%a@];@ @[<2>headers_logs=@,\
|
%a@];@ @[<2>headers_logs=@,\
|
||||||
%a@];@ protocol=%a;@ timeout_ms=%d;@ timeout_traces_ms=%d;@ \
|
%a@];@ protocol=%a;@ timeout_ms=%d;@ timeout_traces_ms=%d;@ \
|
||||||
timeout_metrics_ms=%d;@ timeout_logs_ms=%d;@ batch_traces=%a;@ \
|
timeout_metrics_ms=%d;@ timeout_logs_ms=%d;@ traces=%a;@ metrics=%a;@ \
|
||||||
batch_metrics=%a;@ batch_logs=%a;@ batch_timeout_ms=%d;@ \
|
logs=%a;@ http_concurrency_level=%a;@ retry_max_attempts=%d;@ \
|
||||||
http_concurrency_level=%a;@ retry_max_attempts=%d;@ \
|
|
||||||
retry_initial_delay_ms=%.0f;@ retry_max_delay_ms=%.0f;@ \
|
retry_initial_delay_ms=%.0f;@ retry_max_delay_ms=%.0f;@ \
|
||||||
retry_backoff_multiplier=%.1f @]}"
|
retry_backoff_multiplier=%.1f @]}"
|
||||||
debug pp_log_level log_level sdk_disabled self_trace url_traces url_metrics
|
debug pp_log_level log_level sdk_disabled self_trace url_traces url_metrics
|
||||||
url_logs ppheaders headers ppheaders headers_traces ppheaders
|
url_logs ppheaders headers ppheaders headers_traces ppheaders
|
||||||
headers_metrics ppheaders headers_logs pp_protocol protocol timeout_ms
|
headers_metrics ppheaders headers_logs pp_protocol protocol timeout_ms
|
||||||
timeout_traces_ms timeout_metrics_ms timeout_logs_ms ppiopt batch_traces
|
timeout_traces_ms timeout_metrics_ms timeout_logs_ms pp_provider_config
|
||||||
ppiopt batch_metrics ppiopt batch_logs batch_timeout_ms ppiopt
|
traces pp_provider_config metrics pp_provider_config logs ppiopt
|
||||||
http_concurrency_level retry_max_attempts retry_initial_delay_ms
|
http_concurrency_level retry_max_attempts retry_initial_delay_ms
|
||||||
retry_max_delay_ms retry_backoff_multiplier
|
retry_max_delay_ms retry_backoff_multiplier
|
||||||
|
|
||||||
|
|
@ -124,9 +118,13 @@ type 'k make =
|
||||||
?url_traces:string ->
|
?url_traces:string ->
|
||||||
?url_metrics:string ->
|
?url_metrics:string ->
|
||||||
?url_logs:string ->
|
?url_logs:string ->
|
||||||
?batch_traces:int option ->
|
?batch_traces:int ->
|
||||||
?batch_metrics:int option ->
|
?batch_metrics:int ->
|
||||||
?batch_logs:int option ->
|
?batch_logs:int ->
|
||||||
|
?batch_timeout_ms:int ->
|
||||||
|
?traces:Opentelemetry.Provider_config.t ->
|
||||||
|
?metrics:Opentelemetry.Provider_config.t ->
|
||||||
|
?logs:Opentelemetry.Provider_config.t ->
|
||||||
?headers:(string * string) list ->
|
?headers:(string * string) list ->
|
||||||
?headers_traces:(string * string) list ->
|
?headers_traces:(string * string) list ->
|
||||||
?headers_metrics:(string * string) list ->
|
?headers_metrics:(string * string) list ->
|
||||||
|
|
@ -136,7 +134,6 @@ type 'k make =
|
||||||
?timeout_traces_ms:int ->
|
?timeout_traces_ms:int ->
|
||||||
?timeout_metrics_ms:int ->
|
?timeout_metrics_ms:int ->
|
||||||
?timeout_logs_ms:int ->
|
?timeout_logs_ms:int ->
|
||||||
?batch_timeout_ms:int ->
|
|
||||||
?self_trace:bool ->
|
?self_trace:bool ->
|
||||||
?http_concurrency_level:int ->
|
?http_concurrency_level:int ->
|
||||||
?retry_max_attempts:int ->
|
?retry_max_attempts:int ->
|
||||||
|
|
@ -155,22 +152,22 @@ open struct
|
||||||
| Some ("1" | "true") -> true
|
| Some ("1" | "true") -> true
|
||||||
| _ -> false
|
| _ -> false
|
||||||
|
|
||||||
let get_log_level_from_env () =
|
let get_log_level_from_env () : log_level =
|
||||||
match Sys.getenv_opt "OTEL_LOG_LEVEL" with
|
match Sys.getenv_opt "OTEL_LOG_LEVEL" with
|
||||||
| Some "none" -> Log_level_none
|
| Some "none" -> None
|
||||||
| Some "error" -> Log_level_error
|
| Some "error" -> Some Error
|
||||||
| Some "warn" -> Log_level_warn
|
| Some "warn" -> Some Warning
|
||||||
| Some "info" -> Log_level_info
|
| Some "info" -> Some Info
|
||||||
| Some "debug" -> Log_level_debug
|
| Some "debug" -> Some Debug
|
||||||
| Some s ->
|
| Some s ->
|
||||||
Printf.eprintf "warning: unknown log level %S, defaulting to info\n%!" s;
|
Opentelemetry.Self_debug.log Warning (fun () ->
|
||||||
(* log in info level, so we at least don't miss warnings and errors *)
|
Printf.sprintf "unknown log level %S, defaulting to info" s);
|
||||||
Log_level_info
|
Some Info
|
||||||
| None ->
|
| None ->
|
||||||
if get_debug_from_env () then
|
if get_debug_from_env () then
|
||||||
Log_level_debug
|
Some Debug
|
||||||
else
|
else
|
||||||
Log_level_none
|
Some Info
|
||||||
|
|
||||||
let get_sdk_disabled_from_env () =
|
let get_sdk_disabled_from_env () =
|
||||||
match Sys.getenv_opt "OTEL_SDK_DISABLED" with
|
match Sys.getenv_opt "OTEL_SDK_DISABLED" with
|
||||||
|
|
@ -241,16 +238,47 @@ module Env () : ENV = struct
|
||||||
let make k ?(debug = get_debug_from_env ())
|
let make k ?(debug = get_debug_from_env ())
|
||||||
?(log_level = get_log_level_from_env ())
|
?(log_level = get_log_level_from_env ())
|
||||||
?(sdk_disabled = get_sdk_disabled_from_env ()) ?url ?url_traces
|
?(sdk_disabled = get_sdk_disabled_from_env ()) ?url ?url_traces
|
||||||
?url_metrics ?url_logs ?(batch_traces = Some 400)
|
?url_metrics ?url_logs ?batch_traces ?batch_metrics ?batch_logs
|
||||||
?(batch_metrics = Some 200) ?(batch_logs = Some 400)
|
?(batch_timeout_ms = 2_000) ?traces ?metrics ?logs
|
||||||
?(headers = get_general_headers_from_env ()) ?headers_traces
|
?(headers = get_general_headers_from_env ()) ?headers_traces
|
||||||
?headers_metrics ?headers_logs
|
?headers_metrics ?headers_logs
|
||||||
?(protocol = get_protocol_from_env "OTEL_EXPORTER_OTLP_PROTOCOL")
|
?(protocol = get_protocol_from_env "OTEL_EXPORTER_OTLP_PROTOCOL")
|
||||||
?(timeout_ms = get_timeout_from_env "OTEL_EXPORTER_OTLP_TIMEOUT" 10_000)
|
?(timeout_ms = get_timeout_from_env "OTEL_EXPORTER_OTLP_TIMEOUT" 10_000)
|
||||||
?timeout_traces_ms ?timeout_metrics_ms ?timeout_logs_ms
|
?timeout_traces_ms ?timeout_metrics_ms ?timeout_logs_ms
|
||||||
?(batch_timeout_ms = 2_000) ?(self_trace = false) ?http_concurrency_level
|
?(self_trace = false) ?http_concurrency_level ?(retry_max_attempts = 3)
|
||||||
?(retry_max_attempts = 3) ?(retry_initial_delay_ms = 100.)
|
?(retry_initial_delay_ms = 100.) ?(retry_max_delay_ms = 5000.)
|
||||||
?(retry_max_delay_ms = 5000.) ?(retry_backoff_multiplier = 2.0) =
|
?(retry_backoff_multiplier = 2.0) =
|
||||||
|
let batch_timeout_ = Mtime.Span.(batch_timeout_ms * ms) in
|
||||||
|
let traces =
|
||||||
|
match traces with
|
||||||
|
| Some t -> t
|
||||||
|
| None ->
|
||||||
|
let batch =
|
||||||
|
match batch_traces with
|
||||||
|
| Some b -> b
|
||||||
|
| None -> get_timeout_from_env "OTEL_BSP_MAX_EXPORT_BATCH_SIZE" 400
|
||||||
|
in
|
||||||
|
Opentelemetry.Provider_config.make ~batch ~timeout:batch_timeout_ ()
|
||||||
|
in
|
||||||
|
let metrics =
|
||||||
|
match metrics with
|
||||||
|
| Some m -> m
|
||||||
|
| None ->
|
||||||
|
let batch =
|
||||||
|
match batch_metrics with
|
||||||
|
| Some b -> b
|
||||||
|
| None -> get_timeout_from_env "OTEL_METRIC_EXPORT_INTERVAL" 200
|
||||||
|
in
|
||||||
|
Opentelemetry.Provider_config.make ~batch ~timeout:batch_timeout_ ()
|
||||||
|
in
|
||||||
|
let logs =
|
||||||
|
match logs with
|
||||||
|
| Some l -> l
|
||||||
|
| None ->
|
||||||
|
let batch = Option.value batch_logs ~default:400 in
|
||||||
|
Opentelemetry.Provider_config.make ~batch ~timeout:batch_timeout_ ()
|
||||||
|
in
|
||||||
|
|
||||||
let url_traces, url_metrics, url_logs =
|
let url_traces, url_metrics, url_logs =
|
||||||
let base_url =
|
let base_url =
|
||||||
let base_url =
|
let base_url =
|
||||||
|
|
@ -343,10 +371,9 @@ module Env () : ENV = struct
|
||||||
timeout_traces_ms;
|
timeout_traces_ms;
|
||||||
timeout_metrics_ms;
|
timeout_metrics_ms;
|
||||||
timeout_logs_ms;
|
timeout_logs_ms;
|
||||||
batch_traces;
|
traces;
|
||||||
batch_metrics;
|
metrics;
|
||||||
batch_logs;
|
logs;
|
||||||
batch_timeout_ms;
|
|
||||||
self_trace;
|
self_trace;
|
||||||
http_concurrency_level;
|
http_concurrency_level;
|
||||||
retry_max_attempts;
|
retry_max_attempts;
|
||||||
|
|
|
||||||
|
|
@ -8,12 +8,9 @@ type protocol =
|
||||||
| Http_protobuf
|
| Http_protobuf
|
||||||
| Http_json
|
| Http_json
|
||||||
|
|
||||||
type log_level =
|
type log_level = Opentelemetry.Self_debug.level option
|
||||||
| Log_level_none
|
(** [None] disables internal diagnostic logging; [Some level] enables it at that
|
||||||
| Log_level_error
|
level and above. Maps to [OTEL_LOG_LEVEL] env var. *)
|
||||||
| Log_level_warn
|
|
||||||
| Log_level_info
|
|
||||||
| Log_level_debug
|
|
||||||
|
|
||||||
type rest
|
type rest
|
||||||
(** opaque type to force using {!make} while allowing record updates *)
|
(** opaque type to force using {!make} while allowing record updates *)
|
||||||
|
|
@ -60,25 +57,17 @@ type t = {
|
||||||
timeout_logs_ms: int;
|
timeout_logs_ms: int;
|
||||||
(** Timeout for log exports. Read from OTEL_EXPORTER_OTLP_LOGS_TIMEOUT,
|
(** Timeout for log exports. Read from OTEL_EXPORTER_OTLP_LOGS_TIMEOUT,
|
||||||
falls back to timeout_ms. *)
|
falls back to timeout_ms. *)
|
||||||
batch_traces: int option;
|
traces: Opentelemetry.Provider_config.t;
|
||||||
(** Batch traces? If [Some i], then this produces batches of (at most) [i]
|
(** Per-provider batching config for traces. Default: batch=400,
|
||||||
items. If [None], there is no batching.
|
timeout=2s. The batch size is read from OTEL_BSP_MAX_EXPORT_BATCH_SIZE
|
||||||
|
if set. *)
|
||||||
Note that traces and metrics are batched separately. Default
|
metrics: Opentelemetry.Provider_config.t;
|
||||||
[Some 400]. *)
|
(** Per-provider batching config for metrics. Default: batch=200,
|
||||||
batch_metrics: int option;
|
timeout=2s. The batch size is read from OTEL_METRIC_EXPORT_INTERVAL if
|
||||||
(** Batch metrics? If [Some i], then this produces batches of (at most)
|
set. *)
|
||||||
[i] items. If [None], there is no batching.
|
logs: Opentelemetry.Provider_config.t;
|
||||||
|
(** Per-provider batching config for logs. Default: batch=400, timeout=2s.
|
||||||
Note that traces and metrics are batched separately. Default
|
*)
|
||||||
[Some 200]. *)
|
|
||||||
batch_logs: int option;
|
|
||||||
(** Batch logs? See {!batch_metrics} for details. Default [Some 400] *)
|
|
||||||
batch_timeout_ms: int;
|
|
||||||
(** Number of milliseconds after which we will emit a batch, even
|
|
||||||
incomplete. Note that the batch might take longer than that, because
|
|
||||||
this is only checked when a new event occurs or when a tick is
|
|
||||||
emitted. Default 2_000. *)
|
|
||||||
self_trace: bool;
|
self_trace: bool;
|
||||||
(** If true, the OTEL library will perform some self-instrumentation.
|
(** If true, the OTEL library will perform some self-instrumentation.
|
||||||
Default [false].
|
Default [false].
|
||||||
|
|
@ -117,9 +106,13 @@ type 'k make =
|
||||||
?url_traces:string ->
|
?url_traces:string ->
|
||||||
?url_metrics:string ->
|
?url_metrics:string ->
|
||||||
?url_logs:string ->
|
?url_logs:string ->
|
||||||
?batch_traces:int option ->
|
?batch_traces:int ->
|
||||||
?batch_metrics:int option ->
|
?batch_metrics:int ->
|
||||||
?batch_logs:int option ->
|
?batch_logs:int ->
|
||||||
|
?batch_timeout_ms:int ->
|
||||||
|
?traces:Opentelemetry.Provider_config.t ->
|
||||||
|
?metrics:Opentelemetry.Provider_config.t ->
|
||||||
|
?logs:Opentelemetry.Provider_config.t ->
|
||||||
?headers:(string * string) list ->
|
?headers:(string * string) list ->
|
||||||
?headers_traces:(string * string) list ->
|
?headers_traces:(string * string) list ->
|
||||||
?headers_metrics:(string * string) list ->
|
?headers_metrics:(string * string) list ->
|
||||||
|
|
@ -129,7 +122,6 @@ type 'k make =
|
||||||
?timeout_traces_ms:int ->
|
?timeout_traces_ms:int ->
|
||||||
?timeout_metrics_ms:int ->
|
?timeout_metrics_ms:int ->
|
||||||
?timeout_logs_ms:int ->
|
?timeout_logs_ms:int ->
|
||||||
?batch_timeout_ms:int ->
|
|
||||||
?self_trace:bool ->
|
?self_trace:bool ->
|
||||||
?http_concurrency_level:int ->
|
?http_concurrency_level:int ->
|
||||||
?retry_max_attempts:int ->
|
?retry_max_attempts:int ->
|
||||||
|
|
|
||||||
|
|
@ -101,11 +101,11 @@ end = struct
|
||||||
(* sanity check about the queue, which should be drained *)
|
(* sanity check about the queue, which should be drained *)
|
||||||
let size_q = Bounded_queue.Recv.size self.q in
|
let size_q = Bounded_queue.Recv.size self.q in
|
||||||
if size_q > 0 then
|
if size_q > 0 then
|
||||||
Printf.eprintf
|
OTEL.Self_debug.log OTEL.Self_debug.Warning (fun () ->
|
||||||
"otel: warning: workers exited but work queue still contains %d \
|
Printf.sprintf
|
||||||
elements\n\
|
"otel: warning: workers exited but work queue still contains %d \
|
||||||
%!"
|
elements"
|
||||||
size_q
|
size_q)
|
||||||
)
|
)
|
||||||
|
|
||||||
let send_signals (self : state) (sender : Sender.t) ~backoff
|
let send_signals (self : state) (sender : Sender.t) ~backoff
|
||||||
|
|
@ -122,7 +122,8 @@ end = struct
|
||||||
Util_net_backoff.on_success backoff;
|
Util_net_backoff.on_success backoff;
|
||||||
IO.return ()
|
IO.return ()
|
||||||
| Error `Sysbreak ->
|
| Error `Sysbreak ->
|
||||||
Printf.eprintf "ctrl-c captured, stopping\n%!";
|
OTEL.Self_debug.log OTEL.Self_debug.Info (fun () ->
|
||||||
|
"ctrl-c captured, stopping");
|
||||||
shutdown self;
|
shutdown self;
|
||||||
IO.return ()
|
IO.return ()
|
||||||
| Error err ->
|
| Error err ->
|
||||||
|
|
@ -135,6 +136,7 @@ end = struct
|
||||||
let start_worker (self : state) : unit =
|
let start_worker (self : state) : unit =
|
||||||
let sender = Sender.create ~config:self.sender_config () in
|
let sender = Sender.create ~config:self.sender_config () in
|
||||||
let backoff = Util_net_backoff.create () in
|
let backoff = Util_net_backoff.create () in
|
||||||
|
OTEL.Self_debug.log OTEL.Self_debug.Debug (fun () -> "otel worker started");
|
||||||
|
|
||||||
(* loop on [q] *)
|
(* loop on [q] *)
|
||||||
let rec loop () : unit IO.t =
|
let rec loop () : unit IO.t =
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue