diff --git a/Cargo.lock b/Cargo.lock index 1f3762d1..e72fd491 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -872,12 +872,15 @@ dependencies = [ "lru-cache", "nix", "num_cpus", + "once_cell", "opentelemetry", "opentelemetry-jaeger-propagator", "opentelemetry-otlp", + "opentelemetry-prometheus", "opentelemetry_sdk", "parking_lot", "phf", + "prometheus", "rand", "regex", "reqwest", @@ -1662,6 +1665,19 @@ dependencies = [ "tonic", ] +[[package]] +name = "opentelemetry-prometheus" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e1a24eafe47b693cb938f8505f240dc26c71db60df9aca376b4f857e9653ec7" +dependencies = [ + "once_cell", + "opentelemetry", + "opentelemetry_sdk", + "prometheus", + "protobuf", +] + [[package]] name = "opentelemetry-proto" version = "0.6.0" @@ -1925,6 +1941,21 @@ dependencies = [ "yansi", ] +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror", +] + [[package]] name = "prost" version = "0.12.6" @@ -1948,6 +1979,12 @@ dependencies = [ "syn", ] +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + [[package]] name = "quick-error" version = "1.2.3" diff --git a/Cargo.toml b/Cargo.toml index 7b9a6872..24d7dbb1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -106,12 +106,15 @@ image = { version = "0.25.1", default-features = false, features = ["jpeg", "png jsonwebtoken = "9.3.0" lru-cache = "0.1.2" num_cpus = "1.16.0" +once_cell = "1.19.0" opentelemetry = "0.23.0" opentelemetry-jaeger-propagator = "0.2.0" opentelemetry-otlp = "0.16.0" +opentelemetry-prometheus = "0.16.0" opentelemetry_sdk = { version = "0.23.0", 
features = ["rt-tokio"] } parking_lot = { version = "0.12.3", optional = true } phf = { version = "0.11.2", features = ["macros"] } +prometheus = "0.13.4" rand = "0.8.5" regex = "1.10.4" reqwest = { version = "0.12.4", default-features = false, features = ["http2", "rustls-tls-native-roots", "socks"] } diff --git a/src/config.rs b/src/config.rs index 88e07794..96a4d2fd 100644 --- a/src/config.rs +++ b/src/config.rs @@ -60,6 +60,8 @@ pub(crate) struct Config { #[serde(default = "false_fn")] pub(crate) allow_jaeger: bool, #[serde(default = "false_fn")] + pub(crate) allow_prometheus: bool, + #[serde(default = "false_fn")] pub(crate) tracing_flame: bool, #[serde(default)] pub(crate) proxy: ProxyConfig, diff --git a/src/main.rs b/src/main.rs index 6f8a4038..dc3e94a7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -412,6 +412,15 @@ fn routes(config: &Config) -> Router { .put(c2s::send_state_event_for_empty_key_route), ); + let router = if config.allow_prometheus { + router.route( + "/metrics", + get(|| async { observability::METRICS.export() }), + ) + } else { + router + }; + let router = router .route( "/_matrix/client/r0/rooms/:room_id/initialSync", diff --git a/src/observability.rs b/src/observability.rs index 83900a3f..9a2094a9 100644 --- a/src/observability.rs +++ b/src/observability.rs @@ -3,13 +3,17 @@ use std::{fs::File, io::BufWriter}; -use opentelemetry::KeyValue; -use opentelemetry_sdk::Resource; +use once_cell::sync::Lazy; +use opentelemetry::{metrics::MeterProvider, KeyValue}; +use opentelemetry_sdk::{metrics::SdkMeterProvider, Resource}; use tracing_flame::{FlameLayer, FlushGuard}; use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Layer, Registry}; use crate::{config::Config, error, utils::error::Result}; +/// Globally accessible metrics state +pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::new); + /// Cleans up resources relating to observability when [`Drop`]ped pub(crate) struct Guard { /// Drop guard used to flush [`tracing_flame`] data on 
exit @@ -85,3 +89,43 @@ fn standard_resource() -> Resource { env!("CARGO_PKG_NAME"), )])) } + +/// Holds state relating to metrics +pub(crate) struct Metrics { + /// Internal state for OpenTelemetry metrics + /// + /// We never directly read from [`SdkMeterProvider`], but it needs to + /// outlive all calls to `self.otel_state.0.gather()`, otherwise + /// metrics collection will fail. + otel_state: (prometheus::Registry, SdkMeterProvider), +} + +impl Metrics { + /// Initializes metric-collecting and exporting facilities + fn new() -> Self { + // Set up OpenTelemetry state + let registry = prometheus::Registry::new(); + let exporter = opentelemetry_prometheus::exporter() + .with_registry(registry.clone()) + .build() + .expect("exporter configuration should be valid"); + let provider = SdkMeterProvider::builder() + .with_reader(exporter) + .with_resource(standard_resource()) + .build(); + let _meter = provider.meter(env!("CARGO_PKG_NAME")); + + // TODO: Add some metrics + + Metrics { + otel_state: (registry, provider), + } + } + + /// Export metrics to a string suitable for consumption by e.g. Prometheus + pub(crate) fn export(&self) -> String { + prometheus::TextEncoder::new() + .encode_to_string(&self.otel_state.0.gather()) + .expect("should be able to encode metrics") + } +}