set up opentelemetry for metrics

Also adds an `allow_prometheus` option (disabled by default) to expose
a `/metrics` endpoint that returns Prometheus data.
This commit is contained in:
Charles Hall 2024-05-29 14:49:49 -07:00
parent 94fda7c875
commit a0b92c82e8
No known key found for this signature in database
GPG key ID: 7B8E0645816E07CF
5 changed files with 97 additions and 2 deletions

37
Cargo.lock generated
View file

@ -872,12 +872,15 @@ dependencies = [
"lru-cache",
"nix",
"num_cpus",
"once_cell",
"opentelemetry",
"opentelemetry-jaeger-propagator",
"opentelemetry-otlp",
"opentelemetry-prometheus",
"opentelemetry_sdk",
"parking_lot",
"phf",
"prometheus",
"rand",
"regex",
"reqwest",
@ -1662,6 +1665,19 @@ dependencies = [
"tonic",
]
[[package]]
name = "opentelemetry-prometheus"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e1a24eafe47b693cb938f8505f240dc26c71db60df9aca376b4f857e9653ec7"
dependencies = [
"once_cell",
"opentelemetry",
"opentelemetry_sdk",
"prometheus",
"protobuf",
]
[[package]]
name = "opentelemetry-proto"
version = "0.6.0"
@ -1925,6 +1941,21 @@ dependencies = [
"yansi",
]
[[package]]
name = "prometheus"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1"
dependencies = [
"cfg-if",
"fnv",
"lazy_static",
"memchr",
"parking_lot",
"protobuf",
"thiserror",
]
[[package]]
name = "prost"
version = "0.12.6"
@ -1948,6 +1979,12 @@ dependencies = [
"syn",
]
[[package]]
name = "protobuf"
version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
[[package]]
name = "quick-error"
version = "1.2.3"

View file

@ -106,12 +106,15 @@ image = { version = "0.25.1", default-features = false, features = ["jpeg", "png
jsonwebtoken = "9.3.0"
lru-cache = "0.1.2"
num_cpus = "1.16.0"
once_cell = "1.19.0"
opentelemetry = "0.23.0"
opentelemetry-jaeger-propagator = "0.2.0"
opentelemetry-otlp = "0.16.0"
opentelemetry-prometheus = "0.16.0"
opentelemetry_sdk = { version = "0.23.0", features = ["rt-tokio"] }
parking_lot = { version = "0.12.3", optional = true }
phf = { version = "0.11.2", features = ["macros"] }
prometheus = "0.13.4"
rand = "0.8.5"
regex = "1.10.4"
reqwest = { version = "0.12.4", default-features = false, features = ["http2", "rustls-tls-native-roots", "socks"] }

View file

@ -60,6 +60,8 @@ pub(crate) struct Config {
#[serde(default = "false_fn")]
pub(crate) allow_jaeger: bool,
#[serde(default = "false_fn")]
pub(crate) allow_prometheus: bool,
#[serde(default = "false_fn")]
pub(crate) tracing_flame: bool,
#[serde(default)]
pub(crate) proxy: ProxyConfig,

View file

@ -412,6 +412,15 @@ fn routes(config: &Config) -> Router {
.put(c2s::send_state_event_for_empty_key_route),
);
let router = if config.allow_prometheus {
router.route(
"/metrics",
get(|| async { observability::METRICS.export() }),
)
} else {
router
};
let router = router
.route(
"/_matrix/client/r0/rooms/:room_id/initialSync",

View file

@ -3,13 +3,17 @@
use std::{fs::File, io::BufWriter};
use opentelemetry::KeyValue;
use opentelemetry_sdk::Resource;
use once_cell::sync::Lazy;
use opentelemetry::{metrics::MeterProvider, KeyValue};
use opentelemetry_sdk::{metrics::SdkMeterProvider, Resource};
use tracing_flame::{FlameLayer, FlushGuard};
use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Layer, Registry};
use crate::{config::Config, error, utils::error::Result};
/// Globally accessible metrics state
///
/// Wrapped in [`Lazy`], so `Metrics::new` runs the first time this static is
/// accessed rather than at program start.
pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::new);
/// Cleans up resources relating to observability when [`Drop`]ped
pub(crate) struct Guard {
/// Drop guard used to flush [`tracing_flame`] data on exit
@ -85,3 +89,43 @@ fn standard_resource() -> Resource {
env!("CARGO_PKG_NAME"),
)]))
}
/// Holds state relating to metrics
///
/// Built by `Metrics::new` and read by `Metrics::export`.
pub(crate) struct Metrics {
/// Internal state for OpenTelemetry metrics
///
/// The first element is the Prometheus registry that `export` gathers
/// from. The second is the meter provider, which is built with an exporter
/// that was handed a clone of that registry.
///
/// We never directly read from [`SdkMeterProvider`], but it needs to
/// outlive all calls to `self.otel_state.0.gather()`, otherwise
/// metrics collection will fail.
otel_state: (prometheus::Registry, SdkMeterProvider),
}
impl Metrics {
    /// Builds the Prometheus registry and the OpenTelemetry meter provider
    /// that reports into it
    ///
    /// # Panics
    ///
    /// Panics if the Prometheus exporter cannot be built.
    fn new() -> Self {
        // The exporter receives a clone of the registry; the original is kept
        // so that `export` can gather from it later.
        let prometheus_registry = prometheus::Registry::new();
        let exporter_registry = prometheus_registry.clone();

        let prometheus_exporter = opentelemetry_prometheus::exporter()
            .with_registry(exporter_registry)
            .build()
            .expect("exporter configuration should be valid");

        let meter_provider = SdkMeterProvider::builder()
            .with_reader(prometheus_exporter)
            .with_resource(standard_resource())
            .build();

        // No instruments are created from the meter yet, hence the
        // underscore-prefixed binding.
        // TODO: Add some metrics
        let _meter = meter_provider.meter(env!("CARGO_PKG_NAME"));

        Self {
            otel_state: (prometheus_registry, meter_provider),
        }
    }

    /// Gathers everything currently in the registry and encodes it with
    /// [`prometheus::TextEncoder`], e.g. for scraping by Prometheus
    ///
    /// # Panics
    ///
    /// Panics if the gathered metrics cannot be encoded to a string.
    pub(crate) fn export(&self) -> String {
        let (registry, _provider) = &self.otel_state;

        let encoder = prometheus::TextEncoder::new();
        let metric_families = registry.gather();

        encoder
            .encode_to_string(&metric_families)
            .expect("should be able to encode metrics")
    }
}