mirror of
https://gitlab.computer.surgery/matrix/grapevine.git
synced 2025-12-18 08:11:24 +01:00
metrics for online and offline remote server count
This commit is contained in:
parent
5b6aaa19b9
commit
56f025cb47
2 changed files with 92 additions and 7 deletions
|
|
@ -1,6 +1,6 @@
|
|||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{Arc, RwLock},
|
||||
sync::{Arc, Mutex, RwLock},
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
|
|
@ -8,7 +8,7 @@ use rand::{thread_rng, Rng};
|
|||
use ruma::{OwnedServerName, ServerName};
|
||||
use tracing::{debug, info, instrument};
|
||||
|
||||
use crate::{services, Error, Result};
|
||||
use crate::{observability::METRICS, services, Error, Result};
|
||||
|
||||
/// Service to handle backing off requests to offline servers.
|
||||
///
|
||||
|
|
@ -36,6 +36,8 @@ use crate::{services, Error, Result};
|
|||
/// the server is only briefly offline.
|
||||
pub(crate) struct Service {
|
||||
servers: RwLock<HashMap<OwnedServerName, Arc<RwLock<BackoffState>>>>,
|
||||
|
||||
server_counts: Mutex<ServerCounts>,
|
||||
}
|
||||
|
||||
/// Guard to record the result of an attempted request to a server.
|
||||
|
|
@ -75,10 +77,27 @@ struct BackoffState {
|
|||
jitter_coeff: f64,
|
||||
}
|
||||
|
||||
/// A change in the tracked state of one remote server.
#[derive(Debug, Copy, Clone)]
enum Transition {
    /// A new server, marked as online by default
    New,
    /// A server previously counted as online is now counted as offline.
    OnlineToOffline,
    /// A server previously counted as offline is now counted as online.
    OfflineToOnline,
}
|
||||
|
||||
/// Tally of known servers per state; these are the values reported to metrics.
///
/// The default value has both counts at zero.
#[derive(Debug, Copy, Clone, Default)]
struct ServerCounts {
    /// Number of servers currently counted as online.
    online_count: u64,
    /// Number of servers currently counted as offline.
    offline_count: u64,
}
|
||||
|
||||
impl Service {
|
||||
pub(crate) fn build() -> Arc<Service> {
|
||||
Arc::new(Service {
|
||||
servers: RwLock::default(),
|
||||
server_counts: Mutex::default(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -113,6 +132,45 @@ impl Service {
|
|||
})
|
||||
}
|
||||
|
||||
fn record_transition(
|
||||
&self,
|
||||
server_name: &ServerName,
|
||||
transition: Transition,
|
||||
) {
|
||||
let mut counts = self.server_counts.lock().unwrap();
|
||||
|
||||
match transition {
|
||||
Transition::New => {
|
||||
info!(
|
||||
%server_name,
|
||||
"new remote server, marked as online by default"
|
||||
);
|
||||
counts.online_count += 1;
|
||||
}
|
||||
Transition::OnlineToOffline => {
|
||||
info!(
|
||||
%server_name,
|
||||
"remote server transitioned from online to offline"
|
||||
);
|
||||
counts.online_count -= 1;
|
||||
counts.offline_count += 1;
|
||||
}
|
||||
Transition::OfflineToOnline => {
|
||||
info!(
|
||||
%server_name,
|
||||
"remote server transitioned from offline to online"
|
||||
);
|
||||
counts.offline_count -= 1;
|
||||
counts.online_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
METRICS.record_remote_server_count(
|
||||
counts.online_count,
|
||||
counts.offline_count,
|
||||
);
|
||||
}
|
||||
|
||||
fn server_state(
|
||||
&self,
|
||||
server_name: &ServerName,
|
||||
|
|
@ -134,6 +192,7 @@ impl Service {
|
|||
server_name.to_owned(),
|
||||
)));
|
||||
servers.insert(server_name.to_owned(), Arc::clone(&state));
|
||||
self.record_transition(server_name, Transition::New);
|
||||
state
|
||||
}
|
||||
}
|
||||
|
|
@ -193,9 +252,12 @@ impl BackoffGuard {
|
|||
|
||||
state.failure_count = 0;
|
||||
|
||||
if state.is_online() != was_online {
|
||||
let server_name = &state.server_name;
|
||||
info!(%server_name, "server transitioned from offline to online");
|
||||
// Server is always online after setting failure_count = 0
|
||||
if !was_online {
|
||||
services().server_backoff.record_transition(
|
||||
&state.server_name,
|
||||
Transition::OfflineToOnline,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -223,8 +285,10 @@ impl BackoffGuard {
|
|||
);
|
||||
|
||||
if state.is_online() != was_online {
|
||||
let server_name = &state.server_name;
|
||||
info!(%server_name, "server transitioned from online to offline");
|
||||
services().server_backoff.record_transition(
|
||||
&state.server_name,
|
||||
Transition::OnlineToOffline,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue