diff --git a/src/service/server_backoff.rs b/src/service/server_backoff.rs index aa1f3857..0cfe39a7 100644 --- a/src/service/server_backoff.rs +++ b/src/service/server_backoff.rs @@ -6,7 +6,7 @@ use std::{ use rand::{thread_rng, Rng}; use ruma::{OwnedServerName, ServerName}; -use tracing::{debug, instrument}; +use tracing::{debug, info, instrument}; use crate::{services, Error, Result}; @@ -169,6 +169,12 @@ impl BackoffState { let delay = Duration::from_secs_f64(delay_secs); delay.checked_sub(last_failure.elapsed()) } + + /// Returns whether this server is marked as online (no backoff delay). + fn is_online(&self) -> bool { + let config = &services().globals.config.federation.backoff; + self.failure_count <= config.failure_threshold + } } impl BackoffGuard { @@ -176,6 +182,7 @@ impl BackoffGuard { #[instrument(skip(self))] pub(crate) fn success(self) { let mut state = self.backoff.write().unwrap(); + let was_online = state.is_online(); if state.failure_count != 0 { debug!( @@ -185,6 +192,11 @@ impl BackoffGuard { } state.failure_count = 0; + + if state.is_online() != was_online { + let server_name = &state.server_name; + info!(%server_name, "server transitioned from offline to online"); + } } /// Record a failed request indicating that the server may be unavailable. @@ -196,6 +208,7 @@ impl BackoffGuard { let config = &services().globals.config.federation.backoff; let mut state = self.backoff.write().unwrap(); + let was_online = state.is_online(); if state.last_failure == self.last_failure { state.failure_count = state.failure_count.saturating_add(1); @@ -208,6 +221,11 @@ impl BackoffGuard { failure_count = state.failure_count, "hard failure sending request to server, incrementing failure count" ); + + if state.is_online() != was_online { + let server_name = &state.server_name; + info!(%server_name, "server transitioned from online to offline"); + } } }