mirror of
https://gitlab.computer.surgery/matrix/grapevine.git
synced 2025-12-17 15:51:23 +01:00
backoff from offline servers in all outgoing federation requests
Only marking M_UNKNOWN errors as a hard failure if they are in the
standard error format is conservative, and might cause us to miss some
offline servers. For example, a server might configure a load balancer
to send a standard-looking { errcode: ..., ... } response when the
backend is down, with a custom errcode. We wouldn't catch this.
TODO: evaluate whether this comes up in practice by running the changes
on computer.surgery
This commit is contained in:
parent
9b22c9b40b
commit
8001dcf2eb
1 changed files with 23 additions and 1 deletions
|
|
@ -679,6 +679,10 @@ impl Service {
|
||||||
debug!("Waiting for permit");
|
debug!("Waiting for permit");
|
||||||
let permit = self.maximum_requests.acquire().await;
|
let permit = self.maximum_requests.acquire().await;
|
||||||
debug!("Got permit");
|
debug!("Got permit");
|
||||||
|
|
||||||
|
let backoff_guard =
|
||||||
|
services().server_backoff.server_ready(destination)?;
|
||||||
|
|
||||||
let response = tokio::time::timeout(
|
let response = tokio::time::timeout(
|
||||||
Duration::from_secs(2 * 60),
|
Duration::from_secs(2 * 60),
|
||||||
server_server::send_request(
|
server_server::send_request(
|
||||||
|
|
@ -692,9 +696,27 @@ impl Service {
|
||||||
.map_err(|_| {
|
.map_err(|_| {
|
||||||
warn!("Timeout waiting for server response");
|
warn!("Timeout waiting for server response");
|
||||||
Error::BadServerResponse("Timeout waiting for server response")
|
Error::BadServerResponse("Timeout waiting for server response")
|
||||||
})?;
|
})
|
||||||
|
.and_then(|result| result);
|
||||||
drop(permit);
|
drop(permit);
|
||||||
|
|
||||||
|
match &response {
|
||||||
|
Err(Error::Federation(_, error)) => {
|
||||||
|
if error.error_kind().is_some() {
|
||||||
|
// Other errors may occur during normal operation with a
|
||||||
|
// healthy server, so don't increment the failure counter.
|
||||||
|
backoff_guard.soft_failure();
|
||||||
|
} else {
|
||||||
|
// The error wasn't in the expected format for matrix API
|
||||||
|
// responses. This almost certainly indicates the server
|
||||||
|
// is unhealthy or offline.
|
||||||
|
backoff_guard.hard_failure();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => backoff_guard.hard_failure(),
|
||||||
|
Ok(_) => backoff_guard.success(),
|
||||||
|
}
|
||||||
|
|
||||||
response
|
response
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue