replace outgoing transaction backoff with global offline server backoff

The global backoff code is in `send_federation_transaction`, so we had
to switch to using this function instead of
`server_server::send_request` directly. This has the side effect of
introducing a timeout, which we previously didn't have for transactions.
This commit is contained in:
Olivia Lee 2024-08-23 20:34:52 -07:00
parent b876dca45c
commit b9118b1361
No known key found for this signature in database
GPG key ID: 54D568A15B9CD1F9

View file

@ -4,7 +4,7 @@ use std::{
future::{Future, IntoFuture},
pin::Pin,
sync::Arc,
time::{Duration, Instant},
time::Duration,
};
use base64::{engine::general_purpose, Engine as _};
@ -155,10 +155,7 @@ pub(crate) struct Service {
#[derive(Debug)]
enum TransactionStatus {
Running,
// number of times failed, time of last failure
Failed(u32, Instant),
// number of times failed
Retrying(u32),
Failed,
}
struct HandlerInputs {
@ -293,19 +290,14 @@ impl Service {
}
if let Err(error) = result {
// Logging transactions that fail due to backoff produces a lot of
// clutter in the logs. Failure due to backoff is expected behavior,
// and the transaction will be retried later.
if !matches!(error, Error::ServerBackoff { .. }) {
warn!(%error, "Marking transaction as failed");
current_transaction_status.entry(destination).and_modify(|e| {
use TransactionStatus::{Failed, Retrying, Running};
*e = match e {
Running => Failed(1, Instant::now()),
Retrying(n) => Failed(*n + 1, Instant::now()),
Failed(..) => {
error!("Request that was not even running failed?!");
return;
}
}
});
current_transaction_status
.insert(destination, TransactionStatus::Failed);
return Ok(None);
}
@ -410,27 +402,13 @@ impl Service {
entry
.and_modify(|e| match e {
TransactionStatus::Running | TransactionStatus::Retrying(_) => {
TransactionStatus::Running => {
// already running
allow = false;
}
TransactionStatus::Failed(tries, time) => {
// Fail if a request has failed recently (exponential
// backoff)
let mut min_elapsed_duration =
Duration::from_secs(30) * (*tries) * (*tries);
if min_elapsed_duration > Duration::from_secs(60 * 60 * 24)
{
min_elapsed_duration =
Duration::from_secs(60 * 60 * 24);
}
if time.elapsed() < min_elapsed_duration {
allow = false;
} else {
TransactionStatus::Failed => {
retry = true;
*e = TransactionStatus::Retrying(*tries);
}
*e = TransactionStatus::Running;
}
})
.or_insert(TransactionStatus::Running);
@ -989,9 +967,9 @@ async fn handle_federation_event(
}
}
let permit = services().sending.maximum_requests.acquire().await;
let response = server_server::send_request(
let response = services()
.sending
.send_federation_request(
server,
send_transaction_message::v1::Request {
origin: services().globals.server_name().to_owned(),
@ -1005,9 +983,12 @@ async fn handle_federation_event(
})))
.into(),
},
LogRequestError::No,
AllowLoopbackRequests::No,
)
// The spec states that this endpoint should always return success, even
// if individual PDUs fail. If we get an error, something is wrong.
.backoff_on(BackoffOn::AllErrors)
// The error will be logged in `handle_response`
.log_errors(LogRequestError::No)
.await?;
for pdu in response.pdus {
@ -1016,8 +997,6 @@ async fn handle_federation_event(
}
}
drop(permit);
Ok(())
}