remove remote device key query backoff

This is handled by the server_backoff service now.

The previous implementation of backoff for remote device key queries,
removed here, had a bug: the failure counter was never reset after a
successful query. As a result, grapevine's error rate for remote device
key queries could only grow over the life of the process, and only a
restart would clear it. This bug is not present in the new global
backoff implementation.
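To illustrate the bug (a minimal sketch only; the tracker type and
method names below are hypothetical, not grapevine's actual code): a
per-server failure counter that is bumped on every failure but never
cleared on success can only ratchet upward.

    use std::collections::HashMap;
    use std::time::Instant;

    // Hypothetical tracker, invented for illustration.
    struct BackoffTracker {
        failures: HashMap<String, (Instant, u32)>,
    }

    impl BackoffTracker {
        fn record_failure(&mut self, server: String) {
            // Bump the failure counter and timestamp for this server.
            self.failures
                .entry(server)
                .and_modify(|(time, tries)| {
                    *time = Instant::now();
                    *tries += 1;
                })
                .or_insert_with(|| (Instant::now(), 1));
        }

        // The step the removed implementation never performed: clearing
        // the counter after a success. Without it, `tries` only grows
        // until the process restarts.
        fn record_success(&mut self, server: &str) {
            self.failures.remove(server);
        }
    }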
Olivia Lee 2024-08-11 15:45:43 -07:00
parent 8001dcf2eb
commit e1637d5f9a
2 changed files with 2 additions and 44 deletions


@@ -1,6 +1,6 @@
 use std::{
-    collections::{hash_map, BTreeMap, HashMap, HashSet},
-    time::{Duration, Instant},
+    collections::{BTreeMap, HashMap, HashSet},
+    time::Duration,
 };
 use futures_util::{stream::FuturesUnordered, StreamExt};
@@ -385,47 +385,9 @@ pub(crate) async fn get_keys_helper<F: Fn(&UserId) -> bool>(
     let mut failures = BTreeMap::new();
-    let back_off = |id| async {
-        match services().globals.bad_query_ratelimiter.write().await.entry(id) {
-            hash_map::Entry::Vacant(e) => {
-                e.insert((Instant::now(), 1));
-            }
-            hash_map::Entry::Occupied(mut e) => {
-                *e.get_mut() = (Instant::now(), e.get().1 + 1);
-            }
-        }
-    };
     let mut futures: FuturesUnordered<_> = get_over_federation
         .into_iter()
         .map(|(server, vec)| async move {
-            if let Some((time, tries)) = services()
-                .globals
-                .bad_query_ratelimiter
-                .read()
-                .await
-                .get(server)
-            {
-                // Exponential backoff
-                let mut min_elapsed_duration =
-                    Duration::from_secs(30) * (*tries) * (*tries);
-                if min_elapsed_duration > Duration::from_secs(60 * 60 * 24) {
-                    min_elapsed_duration = Duration::from_secs(60 * 60 * 24);
-                }
-                if let Some(remaining) =
-                    min_elapsed_duration.checked_sub(time.elapsed())
-                {
-                    debug!(%server, %tries, ?remaining, "Backing off from server");
-                    return (
-                        server,
-                        Err(Error::BadServerResponse(
-                            "bad query, still backing off",
-                        )),
-                    );
-                }
-            }
             let mut device_keys_input_fed = BTreeMap::new();
             for (user_id, keys) in vec {
                 device_keys_input_fed.insert(user_id.to_owned(), keys.clone());
@@ -454,7 +416,6 @@ pub(crate) async fn get_keys_helper<F: Fn(&UserId) -> bool>(
             let response = match response {
                 Ok(response) => response,
                 Err(error) => {
-                    back_off(server.to_owned()).await;
                     debug!(%server, %error, "remote device key query failed");
                     failures.insert(server.to_string(), json!({}));
                     continue;
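For context, the backoff window deleted above is quadratic in the
failure count and capped at one day. A standalone sketch restating that
arithmetic (the function name is borrowed from the deleted code; the
rest is illustrative):

    use std::time::Duration;

    // 30s * tries^2, capped at 24 hours, as in the deleted block.
    fn min_elapsed_duration(tries: u32) -> Duration {
        let backoff = Duration::from_secs(30) * tries * tries;
        backoff.min(Duration::from_secs(60 * 60 * 24))
    }

    fn main() {
        // 1 failure -> 30s, 2 -> 2m, 10 -> 50m, 54 and up -> capped at 24h.
        for tries in [1, 2, 10, 54] {
            println!("{tries} -> {:?}", min_elapsed_duration(tries));
        }
    }

Because the counter was never reset, a server that went through one bad
patch kept climbing this schedule for the life of the process.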


@@ -84,8 +84,6 @@ pub(crate) struct Service {
         Arc<RwLock<HashMap<OwnedEventId, RateLimitState>>>,
     pub(crate) bad_signature_ratelimiter:
         Arc<RwLock<HashMap<Vec<String>, RateLimitState>>>,
-    pub(crate) bad_query_ratelimiter:
-        Arc<RwLock<HashMap<OwnedServerName, RateLimitState>>>,
     pub(crate) servername_ratelimiter:
         OnDemandHashMap<OwnedServerName, Semaphore>,
     pub(crate) roomid_mutex_insert: TokenSet<OwnedRoomId, marker::Insert>,
@@ -278,7 +276,6 @@ impl Service {
             admin_bot_room_alias_id,
             bad_event_ratelimiter: Arc::new(RwLock::new(HashMap::new())),
             bad_signature_ratelimiter: Arc::new(RwLock::new(HashMap::new())),
-            bad_query_ratelimiter: Arc::new(RwLock::new(HashMap::new())),
             servername_ratelimiter: OnDemandHashMap::new(
                 "servername_ratelimiter".to_owned(),
             ),