Mirror of https://gitlab.computer.surgery/matrix/grapevine.git (synced 2025-12-17 07:41:23 +01:00)
remove remote device key query backoff
This is handled by the server_backoff service now. The previous backoff implementation for remote device key queries, which this commit removes, had a bug: the failure counter was never reset after a successful query. As a result, grapevine accumulated an ever-larger error rate for remote device key queries until it was restarted. The new global backoff implementation does not have this bug.
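To make the missing reset concrete, here is a minimal sketch of per-server failure tracking with the reset-on-success step the removed code lacked. The Backoff type and its method names are invented for illustration and are not grapevine's actual server_backoff API; only the quadratic penalty mirrors the removed code below.

    use std::{
        collections::HashMap,
        time::{Duration, Instant},
    };

    /// Hypothetical per-server backoff tracker, for illustration only.
    struct Backoff {
        state: HashMap<String, (Instant, u32)>,
    }

    impl Backoff {
        /// Quadratic penalty capped at 24 hours, same shape as the removed code.
        fn penalty(tries: u32) -> Duration {
            (Duration::from_secs(30) * tries * tries)
                .min(Duration::from_secs(60 * 60 * 24))
        }

        /// True while the penalty window for `server` has not yet elapsed.
        fn is_backing_off(&self, server: &str) -> bool {
            self.state
                .get(server)
                .is_some_and(|(since, tries)| since.elapsed() < Self::penalty(*tries))
        }

        /// On failure: bump the counter and restart the clock.
        fn record_failure(&mut self, server: &str) {
            let entry =
                self.state.entry(server.to_owned()).or_insert((Instant::now(), 0));
            entry.0 = Instant::now();
            entry.1 += 1;
        }

        /// On success: forget the failure history. The removed implementation
        /// had no equivalent of this step, so `tries` could only ever grow.
        fn record_success(&mut self, server: &str) {
            self.state.remove(server);
        }
    }

With record_success in place, one transient failure costs 30 seconds; without it, a server's penalty keeps compounding for the entire uptime of the process.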
parent 8001dcf2eb
commit e1637d5f9a

2 changed files with 2 additions and 44 deletions
@@ -1,6 +1,6 @@
 use std::{
-    collections::{hash_map, BTreeMap, HashMap, HashSet},
-    time::{Duration, Instant},
+    collections::{BTreeMap, HashMap, HashSet},
+    time::Duration,
 };
 
 use futures_util::{stream::FuturesUnordered, StreamExt};
@@ -385,47 +385,9 @@ pub(crate) async fn get_keys_helper<F: Fn(&UserId) -> bool>(
     let mut failures = BTreeMap::new();
 
-    let back_off = |id| async {
-        match services().globals.bad_query_ratelimiter.write().await.entry(id) {
-            hash_map::Entry::Vacant(e) => {
-                e.insert((Instant::now(), 1));
-            }
-            hash_map::Entry::Occupied(mut e) => {
-                *e.get_mut() = (Instant::now(), e.get().1 + 1);
-            }
-        }
-    };
-
     let mut futures: FuturesUnordered<_> = get_over_federation
         .into_iter()
         .map(|(server, vec)| async move {
-            if let Some((time, tries)) = services()
-                .globals
-                .bad_query_ratelimiter
-                .read()
-                .await
-                .get(server)
-            {
-                // Exponential backoff
-                let mut min_elapsed_duration =
-                    Duration::from_secs(30) * (*tries) * (*tries);
-                if min_elapsed_duration > Duration::from_secs(60 * 60 * 24) {
-                    min_elapsed_duration = Duration::from_secs(60 * 60 * 24);
-                }
-
-                if let Some(remaining) =
-                    min_elapsed_duration.checked_sub(time.elapsed())
-                {
-                    debug!(%server, %tries, ?remaining, "Backing off from server");
-                    return (
-                        server,
-                        Err(Error::BadServerResponse(
-                            "bad query, still backing off",
-                        )),
-                    );
-                }
-            }
-
             let mut device_keys_input_fed = BTreeMap::new();
             for (user_id, keys) in vec {
                 device_keys_input_fed.insert(user_id.to_owned(), keys.clone());
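For scale, the removed penalty works out to min(30s · tries², 24h): tries = 1 waits 30 seconds, tries = 10 waits 50 minutes, and from tries = 54 onward the 24-hour cap applies. Because nothing ever decremented tries, a server that was briefly unreachable long ago could still be sitting near the cap.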
@@ -454,7 +416,6 @@ pub(crate) async fn get_keys_helper<F: Fn(&UserId) -> bool>(
             let response = match response {
                 Ok(response) => response,
                 Err(error) => {
-                    back_off(server.to_owned()).await;
                     debug!(%server, %error, "remote device key query failed");
                     failures.insert(server.to_string(), json!({}));
                     continue;
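Note that back_off was invoked only in the Err arm above; the Ok arm never cleared or decremented the entry in bad_query_ratelimiter, which is the missing reset described in the commit message.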
@@ -84,8 +84,6 @@ pub(crate) struct Service {
         Arc<RwLock<HashMap<OwnedEventId, RateLimitState>>>,
     pub(crate) bad_signature_ratelimiter:
         Arc<RwLock<HashMap<Vec<String>, RateLimitState>>>,
-    pub(crate) bad_query_ratelimiter:
-        Arc<RwLock<HashMap<OwnedServerName, RateLimitState>>>,
     pub(crate) servername_ratelimiter:
         OnDemandHashMap<OwnedServerName, Semaphore>,
     pub(crate) roomid_mutex_insert: TokenSet<OwnedRoomId, marker::Insert>,
@@ -278,7 +276,6 @@ impl Service {
             admin_bot_room_alias_id,
             bad_event_ratelimiter: Arc::new(RwLock::new(HashMap::new())),
             bad_signature_ratelimiter: Arc::new(RwLock::new(HashMap::new())),
-            bad_query_ratelimiter: Arc::new(RwLock::new(HashMap::new())),
             servername_ratelimiter: OnDemandHashMap::new(
                 "servername_ratelimiter".to_owned(),
             ),