Fix spans in tokio::spawn-ed tasks

tokio::spawn is a span boundary: the spawned future has no parent span.

For short futures, we simply inherit the current span with
`.in_current_span()`.

For long-running futures containing a sleeping infinite loop, we don't
actually want a span on the entire task or even the entire loop body,
since both would result in very long spans. Instead, we put the outermost
span (created using #[tracing::instrument] or .instrument()) around the
actual work happening after the sleep, which results in a new root span
being created after every sleep.
This commit is contained in:
Lambda 2024-05-19 21:39:13 +00:00
parent 5e9e5b76bc
commit ac42e0bfff
3 changed files with 109 additions and 70 deletions

View file

@ -22,7 +22,7 @@ use ruma::{
CanonicalJsonValue, EventId, OwnedDeviceId, OwnedEventId, OwnedRoomId, CanonicalJsonValue, EventId, OwnedDeviceId, OwnedEventId, OwnedRoomId,
OwnedUserId, RoomId, UserId, OwnedUserId, RoomId, UserId,
}; };
use tracing::{debug, error, info, warn}; use tracing::{debug, error, info, info_span, warn, Instrument};
use crate::{ use crate::{
service::rooms::timeline::PduCount, services, utils, Config, Error, service::rooms::timeline::PduCount, services, utils, Config, Error,
@ -1200,20 +1200,22 @@ impl KeyValueDatabase {
loop { loop {
#[cfg(unix)] #[cfg(unix)]
tokio::select! { let msg = tokio::select! {
_ = i.tick() => { _ = i.tick() => || {
debug!("cleanup: Timer ticked"); debug!("cleanup: Timer ticked");
} },
_ = s.recv() => { _ = s.recv() => || {
debug!("cleanup: Received SIGHUP"); debug!("cleanup: Received SIGHUP");
} },
}; };
#[cfg(not(unix))] #[cfg(not(unix))]
{ let msg = {
i.tick().await; i.tick().await;
debug!("cleanup: Timer ticked") || debug!("cleanup: Timer ticked")
} };
async {
msg();
let start = Instant::now(); let start = Instant::now();
if let Err(e) = services().globals.cleanup() { if let Err(e) = services().globals.cleanup() {
error!("cleanup: Errored: {}", e); error!("cleanup: Errored: {}", e);
@ -1221,6 +1223,9 @@ impl KeyValueDatabase {
debug!("cleanup: Finished in {:?}", start.elapsed()); debug!("cleanup: Finished in {:?}", start.elapsed());
} }
} }
.instrument(info_span!("database_cleanup"))
.await;
}
}); });
} }
} }

View file

@ -218,14 +218,7 @@ impl Service {
pub(crate) fn start_handler(self: &Arc<Self>) { pub(crate) fn start_handler(self: &Arc<Self>) {
let self2 = Arc::clone(self); let self2 = Arc::clone(self);
tokio::spawn(async move { tokio::spawn(async move {
self2.handler().await; let mut receiver = self2.receiver.lock().await;
});
}
async fn handler(&self) {
let mut receiver = self.receiver.lock().await;
// TODO: Use futures when we have long admin commands
let grapevine_user = UserId::parse(format!( let grapevine_user = UserId::parse(format!(
"@{}:{}", "@{}:{}",
if services().globals.config.conduit_compat { if services().globals.config.conduit_compat {
@ -237,13 +230,35 @@ impl Service {
)) ))
.expect("admin bot username should be valid"); .expect("admin bot username should be valid");
if let Ok(Some(grapevine_room)) = services().admin.get_admin_room() { let Ok(Some(grapevine_room)) = services().admin.get_admin_room()
else {
return;
};
loop { loop {
let event = receiver let event = receiver
.recv() .recv()
.await .await
.expect("admin command channel has been closed"); .expect("admin command channel has been closed");
Self::handle_event(
&self2,
event,
&grapevine_room,
&grapevine_user,
)
.await;
}
});
}
#[tracing::instrument(skip(self, grapevine_room, grapevine_user))]
async fn handle_event(
&self,
event: AdminRoomEvent,
grapevine_room: &OwnedRoomId,
grapevine_user: &ruma::OwnedUserId,
) {
let message_content = match event { let message_content = match event {
AdminRoomEvent::SendMessage(content) => content, AdminRoomEvent::SendMessage(content) => content,
AdminRoomEvent::ProcessMessage(room_message) => { AdminRoomEvent::ProcessMessage(room_message) => {
@ -275,15 +290,13 @@ impl Service {
state_key: None, state_key: None,
redacts: None, redacts: None,
}, },
&grapevine_user, grapevine_user,
&grapevine_room, grapevine_room,
&state_lock, &state_lock,
) )
.await .await
.unwrap(); .unwrap();
} }
}
}
pub(crate) fn process_message(&self, room_message: String) { pub(crate) fn process_message(&self, room_message: String) {
self.sender.send(AdminRoomEvent::ProcessMessage(room_message)).unwrap(); self.sender.send(AdminRoomEvent::ProcessMessage(room_message)).unwrap();

View file

@ -100,6 +100,7 @@ pub(crate) struct Service {
>, >,
} }
#[derive(Debug)]
enum TransactionStatus { enum TransactionStatus {
Running, Running,
// number of times failed, time of last failure // number of times failed, time of last failure
@ -114,6 +115,12 @@ struct HandlerInputs {
} }
type HandlerResponse = Result<OutgoingKind, (OutgoingKind, Error)>; type HandlerResponse = Result<OutgoingKind, (OutgoingKind, Error)>;
/// Borrows the [`OutgoingKind`] carried by a handler response.
///
/// Both variants of [`HandlerResponse`] hold an `OutgoingKind`: directly in
/// `Ok`, and as the first tuple element in `Err`. This returns a reference to
/// whichever one is present, ignoring the error value.
fn outgoing_kind_from_response(response: &HandlerResponse) -> &OutgoingKind {
    // The or-pattern covers every variant, so the binding is irrefutable.
    let (Ok(outgoing_kind) | Err((outgoing_kind, _))) = response;
    outgoing_kind
}
type TransactionStatusMap = HashMap<OutgoingKind, TransactionStatus>; type TransactionStatusMap = HashMap<OutgoingKind, TransactionStatus>;
impl Service { impl Service {
@ -197,6 +204,14 @@ impl Service {
} }
} }
#[tracing::instrument(
skip(self, current_transaction_status),
fields(
current_status = ?current_transaction_status.get(
outgoing_kind_from_response(&response)
),
),
)]
fn handle_futures( fn handle_futures(
&self, &self,
response: HandlerResponse, response: HandlerResponse,
@ -259,6 +274,12 @@ impl Service {
} }
} }
#[tracing::instrument(
skip(self, event, key, current_transaction_status),
fields(
current_status = ?current_transaction_status.get(&outgoing_kind),
),
)]
fn handle_receiver( fn handle_receiver(
&self, &self,
outgoing_kind: OutgoingKind, outgoing_kind: OutgoingKind,