Fix spans in tokio::spawn-ed tasks

tokio::spawn is a span boundary: the spawned future has no parent span.

For short futures, we simply inherit the current span with
`.in_current_span()`.

For long-running futures containing a sleeping infinite loop, we don't
actually want a span on the entire task or even the entire loop body;
either would result in very long spans. Instead, we put the outermost span
(created using #[tracing::instrument] or .instrument()) around the
actual work happening after the sleep, which results in a new root span
being created after every sleep.
This commit is contained in:
Lambda 2024-05-19 21:39:13 +00:00
parent 5e9e5b76bc
commit ac42e0bfff
3 changed files with 109 additions and 70 deletions

View file

@ -22,7 +22,7 @@ use ruma::{
CanonicalJsonValue, EventId, OwnedDeviceId, OwnedEventId, OwnedRoomId,
OwnedUserId, RoomId, UserId,
};
use tracing::{debug, error, info, warn};
use tracing::{debug, error, info, info_span, warn, Instrument};
use crate::{
service::rooms::timeline::PduCount, services, utils, Config, Error,
@ -1200,20 +1200,22 @@ impl KeyValueDatabase {
loop {
#[cfg(unix)]
tokio::select! {
_ = i.tick() => {
let msg = tokio::select! {
_ = i.tick() => || {
debug!("cleanup: Timer ticked");
}
_ = s.recv() => {
},
_ = s.recv() => || {
debug!("cleanup: Received SIGHUP");
}
},
};
#[cfg(not(unix))]
{
let msg = {
i.tick().await;
debug!("cleanup: Timer ticked")
}
|| debug!("cleanup: Timer ticked")
};
async {
msg();
let start = Instant::now();
if let Err(e) = services().globals.cleanup() {
error!("cleanup: Errored: {}", e);
@ -1221,6 +1223,9 @@ impl KeyValueDatabase {
debug!("cleanup: Finished in {:?}", start.elapsed());
}
}
.instrument(info_span!("database_cleanup"))
.await;
}
});
}
}

View file

@ -218,14 +218,7 @@ impl Service {
pub(crate) fn start_handler(self: &Arc<Self>) {
let self2 = Arc::clone(self);
tokio::spawn(async move {
self2.handler().await;
});
}
async fn handler(&self) {
let mut receiver = self.receiver.lock().await;
// TODO: Use futures when we have long admin commands
let mut receiver = self2.receiver.lock().await;
let grapevine_user = UserId::parse(format!(
"@{}:{}",
if services().globals.config.conduit_compat {
@ -237,13 +230,35 @@ impl Service {
))
.expect("admin bot username should be valid");
if let Ok(Some(grapevine_room)) = services().admin.get_admin_room() {
let Ok(Some(grapevine_room)) = services().admin.get_admin_room()
else {
return;
};
loop {
let event = receiver
.recv()
.await
.expect("admin command channel has been closed");
Self::handle_event(
&self2,
event,
&grapevine_room,
&grapevine_user,
)
.await;
}
});
}
#[tracing::instrument(skip(self, grapevine_room, grapevine_user))]
async fn handle_event(
&self,
event: AdminRoomEvent,
grapevine_room: &OwnedRoomId,
grapevine_user: &ruma::OwnedUserId,
) {
let message_content = match event {
AdminRoomEvent::SendMessage(content) => content,
AdminRoomEvent::ProcessMessage(room_message) => {
@ -275,15 +290,13 @@ impl Service {
state_key: None,
redacts: None,
},
&grapevine_user,
&grapevine_room,
grapevine_user,
grapevine_room,
&state_lock,
)
.await
.unwrap();
}
}
}
pub(crate) fn process_message(&self, room_message: String) {
self.sender.send(AdminRoomEvent::ProcessMessage(room_message)).unwrap();

View file

@ -100,6 +100,7 @@ pub(crate) struct Service {
>,
}
#[derive(Debug)]
enum TransactionStatus {
Running,
// number of times failed, time of last failure
@ -114,6 +115,12 @@ struct HandlerInputs {
}
type HandlerResponse = Result<OutgoingKind, (OutgoingKind, Error)>;
/// Borrows the [`OutgoingKind`] carried by a handler response.
///
/// Both the success value and the first element of the error tuple hold an
/// `OutgoingKind`, so the same reference is returned whichever variant
/// `response` is; the error payload itself is ignored.
fn outgoing_kind_from_response(response: &HandlerResponse) -> &OutgoingKind {
    // The or-pattern covers every variant of the `Result`, so this `let` is
    // irrefutable and needs no `else` branch.
    let (Ok(kind) | Err((kind, _))) = response;
    kind
}
type TransactionStatusMap = HashMap<OutgoingKind, TransactionStatus>;
impl Service {
@ -197,6 +204,14 @@ impl Service {
}
}
#[tracing::instrument(
skip(self, current_transaction_status),
fields(
current_status = ?current_transaction_status.get(
outgoing_kind_from_response(&response)
),
),
)]
fn handle_futures(
&self,
response: HandlerResponse,
@ -259,6 +274,12 @@ impl Service {
}
}
#[tracing::instrument(
skip(self, event, key, current_transaction_status),
fields(
current_status = ?current_transaction_status.get(&outgoing_kind),
),
)]
fn handle_receiver(
&self,
outgoing_kind: OutgoingKind,