grapevine/src/service/globals.rs
Olivia Lee b82458a460
use event content in account_data service api instead of full events
This eliminates the possibility of passing an event that has a
mismatching type, reducing the space of possible invalid states.
2025-06-02 11:07:21 -07:00

663 lines
21 KiB
Rust

use std::{
collections::{BTreeMap, HashMap},
fs,
future::{self, Future},
iter,
net::{IpAddr, SocketAddr},
path::PathBuf,
sync::{
atomic::{self, AtomicBool},
Arc, RwLock as StdRwLock,
},
time::{Duration, Instant},
};
use base64::{engine::general_purpose, Engine as _};
use reqwest::dns::{Addrs, Name, Resolve, Resolving};
use ruma::{
api::federation::discovery::ServerSigningKeys,
events::{
push_rules::PushRulesEventContent,
room::message::RoomMessageEventContent, GlobalAccountDataEventType,
},
push::Ruleset,
serde::{Base64, Raw},
DeviceId, MilliSecondsSinceUnixEpoch, OwnedEventId, OwnedRoomAliasId,
OwnedRoomId, OwnedServerName, OwnedUserId, RoomAliasId, RoomVersionId,
ServerName, UInt, UserId,
};
use tokio::sync::{broadcast, Mutex, RwLock, Semaphore};
use tracing::{error, warn, Instrument};
use trust_dns_resolver::TokioAsyncResolver;
use crate::{
api::server_server::FedDest,
config::{MediaBackendConfig, MediaFilesystemConfig},
observability::FilterReloadHandles,
service::media::MediaFileKey,
services,
utils::on_demand_hashmap::{OnDemandHashMap, TokenSet},
Config, Error, Result,
};
// The `Data` trait object used as `Service::db` and the `SigningKeys` type are
// defined in the `data` submodule and re-exported here.
mod data;
pub(crate) use data::{Data, SigningKeys};
// Cache keyed by server name. Where this map is stored in `Service`, the value
// pair is described as "actual_destination, host".
type WellKnownMap = HashMap<OwnedServerName, (FedDest, String)>;
// Hostname -> (IP addresses, port). `FederationResolver` answers a lookup for
// a hostname found in this map with the first IP address and the stored port.
type TlsNameMap = HashMap<String, (Vec<IpAddr>, u16)>;
// Time of last failed try, number of failed tries
type RateLimitState = (Instant, u32);
// Markers for
// [`Service::roomid_mutex_state`]/[`Service::roomid_mutex_insert`]/
// [`Service::roomid_mutex_federation`]
//
// Each marker is an enum without variants, so no value of it can ever be
// constructed. The markers only appear as the second type parameter of the
// corresponding `TokenSet` field, which gives each of the three sets its own
// distinct type.
pub(crate) mod marker {
pub(crate) enum State {}
pub(crate) enum Insert {}
pub(crate) enum Federation {}
}
/// Process-wide state shared by the other services: the database handle, the
/// loaded configuration, the signing keypair, HTTP clients, rate limiters and
/// per-room locks.
pub(crate) struct Service {
// Database backend for everything this service persists.
pub(crate) db: &'static dyn Data,
// Only present when filter reload handles were passed to `Service::new`.
pub(crate) reload_handles: Option<Arc<RwLock<FilterReloadHandles>>>,
// Values are (actual_destination, host)
pub(crate) actual_destination_cache: Arc<RwLock<WellKnownMap>>,
// Shared with the `FederationResolver` installed in `federation_client`.
pub(crate) tls_name_override: Arc<StdRwLock<TlsNameMap>>,
pub(crate) config: Config,
// This server's signing keypair, loaded from the database in `new`.
keypair: Arc<ruma::signatures::Ed25519KeyPair>,
dns_resolver: Arc<TokioAsyncResolver>,
// `Some` only when `jwt_secret` is set in the config.
jwt_decoding_key: Option<jsonwebtoken::DecodingKey>,
// Client whose DNS lookups go through `FederationResolver`.
federation_client: reqwest::Client,
// Client whose DNS lookups go through `DefaultResolver`.
default_client: reqwest::Client,
// Room versions reported by `supported_room_versions`.
pub(crate) stable_room_versions: Vec<RoomVersionId>,
// `@conduit:<server_name>` when `conduit_compat` is set, otherwise
// `@grapevine:<server_name>`.
pub(crate) admin_bot_user_id: OwnedUserId,
// `#admins:<server_name>`
pub(crate) admin_bot_room_alias_id: OwnedRoomAliasId,
// The three rate limiter maps below store, per key, the time of the last
// failed try and the number of failed tries.
pub(crate) bad_event_ratelimiter:
Arc<RwLock<HashMap<OwnedEventId, RateLimitState>>>,
pub(crate) bad_signature_ratelimiter:
Arc<RwLock<HashMap<Vec<String>, RateLimitState>>>,
pub(crate) bad_query_ratelimiter:
Arc<RwLock<HashMap<OwnedServerName, RateLimitState>>>,
pub(crate) servername_ratelimiter:
OnDemandHashMap<OwnedServerName, Semaphore>,
pub(crate) roomid_mutex_insert: TokenSet<OwnedRoomId, marker::Insert>,
pub(crate) roomid_mutex_state: TokenSet<OwnedRoomId, marker::State>,
// this lock will be held longer
pub(crate) roomid_mutex_federation:
TokenSet<OwnedRoomId, marker::Federation>,
pub(crate) roomid_federationhandletime:
RwLock<HashMap<OwnedRoomId, (OwnedEventId, Instant)>>,
pub(crate) stateres_mutex: Arc<Mutex<()>>,
// Fired by `Service::shutdown`.
pub(crate) rotate: RotationHandler,
// Set to `true` by `Service::shutdown`.
pub(crate) shutdown: AtomicBool,
}
/// Handles "rotation" of long-polling requests. "Rotation" in this context is
/// similar to "rotation" of log files and the like.
///
/// This is utilized to have sync workers return early and release read locks on
/// the database.
///
/// Field 0 is the broadcast sender used by `watch` (to subscribe) and `fire`
/// (to notify). Field 1 is the receiver created together with the sender; it
/// is never read from.
pub(crate) struct RotationHandler(
broadcast::Sender<()>,
// TODO: Determine if it's safe to delete this field. I'm not deleting it
// right now because I'm unsure what implications that would have for how
// the sender expects to work.
//
// NOTE(review): tokio documents that `broadcast::Sender::send` fails when
// there are no active receivers, and `fire` calls `expect` on the result of
// `send`. Keeping this receiver alive is presumably what makes that
// `expect` safe when nobody is watching. Confirm before removing.
#[allow(dead_code)] broadcast::Receiver<()>,
);
impl RotationHandler {
pub(crate) fn new() -> Self {
let (s, r) = broadcast::channel(1);
Self(s, r)
}
pub(crate) fn watch(&self) -> impl Future<Output = ()> {
let mut r = self.0.subscribe();
async move {
r.recv().await.expect("should receive a message");
}
}
pub(crate) fn fire(&self) {
self.0.send(()).expect("should be able to send message");
}
}
// `Default` simply forwards to `RotationHandler::new`.
impl Default for RotationHandler {
fn default() -> Self {
Self::new()
}
}
/// Wrapper around [`trust_dns_resolver`]'s [`TokioAsyncResolver`] that can be
/// used with reqwest.
pub(crate) struct DefaultResolver {
// Shared resolver that performs the actual IP lookups.
inner: Arc<TokioAsyncResolver>,
}
impl DefaultResolver {
    /// Wraps an already configured resolver.
    fn new(inner: Arc<TokioAsyncResolver>) -> Self {
        Self { inner }
    }

    /// Looks up the IP addresses for `name` and yields them as socket
    /// addresses with port 0.
    ///
    /// The returned future is instrumented with the span that is current at
    /// the time of this call.
    fn resolve_inner(&self, name: Name) -> Resolving {
        let resolver = Arc::clone(&self.inner);
        Box::pin(
            async move {
                let ips = resolver.lookup_ip(name.as_str()).await?;
                let addrs: Addrs = Box::new(
                    ips.into_iter().map(|ip| SocketAddr::new(ip, 0)),
                );
                Ok(addrs)
            }
            .in_current_span(),
        )
    }
}
// Lets reqwest use `DefaultResolver` as its DNS resolver. The lookup itself is
// implemented in `DefaultResolver::resolve_inner`; this wrapper only adds the
// tracing span.
impl Resolve for DefaultResolver {
#[tracing::instrument(skip(self))]
fn resolve(&self, name: Name) -> Resolving {
self.resolve_inner(name)
}
}
/// Resolver used for outgoing requests to the federation API.
///
/// Hostnames that have been mapped to a different domain by SRV records in
/// [server discovery][1] are resolved to the SRV record target. This is done to
/// get reqwest to check the TLS certificate against the correct hostname
/// required in steps 3.3, 3.4, and 4 of the server discovery spec.
///
/// [1]: https://spec.matrix.org/v1.12/server-server-api/#server-discovery
pub(crate) struct FederationResolver {
// Fallback used for hostnames that have no usable override.
inner: Arc<DefaultResolver>,
// Per-hostname (IP addresses, port) overrides, consulted before `inner`.
overrides: Arc<StdRwLock<TlsNameMap>>,
}
impl FederationResolver {
pub(crate) fn new(
inner: Arc<DefaultResolver>,
overrides: Arc<StdRwLock<TlsNameMap>>,
) -> Self {
FederationResolver {
inner,
overrides,
}
}
}
impl Resolve for FederationResolver {
    /// Resolves `name`, preferring an entry from the override map.
    ///
    /// When the map has an entry for `name` with at least one IP address, the
    /// result is a single socket address built from the first IP and the
    /// stored port. Otherwise the lookup is delegated to the wrapped
    /// [`DefaultResolver`].
    ///
    /// # Panics
    ///
    /// Panics if the override lock is poisoned.
    #[tracing::instrument(skip(self))]
    fn resolve(&self, name: Name) -> Resolving {
        let overrides = self.overrides.read().unwrap();

        let pinned_addr = overrides.get(name.as_str()).and_then(|(ips, port)| {
            ips.first().map(|ip| SocketAddr::new(*ip, *port))
        });

        match pinned_addr {
            Some(addr) => {
                let addrs: Box<dyn Iterator<Item = SocketAddr> + Send> =
                    Box::new(iter::once(addr));
                let resolved: Resolving = Box::pin(future::ready(Ok(addrs)));
                resolved
            }
            None => self.inner.resolve_inner(name),
        }
    }
}
impl Service {
#[tracing::instrument(skip_all)]
pub(crate) fn new(
db: &'static dyn Data,
config: Config,
reload_handles: Option<FilterReloadHandles>,
) -> Result<Self> {
let keypair = db.load_keypair();
let keypair = match keypair {
Ok(k) => k,
Err(e) => {
error!("Keypair invalid. Deleting...");
db.remove_keypair()?;
return Err(e);
}
};
let tls_name_override = Arc::new(StdRwLock::new(TlsNameMap::new()));
let dns_resolver = Arc::new(
TokioAsyncResolver::tokio_from_system_conf().map_err(|e| {
error!(
"Failed to set up trust dns resolver with system config: \
{}",
e
);
Error::bad_config(
"Failed to set up trust dns resolver with system config.",
)
})?,
);
let default_resolver =
Arc::new(DefaultResolver::new(Arc::clone(&dns_resolver)));
let federation_resolver = Arc::new(FederationResolver::new(
Arc::clone(&default_resolver),
Arc::clone(&tls_name_override),
));
let jwt_decoding_key = config.jwt_secret.as_ref().map(|secret| {
jsonwebtoken::DecodingKey::from_secret(secret.as_bytes())
});
let default_client = reqwest_client_builder(&config)?
.dns_resolver(default_resolver)
.build()?;
let federation_client = reqwest_client_builder(&config)?
.dns_resolver(federation_resolver)
.build()?;
// Supported and stable room versions
let stable_room_versions = vec![
RoomVersionId::V6,
RoomVersionId::V7,
RoomVersionId::V8,
RoomVersionId::V9,
RoomVersionId::V10,
RoomVersionId::V11,
];
let admin_bot_user_id = UserId::parse(format!(
"@{}:{}",
if config.conduit_compat {
"conduit"
} else {
"grapevine"
},
config.server_name,
))
.expect("admin bot user ID should be valid");
let admin_bot_room_alias_id =
RoomAliasId::parse(format!("#admins:{}", config.server_name))
.expect("admin bot room alias ID should be valid");
let mut s = Self {
db,
config,
reload_handles: reload_handles.map(|h| Arc::new(RwLock::new(h))),
keypair: Arc::new(keypair),
dns_resolver,
actual_destination_cache: Arc::new(
RwLock::new(WellKnownMap::new()),
),
tls_name_override,
federation_client,
default_client,
jwt_decoding_key,
stable_room_versions,
admin_bot_user_id,
admin_bot_room_alias_id,
bad_event_ratelimiter: Arc::new(RwLock::new(HashMap::new())),
bad_signature_ratelimiter: Arc::new(RwLock::new(HashMap::new())),
bad_query_ratelimiter: Arc::new(RwLock::new(HashMap::new())),
servername_ratelimiter: OnDemandHashMap::new(
"servername_ratelimiter".to_owned(),
),
roomid_mutex_state: TokenSet::new("roomid_mutex_state".to_owned()),
roomid_mutex_insert: TokenSet::new(
"roomid_mutex_insert".to_owned(),
),
roomid_mutex_federation: TokenSet::new(
"roomid_mutex_federation".to_owned(),
),
roomid_federationhandletime: RwLock::new(HashMap::new()),
stateres_mutex: Arc::new(Mutex::new(())),
rotate: RotationHandler::new(),
shutdown: AtomicBool::new(false),
};
fs::create_dir_all(s.get_media_folder())?;
if !s.supported_room_versions().contains(&s.config.default_room_version)
{
error!(config=?s.config.default_room_version, fallback=?crate::config::default_default_room_version(), "Room version in config isn't supported, falling back to default version");
s.config.default_room_version =
crate::config::default_default_room_version();
};
Ok(s)
}
/// Check if `server_name` in the DB and config differ, return error if so
///
/// Matrix resource ownership is based on the server name; changing it
/// requires recreating the database from scratch. This check needs to be
/// done before background tasks are started to avoid data races.
// Allowed because this function calls `services()`
#[allow(clippy::unused_self)]
pub(crate) fn err_if_server_name_changed(
&self,
) -> Result<(), crate::error::ServerNameChanged> {
use crate::error::ServerNameChanged as Error;
if services()
.users
.count()
.map(|x| x > 0)
.map_err(Error::NonZeroUsers)?
{
let admin_bot = self.admin_bot_user_id.as_ref();
if !services()
.users
.exists(admin_bot)
.map_err(Error::AdminBotExists)?
{
return Err(Error::Renamed);
}
}
Ok(())
}
/// Returns this server's keypair.
///
/// This is the keypair loaded from the database when the service was
/// constructed.
pub(crate) fn keypair(&self) -> &ruma::signatures::Ed25519KeyPair {
&self.keypair
}
/// Returns a reqwest client which can be used to send requests
///
/// This client resolves hostnames through `DefaultResolver`.
pub(crate) fn default_client(&self) -> reqwest::Client {
// Client is cheap to clone (Arc wrapper) and avoids lifetime issues
self.default_client.clone()
}
/// Returns a client used for resolving .well-knowns
///
/// This client resolves hostnames through `FederationResolver`, so entries in
/// `tls_name_override` take precedence over regular DNS lookups.
pub(crate) fn federation_client(&self) -> reqwest::Client {
// Client is cheap to clone (Arc wrapper) and avoids lifetime issues
self.federation_client.clone()
}
/// Delegates to the database's `next_count`.
///
/// NOTE(review): presumably this advances a global counter and returns the
/// new value. Confirm in the data layer.
#[tracing::instrument(skip(self))]
pub(crate) fn next_count(&self) -> Result<u64> {
self.db.next_count()
}
/// Delegates to the database's `current_count`.
///
/// NOTE(review): presumably this reads the global counter without advancing
/// it. Confirm in the data layer.
#[tracing::instrument(skip(self))]
pub(crate) fn current_count(&self) -> Result<u64> {
self.db.current_count()
}
/// Awaits the database-level `watch` for the given user and device and
/// returns its result.
pub(crate) async fn watch(
&self,
user_id: &UserId,
device_id: &DeviceId,
) -> Result<()> {
self.db.watch(user_id, device_id).await
}
/// Delegates to the database's `cleanup` and returns its result.
pub(crate) fn cleanup(&self) -> Result<()> {
self.db.cleanup()
}
/// Returns the server name from the config.
pub(crate) fn server_name(&self) -> &ServerName {
self.config.server_name.as_ref()
}
/// Returns the `max_request_size` value from the config.
pub(crate) fn max_request_size(&self) -> UInt {
self.config.max_request_size
}
/// Returns the `federation.max_fetch_prev_events` value from the config.
pub(crate) fn max_fetch_prev_events(&self) -> u16 {
self.config.federation.max_fetch_prev_events
}
/// Returns the `allow_registration` flag from the config.
pub(crate) fn allow_registration(&self) -> bool {
self.config.allow_registration
}
/// Returns the `allow_encryption` flag from the config.
pub(crate) fn allow_encryption(&self) -> bool {
self.config.allow_encryption
}
/// Returns the `federation.enable` flag from the config.
pub(crate) fn allow_federation(&self) -> bool {
self.config.federation.enable
}
/// Returns the `allow_room_creation` flag from the config.
pub(crate) fn allow_room_creation(&self) -> bool {
self.config.allow_room_creation
}
/// Returns a copy of the default room version held in the config.
///
/// `Service::new` replaces an unsupported configured value with the built-in
/// default, so the value returned here is one of the supported versions.
pub(crate) fn default_room_version(&self) -> RoomVersionId {
self.config.default_room_version.clone()
}
/// Returns the `federation.trusted_servers` list from the config.
pub(crate) fn trusted_servers(&self) -> &[OwnedServerName] {
&self.config.federation.trusted_servers
}
/// Borrows the DNS resolver created in `Service::new`, which is the same
/// resolver that backs `DefaultResolver`.
pub(crate) fn dns_resolver(&self) -> &TokioAsyncResolver {
&self.dns_resolver
}
/// Returns the JWT decoding key, or `None` when no `jwt_secret` is set in the
/// config.
pub(crate) fn jwt_decoding_key(
&self,
) -> Option<&jsonwebtoken::DecodingKey> {
self.jwt_decoding_key.as_ref()
}
/// Returns the `turn.password` value from the config.
pub(crate) fn turn_password(&self) -> &String {
&self.config.turn.password
}
/// Returns the `turn.ttl` value from the config.
pub(crate) fn turn_ttl(&self) -> u64 {
self.config.turn.ttl
}
/// Returns the `turn.uris` list from the config.
pub(crate) fn turn_uris(&self) -> &[String] {
&self.config.turn.uris
}
/// Returns the `turn.username` value from the config.
pub(crate) fn turn_username(&self) -> &String {
&self.config.turn.username
}
/// Returns the `turn.secret` value from the config.
pub(crate) fn turn_secret(&self) -> &String {
&self.config.turn.secret
}
/// Returns the emergency password from the config, or `None` when it is not
/// set.
pub(crate) fn emergency_password(&self) -> Option<&str> {
self.config.emergency_password.as_deref()
}
/// Applies the emergency password setting to the admin bot account.
///
/// The admin bot's password is set to the configured emergency password
/// (or to `None` when the option is unset), and its global push rules are
/// replaced: with the server defaults when a password is configured, with an
/// empty ruleset otherwise. When a password was set, a warning is logged and
/// also sent through the admin service.
///
/// If an error occurs, it is logged.
pub(crate) fn set_emergency_access(&self) {
    // Returns whether an emergency password is configured.
    let apply = || -> Result<bool> {
        let admin_bot = self.admin_bot_user_id.as_ref();
        let password = self.emergency_password();

        services().users.set_password(admin_bot, password)?;

        let password_is_set = password.is_some();
        let ruleset = if password_is_set {
            Ruleset::server_default(admin_bot)
        } else {
            Ruleset::new()
        };

        services().account_data.update_global(
            admin_bot,
            &GlobalAccountDataEventType::PushRules,
            &Raw::new(
                &PushRulesEventContent {
                    global: ruleset,
                }
                .into(),
            )
            .expect("json serialization should always succeed"),
        )?;

        Ok(password_is_set)
    };

    match apply() {
        Ok(true) => {
            warn!(
                "The Grapevine account emergency password is set! \
                 Please unset it as soon as you finish admin account \
                 recovery!"
            );
            services().admin.send_message(
                RoomMessageEventContent::text_plain(
                    "The Grapevine account emergency password is set! \
                     Please unset it as soon as you finish admin \
                     account recovery!",
                ),
            );
        }
        Ok(false) => {}
        Err(error) => {
            error!(
                %error,
                "Could not set the configured emergency password for the \
                 Grapevine user",
            );
        }
    }
}
/// Returns a copy of the stable room version list built in `Service::new`.
pub(crate) fn supported_room_versions(&self) -> Vec<RoomVersionId> {
self.stable_room_versions.clone()
}
/// Hands `new_keys` for `origin` to the database's
/// `add_signing_key_from_trusted_server` and returns its result.
///
/// This doesn't actually check that the keys provided are newer than the
/// old set.
pub(crate) fn add_signing_key_from_trusted_server(
&self,
origin: &ServerName,
new_keys: ServerSigningKeys,
) -> Result<SigningKeys> {
self.db.add_signing_key_from_trusted_server(origin, new_keys)
}
/// Same as [`Service::add_signing_key_from_trusted_server`], except it will
/// move active keys not present in `new_keys` to `old_signing_keys`
///
/// The work is delegated to the database's `add_signing_key_from_origin`.
pub(crate) fn add_signing_key_from_origin(
&self,
origin: &ServerName,
new_keys: ServerSigningKeys,
) -> Result<SigningKeys> {
self.db.add_signing_key_from_origin(origin, new_keys)
}
/// Looks up the signing keys known for `origin`.
///
/// For this server's own name the keys come from
/// `SigningKeys::load_own_keys`; for every other server they are read from
/// the database. This returns `Ok(None)` when there are no keys found for
/// the server.
pub(crate) fn signing_keys_for(
    &self,
    origin: &ServerName,
) -> Result<Option<SigningKeys>> {
    if origin != self.server_name() {
        return self.db.signing_keys_for(origin);
    }

    Ok(Some(SigningKeys::load_own_keys()))
}
/// Filters the key map of multiple servers down to keys that should be
/// accepted given the expiry time, room version, and timestamp of the
/// parameters.
///
/// Servers for which [`Service::filter_keys_single_server`] returns `None`
/// are left out of the result entirely.
#[allow(clippy::unused_self)]
pub(crate) fn filter_keys_server_map(
    &self,
    keys: BTreeMap<String, SigningKeys>,
    timestamp: MilliSecondsSinceUnixEpoch,
    room_version_id: &RoomVersionId,
) -> BTreeMap<String, BTreeMap<String, Base64>> {
    let mut accepted = BTreeMap::new();

    for (server, server_keys) in keys {
        let filtered = self.filter_keys_single_server(
            server_keys,
            timestamp,
            room_version_id,
        );
        if let Some(verify_keys) = filtered {
            accepted.insert(server, verify_keys);
        }
    }

    accepted
}
/// Filters the keys of a single server down to keys that should be accepted
/// at `timestamp`.
///
/// Returns `None` unless the key set's `valid_until_ts` is later than
/// `timestamp`. Otherwise the result holds every current verify key plus
/// each old verify key whose `expired_ts` is later than `timestamp`.
///
/// The room version is currently not consulted.
#[allow(clippy::unused_self)]
pub(crate) fn filter_keys_single_server(
    &self,
    keys: SigningKeys,
    timestamp: MilliSecondsSinceUnixEpoch,
    _room_version_id: &RoomVersionId,
) -> Option<BTreeMap<String, Base64>> {
    if keys.valid_until_ts <= timestamp {
        return None;
    }

    // The key set as a whole is still valid, so all verify_keys are
    // accepted.
    let mut accepted: BTreeMap<_, _> = keys
        .verify_keys
        .into_iter()
        .map(|(id, key)| (id, key.key))
        .collect();

    // Old keys are only accepted if they had not yet expired at `timestamp`.
    for (id, old_key) in keys.old_verify_keys {
        if old_key.expired_ts > timestamp {
            accepted.insert(id, old_key.key);
        }
    }

    Some(accepted)
}
/// Returns the database version as reported by the database layer.
pub(crate) fn database_version(&self) -> Result<u64> {
self.db.database_version()
}
/// Passes `new_version` to the database layer's `bump_database_version`.
pub(crate) fn bump_database_version(&self, new_version: u64) -> Result<()> {
self.db.bump_database_version(new_version)
}
/// Returns a copy of the directory path configured for the filesystem media
/// backend.
pub(crate) fn get_media_folder(&self) -> PathBuf {
    match &self.config.media.backend {
        MediaBackendConfig::Filesystem(MediaFilesystemConfig {
            path,
        }) => path.clone(),
    }
}
/// Returns the path of the file that stores the media identified by `key`.
///
/// The file name is the unpadded URL-safe base64 encoding of the key bytes,
/// placed directly inside the media folder.
pub(crate) fn get_media_file(&self, key: &MediaFileKey) -> PathBuf {
    let file_name = general_purpose::URL_SAFE_NO_PAD.encode(key.as_bytes());
    self.get_media_folder().join(file_name)
}
/// Marks the service as shutting down and fires the rotation handler.
///
/// The flag is stored with relaxed ordering before the rotation is fired.
/// NOTE(review): presumably firing wakes pending `RotationHandler::watch`
/// futures so long-polling requests return and observe the flag. Confirm
/// against the callers.
pub(crate) fn shutdown(&self) {
self.shutdown.store(true, atomic::Ordering::Relaxed);
self.rotate.fire();
}
}
/// Creates the client builder shared by the default and federation clients.
///
/// The builder keeps no idle connections per host, uses a 30 second connect
/// timeout and a 3 minute overall timeout, and sends
/// `<package name>/<version>` as the user agent. A proxy is added when the
/// config provides one.
///
/// # Errors
///
/// Fails if the proxy configuration cannot be converted into a proxy.
fn reqwest_client_builder(config: &Config) -> Result<reqwest::ClientBuilder> {
    let user_agent =
        format!("{}/{}", env!("CARGO_PKG_NAME"), crate::version());

    let builder = reqwest::Client::builder()
        .pool_max_idle_per_host(0)
        .connect_timeout(Duration::from_secs(30))
        .timeout(Duration::from_secs(60 * 3))
        .user_agent(user_agent);

    let builder = match config.proxy.to_proxy()? {
        Some(proxy) => builder.proxy(proxy),
        None => builder,
    };

    Ok(builder)
}