diff --git a/Cargo.lock b/Cargo.lock index 23f938e8..93d9cf08 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -811,6 +811,7 @@ dependencies = [ "opentelemetry", "opentelemetry-jaeger", "parking_lot", + "phf", "rand", "regex", "reqwest", @@ -1625,6 +1626,48 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn 2.0.52", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.5" @@ -2452,6 +2495,12 @@ dependencies = [ "time", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" diff --git a/Cargo.toml b/Cargo.toml index 5b37280d..f679e43d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -106,6 +106,7 @@ num_cpus = "1.15.0" opentelemetry = { version = "0.18.0", features = ["rt-tokio"] } opentelemetry-jaeger = { version = "0.17.0", features = ["rt-tokio"] } parking_lot = { version = "0.12.1", optional = true } +phf = { version = "0.11.2", features = ["macros"] } rand = "0.8.5" regex = "1.8.1" reqwest = { version = "0.11.18", default-features = false, features = ["rustls-tls-native-roots", "socks"] } diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index cbea2009..aed354a0 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -1,5 +1,11 @@ use std::time::Duration; +use axum::response::IntoResponse; +use http::{ + header::{CONTENT_DISPOSITION, CONTENT_SECURITY_POLICY, CONTENT_TYPE}, + HeaderName, HeaderValue, +}; +use phf::{phf_set, Set}; use ruma::api::client::{ error::ErrorKind, media::{ @@ -7,11 +13,104 @@ use ruma::api::client::{ get_content_thumbnail, get_media_config, }, }; +use tracing::error; use crate::{service::media::FileMeta, services, utils, Ar, Error, Ra, Result}; const MXC_LENGTH: usize = 32; +/// `Content-Type`s that can be rendered inline in a browser without risking XSS +/// +/// Cargo-culted from Synapse. Note that SVG can contain inline JavaScript. +static INLINE_CONTENT_TYPES: Set<&str> = phf_set! { + // Keep sorted + "application/json", + "application/ld+json", + "audio/aac", + "audio/flac", + "audio/mp4", + "audio/mpeg", + "audio/ogg", + "audio/wav", + "audio/wave", + "audio/webm", + "audio/x-flac", + "audio/x-pn-wav", + "audio/x-wav", + "image/apng", + "image/avif", + "image/gif", + "image/jpeg", + "image/png", + "image/webp", + "text/css", + "text/csv", + "text/plain", + "video/mp4", + "video/ogg", + "video/quicktime", + "video/webm", +}; + +/// Value for the `Content-Security-Policy` header +/// +/// Cargo-culted from Synapse. +fn content_security_policy() -> HeaderValue { + [ + "sandbox", + "default-src 'none'", + "script-src 'none'", + "plugin-types application/pdf", + "style-src 'unsafe-inline'", + "media-src 'self'", + "object-src 'self'", + ] + .join("; ") + .try_into() + .expect("hardcoded header value should be valid") +} + +/// Determine a `Content-Disposition` header that prevents XSS +// TODO: In some of the places this function is called, we could parse the +// desired filename out of an existing `Content-Disposition` header value, such +// as what we're storing in the database or what we receive over federation. +// Doing this correctly is tricky, so I'm skipping it for now. +fn content_disposition_for( + content_type: Option<&str>, + filename: Option<&str>, +) -> String { + match ( + content_type.is_some_and(|x| INLINE_CONTENT_TYPES.contains(x)), + filename, + ) { + (true, None) => "inline".to_owned(), + (true, Some(x)) => format!("inline; filename={x}"), + (false, None) => "attachment".to_owned(), + (false, Some(x)) => format!("attachment; filename={x}"), + } +} + +/// Set a header, but panic if it was already set +/// +/// # Panics +/// +/// Panics if the header was already set. +fn set_header_or_panic( + response: &mut axum::response::Response, + header_name: HeaderName, + header_value: HeaderValue, +) { + if let Some(header_value) = response.headers().get(&header_name) { + error!(?header_name, ?header_value, "unexpected pre-existing header"); + panic!( + "expected {header_name:?} to be unset but it was set to \ + {header_value:?}" + ); + } + + response.headers_mut().insert(header_name, header_value); +} + /// # `GET /_matrix/media/r0/config` /// /// Returns max upload size. @@ -86,7 +185,12 @@ pub(crate) async fn get_remote_content( ) .await?; - Ok(content_response) + Ok(get_content::v3::Response { + file: content_response.file, + content_disposition: content_response.content_disposition, + content_type: content_response.content_type, + cross_origin_resource_policy: Some("cross-origin".to_owned()), + }) } /// # `GET /_matrix/media/r0/download/{serverName}/{mediaId}` @@ -96,28 +200,55 @@ pub(crate) async fn get_remote_content( /// - Only allows federation if `allow_remote` is true pub(crate) async fn get_content_route( body: Ar, -) -> Result> { +) -> Result { + get_content_route_ruma(body).await.map(|x| { + let mut r = Ra(x).into_response(); + + set_header_or_panic( + &mut r, + CONTENT_SECURITY_POLICY, + content_security_policy(), + ); + + r + }) +} + +async fn get_content_route_ruma( + body: Ar, +) -> Result { let mxc = format!("mxc://{}/{}", body.server_name, body.media_id); if let Some(FileMeta { - content_disposition, content_type, file, + .. }) = services().media.get(mxc.clone()).await? { - Ok(Ra(get_content::v3::Response { + Ok(get_content::v3::Response { file, + content_disposition: Some(content_disposition_for( + content_type.as_deref(), + None, + )), content_type, - content_disposition, cross_origin_resource_policy: Some("cross-origin".to_owned()), - })) + }) } else if &*body.server_name != services().globals.server_name() && body.allow_remote { let remote_content_response = get_remote_content(&mxc, &body.server_name, body.media_id.clone()) .await?; - Ok(Ra(remote_content_response)) + Ok(get_content::v3::Response { + file: remote_content_response.file, + content_disposition: Some(content_disposition_for( + remote_content_response.content_type.as_deref(), + None, + )), + content_type: remote_content_response.content_type, + cross_origin_resource_policy: Some("cross-origin".to_owned()), + }) } else { Err(Error::BadRequest(ErrorKind::NotFound, "Media not found.")) } @@ -130,7 +261,23 @@ pub(crate) async fn get_content_route( /// - Only allows federation if `allow_remote` is true pub(crate) async fn get_content_as_filename_route( body: Ar, -) -> Result> { +) -> Result { + get_content_as_filename_route_ruma(body).await.map(|x| { + let mut r = Ra(x).into_response(); + + set_header_or_panic( + &mut r, + CONTENT_SECURITY_POLICY, + content_security_policy(), + ); + + r + }) +} + +pub(crate) async fn get_content_as_filename_route_ruma( + body: Ar, +) -> Result { let mxc = format!("mxc://{}/{}", body.server_name, body.media_id); if let Some(FileMeta { @@ -139,15 +286,15 @@ pub(crate) async fn get_content_as_filename_route( .. }) = services().media.get(mxc.clone()).await? { - Ok(Ra(get_content_as_filename::v3::Response { + Ok(get_content_as_filename::v3::Response { file, - content_type, - content_disposition: Some(format!( - "inline; filename={}", - body.filename + content_disposition: Some(content_disposition_for( + content_type.as_deref(), + Some(body.filename.as_str()), )), + content_type, cross_origin_resource_policy: Some("cross-origin".to_owned()), - })) + }) } else if &*body.server_name != services().globals.server_name() && body.allow_remote { @@ -155,15 +302,15 @@ pub(crate) async fn get_content_as_filename_route( get_remote_content(&mxc, &body.server_name, body.media_id.clone()) .await?; - Ok(Ra(get_content_as_filename::v3::Response { - content_disposition: Some(format!( - "inline: filename={}", - body.filename + Ok(get_content_as_filename::v3::Response { + content_disposition: Some(content_disposition_for( + remote_content_response.content_type.as_deref(), + Some(body.filename.as_str()), )), content_type: remote_content_response.content_type, file: remote_content_response.file, cross_origin_resource_policy: Some("cross-origin".to_owned()), - })) + }) } else { Err(Error::BadRequest(ErrorKind::NotFound, "Media not found.")) } @@ -176,7 +323,36 @@ pub(crate) async fn get_content_as_filename_route( /// - Only allows federation if `allow_remote` is true pub(crate) async fn get_content_thumbnail_route( body: Ar, -) -> Result> { +) -> Result { + get_content_thumbnail_route_ruma(body).await.map(|x| { + let mut r = Ra(x).into_response(); + + let content_type = r + .headers() + .get(CONTENT_TYPE) + .and_then(|x| std::str::from_utf8(x.as_ref()).ok()) + .map(ToOwned::to_owned); + + set_header_or_panic( + &mut r, + CONTENT_SECURITY_POLICY, + content_security_policy(), + ); + set_header_or_panic( + &mut r, + CONTENT_DISPOSITION, + content_disposition_for(content_type.as_deref(), None) + .try_into() + .expect("generated header value should be valid"), + ); + + r + }) +} + +async fn get_content_thumbnail_route_ruma( + body: Ar, +) -> Result { let mxc = format!("mxc://{}/{}", body.server_name, body.media_id); if let Some(FileMeta { @@ -196,11 +372,11 @@ pub(crate) async fn get_content_thumbnail_route( ) .await? { - Ok(Ra(get_content_thumbnail::v3::Response { + Ok(get_content_thumbnail::v3::Response { file, content_type, cross_origin_resource_policy: Some("cross-origin".to_owned()), - })) + }) } else if &*body.server_name != services().globals.server_name() && body.allow_remote { @@ -233,7 +409,11 @@ pub(crate) async fn get_content_thumbnail_route( ) .await?; - Ok(Ra(get_thumbnail_response)) + Ok(get_content_thumbnail::v3::Response { + file: get_thumbnail_response.file, + content_type: get_thumbnail_response.content_type, + cross_origin_resource_policy: Some("cross-origin".to_owned()), + }) } else { Err(Error::BadRequest(ErrorKind::NotFound, "Media not found.")) } diff --git a/src/service/media.rs b/src/service/media.rs index 9d43d607..f5127842 100644 --- a/src/service/media.rs +++ b/src/service/media.rs @@ -13,7 +13,13 @@ mod data; pub(crate) use data::Data; pub(crate) struct FileMeta { + // This gets written to the database but we no longer read it + // + // TODO: Write a database migration to get rid of this and instead store + // only the filename instead of the entire `Content-Disposition` header. + #[allow(dead_code)] pub(crate) content_disposition: Option, + pub(crate) content_type: Option, pub(crate) file: Vec, }