diff --git a/Cargo.lock b/Cargo.lock
index caff42d9..43e1dd48 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -640,6 +640,12 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"

+[[package]]
+name = "fastrand"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
+
 [[package]]
 name = "fdeflate"
 version = "0.3.4"
@@ -843,6 +849,7 @@ dependencies = [
  "serde_yaml",
  "sha-1",
  "strum",
+ "tempfile",
  "thiserror",
  "thread_local",
  "tikv-jemallocator",
@@ -2782,6 +2789,19 @@ dependencies = [
  "futures-core",
 ]

+[[package]]
+name = "tempfile"
+version = "3.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64"
+dependencies = [
+ "cfg-if",
+ "fastrand",
+ "once_cell",
+ "rustix",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "terminal_size"
 version = "0.3.0"
diff --git a/Cargo.toml b/Cargo.toml
index 7904770e..f372bb79 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -154,3 +154,6 @@ jemalloc = ["dep:tikv-jemallocator"]
 rocksdb = ["dep:rocksdb"]
 sqlite = ["dep:rusqlite", "dep:parking_lot", "tokio/signal"]
 systemd = ["dep:sd-notify"]
+
+[dev-dependencies]
+tempfile = "3.12.0"
diff --git a/src/error.rs b/src/error.rs
index 106484df..a0955b30 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -85,6 +85,50 @@ pub(crate) enum ServerNameChanged {
     Renamed,
 }

+/// Errors copying a directory recursively.
+///
+/// Returned by the [`crate::utils::copy_dir`] function.
+// Missing docs are allowed here since that kind of information should be
+// encoded in the error messages themselves anyway.
+#[allow(missing_docs)]
+#[derive(Error, Debug)]
+pub(crate) enum CopyDir {
+    #[error("source and destination paths overlap")]
+    Overlap,
+
+    #[error("destination path already exists")]
+    AlreadyExists,
+
+    #[error("failed to canonicalize source path to check for overlap")]
+    CanonicalizeIn(#[source] std::io::Error),
+
+    #[error("failed to canonicalize destination path to check for overlap")]
+    CanonicalizeOut(#[source] std::io::Error),
+
+    #[error("failed to check whether destination path exists")]
+    CheckExists(#[source] std::io::Error),
+
+    #[error("failed to create destination directory at {}", _0.display())]
+    CreateDir(PathBuf, #[source] std::io::Error),
+
+    #[error("failed to read contents of directory at {}", _0.display())]
+    ReadDir(PathBuf, #[source] std::io::Error),
+
+    #[error("failed to read file metadata at {}", _0.display())]
+    Metadata(PathBuf, #[source] std::io::Error),
+
+    #[error("failed to copy file from {} to {}", from.display(), to.display())]
+    CopyFile {
+        from: PathBuf,
+        to: PathBuf,
+        #[source]
+        error: std::io::Error,
+    },
+
+    #[error("source directory contains a symlink at {}. Refusing to copy.", _0.display())]
+    Symlink(PathBuf),
+}
+
 /// Observability initialization errors
 // Missing docs are allowed here since that kind of information should be
 // encoded in the error messages themselves anyway.
diff --git a/src/utils.rs b/src/utils.rs
index c9cafa56..5356cbd9 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -6,6 +6,8 @@ use std::{
     borrow::Cow,
     cmp, fmt,
     fmt::Write,
+    io,
+    path::{Component, Path, PathBuf},
     str::FromStr,
     time::{SystemTime, UNIX_EPOCH},
 };
@@ -18,6 +20,7 @@ use ruma::{
     api::client::error::ErrorKind, canonical_json::try_from_json_map,
     CanonicalJsonError, CanonicalJsonObject, MxcUri, MxcUriError, OwnedMxcUri,
 };
+use tokio::fs;

 use crate::{Error, Result};

@@ -379,9 +382,189 @@ pub(crate) fn u8_slice_to_hex(slice: &[u8]) -> String {
     })
 }

+/// Upper bound on the number of symlinks [`partial_canonicalize`] follows.
+///
+/// This is the same limit Linux applies during path resolution. Hitting it
+/// almost always means the path contains a symlink cycle.
+const MAX_SYMLINK_HOPS: usize = 40;
+
+/// Canonicalize a path where some components may not exist yet.
+///
+/// It's assumed that non-existent components will be created as
+/// directories. This should match the result of [`fs::canonicalize`]
+/// _after_ calling [`fs::create_dir_all`] on `path`.
+///
+/// # Errors
+///
+/// Returns an error if the current directory cannot be determined, if a
+/// component cannot be inspected or read for a reason other than not
+/// existing, or if more than [`MAX_SYMLINK_HOPS`] symlinks are encountered
+/// (which indicates a symlink cycle).
+async fn partial_canonicalize(path: &Path) -> io::Result<PathBuf> {
+    let mut ret = std::env::current_dir()?;
+    let mut symlinks_followed = 0_usize;
+
+    let mut base_path = Cow::Borrowed(path);
+    let mut components = base_path.components();
+
+    while let Some(component) = components.next() {
+        match component {
+            Component::Prefix(_) | Component::RootDir => {
+                let component_path: &Path = component.as_ref();
+                component_path.clone_into(&mut ret);
+            }
+            Component::CurDir => (),
+            Component::ParentDir => {
+                ret.pop();
+            }
+            Component::Normal(p) => {
+                let component_path = ret.join(p);
+                match fs::symlink_metadata(&component_path).await {
+                    // path is a symlink
+                    Ok(metadata) if metadata.is_symlink() => {
+                        // guard against symlink cycles, which never terminate
+                        symlinks_followed += 1;
+                        if symlinks_followed > MAX_SYMLINK_HOPS {
+                            return Err(io::Error::new(
+                                io::ErrorKind::Other,
+                                "too many levels of symbolic links",
+                            ));
+                        }
+                        let destination =
+                            fs::read_link(&component_path).await?;
+                        // iterate over the symlink destination components
+                        // before continuing with the original path
+                        base_path =
+                            Cow::Owned(destination.join(components.as_path()));
+                        components = base_path.components();
+                    }
+                    // path exists, not a symlink
+                    Ok(_) => {
+                        ret.push(p);
+                    }
+                    // path does not exist
+                    Err(error) if error.kind() == io::ErrorKind::NotFound => {
+                        // assume a directory will be created here
+                        ret.push(p);
+                    }
+                    Err(error) => return Err(error),
+                }
+            }
+        }
+    }
+
+    Ok(ret)
+}
+
+/// Recursively copy a directory from `root_in` to `root_out`.
+///
+/// This function is not protected against symlink-swapping attacks. Do not use
+/// it when any subdirectories or parents of `root_in` or `root_out` may be
+/// writable by an untrusted user.
+///
+/// If `root_in` and `root_out` are the same path, an error is returned.
+///
+/// If a directory or file already exists at `root_out`, an error is returned.
+///
+/// If the parent directories of the `root_out` path do not exist, they will be
+/// created.
+///
+/// If an error occurs, the copy will be interrupted, and the output directory
+/// may be left in an intermediate state. If this is undesirable, the caller
+/// should delete the output directory on an error.
+///
+/// If the `root_in` directory contains a symlink, aborts the copy and returns
+/// a [`crate::error::CopyDir::Symlink`] error.
+pub(crate) async fn copy_dir(
+    root_in: &Path,
+    root_out: &Path,
+) -> Result<(), crate::error::CopyDir> {
+    use crate::error::CopyDir as Error;
+
+    let root_in =
+        fs::canonicalize(root_in).await.map_err(Error::CanonicalizeIn)?;
+    let root_out =
+        partial_canonicalize(root_out).await.map_err(Error::CanonicalizeOut)?;
+
+    // Reject copies where one tree contains (or equals) the other.
+    if root_in.starts_with(&root_out) || root_out.starts_with(&root_in) {
+        return Err(Error::Overlap);
+    }
+    if fs::try_exists(&root_out).await.map_err(Error::CheckExists)? {
+        return Err(Error::AlreadyExists);
+    }
+
+    if let Some(parent) = root_out.parent() {
+        fs::create_dir_all(parent)
+            .await
+            .map_err(|e| Error::CreateDir(parent.to_owned(), e))?;
+    }
+    // Call 'create_dir' separately for the last dir so that we get an error if
+    // it already exists. 'try_exists' doesn't fully check for this case
+    // because TOCTOU.
+    fs::create_dir(&root_out)
+        .await
+        .map_err(|e| Error::CreateDir(root_out.clone(), e))?;
+
+    // Stack of directories left to copy, relative to both roots.
+    let mut todo = vec![PathBuf::from(".")];
+
+    while let Some(path) = todo.pop() {
+        let dir_in = root_in.join(&path);
+        let dir_out = root_out.join(&path);
+
+        let mut entries = fs::read_dir(&dir_in)
+            .await
+            .map_err(|e| Error::ReadDir(dir_in.clone(), e))?;
+        while let Some(entry) = entries
+            .next_entry()
+            .await
+            .map_err(|e| Error::ReadDir(dir_in.clone(), e))?
+        {
+            let entry_in = dir_in.join(entry.file_name());
+            let entry_out = dir_out.join(entry.file_name());
+            let file_type = entry
+                .file_type()
+                .await
+                .map_err(|e| Error::Metadata(entry_in.clone(), e))?;
+
+            if file_type.is_dir() {
+                fs::create_dir(&entry_out)
+                    .await
+                    .map_err(|e| Error::CreateDir(entry_out.clone(), e))?;
+                todo.push(path.join(entry.file_name()));
+            } else if file_type.is_symlink() {
+                return Err(Error::Symlink(entry_in));
+            } else {
+                fs::copy(&entry_in, &entry_out).await.map_err(|error| {
+                    Error::CopyFile {
+                        from: entry_in.clone(),
+                        to: entry_out.clone(),
+                        error,
+                    }
+                })?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
-    use crate::utils::dbg_truncate_str;
+    use std::{
+        collections::HashMap,
+        io,
+        path::{Path, PathBuf},
+    };
+
+    use tempfile::TempDir;
+    use tokio::fs;
+
+    use crate::{
+        error,
+        utils::{copy_dir, dbg_truncate_str, partial_canonicalize},
+    };

     #[test]
     fn test_truncate_str() {
@@ -395,4 +578,152 @@ mod tests {
         assert_eq!(dbg_truncate_str(ok_hand, ok_hand.len() - 1), "👌🏽");
         assert_eq!(dbg_truncate_str(ok_hand, ok_hand.len()), "👌🏽");
     }
+
+    #[tokio::test]
+    async fn test_partial_canonicalize() {
+        let tmp_dir =
+            TempDir::with_prefix("test_partial_canonicalize").unwrap();
+        let path = tmp_dir.path();
+
+        fs::create_dir(&path.join("dir")).await.unwrap();
+        fs::symlink(path.join("dir"), path.join("absolute-link-to-dir"))
+            .await
+            .unwrap();
+        fs::symlink("./dir", path.join("relative-link-to-dir")).await.unwrap();
+
+        assert_eq!(partial_canonicalize(path).await.unwrap(), path);
+        assert_eq!(partial_canonicalize(&path.join("./")).await.unwrap(), path);
+        assert_eq!(
+            partial_canonicalize(&path.join("dir/..")).await.unwrap(),
+            path
+        );
+        assert_eq!(
+            partial_canonicalize(&path.join("absolute-link-to-dir"))
+                .await
+                .unwrap(),
+            path.join("dir")
+        );
+        assert_eq!(
+            partial_canonicalize(&path.join("relative-link-to-dir"))
+                .await
+                .unwrap(),
+            path.join("dir")
+        );
+        assert_eq!(
+            partial_canonicalize(&path.join("absolute-link-to-dir/new-dir"))
+                .await
+                .unwrap(),
+            path.join("dir/new-dir")
+        );
+        assert_eq!(
+            partial_canonicalize(
+                &path.join("absolute-link-to-dir/new-dir/../..")
+            )
+            .await
+            .unwrap(),
+            path,
+        );
+
+        // A symlink cycle must produce an error instead of looping forever.
+        fs::symlink("./loop", path.join("loop")).await.unwrap();
+        assert!(partial_canonicalize(&path.join("loop")).await.is_err());
+
+        tmp_dir.close().unwrap();
+    }
+
+    #[derive(Clone, Debug, Eq, PartialEq)]
+    enum PathContents {
+        Dir,
+        Symlink(PathBuf),
+        File(Vec<u8>),
+    }
+
+    async fn dir_contents(
+        root: &Path,
+    ) -> io::Result<HashMap<PathBuf, PathContents>> {
+        let mut ret = HashMap::new();
+
+        let mut todo = vec![root.to_owned()];
+
+        while let Some(path) = todo.pop() {
+            let metadata = fs::symlink_metadata(&path).await?;
+            let contents = if metadata.is_file() {
+                PathContents::File(fs::read(&path).await?)
+            } else if metadata.is_dir() {
+                let mut entries = fs::read_dir(&path).await?;
+                while let Some(entry) = entries.next_entry().await? {
+                    todo.push(entry.path());
+                }
+                PathContents::Dir
+            } else if metadata.is_symlink() {
+                PathContents::Symlink(fs::read_link(&path).await?)
+            } else {
+                continue;
+            };
+            ret.insert(path.strip_prefix(root).unwrap().to_owned(), contents);
+        }
+
+        Ok(ret)
+    }
+
+    #[tokio::test]
+    async fn test_copy_dir_simple() {
+        let tmp_dir = TempDir::with_prefix("test_copy_dir_simple").unwrap();
+        let path = tmp_dir.path();
+
+        fs::create_dir(&path.join("src")).await.unwrap();
+        fs::create_dir(&path.join("src/subdir")).await.unwrap();
+        fs::create_dir(&path.join("src/empty-subdir")).await.unwrap();
+        fs::write(&path.join("src/a.txt"), b"foo").await.unwrap();
+        fs::write(&path.join("src/subdir/b.txt"), b"bar").await.unwrap();
+
+        copy_dir(&path.join("src"), &path.join("dst")).await.unwrap();
+
+        let src_contents = dir_contents(&path.join("src")).await.unwrap();
+        let dst_contents = dir_contents(&path.join("dst")).await.unwrap();
+        assert_eq!(src_contents, dst_contents);
+
+        tmp_dir.close().unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_copy_dir_overlap_error() {
+        let tmp_dir =
+            TempDir::with_prefix("test_copy_dir_overlap_error").unwrap();
+        let path = tmp_dir.path();
+
+        fs::create_dir(&path.join("src")).await.unwrap();
+
+        assert!(matches!(
+            copy_dir(&path.join("src"), &path.join("src/dst")).await,
+            Err(error::CopyDir::Overlap)
+        ));
+        assert!(matches!(
+            copy_dir(&path.join("src"), &path.join("src")).await,
+            Err(error::CopyDir::Overlap)
+        ));
+        assert!(matches!(
+            copy_dir(&path.join("src"), path).await,
+            Err(error::CopyDir::Overlap)
+        ));
+
+        tmp_dir.close().unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_copy_dir_symlink_error() {
+        let tmp_dir =
+            TempDir::with_prefix("test_copy_dir_symlink_error").unwrap();
+        let path = tmp_dir.path();
+
+        fs::create_dir(&path.join("src")).await.unwrap();
+        fs::symlink("./link-target", &path.join("src/link")).await.unwrap();
+
+        assert!(matches!(
+            copy_dir(&path.join("src"), &path.join("dst")).await,
+            Err(error::CopyDir::Symlink(p)) if p == path.join("src/link")
+        ));
+
+        tmp_dir.close().unwrap();
+    }
 }