add copy_dir helper function

This commit is contained in:
Benjamin Lee 2024-09-09 18:19:12 -07:00
parent d7dedb8f10
commit a2ed21f1c2
No known key found for this signature in database
GPG key ID: FB9624E2885D55A4
4 changed files with 371 additions and 1 deletions

20
Cargo.lock generated
View file

@ -640,6 +640,12 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fastrand"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
[[package]] [[package]]
name = "fdeflate" name = "fdeflate"
version = "0.3.4" version = "0.3.4"
@ -843,6 +849,7 @@ dependencies = [
"serde_yaml", "serde_yaml",
"sha-1", "sha-1",
"strum", "strum",
"tempfile",
"thiserror", "thiserror",
"thread_local", "thread_local",
"tikv-jemallocator", "tikv-jemallocator",
@ -2782,6 +2789,19 @@ dependencies = [
"futures-core", "futures-core",
] ]
[[package]]
name = "tempfile"
version = "3.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64"
dependencies = [
"cfg-if",
"fastrand",
"once_cell",
"rustix",
"windows-sys 0.59.0",
]
[[package]] [[package]]
name = "terminal_size" name = "terminal_size"
version = "0.3.0" version = "0.3.0"

View file

@ -154,3 +154,6 @@ jemalloc = ["dep:tikv-jemallocator"]
rocksdb = ["dep:rocksdb"] rocksdb = ["dep:rocksdb"]
sqlite = ["dep:rusqlite", "dep:parking_lot", "tokio/signal"] sqlite = ["dep:rusqlite", "dep:parking_lot", "tokio/signal"]
systemd = ["dep:sd-notify"] systemd = ["dep:sd-notify"]
[dev-dependencies]
tempfile = "3.12.0"

View file

@ -85,6 +85,50 @@ pub(crate) enum ServerNameChanged {
Renamed, Renamed,
} }
/// Failure modes of a recursive directory copy.
///
/// This is the error type of [`crate::utils::copy_dir`]. Variants that wrap
/// an [`std::io::Error`] expose it as their `source`, and variants that carry
/// a [`PathBuf`] name the path that was being processed when the failure
/// occurred.
// Per-variant docs are deliberately omitted: each variant's `#[error]`
// message is expected to carry that information.
#[allow(missing_docs)]
#[derive(Error, Debug)]
pub(crate) enum CopyDir {
    #[error("source and destination paths overlap")]
    Overlap,

    #[error("destination path already exists")]
    AlreadyExists,

    #[error("failed to canonicalize source path to check for overlap")]
    CanonicalizeIn(#[source] std::io::Error),

    #[error("failed to canonicalize destination path to check for overlap")]
    CanonicalizeOut(#[source] std::io::Error),

    #[error("failed to check whether destination path exists")]
    CheckExists(#[source] std::io::Error),

    #[error("failed to create destination directory at {}", _0.display())]
    CreateDir(PathBuf, #[source] std::io::Error),

    #[error("failed to read contents of directory at {}", _0.display())]
    ReadDir(PathBuf, #[source] std::io::Error),

    #[error("failed to read file metadata at {}", _0.display())]
    Metadata(PathBuf, #[source] std::io::Error),

    #[error("failed to copy file from {} to {}", from.display(), to.display())]
    CopyFile {
        from: PathBuf,
        to: PathBuf,
        #[source]
        error: std::io::Error,
    },

    #[error("source directory contains a symlink at {}. Refusing to copy.", _0.display())]
    Symlink(PathBuf),
}
/// Observability initialization errors /// Observability initialization errors
// Missing docs are allowed here since that kind of information should be // Missing docs are allowed here since that kind of information should be
// encoded in the error messages themselves anyway. // encoded in the error messages themselves anyway.

View file

@ -6,6 +6,8 @@ use std::{
borrow::Cow, borrow::Cow,
cmp, fmt, cmp, fmt,
fmt::Write, fmt::Write,
io,
path::{Component, Path, PathBuf},
str::FromStr, str::FromStr,
time::{SystemTime, UNIX_EPOCH}, time::{SystemTime, UNIX_EPOCH},
}; };
@ -18,6 +20,7 @@ use ruma::{
api::client::error::ErrorKind, canonical_json::try_from_json_map, api::client::error::ErrorKind, canonical_json::try_from_json_map,
CanonicalJsonError, CanonicalJsonObject, MxcUri, MxcUriError, OwnedMxcUri, CanonicalJsonError, CanonicalJsonObject, MxcUri, MxcUriError, OwnedMxcUri,
}; };
use tokio::fs;
use crate::{Error, Result}; use crate::{Error, Result};
@ -379,9 +382,165 @@ pub(crate) fn u8_slice_to_hex(slice: &[u8]) -> String {
}) })
} }
/// Canonicalize a path where some components may not exist yet.
///
/// It's assumed that non-existent components will be created as
/// directories. This should match the result of [`fs::canonicalize`]
/// _after_ calling [`fs::create_dir_all`] on `path`.
///
/// Relative paths are resolved against the current working directory.
/// Symlinks found along the way are expanded, with relative link targets
/// interpreted relative to the directory that contains the link.
///
/// # Errors
///
/// Returns an error if the current directory cannot be determined, if
/// inspecting a component fails for any reason other than the component not
/// existing, or if resolving the path requires following more than 40
/// symlinks (for example because of a symlink cycle).
async fn partial_canonicalize(path: &Path) -> io::Result<PathBuf> {
    // Upper bound on symlink expansions for a single resolution. This is the
    // limit Linux applies (`MAXSYMLINKS`); without a bound, a cycle such as
    // `a -> a` would make the loop below spin forever.
    const MAX_SYMLINK_EXPANSIONS: usize = 40;

    let mut ret = std::env::current_dir()?;
    let mut symlinks_followed: usize = 0;

    let mut base_path = Cow::Borrowed(path);
    let mut components = base_path.components();

    while let Some(component) = components.next() {
        match component {
            // An absolute component discards everything resolved so far.
            Component::Prefix(_) | Component::RootDir => {
                let component_path: &Path = component.as_ref();
                component_path.clone_into(&mut ret);
            }
            Component::CurDir => (),
            // `ret` never contains unresolved symlinks, so `..` can be
            // handled lexically.
            Component::ParentDir => {
                ret.pop();
            }
            Component::Normal(p) => {
                let component_path = ret.join(p);
                match fs::symlink_metadata(&component_path).await {
                    // path is a symlink
                    Ok(metadata) if metadata.is_symlink() => {
                        symlinks_followed += 1;
                        if symlinks_followed > MAX_SYMLINK_EXPANSIONS {
                            return Err(io::Error::new(
                                io::ErrorKind::Other,
                                format!(
                                    "too many levels of symbolic links \
                                     while resolving {}",
                                    path.display()
                                ),
                            ));
                        }

                        let destination =
                            fs::read_link(&component_path).await?;
                        // iterate over the symlink destination components
                        // before continuing with the original path
                        base_path =
                            Cow::Owned(destination.join(components.as_path()));
                        components = base_path.components();
                    }
                    // path exists, not a symlink
                    Ok(_) => {
                        ret.push(p);
                    }
                    // path does not exist
                    Err(error) if error.kind() == io::ErrorKind::NotFound => {
                        // assume a directory will be created here
                        ret.push(p);
                    }
                    Err(error) => return Err(error),
                }
            }
        }
    }

    Ok(ret)
}
/// Recursively copy a directory from `root_in` to `root_out`.
///
/// This function is not protected against symlink-swapping attacks. Do not use
/// it when any subdirectories or parents of `root_in` or `root_out` may be
/// writable by an untrusted user.
///
/// If `root_in` and `root_out` are the same path, an error is returned.
///
/// If a directory or file already exists at `root_out`, an error is returned.
///
/// If the parent directories of the `root_out` path do not exist, they will be
/// created.
///
/// If an error occurs, the copy will be interrupted, and the output directory
/// may be left in an intermediate state. If this is undesirable, the caller
/// should delete the output directory on an error.
///
/// If the `root_in` directory contains a symlink, aborts the copy and returns
/// an [`crate::error::CopyDir::Symlink`].
pub(crate) async fn copy_dir(
root_in: &Path,
root_out: &Path,
) -> Result<(), crate::error::CopyDir> {
use crate::error::CopyDir as Error;
let root_in =
fs::canonicalize(root_in).await.map_err(Error::CanonicalizeIn)?;
let root_out =
partial_canonicalize(root_out).await.map_err(Error::CanonicalizeOut)?;
if root_in.starts_with(&root_out) || root_out.starts_with(&root_in) {
return Err(Error::Overlap);
}
if fs::try_exists(&root_out).await.map_err(Error::CheckExists)? {
return Err(Error::AlreadyExists);
}
if let Some(parent) = root_out.parent() {
fs::create_dir_all(parent)
.await
.map_err(|e| Error::CreateDir(parent.to_owned(), e))?;
}
// Call 'create_dir' separately for the last dir so that we get an error if
// it already exists. 'try_exists' doesn't fully check for this case
// because TOCTOU.
fs::create_dir(&root_out)
.await
.map_err(|e| Error::CreateDir(root_out.clone(), e))?;
let mut todo = vec![PathBuf::from(".")];
while let Some(path) = todo.pop() {
let dir_in = root_in.join(&path);
let dir_out = root_out.join(&path);
let mut entries = fs::read_dir(&dir_in)
.await
.map_err(|e| Error::ReadDir(dir_in.clone(), e))?;
while let Some(entry) = entries
.next_entry()
.await
.map_err(|e| Error::ReadDir(dir_in.clone(), e))?
{
let entry_in = dir_in.join(entry.file_name());
let entry_out = dir_out.join(entry.file_name());
let file_type = entry
.file_type()
.await
.map_err(|e| Error::Metadata(entry_in.clone(), e))?;
if file_type.is_dir() {
fs::create_dir(&entry_out)
.await
.map_err(|e| Error::CreateDir(entry_out.clone(), e))?;
todo.push(path.join(entry.file_name()));
} else if file_type.is_symlink() {
return Err(Error::Symlink(entry_in));
} else {
fs::copy(&entry_in, &entry_out).await.map_err(|error| {
Error::CopyFile {
from: entry_in.clone(),
to: entry_out.clone(),
error,
}
})?;
}
}
}
Ok(())
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::utils::dbg_truncate_str; use std::{
collections::HashMap,
io,
path::{Path, PathBuf},
};
use tempfile::TempDir;
use tokio::fs;
use crate::{
error,
utils::{copy_dir, dbg_truncate_str, partial_canonicalize},
};
#[test] #[test]
fn test_truncate_str() { fn test_truncate_str() {
@ -395,4 +554,148 @@ mod tests {
assert_eq!(dbg_truncate_str(ok_hand, ok_hand.len() - 1), "👌🏽"); assert_eq!(dbg_truncate_str(ok_hand, ok_hand.len() - 1), "👌🏽");
assert_eq!(dbg_truncate_str(ok_hand, ok_hand.len()), "👌🏽"); assert_eq!(dbg_truncate_str(ok_hand, ok_hand.len()), "👌🏽");
} }
/// Exercises `partial_canonicalize` on an existing directory, `.` and `..`
/// components, absolute and relative symlinks, and components that do not
/// exist yet.
#[tokio::test]
async fn test_partial_canonicalize() {
    let tmp_dir =
        TempDir::with_prefix("test_partial_canonicalize").unwrap();
    // The temp dir itself may be reached through a symlink (e.g. `/var` ->
    // `/private/var` on macOS, or a symlinked `TMPDIR`). Since
    // `partial_canonicalize` resolves symlinks in every component, the
    // expected values must be built from the canonical location.
    let canonical_root = fs::canonicalize(tmp_dir.path()).await.unwrap();
    let path = canonical_root.as_path();

    fs::create_dir(path.join("dir")).await.unwrap();
    fs::symlink(path.join("dir"), path.join("absolute-link-to-dir"))
        .await
        .unwrap();
    fs::symlink("./dir", path.join("relative-link-to-dir")).await.unwrap();

    assert_eq!(partial_canonicalize(path).await.unwrap(), path);
    assert_eq!(partial_canonicalize(&path.join("./")).await.unwrap(), path);
    assert_eq!(
        partial_canonicalize(&path.join("dir/..")).await.unwrap(),
        path
    );
    assert_eq!(
        partial_canonicalize(&path.join("absolute-link-to-dir"))
            .await
            .unwrap(),
        path.join("dir")
    );
    assert_eq!(
        partial_canonicalize(&path.join("relative-link-to-dir"))
            .await
            .unwrap(),
        path.join("dir")
    );
    // `new-dir` does not exist: it is assumed to become a directory.
    assert_eq!(
        partial_canonicalize(&path.join("absolute-link-to-dir/new-dir"))
            .await
            .unwrap(),
        path.join("dir/new-dir")
    );
    assert_eq!(
        partial_canonicalize(
            &path.join("absolute-link-to-dir/new-dir/../..")
        )
        .await
        .unwrap(),
        path,
    );

    tmp_dir.close().unwrap();
}
/// What was found at a single path while snapshotting a directory tree in
/// the tests.
#[derive(Clone, Debug, Eq, PartialEq)]
enum PathContents {
    /// A directory.
    Dir,
    /// A symlink, together with its (unresolved) link target.
    Symlink(PathBuf),
    /// A regular file, together with its full contents.
    File(Vec<u8>),
}
/// Snapshot everything reachable under `root` without following symlinks.
///
/// The returned map is keyed by path relative to `root` (the root itself is
/// included under the empty path). Entries that are neither regular files,
/// directories, nor symlinks are left out.
async fn dir_contents(
    root: &Path,
) -> io::Result<HashMap<PathBuf, PathContents>> {
    let mut snapshot = HashMap::new();
    let mut stack = vec![root.to_owned()];

    while let Some(current) = stack.pop() {
        // `symlink_metadata` describes the link itself, so the three kinds
        // tested below are mutually exclusive.
        let meta = fs::symlink_metadata(&current).await?;

        let found = if meta.is_symlink() {
            PathContents::Symlink(fs::read_link(&current).await?)
        } else if meta.is_dir() {
            let mut children = fs::read_dir(&current).await?;
            while let Some(child) = children.next_entry().await? {
                stack.push(child.path());
            }
            PathContents::Dir
        } else if meta.is_file() {
            PathContents::File(fs::read(&current).await?)
        } else {
            continue;
        };

        let relative = current.strip_prefix(root).unwrap().to_owned();
        snapshot.insert(relative, found);
    }

    Ok(snapshot)
}
/// Copies a small tree (files, a nested directory and an empty directory)
/// and checks that the destination mirrors the source exactly.
#[tokio::test]
async fn test_copy_dir_simple() {
    let tmp_dir = TempDir::with_prefix("test_copy_dir_simple").unwrap();
    let path = tmp_dir.path();
    let src = path.join("src");
    let dst = path.join("dst");

    // Lay out the source tree.
    fs::create_dir(&src).await.unwrap();
    fs::create_dir(&path.join("src/subdir")).await.unwrap();
    fs::create_dir(&path.join("src/empty-subdir")).await.unwrap();
    fs::write(&path.join("src/a.txt"), b"foo").await.unwrap();
    fs::write(&path.join("src/subdir/b.txt"), b"bar").await.unwrap();

    copy_dir(&src, &dst).await.unwrap();

    let expected = dir_contents(&src).await.unwrap();
    let actual = dir_contents(&dst).await.unwrap();
    assert_eq!(expected, actual);

    tmp_dir.close().unwrap();
}
/// A destination inside the source, equal to the source, or containing the
/// source must be rejected with `CopyDir::Overlap`.
#[tokio::test]
async fn test_copy_dir_overlap_error() {
    let tmp_dir =
        TempDir::with_prefix("test_copy_dir_overlap_error").unwrap();
    let path = tmp_dir.path();
    let src = path.join("src");

    fs::create_dir(&src).await.unwrap();

    // In order: destination nested in the source, destination identical to
    // the source, destination that is a parent of the source.
    let destinations = [path.join("src/dst"), path.join("src"), path.to_owned()];
    for dst in &destinations {
        assert!(
            matches!(
                copy_dir(&src, dst).await,
                Err(error::CopyDir::Overlap)
            ),
            "expected an overlap error for destination {}",
            dst.display()
        );
    }

    tmp_dir.close().unwrap();
}
/// A symlink inside the source tree must abort the copy with
/// `CopyDir::Symlink`, reporting the path of the offending link.
#[tokio::test]
async fn test_copy_dir_symlink_error() {
    let tmp_dir =
        TempDir::with_prefix("test_copy_dir_symlink_error").unwrap();
    // `copy_dir` reports paths below the canonicalized source root, so the
    // expected path has to be built from the canonical temp dir location
    // (which differs from `tmp_dir.path()` when the temp dir is reached
    // through a symlink, e.g. `/var` -> `/private/var` on macOS).
    let canonical_root = fs::canonicalize(tmp_dir.path()).await.unwrap();
    let path = canonical_root.as_path();

    fs::create_dir(path.join("src")).await.unwrap();
    // The link is dangling on purpose; `copy_dir` must refuse it anyway.
    fs::symlink("./link-target", path.join("src/link")).await.unwrap();

    assert!(matches!(
        copy_dir(&path.join("src"), &path.join("dst")).await,
        Err(error::CopyDir::Symlink(p)) if p == path.join("src/link")
    ));

    tmp_dir.close().unwrap();
}
} }