Files
continuwuity/src/service/media/mod.rs
T

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

463 lines
14 KiB
Rust
Raw Normal View History

2022-09-07 13:25:51 +02:00
mod data;
2024-06-23 20:48:40 +00:00
mod tests;
2022-10-09 15:34:36 +02:00
2024-07-07 04:46:16 +00:00
use std::{collections::HashMap, io::Cursor, num::Saturating as Sat, path::PathBuf, sync::Arc, time::SystemTime};
2024-06-23 20:48:40 +00:00
2024-07-13 07:05:51 +00:00
use async_trait::async_trait;
2024-06-23 20:48:40 +00:00
use base64::{engine::general_purpose, Engine as _};
2024-07-07 04:46:16 +00:00
use conduit::{checked, debug, debug_error, error, utils, Error, Result, Server};
2024-05-26 21:29:19 +00:00
use data::Data;
2022-10-09 15:34:36 +02:00
use image::imageops::FilterType;
2024-03-17 01:42:30 -04:00
use ruma::{OwnedMxcUri, OwnedUserId};
2024-02-09 23:16:06 -05:00
use serde::Serialize;
use tokio::{
2024-06-23 20:48:40 +00:00
fs,
2023-06-25 19:31:40 +02:00
io::{AsyncReadExt, AsyncWriteExt, BufReader},
sync::{Mutex, RwLock},
2024-03-05 19:48:54 -05:00
};
2020-10-19 15:29:36 +02:00
2024-06-23 20:48:40 +00:00
use crate::services;
2020-05-18 17:53:34 +02:00
2024-02-21 20:34:11 -05:00
/// A media file's bytes together with the metadata stored alongside it.
#[derive(Debug)]
pub struct FileMeta {
    /// Content disposition recorded in the file's metadata, if any.
    // NOTE(review): presumably `dead_code` is allowed because nothing reads
    // this field yet — confirm before removing the attribute.
    #[allow(dead_code)]
    pub content_disposition: Option<String>,

    /// Content type recorded in the file's metadata, if any.
    pub content_type: Option<String>,

    /// Raw bytes of the file.
    pub file: Vec<u8>,
}
2024-02-09 23:16:06 -05:00
#[derive(Serialize, Default)]
2024-05-09 15:59:08 -07:00
pub struct UrlPreviewData {
2024-02-09 23:16:06 -05:00
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:title"))]
2024-05-09 15:59:08 -07:00
pub title: Option<String>,
2024-02-09 23:16:06 -05:00
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:description"))]
2024-05-09 15:59:08 -07:00
pub description: Option<String>,
2024-02-09 23:16:06 -05:00
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:image"))]
2024-05-09 15:59:08 -07:00
pub image: Option<String>,
2024-02-09 23:16:06 -05:00
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "matrix:image:size"))]
2024-05-09 15:59:08 -07:00
pub image_size: Option<usize>,
2024-02-09 23:16:06 -05:00
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:image:width"))]
2024-05-09 15:59:08 -07:00
pub image_width: Option<u32>,
2024-02-09 23:16:06 -05:00
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:image:height"))]
2024-05-09 15:59:08 -07:00
pub image_height: Option<u32>,
2024-02-09 23:16:06 -05:00
}
2024-05-09 15:59:08 -07:00
pub struct Service {
2024-06-23 20:48:40 +00:00
server: Arc<Server>,
2024-06-28 22:51:39 +00:00
pub(crate) db: Data,
2024-05-09 15:59:08 -07:00
pub url_preview_mutex: RwLock<HashMap<String, Arc<Mutex<()>>>>,
2020-05-18 17:53:34 +02:00
}
2024-07-13 07:05:51 +00:00
#[async_trait]
2024-07-04 03:26:19 +00:00
impl crate::Service for Service {
fn build(args: crate::Args<'_>) -> Result<Arc<Self>> {
Ok(Arc::new(Self {
server: args.server.clone(),
db: Data::new(args.db),
2024-06-23 20:48:40 +00:00
url_preview_mutex: RwLock::new(HashMap::new()),
2024-07-04 03:26:19 +00:00
}))
2024-06-23 20:48:40 +00:00
}
2024-07-13 07:05:51 +00:00
async fn worker(self: Arc<Self>) -> Result<()> {
self.create_media_dir().await?;
Ok(())
}
2024-07-04 03:26:19 +00:00
fn name(&self) -> &str { crate::service::make_name(std::module_path!()) }
}
impl Service {
2021-06-04 08:06:12 +04:30
/// Uploads a file.
2024-05-09 15:59:08 -07:00
pub async fn create(
2024-06-28 23:23:59 +00:00
&self, sender_user: Option<OwnedUserId>, mxc: &str, content_disposition: Option<&str>,
2024-03-17 01:42:30 -04:00
content_type: Option<&str>, file: &[u8],
2020-05-18 17:53:34 +02:00
) -> Result<()> {
2022-09-07 13:25:51 +02:00
// Width, Height = 0 if it's not a thumbnail
2024-03-17 01:42:30 -04:00
let key = if let Some(user) = sender_user {
2024-03-25 17:05:11 -04:00
self.db
.create_file_metadata(Some(user.as_str()), mxc, 0, 0, content_disposition, content_type)?
2024-03-17 01:42:30 -04:00
} else {
2024-03-25 17:05:11 -04:00
self.db
.create_file_metadata(None, mxc, 0, 0, content_disposition, content_type)?
2024-03-17 01:42:30 -04:00
};
2024-03-05 19:48:54 -05:00
2024-06-23 20:48:40 +00:00
//TODO: Dangling metadata in database if creation fails
let mut f = self.create_media_file(&key).await?;
2021-06-04 08:06:12 +04:30
f.write_all(file).await?;
2020-05-18 17:53:34 +02:00
Ok(())
}
2024-03-05 19:48:54 -05:00
2024-02-21 20:34:11 -05:00
/// Deletes a file in the database and from the media directory via an MXC
2024-06-28 23:23:59 +00:00
pub async fn delete(&self, mxc: &str) -> Result<()> {
if let Ok(keys) = self.db.search_mxc_metadata_prefix(mxc) {
2024-02-22 00:08:08 -05:00
for key in keys {
2024-06-23 20:48:40 +00:00
self.remove_media_file(&key).await?;
2024-03-05 19:48:54 -05:00
2024-02-22 00:08:08 -05:00
debug!("Deleting MXC {mxc} from database");
2024-06-28 23:23:59 +00:00
self.db.delete_file_mxc(mxc)?;
2024-02-21 20:34:11 -05:00
}
2024-03-05 19:48:54 -05:00
2024-02-22 00:08:08 -05:00
Ok(())
2024-02-21 20:34:11 -05:00
} else {
2024-02-22 00:08:08 -05:00
error!("Failed to find any media keys for MXC \"{mxc}\" in our database (MXC does not exist)");
Err(Error::bad_database(
"Failed to find any media keys for the provided MXC in our database (MXC does not exist)",
))
2024-03-05 19:48:54 -05:00
}
2024-02-21 20:34:11 -05:00
}
2024-03-05 19:48:54 -05:00
/// Uploads or replaces a file thumbnail.
2024-03-17 01:42:30 -04:00
#[allow(clippy::too_many_arguments)]
2024-05-09 15:59:08 -07:00
pub async fn upload_thumbnail(
2024-06-28 23:23:59 +00:00
&self, sender_user: Option<OwnedUserId>, mxc: &str, content_disposition: Option<&str>,
2024-03-17 01:42:30 -04:00
content_type: Option<&str>, width: u32, height: u32, file: &[u8],
) -> Result<()> {
2024-03-17 01:42:30 -04:00
let key = if let Some(user) = sender_user {
2024-03-25 17:05:11 -04:00
self.db
.create_file_metadata(Some(user.as_str()), mxc, width, height, content_disposition, content_type)?
2024-03-17 01:42:30 -04:00
} else {
2024-03-25 17:05:11 -04:00
self.db
.create_file_metadata(None, mxc, width, height, content_disposition, content_type)?
2024-03-17 01:42:30 -04:00
};
2024-06-23 20:48:40 +00:00
//TODO: Dangling metadata in database if creation fails
let mut f = self.create_media_file(&key).await?;
2021-06-04 08:06:12 +04:30
f.write_all(file).await?;
2024-03-05 19:48:54 -05:00
Ok(())
}
2024-03-05 19:48:54 -05:00
2020-05-18 17:53:34 +02:00
/// Downloads a file.
2024-06-28 23:23:59 +00:00
pub async fn get(&self, mxc: &str) -> Result<Option<FileMeta>> {
2022-10-05 20:34:31 +02:00
if let Ok((content_disposition, content_type, key)) = self.db.search_file_metadata(mxc, 0, 0) {
2021-06-08 20:53:24 +04:30
let mut file = Vec::new();
2024-06-23 20:48:40 +00:00
let path = self.get_media_file(&key);
BufReader::new(fs::File::open(path).await?)
2024-03-25 17:05:11 -04:00
.read_to_end(&mut file)
.await?;
2024-03-05 19:48:54 -05:00
Ok(Some(FileMeta {
content_disposition,
content_type,
2021-06-04 08:06:12 +04:30
file,
}))
2020-05-19 18:31:34 +02:00
} else {
Ok(None)
2024-03-05 19:48:54 -05:00
}
2020-05-19 18:31:34 +02:00
}
2024-03-05 19:48:54 -05:00
/// Deletes all remote only media files in the given at or after
/// time/duration. Returns a u32 with the amount of media files deleted.
pub async fn delete_all_remote_media_at_after_time(&self, time: String, force: bool) -> Result<usize> {
let all_keys = self.db.get_all_media_keys();
let user_duration: SystemTime = match cyborgtime::parse_duration(&time) {
Ok(duration) => {
debug!("Parsed duration: {:?}", duration);
debug!("System time now: {:?}", SystemTime::now());
SystemTime::now().checked_sub(duration).ok_or_else(|| {
Error::bad_database("Duration specified is not valid against the current system time")
})?
},
Err(e) => {
error!("Failed to parse user-specified time duration: {}", e);
return Err(Error::bad_database("Failed to parse user-specified time duration."));
},
};
2024-03-05 19:48:54 -05:00
let mut remote_mxcs: Vec<String> = vec![];
for key in all_keys {
debug!("Full MXC key from database: {:?}", key);
// we need to get the MXC URL from the first part of the key (the first 0xff /
// 255 push). this is all necessary because of conduit using magic keys for
// media
let mut parts = key.split(|&b| b == 0xFF);
let mxc = parts
.next()
.map(|bytes| {
utils::string_from_bytes(bytes).map_err(|e| {
error!("Failed to parse MXC unicode bytes from our database: {}", e);
Error::bad_database("Failed to parse MXC unicode bytes from our database")
})
})
.transpose()?;
2024-03-05 19:48:54 -05:00
let Some(mxc_s) = mxc else {
return Err(Error::bad_database(
"Parsed MXC URL unicode bytes from database but still is None",
));
};
2024-03-05 19:48:54 -05:00
debug!("Parsed MXC key to URL: {}", mxc_s);
2024-03-05 19:48:54 -05:00
let mxc = OwnedMxcUri::from(mxc_s);
if mxc.server_name() == Ok(services().globals.server_name()) {
debug!("Ignoring local media MXC: {}", mxc);
// ignore our own MXC URLs as this would be local media.
continue;
}
2024-03-05 19:48:54 -05:00
2024-06-23 20:48:40 +00:00
let path = self.get_media_file(&key);
debug!("MXC path: {path:?}");
2024-03-05 19:48:54 -05:00
let file_metadata = fs::metadata(path.clone()).await?;
2024-06-23 20:48:40 +00:00
debug!("File metadata: {file_metadata:?}");
2024-03-05 19:48:54 -05:00
let file_created_at = match file_metadata.created() {
Ok(value) => value,
Err(err) if err.kind() == std::io::ErrorKind::Unsupported => {
debug!("btime is unsupported, using mtime instead");
file_metadata.modified()?
},
Err(err) => {
if force {
error!("Could not delete MXC path {:?}: {:?}. Skipping...", path, err);
continue;
}
return Err(err.into());
},
};
debug!("File created at: {:?}", file_created_at);
2024-03-05 19:48:54 -05:00
if file_created_at <= user_duration {
debug!("File is within user duration, pushing to list of file paths and keys to delete.");
remote_mxcs.push(mxc.to_string());
}
}
2024-03-05 19:48:54 -05:00
debug!(
"Finished going through all our media in database for eligible keys to delete, checking if these are empty"
);
2024-03-05 19:48:54 -05:00
if remote_mxcs.is_empty() {
return Err(Error::bad_database("Did not found any eligible MXCs to delete."));
}
2024-03-05 19:48:54 -05:00
debug!("Deleting media now in the past \"{:?}\".", user_duration);
2024-03-05 19:48:54 -05:00
let mut deletion_count: usize = 0;
2024-03-05 19:48:54 -05:00
for mxc in remote_mxcs {
debug!("Deleting MXC {mxc} from database and filesystem");
2024-06-28 23:23:59 +00:00
self.delete(&mxc).await?;
deletion_count = deletion_count.saturating_add(1);
}
Ok(deletion_count)
2024-03-05 19:48:54 -05:00
}
2020-10-19 15:29:36 +02:00
/// Returns width, height of the thumbnail and whether it should be cropped.
/// Returns None when the server should send the original file.
2024-05-09 15:59:08 -07:00
pub fn thumbnail_properties(&self, width: u32, height: u32) -> Option<(u32, u32, bool)> {
2020-10-19 15:29:36 +02:00
match (width, height) {
(0..=32, 0..=32) => Some((32, 32, true)),
(0..=96, 0..=96) => Some((96, 96, true)),
(0..=320, 0..=240) => Some((320, 240, false)),
(0..=640, 0..=480) => Some((640, 480, false)),
(0..=800, 0..=600) => Some((800, 600, false)),
_ => None,
}
2024-03-05 19:48:54 -05:00
}
2020-05-19 18:31:34 +02:00
/// Downloads a file's thumbnail.
2020-10-19 15:29:36 +02:00
///
/// Here's an example on how it works:
///
/// - Client requests an image with width=567, height=567
/// - Server rounds that up to (800, 600), so it doesn't have to save too
/// many thumbnails
/// - Server rounds that up again to (958, 600) to fix the aspect ratio
/// (only for width,height>96)
/// - Server creates the thumbnail and sends it to the user
///
/// For width,height <= 96 the server uses another thumbnailing algorithm
/// which crops the image afterwards.
2024-06-28 23:23:59 +00:00
pub async fn get_thumbnail(&self, mxc: &str, width: u32, height: u32) -> Result<Option<FileMeta>> {
2024-03-25 17:05:11 -04:00
let (width, height, crop) = self
.thumbnail_properties(width, height)
.unwrap_or((0, 0, false)); // 0, 0 because that's the original file
2024-03-05 19:48:54 -05:00
2024-06-28 23:23:59 +00:00
if let Ok((content_disposition, content_type, key)) = self.db.search_file_metadata(mxc, width, height) {
2020-05-19 18:31:34 +02:00
// Using saved thumbnail
2021-06-08 20:53:24 +04:30
let mut file = Vec::new();
2024-06-23 20:48:40 +00:00
let path = self.get_media_file(&key);
fs::File::open(path).await?.read_to_end(&mut file).await?;
2024-03-05 19:48:54 -05:00
Ok(Some(FileMeta {
content_disposition,
content_type,
2024-03-02 20:55:02 -05:00
file: file.clone(),
}))
2024-06-28 23:23:59 +00:00
} else if let Ok((content_disposition, content_type, key)) = self.db.search_file_metadata(mxc, 0, 0) {
2020-05-19 18:31:34 +02:00
// Generate a thumbnail
2021-06-08 20:53:24 +04:30
let mut file = Vec::new();
2024-06-23 20:48:40 +00:00
let path = self.get_media_file(&key);
fs::File::open(path).await?.read_to_end(&mut file).await?;
2024-03-05 19:48:54 -05:00
2020-05-19 18:31:34 +02:00
if let Ok(image) = image::load_from_memory(&file) {
2020-10-19 15:29:36 +02:00
let original_width = image.width();
let original_height = image.height();
if width > original_width || height > original_height {
return Ok(Some(FileMeta {
content_disposition,
2020-10-19 15:29:36 +02:00
content_type,
2024-03-02 20:55:02 -05:00
file: file.clone(),
2020-10-19 15:29:36 +02:00
}));
}
2024-03-05 19:48:54 -05:00
2020-10-19 15:29:36 +02:00
let thumbnail = if crop {
image.resize_to_fill(width, height, FilterType::CatmullRom)
2020-10-19 15:29:36 +02:00
} else {
let (exact_width, exact_height) = {
2024-07-07 04:46:16 +00:00
let ratio = Sat(original_width) * Sat(height);
let nratio = Sat(width) * Sat(original_height);
2024-03-05 19:48:54 -05:00
let use_width = nratio <= ratio;
2020-10-19 15:29:36 +02:00
let intermediate = if use_width {
2024-07-07 04:46:16 +00:00
Sat(original_height) * Sat(checked!(width / original_width)?)
2020-10-19 15:29:36 +02:00
} else {
2024-07-07 04:46:16 +00:00
Sat(original_width) * Sat(checked!(height / original_height)?)
2020-10-19 15:29:36 +02:00
};
2024-07-07 04:46:16 +00:00
2020-10-19 15:29:36 +02:00
if use_width {
2024-07-07 04:46:16 +00:00
(width, intermediate.0)
2020-10-19 15:29:36 +02:00
} else {
2024-07-07 04:46:16 +00:00
(intermediate.0, height)
2020-10-19 15:29:36 +02:00
}
};
2024-03-05 19:48:54 -05:00
2021-03-24 11:52:10 +01:00
image.thumbnail_exact(exact_width, exact_height)
2020-10-19 15:29:36 +02:00
};
2024-03-05 19:48:54 -05:00
2020-05-19 18:31:34 +02:00
let mut thumbnail_bytes = Vec::new();
2024-03-10 20:39:42 -04:00
thumbnail.write_to(&mut Cursor::new(&mut thumbnail_bytes), image::ImageFormat::Png)?;
2024-03-05 19:48:54 -05:00
2020-05-19 18:31:34 +02:00
// Save thumbnail in database so we don't have to generate it again next time
2022-10-05 20:34:31 +02:00
let thumbnail_key = self.db.create_file_metadata(
2024-03-17 01:42:30 -04:00
None,
2022-10-05 20:34:31 +02:00
mxc,
width,
height,
content_disposition.as_deref(),
content_type.as_deref(),
)?;
2024-03-05 19:48:54 -05:00
2024-06-23 20:48:40 +00:00
let mut f = self.create_media_file(&thumbnail_key).await?;
2021-06-04 08:06:12 +04:30
f.write_all(&thumbnail_bytes).await?;
2024-03-05 19:48:54 -05:00
Ok(Some(FileMeta {
content_disposition,
content_type,
2024-03-02 20:55:02 -05:00
file: thumbnail_bytes.clone(),
}))
2020-05-19 18:31:34 +02:00
} else {
2020-12-08 10:33:44 +01:00
// Couldn't parse file to generate thumbnail, send original
Ok(Some(FileMeta {
content_disposition,
2020-12-08 10:33:44 +01:00
content_type,
2024-03-02 20:55:02 -05:00
file: file.clone(),
2020-12-08 10:33:44 +01:00
}))
2020-05-19 18:31:34 +02:00
}
2020-05-18 17:53:34 +02:00
} else {
Ok(None)
}
2024-03-05 19:48:54 -05:00
}
2024-05-09 15:59:08 -07:00
/// Returns the preview data the database holds for `url`, if any.
pub async fn get_url_preview(&self, url: &str) -> Option<UrlPreviewData> {
    self.db.get_url_preview(url)
}
2024-03-05 19:48:54 -05:00
/// TODO: use this?
#[allow(dead_code)]
2024-05-09 15:59:08 -07:00
pub async fn remove_url_preview(&self, url: &str) -> Result<()> {
2024-02-09 23:16:06 -05:00
// TODO: also remove the downloaded image
self.db.remove_url_preview(url)
}
2024-03-05 19:48:54 -05:00
2024-05-09 15:59:08 -07:00
pub async fn set_url_preview(&self, url: &str, data: &UrlPreviewData) -> Result<()> {
2024-03-25 17:05:11 -04:00
let now = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.expect("valid system time");
2024-02-09 23:16:06 -05:00
self.db.set_url_preview(url, data, now)
}
2024-06-23 20:48:40 +00:00
/// Creates the media directory, including any missing parent directories.
pub async fn create_media_dir(&self) -> Result<()> {
    fs::create_dir_all(self.get_media_dir()).await?;

    Ok(())
}
2024-06-23 20:48:40 +00:00
/// Removes the media file for `key` along with its legacy base64-named
/// counterpart.
///
/// Both removals run concurrently. Only a failure to remove the main file is
/// returned to the caller; a failure on the legacy path is merely logged,
/// and only when `media_compat_file_link` is enabled.
async fn remove_media_file(&self, key: &[u8]) -> Result<()> {
    let path = self.get_media_file(key);
    let legacy = self.get_media_file_b64(key);
    debug!(?key, ?path, ?legacy, "Removing media file");

    let (file_rm, legacy_rm) = tokio::join!(fs::remove_file(&path), fs::remove_file(&legacy));

    match legacy_rm {
        Err(e) if self.server.config.media_compat_file_link => {
            debug_error!(?key, ?legacy, "Failed to remove legacy media symlink: {e}");
        },
        _ => {},
    }

    file_rm?;
    Ok(())
}
2024-03-05 19:48:54 -05:00
2024-06-23 20:48:40 +00:00
/// Creates the media file for `key` and returns the open handle.
///
/// When `media_compat_file_link` is enabled, a symlink from the legacy
/// base64 file name to the new file is created as well; failing to create
/// that link is logged but does not fail the call.
async fn create_media_file(&self, key: &[u8]) -> Result<fs::File> {
    let path = self.get_media_file(key);
    debug!(?key, ?path, "Creating media file");

    let file = fs::File::create(&path).await?;
    if !self.server.config.media_compat_file_link {
        return Ok(file);
    }

    let legacy = self.get_media_file_b64(key);
    if let Err(e) = fs::symlink(&path, &legacy).await {
        debug_error!(
            key = ?encode_key(key), ?path, ?legacy,
            "Failed to create legacy media symlink: {e}"
        );
    }

    Ok(file)
}
2024-03-05 19:48:54 -05:00
2024-06-23 20:48:40 +00:00
/// Path of the media file for `key`; currently the SHA256-based file name.
#[inline]
pub fn get_media_file(&self, key: &[u8]) -> PathBuf {
    self.get_media_file_sha256(key)
}
/// new SHA256 file name media function. requires database migrated.
///
/// The file name is the SHA256 digest of `key`, encoded with `encode_key`.
/// Hashing keeps the name at a fixed length, which prevents the total length
/// of the path from exceeding the maximum length in most filesystems.
pub fn get_media_file_sha256(&self, key: &[u8]) -> PathBuf {
    let digest = <sha2::Sha256 as sha2::Digest>::digest(key);
    let file_name = encode_key(&digest);

    let mut path = self.get_media_dir();
    path.push(file_name);
    path
}
2024-03-05 19:48:54 -05:00
2024-06-23 20:48:40 +00:00
/// old base64 file name media function
///
/// This is the old version of `get_media_file`: the whole key, encoded with
/// `encode_key`, is used as the file name inside the media directory.
pub fn get_media_file_b64(&self, key: &[u8]) -> PathBuf {
    let file_name = encode_key(key);

    self.get_media_dir().join(file_name)
}
2024-03-05 19:48:54 -05:00
2024-06-23 20:48:40 +00:00
/// Path of the media directory: `media` inside the configured database path.
pub fn get_media_dir(&self) -> PathBuf {
    let mut dir = PathBuf::new();
    dir.push(&self.server.config.database_path);
    dir.push("media");
    dir
}
}
2024-06-23 20:48:40 +00:00
/// Encodes `key` as URL-safe base64 without padding.
#[inline]
#[must_use]
pub fn encode_key(key: &[u8]) -> String {
    let engine = &general_purpose::URL_SAFE_NO_PAD;

    engine.encode(key)
}