Files
continuwuity/src/database/key_value/rooms/search.rs
T

79 lines
2.7 KiB
Rust
Raw Normal View History

2022-09-06 23:15:09 +02:00
use ruma::RoomId;
2024-02-20 22:40:46 -05:00
use tracing::debug;
2022-09-06 23:15:09 +02:00
2022-10-05 20:34:31 +02:00
use crate::{database::KeyValueDatabase, service, services, utils, Result};
2022-09-06 23:15:09 +02:00
/// Return type of `search_pdus`.
///
/// `Ok(None)` means there was nothing to search for. `Ok(Some((hits, words)))`
/// carries a boxed iterator over the raw key suffixes found in the token index
/// (the bytes stored after the token prefix, i.e. the `pdu_id` written by
/// `index_pdu`) together with the lowercased words the search string was split into.
type SearchPdusResult<'a> = Result<Option<(Box<dyn Iterator<Item = Vec<u8>> + 'a>, Vec<String>)>>;
2022-10-05 18:36:12 +02:00
impl service::rooms::search::Data for KeyValueDatabase {
2022-10-08 13:02:52 +02:00
fn index_pdu<'a>(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> {
2022-09-06 23:15:09 +02:00
let mut batch = message_body
2022-07-10 16:28:43 +02:00
.split_terminator(|c: char| !c.is_alphanumeric())
.filter(|s| !s.is_empty())
.filter(|word| word.len() <= 50)
.map(str::to_lowercase)
.map(|word| {
let mut key = shortroomid.to_be_bytes().to_vec();
key.extend_from_slice(word.as_bytes());
key.push(0xff);
key.extend_from_slice(pdu_id); // TODO: currently we save the room id a second time here
2022-07-10 16:28:43 +02:00
(key, Vec::new())
});
2022-06-20 11:31:27 +02:00
2022-09-06 23:15:09 +02:00
self.tokenids.insert_batch(&mut batch)
}
2024-02-20 22:40:46 -05:00
fn delete_all_search_tokenids_for_room(&self, room_id: &RoomId) -> Result<()> {
let mut prefix = room_id.as_bytes().to_vec();
prefix.push(0xff);
for (key, _) in self.tokenids.scan_prefix(prefix) {
debug!("Removing key: {:?}", key);
self.tokenids.remove(&key)?;
}
Ok(())
}
fn search_pdus<'a>(&'a self, room_id: &RoomId, search_string: &str) -> SearchPdusResult<'a> {
2022-10-05 20:34:31 +02:00
let prefix = services()
.rooms
.short
2021-08-14 19:47:49 +02:00
.get_shortroomid(room_id)?
.expect("room exists")
.to_be_bytes()
.to_vec();
2020-08-18 12:15:27 +02:00
2021-10-13 11:51:30 +02:00
let words: Vec<_> = search_string
2020-08-18 12:15:27 +02:00
.split_terminator(|c: char| !c.is_alphanumeric())
2021-08-19 14:05:23 +02:00
.filter(|s| !s.is_empty())
2020-08-18 12:15:27 +02:00
.map(str::to_lowercase)
2021-10-13 11:51:30 +02:00
.collect();
2020-08-18 12:15:27 +02:00
let iterators = words.clone().into_iter().map(move |word| {
let mut prefix2 = prefix.clone();
prefix2.extend_from_slice(word.as_bytes());
prefix2.push(0xff);
let prefix3 = prefix2.clone();
let mut last_possible_id = prefix2.clone();
last_possible_id.extend_from_slice(&u64::MAX.to_be_bytes());
self.tokenids
.iter_from(&last_possible_id, true) // Newest pdus first
.take_while(move |(k, _)| k.starts_with(&prefix2))
.map(move |(key, _)| key[prefix3.len()..].to_vec())
});
2020-08-18 12:15:27 +02:00
2022-10-05 20:33:55 +02:00
let common_elements = match utils::common_elements(iterators, |a, b| {
2022-02-04 17:15:21 +01:00
// We compare b with a because we reversed the iterator earlier
b.cmp(a)
2022-10-05 20:33:55 +02:00
}) {
Some(it) => it,
None => return Ok(None),
};
Ok(Some((Box::new(common_elements), words)))
2020-08-21 21:22:59 +02:00
}
2022-09-06 23:15:09 +02:00
}