feat(stitched): Initial algorithm implementation

This commit is contained in:
Ginger
2025-12-18 12:46:42 -05:00
parent c85e710760
commit a1ad9f0144
3 changed files with 263 additions and 0 deletions
@@ -0,0 +1,152 @@
use std::{
cmp::Ordering,
collections::{BTreeSet, HashSet},
};
use itertools::Itertools;
use ruma::{EventId, OwnedEventId};
use super::{Batch, Gap, OrderKey, StitchedItem, StitcherBackend};
/// A pending change to a single gap in the stitched order.
pub(super) struct GapUpdate<'id, K>
where
    K: OrderKey,
{
    /// Opaque key identifying the gap this update applies to.
    pub key: K,
    /// Replacement contents for the gap. An empty value means the gap should
    /// be deleted.
    pub gap: Gap,
    /// Items to insert after the gap. These items _should not_ be
    /// synchronized to clients.
    pub inserted_items: Vec<StitchedItem<'id>>,
}
/// The full set of changes to apply to the stitched order.
pub(super) struct OrderUpdates<'id, K>
where
    K: OrderKey,
{
    /// Per-gap updates. Items inserted by these updates _should not_ be
    /// synchronized to clients.
    pub gap_updates: Vec<GapUpdate<'id, K>>,
    /// Items to append to the end of the order. These items _should_ be
    /// synchronized to clients.
    pub new_items: Vec<StitchedItem<'id>>,
}
/// Computes updates to the stitched order for a batch of events, using a
/// borrowed [`StitcherBackend`] to look up existing gaps and events.
pub(super) struct Stitcher<'backend, B>
where
    B: StitcherBackend,
{
    /// The backend queried while stitching.
    backend: &'backend B,
}
impl<B: StitcherBackend> Stitcher<'_, B> {
    /// Create a stitcher which reads existing state from `backend`.
    pub(super) fn new<'backend>(backend: &'backend B) -> Stitcher<'backend, B> {
        Stitcher { backend }
    }

    /// Compute the updates needed to stitch `batch` into the existing order.
    ///
    /// Events reachable (through predecessor sets) from an event mentioned in
    /// an existing gap are returned as that gap's `inserted_items`; every
    /// other event in the batch is returned in `new_items`. Nothing is
    /// written here: applying the returned [`OrderUpdates`] is left to the
    /// caller.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as the internal sorting step, i.e. if
    /// an event selected for insertion cannot be looked up in `batch`.
    pub(super) fn stitch<'id>(&self, batch: Batch<'id>) -> OrderUpdates<'id, B::Key> {
        let mut gap_updates = Vec::new();
        let mut remaining_events: BTreeSet<&EventId> = batch.events().collect();

        // 1. Find existing gaps which include IDs of events in `batch`.
        let matching_gaps = self.backend.find_matching_gaps(batch.events());

        // Repeat steps 2-9 for each matching gap.
        for (key, mut gap) in matching_gaps {
            // 2. Find events in `batch` which are mentioned in `gap`.
            let matching_events = remaining_events.iter().filter(|id| gap.contains(**id));

            // 3. Create the to-insert list from the predecessor sets of each
            //    matching event. Predecessor sets of different matching
            //    events may overlap, so collect into a set: this guarantees
            //    each event is inserted at most once and makes the membership
            //    test in step 4 cheap.
            let events_to_insert: BTreeSet<&'id EventId> = matching_events
                .filter_map(|event| batch.predecessors(event))
                .flat_map(|predecessors| predecessors.predecessor_set.iter())
                .filter(|event| remaining_events.contains(*event))
                .copied()
                .collect();

            // 4. Remove the events in the to-insert list from
            //    `remaining_events` so they aren't processed again.
            remaining_events.retain(|event| !events_to_insert.contains(event));

            // 5 and 6. Sort the to-insert list and add gaps for missing
            //          `prev_events`.
            let inserted_items = self.sort_events_and_create_gaps(&batch, events_to_insert);

            // 8. Update the gap: drop every ID that is part of this batch.
            gap.retain(|id| !batch.contains(id));

            // 7 and 9. Append to-insert list and delete gap if empty
            // (the actual work of doing this is handled by the callee).
            gap_updates.push(GapUpdate { key: key.clone(), gap, inserted_items });
        }

        // 10. Append remaining events and gaps.
        let new_items = self.sort_events_and_create_gaps(&batch, remaining_events);

        OrderUpdates { gap_updates, new_items }
    }

    /// Sort `events_to_insert` and interleave gaps for missing `prev_events`.
    ///
    /// Each event is emitted as a [`StitchedItem::Event`]. If any of its
    /// `prev_events` is neither in `batch` nor reported as existing by the
    /// backend, a [`StitchedItem::Gap`] holding those IDs is emitted
    /// immediately before the event.
    ///
    /// # Panics
    ///
    /// Panics if an event in `events_to_insert` is not part of `batch`.
    fn sort_events_and_create_gaps<'id>(
        &self,
        batch: &Batch<'id>,
        events_to_insert: impl IntoIterator<Item = &'id EventId>,
    ) -> Vec<StitchedItem<'id>> {
        // 5. Sort the to-insert list with DAG;received order.
        let events_to_insert = events_to_insert
            .into_iter()
            .sorted_by(Self::compare_by_dag_received(batch))
            .collect_vec();

        // Every event yields one item plus at most one gap; reserve room for
        // roughly half of the events needing a gap. Sized from the number of
        // events, not from the vector's (possibly larger) capacity.
        let event_count = events_to_insert.len();
        let mut items = Vec::with_capacity(event_count + event_count / 2);

        for event in events_to_insert {
            // 6. Collect the `prev_events` which are known neither to this
            //    batch nor to the backend.
            let missing_prev_events: HashSet<OwnedEventId> = batch
                .predecessors(event)
                .expect("events in to_insert should be in batch")
                .prev_events
                .iter()
                .filter(|prev_event| {
                    !(batch.contains(prev_event) || self.backend.event_exists(prev_event))
                })
                .map(|id| OwnedEventId::from(*id))
                .collect();

            if !missing_prev_events.is_empty() {
                items.push(StitchedItem::Gap(missing_prev_events));
            }

            items.push(StitchedItem::Event(event));
        }

        items
    }

    /// Compare two events by DAG;received order.
    ///
    /// An event compares equal to itself. Otherwise, if either event is in
    /// the other's predecessor set it comes first; failing that, the event
    /// which comes first in the batch sorts first. If only one of the two
    /// events is part of the batch, that event sorts first.
    ///
    /// This assumes `batch.events()` yields events in the order they were
    /// received — TODO confirm against `Batch`.
    ///
    /// NOTE(review): mixing the DAG relation with batch position is only
    /// guaranteed to be transitive when batch order never places an event
    /// before one of its predecessors; verify that callers uphold this.
    ///
    /// # Panics
    ///
    /// The returned comparator panics if the two events are unrelated and
    /// neither of them is part of `batch`.
    fn compare_by_dag_received<'id>(
        batch: &Batch<'id>,
    ) -> impl FnMut(&&'id EventId, &&'id EventId) -> Ordering {
        use std::collections::HashMap;

        // Index every event by its (first) position in the batch once, so the
        // comparator does not re-scan the whole batch for every comparison.
        let mut positions: HashMap<&'id EventId, usize> = HashMap::new();
        for (index, event) in batch.events().enumerate() {
            positions.entry(event).or_insert(index);
        }

        move |a, b| {
            // A comparator handed to a sort must be reflexive.
            if a == b {
                return Ordering::Equal;
            }

            // `b` is a predecessor of `a`, so `b` comes first.
            if batch
                .predecessors(a)
                .is_some_and(|it| it.predecessor_set.contains(b))
            {
                return Ordering::Greater;
            }

            // `a` is a predecessor of `b`, so `a` comes first.
            if batch
                .predecessors(b)
                .is_some_and(|it| it.predecessor_set.contains(a))
            {
                return Ordering::Less;
            }

            // Unrelated events: whichever appears earlier in the batch sorts
            // first.
            match (positions.get(a), positions.get(b)) {
                (Some(position_a), Some(position_b)) => position_a.cmp(position_b),
                (Some(_), None) => Ordering::Less,
                (None, Some(_)) => Ordering::Greater,
                (None, None) => panic!("neither {} nor {} in batch", a, b),
            }
        }
    }
}