Compare commits

..

8 Commits

Author SHA1 Message Date
Ginger 624bd3796a fix: Don't panic on missing SSH in sliding sync 2026-05-26 14:28:09 -04:00
Ginger 7f21f0d6ab fix: Additional sync logic fixes 2026-05-26 14:28:08 -04:00
Ginger b6158c73db fix: Upgrade warning on room load failures to error 2026-05-26 14:28:08 -04:00
Ginger bf4c716c7f fix: Don't panic on missing SSH 2026-05-26 14:28:08 -04:00
Ginger 4facaa4440 fix: Calculate state at end of last sync correctly 2026-05-26 14:28:08 -04:00
Ginger 28aed31874 chore: News fragments 2026-05-26 14:28:08 -04:00
Ginger af8e28559e feat: Add support for state_after 2026-05-26 14:28:08 -04:00
Ginger 6a2480774d feat: Remove all uses of roomsynctoken_shortstatehash 2026-05-26 14:28:08 -04:00
29 changed files with 703 additions and 906 deletions
+1
View File
@@ -0,0 +1 @@
Added support for Matrix 1.16's `state_after` feature, allowing clients which understand it to sync room state changes more reliably. Contributed by @ginger.
-2
View File
@@ -1,2 +0,0 @@
Improved the performance and reliability of fetching missing events, improving network partition recovery. Contributed
by @nex.
+1
View File
@@ -0,0 +1 @@
Adjusted legacy sync logic to no longer use the `roomsynctoken_shortstatehash` database column. Once this change has been confirmed to be stable and reliable, a future update will remove it entirely, significantly decreasing database sizes. Contributed by @ginger.
+1 -1
View File
@@ -297,7 +297,7 @@
# This item is undocumented. Please contribute documentation for it.
#
#max_fetch_prev_events = 256
#max_fetch_prev_events = 192
# How many incoming federation transactions the server is willing to be
# processing at any given time before it becomes overloaded and starts
+7
View File
@@ -48,6 +48,13 @@ async fn load_timeline(
ending_count: Option<PduCount>,
limit: usize,
) -> Result<TimelinePdus> {
if let (Some(starting_count), Some(ending_count)) = (starting_count, ending_count) {
debug_assert!(
starting_count <= ending_count,
"starting count {starting_count} > ending count {ending_count}"
);
}
let mut pdu_stream = match starting_count {
| Some(starting_count) => {
let last_timeline_count = services
+76 -73
View File
@@ -38,6 +38,7 @@ use ruma::{
uint,
};
use service::{account_data::AnyRawAccountDataEvent, rooms::short::ShortStateHash};
use tokio::pin;
use super::{load_timeline, share_encrypted_room};
use crate::client::{
@@ -96,12 +97,19 @@ pub(super) async fn load_joined_room(
);
}
let state_events =
StateEvents::with_events(state_events.into_iter().map(Event::into_format).collect());
let joined_room = assign!(JoinedRoom::new(), {
account_data,
summary: summary.unwrap_or_default(),
unread_notifications: notification_counts.unwrap_or_default(),
timeline,
state: RoomState::Before(StateEvents::with_events(state_events.into_iter().map(Event::into_format).collect())),
state: if sync_context.use_state_after {
RoomState::After(state_events)
} else {
RoomState::Before(state_events)
},
ephemeral,
unread_thread_notifications: BTreeMap::new(),
});
@@ -344,7 +352,7 @@ struct ShortStateHashes {
#[tracing::instrument(level = "debug", skip_all)]
async fn fetch_shortstatehashes(
services: &Services,
SyncContext { last_sync_end_count, current_count, .. }: SyncContext<'_>,
SyncContext { last_sync_end_count, .. }: SyncContext<'_>,
room_id: &RoomId,
) -> Result<ShortStateHashes> {
// the room state currently.
@@ -354,46 +362,41 @@ async fn fetch_shortstatehashes(
.rooms
.state
.get_room_shortstatehash(room_id)
.map_err(|_| err!(Database(error!("Room {room_id} has no state"))));
.map_err(|_| err!(Database(error!("Room {room_id} has no state"))))
.await?;
// the room state as of the end of the last sync.
// this will be None if we are doing an initial sync or if we just joined this
// room.
// The room state as of the end of the last sync.
// This will be None if we are doing an initial sync.
let last_sync_end_shortstatehash =
OptionFuture::from(last_sync_end_count.map(|last_sync_end_count| {
// look up the shortstatehash saved by the last sync's call to
// `associate_token_shortstatehash`
services
.rooms
.user
.get_token_shortstatehash(room_id, last_sync_end_count)
.inspect_err(move |_| {
debug_warn!(
token = last_sync_end_count,
"Room has no shortstatehash for this token"
);
})
.ok()
OptionFuture::from(last_sync_end_count.map(async |last_sync_end_count| {
pin! {
let pdus = services
.rooms
.timeline
.pdus(room_id, Some(PduCount::Normal(last_sync_end_count)))
.ignore_err();
}
match pdus.next().await {
| Some((_, pdu_after_last_sync_end)) => {
trace!(?pdu_after_last_sync_end.event_id, "pdu at last sync end");
services
.rooms
.state_accessor
.pdu_shortstatehash(&pdu_after_last_sync_end.event_id)
.await
.map_err(|err| err!("Last sync end PDU has no shortstatehash: {err}"))
},
| None => {
// No events have been sent since the last sync, or we just joined this room,
// so the state then is the same as the state now
Ok(current_shortstatehash)
},
}
}))
.map(Option::flatten)
.map(Ok);
let (current_shortstatehash, last_sync_end_shortstatehash) =
try_join(current_shortstatehash, last_sync_end_shortstatehash).await?;
/*
associate the `current_count` with the `current_shortstatehash`, so we can
use it on the next sync as the `last_sync_end_shortstatehash`.
TODO: the table written to by this call grows extremely fast, gaining one new entry for each
joined room on _every single sync request_. we need to find a better way to remember the shortstatehash
between syncs.
*/
services
.rooms
.user
.associate_token_shortstatehash(room_id, current_count, current_shortstatehash)
.await;
.await
.transpose()?;
Ok(ShortStateHashes {
current_shortstatehash,
@@ -452,6 +455,7 @@ async fn build_state_events(
syncing_user,
last_sync_end_count,
full_state,
use_state_after,
..
} = sync_context;
@@ -460,32 +464,28 @@ async fn build_state_events(
last_sync_end_shortstatehash,
} = shortstatehashes;
// the spec states that the `state` property only includes state events up to
// the beginning of the timeline, so we determine the state of the syncing room
// as of the first timeline event. NOTE: this explanation is not entirely
// accurate; see the implementation of `build_state_incremental`.
let timeline_start_shortstatehash = async {
if let Some((_, pdu)) = timeline.pdus.front() {
if let Ok(shortstatehash) = services
let timeline_start_shortstatehash = if let Some((count, pdu)) = timeline.pdus.front() {
if matches!(count, PduCount::Backfilled(_)) {
// We don't have shortstatehashes for backfilled PDUs, the best we can
// do is to use the current state
current_shortstatehash
} else {
services
.rooms
.state_accessor
.pdu_shortstatehash(&pdu.event_id)
.await
{
return shortstatehash;
}
.map_err(|err| err!("Timeline start has no shortstatehash: {err}"))?
}
current_shortstatehash
} else {
// if the timeline is empty there can't possibly be any changes to the state
return Ok(vec![]);
};
// the user IDs of members whose membership needs to be sent to the client, if
// lazy-loading is enabled.
let lazily_loaded_members =
prepare_lazily_loaded_members(services, sync_context, room_id, timeline.senders());
let (timeline_start_shortstatehash, lazily_loaded_members) =
join(timeline_start_shortstatehash, lazily_loaded_members).await;
prepare_lazily_loaded_members(services, sync_context, room_id, timeline.senders()).await;
// compute the state delta between the previous sync and this sync.
match (last_sync_end_count, last_sync_end_shortstatehash) {
@@ -494,16 +494,15 @@ async fn build_state_events(
is Some (meaning the syncing user didn't just join this room for the first time ever), and `full_state` is false,
then use `build_state_incremental`.
*/
| (Some(last_sync_end_count), Some(last_sync_end_shortstatehash)) if !full_state =>
| (Some(_), Some(last_sync_end_shortstatehash)) if !full_state =>
build_state_incremental(
services,
syncing_user,
room_id,
PduCount::Normal(last_sync_end_count),
last_sync_end_shortstatehash,
timeline_start_shortstatehash,
current_shortstatehash,
timeline,
use_state_after,
lazily_loaded_members.as_ref(),
)
.boxed()
@@ -518,6 +517,8 @@ async fn build_state_events(
services,
syncing_user,
timeline_start_shortstatehash,
current_shortstatehash,
use_state_after,
lazily_loaded_members.as_ref(),
)
.boxed()
@@ -598,23 +599,25 @@ async fn check_joined_since_last_sync(
ShortStateHashes { last_sync_end_shortstatehash, .. }: ShortStateHashes,
SyncContext { syncing_user, .. }: SyncContext<'_>,
) -> Result<bool> {
// fetch the syncing user's membership event during the last sync.
// this will be None if `previous_sync_end_shortstatehash` is None.
let membership_during_previous_sync = match last_sync_end_shortstatehash {
| Some(last_sync_end_shortstatehash) => services
.rooms
.state_accessor
.state_get_content(
last_sync_end_shortstatehash,
&StateEventType::RoomMember,
syncing_user.as_str(),
)
.await
.inspect_err(|_| debug_warn!("User has no previous membership"))
.ok(),
| None => None,
let Some(last_sync_end_shortstatehash) = last_sync_end_shortstatehash else {
// For initial syncs always return false, since there's no "last sync" for the
// user to have joined since.
return Ok(false);
};
// Fetch the syncing user's membership event during the last sync.
let membership_during_previous_sync = services
.rooms
.state_accessor
.state_get_content(
last_sync_end_shortstatehash,
&StateEventType::RoomMember,
syncing_user.as_str(),
)
.await
.inspect_err(|_| debug_warn!("User has no previous membership"))
.ok();
// TODO: If the requesting user got state-reset out of the room, this
// will be `true` when it shouldn't be. this function should never be called
// in that situation, but it may be if the membership cache didn't get updated.
+10 -1
View File
@@ -181,6 +181,9 @@ pub(super) async fn load_left_room(
.collect::<Vec<_>>()
.await;
let state_events =
StateEvents::with_events(state_events.into_iter().map(Event::into_format).collect());
Ok(Some(assign!(LeftRoom::new(), {
account_data: RoomAccountData::new(),
timeline: assign!(Timeline::new(), {
@@ -188,7 +191,11 @@ pub(super) async fn load_left_room(
prev_batch: Some(current_count.to_string()),
events: raw_timeline_pdus,
}),
state: State::Before(StateEvents::with_events(state_events.into_iter().map(Event::into_format).collect())),
state: if sync_context.use_state_after {
State::After(state_events)
} else {
State::Before(state_events)
},
})))
}
@@ -264,6 +271,8 @@ async fn build_left_state_and_timeline(
services,
syncing_user,
timeline_start_shortstatehash,
leave_shortstatehash,
sync_context.use_state_after,
lazily_loaded_members.as_ref(),
)
.await?;
+7 -4
View File
@@ -11,12 +11,11 @@ use std::{
use axum::extract::State;
use axum_client_ip::ClientIp;
use conduwuit::{
Err, Result, at, extract_variant,
Err, Result, at, error, extract_variant,
utils::{
ReadyExt, TryFutureExtExt,
stream::{BroadbandExt, Tools, WidebandExt},
},
warn,
};
use conduwuit_service::Services;
use futures::{FutureExt, StreamExt, TryFutureExt, future::OptionFuture};
@@ -110,6 +109,9 @@ struct SyncContext<'a> {
/// The sync filter, which the client uses to specify what data should be
/// included in the sync response.
filter: &'a FilterDefinition,
/// Whether the state at the end of the timeline should be used when
/// calculating state diffs for sync.
use_state_after: bool,
}
impl<'a> SyncContext<'a> {
@@ -263,6 +265,7 @@ pub(crate) async fn build_sync_events(
current_count,
full_state,
filter: &filter,
use_state_after: body.use_state_after,
};
let joined_rooms = services
@@ -275,7 +278,7 @@ pub(crate) async fn build_sync_events(
match joined_room {
| Ok((room, updates)) => Some((room_id, room, updates)),
| Err(err) => {
warn!(?err, %room_id, "error loading joined room");
error!(?err, %room_id, "error loading joined room");
None
},
}
@@ -304,7 +307,7 @@ pub(crate) async fn build_sync_events(
| Ok(Some(left_room)) => Some((room_id, left_room)),
| Ok(None) => None,
| Err(err) => {
warn!(?err, %room_id, "error loading joined room");
error!(?err, %room_id, "error loading joined room");
None
},
}
+60 -143
View File
@@ -1,11 +1,8 @@
use std::{collections::BTreeSet, ops::ControlFlow};
use std::collections::HashSet;
use conduwuit::{
Result, at, is_equal_to,
matrix::{
Event,
pdu::{PduCount, PduEvent},
},
Result, at,
matrix::{Event, pdu::PduEvent},
utils::{
BoolExt, IterStream, ReadyExt, TryFutureExtExt,
stream::{BroadbandExt, TryIgnore},
@@ -16,9 +13,7 @@ use conduwuit_service::{
rooms::{lazy_loading::MemberSet, short::ShortStateHash},
};
use futures::{FutureExt, StreamExt};
use itertools::Itertools;
use ruma::{OwnedEventId, RoomId, UserId, events::StateEventType};
use service::rooms::short::ShortEventId;
use ruma::{OwnedEventId, UserId, events::StateEventType};
use tracing::trace;
use crate::client::TimelinePdus;
@@ -39,13 +34,19 @@ pub(super) async fn build_state_initial(
services: &Services,
sender_user: &UserId,
timeline_start_shortstatehash: ShortStateHash,
timeline_end_shortstatehash: ShortStateHash,
use_state_after: bool,
lazily_loaded_members: Option<&MemberSet>,
) -> Result<Vec<PduEvent>> {
// load the keys and event IDs of the state events at the start of the timeline
let (shortstatekeys, event_ids): (Vec<_>, Vec<_>) = services
.rooms
.state_accessor
.state_full_ids(timeline_start_shortstatehash)
.state_full_ids(if use_state_after {
timeline_end_shortstatehash
} else {
timeline_start_shortstatehash
})
.unzip()
.await;
@@ -92,82 +93,34 @@ pub(super) async fn build_state_initial(
pub(super) async fn build_state_incremental<'a>(
services: &Services,
sender_user: &'a UserId,
room_id: &RoomId,
last_sync_end_count: PduCount,
last_sync_end_shortstatehash: ShortStateHash,
timeline_start_shortstatehash: ShortStateHash,
timeline_end_shortstatehash: ShortStateHash,
timeline: &TimelinePdus,
use_state_after: bool,
lazily_loaded_members: Option<&'a MemberSet>,
) -> Result<Vec<PduEvent>> {
/*
NB: a limited sync is one where `timeline.limited == true`. Synapse calls this a "gappy" sync internally.
let mut state_event_ids: HashSet<OwnedEventId> = HashSet::new();
The algorithm implemented in this function is, currently, quite different from the algorithm vaguely described
by the Matrix specification. This is because the specification's description of the `state` property does not accurately
reflect how Synapse behaves, and therefore how client SDKs behave. Notable differences include:
1. We do not compute the delta using the naive approach of "every state event from the end of the last sync
up to the start of this sync's timeline". see below for details.
2. If lazy-loading is enabled, we include lazily-loaded membership events. The specific users to include are determined
elsewhere and supplied to this function in the `lazily_loaded_members` parameter.
*/
trace!(
%use_state_after,
%last_sync_end_shortstatehash,
%timeline_start_shortstatehash,
%timeline_end_shortstatehash,
"computing state for incremental sync"
);
/*
the `state` property of an incremental sync which isn't limited are _usually_ empty.
(note: the specification says that the `state` property is _always_ empty for limited syncs, which is incorrect.)
however, if an event in the timeline (`timeline.pdus`) merges a split in the room's DAG (i.e. has multiple `prev_events`),
the state at the _end_ of the timeline may include state events which were merged in and don't exist in the state
at the _start_ of the timeline. because this is uncommon, we check here to see if any events in the timeline
merged a split in the DAG.
// Fetch lazy-loaded membership events if lazy-loading is enabled
if let Some(lazily_loaded_members) = lazily_loaded_members
&& !lazily_loaded_members.is_empty()
{
trace!("including lazy membership events for members: {:?}", lazily_loaded_members);
see: https://github.com/element-hq/synapse/issues/16941
*/
let timeline_is_linear = timeline.pdus.is_empty() || {
let last_pdu_of_last_sync = services
services
.rooms
.timeline
.pdus_rev(room_id, Some(last_sync_end_count.saturating_add(1)))
.boxed()
.next()
.await
.transpose()
.expect("last sync should have had some PDUs")
.map(at!(1));
// make sure the prev_events of each pdu in the timeline refer only to the
// previous pdu
timeline
.pdus
.iter()
.try_fold(last_pdu_of_last_sync.map(|pdu| pdu.event_id), |prev_event_id, (_, pdu)| {
if let Ok(pdu_prev_event_id) = pdu.prev_events.iter().exactly_one() {
if prev_event_id
.as_ref()
.is_none_or(is_equal_to!(pdu_prev_event_id))
{
return ControlFlow::Continue(Some(pdu_prev_event_id.to_owned()));
}
}
trace!(
"pdu {:?} has split prev_events (expected {:?}): {:?}",
pdu.event_id, prev_event_id, pdu.prev_events
);
ControlFlow::Break(())
})
.is_continue()
};
if timeline_is_linear && !timeline.limited {
// if there are no splits in the DAG and the timeline isn't limited, then
// `state` will always be empty unless lazy loading is enabled.
if let Some(lazily_loaded_members) = lazily_loaded_members {
if !timeline.pdus.is_empty() {
// lazy loading is enabled, so we return the membership events which were
// requested by the caller.
let lazy_membership_events: Vec<_> = lazily_loaded_members
.short
.multi_get_eventid_from_short::<'_, OwnedEventId, _>(
lazily_loaded_members
.iter()
.stream()
.broad_filter_map(|user_id| async move {
@@ -178,71 +131,24 @@ pub(super) async fn build_state_incremental<'a>(
services
.rooms
.state_accessor
.state_get(
.state_get_shortid(
timeline_start_shortstatehash,
&StateEventType::RoomMember,
user_id.as_str(),
)
.ok()
.await
})
.collect()
.await;
if !lazy_membership_events.is_empty() {
trace!(
"syncing lazy membership events for members: {:?}",
lazy_membership_events
.iter()
.map(|pdu| pdu.state_key().unwrap())
.collect::<Vec<_>>()
);
}
return Ok(lazy_membership_events);
}
}
// lazy loading is disabled, `state` is empty.
return Ok(vec![]);
}),
)
.ignore_err()
.ready_for_each(|event_id| {
state_event_ids.insert(event_id);
})
.await;
}
/*
at this point, either the timeline is `limited` or the DAG has a split in it. this necessitates
computing the incremental state (which may be empty).
NOTE: this code path does not use the `lazy_membership_events` parameter. any changes to membership will be included
in the incremental state. therefore, the incremental state may include "redundant" membership events,
which we do not filter out because A. the spec forbids lazy-load filtering if the timeline is `limited`,
and B. DAG splits which require sending extra membership state events are (probably) uncommon enough that
the performance penalty is acceptable.
*/
trace!(%timeline_is_linear, %timeline.limited, "computing state for incremental sync");
// fetch the shorteventids of state events in the timeline
let state_events_in_timeline: BTreeSet<ShortEventId> = services
.rooms
.short
.multi_get_or_create_shorteventid(timeline.pdus.iter().filter_map(|(_, pdu)| {
if pdu.state_key().is_some() {
Some(pdu.event_id.as_ref())
} else {
None
}
}))
.collect()
.await;
trace!("{} state events in timeline", state_events_in_timeline.len());
/*
fetch the state events which were added since the last sync.
specifically we fetch the difference between the state at the last sync and the state at the _end_
of the timeline, and then we filter out state events in the timeline itself using the shorteventids we fetched.
this is necessary to account for splits in the DAG, as explained above.
*/
let state_diff = services
// Fetch the state events added since the last sync.
services
.rooms
.short
.multi_get_eventid_from_short::<'_, OwnedEventId, _>(
@@ -252,18 +158,29 @@ pub(super) async fn build_state_incremental<'a>(
.state_added((last_sync_end_shortstatehash, timeline_end_shortstatehash))
.await?
.stream()
.ready_filter_map(|(_, shorteventid)| {
if state_events_in_timeline.contains(&shorteventid) {
None
} else {
Some(shorteventid)
}
}),
.map(at!(1)),
)
.ignore_err();
.ignore_err()
.ready_for_each(|event_id| {
state_event_ids.insert(event_id);
})
.await;
// finally, fetch the PDU contents and collect them into a vec
let state_diff_pdus = state_diff
if !use_state_after {
// If state_after isn't enabled, filter out state events which also exist
// in the timeline. If splits exist in the DAG, this may not be exactly the same
// thing as the state diff ending at the start of the timeline, but Synapse
// also does this and it's technically more useful behavior anyway.
// See: https://github.com/element-hq/synapse/issues/16941
for (_, pdu) in &timeline.pdus {
state_event_ids.remove(pdu.event_id());
}
}
// Finally, fetch the PDU contents and collect them into a vec
let state_diff_pdus = state_event_ids
.stream()
.broad_filter_map(|event_id| async move {
services
.rooms
+27 -7
View File
@@ -15,7 +15,7 @@ use conduwuit::{
BoolExt, FutureBoolExt, IterStream, ReadyExt, TryFutureExtExt,
future::ReadyEqExt,
math::{ruma_from_usize, usize_from_ruma},
stream::WidebandExt,
stream::{TryIgnore, WidebandExt},
},
warn,
};
@@ -41,6 +41,7 @@ use ruma::{
uint,
};
use service::account_data::AnyRawAccountDataEvent;
use tokio::pin;
use super::share_encrypted_room;
use crate::{
@@ -858,12 +859,31 @@ where
continue;
};
let since_shortstatehash = services
.rooms
.user
.get_token_shortstatehash(room_id, globalsince)
.await
.ok();
let since_shortstatehash = async {
pin! {
let pdus_rev = services
.rooms
.timeline
.pdus_rev(room_id, Some(PduCount::Normal(globalsince.saturating_sub(1))))
.ignore_err();
}
let (count, pdu_at_last_sync_end) = pdus_rev.next().await?;
if matches!(count, PduCount::Backfilled(_)) {
None
} else {
Some(
services
.rooms
.state_accessor
.pdu_shortstatehash(&pdu_at_last_sync_end.event_id)
.await
.expect("pdu should have a shortstatehash"),
)
}
}
.await;
let encrypted_room = services
.rooms
-1
View File
@@ -381,7 +381,6 @@ async fn handle_room(
.rooms
.event_handler
.handle_incoming_pdu(origin, room_id, &event_id, value, true)
.boxed()
.await
.map(|_| ());
results.push((event_id, result));
+2 -2
View File
@@ -375,7 +375,7 @@ pub struct Config {
#[serde(default = "default_max_request_size")]
pub max_request_size: usize,
/// default: 256
/// default: 192
#[serde(default = "default_max_fetch_prev_events")]
pub max_fetch_prev_events: u16,
@@ -2549,7 +2549,7 @@ fn default_pusher_timeout() -> u64 { 60 }
fn default_pusher_idle_timeout() -> u64 { 15 }
fn default_max_fetch_prev_events() -> u16 { 256_u16 }
fn default_max_fetch_prev_events() -> u16 { 192_u16 }
fn default_max_concurrent_inbound_transactions() -> usize { 150 }
+1
View File
@@ -21,6 +21,7 @@ pub fn versions() -> Vec<String> {
"v1.12".to_owned(),
"v1.13".to_owned(),
"v1.14".to_owned(),
"v1.16".to_owned(),
]
}
+1 -10
View File
@@ -187,22 +187,13 @@ pub(super) static MAPS: &[Descriptor] = &[
val_size_hint: Some(8),
..descriptor::RANDOM_SMALL
},
Descriptor {
name: "roomid_mindepth",
..descriptor::RANDOM_SMALL
},
Descriptor {
name: "roomserverids",
..descriptor::RANDOM_SMALL
},
Descriptor {
name: "roomsynctoken_shortstatehash",
file_shape: 3,
val_size_hint: Some(8),
block_size: 512,
compression_level: 3,
bottommost_level: Some(6),
..descriptor::SEQUENTIAL
..descriptor::DROPPED
},
Descriptor {
name: "roomuserdataid_accountdata",
@@ -1,456 +1,233 @@
use std::{
collections::{BTreeMap, HashMap, HashSet, VecDeque, hash_map},
collections::{BTreeMap, HashSet, VecDeque, hash_map},
time::Instant,
};
use assign::assign;
use conduwuit::{
Event, PduEvent, debug, debug_info, debug_warn, err, error,
matrix::event::gen_event_id_canonical_json,
state_res::lexicographical_topological_sort,
trace,
utils::{IterStream, continue_exponential_backoff_secs, stream::BroadbandExt},
warn,
Event, PduEvent, debug, debug_warn, implement, matrix::event::gen_event_id_canonical_json,
trace, utils::continue_exponential_backoff_secs, warn,
};
use futures::StreamExt;
use ruma::{
CanonicalJsonObject, CanonicalJsonValue, EventId, MilliSecondsSinceUnixEpoch, OwnedEventId,
RoomId, ServerName, UInt,
api::federation::event::{get_event, get_missing_events},
int,
CanonicalJsonValue, EventId, OwnedEventId, RoomId, ServerName,
api::federation::event::get_event,
};
use super::get_room_version_rules;
/// Attempts to build a localised directed acyclic graph out of the given PDUs,
/// returning them in a topologically sorted order.
/// Find the event and auth it. Once the event is validated (steps 1 - 8)
/// it is appended to the outliers Tree.
///
/// This is used to attempt to process PDUs in an order that respects their
/// dependencies, however it is ultimately the sender's responsibility to send
/// them in a processable order, so this is just a best effort attempt. It does
/// not account for power levels or other tie breaks.
pub async fn build_local_dag<S: std::hash::BuildHasher>(
pdu_map: &HashMap<OwnedEventId, CanonicalJsonObject, S>,
) -> conduwuit::Result<Vec<OwnedEventId>> {
debug_assert!(pdu_map.len() >= 2, "needless call to build_local_dag with less than 2 PDUs");
let mut dag: HashMap<OwnedEventId, HashSet<OwnedEventId>> =
HashMap::with_capacity(pdu_map.len());
let mut id_origin_ts: HashMap<OwnedEventId, _> = HashMap::with_capacity(pdu_map.len());
/// Returns pdu and if we fetched it over federation the raw json.
///
/// a. Look in the main timeline (pduid_pdu tree)
/// b. Look at outlier pdu tree
/// c. Ask origin server over federation
/// d. TODO: Ask other servers over federation?
#[implement(super::Service)]
pub(super) async fn fetch_and_handle_outliers<'a, Pdu, Events>(
&self,
origin: &'a ServerName,
events: Events,
create_event: &'a Pdu,
room_id: &'a RoomId,
) -> Vec<(PduEvent, Option<BTreeMap<String, CanonicalJsonValue>>)>
where
Pdu: Event + Send + Sync,
Events: Iterator<Item = &'a EventId> + Clone + Send,
{
let back_off = |id| match self
.services
.globals
.bad_event_ratelimiter
.write()
.entry(id)
{
| hash_map::Entry::Vacant(e) => {
e.insert((Instant::now(), 1));
},
| hash_map::Entry::Occupied(mut e) => {
*e.get_mut() = (Instant::now(), e.get().1.saturating_add(1));
},
};
for (event_id, value) in pdu_map {
// We already checked that these properties are correct in parse_incoming_pdu,
// so it's safe to unwrap here.
// We also filter to remove any prev_events that are not in this pdu_map, as we
// need to have at least one event with zero out degrees for the lexico-topo
// sort below. If there are multiple events with omitted prevs, they will be
// ordered by timestamp, then event ID. At that point though, it's unlikely to
// matter.
let prev_events = value
.get("prev_events")
.unwrap()
.as_array()
.unwrap()
.iter()
.map(|v| EventId::parse(v.as_str().unwrap()).unwrap())
.filter(|id| pdu_map.contains_key(id))
.collect();
let mut events_with_auth_events = Vec::with_capacity(events.clone().count());
trace!("Fetching {} outlier pdus", events.clone().count());
dag.insert(event_id.clone(), prev_events);
let origin_server_ts = value
.get("origin_server_ts")
.and_then(CanonicalJsonValue::as_integer)
.unwrap_or_default();
id_origin_ts.insert(event_id.clone(), origin_server_ts);
}
for id in events {
// a. Look in the main timeline (pduid_pdu tree)
// b. Look at outlier pdu tree
// (get_pdu_json checks both)
if let Ok(local_pdu) = self.services.timeline.get_pdu(id).await {
trace!("Found {id} in main timeline or outlier tree");
events_with_auth_events.push((id.to_owned(), Some(local_pdu), vec![]));
continue;
}
debug!(count = dag.len(), "Sorting incoming events with partial graph");
lexicographical_topological_sort(&dag, &async |node_id| {
// Note: we don't bother fetching power levels because that would massively slow
// this function down. This is a best-effort attempt to order events correctly
// for processing, however ultimately that should be the sender's job.
let ts = id_origin_ts
.get(&node_id)
.copied()
.unwrap_or_else(|| int!(0))
.to_string()
.parse::<u64>()
.ok()
.and_then(UInt::new)
.unwrap_or_default();
Ok((int!(0), MilliSecondsSinceUnixEpoch(ts)))
})
.await
.inspect(|sorted| {
debug_assert_eq!(
sorted.len(),
pdu_map.len(),
"Sorted graph was not the same size as the input graph"
);
})
.map_err(|e| err!("failed to resolve local graph: {e}"))
}
// c. Ask origin server over federation
// We also handle its auth chain here so we don't get a stack overflow in
// handle_outlier_pdu.
let mut todo_auth_events: VecDeque<_> = [id.to_owned()].into();
let mut events_in_reverse_order = Vec::with_capacity(todo_auth_events.len());
impl super::Service {
/// Uses `/_matrix/federation/v1/get_missing_events` to fill gaps in the
/// DAG.
///
/// When this function is called, the "earliest events" (current forward
/// extremities) will be collected, and the function will loop with an
/// exponentially incrementing limit (up to 100 per request) until it has
/// filled the gap, i.e. when the remote says there's no more events.
///
/// This function will iterate until the remote returns no more events,
/// increasing the limit by a factor of 10. If 100 iterations are reached or
/// max_fetch_prev_events events are backfilled, the function will give up
/// and return what it has, to avoid pulling in too much data (for example,
/// absurdly large gaps).
///
/// This function does not persist the events. The caller is responsible for
/// passing them through handle_incoming_pdu.
///
/// ## Parameters
///
/// - `room_id`: The room's ID.
/// - `head`: The event we are potentially missing prev_events for.
/// - `tail`: The most recently known events in the graph (typically forward
/// extremities).
/// - `via`: The server to ask for missing events.
/// - `min_depth`: Don't process events with a `depth` lower than this
/// value. Not massively useful, but can help short-circuit infinite loops
/// and weird edge paths.
pub async fn get_missing_events(
&self,
room_id: &RoomId,
head: &PduEvent,
tail: Vec<OwnedEventId>,
via: &ServerName,
min_depth: UInt,
) -> conduwuit::Result<HashMap<OwnedEventId, PduEvent>> {
#[cfg(debug_assertions)]
{
let missing_count = head
.prev_events()
.stream()
.broad_filter_map(|event_id| async move {
match self
.services
.timeline
.get_non_outlier_pdu_json(event_id)
.await
.inspect(|_| debug!("Found prev_event {event_id} locally."))
.inspect_err(
|e| debug!(%e, "Could not find prev_event {event_id} locally."),
) {
| Ok(_) => None,
| Err(_) => Some(event_id),
}
})
.count()
.await;
debug_assert_ne!(
missing_count, 0,
"event passed to get_missing_events is not missing any events (wasteful call)"
);
};
let mut discovered = HashMap::with_capacity(20);
let mut latest_events = vec![head.event_id().to_owned()];
let mut iterations = 0_u8;
loop {
iterations = iterations.saturating_add(1);
let limit = iterations.saturating_mul(10).min(100);
debug_info!(%limit, %via, %iterations, discovered=discovered.len(), %min_depth, "Attempting to gap fill missing events");
let response: get_missing_events::v1::Response = self
let mut events_all = HashSet::with_capacity(todo_auth_events.len());
while let Some(next_id) = todo_auth_events.pop_front() {
if let Some((time, tries)) = self
.services
.sending
.send_federation_request(
via,
assign!(
get_missing_events::v1::Request::new(
room_id.to_owned(),
tail.clone(),
latest_events.clone()
),
{limit: limit.into(), min_depth}
),
)
.await?;
if response.events.is_empty() {
debug_info!(%via, "Finished gap filling missing events (remote returned no more events).");
break;
}
debug_info!("Got {} events back from remote", response.events.len());
latest_events.clear();
for raw_event in response.events {
let (_, event_id, pdu_json) = self.parse_incoming_pdu(&raw_event).await?;
let pdu = PduEvent::from_id_val(&event_id, pdu_json).map_err(|e| {
err!(Request(BadJson("Failed to parse backfilled event {event_id}: {e}")))
})?;
if pdu.depth < min_depth {
.globals
.bad_event_ratelimiter
.read()
.get(&*next_id)
{
// Exponential backoff
const MIN_DURATION: u64 = 60 * 2;
const MAX_DURATION: u64 = 60 * 60 * 8;
if continue_exponential_backoff_secs(
MIN_DURATION,
MAX_DURATION,
time.elapsed(),
*tries,
) {
debug_warn!(
"Received PDU with depth {} below min_depth {}, ignoring",
pdu.depth,
min_depth
tried = ?*tries,
elapsed = ?time.elapsed(),
"Backing off from {next_id}",
);
continue;
}
for prev_event_id in pdu.prev_events() {
if discovered.contains_key(prev_event_id) {
continue;
}
if self
.services
.timeline
.non_outlier_pdu_exists(prev_event_id)
.await
{
continue;
}
latest_events.push(prev_event_id.to_owned());
break;
}
discovered.insert(event_id.clone(), pdu);
}
if latest_events.is_empty() {
break;
} else if discovered.len() > self.services.server.config.max_fetch_prev_events.into()
|| iterations >= 20
{
error!(
filled=discovered.len(),
max_fetch_prev_events=self.services.server.config.max_fetch_prev_events,
%iterations,
"Gap too large, giving up"
);
break;
}
}
Ok(discovered)
}
/// Find the event and auth it. Once the event is validated (steps 1 - 8)
/// it is appended to the outliers Tree.
///
/// Returns pdu and if we fetched it over federation the raw json.
///
/// a. Look in the main timeline (pduid_pdu tree)
/// b. Look at outlier pdu tree
/// c. Ask origin server over federation
/// d. TODO: Ask other servers over federation?
#[deprecated]
pub(super) async fn fetch_and_handle_outliers<'a, Pdu, Events>(
&self,
origin: &'a ServerName,
events: Events,
create_event: &'a Pdu,
room_id: &'a RoomId,
) -> Vec<(PduEvent, Option<BTreeMap<String, CanonicalJsonValue>>)>
where
Pdu: Event + Send + Sync,
Events: Iterator<Item = &'a EventId> + Clone + Send,
{
let back_off = |id| match self
.services
.globals
.bad_event_ratelimiter
.write()
.entry(id)
{
| hash_map::Entry::Vacant(e) => {
e.insert((Instant::now(), 1));
},
| hash_map::Entry::Occupied(mut e) => {
*e.get_mut() = (Instant::now(), e.get().1.saturating_add(1));
},
};
let mut events_with_auth_events = Vec::with_capacity(events.clone().count());
trace!("Fetching {} outlier pdus", events.clone().count());
for id in events {
// a. Look in the main timeline (pduid_pdu tree)
// b. Look at outlier pdu tree
// (get_pdu_json checks both)
if let Ok(local_pdu) = self.services.timeline.get_pdu(id).await {
trace!("Found {id} in main timeline or outlier tree");
events_with_auth_events.push((id.to_owned(), Some(local_pdu), vec![]));
if events_all.contains(&next_id) {
continue;
}
// c. Ask origin server over federation
// We also handle its auth chain here so we don't get a stack overflow in
// handle_outlier_pdu.
let mut todo_auth_events: VecDeque<_> = [id.to_owned()].into();
let mut events_in_reverse_order = Vec::with_capacity(todo_auth_events.len());
let mut events_all = HashSet::with_capacity(todo_auth_events.len());
while let Some(next_id) = todo_auth_events.pop_front() {
if let Some((time, tries)) = self
.services
.globals
.bad_event_ratelimiter
.read()
.get(&*next_id)
{
// Exponential backoff
const MIN_DURATION: u64 = 60 * 2;
const MAX_DURATION: u64 = 60 * 60 * 8;
if continue_exponential_backoff_secs(
MIN_DURATION,
MAX_DURATION,
time.elapsed(),
*tries,
) {
debug_warn!(
tried = ?*tries,
elapsed = ?time.elapsed(),
"Backing off from {next_id}",
);
continue;
}
}
if events_all.contains(&next_id) {
continue;
}
if self.services.timeline.pdu_exists(&next_id).await {
trace!("Found {next_id} in db");
continue;
}
debug!("Fetching {next_id} over federation from {origin}.");
match self
.services
.sending
.send_federation_request(
origin,
get_event::v1::Request::new((*next_id).to_owned()),
)
.await
{
| Ok(res) => {
debug!("Got {next_id} over federation from {origin}");
let Ok(room_version_rules) = get_room_version_rules(create_event) else {
back_off((*next_id).to_owned());
continue;
};
let Ok((calculated_event_id, value)) =
gen_event_id_canonical_json(&res.pdu, &room_version_rules)
else {
back_off((*next_id).to_owned());
continue;
};
if calculated_event_id != *next_id {
warn!(
"Server didn't return event id we requested: requested: \
{next_id}, we got {calculated_event_id}. Event: {:?}",
&res.pdu
);
}
if let Some(auth_events) = value
.get("auth_events")
.and_then(CanonicalJsonValue::as_array)
{
for auth_event in auth_events {
match serde_json::from_value::<OwnedEventId>(
auth_event.clone().into(),
) {
| Ok(auth_event) => {
trace!(
"Found auth event id {auth_event} for event \
{next_id}"
);
todo_auth_events.push_back(auth_event);
},
| _ => {
warn!("Auth event id is not valid");
},
}
}
} else {
warn!("Auth event list invalid");
}
events_in_reverse_order.push((next_id.clone(), value));
events_all.insert(next_id);
},
| Err(e) => {
warn!("Failed to fetch auth event {next_id} from {origin}: {e}");
back_off((*next_id).to_owned());
},
}
if self.services.timeline.pdu_exists(&next_id).await {
trace!("Found {next_id} in db");
continue;
}
events_with_auth_events.push((id.to_owned(), None, events_in_reverse_order));
}
let mut pdus = Vec::with_capacity(events_with_auth_events.len());
for (id, local_pdu, events_in_reverse_order) in events_with_auth_events {
// a. Look in the main timeline (pduid_pdu tree)
// b. Look at outlier pdu tree
// (get_pdu_json checks both)
if let Some(local_pdu) = local_pdu {
trace!("Found {id} in main timeline or outlier tree");
pdus.push((local_pdu.clone(), None));
}
for (next_id, value) in events_in_reverse_order.into_iter().rev() {
if let Some((time, tries)) = self
.services
.globals
.bad_event_ratelimiter
.read()
.get(&*next_id)
{
// Exponential backoff
const MIN_DURATION: u64 = 5 * 60;
const MAX_DURATION: u64 = 60 * 60 * 24;
if continue_exponential_backoff_secs(
MIN_DURATION,
MAX_DURATION,
time.elapsed(),
*tries,
) {
debug!("Backing off from {next_id}");
continue;
}
}
trace!("Handling outlier {next_id}");
match Box::pin(self.handle_outlier_pdu(
debug!("Fetching {next_id} over federation from {origin}.");
match self
.services
.sending
.send_federation_request(
origin,
create_event,
&next_id,
room_id,
value.clone(),
true,
))
get_event::v1::Request::new((*next_id).to_owned()),
)
.await
{
| Ok((pdu, json)) =>
if next_id == *id {
trace!("Handled outlier {next_id} (original request)");
pdus.push((pdu, Some(json)));
},
| Err(e) => {
warn!("Authentication of event {next_id} failed: {e:?}");
back_off(next_id);
},
}
{
| Ok(res) => {
debug!("Got {next_id} over federation from {origin}");
let Ok(room_version_rules) = get_room_version_rules(create_event) else {
back_off((*next_id).to_owned());
continue;
};
let Ok((calculated_event_id, value)) =
gen_event_id_canonical_json(&res.pdu, &room_version_rules)
else {
back_off((*next_id).to_owned());
continue;
};
if calculated_event_id != *next_id {
warn!(
"Server didn't return event id we requested: requested: {next_id}, \
we got {calculated_event_id}. Event: {:?}",
&res.pdu
);
}
if let Some(auth_events) = value
.get("auth_events")
.and_then(CanonicalJsonValue::as_array)
{
for auth_event in auth_events {
match serde_json::from_value::<OwnedEventId>(
auth_event.clone().into(),
) {
| Ok(auth_event) => {
trace!(
"Found auth event id {auth_event} for event {next_id}"
);
todo_auth_events.push_back(auth_event);
},
| _ => {
warn!("Auth event id is not valid");
},
}
}
} else {
warn!("Auth event list invalid");
}
events_in_reverse_order.push((next_id.clone(), value));
events_all.insert(next_id);
},
| Err(e) => {
warn!("Failed to fetch auth event {next_id} from {origin}: {e}");
back_off((*next_id).to_owned());
},
}
}
trace!("Fetched and handled {} outlier pdus", pdus.len());
pdus
events_with_auth_events.push((id.to_owned(), None, events_in_reverse_order));
}
let mut pdus = Vec::with_capacity(events_with_auth_events.len());
for (id, local_pdu, events_in_reverse_order) in events_with_auth_events {
// a. Look in the main timeline (pduid_pdu tree)
// b. Look at outlier pdu tree
// (get_pdu_json checks both)
if let Some(local_pdu) = local_pdu {
trace!("Found {id} in main timeline or outlier tree");
pdus.push((local_pdu.clone(), None));
}
for (next_id, value) in events_in_reverse_order.into_iter().rev() {
if let Some((time, tries)) = self
.services
.globals
.bad_event_ratelimiter
.read()
.get(&*next_id)
{
// Exponential backoff
const MIN_DURATION: u64 = 5 * 60;
const MAX_DURATION: u64 = 60 * 60 * 24;
if continue_exponential_backoff_secs(
MIN_DURATION,
MAX_DURATION,
time.elapsed(),
*tries,
) {
debug!("Backing off from {next_id}");
continue;
}
}
trace!("Handling outlier {next_id}");
match Box::pin(self.handle_outlier_pdu(
origin,
create_event,
&next_id,
room_id,
value.clone(),
true,
))
.await
{
| Ok((pdu, json)) =>
if next_id == *id {
trace!("Handled outlier {next_id} (original request)");
pdus.push((pdu, Some(json)));
},
| Err(e) => {
warn!("Authentication of event {next_id} failed: {e:?}");
back_off(next_id);
},
}
}
}
trace!("Fetched and handled {} outlier pdus", pdus.len());
pdus
}
+115 -83
View File
@@ -1,96 +1,128 @@
use std::collections::HashMap;
use std::{
collections::{BTreeMap, HashMap, HashSet, VecDeque},
iter::once,
};
use conduwuit::{
Event, PduEvent, debug, debug_info,
utils::{BoolExt, IterStream, stream::BroadbandExt},
warn,
Event, PduEvent, Result, debug_warn, err, implement,
state_res::{self},
};
use futures::{FutureExt, future};
use ruma::{
CanonicalJsonValue, EventId, MilliSecondsSinceUnixEpoch, OwnedEventId, RoomId, ServerName,
int, uint,
};
use futures::StreamExt;
use ruma::{RoomId, ServerName};
use crate::rooms::event_handler::build_local_dag;
use super::check_room_id;
impl super::Service {
/// Fetches any missing prev_events for this event and persists them before
/// returning.
pub(super) async fn fetch_prevs(
&self,
room_id: &RoomId,
create_event: &PduEvent,
incoming_pdu: &PduEvent,
origin: &ServerName,
) -> conduwuit::Result<()> {
let missing = incoming_pdu
.prev_events()
.stream()
.broad_filter_map(|event_id| async move {
self.services
.timeline
.get_non_outlier_pdu_json(event_id)
.await
.is_ok()
.or(|| event_id.to_owned())
})
.collect::<Vec<_>>()
.await;
if missing.is_empty() {
debug!(event_id=%incoming_pdu.event_id(), "No missing prev events.");
return Ok(());
}
debug!(%room_id, event_id=%incoming_pdu.event_id(), ?missing, "Fetching previous events");
let tail = self
.services
.state
.get_forward_extremities(room_id)
.collect::<Vec<_>>()
.await;
#[implement(super::Service)]
#[tracing::instrument(
level = "debug",
skip_all,
fields(%origin),
)]
#[allow(clippy::type_complexity)]
pub(super) async fn fetch_prev<'a, Pdu, Events>(
&self,
origin: &ServerName,
create_event: &Pdu,
room_id: &RoomId,
first_ts_in_room: MilliSecondsSinceUnixEpoch,
initial_set: Events,
) -> Result<(
Vec<OwnedEventId>,
HashMap<OwnedEventId, (PduEvent, BTreeMap<String, CanonicalJsonValue>)>,
)>
where
Pdu: Event + Send + Sync,
Events: Iterator<Item = &'a EventId> + Clone + Send,
{
let num_ids = initial_set.clone().count();
let mut eventid_info = HashMap::new();
let mut graph: HashMap<OwnedEventId, _> = HashMap::with_capacity(num_ids);
let mut todo_outlier_stack: VecDeque<OwnedEventId> =
initial_set.map(ToOwned::to_owned).collect();
let backfilled = self
.get_missing_events(
room_id,
incoming_pdu,
tail,
let mut amount = 0;
while let Some(prev_event_id) = todo_outlier_stack.pop_front() {
self.services.server.check_running()?;
match self
.fetch_and_handle_outliers(
origin,
self.services.metadata.get_mindepth(room_id).await,
once(prev_event_id.as_ref()),
create_event,
room_id,
)
.await?;
debug_info!("Fetched {} missing events", backfilled.len());
.boxed()
.await
.pop()
{
| Some((pdu, mut json_opt)) => {
check_room_id(room_id, &pdu)?;
// Persist all fetched events
let mapped = backfilled
.iter()
.map(|(eid, evt)| {
let mut obj = evt.to_canonical_object();
obj.remove("event_id"); // event_id is inserted by backfill_missing_events
(eid.clone(), obj)
})
.collect::<HashMap<_, _>>();
let to_persist = if mapped.len() <= 1 {
mapped.keys().map(ToOwned::to_owned).collect()
} else {
build_local_dag(&mapped).await?
};
for event_id in to_persist {
debug_info!("Persisting fetched prev event {event_id}");
let obj = mapped.get(&event_id).cloned().unwrap();
match self
.handle_outlier_pdu(origin, create_event, &event_id, room_id, obj, false)
.await
{
| Ok((pdu, val)) =>
self.upgrade_outlier_to_timeline_pdu(pdu, val, create_event, origin, room_id)
.await,
| Err(e) => {
warn!("Failed to persist prev_event {event_id}: {e}");
let limit = self.services.server.config.max_fetch_prev_events;
if amount > limit {
debug_warn!("Max prev event limit reached! Limit: {limit}");
graph.insert(prev_event_id.clone(), HashSet::new());
continue;
},
}?;
}
}
// NOTE because i keep forgetting: the caller persists incoming_pdu.
// we only care about its prev events
Ok(())
if json_opt.is_none() {
json_opt = self
.services
.outlier
.get_outlier_pdu_json(&prev_event_id)
.await
.ok();
}
if let Some(json) = json_opt {
if pdu.origin_server_ts() > first_ts_in_room {
amount = amount.saturating_add(1);
for prev_prev in pdu.prev_events() {
if !graph.contains_key(prev_prev) {
todo_outlier_stack.push_back(prev_prev.to_owned());
}
}
graph.insert(
prev_event_id.clone(),
pdu.prev_events().map(ToOwned::to_owned).collect(),
);
} else {
// Time based check failed
graph.insert(prev_event_id.clone(), HashSet::new());
}
eventid_info.insert(prev_event_id.clone(), (pdu, json));
} else {
// Get json failed, so this was not fetched over federation
graph.insert(prev_event_id.clone(), HashSet::new());
}
},
| _ => {
// Fetch and handle failed
graph.insert(prev_event_id.clone(), HashSet::new());
},
}
}
let event_fetch = |event_id| {
let origin_server_ts = eventid_info
.get(&event_id)
.map_or_else(|| uint!(0), |info| info.0.origin_server_ts().get());
// This return value is the key used for sorting events,
// events are then sorted by power level, time,
// and lexically by event_id.
future::ok((int!(0), MilliSecondsSinceUnixEpoch(origin_server_ts)))
};
let sorted = state_res::lexicographical_topological_sort(&graph, &event_fetch)
.await
.map_err(|e| err!(Database(error!("Error sorting prev events: {e}"))))?;
Ok((sorted, eventid_info))
}
@@ -1,6 +1,7 @@
use std::collections::{HashMap, hash_map};
use conduwuit::{Err, Event, Result, debug, debug_warn, err, implement};
use futures::FutureExt;
use ruma::{
EventId, OwnedEventId, RoomId, ServerName, api::federation::event::get_room_state_ids,
events::StateEventType,
@@ -41,6 +42,7 @@ where
let state_ids = res.pdu_ids.iter().map(AsRef::as_ref);
let state_vec = self
.fetch_and_handle_outliers(origin, state_ids, create_event, room_id)
.boxed()
.await;
let mut state: HashMap<ShortStateKey, OwnedEventId> = HashMap::with_capacity(state_vec.len());
@@ -1,11 +1,14 @@
use std::{collections::BTreeMap, time::Instant};
use std::{
collections::{BTreeMap, hash_map},
time::Instant,
};
use conduwuit::{
Err, Event, PduEvent, Result, debug::INFO_SPAN_LEVEL, debug_error, debug_info, defer, err,
implement, info, trace, warn,
implement, info, trace, utils::stream::IterStream, warn,
};
use futures::{
FutureExt,
FutureExt, TryFutureExt, TryStreamExt,
future::{OptionFuture, try_join4},
};
use ruma::{
@@ -233,21 +236,63 @@ pub async fn handle_incoming_pdu<'a>(
}
// Skip old events
// let first_ts_in_room = self
// .services
// .timeline
// .first_pdu_in_room(room_id)
// .await?
// .origin_server_ts();
let first_ts_in_room = self
.services
.timeline
.first_pdu_in_room(room_id)
.await?
.origin_server_ts();
// 9. Fetch any missing prev events doing all checks listed here starting at 1.
// These are timeline events
debug!("Handling previous events");
let (sorted_prev_events, mut eventid_info) = self
.fetch_prev(origin, create_event, room_id, first_ts_in_room, incoming_pdu.prev_events())
.await?;
self.fetch_prevs(room_id, create_event, &incoming_pdu, origin)
debug!(
events = ?sorted_prev_events,
"Handling previous events"
);
sorted_prev_events
.iter()
.try_stream()
.map_ok(AsRef::as_ref)
.try_for_each(|prev_id| {
self.handle_prev_pdu(
origin,
event_id,
room_id,
eventid_info.remove(prev_id),
create_event,
first_ts_in_room,
prev_id,
)
.inspect_err(move |e| {
warn!("Prev {prev_id} failed: {e}");
match self
.services
.globals
.bad_event_ratelimiter
.write()
.entry(prev_id.into())
{
| hash_map::Entry::Vacant(e) => {
e.insert((Instant::now(), 1));
},
| hash_map::Entry::Occupied(mut e) => {
let tries = e.get().1.saturating_add(1);
*e.get_mut() = (Instant::now(), tries);
},
}
})
.map(|_| self.services.server.check_running())
})
.boxed()
.await?;
// Done with prev events, now handling the incoming event
self.upgrade_outlier_to_timeline_pdu(incoming_pdu, val, create_event, origin, room_id)
.boxed()
.await
}
@@ -1,13 +1,13 @@
use std::collections::{BTreeMap, HashMap, hash_map};
use conduwuit::{
Err, Event, PduEvent, Result, debug, debug_info, debug_warn, err, implement, info, state_res,
Err, Event, PduEvent, Result, debug, debug_info, debug_warn, err, implement, state_res,
trace, warn,
};
use futures::future::ready;
use ruma::{
CanonicalJsonObject, CanonicalJsonValue, EventId, OwnedEventId, RoomId, ServerName,
api::federation::authorization::get_event_authorization, events::StateEventType,
events::StateEventType,
};
use super::{check_room_id, get_room_version_rules};
@@ -22,7 +22,7 @@ pub(super) async fn handle_outlier_pdu<'a, Pdu>(
event_id: &'a EventId,
room_id: &'a RoomId,
mut value: CanonicalJsonObject,
_auth_events_known: bool,
auth_events_known: bool,
) -> Result<(PduEvent, BTreeMap<String, CanonicalJsonValue>)>
where
Pdu: Event + Send + Sync,
@@ -107,52 +107,45 @@ where
}
// Fetch any missing ones & reject invalid ones
if auth_events.len() != pdu_event.auth_events().count() {
info!("Missing some auth events, asking remote for auth chain");
let response: get_event_authorization::v1::Response = self
.services
.sending
.send_federation_request(
let missing_auth_events = if auth_events_known {
pdu_event
.auth_events()
.filter(|id| !auth_events.contains_key(*id))
.collect::<Vec<_>>()
} else {
pdu_event.auth_events().collect::<Vec<_>>()
};
if !missing_auth_events.is_empty() || !auth_events_known {
debug_info!(
"Fetching {} missing auth events for outlier event {event_id}",
missing_auth_events.len()
);
for (pdu, _) in self
.fetch_and_handle_outliers(
origin,
get_event_authorization::v1::Request::new(
room_id.to_owned(),
event_id.to_owned(),
),
missing_auth_events.iter().copied(),
create_event,
room_id,
)
.await
.map_err(|e| {
err!(Request(Forbidden(
"Remote server is not divulging incoming event's auth chain: {e}"
)))
})?;
let mut auth_chain_map = HashMap::with_capacity(response.auth_chain.len());
for auth_pdu_json in response.auth_chain {
let (auth_event_room_id, auth_event_id, auth_pdu_json) =
self.parse_incoming_pdu(&auth_pdu_json).await?;
if auth_event_room_id != room_id {
return Err!(Request(BadJson(
"Auth event {auth_event_id} is in {auth_event_room_id}, not {room_id}."
)));
}
let auth_pdu = PduEvent::from_id_val(&auth_event_id, auth_pdu_json)
.map_err(|e| err!(Request(BadJson("Invalid PDU {auth_event_id}: {e}"))))?;
auth_chain_map.insert(auth_event_id, auth_pdu);
{
auth_events.insert(pdu.event_id().to_owned(), pdu);
}
for aid in pdu_event.auth_events() {
if auth_events.contains_key(aid) {
continue;
}
if let Some(auth_event) = auth_chain_map.get(aid) {
auth_events.insert(aid.to_owned(), auth_event.clone());
} else {
return Err!(Request(Forbidden(
"Remote server is not divulging incoming event's auth events (missing: \
{aid})"
)));
}
}
// TODO: do events received from auth chain need persisting? that sounds
// awfully slow
} else {
debug!("No missing auth events for outlier event {event_id}");
}
// reject if we are still missing some
let still_missing = pdu_event
.auth_events()
.filter(|id| !auth_events.contains_key(*id))
.collect::<Vec<_>>();
if !still_missing.is_empty() {
// Don't reject: this could be a temporary condition
// TODO: use get_missing_events?
return Err!(Request(InvalidParam(
"Could not fetch all auth events for outlier event {event_id}, still missing: \
{still_missing:?}"
)));
}
// 6. Reject "due to auth events" if the event doesn't pass auth based on the
@@ -0,0 +1,89 @@
use std::{collections::BTreeMap, time::Instant};
use conduwuit::{
Err, Event, PduEvent, Result, debug::INFO_SPAN_LEVEL, defer, implement,
utils::continue_exponential_backoff_secs,
};
use ruma::{CanonicalJsonValue, EventId, MilliSecondsSinceUnixEpoch, RoomId, ServerName};
use tracing::debug;
#[implement(super::Service)]
#[allow(clippy::type_complexity)]
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(
name = "prev",
level = INFO_SPAN_LEVEL,
skip_all,
fields(%prev_id),
)]
pub(super) async fn handle_prev_pdu<'a, Pdu>(
&self,
origin: &'a ServerName,
event_id: &'a EventId,
room_id: &'a RoomId,
eventid_info: Option<(PduEvent, BTreeMap<String, CanonicalJsonValue>)>,
create_event: &'a Pdu,
first_ts_in_room: MilliSecondsSinceUnixEpoch,
prev_id: &'a EventId,
) -> Result
where
Pdu: Event + Send + Sync,
{
// Check for disabled again because it might have changed
if self.services.metadata.is_disabled(room_id).await {
return Err!(Request(Forbidden(debug_warn!(
"Federaton of room {room_id} is currently disabled on this server. Request by \
origin {origin} and event ID {event_id}"
))));
}
if let Some((time, tries)) = self
.services
.globals
.bad_event_ratelimiter
.read()
.get(prev_id)
{
// Exponential backoff
const MIN_DURATION: u64 = 5 * 60;
const MAX_DURATION: u64 = 60 * 60 * 24;
if continue_exponential_backoff_secs(MIN_DURATION, MAX_DURATION, time.elapsed(), *tries) {
debug!(
?tries,
duration = ?time.elapsed(),
"Backing off from prev_event"
);
return Ok(());
}
}
let Some((pdu, json)) = eventid_info else {
return Ok(());
};
// Skip old events
if pdu.origin_server_ts() < first_ts_in_room {
return Ok(());
}
let start_time = Instant::now();
self.federation_handletime
.write()
.insert(room_id.into(), ((*prev_id).to_owned(), start_time));
defer! {{
self.federation_handletime
.write()
.remove(room_id);
}};
self.upgrade_outlier_to_timeline_pdu(pdu, json, create_event, origin, room_id)
.await?;
debug!(
elapsed = ?start_time.elapsed(),
"Handled prev_event",
);
Ok(())
}
+2 -1
View File
@@ -4,6 +4,7 @@ mod fetch_prev;
mod fetch_state;
mod handle_incoming_pdu;
mod handle_outlier_pdu;
mod handle_prev_pdu;
mod parse_incoming_pdu;
mod policy_server;
mod resolve_state;
@@ -14,7 +15,6 @@ use std::{collections::HashMap, fmt::Write, sync::Arc, time::Instant};
use async_trait::async_trait;
use conduwuit::{Err, Event, PduEvent, Result, Server, SyncRwLock, utils::MutexMap};
pub use fetch_and_handle_outliers::build_local_dag;
use ruma::{
OwnedEventId, OwnedRoomId, RoomId, events::room::create::RoomCreateEventContent,
room_version_rules::RoomVersionRules,
@@ -22,6 +22,7 @@ use ruma::{
use tokio::sync::Notify;
use crate::{Dep, globals, rooms, sending, server_keys};
pub struct Service {
pub mutex_federation: RoomMutexMap,
pub federation_handletime: SyncRwLock<HandleTimeMap>,
@@ -56,10 +56,7 @@ fn extract_room_id(event_type: &str, pdu: &CanonicalJsonObject) -> Result<OwnedR
/// Parses every entry in an array as an event ID, returning an error if any
/// step fails.
pub(super) fn expect_event_id_array(
value: &CanonicalJsonObject,
field: &str,
) -> Result<Vec<OwnedEventId>> {
fn expect_event_id_array(value: &CanonicalJsonObject, field: &str) -> Result<Vec<OwnedEventId>> {
value
.get(field)
.ok_or_else(|| err!(Request(BadJson("missing field `{field}` on PDU"))))?
@@ -5,7 +5,7 @@ use std::{
};
use conduwuit::{
Result, debug, debug_error, err, error, implement,
Result, debug, err, error, implement,
matrix::{Event, StateMap},
trace,
utils::stream::{BroadbandExt, IterStream, ReadyExt, TryBroadbandExt, TryWidebandExt},
@@ -37,7 +37,6 @@ where
.pdu_shortstatehash(prev_event)
.await
else {
trace!("No shortstatehash for {prev_event}, cannot calculate one-degree state.");
return Ok(None);
};
@@ -100,7 +99,6 @@ where
.map_ok(move |sstatehash| (sstatehash, prev_event))
})
.try_collect::<HashMap<_, _>>()
.inspect_err(|e| debug_error!("failed to calculate N-degree short state hashes: {e}"))
.await
else {
return Ok(None);
@@ -41,7 +41,6 @@ where
.get_pdu_id(incoming_pdu.event_id())
.await
{
trace!(event_id=%incoming_pdu.event_id(), "Skipping upgrade of already upgraded PDU");
return Ok(Some(pduid));
}
@@ -64,7 +63,6 @@ where
"Upgrading PDU from outlier to timeline"
);
let timer = Instant::now();
let min_depth = self.services.metadata.get_mindepth(room_id).await;
let room_version_rules = get_room_version_rules(create_event)?;
// 10. Fetch missing state and auth chain events by calling /state_ids at
@@ -83,7 +81,6 @@ where
};
if state_at_incoming_event.is_none() {
trace!("Could not calculate incoming state, asking remote {origin} for it");
state_at_incoming_event = self
.fetch_state(origin, create_event, room_id, incoming_pdu.event_id())
.await?;
@@ -385,11 +382,6 @@ where
// Event has passed all auth/stateres checks
drop(state_lock);
if incoming_pdu.depth > min_depth {
self.services
.metadata
.set_mindepth(room_id, incoming_pdu.depth.into());
}
Ok(pdu_id)
}
-4
View File
@@ -626,10 +626,6 @@ impl Service {
room_id,
)
.await?;
self.services
.metadata
.maybe_set_mindepth(room_id, parsed_join_pdu.depth.into())
.await;
info!("Setting final room state for new room");
// We set the room state after inserting the pdu, so that we never have a moment
+2 -28
View File
@@ -1,9 +1,9 @@
use std::sync::Arc;
use conduwuit::{Result, implement, utils::stream::TryIgnore};
use database::{Deserialized, Map};
use database::Map;
use futures::{Stream, StreamExt};
use ruma::{OwnedRoomId, RoomId, UInt, uint};
use ruma::{OwnedRoomId, RoomId};
use crate::{Dep, rooms};
@@ -17,7 +17,6 @@ struct Data {
bannedroomids: Arc<Map>,
roomid_shortroomid: Arc<Map>,
pduid_pdu: Arc<Map>,
roomid_mindepth: Arc<Map>,
}
struct Services {
@@ -32,7 +31,6 @@ impl crate::Service for Service {
bannedroomids: args.db["bannedroomids"].clone(),
roomid_shortroomid: args.db["roomid_shortroomid"].clone(),
pduid_pdu: args.db["pduid_pdu"].clone(),
roomid_mindepth: args.db["roomid_mindepth"].clone(),
},
services: Services {
short: args.depend::<rooms::short::Service>("rooms::short"),
@@ -100,27 +98,3 @@ pub async fn is_disabled(&self, room_id: &RoomId) -> bool {
pub async fn is_banned(&self, room_id: &RoomId) -> bool {
self.db.bannedroomids.get(room_id).await.is_ok()
}
#[implement(Service)]
pub async fn get_mindepth(&self, room_id: &RoomId) -> UInt {
self.db
.roomid_mindepth
.get(room_id)
.await
.deserialized::<UInt>()
.unwrap_or_else(|_| uint!(0))
}
#[implement(Service)]
pub fn set_mindepth(&self, room_id: &RoomId, min_depth: u64) {
self.db
.roomid_mindepth
.put_raw(room_id.as_bytes(), min_depth.to_be_bytes());
}
#[implement(Service)]
pub async fn maybe_set_mindepth(&self, room_id: &RoomId, min_depth: u64) {
if min_depth > self.get_mindepth(room_id).await.into() {
self.set_mindepth(room_id, min_depth);
}
}
-4
View File
@@ -173,10 +173,6 @@ impl Service {
self.db.get_non_outlier_pdu_json(event_id).await
}
pub async fn non_outlier_pdu_exists(&self, event_id: &EventId) -> bool {
self.db.non_outlier_pdu_exists(event_id).await.is_ok()
}
/// Returns the pdu's id.
#[inline]
pub async fn get_pdu_id(&self, event_id: &EventId) -> Result<RawPduId> {
+2 -46
View File
@@ -1,10 +1,10 @@
use std::sync::Arc;
use conduwuit::{Result, implement};
use database::{Database, Deserialized, Map};
use database::{Deserialized, Map};
use ruma::{RoomId, UserId};
use crate::{Dep, globals, rooms, rooms::short::ShortStateHash};
use crate::{Dep, globals};
pub struct Service {
db: Data,
@@ -12,32 +12,25 @@ pub struct Service {
}
struct Data {
db: Arc<Database>,
userroomid_notificationcount: Arc<Map>,
userroomid_highlightcount: Arc<Map>,
roomuserid_lastnotificationread: Arc<Map>,
roomsynctoken_shortstatehash: Arc<Map>,
}
struct Services {
globals: Dep<globals::Service>,
short: Dep<rooms::short::Service>,
}
impl crate::Service for Service {
fn build(args: crate::Args<'_>) -> Result<Arc<Self>> {
Ok(Arc::new(Self {
db: Data {
db: args.db.clone(),
userroomid_notificationcount: args.db["userroomid_notificationcount"].clone(),
userroomid_highlightcount: args.db["userroomid_highlightcount"].clone(),
roomuserid_lastnotificationread: args.db["userroomid_highlightcount"].clone(),
roomsynctoken_shortstatehash: args.db["roomsynctoken_shortstatehash"].clone(),
},
services: Services {
globals: args.depend::<globals::Service>("globals"),
short: args.depend::<rooms::short::Service>("rooms::short"),
},
}))
}
@@ -90,40 +83,3 @@ pub async fn last_notification_read(&self, user_id: &UserId, room_id: &RoomId) -
.deserialized()
.unwrap_or(0)
}
#[implement(Service)]
pub async fn associate_token_shortstatehash(
&self,
room_id: &RoomId,
token: u64,
shortstatehash: ShortStateHash,
) {
let shortroomid = self
.services
.short
.get_shortroomid(room_id)
.await
.expect("room exists");
let _cork = self.db.db.cork();
let key: &[u64] = &[shortroomid, token];
self.db
.roomsynctoken_shortstatehash
.put(key, shortstatehash);
}
#[implement(Service)]
pub async fn get_token_shortstatehash(
&self,
room_id: &RoomId,
token: u64,
) -> Result<ShortStateHash> {
let shortroomid = self.services.short.get_shortroomid(room_id).await?;
let key: &[u64] = &[shortroomid, token];
self.db
.roomsynctoken_shortstatehash
.qry(key)
.await
.deserialized()
}
+2 -3
View File
@@ -34,9 +34,8 @@ where
batch
});
if server_keys.is_empty() {
return Ok(vec![]);
}
debug_assert!(!server_keys.is_empty(), "empty batch request to notary");
let mut results = Vec::new();
while let Some(batch) = server_keys