Skip to content

Commit

Permalink
Sliding sync: don't fetch room summary for named rooms. (#17683)
Browse files Browse the repository at this point in the history
For rooms with a name we can skip fetching a full room summary, as we
don't need to calculate heroes, and instead just fetch the room counts
directly.

This also changes things to not return counts and heroes for non-joined
rooms. For left/banned rooms we were returning zero values anyway, and
for invite/knock rooms we don't really want to leak such information
(even if some of is included in the stripped state).
  • Loading branch information
erikjohnston committed Sep 11, 2024
1 parent a7fcac5 commit f6c2b0e
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 76 deletions.
1 change: 1 addition & 0 deletions changelog.d/17683.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up sliding sync by reducing amount of data pulled out of the database for large rooms.
78 changes: 38 additions & 40 deletions synapse/handlers/sliding_sync/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,32 +784,10 @@ async def get_room_sync_data(
):
avatar_changed = True

# We only need the room summary for calculating heroes, however if we do
# fetch it then we can use it to calculate `joined_count` and
# `invited_count`.
room_membership_summary: Optional[Mapping[str, MemberSummary]] = None
empty_membership_summary = MemberSummary([], 0)
# We need the room summary for:
# - Always for initial syncs (or the first time we send down the room)
# - When the room has no name, we need `heroes`
# - When the membership has changed so we need to give updated `heroes` and
# `joined_count`/`invited_count`.
#
# Ideally, instead of just looking at `name_changed`, we'd check if the room
# name is not set but this is a good enough approximation that saves us from
# having to pull out the full event. This just means, we're generating the
# summary whenever the room name changes instead of only when it changes to
# `None`.
if initial or name_changed or membership_changed:
# We can't trace the function directly because it's cached and the `@cached`
# decorator doesn't mix with `@trace` yet.
with start_active_span("get_room_summary"):
if room_membership_for_user_at_to_token.membership in (
Membership.LEAVE,
Membership.BAN,
):
# TODO: Figure out how to get the membership summary for left/banned rooms
room_membership_summary = {}
else:
room_membership_summary = await self.store.get_room_summary(room_id)
# TODO: Reverse/rewind back to the `to_token`

# `heroes` are required if the room name is not set.
#
Expand All @@ -828,11 +806,45 @@ async def get_room_sync_data(
# get them on initial syncs (or the first time we send down the room) or if the
# membership has changed which may change the heroes.
if name_event_id is None and (initial or (not initial and membership_changed)):
assert room_membership_summary is not None
# We need the room summary to extract the heroes from
if room_membership_for_user_at_to_token.membership != Membership.JOIN:
# TODO: Figure out how to get the membership summary for left/banned rooms
# For invite/knock rooms we don't include the information.
room_membership_summary = {}
else:
room_membership_summary = await self.store.get_room_summary(room_id)
# TODO: Reverse/rewind back to the `to_token`

hero_user_ids = extract_heroes_from_room_summary(
room_membership_summary, me=user.to_string()
)

# Fetch the membership counts for rooms we're joined to.
#
# Similarly to other metadata, we only need to calculate the member
# counts if this is an initial sync or the memberships have changed.
joined_count: Optional[int] = None
invited_count: Optional[int] = None
if (
initial or membership_changed
) and room_membership_for_user_at_to_token.membership == Membership.JOIN:
# If we have the room summary (because we calculated heroes above)
# then we can simply pull the counts from there.
if room_membership_summary is not None:
empty_membership_summary = MemberSummary([], 0)

joined_count = room_membership_summary.get(
Membership.JOIN, empty_membership_summary
).count

invited_count = room_membership_summary.get(
Membership.INVITE, empty_membership_summary
).count
else:
member_counts = await self.store.get_member_counts(room_id)
joined_count = member_counts.get(Membership.JOIN, 0)
invited_count = member_counts.get(Membership.INVITE, 0)

# Fetch the `required_state` for the room
#
# No `required_state` for invite/knock rooms (just `stripped_state`)
Expand Down Expand Up @@ -1090,20 +1102,6 @@ async def get_room_sync_data(

set_tag(SynapseTags.RESULT_PREFIX + "initial", initial)

joined_count: Optional[int] = None
if initial or membership_changed:
assert room_membership_summary is not None
joined_count = room_membership_summary.get(
Membership.JOIN, empty_membership_summary
).count

invited_count: Optional[int] = None
if initial or membership_changed:
assert room_membership_summary is not None
invited_count = room_membership_summary.get(
Membership.INVITE, empty_membership_summary
).count

return SlidingSyncResult.RoomResult(
name=room_name,
avatar=room_avatar,
Expand Down
2 changes: 2 additions & 0 deletions synapse/storage/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def _invalidate_state_caches(
self._attempt_to_invalidate_cache(
"get_number_joined_users_in_room", (room_id,)
)
self._attempt_to_invalidate_cache("get_member_counts", (room_id,))
self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))

# There's no easy way of invalidating this cache for just the users
Expand Down Expand Up @@ -153,6 +154,7 @@ def _invalidate_state_caches_all(self, room_id: str) -> None:
self._attempt_to_invalidate_cache("get_current_hosts_in_room", (room_id,))
self._attempt_to_invalidate_cache("get_users_in_room_with_profiles", (room_id,))
self._attempt_to_invalidate_cache("get_number_joined_users_in_room", (room_id,))
self._attempt_to_invalidate_cache("get_member_counts", (room_id,))
self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))
self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None)
self._attempt_to_invalidate_cache("get_user_in_room_with_profile", None)
Expand Down
37 changes: 27 additions & 10 deletions synapse/storage/databases/main/roommember.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,18 +312,10 @@ def _get_room_summary_txn(
# We do this all in one transaction to keep the cache small.
# FIXME: get rid of this when we have room_stats

# Note, rejected events will have a null membership field, so
# we we manually filter them out.
sql = """
SELECT count(*), membership FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
GROUP BY membership
"""
counts = self._get_member_counts_txn(txn, room_id)

txn.execute(sql, (room_id,))
res: Dict[str, MemberSummary] = {}
for count, membership in txn:
for membership, count in counts.items():
res.setdefault(membership, MemberSummary([], count))

# Order by membership (joins -> invites -> leave (former insiders) ->
Expand Down Expand Up @@ -369,6 +361,31 @@ def _get_room_summary_txn(
"get_room_summary", _get_room_summary_txn
)

@cached()
async def get_member_counts(self, room_id: str) -> Mapping[str, int]:
"""Get a mapping of number of users by membership"""

return await self.db_pool.runInteraction(
"get_member_counts", self._get_member_counts_txn, room_id
)

def _get_member_counts_txn(
self, txn: LoggingTransaction, room_id: str
) -> Dict[str, int]:
"""Get a mapping of number of users by membership"""

# Note, rejected events will have a null membership field, so
# we we manually filter them out.
sql = """
SELECT count(*), membership FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
GROUP BY membership
"""

txn.execute(sql, (room_id,))
return {membership: count for count, membership in txn}

@cached()
async def get_number_joined_users_in_room(self, room_id: str) -> int:
return await self.db_pool.simple_select_one_onecol(
Expand Down
8 changes: 8 additions & 0 deletions synapse/storage/databases/main/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
DELETE_CURRENT_STATE_UPDATE_NAME = "delete_old_current_state_events"
MEMBERS_CURRENT_STATE_UPDATE_NAME = "current_state_events_members_room_index"

def __init__(
self,
Expand Down Expand Up @@ -764,6 +765,13 @@ def __init__(
self.DELETE_CURRENT_STATE_UPDATE_NAME,
self._background_remove_left_rooms,
)
self.db_pool.updates.register_background_index_update(
self.MEMBERS_CURRENT_STATE_UPDATE_NAME,
index_name="current_state_events_members_room_index",
table="current_state_events",
columns=["room_id", "membership"],
where_clause="type='m.room.member'",
)

async def _background_remove_left_rooms(
self, progress: JsonDict, batch_size: int
Expand Down
19 changes: 19 additions & 0 deletions synapse/storage/schema/main/delta/87/03_current_state_index.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2024 New Vector, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.


-- Add a background updates to add a new index:
-- `current_state_events(room_id, membership) WHERE type = 'm.room.member'
-- This makes counting membership in rooms (for syncs) much faster
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(8701, 'current_state_events_members_room_index', '{}');
52 changes: 26 additions & 26 deletions tests/rest/client/sliding_sync/test_rooms_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,14 +371,17 @@ def test_rooms_meta_when_invited(self) -> None:
"mxc://UPDATED_DUMMY_MEDIA_ID",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["joined_count"],
1,

# We don't give extra room information to invitees
self.assertNotIn(
"joined_count",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["invited_count"],
1,
self.assertNotIn(
"invited_count",
response_body["rooms"][room_id1],
)

self.assertIsNone(
response_body["rooms"][room_id1].get("is_dm"),
)
Expand Down Expand Up @@ -450,15 +453,16 @@ def test_rooms_meta_when_banned(self) -> None:
"mxc://DUMMY_MEDIA_ID",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["joined_count"],
# FIXME: The actual number should be "1" (user2) but we currently don't
# support this for rooms where the user has left/been banned.
0,

# FIXME: We possibly want to return joined and invited counts for rooms
# you're banned form
self.assertNotIn(
"joined_count",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["invited_count"],
0,
self.assertNotIn(
"invited_count",
response_body["rooms"][room_id1],
)
self.assertIsNone(
response_body["rooms"][room_id1].get("is_dm"),
Expand Down Expand Up @@ -692,19 +696,15 @@ def test_rooms_meta_heroes_when_banned(self) -> None:
[],
)

self.assertEqual(
response_body["rooms"][room_id1]["joined_count"],
# FIXME: The actual number should be "1" (user2) but we currently don't
# support this for rooms where the user has left/been banned.
0,
# FIXME: We possibly want to return joined and invited counts for rooms
# you're banned form
self.assertNotIn(
"joined_count",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["invited_count"],
# We shouldn't see user5 since they were invited after user1 was banned.
#
# FIXME: The actual number should be "1" (user3) but we currently don't
# support this for rooms where the user has left/been banned.
0,
self.assertNotIn(
"invited_count",
response_body["rooms"][room_id1],
)

def test_rooms_meta_heroes_incremental_sync_no_change(self) -> None:
Expand Down

0 comments on commit f6c2b0e

Please sign in to comment.