From 304b5f339229954f3c506b1f400a64fc3a8b5dd8 Mon Sep 17 00:00:00 2001 From: Jodi Jang <116035587+jangjodi@users.noreply.github.com> Date: Fri, 20 Sep 2024 11:32:59 -0700 Subject: [PATCH] fix(similarity): Catch InvalidEnhancerConfig in backfill (#77861) Catch InvalidEnhancerConfig in similarity backfill and skip that group --- ...kfill_seer_grouping_records_for_project.py | 4 ++- .../test_backfill_seer_grouping_records.py | 35 +++++++++++-------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/sentry/tasks/embeddings_grouping/backfill_seer_grouping_records_for_project.py b/src/sentry/tasks/embeddings_grouping/backfill_seer_grouping_records_for_project.py index 864d4bfce56db..487b00ba86d69 100644 --- a/src/sentry/tasks/embeddings_grouping/backfill_seer_grouping_records_for_project.py +++ b/src/sentry/tasks/embeddings_grouping/backfill_seer_grouping_records_for_project.py @@ -7,6 +7,7 @@ from sentry import options from sentry.api.exceptions import ResourceDoesNotExist from sentry.grouping.api import GroupingConfigNotFound +from sentry.grouping.enhancer.exceptions import InvalidEnhancerConfig from sentry.models.project import Project from sentry.seer.similarity.utils import killswitch_enabled, project_is_seer_eligible from sentry.silo.base import SiloMode @@ -31,6 +32,7 @@ BACKFILL_NAME = "backfill_grouping_records" BULK_DELETE_METADATA_CHUNK_SIZE = 100 SEER_ACCEPTABLE_FAILURE_REASONS = ["Gateway Timeout", "Service Unavailable"] +EVENT_INFO_EXCEPTIONS = (GroupingConfigNotFound, ResourceDoesNotExist, InvalidEnhancerConfig) logger = logging.getLogger(__name__) @@ -219,7 +221,7 @@ def backfill_seer_grouping_records_for_project( nodestore_results, group_hashes_dict = get_events_from_nodestore( project, filtered_snuba_results, groups_to_backfill_with_no_embedding_has_snuba_row ) - except (GroupingConfigNotFound, ResourceDoesNotExist): + except EVENT_INFO_EXCEPTIONS: metrics.incr("sentry.tasks.backfill_seer_grouping_records.grouping_config_error") nodestore_results, group_hashes_dict = GroupStacktraceData(data=[], stacktrace_list=[]), {} diff --git a/tests/sentry/tasks/test_backfill_seer_grouping_records.py b/tests/sentry/tasks/test_backfill_seer_grouping_records.py index 69220c247f9fd..9d7af966ff19e 100644 --- a/tests/sentry/tasks/test_backfill_seer_grouping_records.py +++ b/tests/sentry/tasks/test_backfill_seer_grouping_records.py @@ -18,6 +18,8 @@ from sentry.api.exceptions import ResourceDoesNotExist from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION from sentry.eventstore.models import Event +from sentry.grouping.api import GroupingConfigNotFound +from sentry.grouping.enhancer.exceptions import InvalidEnhancerConfig from sentry.issues.occurrence_consumer import EventLookupError from sentry.models.group import Group, GroupStatus from sentry.models.grouphash import GroupHash @@ -1500,23 +1502,26 @@ def test_backfill_seer_grouping_records_nodestore_grouping_config_not_found( mock_lookup_group_data_stacktrace_bulk, mock_logger, ): - mock_lookup_group_data_stacktrace_bulk.side_effect = ResourceDoesNotExist() + exceptions = (GroupingConfigNotFound(), ResourceDoesNotExist(), InvalidEnhancerConfig()) - with TaskRunner(): - backfill_seer_grouping_records_for_project(self.project.id, None) + for exception in exceptions: + mock_lookup_group_data_stacktrace_bulk.side_effect = exception - groups = Group.objects.all() - group_ids_sorted = sorted([group.id for group in groups], reverse=True) - mock_call_next_backfill.assert_called_with( - last_processed_group_id=group_ids_sorted[-1], - project_id=self.project.id, - last_processed_project_index=0, - cohort=None, - enable_ingestion=False, - skip_processed_projects=False, - skip_project_ids=None, - worker_number=None, - ) + with TaskRunner(): + backfill_seer_grouping_records_for_project(self.project.id, None) + + groups = Group.objects.all() + group_ids_sorted = sorted([group.id for group in groups], reverse=True) + mock_call_next_backfill.assert_called_with( + last_processed_group_id=group_ids_sorted[-1], + project_id=self.project.id, + last_processed_project_index=0, + cohort=None, + enable_ingestion=False, + skip_processed_projects=False, + skip_project_ids=None, + worker_number=None, + ) @with_feature("projects:similarity-embeddings-backfill") @patch("sentry.tasks.embeddings_grouping.utils.logger")