From aaa7d66486c966dad24b67569674d656b3e7b866 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 15 May 2023 13:11:38 +0200 Subject: [PATCH] instr(txnames): Count number of discovered rules (#49085) In order to detect regressions in our transaction clusterer, keep track of the number of _new_ rules discovered across projects. ref: https://github.com/getsentry/team-ingest/issues/124 --- .../ingest/transaction_clusterer/rules.py | 18 +++++++++++++++--- .../ingest/transaction_clusterer/tasks.py | 5 ++++- .../ingest/test_transaction_clusterer.py | 6 +++--- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/sentry/ingest/transaction_clusterer/rules.py b/src/sentry/ingest/transaction_clusterer/rules.py index 9d28585ec39e23..a4c92a50168737 100644 --- a/src/sentry/ingest/transaction_clusterer/rules.py +++ b/src/sentry/ingest/transaction_clusterer/rules.py @@ -83,7 +83,9 @@ def read_sorted(self, project: Project) -> List[Tuple[ReplacementRule, int]]: return [tuple(lst) for lst in ret] # type: ignore[misc] def read(self, project: Project) -> RuleSet: - return {rule: last_seen for rule, last_seen in self.read_sorted(project)} + rules = {rule: last_seen for rule, last_seen in self.read_sorted(project)} + self.last_read = rules + return rules def _sort(self, rules: RuleSet) -> List[Tuple[ReplacementRule, int]]: """Sort rules by number of slashes, i.e. depth of the rule""" @@ -188,7 +190,11 @@ def get_sorted_rules(project: Project) -> List[Tuple[ReplacementRule, int]]: return ProjectOptionRuleStore().read_sorted(project) -def update_rules(project: Project, new_rules: Sequence[ReplacementRule]) -> None: +def update_rules(project: Project, new_rules: Sequence[ReplacementRule]) -> int: + """Write newly discovered rules to projection option and redis, and update last_used. + + Return the number of _new_ rules (that were not already present in project option). + """ # Run the updates even if there aren't any new rules, to get all the stores # up-to-date. # NOTE: keep in mind this function writes to Postgres, so it shouldn't be @@ -196,15 +202,21 @@ def update_rules(project: Project, new_rules: Sequence[ReplacementRule]) -> None last_seen = _now() new_rule_set = {rule: last_seen for rule in new_rules} + project_option = ProjectOptionRuleStore() rule_store = CompositeRuleStore( [ RedisRuleStore(), - ProjectOptionRuleStore(), + project_option, LocalRuleStore(new_rule_set), ] ) + rule_store.merge(project) + num_rules_added = len(new_rule_set.keys() - project_option.last_read.keys()) + + return num_rules_added + def bump_last_used(project: Project, pattern: str) -> None: """If an entry for `pattern` exists, bump its last_used timestamp in redis. diff --git a/src/sentry/ingest/transaction_clusterer/tasks.py b/src/sentry/ingest/transaction_clusterer/tasks.py index dd1509a4ae7d44..16474da00c44af 100644 --- a/src/sentry/ingest/transaction_clusterer/tasks.py +++ b/src/sentry/ingest/transaction_clusterer/tasks.py @@ -71,7 +71,10 @@ def cluster_projects(projects: Sequence[Project]) -> None: # The Redis store may have more up-to-date last_seen values, # so we must update the stores to bring these values to # project options, even if there aren't any new rules. - rules.update_rules(project, new_rules) + num_rules_added = rules.update_rules(project, new_rules) + + # Track a global counter of new rules: + metrics.incr("txcluster.new_rules_discovered", num_rules_added) # Clear transaction names to prevent the set from picking up # noise over a long time range. diff --git a/tests/sentry/ingest/test_transaction_clusterer.py b/tests/sentry/ingest/test_transaction_clusterer.py index 619e17077809dc..12daf56719d435 100644 --- a/tests/sentry/ingest/test_transaction_clusterer.py +++ b/tests/sentry/ingest/test_transaction_clusterer.py @@ -176,12 +176,12 @@ def test_save_rules(default_project): assert project_rules == {} with freeze_time("2012-01-14 12:00:01"): - update_rules(project, [ReplacementRule("foo"), ReplacementRule("bar")]) + assert 2 == update_rules(project, [ReplacementRule("foo"), ReplacementRule("bar")]) project_rules = get_rules(project) assert project_rules == {"foo": 1326542401, "bar": 1326542401} with freeze_time("2012-01-14 12:00:02"): - update_rules(project, [ReplacementRule("bar"), ReplacementRule("zap")]) + assert 1 == update_rules(project, [ReplacementRule("bar"), ReplacementRule("zap")]) project_rules = get_rules(project) assert {"bar": 1326542402, "foo": 1326542401, "zap": 1326542402} @@ -352,7 +352,7 @@ def test_transaction_clusterer_bumps_rules(_, default_organization): assert get_rules(project1) == {"/user/*/**": 1} # Update rules to update the project option storage. with mock.patch("sentry.ingest.transaction_clusterer.rules._now", lambda: 3): - update_rules(project1, []) + assert 0 == update_rules(project1, []) # After project options are updated, the last_seen should also be updated. assert get_redis_rules(project1) == {"/user/*/**": 2} assert get_rules(project1) == {"/user/*/**": 2}