Skip to content

Commit

Permalink
instr(txnames): Count number of discovered rules (#49085)
Browse files Browse the repository at this point in the history
In order to detect regressions in our transaction clusterer, keep track
of the number of _new_ rules discovered across projects.

ref: getsentry/team-ingest#124
  • Loading branch information
jjbayer committed May 15, 2023
1 parent 97cd3ad commit aaa7d66
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 7 deletions.
18 changes: 15 additions & 3 deletions src/sentry/ingest/transaction_clusterer/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ def read_sorted(self, project: Project) -> List[Tuple[ReplacementRule, int]]:
return [tuple(lst) for lst in ret] # type: ignore[misc]

def read(self, project: Project) -> RuleSet:
return {rule: last_seen for rule, last_seen in self.read_sorted(project)}
rules = {rule: last_seen for rule, last_seen in self.read_sorted(project)}
self.last_read = rules
return rules

def _sort(self, rules: RuleSet) -> List[Tuple[ReplacementRule, int]]:
"""Sort rules by number of slashes, i.e. depth of the rule"""
Expand Down Expand Up @@ -188,23 +190,33 @@ def get_sorted_rules(project: Project) -> List[Tuple[ReplacementRule, int]]:
return ProjectOptionRuleStore().read_sorted(project)


def update_rules(project: Project, new_rules: Sequence[ReplacementRule]) -> None:
def update_rules(project: Project, new_rules: Sequence[ReplacementRule]) -> int:
"""Write newly discovered rules to projection option and redis, and update last_used.
Return the number of _new_ rules (that were not already present in project option).
"""
# Run the updates even if there aren't any new rules, to get all the stores
# up-to-date.
# NOTE: keep in mind this function writes to Postgres, so it shouldn't be
# called often.

last_seen = _now()
new_rule_set = {rule: last_seen for rule in new_rules}
project_option = ProjectOptionRuleStore()
rule_store = CompositeRuleStore(
[
RedisRuleStore(),
ProjectOptionRuleStore(),
project_option,
LocalRuleStore(new_rule_set),
]
)

rule_store.merge(project)

num_rules_added = len(new_rule_set.keys() - project_option.last_read.keys())

return num_rules_added


def bump_last_used(project: Project, pattern: str) -> None:
"""If an entry for `pattern` exists, bump its last_used timestamp in redis.
Expand Down
5 changes: 4 additions & 1 deletion src/sentry/ingest/transaction_clusterer/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,10 @@ def cluster_projects(projects: Sequence[Project]) -> None:
# The Redis store may have more up-to-date last_seen values,
# so we must update the stores to bring these values to
# project options, even if there aren't any new rules.
rules.update_rules(project, new_rules)
num_rules_added = rules.update_rules(project, new_rules)

# Track a global counter of new rules:
metrics.incr("txcluster.new_rules_discovered", num_rules_added)

# Clear transaction names to prevent the set from picking up
# noise over a long time range.
Expand Down
6 changes: 3 additions & 3 deletions tests/sentry/ingest/test_transaction_clusterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,12 @@ def test_save_rules(default_project):
assert project_rules == {}

with freeze_time("2012-01-14 12:00:01"):
update_rules(project, [ReplacementRule("foo"), ReplacementRule("bar")])
assert 2 == update_rules(project, [ReplacementRule("foo"), ReplacementRule("bar")])
project_rules = get_rules(project)
assert project_rules == {"foo": 1326542401, "bar": 1326542401}

with freeze_time("2012-01-14 12:00:02"):
update_rules(project, [ReplacementRule("bar"), ReplacementRule("zap")])
assert 1 == update_rules(project, [ReplacementRule("bar"), ReplacementRule("zap")])
project_rules = get_rules(project)
assert {"bar": 1326542402, "foo": 1326542401, "zap": 1326542402}

Expand Down Expand Up @@ -352,7 +352,7 @@ def test_transaction_clusterer_bumps_rules(_, default_organization):
assert get_rules(project1) == {"/user/*/**": 1}
# Update rules to update the project option storage.
with mock.patch("sentry.ingest.transaction_clusterer.rules._now", lambda: 3):
update_rules(project1, [])
assert 0 == update_rules(project1, [])
# After project options are updated, the last_seen should also be updated.
assert get_redis_rules(project1) == {"/user/*/**": 2}
assert get_rules(project1) == {"/user/*/**": 2}
Expand Down

0 comments on commit aaa7d66

Please sign in to comment.