Skip to content

Commit

Permalink
add get_seer_similar_issues function
Browse files Browse the repository at this point in the history
  • Loading branch information
lobsterkatie committed May 16, 2024
1 parent ffcba47 commit 164ceb3
Showing 1 changed file with 70 additions and 0 deletions.
70 changes: 70 additions & 0 deletions src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
import logging
from dataclasses import asdict

from sentry import features
from sentry.api.endpoints.group_similar_issues_embeddings import get_stacktrace_string
from sentry.constants import PLACEHOLDER_EVENT_TITLES
from sentry.eventstore.models import Event
from sentry.grouping.grouping_info import get_grouping_info_from_variants
from sentry.grouping.result import CalculatedHashes
from sentry.models.group import Group
from sentry.models.project import Project
from sentry.seer.utils import (
SeerSimilarIssuesMetadata,
SimilarIssuesEmbeddingsRequest,
get_similarity_data_from_seer,
)
from sentry.utils.safe import get_path

logger = logging.getLogger("sentry.events.grouping")


def should_call_seer_for_grouping(event: Event, project: Project) -> bool:
"""
Expand All @@ -23,3 +37,59 @@ def should_call_seer_for_grouping(event: Event, project: Project) -> bool:
return features.has("projects:similarity-embeddings-metadata", project) or features.has(
"projects:similarity-embeddings-grouping", project
)


def get_seer_similar_issues(
    event: Event,
    primary_hashes: CalculatedHashes,
    num_neighbors: int = 1,
) -> tuple[
    dict[
        str, str | list[dict[str, float | bool | int | str]]
    ],  # a SeerSimilarIssuesMetadata instance, dictified
    Group | None,
]:
    """
    Query Seer for the nearest neighbor(s) of the given event.

    Returns a two-tuple:
      - the Seer response wrapped in a `SeerSimilarIssuesMetadata` and converted to a plain dict
        (results are kept in the order Seer returned them, closest match first), and
      - the `Group` the event should be merged into, or `None`.

    The group is only looked up when Seer returned at least one result, the first result is
    flagged `should_group`, and the `projects:similarity-embeddings-grouping` feature is enabled
    for the event's project. In every other case the second element is `None`.

    Raises `Exception` if `primary_hashes.variants` is `None`.
    """

    variants = primary_hashes.variants

    # TODO: Once we get rid of hierarchical hashing, we'll be able to make `variants` required in
    # `CalculatedHashes`, meaning we can remove this check. (See note in `CalculatedHashes` class definition.)
    if variants is None:
        raise Exception("Primary hashes missing variants data")

    request_hash = primary_hashes.hashes[0]
    grouping_info = get_grouping_info_from_variants(variants)
    stacktrace = get_stacktrace_string(grouping_info)

    seer_request: SimilarIssuesEmbeddingsRequest = {
        "hash": request_hash,
        "project_id": event.project.id,
        "stacktrace": stacktrace,
        "message": event.title,
        "k": num_neighbors,
    }

    # Seer hands back neighbors ordered from closest to farthest
    neighbors = get_similarity_data_from_seer(seer_request)

    metadata = SeerSimilarIssuesMetadata(request_hash=request_hash, results=neighbors)
    metadata_dict = asdict(metadata)

    # Only the closest neighbor is considered as a potential parent
    if not neighbors:
        return (metadata_dict, None)

    closest = neighbors[0]
    if not closest.should_group:
        return (metadata_dict, None)

    if not features.has("projects:similarity-embeddings-grouping", event.project):
        return (metadata_dict, None)

    matched_group = Group.objects.filter(id=closest.parent_group_id).first()
    return (metadata_dict, matched_group)

0 comments on commit 164ceb3

Please sign in to comment.