Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(replay): add has_viewed to details and index #68391

Merged
merged 14 commits into from
Apr 8, 2024
1 change: 1 addition & 0 deletions src/sentry/replays/lib/new_query/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

Functions in this module coerce external types to internal types. Else they die.
"""

import uuid

from sentry.replays.lib.new_query.errors import CouldNotParseValue
Expand Down
18 changes: 15 additions & 3 deletions src/sentry/replays/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ def _empty_uuids_lambda():
"info_ids": ["info_ids"],
"count_warnings": ["count_warnings"],
"count_infos": ["count_infos"],
"has_viewed": ["has_viewed"],
}


Expand Down Expand Up @@ -757,9 +758,16 @@ def collect_aliases(fields: list[str]) -> list[str]:
return list(result)


def select_from_fields(fields: list[str]) -> list[Column | Function]:
def select_from_fields(fields: list[str], user_id: int | None) -> list[Column | Function]:
"""Return a list of columns to select."""
return [QUERY_ALIAS_COLUMN_MAP[alias] for alias in collect_aliases(fields)]
selection = []
for alias in collect_aliases(fields):
if alias == "has_viewed":
selection.append(compute_has_viewed(user_id))
else:
selection.append(QUERY_ALIAS_COLUMN_MAP[alias])

return selection


def _extract_children(expression: ParenExpression) -> Generator[SearchFilter, None, None]:
Expand All @@ -770,7 +778,11 @@ def _extract_children(expression: ParenExpression) -> Generator[SearchFilter, No
yield from _extract_children(child)


def compute_has_viewed(viewed_by_id: int) -> Function:
def compute_has_viewed(viewed_by_id: int | None) -> Function:
if viewed_by_id is None:
# Return the literal "false" if no user-id was specified.
return Function("equals", parameters=[1, 2])

return Function(
"greater",
parameters=[
Expand Down
27 changes: 27 additions & 0 deletions src/sentry/replays/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,33 @@ def mock_replay_click(
}


def mock_replay_viewed(
timestamp: datetime.datetime,
project_id: str,
replay_id: str,
viewed_by_id: int,
retention_days: int = 30,
) -> dict[str, Any]:
return {
"type": "replay_event",
"start_time": sec(timestamp),
"replay_id": replay_id,
"project_id": project_id,
"retention_days": retention_days,
"payload": list(
bytes(
cmanallen marked this conversation as resolved.
Show resolved Hide resolved
json.dumps(
{
"type": "replay_viewed",
"timestamp": timestamp,
"viewed_by_id": viewed_by_id,
}
).encode()
)
),
}


def mock_segment_init(
timestamp: datetime.datetime,
href: str = "http://localhost/",
Expand Down
18 changes: 3 additions & 15 deletions src/sentry/replays/usecases/query/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,28 +287,16 @@ def make_full_aggregation_query(
project_ids: list[int],
period_start: datetime,
period_end: datetime,
request_user_id: int | None = None,
request_user_id: int | None,
) -> Query:
"""Return a query to fetch every replay in the set."""
from sentry.replays.query import QUERY_ALIAS_COLUMN_MAP, compute_has_viewed, select_from_fields

def _select_from_fields() -> list[Column | Function]:
if fields:
select_cols = []
non_parametrized_fields = set(fields)
# temporary hard-coded solution for fields that need context, for ex request authorization
if "has_viewed" in non_parametrized_fields:
non_parametrized_fields.remove("has_viewed")
if request_user_id is not None:
select_cols.append(compute_has_viewed(request_user_id))
select_cols.extend(select_from_fields(list(non_parametrized_fields)))

return select_from_fields(list(set(fields)), user_id=request_user_id)
else:
select_cols = list(QUERY_ALIAS_COLUMN_MAP.values())
if request_user_id:
select_cols.append(compute_has_viewed(request_user_id))

return select_cols
return list(QUERY_ALIAS_COLUMN_MAP.values()) + [compute_has_viewed(request_user_id)]

return Query(
match=Entity("replays"),
Expand Down
9 changes: 8 additions & 1 deletion src/sentry/replays/usecases/query/conditions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"SumOfClickScalar",
"SumOfClickSelectorComposite",
"SumOfDeadClickSelectorComposite",
"SumOfIntegerIdScalar",
"SumOfIPv4Scalar",
"SumOfRageClickSelectorComposite",
"SumOfStringArray",
Expand All @@ -22,7 +23,13 @@


from .activity import AggregateActivityScalar
from .aggregate import SumOfIPv4Scalar, SumOfStringArray, SumOfStringScalar, SumOfUUIDArray
from .aggregate import (
SumOfIntegerIdScalar,
SumOfIPv4Scalar,
SumOfStringArray,
SumOfStringScalar,
SumOfUUIDArray,
)
from .duration import SimpleAggregateDurationScalar
from .error_ids import ErrorIdsArray
from .selector import (
Expand Down
20 changes: 20 additions & 0 deletions src/sentry/replays/usecases/query/conditions/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
aggregation of the row-wise condition visitor is 0 or not 0. This works because we're looking
for presence in the set; not for conformity across all rows.
"""

from __future__ import annotations

from uuid import UUID
Expand All @@ -29,6 +30,7 @@

from sentry.replays.lib.new_query.conditions import (
GenericBase,
IntegerScalar,
IPv4Scalar,
StringArray,
StringScalar,
Expand All @@ -38,6 +40,24 @@
from sentry.replays.lib.new_query.utils import contains, does_not_contain


class SumOfIntegerIdScalar(GenericBase):
@staticmethod
def visit_eq(expression: Expression, value: int) -> Condition:
return contains(IntegerScalar.visit_eq(expression, value))

@staticmethod
def visit_neq(expression: Expression, value: int) -> Condition:
return does_not_contain(IntegerScalar.visit_eq(expression, value))

@staticmethod
def visit_in(expression: Expression, value: list[int]) -> Condition:
return contains(IntegerScalar.visit_in(expression, value))

@staticmethod
def visit_not_in(expression: Expression, value: list[int]) -> Condition:
return does_not_contain(IntegerScalar.visit_in(expression, value))


class SumOfIPv4Scalar(GenericBase):
@staticmethod
def visit_eq(expression: Expression, value: str) -> Condition:
Expand Down
14 changes: 9 additions & 5 deletions src/sentry/replays/usecases/query/configs/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
must parse to the data type of its source even if its later transformed into another type. This
acts as a validation step as must as a type coercion step.
"""

from __future__ import annotations

from sentry.replays.lib.new_query.conditions import IntegerScalar, UUIDScalar
from sentry.replays.lib.new_query.fields import (
ColumnField,
CountField,
FieldProtocol,
IntegerColumnField,
StringColumnField,
SumField,
SumLengthField,
Expand All @@ -31,6 +33,7 @@
SumOfClickScalar,
SumOfClickSelectorComposite,
SumOfDeadClickSelectorComposite,
SumOfIntegerIdScalar,
SumOfIPv4Scalar,
SumOfRageClickSelectorComposite,
SumOfStringArray,
Expand Down Expand Up @@ -86,12 +89,12 @@ def array_string_field(column_name: str) -> StringColumnField:
"click.textContent": click_field("click_text"),
"click.title": click_field("click_title"),
"count_dead_clicks": sum_field("click_is_dead"),
"count_infos": sum_field("count_info_events"),
"count_warnings": sum_field("count_warning_events"),
"count_errors": sum_field("count_error_events"),
"count_infos": sum_field("count_info_events"),
"count_rage_clicks": sum_field("click_is_rage"),
"count_segments": count_field("segment_id"),
"count_urls": sum_field("count_urls"),
"count_warnings": sum_field("count_warning_events"),
"dead.selector": ComputedField(parse_selector, SumOfDeadClickSelectorComposite),
"device.brand": string_field("device_brand"),
"device.family": string_field("device_family"),
Expand All @@ -101,11 +104,10 @@ def array_string_field(column_name: str) -> StringColumnField:
"duration": ComputedField(parse_int, SimpleAggregateDurationScalar),
"environment": string_field("environment"),
"error_ids": ComputedField(parse_uuid, SumOfErrorIdScalar),
"warning_ids": UUIDColumnField("warning_id", parse_uuid, SumOfUUIDScalar),
"info_ids": ComputedField(parse_uuid, SumOfInfoIdScalar),
# Backwards Compat: We pass a simple string to the UUID column. Older versions of ClickHouse
# do not understand the UUID type.
"id": ColumnField("replay_id", parse_uuid, UUIDScalar),
"info_ids": ComputedField(parse_uuid, SumOfInfoIdScalar),
"os.name": string_field("os_name"),
"os.version": string_field("os_version"),
"platform": string_field("platform"),
Expand All @@ -120,6 +122,8 @@ def array_string_field(column_name: str) -> StringColumnField:
"user.id": string_field("user_id"),
"user.ip_address": StringColumnField("ip_address_v4", parse_str, SumOfIPv4Scalar),
"user.username": string_field("user_name"),
"viewed_by_id": IntegerColumnField("viewed_by_id", parse_int, SumOfIntegerIdScalar),
"warning_ids": UUIDColumnField("warning_id", parse_uuid, SumOfUUIDScalar),
}


Expand All @@ -136,8 +140,8 @@ def array_string_field(column_name: str) -> StringColumnField:
# Fields which have multiple names that represent the same search operation are defined here.
# QQ:JFERG: why dont we have these on the scalar search
search_config["error_id"] = search_config["error_ids"]
search_config["warning_id"] = search_config["warning_ids"]
search_config["info_id"] = search_config["info_ids"]
search_config["warning_id"] = search_config["warning_ids"]


search_config["release"] = search_config["releases"]
Expand Down
36 changes: 36 additions & 0 deletions tests/sentry/replays/test_organization_replay_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
mock_expected_response,
mock_replay,
mock_replay_click,
mock_replay_viewed,
)
from sentry.testutils.cases import APITestCase, ReplaysSnubaTestCase
from sentry.utils.cursors import Cursor
Expand Down Expand Up @@ -147,6 +148,35 @@ def test_get_replays(self):
)
assert_expected_response(response_data["data"][0], expected_response)

def test_get_replays_viewed(self):
"""Test replays conform to the interchange format."""
project = self.create_project(teams=[self.team])

replay1_id = uuid.uuid4().hex
seq1_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=22)
seq2_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=5)
self.store_replays(mock_replay(seq1_timestamp, project.id, replay1_id))
self.store_replays(mock_replay(seq2_timestamp, project.id, replay1_id))
self.store_replays(mock_replay_viewed(seq2_timestamp, project.id, replay1_id, self.user.id))

replay2_id = uuid.uuid4().hex
seq1_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=20)
seq2_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=5)
self.store_replays(mock_replay(seq1_timestamp, project.id, replay2_id))
self.store_replays(mock_replay(seq2_timestamp, project.id, replay2_id))

with self.feature(REPLAYS_FEATURES):
response = self.client.get(self.url)
assert response.status_code == 200

response_data = response.json()
assert "data" in response_data
assert len(response_data["data"]) == 2

# Assert the first replay was viewed and the second replay was not.
assert response_data["data"][0]["has_viewed"] is True
cmanallen marked this conversation as resolved.
Show resolved Hide resolved
assert response_data["data"][1]["has_viewed"] is False

def test_get_replays_browse_screen_fields(self):
"""Test replay response with fields requested in production."""
project = self.create_project(teams=[self.team])
Expand Down Expand Up @@ -597,6 +627,9 @@ def test_get_replays_user_filters(self):
self.store_replays(
self.mock_event_links(seq1_timestamp, project.id, "debug", replay1_id, uuid.uuid4().hex)
)
self.store_replays(
mock_replay_viewed(seq1_timestamp, project.id, replay1_id, viewed_by_id=1)
)

with self.feature(REPLAYS_FEATURES):
# Run all the queries individually to determine compliance.
Expand Down Expand Up @@ -670,6 +703,8 @@ def test_get_replays_user_filters(self):
"count_infos:2",
"count_infos:>1",
"count_infos:<3",
"viewed_by_id:1",
"!viewed_by_id:2",
]

for query in queries:
Expand Down Expand Up @@ -718,6 +753,7 @@ def test_get_replays_user_filters(self):
"!c:*st",
"!activity:3",
"activity:<2",
"viewed_by_id:2",
]
for query in null_queries:
response = self.client.get(self.url + f"?field=id&query={query}")
Expand Down
Loading