getsentry · cmanallen · Apr 8, 2024 · Apr 5, 2024 · Apr 8, 2024 · Apr 8, 2024
@@ -2,6 +2,7 @@
 
 Functions in this module coerce external types to internal types.  Else they die.
 """
+
 import uuid
 
 from sentry.replays.lib.new_query.errors import CouldNotParseValue

@@ -559,6 +559,7 @@ def _empty_uuids_lambda():
     "info_ids": ["info_ids"],
     "count_warnings": ["count_warnings"],
     "count_infos": ["count_infos"],
+    "has_viewed": ["has_viewed"],
 }
 
 
@@ -757,9 +758,16 @@ def collect_aliases(fields: list[str]) -> list[str]:
     return list(result)
 
 
-def select_from_fields(fields: list[str]) -> list[Column | Function]:
+def select_from_fields(fields: list[str], user_id: int | None) -> list[Column | Function]:
     """Return a list of columns to select."""
-    return [QUERY_ALIAS_COLUMN_MAP[alias] for alias in collect_aliases(fields)]
+    selection = []
+    for alias in collect_aliases(fields):
+        if alias == "has_viewed":
+            selection.append(compute_has_viewed(user_id))
+        else:
+            selection.append(QUERY_ALIAS_COLUMN_MAP[alias])
+
+    return selection
 
 
 def _extract_children(expression: ParenExpression) -> Generator[SearchFilter, None, None]:
@@ -770,7 +778,11 @@ def _extract_children(expression: ParenExpression) -> Generator[SearchFilter, No
             yield from _extract_children(child)
 
 
-def compute_has_viewed(viewed_by_id: int) -> Function:
+def compute_has_viewed(viewed_by_id: int | None) -> Function:
+    if viewed_by_id is None:
+        # Return the literal "false" if no user-id was specified.
+        return Function("equals", parameters=[1, 2])
+
     return Function(
         "greater",
         parameters=[

@@ -253,6 +253,33 @@ def mock_replay_click(
     }
 
 
+def mock_replay_viewed(
+    timestamp: datetime.datetime,
+    project_id: str,
+    replay_id: str,
+    viewed_by_id: int,
+    retention_days: int = 30,
+) -> dict[str, Any]:
+    return {
+        "type": "replay_event",
+        "start_time": sec(timestamp),
+        "replay_id": replay_id,
+        "project_id": project_id,
+        "retention_days": retention_days,
+        "payload": list(
+            bytes(
+                json.dumps(
+                    {
+                        "type": "replay_viewed",
+                        "timestamp": timestamp,
+                        "viewed_by_id": viewed_by_id,
+                    }
+                ).encode()
+            )
+        ),
+    }
+
+
 def mock_segment_init(
     timestamp: datetime.datetime,
     href: str = "http://localhost/",

@@ -287,28 +287,16 @@ def make_full_aggregation_query(
     project_ids: list[int],
     period_start: datetime,
     period_end: datetime,
-    request_user_id: int | None = None,
+    request_user_id: int | None,
 ) -> Query:
     """Return a query to fetch every replay in the set."""
     from sentry.replays.query import QUERY_ALIAS_COLUMN_MAP, compute_has_viewed, select_from_fields
 
     def _select_from_fields() -> list[Column | Function]:
         if fields:
-            select_cols = []
-            non_parametrized_fields = set(fields)
-            # temporary hard-coded solution for fields that need context, for ex request authorization
-            if "has_viewed" in non_parametrized_fields:
-                non_parametrized_fields.remove("has_viewed")
-                if request_user_id is not None:
-                    select_cols.append(compute_has_viewed(request_user_id))
-            select_cols.extend(select_from_fields(list(non_parametrized_fields)))
-
+            return select_from_fields(list(set(fields)), user_id=request_user_id)
         else:
-            select_cols = list(QUERY_ALIAS_COLUMN_MAP.values())
-            if request_user_id:
-                select_cols.append(compute_has_viewed(request_user_id))
-
-        return select_cols
+            return list(QUERY_ALIAS_COLUMN_MAP.values()) + [compute_has_viewed(request_user_id)]
 
     return Query(
         match=Entity("replays"),

@@ -11,6 +11,7 @@
     "SumOfClickScalar",
     "SumOfClickSelectorComposite",
     "SumOfDeadClickSelectorComposite",
+    "SumOfIntegerIdScalar",
     "SumOfIPv4Scalar",
     "SumOfRageClickSelectorComposite",
     "SumOfStringArray",
@@ -22,7 +23,13 @@
 
 
 from .activity import AggregateActivityScalar
-from .aggregate import SumOfIPv4Scalar, SumOfStringArray, SumOfStringScalar, SumOfUUIDArray
+from .aggregate import (
+    SumOfIntegerIdScalar,
+    SumOfIPv4Scalar,
+    SumOfStringArray,
+    SumOfStringScalar,
+    SumOfUUIDArray,
+)
 from .duration import SimpleAggregateDurationScalar
 from .error_ids import ErrorIdsArray
 from .selector import (

@@ -20,6 +20,7 @@
 aggregation of the row-wise condition visitor is 0 or not 0.  This works because we're looking
 for presence in the set; not for conformity across all rows.
 """
+
 from __future__ import annotations
 
 from uuid import UUID
@@ -29,6 +30,7 @@
 
 from sentry.replays.lib.new_query.conditions import (
     GenericBase,
+    IntegerScalar,
     IPv4Scalar,
     StringArray,
     StringScalar,
@@ -38,6 +40,24 @@
 from sentry.replays.lib.new_query.utils import contains, does_not_contain
 
 
+class SumOfIntegerIdScalar(GenericBase):
+    @staticmethod
+    def visit_eq(expression: Expression, value: int) -> Condition:
+        return contains(IntegerScalar.visit_eq(expression, value))
+
+    @staticmethod
+    def visit_neq(expression: Expression, value: int) -> Condition:
+        return does_not_contain(IntegerScalar.visit_eq(expression, value))
+
+    @staticmethod
+    def visit_in(expression: Expression, value: list[int]) -> Condition:
+        return contains(IntegerScalar.visit_in(expression, value))
+
+    @staticmethod
+    def visit_not_in(expression: Expression, value: list[int]) -> Condition:
+        return does_not_contain(IntegerScalar.visit_in(expression, value))
+
+
 class SumOfIPv4Scalar(GenericBase):
     @staticmethod
     def visit_eq(expression: Expression, value: str) -> Condition:

@@ -10,13 +10,15 @@
 must parse to the data type of its source even if its later transformed into another type.  This
 acts as a validation step as must as a type coercion step.
 """
+
 from __future__ import annotations
 
 from sentry.replays.lib.new_query.conditions import IntegerScalar, UUIDScalar
 from sentry.replays.lib.new_query.fields import (
     ColumnField,
     CountField,
     FieldProtocol,
+    IntegerColumnField,
     StringColumnField,
     SumField,
     SumLengthField,
@@ -31,6 +33,7 @@
     SumOfClickScalar,
     SumOfClickSelectorComposite,
     SumOfDeadClickSelectorComposite,
+    SumOfIntegerIdScalar,
     SumOfIPv4Scalar,
     SumOfRageClickSelectorComposite,
     SumOfStringArray,
@@ -86,12 +89,12 @@ def array_string_field(column_name: str) -> StringColumnField:
     "click.textContent": click_field("click_text"),
     "click.title": click_field("click_title"),
     "count_dead_clicks": sum_field("click_is_dead"),
-    "count_infos": sum_field("count_info_events"),
-    "count_warnings": sum_field("count_warning_events"),
     "count_errors": sum_field("count_error_events"),
+    "count_infos": sum_field("count_info_events"),
     "count_rage_clicks": sum_field("click_is_rage"),
     "count_segments": count_field("segment_id"),
     "count_urls": sum_field("count_urls"),
+    "count_warnings": sum_field("count_warning_events"),
     "dead.selector": ComputedField(parse_selector, SumOfDeadClickSelectorComposite),
     "device.brand": string_field("device_brand"),
     "device.family": string_field("device_family"),
@@ -101,11 +104,10 @@ def array_string_field(column_name: str) -> StringColumnField:
     "duration": ComputedField(parse_int, SimpleAggregateDurationScalar),
     "environment": string_field("environment"),
     "error_ids": ComputedField(parse_uuid, SumOfErrorIdScalar),
-    "warning_ids": UUIDColumnField("warning_id", parse_uuid, SumOfUUIDScalar),
-    "info_ids": ComputedField(parse_uuid, SumOfInfoIdScalar),
     # Backwards Compat: We pass a simple string to the UUID column. Older versions of ClickHouse
     # do not understand the UUID type.
     "id": ColumnField("replay_id", parse_uuid, UUIDScalar),
+    "info_ids": ComputedField(parse_uuid, SumOfInfoIdScalar),
     "os.name": string_field("os_name"),
     "os.version": string_field("os_version"),
     "platform": string_field("platform"),
@@ -120,6 +122,8 @@ def array_string_field(column_name: str) -> StringColumnField:
     "user.id": string_field("user_id"),
     "user.ip_address": StringColumnField("ip_address_v4", parse_str, SumOfIPv4Scalar),
     "user.username": string_field("user_name"),
+    "viewed_by_id": IntegerColumnField("viewed_by_id", parse_int, SumOfIntegerIdScalar),
+    "warning_ids": UUIDColumnField("warning_id", parse_uuid, SumOfUUIDScalar),
 }
 
 
@@ -136,8 +140,8 @@ def array_string_field(column_name: str) -> StringColumnField:
 # Fields which have multiple names that represent the same search operation are defined here.
 # QQ:JFERG: why dont we have these on the scalar search
 search_config["error_id"] = search_config["error_ids"]
-search_config["warning_id"] = search_config["warning_ids"]
 search_config["info_id"] = search_config["info_ids"]
+search_config["warning_id"] = search_config["warning_ids"]
 
 
 search_config["release"] = search_config["releases"]

@@ -9,6 +9,7 @@
     mock_expected_response,
     mock_replay,
     mock_replay_click,
+    mock_replay_viewed,
 )
 from sentry.testutils.cases import APITestCase, ReplaysSnubaTestCase
 from sentry.utils.cursors import Cursor
@@ -147,6 +148,35 @@ def test_get_replays(self):
             )
             assert_expected_response(response_data["data"][0], expected_response)
 
+    def test_get_replays_viewed(self):
+        """Test replays conform to the interchange format."""
+        project = self.create_project(teams=[self.team])
+
+        replay1_id = uuid.uuid4().hex
+        seq1_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=22)
+        seq2_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=5)
+        self.store_replays(mock_replay(seq1_timestamp, project.id, replay1_id))
+        self.store_replays(mock_replay(seq2_timestamp, project.id, replay1_id))
+        self.store_replays(mock_replay_viewed(seq2_timestamp, project.id, replay1_id, self.user.id))
+
+        replay2_id = uuid.uuid4().hex
+        seq1_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=20)
+        seq2_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=5)
+        self.store_replays(mock_replay(seq1_timestamp, project.id, replay2_id))
+        self.store_replays(mock_replay(seq2_timestamp, project.id, replay2_id))
+
+        with self.feature(REPLAYS_FEATURES):
+            response = self.client.get(self.url)
+            assert response.status_code == 200
+
+            response_data = response.json()
+            assert "data" in response_data
+            assert len(response_data["data"]) == 2
+
+            # Assert the first replay was viewed and the second replay was not.
+            assert response_data["data"][0]["has_viewed"] is True
+            assert response_data["data"][1]["has_viewed"] is False
+
     def test_get_replays_browse_screen_fields(self):
         """Test replay response with fields requested in production."""
         project = self.create_project(teams=[self.team])
@@ -597,6 +627,9 @@ def test_get_replays_user_filters(self):
         self.store_replays(
             self.mock_event_links(seq1_timestamp, project.id, "debug", replay1_id, uuid.uuid4().hex)
         )
+        self.store_replays(
+            mock_replay_viewed(seq1_timestamp, project.id, replay1_id, viewed_by_id=1)
+        )
 
         with self.feature(REPLAYS_FEATURES):
             # Run all the queries individually to determine compliance.
@@ -670,6 +703,8 @@ def test_get_replays_user_filters(self):
                 "count_infos:2",
                 "count_infos:>1",
                 "count_infos:<3",
+                "viewed_by_id:1",
+                "!viewed_by_id:2",
             ]
 
             for query in queries:
@@ -718,6 +753,7 @@ def test_get_replays_user_filters(self):
                 "!c:*st",
                 "!activity:3",
                 "activity:<2",
+                "viewed_by_id:2",
             ]
             for query in null_queries:
                 response = self.client.get(self.url + f"?field=id&query={query}")
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,7 @@ @@
     Functions in this module coerce external types to internal types.  Else they die.
     """
     import uuid
     from sentry.replays.lib.new_query.errors import CouldNotParseValue
@@ Expand Down @@