-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(experiments): Initial data warehouse Trend support #26356
Changes from 29 commits
57ca26b
3fb6f4e
e4ff1e9
4cd89e4
3f9f2de
e4566be
33d0149
dcd47d4
31606f1
927a3c2
23cf381
7d7b3e8
fddffa8
a35d509
f7c042e
22d7652
b3fbff1
2e737af
15edbc2
0df2168
bf607d1
dff03ea
7f273c6
79fc1a2
310b03f
8330c53
3163cb6
27416ac
6b3b721
a07bb2a
f131df9
c6d70fc
e435442
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,9 @@ | |
from django.conf import settings | ||
from posthog.constants import ExperimentNoResultsErrorKeys | ||
from posthog.hogql import ast | ||
from posthog.hogql.context import HogQLContext | ||
from posthog.hogql.database.database import create_hogql_database | ||
from posthog.hogql.database.models import LazyJoin | ||
from posthog.hogql_queries.experiments import CONTROL_VARIANT_KEY | ||
from posthog.hogql_queries.experiments.trends_statistics import ( | ||
are_results_significant, | ||
|
@@ -19,6 +22,8 @@ | |
BreakdownFilter, | ||
CachedExperimentTrendsQueryResponse, | ||
ChartDisplayType, | ||
DataWarehouseNode, | ||
DataWarehousePropertyFilter, | ||
EventPropertyFilter, | ||
EventsNode, | ||
ExperimentSignificanceCode, | ||
|
@@ -27,11 +32,12 @@ | |
ExperimentVariantTrendsBaseStats, | ||
InsightDateRange, | ||
PropertyMathType, | ||
PropertyOperator, | ||
TrendsFilter, | ||
TrendsQuery, | ||
TrendsQueryResponse, | ||
) | ||
from typing import Any, Optional | ||
from typing import Any, Optional, cast | ||
import threading | ||
|
||
|
||
|
@@ -89,12 +95,18 @@ def _get_insight_date_range(self) -> InsightDateRange: | |
explicitDate=True, | ||
) | ||
|
||
def _get_breakdown_filter(self) -> BreakdownFilter: | ||
def _get_event_breakdown_filter(self) -> BreakdownFilter: | ||
return BreakdownFilter( | ||
breakdown=self.breakdown_key, | ||
breakdown_type="event", | ||
) | ||
|
||
def _get_data_warehouse_breakdown_filter(self) -> BreakdownFilter: | ||
return BreakdownFilter( | ||
breakdown=f"events.properties.{self.breakdown_key}", | ||
breakdown_type="data_warehouse", | ||
) | ||
|
||
def _prepare_count_query(self) -> TrendsQuery: | ||
""" | ||
This method takes the raw trend query and adapts it | ||
|
@@ -118,15 +130,32 @@ def _prepare_count_query(self) -> TrendsQuery: | |
|
||
prepared_count_query.trendsFilter = TrendsFilter(display=ChartDisplayType.ACTIONS_LINE_GRAPH_CUMULATIVE) | ||
prepared_count_query.dateRange = self._get_insight_date_range() | ||
prepared_count_query.breakdownFilter = self._get_breakdown_filter() | ||
prepared_count_query.properties = [ | ||
EventPropertyFilter( | ||
key=self.breakdown_key, | ||
value=self.variants, | ||
operator="exact", | ||
type="event", | ||
) | ||
] | ||
if self._is_data_warehouse_query(prepared_count_query): | ||
prepared_count_query.breakdownFilter = self._get_data_warehouse_breakdown_filter() | ||
prepared_count_query.properties = [ | ||
DataWarehousePropertyFilter( | ||
key="events.event", | ||
value="$feature_flag_called", | ||
operator=PropertyOperator.EXACT, | ||
type="data_warehouse", | ||
), | ||
DataWarehousePropertyFilter( | ||
key=f"events.properties.{self.breakdown_key}", | ||
value=self.variants, | ||
operator=PropertyOperator.EXACT, | ||
type="data_warehouse", | ||
), | ||
] | ||
else: | ||
prepared_count_query.breakdownFilter = self._get_event_breakdown_filter() | ||
prepared_count_query.properties = [ | ||
EventPropertyFilter( | ||
key=self.breakdown_key, | ||
value=self.variants, | ||
operator=PropertyOperator.EXACT, | ||
type="event", | ||
) | ||
] | ||
|
||
return prepared_count_query | ||
|
||
|
@@ -152,7 +181,7 @@ def _prepare_exposure_query(self) -> TrendsQuery: | |
|
||
if hasattr(count_event, "event"): | ||
prepared_exposure_query.dateRange = self._get_insight_date_range() | ||
prepared_exposure_query.breakdownFilter = self._get_breakdown_filter() | ||
prepared_exposure_query.breakdownFilter = self._get_event_breakdown_filter() | ||
prepared_exposure_query.trendsFilter = TrendsFilter( | ||
display=ChartDisplayType.ACTIONS_LINE_GRAPH_CUMULATIVE | ||
) | ||
|
@@ -166,7 +195,7 @@ def _prepare_exposure_query(self) -> TrendsQuery: | |
EventPropertyFilter( | ||
key=self.breakdown_key, | ||
value=self.variants, | ||
operator="exact", | ||
operator=PropertyOperator.EXACT, | ||
type="event", | ||
) | ||
] | ||
|
@@ -177,13 +206,13 @@ def _prepare_exposure_query(self) -> TrendsQuery: | |
elif self.query.exposure_query: | ||
prepared_exposure_query = TrendsQuery(**self.query.exposure_query.model_dump()) | ||
prepared_exposure_query.dateRange = self._get_insight_date_range() | ||
prepared_exposure_query.breakdownFilter = self._get_breakdown_filter() | ||
prepared_exposure_query.trendsFilter = TrendsFilter(display=ChartDisplayType.ACTIONS_LINE_GRAPH_CUMULATIVE) | ||
prepared_exposure_query.breakdownFilter = self._get_event_breakdown_filter() | ||
prepared_exposure_query.properties = [ | ||
EventPropertyFilter( | ||
key=self.breakdown_key, | ||
value=self.variants, | ||
operator="exact", | ||
operator=PropertyOperator.EXACT, | ||
type="event", | ||
) | ||
] | ||
|
@@ -206,13 +235,13 @@ def _prepare_exposure_query(self) -> TrendsQuery: | |
EventPropertyFilter( | ||
key="$feature_flag_response", | ||
value=self.variants, | ||
operator="exact", | ||
operator=PropertyOperator.EXACT, | ||
type="event", | ||
), | ||
EventPropertyFilter( | ||
key="$feature_flag", | ||
value=[self.feature_flag.key], | ||
operator="exact", | ||
operator=PropertyOperator.EXACT, | ||
type="event", | ||
), | ||
], | ||
|
@@ -226,7 +255,63 @@ def calculate(self) -> ExperimentTrendsQueryResponse: | |
|
||
def run(query_runner: TrendsQueryRunner, result_key: str, is_parallel: bool): | ||
try: | ||
result = query_runner.calculate() | ||
database = create_hogql_database(team_id=self.team.pk) | ||
if self._is_data_warehouse_query(query_runner.query): | ||
series_node = cast(DataWarehouseNode, query_runner.query.series[0]) | ||
table = database.get_table(series_node.table_name) | ||
table.fields["events"] = LazyJoin( | ||
from_field=[series_node.distinct_id_field], | ||
join_table=database.get_table("events"), | ||
join_function=lambda join_to_add, context, node: ( | ||
ast.JoinExpr( | ||
table=ast.SelectQuery( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is currently joining over all events, but we should join only over the In the future we should also support custom exposure events here, but for now feel free to hardcode There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch, handled with f131df9 |
||
select=[ | ||
ast.Alias(alias=name, expr=ast.Field(chain=["events", *chain])) | ||
for name, chain in { | ||
**join_to_add.fields_accessed, | ||
"timestamp": ["timestamp"], | ||
"distinct_id": ["distinct_id"], | ||
"properties": ["properties"], | ||
}.items() | ||
], | ||
select_from=ast.JoinExpr(table=ast.Field(chain=["events"])), | ||
), | ||
join_type="ASOF LEFT JOIN", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd love to have a comment here explaining what we're doing, especially because some of how the ASOF JOIN works is only explicit in the Clickhouse docs:
(feel free to adjust) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good suggestion, added the |
||
alias=join_to_add.to_table, | ||
constraint=ast.JoinConstraint( | ||
expr=ast.And( | ||
exprs=[ | ||
ast.CompareOperation( | ||
left=ast.Field( | ||
chain=[ | ||
join_to_add.from_table, | ||
series_node.distinct_id_field, | ||
] | ||
), | ||
op=ast.CompareOperationOp.Eq, | ||
right=ast.Field(chain=[join_to_add.to_table, "distinct_id"]), | ||
), | ||
ast.CompareOperation( | ||
left=ast.Field( | ||
chain=[ | ||
join_to_add.from_table, | ||
series_node.timestamp_field, | ||
] | ||
), | ||
op=ast.CompareOperationOp.GtEq, | ||
right=ast.Field(chain=[join_to_add.to_table, "timestamp"]), | ||
), | ||
] | ||
), | ||
constraint_type="ON", | ||
), | ||
) | ||
), | ||
) | ||
|
||
context = HogQLContext(team_id=self.team.pk, database=database) | ||
|
||
result = query_runner.calculate(context=context) | ||
shared_results[result_key] = result | ||
except Exception as e: | ||
errors.append(e) | ||
|
@@ -362,5 +447,8 @@ def _validate_event_variants(self, count_result: TrendsQueryResponse): | |
if has_errors: | ||
raise ValidationError(detail=json.dumps(errors)) | ||
|
||
def _is_data_warehouse_query(self, query: TrendsQuery) -> bool: | ||
return isinstance(query.series[0], DataWarehouseNode) | ||
|
||
def to_query(self) -> ast.SelectQuery: | ||
raise ValueError(f"Cannot convert source query of type {self.query.count_query.kind} to query") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just want to confirm my understanding: the purpose of this is to create a
Database
instance, which is an object that lets you run HogQL queries across various data sources, like person-events or data warehouse tables.We do this because we need to build a custom context with our own
Database
, including our own table where we define the join in a custom way. Then, we pass this custom context to the query runner.Please check if the above is correct and leave a comment in the code :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is correct :) Left a comment in a07bb2a