Skip to content

Commit

Permalink
Remove simplification-specific logic from queries
Browse files Browse the repository at this point in the history
  • Loading branch information
macobo committed Oct 4, 2021
1 parent 2a00c9f commit e507b54
Show file tree
Hide file tree
Showing 18 changed files with 25 additions and 101 deletions.
5 changes: 0 additions & 5 deletions ee/clickhouse/models/property.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ def parse_prop_clauses(
prepend: str = "global",
table_name: str = "",
allow_denormalized_props: bool = True,
filter_test_accounts=False,
is_person_query=False,
person_properties_column: Optional[str] = None,
) -> Tuple[str, Dict]:
Expand All @@ -34,10 +33,6 @@ def parse_prop_clauses(
if table_name != "":
table_name += "."

if filter_test_accounts:
test_account_filters = Team.objects.only("test_account_filters").get(id=team_id).test_account_filters
filters.extend([Property(**prop) for prop in test_account_filters])

for idx, prop in enumerate(filters):
if prop.type == "cohort":
try:
Expand Down
4 changes: 1 addition & 3 deletions ee/clickhouse/models/test/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@
def _filter_events(
filter: Filter, team: Team, person_query: Optional[bool] = False, order_by: Optional[str] = None,
):
prop_filters, prop_filter_params = parse_prop_clauses(
filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
)
prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
params = {"team_id": team.pk, **prop_filter_params}

if order_by == "id":
Expand Down
1 change: 0 additions & 1 deletion ee/clickhouse/queries/breakdown_props.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def get_breakdown_prop_values(
team_id,
table_name="e",
prepend="e_brkdwn",
filter_test_accounts=filter.filter_test_accounts,
person_properties_column="person_props",
allow_denormalized_props=True,
)
Expand Down
4 changes: 1 addition & 3 deletions ee/clickhouse/queries/clickhouse_retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,7 @@ def _retrieve_people(self, filter: RetentionFilter, team: Team):
period = filter.period
is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
trunc_func = get_trunc_func_ch(period)
prop_filters, prop_filter_params = parse_prop_clauses(
filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
)
prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
target_query, target_params = self._get_condition(filter.target_entity, table="e")
Expand Down
8 changes: 2 additions & 6 deletions ee/clickhouse/queries/clickhouse_stickiness.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ class ClickhouseStickiness(Stickiness):
def stickiness(self, entity: Entity, filter: StickinessFilter, team_id: int) -> Dict[str, Any]:

parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team_id)
prop_filters, prop_filter_params = parse_prop_clauses(
filter.properties + entity.properties, team_id, filter_test_accounts=filter.filter_test_accounts,
)
prop_filters, prop_filter_params = parse_prop_clauses(filter.properties + entity.properties, team_id)
trunc_func = get_trunc_func_ch(filter.interval)

params: Dict = {"team_id": team_id}
Expand Down Expand Up @@ -92,9 +90,7 @@ def _format_entity_filter(entity: Entity) -> Tuple[str, Dict]:

def _process_content_sql(target_entity: Entity, filter: StickinessFilter, team: Team) -> Tuple[str, Dict[str, Any]]:
parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
prop_filters, prop_filter_params = parse_prop_clauses(
filter.properties + target_entity.properties, team.pk, filter_test_accounts=filter.filter_test_accounts,
)
prop_filters, prop_filter_params = parse_prop_clauses(filter.properties + target_entity.properties)
entity_sql, entity_params = _format_entity_filter(entity=target_entity)
trunc_func = get_trunc_func_ch(filter.interval)

Expand Down
9 changes: 0 additions & 9 deletions ee/clickhouse/queries/column_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,6 @@ def should_query_elements_chain_column(self) -> bool:
if has_element_type_property(self.filter.properties):
return True

if self.filter.filter_test_accounts:
test_account_filters = Team.objects.only("test_account_filters").get(id=self.team_id).test_account_filters
properties = [Property(**prop) for prop in test_account_filters]
if has_element_type_property(properties):
return True

# Both entities and funnel exclusions can contain nested elements_chain inclusions
for entity in self.filter.entities + cast(List[Entity], self.filter.exclusions):
if has_element_type_property(entity.properties):
Expand All @@ -92,9 +86,6 @@ def properties_used_in_filter(self) -> Set[Tuple[PropertyName, PropertyType]]:
result: Set[Tuple[PropertyName, PropertyType]] = set()

result |= extract_tables_and_properties(self.filter.properties)
if self.filter.filter_test_accounts:
test_account_filters = Team.objects.only("test_account_filters").get(id=self.team_id).test_account_filters
result |= extract_tables_and_properties([Property(**prop) for prop in test_account_filters])

# Some breakdown types read properties
#
Expand Down
15 changes: 0 additions & 15 deletions ee/clickhouse/queries/event_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,6 @@ def _determine_should_join_persons(self) -> None:
self._should_join_persons = True
return

if self._filter.filter_test_accounts:
test_account_filters = Team.objects.only("test_account_filters").get(id=self._team_id).test_account_filters
test_filter_props = [Property(**prop) for prop in test_account_filters]
if any(self._should_property_join_persons(prop) for prop in test_filter_props):
self._should_join_distinct_ids = True
self._should_join_persons = True
return

def _should_property_join_persons(self, prop: Property) -> bool:
if prop.type == "person":
return True
Expand Down Expand Up @@ -152,18 +144,11 @@ def _get_date_filter(self) -> Tuple[str, Dict]:
return query, date_params

def _get_props(self, filters: List[Property]) -> Tuple[str, Dict]:

filter_test_accounts = self._filter.filter_test_accounts
team_id = self._team_id
prepend = "global"

final = []
params: Dict[str, Any] = {}

if filter_test_accounts:
test_account_filters = Team.objects.only("test_account_filters").get(id=team_id).test_account_filters
filters += [Property(**prop) for prop in test_account_filters]

for idx, prop in enumerate(filters):
if prop.type == "cohort":
person_id_query, cohort_filter_params = self._get_cohort_subquery(prop)
Expand Down
4 changes: 1 addition & 3 deletions ee/clickhouse/queries/sessions/average.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ def calculate_avg(self, filter: Filter, team: Team):

parsed_date_from, parsed_date_to, _ = parse_timestamps(filter, team.pk)

filters, params = parse_prop_clauses(
filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
)
filters, params = parse_prop_clauses(filter.properties, team.pk)

trunc_func = get_trunc_func_ch(filter.interval)
interval_func = get_interval_func_ch(filter.interval)
Expand Down
4 changes: 1 addition & 3 deletions ee/clickhouse/queries/sessions/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@ def calculate_dist(self, filter: Filter, team: Team):

parsed_date_from, parsed_date_to, _ = parse_timestamps(filter, team.pk)

filters, params = parse_prop_clauses(
filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
)
filters, params = parse_prop_clauses(filter.properties, team.pk)

entity_conditions, entity_params = entity_query_conditions(filter, team)
if not entity_conditions:
Expand Down
6 changes: 1 addition & 5 deletions ee/clickhouse/queries/trends/breakdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,7 @@ def get_query(self) -> Tuple[str, Dict, Callable]:

props_to_filter = [*self.filter.properties, *self.entity.properties]
prop_filters, prop_filter_params = parse_prop_clauses(
props_to_filter,
self.team_id,
table_name="e",
filter_test_accounts=self.filter.filter_test_accounts,
person_properties_column="person_props",
props_to_filter, self.team_id, table_name="e", person_properties_column="person_props",
)
aggregate_operation, _, math_params = process_math(self.entity)

Expand Down
8 changes: 2 additions & 6 deletions ee/clickhouse/queries/trends/lifecycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@ def _format_lifecycle_query(self, entity: Entity, filter: Filter, team_id: int)
event_params: Dict[str, Any] = {}

props_to_filter = [*filter.properties, *entity.properties]
prop_filters, prop_filter_params = parse_prop_clauses(
props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts
)
prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id)

_, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

Expand Down Expand Up @@ -140,9 +138,7 @@ def get_people(
event_params = {"event": entity.id}

props_to_filter = [*filter.properties, *entity.properties]
prop_filters, prop_filter_params = parse_prop_clauses(
props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts
)
prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id)

result = sync_execute(
LIFECYCLE_PEOPLE_SQL.format(
Expand Down
2 changes: 1 addition & 1 deletion posthog/models/filters/mixins/simplify.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def simplify(self: T, team: "Team") -> T: # type: ignore
- if filter.filter_test_accounts, adds property filters to `filter.properties`
"""

result = self
result = self.with_data({"is_simplified": True})
if getattr(self, "filter_test_accounts", False):
result = result.with_data(
{"properties": result.properties + team.test_account_filters, "filter_test_accounts": False}
Expand Down
4 changes: 1 addition & 3 deletions posthog/models/filters/test/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,9 +426,7 @@ def _filter_events(filter: Filter, team: Team, person_query: Optional[bool] = Fa
if person_query:
events = events.add_person_id(team.pk)

events = events.filter(
properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
)
events = events.filter(properties_to_Q(filter.properties, team_id=team.pk))
events = events.filter(team_id=team.pk)
if order_by:
events = events.order_by(order_by)
Expand Down
12 changes: 3 additions & 9 deletions posthog/queries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,26 +115,20 @@ def filter_events(
filters &= Q(timestamp__gte=date_from)
if include_dates:
filters &= Q(timestamp__lte=filter.date_to + relativity)
if filter.properties or filter.filter_test_accounts:
filters &= properties_to_Q(filter.properties, team_id=team_id, filter_test_accounts=filter.filter_test_accounts)
if filter.properties:
filters &= properties_to_Q(filter.properties, team_id=team_id)
if entity and entity.properties:
filters &= properties_to_Q(entity.properties, team_id=team_id)
return filters


def properties_to_Q(
properties: List[Property], team_id: int, is_person_query: bool = False, filter_test_accounts: bool = False
) -> Q:
def properties_to_Q(properties: List[Property], team_id: int, is_person_query: bool = False) -> Q:
"""
Converts a filter to Q, for use in Django ORM .filter()
If you're filtering a Person QuerySet, use is_person_query to avoid doing an unnecessary nested loop
"""
filters = Q()

if filter_test_accounts:
test_account_filters = Team.objects.only("test_account_filters").get(id=team_id).test_account_filters
properties.extend([Property(**prop) for prop in test_account_filters])

if len(properties) == 0:
return filters

Expand Down
14 changes: 4 additions & 10 deletions posthog/queries/funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,7 @@ def _gen_lateral_bodies(self, within_time: Optional[str] = None):
else {}
),
)
.filter(
properties_to_Q(
self._filter.properties,
team_id=self._team.pk,
filter_test_accounts=self._filter.filter_test_accounts,
)
)
.filter(properties_to_Q(self._filter.properties, team_id=self._team.pk,))
.filter(properties_to_Q(step.properties, team_id=self._team.pk))
)
with connection.cursor() as cursor:
Expand Down Expand Up @@ -160,9 +154,9 @@ def _build_query(self, within_time: Optional[str] = None):
event_chain_query = sql.SQL(" ").join(lateral_joins).as_string(connection.connection)

query = f"""
SELECT
SELECT
DISTINCT ON (person.id)
person.uuid,
person.uuid,
person.created_at,
person.team_id,
person.properties,
Expand All @@ -171,7 +165,7 @@ def _build_query(self, within_time: Optional[str] = None):
FROM posthog_person person
JOIN posthog_persondistinctid pdi ON pdi.person_id = person.id
JOIN {event_chain_query}
-- join on person_id for the first event.
-- join on person_id for the first event.
-- NOTE: there is some implicit coupling here in that I am
-- assuming the name of the first event select is "step_0".
-- Maybe worth cleaning up in the future
Expand Down
6 changes: 1 addition & 5 deletions posthog/queries/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,7 @@ def calculate_paths(self, filter: PathFilter, team: Team):
if event is None
else Q()
)
.filter(
properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
if filter and (filter.properties or filter.filter_test_accounts)
else Q()
)
.filter(properties_to_Q(filter.properties, team_id=team.pk) if filter and filter.properties else Q())
.annotate(
previous_timestamp=Window(
expression=Lag("timestamp", default=None),
Expand Down
16 changes: 5 additions & 11 deletions posthog/queries/retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,7 @@ def _determine_query_params(self, filter: RetentionFilter, team: Team):
trunc, fields = self._get_trunc_func("timestamp", period)

if is_first_time_retention:
filtered_events = events.filter(
properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
)
filtered_events = events.filter(properties_to_Q(filter.properties, team_id=team.pk))
first_date = (
filtered_events.filter(entity_condition)
.values("person_id", "event", "action")
Expand All @@ -118,7 +116,7 @@ def _determine_query_params(self, filter: RetentionFilter, team: Team):
)
else:
filtered_events = events.filter(filter.date_filter_Q).filter(
properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
properties_to_Q(filter.properties, team_id=team.pk)
)
first_date = (
filtered_events.filter(entity_condition)
Expand Down Expand Up @@ -206,14 +204,12 @@ def _retrieve_people(self, filter: RetentionFilter, team: Team):
events = Event.objects.filter(team_id=team.pk).add_person_id(team.pk)

filtered_events = events.filter(filter.recurring_date_filter_Q()).filter(
properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
properties_to_Q(filter.properties, team_id=team.pk)
)

inner_events = (
Event.objects.filter(team_id=team.pk)
.filter(
properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
)
.filter(properties_to_Q(filter.properties, team_id=team.pk))
.add_person_id(team.pk)
.filter(**{"person_id": OuterRef("id")})
.filter(entity_condition)
Expand All @@ -224,9 +220,7 @@ def _retrieve_people(self, filter: RetentionFilter, team: Team):
if is_first_time_retention
else Event.objects.filter(team_id=team.pk)
.filter(filter.reference_date_filter_Q())
.filter(
properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
)
.filter(properties_to_Q(filter.properties, team_id=team.pk))
.add_person_id(team.pk)
.filter(**{"person_id": OuterRef("id")})
.filter(entity_condition)
Expand Down
4 changes: 1 addition & 3 deletions posthog/queries/sessions/sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,7 @@ def events_query(self, filter: Filter, team: Team) -> QuerySet:
Event.objects.filter(team=team)
.add_person_id(team.pk)
.filter(~Q(event="$feature_flag_called"))
.filter(
properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
)
.filter(properties_to_Q(filter.properties, team_id=team.pk))
.order_by("-timestamp")
)

Expand Down

0 comments on commit e507b54

Please sign in to comment.