Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hogql): add readonly=1 and max_execution_time=60 on every query #14870

Merged
merged 2 commits into from
Mar 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 54 additions & 27 deletions posthog/api/test/__snapshots__/test_query.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
WHERE and(equals(events.team_id, 66), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_event_property_filter.1
Expand All @@ -25,7 +26,8 @@
WHERE and(equals(events.team_id, 66), equals(replaceRegexpAll(JSONExtractRaw(events.properties, 'key'), '^"|"$', ''), 'test_val3'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_event_property_filter.2
Expand All @@ -40,7 +42,8 @@
WHERE and(equals(events.team_id, 66), ilike(replaceRegexpAll(JSONExtractRaw(events.properties, 'path'), '^"|"$', ''), '%/%'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_event_property_filter_materialized
Expand All @@ -55,7 +58,8 @@
WHERE and(equals(events.team_id, 67), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_event_property_filter_materialized.1
Expand All @@ -70,7 +74,8 @@
WHERE and(equals(events.team_id, 67), equals(events.mat_key, 'test_val3'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_event_property_filter_materialized.2
Expand All @@ -85,7 +90,8 @@
WHERE and(equals(events.team_id, 67), ilike(events.mat_path, '%/%'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_full_hogql_query
Expand All @@ -97,7 +103,8 @@
FROM events
WHERE equals(events.team_id, 68)
ORDER BY events.timestamp ASC
LIMIT 100
LIMIT 100 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_full_hogql_query_materialized
Expand All @@ -109,7 +116,8 @@
FROM events
WHERE equals(events.team_id, 69)
ORDER BY events.timestamp ASC
LIMIT 100
LIMIT 100 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_hogql_property_filter
Expand All @@ -124,7 +132,8 @@
WHERE and(equals(events.team_id, 70), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_hogql_property_filter.1
Expand All @@ -139,7 +148,8 @@
WHERE and(equals(events.team_id, 70), equals('a%sd', 'foo'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_hogql_property_filter.2
Expand All @@ -154,7 +164,8 @@
WHERE and(equals(events.team_id, 70), equals('a%sd', 'a%sd'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_hogql_property_filter.3
Expand All @@ -169,7 +180,8 @@
WHERE and(equals(events.team_id, 70), equals(replaceRegexpAll(JSONExtractRaw(events.properties, 'key'), '^"|"$', ''), 'test_val2'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_hogql_property_filter_materialized
Expand All @@ -184,7 +196,8 @@
WHERE and(equals(events.team_id, 71), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_hogql_property_filter_materialized.1
Expand All @@ -199,7 +212,8 @@
WHERE and(equals(events.team_id, 71), equals('a%sd', 'foo'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_hogql_property_filter_materialized.2
Expand All @@ -214,7 +228,8 @@
WHERE and(equals(events.team_id, 71), equals('a%sd', 'a%sd'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_hogql_property_filter_materialized.3
Expand All @@ -229,7 +244,8 @@
WHERE and(equals(events.team_id, 71), equals(events.mat_key, 'test_val2'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_person_property_filter
Expand Down Expand Up @@ -258,7 +274,8 @@
WHERE and(equals(events.team_id, 75), equals(events__pdi__person.properties___email, '[email protected]'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_person_property_filter_materialized
Expand Down Expand Up @@ -287,7 +304,8 @@
WHERE and(equals(events.team_id, 76), equals(events__pdi__person.properties___email, '[email protected]'), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_property_filter_aggregations
Expand All @@ -300,7 +318,8 @@
GROUP BY replaceRegexpAll(JSONExtractRaw(events.properties, 'key'), '^"|"$', '')
ORDER BY count() DESC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_property_filter_aggregations.1
Expand All @@ -314,7 +333,8 @@
HAVING and(greater(count(), 1))
ORDER BY count() DESC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_property_filter_aggregations_materialized
Expand All @@ -327,7 +347,8 @@
GROUP BY events.mat_key
ORDER BY count() DESC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_property_filter_aggregations_materialized.1
Expand All @@ -341,7 +362,8 @@
HAVING and(greater(count(), 1))
ORDER BY count() DESC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_select_event_person
Expand All @@ -354,7 +376,8 @@
WHERE and(equals(events.team_id, 79), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_select_hogql_expressions
Expand All @@ -368,7 +391,8 @@
WHERE and(equals(events.team_id, 80), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY replaceRegexpAll(JSONExtractRaw(events.properties, 'key'), '^"|"$', '') ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_select_hogql_expressions.1
Expand All @@ -380,7 +404,8 @@
WHERE and(equals(events.team_id, 80), less(events.timestamp, '2020-01-10 12:14:05.000000'), greater(events.timestamp, '2020-01-09 12:00:00.000000'))
ORDER BY tuple(events.uuid, events.event, events.properties, events.timestamp, events.team_id, events.distinct_id, events.elements_chain, events.created_at) ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_select_hogql_expressions.2
Expand All @@ -393,7 +418,8 @@
GROUP BY events.event
ORDER BY count() DESC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
# name: TestQuery.test_select_hogql_expressions.3
Expand All @@ -406,6 +432,7 @@
GROUP BY events.event
ORDER BY count() DESC, events.event ASC
LIMIT 101
OFFSET 0
OFFSET 0 SETTINGS readonly=1,
max_execution_time=60
'
---
12 changes: 12 additions & 0 deletions posthog/hogql/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# HogQL -> ClickHouse allowed transformations
from typing import Optional

from pydantic import BaseModel, Extra

CLICKHOUSE_FUNCTIONS = {
# arithmetic
"abs": "abs",
Expand Down Expand Up @@ -103,3 +107,11 @@
# Never return more rows than this in top level HogQL SELECT statements
DEFAULT_RETURNED_ROWS = 100
MAX_SELECT_RETURNED_ROWS = 65535

# Settings applied on top of all HogQL queries.
class HogQLSettings(BaseModel):
class Config:
extra = Extra.forbid

readonly: Optional[int] = 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to be able to remove readonly? Should this be constant 1 forcing us to change code if we wanted to let HogQL do non-selects?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we can change it then :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is allowed

import dataclasses
from typing import Optional


@dataclasses.dataclass
class Thing:
    optional_value: Optional[int] = 1


x = Thing()
y = Thing(optional_value=None)

print(x.optional_value)
print(y.optional_value)

this prints

1
None

So, if we can accidentally or someone can maliciously get us to create settings with HogQLSettings(readonly=None) then we don't have readonly set on the query.

If we're saying that HogQL is readonly a constant here feels safer

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but none of the settings come from users --> we set them ourselves, and it's hardcoded in. I opted out of using feature flags to set these settings just for this reason. Now, if we want, we could, e.g. override some of them for some longer queries, e.g. setting max execution time to 5min for exports.

Theoretically we can also override readonly, but 1) nothing does that now, 2) users will not be able to set these settings... at least not directly, 3) it's still a select query so it's anyway an extra safety measure.

max_execution_time: Optional[int] = 60
38 changes: 32 additions & 6 deletions posthog/hogql/printer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import re
from dataclasses import dataclass
from typing import List, Literal, Optional, Union, cast

from clickhouse_driver.util.escape import escape_param

from ee.clickhouse.materialized_columns.columns import TablesWithMaterializedColumns, get_materialized_columns
from posthog.hogql import ast
from posthog.hogql.constants import CLICKHOUSE_FUNCTIONS, HOGQL_AGGREGATIONS, MAX_SELECT_RETURNED_ROWS
from posthog.hogql.constants import CLICKHOUSE_FUNCTIONS, HOGQL_AGGREGATIONS, MAX_SELECT_RETURNED_ROWS, HogQLSettings
from posthog.hogql.context import HogQLContext
from posthog.hogql.database import Table, create_hogql_database
from posthog.hogql.print_string import print_clickhouse_identifier, print_hogql_identifier
Expand Down Expand Up @@ -33,9 +36,10 @@ def print_ast(
context: HogQLContext,
dialect: Literal["hogql", "clickhouse"],
stack: Optional[List[ast.SelectQuery]] = None,
settings: Optional[HogQLSettings] = None,
) -> str:
prepared_ast = prepare_ast_for_printing(node=node, context=context, dialect=dialect, stack=stack)
return print_prepared_ast(node=prepared_ast, context=context, dialect=dialect, stack=stack)
return print_prepared_ast(node=prepared_ast, context=context, dialect=dialect, stack=stack, settings=settings)


def prepare_ast_for_printing(
Expand Down Expand Up @@ -64,9 +68,10 @@ def print_prepared_ast(
context: HogQLContext,
dialect: Literal["hogql", "clickhouse"],
stack: Optional[List[ast.SelectQuery]] = None,
settings: Optional[HogQLSettings] = None,
) -> str:
# _Printer also adds a team_id guard if printing clickhouse
return _Printer(context=context, dialect=dialect, stack=stack or []).visit(node)
return _Printer(context=context, dialect=dialect, stack=stack or [], settings=settings).visit(node)


@dataclass
Expand All @@ -79,17 +84,38 @@ class _Printer(Visitor):
# NOTE: Call "print_ast()", not this class directly.

def __init__(
self, context: HogQLContext, dialect: Literal["hogql", "clickhouse"], stack: Optional[List[ast.AST]] = None
self,
context: HogQLContext,
dialect: Literal["hogql", "clickhouse"],
stack: Optional[List[ast.AST]] = None,
settings: Optional[HogQLSettings] = None,
):
self.context = context
self.dialect = dialect
# Keep track of all traversed nodes.
self.stack: List[ast.AST] = stack or []
self.stack: List[ast.AST] = stack or [] # Keep track of all traversed nodes.
self.settings = settings

def visit(self, node: ast.AST):
self.stack.append(node)
response = super().visit(node)
self.stack.pop()

if len(self.stack) == 0 and self.dialect == "clickhouse" and self.settings:
if not isinstance(node, ast.SelectQuery) and not isinstance(node, ast.SelectUnionQuery):
raise ValueError("Settings can only be applied to SELECT queries")
settings = []
for key, value in self.settings:
if not isinstance(value, (int, float, str)):
raise ValueError(f"Setting {key} must be a string, int, or float")
if not re.match(r"^[a-zA-Z0-9_]+$", key):
raise ValueError(f"Setting {key} is not supported")
if isinstance(value, int) or isinstance(value, float):
settings.append(f"{key}={value}")
else:
settings.append(f"{key}={escape_param(value)}")
if len(settings) > 0:
response += f" SETTINGS {', '.join(settings)}"

return response

def visit_select_union_query(self, node: ast.SelectUnionQuery):
Expand Down
7 changes: 5 additions & 2 deletions posthog/hogql/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from posthog.clickhouse.client.connection import Workload
from posthog.hogql import ast
from posthog.hogql.constants import DEFAULT_RETURNED_ROWS
from posthog.hogql.constants import DEFAULT_RETURNED_ROWS, HogQLSettings
from posthog.hogql.hogql import HogQLContext
from posthog.hogql.parser import parse_select
from posthog.hogql.placeholders import assert_no_placeholders, replace_placeholders
Expand Down Expand Up @@ -32,6 +32,7 @@ def execute_hogql_query(
query_type: str = "hogql_query",
placeholders: Optional[Dict[str, ast.Expr]] = None,
workload: Workload = Workload.ONLINE,
settings: Optional[HogQLSettings] = None,
) -> HogQLQueryResponse:
if isinstance(query, ast.SelectQuery):
select_query = query
Expand Down Expand Up @@ -69,7 +70,9 @@ def execute_hogql_query(
clickhouse_context = HogQLContext(
team_id=team.pk, enable_select_queries=True, person_on_events_mode=team.person_on_events_mode
)
clickhouse = print_ast(select_query, clickhouse_context, "clickhouse")
clickhouse = print_ast(
select_query, context=clickhouse_context, dialect="clickhouse", settings=settings or HogQLSettings()
)

results, types = insight_sync_execute(
clickhouse,
Expand Down
Loading