Skip to content

Commit

Permalink
deeper json access
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra committed Apr 4, 2023
1 parent 1dcf545 commit c12ca33
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 28 deletions.
8 changes: 4 additions & 4 deletions posthog/hogql/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,25 +223,25 @@ def get_child(self, name: str) -> Ref:
if database_field is None:
raise ValueError(f'Can not access property "{name}" on field "{self.name}".')
if isinstance(database_field, StringJSONDatabaseField):
return PropertyRef(name=name, parent=self)
return PropertyRef(chain=[name], parent=self)
raise ValueError(
f'Can not access property "{name}" on field "{self.name}" of type: {type(database_field).__name__}'
)


class PropertyRef(Ref):
name: str
chain: List[str]
parent: FieldRef

# The property has been moved into a field we query from a joined subquery
joined_subquery: Optional[SelectQueryAliasRef]
joined_subquery_field_name: Optional[str]

def get_child(self, name: str) -> "Ref":
raise NotImplementedError("JSON property traversal is not yet supported")
return PropertyRef(chain=self.chain + [name], parent=self.parent)

def has_child(self, name: str) -> bool:
return False
return True


class Alias(Expr):
Expand Down
37 changes: 24 additions & 13 deletions posthog/hogql/printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,14 +508,13 @@ def visit_property_ref(self, ref: ast.PropertyRef):
field_ref = ref.parent
field = field_ref.resolve_database_field()

key = f"hogql_val_{len(self.context.values)}"
self.context.values[key] = ref.name

# check for a materialised column
table = field_ref.table
while isinstance(table, ast.TableAliasRef):
table = table.table_ref

# find a materialized property for the first part of the chain
materialized_property_sql: Optional[str] = None
if isinstance(table, ast.TableRef):
if self.dialect == "clickhouse":
table_name = table.table.clickhouse_table()
Expand All @@ -525,30 +524,42 @@ def visit_property_ref(self, ref: ast.PropertyRef):
raise ValueError(f"Can't resolve field {field_ref.name} on table {table_name}")
field_name = cast(Union[Literal["properties"], Literal["person_properties"]], field.name)

materialized_column = self._get_materialized_column(table_name, ref.name, field_name)
materialized_column = self._get_materialized_column(table_name, ref.chain[0], field_name)
if materialized_column:
property_sql = self._print_identifier(materialized_column)
if not self.context.within_non_hogql_query:
property_sql = f"{self.visit(field_ref.table)}.{property_sql}"
return property_sql
else:
field_sql = self.visit(field_ref)
return trim_quotes_expr(f"JSONExtractRaw({field_sql}, %({key})s)")
materialized_property_sql = property_sql
elif (
self.context.within_non_hogql_query
and (isinstance(table, ast.SelectQueryAliasRef) and table.name == "events__pdi__person")
or (isinstance(table, ast.VirtualTableRef) and table.field == "poe")
):
# :KLUDGE: Legacy person properties handling. Only used within non-HogQL queries, such as insights.
if self.context.person_on_events_mode != PersonOnEventsMode.DISABLED:
materialized_column = self._get_materialized_column("events", ref.name, "person_properties")
materialized_column = self._get_materialized_column("events", ref.chain[0], "person_properties")
else:
materialized_column = self._get_materialized_column("person", ref.name, "properties")
materialized_column = self._get_materialized_column("person", ref.chain[0], "properties")
if materialized_column:
return self._print_identifier(materialized_column)
materialized_property_sql = self._print_identifier(materialized_column)

field_sql = self.visit(field_ref)
return trim_quotes_expr(f"JSONExtractRaw({field_sql}, %({key})s)")
if materialized_property_sql is not None:
if len(ref.chain) == 1:
return materialized_property_sql
else:
args = [materialized_property_sql]
for name in ref.chain[1:]:
key = f"hogql_val_{len(self.context.values)}"
self.context.values[key] = name
args.append(f"%({key})s")
return trim_quotes_expr(f"JSONExtractRaw({', '.join(args)})")

args = [self.visit(field_ref)]
for name in ref.chain:
key = f"hogql_val_{len(self.context.values)}"
self.context.values[key] = name
args.append(f"%({key})s")
return trim_quotes_expr(f"JSONExtractRaw({', '.join(args)})")

def visit_sample_expr(self, node: ast.SampleExpr):
sample_value = self.visit_ratio_expr(node.sample_value)
Expand Down
24 changes: 23 additions & 1 deletion posthog/hogql/test/test_printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,29 @@ def test_hogql_properties(self):
"properties.'no strings'", "mismatched input ''no strings'' expecting DECIMAL_LITERAL", "hogql"
)

def test_hogql_properties_json(self):
try:
from ee.clickhouse.materialized_columns.analyze import materialize
except:
# EE not available? Assume we're good
self.assertEqual(1 + 2, 3)
return

materialize("events", "withmat")
context = HogQLContext(team_id=self.team.pk)
self.assertEqual(
self._expr("properties.withmat.json.yet", context),
"replaceRegexpAll(JSONExtractRaw(events.mat_withmat, %(hogql_val_0)s, %(hogql_val_1)s), '^\"|\"$', '')",
)
self.assertEqual(context.values, {"hogql_val_0": "json", "hogql_val_1": "yet"})

context = HogQLContext(team_id=self.team.pk)
self.assertEqual(
self._expr("properties.nomat.json.yet", context),
"replaceRegexpAll(JSONExtractRaw(events.properties, %(hogql_val_0)s, %(hogql_val_1)s, %(hogql_val_2)s), '^\"|\"$', '')",
)
self.assertEqual(context.values, {"hogql_val_0": "nomat", "hogql_val_1": "json", "hogql_val_2": "yet"})

def test_materialized_fields_and_properties(self):
try:
from ee.clickhouse.materialized_columns.analyze import materialize
Expand Down Expand Up @@ -180,7 +203,6 @@ def test_expr_parse_errors(self):
"avg(avg(properties.bla))", "Aggregation 'avg' cannot be nested inside another aggregation 'avg'."
)
self._assert_expr_error("person.chipotle", "Field not found: chipotle")
self._assert_expr_error("properties.no.json.yet", "JSON property traversal is not yet supported")

def test_expr_syntax_errors(self):
self._assert_expr_error("(", "line 1, column 1: no viable alternative at input '('")
Expand Down
4 changes: 2 additions & 2 deletions posthog/hogql/transforms/lazy_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ def visit_select_query(self, node: ast.SelectQuery):
chain.append(table_ref.resolve_database_table().hogql_table())
chain.append(field.name)
if property is not None:
chain.append(property.name)
property.joined_subquery_field_name = f"{field.name}___{property.name}"
chain.extend(property.chain)
property.joined_subquery_field_name = f"{field.name}___{'___'.join(property.chain)}"
new_join.fields_accessed[property.joined_subquery_field_name] = ast.Field(chain=chain)
else:
new_join.fields_accessed[field.name] = ast.Field(chain=chain)
Expand Down
16 changes: 8 additions & 8 deletions posthog/hogql/transforms/property_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ def __init__(self):
self.found_timestamps = False

def visit_property_ref(self, node: ast.PropertyRef):
if node.parent.name == "properties":
if node.parent.name == "properties" and len(node.chain) == 1:
if isinstance(node.parent.table, ast.BaseTableRef):
table = node.parent.table.resolve_database_table().hogql_table()
if table == "persons":
self.person_properties.add(node.name)
self.person_properties.add(node.chain[0])
if table == "events":
self.event_properties.add(node.name)
self.event_properties.add(node.chain[0])

def visit_field(self, node: ast.Field):
super().visit_field(node)
Expand All @@ -83,15 +83,15 @@ def visit_field(self, node: ast.Field):
return ast.Call(name="toTimezone", args=[node, ast.Constant(value=self.timezone)])

ref = node.ref
if isinstance(ref, ast.PropertyRef) and ref.parent.name == "properties":
if isinstance(ref, ast.PropertyRef) and ref.parent.name == "properties" and len(ref.chain) == 1:
if isinstance(ref.parent.table, ast.BaseTableRef):
table = ref.parent.table.resolve_database_table().hogql_table()
if table == "persons":
if ref.name in self.person_properties:
return self._add_type_to_string_field(node, self.person_properties[ref.name])
if ref.chain[0] in self.person_properties:
return self._add_type_to_string_field(node, self.person_properties[ref.chain[0]])
if table == "events":
if ref.name in self.event_properties:
return self._add_type_to_string_field(node, self.event_properties[ref.name])
if ref.chain[0] in self.event_properties:
return self._add_type_to_string_field(node, self.event_properties[ref.chain[0]])

return node

Expand Down
16 changes: 16 additions & 0 deletions posthog/hogql/transforms/test/test_lazy_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,22 @@ def test_resolve_lazy_tables_one_level_properties(self):
)
self.assertEqual(printed, expected)

@override_settings(PERSON_ON_EVENTS_OVERRIDE=False)
def test_resolve_lazy_tables_one_level_properties_deep(self):
printed = self._print_select("select person.properties.$browser.in.json from person_distinct_ids")
expected = (
"SELECT person_distinct_ids__person.`properties___$browser___in___json` "
"FROM person_distinct_id2 INNER JOIN "
"(SELECT argMax(replaceRegexpAll(JSONExtractRaw(person.properties, %(hogql_val_0)s, %(hogql_val_1)s, %(hogql_val_2)s), '^\"|\"$', ''), person.version) "
"AS `properties___$browser___in___json`, person.id FROM person "
f"WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id "
"HAVING equals(argMax(person.is_deleted, person.version), 0)"
") AS person_distinct_ids__person ON equals(person_distinct_id2.person_id, person_distinct_ids__person.id) "
f"WHERE equals(person_distinct_id2.team_id, {self.team.pk}) "
"LIMIT 65535"
)
self.assertEqual(printed, expected)

def test_resolve_lazy_tables_two_levels_properties(self):
printed = self._print_select("select event, pdi.person.properties.$browser from events")
expected = (
Expand Down

0 comments on commit c12ca33

Please sign in to comment.