Skip to content

Commit

Permalink
cr fixes #6
Browse files Browse the repository at this point in the history
  • Loading branch information
sdafni committed Nov 23, 2023
1 parent 906ffd6 commit 40873dc
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 16 deletions.
2 changes: 1 addition & 1 deletion dagshub/data_engine/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,6 @@ def _from_gql_result(repo: str, dataset_result: "DatasetResult") -> "Datasource"

query_dict = json.loads(dataset_result.datasetQuery)

Datasource.deserialize_gql_result(ds, query_dict)
ds._deserialize_gql_result(query_dict)

return ds
9 changes: 5 additions & 4 deletions dagshub/data_engine/model/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class Field:

@property
def as_of_timestamp(self) -> Optional[int]:
if self.as_of_time:
if self.as_of_time is not None:
if isinstance(self.as_of_time, datetime.datetime):
return int(self.as_of_time.timestamp())
else:
Expand Down Expand Up @@ -129,7 +129,7 @@ def serialize_gql_query_input(self):
result["select"] = self._select
return result

def deserialize_gql_result(self, query_dict):
def _deserialize_gql_result(self, query_dict):
if "query" in query_dict:
self._query = DatasourceQuery.deserialize(query_dict["query"])
self._select = query_dict.get("select")
Expand Down Expand Up @@ -157,14 +157,15 @@ def all(self) -> "QueryResult":
self._check_preprocess()
return self._source.client.get_datapoints(self)

def select(self, *selected: Field):
def select(self, *selected: Union[str, Field]):
"""
Choose which columns will appear on the query result, what their names will be (alias) and from what time.
example:
t = int((datetime.datetime.now()-datetime.timedelta(hours=24)).timestamp())
q1 = (ds["episode"] > 5).select(Field("episode", as_of_time=t, alias="episode_asof_t"), Field("size"))
"""
self._select = [s.to_dict(self) for s in selected]

self._select = [s.to_dict(self) if isinstance(s, Field) else {"name": s} for s in selected]

return self

Expand Down
38 changes: 27 additions & 11 deletions tests/data_engine/test_querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,23 @@ def test_simple_filter(ds):
assert q.to_dict() == expected


def test_versioning_query_ts_format(ds):
def test_versioning_query_datetime(ds):
add_int_fields(ds, "x")

# timestamp
ds2 = ds[ds[Field("x", as_of_time=1700604000)] > 1]
q = ds2.get_query()
assert q.to_dict() == {'gt': {'data': {'as_of': 1700604000, 'field': 'x', 'value': 1}}}

# datetime
ds2 = ds[ds[Field("x", as_of_time=dateutil.parser.parse("Wed 22 Nov 2023"))] > 1]
q = ds2.get_query()
assert q.to_dict() == {'gt': {'data': {'as_of': int(dateutil.parser.parse("Wed 22 Nov 2023").timestamp()),
'field': 'x', 'value': 1}}}


def test_versioning_query_timestamp(ds):
add_int_fields(ds, "x")
# timestamp
ds2 = ds[ds[Field("x", as_of_time=1700604000)] > 1]
q = ds2.get_query()
assert q.to_dict() == {'gt': {'data': {'as_of': 1700604000, 'field': 'x', 'value': 1}}}


def test_versioning_select(ds):
add_int_fields(ds, "x")
add_int_fields(ds, "y")
Expand All @@ -69,9 +71,23 @@ def test_versioning_select(ds):
'select': [{'name': 'y', 'asOf': 123}, {'name': 'x', 'asOf': 456, 'alias': 'y_t1'}]}
assert ds2.serialize_gql_query_input() == expected_serialized

# test select non-exising
with pytest.raises(FieldNotFoundError):
_ = ds[ds[Field("x", as_of_time=123.99)] > 1].select(Field("x_not_there"))

def test_versioning_select_as_strings(ds):
add_int_fields(ds, "x")
add_int_fields(ds, "y")
add_int_fields(ds, "z")

ds2 = (ds[ds["x"] > 1]).select("y", "z")
print(ds2.serialize_gql_query_input())
assert ds2.serialize_gql_query_input() == {'query': {'filter': {'key': 'x', 'value': '1', 'valueType': 'INTEGER', 'comparator': 'GREATER_THAN'}}, 'select': [{'name': 'y'}, {'name': 'z'}]}

Check failure on line 82 in tests/data_engine/test_querying.py

View workflow job for this annotation

GitHub Actions / Flake8

tests/data_engine/test_querying.py#L82

Line too long (192 > 120 characters) (E501)

Check failure on line 82 in tests/data_engine/test_querying.py

View workflow job for this annotation

GitHub Actions / Flake8

tests/data_engine/test_querying.py#L82

Multiple spaces after ',' (E241)

ds2 = (ds[ds["x"] > 1]).select("y", Field("x"), "z")
print(ds2.serialize_gql_query_input())
assert ds2.serialize_gql_query_input() == {'query': {'filter': {'key': 'x', 'value': '1', 'valueType': 'INTEGER', 'comparator': 'GREATER_THAN'}}, 'select': [{'name': 'y'}, {'name': 'x'}, {'name': 'z'}]}

Check failure on line 86 in tests/data_engine/test_querying.py

View workflow job for this annotation

GitHub Actions / Flake8

tests/data_engine/test_querying.py#L86

Line too long (206 > 120 characters) (E501)

ds2 = (ds[ds["x"] > 1]).select("y", Field("x", as_of_time=1234), "z")
print(ds2.serialize_gql_query_input())
assert ds2.serialize_gql_query_input() == {'query': {'filter': {'key': 'x', 'value': '1', 'valueType': 'INTEGER', 'comparator': 'GREATER_THAN'}}, 'select': [{'name': 'y'}, {'name': 'x','asOf': 1234 }, {'name': 'z'}]}

Check failure on line 90 in tests/data_engine/test_querying.py

View workflow job for this annotation

GitHub Actions / Flake8

tests/data_engine/test_querying.py#L90

Line too long (220 > 120 characters) (E501)

Check failure on line 90 in tests/data_engine/test_querying.py

View workflow job for this annotation

GitHub Actions / Flake8

tests/data_engine/test_querying.py#L90

Missing whitespace after ',' (E231)

Check failure on line 90 in tests/data_engine/test_querying.py

View workflow job for this annotation

GitHub Actions / Flake8

tests/data_engine/test_querying.py#L90

Whitespace before '}' (E202)


def test_versioning_dataset_deserialize(ds):
Expand All @@ -80,7 +96,7 @@ def test_versioning_dataset_deserialize(ds):
'query': {'filter': {'key': 'x', 'value': 'dogs', 'valueType': 'STRING', 'comparator': 'EQUAL',
'asOf': 1700651563}}}

Datasource.deserialize_gql_result(ds, query)
ds._deserialize_gql_result(query)
assert ds._select == [{'name': 'x', 'asOf': 1700651566}, {'name': 'y', 'alias': 'y_t1', 'asOf': 1700651563}]


Expand Down

0 comments on commit 40873dc

Please sign in to comment.