Skip to content

Commit

Permalink
Review comments
Browse files Browse the repository at this point in the history
Primarily change to report leaf key nodes with None rather than an empty JSON
object at Varshini's request.

Revise the "left outer join" comment to clarify.
  • Loading branch information
dbutenhof committed Mar 15, 2023
1 parent dc72827 commit ba43de7
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 34 deletions.
44 changes: 24 additions & 20 deletions lib/pbench/server/api/resources/datasets_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,61 +294,65 @@ def filter_query(filters: list[str], query: Query) -> Query:

return query.filter(and_(*and_list))

def accumulate(self, keys: JSONOBJECT, key: str, value: Any):
def accumulate(self, aggregate: JSONOBJECT, key: str, value: Any):
"""Recursive helper to accumulate the metadata namespace
Iterate through a list of metadata key/value pairs to construct a
herarchical aggregation of all metadata keys across the selected
hierarchical aggregation of all metadata keys across the selected
datasets. Each key in the hierarchy is represented as a key in a
nested JSON object. "Leaf" keys are represented with empty JSON
values. E.g.,
nested JSON object. "Leaf" keys have the value None. E.g.,
{
"dataset": {"name": {}, "metalog": {"pbench": {"script": {}}}},
"server": {"deletion": {}, "tarball-path": {}},
"global": {"server": {"legacy": {"sha1": {}}}}
"dataset": {"name": None, "metalog": {"pbench": {"script": None}}},
"server": {"deletion": None, "tarball-path": None},
"global": {"server": {"legacy": {"sha1": None}}}
}
Args:
keys: a JSONOBJECT to update with the recursive key/value
aggregate: a JSONOBJECT to update with the recursive key/value
key: the current metadata key path element
value: the current metadata key's value
"""
if key in keys:
p = keys[key]
else:
p = {}
keys[key] = p
if isinstance(value, dict):
p = aggregate.get(key)
if p is None:
p = {}
aggregate[key] = p
for k, v in value.items():
self.accumulate(p, k, v)
elif key not in aggregate:
aggregate[key] = None

def keyspace(self, query: Query) -> JSONOBJECT:
"""Aggregate the dataset metadata keyspace
Run the query we've compiled, but instead of returning Dataset proxies,
we only want the metadata key/value pairs we've selected.
NOTE: In general we expect every dataset to have some metadata; however
in the event one doesn't, the key and value returned by the joined
query will be None. We handle this by ignoring those values.
NOTE: The SQL left outer join returns a row for each row in the "left"
table (Dataset) even if there is no matching foreign key in the "right"
table (Metadata). This means a dataset with no metadata will result in
a join row here with key and value of None. The `elif` in the loop will
silently ignore rows with a null key to handle this case.
Args:
query: The basic filtered SQLAlchemy query object
Returns:
The aggregated keyspace JSON object
"""
keys: JSONOBJECT = {"dataset": {c.name: {} for c in Dataset.__table__._columns}}
aggregate: JSONOBJECT = {
"dataset": {c.name: None for c in Dataset.__table__._columns}
}
list = query.with_entities(Metadata.key, Metadata.value).all()
for k, v in list:
# "metalog" is a top-level key in the Metadata schema, but we
# report it as a sub-key of "dataset".
if k == Metadata.METALOG:
self.accumulate(keys["dataset"], k, v)
self.accumulate(aggregate["dataset"], k, v)
elif k:
self.accumulate(keys, k, v)
return keys
self.accumulate(aggregate, k, v)
return aggregate

def datasets(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJECT:
"""Gather and paginate the selected datasets
Expand Down
33 changes: 19 additions & 14 deletions lib/pbench/test/unit/server/test_datasets_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,25 +515,30 @@ def test_key_summary(self, query_as):
response = query_as({"keysummary": "true"}, "drb", HTTPStatus.OK)
assert response.json == {
"dataset": {
"access": {},
"id": {},
"access": None,
"id": None,
"metalog": {
"pbench": {"config": {}, "date": {}, "name": {}, "script": {}},
"run": {"controller": {}},
"pbench": {
"config": None,
"date": None,
"name": None,
"script": None,
},
"run": {"controller": None},
},
"name": {},
"owner_id": {},
"resource_id": {},
"uploaded": {},
"name": None,
"owner_id": None,
"resource_id": None,
"uploaded": None,
},
"global": {"contact": {}, "legacy": {"server": {}}},
"global": {"contact": None, "legacy": {"server": None}},
"server": {
"deletion": {},
"deletion": None,
"index-map": {
"unit-test.v5.result-data-sample.2020-08": {},
"unit-test.v6.run-data.2020-08": {},
"unit-test.v6.run-toc.2020-05": {},
"unit-test.v5.result-data-sample.2020-08": None,
"unit-test.v6.run-data.2020-08": None,
"unit-test.v6.run-toc.2020-05": None,
},
"origin": {},
"origin": None,
},
}

0 comments on commit ba43de7

Please sign in to comment.