diff --git a/docs/getting_started/setting_up_an_api.md b/docs/getting_started/setting_up_an_api.md index 32f69c0e6..7efaa13fc 100644 --- a/docs/getting_started/setting_up_an_api.md +++ b/docs/getting_started/setting_up_an_api.md @@ -37,6 +37,7 @@ Instead, if you are storing chemical formulae as an unreduced count per simulati This would then instead require option 2 above, namely either the addition of auxiliary fields that store the correct (or mappable) OPTIMADE format in the database, or the creation of a secondary database that returns the pre-converted structures. In the simplest case, the mapper classes can be used to define aliases between fields in the database and the OPTIMADE field name; these can be configured via the [`aliases`][optimade.server.config.ServerConfig.aliases] option as a dictionary mapping stored in a dictionary under the appropriate endpoint name, e.g. `"aliases": {"structures": {"chemical_formula_reduced": "my_chem_form"}}`, or defined as part of a custom mapper class. +If the alias is a nested field (i.e., a field within a dictionary), the field names should be separated by `"."`, for example: `"aliases": { "structures": {"chemical_formula_reduced": "formulae.reduced"}}`. In either option, you should now be able to insert your data into the corresponding MongoDB (or otherwise) collection. diff --git a/optimade/models/jsonapi.py b/optimade/models/jsonapi.py index 23e0db241..3c2cb5307 100644 --- a/optimade/models/jsonapi.py +++ b/optimade/models/jsonapi.py @@ -298,6 +298,19 @@ def check_illegal_attributes_fields(cls, values): ) return values + @root_validator(pre=True) + def set_missing_to_none(cls, values): + if "set_missing_to_none" in values and values.pop("set_missing_to_none"): + for field in cls.schema()["required"]: + if field not in values: + if ( + field == "structure_features" + ): # It would be nice if there would be a more universal way to handle special cases like this. 
+ values[field] = [] + else: + values[field] = None + return values + class Resource(BaseResource): """Resource objects appear in a JSON API document to represent resources.""" diff --git a/optimade/models/links.py b/optimade/models/links.py index 9cb316112..2cf0cb3b1 100644 --- a/optimade/models/links.py +++ b/optimade/models/links.py @@ -35,11 +35,11 @@ class Aggregate(Enum): class LinksResourceAttributes(Attributes): """Links endpoint resource object attributes""" - name: str = StrictField( + name: Optional[str] = StrictField( ..., description="Human-readable name for the OPTIMADE API implementation, e.g., for use in clients to show the name to the end-user.", ) - description: str = StrictField( + description: Optional[str] = StrictField( ..., description="Human-readable description for the OPTIMADE API implementation, e.g., for use in clients to show a description to the end-user.", ) diff --git a/optimade/models/references.py b/optimade/models/references.py index afdd2f48f..808ebe61c 100644 --- a/optimade/models/references.py +++ b/optimade/models/references.py @@ -1,7 +1,7 @@ # pylint: disable=line-too-long,no-self-argument from typing import List, Optional -from pydantic import AnyUrl, BaseModel, validator # pylint: disable=no-name-in-module +from pydantic import AnyUrl, BaseModel # pylint: disable=no-name-in-module from optimade.models.entries import EntryResource, EntryResourceAttributes from optimade.models.utils import OptimadeField, SupportLevel @@ -260,9 +260,3 @@ class ReferenceResource(EntryResource): queryable=SupportLevel.MUST, ) attributes: ReferenceResourceAttributes - - @validator("attributes") - def validate_attributes(cls, v): - if not any(prop[1] is not None for prop in v): - raise ValueError("reference object must have at least one field defined") - return v diff --git a/optimade/server/data/test_structures.json b/optimade/server/data/test_structures.json index 18a7eba03..4340ed3bb 100644 --- a/optimade/server/data/test_structures.json +++ 
b/optimade/server/data/test_structures.json @@ -5,6 +5,7 @@ }, "assemblies": null, "chemsys": "Ac", + "dichtheid": 10.07, "cartesian_site_positions": [ [ 0.17570227444196573, @@ -1222,6 +1223,7 @@ "nelements": 5, "nsites": 24, "pretty_formula": "Ag2C6ClH12N3", + "fancy_formulas": {"hill": "C6H12Ag2ClN3"}, "species": [ { "chemical_symbols": [ @@ -1475,6 +1477,9 @@ "nelements": 5, "nsites": 25, "pretty_formula": "Ag2C2H2N6O13", + "fancy_formulas" : { + "hill": "C2H2Ag2N6O13" + }, "species": [ { "chemical_symbols": [ @@ -1723,6 +1728,7 @@ "nelements": 7, "nsites": 23, "pretty_formula": "Ag2C2ClH8N5O3S2", + "fancy_formulas": {"hill": "C2H8Ag2ClN5O3S2"}, "species": [ { "chemical_symbols": [ @@ -2467,6 +2473,7 @@ "nelements": 8, "nsites": 74, "pretty_formula": "AgB10C15Cl2H40NO3P2", + "fancy_formulas": {"hill": "C15H40AgB10Cl2NO3P2"}, "species": [ { "chemical_symbols": [ @@ -2821,6 +2828,7 @@ "nelements": 7, "nsites": 29, "pretty_formula": "AgC3ClH14N6OS3", + "fancy_formulas":{"hill": "C3H14AgClN6OS3"}, "species": [ { "chemical_symbols": [ diff --git a/optimade/server/entry_collections/elasticsearch.py b/optimade/server/entry_collections/elasticsearch.py index 54fe00e78..a8612d6e6 100644 --- a/optimade/server/entry_collections/elasticsearch.py +++ b/optimade/server/entry_collections/elasticsearch.py @@ -168,9 +168,7 @@ def _run_db_query( page_offset = criteria.get("skip", 0) limit = criteria.get("limit", CONFIG.page_limit) - all_aliased_fields = [ - self.resource_mapper.get_backend_field(field) for field in self.all_fields - ] + all_aliased_fields = [field for field in criteria.get("projection", [])] search = search.source(includes=all_aliased_fields) elastic_sort = [ diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 9b307672f..5790c6365 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -1,6 +1,7 @@ import re 
import warnings from abc import ABC, abstractmethod +from functools import lru_cache from typing import Any, Dict, Iterable, List, Set, Tuple, Type, Union from lark import Transformer @@ -11,6 +12,7 @@ from optimade.server.config import CONFIG, SupportedBackend from optimade.server.mappers import BaseResourceMapper from optimade.server.query_params import EntryListingQueryParams, SingleEntryQueryParams +from optimade.utils import set_field_to_none_if_missing_in_dict from optimade.warnings import ( FieldValueNotRecognized, QueryParamNotUsed, @@ -121,13 +123,7 @@ def count(self, **kwargs: Any) -> int: def find( self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] - ) -> Tuple[ - Union[List[EntryResource], EntryResource], - int, - bool, - Set[str], - Set[str], - ]: + ) -> Tuple[Union[List[EntryResource], EntryResource], int, bool, Set[str]]: """ Fetches results and indicates if more data is available. @@ -146,23 +142,49 @@ def find( criteria = self.handle_query_params(params) single_entry = isinstance(params, SingleEntryQueryParams) response_fields = criteria.pop("fields") + response_fields_set = criteria.pop("response_fields_set", False) raw_results, data_returned, more_data_available = self._run_db_query( criteria, single_entry ) + exclude_fields = self.all_fields - response_fields + + results: List = [self.resource_mapper.map_back(doc) for doc in raw_results] + + self.check_and_add_missing_fields(results, response_fields, response_fields_set) + + if results: + results = self.resource_mapper.deserialize(results) + if single_entry: - raw_results = raw_results[0] if raw_results else None # type: ignore[assignment] + results = results[0] if results else None # type: ignore[assignment] if data_returned > 1: raise NotFound( detail=f"Instead of a single entry, {data_returned} entries were found", ) - exclude_fields = self.all_fields - response_fields + return results, data_returned, more_data_available, exclude_fields + + def check_and_add_missing_fields( 
+ self, results: List[dict], response_fields: set, response_fields_set: bool + ): + """Checks whether the response_fields and mandatory fields are present. + If they are not present the values are set to None, so the deserialization works correctly. + It also checks whether all fields in the response have been defined either in the model or in the config file. + If not it raises an appropriate error or warning.""" include_fields = ( response_fields - self.resource_mapper.TOP_LEVEL_NON_ATTRIBUTES_FIELDS ) + # Include missing fields + for result in results: + for field in include_fields: + set_field_to_none_if_missing_in_dict(result["attributes"], field) + + if response_fields_set: + for result in results: + result["attributes"]["set_missing_to_none"] = True bad_optimade_fields = set() bad_provider_fields = set() @@ -189,19 +211,6 @@ def find( detail=f"Unrecognised OPTIMADE field(s) in requested `response_fields`: {bad_optimade_fields}." ) - if raw_results is not None: - results = self.resource_mapper.deserialize(raw_results) - else: - results = None - - return ( - results, - data_returned, - more_data_available, - exclude_fields, - include_fields, - ) - @abstractmethod def _run_db_query( self, criteria: Dict[str, Any], single_entry: bool = False @@ -244,6 +253,7 @@ def all_fields(self) -> Set[str]: return self._all_fields + @lru_cache(maxsize=4) def get_attribute_fields(self) -> Set[str]: """Get the set of attribute fields @@ -327,16 +337,16 @@ def handle_query_params( cursor_kwargs["limit"] = CONFIG.page_limit # response_fields - cursor_kwargs["projection"] = { - f"{self.resource_mapper.get_backend_field(f)}": True - for f in self.all_fields - } - if getattr(params, "response_fields", False): + cursor_kwargs["response_fields_set"] = True response_fields = set(params.response_fields.split(",")) response_fields |= self.resource_mapper.get_required_fields() else: response_fields = self.all_fields.copy() + cursor_kwargs["projection"] = { + 
f"{self.resource_mapper.get_backend_field(f)}": True + for f in response_fields + } cursor_kwargs["fields"] = response_fields diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index fc1953696..eb02b4f33 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -1,8 +1,14 @@ import warnings from functools import lru_cache -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type from optimade.models.entries import EntryResource +from optimade.server.config import CONFIG +from optimade.utils import ( + read_from_nested_dict, + remove_from_nested_dict, + write_to_nested_dict, +) __all__ = ("BaseResourceMapper",) @@ -72,36 +78,61 @@ class BaseResourceMapper: @classmethod @lru_cache(maxsize=1) - def all_aliases(cls) -> Iterable[Tuple[str, str]]: - """Returns all of the associated aliases for this entry type, + def all_prefixed_fields(cls) -> Iterable[Tuple[str, str]]: + """Returns all of the prefixed, unprefixed field name pairs, including those defined by the server config. The first member - of each tuple is the OPTIMADE-compliant field name, the second - is the backend-specific field name. + of each tuple is the prefixed field name, the second + is the field name as presented in the optimade database without prefix. Returns: - A tuple of alias tuples. + A list of alias tuples. 
""" - from optimade.server.config import CONFIG - - return ( - tuple( - (f"_{CONFIG.provider.prefix}_{field}", field) + field_list = ( + [ + field for field in CONFIG.provider_fields.get(cls.ENDPOINT, []) if isinstance(field, str) - ) - + tuple( - (f"_{CONFIG.provider.prefix}_{field['name']}", field["name"]) + ] + + [ + field["name"] for field in CONFIG.provider_fields.get(cls.ENDPOINT, []) if isinstance(field, dict) - ) - + tuple( - (f"_{CONFIG.provider.prefix}_{field}", field) - for field in cls.PROVIDER_FIELDS - ) - + tuple(CONFIG.aliases.get(cls.ENDPOINT, {}).items()) - + cls.ALIASES + ] + + list(cls.PROVIDER_FIELDS) ) + prefixed_field_pairs = [] + for field in field_list: + split_field = field.split( + ".", 1 + ) # For now I assume there are no nested dictionaries for the official Optimade fields + if split_field[0] in cls.ENTRY_RESOURCE_ATTRIBUTES: + prefixed_field_pairs.append( + ( + f"{split_field[0]}._{CONFIG.provider.prefix}_{split_field[1]}", + field, + ) + ) + else: + prefixed_field_pairs.append( + (f"_{CONFIG.provider.prefix}_{field}", field) + ) + return prefixed_field_pairs + + @classmethod + @lru_cache(maxsize=1) + def all_aliases(cls) -> Iterable[Tuple[str, str]]: + """Returns all of the associated aliases for this entry type, + including those defined by the server config. The first member + of each tuple is the field name as presented in the optimade database without prefix, the second + is the backend-specific field name. + + Returns: + A tuple of alias tuples. + + """ + + return tuple(CONFIG.aliases.get(cls.ENDPOINT, {}).items()) + cls.ALIASES @classproperty @lru_cache(maxsize=1) @@ -115,14 +146,12 @@ def SUPPORTED_PREFIXES(cls) -> Set[str]: domain-specific terms). 
""" - from optimade.server.config import CONFIG return {CONFIG.provider.prefix} @classproperty def ALL_ATTRIBUTES(cls) -> Set[str]: """Returns all attributes served by this entry.""" - from optimade.server.config import CONFIG return ( set(cls.ENTRY_RESOURCE_ATTRIBUTES) @@ -170,7 +199,6 @@ def all_length_aliases(cls) -> Tuple[Tuple[str, str], ...]: A tuple of length alias tuples. """ - from optimade.server.config import CONFIG return cls.LENGTH_ALIASES + tuple( CONFIG.length_aliases.get(cls.ENDPOINT, {}).items() @@ -191,6 +219,24 @@ def length_alias_for(cls, field: str) -> Optional[str]: """ return dict(cls.all_length_aliases()).get(field, None) + @classmethod + def get_map_field_from_dict(cls, field: str, aliases: dict): + """Replaces (part of) the field name `field` with the matching entry in the `aliases` dictionary. + It first tries to find the entire field name (including subfields, which are separated by ".") in the dictionary. + If it is not present it removes the deepest nesting level and checks again. + If the field occurs in the dictionary it is replaced by the value in the dictionary. + Any unmatched subfields are appended. + """ + split = field.split(".") + for i in range(len(split), 0, -1): + field_path = ".".join(split[0:i]) + if field_path in aliases: + field_alias = aliases[field_path] + if split[i:]: + field_alias += "." + ".".join(split[i:]) + return field_alias + return field + @classmethod @lru_cache(maxsize=128) def get_backend_field(cls, optimade_field: str) -> str: @@ -201,9 +247,12 @@ def get_backend_field(cls, optimade_field: str) -> str: Aliases are read from [`all_aliases()`][optimade.server.mappers.entries.BaseResourceMapper.all_aliases]. - If a dot-separated OPTIMADE field is provided, e.g., `species.mass`, only the first part will be mapped. + If a dot-separated field is provided, the mapper first looks for that field. + If it is not present in the aliases it repeats the search with one nesting level less until the field is found.
+ If the field is not found, the unmodified `optimade_field` is returned. + This means for an (OPTIMADE, DB) alias of (`species`, `kinds`), `get_backend_fields("species.mass")` - will return `kinds.mass`. + will return `kinds.mass` as there is no specific entry for "species.mass" in the aliases. Arguments: optimade_field: The OPTIMADE field to attempt to map to the backend-specific field. @@ -220,11 +269,9 @@ def get_backend_field(cls, optimade_field: str) -> str: The mapped field name to be used in the query to the backend. """ - split = optimade_field.split(".") - alias = dict(cls.all_aliases()).get(split[0], None) - if alias is not None: - return alias + ("." + ".".join(split[1:]) if len(split) > 1 else "") - return optimade_field + prefixed = dict(cls.all_prefixed_fields()) + optimade_field = cls.get_map_field_from_dict(optimade_field, prefixed) + return cls.get_map_field_from_dict(optimade_field, dict(cls.all_aliases())) @classmethod @lru_cache(maxsize=128) @@ -253,9 +300,11 @@ def alias_for(cls, field: str) -> str: def get_optimade_field(cls, backend_field: str) -> str: """Return the corresponding OPTIMADE field name for the underlying database field, ready to be used to construct the OPTIMADE-compliant JSON response. + !!Warning!! In case a backend_field maps to multiple OPTIMADE fields, only one of the fields is returned. Aliases are read from [`all_aliases()`][optimade.server.mappers.entries.BaseResourceMapper.all_aliases]. + [`all_prefixed_fields()`][optimade.server.mappers.entries.BaseResourceMapper.all_prefixed_fields] Arguments: backend_field: The backend field to attempt to map to an OPTIMADE field. @@ -272,9 +321,13 @@ def get_optimade_field(cls, backend_field: str) -> str: The mapped field name to be used in an OPTIMADE-compliant response. """ - return {alias: real for real, alias in cls.all_aliases()}.get( - backend_field, backend_field + # first map the property back to the field as presented in the optimade database.
+ inv_alias_dict = dict((real, alias) for alias, real in cls.all_aliases()) + backend_field = cls.get_map_field_from_dict(backend_field, inv_alias_dict) + inv_prefix_dict = dict( + (real, alias) for alias, real in cls.all_prefixed_fields() ) + return cls.get_map_field_from_dict(backend_field, inv_prefix_dict) @classmethod @lru_cache(maxsize=128) @@ -312,30 +365,22 @@ def get_required_fields(cls) -> set: @classmethod def map_back(cls, doc: dict) -> dict: - """Map properties from MongoDB to OPTIMADE. + """Map properties in a dictionary to the OPTIMADE fields. - Starting from a MongoDB document `doc`, map the DB fields to the corresponding OPTIMADE fields. + Starting from a document `doc`, map the DB fields to the corresponding OPTIMADE fields. Then, the fields are all added to the top-level field "attributes", with the exception of other top-level fields, defined in `cls.TOP_LEVEL_NON_ATTRIBUTES_FIELDS`. All fields not in `cls.TOP_LEVEL_NON_ATTRIBUTES_FIELDS` + "attributes" will be removed. Finally, the `type` is given the value of the specified `cls.ENDPOINT`. Parameters: - doc: A resource object in MongoDB format. + doc: A resource object. Returns: A resource object in OPTIMADE format. 
""" - mapping = ((real, alias) for alias, real in cls.all_aliases()) - newdoc = {} - reals = {real for alias, real in cls.all_aliases()} - for key in doc: - if key not in reals: - newdoc[key] = doc[key] - for real, alias in mapping: - if real in doc: - newdoc[alias] = doc[real] + newdoc = cls.add_alias_and_prefix(doc) if "attributes" in newdoc: raise Exception("Will overwrite doc field!") @@ -355,10 +400,41 @@ def map_back(cls, doc: dict) -> dict: return newdoc @classmethod - def deserialize( - cls, results: Union[dict, Iterable[dict]] - ) -> Union[List[EntryResource], EntryResource]: - if isinstance(results, dict): - return cls.ENTRY_RESOURCE_CLASS(**cls.map_back(results)) + def add_alias_and_prefix(cls, doc: dict) -> dict: + """Converts a dictionary with field names that match the backend database with the field names that are presented in the OPTIMADE database. + The way these fields are converted is read from: + [`all_aliases()`][optimade.server.mappers.entries.BaseResourceMapper.all_aliases]. + [`all_prefixed_fields()`][optimade.server.mappers.entries.BaseResourceMapper.all_prefixed_fields] - return [cls.ENTRY_RESOURCE_CLASS(**cls.map_back(doc)) for doc in results] + Parameters: + doc: A dictionary with the backend fields. + + Returns: + A dictionary with the fieldnames as presented by OPTIMADE + """ + newdoc: dict = {} + mod_doc = doc.copy() + # First apply all the aliases (They are sorted so the deepest nesting level occurs first.) + sorted_aliases = sorted(cls.all_aliases(), key=lambda ele: ele[0], reverse=True) + for alias, real in sorted_aliases: + value, found = read_from_nested_dict(mod_doc, real) + if not found: + # Some backend fields are used for more than one optimade field. As they are deleted from mod_doc the first time they are mapped we need a backup option to read the data. 
+ value, found = read_from_nested_dict(doc, real) + if found: + write_to_nested_dict(newdoc, alias, value) + remove_from_nested_dict(mod_doc, real) + # move fields without alias to new doc + newdoc.update(mod_doc) + # apply prefixes + for prefixed_field, unprefixed_field in cls.all_prefixed_fields(): + value, found = read_from_nested_dict(newdoc, unprefixed_field) + if found: + write_to_nested_dict(newdoc, prefixed_field, value) + remove_from_nested_dict(newdoc, unprefixed_field) + + return newdoc + + @classmethod + def deserialize(cls, results: Iterable[dict]) -> List[EntryResource]: + return [cls.ENTRY_RESOURCE_CLASS(**result) for result in results] diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index ac19df72f..3cad26124 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -25,7 +25,7 @@ __all__ = ( "BASE_URL_PREFIXES", "meta_values", - "handle_response_fields", + "remove_exclude_fields", "get_included_relationships", "get_base_url", "get_entries", @@ -90,20 +90,17 @@ def meta_values( ) -def handle_response_fields( +def remove_exclude_fields( results: Union[List[EntryResource], EntryResource], exclude_fields: Set[str], - include_fields: Set[str], ) -> List[Dict[str, Any]]: - """Handle query parameter `response_fields`. + """Removes the fields that are present in `exclude_fields` from the entries in `results`. - It is assumed that all fields are under `attributes`. - This is due to all other top-level fields are REQUIRED in the response. + It is assumed that all fields are under `attributes`, because all other top-level fields are REQUIRED in the response. Parameters: + results: The resource or list of resources from which the fields in `exclude_fields` should be removed. exclude_fields: Fields under `attributes` to be excluded from the response. - include_fields: Fields under `attributes` that were requested that should be - set to null if missing in the entry.
Returns: List of resulting resources as dictionaries after pruning according to @@ -122,11 +119,6 @@ def handle_response_fields( if field in new_entry["attributes"]: del new_entry["attributes"][field] - # Include missing fields that were requested in `response_fields` - for field in include_fields: - if field not in new_entry["attributes"]: - new_entry["attributes"][field] = None - new_results.append(new_entry) return new_results @@ -202,7 +194,7 @@ def get_included_relationships( ) # still need to handle pagination - ref_results, _, _, _, _ = ENTRY_COLLECTIONS[entry_type].find(params) + ref_results, _, _, _ = ENTRY_COLLECTIONS[entry_type].find(params) included[entry_type] = ref_results # flatten dict by endpoint to list @@ -244,8 +236,7 @@ def get_entries( results, data_returned, more_data_available, - fields, - include_fields, + exclude_fields, ) = collection.find(params) include = [] @@ -268,8 +259,8 @@ def get_entries( else: links = ToplevelLinks(next=None) - if results is not None and (fields or include_fields): - results = handle_response_fields(results, fields, include_fields) # type: ignore[assignment] + if exclude_fields: + results = remove_exclude_fields(results, exclude_fields) # type: ignore[assignment] return response( links=links, @@ -302,8 +293,7 @@ def get_single_entry( results, data_returned, more_data_available, - fields, - include_fields, + exclude_fields, ) = collection.find(params) if more_data_available: @@ -321,8 +311,8 @@ def get_single_entry( links = ToplevelLinks(next=None) - if results is not None and (fields or include_fields): - results = handle_response_fields(results, fields, include_fields)[0] # type: ignore[assignment] + if exclude_fields and results is not None: + results = remove_exclude_fields(results, exclude_fields)[0] # type: ignore[assignment] return response( links=links, diff --git a/optimade/utils.py b/optimade/utils.py index 6f4e3be78..389008956 100644 --- a/optimade/utils.py +++ b/optimade/utils.py @@ -1,10 +1,10 @@ 
"""This submodule implements some useful utilities for dealing -with OPTIMADE providers that can be used in server or client code. +with OPTIMADE providers that can be used in server or client code and for handling nested dicts. """ import json -from typing import Iterable, List +from typing import Any, Iterable, List from pydantic import ValidationError @@ -167,3 +167,82 @@ def get_all_databases() -> Iterable[str]: yield str(link.attributes.base_url) except RuntimeError: pass + + +def write_to_nested_dict(dictionary: dict, composite_key: str, value: Any): + """Puts a value into an arbitrary position in a nested dict. + + Arguments: + dictionary: the dictionary to which the value should be added under the composite_key. + composite_key: The key under which the value should be stored. The sub keys should be separated by a ".". + e.g. "outer_level_key.inner_level_key" + value: The value that should be stored in the dictionary. + + """ + if "." in composite_key: + split_key = composite_key.split(".", 1) + if split_key[0] not in dictionary: + dictionary[split_key[0]] = {} + write_to_nested_dict(dictionary[split_key[0]], split_key[1], value) + else: + dictionary[composite_key] = value + + +def read_from_nested_dict(dictionary: dict, composite_key: str) -> Any: + """Reads a value from an arbitrary position in a nested dict. + + Arguments: + dictionary: the dictionary from which the value under the composite_key should be read . + composite_key: The key under which the value should be read. The sub keys should be separated by a ".". + e.g. "outer_level_key.inner_level_key" + + Returns: + The value as stored in the dictionary. If the value is not stored in the dictionary it returns None. + A boolean. True indicates that the composite_key was present in the dictionary, False that it is not present. 
+ + """ + split_key = composite_key.split(".", 1) + if split_key[0] in dictionary: + if len(split_key) > 1: + return read_from_nested_dict(dictionary[split_key[0]], split_key[1]) + else: + return dictionary[composite_key], True + return None, False + + +def remove_from_nested_dict(dictionary: dict, composite_key: str): + """Removes an entry from an arbitrary position in a nested dict. + + Arguments: + dictionary: the dictionary from which the composite key should be removed. + composite_key: The key that should be removed. The sub keys should be separated by a ".". + e.g. "outer_level_key.inner_level_key" + If the removal of key causes the dictionary one level up to be empty it is removed as well. + """ + split_key = composite_key.split(".", 1) + if split_key[0] in dictionary: + if len(split_key) > 1: + empty = remove_from_nested_dict(dictionary[split_key[0]], split_key[1]) + if empty: + return remove_from_nested_dict(dictionary, split_key[0]) + else: + return False + else: + del dictionary[composite_key] + if not dictionary: + return True + else: + return False + + +def set_field_to_none_if_missing_in_dict(entry: dict, field: str) -> dict: + _, present = read_from_nested_dict(entry, field) + if not present: + split_field = field.split(".", 1) + # It would be nice if there would be a more universal way to handle special cases like this. 
+ if split_field[0] == "structure_features": + value: Any = [] + else: + value = None + write_to_nested_dict(entry, field, value) + return entry diff --git a/optimade/validator/validator.py b/optimade/validator/validator.py index b72909fb6..d60bd2847 100644 --- a/optimade/validator/validator.py +++ b/optimade/validator/validator.py @@ -558,6 +558,7 @@ def _check_response_fields( subset_fields = random.sample(fields, min(len(fields) - 1, 3)) test_query = f"{endp}?response_fields={','.join(subset_fields)}&page_limit=1" response, _ = self._get_endpoint(test_query, multistage=True) + subset_fields = [field.split(".")[0] for field in subset_fields] if response and len(response.json()["data"]) >= 0: doc = response.json()["data"][0] diff --git a/setup.py b/setup.py index 95dda5908..4e3b522a8 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ server_deps = [ "uvicorn~=0.19", "fastapi~=0.86", - "pyyaml>=5.4,<7", # Keep at pyyaml 5.4 for aiida-core support ] + mongo_deps @@ -103,6 +102,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Intended Audience :: Developers", "Topic :: Database", "Topic :: Database :: Database Engines/Servers", @@ -114,6 +114,7 @@ "pydantic~=1.10,>=1.10.2", "email_validator~=1.2", "requests~=2.28", + "pyyaml>=5.4, <7", # Keep at pyyaml 5.4 for aiida-core support ], extras_require={ "all": all_deps, diff --git a/tests/server/routers/test_info.py b/tests/server/routers/test_info.py index b5e2369e1..2fd27e0ee 100644 --- a/tests/server/routers/test_info.py +++ b/tests/server/routers/test_info.py @@ -86,7 +86,10 @@ def test_info_structures_unit(self): if field in unit_fields: assert "unit" in info_keys, f"Field: {field}" else: - assert "unit" not in info_keys, f"Field: {field}" + if not field.startswith( + "_" + ): # database specific properties can also have units + assert "unit" not in info_keys, f"Field: {field}" def 
test_provider_fields(self): """Check the presence of provider-specific fields""" diff --git a/tests/server/routers/test_references.py b/tests/server/routers/test_references.py index c40c8e195..7dd06360a 100644 --- a/tests/server/routers/test_references.py +++ b/tests/server/routers/test_references.py @@ -23,6 +23,12 @@ class TestSingleReferenceEndpointDifficult(RegularEndpointTests): response_cls = ReferenceResponseOne +class TestResponseFields(RegularEndpointTests): + + request_str = "references?response_fields=volume,month,organization&page_limit=1" + response_cls = ReferenceResponseMany + + class TestMissingSingleReferenceEndpoint(RegularEndpointTests): """Tests for /references/ for unknown """ diff --git a/tests/server/routers/test_structures.py b/tests/server/routers/test_structures.py index af39669d5..a70d7ebc8 100644 --- a/tests/server/routers/test_structures.py +++ b/tests/server/routers/test_structures.py @@ -67,6 +67,16 @@ def test_structures_endpoint_data(self): assert self.json_response["data"]["type"] == "structures" assert "attributes" in self.json_response["data"] assert "_exmpl_chemsys" in self.json_response["data"]["attributes"] + assert ( + "density" + in self.json_response["data"]["attributes"]["_exmpl_physical_properties"] + ) + assert ( + self.json_response["data"]["attributes"]["_exmpl_physical_properties"][ + "density" + ] + == 10.07 + ) def test_check_response_single_structure(check_response): diff --git a/tests/server/test_mappers.py b/tests/server/test_mappers.py index 696a7ab5c..4b6899d8a 100644 --- a/tests/server/test_mappers.py +++ b/tests/server/test_mappers.py @@ -23,14 +23,37 @@ class MyMapper(mapper(MAPPER)): def test_property_aliases(mapper): class MyMapper(mapper(MAPPER)): - PROVIDER_FIELDS = ("dft_parameters", "test_field") + PROVIDER_FIELDS = ( + "dft_parameters", + "test_field", + "database_specific_field", + "species.oxidation_state", + ) LENGTH_ALIASES = (("_exmpl_test_field", "test_field_len"),) - ALIASES = (("field", 
"completely_different_field"),) + ALIASES = ( + ("field", "completely_different_field"), + ("species", "particle_type"), + ("species.name", "particles.class"), + ("database_specific_field", "backend.subfield.sub_sub_field"), + ) + ENTRY_RESOURCE_ATTRIBUTES = { + "species": 42 + } # This is not a proper value for ENTRY_RESOURCE_ATTRIBUTES but we need ut to test allowing database specific properties within optimade dictionary fields. mapper = MyMapper() assert mapper.get_backend_field("_exmpl_dft_parameters") == "dft_parameters" assert mapper.get_backend_field("_exmpl_test_field") == "test_field" assert mapper.get_backend_field("field") == "completely_different_field" + assert mapper.get_backend_field("species.mass") == "particle_type.mass" + assert ( + mapper.get_backend_field("species.oxidation_state") + == "particle_type.oxidation_state" + ) + assert mapper.get_backend_field("species.name") == "particles.class" + assert ( + mapper.get_backend_field("_exmpl_database_specific_field") + == "backend.subfield.sub_sub_field" + ) assert mapper.length_alias_for("_exmpl_test_field") == "test_field_len" assert mapper.length_alias_for("test_field") is None assert mapper.get_backend_field("test_field") == "test_field" @@ -41,6 +64,16 @@ class MyMapper(mapper(MAPPER)): assert mapper.get_optimade_field("test_field") == "_exmpl_test_field" assert mapper.get_optimade_field("completely_different_field") == "field" assert mapper.get_optimade_field("nonexistent_field") == "nonexistent_field" + assert mapper.get_optimade_field("particles.class") == "species.name" + assert mapper.get_optimade_field("particle_type.mass") == "species.mass" + assert ( + mapper.get_optimade_field("backend.subfield.sub_sub_field") + == "_exmpl_database_specific_field" + ) + assert ( + mapper.get_optimade_field("particle_type.oxidation_state") + == "species._exmpl_oxidation_state" + ) with pytest.warns(DeprecationWarning): assert mapper.alias_of("nonexistent_field") == "nonexistent_field" @@ -62,3 +95,35 @@ 
class MyMapper(mapper(MAPPER)): mapper.get_backend_field("_exmpl_dft_parameters_dft_parameters.nested.property") == "_exmpl_dft_parameters_dft_parameters.nested.property" ) + + +def test_map_back_nested_field(mapper): + class MyMapper(mapper(MAPPER)): + ALIASES = (("some_field", "main_field.nested_field.field_we_need"),) + + mapper = MyMapper() + input_dict = { + "main_field": { + "nested_field": {"field_we_need": 42, "other_field": 78}, + "another_nested_field": 89, + }, + "secondary_field": 52, + } + output_dict = mapper.map_back(input_dict) + assert output_dict["attributes"]["some_field"] == 42 + + +def test_map_back_to_nested_field(mapper): + class MyMapper(mapper(MAPPER)): + ALIASES = (("some_field.subfield", "main_field.nested_field.field_we_need"),) + + mapper = MyMapper() + input_dict = { + "main_field": { + "nested_field": {"field_we_need": 42, "other_field": 78}, + "another_nested_field": 89, + }, + "secondary_field": 52, + } + output_dict = mapper.map_back(input_dict) + assert output_dict["attributes"]["some_field"]["subfield"] == 42 diff --git a/tests/test_config.json b/tests/test_config.json index 84e05066c..39ce39c07 100644 --- a/tests/test_config.json +++ b/tests/test_config.json @@ -18,7 +18,8 @@ "provider_fields": { "structures": [ "band_gap", - {"name": "chemsys", "type": "string", "description": "A string representing the chemical system in an ordered fashion"} + {"name": "chemsys", "type": "string", "description": "A string representing the chemical system in an ordered fashion"}, + {"name":"physical_properties.density", "type": "float", "description": "The density of the material.", "unit": "kg/L"} ] }, "aliases": { @@ -27,7 +28,9 @@ "immutable_id": "_id", "chemical_formula_descriptive": "pretty_formula", "chemical_formula_reduced": "pretty_formula", - "chemical_formula_anonymous": "formula_anonymous" + "chemical_formula_anonymous": "formula_anonymous", + "chemical_formula_hill": "fancy_formulas.hill", + "physical_properties.density": 
"dichtheid" } }, "length_aliases": {