Skip to content

Commit

Permalink
FolderData: Expose repository API on top-level namespace (#6150)
Browse files Browse the repository at this point in the history
In 8293e45, the `Node` interface was
refactored to move the API of the repository to the `base.repository`
namespace. The original methods would be forwarded with a deprecation
message being printed.

Although this made sense for most node types, in an effort to clean up
the node interface which was overpopulated, for the `FolderData` the
repository interface is the main interface and it doesn't make sense to
force the users to go all the way down to the nested `base.repository`
namespace to access it. Therefore the public API of the repository is
restored on the top-level namespace of the `FolderData` class.
  • Loading branch information
sphuber authored Oct 17, 2023
1 parent 0620588 commit 3e1f873
Show file tree
Hide file tree
Showing 4 changed files with 219 additions and 26 deletions.
154 changes: 154 additions & 0 deletions aiida/orm/nodes/data/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,21 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""`Data` sub class to represent a folder on a file system."""
from __future__ import annotations

import contextlib
import io
import pathlib
from typing import BinaryIO, Iterable, Iterator, Optional, TextIO, Union

from aiida.repository import File

from .data import Data

__all__ = ('FolderData',)

FilePath = Union[str, pathlib.PurePosixPath]


class FolderData(Data):
"""`Data` sub class to represent a folder on a file system."""
Expand All @@ -38,3 +48,147 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)
if tree:
self.base.repository.put_object_from_tree(tree)

def list_objects(self, path: str | None = None) -> list[File]:
"""Return a list of the objects contained in this repository sorted by name, optionally in given sub directory.
:param path: optional relative path inside the repository whose objects to list.
:return: a list of `File` named tuples representing the objects present in directory with the given key.
:raises TypeError: if the path is not a string and relative path.
:raises FileNotFoundError: if no object exists for the given path.
:raises NotADirectoryError: if the object at the given path is not a directory.
"""
return self.base.repository.list_objects(path)

def list_object_names(self, path: str | None = None) -> list[str]:
"""Return a sorted list of the object names contained in this repository, optionally in the given sub directory.
:param path: optional relative path inside the repository whose objects to list.
:return: a list of `File` named tuples representing the objects present in directory with the given key.
:raises TypeError: if the path is not a string and relative path.
:raises FileNotFoundError: if no object exists for the given path.
:raises NotADirectoryError: if the object at the given path is not a directory.
"""
return self.base.repository.list_object_names(path)

@contextlib.contextmanager
def open(self, path: str, mode='r') -> Iterator[BinaryIO | TextIO]:
"""Open a file handle to an object stored under the given key.
.. note:: this should only be used to open a handle to read an existing file. To write a new file use the method
``put_object_from_filelike`` instead.
:param path: the relative path of the object within the repository.
:return: yield a byte stream object.
:raises TypeError: if the path is not a string and relative path.
:raises FileNotFoundError: if the file does not exist.
:raises IsADirectoryError: if the object is a directory and not a file.
:raises OSError: if the file could not be opened.
"""
with self.base.repository.open(path, mode) as handle:
yield handle

def get_object(self, path: FilePath | None = None) -> File:
"""Return the object at the given path.
:param path: the relative path of the object within the repository.
:return: the `File` representing the object located at the given relative path.
:raises TypeError: if the path is not a string or ``Path``, or is an absolute path.
:raises FileNotFoundError: if no object exists for the given path.
"""
return self.base.repository.get_object(path)

def get_object_content(self, path: str, mode='r') -> str | bytes:
"""Return the content of a object identified by key.
:param path: the relative path of the object within the repository.
:raises TypeError: if the path is not a string and relative path.
:raises FileNotFoundError: if the file does not exist.
:raises IsADirectoryError: if the object is a directory and not a file.
:raises OSError: if the file could not be opened.
"""
return self.base.repository.get_object_content(path, mode)

def put_object_from_bytes(self, content: bytes, path: str) -> None:
"""Store the given content in the repository at the given path.
:param path: the relative path where to store the object in the repository.
:param content: the content to store.
:raises TypeError: if the path is not a string and relative path.
:raises FileExistsError: if an object already exists at the given path.
"""
return self.base.repository.put_object_from_bytes(content, path)

def put_object_from_filelike(self, handle: io.BufferedReader, path: str) -> None:
"""Store the byte contents of a file in the repository.
:param handle: filelike object with the byte content to be stored.
:param path: the relative path where to store the object in the repository.
:raises TypeError: if the path is not a string and relative path.
:raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable.
"""
return self.base.repository.put_object_from_filelike(handle, path)

def put_object_from_file(self, filepath: str, path: str) -> None:
"""Store a new object under `path` with contents of the file located at `filepath` on the local file system.
:param filepath: absolute path of file whose contents to copy to the repository
:param path: the relative path where to store the object in the repository.
:raises TypeError: if the path is not a string and relative path, or the handle is not a byte stream.
:raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable.
"""
return self.base.repository.put_object_from_file(filepath, path)

def put_object_from_tree(self, filepath: str, path: Optional[str] = None) -> None:
"""Store the entire contents of `filepath` on the local file system in the repository with under given `path`.
:param filepath: absolute path of the directory whose contents to copy to the repository.
:param path: the relative path where to store the objects in the repository.
:raises TypeError: if the path is not a string and relative path.
:raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable.
"""
return self.base.repository.put_object_from_tree(filepath, path)

def walk(self, path: Optional[FilePath] = None) -> Iterable[tuple[pathlib.PurePosixPath, list[str], list[str]]]:
"""Walk over the directories and files contained within this repository.
.. note:: the order of the dirname and filename lists that are returned is not necessarily sorted. This is in
line with the ``os.walk`` implementation where the order depends on the underlying file system used.
:param path: the relative path of the directory within the repository whose contents to walk.
:return: tuples of root, dirnames and filenames just like ``os.walk``, with the exception that the root path is
always relative with respect to the repository root, instead of an absolute path and it is an instance of
``pathlib.PurePosixPath`` instead of a normal string
"""
yield from self.base.repository.walk(path)

def glob(self) -> Iterable[pathlib.PurePosixPath]:
"""Yield a recursive list of all paths (files and directories)."""
yield from self.base.repository.glob()

def copy_tree(self, target: str | pathlib.Path, path: Optional[FilePath] = None) -> None:
"""Copy the contents of the entire node repository to another location on the local file system.
:param target: absolute path of the directory where to copy the contents to.
:param path: optional relative path whose contents to copy.
"""
self.base.repository.copy_tree(target, path)

def delete_object(self, path: str) -> None:
"""Delete the object from the repository.
:param key: fully qualified identifier for the object within the repository.
:raises TypeError: if the path is not a string and relative path.
:raises FileNotFoundError: if the file does not exist.
:raises IsADirectoryError: if the object is a directory and not a file.
:raises OSError: if the file could not be deleted.
:raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable.
"""
self.base.repository.delete_object(path)

def erase(self) -> None:
"""Delete all objects from the repository.
:raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable.
"""
self.base.repository.erase()
48 changes: 24 additions & 24 deletions aiida/orm/nodes/repository.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# -*- coding: utf-8 -*-
"""Interface to the file repository of a node instance."""
from __future__ import annotations

import contextlib
import copy
import io
import pathlib
import tempfile
from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, TextIO, Tuple, Union
from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, Iterator, TextIO, Union

from aiida.common import exceptions
from aiida.manage import get_config_option
Expand All @@ -28,25 +30,23 @@ class NodeRepository:
exception if the node is stored. Otherwise the operation is just forwarded to the repository instance.
The repository instance keeps an internal mapping of the file hierarchy that it maintains, starting from an empty
hierarchy if the instance was constructed normally, or from a specific hierarchy if reconstructred through the
hierarchy if the instance was constructed normally, or from a specific hierarchy if reconstructed through the
``Repository.from_serialized`` classmethod. This is only the case for stored nodes, because unstored nodes do not
have any files yet when they are constructed. Once the node get's stored, the repository is asked to serialize its
metadata contents which is then stored in the ``repository_metadata`` field of the backend node.
This layer explicitly does not update the metadata of the node on a mutation action.
The reason is that for stored nodes these actions are anyway forbidden and for unstored nodes,
the final metadata will be stored in one go, once the node is stored,
so there is no need to keep updating the node metadata intermediately.
Note that this does mean that ``repository_metadata`` does not give accurate information,
as long as the node is not yet stored.
metadata contents which is then stored in the ``repository_metadata`` field of the backend node. This layer
explicitly does not update the metadata of the node on a mutation action. The reason is that for stored nodes these
actions are anyway forbidden and for unstored nodes, the final metadata will be stored in one go, once the node is
stored, so there is no need to keep updating the node metadata intermediately. Note that this does mean that
``repository_metadata`` does not give accurate information, as long as the node is not yet stored.
"""

def __init__(self, node: 'Node') -> None:
"""Construct a new instance of the repository interface."""
self._node: 'Node' = node
self._repository_instance: Optional[Repository] = None
self._repository_instance: Repository | None = None

@property
def metadata(self) -> Dict[str, Any]:
def metadata(self) -> dict[str, Any]:
"""Return the repository metadata, representing the virtual file hierarchy.
Note, this is only accurate if the node is stored.
Expand Down Expand Up @@ -127,7 +127,7 @@ def _clone(self, repo: 'NodeRepository') -> None:
"""
self._repository.clone(repo._repository) # pylint: disable=protected-access

def serialize(self) -> Dict:
def serialize(self) -> dict:
"""Serialize the metadata of the repository content into a JSON-serializable format.
:return: dictionary with the content metadata.
Expand All @@ -141,21 +141,21 @@ def hash(self) -> str:
"""
return self._repository.hash()

def list_objects(self, path: Optional[str] = None) -> List[File]:
def list_objects(self, path: str | None = None) -> list[File]:
"""Return a list of the objects contained in this repository sorted by name, optionally in given sub directory.
:param path: the relative path where to store the object in the repository.
:param path: optional relative path inside the repository whose objects to list.
:return: a list of `File` named tuples representing the objects present in directory with the given key.
:raises TypeError: if the path is not a string and relative path.
:raises FileNotFoundError: if no object exists for the given path.
:raises NotADirectoryError: if the object at the given path is not a directory.
"""
return self._repository.list_objects(path)

def list_object_names(self, path: Optional[str] = None) -> List[str]:
def list_object_names(self, path: str | None = None) -> list[str]:
"""Return a sorted list of the object names contained in this repository, optionally in the given sub directory.
:param path: the relative path where to store the object in the repository.
:param path: optional relative path inside the repository whose objects to list.
:return: a list of `File` named tuples representing the objects present in directory with the given key.
:raises TypeError: if the path is not a string and relative path.
:raises FileNotFoundError: if no object exists for the given path.
Expand All @@ -164,7 +164,7 @@ def list_object_names(self, path: Optional[str] = None) -> List[str]:
return self._repository.list_object_names(path)

@contextlib.contextmanager
def open(self, path: str, mode='r') -> Iterator[Union[BinaryIO, TextIO]]:
def open(self, path: str, mode='r') -> Iterator[BinaryIO | TextIO]:
"""Open a file handle to an object stored under the given key.
.. note:: this should only be used to open a handle to read an existing file. To write a new file use the method
Expand All @@ -186,20 +186,20 @@ def open(self, path: str, mode='r') -> Iterator[Union[BinaryIO, TextIO]]:
else:
yield handle

def get_object(self, path: Optional[FilePath] = None) -> File:
def get_object(self, path: FilePath | None = None) -> File:
"""Return the object at the given path.
:param path: the relative path where to store the object in the repository.
:param path: the relative path of the object within the repository.
:return: the `File` representing the object located at the given relative path.
:raises TypeError: if the path is not a string or ``Path``, or is an absolute path.
:raises FileNotFoundError: if no object exists for the given path.
"""
return self._repository.get_object(path)

def get_object_content(self, path: str, mode='r') -> Union[str, bytes]:
def get_object_content(self, path: str, mode='r') -> str | bytes:
"""Return the content of a object identified by key.
:param key: fully qualified identifier for the object within the repository.
:param path: the relative path of the object within the repository.
:raises TypeError: if the path is not a string and relative path.
:raises FileNotFoundError: if the file does not exist.
:raises IsADirectoryError: if the object is a directory and not a file.
Expand Down Expand Up @@ -259,7 +259,7 @@ def put_object_from_file(self, filepath: str, path: str):
self._repository.put_object_from_file(filepath, path)
self._update_repository_metadata()

def put_object_from_tree(self, filepath: str, path: Optional[str] = None):
def put_object_from_tree(self, filepath: str, path: str | None = None):
"""Store the entire contents of `filepath` on the local file system in the repository with under given `path`.
:param filepath: absolute path of the directory whose contents to copy to the repository.
Expand All @@ -271,7 +271,7 @@ def put_object_from_tree(self, filepath: str, path: Optional[str] = None):
self._repository.put_object_from_tree(filepath, path)
self._update_repository_metadata()

def walk(self, path: Optional[FilePath] = None) -> Iterable[Tuple[pathlib.PurePosixPath, List[str], List[str]]]:
def walk(self, path: FilePath | None = None) -> Iterable[tuple[pathlib.PurePosixPath, list[str], list[str]]]:
"""Walk over the directories and files contained within this repository.
.. note:: the order of the dirname and filename lists that are returned is not necessarily sorted. This is in
Expand All @@ -292,7 +292,7 @@ def glob(self) -> Iterable[pathlib.PurePosixPath]:
for filename in filenames:
yield dirpath / filename

def copy_tree(self, target: Union[str, pathlib.Path], path: Optional[FilePath] = None) -> None:
def copy_tree(self, target: str | pathlib.Path, path: FilePath | None = None) -> None:
"""Copy the contents of the entire node repository to another location on the local file system.
:param target: absolute path of the directory where to copy the contents to.
Expand Down
4 changes: 2 additions & 2 deletions aiida/parsers/plugins/diff_tutorial/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def parse(self, **kwargs):

# add output file
self.logger.info(f"Parsing '{output_filename}'")
with self.retrieved.open(output_filename, 'rb') as handle:
with self.retrieved.open(output_filename, 'rb') as handle: # type: ignore[arg-type]
output_node = SinglefileData(file=handle)
self.out('diff', output_node)

Expand All @@ -59,7 +59,7 @@ def parse(self, **kwargs):

# add output file
self.logger.info(f"Parsing '{output_filename}'")
with self.retrieved.open(output_filename, 'rb') as handle:
with self.retrieved.open(output_filename, 'rb') as handle: # type: ignore[arg-type]
output_node = SinglefileData(file=handle)
self.out('diff', output_node)

Expand Down
39 changes: 39 additions & 0 deletions tests/orm/nodes/data/test_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Tests for the `FolderData` class."""
import pytest

from aiida.orm import FolderData


Expand All @@ -21,3 +23,40 @@ def test_constructor_tree(tmp_path):
tmp_path.joinpath(filename).write_text(content, encoding='utf8')
node = FolderData(tree=str(tmp_path))
assert sorted(node.base.repository.list_object_names()) == sorted(tree.keys())


@pytest.mark.parametrize(
'method', (
'list_objects',
'list_object_names',
'open',
'get_object',
'get_object_content',
'put_object_from_bytes',
'put_object_from_filelike',
'put_object_from_file',
'put_object_from_tree',
'walk',
'glob',
'copy_tree',
'delete_object',
'erase',
)
)
def test_api(method, recwarn):
"""Test the direct interface can be called without deprecation warnings.
During the reorganization of the ``Node`` interface, the repository methods were moved to the ``base.repository``
namespace and deprecation warnings would be printed when the repository interface would be called directly from the
top-level namespace. The change was corrected for the ``FolderData`` since for that data type the repository API
_should_ be the direct interface, and users should not have to go down the ``.base.repository`` namespace. Here we
test that no deprecation warnings are emitted for the public API. Once the deprecation warnings are removed in
AiiDA v3.0, this test can also be removed.
"""
node = FolderData()

try:
getattr(node, method)()
except Exception: # pylint: disable=broad-except
pass
assert len(recwarn) == 0

0 comments on commit 3e1f873

Please sign in to comment.