Skip to content

Commit

Permalink
Overhaul of directories to use relative directories wherever possible…
Browse files Browse the repository at this point in the history
…; regenerated test benchmarks
  • Loading branch information
elinscott committed Sep 26, 2024
1 parent 2d5e137 commit 55287ac
Show file tree
Hide file tree
Showing 8,411 changed files with 731 additions and 539 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
54 changes: 43 additions & 11 deletions src/koopmans/calculators/_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ class ExtendedCalc(CalculatorExt, ASECalc, CalculatorABC):
import os
from abc import ABC, abstractmethod, abstractproperty
from pathlib import Path
from typing import (Any, Dict, Generic, List, Optional, Tuple, Type, TypeVar,
Union)
from typing import (TYPE_CHECKING, Any, Dict, Generic, List, Optional, Tuple,
Type, TypeVar, Union)
from uuid import UUID, uuid4

import ase.io as ase_io
import numpy as np
Expand All @@ -38,6 +39,9 @@ class ExtendedCalc(CalculatorExt, ASECalc, CalculatorABC):
from koopmans import settings, utils
from koopmans.files import FilePointer

if TYPE_CHECKING:
from koopmans.workflows import Workflow


def sanitize_filenames(filenames: Union[str, Path, List[str], List[Path]], ext_in: str, ext_out: str) -> List[Path]:
# Generic function for sanitizing the input of CalculatorExt.fromfile()
Expand Down Expand Up @@ -74,8 +78,10 @@ class CalculatorExt():
results: Dict[str, Any]
ext_in: str = ''
ext_out: str = ''
parent: Workflow | None
uuid: str

def __init__(self, skip_qc: bool = False, **kwargs: Any):
def __init__(self, parent=None, skip_qc: bool = False, **kwargs: Any):
# Remove arguments that should not be treated as QE keywords
kwargs.pop('directory', None)

Expand All @@ -87,7 +93,13 @@ def __init__(self, skip_qc: bool = False, **kwargs: Any):
self.skip_qc = skip_qc

# Prepare a dictionary to store a record of linked files
self.linked_files: Dict[str, Tuple[utils.HasDirectoryAttr | None, Path, bool, bool, bool]] = {}
self.linked_files: Dict[str, Tuple[utils.HasDirectoryInfo | None, Path, bool, bool, bool]] = {}

# Store the parent workflow
self.parent = parent

# Generate a unique identifier for this calculation
self.uuid = str(uuid4())

def __repr__(self):
entries = []
Expand Down Expand Up @@ -129,8 +141,29 @@ def directory(self, value: Path | str | None):
return
if not isinstance(value, Path):
value = Path(value)
if value.is_absolute():
raise ValueError(f'`{self.__class__.__name__}.directory` must be a relative path')
self._directory = value

@property
def absolute_directory(self) -> Path:
assert self.directory is not None
if self.parent is None:
return self.directory.resolve()
path = self.parent.directory / self.directory

# Recursive through the parents, adding their directories to path (these are all relative paths)...
obj = self.parent
while getattr(obj, 'parent', None):
assert obj.parent is not None
path = obj.parent.directory / path
obj = obj.parent

# ... until we reach the top-level parent, which should have a base_directory attribute (an absolute path)
if not hasattr(obj, 'base_directory'):
raise AttributeError(f'Expected `{obj.__class__.__name__}` instance to have a `base_directory` attribute')
return obj.base_directory / path

def calculate(self):
"""Generic function for running a calculator"""

Expand Down Expand Up @@ -166,7 +199,7 @@ def _fetch_linked_files(self):
if src_calc is None:
src_filename = src_filename.resolve()
else:
src_filename = (src_calc.directory / src_filename).resolve()
src_filename = src_calc.absolute_directory / src_filename
dest_filename = self.directory / dest_filename

if not src_filename.exists():
Expand Down Expand Up @@ -232,9 +265,7 @@ def read_input(self, input_file: Optional[Path] = None):
# Load calculator from input file
calc: Calculator = ase_io.read(input_file).calc

# Update self based off the input file, first updating self.directory in order to ensure any settings that are
# relative paths are appropriately stored
self.directory = input_file.parent
# Update self based off the input file
self.parameters = calc.parameters
if isinstance(calc.atoms, Atoms):
# Some calculators (e.g. wann2kc) can't reconstruct atoms from an input file
Expand Down Expand Up @@ -281,7 +312,7 @@ def fromdict(cls: Type[TCalc], dct: Any) -> TCalc:
setattr(calc, k.lstrip('_'), v)
return calc

def link_file(self, src_calc: utils.HasDirectoryAttr | None, src_filename: Path, dest_filename: Path, symlink: bool = False,
def link_file(self, src_calc: utils.HasDirectoryInfo | None, src_filename: Path, dest_filename: Path, symlink: bool = False,
recursive_symlink: bool = False, overwrite: bool = False):
if src_filename.is_absolute() and src_calc is not None:
raise ValueError(f'`src_filename` in `{self.__class__.__name__}.link_file()` must be a relative path if a '
Expand Down Expand Up @@ -359,7 +390,6 @@ def fromfile(cls, filenames: Union[str, Path, List[str], List[Path]]):

# Read qe output file
for filename in [f for f in sanitized_filenames if f.suffix == cls.ext_out]:
calc.directory = filename.parent
calc.prefix = filename.stem
try:
calc.read_results()
Expand All @@ -368,7 +398,9 @@ def fromfile(cls, filenames: Union[str, Path, List[str], List[Path]]):
pass

# Update calc.directory and calc.parameters.prefix
calc.directory = sanitized_filenames[0].parent
assert hasattr(calc, 'parent')
base_directory = Path() if calc.parent is None else calc.parent.base_directory / calc.parent.directory
calc.directory = Path(os.path.relpath(sanitized_filenames[0].parent, base_directory))
calc.prefix = sanitized_filenames[0].stem

# Return the new calc object
Expand Down
6 changes: 6 additions & 0 deletions src/koopmans/calculators/_koopmans_cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,10 @@ def has_empty_states(self, spin: Optional[int] = None) -> bool:
return self.parameters.nbnd > nel

def nspin1_dummy_calculator(self) -> KoopmansCPCalculator:
self.parent, parent = None, self.parent
calc = copy.deepcopy(self)
self.parent = parent
calc.parent = parent
calc.prefix += '_nspin1_dummy'
calc.parameters.do_outerloop = False
calc.parameters.do_outerloop_empty = False
Expand All @@ -536,7 +539,10 @@ def nspin1_dummy_calculator(self) -> KoopmansCPCalculator:
return calc

def nspin1_calculator(self) -> KoopmansCPCalculator:
self.parent, parent = None, self.parent
calc = copy.deepcopy(self)
calc.parent = parent
self.parent = parent
calc.prefix += '_nspin1'
calc.parameters.nspin = 1
calc.parameters.nelup = None
Expand Down
47 changes: 37 additions & 10 deletions src/koopmans/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import numpy as np

from koopmans.utils import (HasDirectoryAttr, get_binary_content, get_content,
write_binary_content, write_content)
from koopmans.utils import (HasDirectoryInfo, get_binary_content, get_content,
warn, write_binary_content, write_content)


class FilePointer(NamedTuple):
Expand All @@ -17,17 +17,19 @@ class FilePointer(NamedTuple):
to koopmans/AiiDA) and a name (which is the path of the file relative to the parent's directory).
"""
parent: HasDirectoryAttr
parent: HasDirectoryInfo
name: Path

def __repr__(self):
assert self.parent.directory is not None
relpath: str = os.path.relpath(self.parent.directory, Path.cwd())
return f'FilePointer({relpath}/{self.name})'
assert self.parent.absolute_directory is not None
return f'FilePointer({self.parent.absolute_directory}/{self.name})'

def aspath(self):
assert self.parent.directory is not None
return self.parent.directory / self.name
def aspath(self, absolute: bool = True) -> Path:
if absolute:
assert self.parent.absolute_directory is not None
return self.parent.absolute_directory / self.name
else:
return self.name

def copy(self, dst: Path, binary=False):
if binary:
Expand Down Expand Up @@ -56,16 +58,37 @@ def read(self, binary: bool = False, numpy: bool = False) -> Any:

def rglob(self, pattern: str):
for f in self.aspath().rglob(pattern):
yield FilePointer(parent=self.parent, name=f.relative_to(self.parent.directory))
assert self.parent.absolute_directory is not None
yield FilePointer(parent=self.parent, name=f.relative_to(self.parent.absolute_directory))

def is_dir(self):
return self.aspath().is_dir()

def __eq__(self, other):
if not isinstance(other, FilePointer):
return False
# Note that we don't check self.parent.absolute_directory
return self.parent.directory == other.parent.directory and self.name == other.name

def __reduce__(self):
# We don't want to store the entire parent object in the database; we only need the directory information
abs_dir = self.parent.absolute_directory
dummy_parent = ParentPlaceholder(directory=self.parent.directory,
absolute_directory=self.parent.absolute_directory)
return (FilePointer, (dummy_parent, self.name))


class ParentPlaceholder:
# Move into the test suite?
def __init__(self, directory, absolute_directory):
self.directory = directory
self._absolute_directory = absolute_directory

@property
def absolute_directory(self):
# This is a property in order to follow the HasDirectoryInfo protocol
return Path(__file__).parents[2] / self._absolute_directory


class AbsolutePath:
""" A class that can stand in as a parent in a FilePointer when the file is unattached to a Calculator or Process """
Expand All @@ -79,6 +102,10 @@ def __eq__(self, other):
return False
return self.directory == other.directory

@property
def absolute_directory(self) -> Path | None:
return self.directory


def AbsoluteFilePointer(path: Path | str) -> FilePointer:
path = path if isinstance(path, Path) else Path(path)
Expand Down
34 changes: 3 additions & 31 deletions src/koopmans/io/_dill.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,18 @@
import os
from pathlib import Path
from typing import Any

import dill


class PicklerWithRelativePaths(dill.Pickler):
def __init__(self, base_directory, *args, **kwargs):
super().__init__(*args, **kwargs)
self.base_directory = base_directory

def reducer_override(self, obj):
if isinstance(obj, Path):
return Path, pickle_path(obj, self.base_directory)
else:
return NotImplemented


def pickle_path(path: Path, base_directory: Path):
# Convert the path to relative if it's absolute
if path.is_absolute():
try:
rel_path = path.relative_to(base_directory.resolve())
except ValueError:
rel_path = path
return (str(rel_path),)
else:
# Return relative paths as-is
return (str(path),)


def read_pkl(filename: Path | str) -> Any:
with open(filename, 'rb') as fd:
out = dill.load(fd)
return out


def write_pkl(obj: Any, filename: Path | str, base_directory: Path | None = None):
def write_pkl(obj: Any, filename: Path | str):
filename = Path(filename) if not isinstance(filename, Path) else filename

if base_directory is None:
base_directory = filename.parent

with open(filename, 'wb') as fd:
pickler = PicklerWithRelativePaths(base_directory, fd)
pickler.dump(obj)
dill.dump(obj, fd)
47 changes: 45 additions & 2 deletions src/koopmans/processes/_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@
import re
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, Generic, Tuple, Type, TypeVar
from typing import TYPE_CHECKING, Dict, Generic, Tuple, Type, TypeVar
from uuid import uuid4

import dill
import numpy as np
from pydantic import BaseModel

from koopmans import utils

if TYPE_CHECKING:
from koopmans.workflows import Workflow


class IOModel(BaseModel):
# BaseModel with an __eq__ method that compares attributes rather than memory addresses
Expand All @@ -36,7 +40,7 @@ def __eq__(self, other):

class Process(ABC, Generic[InputModel, OutputModel]):

__slots__ = ['inputs', '_outputs', 'name', 'directory']
__slots__ = ['inputs', '_outputs', 'name', 'directory', 'parent', 'uuid', '_base_directory']

def __init__(self, name: str | None = None, **kwargs):
self.inputs: InputModel = self.input_model(**kwargs)
Expand All @@ -48,6 +52,11 @@ def __init__(self, name: str | None = None, **kwargs):
else:
self.name = name
self.directory: Path | None = None
self.parent: Workflow | None = None
self.uuid = str(uuid4())
self._base_directory: Path | None = None
if self.parent is None:
self.base_directory = Path()

def run(self):
assert self.directory is not None, 'Process directory must be set before running'
Expand Down Expand Up @@ -111,3 +120,37 @@ def is_complete(self):
if self.directory is None:
raise ValueError('Process directory must be set before checking if it is complete')
return (self.directory / f'{self.name}_outputs.pkl').exists()

@property
def base_directory(self) -> Path:
if self.parent is not None:
return self.parent.base_directory
else:
if self._base_directory is None:
raise ValueError(f'{self.__class__.__name__}.base_directory has not been set')
return self._base_directory

@base_directory.setter
def base_directory(self, value: Path):
if self.parent is not None:
raise ValueError(f'{self.__class__.__name__}.base_directory should not be set for processes with parents')
self._base_directory = value.resolve()

@property
def absolute_directory(self) -> Path:
assert self.directory is not None
if self.parent is None:
return self.base_directory / self.directory
path = self.parent.directory / self.directory

# Recursive through the parents, adding their directories to path (these are all relative paths)...
obj = self.parent
while getattr(obj, 'parent', None):
assert obj.parent is not None
path = obj.parent.directory / path
obj = obj.parent

# ... until we reach the top-level parent, which should have a base_directory attribute (an absolute path)
if not hasattr(obj, 'base_directory'):
raise AttributeError(f'Expected `{obj.__class__.__name__}` instance to have a `base_directory` attribute')
return obj.base_directory / path
2 changes: 1 addition & 1 deletion src/koopmans/processes/bin2xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Bin2XMLProcess(CommandLineTool):
output_model = Bin2XMLOutput

def _pre_run(self):
if not (self.inputs.binary.parent.directory / self.inputs.binary.name).exists():
if not (self.inputs.binary.parent.absolute_directory / self.inputs.binary.name).exists():
raise FileNotFoundError(f'`{self.inputs.binary}` does not exist')

# Load the file
Expand Down
2 changes: 2 additions & 0 deletions src/koopmans/processes/koopmans_cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ class SwapSpinFilesProcess(Process):
output_model = SwapSpinFilesOutputModel

def _run(self):
if not self.inputs.read_directory.exists():
raise FileNotFoundError(f'{self.inputs.read_directory} does not exist')
spin_up_files = list(self.inputs.read_directory.rglob('*1.*'))
spin_down_files = list(self.inputs.read_directory.rglob('*2.*'))
for src in self.inputs.read_directory.rglob('*'):
Expand Down
Loading

0 comments on commit 55287ac

Please sign in to comment.