diff --git a/CHANGELOG.md b/CHANGELOG.md
index ee5735df8..2edcc3daa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,14 +26,20 @@ There is one key exception to the rules above -- and that is with `MAJOR`=0 rele
 - REST API fields can now be specified directly with the `api_filters` attribute of any `DatabaseTable` class & fields from mix-ins are automatically added
 - add `archive_fields` attribute that sets the "raw data" for the database table & fields from mix-ins are automatically added
 - accept `TOML` input files in addition to `YAML`
+- convergence plots and extras are now written for many workflow types (such as relaxations)
+- when `use_database=True`, output files are automatically written and the workup method is directly paired with the database table
+- NEB workflows now accept parameters to tune how distinct pathways are determined, including the max pathway length and a cutoff at 1D percolation
 
 **Refactors**
 - the `website.core_components.filters` module has been absorbed into the `DatabaseTable` class/module
 - yaml input for custom workflows now matches the python input format
+- workup methods are largely deprecated; database entries are now returned when a workflow has `use_database=True`
+- several NEB input parameters have been renamed to accurately depict their meaning
 
 **Fixes**
 - fix bug in windows dev env where `simmate run-server` fails to find python path
 - fix bug in `workflows explore` command where 'vasp' is the assumed calculator name
+- fix broken example code in custom workflow docs
 
 # v0.10.0 (2022.08.29)
diff --git a/docs/contributing/first_time_setup.md b/docs/contributing/first_time_setup.md
index 6ad18efb3..b7ab2f916 100644
--- a/docs/contributing/first_time_setup.md
+++ b/docs/contributing/first_time_setup.md
@@ -25,16 +25,21 @@ conda activate simmate_dev
 pip install -e .
 ```
 
-6. Make sure everything works properly by running our tests
+6. Reset your database, making sure you do **NOT** use the prebuilt database. Pre-builts are only made for new releases, and the dev database may differ from the most recent release.
+``` bash
+simmate database reset --confirm-delete --use-prebuilt false
+```
+
+7. Make sure everything works properly by running our tests
 ``` shell
 # you can optionally run tests in parallel
 # with a command such as "pytest -n 4"
 pytest
 ```
 
-7. In GitKraken, make sure you have the `main` branch of your repo (`yourname/simmate`) checked out.
+8. In GitKraken, make sure you have the `main` branch of your repo (`yourname/simmate`) checked out.
 
-8. In Spyder, go `Projects` > `New Project...`. Check `existing directory`, select your `~/Documents/github/simmate` directory, and then `create` your Project!
+9. In Spyder, go to `Projects` > `New Project...`. Check `existing directory`, select your `~/Documents/github/simmate` directory, and then `create` your Project!
 
-9. You can now explore the source code and add/edit files! Move to the next section on how to format, test, and submit these changes to our team.
+10. You can now explore the source code and add/edit files! Move to the next section on how to format, test, and submit these changes to our team.
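To make the `use_database=True` changes noted in the CHANGELOG above concrete, here is a minimal sketch of the new calling pattern. It is not part of the patch itself, and it assumes a configured database, a working VASP command, a hypothetical `NaCl.cif` input file, and the existing `static-energy.vasp.mit` workflow; exact field names on the returned table entry may vary.

``` python
from simmate.workflows.utilities import get_workflow

# any workflow with use_database=True now returns its saved database entry,
# so there is no separate workup() call to make afterwards
workflow = get_workflow("static-energy.vasp.mit")
state = workflow.run(
    structure="NaCl.cif",  # hypothetical input file
    command="mpirun -n 8 vasp_std > vasp.out",
)
result = state.result()  # a database row, not a dictionary of outputs

# the entry can be inspected directly or converted back to a toolkit object
print(result.id)
structure = result.to_toolkit()

# downstream workflows can point at this entry by table name + id,
# matching the {"database_table": ..., "database_id": ...} inputs
# used throughout this patch
next_input = {
    "database_table": workflow.database_table.table_name,
    "database_id": result.id,
}
```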
diff --git a/docs/full_guides/database/notes.md b/docs/full_guides/database/notes.md new file mode 100644 index 000000000..8bce163cb --- /dev/null +++ b/docs/full_guides/database/notes.md @@ -0,0 +1,22 @@ + +<< _register_calc >> +from_run_context +from_toolkit + +<< _update_database_with_results >> from_run_context --> grabs from _register_calc +update_database_from_results +update_from_results + update_from_toolkit + from_toolkit(as_dict=True) + update_from_directory + from_directory(as_dict=True) + from_vasp_directory(as_dict=True) ---> unexpected as_dict + from_vasp_run(as_dict=True) + update_from_toolkit() + from_toolkit(as_dict=True) + +<< load_completed_calc >> +from_toolkit +from_directory + from_vasp_directory + from_vasp_run diff --git a/docs/full_guides/workflows/creating_new_workflows.md b/docs/full_guides/workflows/creating_new_workflows.md index ad05fbea2..7621be667 100644 --- a/docs/full_guides/workflows/creating_new_workflows.md +++ b/docs/full_guides/workflows/creating_new_workflows.md @@ -48,7 +48,7 @@ class Example__Python__MyFavoriteSettings(Workflow): @staticmethod def run_config(**kwargs): print("This workflow doesn't do much") - return 42 + return 12345 ``` !!! note @@ -602,13 +602,11 @@ class Example__Python__MyFavoriteSettings(Workflow): # just running the workflow 10 times in row on different # perturbations or "rattling" of the original structure for n in range(10): + structure.perturb(0.05) # modifies in-place another_workflow.run( - structure=structure.perturb(0.05) + structure=structure, directory= directory / f"perturb_number_{n}", - # **kwargs, <-- you may want to pass kwargs too. ) - - return 42 ``` !!! warning diff --git a/docs/parameters.md b/docs/parameters.md index 4de8b9d86..9260d6f1d 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -1,6 +1,5 @@ # Parameters - ## Overview Knowing which parameters are available and how to use them is essential. We therefore outline **all** unique parameters for **all** workflows here. @@ -98,6 +97,33 @@ The command that will be called during execution of a program. There is typicall command = "mpirun -n 8 vasp_std > vasp.out" ``` + used for bulk crystal relaxation and static energy + - cmd2 --> used for endpoint supercell relaxations + - cmd3 --> used for NEB +Thus, you can scale your resources for each step. Here's a full -c option: +-c "vasp_std > vasp.out; mpirun -n 12 vasp_std > vasp.out; mpirun -n 70 vasp_std > vasp.out" +--> + -------------------------- ## composition @@ -322,19 +348,19 @@ For evolutionary searches, fixed compositions will be stopped when the best indi -------------------------- ## max_atoms -For workflows that involve generating a supercell or random structure, this will be the maximum number of sites to allow in the generate structure(s). For example, NEB workflows would set this value to something like 100 atoms to limit their supercell image sizes. Alternatively, a evolutionary search may set this to 10 atoms to limit the compositions & stoichiometries that are explored. +For workflows that involve generating a supercell or random structure, this will be the maximum number of sites to allow in the generated structure(s). For example, an evolutionary search may set this to 10 atoms to limit the compositions & stoichiometries that are explored. 
=== "yaml" ``` yaml - max_atoms: 100 + max_atoms: 10 ``` === "toml" ``` toml - max_atoms = 100 + max_atoms = 10 ``` === "python" ``` python - max_atoms = 100 + max_atoms = 10 ``` -------------------------- @@ -357,6 +383,24 @@ For workflows that generate new structures (and potentially run calculations on -------------------------- +## max_supercell_atoms +For workflows that involve generating a supercell, this will be the maximum number of sites to allow in the generated structure(s). For example, NEB workflows would set this value to something like 100 atoms to limit their supercell image sizes. + +=== "yaml" + ``` yaml + max_supercell_atoms: 100 + ``` +=== "toml" + ``` toml + max_supercell_atoms = 100 + ``` +=== "python" + ``` python + max_supercell_atoms = 100 + ``` + +-------------------------- + ## migrating_specie This is the atomic species/element that will be moving in the analysis (typically NEB or MD diffusion calculations). Note, oxidation states (e.g. "Ca2+") can be used, but this requires your input structure to be oxidation-state decorated as well. @@ -386,11 +430,6 @@ The atomic path that should be analyzed. Inputs are anything compatible with the -------------------------- -## migration_hop_id -(advanced users only) The entry id from the `MigrationHop` table to link the results to. This is set automatically by higher-level workflows and rarely (if ever) set by the user. If used, you'll likely need to set `diffusion_analysis_id` as well. - --------------------------- - ## migration_images The full set of images (including endpoint images) that should be analyzed. Inputs are anything compatible with the `MigrationImages` class of the `simmate.toolkit.diffusion` module, which is effectively a list of `structure` inputs. This includes: @@ -433,24 +472,38 @@ The full set of images (including endpoint images) that should be analyzed. Inpu -------------------------- ## min_atoms -This is the opposite of `max_atoms` as this will be the minimum number of sites to allow in the generate structure(s). See `max_atoms` for details. +This is the opposite of `max_atoms` as this will be the minimum number of sites allowed in the generate structure(s). See `max_atoms` for details. + +-------------------------- + +## min_structures_exact + +(experimental) The minimum number of structures that must be calculated with exactly +matching nsites as specified in the fixed-composition. + +-------------------------- + +## min_supercell_atoms + +This is the opposite of `max_supercell_atoms` as this will be the minimum number of sites allowed in the generated supercell structure. -------------------------- -## min_length -When generating a supercell, this is the minimum length for each lattice vector of the generate cell (in Angstroms). +## min_supercell_vector_lengths + +When generating a supercell, this is the minimum length for each lattice vector of the generated cell (in Angstroms). For workflows such as NEB, larger is better but more computationally expensive. === "yaml" ``` yaml - min_length: 7.5 + min_supercell_vector_lengths: 7.5 ``` === "toml" ``` toml - min_length = 7.5 + min_supercell_vector_lengths = 7.5 ``` === "python" ``` python - min_length = 7.5 + min_supercell_vector_lengths = 7.5 ``` -------------------------- @@ -531,6 +584,26 @@ The total number of steps to run the calculation on. For example, in molecular d -------------------------- +## percolation_mode +The percolating type to detect. The default is ">1d", which search for percolating +paths up to the `max_path_length`. 
Alternatively, this can be set to "1d" in order +to stop unique pathway finding when 1D percolation is achieved. + +=== "yaml" + ``` yaml + percolation_mode: 1d + ``` +=== "toml" + ``` toml + percolation_mode = "1d" + ``` +=== "python" + ``` python + percolation_mode = "1d" + ``` + +-------------------------- + ## run_id The id assigned to a specific workflow run / calculation. If not provided this will be randomly generated, and we highly recommended leaving this at the default value. Note, this is based on unique-ids (UUID), so every id should be 100% unique and in a string format. @@ -986,6 +1059,25 @@ Unique to `customized.vasp.user-config`. This is a list of parameters to update -------------------------- +## vacancy_mode +For NEB and diffusion workfows, this determines whether vacancy or interstitial +diffusion is analyzed. Default of True corresponds to vacancy-based diffusion. + +=== "yaml" + ``` yaml + vacancy_mode: false + ``` +=== "toml" + ``` toml + vacancy_mode = false + ``` +=== "python" + ``` python + vacancy_mode = False + ``` + +-------------------------- + ## validator_kwargs (advanced users only) Extra conditions to use when initializing the validator class. `MyValidator(**validator_kwargs)`. The input should be given as a dictionary. Note, for evolutionary searches, the composition kwarg is added automatically. This is closely tied with the `validator_name` parameter so be sure to read that section as well. diff --git a/src/simmate/calculators/bader/outputs/acf.py b/src/simmate/calculators/bader/outputs/acf.py index 70815273b..300a99c09 100644 --- a/src/simmate/calculators/bader/outputs/acf.py +++ b/src/simmate/calculators/bader/outputs/acf.py @@ -3,12 +3,18 @@ from pathlib import Path import pandas +from pymatgen.io.vasp import Potcar +from pymatgen.io.vasp.outputs import Chgcar -def ACF(filename="ACF.dat"): +def ACF(directory: Path = None, filename="ACF.dat"): + + # grab working directory if one wasn't provided + if not directory: + directory = Path.cwd() # convert to path obj - filename = Path(filename) + filename = directory / filename # open the file, grab the lines, and then close it with filename.open() as file: @@ -46,4 +52,64 @@ def ACF(filename="ACF.dat"): "nelectrons": float(lines[-1].split()[-1]), } + # The remaining code is to analyze the results and calculate extra + # information such as the final oxidation states. This requires extra + # files to be present, such as from a vasp calculation + + potcar_filename = directory / "POTCAR" + chgcar_filename = directory / "CHGCAR" + chgcar_empty_filename = directory / "CHGCAR_empty" # SPECIAL CASE + + # check if the required vasp files are present before doing the workup + if potcar_filename.exists() and ( + chgcar_filename.exists() or chgcar_empty_filename.exists() + ): + + # load the electron counts used by VASP from the POTCAR files + # OPTIMIZE: this can be much faster if I have a reference file + potcars = Potcar.from_file(potcar_filename) + nelectron_data = {} + # the result is a list because there can be multiple element potcars + # in the file (e.g. for NaCl, POTCAR = POTCAR_Na + POTCAR_Cl) + for potcar in potcars: + nelectron_data.update({potcar.element: potcar.nelectrons}) + + # SPECIAL CASE: in scenarios where empty atoms are added to the structure, + # we should grab that modified structure instead of the one from the POSCAR. 
+ # the empty file will always take preference + if chgcar_empty_filename.exists(): + chgcar = Chgcar.from_file(chgcar_empty_filename) + structure = chgcar.structure + # We typically use hydrogen ("H") as the empty atom, so we will + # need to add this to our element list for oxidation analysis. + # We use 0 for electron count because this is an 'empty' atom, and + # not actually Hydrogen + nelectron_data.update({"H": 0}) + + # otherwise, grab the structure from the CHGCAR + # OPTIMIZE: consider grabbing from the POSCAR or CONTCAR for speed + else: + chgcar = Chgcar.from_file(chgcar_filename) + structure = chgcar.structure + + # Calculate the oxidation state of each site where it is simply the + # change in number of electrons associated with it from vasp potcar vs + # the bader charge I also add the element strings for filtering functionality + elements = [] + oxi_state_data = [] + for site, site_charge in zip(structure, dataframe.charge.values): + element_str = site.specie.name + elements.append(element_str) + oxi_state = nelectron_data[element_str] - site_charge + oxi_state_data.append(oxi_state) + + # add the new column to the dataframe + dataframe = dataframe.assign( + oxidation_state=oxi_state_data, + element=elements, + ) + # !!! There are multiple ways to do this, but I don't know which is best + # dataframe["oxidation_state"] = pandas.Series( + # oxi_state_data, index=dataframe.index) + return dataframe, extra_data diff --git a/src/simmate/calculators/bader/workflows/bader.py b/src/simmate/calculators/bader/workflows/bader.py index a26dc421a..7a35c3d2d 100644 --- a/src/simmate/calculators/bader/workflows/bader.py +++ b/src/simmate/calculators/bader/workflows/bader.py @@ -1,18 +1,13 @@ # -*- coding: utf-8 -*- -from pathlib import Path - -import yaml -from pandas import DataFrame -from pymatgen.io.vasp import Potcar -from pymatgen.io.vasp.outputs import Chgcar - -from simmate.calculators.bader.outputs import ACF from simmate.workflow_engine import S3Workflow class PopulationAnalysis__Bader__Bader(S3Workflow): + required_files = ["CHGCAR_sum", "POTCAR"] + use_database = False + command = "bader CHGCAR -ref CHGCAR_sum -b weight > bader.out" """ The command to call the executable, which is typically bader. Note we @@ -28,100 +23,3 @@ class PopulationAnalysis__Bader__Bader(S3Workflow): density that is not associated with any atomic orbital. For these cases, you will see files like "CHGCAR_empty" used in the command. """ - - required_files = ["CHGCAR_sum", "POTCAR"] - use_database = False - - @classmethod - def workup(cls, directory: Path): - """ - A basic workup process that reads Bader analysis results from the ACF.dat - file and calculates the corresponding oxidation states with the existing - POTCAR files. - """ - - # load the ACF.dat file - acf_filename = directory / "ACF.dat" - dataframe, extra_data = ACF(filename=acf_filename) - - # load the electron counts used by VASP from the POTCAR files - # OPTIMIZE: this can be much faster if I have a reference file - potcar_filename = directory / "POTCAR" - potcars = Potcar.from_file(potcar_filename) - nelectron_data = {} - # the result is a list because there can be multiple element potcars - # in the file (e.g. for NaCl, POTCAR = POTCAR_Na + POTCAR_Cl) - for potcar in potcars: - nelectron_data.update({potcar.element: potcar.nelectrons}) - - # SPECIAL CASE: in scenarios where empty atoms are added to the structure, - # we should grab that modified structure instead of the one from the POSCAR. 
- chgcar_filename = directory / "CHGCAR" - chgcar_empty_filename = directory / "CHGCAR_empty" - - # the empty file will always take preference - if chgcar_empty_filename.exists(): - chgcar = Chgcar.from_file(chgcar_empty_filename) - structure = chgcar.structure - # We typically use hydrogen ("H") as the empty atom, so we will - # need to add this to our element list for oxidation analysis. - # We use 0 for electron count because this is an 'empty' atom, and - # not actually Hydrogen - nelectron_data.update({"H": 0}) - # otherwise, grab the structure from the CHGCAR - # OPTIMIZE: consider grabbing from the POSCAR or CONTCAR for speed - else: - chgcar = Chgcar.from_file(chgcar_filename) - structure = chgcar.structure - - # Calculate the oxidation state of each site where it is simply the - # change in number of electrons associated with it from vasp potcar vs - # the bader charge I also add the element strings for filtering functionality - elements = [] - oxi_state_data = [] - for site, site_charge in zip(structure, dataframe.charge.values): - element_str = site.specie.name - elements.append(element_str) - oxi_state = nelectron_data[element_str] - site_charge - oxi_state_data.append(oxi_state) - - # add the new column to the dataframe - dataframe = dataframe.assign( - oxidation_state=oxi_state_data, - element=elements, - ) - # !!! There are multiple ways to do this, but I don't know which is best - # dataframe["oxidation_state"] = pandas.Series( - # oxi_state_data, index=dataframe.index) - - # write output files/plots for the user to quickly reference - cls._write_output_summary(directory, dataframe, extra_data) - - # return all of our results - return dataframe, extra_data - - @staticmethod - def _write_output_summary( - directory: Path, - dataframe: DataFrame, - extra_data: dict, - ): - """ - This prints a "simmate_summary.yaml" file with key output information. - - This method should not be called directly as it used within workup(). 
- """ - - # write output of the dataframe - summary_csv_filename = directory / "simmate_summary_bader.csv" - dataframe.to_csv(summary_csv_filename) - - summary = { - "notes": "view simmate_summary_bader.csv for more information", - **extra_data, - } - - summary_filename = directory / "simmate_summary.yaml" - with summary_filename.open("w") as file: - content = yaml.dump(summary) - file.write(content) diff --git a/src/simmate/calculators/vasp/error_handlers/test/test_brmix.py b/src/simmate/calculators/vasp/error_handlers/test/test_brmix.py index 242a553b5..91e891501 100644 --- a/src/simmate/calculators/vasp/error_handlers/test/test_brmix.py +++ b/src/simmate/calculators/vasp/error_handlers/test/test_brmix.py @@ -50,14 +50,14 @@ def test_brmix(tmp_path): fix = error_handler.correct(tmp_path) assert fix == "removed any IMIX tag and switched KGAMMA to False" incar = Incar.from_file(incar_filename) - assert incar.get("IMIX", None) == None + assert incar.get("IMIX", None) is None assert incar["KGAMMA"] == False # Make third attempt at fixing the error fix = error_handler.correct(tmp_path) assert fix == "removed any IMIX tag and switched KGAMMA to True" incar = Incar.from_file(incar_filename) - assert incar.get("IMIX", None) == None + assert incar.get("IMIX", None) is None assert incar["KGAMMA"] == True # Make final attempt at fixing the error diff --git a/src/simmate/calculators/vasp/error_handlers/walltime.py b/src/simmate/calculators/vasp/error_handlers/walltime.py index 87caea75d..e4d3856e2 100644 --- a/src/simmate/calculators/vasp/error_handlers/walltime.py +++ b/src/simmate/calculators/vasp/error_handlers/walltime.py @@ -79,7 +79,7 @@ def check(self, directory: Path) -> bool: remaining_time = self._get_remaining_time(directory) # None means we ignore this error handler - if remaining_time == None: + if remaining_time is None: return False # If the remaining time is less than our buffer time or time for 2 diff --git a/src/simmate/calculators/vasp/outputs/__init__.py b/src/simmate/calculators/vasp/outputs/__init__.py index cddada5ec..8f7701c7c 100644 --- a/src/simmate/calculators/vasp/outputs/__init__.py +++ b/src/simmate/calculators/vasp/outputs/__init__.py @@ -1,3 +1,4 @@ # -*- coding: utf-8 -*- from .oszicar import Oszicar +from .vasprun import Vasprun diff --git a/src/simmate/calculators/vasp/outputs/vasprun.py b/src/simmate/calculators/vasp/outputs/vasprun.py new file mode 100644 index 000000000..cdc109db4 --- /dev/null +++ b/src/simmate/calculators/vasp/outputs/vasprun.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- + +import logging +import shutil +from pathlib import Path + +from pymatgen.analysis.transition_state import NEBAnalysis +from pymatgen.io.vasp.outputs import Vasprun as VasprunPymatgen + + +class Vasprun(VasprunPymatgen): + @classmethod + def from_directory(cls, directory: Path = None): + + if not directory: + directory = Path.cwd() + + vasprun_filename = directory / "vasprun.xml" + + # load the xml file and all of the vasprun data + try: + vasprun = cls( + filename=vasprun_filename, + exception_on_bad_xml=True, + ) + except: + logging.warning( + "XML is malformed. This typically means there's an error with your" + " calculation that wasn't caught by your ErrorHandlers. We try" + " salvaging data here though." 
+ ) + vasprun = cls( + filename=vasprun_filename, + exception_on_bad_xml=False, + ) + vasprun.final_structure = vasprun.structures[-1] + # This try/except is just for my really rough calculations + # where I don't use any ErrorHandlers and still want the final structure + # regarless of what went wrong. In the future, I should consider writing + # a separate method for those that loads the CONTCAR and moves on. + + # set source directory for convenience elsewhere + vasprun.directory = directory + + return vasprun + + @property + def neb_results(self): + + directory = getattr(self, "directory", None) + if not directory: + raise Exception( + "The Vasprun must have been created with the `from_directory` " + "method in order to load neb results because it involves loading " + "more results from files." + ) + + # Make sure there is "*.start" and "*.end" directory present. These + # will be our start/end folders. We go through all foldernames in the + # directory and grab the first that matches + # BUG: For now I assume there are start/end image directories are located + # in the working directory. These relaxation are actually ran by a + # separate workflow, which is thus a prerequisite for this workflow. + + # assume foldernames of start and end until proven otherwise + start_dirname = directory / "start" + end_dirname = directory / "end" + + for name in directory.iterdir(): + if name.suffix == ".start": + start_dirname = name + elif name.suffix == ".end": + end_dirname = name + + if not start_dirname.exists() or not end_dirname.exists(): + raise Exception( + "Your NEB calculation finished (possibly successfully). However, " + "in order to run the workup, Simmate needs the start/end point " + "relaxations. These should be located in the same directory as " + "the NEB run and with folder names ending with '*.start' and" + " '*.end' (e.g. 'image.start' and image.end' will work)" + ) + + ################ + # BUG: NEBAnalysis.from_dir is broken for all folder structures except + # when the start/end points are in the 00 and N folders. I therefore + # need to copy the OUTCAR from the endpoint relaxations to these folders. 
+ # I don't want to mess with opening a pull request with them / waiting + # on a new release, so I make this hacky fix here + new_start_filename = directory / "00" / "OUTCAR" + # the end filename should be the highest number in the directory + numbered_dirs = [d for d in directory.iterdir() if d.name.isdigit()] + numbered_dirs.sort() + new_end_filename = directory / numbered_dirs[-1] / "OUTCAR" + # now copy the outcars over + shutil.copyfile(start_dirname / "OUTCAR", new_start_filename) + shutil.copyfile(end_dirname / "OUTCAR", new_end_filename) + ################ + + neb_results = NEBAnalysis.from_dir( + directory, + # BUG: see bug fix right above this + # relaxation_dirs=[ + # "endpoint_relaxation_start", + # "endpoint_relaxation_end", + # ], + ) + return neb_results diff --git a/src/simmate/calculators/vasp/workflows/base.py b/src/simmate/calculators/vasp/workflows/base.py index c889a5620..72f772723 100644 --- a/src/simmate/calculators/vasp/workflows/base.py +++ b/src/simmate/calculators/vasp/workflows/base.py @@ -1,12 +1,9 @@ # -*- coding: utf-8 -*- -import logging import shutil from pathlib import Path -import yaml from pymatgen.analysis.structure_matcher import StructureMatcher -from pymatgen.io.vasp.outputs import Vasprun from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from simmate.calculators.vasp.inputs import Incar, Kpoints, Poscar, Potcar @@ -105,18 +102,6 @@ class VaspWorkflow(S3Workflow): Read more on this inside the Potcar class and be careful with updating! """ - confirm_convergence: bool = True - """ - This flag controls whether or not we raise an error when the calculation - failed to converge. In somecases we still want results from calculations - that did NOT converge successfully. - """ - # OPTIMIZE: What if I updated the ErrorHandler class to allow for "warnings" - # instead of raising the error and applying the correction...? This functionality - # could then be moved to the UnconvergedErrorHandler. I'd have a fix_error=True - # attribute that is used in the .check() method. and If fix_error=False, I - # simply print a warning & also add that warning to simmate_corrections.csv - standardize_structure: str | bool = False """ In some cases, we may want to standardize the structure during our setup(). @@ -285,74 +270,6 @@ def setup_restart(cls, directory: Path, **kwargs): # then CONTCAR over to the POSCAR shutil.move(contcar_filename, poscar_filename) - @classmethod - def workup(cls, directory: Path): - """ - This is the most basic VASP workup where I simply load the final structure, - final energy, and (if requested) confirm convergence. I will likely make - this a common function for this vasp module down the road. - """ - - # load the xml file and all of the vasprun data - try: - vasprun = Vasprun( - filename=directory / "vasprun.xml", - exception_on_bad_xml=True, - ) - except: - logging.warning( - "XML is malformed. This typically means there's an error with your" - " calculation that wasn't caught by your ErrorHandlers. We try" - " salvaging data here though." - ) - vasprun = Vasprun( - filename=directory / "vasprun.xml", - exception_on_bad_xml=False, - ) - vasprun.final_structure = vasprun.structures[-1] - # BUG: This try/except is 100% just for my really rough calculations - # where I don't use any ErrorHandlers and still want the final structure - # regarless of what when wrong. In the future, I should consider writing - # a separate method for those that loads the CONTCAR and moves on. 
- - # write output files/plots for the user to quickly reference - cls._write_output_summary(directory, vasprun) - - # confirm that the calculation converged (ionicly and electronically) - if cls.confirm_convergence: - assert vasprun.converged - - # return vasprun object - return vasprun - - @staticmethod - def _write_output_summary(directory: Path, vasprun: Vasprun): - """ - This prints a "simmate_summary.yaml" file with key output information. - - This method should not be called directly as it used within workup(). - """ - # OPTIMIZE: Ideally, I could take the vasprun object and run to_json, - # but this output is extremely difficult to read. - - results = vasprun.as_dict()["output"] - - summary = { - "structure_final": "The final structure is located in the CONTCAR file", - "energy_final": float(results.get("final_energy", None)), - "energy_final_per_atom": float(results.get("final_energy_per_atom", None)), - "converged_electroinc": vasprun.converged_electronic, - "converged_ionic": vasprun.converged_ionic, - "fermi_energy": results.get("efermi", None), - "valence_band_maximum": results.get("vbm", None), - "conduction_band_minimum": results.get("vbm", None), - } - - summary_filename = directory / "simmate_summary.yaml" - with summary_filename.open("w") as file: - content = yaml.dump(summary) - file.write(content) - @classmethod def get_config(cls): """ @@ -363,7 +280,6 @@ def get_config(cls): key: getattr(cls, key) for key in [ "__module__", - "confirm_convergence", "functional", "incar", "potcar_mappings", diff --git a/src/simmate/calculators/vasp/workflows/diffusion/neb_all_paths_base.py b/src/simmate/calculators/vasp/workflows/diffusion/neb_all_paths_base.py index e40dd1b19..f883b9d1e 100644 --- a/src/simmate/calculators/vasp/workflows/diffusion/neb_all_paths_base.py +++ b/src/simmate/calculators/vasp/workflows/diffusion/neb_all_paths_base.py @@ -1,27 +1,5 @@ # -*- coding: utf-8 -*- -""" -Runs a NEB on all unique pathways within a structure. - -The folder tree looks like... -``` -simmate-task-12345/ # determined by simmate.utilities.get_directory - ├── bulk_relaxation - ├── bulk_static_energy - ├── migration_hop_00 - ├── migration_hop_01 - ... - └── migration_hop_N # all migration_hop folders have the same structure - ├── endpoint_relaxation_start - ├── endpoint_relaxation_end - ├── 01 - ├── 02 - ├── 03 - ... - └── N # corresponds to image number -``` -""" - from pathlib import Path from simmate.toolkit import Structure @@ -45,38 +23,35 @@ class NebAllPathsWorkflow(Workflow): - static-energy/mit - a mini task that identifies unique migration hops - (for each hop) diffusion/single-path + + + The folder tree looks like... + + ``` + # note, folder names will match the workflow used + simmate-task-12345 + ├── bulk relaxation + ├── bulk static-energy + ├── migration hop 00 + ├── migration hop 01 + ... + └── migration_hop_N # all migration_hop folders have the same structure + ├── endpoint relaxation start + ├── endpoint relaxation end + ├── 01 + ├── 02 + ├── 03 + ... + └── N # corresponds to image number + ``` """ - use_database = False + update_database_from_results = False bulk_relaxation_workflow: Workflow = None bulk_static_energy_workflow: Workflow = None single_path_workflow: Workflow = None - # command list expects three subcommands: - # command_bulk, command_supercell, and command_neb - # - # I separate these out because each calculation is a very different scale. - # For example, you may want to run the bulk relaxation on 10 cores, the - # supercell on 50, and the NEB on 200. 
Even though more cores are available, - # running smaller calculation on more cores could slow down the calc. - # ["command_bulk", "command_supercell", "command_neb"] - # - # - # If you are running this workflow via the command-line, you can run this - # with... - - # ``` bash - # simmate workflows run diffusion/all-paths -s example.cif -c "cmd1; cmd2; cmd3" - # ``` - # Note, the `-c` here is very important! Here we are passing three commands - # separated by semicolons. Each command is passed to a specific workflow call: - # - cmd1 --> used for bulk crystal relaxation and static energy - # - cmd2 --> used for endpoint supercell relaxations - # - cmd3 --> used for NEB - # Thus, you can scale your resources for each step. Here's a full -c option: - # -c "vasp_std > vasp.out; mpirun -n 12 vasp_std > vasp.out; mpirun -n 70 vasp_std > vasp.out" - @classmethod def run_config( cls, @@ -88,14 +63,18 @@ def run_config( is_restart: bool = False, # parameters for supercell and image generation nimages: int = 5, - min_atoms: int = 80, - max_atoms: int = 240, - min_length: float = 10, + min_supercell_atoms: int = 80, + max_supercell_atoms: int = 240, + min_supercell_vector_lengths: float = 10, + # extra parameters for distinct path finding + max_path_length: float = None, + percolation_mode: str = ">1d", + vacancy_mode: bool = True, + run_id: str = None, **kwargs, ): - # Our step is to run a relaxation on the bulk structure and it uses our inputs - # directly. The remaining one tasks pass on results. + # run a relaxation on the bulk structure bulk_relax_result = cls.bulk_relaxation_workflow.run( structure=structure, command=command, # subcommands["command_bulk"] @@ -103,12 +82,11 @@ def run_config( is_restart=is_restart, ).result() - # A static energy calculation on the relaxed structure. This isn't necessarily - # required for NEB, but it takes very little time. + # run static energy calculation on the relaxed structure bulk_static_energy_result = cls.bulk_static_energy_workflow.run( structure={ "database_table": cls.bulk_relaxation_workflow.database_table.table_name, - "directory": bulk_relax_result["directory"], + "database_id": bulk_relax_result.id, "structure_field": "structure_final", }, command=command, # subcommands["command_bulk"] @@ -116,124 +94,40 @@ def run_config( is_restart=is_restart, ).result() - # This step does NOT run any calculation, but instead, identifies all - # diffusion pathways and builds the necessary database entries. - migration_hop_ids = cls._build_diffusion_analysis( - structure={ - "database_table": cls.bulk_static_energy_workflow.database_table.table_name, - "directory": bulk_static_energy_result["directory"], - }, + # Using the relaxed structure, detect all symmetrically unique paths + pathfinder = DistinctPathFinder( + structure=bulk_static_energy_result.to_toolkit(), migrating_specie=migrating_specie, - directory=directory, - vacancy_mode=True, # assumed for now + max_path_length=max_path_length, + perc_mode=percolation_mode, ) + migration_hops = pathfinder.get_paths() + + # Write the paths found so user can preview what's analyzed below + pathfinder.write_all_migration_hops(directory) + + # load the current database entry so we can link the other runs + # to it up front + current_calc = cls.database_table.from_run_context(run_id=run_id) # Run NEB single_path workflow for all these. 
- for i, hop_id in enumerate(migration_hop_ids): + for i, hop in enumerate(migration_hops): state = cls.single_path_workflow.run( - migration_hop={ - "migration_hop_table": "MigrationHop", - "migration_hop_id": hop_id, - }, + # !!! The hop object gives an ugly output. Should I use the + # database dictionary instead? + migration_hop=hop, directory=directory / f"{cls.single_path_workflow.name_full}.{str(i).zfill(2)}", - diffusion_analysis_id=None, - migration_hop_id=None, command=command, # subcommands["command_supercell"] # + ";" # + subcommands["command_neb"], is_restart=is_restart, - min_atoms=min_atoms, - max_atoms=max_atoms, - min_length=min_length, + min_atoms=min_supercell_atoms, + max_atoms=max_supercell_atoms, + min_length=min_supercell_vector_lengths, nimages=nimages, - ) # we don't want to wait on results to in order to allow parallel runs - - @classmethod - def _build_diffusion_analysis( - cls, - structure: Structure, - migrating_specie: str, - vacancy_mode: bool, - directory: Path = None, - **kwargs, - ) -> list[str]: - """ - Given a bulk crystal structure, returns all symmetrically unique pathways - for the migrating specie (up until the path is percolating). This - also create all relevent database entries for this struture and its - migration hops. - - #### Parameters - - - `structure`: - bulk crystal structure to be analyzed. Can be in any format supported - by Structure.from_dynamic method. - - - `migrating_specie`: - Element or ion symbol of the diffusion specie (e.g. "Li") - - - `directory`: - where to write the CIF file visualizing all migration hops. If no - directory is provided, it will be written in the working directory. - - - `**kwargs`: - Any parameter normally accepted by DistinctPathFinder - """ - if not directory: - directory = Path("") - - ###### STEP 1: creating the toolkit objects and writing them to file - - structure_cleaned = Structure.from_dynamic(structure) - - pathfinder = DistinctPathFinder( - structure_cleaned, - migrating_specie, - **kwargs, - ) - migration_hops = pathfinder.get_paths() - - # We write all the path files so users can visualized them if needed - filename = directory / "migration_hop_all.cif" - pathfinder.write_all_paths(filename, nimages=10) - for i, migration_hop in enumerate(migration_hops): - number = str(i).zfill(2) # converts numbers like 2 to "02" - # the files names here will be like "migration_hop_02.cif" - migration_hop.write_path( - directory / f"migration_hop_{number}.cif", - nimages=10, # this is just for visualization - ) - - ###### STEP 2: creating the database objects and saving them to the db - - # Create the main DiffusionAnalysis object that others will link to. - da_obj = cls.database_table.from_toolkit( - structure=structure_cleaned, - migrating_specie=migrating_specie, - vacancy_mode=vacancy_mode, - ) - da_obj.save() - # TODO: should I search for a matching bulk structure before deciding - # to create a new DiffusionAnalysis entry? - - # grab the linked MigrationHop class - hop_class = da_obj.migration_hops.model - - # Now iterate through the hops and add them to the database - hop_ids = [] - for i, hop in enumerate(migration_hops): - hop_db = hop_class.from_toolkit( - migration_hop=hop, - number=i, - diffusion_analysis_id=da_obj.id, + vacancy_mode=vacancy_mode, + diffusion_analysis_id=current_calc.id, ) - hop_db.save() - hop_ids.append(hop_db.id) - - # TODO: still figuring out if toolkit vs. db objects should be returned. - # Maybe add ids to the toolkit objects? Or dynamic DB dictionaries? 
- # For now I return the MigrationHop ids -- because this let's me - # indicate which MigrationHops should be updated later on. - return hop_ids + state.result() # wait until the job finishes diff --git a/src/simmate/calculators/vasp/workflows/diffusion/neb_from_endpoints_base.py b/src/simmate/calculators/vasp/workflows/diffusion/neb_from_endpoints_base.py index fa5cfa021..bbc1720c1 100644 --- a/src/simmate/calculators/vasp/workflows/diffusion/neb_from_endpoints_base.py +++ b/src/simmate/calculators/vasp/workflows/diffusion/neb_from_endpoints_base.py @@ -45,6 +45,11 @@ class NebFromEndpointWorkflow(Workflow): description_doc_short = "runs NEB using two endpoint structures as input" + # Oddly enough, the from_images_workflow and this workflow share a table + # entry, so nothing needs to be done for working up results. See + # how we pass run_id=run_id below. + update_database_from_results = False + endpoint_relaxation_workflow: Workflow = None from_images_workflow: Workflow = None @@ -57,9 +62,9 @@ def run_config( source: dict = None, command: str = None, nimages: int = 5, - # This helps link results to a higher-level table. diffusion_analysis_id: int = None, is_restart: bool = False, + run_id: str = None, **kwargs, ): # command list expects three subcommands: @@ -93,12 +98,12 @@ def run_config( images = get_migration_images_from_endpoints( supercell_start={ "database_table": cls.endpoint_relaxation_workflow.database_table.table_name, - "directory": endpoint_start_result["directory"], + "database_id": endpoint_start_result.id, "structure_field": "structure_final", }, supercell_end={ "database_table": cls.endpoint_relaxation_workflow.database_table.table_name, - "directory": endpoint_end_result["directory"], + "database_id": endpoint_end_result.id, "structure_field": "structure_final", }, nimages=nimages, @@ -111,4 +116,10 @@ def run_config( directory=directory, diffusion_analysis_id=diffusion_analysis_id, is_restart=is_restart, + # Run id is very important here as it tells the underlying + # workflow that it doesn't need to create a new database object + # during registration -- as it will use the one that was registered + # when this workflow started. This is also why we have a dummy + # `update_database_from_results` method below + run_id=run_id, ) diff --git a/src/simmate/calculators/vasp/workflows/diffusion/neb_from_images_base.py b/src/simmate/calculators/vasp/workflows/diffusion/neb_from_images_base.py index 9ea9e6a4d..a7f82f6e7 100644 --- a/src/simmate/calculators/vasp/workflows/diffusion/neb_from_images_base.py +++ b/src/simmate/calculators/vasp/workflows/diffusion/neb_from_images_base.py @@ -1,13 +1,8 @@ # -*- coding: utf-8 -*- import logging -import shutil from pathlib import Path -import numpy -import yaml -from pymatgen.analysis.transition_state import NEBAnalysis - from simmate.calculators.vasp.inputs import Incar, Poscar, Potcar from simmate.calculators.vasp.workflows.base import VaspWorkflow from simmate.toolkit.diffusion import MigrationImages @@ -32,19 +27,14 @@ class VaspNebFromImagesWorkflow(VaspWorkflow): may be useful if you'd like to make your own variation of this class. """ - _parameter_methods = ["run_config", "setup"] + _parameter_methods = VaspWorkflow._parameter_methods.copy() + _parameter_methods.remove("_get_clean_structure") # NEB does not require a POSCAR file because input structures are organized # into folders. required_files = ["INCAR", "POTCAR"] - # Pymatgen's NEB parser does not read from the vasprun.xml so it can't - # confirm convergence here. 
I'll have to write my own output class to do this. - confirm_convergence = False - - use_database = False description_doc_short = "runs NEB using a series of structures images as input" - # register_run=False, # temporary fix bc no calc table exists yet @classmethod def setup( @@ -56,35 +46,38 @@ def setup( """ Writes input files for a NEB calculation. Each structure image recieves it's own folder within the parent directory. - - This method is typically not called directly. Instead, users should - use the `run` method which calls setup within it. - - #### Parameters - - - `structure`: - This parameter does NOTHING! NEB is a special-case workflow that - accepts a list of structures instead of a single one. Therefore, it - is strictly for compatibility with the core S3Task. Leave this - value at None. - - - `directory`: - The name of the directory to write all input files in. This directory - should exists before calling. (see utilities.get_directory) - - - `structures`: - The list of structures to use as a MigrationImages object. """ - # !!! The structure input is confusing for users, so I should consider - # removing it from the S3Task... # run some prechecks to make sure the user has everything set up properly. - migration_images_cleaned = cls._pre_checks(migration_images, directory) + + # One common mistake is to mislabel the number of images in the + # INCAR file. + # first, we check if the user set this. + nimages = cls.incar.get("IMAGES") + if nimages: + # if so, we check that it was set correctly. It should be equal to + # the number of structures minus 2 (because we don't count the + # start and end images here.) + if nimages != (len(migration_images) - 2): + raise Exception( + "IMAGES looks to be improperly set! This value should not" + " include the start/end images -- so make sure you counted" + " properly. Alternatively, you also can remove this keyword" + " from your INCAR and Simmate will provide it automatically" + " for you." + ) + + # TODO: add a precheck that ensures the number of cores VASP is ran on + # is also divisible by the number of images. For example... + # "mpirun -n 16 vasp" will not work for IMAGES=3 because 16 is not + # divisible by 3. But this also may be better suited for an ErrorHandler. + # An example error message from from VASP is... + # "M_divide: can not subdivide 16 nodes by 3" # Here, each image (start to end structures) is put inside of its own # folder. We make those folders here, where they are named 00, 01, 02...N # Also recall that "structure" is really a list of structures here. - for i, image in enumerate(migration_images_cleaned): + for i, image in enumerate(migration_images): # first make establish the foldername # The zfill function converts numbers from "1" to "01" for us @@ -104,7 +97,7 @@ def setup( # !!! Should this code be moved to the INCAR class? Or would that require # too much reworking to allow INCAR to accept a list of structures? if not incar.get("IMAGES") and incar.pop("IMAGES__auto", None): - incar["IMAGES"] = len(migration_images_cleaned) - 2 + incar["IMAGES"] = len(migration_images) - 2 # Combine our base incar settings with those of our parallel settings # and then write the incar file @@ -113,7 +106,7 @@ def setup( filename=directory / "INCAR", # we can use the start image for our structure -- as all structures # should give the same result. 
- structure=migration_images_cleaned[0], + structure=migration_images[0], ) # if KSPACING is not provided in the incar AND kpoints is attached to this @@ -129,7 +122,7 @@ def setup( Potcar.to_file_from_type( # we can use the start image for our structure -- as all structures # should give the same result. - migration_images_cleaned[0].composition.elements, + migration_images[0].composition.elements, cls.functional, directory / "POTCAR", cls.potcar_mappings, @@ -138,8 +131,8 @@ def setup( # For the user's reference, we also like to write an image of the # starting path to a cif file. This can be slow for large structures # (>1s), but it is very little time compared to a full NEB run. - path_vis = migration_images_cleaned.get_sum_structure() - path_vis.to("cif", directory / "path_relaxed_idpp.cif") + path_vis = migration_images.get_sum_structure() + path_vis.to("cif", directory / "simmate_path_relaxed_idpp.cif") @classmethod def setup_restart(cls, directory: Path, **kwargs): @@ -155,204 +148,3 @@ def setup_restart(cls, directory: Path, **kwargs): # delete the stopcar if it exists if stopcar_filename.exists(): stopcar_filename.unlink() - - @classmethod - def _pre_checks( - cls, - migration_images: MigrationImages, - directory: Path, - ): - """ - Runs a series of checks to ensure the user configured the job correctly. - - This is called automatically within the setup() method and shouldn't be - used directly. - """ - - # The next common mistake is to mislabel the number of images in the - # INCAR file. - # first, we check if the user set this. - nimages = cls.incar.get("IMAGES") - if nimages: - # if so, we check that it was set correctly. It should be equal to - # the number of structures minus 2 (because we don't count the - # start and end images here.) - if nimages != (len(migration_images) - 2): - raise Exception( - "IMAGES looks to be improperly set! This value should not" - " include the start/end images -- so make sure you counted" - " properly. Alternatively, you also can remove this keyword" - " from your INCAR and Simmate will provide it automatically" - " for you." - ) - - # TODO: add a precheck that ensures the number of cores VASP is ran on - # is also divisible by the number of images. For example... - # "mpirun -n 16 vasp" will not work for IMAGES=3 because 16 is not - # divisible by 3. But this also may be better suited for an ErrorHandler. - # An example error message from from VASP is... - # "M_divide: can not subdivide 16 nodes by 3" - - # make sure all images are contained with the cell - migration_images_cleaned = cls._process_structures(migration_images) - return migration_images_cleaned - - @staticmethod - def _process_structures(structures: MigrationImages): - """ - Remove any atom jumps across the cell. - - This method is copied directly from pymatgen's MITNEBset and has not - been refactored/reviewed yet. - """ - # TODO: This code would be better placed as a method of MigrationImages - - input_structures = structures - structures = [input_structures[0]] - for s in input_structures[1:]: - prev = structures[-1] - for i, site in enumerate(s): - t = numpy.round(prev[i].frac_coords - site.frac_coords) - if numpy.any(numpy.abs(t) > 0.5): - s.translate_sites([i], t, to_unit_cell=False) - structures.append(s) - return MigrationImages(structures) # convert back to simmate object - - @classmethod - def workup(cls, directory: Path): - """ - Works up data from a NEB run, including confirming convergence and - writing summary output files (structures, data, and plots). 
- - #### Parameters - - - `directory`: - Name of the base folder where all results are located. - """ - - # Make sure there is "*.start" and "*.end" directory present. These - # will be our start/end folders. We go through all foldernames in the - # directory and grab the first that matches - # BUG: For now I assume there are start/end image directories are located - # in the working directory. These relaxation are actually ran by a - # separate workflow, which is thus a prerequisite for this workflow. - - # assume foldernames of start and end until proven otherwise - start_dirname = directory / "start" - end_dirname = directory / "end" - - for name in directory.iterdir(): - if name.suffix == ".start": - start_dirname = name - elif name.suffix == ".end": - end_dirname = name - - if not start_dirname.exists() or not end_dirname.exists(): - raise Exception( - "Your NEB calculation finished (possibly successfully). However, " - "in order to run the workup, Simmate needs the start/end point " - "relaxations. These should be located in the same directory as " - "the NEB run and with folder names ending with '*.start' and" - " '*.end' (e.g. 'image.start' and image.end' will work)" - ) - - ################ - # BUG: NEBAnalysis.from_dir is broken for all folder structures except - # when the start/end points are in the 00 and N folders. I therefore - # need to copy the OUTCAR from the endpoint relaxations to these folders. - # I don't want to mess with opening a pull request with them / waiting - # on a new release, so I make this hacky fix here - new_start_filename = directory / "00" / "OUTCAR" - - # the end filename should be the highest number in the directory - numbered_dirs = [d for d in directory.iterdir() if d.name.isdigit()] - numbered_dirs.sort() - new_end_filename = directory / numbered_dirs[-1] / "OUTCAR" - - # now copy the outcars over - shutil.copyfile(start_dirname / "OUTCAR", new_start_filename) - shutil.copyfile(end_dirname / "OUTCAR", new_end_filename) - ################ - - neb_results = NEBAnalysis.from_dir( - directory, - # BUG: see bug fix right above this - # relaxation_dirs=[ - # "endpoint_relaxation_start", - # "endpoint_relaxation_end", - # ], - ) - - # write output files/plots for the user to quickly reference - cls._write_output_summary(directory, neb_results) - - return neb_results - - @staticmethod - def _write_output_summary(directory: Path, neb_results: NEBAnalysis): - """ - This is an EXPERIMENTAL feature. - - This prints a "simmate_summary.yaml" file with key output information. - - This method should not be called directly as it used within workup(). - """ - - # plot the results - plot = neb_results.get_plot() - plot.savefig(directory / "NEB_plot.jpeg") - - # convert all the structures to a MigrationImages object so we can write - # the summed structure. 
- migration_images = MigrationImages(neb_results.structures) - structure_sum = migration_images.get_sum_structure() - structure_sum.to("cif", directory / "path_relaxed_neb.cif") - - results_dict = neb_results.as_dict() - summary = { - "structures": "Final structure images are the CONTCARs within image directories (00-N)", - "forces_tangent": results_dict["forces"], - "energies": results_dict["energies"], - "structure_distances": results_dict["r"], - "energy_barrier": float( - max(neb_results.energies) - min(neb_results.energies) - ), - } - - summary_filename = directory / "simmate_summary.yaml" - with summary_filename.open("w") as file: - content = yaml.dump(summary) - file.write(content) - - @classmethod - def _save_to_database( - cls, - output, - diffusion_analysis_id: int = None, - migration_hop_id: int = None, - ): - - # split our results and corrections (which are given as a dict) into - # separate variables - # Our result here is not a VaspRun object, but instead a NEBAnalysis - # object. See NudgedElasticBandTask.workup() - result = output["result"] - - # TODO: These aren't saved for now. Consider making MigrationHopTable - # a Calculation and attaching these there. - corrections = output["corrections"] - directory = output["directory"] - - # First, we need a migration_hop database object. - # All of hops should link to a diffusion_analysis entry, so we check - # for that here too. The key thing of these statements is that we - # have a migration_hop_id at the end. - - migration_hop_db = cls.database_table.from_pymatgen( - analysis=result, - diffusion_analysis_id=diffusion_analysis_id, - migration_hop_id=migration_hop_id, - ) - - # If the user wants to access results, they can do so through the hop id - return migration_hop_db.id diff --git a/src/simmate/calculators/vasp/workflows/diffusion/neb_single_path_base.py b/src/simmate/calculators/vasp/workflows/diffusion/neb_single_path_base.py index cc9b84c23..895d70bc4 100644 --- a/src/simmate/calculators/vasp/workflows/diffusion/neb_single_path_base.py +++ b/src/simmate/calculators/vasp/workflows/diffusion/neb_single_path_base.py @@ -32,13 +32,14 @@ class SinglePathWorkflow(Workflow): have not implemented a file format for MigrationHop's yet. """ - use_database = False - endpoint_relaxation_workflow: Workflow = None + from_images_workflow: Workflow = None - # TODO: - # commands_out=["command_supercell", "command_neb"] + # Oddly enough, the from_images_workflow and this workflow share a table + # entry, so nothing needs to be done for working up results. See + # how we pass run_id=run_id below. + update_database_from_results = False @classmethod def run_config( @@ -47,21 +48,16 @@ def run_config( directory: Path = None, source: dict = None, command: str = None, - # These help link results to a higher-level table. diffusion_analysis_id: int = None, - migration_hop_id: int = None, - # TODO: Can the hop id be inferred from the migration_hop or somewhere - # else in this context? Maybe even load_input_and_register will use - # prefect id once it's a Calculation? 
is_restart: bool = False, # parameters for supercell and image generation nimages: int = 5, min_atoms: int = 80, max_atoms: int = 240, min_length: float = 10, + run_id: str = None, **kwargs, ): - # get the supercell endpoint structures supercell_start, supercell_end, _ = migration_hop.get_sc_structures( min_atoms=min_atoms, @@ -93,12 +89,12 @@ def run_config( images = get_migration_images_from_endpoints( supercell_start={ "database_table": cls.endpoint_relaxation_workflow.database_table.table_name, - "directory": endpoint_start_result["directory"], + "database_id": endpoint_start_result.id, "structure_field": "structure_final", }, supercell_end={ "database_table": cls.endpoint_relaxation_workflow.database_table.table_name, - "directory": endpoint_end_result["directory"], + "database_id": endpoint_end_result.id, "structure_field": "structure_final", }, nimages=nimages, @@ -111,6 +107,12 @@ def run_config( source=source, directory=directory, diffusion_analysis_id=diffusion_analysis_id, - migration_hop_id=migration_hop_id, is_restart=is_restart, + # Run id is very important here as it tells the underlying + # workflow that it doesn't need to create a new database object + # during registration -- as it will use the one that was registered + # when this workflow started. This is also why we have a dummy + # `update_database_from_results` method below + run_id=run_id, ) + neb_images = neb_state.result() diff --git a/src/simmate/calculators/vasp/workflows/dynamics/matproj.py b/src/simmate/calculators/vasp/workflows/dynamics/matproj.py index 264131b6d..58a8d9403 100644 --- a/src/simmate/calculators/vasp/workflows/dynamics/matproj.py +++ b/src/simmate/calculators/vasp/workflows/dynamics/matproj.py @@ -25,8 +25,6 @@ class Dynamics__Vasp__Matproj(DynamicsWorkflow, Relaxation__Vasp__Matproj): calculation does not modify your input structure. """ - confirm_convergence = False - incar = Relaxation__Vasp__Matproj.incar.copy() incar.update( dict( diff --git a/src/simmate/calculators/vasp/workflows/dynamics/mit.py b/src/simmate/calculators/vasp/workflows/dynamics/mit.py index 9104c345e..119a9b2f9 100644 --- a/src/simmate/calculators/vasp/workflows/dynamics/mit.py +++ b/src/simmate/calculators/vasp/workflows/dynamics/mit.py @@ -23,8 +23,6 @@ class Dynamics__Vasp__Mit(DynamicsWorkflow, Relaxation__Vasp__Mit): calculation does not modify your input structure. """ - confirm_convergence = False - incar = Relaxation__Vasp__Mit.incar.copy() incar.update( dict( diff --git a/src/simmate/calculators/vasp/workflows/dynamics/mvl_npt.py b/src/simmate/calculators/vasp/workflows/dynamics/mvl_npt.py index f68fd2033..d0776ff64 100644 --- a/src/simmate/calculators/vasp/workflows/dynamics/mvl_npt.py +++ b/src/simmate/calculators/vasp/workflows/dynamics/mvl_npt.py @@ -11,8 +11,6 @@ class Dynamics__Vasp__MvlNpt(Dynamics__Vasp__Mit): [MVLNPTMDSet](https://pymatgen.org/pymatgen.io.vasp.sets.html#pymatgen.io.vasp.sets.MVLNPTMDSet). 
""" - confirm_convergence = False - incar = Dynamics__Vasp__Mit.incar.copy() incar.update( dict( diff --git a/src/simmate/calculators/vasp/workflows/electronic_structure/base_band_structure.py b/src/simmate/calculators/vasp/workflows/electronic_structure/base_band_structure.py index 84fd9bc10..ed8547111 100644 --- a/src/simmate/calculators/vasp/workflows/electronic_structure/base_band_structure.py +++ b/src/simmate/calculators/vasp/workflows/electronic_structure/base_band_structure.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -from pymatgen.electronic_structure.plotter import BSPlotter from pymatgen.io.vasp.inputs import Kpoints from pymatgen.symmetry.bandstructure import HighSymmKpath @@ -84,18 +83,3 @@ def setup(cls, directory, structure, **kwargs): directory / "POTCAR", cls.potcar_mappings, ) - - @staticmethod - def _write_output_summary(directory, vasprun): - """ - In addition to writing the normal VASP output summary, this also plots - the bandstructure to "band_structure.png" - """ - - # run the normal output - StaticEnergy__Vasp__Matproj._write_output_summary(directory, vasprun) - - bs_plotter = BSPlotter(vasprun.get_band_structure(line_mode=True)) - plot = bs_plotter.get_plot() - plot_filename = directory / "band_structure.png" - plot.savefig(plot_filename) diff --git a/src/simmate/calculators/vasp/workflows/electronic_structure/base_density_of_states.py b/src/simmate/calculators/vasp/workflows/electronic_structure/base_density_of_states.py index 828ce2837..23b8475e5 100644 --- a/src/simmate/calculators/vasp/workflows/electronic_structure/base_density_of_states.py +++ b/src/simmate/calculators/vasp/workflows/electronic_structure/base_density_of_states.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -from pymatgen.electronic_structure.plotter import DosPlotter - from simmate.calculators.vasp.workflows.static_energy.matproj import ( StaticEnergy__Vasp__Matproj, ) @@ -17,33 +15,3 @@ class VaspDensityOfStates(StaticEnergy__Vasp__Matproj): """ required_files = StaticEnergy__Vasp__Matproj.required_files + ["CHGCAR"] - - @staticmethod - def _write_output_summary(directory, vasprun): - """ - In addition to writing the normal VASP output summary, this also plots - the DOS to "density_of_states.png" - """ - - # run the normal output - StaticEnergy__Vasp__Matproj._write_output_summary(directory, vasprun) - - # and then generate a DOS plot - plotter = DosPlotter() - - # Add the total density of States - plotter.add_dos("Total DOS", vasprun.complete_dos) - - # add element-projected density of states - plotter.add_dos_dict(vasprun.complete_dos.get_element_dos()) - - # If I want plots for individual orbitals - # for site in vasprun.final_structure: - # spd_dos = vasprun.complete_dos.get_site_spd_dos(site) - # plotter.add_dos_dict(spd_dos) - - # NOTE: get_dos_dict may be useful in the future - - plot = plotter.get_plot() - plot_filename = directory / "density_of_states.png" - plot.savefig(plot_filename) diff --git a/src/simmate/calculators/vasp/workflows/electronic_structure/base_full.py b/src/simmate/calculators/vasp/workflows/electronic_structure/base_full.py index b04d5c5af..325820abe 100644 --- a/src/simmate/calculators/vasp/workflows/electronic_structure/base_full.py +++ b/src/simmate/calculators/vasp/workflows/electronic_structure/base_full.py @@ -44,13 +44,13 @@ def run_config( directory=directory / cls.static_energy_workflow.name_full, source=source, # For band-structures, unit cells should be in the standardized format - pre_standardize_structure=True, + standardize_structure="primitive", 
).result() # block until complete dos_state = cls.density_of_states_workflow.run( structure={ "database_table": cls.static_energy_workflow.database_table.table_name, - "directory": static_result["directory"], + "database_id": static_result.id, }, command=command, directory=directory / cls.density_of_states_workflow.name_full, @@ -60,7 +60,7 @@ def run_config( bs_state = cls.band_structure_workflow.run( structure={ "database_table": cls.static_energy_workflow.database_table.table_name, - "directory": static_result["directory"], + "database_id": static_result.id, }, command=command, directory=directory / cls.band_structure_workflow.name_full, diff --git a/src/simmate/calculators/vasp/workflows/population_analysis/badelf.py b/src/simmate/calculators/vasp/workflows/population_analysis/badelf.py index 430262fad..bed8838d0 100644 --- a/src/simmate/calculators/vasp/workflows/population_analysis/badelf.py +++ b/src/simmate/calculators/vasp/workflows/population_analysis/badelf.py @@ -163,8 +163,9 @@ def save_badelf_results(bader_result, run_id): # load the calculation entry for this workflow run. This should already # exist thanks to the load_input_and_register task of the prebader workflow calculation = PopulationAnalysis__Bader__Badelf.database_table.from_run_context( - run_id, - PopulationAnalysis__Vasp__PrebadelfMatproj.name_full, + run_id=run_id, + workflow_name=PopulationAnalysis__Vasp__PrebadelfMatproj.name_full, + workflow_version=PopulationAnalysis__Vasp__PrebadelfMatproj.version, ) # BUG: can't use context to grab the id because workflow tasks generate a # different id than the main workflow diff --git a/src/simmate/calculators/vasp/workflows/population_analysis/bader.py b/src/simmate/calculators/vasp/workflows/population_analysis/bader.py index 8938cae5c..9987210f9 100644 --- a/src/simmate/calculators/vasp/workflows/population_analysis/bader.py +++ b/src/simmate/calculators/vasp/workflows/population_analysis/bader.py @@ -29,7 +29,7 @@ def run_config( **kwargs, ): - prebader_result = PopulationAnalysis__Vasp__PrebaderMatproj.run( + prebader_result = StaticEnergy__Vasp__PrebaderMatproj.run( structure=structure, command=command, source=source, @@ -38,39 +38,18 @@ def run_config( # Setup chargecars for the bader analysis and wait until complete PopulationAnalysis__Bader__CombineChgcars.run( - directory=prebader_result["directory"], + directory=prebader_result.directory, ).result() # Bader only adds files and doesn't overwrite any, so I just run it # in the original directory. I may switch to copying over to a new # directory in the future though. - bader_result = PopulationAnalysis__Bader__Bader.run( - directory=prebader_result["directory"] + PopulationAnalysis__Bader__Bader.run( + directory=prebader_result.directory, ).result() - return bader_result - @classmethod - def _save_to_database(cls, bader_result, run_id): - # load the results. We are particullary after the first result with - # is a pandas dataframe of oxidation states. - oxidation_data, extra_data = bader_result["result"] - - # load the calculation entry for this workflow run. 
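As shown in the badelf workup above, `from_run_context` is now called with explicit keyword arguments and records the workflow version. A condensed sketch of that lookup-then-update pattern; the workflow class, function name, and the attribute being saved are illustrative stand-ins:

``` python
def save_results(oxidation_states: list[float], run_id: str):
    # grab the entry that was registered when this workflow run started
    calculation = MyWorkflow.database_table.from_run_context(
        run_id=run_id,
        workflow_name=MyWorkflow.name_full,
        workflow_version=MyWorkflow.version,
    )
    # attach the new data and save it back to the database
    calculation.oxidation_states = oxidation_states
    calculation.save()
```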
This should already - # exist thanks to the load_input_and_register task of the prebader workflow - calculation = cls.database_table.from_run_context( - run_id=run_id, - workflow_name=cls.name_full, - ) - # BUG: can't use context to grab the id because workflow tasks generate a - # different id than the main workflow - - # now update the calculation entry with our results - calculation.oxidation_states = list(oxidation_data.oxidation_state.values) - calculation.save() - - -class PopulationAnalysis__Vasp__PrebaderMatproj(StaticEnergy__Vasp__Matproj): +class StaticEnergy__Vasp__PrebaderMatproj(StaticEnergy__Vasp__Matproj): """ Runs a static energy calculation with a high-density FFT grid under settings from the Materials Project. Results can be used for Bader analysis. diff --git a/src/simmate/calculators/vasp/workflows/population_analysis/elf.py b/src/simmate/calculators/vasp/workflows/population_analysis/elf.py index b27457ef1..0e85896bc 100644 --- a/src/simmate/calculators/vasp/workflows/population_analysis/elf.py +++ b/src/simmate/calculators/vasp/workflows/population_analysis/elf.py @@ -3,6 +3,7 @@ from simmate.calculators.vasp.workflows.static_energy.matproj import ( StaticEnergy__Vasp__Matproj, ) +from simmate.database.workflow_results import StaticEnergy class PopulationAnalysis__Vasp__ElfMatproj(StaticEnergy__Vasp__Matproj): @@ -19,6 +20,11 @@ class PopulationAnalysis__Vasp__ElfMatproj(StaticEnergy__Vasp__Matproj): # fails and tells the user to specify a setting ) + # even though the category is "population-analysis", we only store + # static energy data. So we manually set that table here. + database_table = StaticEnergy + # NOTE: the code below, once implemented, could change this. + # ----------------------------------------------------------------------------- diff --git a/src/simmate/calculators/vasp/workflows/relaxation/quality00.py b/src/simmate/calculators/vasp/workflows/relaxation/quality00.py index 3a2899a17..073b329bf 100644 --- a/src/simmate/calculators/vasp/workflows/relaxation/quality00.py +++ b/src/simmate/calculators/vasp/workflows/relaxation/quality00.py @@ -24,10 +24,6 @@ class Relaxation__Vasp__Quality00(VaspWorkflow): functional = "PBE" potcar_mappings = PBE_ELEMENT_MAPPINGS_LOW_QUALITY - # because this calculation is such a low quality we don't raise an error - # if the calculation fails to converge - confirm_convergence = False - # Make the unitcell relatively cubic before relaxing standardize_structure = "primitive-LLL" diff --git a/src/simmate/calculators/vasp/workflows/relaxation/quality01.py b/src/simmate/calculators/vasp/workflows/relaxation/quality01.py index 2bc967fe4..ae011194a 100644 --- a/src/simmate/calculators/vasp/workflows/relaxation/quality01.py +++ b/src/simmate/calculators/vasp/workflows/relaxation/quality01.py @@ -25,10 +25,6 @@ class Relaxation__Vasp__Quality01(VaspWorkflow): functional = "PBE" potcar_mappings = PBE_ELEMENT_MAPPINGS_LOW_QUALITY - # because this calculation is such a low quality we don't raise an error - # if the calculation fails to converge - confirm_convergence = False - # Make the unitcell relatively cubic before relaxing standardize_structure = "primitive-LLL" diff --git a/src/simmate/calculators/vasp/workflows/relaxation/quality02.py b/src/simmate/calculators/vasp/workflows/relaxation/quality02.py index 97eeab371..a20e9690e 100644 --- a/src/simmate/calculators/vasp/workflows/relaxation/quality02.py +++ b/src/simmate/calculators/vasp/workflows/relaxation/quality02.py @@ -25,10 +25,6 @@ class 
Relaxation__Vasp__Quality02(VaspWorkflow): functional = "PBE" potcar_mappings = PBE_ELEMENT_MAPPINGS_LOW_QUALITY - # because this calculation is such a low quality we don't raise an error - # if the calculation fails to converge - confirm_convergence = False - # Make the unitcell relatively cubic before relaxing standardize_structure = "primitive-LLL" symmetry_tolerance = 0.1 diff --git a/src/simmate/calculators/vasp/workflows/relaxation/quality03.py b/src/simmate/calculators/vasp/workflows/relaxation/quality03.py index 2d7bfbfc5..67fa8b36a 100644 --- a/src/simmate/calculators/vasp/workflows/relaxation/quality03.py +++ b/src/simmate/calculators/vasp/workflows/relaxation/quality03.py @@ -25,10 +25,6 @@ class Relaxation__Vasp__Quality03(VaspWorkflow): functional = "PBE" potcar_mappings = PBE_ELEMENT_MAPPINGS_LOW_QUALITY - # because this calculation is such a low quality we don't raise an error - # if the calculation fails to converge - confirm_convergence = False - # Make the unitcell relatively cubic before relaxing standardize_structure = "primitive-LLL" symmetry_tolerance = 0.1 diff --git a/src/simmate/calculators/vasp/workflows/relaxation/quality04.py b/src/simmate/calculators/vasp/workflows/relaxation/quality04.py index 6fe5733de..8a6f076b2 100644 --- a/src/simmate/calculators/vasp/workflows/relaxation/quality04.py +++ b/src/simmate/calculators/vasp/workflows/relaxation/quality04.py @@ -28,10 +28,6 @@ class Relaxation__Vasp__Quality04(VaspWorkflow): functional = "PBE" potcar_mappings = PBE_ELEMENT_MAPPINGS_LOW_QUALITY - # because this calculation is such a low quality we don't raise an error - # if the calculation fails to converge - confirm_convergence = False - # Make the unitcell relatively cubic before relaxing standardize_structure = "primitive-LLL" symmetry_tolerance = 0.1 diff --git a/src/simmate/calculators/vasp/workflows/relaxation/staged.py b/src/simmate/calculators/vasp/workflows/relaxation/staged.py index 7d1a86d8a..91fd58978 100644 --- a/src/simmate/calculators/vasp/workflows/relaxation/staged.py +++ b/src/simmate/calculators/vasp/workflows/relaxation/staged.py @@ -86,7 +86,7 @@ def run_config( state = current_task.run( structure={ "database_table": preceding_task.database_table.table_name, - "directory": result["directory"], # uses preceding result + "database_id": result.id, # uses preceding result "structure_field": "structure_final", }, command=command, @@ -94,9 +94,18 @@ def run_config( ) result = state.result() - # we return the final step but update the directory to the parent one - result["directory"] = directory - return result + # when updating the original entry, we want to use the data from the + # final result. 
+ final_result = { + "structure": result.to_toolkit(), + "energy": result.energy, + "band_gap": result.band_gap, + "is_gap_direct": result.is_gap_direct, + "energy_fermi": result.energy_fermi, + "conduction_band_minimum": result.conduction_band_minimum, + "valence_band_maximum": result.valence_band_maximum, + } + return final_result @classmethod def _get_final_energy_series(cls, df, directory: str): diff --git a/src/simmate/calculators/vasp/workflows/test/all_paths.zip b/src/simmate/calculators/vasp/workflows/test/all_paths.zip deleted file mode 100644 index eb11b7a23..000000000 Binary files a/src/simmate/calculators/vasp/workflows/test/all_paths.zip and /dev/null differ diff --git a/src/simmate/calculators/vasp/workflows/test/nudged_elastic_band.zip b/src/simmate/calculators/vasp/workflows/test/nudged_elastic_band.zip index f3af0f32f..688bedd83 100644 Binary files a/src/simmate/calculators/vasp/workflows/test/nudged_elastic_band.zip and b/src/simmate/calculators/vasp/workflows/test/nudged_elastic_band.zip differ diff --git a/src/simmate/calculators/vasp/workflows/test/test_all_paths.py b/src/simmate/calculators/vasp/workflows/test/test_all_paths.py deleted file mode 100644 index f4a37c2c8..000000000 --- a/src/simmate/calculators/vasp/workflows/test/test_all_paths.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- - -import pytest - -from simmate.calculators.vasp.inputs import Potcar -from simmate.calculators.vasp.workflows.diffusion.all import ( - Diffusion__Vasp__NebAllPathsMit, -) -from simmate.conftest import copy_test_files -from simmate.workflow_engine import S3Workflow - - -# @pytest.mark.prefect_db -@pytest.mark.slow -@pytest.mark.django_db -def test_neb(sample_structures, tmp_path, mocker): - - copy_test_files( - tmp_path, - test_directory=__file__, - test_folder="all_paths.zip", - ) - - # For testing, look at I- diffusion in Y2CF2 - structure = sample_structures["Y2CI2_mp-1206803_primitive"] - - # Because we won't have POTCARs accessible, we need to cover this function - # call -- specifically have it pretend to make a file - mocker.patch.object(Potcar, "to_file_from_type", return_value=None) - - # We also don't want to run any commands -- for any task. We skip these - # by having the base S3task.execute just return an empty list (meaning - # no corrections were made). - mocker.patch.object(S3Workflow, "execute", return_value=[]) - - # Don't check for proper input files because POTCARs will be missing - mocker.patch.object(S3Workflow, "_check_input_files", return_value=None) - - # run the workflow and make sure it handles data properly. 
- state = Diffusion__Vasp__NebAllPathsMit.run( - structure=structure, - migrating_specie="I", - command="dummycmd1; dummycmd2; dummycmd3", - directory=str(tmp_path), - ) - assert state.is_completed() diff --git a/src/simmate/calculators/vasp/workflows/test/test_band_structure.py b/src/simmate/calculators/vasp/workflows/test/test_band_structure.py index bc6a37c79..81b8623d2 100644 --- a/src/simmate/calculators/vasp/workflows/test/test_band_structure.py +++ b/src/simmate/calculators/vasp/workflows/test/test_band_structure.py @@ -1,28 +1,23 @@ # -*- coding: utf-8 -*- -from simmate.calculators.vasp.inputs import Potcar +import pytest + from simmate.calculators.vasp.inputs.potcar_mappings import PBE_ELEMENT_MAPPINGS from simmate.calculators.vasp.workflows.electronic_structure.matproj_band_structure import ( ElectronicStructure__Vasp__MatprojBandStructure, ) -from simmate.conftest import copy_test_files, make_dummy_files +from simmate.conftest import SimmateMockHelper, copy_test_files def test_band_structure_setup(structure, tmp_path, mocker): + Potcar = SimmateMockHelper.get_mocked_potcar(mocker, tmp_path) + # estabilish filenames that we make and commonly reference incar_filename = tmp_path / "INCAR" poscar_filename = tmp_path / "POSCAR" potcar_filename = tmp_path / "POTCAR" - # Because we won't have POTCARs accessible, we need to cover this function - # call -- specifically have it pretend to make a file - mocker.patch.object( - Potcar, - "to_file_from_type", - return_value=make_dummy_files(potcar_filename), - ) - # try to make input files in the tmp_path ElectronicStructure__Vasp__MatprojBandStructure.setup( directory=tmp_path, structure=structure @@ -38,18 +33,26 @@ def test_band_structure_setup(structure, tmp_path, mocker): ) -def test_band_structure_workup(tmp_path): +@pytest.mark.django_db +def test_band_structure_run(sample_structures, tmp_path, mocker): copy_test_files( tmp_path, test_directory=__file__, test_folder="band_structure.zip", ) - # estabilish filenames that we make and commonly reference - summary_filename = tmp_path / "simmate_summary.yaml" - plot_filename = tmp_path / "band_structure.png" + SimmateMockHelper.mock_vasp(mocker) + + # run the full workflow, where the output files were pre-generated with + # a specific structures + structure = sample_structures["Fe_mp-13_primitive"] + ElectronicStructure__Vasp__MatprojBandStructure.run( + structure=structure, + directory=tmp_path, + ) - # run the full workup - ElectronicStructure__Vasp__MatprojBandStructure.workup(tmp_path) + # check output files + summary_filename = tmp_path / "simmate_summary.yaml" + plot_filename = tmp_path / "simmate_band_structure.png" assert summary_filename.exists() assert plot_filename.exists() diff --git a/src/simmate/calculators/vasp/workflows/test/test_base.py b/src/simmate/calculators/vasp/workflows/test/test_base.py index 4534177d7..7ae0e3fa0 100644 --- a/src/simmate/calculators/vasp/workflows/test/test_base.py +++ b/src/simmate/calculators/vasp/workflows/test/test_base.py @@ -2,10 +2,9 @@ import pytest -from simmate.calculators.vasp.inputs import Potcar from simmate.calculators.vasp.inputs.potcar_mappings import PBE_ELEMENT_MAPPINGS from simmate.calculators.vasp.workflows.base import VaspWorkflow -from simmate.conftest import copy_test_files, make_dummy_files +from simmate.conftest import SimmateMockHelper, copy_test_files class Testing__Vasp__Dummy(VaspWorkflow): @@ -17,7 +16,6 @@ class Testing__Vasp__Dummy(VaspWorkflow): functional = "PBE" potcar_mappings = PBE_ELEMENT_MAPPINGS - 
confirm_convergence = True standardize_structure = "primitive-LLL" symmetry_tolerance = 0.1 angle_tolerance = 10.0 @@ -35,19 +33,13 @@ class Testing__Vasp__Dummy(VaspWorkflow): def test_base_setup(structure, tmp_path, mocker): + Potcar = SimmateMockHelper.get_mocked_potcar(mocker, tmp_path) + # estabilish filenames that we make and commonly reference incar_filename = tmp_path / "INCAR" poscar_filename = tmp_path / "POSCAR" potcar_filename = tmp_path / "POTCAR" - # Because we won't have POTCARs accessible, we need to cover this function - # call -- specifically have it pretend to make a file - mocker.patch.object( - Potcar, - "to_file_from_type", - return_value=make_dummy_files(potcar_filename), - ) - # try to make input files in the tmp_path DummyWorkflow.setup(directory=tmp_path, structure=structure) assert incar_filename.exists() @@ -61,20 +53,20 @@ def test_base_setup(structure, tmp_path, mocker): ) -def test_base_workup(tmp_path): +def test_base_vasp_run(structure, tmp_path, mocker): copy_test_files( tmp_path, test_directory=__file__, test_folder="base.zip", ) + SimmateMockHelper.mock_vasp(mocker) + # estabilish filenames that we make and commonly reference - summary_filename = tmp_path / "simmate_summary.yaml" vasprun_filename = tmp_path / "vasprun.xml" - # run the full workup - DummyWorkflow.workup(tmp_path) - assert summary_filename.exists() + # run the full workflow + DummyWorkflow.run(structure=structure, directory=tmp_path) # run the workup again with a malformed xml with vasprun_filename.open("r") as file: @@ -82,4 +74,4 @@ def test_base_workup(tmp_path): with vasprun_filename.open("w") as file: file.writelines(contents[50]) with pytest.raises(Exception): - DummyWorkflow.workup(tmp_path) + DummyWorkflow.run(tmp_path) diff --git a/src/simmate/calculators/vasp/workflows/test/test_density_of_states.py b/src/simmate/calculators/vasp/workflows/test/test_density_of_states.py index 102987706..93df2805d 100644 --- a/src/simmate/calculators/vasp/workflows/test/test_density_of_states.py +++ b/src/simmate/calculators/vasp/workflows/test/test_density_of_states.py @@ -1,23 +1,33 @@ # -*- coding: utf-8 -*- +import pytest + from simmate.calculators.vasp.workflows.electronic_structure.matproj_density_of_states import ( ElectronicStructure__Vasp__MatprojDensityOfStates, ) -from simmate.conftest import copy_test_files +from simmate.conftest import SimmateMockHelper, copy_test_files -def test_density_of_states_workup(tmp_path): +@pytest.mark.django_db +def test_density_of_states_run(sample_structures, tmp_path, mocker): copy_test_files( tmp_path, test_directory=__file__, - test_folder="density_of_states.zip", + test_folder="band_structure.zip", ) - # estabilish filenames that we make and commonly reference - summary_filename = tmp_path / "simmate_summary.yaml" - plot_filename = tmp_path / "density_of_states.png" + SimmateMockHelper.mock_vasp(mocker) - # run the full workup - ElectronicStructure__Vasp__MatprojDensityOfStates.workup(tmp_path) + # run the full workflow, where the output files were pre-generated with + # a specific structures + structure = sample_structures["Fe_mp-13_primitive"] + ElectronicStructure__Vasp__MatprojDensityOfStates.run( + structure=structure, + directory=tmp_path, + ) + + # check output files + summary_filename = tmp_path / "simmate_summary.yaml" + plot_filename = tmp_path / "simmate_density_of_states.png" assert summary_filename.exists() assert plot_filename.exists() diff --git a/src/simmate/calculators/vasp/workflows/test/test_nudged_elastic_band.py 
b/src/simmate/calculators/vasp/workflows/test/test_nudged_elastic_band.py index c5a2370e9..922bb0fd4 100644 --- a/src/simmate/calculators/vasp/workflows/test/test_nudged_elastic_band.py +++ b/src/simmate/calculators/vasp/workflows/test/test_nudged_elastic_band.py @@ -1,17 +1,56 @@ # -*- coding: utf-8 -*- -from simmate.calculators.vasp.inputs import Potcar +import pytest + from simmate.calculators.vasp.inputs.potcar_mappings import ( PBE_ELEMENT_MAPPINGS_LOW_QUALITY, ) -from simmate.calculators.vasp.workflows.diffusion.neb_from_images_mit import ( +from simmate.calculators.vasp.workflows.diffusion.all import ( + Diffusion__Vasp__NebAllPathsMit, Diffusion__Vasp__NebFromImagesMit, ) -from simmate.conftest import copy_test_files, make_dummy_files +from simmate.conftest import SimmateMockHelper, copy_test_files from simmate.toolkit.diffusion import MigrationImages -def test_neb_setup(sample_structures, tmp_path, mocker): +@pytest.mark.slow +@pytest.mark.django_db +def test_neb_all_paths(sample_structures, tmp_path, mocker): + + SimmateMockHelper.mock_vasp(mocker) + copy_test_files( + tmp_path, + test_directory=__file__, + test_folder="nudged_elastic_band.zip", + ) + + # For testing, look at I- diffusion in Y2CF2 + structure = sample_structures["Y2CI2_mp-1206803_primitive"] + + # run the workflow and make sure it handles data properly. + state = Diffusion__Vasp__NebAllPathsMit.run( + structure=structure, + migrating_specie="I", + # command="dummycmd1; dummycmd2; dummycmd3", + directory=tmp_path, + ) + assert state.is_completed() + + # estabilish filenames that we make and commonly reference + path_dir = "diffusion.vasp.neb-single-path-mit.00" + summary_filename = tmp_path / "simmate_summary.yaml" + plot_filename = tmp_path / path_dir / "simmate_neb_plot.jpeg" + cif_filename = tmp_path / path_dir / "simmate_path_relaxed_neb.cif" + + # run the full workup + assert summary_filename.exists() + assert plot_filename.exists() + assert cif_filename.exists() + + +def test_neb_from_images_setup(sample_structures, tmp_path, mocker): + + Potcar = SimmateMockHelper.get_mocked_potcar(mocker, tmp_path) # To test this task we need to create images, which we do using I diffusion # in Y2CI2. We use [0] to grab the shortest path. 
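The test rewrites in this diff lean on the new `SimmateMockHelper` (defined in `conftest.py` further below) instead of repeating `mocker.patch.object` calls. A rough sketch of how a full-workflow test now reads; the workflow class and test name are stand-ins:

``` python
import pytest

from simmate.conftest import SimmateMockHelper


@pytest.mark.django_db
def test_example_workflow(sample_structures, tmp_path, mocker):
    # stub out POTCAR writing and the external VASP command so the workflow
    # logic can run against pre-generated output files
    SimmateMockHelper.mock_vasp(mocker)

    # ExampleWorkflow is a stand-in for any Simmate workflow class
    structure = sample_structures["Fe_mp-13_primitive"]
    state = ExampleWorkflow.run(structure=structure, directory=tmp_path)
    assert state.is_completed()
```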
@@ -24,44 +63,19 @@ def test_neb_setup(sample_structures, tmp_path, mocker): # These files exist within a series of directories 00, 01,..., 05 poscar_filenames = [tmp_path / str(n).zfill(2) / "POSCAR" for n in range(5)] - # Because we won't have POTCARs accessible, we need to cover this function - # call -- specifically have it pretend to make a file - mocker.patch.object( - Potcar, - "to_file_from_type", - return_value=make_dummy_files(potcar_filename), - ) - # try to make input files in the tmp_path Diffusion__Vasp__NebFromImagesMit.setup( migration_images=images, directory=tmp_path, ) assert incar_filename.exists() - assert potcar_filename.exists() + assert all([f.exists() for f in poscar_filenames]) + + assert potcar_filename.exists() Potcar.to_file_from_type.assert_called_with( structure.composition.elements, "PBE", potcar_filename, PBE_ELEMENT_MAPPINGS_LOW_QUALITY, ) - - -def test_neb_workup(tmp_path): - copy_test_files( - tmp_path, - test_directory=__file__, - test_folder="nudged_elastic_band.zip", - ) - - # estabilish filenames that we make and commonly reference - summary_filename = tmp_path / "simmate_summary.yaml" - plot_filename = tmp_path / "NEB_plot.jpeg" - cif_filename = tmp_path / "path_relaxed_neb.cif" - - # run the full workup - Diffusion__Vasp__NebFromImagesMit.workup(tmp_path) - assert summary_filename.exists() - assert plot_filename.exists() - assert cif_filename.exists() diff --git a/src/simmate/command_line/database.py b/src/simmate/command_line/database.py index e25f84a00..26c54675e 100644 --- a/src/simmate/command_line/database.py +++ b/src/simmate/command_line/database.py @@ -46,7 +46,7 @@ def reset(confirm_delete: bool = False, use_prebuilt: bool = None): from simmate.configuration.django.settings import DATABASES using_sqlite = DATABASES["default"]["ENGINE"] == "django.db.backends.sqlite3" - if using_sqlite and use_prebuilt == None: + if using_sqlite and use_prebuilt is None: use_prebuilt = typer.confirm( "\nIt looks like you are using the default database backend (sqlite3). \n" "Would you like to use a prebuilt-database with all third-party data " diff --git a/src/simmate/conftest.py b/src/simmate/conftest.py index 00d7d02a5..753200058 100644 --- a/src/simmate/conftest.py +++ b/src/simmate/conftest.py @@ -21,10 +21,12 @@ from django.contrib.auth.models import User from typer.testing import CliRunner +from simmate.calculators.vasp.inputs import Potcar from simmate.database.base_data_types import Spacegroup from simmate.toolkit import Composition, Structure, base_data_types from simmate.utilities import get_directory from simmate.website.test_app.models import TestStructure +from simmate.workflow_engine import S3Workflow COMPOSITIONS_STRS = [ "Fe1", @@ -181,6 +183,31 @@ def django_db_setup( structure_db.save() +@pytest.fixture(scope="session") +def command_line_runner(): + """ + Allows us to call the command line in a controlled manner, rather than + through subprocess. + """ + return CliRunner() + + +# !!! 
Disable harness until prefect is reimplemented +# from prefect.testing.utilities import prefect_test_harness +# @pytest.fixture(autouse=True, scope="session") +# def prefect_test_fixture(): +# """ +# For all prefect flows and tasks, this will automatically use a dummy-database +# """ +# with prefect_test_harness(): +# yield + +# ----------------------------------------------------------------------------- +# The remaining functions are utilities that help us manage test files and/or +# pretending to call fuctions +# ----------------------------------------------------------------------------- + + def copy_test_files(tmp_path, test_directory, test_folder): """ This is a test utility that takes a given directory and copies it's content @@ -253,17 +280,30 @@ def make_dummy_files(*filenames: str): file.write("This is a dummy file for testing.") -@pytest.fixture(scope="session") -def command_line_runner(): - return CliRunner() +class SimmateMockHelper: + @staticmethod + def get_mocked_potcar(mocker, directory: Path): + # Because we won't have POTCARs accessible, we need to cover this function + # call -- specifically have it pretend to make a file + mocker.patch.object( + Potcar, + "to_file_from_type", + return_value=make_dummy_files(directory / "POTCAR"), + ) -# !!! Disable harness until prefect is reimplemented -# from prefect.testing.utilities import prefect_test_harness -# @pytest.fixture(autouse=True, scope="session") -# def prefect_test_fixture(): -# """ -# For all prefect flows and tasks, this will automatically use a dummy-database -# """ -# with prefect_test_harness(): -# yield + return Potcar # in case we need to "assert_called_with" + + def mock_vasp(mocker): + + # Because we won't have POTCARs accessible, we need to skip this function + # call -- specifically have it pretend to make a file + mocker.patch.object(Potcar, "to_file_from_type", return_value=None) + + # We also don't want to run any commands -- for any task. We skip these + # by having the base S3task.execute just return an empty list (meaning + # no corrections were made). + mocker.patch.object(S3Workflow, "execute", return_value=[]) + + # Don't check for proper input files because POTCARs will be missing + mocker.patch.object(S3Workflow, "_check_input_files", return_value=None) diff --git a/src/simmate/database/base_data_types/band_structure.py b/src/simmate/database/base_data_types/band_structure.py index 011c9916b..970603f98 100644 --- a/src/simmate/database/base_data_types/band_structure.py +++ b/src/simmate/database/base_data_types/band_structure.py @@ -25,6 +25,8 @@ class BandStructure(DatabaseTable): class Meta: abstract = True + exclude_from_summary = ["band_structure_data"] + archive_fields = ["band_structure_data"] api_filters = dict( @@ -93,6 +95,14 @@ class Meta: # magnetic_ordering (Magnetic ordering of the calculation.) 
# equivalent_labels (Equivalent k-point labels in other k-path conventions) + def write_output_summary(self, directory: Path): + """ + In addition to writing the normal VASP output summary, this also plots + the bandstructure to "band_structure.png" + """ + super().write_output_summary(directory) + self.write_bandstructure_plot(directory) + @classmethod def _from_toolkit( cls, @@ -145,6 +155,11 @@ def get_bandstructure_plot(self): # -> matplotlib figure plot = bs_plotter.get_plot() return plot + def write_bandstructure_plot(self, directory: Path): + plot = self.get_bandstructure_plot() + plot_filename = directory / "simmate_band_structure.png" + plot.savefig(plot_filename) + class BandStructureCalc(Structure, BandStructure, Calculation): """ @@ -156,52 +171,15 @@ class BandStructureCalc(Structure, BandStructure, Calculation): class Meta: app_label = "workflows" - def update_from_vasp_run( - self, - vasprun: Vasprun, - corrections: list, - directory: Path, - ): - """ - Given a pymatgen VaspRun object, which is what's typically returned - from a simmate VaspWorkflow.run() call, this will update the database entry - with the results. - """ - - # All data analysis is done via a BandStructure object, so we convert - # the vasprun object to that first. - band_structure = vasprun.get_band_structure(line_mode=True) - - # Take our band_structure and expand its data for the rest of the columns. - new_kwargs = BandStructure.from_toolkit( - band_structure=band_structure, - as_dict=True, - ) - for key, value in new_kwargs.items(): - setattr(self, key, value) - - # lastly, we also want to save the corrections made and directory it ran in - self.corrections = corrections - self.directory = directory - - # Now we have the relaxation data all loaded and can save it to the database - self.save() - @classmethod - def from_directory(cls, directory: Path): - """ - Creates a new database entry from a directory that holds band-structure - results. For now, this assumes the directory holds vasp output files. - """ + def from_vasp_run(cls, vasprun: Vasprun, as_dict: bool = False): - # I assume the directory is from a vasp calculation, but I need to update - # this when I begin adding new calculators. - vasprun_filename = directory / "vasprun.xml" - vasprun = Vasprun(vasprun_filename) band_structure = vasprun.get_band_structure(line_mode=True) band_structure_db = cls.from_toolkit( structure=vasprun.structures[0], band_structure=band_structure, + as_dict=as_dict, ) - band_structure_db.save() + if not as_dict: + band_structure_db.save() return band_structure_db diff --git a/src/simmate/database/base_data_types/base.py b/src/simmate/database/base_data_types/base.py index e8917327c..32526e1af 100644 --- a/src/simmate/database/base_data_types/base.py +++ b/src/simmate/database/base_data_types/base.py @@ -382,6 +382,18 @@ class Meta: See the `api_filterset` property for the final filter object. """ + exclude_from_summary: list[str] = [] + """ + When writing output summaries, these columns will be ignored. This is useful + if you have a column for storing raw data that isn't friendly to read in + the yaml format. 'structure_string' is an example of a field we'd want to + exclude. + """ + + # ------------------------------------------------------------------------- + # Core methods accessing key information and writing summary files + # ------------------------------------------------------------------------- + @classmethod @property def table_name(cls) -> str: @@ -395,6 +407,208 @@ class name. 
""" return cls.__name__ + @classmethod + def get_column_names(cls) -> list[str]: + """ + Returns a list of all the column names for this table and indicates which + columns are related to other tables. This is primarily used to help + view what data is available. + """ + return [column.name for column in cls._meta.get_fields()] + + @classmethod + def show_columns(cls): + """ + Prints a list of all the column names for this table and indicates which + columns are related to other tables. This is primarily used to help users + interactively view what data is available. + """ + # Iterate through and grab the columns. Note we don't use get_column_names + # here because we are attaching relation data as well. + column_names = [ + column.name + f" (relation to {column.related_model.table_name})" + if column.is_relation + else column.name + for column in cls._meta.get_fields() + ] + + # Then use yaml to make the printout pretty (no quotes and separate lines) + print(yaml.dump(column_names)) + + @classmethod + def get_mixins(cls) -> list: # -> List[DatabaseTable] + """ + Grabs the mix-in Tables that were used to make this class. This will + be mix-ins like Structure, Forces, etc. from the + `simmate.database.base_data_types` module. + """ + # this must be imported locally because it depends on all other classes + # from this module -- and will create circular import issues if outside + from simmate.database import base_data_types as simmate_mixins + + return [ + parent + for parent in cls.__bases__ + if hasattr(simmate_mixins, parent.table_name) + and parent.table_name != "DatabaseTable" + ] + + @classmethod + def get_mixin_names(cls) -> list[str]: + """ + Grabs the mix-in Tables that were used to make this class and returns + a list of their names. + """ + return [mixin.table_name for mixin in cls.get_mixins()] + + @classmethod + def get_extra_columns(cls) -> list[str]: + """ + Finds all columns that aren't covered by the supported Table mix-ins. + + For example, a table made from... + + ``` python + from simmate.database.base_data_types import ( + table_column, + Structure, + Forces, + ) + + class ExampleTable(Structure, Forces): + custom_column1 = table_column.FloatField() + custom_column2 = table_column.FloatField() + ``` + + ... would return ... + + ``` python + ["custom_column1", "custom_column2"] + ``` + """ + + all_columns = cls.get_column_names() + columns_w_mixin = [ + column for mixin in cls.get_mixins() for column in mixin.get_column_names() + ] + extra_columns = [ + column + for column in all_columns + if column not in columns_w_mixin and column != "id" + ] + return extra_columns + + def write_output_summary(self, directory: Path): + """ + This writes a "simmate_summary.yaml" file with key output information. 
+ """ + + fields_to_exclude = self.exclude_from_summary + [ + field for mixin in self.get_mixins() for field in mixin.exclude_from_summary + ] + + all_data = {} + for column, value in self.__dict__.items(): + if ( + value != None + and not column.startswith("_") + and column not in fields_to_exclude + ): + all_data[column] = value + + # also add the table name and entry id + all_data["database_table"] = self.table_name + + summary_filename = directory / "simmate_summary.yaml" + with summary_filename.open("w") as file: + content = yaml.dump(all_data) + file.write(content) + + # ------------------------------------------------------------------------- + # Methods for loading results from files + # ------------------------------------------------------------------------- + + @classmethod + def from_directory(cls, directory: Path, as_dict: bool = False): + """ + Loads data from a directory of files + """ + + # check if we have a VASP directory + vasprun_filename = directory / "vasprun.xml" + if vasprun_filename.exists(): + return cls.from_vasp_directory(directory, as_dict=as_dict) + + # TODO: add new elif statements when I begin adding new calculators. + + # If we don't detect any directory, we return an empty dictionary. + # We don't print a warning or error for now because users may want + # to populate data entirely in python. + return {} if as_dict else None + + @classmethod + def from_vasp_directory(cls, directory: Path, as_dict: bool = False): + + from simmate.calculators.vasp.outputs import Vasprun + + vasprun = Vasprun.from_directory(directory) + return cls.from_vasp_run(vasprun, as_dict=as_dict) + + # ------------------------------------------------------------------------- + # Methods that handle updating a database entry and its related entries + # ------------------------------------------------------------------------- + + def update_from_fields(self, **fields_to_update): + # go through each key in the dictionary and attach the value to the + # attribute of this entry + for key, value in fields_to_update.items(): + setattr(self, key, value) + + # Now we have all data loaded and attached to the database entry, we + # can call save() to actually update the database + self.save() + + def update_from_toolkit(self, **fields_to_update): + """ + Given fundamental "base info" and toolkit objects, this method will + populate all relevant columns. + + Note, base info also corresponds to the `archive_fieldset`, which + represents raw data. + + This method is meant for updating existing database entries with new + data. If your creating a brand-new database entry, use the + `from_toolkit` method instead. + """ + fields_expanded = self.from_toolkit(as_dict=True, **fields_to_update) + self.update_from_fields(**fields_expanded) + + def update_from_directory(self, directory: Path): + # This is for simple tables. If there are related table entries that + # need to be created/updated, then this method should be overwritten. + data_from_dir = self.from_directory(directory, as_dict=True) + self.update_from_toolkit(**data_from_dir) + + def update_from_results(self, results: dict, directory: Path): + """ + Updates a database from the results of a workflow run. + + Typically this method is not called directly, as it is used within + `Workflow._save_to_database` automatically. + """ + + # First update using the results dictionary + self.update_from_toolkit(directory=str(directory), **results) + + # Many calculations and datatables will have a "from_directory" method + # that loads data from files. 
We use this to grab extra fields and + # add them to our results. + self.update_from_directory(directory) + + # ------------------------------------------------------------------------- + # Methods that handle updating a database entry and its related entries + # ------------------------------------------------------------------------- + @classmethod def from_toolkit(cls, as_dict: bool = False, **kwargs): """ @@ -437,6 +651,7 @@ def from_toolkit(cls, as_dict: bool = False, **kwargs): # a database column. all_data.pop("structure", None) all_data.pop("migration_hop", None) + all_data.pop("migration_images", None) all_data.pop("band_structure", None) all_data.pop("density_of_states", None) @@ -477,86 +692,49 @@ def from_toolkit(cls, as_dict: bool = False, **kwargs): # return the dictionary return all_data if as_dict else cls(**all_data) - def update_from_toolkit(self, **kwargs): - """ - Given fundamental "base info" and toolkit objects, this method will - populate all relevant columns. - - Note, base info also corresponds to the `archive_fieldset`, which - represents raw data. + # ------------------------------------------------------------------------- + # Methods creating new archives + # ------------------------------------------------------------------------- - This method is meant for updating existing database entries with new - data. If your creating a brand-new database entry, use the - `from_toolkit` method instead. + @classmethod + def to_archive(cls, filename: Path | str = None): """ - new_kwargs = self.from_toolkit(as_dict=True, **kwargs) - for new_kwarg, new_value in new_kwargs.items(): - setattr(self, new_kwarg, new_value) - self.save() + Writes the entire database table to an archive file. If you prefer + a subset of entries for the archive, use the to_archive method + on your SearchResults instead (e.g. MyTable.objects.filter(...).to_archive()) + """ + cls.objects.all().to_archive(filename) @classmethod - def _confirm_override( - cls, - confirm_override: bool, - parallel: bool, - confirm_sqlite_parallel: bool, - ): - """ - A utility to make sure the user wants to load new data into their table - and (if they are using sqlite) that they are aware of the risks of - parallelizing their loading. + @property + def archive_fieldset(cls) -> list[str]: - This utility should not be called directly, as it is used within - load_archive and load_remote_archive. - """ - # first check if the table has data in it already. We raise errors - # to stop the user from doing unneccessary and potentiall destructive - # downloads - if cls.objects.exists() and not confirm_override: - # if the user has a third-party app, we can be more specific with - # our error message. - if cls._meta.app_label == "third_parties": - raise Exception( - "It looks like you're using a third-party database table and " - "that the table already has data in it! This means you already " - "called load_remote_archive and don't need to do it again. " - "If you are trying reload a newer version of this data, make " - "sure you empty this table first. This can be done by " - "reseting your database or manually deleting all objects " - "with `ExampleTable.objects.all().delete()`" - ) + all_fields = ["id", "updated_at", "created_at", "source"] - # otherwise warning the user of overwriting data with matching - # primary keys -- and ask them to use confirm_override. - raise Exception( - "It looks like this table already has data in it! By loading an " - "archive, you could potentially overwrite this data. 
The most " - "common mistake is non-unique primary keys between your current " - "data and the archive -- if there is a duplicate primary key, it " - "will overwrite your data. If you are confident the data is safe " - "to load into your database, run this command again with " - "confirm_override=True." - ) + # If calling this method on the base class, just return the sole mix-in. + if cls == DatabaseTable: + return all_fields - # Django and Dask can only handle so much for the parallelization - # of database writing with SQLite. So if the user has SQLite as their - # backend, we need to stop them from using this feature. - from simmate.configuration.django.settings import DATABASES + # Otherwise we need to go through the mix-ins and add their fields to + # the list + all_fields += [ + field for mixin in cls.get_mixins() for field in mixin.archive_fields + ] - if parallel and not confirm_sqlite_parallel and "sqlite3" in str(DATABASES): - raise Exception( - "It looks like you are trying to run things in parallel but are " - "using the default database backend (sqlite3), which is not " - "always stable for massively parallel methods. You can still " - "do this, but this message serves as a word of caution. " - "You If you see error messages pop up saying 'database is " - "locked', then your database is not stable at the rate you're " - "trying to write data. This is a sign that you should either " - "(1) switch to a different database backend such as Postgres " - "or (2) reduce the parallelization of your tasks. If you are " - "comfortable with these warnings and know what you're doing, " - "set confirm_sqlite_parallel=True." - ) + # Sometimes a column will be disabled by adding "--" in front of the + # column name. For example, "--band_gap" would exclude storing the band + # gap in the archive. We look for any columns that start with this + # and then remove them + for field in cls.archive_fields: + if field.startswith("--"): + all_fields.remove(field.removeprefix("--")) + else: + all_fields.append(field) + return all_fields + + # ------------------------------------------------------------------------- + # Methods that handle loading results from archives + # ------------------------------------------------------------------------- # @transaction.atomic # We can't have an atomic transaction if we use Dask @classmethod @@ -798,122 +976,72 @@ def load_remote_archive( logging.info("Done.") @classmethod - def get_column_names(cls) -> list[str]: - """ - Returns a list of all the column names for this table and indicates which - columns are related to other tables. This is primarily used to help - view what data is available. - """ - return [column.name for column in cls._meta.get_fields()] - - @classmethod - def show_columns(cls): - """ - Prints a list of all the column names for this table and indicates which - columns are related to other tables. This is primarily used to help users - interactively view what data is available. - """ - # Iterate through and grab the columns. Note we don't use get_column_names - # here because we are attaching relation data as well. - column_names = [ - column.name + f" (relation to {column.related_model.table_name})" - if column.is_relation - else column.name - for column in cls._meta.get_fields() - ] - - # Then use yaml to make the printout pretty (no quotes and separate lines) - print(yaml.dump(column_names)) - - @classmethod - def get_mixins(cls) -> list: # -> List[DatabaseTable] - """ - Grabs the mix-in Tables that were used to make this class. 
This will - be mix-ins like Structure, Forces, etc. from the - `simmate.database.base_data_types` module. - """ - # this must be imported locally because it depends on all other classes - # from this module -- and will create circular import issues if outside - from simmate.database import base_data_types as simmate_mixins - - return [ - parent - for parent in cls.__bases__ - if hasattr(simmate_mixins, parent.table_name) - and parent.table_name != "DatabaseTable" - ] - - @classmethod - def get_mixin_names(cls) -> list[str]: - """ - Grabs the mix-in Tables that were used to make this class and returns - a list of their names. - """ - return [mixin.table_name for mixin in cls.get_mixins()] - - @classmethod - def get_extra_columns(cls) -> list[str]: + def _confirm_override( + cls, + confirm_override: bool, + parallel: bool, + confirm_sqlite_parallel: bool, + ): """ - Finds all columns that aren't covered by the supported Table mix-ins. - - For example, a table made from... - - ``` python - from simmate.database.base_data_types import ( - table_column, - Structure, - Forces, - ) - - class ExampleTable(Structure, Forces): - custom_column1 = table_column.FloatField() - custom_column2 = table_column.FloatField() - ``` - - ... would return ... + A utility to make sure the user wants to load new data into their table + and (if they are using sqlite) that they are aware of the risks of + parallelizing their loading. - ``` python - ["custom_column1", "custom_column2"] - ``` + This utility should not be called directly, as it is used within + load_archive and load_remote_archive. """ + # first check if the table has data in it already. We raise errors + # to stop the user from doing unneccessary and potentiall destructive + # downloads + if cls.objects.exists() and not confirm_override: + # if the user has a third-party app, we can be more specific with + # our error message. + if cls._meta.app_label == "third_parties": + raise Exception( + "It looks like you're using a third-party database table and " + "that the table already has data in it! This means you already " + "called load_remote_archive and don't need to do it again. " + "If you are trying reload a newer version of this data, make " + "sure you empty this table first. This can be done by " + "reseting your database or manually deleting all objects " + "with `ExampleTable.objects.all().delete()`" + ) - all_columns = cls.get_column_names() - columns_w_mixin = [ - column for mixin in cls.get_mixins() for column in mixin.get_column_names() - ] - extra_columns = [ - column - for column in all_columns - if column not in columns_w_mixin and column != "id" - ] - return extra_columns - - @classmethod - @property - def archive_fieldset(cls) -> list[str]: - - all_fields = ["id", "updated_at", "created_at", "source"] + # otherwise warning the user of overwriting data with matching + # primary keys -- and ask them to use confirm_override. + raise Exception( + "It looks like this table already has data in it! By loading an " + "archive, you could potentially overwrite this data. The most " + "common mistake is non-unique primary keys between your current " + "data and the archive -- if there is a duplicate primary key, it " + "will overwrite your data. If you are confident the data is safe " + "to load into your database, run this command again with " + "confirm_override=True." + ) - # If calling this method on the base class, just return the sole mix-in. 
- if cls == DatabaseTable: - return all_fields + # Django and Dask can only handle so much for the parallelization + # of database writing with SQLite. So if the user has SQLite as their + # backend, we need to stop them from using this feature. + from simmate.configuration.django.settings import DATABASES - # Otherwise we need to go through the mix-ins and add their fields to - # the list - all_fields += [ - field for mixin in cls.get_mixins() for field in mixin.archive_fields - ] + if parallel and not confirm_sqlite_parallel and "sqlite3" in str(DATABASES): + raise Exception( + "It looks like you are trying to run things in parallel but are " + "using the default database backend (sqlite3), which is not " + "always stable for massively parallel methods. You can still " + "do this, but this message serves as a word of caution. " + "You If you see error messages pop up saying 'database is " + "locked', then your database is not stable at the rate you're " + "trying to write data. This is a sign that you should either " + "(1) switch to a different database backend such as Postgres " + "or (2) reduce the parallelization of your tasks. If you are " + "comfortable with these warnings and know what you're doing, " + "set confirm_sqlite_parallel=True." + ) - # Sometimes a column will be disabled by adding "--" in front of the - # column name. For example, "--band_gap" would exclude storing the band - # gap in the archive. We look for any columns that start with this - # and then remove them - for field in cls.archive_fields: - if field.startswith("--"): - all_fields.remove(field.removeprefix("--")) - else: - all_fields.append(field) - return all_fields + # ------------------------------------------------------------------------- + # Methods that set up the REST API and filters that can be queried with + # ------------------------------------------------------------------------- @classmethod @property diff --git a/src/simmate/database/base_data_types/calculation.py b/src/simmate/database/base_data_types/calculation.py index 304d5b728..5af0ae083 100644 --- a/src/simmate/database/base_data_types/calculation.py +++ b/src/simmate/database/base_data_types/calculation.py @@ -20,18 +20,20 @@ class Meta: archive_fields = [ "workflow_name", - "location", + "workflow_version", + "computer_system", "directory", "run_id", "corrections", ] - api_filters = dict( - workflow_name=["exact"], - location=["exact"], - directory=["exact"], - run_id=["exact"], - ) + api_filters = { + "workflow_name": ["exact"], + "workflow_version": ["exact"], + "computer_system": ["exact"], + "directory": ["exact"], + "run_id": ["exact"], + } workflow_name = table_column.CharField( max_length=75, @@ -42,7 +44,16 @@ class Meta: The full name of the workflow used, such as "relaxation.vasp.matproj". """ - location = table_column.CharField( + workflow_version = table_column.CharField( + max_length=75, + blank=True, + null=True, + ) + """ + The version of the workflow being used, such as "0.7.0". + """ + + computer_system = table_column.CharField( max_length=75, blank=True, null=True, @@ -79,74 +90,18 @@ class Meta: corrections = table_column.JSONField(blank=True, null=True) """ - Simmate workflows often have ErrorHandlers that fix any issues while the + S3 workflows often have ErrorHandlers that fix any issues while the calaculation ran. This often involves changing settings, so we store any of those changes here. 
""" - @property - def prefect_cloud_link(self) -> str: - """ - URL to this calculation (flow-run) in the Prefect Cloud website. - - This assumes that the calculation was registered with prefect cloud and - doesn't check to confirm it's been registered. To actually confirm that, - use the `flow_run_view` attribute instead. - """ - return f"https://cloud.prefect.io/simmate/flow-run/{self.run_id}" - - @property - def flow_run_view(self): # -> FlowRunView - """ - Checks if the run_id was registered with Prefect Cloud, and - if so, returns a - [FlowRunView](https://docs.prefect.io/orchestration/flow-runs/inspection.html) - that hold metadata such as the status. This metadata includes... - - agent_id - - auto_scheduled - - context - - created_by_user_id - - end_time - - flow_id - - labels - - name - - parameters - - scheduled_start_time - - start_time - - state - - tenant_id - - times_resurrected - - version - - If Prefect Cloud is not configured or if the calculation was ran - locally, the None is returned. - """ - raise NotImplementedError("This feature has no been migrated to Prefect 2.0") - - @property - def prefect_flow_run_name(self) -> str: - """ - Gives the user-friendly name of this run if the run_id - was registered with Prefect Cloud. (an example name is "friendly-bumblebee"). - """ - flowrunview = self.flow_run_view - return flowrunview.name if flowrunview else None - - @property - def prefect_state(self) -> str: - """ - Gives the current state of this run if the run_id - was registered with Prefect Cloud. (ex: "Scheduled" or "Successful") - """ - flowrunview = self.flow_run_view - return flowrunview.state.__class__.__name__ if flowrunview else None - @classmethod def from_run_context( cls, - run_id: str = None, + run_id: str, workflow_name: str = None, - **kwargs, + workflow_version: str = None, + **kwargs, # other parameters you'd normally pass to 'from_toolkit' ): """ Given a prefect id, this method will do one of the following... @@ -157,25 +112,9 @@ def from_run_context( It will then return the corresponding Calculation instance. """ - if not run_id or not workflow_name: - # Grab the database_table that we want to save the results in - from prefect.context import FlowRunContext - - run_context = FlowRunContext.get() - if run_context: - workflow = run_context.flow.simmate_workflow - workflow_name = workflow.name_full - run_id = str(run_context.flow_run.id) - assert workflow.database_table == cls - else: - raise Exception( - "No Prefect FlowRunContext was detected, so you must provide " - "flow_id and workflow_name to the from_run_context method." - ) - # Depending on how a workflow was submitted, there may be a calculation # extry existing already -- which we need to grab and then update. If it's - # not there, we create a new one! + # not there, we create a new one # check if the calculation already exists in our database, and if so, # grab it and return it. @@ -187,30 +126,93 @@ def from_run_context( # information to the from_toolkit method rather than directly to cls. calculation = cls.from_toolkit( run_id=run_id, - location=platform.node(), + computer_system=platform.node(), workflow_name=workflow_name, + workflow_version=workflow_version, **kwargs, ) calculation.save() return calculation - # TODO: Consider adding resource use metadata (e.g. from VASP) - # I may want to add these fields because Prefect doesn't store run stats - # indefinitely AND it doesn't give detail memory use, number of cores, etc. 
- # If all of these are too much, I could do a json field of "run_stats" instead - # - average_memory (The average memory used in kb) - # - max_memory (The maximum memory used in kb) - # - elapsed_time (The real time elapsed in seconds) - # - system_time(The system CPU time in seconds) - # - user_time(The user CPU time spent by VASP in seconds) - # - total_time (The total CPU time for this calculation) - # - cores (The number of cores used by VASP (some clusters print `mpi-ranks` here)) - - # TODO: Consider linking to parent calculations for nested workflow runs - # Where this calculation plays a role within a "nested" workflow calculation. - # Becuase this structure can be reused by multiple workflows, we make this - # a list of source-like objects. For example, a relaxation could be part of - # a series of relaxations (like in StagedRelaxation) or it can be an initial - # step of a BandStructure calculation. - # parent_nested_calculations = table_column.JSONField(blank=True, null=True) + # ------------------------------------------------------------------------- + # All methods below are for prefect, but because of the prefect 2.0 migration, + # these are disabled for the time being. + # ------------------------------------------------------------------------- + + # @classmethod + # def from_run_context( + # cls, + # run_id: str = None, # inputs are optional when using prefect + # workflow_name: str = None, + # **kwargs, # other parameters you'd normally pass to 'from_toolkit' + # ): + # if not run_id or not workflow_name: + # # Grab the database_table that we want to save the results in + # from prefect.context import FlowRunContext + # run_context = FlowRunContext.get() + # if run_context: + # workflow = run_context.flow.simmate_workflow + # workflow_name = workflow.name_full + # run_id = str(run_context.flow_run.id) + # assert workflow.database_table == cls + # else: + # raise Exception( + # "No Prefect FlowRunContext was detected, so you must provide " + # "flow_id and workflow_name to the from_run_context method." + # ) + + # @property + # def prefect_cloud_link(self) -> str: + # """ + # URL to this calculation (flow-run) in the Prefect Cloud website. + # This assumes that the calculation was registered with prefect cloud and + # doesn't check to confirm it's been registered. To actually confirm that, + # use the `flow_run_view` attribute instead. + # """ + # return f"https://cloud.prefect.io/simmate/flow-run/{self.run_id}" + + # @property + # def flow_run_view(self): # -> FlowRunView + # """ + # Checks if the run_id was registered with Prefect Cloud, and + # if so, returns a + # [FlowRunView](https://docs.prefect.io/orchestration/flow-runs/inspection.html) + # that hold metadata such as the status. This metadata includes... + # - agent_id + # - auto_scheduled + # - context + # - created_by_user_id + # - end_time + # - flow_id + # - labels + # - name + # - parameters + # - scheduled_start_time + # - start_time + # - state + # - tenant_id + # - times_resurrected + # - version + # If Prefect Cloud is not configured or if the calculation was ran + # locally, the None is returned. + # """ + # raise NotImplementedError("This feature has no been migrated to Prefect 2.0") + + # @property + # def prefect_flow_run_name(self) -> str: + # """ + # Gives the user-friendly name of this run if the run_id + # was registered with Prefect Cloud. (an example name is "friendly-bumblebee"). 
+ # """ + # flowrunview = self.flow_run_view + # return flowrunview.name if flowrunview else None + + # @property + # def prefect_state(self) -> str: + # """ + # Gives the current state of this run if the run_id + # was registered with Prefect Cloud. (ex: "Scheduled" or "Successful") + # """ + # flowrunview = self.flow_run_view + # return flowrunview.state.__class__.__name__ if flowrunview else None diff --git a/src/simmate/database/base_data_types/calculation_nested.py b/src/simmate/database/base_data_types/calculation_nested.py index 330d399e1..4380f8587 100644 --- a/src/simmate/database/base_data_types/calculation_nested.py +++ b/src/simmate/database/base_data_types/calculation_nested.py @@ -1,6 +1,14 @@ # -*- coding: utf-8 -*- -from simmate.database.base_data_types import Calculation, table_column +""" +WARNING: This module is still at the planning stage and the code below serve as +placeholder notes. + +In the future, it will be a table that helps link together results from +multi-stage workflows. +""" + +from simmate.database.base_data_types import Calculation # , table_column class NestedCalculation(Calculation): @@ -30,96 +38,6 @@ class Meta: corrections = None # For now I delete this column. This line removes the field. - @classmethod - def create_subclass_from_calcs( - cls, - name: str, - child_database_tables: list[Calculation], - module: str, - **extra_columns, - ): - """ - Dynamically creates a subclass of NestedCalculation -- and handles linking - together all child calculation tables. - - `simmate.calculators.vasp.database.relaxation` shows an example of creating - a table from this class: - - ``` python - StagedRelaxation = NestedCalculation.create_subclass_from_calcs( - "StagedRelaxation", - [ - Quality00Relaxation, - Quality01Relaxation, - Quality02Relaxation, - Quality03Relaxation, - Quality04Relaxation, - ], - module=__name__, - ) - ``` - - To add custom columns, you can do the following: - - ``` python - from simmate.database.base_data_types import table_column - - StagedRelaxation = NestedCalculation.create_subclass_from_calcs( - ... # everything is the same as above - custom_column_01=table_column.FloatField() - ) - ``` - - #### Parameters - - - `name` : - Name of the subclass that is output. - - `child_database_tables` : - list of database tables for the nested workflows. This table links - these sub-tables together so results can be viewed from each step. - - `module` : - name of the module this subclass should be associated with. Typically, - you should pass __name__ to this. - **extra_columns : TYPE - Additional columns to add to the table. The keyword will be the - column name and the value should match django options - (e.g. table_column.FloatField()) - - #### Returns - - NewClass : - A subclass of NestedCalculation. - - """ - - # BUG: I assume a workflow won't point to the save calculation table - # more than once... What's a scenario where this isn't true? - # I can only think of multi-structure workflows (like EvolutionarySearch) - # which I don't give their own table for now. 
- new_columns = {} - for child_calc in child_database_tables: - new_column = table_column.OneToOneField( - child_calc, - on_delete=table_column.CASCADE, - # related_name=..., - blank=True, - null=True, - ) - new_columns[f"{child_calc._meta.model_name}"] = new_column - - # Now put all the fields together to make the new class - NewClass = cls.create_subclass( - name, - **new_columns, - **extra_columns, - # also have child calcs list as an attribute - child_database_tables=child_database_tables, - module=module, - ) - - # we now have a new child class and avoided writing some boilerplate code! - return NewClass - # def update_calculation(self): # """ # This is an experimental method that iterates through child workflow tables diff --git a/src/simmate/database/base_data_types/density_of_states.py b/src/simmate/database/base_data_types/density_of_states.py index b0b981ed7..789d611a7 100644 --- a/src/simmate/database/base_data_types/density_of_states.py +++ b/src/simmate/database/base_data_types/density_of_states.py @@ -23,6 +23,8 @@ class DensityofStates(DatabaseTable): class Meta: abstract = True + exclude_from_summary = ["density_of_states_data"] + archive_fields = ["density_of_states_data"] api_filters = dict( @@ -67,6 +69,15 @@ class Meta: # spin_polarization (float "Spin polarization at the fermi level.") # magnetic_ordering (Magnetic ordering of the calculation.) + def write_output_summary(self, directory: Path): + """ + In addition to writing the normal VASP output summary, this also plots + the DOS to "density_of_states.png" + """ + + super().write_output_summary(directory) + self.write_densityofstates_plot(directory) + @classmethod def _from_toolkit( cls, @@ -78,10 +89,10 @@ def _from_toolkit( data = ( dict( density_of_states_data=density_of_states.as_dict(), - band_gap=density_of_states.get_gap(), + band_gap=float(density_of_states.get_gap()), energy_fermi=density_of_states.efermi, - conduction_band_minimum=density_of_states.get_cbm_vbm()[0], - valence_band_maximum=density_of_states.get_cbm_vbm()[1], + conduction_band_minimum=float(density_of_states.get_cbm_vbm()[0]), + valence_band_maximum=float(density_of_states.get_cbm_vbm()[1]), ) if density_of_states else {} @@ -130,6 +141,11 @@ def get_densityofstates_plot(self): plot = plotter.get_plot() return plot + def write_densityofstates_plot(self, directory: Path): + plot = self.get_densityofstates_plot() + plot_filename = directory / "simmate_density_of_states.png" + plot.savefig(plot_filename) + class DensityofStatesCalc(Structure, DensityofStates, Calculation): """ @@ -141,51 +157,14 @@ class DensityofStatesCalc(Structure, DensityofStates, Calculation): class Meta: app_label = "workflows" - def update_from_vasp_run( - self, vasprun: Vasprun, corrections: list, directory: Path - ): - """ - Given a pymatgen VaspRun object, which is what's typically returned - from a simmate VaspWorkflow.run() call, this will update the database entry - with the results. - """ - - # Takes a pymatgen VaspRun object, which is what's typically returned - # from a simmate VaspWorkflow.run() call. - - # All data analysis is done via a CompleteDOS object, so we convert - # the vasprun object to that first. - density_of_states = vasprun.complete_dos - - # Take our dos and expand its data for the rest of the columns. 
- new_kwargs = DensityofStates.from_toolkit( - density_of_states=density_of_states, - as_dict=True, - ) - for key, value in new_kwargs.items(): - setattr(self, key, value) - - # lastly, we also want to save the corrections made and directory it ran in - self.corrections = corrections - self.directory = directory - - # Now we have the relaxation data all loaded and can save it to the database - self.save() - @classmethod - def from_directory(cls, directory: Path): - """ - Creates a new database entry from a directory that holds band-structure - results. For now, this assumes the directory holds vasp output files. - """ + def from_vasp_run(cls, vasprun: Vasprun, as_dict: bool = False): - # I assume the directory is from a vasp calculation, but I need to update - # this when I begin adding new calculators. - vasprun_filename = directory / "vasprun.xml" - vasprun = Vasprun(vasprun_filename) density_of_states_db = cls.from_toolkit( structure=vasprun.structures[0], density_of_states=vasprun.complete_dos, + as_dict=as_dict, ) - density_of_states_db.save() + if not as_dict: + density_of_states_db.save() return density_of_states_db diff --git a/src/simmate/database/base_data_types/dynamics.py b/src/simmate/database/base_data_types/dynamics.py index 507bef7fe..226a649b9 100644 --- a/src/simmate/database/base_data_types/dynamics.py +++ b/src/simmate/database/base_data_types/dynamics.py @@ -1,11 +1,9 @@ # -*- coding: utf-8 -*- -""" -This module is experimental and subject to change. -""" - from pathlib import Path +import plotly.graph_objects as plotly_go +from plotly.subplots import make_subplots from pymatgen.io.vasp.outputs import Vasprun from simmate.database.base_data_types import ( @@ -59,12 +57,35 @@ class Meta: stopped early. """ - def update_from_vasp_run( - self, - vasprun: Vasprun, - corrections: list[tuple[str]], - directory: Path, - ): + def write_output_summary(self, directory: Path): + super().write_output_summary(directory) + self.write_convergence_plot(directory) + + @classmethod + def from_vasp_run(cls, vasprun: Vasprun): + raise NotImplementedError( + "Dynamics runs cannot currently be loaded from a dir/vasprun, so" + "input parameters such as temperature and nsteps are not loaded. " + "This feature is still under development" + ) + # See Relaxation.from_vasp_run as a starting point + + def update_from_directory(self, directory: Path): + + # check if we have a VASP directory + vasprun_filename = directory / "vasprun.xml" + if not vasprun_filename.exists(): + # raise Exception( + # "Only VASP output directories are supported at the moment" + # ) + return # just exit + + from simmate.calculators.vasp.outputs import Vasprun + + vasprun = Vasprun.from_directory(directory) + self.update_from_vasp_run(vasprun) + + def update_from_vasp_run(self, vasprun: Vasprun): """ Given a Vasprun object from a finished dynamics run, this will update the DynamicsRun table entry and the corresponding DynamicsIonicStep entries. 
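For orientation, here is a rough sketch of how the new `update_from_directory` and `write_output_summary` hooks on `DynamicsRun` are expected to chain together once a molecular-dynamics run finishes. `MyDynamicsRun`, the workflow name, and the folder path are placeholders for illustration only, not real Simmate names:

``` python
from pathlib import Path

from simmate.toolkit import Structure

# toy starting structure (single-atom cubic cell) just to make the sketch concrete
structure = Structure(
    lattice=[[3, 0, 0], [0, 3, 0], [0, 0, 3]],
    species=["Na"],
    coords=[[0, 0, 0]],
)

# `MyDynamicsRun` stands in for any concrete subclass of DynamicsRun
entry = MyDynamicsRun.from_run_context(
    run_id="example-id-123",
    workflow_name="dynamics.vasp.example",  # illustrative name
    workflow_version="1.2.3",
    structure=structure,
)
entry.save()

directory = Path("my_md_run")  # folder that holds the finished vasprun.xml

# loads every ionic step (and its temperature) into the related table;
# silently returns if no vasprun.xml is found
entry.update_from_directory(directory)

# writes the usual summary plus the stacked "simmate_convergence.html" plot
entry.write_output_summary(directory)
```

The silent return in `update_from_directory` (rather than the commented-out exception) leaves room for non-VASP calculators later.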
@@ -102,14 +123,11 @@ def update_from_vasp_run( site_forces=ionic_step.get("forces", None), lattice_stress=ionic_step.get("stress", None), temperature=self._get_temperature_at_step(number), + # simulation_time=number*self.time_step, dynamics_run=self, # this links the structure to this dynamics run ) structure.save() - # lastly, we also want to save the corrections made and directory it ran in - self.corrections = corrections - self.directory = directory - # Now we have the relaxation data all loaded and can save it to the database self.save() @@ -119,6 +137,94 @@ def _get_temperature_at_step(self, step_number: int): def _get_temperature_step_size(self): return (self.temperature_end - self.temperature_start) / self.nsteps + def get_convergence_plot(self): + + # Grab the calculation's structure and convert it to a dataframe + structures_dataframe = self.structures.order_by("number").to_dataframe() + + # We will be making a figure that consists of 3 stacked subplots that + # all share the x-axis of ionic_step_number + figure = make_subplots( + rows=4, + cols=1, + shared_xaxes=True, + ) + + # Generate a plot for Energy (per atom) + figure.add_trace( + plotly_go.Scatter( + x=structures_dataframe["number"], + y=structures_dataframe["energy_per_atom"], + ), + row=1, + col=1, + ) + + # Generate a plot for Forces (norm per atom) + figure.add_trace( + plotly_go.Scatter( + x=structures_dataframe["number"], + y=structures_dataframe["site_forces_norm_per_atom"], + ), + row=2, + col=1, + ) + + # Generate a plot for Stress (norm per atom) + figure.add_trace( + plotly_go.Scatter( + x=structures_dataframe["number"], + y=structures_dataframe["lattice_stress_norm_per_atom"], + ), + row=3, + col=1, + ) + + # Generate a plot for Stress (norm per atom) + figure.add_trace( + plotly_go.Scatter( + x=structures_dataframe["number"], + y=structures_dataframe["temperature"], + ), + row=4, + col=1, + ) + + # Now let's clean up some formatting and add the axes titles + figure.update_layout( + width=600, + height=600, + showlegend=False, + xaxis3_title="Ionic Step (#)", + yaxis_title="Energy (eV/atom)", + yaxis2_title="Site Forces", + yaxis3_title="Lattice Stress", + yaxis4_title="Temperature (K)", + ) + + # we return the figure object for the user + return figure + + def write_convergence_plot(self, directory: Path): + figure = self.get_convergence_plot() + figure.write_html( + directory / "simmate_convergence.html", + include_plotlyjs="cdn", + ) + + def view_convergence_plot(self): + figure = self.get_convergence_plot() + figure.show(renderer="browser") + + def get_convergence_plot_html(self): + # Make the convergence figure and convert it to an html div + figure_convergence = self.get_convergence_plot() + figure_convergence_html = figure_convergence.to_html( + full_html=False, + include_plotlyjs=False, + ) + return figure_convergence_html + class DynamicsIonicStep(Structure, Thermodynamics, Forces): """ @@ -152,6 +258,27 @@ class Meta: is off-temperature. """ + dynamics_run = table_column.ForeignKey( + DynamicsRun, + on_delete=table_column.CASCADE, + related_name="structures", + ) + """ + All structures in this table come from dynamics run calculations, where + there can be many structures (one for each ionic step) linked to a + single run. This means the start structure, end structure, and + those structure in-between are stored together here. + Therefore, there's just a simple column stating which relaxation it + belongs to. 
+ """ + + # TODO: + # simulation_time = table_column.FloatField(blank=True, null=True) + # """ + # The total simulation time up to this ionic step (in picoseconds). This is + # just "number*time_step", but we store this in the database for easy access. + # """ + # TODO: Additional options from Vasprun.as_dict to consider adding # e_0_energy # e_fr_energy @@ -159,15 +286,3 @@ class Meta: # lattice kinetic # nosekinetic # nosepot - - # All structures in this table come from dynamics run calculations, where - # there can be many structures (one for each ionic step) linked to a - # single run. This means the start structure, end structure, and - # those structure in-between are stored together here. - # Therefore, there's just a simple column stating which relaxation it - # belongs to. - dynamics_run = table_column.ForeignKey( - DynamicsRun, - on_delete=table_column.CASCADE, - related_name="structures", - ) diff --git a/src/simmate/database/base_data_types/forces.py b/src/simmate/database/base_data_types/forces.py index c35cdc25e..b2d6e1d4c 100644 --- a/src/simmate/database/base_data_types/forces.py +++ b/src/simmate/database/base_data_types/forces.py @@ -10,6 +10,11 @@ class Forces(DatabaseTable): class Meta: abstract = True + exclude_from_summary = [ + "site_forces", + "lattice_stress", + ] + archive_fields = [ "site_forces", "lattice_stress", @@ -71,7 +76,7 @@ class Meta: @classmethod def _from_toolkit( cls, - structure: ToolkitStructure, + structure: ToolkitStructure = None, site_forces=None, lattice_stress=None, as_dict=False, @@ -80,15 +85,21 @@ def _from_toolkit( Given site forces and lattice stress, this function builds the rest of the required fields for this class as a dictionary. """ + # TODO: in the future, this should accept an IonicStep toolkit object # or maybe Structure + Forces toolkit objects. site_data = ( dict( site_forces=site_forces, - site_force_norm_max=max([numpy.linalg.norm(f) for f in site_forces]), - site_forces_norm=numpy.linalg.norm(site_forces), - site_forces_norm_per_atom=numpy.linalg.norm(site_forces) - / structure.num_sites, + site_force_norm_max=float( + max([numpy.linalg.norm(f) for f in site_forces]) + ), + site_forces_norm=float(numpy.linalg.norm(site_forces)), + site_forces_norm_per_atom=float( + numpy.linalg.norm(site_forces) / structure.num_sites + ) + if structure + else None, ) if site_forces else {} @@ -97,9 +108,12 @@ def _from_toolkit( lattice_data = ( dict( lattice_stress=lattice_stress, - lattice_stress_norm=numpy.linalg.norm(lattice_stress), - lattice_stress_norm_per_atom=numpy.linalg.norm(lattice_stress) - / structure.num_sites, + lattice_stress_norm=float(numpy.linalg.norm(lattice_stress)), + lattice_stress_norm_per_atom=float( + numpy.linalg.norm(lattice_stress) / structure.num_sites + ) + if structure + else None, ) if lattice_stress else {} diff --git a/src/simmate/database/base_data_types/nudged_elastic_band.py b/src/simmate/database/base_data_types/nudged_elastic_band.py index 96290f350..e41e1a4e7 100644 --- a/src/simmate/database/base_data_types/nudged_elastic_band.py +++ b/src/simmate/database/base_data_types/nudged_elastic_band.py @@ -1,22 +1,19 @@ # -*- coding: utf-8 -*- -""" -WARNING: This module is experimental and subject to change. 
-""" - from pathlib import Path from pymatgen.analysis.transition_state import NEBAnalysis from pymatgen.core.sites import PeriodicSite from pymatgen.symmetry.analyzer import SpacegroupAnalyzer -from simmate.database.base_data_types import DatabaseTable, Structure, table_column +from simmate.calculators.vasp.outputs import Vasprun +from simmate.database.base_data_types import Calculation, Structure, table_column from simmate.toolkit import Structure as ToolkitStructure from simmate.toolkit.diffusion import MigrationHop as ToolkitMigrationHop +from simmate.toolkit.diffusion import MigrationImages -# TODO: consider making a NestedCalculation -class DiffusionAnalysis(Structure): +class DiffusionAnalysis(Structure, Calculation): class Meta: app_label = "workflows" @@ -92,7 +89,7 @@ def from_toolkit( return structure_dict if as_dict else cls(**structure_dict) @classmethod - def from_directory(cls, directory: Path, **kwargs): + def from_directory(cls, directory: Path): """ Creates a new database entry from a directory that holds diffusion analysis results. For now, this assumes the directory holds vasp output files. @@ -101,17 +98,23 @@ def from_directory(cls, directory: Path, **kwargs): # I assume the directory is from a vasp calculation, but I need to update # this when I begin adding new calculators. - # TODO: It there a way I can figure out which tables the other calculations - # are linked to? Specifically, the bulk relaxation, bulk static energy, - # and the supercell relaxations. They are all in these directories too - # but I can't save those results until I know where they belong. - # Consider adding an attribute that points to those tables...? Or - # maybe a relationship (which I'd rather avoid bc it makes things very - # messy for users) # For now, I only grab the structure from the static-energy and store # it in the DiffusionAnalysis table. - bulk_filename = directory / "static-energy.vasp.matproj" / "POSCAR" + found_dir = False + for name in directory.iterdir(): + if name.is_dir() and name.stem.startswith("static-energy"): + static_energy_dir = name + found_dir = True + break + if not found_dir: + raise Exception( + "Unable to detect 'static-energy' directory and therefore unable " + "to determine the bulk structure used for this analysis." + ) + + bulk_filename = static_energy_dir / "POSCAR" bulk_structure = ToolkitStructure.from_file(bulk_filename) + # BUG: I assume the directory location but this will fail if changed. # Save a diffusion analysis object so we can connect all other data # to it. @@ -121,6 +124,15 @@ def from_directory(cls, directory: Path, **kwargs): ) analysis_db.save() + # load the remaining data - such as migration hops + analysis_db.update_from_directory(directory) + + return analysis_db + + def update_from_directory(self, directory: Path): + # NOTE: This method is not called at the end of the workflow as + # subflows often created the data already. + # Iterate through all the subdirectories that start with "migration_hop*". # We also need to make sure we only grab directories because there are # also cifs present that match this naming convention. 
@@ -128,22 +140,19 @@ def from_directory(cls, directory: Path, **kwargs): migration_directories = [ f.absolute() for f in directory.iterdir() - if f.absolute().is_dir() and f.startswith("migration_hop_") + if f.is_dir() and "single-path" in f.stem ] # now save each migration hop present for migration_dir in migration_directories: - cls.migration_hops.field.model.from_directory( + hop = self.migration_hops.field.model.from_directory( directory=migration_dir, - diffusion_analysis_id=analysis_db.id, ) - - return analysis_db + hop.diffusion_analysis_id = self.id + hop.save() -# TODO: consider making a Calculation bc this is what the corrections/directory -# information should be attached to. -class MigrationHop(DatabaseTable): +class MigrationHop(Calculation): class Meta: app_label = "workflows" @@ -165,6 +174,9 @@ class Meta: energy_barrier=["range"], ) + # is_from_hop_obj = table_column.BooleanField(blank=True, null=True) + # source = hop_obj / endpoints / images + # OPTIMIZE: site_start and site_end # Really, this is a list of float values, but I save it as a string. # For robustness, should I save cartesian coordinates and/or lattice as well? @@ -238,8 +250,35 @@ class Meta: DiffusionAnalysis, on_delete=table_column.CASCADE, related_name="migration_hops", + blank=True, + null=True, ) + def write_output_summary(self, directory: Path): + super().write_output_summary(directory) + self.write_neb_plot(directory) + self.write_migration_images(directory) + + def get_neb_plot(self): + neb_results = self.to_neb_toolkit() + plot = neb_results.get_plot() + return plot + + def write_neb_plot(self, directory: Path): + plot = self.get_neb_plot() + filename = directory / "simmate_neb_plot.jpeg" + plot.savefig(filename) + + def get_migration_images(self) -> MigrationImages: + structures = self.migration_images.order_by("number").to_toolkit() + migration_images = MigrationImages(structures) + return migration_images + + def write_migration_images(self, directory: Path): + migration_images = self.get_migration_images() + structure_sum = migration_images.get_sum_structure() + structure_sum.to("cif", directory / "simmate_path_relaxed_neb.cif") + # TODO: # image_start --> OneToOneField for specific MigrationHop # image_end --> OneToOneField for specific MigrationHop @@ -247,35 +286,11 @@ class Meta: # Just like Relaxation points to IonicSteps, NEB will point to MigrationImages - @classmethod - def _from_toolkit( - cls, - migration_hop: ToolkitMigrationHop, - as_dict: bool = False, - number: int = None, - **kwargs, - ): - - # convert the pathway object into the database table format - hop_dict = dict( - site_start=" ".join(str(c) for c in migration_hop.isite.frac_coords), - site_end=" ".join(str(c) for c in migration_hop.esite.frac_coords), - index_start=migration_hop.iindex, - index_end=migration_hop.eindex, - length=migration_hop.length, - # diffusion_analysis_id=diffusion_analysis_id, - **kwargs, - ) - - # If as_dict is false, we build this into an Object. Otherwise, just - # return the dictionary - return hop_dict if as_dict else cls(**hop_dict) - # BUG: because of rounding in the from_toolkit method, the get_sc_structures # is unable to identify equivalent sites. 
I opened an issue for this # with their team: # https://github.com/materialsvirtuallab/pymatgen-analysis-diffusion/issues/296 - def to_toolkit(self) -> ToolkitMigrationHop: + def to_migration_hop_toolkit(self) -> ToolkitMigrationHop: """ converts the database MigrationHop to a toolkit MigrationHop """ @@ -307,100 +322,106 @@ def to_toolkit(self) -> ToolkitMigrationHop: # if the pathways match, then we can return the pathway object! return path - ####### - # BUG: I need to distinguish between the from_toolkit/to_toolkit methods - # above that just load a MigrationImages object vs. the from_directory and - # from_pymatgen methods below that include results. As-is these similar - # names but very different use cases makes things confusing for users. - ####### + def to_neb_toolkit(self) -> NEBAnalysis: - @classmethod - def from_directory(cls, directory: Path, **kwargs): - # I assume the directory is from a vasp calculation, but I need to update - # this when I begin adding new calculators. + images = self.migration_images.all() + structures = self.migration_images.to_toolkit() + + neb_toolkit = NEBAnalysis( + r=[i.structure_distance for i in images], + energies=[i.energy for i in images], + forces=[i.force_tangent for i in images], + structures=structures, + ) + return neb_toolkit - # BUG: A fix is make during the workup() method that may be relevant here. - # simmate.calculators.vasp.tasks.nudged_elastic_band.MITNudgedElasticBand.workup - analysis = NEBAnalysis.from_dir(directory) - return cls.from_pymatgen(analysis=analysis, **kwargs) + def update_from_directory(self, directory: Path): + + # check if we have a VASP directory + vasprun_filename = directory / "vasprun.xml" + if not vasprun_filename.exists(): + raise Exception("Only VASP outputs are supported for NEB") + + from simmate.calculators.vasp.outputs import Vasprun + + vasprun = Vasprun.from_directory(directory) + self.update_from_neb_toolkit(vasprun.neb_results) @classmethod - def from_pymatgen( - cls, - analysis: NEBAnalysis, - diffusion_analysis_id: int = None, # only used if updating - migration_hop_id: int = None, # only used if updating - ): - # We reference these related tables frequently below so it easier to - # grab them up front. - diffusion_analysis_table = cls.diffusion_analysis.field.related_model - migration_image_table = cls.migration_images.field.model - - # First, we need a migration_hop database object. - # All of hops should link to a diffusion_analysis entry, so we check - # for that here too. The key thing of these statements is that we - # have a migration_hop_id at the end. - - # If no ids were given, then we make a new entries for each. - if not diffusion_analysis_id and not migration_hop_id: - # Note, we have minimal information if this is the case, so these - # table entries will have a bunch of empty columns. - - # We don't have a bulk structure to use for this class, so we use - # the first image - analysis_db = diffusion_analysis_table.from_toolkit( - structure=analysis.structures[0], - vacancy_mode=True, # assume this for now - ) - analysis_db.save() + def from_vasp_run(cls, vasprun: Vasprun): + return cls.from_neb_toolkit(neb_results=vasprun.neb_results) - # This table entry will actually be completely empty... It only - # serves to link the MigrationImages together - hop_db = cls( - diffusion_analysis_id=analysis_db.id, - ) - hop_db.save() - migration_hop_id = hop_db.id - - elif diffusion_analysis_id and not migration_hop_id: - # This table entry will actually be completely empty... 
It only - # serves to link the MigrationImages together - hop_db = cls(diffusion_analysis_id=diffusion_analysis_id) - hop_db.save() - migration_hop_id = hop_db.id - - elif migration_hop_id: - # We don't use the hop_id, but making this query ensures it exists. - hop_db = cls.objects.get(id=migration_hop_id) - # Even though it's not required, we make sure the id given for the - # diffusion analysis table matches this existing hop id. - if diffusion_analysis_id: - assert hop_db.diffusion_analysis.id == diffusion_analysis_id - - # Now same migration images and link them to this parent object. + @classmethod + def from_neb_toolkit(cls, neb_results: NEBAnalysis): + + # This table entry will actually be completely empty... It only + # serves to link the MigrationImages together. We only create this + # if we are updating + hop_db = cls() + hop_db.save() + + # build out the images in the related table + hop_db.update_from_neb_toolkit(neb_results) + + return hop_db + + def update_from_neb_toolkit(self, neb_results: NEBAnalysis): + + # build migration images and link them to this parent object. # Note, the start/end Migration images will exist already in the # relaxation database table. We still want to save them again here for # easy access. for image_number, image_data in enumerate( zip( - analysis.structures, - analysis.energies, - analysis.forces, - analysis.r, + neb_results.structures, + neb_results.energies, + neb_results.forces, + neb_results.r, ) ): image, energy, force, distance = image_data - image_db = migration_image_table.from_toolkit( + image_db = self.migration_images.field.model.from_toolkit( structure=image, number=image_number, force_tangent=force, energy=energy, structure_distance=distance, - migration_hop_id=migration_hop_id, + migration_hop_id=self.id, ) image_db.save() - return hop_db + @classmethod + def from_toolkit( # from_migration_hop_toolkit -- registration uses this + cls, + migration_hop: ToolkitMigrationHop = None, + as_dict: bool = False, + number: int = None, + **kwargs, + ): + # the algorithm doesn't change for this method, but we do want to add + # a few extra columns. Therefore we make the dictionary as normal and + # then add those extra columns here. + structure_dict = super().from_toolkit(as_dict=True, **kwargs) + + if migration_hop: + # convert the pathway object into the database table format + hop_dict = dict( + site_start=" ".join(str(c) for c in migration_hop.isite.frac_coords), + site_end=" ".join(str(c) for c in migration_hop.esite.frac_coords), + index_start=migration_hop.iindex, + index_end=migration_hop.eindex, + length=migration_hop.length, + # diffusion_analysis_id=diffusion_analysis_id, + **kwargs, + ) + else: + hop_dict = {} + + all_data = {**structure_dict, **hop_dict} + + # If as_dict is false, we build this into an Object. 
Otherwise, just + # return the dictionary + return all_data if as_dict else cls(**all_data) class MigrationImage(Structure): diff --git a/src/simmate/database/base_data_types/population_analysis.py b/src/simmate/database/base_data_types/population_analysis.py index 78b6d0c00..176296581 100644 --- a/src/simmate/database/base_data_types/population_analysis.py +++ b/src/simmate/database/base_data_types/population_analysis.py @@ -1,6 +1,10 @@ # -*- coding: utf-8 -*- +from pathlib import Path +from pandas import DataFrame + +from simmate.calculators.bader.outputs import ACF from simmate.database.base_data_types import StaticEnergy, table_column @@ -13,9 +17,122 @@ class PopulationAnalysis(StaticEnergy): class Meta: app_label = "workflows" + exclude_from_summary = [ + "oxidation_states", + "charges", + "min_dists", + "atomic_volumes", + "element_list", + ] + oxidation_states = table_column.JSONField(blank=True, null=True) """ - A list of calculated oxidation states based on some analysis. This is - given back as a list of float values in the same order as sites in the - source structure. + A list of calculated oxidation states for each site. + """ + + charges = table_column.JSONField(blank=True, null=True) + """ + A list of total "valence" electron counts for each site. + + WARNING: this count is dependent on the potentials used. For example, + Yttrium could have used a potential where 2 or even 10 electrons are used + as the basis for the calculation. Use 'oxidation_states' for a more + consistent and accurate count of valence electrons + """ + + min_dists = table_column.JSONField(blank=True, null=True) + """ + A list of minimum radii distance for bader volumes. i.e. the minimum + distance from the origin of the site to the bader surface. This can be used + as a minimum radius for the site. + """ + + atomic_volumes = table_column.JSONField(blank=True, null=True) + """ + A list of site volumes from the oxidation analysis (i.e. the bader volume) + """ + + element_list = table_column.JSONField(blank=True, null=True) + """ + A list of all element species in order that appear in the structure. + + This information is stored in the structure_string as well, but it is stored + here as an extra for convenience. + """ + + vacuum_charge = table_column.FloatField(blank=True, null=True) + """ + Total electron count that was NOT assigned to ANY site -- and therefore + assigned to 'vacuum'. + + In most cases, this value should be zero. + """ + + vacuum_volume = table_column.FloatField(blank=True, null=True) + """ + Total volume from electron density that was NOT assigned to ANY site -- + and therefore assigned to 'vacuum'. + + In most cases, this value should be zero. """ + + nelectrons = table_column.FloatField(blank=True, null=True) + """ + The total number of electrons involved in the charge density partitioning. + + WARNING: this count is dependent on the potentials used. For example, + Yttrium could have used a potential where 2 or even 10 electrons are used + as the basis for the calculation. Use 'oxidation_states' for a more + consistent and accurate count of valence electrons + """ + + def write_output_summary(self, directory: Path): + super().write_output_summary(directory) + self.write_summary_dataframe(directory) + + @classmethod + def from_vasp_directory(cls, directory: Path, as_dict: bool = False): + """ + A basic workup process that reads Bader analysis results from the ACF.dat + file and calculates the corresponding oxidation states with the existing + POTCAR files. 
+ """ + + # For loading the static-energy data, we can just call the parent + # method of this class. + energy_data = StaticEnergy.from_vasp_directory(directory, as_dict=as_dict) + + # We must then look for the bader analysis data + + # load the ACF.dat file + dataframe, extra_data = ACF(directory) + + all_data = { + # OPTIMIZE: consider a related table for Sites + "oxidation_states": list(dataframe.oxidation_state.values), + "charges": list(dataframe.charge.values), + "min_dists": list(dataframe.min_dist.values), + "atomic_volumes": list(dataframe.atomic_vol.values), + "element_list": list(dataframe.element.values), + **extra_data, + **energy_data, + } + + return all_data if as_dict else cls(**all_data) + + def get_summary_dataframe(self): + df = DataFrame( + { + "element": self.element_list, + "oxidation_state": self.oxidation_states, + "charge": self.charges, + "min_dist": self.min_dists, + "atomic_volume": self.atomic_volumes, + } + ) + return df + + def write_summary_dataframe(self, directory: Path): + df = self.get_summary_dataframe() + filename = directory / "simmate_population_summary.csv" + df.to_csv(filename) diff --git a/src/simmate/database/base_data_types/relaxation.py b/src/simmate/database/base_data_types/relaxation.py index c3edaeaea..fae5d16e6 100644 --- a/src/simmate/database/base_data_types/relaxation.py +++ b/src/simmate/database/base_data_types/relaxation.py @@ -13,54 +13,8 @@ (aka geometry optimizations). This will store all ionic steps and the forces/stress associated with each step. -When creating new tables for Relaxations, you should use the `Relaxation.create_subclasses` method, which helps remove all the -boilerplate code needed. For Django users, it may be tricky to understand what's -happening behind the scenes, so here's an example: - -These two lines... - -``` python -from simmate.database.base_data_types import Relaxation - -ExampleRelaxation, ExampleIonicStep = Relaxation.create_subclasses( - "Example", - module=__name__, -) -``` - -... do exactly the same thing as all of these lines... - -``` python -from simmate.database.base_data_types import table_column -from simmate.database.base_data_types import IonicStep, Relaxation - -class ExampleIonicStep(IonicStep): - relaxation = table_column.ForeignKey( - "ExampleRelaxation", # in quotes becuase this is defined below - on_delete=table_column.CASCADE, - related_name="structures", - ) - -class ExampleRelaxation(Relaxation): - structure_start = table_column.OneToOneField( - ExampleIonicStep, - on_delete=table_column.CASCADE, - related_name="relaxations_as_start", - blank=True, - null=True, - ) - structure_final = table_column.OneToOneField( - ExampleIonicStep, - on_delete=table_column.CASCADE, - related_name="relaxations_as_final", - blank=True, - null=True, - ) -``` - Note there are two tables involved. One stores all of the ionic steps, and the other connects all ionic steps to a specific calculation and result. - """ from pathlib import Path @@ -94,6 +48,8 @@ class Relaxation(Structure, Thermodynamics, Calculation): class Meta: app_label = "workflows" + exclude_from_summary = ["structure_start", "structure_final"] + archive_fields = [ "band_gap", "is_gap_direct", @@ -176,13 +132,9 @@ class Meta: null=True, ) - @classmethod - def from_directory(cls, directory: Path): - # I assume the directory is from a vasp calculation, but I need to update - # this when I begin adding new calculators. 
- vasprun_filename = directory / "vasprun.xml" - vasprun = Vasprun(vasprun_filename) - return cls.from_vasp_run(vasprun) + def write_output_summary(self, directory: Path): + super().write_output_summary(directory) + self.write_convergence_plot(directory) @classmethod def from_vasp_run(cls, vasprun: Vasprun): @@ -193,27 +145,29 @@ def from_vasp_run(cls, vasprun: Vasprun): # Note, the information does not matter at this point because it will be # populated below relaxation = cls.from_toolkit(structure=vasprun.structures[-1]) - # TODO: need to pull run_id from metadata file. # Now we have the relaxation data all loaded and can save it to the database relaxation.save() - # Save the rest of the results using the update method from this class - relaxation.update_from_vasp_run( - vasprun=vasprun, - corrections=[], - directory=None, - ) - # TODO: load corrections/directory from the metadata and corrections files. + # and we can populate the rest of the tables as if its the workup + relaxation.update_from_vasprun(vasprun) + + def update_from_directory(self, directory: Path): + + # check if we have a VASP directory + vasprun_filename = directory / "vasprun.xml" + if not vasprun_filename.exists(): + # raise Exception( + # "Only VASP output directories are supported at the moment" + # ) + return # just exit - return relaxation + from simmate.calculators.vasp.outputs import Vasprun - def update_from_vasp_run( - self, - vasprun: Vasprun, - corrections: list, - directory: Path, - ): + vasprun = Vasprun.from_directory(directory) + self.update_from_vasp_run(vasprun) + + def update_from_vasp_run(self, vasprun: Vasprun): """ Given a Vasprun object from a finished relaxation, this will update the Relaxation table entry and the corresponding IonicStep entries. @@ -278,9 +232,6 @@ def update_from_vasp_run( energy_fermi=data.get("efermi"), conduction_band_minimum=data.get("cbm"), valence_band_maximum=data.get("vbm"), - # lastly, we also want to save the corrections made and directory it ran in - corrections=corrections, - directory=directory, ) def get_convergence_plot(self): @@ -340,6 +291,13 @@ def get_convergence_plot(self): # we return the figure object for the user return figure + def write_convergence_plot(self, directory: Path): + figure = self.get_convergence_plot() + figure.write_html( + directory / "simmate_convergence.html", + include_plotlyjs="cdn", + ) + def view_convergence_plot(self): figure = self.get_convergence_plot() figure.show(renderer="browser") diff --git a/src/simmate/database/base_data_types/static_energy.py b/src/simmate/database/base_data_types/static_energy.py index 0c429f5fd..c67870497 100644 --- a/src/simmate/database/base_data_types/static_energy.py +++ b/src/simmate/database/base_data_types/static_energy.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -from pathlib import Path - from pymatgen.io.vasp.outputs import Vasprun from simmate.database.base_data_types import ( @@ -60,15 +58,7 @@ class Meta: """ @classmethod - def from_directory(cls, directory: Path): - # I assume the directory is from a vasp calculation, but I need to update - # this when I begin adding new calculators. - vasprun_filename = directory / "vasprun.xml" - vasprun = Vasprun(vasprun_filename) - return cls.from_vasp_run(vasprun) - - @classmethod - def from_vasp_run(cls, vasprun: Vasprun): + def from_vasp_run(cls, vasprun: Vasprun, as_dict: bool = False): # Takes a pymatgen VaspRun object, which is what's typically returned # from a simmate VaspWorkflow.run() call. 
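The new `as_dict` flag on `from_vasp_run` lets the same parsing code either create a new row or feed an update of an existing one. A minimal sketch, assuming a finished VASP run sits in `my_static_run/` and `MyStaticEnergy` is a stand-in for a concrete `StaticEnergy` subclass:

``` python
from pathlib import Path

from pymatgen.io.vasp.outputs import Vasprun

vasprun = Vasprun(Path("my_static_run") / "vasprun.xml")

# default: build and save a brand-new table row
entry = MyStaticEnergy.from_vasp_run(vasprun)

# as_dict=True: return only the column values (nothing is saved), which is
# what the update_from_* chain uses to refresh an existing entry
columns = MyStaticEnergy.from_vasp_run(vasprun, as_dict=True)
```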
@@ -91,51 +81,12 @@ def from_vasp_run(cls, vasprun: Vasprun): energy_fermi=data.get("efermi"), conduction_band_minimum=data.get("cbm"), valence_band_maximum=data.get("vbm"), + as_dict=as_dict, ) - static_energy.save() - return static_energy - def update_from_vasp_run( - self, - vasprun: Vasprun, - corrections: list, - directory: Path, - ): - # Takes a pymatgen VaspRun object, which is what's typically returned - # from a simmate VaspWorkflow.run() call. + # If we don't want the data as a dictionary, then we are saving a new + # object and can go ahead and do that here. + if not as_dict: + static_energy.save() - # The data is actually easier to access as a dictionary and everything - # we need is stored under the "output" key. - data = vasprun.as_dict()["output"] - # In a static energy calculation, there is only one ionic step so we - # grab that up front. - ionic_step = data["ionic_steps"][0] - - # Take our structure, energy, and forces to build all of our other - # fields for this datatable - # OPTIMIZE: this overwrites structure data, which should already be there. - # Is there a faster way to grab this data and update attributes? - new_kwargs = self.from_toolkit( - structure=vasprun.final_structure, - energy=ionic_step["e_wo_entrp"], - site_forces=ionic_step["forces"], - lattice_stress=ionic_step["stress"], - as_dict=True, - ) - for key, value in new_kwargs.items(): - setattr(self, key, value) - - # There is also extra data for the final structure that we save directly - # in the relaxation table. We use .get() in case the key isn't provided - self.band_gap = data.get("bandgap") - self.is_gap_direct = data.get("is_gap_direct") - self.energy_fermi = data.get("efermi") - self.conduction_band_minimum = data.get("cbm") - self.valence_band_maximum = data.get("vbm") - - # lastly, we also want to save the corrections made and directory it ran in - self.corrections = corrections - self.directory = directory - - # Now we have the relaxation data all loaded and can save it to the database - self.save() + return static_energy diff --git a/src/simmate/database/base_data_types/structure.py b/src/simmate/database/base_data_types/structure.py index 05c367f09..346e841ff 100644 --- a/src/simmate/database/base_data_types/structure.py +++ b/src/simmate/database/base_data_types/structure.py @@ -12,6 +12,8 @@ class Structure(DatabaseTable): class Meta: abstract = True + exclude_from_summary = ["structure_string", "elements"] + archive_fields = ["structure_string"] api_filters = dict( @@ -195,10 +197,13 @@ def filter_chemical_system(self, queryset, name, value): @classmethod def _from_toolkit( cls, - structure: ToolkitStructure, + structure: ToolkitStructure = None, as_dict: bool = False, **kwargs, ): + # if there isn't a structure, nothing is to be done. + if not structure: + return kwargs if as_dict else cls(**kwargs) # BUG: This is an old line and I can't remember why I have it. Once I # have implemented more unittests, consider deleting. 
This method is @@ -233,7 +238,7 @@ def _from_toolkit( nelements=len(structure.composition), elements=[str(e) for e in structure.composition.elements], chemical_system=structure.composition.chemical_system, - density=structure.density, + density=float(structure.density), density_atomic=structure.num_sites / structure.volume, volume=structure.volume, # 1e-27 is to convert from cubic angstroms to Liter and then 1e3 to diff --git a/src/simmate/database/base_data_types/test/test_calculation_db.py b/src/simmate/database/base_data_types/test/test_calculation_db.py index cebd6f429..51ccd37e4 100644 --- a/src/simmate/database/base_data_types/test/test_calculation_db.py +++ b/src/simmate/database/base_data_types/test/test_calculation_db.py @@ -15,6 +15,7 @@ def test_calculation_table(): calc_db = TestCalculation.from_run_context( run_id="example-id-123", workflow_name="example.test.workflow", + workflow_version="1.2.3", ) calc_db.save() @@ -23,21 +24,15 @@ def test_calculation_table(): calc_db2 = TestCalculation.from_run_context( run_id="example-id-123", workflow_name="example.test.workflow", + workflow_version="1.2.3", ) assert calc_db.id == calc_db2.id # grab prefect url for this id - assert ( - calc_db.prefect_cloud_link - == "https://cloud.prefect.io/simmate/flow-run/example-id-123" - ) - - # and test incorrect passing - with pytest.raises(Exception): - calc_db2 = TestCalculation.from_run_context( - run_id="example-id-123", - # workflow_name --> missing but required - ) + # assert ( + # calc_db.prefect_cloud_link + # == "https://cloud.prefect.io/simmate/flow-run/example-id-123" + # ) @pytest.mark.django_db @@ -46,11 +41,13 @@ def test_calculation_archives(): calc_db = TestCalculation.from_run_context( run_id="example-id-123", workflow_name="example.test.workflow", + workflow_version="1.2.3", ) calc_db.save() calc_db2 = TestCalculation.from_run_context( run_id="example-id-123", workflow_name="example.test.workflow", + workflow_version="1.2.3", ) calc_db2.save() diff --git a/src/simmate/database/base_data_types/test/test_dynamics_db.py b/src/simmate/database/base_data_types/test/test_dynamics_db.py index 263a811ed..bcc97b4ad 100644 --- a/src/simmate/database/base_data_types/test/test_dynamics_db.py +++ b/src/simmate/database/base_data_types/test/test_dynamics_db.py @@ -20,6 +20,7 @@ def test_static_energy_table(structure): structure_db = DynamicsRun.from_run_context( run_id="example-id-123", workflow_name="example.test.workflow", + workflow_version="1.2.3", structure=structure, ) structure_db.save() @@ -29,6 +30,7 @@ def test_static_energy_table(structure): structure_db2 = DynamicsRun.from_run_context( run_id="example-id-123", workflow_name="example.test.workflow", + workflow_version="1.2.3", ) assert structure_db.id == structure_db.id diff --git a/src/simmate/database/base_data_types/test/test_relaxation_db.py b/src/simmate/database/base_data_types/test/test_relaxation_db.py index afa980f20..b8b984780 100644 --- a/src/simmate/database/base_data_types/test/test_relaxation_db.py +++ b/src/simmate/database/base_data_types/test/test_relaxation_db.py @@ -20,6 +20,7 @@ def test_relaxation_table(structure): structure_db = Relaxation.from_run_context( run_id="example-id-123", workflow_name="example.test.workflow", + workflow_version="1.2.3", structure=structure, ) structure_db.save() @@ -29,6 +30,7 @@ def test_relaxation_table(structure): structure_db2 = Relaxation.from_run_context( run_id="example-id-123", workflow_name="example.test.workflow", + workflow_version="1.2.3", ) assert structure_db.id == 
structure_db2.id diff --git a/src/simmate/database/base_data_types/test/test_static_energy_db.py b/src/simmate/database/base_data_types/test/test_static_energy_db.py index d0041c239..d1a652518 100644 --- a/src/simmate/database/base_data_types/test/test_static_energy_db.py +++ b/src/simmate/database/base_data_types/test/test_static_energy_db.py @@ -18,6 +18,7 @@ def test_static_energy_table(structure, tmp_path): run_id="example-id-123", workflow_name="example.test.workflow", structure=structure, + workflow_version="1.2.3", ) structure_db.save() @@ -26,6 +27,7 @@ def test_static_energy_table(structure, tmp_path): structure_db2 = StaticEnergy.from_run_context( run_id="example-id-123", workflow_name="example.test.workflow", + workflow_version="1.2.3", ) assert structure_db.id == structure_db2.id diff --git a/src/simmate/database/base_data_types/thermodynamics.py b/src/simmate/database/base_data_types/thermodynamics.py index 71ad031f1..04ec2acb3 100644 --- a/src/simmate/database/base_data_types/thermodynamics.py +++ b/src/simmate/database/base_data_types/thermodynamics.py @@ -11,7 +11,7 @@ # BUG: This prints a tqdm error so we silence it here. with warnings.catch_warnings(record=True): - from pymatgen.analysis.phase_diagram import PDEntry, PhaseDiagram + from pymatgen.analysis.phase_diagram import PDEntry, PDPlotter, PhaseDiagram class Thermodynamics(DatabaseTable): @@ -90,7 +90,7 @@ class Meta: @classmethod def _from_toolkit( cls, - structure: ToolkitStructure, + structure: ToolkitStructure = None, energy: float = None, as_dict: bool = False, ): @@ -102,7 +102,7 @@ def _from_toolkit( data = ( dict( energy=energy, - energy_per_atom=energy / structure.num_sites, + energy_per_atom=energy / structure.num_sites if structure else None, energy_above_hull=None, is_stable=None, decomposes_to=None, @@ -119,24 +119,26 @@ def _from_toolkit( def update_chemical_system_stabilities(cls, chemical_system: str): # NOTE: I assume we are using a Child(Structure, Thermodynamics) + # Maybe check for the Structure mix-in as well. + # ------- This is a copy/paste of the get_phase_diagram method ------- # if we have a multi-element system, we need to include subsystems as # well. ex: Na --> Na, Cl, Na-Cl subsystems = get_chemical_subsystems(chemical_system) - # grab all entries for this chemical system entries = ( cls.objects.filter( + # workflow_name="relaxation.vasp.staged", chemical_system__in=subsystems, energy__isnull=False, # only completed calculations ) .only("energy", "formula_full") .all() ) - # convert to pymatgen PDEntries and build into PhaseDiagram object entries_pmg = [PDEntry(entry.formula_full, entry.energy) for entry in entries] phase_diagram = PhaseDiagram(entries_pmg) + # --------------------------------------------------------------------- # now go through the entries and update stability values for entry, entry_pmg in zip(entries, entries_pmg): @@ -205,11 +207,7 @@ def update_all_stabilities(cls): # ) @classmethod - def get_hull_diagram(cls, chemical_system: str): - - # BUG: This prints a tqdm error so we silence it here. - with warnings.catch_warnings(record=True): - from pymatgen.analysis.phase_diagram import PDEntry, PhaseDiagram, PDPlotter + def get_phase_diagram(cls, chemical_system: str): # if we have a multi-element system, we need to include subsystems as # well. 
ex: Na --> Na, Cl, Na-Cl @@ -230,6 +228,12 @@ def get_hull_diagram(cls, chemical_system: str): entries_pmg = [PDEntry(entry.formula_full, entry.energy) for entry in entries] phase_diagram = PhaseDiagram(entries_pmg) + return phase_diagram + + def show_hull_diagram(cls, chemical_system: str): + + phase_diagram = cls.get_phase_diagram(chemical_system) + plotter = PDPlotter(phase_diagram) # alternatively use backend="matplotlib" plotter.get_plot(label_unstable=False).show(renderer="browser") diff --git a/src/simmate/toolkit/diffusion/__init__.py b/src/simmate/toolkit/diffusion/__init__.py index 286d34ff4..b4f37c0f4 100644 --- a/src/simmate/toolkit/diffusion/__init__.py +++ b/src/simmate/toolkit/diffusion/__init__.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -from pymatgen.analysis.diffusion.neb.pathfinder import DistinctPathFinder - +from .distinct_path_finder import DistinctPathFinder from .migration_hop import MigrationHop from .migration_images import MigrationImages diff --git a/src/simmate/toolkit/diffusion/distinct_path_finder.py b/src/simmate/toolkit/diffusion/distinct_path_finder.py new file mode 100644 index 000000000..601d434e2 --- /dev/null +++ b/src/simmate/toolkit/diffusion/distinct_path_finder.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +from pathlib import Path + +from pymatgen.analysis.diffusion.neb.pathfinder import DistinctPathFinder as PymatgenDPF + + +class DistinctPathFinder(PymatgenDPF): + def write_all_migration_hops(self, directory: Path): + # We write all the path files so users can visualized them if needed + filename = directory / "all_migration_hops.cif" + self.write_all_paths(filename, nimages=10) + migration_hops = self.get_paths() + for i, migration_hop in enumerate(migration_hops): + number = str(i).zfill(2) # converts numbers like 2 to "02" + # the files names here will be like "migration_hop_02.cif" + migration_hop.write_path( + directory / f"migration_hop_{number}.cif", + nimages=10, # this is just for visualization + ) diff --git a/src/simmate/toolkit/diffusion/migration_hop.py b/src/simmate/toolkit/diffusion/migration_hop.py index 68e218fb3..67735bc70 100644 --- a/src/simmate/toolkit/diffusion/migration_hop.py +++ b/src/simmate/toolkit/diffusion/migration_hop.py @@ -51,8 +51,8 @@ def from_database_dict(cls, migration_hop: dict): ex: { - "migration_hop_table": "MITMigrationHop", - "migration_hop_id": 1, + "database_table": "MITMigrationHop", + "database_id": 1, } """ @@ -62,16 +62,16 @@ def from_database_dict(cls, migration_hop: dict): from simmate.database import connect from simmate.website.workflows import models as all_datatables - datatable_str = migration_hop["migration_hop_table"] + datatable_str = migration_hop["database_table"] if hasattr(all_datatables, datatable_str): datatable = getattr(all_datatables, datatable_str) else: datatable = import_string(datatable_str) # for now I only support migration_hop_id - migration_hop_db = datatable.objects.get(id=migration_hop["migration_hop_id"]) - migration_hop_cleaned = migration_hop_db.to_toolkit() - migration_hop_cleaned.database_entry = migration_hop_db + migration_hop_db = datatable.objects.get(id=migration_hop["database_id"]) + migration_hop_cleaned = migration_hop_db.to_migration_hop_toolkit() + migration_hop_cleaned.database_object = migration_hop_db return migration_hop_cleaned diff --git a/src/simmate/toolkit/diffusion/migration_images.py b/src/simmate/toolkit/diffusion/migration_images.py index c80240bee..f2cb09795 100644 --- a/src/simmate/toolkit/diffusion/migration_images.py +++ 
b/src/simmate/toolkit/diffusion/migration_images.py @@ -29,7 +29,26 @@ class MigrationImages(list): def __init__(self, structures: list[Structure]): # This init function does nothing except apply typing -- specifically, # it says that it expects a list of structures. - super().__init__(structures) + structures_cleaned = self._process_structures(structures) + super().__init__(structures_cleaned) + + @staticmethod + def _process_structures(structures: list[Structure]) -> list[Structure]: + """ + Remove any atom jumps across the cell. + """ + # This method is copied directly from pymatgen's MITNEBset and has not + # been refactored/reviewed yet. + input_structures = structures + structures = [input_structures[0]] + for s in input_structures[1:]: + prev = structures[-1] + for i, site in enumerate(s): + t = numpy.round(prev[i].frac_coords - site.frac_coords) + if numpy.any(numpy.abs(t) > 0.5): + s.translate_sites([i], t, to_unit_cell=False) + structures.append(s) + return structures def get_sum_structure(self, tolerance: float = 1e-3): """ diff --git a/src/simmate/website/workflows/test/test_workflows_views.py b/src/simmate/website/workflows/test/test_workflows_views.py index 9fc1871f6..9d9f310a5 100644 --- a/src/simmate/website/workflows/test/test_workflows_views.py +++ b/src/simmate/website/workflows/test/test_workflows_views.py @@ -42,10 +42,15 @@ def test_workflow_detail_view(client, workflow_name): if workflow_name in [ "restart.simmate.automatic", "electronic-structure.vasp.matproj-full", + "electronic-structure.vasp.matproj-hse-full", "structure-prediction.python.fixed-composition", "structure-prediction.python.new-individual", "structure-prediction.python.variable-composition", "structure-prediction.python.binary-composition", + "diffusion.vasp.neb-all-paths-mit", + "diffusion.vasp.neb-from-endpoints-mit", + "diffusion.vasp.neb-from-images-mit", + "diffusion.vasp.neb-single-path-mit", ]: return diff --git a/src/simmate/website/workflows/views.py b/src/simmate/website/workflows/views.py index 470f7a29d..bf4eda521 100644 --- a/src/simmate/website/workflows/views.py +++ b/src/simmate/website/workflows/views.py @@ -47,11 +47,11 @@ def workflows_all(request): "often involves iteratively evaluating the energy/forces of structure at " "specific temperature (or temperature ramp)." ), - "diffusion": ( - "These workflows evaluate the diffusion of an atom through a material. " - "At this time, these workflows are entirely Nudged-Elastic-Band (NEB) " - "calculations." - ), + # "diffusion": ( + # "These workflows evaluate the diffusion of an atom through a material. " + # "At this time, these workflows are entirely Nudged-Elastic-Band (NEB) " + # "calculations." + # ), } # now let's put the data and template together to send the user diff --git a/src/simmate/workflow_engine/s3_workflow.py b/src/simmate/workflow_engine/s3_workflow.py index f2962566c..562b65e58 100644 --- a/src/simmate/workflow_engine/s3_workflow.py +++ b/src/simmate/workflow_engine/s3_workflow.py @@ -91,7 +91,7 @@ def run_config( command: str = None, is_restart: bool = False, **kwargs, - ): + ) -> dict: """ Runs the entire staged task (setup, execution, workup), which includes supervising during execution. @@ -180,13 +180,22 @@ def run_config( # run the workup stage of the task. This is where the data/info is pulled # out from the calculation and is thus our "result". 
- result = cls.workup(directory=directory) + extra_results = cls.workup(directory=directory) or {} + + # Make sure the user is returning a compatible result from the workup + # method. + if not isinstance(extra_results, dict): + raise Exception( + "When defining a custom `workup` method, you must return a dictionary " + "(or None). This is so `corrections` can be added to your dictionary " + "and also the dictionary is used to update database columns when " + " `use_database=True`" + ) # Return our final information as a dictionary result = { - "result": result, "corrections": corrections, - "directory": directory, + **extra_results, } return result diff --git a/src/simmate/workflow_engine/test/test_s3_workflow.py b/src/simmate/workflow_engine/test/test_s3_workflow.py index 65ca871bc..3d272d708 100644 --- a/src/simmate/workflow_engine/test/test_s3_workflow.py +++ b/src/simmate/workflow_engine/test/test_s3_workflow.py @@ -83,7 +83,7 @@ def terminate_job(self, directory, **kwargs): # @pytest.mark.prefect_db -def test_s3workflow_methods(): +def test_s3workflow_methods(tmp_path): class Customized__Testing__DummyWorkflow(S3Workflow): command = "echo dummy" use_database = False @@ -96,14 +96,13 @@ class Customized__Testing__DummyWorkflow(S3Workflow): workflow.show_config() # a print statment w. nothing else to check # Test basic run - state = workflow.run() + state = workflow.run(directory=tmp_path) result = state.result() assert state.is_completed() - assert result["directory"].exists() - shutil.rmtree(result["directory"]) + assert result == {"corrections": []} -def test_s3workflow_1(): +def test_s3workflow_1(tmp_path): # run a basic task w.o. any handlers or monitoring class Customized__Testing__DummyWorkflow(S3Workflow): @@ -111,16 +110,9 @@ class Customized__Testing__DummyWorkflow(S3Workflow): use_database = False monitor = False - output = Customized__Testing__DummyWorkflow.run_config() + output = Customized__Testing__DummyWorkflow.run_config(directory=tmp_path) - assert output["result"] == None - assert output["corrections"] == [] - - # make sure that a "simmate-task-*" directory was created - assert output["directory"].exists() - - # and delete that directory - output["directory"].rmdir() + assert output == {"corrections": []} def test_s3workflow_2(tmp_path): @@ -138,11 +130,8 @@ class Customized__Testing__DummyWorkflow(S3Workflow): ] # use the temporary directory - assert Customized__Testing__DummyWorkflow.run_config(directory=tmp_path) == { - "result": None, - "corrections": [], - "directory": tmp_path, - } + output = Customized__Testing__DummyWorkflow.run_config(directory=tmp_path) + assert output == {"corrections": []} def test_s3workflow_3(tmp_path): diff --git a/src/simmate/workflow_engine/workflow.py b/src/simmate/workflow_engine/workflow.py index bf5bc2170..f3c684dfa 100644 --- a/src/simmate/workflow_engine/workflow.py +++ b/src/simmate/workflow_engine/workflow.py @@ -59,14 +59,16 @@ class Workflow: """ Whether to use Simmate database features or not. - This includes calling the `_register_calculation` and `_save_to_database` - methods attached to this workflow. + This includes calling the `_register_calculation` and + `_update_database_with_results` methods attached to this workflow. `_register_calculation` will save a database entry before the workflow starts. This is useful to keep track of workflows that have been submitted/started but haven't finished yet. - `_save_to_database` saves the output of the `workup` method to the database. 
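The new dictionary requirement for `workup` (and the updated tests above) can be summarized with a small sketch. Everything below is hypothetical -- the class name, command, and parsed file are made up for illustration, and the import location is assumed.

```python
# Illustrative sketch (not part of the diff): a custom S3Workflow whose
# workup returns a dict, which is then merged with "corrections".
from pathlib import Path

from simmate.workflow_engine import S3Workflow  # assumed import location


class Customized__Testing__EchoAndParse(S3Workflow):
    command = "echo 42 > answer.txt"
    use_database = False
    monitor = False

    @classmethod
    def workup(cls, directory: Path):
        # must return a dictionary (or None); these keys end up in the final
        # result alongside the "corrections" list
        answer = int((directory / "answer.txt").read_text())
        return {"answer": answer}


# Customized__Testing__EchoAndParse.run_config(directory=Path("my_run"))
# would then return something like {"corrections": [], "answer": 42}
```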
+ `_update_database_with_results` saves the output of the `workup` method + to the database entry -- and this the same entry that was created by + `_register_calculation`. """ _parameter_methods = ["run_config", "_run_full"] @@ -151,10 +153,30 @@ def _run_full( source=source, **kwargs, ) - result = cls.run_config(**kwargs_cleaned) + + # Finally run the core part of the workflow. This should return a + # dictionary object if we have "use_database=True", but can be + # any python object if "use_database=False" + results = cls.run_config(**kwargs_cleaned) + + # save the result to the database if cls.use_database: - result["calculation_id"] = cls._save_to_database( - result, + + # make sure the workflow is returning a dictionary that be used + # to update the database columns. None is also allowed as it + # represents an empty dictionary + if not isinstance(results, dict) and results != None: + raise Exception( + "When using a database table, your `run_config` method must " + "return a dictionary object. The dictionary is used to " + "update columns in your table entry and is therefore a " + "required format. If you do not want to save to the database " + "(and avoid this message), set `use_database=False`" + ) + logging.info("Saving to database and writing outputs") + database_entry = cls._update_database_with_results( + results=results if results != None else {}, + directory=kwargs_cleaned["directory"], run_id=kwargs_cleaned["run_id"], ) @@ -164,8 +186,12 @@ def _run_full( logging.info("Compressing result to a ZIP file.") make_archive(kwargs_cleaned["directory"]) + # If we made it this far, we successfully completed the workflow run logging.info(f"Completed {cls.name_full}") - return result + + # If we are using the database, then we return the database object. + # Otherwise, we want to return the original result from run_config + return database_entry if cls.use_database else results @classmethod def run_cloud( @@ -193,7 +219,7 @@ def run_cloud( logging.info(f"Submitting new run of `{cls.name_full}` to cloud") # To help with tracking the flow in cloud, we create the flow_id up front. - kwargs["run_id"] = kwargs.get("run_id", None) or cls._get_run_id() + kwargs["run_id"] = kwargs.get("run_id", None) or cls._get_new_run_id() run_id = kwargs["run_id"] # just for easy reference below # If we are submitting using a filename, we don't want to @@ -216,15 +242,12 @@ def run_cloud( tags=tags or cls.tags, **parameters_serialized, ) + state.run_id = run_id # attach the run id as an extra logging.info(f"Successfully submitted (workitem_id={state.pk})") # If the user wants the future, return that instead of the run_id - if return_state: - state.run_id = run_id # attach the run id as an extra - return state - - return run_id + return state if return_state else run_id @classmethod def run_config(cls, **kwargs) -> any: @@ -327,11 +350,11 @@ def database_table(cls) -> Calculation: """ The database table where calculation information (such as the run_id) is stored. The table should use `simmate.database.base_data_types.Calculation` - - In many cases, this table will contain all of the results you need. However, - pay special attention to NestedWorkflows, where your results are often tied - to a final task. + as one of its mix-ins. """ + # OPTIMIZE: a mapping dictionary or some standardized way to name + # database tables would simplify this. 
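The `_run_full` changes above introduce a similar contract at the workflow level. A minimal sketch, assuming a workflow that opts out of the database; a table-backed workflow would return the same kind of dictionary so its columns can be populated:

```python
# Illustrative sketch (not part of the diff): run_config return values.
from simmate.workflow_engine import Workflow  # assumed import location


class Customized__Python__CountSites(Workflow):
    # with use_database=True, run_config must return a dict (or None) whose
    # keys map onto database columns -- anything else now raises an exception,
    # and the run returns the updated database entry instead of this dict
    use_database = False

    @staticmethod
    def run_config(structure, **kwargs):
        return {"site_count": len(structure)}
```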
+ flow_type = cls.name_type flow_preset = cls.name_preset @@ -361,11 +384,11 @@ def database_table(cls) -> Calculation: return DynamicsRun elif flow_type == "diffusion": - if "from-images" in flow_preset: - from simmate.database.base_data_types import MigrationImage + if "from-images" in flow_preset or "single-path" in flow_preset: + from simmate.database.base_data_types import MigrationHop - return MigrationImage - else: + return MigrationHop + elif "all-paths" in flow_preset: from simmate.database.base_data_types import DiffusionAnalysis return DiffusionAnalysis @@ -373,38 +396,76 @@ def database_table(cls) -> Calculation: from simmate.database.base_data_types import CustomizedCalculation return CustomizedCalculation - else: - raise NotImplementedError("Unable to detect proper database table") + + raise NotImplementedError( + "Unable to detect proper database table. Are you sure your workflow " + "should be using a table? If not, set `use_database=False` on your " + "workflow as shown in the 'basic' example workflow from the guides." + ) @classmethod @property def all_results(cls): # -> SearchResults - # BUG: pdoc raises an error because name_full fails. - try: - return cls.database_table.objects.filter(workflow_name=cls.name_full).all() - except: - return + """ + Filters results from the database table down to the results from this + workflow (i.e. matching workflow_name) + """ + return cls.database_table.objects.filter(workflow_name=cls.name_full).all() @classmethod - def _save_to_database(cls, result: any, run_id: str): + def _update_database_with_results( + cls, + results: dict, + run_id: str, + directory: Path, + ) -> Calculation: + """ + Take the output of the `run_config` and any extra information and + saves it to the database. - # split our results and corrections (which are given as a dict) into - # separate variables - vasprun = result["result"] - corrections = result["corrections"] - directory = result["directory"] + An output summary is also written to file for quick viewing. + """ # load the calculation entry for this workflow run. This should already # exist thanks to the load_input_and_register task. calculation = cls.database_table.from_run_context( run_id=run_id, workflow_name=cls.name_full, + workflow_version=cls.version, ) - # now update the calculation entry with our results - calculation.update_from_vasp_run(vasprun, corrections, directory) + # Now update the calculation entry with our results. Typically, all of this + # is handled by the calculation table's "update_from" methods, but in + # rare cares, we may want to attach an update method directly to the + # workflow class. I can only imagine this is used when... + # (1) workflow attributes are important during the update + # (2) when several workflows share a table and need to isolate + # their workup method (e.g. 
the MigrationHop table for NEB) + if hasattr(cls, "update_database_from_results"): + # The attribute can also be set to false to disable updates + if cls.update_database_from_results: + cls.update_database_from_results( + calculation=calculation, + results=results, + directory=directory, + ) + # Otherwise we hand this off to the database object + else: + calculation.update_from_results( + results=results, + directory=directory, + ) + + # write the output summary to file + calculation.write_output_summary(directory) + # TODO: consider making this optional to improve speedup + + return calculation - return calculation.id + @classmethod + def load_completed_calc(cls, directory: Path): + # TODO: maybe load the yaml file to get extra kwargs, run_id, etc. + return cls.database_table.from_directory(directory) # ------------------------------------------------------------------------- # Properties that enforce the naming convention for workflows @@ -715,7 +776,7 @@ def _load_input_and_register(cls, **parameters: any) -> dict: # and also see which structures/runs have been submitted aready. parameters_cleaned["run_id"] = ( - parameters_cleaned.get("run_id", None) or cls._get_run_id() + parameters_cleaned.get("run_id", None) or cls._get_new_run_id() ) if cls.use_database: @@ -771,7 +832,7 @@ def _load_input_and_register(cls, **parameters: any) -> dict: return parameters_cleaned @staticmethod - def _get_run_id(): + def _get_new_run_id(): """ Generates a random id to use as a workflow run id. @@ -796,6 +857,14 @@ def _parameters_to_register(cls) -> list[str]: table_columns = cls.database_table.get_column_names() + # as an extra, we need to check for relations and add also check for + # "_id" added on to the name in case we want to register the new entry + # with this relation. An example of this is "diffusion_analysis_id" + # which is a related object and column. + for field in cls.database_table._meta.get_fields(): + if field.is_relation: # and isinstance(ForeignKey) + table_columns.append(f"{field.name}_id") + for parameter in cls.parameter_names: if parameter in table_columns: parameters_to_register.append(parameter) @@ -821,41 +890,10 @@ def _register_calculation(cls, **kwargs) -> Calculation: `run_prefect_cloud` method and `load_input_and_register` task. """ - # We first need to grab the database table where we want to register - # the calculation run to. We can grab the table from either... - # 1. the database_table attribute - # 2. flow_context --> flow_name --> flow --> then grab its database_table - - # If this method is being called on the base Workflow class, that - # means we are trying to register a calculation from within a flow - # context -- where the context has information such as the workflow - # we are using (and the database table linked to that workflow). - if cls == Workflow: - raise Exception("Checking if this method is ever used") - - from prefect.context import FlowRunContext - - run_context = FlowRunContext.get() - workflow = run_context.flow.simmate_workflow - database_table = workflow.database_table - - # Otherwise we should be using the subclass Workflow that has the - # database_table property set. - else: - workflow = cls # we have the workflow class already - database_table = cls.database_table - - # Registration is only possible if a table is provided. Some - # special-case workflows don't store calculation information bc the flow - # is just a quick python analysis. - if not database_table: - logging.warning("No database table found. 
Skipping registration.") - return - # grab the registration kwargs from the parameters provided and then # convert them to a python object format for the database method register_kwargs = { - key: kwargs.get(key, None) for key in workflow._parameters_to_register + key: kwargs.get(key, None) for key in cls._parameters_to_register } register_kwargs_cleaned = cls._deserialize_parameters( add_defaults_from_attr=False, **register_kwargs @@ -873,14 +911,16 @@ def _register_calculation(cls, **kwargs) -> Calculation: # back to json before saving to the database. if "workflow_base" in register_kwargs_cleaned: parameters_serialized = cls._serialize_parameters(**register_kwargs_cleaned) - calculation = database_table.from_run_context( + calculation = cls.database_table.from_run_context( workflow_name=cls.name_full, + workflow_version=cls.version, **parameters_serialized, ) else: # load/create the calculation for this workflow run - calculation = database_table.from_run_context( + calculation = cls.database_table.from_run_context( workflow_name=cls.name_full, + workflow_version=cls.version, **register_kwargs_cleaned, ) @@ -996,7 +1036,7 @@ def _deserialize_parameters( # the class attribute. if add_defaults_from_attr: for parameter in cls.parameter_names: - if parameters.get(parameter, None) == None and hasattr(cls, parameter): + if parameters.get(parameter, None) is None and hasattr(cls, parameter): parameters_cleaned[parameter] = getattr(cls, parameter) # The remaining checks look to intialize input to toolkit objects using @@ -1005,41 +1045,26 @@ def _deserialize_parameters( # potentially grab the from_dynamic method on the fly -- rather than # doing these repeated steps here. - structure = parameters.get("structure", None) - if structure: - parameters_cleaned["structure"] = Structure.from_dynamic(structure) - else: - parameters_cleaned.pop("structure", None) - - if "composition" in parameters.keys(): - migration_hop = Composition.from_dynamic(parameters["composition"]) - parameters_cleaned["composition"] = migration_hop - - if "structures" in parameters.keys(): - structure_filenames = parameters["structures"].split(";") - parameters_cleaned["structures"] = [ - Structure.from_dynamic(file) for file in structure_filenames - ] - - if "migration_hop" in parameters.keys(): - migration_hop = MigrationHop.from_dynamic(parameters["migration_hop"]) - parameters_cleaned["migration_hop"] = migration_hop - - if "migration_images" in parameters.keys(): - migration_images = MigrationImages.from_dynamic( - parameters["migration_images"] - ) - parameters_cleaned["migration_images"] = migration_images + parameter_mappings = { + "structure": Structure, + "composition": Composition, + "migration_hop": MigrationHop, + "migration_images": MigrationImages, + "supercell_start": Structure, + "supercell_end": Structure, + } - if "supercell_start" in parameters.keys(): - parameters_cleaned["supercell_start"] = Structure.from_dynamic( - parameters["supercell_start"] - ) + for parameter, target_class in parameter_mappings.items(): + if parameter in parameters.keys(): + parameter_orig = parameters.get(parameter, None) + parameters_cleaned[parameter] = target_class.from_dynamic( + parameter_orig + ) - if "supercell_end" in parameters.keys(): - parameters_cleaned["supercell_end"] = Structure.from_dynamic( - parameters["supercell_end"] - ) + # directory and source are two extra parameters that cant be used in the + # mapping above because they don't have a `from_dynamic` method. 
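The consolidated `parameter_mappings` loop above leans entirely on `from_dynamic`. A quick sketch of what that means for callers -- the file name and the database table/id below are placeholders:

```python
# Illustrative sketch (not part of the diff): the input formats that
# from_dynamic-based deserialization accepts for a "structure" parameter.
from simmate.toolkit import Structure  # assumed import location

# a filename (or Path) is loaded from disk
structure_a = Structure.from_dynamic("POSCAR")

# a database pointer is loaded from the simmate database
structure_b = Structure.from_dynamic(
    {"database_table": "MITStaticEnergy", "database_id": 1}
)

# an existing toolkit object passes through unchanged
structure_c = Structure.from_dynamic(structure_a)
```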
+ # Note these also pull from 'parameters_cleaned' as they might have been + # populated during registration. if parameters.get("directory", None): parameters_cleaned["directory"] = Path(parameters_cleaned["directory"]) diff --git a/src/simmate/workflows/test/test_workflows_utilities.py b/src/simmate/workflows/test/test_workflows_utilities.py index fb8fcb8aa..689137c87 100644 --- a/src/simmate/workflows/test/test_workflows_utilities.py +++ b/src/simmate/workflows/test/test_workflows_utilities.py @@ -141,6 +141,7 @@ def test_get_custom_workflow(tmp_path): def test_get_unique_paramters(): + assert get_unique_parameters() == [ "angle_tolerance", "chemical_system", @@ -158,18 +159,22 @@ def test_get_unique_paramters(): "is_restart", "limit_best_survival", "max_atoms", + "max_path_length", "max_structures", + "max_supercell_atoms", "migrating_specie", "migration_hop", - "migration_hop_id", "migration_images", "min_atoms", "min_length", "min_structures_exact", + "min_supercell_atoms", + "min_supercell_vector_lengths", "nfirst_generation", "nimages", "nsteadystate", "nsteps", + "percolation_mode", "run_id", "search_id", "selector_kwargs", @@ -191,6 +196,7 @@ def test_get_unique_paramters(): "temperature_start", "time_step", "updated_settings", + "vacancy_mode", "validator_kwargs", "validator_name", "workflow_base",
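Several of the new entries in this parameter list relate to how distinct NEB pathways and their supercells are generated. A hedged example of how they might be passed to an all-paths NEB run -- the values shown and the exact parameter set accepted by this preset are illustrative, not recommendations:

```python
# Illustrative sketch (not part of the diff): new NEB tuning parameters.
from simmate.workflows.utilities import get_workflow  # assumed helper

workflow = get_workflow("diffusion.vasp.neb-all-paths-mit")

state = workflow.run(
    structure="POSCAR",        # hypothetical bulk structure file
    migrating_specie="Li",
    max_path_length=5.0,       # angstroms; caps the hop distance considered
    percolation_mode=">1d",    # cutoff for 1D percolating pathways
    vacancy_mode=True,         # treat hops as vacancy-mediated diffusion
    max_supercell_atoms=100,   # limits the size of generated NEB supercells
)
result = state.result()
```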