diff --git a/benchmarks/creators/00_create_and_log_times.py b/benchmarks/creators/00_create_and_log_times.py index d92a6b5ba..c72446006 100644 --- a/benchmarks/creators/00_create_and_log_times.py +++ b/benchmarks/creators/00_create_and_log_times.py @@ -114,6 +114,7 @@ def time_test_creation(creator_class, creator_kwargs): from simmate.toolkit.creators.structure.third_party.xtalopt import XtaloptStructure RandomSymStructure.name = "Simmate" +# RandomSymStructure.name = "Simmate (strict)" XtaloptStructure.name = "XtalOpt" AseStructure.name = "ASE" PyXtalStructure.name = "PyXtal" diff --git a/benchmarks/creators/01_creation_times_plot.py b/benchmarks/creators/01_creation_times_plot.py index 040b3c596..967e5840f 100644 --- a/benchmarks/creators/01_creation_times_plot.py +++ b/benchmarks/creators/01_creation_times_plot.py @@ -9,6 +9,7 @@ CREATORS_TO_TEST = [ "Simmate", + "Simmate (strict)", # 0.75 packing factor for dist cutoffs "XtalOpt", "ASE", "PyXtal", diff --git a/benchmarks/creators/02_submit_calcs.py b/benchmarks/creators/02_submit_calcs.py index 396863925..cc6b7f0eb 100644 --- a/benchmarks/creators/02_submit_calcs.py +++ b/benchmarks/creators/02_submit_calcs.py @@ -23,6 +23,7 @@ CREATORS_TO_TEST = [ # "Simmate", done + "Simmate (strict)", # "XtalOpt", done # "ASE", done # "PyXtal", done diff --git a/benchmarks/creators/04_calc_time.py b/benchmarks/creators/04_calc_time.py index 5cbc02751..262807677 100644 --- a/benchmarks/creators/04_calc_time.py +++ b/benchmarks/creators/04_calc_time.py @@ -88,6 +88,7 @@ ), yaxis=dict( title_text="Calculation time (min)", + type="log", ticks="outside", tickwidth=2, showline=True, @@ -106,5 +107,19 @@ ) fig = go.Figure(data=subplots, layout=layout) +fig.update_xaxes( + categoryorder="array", + categoryarray=[ + "Fe", + "Si", + "C", + "TiO2", + "SiO2", + "Al2O3", + "Si2N2O", + "SrSiN2", + "MgSiO3", + ], +) plot(fig, config={"scrollZoom": True}) diff --git a/benchmarks/creators/05_fingerprint_initial.py 
b/benchmarks/creators/05_fingerprint_initial.py index 0c45cb1a2..35b7dbf49 100644 --- a/benchmarks/creators/05_fingerprint_initial.py +++ b/benchmarks/creators/05_fingerprint_initial.py @@ -33,6 +33,7 @@ CREATORS_TO_TEST = [ "Simmate", + "Simmate (strict)", "XtalOpt", "ASE", "PyXtal", diff --git a/docs/change_log.md b/docs/change_log.md index 701152042..9b46ffcbf 100644 --- a/docs/change_log.md +++ b/docs/change_log.md @@ -76,7 +76,8 @@ There is one key exception to the rules above -- and that is with `MAJOR`=0 rele - add `show-stats`, `delete-finished`, and `delete-all` commands to `workflow-engine` - add `Cluster` base class + commands that allow submitting a steady-state cluster via subprocesses or slurm - add `started_at`, `created_at`, `total_time`, and `queue_time` columns to `Calculation` tables - +- add `exlcude_from_archives` field to workflows to optionally delete files when compressing outputs to zip archives +- various improvements added for evolutionary search workflows, such as parameter optimization, new output files, and website views -------------------------------------------------------------------------------- diff --git a/docs/getting_started/evolutionary_search/quick_start.md b/docs/getting_started/evolutionary_search/quick_start.md index 10763934d..1a59d3a22 100644 --- a/docs/getting_started/evolutionary_search/quick_start.md +++ b/docs/getting_started/evolutionary_search/quick_start.md @@ -9,7 +9,7 @@ In this tutorial, you will learn how to submit an evolutionary search and other from different computers. Your calculations will be **slower** and **error-prone** with sqlite. - If you are seeing the error `Database Connection Closed`, then you have + If you are seeing the error `database is locked`, then you have exceeded the capabilities of sqlite. !!! 
warning diff --git a/src/simmate/calculators/vasp/workflows/base.py b/src/simmate/calculators/vasp/workflows/base.py index 72f772723..72215c5c3 100644 --- a/src/simmate/calculators/vasp/workflows/base.py +++ b/src/simmate/calculators/vasp/workflows/base.py @@ -29,6 +29,7 @@ class VaspWorkflow(S3Workflow): _parameter_methods = S3Workflow._parameter_methods + ["_get_clean_structure"] required_files = ["INCAR", "POTCAR", "POSCAR"] + exlcude_from_archives = ["POTCAR"] command: str = "vasp_std > vasp.out" """ diff --git a/src/simmate/calculators/vasp/workflows/relaxation/staged.py b/src/simmate/calculators/vasp/workflows/relaxation/staged.py index bbb3079b6..53201df36 100644 --- a/src/simmate/calculators/vasp/workflows/relaxation/staged.py +++ b/src/simmate/calculators/vasp/workflows/relaxation/staged.py @@ -1,32 +1,17 @@ # -*- coding: utf-8 -*- import math +from functools import cache +from pathlib import Path +import numpy import plotly.graph_objects as plotly_go from plotly.subplots import make_subplots -from simmate.calculators.vasp.workflows.relaxation.quality00 import ( - Relaxation__Vasp__Quality00, -) -from simmate.calculators.vasp.workflows.relaxation.quality01 import ( - Relaxation__Vasp__Quality01, -) -from simmate.calculators.vasp.workflows.relaxation.quality02 import ( - Relaxation__Vasp__Quality02, -) -from simmate.calculators.vasp.workflows.relaxation.quality03 import ( - Relaxation__Vasp__Quality03, -) -from simmate.calculators.vasp.workflows.relaxation.quality04 import ( - Relaxation__Vasp__Quality04, -) -from simmate.calculators.vasp.workflows.static_energy.quality04 import ( - StaticEnergy__Vasp__Quality04, -) +from simmate.toolkit import Structure from simmate.visualization.plotting import PlotlyFigure from simmate.workflow_engine import Workflow - -# from simmate.calculators.vasp.database.relaxation import StagedRelaxation +from simmate.workflows.utilities import get_workflow class Relaxation__Vasp__Staged(Workflow): @@ -34,39 +19,45 @@ class 
Relaxation__Vasp__Staged(Workflow): Runs a series of increasing-quality relaxations and then finishes with a single static energy calculation. - This is therefore a "Nested Workflow" made of the following smaller workflows: - - - relaxation.vasp.quality00 - - relaxation.vasp.quality01 - - relaxation.vasp.quality02 - - relaxation.vasp.quality03 - - relaxation.vasp.quality04 - - static-energy.vasp.quality04 - This workflow is most useful for randomly-created structures or extremely large supercells. More precise relaxations+energy calcs should be done afterwards because ettings are still below MIT and Materials Project quality. """ + exlcude_from_archives = [ + "CHG", + "CHGCAR", + "DOSCAR", + "EIGENVAL", + "IBZKPT", + "OSZICAR", + "OUTCAR", + "PCDAT", + "POTCAR", + "REPORT", + "WAVECAR", + "XDATCAR", + ] + description_doc_short = "runs a series of relaxations (00-04 quality)" - subworkflows = [ - Relaxation__Vasp__Quality00, - Relaxation__Vasp__Quality01, - Relaxation__Vasp__Quality02, - Relaxation__Vasp__Quality03, - Relaxation__Vasp__Quality04, - StaticEnergy__Vasp__Quality04, + subworkflow_names = [ + "relaxation.vasp.quality00", + "relaxation.vasp.quality01", + "relaxation.vasp.quality02", + "relaxation.vasp.quality03", + "relaxation.vasp.quality04", + "static-energy.vasp.quality04", ] @classmethod def run_config( cls, - structure, - command=None, - source=None, - directory=None, - copy_previous_directory=False, + structure: Structure, + command: str = None, + source: dict = None, + directory: Path = None, + copy_previous_directory: bool = False, **kwargs, ): @@ -81,7 +72,6 @@ def run_config( # The remaining tasks continue and use the past results as an input for i, current_task in enumerate(cls.subworkflows[1:]): - preceding_task = cls.subworkflows[i] # will be one before because of [:1] state = current_task.run( structure=result, # this is the result of the last run command=command, @@ -103,13 +93,20 @@ def run_config( return final_result @classmethod - def 
get_energy_series(cls, **filter_kwargs): + @property + @cache + def subworkflows(cls): + return [get_workflow(name) for name in cls.subworkflow_names] + + @classmethod + def get_series(cls, value: str, **filter_kwargs): directories = ( cls.all_results.filter(**filter_kwargs) .values_list("directory", flat=True) .all() ) + # OPTIMIZE: This is a query for EACH entry which is very inefficient all_energy_series = [] for directory in directories: energy_series = [] @@ -118,12 +115,12 @@ def get_energy_series(cls, **filter_kwargs): workflow_name=subflow.name_full, directory__startswith=directory, energy_per_atom__isnull=False, - ).only("energy_per_atom") + ).values_list(value) if query.exists(): result = query.get() - energy_series.append(result.energy_per_atom) + energy_series.append(result[0]) else: - energy_series.append(None) + energy_series.append(numpy.nan) all_energy_series.append(energy_series) return all_energy_series @@ -136,7 +133,10 @@ def get_plot( workflow, # Relaxation__Vasp__Staged **filter_kwargs, ): - all_energy_series = workflow.get_energy_series(**filter_kwargs) + all_energy_series = workflow.get_series( + value="energy_per_atom", + **filter_kwargs, + ) figure = make_subplots( rows=math.ceil((len(workflow.subworkflows) - 1) / 3), @@ -192,7 +192,10 @@ def get_plot( **filter_kwargs, ): - all_energy_series = workflow.get_energy_series(**filter_kwargs) + all_energy_series = workflow.get_series( + value="energy_per_atom", + **filter_kwargs, + ) figure = plotly_go.Figure() @@ -229,6 +232,55 @@ def get_plot( return figure +class StagedSeriesTimes(PlotlyFigure): + + method_type = "classmethod" + + def get_plot( + workflow, # Relaxation__Vasp__Staged + **filter_kwargs, + ): + + all_time_series = workflow.get_series( + value="total_time", + **filter_kwargs, + ) + + figure = plotly_go.Figure() + + all_time_series = numpy.transpose(all_time_series) + all_time_series = all_time_series / 60 # convert to minutes + traces = [] + for i, times in 
enumerate(all_time_series): + + trace = plotly_go.Histogram( + x=times, + name=f"{workflow.subworkflows[i].name_full}", + ) + traces.append(trace) + + # add them to the figure in reverse, so that the first relaxations are + # in the front and not hidden + traces.reverse() + for trace in traces: + figure.add_trace(trace=trace) + + figure.update_layout( + barmode="overlay", + xaxis_title_text="Calculation time (min)", + yaxis_title_text="Structures (#)", + bargap=0.05, + legend=dict( + yanchor="top", + y=0.99, + xanchor="left", + x=0.01, + ), + ) + figure.update_traces(opacity=0.75) + return figure + + # register all plotting methods to the database table -for _plot in [StagedSeriesConvergence, StagedSeriesHistogram]: +for _plot in [StagedSeriesConvergence, StagedSeriesHistogram, StagedSeriesTimes]: _plot.register_to_class(Relaxation__Vasp__Staged) diff --git a/src/simmate/database/base_data_types/thermodynamics.py b/src/simmate/database/base_data_types/thermodynamics.py index 5addecc8a..97ddb223f 100644 --- a/src/simmate/database/base_data_types/thermodynamics.py +++ b/src/simmate/database/base_data_types/thermodynamics.py @@ -119,11 +119,16 @@ def _from_toolkit( return data if as_dict else cls(**data) @classmethod - def update_chemical_system_stabilities(cls, chemical_system: str): + def update_chemical_system_stabilities( + cls, + chemical_system: str, + workflow_name: str = None, + ): phase_diagram, entries, entries_pmg = cls.get_phase_diagram( chemical_system, return_entries=True, + workflow_name=workflow_name, ) # now go through the entries and update stability values @@ -162,7 +167,7 @@ def update_chemical_system_stabilities(cls, chemical_system: str): ) @classmethod - def update_all_stabilities(cls): + def update_all_stabilities(cls, workflow_name: str = None): # grab all unique chemical systems chemical_systems = cls.objects.values_list( @@ -174,48 +179,59 @@ def update_all_stabilities(cls): # C would be repeatedly updated through C, C-O, Y-C-F, etc. 
for chemical_system in track(chemical_systems): try: - cls.update_chemical_system_stabilities(chemical_system) + cls.update_chemical_system_stabilities( + chemical_system, + workflow_name, + ) except ValueError as exception: logging.warning(f"Failed for {chemical_system} with error: {exception}") - # BUG: can't use parallel=True as an input - # Because different systems may need to update a single one at the same - # time, errors will be thrown due to row locking. For example, Y-C and - # Sc-C system might both try to update a C structure at the same time - # and one will throw an error. - # - # from simmate.configuration.dask import batch_submit - # - # batch_submit( - # function=cls.update_chemical_system_stabilities, - # args_list=chemical_systems, - # batch_size=1000, - # ) - @classmethod def get_phase_diagram( cls, chemical_system: str, + workflow_name: str = None, return_entries: bool = False, ) -> PhaseDiagram: + if workflow_name is None and hasattr(cls, "workflow_name"): + raise Exception( + "This table contains results from multiple workflows, so you must " + "provide a workflow_name as an input to indicate which entries " + "should be loaded/updated." + ) + # if we have a multi-element system, we need to include subsystems as # well. 
ex: Na --> Na, Cl, Na-Cl subsystems = get_chemical_subsystems(chemical_system) # grab all entries for this chemical system - entries = ( - cls.objects.filter( - # workflow_name="relaxation.vasp.staged", - chemical_system__in=subsystems, - energy__isnull=False, # only completed calculations - ) - .only("energy", "formula_full") - .all() + entries = cls.objects.filter( + # workflow_name="relaxation.vasp.staged", + chemical_system__in=subsystems, + energy__isnull=False, # only completed calculations ) + # add an extra filter if provided + if workflow_name: + entries = entries.filter(workflow_name=workflow_name) + + # now make the queryy + entries = entries.only("id", "energy", "formula_full").all() # convert to pymatgen PDEntries and build into PhaseDiagram object - entries_pmg = [PDEntry(entry.formula_full, entry.energy) for entry in entries] + entries_pmg = [] + for entry in entries: + pde = PDEntry( + composition=entry.formula_full, + energy=entry.energy, + # name=entry.id, see bug below + ) + + # BUG: pymatgen grabs entry_id, when it should really be grabbing name. 
+ # https://github.com/materialsproject/pymatgen/blob/de17dd84ba90dbf7a8ed709a33d894a4edb82d02/pymatgen/analysis/phase_diagram.py#L2926 + pde.entry_id = f"id={entry.id}" + entries_pmg.append(pde) + phase_diagram = PhaseDiagram(entries_pmg) return ( @@ -236,11 +252,13 @@ class HullDiagram(PlotlyFigure): def get_plot( table, # Thermodynamics + Structure table chemical_system: str, + workflow_name: str = None, ): - phase_diagram = table.get_phase_diagram(chemical_system) + phase_diagram = table.get_phase_diagram(chemical_system, workflow_name) - plotter = PDPlotter(phase_diagram) # alternatively use backend="matplotlib" + # alternatively use backend="matplotlib" + plotter = PDPlotter(phase_diagram, show_unstable=True) plot = plotter.get_plot(label_unstable=False) diff --git a/src/simmate/toolkit/base_data_types/composition.py b/src/simmate/toolkit/base_data_types/composition.py index 624e5b827..c6026653e 100644 --- a/src/simmate/toolkit/base_data_types/composition.py +++ b/src/simmate/toolkit/base_data_types/composition.py @@ -166,7 +166,8 @@ def distance_matrix_estimate( - `packing_factor` : Scaling value for the volume in relation to predict ionic radii. A value of 1 means the total volume will be exactly same as the sum - of all spheres. Larger values will give smaller volumes. Because this + of all spheres. Larger values corresponds to better packing (and + higher density) and therefore will give smaller volumes. Because this method is commonly used to define minimum possible distances for random structure creation, we make the default 0.5. 
diff --git a/src/simmate/toolkit/structure_prediction/evolution/database/binary_system.py b/src/simmate/toolkit/structure_prediction/evolution/database/binary_system.py index 1ef5a9cbf..e90c3656b 100644 --- a/src/simmate/toolkit/structure_prediction/evolution/database/binary_system.py +++ b/src/simmate/toolkit/structure_prediction/evolution/database/binary_system.py @@ -1,6 +1,16 @@ # -*- coding: utf-8 -*- +import logging +import traceback +import warnings +from pathlib import Path + from simmate.database.base_data_types import Calculation, table_column +from simmate.utilities import get_chemical_subsystems, get_directory + +# BUG: This prints a tqdm error so we silence it here. +with warnings.catch_warnings(record=True): + from pymatgen.analysis.phase_diagram import PhaseDiagram class BinarySystemSearch(Calculation): @@ -13,3 +23,233 @@ class Meta: max_atoms = table_column.IntegerField(null=True, blank=True) max_stoich_factor = table_column.IntegerField(null=True, blank=True) singleshot_sources = table_column.JSONField(default=list, null=True, blank=True) + + # DEV NOTE: many of the methods below are copy/pasted from the fixed + # composition table and functionality should be merged in the future. 
+ + # ------------------------------------------------------------------------- + # Core methods that help grab key information about the search + # ------------------------------------------------------------------------- + + def to_toolkit(self) -> PhaseDiagram: + phase_diagram = self.individuals_datatable.get_phase_diagram( + chemical_system=self.chemical_system, + workflow_name=self.subworkflow.name_full, + ) + return phase_diagram + + @property + def chemical_system_cleaned(self): + # simply ordered elements in alphabetical order as that is how they + # are stored in the database + elements = [e for e in self.chemical_system.split("-")] + elements.sort() + chemical_system = "-".join(elements) + return chemical_system + + @property + def chemical_subsystems(self): + return get_chemical_subsystems(self.chemical_system_cleaned) + + @property + def subworkflow(self): + + from simmate.workflows.utilities import get_workflow + + if self.subworkflow_name == "relaxation.vasp.staged": + return get_workflow(self.subworkflow_name) + else: + raise Exception( + "Only `relaxation.vasp.staged` is supported in early testing" + ) + + @property + def individuals_datatable(self): + return self.subworkflow.database_table + + @property + def individuals(self): + return self.individuals_datatable.objects.filter( + chemical_system__in=self.chemical_subsystems, + workflow_name=self.subworkflow_name, + ) + + @property + def individuals_completed(self): + return self.individuals.filter(energy_per_atom__isnull=False) + + @property + def individuals_incomplete(self): + # If there is an energy_per_atom, we can treat the calculation as completed + return self.individuals.filter(energy_per_atom__isnull=True) + + @property + def stable_structures(self): + structures = self.individuals_completed.filter(energy_above_hull=0).to_toolkit() + return structures + + @property + def best_structure_for_each_composition(self): + # BUG: for sqlite, you can't use distinct. 
+ # structures = ( + # self.individuals_completed.order_by("energy_above_hull") + # .distinct("formula_reduced") + # .to_toolkit() + # ) + + # Instead, I just use pymatgen. This is slower but still works + phase_diagram = self.to_toolkit() + + structures = [ + self.individuals.get(id=int(e.entry_id.split("=")[-1])) + for e in phase_diagram.qhull_entries + ] + return [s.to_toolkit() for s in structures] + + def update_stabilities(self): + self.individuals_datatable.update_chemical_system_stabilities( + chemical_system=self.chemical_system_cleaned, + workflow_name=self.subworkflow.name_full, + ) + + # ------------------------------------------------------------------------- + # Writing CSVs summaries and CIFs of best structures + # ------------------------------------------------------------------------- + + def write_output_summary(self, directory): + + # If the output fails to write, we have a non-critical issue that + # doesn't affect the search. We therefore don't want to raise an + # error here -- but instead warn the user and then continue the search + try: + + if not self.individuals_completed.exists(): + logging.info("No structures completed yet. 
Skipping output writing.") + return + + logging.info(f"Writing search summary to {directory}") + + super().write_output_summary(directory=directory) + + # update all chemical stabilites before creating the output files + self.update_stabilities() + + self.individuals_datatable.write_hull_diagram_plot( + chemical_system=self.chemical_system_cleaned, + workflow_name=self.subworkflow.name_full, + directory=directory, + ) + + stable_dir = get_directory(directory / "stable_structures") + self.write_stable_structures(stable_dir) + + all_comps_dir = get_directory(directory / "best_structure_per_composition") + self.write_stable_structures(all_comps_dir, include_best_metastable=True) + + except Exception as error: + + if ( + isinstance(error, ValueError) + and "no entries for the terminal elements" in error.args[0] + ): + logging.warning( + "The convex hull and structure stabilities cannot be calculated " + "without terminal elements. Either manually submit pure-element " + "structures to your subworkflow, or make sure you run your " + "search with singleshot sources active (the default) AND " + "your database populated with third-party data. Output files " + "will not be written until this is done." + ) + + else: + logging.warning( + "Failed to write the output summary. This issue will be silenced " + "to avoid stopping the search. But please report the following " + "error to our github: https://github.com/jacksund/simmate/issues/" + ) + + # prints the most recent exception traceback + traceback.print_exc() + + def write_stable_structures( + self, + directory: Path, + include_best_metastable: bool = False, + ): + + # if the directory is filled, we need to delete all the files + # before writing the new ones. + for file in directory.iterdir(): + try: + file.unlink() + except OSError: + logging.warning("Unable to delete a CIF file: {file}") + logging.warning( + "Updating the 'best structures' directory involves deleting " + "and re-writing all CIF files each cycle. 
If you have a file " + "open while this step occurs, then you'll see this warning." + "Close your file for this to go away." + ) + + structures = ( + self.stable_structures + if not include_best_metastable + else self.best_structure_for_each_composition + ) + + for rank, structure in enumerate(structures): + rank_cleaned = str(rank).zfill(2) # converts 1 to 01 + structure_filename = ( + directory + / f"rank-{str(rank_cleaned)}__id-{structure.database_object.id}.cif" + ) + structure.to("cif", structure_filename) + + def write_individuals_completed_full(self, directory: Path): + columns = self.individuals_datatable.get_column_names() + columns.remove("structure") + df = self.individuals_completed.defer("structure").to_dataframe(columns) + csv_filename = directory / "individuals_completed__ALLDATA.csv" + df.to_csv(csv_filename) + + def write_individuals_completed(self, directory: Path): + columns = [ + "id", + "energy_per_atom", + "finished_at", + "source", + "spacegroup__number", + ] + df = ( + self.individuals_completed.order_by(self.fitness_field) + .only(*columns) + .to_dataframe(columns) + ) + # label the index column + df.index.name = "rank" + + # make the timestamps easier to read + def format_date(date): + return date.strftime("%Y-%m-%d %H:%M:%S") + + df["finished_at"] = df.finished_at.apply(format_date) + + def format_parents(source): + return source.get("parent_ids", None) if source else None + + df["parent_ids"] = df.source.apply(format_parents) + + def format_source(source): + return ( + None + if not source + else source.get("creator", None) or source.get("transformation", None) + ) + + df["source"] = df.source.apply(format_source) + + # shorten the column name for easier reading + df.rename(columns={"spacegroup__number": "spacegroup"}, inplace=True) + + md_filename = directory / "individuals_completed.md" + df.to_markdown(md_filename) diff --git a/src/simmate/toolkit/structure_prediction/evolution/database/fixed_composition.py 
b/src/simmate/toolkit/structure_prediction/evolution/database/fixed_composition.py index 28ce4239f..ea5a083ae 100644 --- a/src/simmate/toolkit/structure_prediction/evolution/database/fixed_composition.py +++ b/src/simmate/toolkit/structure_prediction/evolution/database/fixed_composition.py @@ -7,6 +7,7 @@ import pandas import plotly.graph_objects as plotly_go +from pymatgen.analysis.structure_matcher import StructureMatcher from rich.progress import track from simmate.database.base_data_types import Calculation, table_column @@ -482,7 +483,7 @@ def get_best_individual_history(self): return best_history - def write_summary(self, directory: Path): + def write_output_summary(self, directory: Path): logging.info(f"Writing search summary to {directory}") super().write_output_summary(directory=directory) @@ -492,8 +493,11 @@ def write_summary(self, directory: Path): # error here -- but instead warn the user and then continue the search try: # calls all the key methods defined below - best_cifs_directory = get_directory(directory / "best_structures_cifs") + best_cifs_directory = get_directory(directory / "best_structures") self.write_best_structures(100, best_cifs_directory) + best_cifs_directory = get_directory(directory / "best_structures_unique") + self.write_best_structures(200, best_cifs_directory, remove_matching=True) + self.write_individuals_completed(directory=directory) self.write_individuals_completed_full(directory=directory) self.write_best_individuals_history(directory=directory) @@ -504,21 +508,9 @@ def write_summary(self, directory: Path): # BUG: This is only for "relaxation.vasp.staged", which the assumed # workflow for now. 
- composition = Composition(self.composition) - self.subworkflow.write_staged_series_convergence_plot( - directory=directory, - # See `individuals` method for why we use these filters - formula_reduced=composition.reduced_formula, - nsites__lte=composition.num_atoms, - energy_per_atom__isnull=False, - ) - self.subworkflow.write_staged_series_histogram_plot( - directory=directory, - # See `individuals` method for why we use these filters - formula_reduced=composition.reduced_formula, - nsites__lte=composition.num_atoms, - energy_per_atom__isnull=False, - ) + self.write_staged_series_convergence_plot(directory=directory) + self.write_staged_series_histogram_plot(directory=directory) + self.write_staged_series_times_plot(directory=directory) logging.info("Done writing summary.") @@ -578,7 +570,12 @@ def validator(self): # Writing CSVs summaries and CIFs of best structures # ------------------------------------------------------------------------- - def write_best_structures(self, nbest: int, directory: Path): + def write_best_structures( + self, + nbest: int, + directory: Path, + remove_matching: bool = False, + ): # if the directory is filled, we need to delete all the files # before writing the new ones. 
for file in directory.iterdir(): @@ -595,6 +592,12 @@ def write_best_structures(self, nbest: int, directory: Path): best = self.get_nbest_indiviudals(nbest) structures = best.only("structure", "id").to_toolkit() + + if remove_matching: + matcher = StructureMatcher() + groups = matcher.group_structures(structures) + structures = [group[0] for group in groups] + for rank, structure in enumerate(structures): rank_cleaned = str(rank).zfill(2) # converts 1 to 01 structure_filename = ( @@ -819,15 +822,15 @@ def get_plot(search: FixedCompositionSearch): # time is stored in seconds and we convert to minutes total_times = [e[0] / 60 for e in data] - queue_times = [e[1] / 60 for e in data] + # queue_times = [e[1] / 60 for e in data] figure = plotly_go.Figure() - hist_1 = plotly_go.Histogram(x=total_times, name="Total run time (min)") + hist_1 = plotly_go.Histogram(x=total_times) # , name="Total run time (min)" # hist_2 = plotly_go.Histogram(x=queue_times, name="Total queue time (min)") figure.add_trace(hist_1) # figure.add_trace(hist_2) figure.update_layout( - xaxis_title="Total time (min)", + xaxis_title="Total calculation time (min)", yaxis_title="Individuals (#)", barmode="overlay", ) @@ -860,11 +863,50 @@ def get_plot(search: FixedCompositionSearch): return figure +class StagedSeriesConvergence(PlotlyFigure): + def get_plot(search: FixedCompositionSearch): + composition = Composition(search.composition) + plot = search.subworkflow.get_staged_series_convergence_plot( + # See `individuals` method for why we use these filters + formula_reduced=composition.reduced_formula, + nsites__lte=composition.num_atoms, + energy_per_atom__isnull=False, + ) + return plot + + +class StagedSeriesHistogram(PlotlyFigure): + def get_plot(search: FixedCompositionSearch): + composition = Composition(search.composition) + plot = search.subworkflow.get_staged_series_histogram_plot( + # See `individuals` method for why we use these filters + formula_reduced=composition.reduced_formula, + 
nsites__lte=composition.num_atoms, + energy_per_atom__isnull=False, + ) + return plot + + +class StagedSeriesTimes(PlotlyFigure): + def get_plot(search: FixedCompositionSearch): + composition = Composition(search.composition) + plot = search.subworkflow.get_staged_series_times_plot( + # See `individuals` method for why we use these filters + formula_reduced=composition.reduced_formula, + nsites__lte=composition.num_atoms, + energy_per_atom__isnull=False, + ) + return plot + + # register all plotting methods to the database table for _plot in [ FitnessConvergence, Correctness, FitnessDistribution, SubworkflowTimes, + StagedSeriesConvergence, + StagedSeriesHistogram, + StagedSeriesTimes, ]: _plot.register_to_class(FixedCompositionSearch) diff --git a/src/simmate/toolkit/structure_prediction/evolution/workflows/binary_system.py b/src/simmate/toolkit/structure_prediction/evolution/workflows/binary_system.py index 909f8c967..8d16d45a9 100644 --- a/src/simmate/toolkit/structure_prediction/evolution/workflows/binary_system.py +++ b/src/simmate/toolkit/structure_prediction/evolution/workflows/binary_system.py @@ -30,6 +30,8 @@ class StructurePrediction__Toolkit__BinarySystem(Workflow): of a binary phase system (e.g Na-Cl or Y-C) """ + description_doc_short = "hull diagram for a two-element system (e.g. 
Na-Cl)" + database_table = BinarySystemSearch fixed_comp_workflow = StructurePrediction__Toolkit__FixedComposition @@ -42,18 +44,23 @@ def run_config( subworkflow_name: str = "relaxation.vasp.staged", subworkflow_kwargs: dict = {}, max_stoich_factor: int = 4, + nsteadystate: int = 40, directory: Path = None, singleshot_sources: list[str] = [ "third_parties", "prototypes", ], - **kwargs, # passed to fixed_comp_workflow + run_id: str = None, + **kwargs, ): # --------------------------------------------------------------------- # Setting up # --------------------------------------------------------------------- + # grab the calculation table linked to this workflow run + search_datatable = cls.database_table.objects.get(run_id=run_id) + subworkflow = get_workflow(subworkflow_name) # Grab the two elements that we are working with. Also if this raises @@ -127,7 +134,7 @@ def run_config( logging.info( f"Generated {len(structures_known)} structures from other databases" ) - write_and_submit_structures( + states_known = write_and_submit_structures( structures=structures_known, foldername=directory / "from_third_parties", workflow=subworkflow, @@ -156,13 +163,29 @@ def run_config( logging.info( f"Generated {len(structures_prototype)} structures from prototypes" ) - write_and_submit_structures( + states_prototype = write_and_submit_structures( structures=structures_prototype, foldername=directory / "from_prototypes", workflow=subworkflow, workflow_kwargs=subworkflow_kwargs, ) + # --------------------------------------------------------------------- + # Wait for singlshot submissions if there are many of them + # --------------------------------------------------------------------- + + all_submissions = states_prototype + states_known + if len(all_submissions) > (nsteadystate * 2): + number_to_wait_for = len(all_submissions) - nsteadystate - 20 + logging.info( + f"Waiting for at least {number_to_wait_for} singleshot " + "submissions to finish" + ) + for state in 
all_submissions[:number_to_wait_for]: + state.result() + + search_datatable.write_output_summary(directory) + # --------------------------------------------------------------------- # Starting search # --------------------------------------------------------------------- @@ -193,8 +216,8 @@ def run_config( # stopping conditions. # for n in range(1, 10): # min_structures_exact = int(5 * n) - # best_survival_cutoff = int(10 * n) - # max_structures = int(25 * n) + # best_survival_cutoff = int(20 * n) + # max_structures = int(30 * n) # # convergence_cutoff = 0.01 # print( # f"{n}\t{min_structures_exact}\t" @@ -242,8 +265,7 @@ def run_config( # Because we submitted all steady states above, we don't # need the other workflows to do these anymore. singleshot_sources=[], - # If set to True, then the current fixed composition will be - # written to fixed-compositon-logs - ###### OPTIMIZE --- set this to false in the future...? - # write_summary_files=False, ) + + # after each fixed-composition, we can reevaluate the hull diagram + search_datatable.write_output_summary(directory) diff --git a/src/simmate/toolkit/structure_prediction/evolution/workflows/fixed_composition.py b/src/simmate/toolkit/structure_prediction/evolution/workflows/fixed_composition.py index ba72fcdcb..bf47a390f 100644 --- a/src/simmate/toolkit/structure_prediction/evolution/workflows/fixed_composition.py +++ b/src/simmate/toolkit/structure_prediction/evolution/workflows/fixed_composition.py @@ -15,6 +15,8 @@ class StructurePrediction__Toolkit__FixedComposition(Workflow): number of sites. (e.g. Ca2N or Na4Cl4) """ + description_doc_short = "fixed number of sites (e.g. 
Ca2N or Na4Cl4)" + database_table = FixedCompositionSearch @classmethod @@ -50,7 +52,7 @@ def run_config( selector_kwargs: dict = {}, validator_name: str = "PartialRdfFingerprint", validator_kwargs: dict = { - "distance_tolerance": 0.001, + "distance_tolerance": 0.01, "cutoff": 10.0, "bin_size": 0.1, }, @@ -127,7 +129,7 @@ def run_config( # Write the output summary if there is at least one structure completed if write_summary_files: if search_datatable.individuals_completed.count() >= 1: - search_datatable.write_summary(directory) + search_datatable.write_output_summary(directory) else: search_datatable.write_individuals_incomplete(directory) diff --git a/src/simmate/toolkit/structure_prediction/evolution/workflows/new_individual.py b/src/simmate/toolkit/structure_prediction/evolution/workflows/new_individual.py index 6fe3dcb91..4c909bbb1 100644 --- a/src/simmate/toolkit/structure_prediction/evolution/workflows/new_individual.py +++ b/src/simmate/toolkit/structure_prediction/evolution/workflows/new_individual.py @@ -15,9 +15,13 @@ class StructurePrediction__Toolkit__NewIndividual(Workflow): Generates a new individual for an evolutionary search algorithm. Note, this workflow should not be called directly, but instead used within - higher level workflows (such as `fixed-composition`) + higher level workflows (such as `fixed-composition`). + + Users will rarely (if ever) need to call this workflow """ + description_doc_short = "a single structure submission for a search" + use_database = False @staticmethod @@ -30,6 +34,7 @@ def run_config( search_db = FixedCompositionSearch.objects.get(id=search_id) source_db = search_db.steadystate_sources.get(id=steadystate_source_id) + validator = search_db.validator # Check the stop condition of the search and see if this new individual # is even needed. 
This will catch when a search ends while a new @@ -45,15 +50,33 @@ def run_config( if source_db.is_transformation: transformer = source_db.to_toolkit() - parent_ids, new_structure = transformer.apply_from_database_and_selector( + output = transformer.apply_from_database_and_selector( selector=search_db.selector, datatable=search_db.individuals_completed, select_kwargs=dict( ranking_column=search_db.fitness_field, query_limit=200, # Smarter way to do this...? ), - validators=[search_db.validator], + validators=[validator], ) + + # if the source failed to create a structure, then we want to remove + # it to prevent repeated issues. + if output == False: + # TODO: consider more advanced logic for changing the steady + # state values of each source -- rather than just disabling + # them here. + logging.warning( + "Failed to create new individual with steady-state " + f"source {source_db.name}. Removing steady-state." + ) + source_db.nsteadystate_target = 0 + source_db.save() + shutil.rmtree(directory) + return + + # otherwise we have a successful output that we can use + parent_ids, new_structure = output source = { "transformation": source_db.name, "parent_ids": parent_ids, @@ -64,7 +87,7 @@ def run_config( creator = source_db.to_toolkit() new_structure = creator.create_structure_with_validation( - validators=[search_db.validator], + validators=[validator], ) source = { "creator": source_db.name, @@ -72,13 +95,29 @@ def run_config( # if structure creation was successful, run the workflow for it if new_structure: + state = search_db.subworkflow.run( structure=new_structure, source=source, directory=directory, **search_db.subworkflow_kwargs, ) - result = state.result() + state.result() # NOTE: we tell the workflow to use the same directory. There is # good chance the user indicates that they want to compress the # folder to. + + # TODO: when I allow a series of subworkflows, I can do validation checks + # between each run. 
+ # if a validator was given, we want to check the current structure + # and see if it passes our test. This is typically only done in + # expensive analysis -- like evolutionary searches + # current_structure = result.to_toolkit() + # if validator and not validator.check_structure(current_structure): + # # if it fails the check, we want to stop the series of calculations + # # and just exit the workflow run. We can, however, update the + # # database entry with the final structure. + # logging.info( + # "Did not pass validation checkpoint. Stopping workflow series." + # ) + # return {"structure": current_structure} diff --git a/src/simmate/toolkit/structure_prediction/evolution/workflows/utilities.py b/src/simmate/toolkit/structure_prediction/evolution/workflows/utilities.py index 70b370159..fd81f90ba 100644 --- a/src/simmate/toolkit/structure_prediction/evolution/workflows/utilities.py +++ b/src/simmate/toolkit/structure_prediction/evolution/workflows/utilities.py @@ -16,7 +16,7 @@ def write_and_submit_structures( workflow_kwargs: dict, ): if not structures: - return + return [] logging.info("Writing CIFs and submitting structures") @@ -29,6 +29,7 @@ def write_and_submit_structures( nalready_submitted = 0 directory = get_directory(foldername) + states = [] for i, s in enumerate(track(structures)): # check if the structure has been submitted before, and if so, skip it @@ -39,10 +40,11 @@ def write_and_submit_structures( i_cleaned = str(i).zfill(3) # converts 1 to 001 s.to("cif", directory / f"{i_cleaned}.cif") - workflow.run_cloud( + state = workflow.run_cloud( structure=s, **workflow_kwargs, ) + states.append(state) logger.disabled = False @@ -51,3 +53,5 @@ def write_and_submit_structures( f"{nalready_submitted} structures were already submitted " "and therefore skipped." 
) + + return states diff --git a/src/simmate/toolkit/structure_prediction/evolution/workflows/variable_nsites_composition.py b/src/simmate/toolkit/structure_prediction/evolution/workflows/variable_nsites_composition.py index 3010a3fce..d7fcb1f54 100644 --- a/src/simmate/toolkit/structure_prediction/evolution/workflows/variable_nsites_composition.py +++ b/src/simmate/toolkit/structure_prediction/evolution/workflows/variable_nsites_composition.py @@ -21,6 +21,8 @@ class StructurePrediction__Toolkit__VariableNsitesComposition(Workflow): For example, this would be Ca2N and up to 12 atoms (Ca8N4). """ + description_doc_short = "variable number of sites (e.g. Ca2N through Ca8N4)" + database_table = VariableNsitesCompositionSearch fixed_comp_workflow = StructurePrediction__Toolkit__FixedComposition diff --git a/src/simmate/utilities/files.py b/src/simmate/utilities/files.py index d5bbec9b9..431d4419d 100644 --- a/src/simmate/utilities/files.py +++ b/src/simmate/utilities/files.py @@ -138,7 +138,7 @@ def copy_directory( return directory_new_cleaned -def make_archive(directory: Path): +def make_archive(directory: Path, files_to_exclude: list[str] = []): """ Compresses the directory to a zip file of the same name. After compressing, it then deletes the original directory. @@ -151,6 +151,12 @@ def make_archive(directory: Path): directory_full = directory.absolute() + # Remove any files that were requested to be deleted. For example, POTCAR + # files of VASP calculations. + for file_to_remove in files_to_exclude: + for file_found in directory.rglob(file_to_remove): + file_found.unlink() + # This wraps shutil.make_archive to change the default parameters. Normally, # it writes the archive in the working directory, but we update it to use the # the same directory as the folder being archived. 
The format is also set diff --git a/src/simmate/website/templates/core_components/base_data_types/calculation.html b/src/simmate/website/templates/core_components/base_data_types/calculation.html index e21df7784..dea40f2dc 100644 --- a/src/simmate/website/templates/core_components/base_data_types/calculation.html +++ b/src/simmate/website/templates/core_components/base_data_types/calculation.html @@ -31,12 +31,21 @@

Prefect Cloud:

Calculation Information:

- \ No newline at end of file + diff --git a/src/simmate/website/templates/core_components/base_data_types/fixed_composition_search.html b/src/simmate/website/templates/core_components/base_data_types/fixed_composition_search.html new file mode 100644 index 000000000..751d836c4 --- /dev/null +++ b/src/simmate/website/templates/core_components/base_data_types/fixed_composition_search.html @@ -0,0 +1,34 @@ + +{% include "core_components/base_data_types/calculation.html" %} + +
+

Search Settings:

+ +
+ +
+

Plots:

+ {{ calculation.get_fitness_convergence_html_div | safe }} + {{ calculation.get_fitness_distribution_html_div | safe }} + {{ calculation.get_staged_series_convergence_html_div | safe }} + {{ calculation.get_staged_series_histogram_html_div | safe }} + {{ calculation.get_staged_series_times_html_div | safe }} + {{ calculation.get_subworkflow_times_html_div | safe }} +
diff --git a/src/simmate/website/templates/core_components/base_filter_types/calculation.html b/src/simmate/website/templates/core_components/base_filter_types/calculation.html index ace4963fb..965107c20 100644 --- a/src/simmate/website/templates/core_components/base_filter_types/calculation.html +++ b/src/simmate/website/templates/core_components/base_filter_types/calculation.html @@ -1,18 +1,27 @@ {% load crispy_forms_tags %} +
+
+
+ + By Calculation Info: + + +
+
-
-

- -

-
-
+
+
{{ form.run_id | as_crispy_field }} {{ form.directory | as_crispy_field }} {{ form.created_at__range | as_crispy_field }} {{ form.updated_at__range | as_crispy_field }} + {{ form.workflow_name | as_crispy_field }} + {{ form.computer_system | as_crispy_field }} + {{ form.total_time__range | as_crispy_field }} + {{ form.queue_time__range | as_crispy_field }}
\ No newline at end of file diff --git a/src/simmate/website/templates/core_components/base_filter_types/dynamics-run.html b/src/simmate/website/templates/core_components/base_filter_types/dynamics-run.html index e2059fbe8..a41a13e10 100644 --- a/src/simmate/website/templates/core_components/base_filter_types/dynamics-run.html +++ b/src/simmate/website/templates/core_components/base_filter_types/dynamics-run.html @@ -2,15 +2,20 @@ -
-

- -

-
-
+
+ + +
+
{{ form.temperature_start__range | as_crispy_field }} {{ form.temperature_end__range | as_crispy_field }} {{ form.time_step__range | as_crispy_field }} diff --git a/src/simmate/website/templates/core_components/base_filter_types/forces.html b/src/simmate/website/templates/core_components/base_filter_types/forces.html index be2317a84..70ca4c8e8 100644 --- a/src/simmate/website/templates/core_components/base_filter_types/forces.html +++ b/src/simmate/website/templates/core_components/base_filter_types/forces.html @@ -1,14 +1,19 @@ {% load crispy_forms_tags %} -
-

- -

-
-
+
+ + +
+
{{ form.site_force_norm_max__range | as_crispy_field }} {{ form.site_forces_norm_per_atom__range | as_crispy_field }} {{ form.lattice_stress_norm__range | as_crispy_field }} diff --git a/src/simmate/website/templates/core_components/base_filter_types/relaxation.html b/src/simmate/website/templates/core_components/base_filter_types/relaxation.html index ba0e2c520..8094ab155 100644 --- a/src/simmate/website/templates/core_components/base_filter_types/relaxation.html +++ b/src/simmate/website/templates/core_components/base_filter_types/relaxation.html @@ -1,15 +1,20 @@ {% load crispy_forms_tags %} -
-

- -

-
-
+
+ + +
+
{{ form.volume_change__range | as_crispy_field }}
@@ -19,15 +24,20 @@

{% include "core_components/base_filter_types/structure.html" %} -
-

- -

-
-
+
+ + +
+
WARNING: even though some relaxations provide this information, these values are very rough estimates. diff --git a/src/simmate/website/templates/core_components/base_filter_types/static-energy.html b/src/simmate/website/templates/core_components/base_filter_types/static-energy.html index 997dbfe39..0caf92697 100644 --- a/src/simmate/website/templates/core_components/base_filter_types/static-energy.html +++ b/src/simmate/website/templates/core_components/base_filter_types/static-energy.html @@ -6,15 +6,20 @@ {% include "core_components/base_filter_types/forces.html" %} -
-

- -

-
-
+
+ + +
+
WARNING: even though some static energy calculations provide this information, these values are very rough estimates. diff --git a/src/simmate/website/templates/core_components/base_filter_types/structure.html b/src/simmate/website/templates/core_components/base_filter_types/structure.html index 02879716a..d1beb1366 100644 --- a/src/simmate/website/templates/core_components/base_filter_types/structure.html +++ b/src/simmate/website/templates/core_components/base_filter_types/structure.html @@ -1,14 +1,20 @@ {% load crispy_forms_tags %} -
-

- -

-
-
+ +
+ + +
+
- {{ form.nelements__range | as_crispy_field }} + + {{ form.formula_anonymous | as_crispy_field }} {{ form.formula_full | as_crispy_field }} {{ form.formula_reduced | as_crispy_field }} @@ -33,15 +40,21 @@

-
-

- -

-
-
+ +
+ + +
+
{{ form.nsites__range | as_crispy_field }} {{ form.density__range | as_crispy_field }} {{ form.density_atomic__range | as_crispy_field }} @@ -51,15 +64,21 @@

-
-

- -

-
-
+ +
+ + +
+
{{ form.spacegroup__number | as_crispy_field }} {{ form.spacegroup__symbol | as_crispy_field }} {{ form.spacegroup__crystal_system | as_crispy_field }} diff --git a/src/simmate/website/templates/core_components/base_filter_types/thermodynamics.html b/src/simmate/website/templates/core_components/base_filter_types/thermodynamics.html index 35a8992c6..bd2983d89 100644 --- a/src/simmate/website/templates/core_components/base_filter_types/thermodynamics.html +++ b/src/simmate/website/templates/core_components/base_filter_types/thermodynamics.html @@ -1,14 +1,19 @@ {% load crispy_forms_tags %} -
-

- -

-
-
+
+ + +
+
{{ form.energy__range | as_crispy_field }} {{ form.energy_per_atom__range | as_crispy_field }} {{ form.energy_above_hull__range | as_crispy_field }} diff --git a/src/simmate/website/templates/core_components/search_form.html b/src/simmate/website/templates/core_components/search_form.html index 596fd9d3b..22ea4ac8b 100644 --- a/src/simmate/website/templates/core_components/search_form.html +++ b/src/simmate/website/templates/core_components/search_form.html @@ -2,13 +2,16 @@ {% load crispy_forms_tags %}

Filter Results:

+ +
{{ form.non_field_errors }} -
+
{% if filterset.filter_name == "StaticEnergy" %} @@ -58,22 +61,37 @@

Filter Results:

{% endif %} -
- - - - {% if extra_filters %} -
-

By Extra Info:

- {% for field in form %} - {% if field in extra_filters %} - {{ field | as_crispy_field }} + + + + {% if extra_filters %} +
+ + +
+
+ {% for field in form %} + {% if field.name in extra_filters %} + {{ field | as_crispy_field }} + {% endif %} + {% endfor %} +
+
+
{% endif %} - {% endfor %} + +
- {% endif %} - - +
\ No newline at end of file diff --git a/src/simmate/website/templates/core_components/sidebar.html b/src/simmate/website/templates/core_components/sidebar.html index 30d5665cc..5c49a4151 100644 --- a/src/simmate/website/templates/core_components/sidebar.html +++ b/src/simmate/website/templates/core_components/sidebar.html @@ -101,7 +101,7 @@
Download Simmate
-

-
-

Description:

- {{ workflow.description_doc | markdown }} -
+
-

Required Parameters:

-
    - {% for parameter in workflow.parameter_names_required %} -
  • {{ parameter }}
  • - {% endfor %} -
-
+

Workflow details:

-
+
- -
-

Extras:

- {% if flow_id %} - Monitor Runs on Prefect ({{ nflows_submitted }}) - {% endif %} + +
+ +
+
+ {{ workflow.description_doc | markdown }} +
+
+
+ + +
+ +
+
+ +
+
+
+ + +
+ +
+
+
    + {% for parameter, default_value in workflow.parameter_defaults.items %} +
  • + + {{ parameter }} + : {{ default_value }} +
  • + {% endfor %} +
+

Read through + our parameter documentation + for more information. +

+
+
+
+ +
diff --git a/src/simmate/website/workflows/views.py b/src/simmate/website/workflows/views.py index c6f38d45b..9f398360c 100644 --- a/src/simmate/website/workflows/views.py +++ b/src/simmate/website/workflows/views.py @@ -41,6 +41,11 @@ # "At this time, these workflows are entirely Nudged-Elastic-Band (NEB) " # "calculations." # ), + "structure-prediction": ( + "Predict the most stable structure when given only chemical composition " + "or system. Strategies range from evolutionary searches to " + "substituition of known materials." + ), } @@ -67,7 +72,11 @@ def workflows_by_type(request, workflow_type): # workflow calculator. workflow_dict = {} for calculator in calculators: - workflow_names = get_workflow_names_by_type(workflow_type, calculator) + workflow_names = get_workflow_names_by_type( + workflow_type, + calculator, + remove_no_database_flows=True, + ) workflow_dict[calculator] = [get_workflow(n) for n in workflow_names] # now let's put the data and template together to send the user @@ -127,11 +136,7 @@ def get_list_context( # ncalculations = MITRelaxation.objects.count() # nflows_submitted = workflow.nflows_submitted - return { - "workflow": workflow, - "flow_id": None, # TODO - "nflows_submitted": None, - } + return {"workflow": workflow} def get_retrieve_context( self, diff --git a/src/simmate/workflow_engine/workflow.py b/src/simmate/workflow_engine/workflow.py index cb855eb4e..d20622bbe 100644 --- a/src/simmate/workflow_engine/workflow.py +++ b/src/simmate/workflow_engine/workflow.py @@ -72,12 +72,21 @@ class Workflow: `_register_calculation`. """ - _parameter_methods = ["run_config", "_run_full"] + _parameter_methods: list[str] = ["run_config", "_run_full"] """ List of methods that allow unique input parameters. This helps track where `**kwargs` are passed and let's us gather the inputs in one place. """ + exlcude_from_archives: list[str] = [] + """ + List of filenames that should be deleted when compressing the output files + to a zip file (i.e. 
when compress_output=True). Any file name is searched + for recursively in all subdirectories and removed. + + For example, VASP calculations remove all POTCAR files from archives. + """ + # ------------------------------------------------------------------------- # Core methods that handle how and what a workflow run does # and how it is submitted @@ -187,7 +196,10 @@ def _run_full( # the directory. if compress_output: logging.info("Compressing result to a ZIP file.") - make_archive(kwargs_cleaned["directory"]) + make_archive( + directory=kwargs_cleaned["directory"], + files_to_exclude=cls.exlcude_from_archives, + ) # If we made it this far, we successfully completed the workflow run logging.info(f"Completed '{cls.name_full}'") diff --git a/src/simmate/workflows/utilities.py b/src/simmate/workflows/utilities.py index 38e7a30b7..fa11f76ed 100644 --- a/src/simmate/workflows/utilities.py +++ b/src/simmate/workflows/utilities.py @@ -129,6 +129,7 @@ def get_workflow_names_by_type( calculator_name: str = None, full_name: bool = True, precheck_type_exists: bool = True, + remove_no_database_flows: bool = False, ) -> list[str]: """ Returns a list of all the workflows of a given type. Optionally, the @@ -153,6 +154,9 @@ def get_workflow_names_by_type( if calculator_name and flow.name_calculator != calculator_name: continue # Skip those that don't match + if remove_no_database_flows and not flow.use_database: + continue + if full_name: workflow_name = flow.name_full else: