diff --git a/src/nomad_parser_vasp/parsers/xml_parser.py b/src/nomad_parser_vasp/parsers/xml_parser.py index 1c0192c..f1c9fb5 100644 --- a/src/nomad_parser_vasp/parsers/xml_parser.py +++ b/src/nomad_parser_vasp/parsers/xml_parser.py @@ -7,15 +7,8 @@ from nomad_simulations.schema_packages.general import Program, Simulation from nomad_simulations.schema_packages.model_method import DFT, XCFunctional from nomad_simulations.schema_packages.model_system import AtomicCell, ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs from structlog.stdlib import BoundLogger -from nomad_parser_vasp.schema_packages.vasp_schema import ( - HartreeDCEnergy, - TotalEnergy, - XCdcEnergy, -) - configuration = config.get_plugin_entry_point( 'nomad_parser_vasp.parsers:xml_entry_point' ) @@ -99,36 +92,3 @@ def xml_get(path: str, slicer=slice(0, 1), fallback=None): hartreedc = hartreedc[0] * ureg.eV if hartreedc else None xcdc = xml_get("i[@name='XCdc']", slice(-2, -1)) xcdc = xcdc[0] * ureg.eV if xcdc else None - - #################################################### - # Create the outputs section, populate it with the # - # parsed energies, and add it to the archive # - #################################################### - output = Outputs() - archive.data.outputs.append(output) - output.total_energy.append(TotalEnergy(value=total_energy)) - output.total_energy[0].contributions.append(HartreeDCEnergy(value=hartreedc)) - output.total_energy[0].contributions.append(XCdcEnergy(value=xcdc)) - - ############################################################## - # Add a new contribution to the total energy that quantifies # - # its unknown contributions (3 ways, choose 1) # - ############################################################## - - # Case 1: Don't include UnknownEnergy in parsing - # Expected Results: UnknownEnergy is added to contribution list by the normalizer - - # Case 2: Add UnknownEnergy to contribution list in the parser but without a value - # from nomad_parser_vasp.schema_packages.vasp_schema import UnknownEnergy - - # output.total_energy[0].contributions.append(UnknownEnergy(value=None)) - # Expected Results: UnknownEnergy value is calculated by the normalizer and placed into this section - - # Case 3: Add UnknownEnergy to contribution list in the parser with a value - # from nomad_parser_vasp.schema_packages.vasp_schema import UnknownEnergy - - # output.total_energy[0].contributions.append( - # UnknownEnergy(value=(total_energy - 2 * hartreedc - xcdc)) - # ) - # Expected Results: normalizer does not change the value of UnknownEnergy - # (for testing purposes we subtract double the hartreedc value) diff --git a/src/nomad_parser_vasp/parsers/xml_parser_all_solutions.py b/src/nomad_parser_vasp/parsers/xml_parser_all_solutions.py new file mode 100644 index 0000000..222a0e3 --- /dev/null +++ b/src/nomad_parser_vasp/parsers/xml_parser_all_solutions.py @@ -0,0 +1,134 @@ +import numpy as np +from nomad.config import config +from nomad.datamodel.datamodel import EntryArchive +from nomad.parsing import MatchingParser +from nomad.parsing.file_parser.xml_parser import XMLParser +from nomad.units import ureg +from nomad_simulations.schema_packages.general import Program, Simulation +from nomad_simulations.schema_packages.model_method import DFT, XCFunctional +from nomad_simulations.schema_packages.model_system import AtomicCell, ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs +from structlog.stdlib import BoundLogger + +from nomad_parser_vasp.schema_packages.vasp_schema import ( + HartreeDCEnergy, + TotalEnergy, + XCdcEnergy, +) + +configuration = config.get_plugin_entry_point( + 'nomad_parser_vasp.parsers:xml_entry_point' +) + + +class VasprunXMLParser(MatchingParser): + convert_xc: dict[str, str] = { + '--': 'GGA_XC_PBE', + 'PE': 'GGA_XC_PBE', + } + + def parse( + self, + mainfile: str, + archive: EntryArchive, + logger: BoundLogger, + child_archives: dict[str, EntryArchive] = None, + ) -> None: + logger.info('VasprunXMLParser.parse', parameter=configuration.parameter) + xml_reader = XMLParser(mainfile=mainfile) # XPath syntax + + def xml_get(path: str, slicer=slice(0, 1), fallback=None): + try: + return xml_reader.parse(path)._results[path][slicer] + except KeyError: + return fallback + + #################################################### + # Parse the basic program, method, and system data # + #################################################### + archive.data = Simulation( + program=Program( + name='VASP', + version=xml_get("//generator/i[@name='version']")[0], + ), + model_method=[ + DFT( + xc_functionals=[ + XCFunctional( + libxc_name=self.convert_xc.get( + xml_get( + "///separator[@name='electronic exchange-correlation']/i[@name='LSDA']" + ), + {}, + ) + .get( + xml_get( + "///separator[@name='electronic exchange-correlation']/i[@name='METAGGA']" + ), + {}, + ) + .get( + xml_get( + "///separator[@name='electronic exchange-correlation']/i[@name='GGA']" + ), + 'PE', + ), + ), + ], + ), + ], + ) + + if ( + positions := xml_get( + "structure[@name='finalpos']/./varray[@name='positions']/v", + slice(None), + fallback=np.array([]), + ) + ).any(): + archive.data.model_system.append( + ModelSystem(cell=[AtomicCell(positions=positions)]) + ) + + ##################################################### + # Get the energy data from the raw simulation files # + ##################################################### + total_energy = xml_get("i[@name='e_fr_energy']", slice(-2, -1)) + total_energy = total_energy[0] * ureg.eV if total_energy else None + hartreedc = xml_get("i[@name='hartreedc']", slice(-2, -1)) + hartreedc = hartreedc[0] * ureg.eV if hartreedc else None + xcdc = xml_get("i[@name='XCdc']", slice(-2, -1)) + xcdc = xcdc[0] * ureg.eV if xcdc else None + + #################################################### + # Create the outputs section, populate it with the # + # parsed energies, and add it to the archive # + #################################################### + output = Outputs() + archive.data.outputs.append(output) + output.total_energy.append(TotalEnergy(value=total_energy)) + output.total_energy[0].contributions.append(HartreeDCEnergy(value=hartreedc)) + output.total_energy[0].contributions.append(XCdcEnergy(value=xcdc)) + + ############################################################## + # Add a new contribution to the total energy that quantifies # + # its unknown contributions (3 ways, choose 1) # + ############################################################## + + # Case 1: Don't include UnknownEnergy in parsing + # Expected Results: UnknownEnergy is added to contribution list by the normalizer + + # # Case 2: Add UnknownEnergy to contribution list in the parser but without a value + # from nomad_parser_vasp.schema_packages.vasp_schema import UnknownEnergy + + # output.total_energy[0].contributions.append(UnknownEnergy(value=None)) + # # Expected Results: UnknownEnergy value is calculated by the normalizer and placed into this section + + # Case 3: Add UnknownEnergy to contribution list in the parser with a value + # from nomad_parser_vasp.schema_packages.vasp_schema import UnknownEnergy + + # output.total_energy[0].contributions.append( + # UnknownEnergy(value=(total_energy - 2 * hartreedc - xcdc)) + # ) + # Expected Results: normalizer does not change the value of UnknownEnergy + # (for testing purposes we subtract double the hartreedc value) diff --git a/src/nomad_parser_vasp/schema_packages/vasp_schema.py b/src/nomad_parser_vasp/schema_packages/vasp_schema.py index 339e35b..e69de29 100644 --- a/src/nomad_parser_vasp/schema_packages/vasp_schema.py +++ b/src/nomad_parser_vasp/schema_packages/vasp_schema.py @@ -1,80 +0,0 @@ -import nomad_simulations -from nomad.metainfo import MEnum, Quantity -from nomad_simulations.schema_packages.properties.energies import EnergyContribution - - -class DoubleCountingEnergy(EnergyContribution): - type = Quantity( - type=MEnum('double_counting'), - ) - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - if not self.type: - self.type = 'double_counting' - - -class HartreeDCEnergy(DoubleCountingEnergy): - def __init__( - self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs - ) -> None: - super().__init__(m_def, m_context, **kwargs) - self.name = self.m_def.name - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - -class XCdcEnergy(DoubleCountingEnergy): - def __init__( - self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs - ) -> None: - super().__init__(m_def, m_context, **kwargs) - self.name = self.m_def.name - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - -class UnknownEnergy(EnergyContribution): - def __init__( - self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs - ) -> None: - super().__init__(m_def, m_context, **kwargs) - self.name = self.m_def.name - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - -class TotalEnergy(nomad_simulations.schema_packages.properties.TotalEnergy): - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - if not self.value: - return - if not self.contributions: - return - - value = self.value - unknown_exists = False - unknown_has_value = False - i_unknown = None - for i_cont, contribution in enumerate(self.contributions): - if contribution.name == 'UnknownEnergy': - unknown_exists = True - i_unknown = i_cont - unknown_has_value = True if contribution.value else False - - if not contribution.value: - continue - - value -= contribution.value - - if unknown_exists: - if not unknown_has_value: - self.contributions[i_unknown].value = value - else: - self.contributions.append(UnknownEnergy(value=value)) - self.contributions[-1].normalize(archive, logger) diff --git a/src/nomad_parser_vasp/schema_packages/vasp_schema_all_solutions.py b/src/nomad_parser_vasp/schema_packages/vasp_schema_all_solutions.py new file mode 100644 index 0000000..339e35b --- /dev/null +++ b/src/nomad_parser_vasp/schema_packages/vasp_schema_all_solutions.py @@ -0,0 +1,80 @@ +import nomad_simulations +from nomad.metainfo import MEnum, Quantity +from nomad_simulations.schema_packages.properties.energies import EnergyContribution + + +class DoubleCountingEnergy(EnergyContribution): + type = Quantity( + type=MEnum('double_counting'), + ) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + if not self.type: + self.type = 'double_counting' + + +class HartreeDCEnergy(DoubleCountingEnergy): + def __init__( + self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs + ) -> None: + super().__init__(m_def, m_context, **kwargs) + self.name = self.m_def.name + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + +class XCdcEnergy(DoubleCountingEnergy): + def __init__( + self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs + ) -> None: + super().__init__(m_def, m_context, **kwargs) + self.name = self.m_def.name + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + +class UnknownEnergy(EnergyContribution): + def __init__( + self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs + ) -> None: + super().__init__(m_def, m_context, **kwargs) + self.name = self.m_def.name + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + +class TotalEnergy(nomad_simulations.schema_packages.properties.TotalEnergy): + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + if not self.value: + return + if not self.contributions: + return + + value = self.value + unknown_exists = False + unknown_has_value = False + i_unknown = None + for i_cont, contribution in enumerate(self.contributions): + if contribution.name == 'UnknownEnergy': + unknown_exists = True + i_unknown = i_cont + unknown_has_value = True if contribution.value else False + + if not contribution.value: + continue + + value -= contribution.value + + if unknown_exists: + if not unknown_has_value: + self.contributions[i_unknown].value = value + else: + self.contributions.append(UnknownEnergy(value=value)) + self.contributions[-1].normalize(archive, logger) diff --git a/tests/test_parse.ipynb b/tests/test_parse.ipynb index 427f61a..5e2d951 100644 --- a/tests/test_parse.ipynb +++ b/tests/test_parse.ipynb @@ -32,10 +32,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "The used property is not defined in the FAIRmat taxonomy (https://fairmat-nfdi.github.io/fairmat-taxonomy/). You can contribute there if you want to extend the list of available materials properties. ()\n", - "The used property is not defined in the FAIRmat taxonomy (https://fairmat-nfdi.github.io/fairmat-taxonomy/). You can contribute there if you want to extend the list of available materials properties. ()\n", - "The used property is not defined in the FAIRmat taxonomy (https://fairmat-nfdi.github.io/fairmat-taxonomy/). You can contribute there if you want to extend the list of available materials properties. ()\n", - "The used property is not defined in the FAIRmat taxonomy (https://fairmat-nfdi.github.io/fairmat-taxonomy/). You can contribute there if you want to extend the list of available materials properties. ()\n", "Length of `AtomicCell.positions` does not coincide with the length of the `AtomicCell.atoms_state`. (normalizer=MetainfoNormalizer)\n", "Could not extract the geometric space information from ASE Atoms object. (normalizer=MetainfoNormalizer)\n", "could not normalize section (normalizer=MetainfoNormalizer, section=DFT, exc_info=max() arg is an empty sequence)\n", @@ -56,120 +52,45 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TotalEnergy\n", - "-1.1442321664199474e-18 joule\n", - "[HartreeDCEnergy:HartreeDCEnergy(name, type, is_derived, variables, value), XCdcEnergy:XCdcEnergy(name, type, is_derived, variables, value), UnknownEnergy:UnknownEnergy(name, is_derived, value)]\n" - ] - } - ], - "source": [ - "total_energy = a.data.outputs[0].total_energy[0]\n", - "print(total_energy.name)\n", - "print(total_energy.value)\n", - "print(total_energy.contributions)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HartreeDCEnergy\n", - "double_counting\n", - "-6.432015131607956e-17 joule\n" - ] - } - ], - "source": [ - "hartreedc = total_energy.contributions[0]\n", - "print(hartreedc.name)\n", - "print(hartreedc.type)\n", - "print(hartreedc.value)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "XCdcEnergy\n", - "double_counting\n", - "-7.660195079365588e-18 joule\n" - ] - } - ], - "source": [ - "xcdc = total_energy.contributions[1]\n", - "print(xcdc.name)\n", - "print(xcdc.type)\n", - "print(xcdc.value)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "UnknownEnergy\n", - "7.08361142290252e-17 joule\n" - ] - } - ], - "source": [ - "unknown = total_energy.contributions[2]\n", - "print(unknown.name)\n", - "print(unknown.value)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "7.08361142290252×10-17 joule" - ], - "text/latex": [ - "$7.08361142290252\\times 10^{-17}\\ \\mathrm{joule}$" - ], "text/plain": [ - "7.08361142290252e-17 " + "{'data': {'m_def': 'nomad_simulations.schema_packages.general.Simulation',\n", + " 'program': {'name': 'VASP', 'version': '5.3.2'},\n", + " 'model_system': [{'datetime': '2024-08-15T15:55:41.135408+00:00',\n", + " 'branch_depth': 0,\n", + " 'cell': [{'m_def': 'nomad_simulations.schema_packages.model_system.AtomicCell',\n", + " 'name': 'AtomicCell',\n", + " 'positions': [[0.0, 0.0, 0.0], [0.500001, 0.500001, 0.500001]],\n", + " 'periodic_boundary_conditions': [False, False, False]}]}],\n", + " 'model_method': [{'m_def': 'nomad_simulations.schema_packages.model_method.DFT',\n", + " 'xc_functionals': [{'libxc_name': 'PE'}]}]},\n", + " 'results': {'eln': {'sections': ['ModelSystem']}}}" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "total_energy.value - hartreedc.value - xcdc.value" + "a.m_to_dict()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { "display_name": "vasp-plugin", "language": "python", - "name": "python3" + "name": "vasp-plugin" }, "language_info": { "codemirror_mode": { @@ -181,9 +102,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.9.5" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tests/test_parse_all_solutions.pdf b/tests/test_parse_all_solutions.pdf new file mode 100644 index 0000000..bb060f5 Binary files /dev/null and b/tests/test_parse_all_solutions.pdf differ