Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add diagonalization errors handler to ph base workchain #757

Merged
merged 3 commits into from
Nov 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion aiida_quantumespresso/calculations/ph.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ def define(cls, spec):
message='The calculation stopped prematurely because it ran out of walltime.')
spec.exit_code(410, 'ERROR_CONVERGENCE_NOT_REACHED',
message='The minimization cycle did not reach self-consistency.')
spec.exit_code(462, 'ERROR_COMPUTING_CHOLESKY',
message='The code failed during the cholesky factorization.')
# yapf: enable

def prepare_for_submission(self, folder):
Expand Down Expand Up @@ -170,7 +172,7 @@ def prepare_for_submission(self, folder):
try:
mesh, offset = self.inputs.qpoints.get_kpoints_mesh()

if any([i != 0. for i in offset]):
if any(i != 0. for i in offset):
raise NotImplementedError(
'Computation of phonons on a mesh with non zero offset is not implemented, at the level of ph.x'
)
Expand Down
1 change: 1 addition & 0 deletions aiida_quantumespresso/parsers/parse_raw/ph.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def detect_important_message(logs, line):
'error': {
'Maximum CPU time exceeded': 'ERROR_OUT_OF_WALLTIME',
'No convergence has been achieved': 'ERROR_CONVERGENCE_NOT_REACHED',
'problems computing cholesky': 'ERROR_COMPUTING_CHOLESKY',
},
'warning': {
'Warning:': None,
Expand Down
6 changes: 6 additions & 0 deletions aiida_quantumespresso/parsers/ph.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,9 @@ def parse(self, **kwargs):

if 'ERROR_CONVERGENCE_NOT_REACHED' in logs['error']:
return self.exit_codes.ERROR_CONVERGENCE_NOT_REACHED

if 'ERROR_COMPUTING_CHOLESKY' in logs['error']:
return self.exit_codes.ERROR_COMPUTING_CHOLESKY

if 'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs['error']:
return self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE
17 changes: 17 additions & 0 deletions aiida_quantumespresso/workflows/ph/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,23 @@ def handle_scheduler_out_of_walltime(self, node):
self.report_error_handled(node, action)
return ProcessHandlerReport(True)

@process_handler(priority=585, exit_codes=PhCalculation.exit_codes.ERROR_COMPUTING_CHOLESKY)
def handle_diagonalization_errors(self, calculation):
    """Recover from a failed Cholesky factorization by changing the diagonalization algorithm.

    Registered for the ``ERROR_COMPUTING_CHOLESKY`` exit code of ``PhCalculation``. When the ``INPUTPH`` namelist
    stored in ``self.ctx.inputs.parameters`` is not yet set to ``diagonalization = 'cg'``, that value is written
    into the namelist and the error is reported as handled. When ``cg`` was already in use there is no alternative
    left to try, so the returned report carries ``ERROR_UNRECOVERABLE_FAILURE``.

    :param calculation: the calculation this handler was invoked for; only forwarded to ``report_error_handled``.
    :return: a ``ProcessHandlerReport`` whose first argument is ``True`` in both branches.
    """
    namelist = self.ctx.inputs.parameters['INPUTPH']
    already_conjugate_gradient = namelist.get('diagonalization') == 'cg'

    if not already_conjugate_gradient:
        # First occurrence of the problem: switch algorithm in the inputs held in the context.
        namelist['diagonalization'] = 'cg'
        self.report_error_handled(
            calculation, 'found diagonalization issues, switching to conjugate gradient diagonalization.'
        )
        return ProcessHandlerReport(True)

    # The failing run was already using conjugate gradient, so give up with an explicit exit code.
    self.report_error_handled(
        calculation,
        'found diagonalization issues but already running with conjugate gradient algorithm, aborting...'
    )
    return ProcessHandlerReport(True, self.exit_codes.ERROR_UNRECOVERABLE_FAILURE)

@process_handler(priority=580, exit_codes=PhCalculation.exit_codes.ERROR_OUT_OF_WALLTIME)
def handle_out_of_walltime(self, node):
"""Handle `ERROR_OUT_OF_WALLTIME` exit code: calculation shut down neatly and we can simply restart."""
Expand Down
Empty file.
95 changes: 95 additions & 0 deletions tests/parsers/fixtures/ph/failed_computing_cholesky/aiida.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@

Program PHONON v.6.3MaX starts on 9Aug2019 at 12:13:51

This program is part of the open-source Quantum ESPRESSO suite
for quantum simulation of materials; please cite
"P. Giannozzi et al., J. Phys.:Condens. Matter 21 395502 (2009);
"P. Giannozzi et al., J. Phys.:Condens. Matter 29 465901 (2017);
URL http://www.quantum-espresso.org",
in publications or presentations arising from this work. More details at
http://www.quantum-espresso.org/quote

*** WARNING: using old-style file format, will disappear from next version ***

Serial version
Title line not specified: using 'default'.
Message from routine phq_readin:
iverbosity is obsolete, use "verbosity" instead

Reading data from directory:
./out/aiida.save
Message from routine volume:
axis vectors are left-handed

IMPORTANT: XC functional enforced from input :
Exchange-correlation = PBE ( 1 4 3 4 0 0)
Any further DFT definition will be discarded
Please, verify this is what you really want


G-vector sticks info
--------------------
sticks: dense smooth PW G-vecs: dense smooth PW
Sum 859 433 127 16889 5985 965

3 / 3 q-points for this run, from 1 to 3:
N xq(1) xq(2) xq(3)
1 0.000000000 0.000000000 0.000000000
2 0.353553391 -0.353553391 -0.353553391
3 0.000000000 0.000000000 -0.707106781


Calculation of q = 0.0000000 0.0000000 0.0000000

Restart in Phonon calculation



bravais-lattice index = 0
lattice parameter (alat) = 7.2558 a.u.
unit-cell volume = 270.1072 (a.u.)^3
number of atoms/cell = 2
number of atomic types = 1
kinetic-energy cut-off = 30.0000 Ry
charge density cut-off = 240.0000 Ry
convergence threshold = 1.0E-12
beta = 0.7000
number of iterations used = 4
Exchange-correlation = PBE ( 1 4 3 4 0 0)


celldm(1)= 7.25577 celldm(2)= 0.00000 celldm(3)= 0.00000
celldm(4)= 0.00000 celldm(5)= 0.00000 celldm(6)= 0.00000

crystal axes: (cart. coord. in units of alat)
a(1) = ( 0.7071 0.7071 0.0000 )
a(2) = ( 0.7071 0.0000 0.7071 )
a(3) = ( 0.0000 0.7071 0.7071 )

reciprocal axes: (cart. coord. in units 2 pi/alat)
b(1) = ( 0.7071 0.7071 -0.7071 )
b(2) = ( 0.7071 -0.7071 0.7071 )
b(3) = ( -0.7071 0.7071 0.7071 )


Atoms inside the unit cell:

Cartesian axes

site n. atom mass positions (alat units)
1 Si 28.0855 tau( 1) = ( 0.00000 0.00000 0.00000 )
2 Si 28.0855 tau( 2) = ( 0.35355 0.35355 0.35355 )

Computing dynamical matrix for
q = ( 0.0000000 0.0000000 0.0000000 )

49 Sym.Ops. (with q -> -q+G )

s frac. trans.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Error in routine cdiaghg (25):
problems computing cholesky
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

stopping ...
19 changes: 19 additions & 0 deletions tests/parsers/test_ph.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,22 @@ def test_ph_out_of_walltime(fixture_localhost, generate_calc_job_node, generate_
assert calcfunction.exit_status == node.process_class.exit_codes.ERROR_OUT_OF_WALLTIME.status
assert 'output_parameters' in results
data_regression.check(results['output_parameters'].get_dict())


def test_pw_failed_computing_cholesky(fixture_localhost, generate_calc_job_node, generate_parser):
    """Check that the ``ph`` parser maps a Cholesky failure in the stdout onto the dedicated exit code.

    The ``failed_computing_cholesky`` fixture stdout stops right after the ``cdiaghg`` error block containing
    ``problems computing cholesky``; parsing it is expected to finish, be marked as failed, and report the
    status of ``ERROR_COMPUTING_CHOLESKY``.
    """
    # The calculation and its parser are registered under the same entry point string.
    entry_point = 'quantumespresso.ph'
    fixture_name = 'failed_computing_cholesky'

    node = generate_calc_job_node(entry_point, fixture_localhost, fixture_name, generate_inputs())
    parser = generate_parser(entry_point)
    _, calcfunction = parser.parse_from_node(node, store_provenance=False)

    # The assertion messages surface the exception / exit status when a check fails.
    assert calcfunction.is_finished, calcfunction.exception
    assert calcfunction.is_failed, calcfunction.exit_status

    expected_status = node.process_class.exit_codes.ERROR_COMPUTING_CHOLESKY.status
    assert calcfunction.exit_status == expected_status
21 changes: 21 additions & 0 deletions tests/workflows/ph/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,27 @@ def test_handle_convergence_not_reached(generate_workchain_ph):
assert result.status == 0


def test_handle_diagonalization_errors(generate_workchain_ph):
    """Exercise both branches of ``PhBaseWorkChain.handle_diagonalization_errors``.

    The first handler call starts from ``diagonalization = 'david'`` and must switch the input to ``'cg'``.
    The second call then finds ``'cg'`` already set, after which ``inspect_process`` is expected to return
    ``ERROR_UNRECOVERABLE_FAILURE``.
    """
    workchain = generate_workchain_ph(exit_code=PhCalculation.exit_codes.ERROR_COMPUTING_CHOLESKY)
    workchain.setup()
    workchain.validate_parameters()
    workchain.prepare_process()

    failed_child = workchain.ctx.children[-1]

    # Start from a non-``cg`` algorithm so the first call takes the recovery branch.
    workchain.ctx.inputs.parameters['INPUTPH']['diagonalization'] = 'david'

    first_report = workchain.handle_diagonalization_errors(failed_child)
    assert isinstance(first_report, ProcessHandlerReport)
    assert workchain.ctx.inputs.parameters['INPUTPH']['diagonalization'] == 'cg'
    assert first_report.do_break

    # Inputs now already use ``cg``: the handler has nothing else to try.
    second_report = workchain.handle_diagonalization_errors(failed_child)
    assert second_report.do_break

    outcome = workchain.inspect_process()
    assert outcome == PhBaseWorkChain.exit_codes.ERROR_UNRECOVERABLE_FAILURE


def test_set_max_seconds(generate_workchain_ph):
"""Test that `max_seconds` gets set in the parameters based on `max_wallclock_seconds` unless already set."""
inputs = generate_workchain_ph(return_inputs=True)
Expand Down