From 958c1edcb5eb270c30cb2287aec6a9ee0cbd48f7 Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Thu, 29 Jul 2021 18:16:03 +0530 Subject: [PATCH] task/internal/syslog: Remove old kernel failures from exclude list Add 'syslog' dict to config dictionary which holds the 'ignorelist' of kernel failures. Fixes: https://tracker.ceph.com/issues/50150 Signed-off-by: Kotresh HR --- teuthology/run.py | 2 +- teuthology/suite/placeholder.py | 3 ++ teuthology/task/internal/syslog.py | 57 ++++++------------------------ 3 files changed, 15 insertions(+), 47 deletions(-) diff --git a/teuthology/run.py b/teuthology/run.py index 37fb42b602..c301bda7e4 100644 --- a/teuthology/run.py +++ b/teuthology/run.py @@ -224,7 +224,7 @@ def get_initial_tasks(lock, config, machine_type): {'internal.archive': None}, {'internal.coredump': None}, {'internal.sudo': None}, - {'internal.syslog': None}, + {'internal.syslog': config.get('syslog', {})}, ]) init_tasks.append({'internal.timer': None}) diff --git a/teuthology/suite/placeholder.py b/teuthology/suite/placeholder.py index 4138541723..9ee6b315a7 100644 --- a/teuthology/suite/placeholder.py +++ b/teuthology/suite/placeholder.py @@ -98,6 +98,9 @@ def _substitute(input_dict, values_dict): 'sha1': Placeholder('suite_hash'), } }, + 'syslog': { + 'ignorelist': ['WARNING*.*check_session_state', 'WARNING*.*__ceph_remove_cap'], + }, 'repo': Placeholder('ceph_repo'), 'sleep_before_teardown': 0, 'suite': Placeholder('suite'), diff --git a/teuthology/task/internal/syslog.py b/teuthology/task/internal/syslog.py index ae6a5324ca..c61ecabffc 100644 --- a/teuthology/task/internal/syslog.py +++ b/teuthology/task/internal/syslog.py @@ -93,56 +93,21 @@ def syslog(ctx, config): # flush the file fully. oh well. log.info('Checking logs for errors...') + exclude_errors = config.get('ignorelist', []) + log.info('Exclude error list : {0}'.format(exclude_errors)) for rem in ctx.cluster.remotes.keys(): log.debug('Checking %s', rem.name) - stdout = rem.sh( - [ + args = [ 'egrep', '--binary-files=text', - '\\bBUG\\b|\\bINFO\\b|\\bDEADLOCK\\b', + '\\bBUG\\b|\\bINFO\\b|\\bDEADLOCK\\b|\\bOops\\b|\\bWARNING\\b|\\bKASAN\\b', run.Raw(f'{archive_dir}/syslog/kern.log'), - run.Raw('|'), - 'grep', '-v', 'task .* blocked for more than .* seconds', - run.Raw('|'), - 'grep', '-v', 'lockdep is turned off', - run.Raw('|'), - 'grep', '-v', 'trying to register non-static key', - run.Raw('|'), - 'grep', '-v', 'DEBUG: fsize', # xfs_fsr - run.Raw('|'), - 'grep', '-v', 'CRON', # ignore cron noise - run.Raw('|'), - 'grep', '-v', 'BUG: bad unlock balance detected', # #6097 - run.Raw('|'), - 'grep', '-v', 'inconsistent lock state', # FIXME see #2523 - run.Raw('|'), - 'grep', '-v', '*** DEADLOCK ***', # part of lockdep output - run.Raw('|'), - 'grep', '-v', - # FIXME see #2590 and #147 - 'INFO: possible irq lock inversion dependency detected', - run.Raw('|'), - 'grep', '-v', - 'INFO: NMI handler (perf_event_nmi_handler) took too long to run', # noqa - run.Raw('|'), - 'grep', '-v', 'INFO: recovery required on readonly', - run.Raw('|'), - 'grep', '-v', 'ceph-create-keys: INFO', - run.Raw('|'), - 'grep', '-v', 'INFO:ceph-create-keys', - run.Raw('|'), - 'grep', '-v', 'Loaded datasource DataSourceOpenStack', - run.Raw('|'), - 'grep', '-v', 'container-storage-setup: INFO: Volume group backing root filesystem could not be determined', # noqa - run.Raw('|'), - 'egrep', '-v', '\\bsalt-master\\b|\\bsalt-minion\\b|\\bsalt-api\\b', - run.Raw('|'), - 'grep', '-v', 'ceph-crash', - run.Raw('|'), - 'egrep', '-v', '\\btcmu-runner\\b.*\\bINFO\\b', - run.Raw('|'), - 'head', '-n', '1', - ], - ) + ] + for exclude in exclude_errors: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + stdout = rem.sh(args) if stdout != '': log.error('Error in syslog on %s: %s', rem.name, stdout) set_status(ctx.summary, 'fail')