Skip to content
This repository has been archived by the owner on Feb 23, 2019. It is now read-only.

Commit

Permalink
check_ceph_libs_mk: Various fixes
Browse files Browse the repository at this point in the history
* Bugs in python-psutil
  Need to upgrade to package from jessie-backports in order
  to address the following issues:
    giampaolo/psutil#522
    giampaolo/psutil#572

* Race when checking for processes
  It seems that it is possible to query for a process that no longer
  exists. Handle that situation by ignoring it.

* Wrong message when no processes are matched
  Not a problem at all, but handle that situation by printing a
  different message, just to be clear.

* Filter out qemu-system-x86_64 processes that do not have Ceph disks
  For some reason, it seems that QEMU maps the librados and librbd
  libraries even when not using them (e.g. NFS, DRBD). Add a function
  that parses the cmdline of each QEMU process, looks for disk drives,
  and by looking at the path, tells if we should check that process or
  not. Only check VMs with rbd and tapdev (Archipelago) disks.

Also, split out process filtering stuff to a separate function.
  • Loading branch information
Nikos Kormpakis committed Apr 23, 2018
1 parent 8743b8a commit 647d387
Showing 1 changed file with 54 additions and 6 deletions.
60 changes: 54 additions & 6 deletions monitoring/checkmk/check_ceph_libs_mk
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import psutil
import sys
import argparse
import json
import re
import os

CHECK_NAME = 'check_ceph_libs'

Expand Down Expand Up @@ -71,15 +73,59 @@ def check_result(msg, status):
sys.exit(0)


def check_qemu_process(cmdline):
    """
    Determine whether a QEMU process needs to be checked or not.

    Only QEMU instances with rbd or tapdev (Archipelago) disks are checked.

    :param cmdline: the process command line as a list of arguments; a
        missing command line (None or empty) is treated as having no disks
    :returns: True if at least one ``-drive file=...`` path refers to an rbd
        disk or resolves to a tapdev device, False otherwise
    """
    # Without a command line there are no drives to inspect; this also
    # avoids a TypeError from ' '.join(None).
    if not cmdline:
        return False

    drive_regex = re.compile(r'-drive file=([\d\w\-\.:/]+)')
    # Match 'rbd' only when it is not preceded by a letter, so that
    # 'rbd:pool/image' and '/dev/rbd0' match but '/dev/drbd0' does not.
    rbd_regex = re.compile(r'(?<![A-Za-z])rbd')

    for disk in drive_regex.findall(' '.join(cmdline)):
        if rbd_regex.search(disk):
            return True
        # The drive path may be a symlink; resolve it before looking for
        # a tapdev device.
        if os.path.exists(disk) and 'tapdev' in os.path.realpath(disk):
            return True

    return False


def check_process(proc, procs_to_check):
    """
    Determine if a given process should be checked or not.

    GRNET-specific rules in this function: QEMU processes are only checked
    when check_qemu_process() accepts their command line.

    :param proc: dict of process information with 'name' and 'cmdline' keys
    :param procs_to_check: iterable of substrings to look for in the name
    :returns: True if the process should be checked, False otherwise
    """
    # A process whose name is None can never match; treating it as an empty
    # string avoids a TypeError from the substring test below.
    name = proc['name'] or ''
    if not any(x in name for x in procs_to_check):
        return False

    if name == 'qemu-system-x86_64':
        # Same None-safety for the command line handed to the QEMU filter.
        return bool(check_qemu_process(proc['cmdline'] or []))

    return True


def fetch_procs(procs_to_check):
    """
    Fetch all processes that should be checked.

    Iterates over the running processes and keeps the information dict of
    every process accepted by check_process().

    :param procs_to_check: entries to match process names against
    :returns: list of process information dicts (as built by as_dict())
    """
    procs = []
    for p in psutil.process_iter():
        # It is possible that a process found by process_iter() has died.
        # Catch that exception and ignore it.
        try:
            pinfo = p.as_dict()
        except psutil.NoSuchProcess:
            continue
        if check_process(pinfo, procs_to_check):
            procs.append(pinfo)

    return procs

Expand Down Expand Up @@ -150,6 +196,8 @@ def main():
try:
# Fetch all processes using ceph-related libraries
procs = fetch_procs(PROCS_TO_CHECK)
if not procs:
check_result('No processes matches on node', 'OK')

# Find processes running old libraries
procs_old_libs = find_procs_old_libs(procs, LIBS_TO_CHECK)
Expand All @@ -164,7 +212,7 @@ def main():
check_result('{} processes running with old Ceph libraries.'
.format(len(procs_old_libs['processes'])), 'WARNING')
else:
check_result('All processes are running latest installed libs', 'OK')
check_result('All processes are running the latest installed libs', 'OK')

except Exception as e:
check_result('Something went wrong: {}'.format(str(e)), 'UNKNOWN')
Expand Down

0 comments on commit 647d387

Please sign in to comment.