diff --git a/Dockerfile b/Dockerfile index 8ebe2fb..ff6f2cd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,4 +2,4 @@ FROM python:2.7-alpine ADD . /usr/src/hpilo_exporter RUN pip install -e /usr/src/hpilo_exporter ENTRYPOINT ["hpilo-exporter"] -EXPOSE 8080 +EXPOSE 9416 diff --git a/README.md b/README.md index a809519..479f11b 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,20 @@ # HP iLO Metrics Exporter -Exports HP Server Integrated Lights Out (iLO) heath_at_a_glance states to Prometheus gauges, from either a single server (via command line flags) or multiple servers (via query string parameters) +Blackbox-like exporter used to export HP Server Integrated Lights Out (iLO) states to Prometheus. ### Gauges +Here are the status codes of the gauges: ``` 0 - OK 1 - Degraded 2 - Dead (Other) ``` -### iLO +### Output example + +Example of the status reported by your iLO: ``` health_at_a_glance: battery: {status: OK} @@ -23,59 +26,113 @@ health_at_a_glance: processor: {status: OK} storage: {status: Degraded} temperature: {status: OK} + vrm: {status: Ok} + drive: {status: Ok} ``` -### Output - +The returned output would be: ``` -hpilo_battery 0.0 -hpilo_storage 1.0 -hpilo_fans 0.0 -hpilo_bios_hardware 0.0 -hpilo_memory 0.0 -hpilo_power_supplies 0.0 -hpilo_processor 0.0 -hpilo_network 2.0 -hpilo_temperature 0.0 +hpilo_battery{product_name="ProLiant DL360 Gen9",server_name="name.fqdn.domain"} 0.0 +hpilo_storage{product_name="ProLiant DL360 Gen9",server_name="name.fqdn.domain"} 1.0 +hpilo_fans{product_name="ProLiant DL360 Gen9",server_name="name.fqdn.domain"} 0.0 +hpilo_bios_hardware{product_name="ProLiant DL360 Gen9",server_name="name.fqdn.domain"} 0.0 +hpilo_memory{product_name="ProLiant DL360 Gen9",server_name="name.fqdn.domain"} 0.0 +hpilo_power_supplies{product_name="ProLiant DL360 Gen9",server_name="name.fqdn.domain"} 0.0 +hpilo_processor{product_name="ProLiant DL360 Gen9",server_name="name.fqdn.domain"} 0.0 +hpilo_network{product_name="ProLiant DL360 
Gen9",server_name="name.fqdn.domain"} 2.0 +hpilo_temperature{product_name="ProLiant DL360 Gen9",server_name="name.fqdn.domain"} 0.0 +hpilo_vrm{product_name="ProLiant DL380 Gen6",server_name="name.fqdn.domain"} 0.0 +hpilo_drive{product_name="ProLiant DL380 Gen6",server_name="name.fqdn.domain"} 0.0 +hpilo_firmware_version{product_name="ProLiant DL360 Gen9",server_name="name.fqdn.domain"} 2.5 ``` -## Installing +### Installing You can install exporter on the server directly or on separate machine. To run, you must have `Python` and `pip` installed. To install with `pip`: - ``` pip install -e $HPILO_EXPORTER_DIR ``` Then just: +``` +hpilo-exporter [--address=0.0.0.0 --port=9416 --endpoint="/metrics"] +``` + +### Docker +Prebuilt images are available from the Docker repository: ``` -hpilo-exporter [--address=0.0.0.0 --port=8080 --ilo-host=127.0.0.1 --ilo-port=443 --ilo-user=monitoring --ilo-password=monitoring] +idnt/hpilo-exporter:latest ``` -## Docker -To run the container, assuming it has been built locally: +To build the image yourself: +``` +docker build --rm -t hpilo-exporter . +``` -`docker run -p 8080:8080 hpilo-exporter:latest --ilo-addr=127.0.0.1 --ilo-user=monitoring --ilo-password=monitoring` +To run the container: +``` +docker run -p 9416:9416 hpilo-exporter:latest +``` -Example Docker Compose: +You can then call the web server on the defined endpoint, `/metrics` by default. 
+``` +curl 'http://127.0.0.1:9416/metrics?ilo_host=127.0.0.1&ilo_port=443&ilo_user=admin&ilo_password=admin' +``` +Passing arguments to the docker run command: ``` - hpilo-exporter: - command: --address=0.0.0.0 --port=8080 --ilo-host=127.0.0.1 --ilo-port=443 --ilo-user=monitoring --ilo-password=monitoring - image: hpilo-exporter:latest - ports: - - "8080:8080" +docker run -p 9416:9416 hpilo-exporter:latest --port 9416 --endpoint /metrics ``` -## Multi iLO communication +### Docker compose -Using query string parameters to the `/metrics` endpoint you can point the exporter to different iLO's +Here is an example of a Docker Compose deployment: +```yml +hpilo: + image: my.registry/hpilo-exporter + ports: + - 9416:9416 + command: + - '--port=9416' + deploy: + placement: + constraints: + - node.hostname == my_node.domain ``` -curl 'http://127.0.0.1:8080/metrics?ilo_host=127.0.0.1&ilo_port=9018&ilo_user=admin&ilo_password=admin' + +### Kubernetes + +A helm chart is available at [prometheus-helm-addons](https://github.com/IDNT/prometheus-helm-addons). + +### Prometheus config + +Assuming: +- the exporter is available on `http://hpilo:9416` +- you use the same port, username and password for all your iLOs + +```yml +- job_name: 'hpilo' + scrape_interval: 1m + params: + ilo_port: ['443'] + ilo_user: ['my_ilo_user'] + ilo_password: ['my_ilo_password'] + static_configs: + - targets: + - ilo_fqdn.domain + + relabel_configs: + - source_labels: [__address__] + target_label: __param_ilo_host + - source_labels: [__param_ilo_host] + target_label: ilo_host + - target_label: __address__ + replacement: hpilo:9416 # hpilo exporter. 
``` + diff --git a/setup.py b/setup.py index 01caed2..2673100 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,7 @@ import io from setuptools import setup, find_packages -import sys -VERSION = "0.3.3" +VERSION = "0.3.4" PACKAGE_NAME = "hpilo-exporter" SOURCE_DIR_NAME = "src" @@ -11,6 +10,7 @@ def readme(): with io.open('README.md', 'r', encoding='utf-8') as f: return f.read() + setup( name=PACKAGE_NAME, version=VERSION, @@ -40,7 +40,7 @@ def readme(): ], entry_points={ 'console_scripts': [ - 'hpilo-exporter = hpilo_exporter.__main__:main', + 'hpilo-exporter = hpilo_exporter.main:main', ], } ) diff --git a/src/hpilo_exporter/__init__.py b/src/hpilo_exporter/__init__.py index 41447e9..8b13789 100644 --- a/src/hpilo_exporter/__init__.py +++ b/src/hpilo_exporter/__init__.py @@ -1 +1 @@ -from hpilo_exporter.exporter import iLOExporterServer + diff --git a/src/hpilo_exporter/__main__.py b/src/hpilo_exporter/__main__.py deleted file mode 100644 index 53a9d48..0000000 --- a/src/hpilo_exporter/__main__.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Entrypoint for the application -""" - -import argparse - -from hpilo_exporter import iLOExporterServer - - -def main(): - parser = argparse.ArgumentParser(description='Exports ilo heath_at_a_glance state to Prometheus') - - parser.add_argument('--address', type=str, dest='address', default='0.0.0.0', help='address to serve on') - parser.add_argument('--port', type=int, dest='port', default='8080', help='port to bind') - parser.add_argument('--ilo-host', type=str, dest='ilo_host', default='127.0.0.1', help='iLO hostname/ip') - parser.add_argument('--ilo-port', type=int, dest='ilo_port', default='443', help='iLO port') - parser.add_argument('--ilo-user', type=str, dest='ilo_user', default='user', help='iLO user') - parser.add_argument('--ilo-password', type=str, dest='ilo_password', default='pass', help='iLO password') - - args = parser.parse_args() - - exposer = iLOExporterServer(**vars(args)) - exposer.run() diff --git 
a/src/hpilo_exporter/exporter.py b/src/hpilo_exporter/exporter.py index 0893ffe..ec9a5f6 100644 --- a/src/hpilo_exporter/exporter.py +++ b/src/hpilo_exporter/exporter.py @@ -1,19 +1,30 @@ """ Pulls data from specified iLO and presents as Prometheus metrics """ -import hpilo -import prometheus_metrics +from __future__ import print_function +from _socket import gaierror import sys +import hpilo +import time +import prometheus_metrics from BaseHTTPServer import BaseHTTPRequestHandler from BaseHTTPServer import HTTPServer from SocketServer import ForkingMixIn -from prometheus_client import Gauge -from prometheus_client import generate_latest +from prometheus_client import generate_latest, Summary from urlparse import parse_qs from urlparse import urlparse +def print_err(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +# Create a metric to track time spent and requests made. +REQUEST_TIME = Summary( + 'request_processing_seconds', 'Time spent processing request') + + class ForkingHTTPServer(ForkingMixIn, HTTPServer): max_children = 30 timeout = 30 @@ -23,6 +34,9 @@ class RequestHandler(BaseHTTPRequestHandler): """ Endpoint handler """ + def return_error(self): + self.send_response(500) + self.end_headers() def do_GET(self): """ @@ -30,55 +44,89 @@ def do_GET(self): :return: Response with Prometheus metrics """ + # this will be used to return the total amount of time the request took + start_time = time.time() + # get parameters from the URL url = urlparse(self.path) - + # following boolean will be passed to True if an error is detected during the argument parsing + error_detected = False query_components = parse_qs(urlparse(self.path).query) - if url.path == '/metrics': - - query_ilo_host = query_components.get('ilo_host', [self.server.ilo_host]) - if query_ilo_host: - ilo_host = query_ilo_host[0] - - query_ilo_port = query_components.get('ilo_port', [self.server.ilo_port]) - if query_ilo_port: - ilo_port = int(query_ilo_port[0]) - - query_ilo_user = 
query_components.get('ilo_user', [self.server.ilo_user]) - if query_ilo_user: - ilo_user = query_ilo_user[0] - - query_ilo_password = query_components.get('ilo_password', [self.server.ilo_password]) - if query_ilo_password: - ilo_password = query_ilo_password[0] - - data = {} + ilo_host = None + ilo_port = None + ilo_user = None + ilo_password = None + try: + ilo_host = query_components['ilo_host'][0] + ilo_port = int(query_components['ilo_port'][0]) + ilo_user = query_components['ilo_user'][0] + ilo_password = query_components['ilo_password'][0] + except KeyError, e: + print_err("missing parameter %s" % e) + self.return_error() + error_detected = True + + if url.path == self.server.endpoint and ilo_host and ilo_user and ilo_password and ilo_port: + + ilo = None try: - data = hpilo.Ilo(hostname=ilo_host, login=ilo_user, password=ilo_password, - port=ilo_port, timeout=10).get_embedded_health()['health_at_a_glance'] - - for key, value in data.items(): + ilo = hpilo.Ilo(hostname=ilo_host, + login=ilo_user, + password=ilo_password, + port=ilo_port, timeout=10) + except hpilo.IloLoginFailed: + print("ILO login failed") + self.return_error() + except gaierror: + print("ILO invalid address or port") + self.return_error() + except hpilo.IloCommunicationError, e: + print(e) + + # get product and server name + try: + product_name = ilo.get_product_name() + except: + product_name = "Unknown HP Server" + + try: + server_name = ilo.get_server_name() + except: + server_name = "" + + # get health at glance + health_at_glance = ilo.get_embedded_health()['health_at_a_glance'] + + if health_at_glance is not None: + for key, value in health_at_glance.items(): for status in value.items(): if status[0] == 'status': gauge = 'hpilo_{}_gauge'.format(key) - - if status[1] == 'OK': - prometheus_metrics.gauges[gauge].set(0) - elif status[1] == 'Degraded': - prometheus_metrics.gauges[gauge].set(1) + if status[1].upper() == 'OK': + 
prometheus_metrics.gauges[gauge].labels(product_name=product_name, + server_name=server_name).set(0) + elif status[1].upper() == 'DEGRADED': + prometheus_metrics.gauges[gauge].labels(product_name=product_name, + server_name=server_name).set(1) else: - prometheus_metrics.gauges[gauge].set(2) + prometheus_metrics.gauges[gauge].labels(product_name=product_name, + server_name=server_name).set(2) - metrics = generate_latest(prometheus_metrics.registry) + # get firmware version + fw_version = ilo.get_fw_version()["firmware_version"] + # prometheus_metrics.hpilo_firmware_version.set(fw_version) + prometheus_metrics.hpilo_firmware_version.labels(product_name=product_name, + server_name=server_name).set(fw_version) - self.send_response(200) - self.send_header('Content-Type', 'text/plain') - self.end_headers() - self.wfile.write(metrics) + # get the amount of time the request took + REQUEST_TIME.observe(time.time() - start_time) - except: - self.send_response(500) - self.end_headers() + # generate and publish metrics + metrics = generate_latest(prometheus_metrics.registry) + self.send_response(200) + self.send_header('Content-Type', 'text/plain') + self.end_headers() + self.wfile.write(metrics) elif url.path == '/': self.send_response(200) @@ -88,46 +136,39 @@ def do_GET(self): HP iLO Exporter

HP iLO Exporter

-

Visit /metrics to use.

+

Visit Metrics to use.

""") else: - self.send_response(404) - self.end_headers() + if not error_detected: + self.send_response(404) + self.end_headers() -class iLOExporterServer(object): +class ILOExporterServer(object): """ Basic server implementation that exposes metrics to Prometheus """ - def __init__(self, address, port, ilo_host, ilo_port, ilo_user, ilo_password): + def __init__(self, address='0.0.0.0', port=8080, endpoint="/metrics"): self._address = address self._port = port - self._ilo_host = ilo_host - self._ilo_port = ilo_port - self._ilo_user = ilo_user - self._ilo_password = ilo_password + self.endpoint = endpoint def print_info(self): - print("Starting exporter on: http://{}:{}/metrics".format(self._address, self._port)) - print("Default iLO: {}@{}:{}".format(self._ilo_user, self._ilo_host, self._ilo_port)) - print("Press Ctrl+C to quit") + print_err("Starting exporter on: http://{}:{}{}".format(self._address, self._port, self.endpoint)) + print_err("Press Ctrl+C to quit") def run(self): self.print_info() server = ForkingHTTPServer((self._address, self._port), RequestHandler) - - server.ilo_host = self._ilo_host - server.ilo_port = self._ilo_port - server.ilo_user = self._ilo_user - server.ilo_password = self._ilo_password + server.endpoint = self.endpoint try: while True: - sys.stdout.flush() server.handle_request() except KeyboardInterrupt: - print("Killing exporter") + print_err("Killing exporter") + server.server_close() diff --git a/src/hpilo_exporter/main.py b/src/hpilo_exporter/main.py new file mode 100644 index 0000000..b41497d --- /dev/null +++ b/src/hpilo_exporter/main.py @@ -0,0 +1,25 @@ +""" +Entrypoint for the application +""" + +import argparse + +from hpilo_exporter.exporter import ILOExporterServer + + +def main(): + parser = argparse.ArgumentParser(description='Exports ilo heath_at_a_glance state to Prometheus') + + parser.add_argument('--address', type=str, dest='address', default='0.0.0.0', help='address to serve on') + parser.add_argument('--port', 
type=int, dest='port', default='9416', help='port to bind') + parser.add_argument('--endpoint', type=str, dest='endpoint', default='/metrics', + help='endpoint where metrics will be published') + + args = parser.parse_args() + + exporter = ILOExporterServer(**vars(args)) + exporter.run() + + +if __name__ == '__main__': + main() diff --git a/src/hpilo_exporter/prometheus_metrics.py b/src/hpilo_exporter/prometheus_metrics.py index 58520c7..16f9f72 100644 --- a/src/hpilo_exporter/prometheus_metrics.py +++ b/src/hpilo_exporter/prometheus_metrics.py @@ -3,17 +3,23 @@ registry = REGISTRY -hpilo_battery_gauge = Gauge('hpilo_battery', 'HP iLO battery status') -hpilo_storage_gauge = Gauge('hpilo_storage', 'HP iLO storage status') -hpilo_fans_gauge = Gauge('hpilo_fans', 'HP iLO fans status') -hpilo_bios_hardware_gauge = Gauge('hpilo_bios_hardware', 'HP iLO bios_hardware status') -hpilo_memory_gauge = Gauge('hpilo_memory', 'HP iLO memory status') -hpilo_power_supplies_gauge = Gauge('hpilo_power_supplies', 'HP iLO power_supplies status') -hpilo_processor_gauge = Gauge('hpilo_processor', 'HP iLO processor status') -hpilo_network_gauge = Gauge('hpilo_network', 'HP iLO network status') -hpilo_temperature_gauge = Gauge('hpilo_temperature', 'HP iLO temperature status') +hpilo_vrm_gauge = Gauge('hpilo_vrm', 'HP iLO vrm status', ["product_name", "server_name"]) +hpilo_drive_gauge = Gauge('hpilo_drive', 'HP iLO drive status', ["product_name", "server_name"]) +hpilo_battery_gauge = Gauge('hpilo_battery', 'HP iLO battery status', ["product_name", "server_name"]) +hpilo_storage_gauge = Gauge('hpilo_storage', 'HP iLO storage status', ["product_name", "server_name"]) +hpilo_fans_gauge = Gauge('hpilo_fans', 'HP iLO fans status', ["product_name", "server_name"]) +hpilo_bios_hardware_gauge = Gauge('hpilo_bios_hardware', 'HP iLO bios_hardware status', ["product_name", "server_name"]) +hpilo_memory_gauge = Gauge('hpilo_memory', 'HP iLO memory status', ["product_name", "server_name"]) 
+hpilo_power_supplies_gauge = Gauge('hpilo_power_supplies', 'HP iLO power_supplies status', ["product_name", + "server_name"]) +hpilo_processor_gauge = Gauge('hpilo_processor', 'HP iLO processor status', ["product_name", "server_name"]) +hpilo_network_gauge = Gauge('hpilo_network', 'HP iLO network status', ["product_name", "server_name"]) +hpilo_temperature_gauge = Gauge('hpilo_temperature', 'HP iLO temperature status', ["product_name", "server_name"]) +hpilo_firmware_version = Gauge('hpilo_firmware_version', 'HP iLO firmware version', ["product_name", "server_name"]) gauges = { + 'hpilo_vrm_gauge': hpilo_vrm_gauge, + 'hpilo_drive_gauge': hpilo_drive_gauge, 'hpilo_battery_gauge': hpilo_battery_gauge, 'hpilo_storage_gauge': hpilo_storage_gauge, 'hpilo_fans_gauge': hpilo_fans_gauge, @@ -23,4 +29,5 @@ 'hpilo_processor_gauge': hpilo_processor_gauge, 'hpilo_network_gauge': hpilo_network_gauge, 'hpilo_temperature_gauge': hpilo_temperature_gauge, + 'hpilo_firmware_version': hpilo_firmware_version, }