From fbe084f79739d34a6004ec1bdff7be162c573723 Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Thu, 8 Sep 2022 13:17:26 +0200 Subject: [PATCH 01/11] add python kubernetes library --- source/requirements.txt | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/source/requirements.txt b/source/requirements.txt index 1a097a9..9763563 100644 --- a/source/requirements.txt +++ b/source/requirements.txt @@ -1,6 +1,19 @@ certifi==2022.6.15 -charset-normalizer==2.1.0 +charset-normalizer==2.1.1 idna==3.3 prometheus-client==0.14.1 requests==2.28.1 -urllib3==1.26.10 \ No newline at end of file +urllib3==1.26.12 +## The following requirements were added by pip freeze: +cachetools==5.2.0 +google-auth==2.11.0 +kubernetes==24.2.0 +oauthlib==3.2.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +python-dateutil==2.8.2 +PyYAML==6.0 +requests-oauthlib==1.3.1 +rsa==4.9 +six==1.16.0 +websocket-client==1.4.1 \ No newline at end of file From bc7b07ad377bb41573a7ead7fb1617df256f866b Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Tue, 13 Sep 2022 17:55:55 +0200 Subject: [PATCH 02/11] network connectivitsy check --- k8s/configmap.yaml | 30 +++- source/main.py | 18 ++- source/utils/Config.py | 84 ++++++++++-- source/utils/CustomCollector.py | 6 +- source/utils/IMetricsProvider.py | 10 ++ source/utils/KubeExecMetricsProvider.py | 129 ++++++++++++++++++ ...tricsProvider.py => RpcMetricsProvider.py} | 15 +- 7 files changed, 259 insertions(+), 33 deletions(-) create mode 100644 source/utils/IMetricsProvider.py create mode 100644 source/utils/KubeExecMetricsProvider.py rename source/utils/{MetricsProvider.py => RpcMetricsProvider.py} (90%) diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml index 9cf3375..fddb561 100644 --- a/k8s/configmap.yaml +++ b/k8s/configmap.yaml @@ -8,37 +8,53 @@ metadata: data: # The config. 
# Required attributes: "rpc_url" and "peers" + # - "namespace" = The name of the k8s namespace where the Quorum deployment is located + # - "deployment" = The name of the Quorum k8s deployment # - "rpc_url" = The full URL of the RPC endpoint of the quorum node, e.g. "http://quorum-node-0-rpc.quorum:8545" # - "peers" = A list of all known peers via their "enode".# - # "peers" contains an array of objects. Each object must have attributes "company-name" and "enode". + # "peers" contains an array of objects. Each object must have attributes "company-name", "enode", "enodeAddress" and "enodeAddressPort" # Note: If there are no known peers, provide an empty array/list of peers. config.json: |- { + "namespace": "epi-poc-quorum", + "deployment": "quorum-node-0", "rpc_url": "http://quorum-node-0-rpc.epi-poc-quorum:8545", "peers": [ { "company-name": "company_a", - "enode": "4312d5056db7edf8b6..." + "enode": "4312d5056db7edf8b6...", + "enodeAddress": "1.2.3.4", + "enodeAddressPort": "30303" }, { "company-name": "company_a", - "enode": "a36ceb6ccdf5ff8a7c..." + "enode": "a36ceb6ccdf5ff8a7c...", + "enodeAddress": "2.3.4.5", + "enodeAddressPort": "30303" }, { "company-name": "company_a", - "enode": "4801af270f75e9352b..." + "enode": "4801af270f75e9352b...", + "enodeAddress": "3.4.5.6", + "enodeAddressPort": "30303" }, { "company-name": "company_a", - "enode": "456a860cb1275dd23..." + "enode": "456a860cb1275dd23...", + "enodeAddress": "4.5.6.7", + "enodeAddressPort": "30303" }, { "company-name": "company_b", - "enode": "bc03e0353fe10d0261..." + "enode": "bc03e0353fe10d0261...", + "enodeAddress": "5.6.7.8", + "enodeAddressPort": "30303" }, { "company-name": "company_c", - "enode": "b06bca847a8c27e7d..." 
+ "enode": "b06bca847a8c27e7d...", + "enodeAddress": "6.7.8.9", + "enodeAddressPort": "30303" }, ] } \ No newline at end of file diff --git a/source/main.py b/source/main.py index 3b285f9..9954207 100644 --- a/source/main.py +++ b/source/main.py @@ -4,7 +4,8 @@ import threading from utils.ConfigLoader import ConfigLoader from utils.CustomCollector import CustomCollector -from utils.MetricsProvider import MetricsProvider +from utils.KubeExecMetricsProvider import KubeExecMetricsProvider +from utils.RpcMetricsProvider import RpcMetricsProvider from prometheus_client import start_http_server from prometheus_client.core import REGISTRY @@ -17,10 +18,14 @@ if not config: sys.exit(1) - # Init MetricsProvider and register CustomCollector - metrics_provider = MetricsProvider(config=config) - custom_collector = CustomCollector(metrics_provider=metrics_provider) - REGISTRY.register(custom_collector) + # Init MetricsProviders and register CustomCollectors + rpc_metrics_provider = RpcMetricsProvider(config=config) + rpc_custom_collector = CustomCollector(metrics_provider=rpc_metrics_provider) + REGISTRY.register(rpc_custom_collector) + + kube_exec_metrics_provider = KubeExecMetricsProvider(config=config) + kube_exec_custom_collector = CustomCollector(metrics_provider=kube_exec_metrics_provider) + REGISTRY.register(kube_exec_custom_collector) # Start up the server to expose the metrics. start_http_server(8000) @@ -31,7 +36,8 @@ signal.signal(signal.SIGTERM, lambda *_args: (logging.info("SIGTERM received") and False) or quit_event.set()) while not quit_event.is_set(): logging.info("Preparing metrics - rpc_url=%s", config.rpc_url) - metrics_provider.process() + rpc_metrics_provider.process() + kube_exec_metrics_provider.process() logging.info("Done. 
Sleeping for %s seconds", sleep_time) quit_event.wait(timeout=sleep_time) diff --git a/source/utils/Config.py b/source/utils/Config.py index 49103ca..f364fcd 100644 --- a/source/utils/Config.py +++ b/source/utils/Config.py @@ -1,11 +1,14 @@ +from audioop import add import logging class Config: """Encapsulates the application configuration. """ def __init__(self): - self.rpc_url = None - self.peers = {} + self._rpc_url = None + self._namespace = None + self._deployment = None + self._peers = {} def load(self, config_object) -> bool: """Load the config from an object @@ -16,18 +19,29 @@ def load(self, config_object) -> bool: Returns: bool: True if successful else False """ - self.rpc_url = None - self.peers = {} + self._rpc_url = None + self._namespace = None + self._deployment = None + self._peers = {} if config_object is None: logging.error("'config_object' not set.") return False - rpc_url = config_object.get('rpc_url') - if not rpc_url: + self._rpc_url = config_object.get('rpc_url') + if not self._rpc_url: logging.error("'rpc_url' is not set in config. E.g. 'rpc_url': 'http://quorum-node-0.quorum:8545'") return False - self.rpc_url = rpc_url + + self._namespace = config_object.get('namespace') + if not self._namespace: + logging.error("'namespace' is not set in config. E.g. 'namespace': 'quorum'") + return False + + self._deployment = config_object.get('deployment') + if not self._deployment: + logging.error("'deployment' is not set in config. E.g. 
'deployment': 'quorum'") + return False peers = config_object.get('peers') if not peers: @@ -37,15 +51,61 @@ def load(self, config_object) -> bool: for each in peers: enode = each.get('enode') company_name = each.get('company-name') - if not enode or not company_name: - logging.error("'enode' or 'company-name' not set at a peer object.") + address = each.get('enodeAddress') + port = each.get('enodeAddressPort') + + if enode is None or company_name is None or address is None or port is None: + logging.error("'enode', 'company-name', 'enodeAddress', 'enodeAddressPort' must be set at a peer object.") return False - # Check is company_name is unique. If not use company_name plus first 5 chars of enode + # Check is company_name is unique. If not, use company_name plus first 5 chars of enode count_same_company_name = len(list(filter(lambda peer: peer['company-name'] == company_name, peers))) + name = None if count_same_company_name > 1: - self.peers[enode] = company_name + " (" + enode[0:5] + ")" + name = company_name + " (" + enode[0:5] + ")" else: - self.peers[enode] = company_name + name = company_name + + self._peers[enode] = PeerConfig(name, enode, address, port) return True + + @property + def rpc_url(self) -> str: + return self._rpc_url + + @property + def peers(self) -> dict: + return self._peers + + @property + def deployment(self) -> str: + return self._deployment + + @property + def namespace(self) -> str: + return self._namespace + + +class PeerConfig: + def __init__(self, name:str, enode:str, address:str, port: str): + self._name = name + self._enode = enode + self._address = address + self._port = port + + @property + def name(self) -> str: + return self._name + + @property + def enode(self) -> dict: + return self._enode + + @property + def address(self) -> str: + return self._address + + @property + def port(self) -> str: + return self._port diff --git a/source/utils/CustomCollector.py b/source/utils/CustomCollector.py index cb04d16..33bca59 100644 --- 
a/source/utils/CustomCollector.py +++ b/source/utils/CustomCollector.py @@ -1,4 +1,4 @@ -from .MetricsProvider import MetricsProvider +from .IMetricsProvider import IMetricsProvider class CustomCollector(object): """A custom Collector to report Metrics. @@ -8,8 +8,8 @@ class CustomCollector(object): object (_type_): _description_ """ - def __init__(self, metrics_provider:MetricsProvider): + def __init__(self, metrics_provider:IMetricsProvider): self.metrics_provider = metrics_provider def collect(self): - return self.metrics_provider.current_metrics + return self.metrics_provider.getCurrentMetrics() diff --git a/source/utils/IMetricsProvider.py b/source/utils/IMetricsProvider.py new file mode 100644 index 0000000..59dbbb5 --- /dev/null +++ b/source/utils/IMetricsProvider.py @@ -0,0 +1,10 @@ +class IMetricsProvider: + """Interface for providing the current metrics + """ + def getCurrentMetrics(self) -> list: + """Get the current metrics + + Returns: + list: The current metrics + """ + pass diff --git a/source/utils/KubeExecMetricsProvider.py b/source/utils/KubeExecMetricsProvider.py new file mode 100644 index 0000000..2edc2b7 --- /dev/null +++ b/source/utils/KubeExecMetricsProvider.py @@ -0,0 +1,129 @@ +import logging + +from .Config import Config, PeerConfig +from .Helper import Helper +from .IMetricsProvider import IMetricsProvider +from kubernetes import config +from kubernetes.client.api import core_v1_api, apps_v1_api +from kubernetes.stream import stream +from prometheus_client.core import GaugeMetricFamily + +class KubeExecMetricsProvider(IMetricsProvider): + """Executes commands via "kubectl exec" in remote pod and provides metrics + """ + def __init__(self, config:Config): + # The current metrics to be reported on a Prometheus scrape. + # Note: We cannot use "default" Gauge as the values remain even if the peer does not exists anymore. + # Therefore we set fresh metrics every time we collect peers information. 
+ self._current_metrics = [] + self._config = config + self._helper = Helper() + + def getCurrentMetrics(self) -> list: + """Get the current metrics. Implementation of the IMetricsProvider + + Returns: + list: The current metrics + """ + return self._current_metrics + + def lookup_pod_name_and_ip(self) -> tuple: + """Looks up for the pod and its ip address by the deployment name and namespace from configuration + + Returns: + tuple: pod name and ip address + """ + apps_v1 = apps_v1_api.AppsV1Api() + core_v1 = core_v1_api.CoreV1Api() + + # Determine Deployment and get labels selectors + # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Deployment.md + logging.debug("%s >> Getting deployment - name=%s, namespace=%s", type(self).__name__, self._config.deployment, self._config.namespace) + deployment = apps_v1.read_namespaced_deployment(name=self._config.deployment, namespace=self._config.namespace, _request_timeout=(1,2)) + if deployment is None: + logging.error("%s >> Deployment not found - name=%s, namespace=%s", type(self).__name__, self._config.deployment, self._config.namespace) + return (None, None) + + # Format Label Selectors into key1=value1,key2=value2,... 
format + deployment_label_selector = deployment.spec.selector + label_selector_string = ','.join(map(lambda key: "{0}={1}".format(key, deployment_label_selector.match_labels[key]), deployment_label_selector.match_labels.keys())) + logging.debug("%s >> Deployment found - label_selector_string=%s", type(self).__name__, label_selector_string) + + # Get pods by selector + pod_list = core_v1.list_namespaced_pod(namespace=self._config.namespace, label_selector=label_selector_string, watch=False, _request_timeout=(1,2)) + if pod_list is None or len(pod_list.items) == 0: + logging.warning("%s >> No pods found - label_selector=%s, namespace=%s", type(self).__name__, label_selector_string, self._config.namespace) + return (None, None) + + if len(pod_list.items) > 1: + logging.warning("%s >> More than one pod found - label_selector=%s, namespace=%s", type(self).__name__, label_selector_string, self._config.namespace) + for each_pod in pod_list.items: + logging.info("%s >> Pod found - name=%s", type(self).__name__, each_pod.metadata.name) + return (None, None) + + return (pod_list.items[0].metadata.name, pod_list.items[0].status.pod_ip) + + def kube_exec_check_connectivity(self, pod_name:str, peer:PeerConfig): + """Checks the connectivity from within the pod to the peer + + Returns: + _type_: True if connection could be established else False + """ + shell_command = 'nc -z -w 1 {0} {1};echo -n $?'.format(peer.address, peer.port) + logging.debug("%s >> shell_command=%s", type(self).__name__, shell_command) + exec_command = [ + '/bin/sh', + '-c', + shell_command] + + core_v1 = core_v1_api.CoreV1Api() + resp = stream(core_v1.connect_get_namespaced_pod_exec, + name=pod_name, + namespace=self._config.namespace, + command=exec_command, + stderr=True, stdin=False, + stdout=True, tty=False, + _request_timeout=3) # do not use a tuple here, otherwise: kubernetes.client.exceptions.ApiException: (0) Reason: '<' not supported between instances of 'float' and 'tuple' + + 
connection_successful = resp == '0' + logging.debug("%s >> %s %s (%s:%s) ", type(self).__name__, 'OK: Connected to' if connection_successful else 'CANNOT connect to', peer.name, peer.address, peer.port) + return connection_successful + + def createCurrentMetrics(self, instance: str, instance_name: str, pod_name: str): + """Creates the current metrics + + Args: + instance (str): The instance name + instance_name (str): A pretty instance name + pod_name (str): The pod name + """ + logging.info("%s >> Creating metrics for %s peers - instance=%s, instance_name=%s, pod_name=%s, namespace=%s", type(self).__name__, len(self._config.peers.keys()), instance, instance_name, pod_name, self._config.namespace) + + metrics = GaugeMetricFamily('quorum_tcp_egress_connectivity', 'Quorum TCP egress connectivity to other nodes by enode. (0) for no connectivity, (1) for connectivity can be established', labels=['instance', 'instance_name', 'enode', 'enode_short', 'name']) + for each_config_peer in self._config.peers.values(): + connection_successful = self.kube_exec_check_connectivity(pod_name=pod_name, peer=each_config_peer) + metrics.add_metric([instance, instance_name, each_config_peer.enode, each_config_peer.enode[0:20], each_config_peer.name], (1 if connection_successful else 0)) + + # Set current metrics to be reported by CustomCollector in a single atomic operation + self._current_metrics = [metrics] + + def process(self): + """Processes getting information and preparing metrics + + """ + # 1. Use Incluster Config + config.load_incluster_config() + + # Get DNS name and IP address of current instance by taking a deeper look at the rpc_url + instance_dns_name = self._helper.getHostName(url=self._config.rpc_url) + + # We do not use the IP address of the RPC Endpoint as this may be the clusterIp of the service if it is not a headless service. 
+ # Therefore we determine the instance from network.localAddress + # instance_ip = resolveIpAddress(dns_name=instance_dns_name) + ':9545' # default metrics also add port + pod_name, pod_ip = self.lookup_pod_name_and_ip() + + if pod_name is not None and pod_ip is not None: + self.createCurrentMetrics(instance="{0}:9545".format(pod_ip), instance_name=instance_dns_name, pod_name=pod_name) + else: + logging.warn("%s >> Cannot create metrics as pod_name or pod_ip is None - pod_name=%s, pod_ip=%s", type(self).__name__, pod_name, pod_ip) + \ No newline at end of file diff --git a/source/utils/MetricsProvider.py b/source/utils/RpcMetricsProvider.py similarity index 90% rename from source/utils/MetricsProvider.py rename to source/utils/RpcMetricsProvider.py index bc2e869..fb70408 100644 --- a/source/utils/MetricsProvider.py +++ b/source/utils/RpcMetricsProvider.py @@ -2,11 +2,12 @@ import requests from .Config import Config from .Helper import Helper +from .IMetricsProvider import IMetricsProvider from prometheus_client.core import GaugeMetricFamily from requests.structures import CaseInsensitiveDict -class MetricsProvider: - """Collects data and provides metrics data. +class RpcMetricsProvider(IMetricsProvider): + """Collects data from Quorum RPC API and provides metrics data. 
""" def __init__(self, config:Config): @@ -17,6 +18,9 @@ def __init__(self, config:Config): self.config = config self.helper = Helper() + def getCurrentMetrics(self) -> list: + return self.current_metrics + def getPeersData(self): """Get data of the current peers by querying Quorum nodes RPC endpoint @@ -46,7 +50,7 @@ def createCurrentMetrics(self, instance: str, instance_name: str, peersData: lis if peersData is None: peersData = [] - logging.info("Creating metrics for %s peers - instance=%s, instance_name=%s", len(peersData), ('[None=will be determined from localAddress]' if instance is None else instance), instance_name) + logging.info("%s >> Creating metrics for %s peers - instance=%s, instance_name=%s", type(self).__name__, len(peersData), ('[None=will be determined from localAddress]' if instance is None else instance), instance_name) # Prepare current metrics metric_peers = GaugeMetricFamily('quorum_peers', 'Quorum peers by enode', labels=['instance', 'instance_name', 'enode', 'enode_short', 'name']) @@ -77,7 +81,7 @@ def createCurrentMetrics(self, instance: str, instance_name: str, peersData: lis instance = '' # Get pretty name. If not defined use enode_short instead - name = self.config.peers.get(enode, enode_short) + name = enode_short if enode not in self.config.peers else self.config.peers[enode].name # 1. metric_peers # Set value (1) that enode is found @@ -105,7 +109,8 @@ def createCurrentMetrics(self, instance: str, instance_name: str, peersData: lis if not enodes_found.get(each_config_peer_enode, False): enode = each_config_peer_enode enode_short = enode[0:20] - name = self.config.peers.get(enode, enode_short) + # name = self.config.peers.get(enode, enode_short) + name = enode_short if enode not in self.config.peers else self.config.peers[enode].name # 1. 
metric_peers # Set value (0) that enode is NOT found From 826a718bc4ba02abefc6332c22a990bfc05fe2e4 Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Wed, 14 Sep 2022 11:18:26 +0200 Subject: [PATCH 03/11] cleanup and pylint --- source/main.py | 36 ++-- source/utils/ConfigLoader.py | 23 --- source/utils/CustomCollector.py | 15 -- source/utils/IMetricsProvider.py | 10 - source/utils/RpcMetricsProvider.py | 149 --------------- source/utils/common.py | 29 +++ source/utils/{Config.py => config.py} | 90 ++++++++- source/utils/{Helper.py => helper.py} | 24 +-- ...vider.py => kube_exec_metrics_provider.py} | 49 ++--- source/utils/rpc_metrics_provider.py | 176 ++++++++++++++++++ 10 files changed, 345 insertions(+), 256 deletions(-) delete mode 100644 source/utils/ConfigLoader.py delete mode 100644 source/utils/CustomCollector.py delete mode 100644 source/utils/IMetricsProvider.py delete mode 100644 source/utils/RpcMetricsProvider.py create mode 100644 source/utils/common.py rename source/utils/{Config.py => config.py} (52%) rename source/utils/{Helper.py => helper.py} (73%) rename source/utils/{KubeExecMetricsProvider.py => kube_exec_metrics_provider.py} (73%) create mode 100644 source/utils/rpc_metrics_provider.py diff --git a/source/main.py b/source/main.py index 9954207..4b6d15c 100644 --- a/source/main.py +++ b/source/main.py @@ -2,29 +2,32 @@ import signal import sys import threading -from utils.ConfigLoader import ConfigLoader -from utils.CustomCollector import CustomCollector -from utils.KubeExecMetricsProvider import KubeExecMetricsProvider -from utils.RpcMetricsProvider import RpcMetricsProvider + from prometheus_client import start_http_server from prometheus_client.core import REGISTRY +import utils.config +from utils.common import CustomCollector +from utils.kube_exec_metrics_provider import KubeExecMetricsProvider +from utils.rpc_metrics_provider import RpcMetricsProvider + if __name__ == '__main__': - 
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) + logging.basicConfig( + format='%(levelname)s: %(message)s', level=logging.INFO) # Load Config - sleep_time = 10.0 - config = ConfigLoader.load() - if not config: + SLEEP_TIME = 10.0 + config = utils.config.load() + if config is None: sys.exit(1) # Init MetricsProviders and register CustomCollectors - rpc_metrics_provider = RpcMetricsProvider(config=config) - rpc_custom_collector = CustomCollector(metrics_provider=rpc_metrics_provider) + rpc_metrics_provider = RpcMetricsProvider(config) + rpc_custom_collector = CustomCollector(rpc_metrics_provider) REGISTRY.register(rpc_custom_collector) - kube_exec_metrics_provider = KubeExecMetricsProvider(config=config) - kube_exec_custom_collector = CustomCollector(metrics_provider=kube_exec_metrics_provider) + kube_exec_metrics_provider = KubeExecMetricsProvider(config) + kube_exec_custom_collector = CustomCollector(kube_exec_metrics_provider) REGISTRY.register(kube_exec_custom_collector) # Start up the server to expose the metrics. @@ -33,12 +36,13 @@ # Graceful and fast shutdown quit_event = threading.Event() # https://stackoverflow.com/questions/862412/is-it-possible-to-have-multiple-statements-in-a-python-lambda-expression - signal.signal(signal.SIGTERM, lambda *_args: (logging.info("SIGTERM received") and False) or quit_event.set()) + signal.signal(signal.SIGTERM, + lambda *_args: (logging.info("SIGTERM received") and False) or quit_event.set()) while not quit_event.is_set(): - logging.info("Preparing metrics - rpc_url=%s", config.rpc_url) + logging.info("Preparing metrics") rpc_metrics_provider.process() kube_exec_metrics_provider.process() - logging.info("Done. Sleeping for %s seconds", sleep_time) - quit_event.wait(timeout=sleep_time) + logging.info("Done. 
Sleeping for %s seconds", SLEEP_TIME) + quit_event.wait(timeout=SLEEP_TIME) logging.info("Leaving - quit_event.is_set()=%s", quit_event.is_set()) diff --git a/source/utils/ConfigLoader.py b/source/utils/ConfigLoader.py deleted file mode 100644 index fdf84c8..0000000 --- a/source/utils/ConfigLoader.py +++ /dev/null @@ -1,23 +0,0 @@ -import json -from .Config import Config - -class ConfigLoader: - """Functionality to load the application configuration - """ - def load(filename:str = 'config.json') -> Config: - """Load the application configuration - - Args: - filename (str, optional): The configuration file name. Defaults to 'config.json'. - - Returns: - Config: The application configuration or None on error during load. - """ - with open(file=filename, mode='r') as f: - config_object = json.load(f) - - config = Config() - if config.load(config_object) == True: - return config - - return None diff --git a/source/utils/CustomCollector.py b/source/utils/CustomCollector.py deleted file mode 100644 index 33bca59..0000000 --- a/source/utils/CustomCollector.py +++ /dev/null @@ -1,15 +0,0 @@ -from .IMetricsProvider import IMetricsProvider - -class CustomCollector(object): - """A custom Collector to report Metrics. 
- See https://github.com/prometheus/client_python#custom-collectors - - Args: - object (_type_): _description_ - """ - - def __init__(self, metrics_provider:IMetricsProvider): - self.metrics_provider = metrics_provider - - def collect(self): - return self.metrics_provider.getCurrentMetrics() diff --git a/source/utils/IMetricsProvider.py b/source/utils/IMetricsProvider.py deleted file mode 100644 index 59dbbb5..0000000 --- a/source/utils/IMetricsProvider.py +++ /dev/null @@ -1,10 +0,0 @@ -class IMetricsProvider: - """Interface for providing the current metrics - """ - def getCurrentMetrics(self) -> list: - """Get the current metrics - - Returns: - list: The current metrics - """ - pass diff --git a/source/utils/RpcMetricsProvider.py b/source/utils/RpcMetricsProvider.py deleted file mode 100644 index fb70408..0000000 --- a/source/utils/RpcMetricsProvider.py +++ /dev/null @@ -1,149 +0,0 @@ -import logging -import requests -from .Config import Config -from .Helper import Helper -from .IMetricsProvider import IMetricsProvider -from prometheus_client.core import GaugeMetricFamily -from requests.structures import CaseInsensitiveDict - -class RpcMetricsProvider(IMetricsProvider): - """Collects data from Quorum RPC API and provides metrics data. - """ - def __init__(self, config:Config): - - # The current metrics to be reported on a Prometheus scrape. - # Note: We cannot use "default" Gauge as the values remain even if the peer does not exists anymore. - # Therefore we set fresh metrics every time we collect peers information. 
- self.current_metrics = [] - self.config = config - self.helper = Helper() - - def getCurrentMetrics(self) -> list: - return self.current_metrics - - def getPeersData(self): - """Get data of the current peers by querying Quorum nodes RPC endpoint - - Returns: - _type_: _description_ - """ - headers = CaseInsensitiveDict() - headers["Content-Type"] = "application/json" - # https://getblock.io/docs/available-nodes-methods/ETH/JSON-RPC/admin_peers/ - # https://geth.ethereum.org/docs/rpc/ns-admin#admin_peers - # https://consensys.net/docs/goquorum/en/latest/develop/connecting-to-a-node/ - data = '{"jsonrpc": "2.0","method": "admin_peers","params": [],"id": "getblock.io"}' - response = requests.post(self.config.rpc_url, headers=headers, data=data) - - if response.status_code == 200: - return response.json().get('result') - return [] - - def createCurrentMetrics(self, instance: str, instance_name: str, peersData: list): - """Get current data and create metrics - - Args: - instance (str): _description_ - instance_name (str): _description_ - peersData (list): The current peers data queried from RPC endpoint - """ - if peersData is None: - peersData = [] - - logging.info("%s >> Creating metrics for %s peers - instance=%s, instance_name=%s", type(self).__name__, len(peersData), ('[None=will be determined from localAddress]' if instance is None else instance), instance_name) - - # Prepare current metrics - metric_peers = GaugeMetricFamily('quorum_peers', 'Quorum peers by enode', labels=['instance', 'instance_name', 'enode', 'enode_short', 'name']) - metric_peers_network_direction = GaugeMetricFamily('quorum_peers_network_direction', 'Quorum peers network inbound (1) or outbound (2) by enode', labels=['instance', 'instance_name', 'enode', 'enode_short', 'name']) - metric_peers_head_block = GaugeMetricFamily('quorum_peers_head_block', 'Quorum peers head block by enode and protocol eth or istanbul', labels=['instance', 'instance_name', 'enode', 'enode_short', 'name', 
'protocol']) - - # A dict of all enodes currently connected as peers - enodes_found = {} - - for each_peer in peersData: - # enodeUrl = "enode://[HERE IS THE 128 HEX-CHARS LONG ENODE]@1.2.3.4:30303?discport=0" - enode = self.helper.getEnode(enodeUrl=each_peer.get('enode')) - - # https://github.com/prometheus/client_python - if enode: - enode_short = enode[0:20] - # Remember that enode was found - enodes_found[enode] = True - - # If instance is not provided, we determine it from network.localAddress - if instance is None: - localAddress = self.helper.deep_get(each_peer, 'network.localAddress') - if localAddress: - instance = self.helper.getHostName(localAddress) - if instance: - instance = instance + ':9545' # add same port as Quorum Node default metrics also do - else: - instance = '' - - # Get pretty name. If not defined use enode_short instead - name = enode_short if enode not in self.config.peers else self.config.peers[enode].name - - # 1. metric_peers - # Set value (1) that enode is found - metric_peers.add_metric([instance, instance_name, enode, enode_short, name], 1) - - # 2. metric_peers_network_direction - # Set network inbound (1) or outbound (2) - inbound = self.helper.deep_get(each_peer, 'network.inbound') - if inbound == True: - metric_peers_network_direction.add_metric([instance, instance_name, enode, enode_short, name], 1) - elif inbound == False: - metric_peers_network_direction.add_metric([instance, instance_name, enode, enode_short, name], 2) - - # 3. 
metric_peers_head_block - eth_difficulty = self.helper.deep_get(each_peer, 'protocols.eth.difficulty') - if eth_difficulty: - metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'eth'], eth_difficulty) - - istanbul_difficulty = self.helper.deep_get(each_peer, 'protocols.istanbul.difficulty') - if istanbul_difficulty: - metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'istanbul'], istanbul_difficulty) - - # Add metrics for all expected peers that are currenty NOT connected - for each_config_peer_enode in self.config.peers.keys(): - if not enodes_found.get(each_config_peer_enode, False): - enode = each_config_peer_enode - enode_short = enode[0:20] - # name = self.config.peers.get(enode, enode_short) - name = enode_short if enode not in self.config.peers else self.config.peers[enode].name - - # 1. metric_peers - # Set value (0) that enode is NOT found - metric_peers.add_metric([instance, instance_name, enode, enode_short, name], 0) - - # 2. metric_peers_network_direction - # Set network not connected (0) - metric_peers_network_direction.add_metric([instance, instance_name, enode, enode_short, name], 0) - - # 3. metric_peers_head_block - # WE DO NOT ADD THESE METRICS AS THEY DO NOT MAKE SENSE: - # Providing a value of zero (0) may also be a correct value, therefore we do not create head block metrics for non connected peers! 
- # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'eth'], 0) - # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'istanbul'], 0) - - # Set current metrics to be reported by CustomCollector in a single atomic operation - self.current_metrics = [metric_peers, metric_peers_network_direction, metric_peers_head_block] - - def process(self): - """Processes getting peers info and preparing metrics - - Args: - rpc_url (str): URL of Quorum nodes RPC endpoint - known_peers (list): A list of objects of the known peers. Each object contains 'company-name' and 'enode' - """ - # Get DNS name and IP address of current instance by taking a deeper look at the rpc_url - instance_dns_name = self.helper.getHostName(url=self.config.rpc_url) - - # We do not use the IP address of the RPC Endpoint as this may be the clusterIp of the service if it is not a headless service. - # Therefore we determine the instance from network.localAddress - # instance_ip = resolveIpAddress(dns_name=instance_dns_name) + ':9545' # default metrics also add port - - # Get data of all currently connected peers - peersData = self.getPeersData() - # Report metrics for this instance - self.createCurrentMetrics(instance=None, instance_name=instance_dns_name, peersData=peersData) diff --git a/source/utils/common.py b/source/utils/common.py new file mode 100644 index 0000000..a92725c --- /dev/null +++ b/source/utils/common.py @@ -0,0 +1,29 @@ +"""Abstraction for providing metrics from multiple providers. +See See https://github.com/prometheus/client_python#custom-collectors + +""" + +class IMetricsProvider: + """Interface for providing the current metrics + """ + def get_current_metrics(self) -> list: + """Get the current metrics + + Returns: + list: The current metrics + """ + +class CustomCollector: + """A custom Collector for reporting Metrics. 
+ """ + + def __init__(self, metrics_provider:IMetricsProvider): + self.metrics_provider = metrics_provider + + def collect(self) -> list: + """Collects the current metrics from the IMetricsProvider + + Returns: + list: the current metrics of the MetricsProvider + """ + return self.metrics_provider.get_current_metrics() diff --git a/source/utils/Config.py b/source/utils/config.py similarity index 52% rename from source/utils/Config.py rename to source/utils/config.py index f364fcd..ee0ec82 100644 --- a/source/utils/Config.py +++ b/source/utils/config.py @@ -1,9 +1,13 @@ -from audioop import add +"""This module contains the Config and loading procedures for the config + +""" import logging +import json class Config: """Encapsulates the application configuration. """ + def __init__(self): self._rpc_url = None self._namespace = None @@ -30,17 +34,20 @@ def load(self, config_object) -> bool: self._rpc_url = config_object.get('rpc_url') if not self._rpc_url: - logging.error("'rpc_url' is not set in config. E.g. 'rpc_url': 'http://quorum-node-0.quorum:8545'") + logging.error( + "'rpc_url' is not set in config. E.g. 'rpc_url': 'http://quorum-node-0.quorum:8545'") return False self._namespace = config_object.get('namespace') if not self._namespace: - logging.error("'namespace' is not set in config. E.g. 'namespace': 'quorum'") + logging.error( + "'namespace' is not set in config. E.g. 'namespace': 'quorum'") return False self._deployment = config_object.get('deployment') if not self._deployment: - logging.error("'deployment' is not set in config. E.g. 'deployment': 'quorum'") + logging.error( + "'deployment' is not set in config. E.g. 
'deployment': 'quorum'") return False peers = config_object.get('peers') @@ -55,11 +62,13 @@ def load(self, config_object) -> bool: port = each.get('enodeAddressPort') if enode is None or company_name is None or address is None or port is None: - logging.error("'enode', 'company-name', 'enodeAddress', 'enodeAddressPort' must be set at a peer object.") + logging.error( + "'enode', 'company-name', 'enodeAddress', 'enodeAddressPort' must be set at a peer object.") return False - - # Check is company_name is unique. If not, use company_name plus first 5 chars of enode - count_same_company_name = len(list(filter(lambda peer: peer['company-name'] == company_name, peers))) + + # Check if company_name is unique. If not, use company_name plus first 5 chars of enode + count_same_company_name = len(list(filter( + lambda peer: peer['company-name'] == company_name, peers))) # pylint: disable=W0640 name = None if count_same_company_name > 1: name = company_name + " (" + enode[0:5] + ")" @@ -72,23 +81,46 @@ def load(self, config_object) -> bool: @property def rpc_url(self) -> str: + """The RPC URL + + Returns: + str: The RPC URL + """ return self._rpc_url @property def peers(self) -> dict: + """A dictionary of peers + + Returns: + dict: dcitionary of peers. 
Key is the enode, value is of type PeerConfig + """ return self._peers @property def deployment(self) -> str: + """The Name of the K8S deployment of the Quorum node + + Returns: + str: Name of the K8S deployment + """ return self._deployment @property def namespace(self) -> str: + """The Namespace of the K8S deployment of the Quorum node + + Returns: + str: Namespace of the K8S deployment + """ return self._namespace class PeerConfig: - def __init__(self, name:str, enode:str, address:str, port: str): + """Peer Data + """ + + def __init__(self, name: str, enode: str, address: str, port: str): self._name = name self._enode = enode self._address = address @@ -96,16 +128,54 @@ def __init__(self, name:str, enode:str, address:str, port: str): @property def name(self) -> str: + """Name of the peer + + Returns: + str: Name of the peer + """ return self._name @property - def enode(self) -> dict: + def enode(self) -> str: + """Enode of the peer + + Returns: + str: Enode of the peer + """ return self._enode @property def address(self) -> str: + """IP Address of the peer + + Returns: + str: IP Address of the peer + """ return self._address @property def port(self) -> str: + """Port number of the peer + + Returns: + str: Port number of the peer + """ return self._port + +def load(filename:str = 'config.json') -> Config: + """Load the application configuration + + Args: + filename (str, optional): The configuration file name. Defaults to 'config.json'. + + Returns: + Config: The application configuration or None on error during load. 
+ """ + with open(file=filename, mode='r', encoding='utf-8') as file: + config_object = json.load(file) + + config = Config() + if config.load(config_object) is True: + return config + + return None diff --git a/source/utils/Helper.py b/source/utils/helper.py similarity index 73% rename from source/utils/Helper.py rename to source/utils/helper.py index 5a291a0..82f35ed 100644 --- a/source/utils/Helper.py +++ b/source/utils/helper.py @@ -10,7 +10,7 @@ class Helper: """Helper class""" - def getHostName(self, url: str) -> str: + def get_host_name(self, url: str) -> str: """Get Hostname of an URL Args: @@ -23,14 +23,15 @@ def getHostName(self, url: str) -> str: parsed_url = urllib.parse.urlparse(url) if parsed_url and parsed_url.hostname: return parsed_url.hostname - - # Maybe it is not an absolute url: urlparse() and urlsplit() insists on absolute URLs starting with "//" + + # Maybe it is not an absolute url: + # urlparse() and urlsplit() insists on absolute URLs starting with "//" parsed_url = urllib.parse.urlparse('//' + url) if parsed_url and parsed_url.hostname: return parsed_url.hostname return None - def resolveIpAddress(self, dns_name: str) -> str: + def resolve_ip_address(self, dns_name: str) -> str: """Resolve the IP address of an DNS name Args: @@ -45,7 +46,7 @@ def resolveIpAddress(self, dns_name: str) -> str: logging.error("%s: %s", ex.strerror, dns_name) return None - def getEnode(self, enodeUrl: str) -> str: + def get_enode(self, enode_url: str) -> str: """Get 128 hex chars enode from enodeUrl Args: @@ -54,14 +55,15 @@ def getEnode(self, enodeUrl: str) -> str: Returns: str: The enode (128 hex chars) """ - if enodeUrl: - enodeList = enode_pattern.findall(enodeUrl) - if len(enodeList) > 0: - return enodeList[0] + if enode_url: + enode_list = enode_pattern.findall(enode_url) + if len(enode_list) > 0: + return enode_list[0] return None def deep_get(self, dictionary, keys, default=None): - """See 
https://stackoverflow.com/questions/25833613/safe-method-to-get-value-of-nested-dictionary + """See + https://stackoverflow.com/questions/25833613/safe-method-to-get-value-of-nested-dictionary Args: dictionary (_type_): _description_ @@ -71,4 +73,4 @@ def deep_get(self, dictionary, keys, default=None): Returns: _type_: _description_ """ - return reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default, keys.split("."), dictionary) \ No newline at end of file + return reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default, keys.split("."), dictionary) diff --git a/source/utils/KubeExecMetricsProvider.py b/source/utils/kube_exec_metrics_provider.py similarity index 73% rename from source/utils/KubeExecMetricsProvider.py rename to source/utils/kube_exec_metrics_provider.py index 2edc2b7..818036b 100644 --- a/source/utils/KubeExecMetricsProvider.py +++ b/source/utils/kube_exec_metrics_provider.py @@ -1,17 +1,18 @@ import logging -from .Config import Config, PeerConfig -from .Helper import Helper -from .IMetricsProvider import IMetricsProvider from kubernetes import config from kubernetes.client.api import core_v1_api, apps_v1_api from kubernetes.stream import stream from prometheus_client.core import GaugeMetricFamily +from .common import IMetricsProvider # pylint: disable=E0402 +from .config import Config, PeerConfig # pylint: disable=E0402 +from .helper import Helper # pylint: disable=E0402 + class KubeExecMetricsProvider(IMetricsProvider): """Executes commands via "kubectl exec" in remote pod and provides metrics """ - def __init__(self, config:Config): + def __init__(self, config: Config): # The current metrics to be reported on a Prometheus scrape. # Note: We cannot use "default" Gauge as the values remain even if the peer does not exists anymore. # Therefore we set fresh metrics every time we collect peers information. 
@@ -19,7 +20,7 @@ def __init__(self, config:Config): self._config = config self._helper = Helper() - def getCurrentMetrics(self) -> list: + def get_current_metrics(self) -> list: """Get the current metrics. Implementation of the IMetricsProvider Returns: @@ -27,7 +28,7 @@ def getCurrentMetrics(self) -> list: """ return self._current_metrics - def lookup_pod_name_and_ip(self) -> tuple: + def _lookup_pod_name_and_ip(self) -> tuple: """Looks up for the pod and its ip address by the deployment name and namespace from configuration Returns: @@ -63,13 +64,13 @@ def lookup_pod_name_and_ip(self) -> tuple: return (pod_list.items[0].metadata.name, pod_list.items[0].status.pod_ip) - def kube_exec_check_connectivity(self, pod_name:str, peer:PeerConfig): + def _kube_exec_check_connectivity(self, pod_name:str, peer:PeerConfig): """Checks the connectivity from within the pod to the peer Returns: _type_: True if connection could be established else False """ - shell_command = 'nc -z -w 1 {0} {1};echo -n $?'.format(peer.address, peer.port) + shell_command = f'nc -z -w 1 {peer.address} {peer.port};echo -n $?' 
logging.debug("%s >> shell_command=%s", type(self).__name__, shell_command) exec_command = [ '/bin/sh', @@ -83,14 +84,17 @@ def kube_exec_check_connectivity(self, pod_name:str, peer:PeerConfig): command=exec_command, stderr=True, stdin=False, stdout=True, tty=False, - _request_timeout=3) # do not use a tuple here, otherwise: kubernetes.client.exceptions.ApiException: (0) Reason: '<' not supported between instances of 'float' and 'tuple' + _request_timeout=3) + # do not use a tuple for _request_timeout, otherwise: + # kubernetes.client.exceptions.ApiException: + # (0) Reason: '<' not supported between instances of 'float' and 'tuple' - connection_successful = resp == '0' + connection_successful = resp == '0' # either '0' or '1' logging.debug("%s >> %s %s (%s:%s) ", type(self).__name__, 'OK: Connected to' if connection_successful else 'CANNOT connect to', peer.name, peer.address, peer.port) return connection_successful - def createCurrentMetrics(self, instance: str, instance_name: str, pod_name: str): - """Creates the current metrics + def create_current_metrics(self, instance: str, instance_name: str, pod_name: str): + """Creates the current metrics Args: instance (str): The instance name @@ -101,8 +105,11 @@ def createCurrentMetrics(self, instance: str, instance_name: str, pod_name: str) metrics = GaugeMetricFamily('quorum_tcp_egress_connectivity', 'Quorum TCP egress connectivity to other nodes by enode. 
(0) for no connectivity, (1) for connectivity can be established', labels=['instance', 'instance_name', 'enode', 'enode_short', 'name']) for each_config_peer in self._config.peers.values(): - connection_successful = self.kube_exec_check_connectivity(pod_name=pod_name, peer=each_config_peer) - metrics.add_metric([instance, instance_name, each_config_peer.enode, each_config_peer.enode[0:20], each_config_peer.name], (1 if connection_successful else 0)) + connection_successful = self._kube_exec_check_connectivity(pod_name=pod_name, peer=each_config_peer) + metrics.add_metric( + [instance, instance_name, each_config_peer.enode, + each_config_peer.enode[0:20], each_config_peer.name], + (1 if connection_successful else 0)) # Set current metrics to be reported by CustomCollector in a single atomic operation self._current_metrics = [metrics] @@ -114,16 +121,14 @@ def process(self): # 1. Use Incluster Config config.load_incluster_config() - # Get DNS name and IP address of current instance by taking a deeper look at the rpc_url - instance_dns_name = self._helper.getHostName(url=self._config.rpc_url) + # Get DNS name and use it as "pretty" instance name + instance_name = self._helper.get_host_name(url=self._config.rpc_url) - # We do not use the IP address of the RPC Endpoint as this may be the clusterIp of the service if it is not a headless service. 
- # Therefore we determine the instance from network.localAddress - # instance_ip = resolveIpAddress(dns_name=instance_dns_name) + ':9545' # default metrics also add port - pod_name, pod_ip = self.lookup_pod_name_and_ip() + pod_name, pod_ip = self._lookup_pod_name_and_ip() if pod_name is not None and pod_ip is not None: - self.createCurrentMetrics(instance="{0}:9545".format(pod_ip), instance_name=instance_dns_name, pod_name=pod_name) + instance = f"{pod_ip}:9545" + self.create_current_metrics(instance, instance_name, pod_name) else: - logging.warn("%s >> Cannot create metrics as pod_name or pod_ip is None - pod_name=%s, pod_ip=%s", type(self).__name__, pod_name, pod_ip) + logging.warning("%s >> Cannot create metrics as pod_name or pod_ip is None - pod_name=%s, pod_ip=%s", type(self).__name__, pod_name, pod_ip) \ No newline at end of file diff --git a/source/utils/rpc_metrics_provider.py b/source/utils/rpc_metrics_provider.py new file mode 100644 index 0000000..a9202b9 --- /dev/null +++ b/source/utils/rpc_metrics_provider.py @@ -0,0 +1,176 @@ +import logging +import requests + +from prometheus_client.core import GaugeMetricFamily +from requests.structures import CaseInsensitiveDict + +from .common import IMetricsProvider # pylint: disable=E0402 +from .config import Config # pylint: disable=E0402 +from .helper import Helper # pylint: disable=E0402 + +class RpcMetricsProvider(IMetricsProvider): + """Collects data from Quorum RPC API and provides metrics data. + """ + + def __init__(self, config: Config): + + # The current metrics to be reported on a Prometheus scrape. + # Note: We cannot use "default" Gauge as the values remain even if the peer does not exists anymore. + # Therefore we set fresh metrics every time we collect peers information. + self._current_metrics = [] + self._config = config + self._helper = Helper() + + def get_current_metrics(self) -> list: + """Get the current metrics. 
Implementation of the IMetricsProvider + + Returns: + list: The current metrics + """ + return self._current_metrics + + def _get_peers_data(self): + """Get data of the current peers by querying Quorum nodes RPC endpoint + + Returns: + _type_: _description_ + """ + headers = CaseInsensitiveDict() + headers["Content-Type"] = "application/json" + # https://getblock.io/docs/available-nodes-methods/ETH/JSON-RPC/admin_peers/ + # https://geth.ethereum.org/docs/rpc/ns-admin#admin_peers + # https://consensys.net/docs/goquorum/en/latest/develop/connecting-to-a-node/ + data = '{"jsonrpc": "2.0","method": "admin_peers","params": [],"id": "getblock.io"}' + response = requests.post(self._config.rpc_url, + headers=headers, data=data) + + if response.status_code == 200: + return response.json().get('result') + return [] + + def _create_current_metrics(self, instance_name: str, peers_data: list): + """Get current data and create metrics + + Args: + instance_name (str): _description_ + peersData (list): The current peers data queried from RPC endpoint + """ + if peers_data is None: + peers_data = [] + + logging.info("%s >> Creating metrics for %s peers - instance_name=%s, rpc_url=%s", + type(self).__name__, len(peers_data), instance_name, self._config.rpc_url) + + # Prepare current metrics + metric_peers = GaugeMetricFamily( + 'quorum_peers', 'Quorum peers by enode', + labels=['instance', 'instance_name', + 'enode', 'enode_short', 'name']) + metric_peers_network_direction = GaugeMetricFamily( + 'quorum_peers_network_direction', 'Quorum peers network inbound (1) or outbound (2) by enode', + labels=['instance', 'instance_name', + 'enode', 'enode_short', 'name']) + metric_peers_head_block = GaugeMetricFamily( + 'quorum_peers_head_block', 'Quorum peers head block by enode and protocol eth or istanbul', + labels=['instance', 'instance_name', 'enode', + 'enode_short', 'name', 'protocol']) + + # A dict of all enodes currently connected as peers + enodes_found = {} + + for each_peer in 
peers_data: + # enodeUrl = "enode://[HERE IS THE 128 HEX-CHARS LONG ENODE]@1.2.3.4:30303?discport=0" + enode = self._helper.get_enode(enode_url=each_peer.get('enode')) + + # https://github.com/prometheus/client_python + if enode: + enode_short = enode[0:20] + # Remember that enode was found + enodes_found[enode] = True + + # If instance is not provided, we determine it from network.localAddress + instance = '' + local_address = self._helper.deep_get( + each_peer, 'network.localAddress') + if local_address: + instance = self._helper.get_host_name(local_address) + if instance: + instance = instance + ':9545' # add same port as Quorum Node default metrics also do + + # Get pretty name. If not defined use enode_short instead + name = enode_short if enode not in self._config.peers else self._config.peers[ + enode].name + + # 1. metric_peers + # Set value (1) that enode is found + metric_peers.add_metric( + [instance, instance_name, enode, enode_short, name], 1) + + # 2. metric_peers_network_direction + # Set network inbound (1) or outbound (2) + inbound = self._helper.deep_get(each_peer, 'network.inbound') + metric_peers_network_direction.add_metric( + [instance, instance_name, enode, enode_short, name], + 1 if inbound is True else 2) + + # 3. 
metric_peers_head_block + eth_difficulty = self._helper.deep_get( + each_peer, 'protocols.eth.difficulty') + if eth_difficulty: + metric_peers_head_block.add_metric( + [instance, instance_name, enode, enode_short, name, 'eth'], + eth_difficulty) + + istanbul_difficulty = self._helper.deep_get( + each_peer, 'protocols.istanbul.difficulty') + if istanbul_difficulty: + metric_peers_head_block.add_metric( + [instance, instance_name, enode, + enode_short, name, 'istanbul'], + istanbul_difficulty) + + # Add metrics for all expected peers that are currenty NOT connected + for each_config_peer_enode in self._config.peers.keys(): + if not enodes_found.get(each_config_peer_enode, False): + enode = each_config_peer_enode + enode_short = enode[0:20] + # name = self.config.peers.get(enode, enode_short) + name = enode_short if enode not in self._config.peers else self._config.peers[ + enode].name + + # 1. metric_peers + # Set value (0) that enode is NOT found + metric_peers.add_metric( + [instance, instance_name, enode, enode_short, name], 0) + + # 2. metric_peers_network_direction + # Set network not connected (0) + metric_peers_network_direction.add_metric( + [instance, instance_name, enode, enode_short, name], 0) + + # 3. metric_peers_head_block + # WE DO NOT ADD THESE METRICS AS THEY DO NOT MAKE SENSE: + # Providing a value of zero (0) may also be a correct value, therefore we do not create head block metrics for non connected peers! 
+ # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'eth'], 0) + # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'istanbul'], 0) + + # Set current metrics to be reported by CustomCollector in a single atomic operation + self._current_metrics = [ + metric_peers, metric_peers_network_direction, metric_peers_head_block] + + def process(self): + """Processes getting peers info and preparing metrics + + Args: + rpc_url (str): URL of Quorum nodes RPC endpoint + known_peers (list): A list of objects of the known peers. Each object contains 'company-name' and 'enode' + """ + # Get DNS name and use it as "pretty" instance name + instance_name = self._helper.get_host_name(url=self._config.rpc_url) + + # Get data of all currently connected peers + peers_data = self._get_peers_data() + + # Report metrics for this instance + self._create_current_metrics( + instance_name=instance_name, peers_data=peers_data) From 53b038c3cc42e4ea9310931d7d52ed25acc2dee2 Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Wed, 14 Sep 2022 14:52:28 +0200 Subject: [PATCH 04/11] cleanup and refactor --- source/main.py | 13 +- source/utils/kube_exec_metrics_provider.py | 104 ++++++++----- source/utils/rpc_metrics_provider.py | 167 +++++++++++---------- 3 files changed, 163 insertions(+), 121 deletions(-) diff --git a/source/main.py b/source/main.py index 4b6d15c..46a1b97 100644 --- a/source/main.py +++ b/source/main.py @@ -11,7 +11,12 @@ from utils.kube_exec_metrics_provider import KubeExecMetricsProvider from utils.rpc_metrics_provider import RpcMetricsProvider -if __name__ == '__main__': +def main() -> int: + """Main + + Returns: + int: Return code + """ logging.basicConfig( format='%(levelname)s: %(message)s', level=logging.INFO) @@ -19,7 +24,7 @@ SLEEP_TIME = 10.0 config = utils.config.load() if config is None: - sys.exit(1) + return 1 # Init MetricsProviders and register 
CustomCollectors rpc_metrics_provider = RpcMetricsProvider(config) @@ -46,3 +51,7 @@ quit_event.wait(timeout=SLEEP_TIME) logging.info("Leaving - quit_event.is_set()=%s", quit_event.is_set()) + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/source/utils/kube_exec_metrics_provider.py b/source/utils/kube_exec_metrics_provider.py index 818036b..273e394 100644 --- a/source/utils/kube_exec_metrics_provider.py +++ b/source/utils/kube_exec_metrics_provider.py @@ -1,20 +1,23 @@ +"""Provides Metrics collected via executing command in the Quorum pod via "kubectl exec" +""" import logging -from kubernetes import config +import kubernetes from kubernetes.client.api import core_v1_api, apps_v1_api from kubernetes.stream import stream from prometheus_client.core import GaugeMetricFamily -from .common import IMetricsProvider # pylint: disable=E0402 -from .config import Config, PeerConfig # pylint: disable=E0402 -from .helper import Helper # pylint: disable=E0402 +from .common import IMetricsProvider # pylint: disable=E0402 +from .config import Config, PeerConfig # pylint: disable=E0402 +from .helper import Helper # pylint: disable=E0402 class KubeExecMetricsProvider(IMetricsProvider): """Executes commands via "kubectl exec" in remote pod and provides metrics """ + def __init__(self, config: Config): # The current metrics to be reported on a Prometheus scrape. - # Note: We cannot use "default" Gauge as the values remain even if the peer does not exists anymore. + # We cannot use "default" Gauge as the values remain even if the peer does not exist anymore # Therefore we set fresh metrics every time we collect peers information. 
self._current_metrics = [] self._config = config @@ -29,7 +32,8 @@ def get_current_metrics(self) -> list: return self._current_metrics def _lookup_pod_name_and_ip(self) -> tuple: - """Looks up for the pod and its ip address by the deployment name and namespace from configuration + """Looks up for the pod and its ip address by configuration settings, + e.g. by deployment name and namespace. Returns: tuple: pod name and ip address @@ -39,39 +43,51 @@ def _lookup_pod_name_and_ip(self) -> tuple: # Determine Deployment and get labels selectors # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Deployment.md - logging.debug("%s >> Getting deployment - name=%s, namespace=%s", type(self).__name__, self._config.deployment, self._config.namespace) - deployment = apps_v1.read_namespaced_deployment(name=self._config.deployment, namespace=self._config.namespace, _request_timeout=(1,2)) + logging.debug("%s >> Getting deployment - name=%s, namespace=%s", + type(self).__name__, self._config.deployment, self._config.namespace) + deployment = apps_v1.read_namespaced_deployment( + name=self._config.deployment, namespace=self._config.namespace, _request_timeout=(1, 2)) if deployment is None: - logging.error("%s >> Deployment not found - name=%s, namespace=%s", type(self).__name__, self._config.deployment, self._config.namespace) + logging.error("%s >> Deployment not found - name=%s, namespace=%s", + type(self).__name__, self._config.deployment, self._config.namespace) return (None, None) # Format Label Selectors into key1=value1,key2=value2,... 
format deployment_label_selector = deployment.spec.selector - label_selector_string = ','.join(map(lambda key: "{0}={1}".format(key, deployment_label_selector.match_labels[key]), deployment_label_selector.match_labels.keys())) - logging.debug("%s >> Deployment found - label_selector_string=%s", type(self).__name__, label_selector_string) + label_selector_string = ','.join(map( + lambda key: f'{key}={deployment_label_selector.match_labels[key]}', + deployment_label_selector.match_labels.keys())) + logging.debug("%s >> Deployment found - label_selector_string=%s", + type(self).__name__, label_selector_string) # Get pods by selector - pod_list = core_v1.list_namespaced_pod(namespace=self._config.namespace, label_selector=label_selector_string, watch=False, _request_timeout=(1,2)) + pod_list = core_v1.list_namespaced_pod(namespace=self._config.namespace, + label_selector=label_selector_string, watch=False, _request_timeout=(1, 2)) + if pod_list is None or len(pod_list.items) == 0: - logging.warning("%s >> No pods found - label_selector=%s, namespace=%s", type(self).__name__, label_selector_string, self._config.namespace) + logging.warning("%s >> No pods found - label_selector=%s, namespace=%s", + type(self).__name__, label_selector_string, self._config.namespace) return (None, None) if len(pod_list.items) > 1: - logging.warning("%s >> More than one pod found - label_selector=%s, namespace=%s", type(self).__name__, label_selector_string, self._config.namespace) + logging.warning("%s >> More than one pod found - label_selector=%s, namespace=%s", + type(self).__name__, label_selector_string, self._config.namespace) for each_pod in pod_list.items: - logging.info("%s >> Pod found - name=%s", type(self).__name__, each_pod.metadata.name) + logging.info("%s >> Pod found - name=%s", + type(self).__name__, each_pod.metadata.name) return (None, None) return (pod_list.items[0].metadata.name, pod_list.items[0].status.pod_ip) - def _kube_exec_check_connectivity(self, pod_name:str, 
peer:PeerConfig): + def _kube_exec_check_connectivity(self, pod_name: str, peer: PeerConfig): """Checks the connectivity from within the pod to the peer Returns: _type_: True if connection could be established else False """ shell_command = f'nc -z -w 1 {peer.address} {peer.port};echo -n $?' - logging.debug("%s >> shell_command=%s", type(self).__name__, shell_command) + logging.debug("%s >> shell_command=%s", + type(self).__name__, shell_command) exec_command = [ '/bin/sh', '-c', @@ -79,18 +95,21 @@ def _kube_exec_check_connectivity(self, pod_name:str, peer:PeerConfig): core_v1 = core_v1_api.CoreV1Api() resp = stream(core_v1.connect_get_namespaced_pod_exec, - name=pod_name, - namespace=self._config.namespace, - command=exec_command, - stderr=True, stdin=False, - stdout=True, tty=False, - _request_timeout=3) - # do not use a tuple for _request_timeout, otherwise: - # kubernetes.client.exceptions.ApiException: - # (0) Reason: '<' not supported between instances of 'float' and 'tuple' - - connection_successful = resp == '0' # either '0' or '1' - logging.debug("%s >> %s %s (%s:%s) ", type(self).__name__, 'OK: Connected to' if connection_successful else 'CANNOT connect to', peer.name, peer.address, peer.port) + name=pod_name, + namespace=self._config.namespace, + command=exec_command, + stderr=True, stdin=False, + stdout=True, tty=False, + _request_timeout=3) + # do not use a tuple for _request_timeout, otherwise: + # kubernetes.client.exceptions.ApiException: + # (0) Reason: '<' not supported between instances of 'float' and 'tuple' + + connection_successful = resp == '0' # either '0' or '1' + logging.debug("%s >> %s %s (%s:%s) ", type(self).__name__, + 'OK: Connected to' if connection_successful else 'CANNOT connect to', + peer.name, peer.address, peer.port) + return connection_successful def create_current_metrics(self, instance: str, instance_name: str, pod_name: str): @@ -101,15 +120,20 @@ def create_current_metrics(self, instance: str, instance_name: str, 
pod_name: st instance_name (str): A pretty instance name pod_name (str): The pod name """ - logging.info("%s >> Creating metrics for %s peers - instance=%s, instance_name=%s, pod_name=%s, namespace=%s", type(self).__name__, len(self._config.peers.keys()), instance, instance_name, pod_name, self._config.namespace) + logging.info("%s >> Creating metrics for %s peers - instance=%s, instance_name=%s, pod_name=%s, namespace=%s", + type(self).__name__, len(self._config.peers.keys()), + instance, instance_name, pod_name, self._config.namespace) + + metrics = GaugeMetricFamily('quorum_tcp_egress_connectivity', + 'Quorum TCP egress connectivity to other nodes by enode. (0) for no connectivity, (1) for connectivity can be established', + labels=['instance', 'instance_name', 'enode', 'enode_short', 'name']) - metrics = GaugeMetricFamily('quorum_tcp_egress_connectivity', 'Quorum TCP egress connectivity to other nodes by enode. (0) for no connectivity, (1) for connectivity can be established', labels=['instance', 'instance_name', 'enode', 'enode_short', 'name']) for each_config_peer in self._config.peers.values(): - connection_successful = self._kube_exec_check_connectivity(pod_name=pod_name, peer=each_config_peer) - metrics.add_metric( - [instance, instance_name, each_config_peer.enode, + connection_successful = self._kube_exec_check_connectivity( + pod_name=pod_name, peer=each_config_peer) + metrics.add_metric([instance, instance_name, each_config_peer.enode, each_config_peer.enode[0:20], each_config_peer.name], - (1 if connection_successful else 0)) + 1 if connection_successful else 0) # Set current metrics to be reported by CustomCollector in a single atomic operation self._current_metrics = [metrics] @@ -118,17 +142,17 @@ def process(self): """Processes getting information and preparing metrics """ - # 1. 
Use Incluster Config - config.load_incluster_config() + # Use Incluster Config + kubernetes.config.load_incluster_config() # Get DNS name and use it as "pretty" instance name instance_name = self._helper.get_host_name(url=self._config.rpc_url) - + # Retrieve pods name and ip pod_name, pod_ip = self._lookup_pod_name_and_ip() if pod_name is not None and pod_ip is not None: instance = f"{pod_ip}:9545" self.create_current_metrics(instance, instance_name, pod_name) else: - logging.warning("%s >> Cannot create metrics as pod_name or pod_ip is None - pod_name=%s, pod_ip=%s", type(self).__name__, pod_name, pod_ip) - \ No newline at end of file + logging.warning("%s >> Cannot create metrics as pod_name or pod_ip is None - pod_name=%s, pod_ip=%s", + type(self).__name__, pod_name, pod_ip) diff --git a/source/utils/rpc_metrics_provider.py b/source/utils/rpc_metrics_provider.py index a9202b9..f444178 100644 --- a/source/utils/rpc_metrics_provider.py +++ b/source/utils/rpc_metrics_provider.py @@ -1,3 +1,5 @@ +"""Provides Metrics collected via the Quorum RPC endpoint +""" import logging import requests @@ -13,10 +15,6 @@ class RpcMetricsProvider(IMetricsProvider): """ def __init__(self, config: Config): - - # The current metrics to be reported on a Prometheus scrape. - # Note: We cannot use "default" Gauge as the values remain even if the peer does not exists anymore. - # Therefore we set fresh metrics every time we collect peers information. self._current_metrics = [] self._config = config self._helper = Helper() @@ -61,7 +59,9 @@ def _create_current_metrics(self, instance_name: str, peers_data: list): logging.info("%s >> Creating metrics for %s peers - instance_name=%s, rpc_url=%s", type(self).__name__, len(peers_data), instance_name, self._config.rpc_url) - # Prepare current metrics + # The current metrics to be reported on a Prometheus scrape. 
+ # We cannot use "default" Gauge as the values remain even if the peer does not exist anymore + # Therefore we set fresh metrics every time we collect peers information. metric_peers = GaugeMetricFamily( 'quorum_peers', 'Quorum peers by enode', labels=['instance', 'instance_name', @@ -76,88 +76,97 @@ def _create_current_metrics(self, instance_name: str, peers_data: list): 'enode_short', 'name', 'protocol']) # A dict of all enodes currently connected as peers - enodes_found = {} + enodes_connected = {} + # Add metrics for all connected peers for each_peer in peers_data: - # enodeUrl = "enode://[HERE IS THE 128 HEX-CHARS LONG ENODE]@1.2.3.4:30303?discport=0" - enode = self._helper.get_enode(enode_url=each_peer.get('enode')) - - # https://github.com/prometheus/client_python - if enode: - enode_short = enode[0:20] - # Remember that enode was found - enodes_found[enode] = True - - # If instance is not provided, we determine it from network.localAddress - instance = '' - local_address = self._helper.deep_get( - each_peer, 'network.localAddress') - if local_address: - instance = self._helper.get_host_name(local_address) - if instance: - instance = instance + ':9545' # add same port as Quorum Node default metrics also do - - # Get pretty name. If not defined use enode_short instead - name = enode_short if enode not in self._config.peers else self._config.peers[ - enode].name - - # 1. metric_peers - # Set value (1) that enode is found - metric_peers.add_metric( - [instance, instance_name, enode, enode_short, name], 1) - - # 2. metric_peers_network_direction - # Set network inbound (1) or outbound (2) - inbound = self._helper.deep_get(each_peer, 'network.inbound') - metric_peers_network_direction.add_metric( - [instance, instance_name, enode, enode_short, name], - 1 if inbound is True else 2) - - # 3. 
metric_peers_head_block - eth_difficulty = self._helper.deep_get( - each_peer, 'protocols.eth.difficulty') - if eth_difficulty: - metric_peers_head_block.add_metric( - [instance, instance_name, enode, enode_short, name, 'eth'], - eth_difficulty) - - istanbul_difficulty = self._helper.deep_get( - each_peer, 'protocols.istanbul.difficulty') - if istanbul_difficulty: - metric_peers_head_block.add_metric( - [instance, instance_name, enode, - enode_short, name, 'istanbul'], - istanbul_difficulty) - - # Add metrics for all expected peers that are currenty NOT connected + enode = self._set_metrics_for_connected_peer(each_peer, instance_name, metric_peers, metric_peers_network_direction, metric_peers_head_block) + if enode is not None: + enodes_connected[enode] = True + + # Add metrics for all configured/expected peers that are currenty NOT connected for each_config_peer_enode in self._config.peers.keys(): - if not enodes_found.get(each_config_peer_enode, False): - enode = each_config_peer_enode - enode_short = enode[0:20] - # name = self.config.peers.get(enode, enode_short) - name = enode_short if enode not in self._config.peers else self._config.peers[ - enode].name - - # 1. metric_peers - # Set value (0) that enode is NOT found - metric_peers.add_metric( - [instance, instance_name, enode, enode_short, name], 0) - - # 2. metric_peers_network_direction - # Set network not connected (0) - metric_peers_network_direction.add_metric( - [instance, instance_name, enode, enode_short, name], 0) - - # 3. metric_peers_head_block - # WE DO NOT ADD THESE METRICS AS THEY DO NOT MAKE SENSE: - # Providing a value of zero (0) may also be a correct value, therefore we do not create head block metrics for non connected peers! 
- # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'eth'], 0) - # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'istanbul'], 0) + if each_config_peer_enode not in enodes_connected: +# enodes_connected.get(each_config_peer_enode, False) is False: + self._set_metrics_for_expected_but_unconnected_peer(each_config_peer_enode, instance_name, metric_peers, metric_peers_network_direction) # Set current metrics to be reported by CustomCollector in a single atomic operation self._current_metrics = [ metric_peers, metric_peers_network_direction, metric_peers_head_block] + def _set_metrics_for_connected_peer(self, each_peer, instance_name, metric_peers, metric_peers_network_direction, metric_peers_head_block) -> str: + # enodeUrl = "enode://[HERE IS THE 128 HEX-CHARS LONG ENODE]@1.2.3.4:30303?discport=0" + enode = self._helper.get_enode(enode_url=each_peer.get('enode')) + + if enode is None: + return None + + enode_short = enode[0:20] + + # If instance is not provided, we determine it from network.localAddress + instance = '' + local_address = self._helper.deep_get( + each_peer, 'network.localAddress') + if local_address: + instance = self._helper.get_host_name(local_address) + if instance: + instance = instance + ':9545' # add same port as Quorum Node default metrics also do + + # Get pretty name. If not defined use enode_short instead + name = enode_short if enode not in self._config.peers else self._config.peers[ + enode].name + + # 1. metric_peers + # Set value (1) that enode is found + metric_peers.add_metric( + [instance, instance_name, enode, enode_short, name], 1) + + # 2. metric_peers_network_direction + # Set network inbound (1) or outbound (2) + inbound = self._helper.deep_get(each_peer, 'network.inbound') + metric_peers_network_direction.add_metric( + [instance, instance_name, enode, enode_short, name], + 1 if inbound is True else 2) + + # 3. 
metric_peers_head_block + eth_difficulty = self._helper.deep_get( + each_peer, 'protocols.eth.difficulty') + if eth_difficulty: + metric_peers_head_block.add_metric( + [instance, instance_name, enode, enode_short, name, 'eth'], + eth_difficulty) + + istanbul_difficulty = self._helper.deep_get( + each_peer, 'protocols.istanbul.difficulty') + if istanbul_difficulty: + metric_peers_head_block.add_metric( + [instance, instance_name, enode, + enode_short, name, 'istanbul'], + istanbul_difficulty) + + return enode + + def _set_metrics_for_expected_but_unconnected_peer(self, enode, instance_name, metric_peers, metric_peers_network_direction) -> str: + instance = '' + enode_short = enode[0:20] + name = self._config.peers[enode].name + + # 1. metric_peers + # Set value (0) that enode is NOT found + metric_peers.add_metric( + [instance, instance_name, enode, enode_short, name], 0) + + # 2. metric_peers_network_direction + # Set network not connected (0) + metric_peers_network_direction.add_metric( + [instance, instance_name, enode, enode_short, name], 0) + + # 3. metric_peers_head_block + # WE DO NOT ADD THESE METRICS AS THEY DO NOT MAKE SENSE: + # Providing a value of zero (0) may also be a correct value, therefore we do not create head block metrics for non connected peers! 
+ # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'eth'], 0) + # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'istanbul'], 0) + def process(self): """Processes getting peers info and preparing metrics From b6ab669850628c49e03e731094ee262f65c220b9 Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Wed, 14 Sep 2022 14:54:04 +0200 Subject: [PATCH 05/11] format doc --- source/utils/common.py | 5 ++++- source/utils/config.py | 4 +++- source/utils/helper.py | 1 + source/utils/kube_exec_metrics_provider.py | 19 ++++++++++--------- source/utils/rpc_metrics_provider.py | 14 ++++++++------ 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/source/utils/common.py b/source/utils/common.py index a92725c..b03a695 100644 --- a/source/utils/common.py +++ b/source/utils/common.py @@ -3,9 +3,11 @@ """ + class IMetricsProvider: """Interface for providing the current metrics """ + def get_current_metrics(self) -> list: """Get the current metrics @@ -13,11 +15,12 @@ def get_current_metrics(self) -> list: list: The current metrics """ + class CustomCollector: """A custom Collector for reporting Metrics. """ - def __init__(self, metrics_provider:IMetricsProvider): + def __init__(self, metrics_provider: IMetricsProvider): self.metrics_provider = metrics_provider def collect(self) -> list: diff --git a/source/utils/config.py b/source/utils/config.py index ee0ec82..cf492a5 100644 --- a/source/utils/config.py +++ b/source/utils/config.py @@ -4,6 +4,7 @@ import logging import json + class Config: """Encapsulates the application configuration. 
""" @@ -162,7 +163,8 @@ def port(self) -> str: """ return self._port -def load(filename:str = 'config.json') -> Config: + +def load(filename: str = 'config.json') -> Config: """Load the application configuration Args: diff --git a/source/utils/helper.py b/source/utils/helper.py index 82f35ed..48f4cd6 100644 --- a/source/utils/helper.py +++ b/source/utils/helper.py @@ -7,6 +7,7 @@ # The regex pattern to extract the 128 hex chars enode from enode url enode_pattern = re.compile(r'[0-9a-fA-F]{128}') + class Helper: """Helper class""" diff --git a/source/utils/kube_exec_metrics_provider.py b/source/utils/kube_exec_metrics_provider.py index 273e394..0c80cad 100644 --- a/source/utils/kube_exec_metrics_provider.py +++ b/source/utils/kube_exec_metrics_provider.py @@ -11,6 +11,7 @@ from .config import Config, PeerConfig # pylint: disable=E0402 from .helper import Helper # pylint: disable=E0402 + class KubeExecMetricsProvider(IMetricsProvider): """Executes commands via "kubectl exec" in remote pod and provides metrics """ @@ -55,14 +56,14 @@ def _lookup_pod_name_and_ip(self) -> tuple: # Format Label Selectors into key1=value1,key2=value2,... 
format deployment_label_selector = deployment.spec.selector label_selector_string = ','.join(map( - lambda key: f'{key}={deployment_label_selector.match_labels[key]}', - deployment_label_selector.match_labels.keys())) + lambda key: f'{key}={deployment_label_selector.match_labels[key]}', + deployment_label_selector.match_labels.keys())) logging.debug("%s >> Deployment found - label_selector_string=%s", type(self).__name__, label_selector_string) # Get pods by selector pod_list = core_v1.list_namespaced_pod(namespace=self._config.namespace, - label_selector=label_selector_string, watch=False, _request_timeout=(1, 2)) + label_selector=label_selector_string, watch=False, _request_timeout=(1, 2)) if pod_list is None or len(pod_list.items) == 0: logging.warning("%s >> No pods found - label_selector=%s, namespace=%s", @@ -107,8 +108,8 @@ def _kube_exec_check_connectivity(self, pod_name: str, peer: PeerConfig): connection_successful = resp == '0' # either '0' or '1' logging.debug("%s >> %s %s (%s:%s) ", type(self).__name__, - 'OK: Connected to' if connection_successful else 'CANNOT connect to', - peer.name, peer.address, peer.port) + 'OK: Connected to' if connection_successful else 'CANNOT connect to', + peer.name, peer.address, peer.port) return connection_successful @@ -125,15 +126,15 @@ def create_current_metrics(self, instance: str, instance_name: str, pod_name: st instance, instance_name, pod_name, self._config.namespace) metrics = GaugeMetricFamily('quorum_tcp_egress_connectivity', - 'Quorum TCP egress connectivity to other nodes by enode. (0) for no connectivity, (1) for connectivity can be established', - labels=['instance', 'instance_name', 'enode', 'enode_short', 'name']) + 'Quorum TCP egress connectivity to other nodes by enode. 
(0) for no connectivity, (1) for connectivity can be established', + labels=['instance', 'instance_name', 'enode', 'enode_short', 'name']) for each_config_peer in self._config.peers.values(): connection_successful = self._kube_exec_check_connectivity( pod_name=pod_name, peer=each_config_peer) metrics.add_metric([instance, instance_name, each_config_peer.enode, - each_config_peer.enode[0:20], each_config_peer.name], - 1 if connection_successful else 0) + each_config_peer.enode[0:20], each_config_peer.name], + 1 if connection_successful else 0) # Set current metrics to be reported by CustomCollector in a single atomic operation self._current_metrics = [metrics] diff --git a/source/utils/rpc_metrics_provider.py b/source/utils/rpc_metrics_provider.py index f444178..da7e635 100644 --- a/source/utils/rpc_metrics_provider.py +++ b/source/utils/rpc_metrics_provider.py @@ -6,9 +6,10 @@ from prometheus_client.core import GaugeMetricFamily from requests.structures import CaseInsensitiveDict -from .common import IMetricsProvider # pylint: disable=E0402 -from .config import Config # pylint: disable=E0402 -from .helper import Helper # pylint: disable=E0402 +from .common import IMetricsProvider # pylint: disable=E0402 +from .config import Config # pylint: disable=E0402 +from .helper import Helper # pylint: disable=E0402 + class RpcMetricsProvider(IMetricsProvider): """Collects data from Quorum RPC API and provides metrics data. 
@@ -80,15 +81,16 @@ def _create_current_metrics(self, instance_name: str, peers_data: list): # Add metrics for all connected peers for each_peer in peers_data: - enode = self._set_metrics_for_connected_peer(each_peer, instance_name, metric_peers, metric_peers_network_direction, metric_peers_head_block) + enode = self._set_metrics_for_connected_peer( + each_peer, instance_name, metric_peers, metric_peers_network_direction, metric_peers_head_block) if enode is not None: enodes_connected[enode] = True # Add metrics for all configured/expected peers that are currenty NOT connected for each_config_peer_enode in self._config.peers.keys(): if each_config_peer_enode not in enodes_connected: -# enodes_connected.get(each_config_peer_enode, False) is False: - self._set_metrics_for_expected_but_unconnected_peer(each_config_peer_enode, instance_name, metric_peers, metric_peers_network_direction) + self._set_metrics_for_expected_but_unconnected_peer( + each_config_peer_enode, instance_name, metric_peers, metric_peers_network_direction) # Set current metrics to be reported by CustomCollector in a single atomic operation self._current_metrics = [ From 993b2998be92222ebe6b306e57fd61a801300e5a Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Wed, 14 Sep 2022 15:20:15 +0200 Subject: [PATCH 06/11] doc --- source/utils/helper.py | 6 +-- source/utils/kube_exec_metrics_provider.py | 1 + source/utils/rpc_metrics_provider.py | 54 ++++++++++++++-------- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/source/utils/helper.py b/source/utils/helper.py index 48f4cd6..99fe6fc 100644 --- a/source/utils/helper.py +++ b/source/utils/helper.py @@ -47,11 +47,11 @@ def resolve_ip_address(self, dns_name: str) -> str: logging.error("%s: %s", ex.strerror, dns_name) return None - def get_enode(self, enode_url: str) -> str: - """Get 128 hex chars enode from enodeUrl + def get_enode_from_url(self, enode_url: str) -> str: + """Get 128 hex chars 
enode from enode_url Args: - enodeUrl (str): The enode URL, e.g. enode://632176321637217632721@1.2.3.4:30303 + enode_url (str): The enode URL, e.g. enode://632176321637217632721@1.2.3.4:30303 Returns: str: The enode (128 hex chars) diff --git a/source/utils/kube_exec_metrics_provider.py b/source/utils/kube_exec_metrics_provider.py index 0c80cad..97984bc 100644 --- a/source/utils/kube_exec_metrics_provider.py +++ b/source/utils/kube_exec_metrics_provider.py @@ -152,6 +152,7 @@ def process(self): pod_name, pod_ip = self._lookup_pod_name_and_ip() if pod_name is not None and pod_ip is not None: + # add same port as Quorum Node default metrics also do instance = f"{pod_ip}:9545" self.create_current_metrics(instance, instance_name, pod_name) else: diff --git a/source/utils/rpc_metrics_provider.py b/source/utils/rpc_metrics_provider.py index da7e635..6adc91c 100644 --- a/source/utils/rpc_metrics_provider.py +++ b/source/utils/rpc_metrics_provider.py @@ -82,7 +82,8 @@ def _create_current_metrics(self, instance_name: str, peers_data: list): # Add metrics for all connected peers for each_peer in peers_data: enode = self._set_metrics_for_connected_peer( - each_peer, instance_name, metric_peers, metric_peers_network_direction, metric_peers_head_block) + each_peer, instance_name, metric_peers, + metric_peers_network_direction, metric_peers_head_block) if enode is not None: enodes_connected[enode] = True @@ -90,33 +91,51 @@ def _create_current_metrics(self, instance_name: str, peers_data: list): for each_config_peer_enode in self._config.peers.keys(): if each_config_peer_enode not in enodes_connected: self._set_metrics_for_expected_but_unconnected_peer( - each_config_peer_enode, instance_name, metric_peers, metric_peers_network_direction) + each_config_peer_enode, instance_name, + metric_peers, metric_peers_network_direction) # Set current metrics to be reported by CustomCollector in a single atomic operation self._current_metrics = [ metric_peers, 
metric_peers_network_direction, metric_peers_head_block]

-    def _set_metrics_for_connected_peer(self, each_peer, instance_name, metric_peers, metric_peers_network_direction, metric_peers_head_block) -> str:
-        # enodeUrl = "enode://[HERE IS THE 128 HEX-CHARS LONG ENODE]@1.2.3.4:30303?discport=0"
-        enode = self._helper.get_enode(enode_url=each_peer.get('enode'))
+    def _set_metrics_for_connected_peer(self, each_peer, instance_name: str,
+                                        metric_peers: GaugeMetricFamily,
+                                        metric_peers_network_direction: GaugeMetricFamily,
+                                        metric_peers_head_block: GaugeMetricFamily) -> str:
+        """Sets the metrics for a connected peer
+        Args:
+            each_peer (dict): The data of the connected peer
+            instance_name (str): The instance name
+            metric_peers (GaugeMetricFamily): The metrics if a peer is connected or not
+            metric_peers_network_direction (GaugeMetricFamily): Metrics in- or outbound connected
+            metric_peers_head_block (GaugeMetricFamily): The metrics for the head block of the peer
+
+        Returns:
+            str: The enode or None if enode cannot be determined
+        """
+        # enode_url = "enode://[HERE IS THE 128 HEX-CHARS LONG ENODE]@1.2.3.4:30303?discport=0"
+        enode = self._helper.get_enode_from_url(enode_url=each_peer.get('enode'))

         if enode is None:
             return None

+        #
+        # Prepare metric labels:
+        #
         enode_short = enode[0:20]
+        # Get pretty name. If not defined use enode_short instead
+        name = enode_short
+        if enode in self._config.peers:
+            name = self._config.peers[enode].name

-        # If instance is not provided, we determine it from network.localAddress
         instance = ''
         local_address = self._helper.deep_get(
             each_peer, 'network.localAddress')
         if local_address:
             instance = self._helper.get_host_name(local_address)
             if instance:
-                instance = instance + ':9545'  # add same port as Quorum Node default metrics also do
-
-        # Get pretty name.
If not defined use enode_short instead - name = enode_short if enode not in self._config.peers else self._config.peers[ - enode].name + # add same port as Quorum Node default metrics also do + instance = instance + ':9545' # 1. metric_peers # Set value (1) that enode is found @@ -148,7 +167,9 @@ def _set_metrics_for_connected_peer(self, each_peer, instance_name, metric_peers return enode - def _set_metrics_for_expected_but_unconnected_peer(self, enode, instance_name, metric_peers, metric_peers_network_direction) -> str: + def _set_metrics_for_expected_but_unconnected_peer(self, enode: str, instance_name: str, + metric_peers: GaugeMetricFamily, + metric_peers_network_direction: GaugeMetricFamily) -> str: instance = '' enode_short = enode[0:20] name = self._config.peers[enode].name @@ -165,16 +186,11 @@ def _set_metrics_for_expected_but_unconnected_peer(self, enode, instance_name, m # 3. metric_peers_head_block # WE DO NOT ADD THESE METRICS AS THEY DO NOT MAKE SENSE: - # Providing a value of zero (0) may also be a correct value, therefore we do not create head block metrics for non connected peers! - # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'eth'], 0) - # metric_peers_head_block.add_metric([instance, instance_name, enode, enode_short, name, 'istanbul'], 0) + # Providing a value of zero (0) may also be a correct value, + # therefore we do not create head block metrics for non connected peers! def process(self): """Processes getting peers info and preparing metrics - - Args: - rpc_url (str): URL of Quorum nodes RPC endpoint - known_peers (list): A list of objects of the known peers. 
Each object contains 'company-name' and 'enode' """ # Get DNS name and use it as "pretty" instance name instance_name = self._helper.get_host_name(url=self._config.rpc_url) From e2ba1f8b7a2ad186eecd5691b562429bb8727efc Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Wed, 14 Sep 2022 16:27:54 +0200 Subject: [PATCH 07/11] rename and simplify --- source/main.py | 22 ++++++------- source/utils/common.py | 32 ------------------- ...ider.py => kube_exec_metrics_collector.py} | 15 +++++---- ...s_provider.py => rpc_metrics_collector.py} | 17 +++++----- 4 files changed, 28 insertions(+), 58 deletions(-) delete mode 100644 source/utils/common.py rename source/utils/{kube_exec_metrics_provider.py => kube_exec_metrics_collector.py} (94%) rename source/utils/{rpc_metrics_provider.py => rpc_metrics_collector.py} (95%) diff --git a/source/main.py b/source/main.py index 46a1b97..7c06921 100644 --- a/source/main.py +++ b/source/main.py @@ -1,3 +1,5 @@ +"""Main program +""" import logging import signal import sys @@ -7,9 +9,9 @@ from prometheus_client.core import REGISTRY import utils.config -from utils.common import CustomCollector -from utils.kube_exec_metrics_provider import KubeExecMetricsProvider -from utils.rpc_metrics_provider import RpcMetricsProvider +from utils.kube_exec_metrics_collector import KubeExecMetricsCollector +from utils.rpc_metrics_collector import RpcMetricsCollector + def main() -> int: """Main @@ -27,13 +29,11 @@ def main() -> int: return 1 # Init MetricsProviders and register CustomCollectors - rpc_metrics_provider = RpcMetricsProvider(config) - rpc_custom_collector = CustomCollector(rpc_metrics_provider) - REGISTRY.register(rpc_custom_collector) + rpc_metrics_collector = RpcMetricsCollector(config) + REGISTRY.register(rpc_metrics_collector) - kube_exec_metrics_provider = KubeExecMetricsProvider(config) - kube_exec_custom_collector = CustomCollector(kube_exec_metrics_provider) - 
REGISTRY.register(kube_exec_custom_collector) + kube_exec_metrics_collector = KubeExecMetricsCollector(config) + REGISTRY.register(kube_exec_metrics_collector) # Start up the server to expose the metrics. start_http_server(8000) @@ -45,8 +45,8 @@ def main() -> int: lambda *_args: (logging.info("SIGTERM received") and False) or quit_event.set()) while not quit_event.is_set(): logging.info("Preparing metrics") - rpc_metrics_provider.process() - kube_exec_metrics_provider.process() + rpc_metrics_collector.process() + kube_exec_metrics_collector.process() logging.info("Done. Sleeping for %s seconds", SLEEP_TIME) quit_event.wait(timeout=SLEEP_TIME) diff --git a/source/utils/common.py b/source/utils/common.py deleted file mode 100644 index b03a695..0000000 --- a/source/utils/common.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Abstraction for providing metrics from multiple providers. -See See https://github.com/prometheus/client_python#custom-collectors - -""" - - -class IMetricsProvider: - """Interface for providing the current metrics - """ - - def get_current_metrics(self) -> list: - """Get the current metrics - - Returns: - list: The current metrics - """ - - -class CustomCollector: - """A custom Collector for reporting Metrics. 
- """ - - def __init__(self, metrics_provider: IMetricsProvider): - self.metrics_provider = metrics_provider - - def collect(self) -> list: - """Collects the current metrics from the IMetricsProvider - - Returns: - list: the current metrics of the MetricsProvider - """ - return self.metrics_provider.get_current_metrics() diff --git a/source/utils/kube_exec_metrics_provider.py b/source/utils/kube_exec_metrics_collector.py similarity index 94% rename from source/utils/kube_exec_metrics_provider.py rename to source/utils/kube_exec_metrics_collector.py index 97984bc..c59f573 100644 --- a/source/utils/kube_exec_metrics_provider.py +++ b/source/utils/kube_exec_metrics_collector.py @@ -1,18 +1,19 @@ -"""Provides Metrics collected via executing command in the Quorum pod via "kubectl exec" +"""Prometheus metrics collector provided by executing commands in the Quorum pod via "kubectl exec" """ import logging +from typing import Iterable import kubernetes from kubernetes.client.api import core_v1_api, apps_v1_api from kubernetes.stream import stream -from prometheus_client.core import GaugeMetricFamily +from prometheus_client.core import GaugeMetricFamily, Metric +from prometheus_client.registry import Collector -from .common import IMetricsProvider # pylint: disable=E0402 from .config import Config, PeerConfig # pylint: disable=E0402 from .helper import Helper # pylint: disable=E0402 -class KubeExecMetricsProvider(IMetricsProvider): +class KubeExecMetricsCollector(Collector): """Executes commands via "kubectl exec" in remote pod and provides metrics """ @@ -24,11 +25,11 @@ def __init__(self, config: Config): self._config = config self._helper = Helper() - def get_current_metrics(self) -> list: - """Get the current metrics. Implementation of the IMetricsProvider + def collect(self) -> Iterable[Metric]: + """Get the current metrics. 
Implementation of the Collector Returns: - list: The current metrics + Iterable[Metric]: The current metrics """ return self._current_metrics diff --git a/source/utils/rpc_metrics_provider.py b/source/utils/rpc_metrics_collector.py similarity index 95% rename from source/utils/rpc_metrics_provider.py rename to source/utils/rpc_metrics_collector.py index 6adc91c..3b7e56d 100644 --- a/source/utils/rpc_metrics_provider.py +++ b/source/utils/rpc_metrics_collector.py @@ -1,17 +1,18 @@ -"""Provides Metrics collected via the Quorum RPC endpoint +"""Prometheus metrics collector provided by querying the Quorum RPC endpoint """ import logging -import requests +from typing import Iterable -from prometheus_client.core import GaugeMetricFamily +import requests +from prometheus_client.core import GaugeMetricFamily, Metric +from prometheus_client.registry import Collector from requests.structures import CaseInsensitiveDict -from .common import IMetricsProvider # pylint: disable=E0402 from .config import Config # pylint: disable=E0402 from .helper import Helper # pylint: disable=E0402 -class RpcMetricsProvider(IMetricsProvider): +class RpcMetricsCollector(Collector): """Collects data from Quorum RPC API and provides metrics data. """ @@ -20,11 +21,11 @@ def __init__(self, config: Config): self._config = config self._helper = Helper() - def get_current_metrics(self) -> list: - """Get the current metrics. Implementation of the IMetricsProvider + def collect(self) -> Iterable[Metric]: + """Get the current metrics. 
Implementation of the Collector Returns: - list: The current metrics + Iterable[Metric]: The current metrics """ return self._current_metrics From 6a0cbc9ee08c3dc74ffc6c9c2737ea158b5cb608 Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Wed, 14 Sep 2022 16:31:42 +0200 Subject: [PATCH 08/11] rbac --- k8s/configmap.yaml | 2 +- k8s/deployment.yaml | 2 +- k8s/netpol.yaml | 6 +++--- k8s/rbac.yaml | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 k8s/rbac.yaml diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml index fddb561..01522dc 100644 --- a/k8s/configmap.yaml +++ b/k8s/configmap.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/name: quorum-node-metrics-exporter name: quorum-node-metrics-exporter - namespace: epi-poc-quorum-metrics + namespace: epi-poc-quorum data: # The config. # Required attributes: "rpc_url" and "peers" diff --git a/k8s/deployment.yaml b/k8s/deployment.yaml index fbe673c..d4d5b34 100644 --- a/k8s/deployment.yaml +++ b/k8s/deployment.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/name: quorum-node-metrics-exporter name: quorum-node-metrics-exporter - namespace: epi-poc-quorum-metrics + namespace: epi-poc-quorum spec: replicas: 1 selector: diff --git a/k8s/netpol.yaml b/k8s/netpol.yaml index 6789576..3ee0f44 100644 --- a/k8s/netpol.yaml +++ b/k8s/netpol.yaml @@ -2,7 +2,7 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: ingress-from-prometheus - namespace: epi-poc-quorum-metrics + namespace: epi-poc-quorum spec: ingress: - from: @@ -25,7 +25,7 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: egress-to-quorum - namespace: epi-poc-quorum-metrics + namespace: epi-poc-quorum spec: egress: - ports: @@ -48,7 +48,7 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: egress-to-dns - namespace: epi-poc-quorum-metrics + namespace: epi-poc-quorum spec: egress: - 
ports: diff --git a/k8s/rbac.yaml b/k8s/rbac.yaml new file mode 100644 index 0000000..0b9bf5a --- /dev/null +++ b/k8s/rbac.yaml @@ -0,0 +1,36 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: quorum-node-metrics-exporter + namespace: epi-poc-quorum +rules: + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["list","get"] + - apiGroups: [""] + resources: ["pods/exec"] + # https://github.com/kubernetes-client/python/issues/690 + verbs: ["create","watch", "get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: quorum-node-metrics-exporter + namespace: epi-poc-quorum +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: quorum-node-metrics-exporter +subjects: +- kind: ServiceAccount + name: quorum-node-metrics-exporter +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: quorum-node-metrics-exporter + namespace: epi-poc-quorum +--- From c8ec79323eb373ca493d371e952bfcbb7ecf942a Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Wed, 14 Sep 2022 16:32:48 +0200 Subject: [PATCH 09/11] serviceaccount and token --- k8s/deployment.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/k8s/deployment.yaml b/k8s/deployment.yaml index d4d5b34..04c5f3b 100644 --- a/k8s/deployment.yaml +++ b/k8s/deployment.yaml @@ -26,7 +26,8 @@ spec: app.kubernetes.io/name: quorum-node-metrics-exporter app.kubernetes.io/instance: quorum-node-metrics-exporter spec: - automountServiceAccountToken: false + automountServiceAccountToken: true + serviceAccountName: quorum-node-metrics-exporter containers: - image: REGISTRY/REPO:TAG imagePullPolicy: Always From aa9c93a8d48f01f6d941059189bef5fc9e5a515a Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Wed, 14 Sep 2022 16:40:31 +0200 Subject: [PATCH 10/11] netpol --- k8s/netpol.yaml | 33 
++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/k8s/netpol.yaml b/k8s/netpol.yaml index 3ee0f44..a276c8c 100644 --- a/k8s/netpol.yaml +++ b/k8s/netpol.yaml @@ -1,7 +1,7 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: - name: ingress-from-prometheus + name: quorum-node-metrics-exporter-ingress-from-prometheus namespace: epi-poc-quorum spec: ingress: @@ -24,7 +24,7 @@ spec: apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: - name: egress-to-quorum + name: quorum-node-metrics-exporter-egress-to-quorum namespace: epi-poc-quorum spec: egress: @@ -47,7 +47,7 @@ spec: apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: - name: egress-to-dns + name: quorum-node-metrics-exporter-egress-to-dns namespace: epi-poc-quorum spec: egress: @@ -64,3 +64,30 @@ spec: app.kubernetes.io/name: quorum-node-metrics-exporter policyTypes: - Egress +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: quorum-node-metrics-exporter-egress-to-kubeapi + namespace: epi-poc-quorum +spec: + egress: + - ports: + - port: 443 + protocol: TCP + to: + # The IP Address of the Kube API Service (see service kubernetes.default) + - ipBlock: + cidr: 172.20.0.1/32 + # Determine Kube API Endpoint via + # kubectl get endpoints --namespace default kubernetes + # Also see https://pauldally.medium.com/accessing-kubernetes-api-server-when-there-is-an-egress-networkpolicy-af4435e005f9 + - ipBlock: + cidr: 10.0.17.52/32 + - ipBlock: + cidr: 10.0.58.124/32 + podSelector: + matchLabels: + app.kubernetes.io/name: quorum-node-metrics-exporter + policyTypes: + - Egress From 96f1e9824f1b50946bacaa8591a613de79becbe3 Mon Sep 17 00:00:00 2001 From: Torsten Gippert <46532698+tgip-work@users.noreply.github.com> Date: Wed, 14 Sep 2022 16:54:40 +0200 Subject: [PATCH 11/11] doc update --- README.md | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 
b491565..5bcb234 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,27 @@
 # Quorum Node Metrics Exporter
 
-A Docker image based on a [Python script](./source/main.py) to gather additional information about peers of a Quorum Node via the RPC endpoint and to provide metrics in Prometheus format.
+A Docker image based on a [Python script](./source/main.py) to provide additional metrics of a Quorum node in Prometheus format.
+
+- Information about peers
+- Information if the quorum node can establish a TCP connection to another peer (good for finding out firewall misconfigurations).
 
 ## Howto
 
 1. Build the docker image, e.g. `docker build -t REGISTYR/REPO:TAG .`
 2. Push to your registry - `docker push REGISTYR/REPO:TAG`
-3. There is no helm chart yet as of 2022-July-15 !
+3. There is no helm chart yet!
 4. Set the image `.spec.template.spec.containers[0].image` in file [deployment.yaml](./k8s/deployment.yaml).
-5. Set `rpc_url` and `peers` in file [configmap.yaml](./k8s/configmap.yaml).
-6. Deploy to Kubernetes`
+5. Set `namespace`, `deployment`, `rpc_url` and `peers` in file [configmap.yaml](./k8s/configmap.yaml).
+6. Set the `metadata.namespace` in all Kubernetes yaml files. Must be deployed into the same namespace that Quorum is running in!
+7. Deploy to Kubernetes:
 
     ```bash
     kubectl apply -n=my-custom-namespace k8s/configmap.yaml
+    kubectl apply -n=my-custom-namespace k8s/rbac.yaml
     kubectl apply -n=my-custom-namespace k8s/deployment.yaml
     ```
 
-7. In case you are using network policies, take a look at [netpol.yaml](./k8s/netpol.yaml) and modify the policies according to your needs.
+8. In case you are using network policies, take a look at [netpol.yaml](./k8s/netpol.yaml) and modify the policies according to your needs.
## Grafana Dashboard @@ -40,6 +45,10 @@ Metrics are provided for current connected peers and for well known peers define - Description: Quorum peers head block by enode and protocol eth or istanbul - Labels: instance, instance_name, enode, enode_short, name, protocol - Values: The latest block of the connected peer +- `quorum_tcp_egress_connectivity`: + - Description: Quorum TCP egress connectivity to other nodes by enode. + - Labels: instance, instance_name, enode, enode_short, name + - Values: 0=no connectivity/an outbound connection cannot be established, 1=connection can be established ### Metric Labels