Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix juju_unit and juju_application labels #137

Merged
merged 11 commits into from
May 29, 2024
33 changes: 28 additions & 5 deletions lib/charms/nrpe_exporter/v0/nrpe_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 5
LIBPATCH = 6


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -441,10 +441,14 @@ def _generate_data(self, relation) -> Tuple[list, list]:
)
id = re.sub(r"^juju[-_]", "", id)

nagios_host_context = relation.data[unit].get("nagios_host_context", "")

alerts.append(self._generate_alert(relation, cmd, id, unit))

nrpe_endpoints.append(
self._generate_prometheus_job(relation, unit, cmd, exporter_address, id)
self._generate_prometheus_job(
relation, unit, cmd, exporter_address, id, nagios_host_context
)
)
else:
logger.debug("No NRPE check is defined.")
Expand Down Expand Up @@ -485,13 +489,19 @@ def _generate_alert(self, relation, cmd, id, unit) -> dict:
},
}

def _generate_prometheus_job(self, relation, unit, cmd, exporter_address, id) -> dict:
def _generate_prometheus_job(
self, relation, unit, cmd, exporter_address, id, nagios_host_context
Abuelodelanada marked this conversation as resolved.
Show resolved Hide resolved
) -> dict:
"""Generate an on-the-fly Prometheus scrape job."""
# IP address could be 'target-address' OR 'target_address'
addr = relation.data[unit].get("target-address", "") or relation.data[unit].get(
"target_address", ""
)

        # "nagios_host_context" needs to be stripped from the "id" parameter (target-id)
        # so that we can correctly relabel juju_application and juju_unit.
nagios_host_context = nagios_host_context + "-" if nagios_host_context else ""

return {
"app_name": relation.app.name,
"target": {
Expand All @@ -511,8 +521,21 @@ def _generate_prometheus_job(self, relation, unit, cmd, exporter_address, id) ->
},
{
"target_label": "juju_unit",
# Turn sql-foo-0 or redis_bar_1 into sql-foo/0 or redis-bar/1
"replacement": re.sub(r"^(.*?)[-_](\d+)$", r"\1/\2", id.replace("_", "-")),
# Turn nagios-host-context-sql-foo-0 into sql-foo/0
"replacement": re.sub(
r"^(.*?)[-_](\d+)$",
r"\1/\2",
id.replace("_", "-").replace(nagios_host_context, ""),
),
},
{
"target_label": "juju_application",
# Turn nagios-host-context-sql-foo-0 into sql-foo
"replacement": re.sub(
r"^(.*?)[-_](\d+)$",
r"\1",
id.replace("_", "-").replace(nagios_host_context, ""),
),
},
],
"updates": {
Expand Down
21 changes: 10 additions & 11 deletions lib/charms/prometheus_k8s/v0/prometheus_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def __init__(self, *args):
- `scrape_timeout`
- `proxy_url`
- `relabel_configs`
- `metrics_relabel_configs`
- `metric_relabel_configs`
- `sample_limit`
- `label_limit`
- `label_name_length_limit`
Expand Down Expand Up @@ -362,7 +362,7 @@ def _on_scrape_targets_changed(self, event):

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 44
LIBPATCH = 47

PYDEPS = ["cosl"]

Expand All @@ -377,7 +377,7 @@ def _on_scrape_targets_changed(self, event):
"scrape_timeout",
"proxy_url",
"relabel_configs",
"metrics_relabel_configs",
"metric_relabel_configs",
"sample_limit",
"label_limit",
"label_name_length_limit",
Expand Down Expand Up @@ -521,8 +521,8 @@ def expand_wildcard_targets_into_individual_jobs(
# for such a target. Therefore labeling with Juju topology, excluding the
# unit name.
non_wildcard_static_config["labels"] = {
**non_wildcard_static_config.get("labels", {}),
**topology.label_matcher_dict,
**non_wildcard_static_config.get("labels", {}),
}

non_wildcard_static_configs.append(non_wildcard_static_config)
Expand All @@ -547,9 +547,9 @@ def expand_wildcard_targets_into_individual_jobs(
if topology:
# Add topology labels
modified_static_config["labels"] = {
**modified_static_config.get("labels", {}),
**topology.label_matcher_dict,
**{"juju_unit": unit_name},
**modified_static_config.get("labels", {}),
}

# Instance relabeling for topology should be last in order.
Expand Down Expand Up @@ -1537,12 +1537,11 @@ def set_scrape_job_spec(self, _=None):
relation.data[self._charm.app]["scrape_metadata"] = json.dumps(self._scrape_metadata)
relation.data[self._charm.app]["scrape_jobs"] = json.dumps(self._scrape_jobs)

if alert_rules_as_dict:
# Update relation data with the string representation of the rule file.
# Juju topology is already included in the "scrape_metadata" field above.
# The consumer side of the relation uses this information to name the rules file
# that is written to the filesystem.
relation.data[self._charm.app]["alert_rules"] = json.dumps(alert_rules_as_dict)
# Update relation data with the string representation of the rule file.
# Juju topology is already included in the "scrape_metadata" field above.
# The consumer side of the relation uses this information to name the rules file
# that is written to the filesystem.
relation.data[self._charm.app]["alert_rules"] = json.dumps(alert_rules_as_dict)

def _set_unit_ip(self, _=None):
"""Set unit host address.
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,7 @@ pythonPlatfrom = "All"
[tool.pytest.ini_options]
minversion = "6.0"
log_cli_level = "INFO"

[tool.codespell]
skip = ".git,.tox,build,lib,venv*,.mypy_cache"
ignore-words-list = "assertIn"
2 changes: 1 addition & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def __init__(self, *args):
self.metrics_aggregator = MetricsEndpointAggregator(self, resolve_addresses=True)
self.cos_agent = COSAgentProvider(
self,
scrape_configs=self._get_scrape_configs,
scrape_configs=self._get_scrape_configs(),
metrics_rules_dir=RULES_DIR,
dashboard_dirs=[COS_PROXY_DASHBOARDS_DIR, DASHBOARDS_DIR],
refresh_events=[
Expand Down
15 changes: 14 additions & 1 deletion tests/unit/test_relation_monitors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import tempfile
import unittest
from pathlib import Path
Expand All @@ -24,6 +25,7 @@ def setUp(self):
"private-address": "10.41.168.226",
"target-address": "10.41.168.226",
"target-id": "ubuntu-0",
"nagios_host_context": "my-nagios-host-context",
}

for p in [
Expand Down Expand Up @@ -97,10 +99,21 @@ def test_prometheus(self):

# THEN alert rules are transferred to prometheus over relation data
app_data = self.harness.get_relation_data(rel_id_prom, "cos-proxy")
self.assertIn("alert_rules", app_data)

self.assertIn("alert_rules", app_data) # pyright: ignore

# AND status is "active"
self.assertIsInstance(
self.harness.model.unit.status,
ActiveStatus,
)
# AND relabel configs are ok (we are removing nagios_host_context)
scrape_jobs = json.loads(app_data["scrape_jobs"])
for job in scrape_jobs:
relabel_configs = job["relabel_configs"]
for config in relabel_configs:
if target_level := config.get("target_label"):
if target_level == "juju_application":
self.assertEquals(config["replacement"], "ubuntu")
elif target_level == "juju_unit":
self.assertEquals(config["replacement"], "ubuntu/0")
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ deps =
codespell<2.3.0 # https://github.com/codespell-project/codespell/issues/3430
commands =
codespell {[vars]lib_path}
codespell . --skip .git --skip .tox --skip build --skip lib --skip venv* --skip .mypy_cache
codespell .
ruff {[vars]all_path}
black --check --diff {[vars]all_path}

Expand Down
Loading