diff --git a/charms/kserve-controller/lib/charms/loki_k8s/v1/loki_push_api.py b/charms/kserve-controller/lib/charms/loki_k8s/v1/loki_push_api.py
new file mode 100644
index 0000000..c3c1d08
--- /dev/null
+++ b/charms/kserve-controller/lib/charms/loki_k8s/v1/loki_push_api.py
@@ -0,0 +1,2750 @@
+#!/usr/bin/env python3
+# Copyright 2023 Canonical Ltd.
+# See LICENSE file for licensing details.
+#
+# Learn more at: https://juju.is/docs/sdk
+
+r"""## Overview.
+
+This document explains how to use the two principal objects this library provides:
+
+- `LokiPushApiProvider`: This object is meant to be used by any Charmed Operator that needs to
+implement the provider side of the `loki_push_api` relation interface. For instance, a Loki charm.
+The provider side of the relation represents the server side, to which logs are being pushed.
+
+- `LokiPushApiConsumer`: This object is meant to be used by any Charmed Operator that needs to
+send logs to Loki by implementing the consumer side of the `loki_push_api` relation interface.
+For instance, a Promtail or Grafana agent charm which needs to send logs to Loki.
+
+- `LogProxyConsumer`: DEPRECATED.
+This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to
+Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation
+interface.
+In order to be able to control the labels on the pushed logs, this object adds a Pebble layer
+that runs Promtail in the workload container, injecting Juju topology labels into the
+logs on the fly.
+This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 LTS.
+
+- `LogForwarder`: This object can be used by any Charmed Operator which needs to send the workload
+standard output (stdout) through Pebble's log forwarding mechanism, to Loki endpoints through the
+`loki_push_api` relation interface.
+In order to be able to control the labels on the pushed logs, this object updates the Pebble
+layer's "log-targets" section with Juju topology.
+
+Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju
+topology labels are used to uniquely identify the workload which generates telemetry like logs.
+
+
+## LokiPushApiProvider Library Usage
+
+This object may be used by any Charmed Operator which implements the `loki_push_api` interface.
+For instance, Loki or Grafana Agent.
+
+For this purpose a charm needs to instantiate the `LokiPushApiProvider` object with one mandatory
+and several optional arguments.
+
+- `charm`: A reference to the parent (Loki) charm.
+
+- `relation_name`: The name of the relation that the charm uses to interact
+  with its clients, which implement `LokiPushApiConsumer`, `LogForwarder`, or `LogProxyConsumer`
+  (note that LogProxyConsumer is deprecated).
+
+ If provided, this relation name must match a provided relation in metadata.yaml with the
+ `loki_push_api` interface.
+
+ The default relation name is "logging" for `LokiPushApiConsumer` and `LogForwarder`, and
+ "log-proxy" for `LogProxyConsumer` (note that LogProxyConsumer is deprecated).
+
+ For example, a provider's `metadata.yaml` file may look as follows:
+
+ ```yaml
+ provides:
+ logging:
+ interface: loki_push_api
+ ```
+
+ Subsequently, a Loki charm may instantiate the `LokiPushApiProvider` in its constructor as
+ follows:
+
+    ```python
+    from charms.loki_k8s.v1.loki_push_api import LokiPushApiProvider
+    from loki_server import LokiServer
+    ...
+
+    class LokiOperatorCharm(CharmBase):
+        ...
+
+        def __init__(self, *args):
+            super().__init__(*args)
+            ...
+            external_url = urlparse(self._external_url)
+            self.loki_provider = LokiPushApiProvider(
+                self,
+                address=external_url.hostname or self.hostname,
+                port=external_url.port or 80,
+                scheme=external_url.scheme,
+                path=f"{external_url.path}/loki/api/v1/push",
+            )
+            ...
+    ```
+
+- `port`: Loki Push API endpoint port. Default value: `3100`.
+- `scheme`: Loki Push API endpoint scheme (`http` or `https`). Default value: `http`.
+- `address`: Loki Push API endpoint address. Default value: `localhost`.
+- `path`: Loki Push API endpoint path. Default value: `loki/api/v1/push`.
+
+
+The `LokiPushApiProvider` object has several responsibilities:
+
+1. Set the URL of the Loki Push API in the relation application data bag; the URL
+ must be unique to all instances (e.g. using a load balancer).
+
+2. Set the Promtail binary URL (`promtail_binary_zip_url`) so that clients using the
+   `LogProxyConsumer` object can download and configure it.
+
+3. Process the metadata of the consumer application, provided via the
+   "metadata" field of the consumer data bag, which is used to annotate the
+   alert rules (see next point). An example of "metadata" is the following:
+
+ {'model': 'loki',
+ 'model_uuid': '0b7d1071-ded2-4bf5-80a3-10a81aeb1386',
+ 'application': 'promtail-k8s'
+ }
+
+4. Process alert rules set into the relation by the `LokiPushApiConsumer`
+ objects, e.g.:
+
+ '{
+ "groups": [{
+ "name": "loki_0b7d1071-ded2-4bf5-80a3-10a81aeb1386_promtail-k8s_alerts",
+ "rules": [{
+ "alert": "HighPercentageError",
+ "expr": "sum(rate({app=\\"foo\\", env=\\"production\\"} |= \\"error\\" [5m]))
+ by (job) \\n /\\nsum(rate({app=\\"foo\\", env=\\"production\\"}[5m]))
+ by (job)\\n > 0.05
+ \\n", "for": "10m",
+ "labels": {
+ "severity": "page",
+ "juju_model": "loki",
+ "juju_model_uuid": "0b7d1071-ded2-4bf5-80a3-10a81aeb1386",
+ "juju_application": "promtail-k8s"
+ },
+ "annotations": {
+ "summary": "High request latency"
+ }
+ }]
+ }]
+ }'
+
+
+Once these alert rules are sent over relation data, the `LokiPushApiProvider` object
+stores these files in the directory `/loki/rules` inside the Loki charm container. After
+storing alert rules files, the object will check the alert rules by querying the Loki API
+endpoint [`loki/api/v1/rules`](https://grafana.com/docs/loki/latest/api/#list-rule-groups).
+If there are changes in the alert rules, a `loki_push_api_alert_rules_changed` event will
+be emitted with details about the `RelationEvent` which triggered it.
+
+This event should be observed in the charm that uses `LokiPushApiProvider`:
+
+```python
+ def __init__(self, *args):
+ super().__init__(*args)
+ ...
+ self.loki_provider = LokiPushApiProvider(self)
+ self.framework.observe(
+ self.loki_provider.on.loki_push_api_alert_rules_changed,
+ self._loki_push_api_alert_rules_changed,
+ )
+```
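+
+For instance, a handler could use the `alerts` property to write one rules file per related
+application. The following is a minimal sketch, assuming a `yaml` import and a workload
+container attribute (both illustrative):
+
+```python
+    def _loki_push_api_alert_rules_changed(self, event):
+        # `alerts` maps a Juju topology identifier to an alert rules dict that
+        # already contains the top-level "groups" key.
+        for identifier, rules in self.loki_provider.alerts.items():
+            self._loki_container.push(  # hypothetical container attribute
+                "/loki/rules/{}.rules".format(identifier),
+                yaml.safe_dump(rules),
+                make_dirs=True,
+            )
+```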
+
+
+## LokiPushApiConsumer Library Usage
+
+This Loki charm interacts with its clients using the Loki charm library. Charms
+seeking to send logs to Loki must do so using the `LokiPushApiConsumer` object from
+this charm library.
+
+> **NOTE**: `LokiPushApiConsumer` also depends on the `cosl` Python package.
+>
+> This library declares it in `PYDEPS`, so charmcraft installs it into the charm
+> automatically at build time.
+
+For the simplest use cases, using the `LokiPushApiConsumer` object only requires
+instantiating it, typically in the constructor of your charm (the one which
+sends logs).
+
+```python
+from charms.loki_k8s.v1.loki_push_api import LokiPushApiConsumer
+
+class LokiClientCharm(CharmBase):
+
+ def __init__(self, *args):
+ super().__init__(*args)
+ ...
+ self._loki_consumer = LokiPushApiConsumer(self)
+```
+
+The `LokiPushApiConsumer` constructor requires two things:
+
+- A reference to the parent (LokiClientCharm) charm.
+
+- Optionally, the name of the relation that the Loki charm uses to interact
+  with its clients. If provided, this relation name must match a required
+  relation in metadata.yaml with the `loki_push_api` interface.
+
+  This argument is not required if your metadata.yaml has precisely one
+  required relation with the `loki_push_api` interface, as the
+  lib will automatically resolve the relation name by inspecting the charm's
+  meta information.
+
+Any time the relation between a Loki provider charm and a Loki consumer charm is
+established, a `LokiPushApiEndpointJoined` event is fired. On the consumer side
+it is possible to observe this event with:
+
+```python
+
+self.framework.observe(
+ self._loki_consumer.on.loki_push_api_endpoint_joined,
+ self._on_loki_push_api_endpoint_joined,
+)
+```
+
+Any time there are departures in relations between the consumer charm and Loki,
+the consumer charm is informed through a `LokiPushApiEndpointDeparted` event, for instance:
+
+```python
+self.framework.observe(
+ self._loki_consumer.on.loki_push_api_endpoint_departed,
+ self._on_loki_push_api_endpoint_departed,
+)
+```
+
+The consumer charm can then choose to update its configuration in both situations.
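+
+For example, both handlers might rebuild the workload configuration from the endpoints
+currently advertised over relation data. A minimal sketch, where `_configure_workload` is a
+hypothetical helper in your charm:
+
+```python
+    def _on_loki_push_api_endpoint_joined(self, event):
+        self._update_endpoints()
+
+    def _on_loki_push_api_endpoint_departed(self, event):
+        self._update_endpoints()
+
+    def _update_endpoints(self):
+        # `loki_endpoints` returns e.g. [{"url": "http://loki:3100/loki/api/v1/push"}]
+        urls = [endpoint["url"] for endpoint in self._loki_consumer.loki_endpoints]
+        self._configure_workload(loki_urls=urls)  # hypothetical helper
+```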
+
+Note that LokiPushApiConsumer does not add any labels automatically on its own. In
+order to better integrate with the Canonical Observability Stack, you may want to configure your
+software to add Juju topology labels. The
+[observability-libs](https://charmhub.io/observability-libs) library can be used to get topology
+labels in charm code. See :func:`LogProxyConsumer._scrape_configs` for an example of how
+to do this with promtail.
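+
+As a sketch, topology labels can also be obtained with `cosl` (which this library itself
+uses for `JujuTopology`):
+
+```python
+from cosl import JujuTopology
+
+topology = JujuTopology.from_charm(self)
+# A dict such as {"juju_model": ..., "juju_model_uuid": ..., "juju_application": ...}
+labels = topology.label_matcher_dict
+```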
+
+## LogProxyConsumer Library Usage
+
+> Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6
+> LTS.
+
+Let's say that we have a workload charm that produces logs, and we need to send those logs to a
+workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`.
+
+Adopting this object in a Charmed Operator consists of two steps:
+
+1. Use the `LogProxyConsumer` class by instantiating it in the `__init__` method of the charmed
+   operator. There are two ways to get logs into Promtail: you can give it a list of files to
+   read, or you can write to it using the syslog protocol.
+
+ For example:
+
+ ```python
+ from charms.loki_k8s.v1.loki_push_api import LogProxyConsumer
+
+ ...
+
+ def __init__(self, *args):
+ ...
+ self._log_proxy = LogProxyConsumer(
+ self,
+ logs_scheme={
+ "workload-a": {
+                    "log-files": ["/tmp/workload-a-1.log", "/tmp/workload-a-2.log"],
+ "syslog-port": 1514,
+ },
+                "workload-b": {"log-files": ["/tmp/workload-b.log"], "syslog-port": 1515},
+ },
+ relation_name="log-proxy",
+ )
+ self.framework.observe(
+ self._log_proxy.on.promtail_digest_error,
+ self._promtail_error,
+ )
+
+ def _promtail_error(self, event):
+ logger.error(event.message)
+ self.unit.status = BlockedStatus(event.message)
+ ```
+
+    Any time the relation between a provider charm and a LogProxy consumer charm is
+    established, a `LogProxyEndpointJoined` event is fired. On the consumer side it is
+    possible to observe this event with:
+
+ ```python
+
+ self.framework.observe(
+ self._log_proxy.on.log_proxy_endpoint_joined,
+ self._on_log_proxy_endpoint_joined,
+ )
+ ```
+
+    Any time there are departures in relations between the consumer charm and the provider,
+    the consumer charm is informed through a `LogProxyEndpointDeparted` event, for instance:
+
+ ```python
+ self.framework.observe(
+ self._log_proxy.on.log_proxy_endpoint_departed,
+ self._on_log_proxy_endpoint_departed,
+ )
+ ```
+
+ The consumer charm can then choose to update its configuration in both situations.
+
+    Note that:
+
+    - You can configure your syslog software using `localhost` as the address and the method
+      `LogProxyConsumer.syslog_port("container_name")` to get the port or, alternatively, if
+      you are using rsyslog, the method `LogProxyConsumer.rsyslog_config("container_name")`.
+      See the sketch below.
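+
+    A minimal sketch of the syslog wiring, where `_configure_syslog` is a hypothetical
+    helper in your charm:
+
+    ```python
+    port = self._log_proxy.syslog_port("workload-a")
+    self._configure_syslog(address="localhost", port=int(port))  # hypothetical helper
+    ```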
+
+2. Modify the `metadata.yaml` file to add:
+
+ - The `log-proxy` relation in the `requires` section:
+ ```yaml
+ requires:
+ log-proxy:
+ interface: loki_push_api
+ optional: true
+ ```
+
+Once the library is implemented in a Charmed Operator and a relation is established with
+the charm that implements the `loki_push_api` interface, the library will inject a
+Pebble layer that runs Promtail in the workload container to send logs.
+
+By default, the promtail binary injected into the container will be downloaded from the internet.
+If, for any reason, the container has limited network access, you may allow charm administrators
+to provide their own promtail binary at runtime by adding the following snippet to your charm
+metadata:
+
+```yaml
+resources:
+ promtail-bin:
+ type: file
+ description: Promtail binary for logging
+ filename: promtail-linux
+```
+
+Which would then allow operators to deploy the charm this way:
+
+```
+juju deploy \
+ ./your_charm.charm \
+ --resource promtail-bin=/tmp/promtail-linux-amd64
+```
+
+If a different resource name is used, it can be specified with the `promtail_resource_name`
+argument to the `LogProxyConsumer` constructor.
+
+The object can emit a `PromtailDigestError` event when:
+
+- The Promtail binary cannot be downloaded.
+- The sha256 sum of the Promtail binary does not match the expected value.
+
+The object can raise a `ContainerNotFoundError` exception when:
+
+- No `container_name` parameter has been specified and the Pod has more than 1 container.
+
+These events can be monitored via:
+
+```python
+    self.framework.observe(
+        self._loki_consumer.on.promtail_digest_error,
+        self._promtail_error,
+    )
+
+    def _promtail_error(self, event):
+        logger.error(event.message)
+        self.unit.status = BlockedStatus(event.message)
+```
+
+## LogForwarder class Usage
+
+Let's say that we have a charm's workload that writes logs to the standard output (stdout),
+and we need to send those logs to a workload implementing the `loki_push_api` interface,
+such as `Loki` or `Grafana Agent`. To know how to reach a Loki instance, a charm would
+typically use the `loki_push_api` interface.
+
+Use the `LogForwarder` class by instantiating it in the `__init__` method of the charm:
+
+```python
+from charms.loki_k8s.v1.loki_push_api import LogForwarder
+
+...
+
+ def __init__(self, *args):
+ ...
+ self._log_forwarder = LogForwarder(
+ self,
+ relation_name="logging" # optional, defaults to `logging`
+ )
+```
+
+The `LogForwarder` by default will observe relation events on the `logging` endpoint and
+enable/disable log forwarding automatically.
+Next, modify the `metadata.yaml` file to add:
+
+The `logging` relation in the `requires` section:
+```yaml
+requires:
+ logging:
+ interface: loki_push_api
+ optional: true
+```
+
+Once the `LogForwarder` class is implemented in your charm and the relation (implementing the
+`loki_push_api` interface) is active and healthy, the library will inject a Pebble layer in
+each workload container the charm has access to, to configure Pebble's log forwarding
+feature and start sending logs to Loki.
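+
+For reference, the layer injected by `LogForwarder` configures Pebble's `log-targets`
+section, roughly of the following shape (target name, location, and label values are
+illustrative):
+
+```yaml
+log-targets:
+  loki-endpoint-0:
+    override: merge
+    type: loki
+    location: http://loki:3100/loki/api/v1/push
+    services: [all]
+    labels:
+      juju_model: mymodel
+      juju_application: myapp
+```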
+
+## Alerting Rules
+
+This charm library also supports gathering alerting rules from all related Loki client
+charms and enabling corresponding alerts within the Loki charm. Alert rules are
+automatically gathered by `LokiPushApiConsumer` object from a directory conventionally
+named `loki_alert_rules`.
+
+This directory must reside at the top level in the `src` folder of the
+consumer charm. Each file in this directory is assumed to be a single alert rule
+in YAML format. The file name must have the `.rule` extension.
+The format of this alert rule conforms to the
+[Loki docs](https://grafana.com/docs/loki/latest/rules/#alerting-rules).
+
+An example of the contents of one such file is shown below.
+
+```yaml
+alert: HighPercentageError
+expr: |
+ sum(rate({%%juju_topology%%} |= "error" [5m])) by (job)
+ /
+ sum(rate({%%juju_topology%%}[5m])) by (job)
+ > 0.05
+for: 10m
+labels:
+ severity: page
+annotations:
+ summary: High request latency
+
+```
+
+It is **critical** to use the `%%juju_topology%%` filter in the expression for the alert
+rule shown above. This filter is a stub that is automatically replaced by the
+`LokiPushApiConsumer` with the Loki client's Juju topology (application, model and its
+UUID). Such a topology filter is essential to ensure that alert rules submitted by one
+provider charm generate alerts only for that same charm.
+
+The Loki charm may be related to multiple Loki client charms. Without this filter,
+rules submitted by one provider charm would also result in corresponding alerts for other
+provider charms. Hence, every alert rule expression must include such a topology filter stub.
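+
+As an illustration, the substitution performed on the example expression above looks roughly
+like this (label values taken from the earlier metadata example):
+
+```
+# before
+sum(rate({%%juju_topology%%} |= "error" [5m])) by (job)
+
+# after: the stub becomes a wildcard job matcher plus topology label matchers
+sum(rate({job=~".+", juju_model="loki",
+          juju_model_uuid="0b7d1071-ded2-4bf5-80a3-10a81aeb1386",
+          juju_application="promtail-k8s"} |= "error" [5m])) by (job)
+```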
+
+Gathering alert rules and generating rule files within the Loki charm is easily done using
+the `alerts` property of `LokiPushApiProvider`. Alerts generated by Loki will automatically
+include Juju topology labels in the alerts. These labels indicate the source of the alert.
+
+The following labels are automatically added to every alert:
+
+- `juju_model`
+- `juju_model_uuid`
+- `juju_application`
+
+
+If an alert rules file does not contain the keys `alert` or `expr`, or there is no alert
+rules file in `alert_rules_path`, a `loki_push_api_alert_rules_error` event is emitted.
+
+To handle these situations, the event must be observed in the `LokiClientCharm` charm.py file:
+
+```python
+class LokiClientCharm(CharmBase):
+
+ def __init__(self, *args):
+ super().__init__(*args)
+ ...
+ self._loki_consumer = LokiPushApiConsumer(self)
+
+ self.framework.observe(
+ self._loki_consumer.on.loki_push_api_alert_rules_error,
+ self._alert_rules_error
+ )
+
+ def _alert_rules_error(self, event):
+ self.unit.status = BlockedStatus(event.message)
+```
+
+## Relation Data
+
+The Loki charm uses both application and unit relation data to obtain information regarding
+Loki Push API and alert rules.
+
+The consumer charm sends its alert rules over app relation data using the `alert_rules`
+key.
+"""
+
+import json
+import logging
+import os
+import platform
+import re
+import socket
+import subprocess
+import tempfile
+import typing
+from copy import deepcopy
+from gzip import GzipFile
+from hashlib import sha256
+from io import BytesIO
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union
+from urllib import request
+from urllib.error import URLError
+
+import yaml
+from cosl import JujuTopology
+from ops.charm import (
+ CharmBase,
+ HookEvent,
+ PebbleReadyEvent,
+ RelationBrokenEvent,
+ RelationCreatedEvent,
+ RelationDepartedEvent,
+ RelationEvent,
+ RelationJoinedEvent,
+ RelationRole,
+ WorkloadEvent,
+)
+from ops.framework import EventBase, EventSource, Object, ObjectEvents
+from ops.jujuversion import JujuVersion
+from ops.model import Container, ModelError, Relation
+from ops.pebble import APIError, ChangeError, Layer, PathError, ProtocolError
+
+# The unique Charmhub library identifier, never change it
+LIBID = "bf76f23cdd03464b877c52bd1d2f563e"
+
+# Increment this major API version when introducing breaking changes
+LIBAPI = 1
+
+# Increment this PATCH version before using `charmcraft publish-lib` or reset
+# to 0 if you are raising the major API version
+LIBPATCH = 11
+
+PYDEPS = ["cosl"]
+
+logger = logging.getLogger(__name__)
+
+RELATION_INTERFACE_NAME = "loki_push_api"
+DEFAULT_RELATION_NAME = "logging"
+DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/loki_alert_rules"
+DEFAULT_LOG_PROXY_RELATION_NAME = "log-proxy"
+
+PROMTAIL_BASE_URL = "https://github.com/canonical/loki-k8s-operator/releases/download"
+# To update Promtail version you only need to change the PROMTAIL_VERSION and
+# update all sha256 sums in PROMTAIL_BINARIES. To support a new architecture
+# you only need to add a new key value pair for the architecture in PROMTAIL_BINARIES.
+PROMTAIL_VERSION = "v2.9.7"
+PROMTAIL_ARM_BINARY = {
+ "filename": "promtail-static-arm64",
+ "zipsha": "c083fdb45e5c794103f974eeb426489b4142438d9e10d0ae272b2aff886e249b",
+ "binsha": "4cd055c477a301c0bdfdbcea514e6e93f6df5d57425ce10ffc77f3e16fec1ddf",
+}
+
+PROMTAIL_BINARIES = {
+ "amd64": {
+ "filename": "promtail-static-amd64",
+ "zipsha": "6873cbdabf23062aeefed6de5f00ff382710332af3ab90a48c253ea17e08f465",
+ "binsha": "28da9b99f81296fe297831f3bc9d92aea43b4a92826b8ff04ba433b8cb92fb50",
+ },
+ "arm64": PROMTAIL_ARM_BINARY,
+ "aarch64": PROMTAIL_ARM_BINARY,
+}
+
+# Paths in `charm` container
+BINARY_DIR = "/tmp"
+
+# Paths in `workload` container
+WORKLOAD_BINARY_DIR = "/opt/promtail"
+WORKLOAD_CONFIG_DIR = "/etc/promtail"
+WORKLOAD_CONFIG_FILE_NAME = "promtail_config.yaml"
+WORKLOAD_CONFIG_PATH = "{}/{}".format(WORKLOAD_CONFIG_DIR, WORKLOAD_CONFIG_FILE_NAME)
+WORKLOAD_POSITIONS_PATH = "{}/positions.yaml".format(WORKLOAD_BINARY_DIR)
+WORKLOAD_SERVICE_NAME = "promtail"
+
+# These are the initial port values. As we can have more than one container,
+# we use odd and even numbers to avoid collisions.
+# Each new container adds 2 to the previous value.
+HTTP_LISTEN_PORT_START = 9080 # even start port
+GRPC_LISTEN_PORT_START = 9095 # odd start port
+
+
+class RelationNotFoundError(ValueError):
+ """Raised if there is no relation with the given name."""
+
+ def __init__(self, relation_name: str):
+ self.relation_name = relation_name
+ self.message = "No relation named '{}' found".format(relation_name)
+
+ super().__init__(self.message)
+
+
+class RelationInterfaceMismatchError(Exception):
+ """Raised if the relation with the given name has a different interface."""
+
+ def __init__(
+ self,
+ relation_name: str,
+ expected_relation_interface: str,
+ actual_relation_interface: str,
+ ):
+ self.relation_name = relation_name
+ self.expected_relation_interface = expected_relation_interface
+ self.actual_relation_interface = actual_relation_interface
+ self.message = (
+ "The '{}' relation has '{}' as interface rather than the expected '{}'".format(
+ relation_name, actual_relation_interface, expected_relation_interface
+ )
+ )
+ super().__init__(self.message)
+
+
+class RelationRoleMismatchError(Exception):
+ """Raised if the relation with the given name has a different direction."""
+
+ def __init__(
+ self,
+ relation_name: str,
+ expected_relation_role: RelationRole,
+ actual_relation_role: RelationRole,
+ ):
+ self.relation_name = relation_name
+ self.expected_relation_interface = expected_relation_role
+ self.actual_relation_role = actual_relation_role
+ self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format(
+ relation_name, repr(actual_relation_role), repr(expected_relation_role)
+ )
+ super().__init__(self.message)
+
+
+def _validate_relation_by_interface_and_direction(
+ charm: CharmBase,
+ relation_name: str,
+ expected_relation_interface: str,
+ expected_relation_role: RelationRole,
+):
+ """Verifies that a relation has the necessary characteristics.
+
+ Verifies that the `relation_name` provided: (1) exists in metadata.yaml,
+ (2) declares as interface the interface name passed as `relation_interface`
+ and (3) has the right "direction", i.e., it is a relation that `charm`
+ provides or requires.
+
+ Args:
+ charm: a `CharmBase` object to scan for the matching relation.
+ relation_name: the name of the relation to be verified.
+ expected_relation_interface: the interface name to be matched by the
+ relation named `relation_name`.
+ expected_relation_role: whether the `relation_name` must be either
+ provided or required by `charm`.
+
+ Raises:
+ RelationNotFoundError: If there is no relation in the charm's metadata.yaml
+ with the same name as provided via `relation_name` argument.
+ RelationInterfaceMismatchError: The relation with the same name as provided
+ via `relation_name` argument does not have the same relation interface
+ as specified via the `expected_relation_interface` argument.
+ RelationRoleMismatchError: If the relation with the same name as provided
+ via `relation_name` argument does not have the same role as specified
+ via the `expected_relation_role` argument.
+ """
+ if relation_name not in charm.meta.relations:
+ raise RelationNotFoundError(relation_name)
+
+ relation = charm.meta.relations[relation_name]
+
+ actual_relation_interface = relation.interface_name
+ if actual_relation_interface != expected_relation_interface:
+ raise RelationInterfaceMismatchError(
+ relation_name,
+ expected_relation_interface,
+ actual_relation_interface, # pyright: ignore
+ )
+
+ if expected_relation_role == RelationRole.provides:
+ if relation_name not in charm.meta.provides:
+ raise RelationRoleMismatchError(
+ relation_name, RelationRole.provides, RelationRole.requires
+ )
+ elif expected_relation_role == RelationRole.requires:
+ if relation_name not in charm.meta.requires:
+ raise RelationRoleMismatchError(
+ relation_name, RelationRole.requires, RelationRole.provides
+ )
+ else:
+ raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role))
+
+
+class InvalidAlertRulePathError(Exception):
+ """Raised if the alert rules folder cannot be found or is otherwise invalid."""
+
+ def __init__(
+ self,
+ alert_rules_absolute_path: Path,
+ message: str,
+ ):
+ self.alert_rules_absolute_path = alert_rules_absolute_path
+ self.message = message
+
+ super().__init__(self.message)
+
+
+def _is_official_alert_rule_format(rules_dict: dict) -> bool:
+ """Are alert rules in the upstream format as supported by Loki.
+
+ Alert rules in dictionary format are in "official" form if they
+ contain a "groups" key, since this implies they contain a list of
+ alert rule groups.
+
+ Args:
+ rules_dict: a set of alert rules in Python dictionary format
+
+ Returns:
+ True if alert rules are in official Loki file format.
+ """
+ return "groups" in rules_dict
+
+
+def _is_single_alert_rule_format(rules_dict: dict) -> bool:
+ """Are alert rules in single rule format.
+
+ The Loki charm library supports reading of alert rules in a
+ custom format that consists of a single alert rule per file. This
+ does not conform to the official Loki alert rule file format
+ which requires that each alert rules file consists of a list of
+ alert rule groups and each group consists of a list of alert
+ rules.
+
+ Alert rules in dictionary form are considered to be in single rule
+ format if in the least it contains two keys corresponding to the
+ alert rule name and alert expression.
+
+ Returns:
+ True if alert rule is in single rule file format.
+ """
+ # one alert rule per file
+ return set(rules_dict) >= {"alert", "expr"}
+
+
+class AlertRules:
+ """Utility class for amalgamating Loki alert rule files and injecting juju topology.
+
+ An `AlertRules` object supports aggregating alert rules from files and directories in both
+ official and single rule file formats using the `add_path()` method. All the alert rules
+ read are annotated with Juju topology labels and amalgamated into a single data structure
+ in the form of a Python dictionary using the `as_dict()` method. Such a dictionary can be
+ easily dumped into JSON format and exchanged over relation data. The dictionary can also
+ be dumped into YAML format and written directly into an alert rules file that is read by
+ Loki. Note that multiple `AlertRules` objects must not be written into the same file,
+ since Loki allows only a single list of alert rule groups per alert rules file.
+
+ The official Loki format is a YAML file conforming to the Loki documentation
+ (https://grafana.com/docs/loki/latest/api/#list-rule-groups).
+ The custom single rule format is a subsection of the official YAML, having a single alert
+ rule, effectively "one alert per file".
+ """
+
+ # This class uses the following terminology for the various parts of a rule file:
+ # - alert rules file: the entire groups[] yaml, including the "groups:" key.
+ # - alert groups (plural): the list of groups[] (a list, i.e. no "groups:" key) - it is a list
+ # of dictionaries that have the "name" and "rules" keys.
+ # - alert group (singular): a single dictionary that has the "name" and "rules" keys.
+ # - alert rules (plural): all the alerts in a given alert group - a list of dictionaries with
+ # the "alert" and "expr" keys.
+ # - alert rule (singular): a single dictionary that has the "alert" and "expr" keys.
+
+ def __init__(self, topology: Optional[JujuTopology] = None):
+        """Build an alert rule object.
+
+ Args:
+ topology: a `JujuTopology` instance that is used to annotate all alert rules.
+ """
+ self.topology = topology
+ self.tool = CosTool(None)
+ self.alert_groups = [] # type: List[dict]
+
+ def _from_file(self, root_path: Path, file_path: Path) -> List[dict]:
+ """Read a rules file from path, injecting juju topology.
+
+ Args:
+ root_path: full path to the root rules folder (used only for generating group name)
+ file_path: full path to a *.rule file.
+
+ Returns:
+ A list of dictionaries representing the rules file, if file is valid (the structure is
+ formed by `yaml.safe_load` of the file); an empty list otherwise.
+ """
+ with file_path.open() as rf:
+ # Load a list of rules from file then add labels and filters
+ try:
+ rule_file = yaml.safe_load(rf) or {}
+
+ except Exception as e:
+ logger.error("Failed to read alert rules from %s: %s", file_path.name, e)
+ return []
+
+ if _is_official_alert_rule_format(rule_file):
+ alert_groups = rule_file["groups"]
+ elif _is_single_alert_rule_format(rule_file):
+ # convert to list of alert groups
+ # group name is made up from the file name
+ alert_groups = [{"name": file_path.stem, "rules": [rule_file]}]
+ else:
+ # invalid/unsupported
+ reason = "file is empty" if not rule_file else "unexpected file structure"
+ logger.error("Invalid rules file (%s): %s", reason, file_path.name)
+ return []
+
+ # update rules with additional metadata
+ for alert_group in alert_groups:
+ # update group name with topology and sub-path
+ alert_group["name"] = self._group_name(
+ str(root_path),
+ str(file_path),
+ alert_group["name"],
+ )
+
+ # add "juju_" topology labels
+ for alert_rule in alert_group["rules"]:
+ if "labels" not in alert_rule:
+ alert_rule["labels"] = {}
+
+ if self.topology:
+ # only insert labels that do not already exist
+ for label, val in self.topology.label_matcher_dict.items():
+ if label not in alert_rule["labels"]:
+ alert_rule["labels"][label] = val
+
+ # insert juju topology filters into a prometheus alert rule
+ # logql doesn't like empty matchers, so add a job matcher which hits
+ # any string as a "wildcard" which the topology labels will
+ # filter down
+ alert_rule["expr"] = self.tool.inject_label_matchers(
+ re.sub(r"%%juju_topology%%", r'job=~".+"', alert_rule["expr"]),
+ self.topology.label_matcher_dict,
+ )
+
+ return alert_groups
+
+ def _group_name(
+ self,
+ root_path: typing.Union[Path, str],
+ file_path: typing.Union[Path, str],
+ group_name: str,
+ ) -> str:
+ """Generate group name from path and topology.
+
+ The group name is made up of the relative path between the root dir_path, the file path,
+ and topology identifier.
+
+ Args:
+ root_path: path to the root rules dir.
+ file_path: path to rule file.
+ group_name: original group name to keep as part of the new augmented group name
+
+ Returns:
+ New group name, augmented by juju topology and relative path.
+ """
+ file_path = Path(file_path) if not isinstance(file_path, Path) else file_path
+ root_path = Path(root_path) if not isinstance(root_path, Path) else root_path
+ rel_path = file_path.parent.relative_to(root_path.as_posix())
+
+ # We should account for both absolute paths and Windows paths. Convert it to a POSIX
+ # string, strip off any leading /, then join it
+
+ path_str = ""
+ if not rel_path == Path("."):
+ # Get rid of leading / and optionally drive letters so they don't muck up
+ # the template later, since Path.parts returns them. The 'if relpath.is_absolute ...'
+ # isn't even needed since re.sub doesn't throw exceptions if it doesn't match, so it's
+ # optional, but it makes it clear what we're doing.
+
+ # Note that Path doesn't actually care whether the path is valid just to instantiate
+ # the object, so we can happily strip that stuff out to make templating nicer
+ rel_path = Path(
+ re.sub(r"^([A-Za-z]+:)?/", "", rel_path.as_posix())
+ if rel_path.is_absolute()
+ else str(rel_path)
+ )
+
+ # Get rid of relative path characters in the middle which both os.path and pathlib
+ # leave hanging around. We could use path.resolve(), but that would lead to very
+ # long template strings when rules come from pods and/or other deeply nested charm
+ # paths
+ path_str = "_".join(filter(lambda x: x not in ["..", "/"], rel_path.parts))
+
+ # Generate group name:
+ # - name, from juju topology
+ # - suffix, from the relative path of the rule file;
+ group_name_parts = [self.topology.identifier] if self.topology else []
+ group_name_parts.extend([path_str, group_name, "alerts"])
+ # filter to remove empty strings
+ return "_".join(filter(lambda x: x, group_name_parts))
+
+ @classmethod
+ def _multi_suffix_glob(
+ cls, dir_path: Path, suffixes: List[str], recursive: bool = True
+ ) -> list:
+ """Helper function for getting all files in a directory that have a matching suffix.
+
+ Args:
+ dir_path: path to the directory to glob from.
+ suffixes: list of suffixes to include in the glob (items should begin with a period).
+ recursive: a flag indicating whether a glob is recursive (nested) or not.
+
+ Returns:
+ List of files in `dir_path` that have one of the suffixes specified in `suffixes`.
+ """
+ all_files_in_dir = dir_path.glob("**/*" if recursive else "*")
+ return list(filter(lambda f: f.is_file() and f.suffix in suffixes, all_files_in_dir))
+
+ def _from_dir(self, dir_path: Path, recursive: bool) -> List[dict]:
+ """Read all rule files in a directory.
+
+ All rules from files for the same directory are loaded into a single
+ group. The generated name of this group includes juju topology.
+ By default, only the top directory is scanned; for nested scanning, pass `recursive=True`.
+
+ Args:
+ dir_path: directory containing *.rule files (alert rules without groups).
+ recursive: flag indicating whether to scan for rule files recursively.
+
+ Returns:
+ a list of dictionaries representing prometheus alert rule groups, each dictionary
+ representing an alert group (structure determined by `yaml.safe_load`).
+ """
+ alert_groups = [] # type: List[dict]
+
+ # Gather all alerts into a list of groups
+ for file_path in self._multi_suffix_glob(dir_path, [".rule", ".rules"], recursive):
+ alert_groups_from_file = self._from_file(dir_path, file_path)
+ if alert_groups_from_file:
+ logger.debug("Reading alert rule from %s", file_path)
+ alert_groups.extend(alert_groups_from_file)
+
+ return alert_groups
+
+ def add_path(self, path_str: str, *, recursive: bool = False):
+ """Add rules from a dir path.
+
+ All rules from files are aggregated into a data structure representing a single rule file.
+ All group names are augmented with juju topology.
+
+ Args:
+ path_str: either a rules file or a dir of rules files.
+ recursive: whether to read files recursively or not (no impact if `path` is a file).
+
+ Raises:
+ InvalidAlertRulePathError: if the provided path is invalid.
+ """
+ path = Path(path_str) # type: Path
+ if path.is_dir():
+ self.alert_groups.extend(self._from_dir(path, recursive))
+ elif path.is_file():
+ self.alert_groups.extend(self._from_file(path.parent, path))
+ else:
+ logger.debug("The alerts file does not exist: %s", path)
+
+ def as_dict(self) -> dict:
+ """Return standard alert rules file in dict representation.
+
+ Returns:
+ a dictionary containing a single list of alert rule groups.
+ The list of alert rule groups is provided as value of the
+ "groups" dictionary key.
+ """
+ return {"groups": self.alert_groups} if self.alert_groups else {}
+
+
+def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str:
+ """Resolve the provided path items against the directory of the main file.
+
+ Look up the directory of the `main.py` file being executed. This is normally
+ going to be the charm.py file of the charm including this library. Then, resolve
+ the provided path elements and, if the result path exists and is a directory,
+    return its absolute path; otherwise, raise an exception.
+
+ Raises:
+ InvalidAlertRulePathError, if the path does not exist or is not a directory.
+ """
+ charm_dir = Path(str(charm.charm_dir))
+ if not charm_dir.exists() or not charm_dir.is_dir():
+ # Operator Framework does not currently expose a robust
+ # way to determine the top level charm source directory
+ # that is consistent across deployed charms and unit tests
+ # Hence for unit tests the current working directory is used
+        # TODO: update this logic when the following ticket is resolved
+ # https://github.com/canonical/operator/issues/643
+ charm_dir = Path(os.getcwd())
+
+ alerts_dir_path = charm_dir.absolute().joinpath(*path_elements)
+
+ if not alerts_dir_path.exists():
+ raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist")
+ if not alerts_dir_path.is_dir():
+ raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory")
+
+ return str(alerts_dir_path)
+
+
+class NoRelationWithInterfaceFoundError(Exception):
+ """No relations with the given interface are found in the charm meta."""
+
+ def __init__(self, charm: CharmBase, relation_interface: Optional[str] = None):
+ self.charm = charm
+ self.relation_interface = relation_interface
+ self.message = (
+ "No relations with interface '{}' found in the meta of the '{}' charm".format(
+ relation_interface, charm.meta.name
+ )
+ )
+
+ super().__init__(self.message)
+
+
+class MultipleRelationsWithInterfaceFoundError(Exception):
+ """Multiple relations with the given interface are found in the charm meta."""
+
+ def __init__(self, charm: CharmBase, relation_interface: str, relations: list):
+ self.charm = charm
+ self.relation_interface = relation_interface
+ self.relations = relations
+ self.message = (
+ "Multiple relations with interface '{}' found in the meta of the '{}' charm.".format(
+ relation_interface, charm.meta.name
+ )
+ )
+ super().__init__(self.message)
+
+
+class LokiPushApiEndpointDeparted(EventBase):
+ """Event emitted when Loki departed."""
+
+
+class LokiPushApiEndpointJoined(EventBase):
+ """Event emitted when Loki joined."""
+
+
+class LokiPushApiAlertRulesChanged(EventBase):
+ """Event emitted if there is a change in the alert rules."""
+
+ def __init__(self, handle, relation, relation_id, app=None, unit=None):
+ """Pretend we are almost like a RelationEvent.
+
+ Fields to serialize:
+ {
+ "relation_name": ,
+ "relation_id": ,
+ "app_name": ,
+ "unit_name":
+ }
+
+ In this way, we can transparently use `RelationEvent.snapshot()` to pass
+ it back if we need to log it.
+ """
+ super().__init__(handle)
+ self.relation = relation
+ self.relation_id = relation_id
+ self.app = app
+ self.unit = unit
+
+ def snapshot(self) -> Dict:
+ """Save event information."""
+ if not self.relation:
+ return {}
+ snapshot = {"relation_name": self.relation.name, "relation_id": self.relation.id}
+ if self.app:
+ snapshot["app_name"] = self.app.name
+ if self.unit:
+ snapshot["unit_name"] = self.unit.name
+ return snapshot
+
+ def restore(self, snapshot: dict):
+ """Restore event information."""
+ self.relation = self.framework.model.get_relation(
+ snapshot["relation_name"], snapshot["relation_id"]
+ )
+ app_name = snapshot.get("app_name")
+ if app_name:
+ self.app = self.framework.model.get_app(app_name)
+ else:
+ self.app = None
+ unit_name = snapshot.get("unit_name")
+ if unit_name:
+ self.unit = self.framework.model.get_unit(unit_name)
+ else:
+ self.unit = None
+
+
+class InvalidAlertRuleEvent(EventBase):
+ """Event emitted when alert rule files are not parsable.
+
+ Enables us to set a clear status on the provider.
+ """
+
+ def __init__(self, handle, errors: str = "", valid: bool = False):
+ super().__init__(handle)
+ self.errors = errors
+ self.valid = valid
+
+ def snapshot(self) -> Dict:
+ """Save alert rule information."""
+ return {
+ "valid": self.valid,
+ "errors": self.errors,
+ }
+
+ def restore(self, snapshot):
+ """Restore alert rule information."""
+ self.valid = snapshot["valid"]
+ self.errors = snapshot["errors"]
+
+
+class LokiPushApiEvents(ObjectEvents):
+ """Event descriptor for events raised by `LokiPushApiProvider`."""
+
+ loki_push_api_endpoint_departed = EventSource(LokiPushApiEndpointDeparted)
+ loki_push_api_endpoint_joined = EventSource(LokiPushApiEndpointJoined)
+ loki_push_api_alert_rules_changed = EventSource(LokiPushApiAlertRulesChanged)
+ alert_rule_status_changed = EventSource(InvalidAlertRuleEvent)
+
+
+class LokiPushApiProvider(Object):
+ """A LokiPushApiProvider class."""
+
+ on = LokiPushApiEvents() # pyright: ignore
+
+ def __init__(
+ self,
+ charm,
+ relation_name: str = DEFAULT_RELATION_NAME,
+ *,
+ port: Union[str, int] = 3100,
+ scheme: str = "http",
+ address: str = "localhost",
+ path: str = "loki/api/v1/push",
+ ):
+ """A Loki service provider.
+
+ Args:
+ charm: a `CharmBase` instance that manages this
+ instance of the Loki service.
+ relation_name: an optional string name of the relation between `charm`
+ and the Loki charmed service. The default is "logging".
+ It is strongly advised not to change the default, so that people
+ deploying your charm will have a consistent experience with all
+ other charms that consume metrics endpoints.
+ port: an optional port of the Loki service (default is "3100").
+ scheme: an optional scheme of the Loki API URL (default is "http").
+ address: an optional address of the Loki service (default is "localhost").
+ path: an optional path of the Loki API URL (default is "loki/api/v1/push")
+
+ Raises:
+ RelationNotFoundError: If there is no relation in the charm's metadata.yaml
+ with the same name as provided via `relation_name` argument.
+ RelationInterfaceMismatchError: The relation with the same name as provided
+ via `relation_name` argument does not have the `loki_push_api` relation
+ interface.
+ RelationRoleMismatchError: If the relation with the same name as provided
+ via `relation_name` argument does not have the `RelationRole.requires`
+ role.
+ """
+ _validate_relation_by_interface_and_direction(
+ charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides
+ )
+ super().__init__(charm, relation_name)
+ self._charm = charm
+ self._relation_name = relation_name
+ self._tool = CosTool(self)
+ self.port = int(port)
+ self.scheme = scheme
+ self.address = address
+ self.path = path
+
+ events = self._charm.on[relation_name]
+ self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event)
+ self.framework.observe(events.relation_joined, self._on_logging_relation_joined)
+ self.framework.observe(events.relation_changed, self._on_logging_relation_changed)
+ self.framework.observe(events.relation_departed, self._on_logging_relation_departed)
+ self.framework.observe(events.relation_broken, self._on_logging_relation_broken)
+
+ def _on_lifecycle_event(self, _):
+ # Upgrade event or other charm-level event
+ should_update = False
+ for relation in self._charm.model.relations[self._relation_name]:
+ # Don't accidentally flip a True result back.
+ should_update = should_update or self._process_logging_relation_changed(relation)
+ if should_update:
+ # We don't have a RelationEvent, so build it up by hand
+ first_rel = self._charm.model.relations[self._relation_name][0]
+ self.on.loki_push_api_alert_rules_changed.emit(
+ relation=first_rel,
+ relation_id=first_rel.id,
+ )
+
+ def _on_logging_relation_joined(self, event: RelationJoinedEvent):
+ """Set basic data on relation joins.
+
+        Set the promtail binary URL location, which will not change, and anything
+        else which may be required, but is static.
+
+ Args:
+ event: a `CharmEvent` in response to which the consumer
+ charm must set its relation data.
+ """
+ if self._charm.unit.is_leader():
+ event.relation.data[self._charm.app].update(self._promtail_binary_url)
+ logger.debug("Saved promtail binary url: %s", self._promtail_binary_url)
+
+ def _on_logging_relation_changed(self, event: HookEvent):
+ """Handle changes in related consumers.
+
+        Anytime there are changes in the relation between Loki
+        and its consumer charms.
+
+ Args:
+ event: a `CharmEvent` in response to which the consumer
+ charm must update its relation data.
+ """
+ should_update = self._process_logging_relation_changed(event.relation) # pyright: ignore
+ if should_update:
+ self.on.loki_push_api_alert_rules_changed.emit(
+ relation=event.relation, # pyright: ignore
+ relation_id=event.relation.id, # pyright: ignore
+ app=self._charm.app,
+ unit=self._charm.unit,
+ )
+
+ def _on_logging_relation_broken(self, event: RelationBrokenEvent):
+        """Removes alert rules files when consumer charms leave the relation with Loki.
+
+ Args:
+ event: a `CharmEvent` in response to which the Loki
+ charm must update its relation data.
+ """
+ self.on.loki_push_api_alert_rules_changed.emit(
+ relation=event.relation,
+ relation_id=event.relation.id,
+ app=self._charm.app,
+ unit=self._charm.unit,
+ )
+
+ def _on_logging_relation_departed(self, event: RelationDepartedEvent):
+        """Removes alert rules files when consumer charms leave the relation with Loki.
+
+ Args:
+ event: a `CharmEvent` in response to which the Loki
+ charm must update its relation data.
+ """
+ self.on.loki_push_api_alert_rules_changed.emit(
+ relation=event.relation,
+ relation_id=event.relation.id,
+ app=self._charm.app,
+ unit=self._charm.unit,
+ )
+
+ def _should_update_alert_rules(self, relation) -> bool:
+ """Determine whether alert rules should be regenerated.
+
+ If there are alert rules in the relation data bag, tell the charm
+ whether to regenerate them based on the boolean returned here.
+ """
+ if relation.data.get(relation.app).get("alert_rules", None) is not None:
+ return True
+ return False
+
+ def _process_logging_relation_changed(self, relation: Relation) -> bool:
+ """Handle changes in related consumers.
+
+        Anytime there are changes in relations between Loki
+        and its consumer charms, Loki sets the `loki_push_api`
+        data in the relation. Build and set the endpoint
+        appropriately, and if there are alert rules present in
+        the relation, let the caller know.
+        Loki also generates alert rules files based on what the
+        consumer charms forward.
+
+ Args:
+ relation: the `Relation` instance to update.
+
+ Returns:
+ A boolean indicating whether an event should be emitted, so we
+ only emit one on lifecycle events
+ """
+ relation.data[self._charm.unit]["public_address"] = socket.getfqdn() or ""
+ self.update_endpoint(relation=relation)
+ return self._should_update_alert_rules(relation)
+
+ @property
+ def _promtail_binary_url(self) -> dict:
+ """URL from which Promtail binary can be downloaded."""
+ # construct promtail binary url paths from parts
+ promtail_binaries = {}
+ for arch, info in PROMTAIL_BINARIES.items():
+ info["url"] = "{}/promtail-{}/{}.gz".format(
+ PROMTAIL_BASE_URL, PROMTAIL_VERSION, info["filename"]
+ )
+ promtail_binaries[arch] = info
+
+ return {"promtail_binary_zip_url": json.dumps(promtail_binaries)}
+
+ def update_endpoint(self, url: str = "", relation: Optional[Relation] = None) -> None:
+        """Triggers the update of the endpoint in unit relation data programmatically.
+
+ This method should be used when the charm relying on this library needs
+ to update the relation data in response to something occurring outside
+ the `logging` relation lifecycle, e.g., in case of a
+ host address change because the charmed operator becomes connected to an
+ Ingress after the `logging` relation is established.
+
+ Args:
+ url: An optional url value to update relation data.
+ relation: An optional instance of `class:ops.model.Relation` to update.
+ """
+ # if no relation is specified update all of them
+ if not relation:
+ if not self._charm.model.relations.get(self._relation_name):
+ return
+
+ relations_list = self._charm.model.relations.get(self._relation_name)
+ else:
+ relations_list = [relation]
+
+ endpoint = self._endpoint(url or self._url)
+
+ for relation in relations_list:
+ relation.data[self._charm.unit].update({"endpoint": json.dumps(endpoint)})
+
+ logger.debug("Saved endpoint in unit relation data")
+
+ @property
+ def _url(self) -> str:
+ """Get local Loki Push API url.
+
+ Return url to loki, including port number, but without the endpoint subpath.
+ """
+ return "http://{}:{}".format(socket.getfqdn(), self.port)
+
+ def _endpoint(self, url) -> dict:
+ """Get Loki push API endpoint for a given url.
+
+ Args:
+ url: A loki unit URL.
+
+        Returns: a dict with a "url" key pointing at the Loki push endpoint.
+ """
+ endpoint = "/loki/api/v1/push"
+ return {"url": url.rstrip("/") + endpoint}
+
+ @property
+ def alerts(self) -> dict: # noqa: C901
+ """Fetch alerts for all relations.
+
+ A Loki alert rules file consists of a list of "groups". Each
+ group consists of a list of alerts (`rules`) that are sequentially
+ executed. This method returns all the alert rules provided by each
+ related metrics provider charm. These rules may be used to generate a
+ separate alert rules file for each relation since the returned list
+ of alert groups are indexed by relation ID. Also for each relation ID
+ associated scrape metadata such as Juju model, UUID and application
+ name are provided so a unique name may be generated for the rules
+ file. For each relation the structure of data returned is a dictionary
+ with four keys
+
+ - groups
+ - model
+ - model_uuid
+ - application
+
+ The value of the `groups` key is such that it may be used to generate
+ a Loki alert rules file directly using `yaml.dump` but the
+ `groups` key itself must be included as this is required by Loki,
+ for example as in `yaml.dump({"groups": alerts["groups"]})`.
+
+ Currently only accepts a list of rules and these
+ rules are all placed into a single group, even though Loki itself
+ allows for multiple groups within a single alert rules file.
+
+ Returns:
+ a dictionary of alert rule groups and associated scrape
+ metadata indexed by relation ID.
+ """
+ alerts = {} # type: Dict[str, dict] # mapping b/w juju identifiers and alert rule files
+ for relation in self._charm.model.relations[self._relation_name]:
+ if not relation.units or not relation.app:
+ continue
+
+ alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}"))
+ if not alert_rules:
+ continue
+
+ alert_rules = self._inject_alert_expr_labels(alert_rules)
+
+ identifier, topology = self._get_identifier_by_alert_rules(alert_rules)
+ if not topology:
+ try:
+ metadata = json.loads(relation.data[relation.app]["metadata"])
+ identifier = JujuTopology.from_dict(metadata).identifier
+ alerts[identifier] = self._tool.apply_label_matchers(alert_rules) # type: ignore
+
+ except KeyError as e:
+ logger.debug(
+ "Relation %s has no 'metadata': %s",
+ relation.id,
+ e,
+ )
+
+ if not identifier:
+ logger.error(
+ "Alert rules were found but no usable group or identifier was present."
+ )
+ continue
+
+ _, errmsg = self._tool.validate_alert_rules(alert_rules)
+ if errmsg:
+ relation.data[self._charm.app]["event"] = json.dumps({"errors": errmsg})
+ continue
+
+ alerts[identifier] = alert_rules
+
+ return alerts
+
+ def _get_identifier_by_alert_rules(
+ self, rules: dict
+ ) -> Tuple[Union[str, None], Union[JujuTopology, None]]:
+ """Determine an appropriate dict key for alert rules.
+
+ The key is used as the filename when writing alerts to disk, so the structure
+ and uniqueness is important.
+
+ Args:
+ rules: a dict of alert rules
+ Returns:
+ A tuple containing an identifier, if found, and a JujuTopology, if it could
+ be constructed.
+ """
+ if "groups" not in rules:
+ logger.debug("No alert groups were found in relation data")
+ return None, None
+
+ # Construct an ID based on what's in the alert rules if they have labels
+ for group in rules["groups"]:
+ try:
+ labels = group["rules"][0]["labels"]
+ topology = JujuTopology(
+ # Don't try to safely get required constructor fields. There's already
+ # a handler for KeyErrors
+ model_uuid=labels["juju_model_uuid"],
+ model=labels["juju_model"],
+ application=labels["juju_application"],
+ unit=labels.get("juju_unit", ""),
+ charm_name=labels.get("juju_charm", ""),
+ )
+ return topology.identifier, topology
+ except KeyError:
+ logger.debug("Alert rules were found but no usable labels were present")
+ continue
+
+ logger.warning(
+ "No labeled alert rules were found, and no 'scrape_metadata' "
+ "was available. Using the alert group name as filename."
+ )
+ try:
+ for group in rules["groups"]:
+ return group["name"], None
+ except KeyError:
+ logger.debug("No group name was found to use as identifier")
+
+ return None, None
+
+ def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]:
+ """Iterate through alert rules and inject topology into expressions.
+
+ Args:
+ rules: a dict of alert rules
+ """
+ if "groups" not in rules:
+ return rules
+
+ modified_groups = []
+ for group in rules["groups"]:
+ # Copy off rules, so we don't modify an object we're iterating over
+ rules_copy = group["rules"]
+ for idx, rule in enumerate(rules_copy):
+ labels = rule.get("labels")
+
+ if labels:
+ try:
+ topology = JujuTopology(
+ # Don't try to safely get required constructor fields. There's already
+ # a handler for KeyErrors
+ model_uuid=labels["juju_model_uuid"],
+ model=labels["juju_model"],
+ application=labels["juju_application"],
+ unit=labels.get("juju_unit", ""),
+ charm_name=labels.get("juju_charm", ""),
+ )
+
+ # Inject topology and put it back in the list
+ rule["expr"] = self._tool.inject_label_matchers(
+ re.sub(r"%%juju_topology%%,?", "", rule["expr"]),
+ topology.label_matcher_dict,
+ )
+ except KeyError:
+ # Some required JujuTopology key is missing. Just move on.
+ pass
+
+ group["rules"][idx] = rule
+
+ modified_groups.append(group)
+
+ rules["groups"] = modified_groups
+ return rules
+
+
+class ConsumerBase(Object):
+ """Consumer's base class."""
+
+ def __init__(
+ self,
+ charm: CharmBase,
+ relation_name: str = DEFAULT_RELATION_NAME,
+ alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
+ recursive: bool = False,
+ skip_alert_topology_labeling: bool = False,
+ ):
+ super().__init__(charm, relation_name)
+ self._charm = charm
+ self._relation_name = relation_name
+ self.topology = JujuTopology.from_charm(charm)
+
+ try:
+ alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path)
+ except InvalidAlertRulePathError as e:
+ logger.debug(
+ "Invalid Loki alert rules folder at %s: %s",
+ e.alert_rules_absolute_path,
+ e.message,
+ )
+ self._alert_rules_path = alert_rules_path
+ self._skip_alert_topology_labeling = skip_alert_topology_labeling
+
+ self._recursive = recursive
+
+ def _handle_alert_rules(self, relation):
+ if not self._charm.unit.is_leader():
+ return
+
+ alert_rules = (
+ AlertRules(None) if self._skip_alert_topology_labeling else AlertRules(self.topology)
+ )
+ alert_rules.add_path(self._alert_rules_path, recursive=self._recursive)
+ alert_rules_as_dict = alert_rules.as_dict()
+
+ relation.data[self._charm.app]["metadata"] = json.dumps(self.topology.as_dict())
+ relation.data[self._charm.app]["alert_rules"] = json.dumps(
+ alert_rules_as_dict,
+ sort_keys=True, # sort, to prevent unnecessary relation_changed events
+ )
+
+ @property
+ def loki_endpoints(self) -> List[dict]:
+ """Fetch Loki Push API endpoints sent from LokiPushApiProvider through relation data.
+
+ Returns:
+ A list of dictionaries with Loki Push API endpoints, for instance:
+ [
+ {"url": "http://loki1:3100/loki/api/v1/push"},
+ {"url": "http://loki2:3100/loki/api/v1/push"},
+ ]
+ """
+ endpoints = [] # type: list
+
+ for relation in self._charm.model.relations[self._relation_name]:
+ for unit in relation.units:
+ if unit.app == self._charm.app:
+ # This is a peer unit
+ continue
+
+ endpoint = relation.data[unit].get("endpoint")
+ if endpoint:
+ deserialized_endpoint = json.loads(endpoint)
+ endpoints.append(deserialized_endpoint)
+
+ return endpoints
+
+
+class LokiPushApiConsumer(ConsumerBase):
+ """Loki Consumer class."""
+
+ on = LokiPushApiEvents() # pyright: ignore
+
+ def __init__(
+ self,
+ charm: CharmBase,
+ relation_name: str = DEFAULT_RELATION_NAME,
+ alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
+ recursive: bool = True,
+ skip_alert_topology_labeling: bool = False,
+ ):
+ """Construct a Loki charm client.
+
+ The `LokiPushApiConsumer` object provides configurations to a Loki client charm, such as
+ the Loki API endpoint to push logs. It is intended for workloads that can speak
+ loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such
+ as grafana-agent.
+ (If you need to forward workload stdout logs, then use LogForwarder; if you need to forward
+ log files, then use LogProxyConsumer.)
+
+ `LokiPushApiConsumer` can be instantiated as follows:
+
+ self._loki_consumer = LokiPushApiConsumer(self)
+
+ Args:
+ charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object.
+ Typically, this is `self` in the instantiating class.
+ relation_name: the string name of the relation interface to look up.
+ If `charm` has exactly one relation with this interface, the relation's
+ name is returned. If none or multiple relations with the provided interface
+ are found, this method will raise either a NoRelationWithInterfaceFoundError or
+ MultipleRelationsWithInterfaceFoundError exception, respectively.
+ alert_rules_path: a string indicating a path where alert rules can be found
+ recursive: Whether to scan for rule files recursively.
+ skip_alert_topology_labeling: whether to skip the alert topology labeling.
+
+ Raises:
+ RelationNotFoundError: If there is no relation in the charm's metadata.yaml
+ with the same name as provided via `relation_name` argument.
+ RelationInterfaceMismatchError: The relation with the same name as provided
+ via `relation_name` argument does not have the `loki_push_api` relation
+ interface.
+ RelationRoleMismatchError: If the relation with the same name as provided
+ via `relation_name` argument does not have the `RelationRole.requires`
+ role.
+
+ Emits:
+ loki_push_api_endpoint_joined: This event is emitted when the relation between the
+ Charmed Operator that instantiates `LokiPushApiProvider` (Loki charm for instance)
+ and the Charmed Operator that instantiates `LokiPushApiConsumer` is established.
+ loki_push_api_endpoint_departed: This event is emitted when the relation between the
+ Charmed Operator that implements `LokiPushApiProvider` (Loki charm for instance)
+ and the Charmed Operator that implements `LokiPushApiConsumer` is removed.
+ loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules
+ file is encountered or if `alert_rules_path` is empty.
+ """
+ _validate_relation_by_interface_and_direction(
+ charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires
+ )
+ super().__init__(
+ charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling
+ )
+ events = self._charm.on[relation_name]
+ self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event)
+ self.framework.observe(events.relation_joined, self._on_logging_relation_joined)
+ self.framework.observe(events.relation_changed, self._on_logging_relation_changed)
+ self.framework.observe(events.relation_departed, self._on_logging_relation_departed)
+
+ def _on_lifecycle_event(self, _: HookEvent):
+ """Update require relation data on charm upgrades and other lifecycle events.
+
+ Args:
+ event: a `CharmEvent` in response to which the consumer
+ charm must update its relation data.
+ """
+ # Upgrade event or other charm-level event
+ self._reinitialize_alert_rules()
+ self.on.loki_push_api_endpoint_joined.emit()
+
+ def _on_logging_relation_joined(self, event: RelationJoinedEvent):
+ """Handle changes in related consumers.
+
+ Update relation data and emit events when a relation is established.
+
+ Args:
+ event: a `CharmEvent` in response to which the consumer
+ charm must update its relation data.
+
+ Emits:
+ loki_push_api_endpoint_joined: Once the relation is established, this event is emitted.
+ loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules
+ file is encountered or if `alert_rules_path` is empty.
+ """
+ # Alert rules will not change over the lifecycle of a charm, and do not need to be
+ # constantly set on every relation_changed event. Leave them here.
+ self._handle_alert_rules(event.relation)
+ self.on.loki_push_api_endpoint_joined.emit()
+
+ def _on_logging_relation_changed(self, event: RelationEvent):
+ """Handle changes in related consumers.
+
+ Anytime there are changes in the relation between Loki
+ and its consumers charms.
+
+ Args:
+ event: a `CharmEvent` in response to which the consumer
+ charm must update its relation data.
+
+ Emits:
+ loki_push_api_endpoint_joined: Once the relation is established, this event is emitted.
+ loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules
+ file is encountered or if `alert_rules_path` is empty.
+ """
+ if self._charm.unit.is_leader():
+ ev = json.loads(event.relation.data[event.app].get("event", "{}"))
+
+ if ev:
+ valid = bool(ev.get("valid", True))
+ errors = ev.get("errors", "")
+
+ if valid and not errors:
+ self.on.alert_rule_status_changed.emit(valid=valid)
+ else:
+ self.on.alert_rule_status_changed.emit(valid=valid, errors=errors)
+
+ self.on.loki_push_api_endpoint_joined.emit()
+
+ def _reinitialize_alert_rules(self):
+ """Reloads alert rules and updates all relations."""
+ for relation in self._charm.model.relations[self._relation_name]:
+ self._handle_alert_rules(relation)
+
+ def _process_logging_relation_changed(self, relation: Relation):
+ self._handle_alert_rules(relation)
+ self.on.loki_push_api_endpoint_joined.emit()
+
+ def _on_logging_relation_departed(self, _: RelationEvent):
+ """Handle departures in related providers.
+
+ Whenever a relation between the consumer charm and Loki departs,
+ the consumer charm is informed through a `LokiPushApiEndpointDeparted` event
+ and can then choose to update its configuration.
+ """
+ self.on.loki_push_api_endpoint_departed.emit()
+
+
+class ContainerNotFoundError(Exception):
+ """Raised if the specified container does not exist."""
+
+ def __init__(self):
+ msg = "The specified container does not exist."
+ self.message = msg
+
+ super().__init__(self.message)
+
+
+class PromtailDigestError(EventBase):
+ """Event emitted when there is an error with Promtail initialization."""
+
+ def __init__(self, handle, message):
+ super().__init__(handle)
+ self.message = message
+
+ def snapshot(self):
+ """Save message information."""
+ return {"message": self.message}
+
+ def restore(self, snapshot):
+ """Restore message information."""
+ self.message = snapshot["message"]
+
+
+class LogProxyEndpointDeparted(EventBase):
+ """Event emitted when a Log Proxy has departed."""
+
+
+class LogProxyEndpointJoined(EventBase):
+ """Event emitted when a Log Proxy joins."""
+
+
+class LogProxyEvents(ObjectEvents):
+ """Event descriptor for events raised by `LogProxyConsumer`."""
+
+ promtail_digest_error = EventSource(PromtailDigestError)
+ log_proxy_endpoint_departed = EventSource(LogProxyEndpointDeparted)
+ log_proxy_endpoint_joined = EventSource(LogProxyEndpointJoined)
+
+
+class LogProxyConsumer(ConsumerBase):
+ """LogProxyConsumer class.
+
+ > Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju
+ > 3.6 LTS.
+
+ The `LogProxyConsumer` object provides a method for attaching `promtail` to
+ a workload in order to generate structured logging data from applications
+ which traditionally log to syslog or do not have native Loki integration.
+ The `LogProxyConsumer` can be instantiated as follows:
+
+ self._log_proxy = LogProxyConsumer(
+ self,
+ logs_scheme={
+ "workload-a": {
+ "log-files": ["/tmp/worload-a-1.log", "/tmp/worload-a-2.log"],
+ "syslog-port": 1514,
+ },
+ "workload-b": {"log-files": ["/tmp/worload-b.log"], "syslog-port": 1515},
+ },
+ relation_name="log-proxy",
+ )
+
+ Args:
+ charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object.
+ Typically, this is `self` in the instantiating class.
+ logs_scheme: a dict mapping each container name to its list of log files
+ and its syslog port.
+ relation_name: the string name of the relation to use. It must match a
+ `requires` relation declared in the charm's metadata.yaml with the
+ `loki_push_api` interface; otherwise one of the exceptions listed
+ under "Raises" below is thrown.
+ alert_rules_path: an optional path for the location of alert rules
+ files. Defaults to "./src/loki_alert_rules",
+ resolved from the directory hosting the charm entry file.
+ The alert rules are automatically updated on charm upgrade.
+ recursive: Whether to scan for rule files recursively.
+ promtail_resource_name: An optional promtail resource name from metadata
+ if it has been modified and attached
+ insecure_skip_verify: skip SSL verification.
+
+ Raises:
+ RelationNotFoundError: If there is no relation in the charm's metadata.yaml
+ with the same name as provided via `relation_name` argument.
+ RelationInterfaceMismatchError: The relation with the same name as provided
+ via `relation_name` argument does not have the `loki_push_api` relation
+ interface.
+ RelationRoleMismatchError: If the relation with the same name as provided
+ via `relation_name` argument does not have the `RelationRole.requires`
+ role.
+ """
+
+ on = LogProxyEvents() # pyright: ignore
+
+ def __init__(
+ self,
+ charm,
+ *,
+ logs_scheme=None,
+ relation_name: str = DEFAULT_LOG_PROXY_RELATION_NAME,
+ alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
+ recursive: bool = False,
+ promtail_resource_name: Optional[str] = None,
+ insecure_skip_verify: bool = False,
+ ):
+ super().__init__(charm, relation_name, alert_rules_path, recursive)
+ self._charm = charm
+ self._logs_scheme = logs_scheme or {}
+ self._relation_name = relation_name
+ self.topology = JujuTopology.from_charm(charm)
+ self._promtail_resource_name = promtail_resource_name or "promtail-bin"
+ self.insecure_skip_verify = insecure_skip_verify
+ self._promtails_ports = self._generate_promtails_ports(logs_scheme)
+
+ # architecture used for promtail binary
+ arch = platform.processor()
+ if arch in ["x86_64", "amd64"]:
+ self._arch = "amd64"
+ elif arch in ["aarch64", "arm64", "armv8b", "armv8l"]:
+ self._arch = "arm64"
+ else:
+ self._arch = arch
+
+ events = self._charm.on[relation_name]
+ self.framework.observe(events.relation_created, self._on_relation_created)
+ self.framework.observe(events.relation_changed, self._on_relation_changed)
+ self.framework.observe(events.relation_departed, self._on_relation_departed)
+ self._observe_pebble_ready()
+
+ def _observe_pebble_ready(self):
+ for container in self._containers.keys():
+ snake_case_container_name = container.replace("-", "_")
+ self.framework.observe(
+ getattr(self._charm.on, f"{snake_case_container_name}_pebble_ready"),
+ self._on_pebble_ready,
+ )
+
+ def _on_pebble_ready(self, event: WorkloadEvent):
+ """Event handler for `pebble_ready`."""
+ if self.model.relations[self._relation_name]:
+ self._setup_promtail(event.workload)
+
+ def _on_relation_created(self, _: RelationCreatedEvent) -> None:
+ """Event handler for `relation_created`."""
+ for container in self._containers.values():
+ if container.can_connect():
+ self._setup_promtail(container)
+
+ def _on_relation_changed(self, event: RelationEvent) -> None:
+ """Event handler for `relation_changed`.
+
+ Args:
+ event: The event object `RelationChangedEvent`.
+ """
+ self._handle_alert_rules(event.relation)
+
+ if self._charm.unit.is_leader():
+ ev = json.loads(event.relation.data[event.app].get("event", "{}"))
+
+ if ev:
+ valid = bool(ev.get("valid", True))
+ errors = ev.get("errors", "")
+
+ if valid and not errors:
+ self.on.alert_rule_status_changed.emit(valid=valid)
+ else:
+ self.on.alert_rule_status_changed.emit(valid=valid, errors=errors)
+
+ for container in self._containers.values():
+ if not container.can_connect():
+ continue
+ if self.model.relations[self._relation_name]:
+ if "promtail" not in container.get_plan().services:
+ self._setup_promtail(container)
+ continue
+
+ new_config = self._promtail_config(container.name)
+ if new_config != self._current_config(container):
+ container.push(
+ WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True
+ )
+
+ # Loki may send endpoints late. Don't necessarily start, there may be
+ # no clients
+ if new_config["clients"]:
+ container.restart(WORKLOAD_SERVICE_NAME)
+ self.on.log_proxy_endpoint_joined.emit()
+ else:
+ self.on.promtail_digest_error.emit("No promtail client endpoints available!")
+
+ def _on_relation_departed(self, _: RelationEvent) -> None:
+ """Event handler for `relation_departed`.
+
+ Args:
+ event: The event object `RelationDepartedEvent`.
+ """
+ for container in self._containers.values():
+ if not container.can_connect():
+ continue
+ if not self._charm.model.relations[self._relation_name]:
+ container.stop(WORKLOAD_SERVICE_NAME)
+ continue
+
+ new_config = self._promtail_config(container.name)
+ if new_config != self._current_config(container):
+ container.push(WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True)
+
+ if new_config["clients"]:
+ container.restart(WORKLOAD_SERVICE_NAME)
+ else:
+ container.stop(WORKLOAD_SERVICE_NAME)
+ self.on.log_proxy_endpoint_departed.emit()
+
+ def _add_pebble_layer(self, workload_binary_path: str, container: Container) -> None:
+ """Adds Pebble layer that manages Promtail service in Workload container.
+
+ Args:
+ workload_binary_path: string providing path to promtail binary in workload container.
+ container: container into which the layer is to be added.
+ """
+ pebble_layer = Layer(
+ {
+ "summary": "promtail layer",
+ "description": "pebble config layer for promtail",
+ "services": {
+ WORKLOAD_SERVICE_NAME: {
+ "override": "replace",
+ "summary": WORKLOAD_SERVICE_NAME,
+ "command": f"{workload_binary_path} {self._cli_args}",
+ "startup": "disabled",
+ }
+ },
+ }
+ )
+ container.add_layer(container.name, pebble_layer, combine=True)
+
+ def _create_directories(self, container: Container) -> None:
+ """Creates the directories for Promtail binary and config file."""
+ container.make_dir(path=WORKLOAD_BINARY_DIR, make_parents=True)
+ container.make_dir(path=WORKLOAD_CONFIG_DIR, make_parents=True)
+
+ def _obtain_promtail(self, promtail_info: dict, container: Container) -> None:
+ """Obtain promtail binary from an attached resource or download it.
+
+ Args:
+ promtail_info: dictionary containing information about promtail binary
+ that must be used. The dictionary must have three keys
+ - "filename": filename of promtail binary
+ - "zipsha": sha256 sum of zip file of promtail binary
+ - "binsha": sha256 sum of unpacked promtail binary
+ container: container into which promtail is to be obtained.
+ """
+ workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"])
+ if self._promtail_attached_as_resource:
+ self._push_promtail_if_attached(container, workload_binary_path)
+ return
+
+ if self._promtail_must_be_downloaded(promtail_info):
+ self._download_and_push_promtail_to_workload(container, promtail_info)
+ else:
+ binary_path = os.path.join(BINARY_DIR, promtail_info["filename"])
+ self._push_binary_to_workload(container, binary_path, workload_binary_path)
+
+ def _push_binary_to_workload(
+ self, container: Container, binary_path: str, workload_binary_path: str
+ ) -> None:
+ """Push promtail binary into workload container.
+
+ Args:
+ binary_path: path in charm container from which promtail binary is read.
+ workload_binary_path: path in workload container to which promtail binary is pushed.
+ container: container into which promtail is to be uploaded.
+ """
+ with open(binary_path, "rb") as f:
+ container.push(workload_binary_path, f, permissions=0o755, make_dirs=True)
+ logger.debug("The promtail binary file has been pushed to the workload container.")
+
+ @property
+ def _promtail_attached_as_resource(self) -> bool:
+ """Checks whether Promtail binary is attached to the charm or not.
+
+ Returns:
+ a boolean representing whether Promtail binary is attached as a resource or not.
+ """
+ try:
+ self._charm.model.resources.fetch(self._promtail_resource_name)
+ return True
+ except ModelError:
+ return False
+ except NameError as e:
+ if "invalid resource name" in str(e):
+ return False
+ raise
+
+ def _push_promtail_if_attached(self, container: Container, workload_binary_path: str) -> bool:
+ """Checks whether Promtail binary is attached to the charm or not.
+
+ Args:
+ workload_binary_path: string specifying expected path of promtail
+ in workload container
+ container: container into which promtail is to be pushed.
+
+ Returns:
+ a boolean representing whether Promtail binary is attached or not.
+ """
+ logger.info("Promtail binary file has been obtained from an attached resource.")
+ resource_path = self._charm.model.resources.fetch(self._promtail_resource_name)
+ self._push_binary_to_workload(container, resource_path, workload_binary_path)
+ return True
+
+ def _promtail_must_be_downloaded(self, promtail_info: dict) -> bool:
+ """Checks whether promtail binary must be downloaded or not.
+
+ Args:
+ promtail_info: dictionary containing information about promtail binary
+ that must be used. The dictionary must have three keys
+ - "filename": filename of promtail binary
+ - "zipsha": sha256 sum of zip file of promtail binary
+ - "binsha": sha256 sum of unpacked promtail binary
+
+ Returns:
+ a boolean representing whether Promtail binary must be downloaded or not.
+ """
+ binary_path = os.path.join(BINARY_DIR, promtail_info["filename"])
+ if not self._is_promtail_binary_in_charm(binary_path):
+ return True
+
+ if not self._sha256sums_matches(binary_path, promtail_info["binsha"]):
+ return True
+
+ logger.debug("Promtail binary file is already in the the charm container.")
+ return False
+
+ def _sha256sums_matches(self, file_path: str, sha256sum: str) -> bool:
+ """Checks whether a file's sha256sum matches or not with a specific sha256sum.
+
+ Args:
+ file_path: A string representing the files' patch.
+ sha256sum: The sha256sum against which we want to verify.
+
+ Returns:
+ a boolean representing whether a file's sha256sum matches or not with
+ a specific sha256sum.
+ """
+ try:
+ with open(file_path, "rb") as f:
+ file_bytes = f.read()
+ result = sha256(file_bytes).hexdigest()
+
+ if result != sha256sum:
+ msg = "File sha256sum mismatch, expected:'{}' but got '{}'".format(
+ sha256sum, result
+ )
+ logger.debug(msg)
+ return False
+
+ return True
+ except (APIError, FileNotFoundError):
+ msg = "File: '{}' could not be opened".format(file_path)
+ logger.error(msg)
+ return False
+
+ def _is_promtail_binary_in_charm(self, binary_path: str) -> bool:
+ """Check if Promtail binary is already stored in charm container.
+
+ Args:
+ binary_path: string path of promtail binary to check
+
+ Returns:
+ a boolean representing whether Promtail is present or not.
+ """
+ return Path(binary_path).is_file()
+
+ def _download_and_push_promtail_to_workload(
+ self, container: Container, promtail_info: dict
+ ) -> None:
+ """Downloads a Promtail zip file and pushes the binary to the workload.
+
+ Args:
+ promtail_info: dictionary containing information about promtail binary
+ that must be used. The dictionary must have three keys
+ - "filename": filename of promtail binary
+ - "zipsha": sha256 sum of zip file of promtail binary
+ - "binsha": sha256 sum of unpacked promtail binary
+ container: container into which promtail is to be uploaded.
+ """
+ # Check for Juju proxy variables and fall back to standard ones if not set
+ # If no Juju proxy variable was set, we set proxies to None to let the ProxyHandler get
+ # the proxy env variables from the environment
+ proxies = {
+ # The ProxyHandler uses only the protocol names as keys
+ # https://docs.python.org/3/library/urllib.request.html#urllib.request.ProxyHandler
+ "https": os.environ.get("JUJU_CHARM_HTTPS_PROXY", ""),
+ "http": os.environ.get("JUJU_CHARM_HTTP_PROXY", ""),
+ # The ProxyHandler uses `no` for the no_proxy key
+ # https://github.com/python/cpython/blob/3.12/Lib/urllib/request.py#L2553
+ "no": os.environ.get("JUJU_CHARM_NO_PROXY", ""),
+ }
+ proxies = {k: v for k, v in proxies.items() if v != ""} or None
+
+ proxy_handler = request.ProxyHandler(proxies)
+ opener = request.build_opener(proxy_handler)
+
+ with opener.open(promtail_info["url"]) as r:
+ file_bytes = r.read()
+ file_path = os.path.join(BINARY_DIR, promtail_info["filename"] + ".gz")
+ with open(file_path, "wb") as f:
+ f.write(file_bytes)
+ logger.info(
+ "Promtail binary zip file has been downloaded and stored in: %s",
+ file_path,
+ )
+
+ decompressed_file = GzipFile(fileobj=BytesIO(file_bytes))
+ binary_path = os.path.join(BINARY_DIR, promtail_info["filename"])
+ with open(binary_path, "wb") as outfile:
+ outfile.write(decompressed_file.read())
+ logger.debug("Promtail binary file has been downloaded.")
+
+ workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"])
+ self._push_binary_to_workload(container, binary_path, workload_binary_path)
+
+ @property
+ def _cli_args(self) -> str:
+ """Return the cli arguments to pass to promtail.
+
+ Returns:
+ The arguments as a string
+ """
+ return "-config.file={}".format(WORKLOAD_CONFIG_PATH)
+
+ def _current_config(self, container) -> dict:
+ """Property that returns the current Promtail configuration.
+
+ Returns:
+ A dict containing Promtail configuration.
+ """
+ if not container.can_connect():
+ logger.debug("Could not connect to promtail container!")
+ return {}
+ try:
+ raw_current = container.pull(WORKLOAD_CONFIG_PATH).read()
+ return yaml.safe_load(raw_current)
+ except (ProtocolError, PathError) as e:
+ logger.warning(
+ "Could not check the current promtail configuration due to "
+ "a failure in retrieving the file: %s",
+ e,
+ )
+ return {}
+
+ def _promtail_config(self, container_name: str) -> dict:
+ """Generates the config file for Promtail.
+
+ Reference: https://grafana.com/docs/loki/latest/send-data/promtail/configuration
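+
+ The assembled config is roughly of this shape (illustrative, not exhaustive):
+
+ {
+ "clients": [...],  # Loki endpoints to push to
+ "server": {...},  # per-container listen ports
+ "positions": {...},  # positions file location
+ "scrape_configs": [...],  # file and/or syslog scrape jobs
+ }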
+ """
+ config = {"clients": self._clients_list()}
+ if self.insecure_skip_verify:
+ for client in config["clients"]:
+ client["tls_config"] = {"insecure_skip_verify": True}
+
+ config.update(self._server_config(container_name))
+ config.update(self._positions)
+ config.update(self._scrape_configs(container_name))
+ return config
+
+ def _clients_list(self) -> list:
+ """Generates a list of clients for use in the promtail config.
+
+ Returns:
+ A list of endpoints
+ """
+ return self.loki_endpoints
+
+ def _server_config(self, container_name: str) -> dict:
+ """Generates the server section of the Promtail config file.
+
+ Returns:
+ A dict representing the `server` section.
+ """
+ return {
+ "server": {
+ "http_listen_port": self._promtails_ports[container_name]["http_listen_port"],
+ "grpc_listen_port": self._promtails_ports[container_name]["grpc_listen_port"],
+ }
+ }
+
+ @property
+ def _positions(self) -> dict:
+ """Generates the positions section of the Promtail config file.
+
+ Returns:
+ A dict representing the `positions` section.
+ """
+ return {"positions": {"filename": WORKLOAD_POSITIONS_PATH}}
+
+ def _scrape_configs(self, container_name: str) -> dict:
+ """Generates the scrape_configs section of the Promtail config file.
+
+ Returns:
+ A dict representing the `scrape_configs` section.
+ """
+ job_name = f"juju_{self.topology.identifier}"
+
+ # The new JujuTopology doesn't include unit, but LogProxyConsumer should have it
+ common_labels = {
+ f"juju_{k}": v
+ for k, v in self.topology.as_dict(remapped_keys={"charm_name": "charm"}).items()
+ }
+ common_labels["container"] = container_name
+ scrape_configs = []
+
+ # Files config
+ labels = common_labels.copy()
+ labels.update(
+ {
+ "job": job_name,
+ "__path__": "",
+ }
+ )
+ config = {"targets": ["localhost"], "labels": labels}
+ scrape_config = {
+ "job_name": "system",
+ "static_configs": self._generate_static_configs(config, container_name),
+ }
+ scrape_configs.append(scrape_config)
+
+ # Syslog config
+ syslog_port = self._logs_scheme.get(container_name, {}).get("syslog-port")
+ if syslog_port:
+ relabel_mappings = [
+ "severity",
+ "facility",
+ "hostname",
+ "app_name",
+ "proc_id",
+ "msg_id",
+ ]
+ syslog_labels = common_labels.copy()
+ syslog_labels.update({"job": f"{job_name}_syslog"})
+ syslog_config = {
+ "job_name": "syslog",
+ "syslog": {
+ "listen_address": f"127.0.0.1:{syslog_port}",
+ "label_structured_data": True,
+ "labels": syslog_labels,
+ },
+ "relabel_configs": [
+ {"source_labels": [f"__syslog_message_{val}"], "target_label": val}
+ for val in relabel_mappings
+ ]
+ + [{"action": "labelmap", "regex": "__syslog_message_sd_(.+)"}],
+ }
+ scrape_configs.append(syslog_config) # type: ignore
+
+ return {"scrape_configs": scrape_configs}
+
+ def _generate_static_configs(self, config: dict, container_name: str) -> list:
+ """Generates static_configs section.
+
+ Returns:
+ a list of dictionaries representing the static_configs section.
+ """
+ static_configs = []
+
+ for _file in self._logs_scheme.get(container_name, {}).get("log-files", []):
+ conf = deepcopy(config)
+ conf["labels"]["__path__"] = _file
+ static_configs.append(conf)
+
+ return static_configs
+
+ def _setup_promtail(self, container: Container) -> None:
+ # Use the first relation; the Promtail binary info is read from its app data.
+ relations = self._charm.model.relations[self._relation_name]
+ if len(relations) > 1:
+ logger.debug(
+ "Multiple log_proxy relations. Getting Promtail from application {}".format(
+ relations[0].app.name
+ )
+ )
+ relation = relations[0]
+ promtail_binaries = json.loads(
+ relation.data[relation.app].get("promtail_binary_zip_url", "{}")
+ )
+ if not promtail_binaries:
+ return
+
+ self._create_directories(container)
+ self._ensure_promtail_binary(promtail_binaries, container)
+
+ container.push(
+ WORKLOAD_CONFIG_PATH,
+ yaml.safe_dump(self._promtail_config(container.name)),
+ make_dirs=True,
+ )
+
+ workload_binary_path = os.path.join(
+ WORKLOAD_BINARY_DIR, promtail_binaries[self._arch]["filename"]
+ )
+ self._add_pebble_layer(workload_binary_path, container)
+
+ if self._current_config(container).get("clients"):
+ try:
+ container.restart(WORKLOAD_SERVICE_NAME)
+ except ChangeError as e:
+ self.on.promtail_digest_error.emit(str(e))
+ else:
+ self.on.log_proxy_endpoint_joined.emit()
+ else:
+ self.on.promtail_digest_error.emit("No promtail client endpoints available!")
+
+ def _ensure_promtail_binary(self, promtail_binaries: dict, container: Container):
+ if self._is_promtail_installed(promtail_binaries[self._arch], container):
+ return
+
+ try:
+ self._obtain_promtail(promtail_binaries[self._arch], container)
+ except URLError as e:
+ msg = f"Promtail binary couldn't be downloaded - {str(e)}"
+ logger.warning(msg)
+ self.on.promtail_digest_error.emit(msg)
+
+ def _is_promtail_installed(self, promtail_info: dict, container: Container) -> bool:
+ """Determine if promtail has already been installed to the container.
+
+ Args:
+ promtail_info: dictionary containing information about promtail binary
+ that must be used. The dictionary must at least contain a key
+ "filename" giving the name of promtail binary
+ container: container in which to check whether promtail is installed.
+ """
+ workload_binary_path = f"{WORKLOAD_BINARY_DIR}/{promtail_info['filename']}"
+ try:
+ container.list_files(workload_binary_path)
+ except (APIError, FileNotFoundError):
+ return False
+ return True
+
+ def _generate_promtails_ports(self, logs_scheme) -> dict:
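+ """Assign each container a unique pair of Promtail listen ports.
+
+ Ports are allocated in steps of two from the configured start values.
+ For example (illustrative, assuming start values of 9080/9095), two
+ containers ["a", "b"] would map to:
+
+ {"a": {"http_listen_port": 9080, "grpc_listen_port": 9095},
+  "b": {"http_listen_port": 9082, "grpc_listen_port": 9097}}
+ """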
+ return {
+ container: {
+ "http_listen_port": HTTP_LISTEN_PORT_START + 2 * i,
+ "grpc_listen_port": GRPC_LISTEN_PORT_START + 2 * i,
+ }
+ for i, container in enumerate(logs_scheme.keys())
+ }
+
+ def syslog_port(self, container_name: str) -> str:
+ """Gets the port on which promtail is listening for syslog in this container.
+
+ Returns:
+ A str representing the port
+ """
+ return str(self._logs_scheme.get(container_name, {}).get("syslog-port"))
+
+ def rsyslog_config(self, container_name: str) -> str:
+ """Generates a config line for use with rsyslog.
+
+ Returns:
+ The rsyslog config line as a string
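+
+ A minimal usage sketch (illustrative; the drop-in path is an assumption):
+
+ line = self._log_proxy.rsyslog_config("workload-a")
+ container.push("/etc/rsyslog.d/49-promtail.conf", line, make_dirs=True)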
+ """
+ return 'action(type="omfwd" protocol="tcp" target="127.0.0.1" port="{}" Template="RSYSLOG_SyslogProtocol23Format" TCP_Framing="octet-counted")'.format(
+ self._logs_scheme.get(container_name, {}).get("syslog-port")
+ )
+
+ @property
+ def _containers(self) -> Dict[str, Container]:
+ return {cont: self._charm.unit.get_container(cont) for cont in self._logs_scheme.keys()}
+
+
+class _PebbleLogClient:
+ @staticmethod
+ def check_juju_version() -> bool:
+ """Make sure the Juju version supports Log Forwarding."""
+ juju_version = JujuVersion.from_environ()
+ if not juju_version > JujuVersion(version=str("3.3")):
+ msg = f"Juju version {juju_version} does not support Pebble log forwarding. Juju >= 3.4 is needed."
+ logger.warning(msg)
+ return False
+ return True
+
+ @staticmethod
+ def _build_log_target(
+ unit_name: str, loki_endpoint: str, topology: JujuTopology, enable: bool
+ ) -> Dict:
+ """Build a log target for the log forwarding Pebble layer.
+
+ Log target's syntax for enabling/disabling forwarding is explained here:
+ https://github.com/canonical/pebble?tab=readme-ov-file#log-forwarding
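+
+ For an enabled target, the returned mapping looks roughly like (illustrative):
+
+ {"loki/0": {"override": "replace", "services": ["all"], "type": "loki",
+             "location": "http://loki:3100/loki/api/v1/push", "labels": {...}}}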
+ """
+ services_value = ["all"] if enable else ["-all"]
+
+ log_target = {
+ "override": "replace",
+ "services": services_value,
+ "type": "loki",
+ "location": loki_endpoint,
+ }
+ if enable:
+ log_target.update(
+ {
+ "labels": {
+ "product": "Juju",
+ "charm": topology._charm_name,
+ "juju_model": topology._model,
+ "juju_model_uuid": topology._model_uuid,
+ "juju_application": topology._application,
+ "juju_unit": topology._unit,
+ },
+ }
+ )
+
+ return {unit_name: log_target}
+
+ @staticmethod
+ def _build_log_targets(
+ loki_endpoints: Optional[Dict[str, str]], topology: JujuTopology, enable: bool
+ ):
+ """Build all the targets for the log forwarding Pebble layer."""
+ targets = {}
+ if not loki_endpoints:
+ return targets
+
+ for unit_name, endpoint in loki_endpoints.items():
+ targets.update(
+ _PebbleLogClient._build_log_target(
+ unit_name=unit_name,
+ loki_endpoint=endpoint,
+ topology=topology,
+ enable=enable,
+ )
+ )
+ return targets
+
+ @staticmethod
+ def disable_inactive_endpoints(
+ container: Container, active_endpoints: Dict[str, str], topology: JujuTopology
+ ):
+ """Disable forwarding for inactive endpoints by checking against the Pebble plan."""
+ pebble_layer = container.get_plan().to_dict().get("log-targets", None)
+ if not pebble_layer:
+ return
+
+ for unit_name, target in pebble_layer.items():
+ # If the layer is a disabled log forwarding endpoint, skip it
+ if "-all" in target["services"]: # pyright: ignore
+ continue
+
+ if unit_name not in active_endpoints:
+ layer = Layer(
+ { # pyright: ignore
+ "log-targets": _PebbleLogClient._build_log_targets(
+ loki_endpoints={unit_name: "(removed)"},
+ topology=topology,
+ enable=False,
+ )
+ }
+ )
+ container.add_layer(f"{container.name}-log-forwarding", layer=layer, combine=True)
+
+ @staticmethod
+ def enable_endpoints(
+ container: Container, active_endpoints: Dict[str, str], topology: JujuTopology
+ ):
+ """Enable forwarding for the specified Loki endpoints."""
+ layer = Layer(
+ { # pyright: ignore
+ "log-targets": _PebbleLogClient._build_log_targets(
+ loki_endpoints=active_endpoints,
+ topology=topology,
+ enable=True,
+ )
+ }
+ )
+ container.add_layer(f"{container.name}-log-forwarding", layer, combine=True)
+
+
+class LogForwarder(ConsumerBase):
+ """Forward the standard outputs of all workloads operated by a charm to one or multiple Loki endpoints.
+
+ This class implements Pebble log forwarding. Juju >= 3.4 is needed.
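+
+ A minimal usage sketch (assuming a `logging` relation with the
+ `loki_push_api` interface is declared under `requires` in metadata.yaml):
+
+ from charms.loki_k8s.v1.loki_push_api import LogForwarder
+
+ class MyCharm(CharmBase):
+     def __init__(self, *args):
+         super().__init__(*args)
+         self._log_forwarder = LogForwarder(charm=self)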
+ """
+
+ def __init__(
+ self,
+ charm: CharmBase,
+ *,
+ relation_name: str = DEFAULT_RELATION_NAME,
+ alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
+ recursive: bool = True,
+ skip_alert_topology_labeling: bool = False,
+ ):
+ _PebbleLogClient.check_juju_version()
+ super().__init__(
+ charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling
+ )
+ self._charm = charm
+ self._relation_name = relation_name
+
+ on = self._charm.on[self._relation_name]
+ self.framework.observe(on.relation_joined, self._update_logging)
+ self.framework.observe(on.relation_changed, self._update_logging)
+ self.framework.observe(on.relation_departed, self._update_logging)
+ self.framework.observe(on.relation_broken, self._update_logging)
+
+ for container_name in self._charm.meta.containers.keys():
+ snake_case_container_name = container_name.replace("-", "_")
+ self.framework.observe(
+ getattr(self._charm.on, f"{snake_case_container_name}_pebble_ready"),
+ self._on_pebble_ready,
+ )
+
+ def _on_pebble_ready(self, event: PebbleReadyEvent):
+ if not (loki_endpoints := self._retrieve_endpoints_from_relation()):
+ logger.warning("No Loki endpoints available")
+ return
+
+ self._update_endpoints(event.workload, loki_endpoints)
+
+ def _update_logging(self, _):
+ """Update the log forwarding to match the active Loki endpoints."""
+ if not (loki_endpoints := self._retrieve_endpoints_from_relation()):
+ logger.warning("No Loki endpoints available")
+ return
+
+ for container in self._charm.unit.containers.values():
+ self._update_endpoints(container, loki_endpoints)
+
+ def _retrieve_endpoints_from_relation(self) -> dict:
+ loki_endpoints = {}
+
+ # Get the endpoints from relation data
+ for relation in self._charm.model.relations[self._relation_name]:
+ loki_endpoints.update(self._fetch_endpoints(relation))
+
+ return loki_endpoints
+
+ def _update_endpoints(self, container: Container, loki_endpoints: dict):
+ _PebbleLogClient.disable_inactive_endpoints(
+ container=container,
+ active_endpoints=loki_endpoints,
+ topology=self.topology,
+ )
+ _PebbleLogClient.enable_endpoints(
+ container=container, active_endpoints=loki_endpoints, topology=self.topology
+ )
+
+ def is_ready(self, relation: Optional[Relation] = None):
+ """Check if the relation is active and healthy."""
+ if not relation:
+ relations = self._charm.model.relations[self._relation_name]
+ if not relations:
+ return False
+ return all(self.is_ready(relation) for relation in relations)
+
+ try:
+ return bool(self._extract_urls(relation))
+ except (KeyError, json.JSONDecodeError):
+ return False
+
+ def _extract_urls(self, relation: Relation) -> Dict[str, str]:
+ """Default getter function to extract Loki endpoints from a relation.
+
+ Returns:
+ A dictionary of remote units and the respective Loki endpoint.
+ {
+ "loki/0": "http://loki:3100/loki/api/v1/push",
+ "another-loki/0": "http://another-loki:3100/loki/api/v1/push",
+ }
+ """
+ endpoints: Dict = {}
+
+ for unit in relation.units:
+ endpoint = relation.data[unit]["endpoint"]
+ deserialized_endpoint = json.loads(endpoint)
+ url = deserialized_endpoint["url"]
+ endpoints[unit.name] = url
+
+ return endpoints
+
+ def _fetch_endpoints(self, relation: Relation) -> Dict[str, str]:
+ """Fetch Loki Push API endpoints from relation data using the endpoints getter."""
+ endpoints: Dict = {}
+
+ if not self.is_ready(relation):
+ logger.warning(f"The relation '{relation.name}' is not ready yet.")
+ return endpoints
+
+ # if the code gets here, the function won't raise anymore because it's
+ # also called in is_ready()
+ endpoints = self._extract_urls(relation)
+
+ return endpoints
+
+
+class CosTool:
+ """Uses cos-tool to inject label matchers into alert rule expressions and validate rules."""
+
+ _path = None
+ _disabled = False
+
+ def __init__(self, charm):
+ self._charm = charm
+
+ @property
+ def path(self):
+ """Lazy lookup of the path of cos-tool."""
+ if self._disabled:
+ return None
+ if not self._path:
+ self._path = self._get_tool_path()
+ if not self._path:
+ logger.debug("Skipping injection of juju topology as label matchers")
+ self._disabled = True
+ return self._path
+
+ def apply_label_matchers(self, rules) -> dict:
+ """Will apply label matchers to the expression of all alerts in all supplied groups."""
+ if not self.path:
+ return rules
+ for group in rules["groups"]:
+ rules_in_group = group.get("rules", [])
+ for rule in rules_in_group:
+ topology = {}
+ # if the user for some reason has provided juju_unit, we'll need to honor it
+ # in most cases, however, this will be empty
+ for label in [
+ "juju_model",
+ "juju_model_uuid",
+ "juju_application",
+ "juju_charm",
+ "juju_unit",
+ ]:
+ if label in rule["labels"]:
+ topology[label] = rule["labels"][label]
+
+ rule["expr"] = self.inject_label_matchers(rule["expr"], topology)
+ return rules
+
+ def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]:
+ """Will validate correctness of alert rules, returning a boolean and any errors."""
+ if not self.path:
+ logger.debug("`cos-tool` unavailable. Not validating alert correctness.")
+ return True, ""
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ rule_path = Path(tmpdir + "/validate_rule.yaml")
+
+ # Smash "our" rules format into what upstream actually uses, which is more like:
+ #
+ # groups:
+ # - name: foo
+ # rules:
+ # - alert: SomeAlert
+ # expr: up
+ # - alert: OtherAlert
+ # expr: up
+ transformed_rules = {"groups": []} # type: ignore
+ for rule in rules["groups"]:
+ transformed_rules["groups"].append(rule)
+
+ rule_path.write_text(yaml.dump(transformed_rules))
+ args = [str(self.path), "--format", "logql", "validate", str(rule_path)]
+ # noinspection PyBroadException
+ try:
+ self._exec(args)
+ return True, ""
+ except subprocess.CalledProcessError as e:
+ logger.debug("Validating the rules failed: %s", e.output)
+ return False, ", ".join([line for line in e.output if "error validating" in line])
+
+ def inject_label_matchers(self, expression, topology) -> str:
+ """Add label matchers to an expression."""
+ if not topology:
+ return expression
+ if not self.path:
+ logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression)
+ return expression
+ args = [str(self.path), "--format", "logql", "transform"]
+ args.extend(
+ ["--label-matcher={}={}".format(key, value) for key, value in topology.items()]
+ )
+
+ args.extend(["{}".format(expression)])
+ # noinspection PyBroadException
+ try:
+ return self._exec(args)
+ except subprocess.CalledProcessError as e:
+ logger.debug('Applying the expression failed: "%s", falling back to the original', e)
+ return expression
+
+ def _get_tool_path(self) -> Optional[Path]:
+ arch = platform.processor()
+ arch = "amd64" if arch == "x86_64" else arch
+ res = "cos-tool-{}".format(arch)
+ try:
+ path = Path(res).resolve()
+ path.chmod(0o777)
+ return path
+ except NotImplementedError:
+ logger.debug("System lacks support for chmod")
+ except FileNotFoundError:
+ logger.debug('Could not locate cos-tool at: "{}"'.format(res))
+ return None
+
+ def _exec(self, cmd) -> str:
+ result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
+ output = result.stdout.decode("utf-8").strip()
+ return output
diff --git a/charms/kserve-controller/metadata.yaml b/charms/kserve-controller/metadata.yaml
index 0c0ac02..1571ea1 100644
--- a/charms/kserve-controller/metadata.yaml
+++ b/charms/kserve-controller/metadata.yaml
@@ -63,3 +63,6 @@ requires:
interface: kubernetes_manifest
service-accounts:
interface: kubernetes_manifest
+ logging:
+ interface: loki_push_api
+ optional: true
diff --git a/charms/kserve-controller/requirements-integration.in b/charms/kserve-controller/requirements-integration.in
index 57f4d9b..3fda88d 100644
--- a/charms/kserve-controller/requirements-integration.in
+++ b/charms/kserve-controller/requirements-integration.in
@@ -7,3 +7,6 @@ lightkube
pytest-operator
requests
tenacity
+# Pin to >=0.4.0 because the reusable test infrastructure requires that version or newer
+# This prevents pip-compile from trying to pin an earlier version
+charmed-kubeflow-chisme>=0.4.0
diff --git a/charms/kserve-controller/requirements-integration.txt b/charms/kserve-controller/requirements-integration.txt
index 361840b..0997dd9 100644
--- a/charms/kserve-controller/requirements-integration.txt
+++ b/charms/kserve-controller/requirements-integration.txt
@@ -4,29 +4,27 @@
#
# pip-compile requirements-integration.in
#
-aiohttp==3.8.6
+aiohttp==3.9.5
# via -r requirements-integration.in
aiosignal==1.3.1
# via aiohttp
-anyio==4.0.0
- # via httpcore
-appnope==0.1.4
- # via ipython
-asttokens==2.4.0
+anyio==4.4.0
+ # via httpx
+asttokens==2.4.1
# via stack-data
async-timeout==4.0.3
# via aiohttp
-attrs==23.1.0
+attrs==23.2.0
# via
# aiohttp
# jsonschema
backcall==0.2.0
# via ipython
-bcrypt==4.0.1
+bcrypt==4.1.3
# via paramiko
-cachetools==5.3.1
+cachetools==5.3.3
# via google-auth
-certifi==2023.7.22
+certifi==2024.7.4
# via
# httpcore
# httpx
@@ -36,13 +34,11 @@ cffi==1.16.0
# via
# cryptography
# pynacl
-charmed-kubeflow-chisme==0.2.0
+charmed-kubeflow-chisme==0.4.1
# via -r requirements-integration.in
-charset-normalizer==3.3.0
- # via
- # aiohttp
- # requests
-cryptography==41.0.4
+charset-normalizer==3.3.2
+ # via requests
+cryptography==42.0.8
# via paramiko
decorator==5.1.1
# via
@@ -50,33 +46,33 @@ decorator==5.1.1
# ipython
deepdiff==6.2.1
# via charmed-kubeflow-chisme
-exceptiongroup==1.1.3
+exceptiongroup==1.2.1
# via
# anyio
# pytest
-executing==2.0.0
+executing==2.0.1
# via stack-data
-frozenlist==1.4.0
+frozenlist==1.4.1
# via
# aiohttp
# aiosignal
-google-auth==2.23.3
+google-auth==2.31.0
# via kubernetes
h11==0.14.0
# via httpcore
-httpcore==0.18.0
+httpcore==1.0.5
# via httpx
-httpx==0.25.0
+httpx==0.27.0
# via lightkube
-hvac==1.2.1
+hvac==2.3.0
# via juju
-idna==3.4
+idna==3.7
# via
# anyio
# httpx
# requests
# yarl
-importlib-resources==6.1.0
+importlib-resources==6.4.0
# via jsonschema
iniconfig==2.0.0
# via pytest
@@ -86,32 +82,33 @@ ipython==8.12.3
# via ipdb
jedi==0.19.1
# via ipython
-jinja2==3.1.2
+jinja2==3.1.4
# via
# -r requirements-integration.in
# charmed-kubeflow-chisme
# pytest-operator
jsonschema==4.17.3
# via serialized-data-interface
-juju==3.2.2
+juju==3.5.0.0
# via
# -r requirements-integration.in
+ # charmed-kubeflow-chisme
# pytest-operator
-kubernetes==28.1.0
+kubernetes==30.1.0
# via juju
-lightkube==0.14.0
+lightkube==0.15.3
# via
# -r requirements-integration.in
# charmed-kubeflow-chisme
-lightkube-models==1.28.1.4
+lightkube-models==1.30.0.8
# via lightkube
-macaroonbakery==1.3.1
+macaroonbakery==1.3.4
# via juju
-markupsafe==2.1.3
+markupsafe==2.1.5
# via jinja2
-matplotlib-inline==0.1.6
+matplotlib-inline==0.1.7
# via ipython
-multidict==6.0.4
+multidict==6.0.5
# via
# aiohttp
# yarl
@@ -121,47 +118,47 @@ oauthlib==3.2.2
# via
# kubernetes
# requests-oauthlib
-ops==2.14.0
+ops==2.14.1
# via
# charmed-kubeflow-chisme
# serialized-data-interface
ordered-set==4.1.0
# via deepdiff
-packaging==23.2
- # via pytest
-paramiko==2.12.0
+packaging==24.1
+ # via
+ # juju
+ # pytest
+paramiko==3.4.0
# via juju
-parso==0.8.3
+parso==0.8.4
# via jedi
-pexpect==4.8.0
+pexpect==4.9.0
# via ipython
pickleshare==0.7.5
# via ipython
pkgutil-resolve-name==1.3.10
# via jsonschema
-pluggy==1.3.0
+pluggy==1.5.0
# via pytest
-prompt-toolkit==3.0.39
+prompt-toolkit==3.0.47
# via ipython
-protobuf==3.20.3
+protobuf==5.27.2
# via macaroonbakery
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.2
# via stack-data
-pyasn1==0.5.0
+pyasn1==0.6.0
# via
# juju
# pyasn1-modules
# rsa
-pyasn1-modules==0.3.0
+pyasn1-modules==0.4.0
# via google-auth
-pycparser==2.21
+pycparser==2.22
# via cffi
-pygments==2.16.1
+pygments==2.18.0
# via ipython
-pyhcl==0.4.5
- # via hvac
pymacaroons==0.13.0
# via macaroonbakery
pynacl==1.5.0
@@ -173,19 +170,19 @@ pyrfc3339==1.1
# via
# juju
# macaroonbakery
-pyrsistent==0.19.3
+pyrsistent==0.20.0
# via jsonschema
-pytest==7.4.2
+pytest==8.2.2
# via
# pytest-asyncio
# pytest-operator
-pytest-asyncio==0.21.1
+pytest-asyncio==0.21.2
# via pytest-operator
-pytest-operator==0.29.0
+pytest-operator==0.35.0
# via -r requirements-integration.in
-python-dateutil==2.8.2
+python-dateutil==2.9.0.post0
# via kubernetes
-pytz==2023.3.post1
+pytz==2024.1
# via pyrfc3339
pyyaml==6.0.1
# via
@@ -195,7 +192,7 @@ pyyaml==6.0.1
# ops
# pytest-operator
# serialized-data-interface
-requests==2.31.0
+requests==2.32.3
# via
# -r requirements-integration.in
# hvac
@@ -203,11 +200,11 @@ requests==2.31.0
# macaroonbakery
# requests-oauthlib
# serialized-data-interface
-requests-oauthlib==1.3.1
+requests-oauthlib==2.0.0
# via kubernetes
rsa==4.9
# via google-auth
-ruamel-yaml==0.17.35
+ruamel-yaml==0.18.6
# via charmed-kubeflow-chisme
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
@@ -218,17 +215,15 @@ six==1.16.0
# asttokens
# kubernetes
# macaroonbakery
- # paramiko
# pymacaroons
# python-dateutil
-sniffio==1.3.0
+sniffio==1.3.1
# via
# anyio
- # httpcore
# httpx
stack-data==0.6.3
# via ipython
-tenacity==8.2.3
+tenacity==8.5.0
# via
# -r requirements-integration.in
# charmed-kubeflow-chisme
@@ -238,29 +233,30 @@ tomli==2.0.1
# pytest
toposort==1.10
# via juju
-traitlets==5.11.2
+traitlets==5.14.3
# via
# ipython
# matplotlib-inline
-typing-extensions==4.8.0
+typing-extensions==4.12.2
# via
+ # anyio
# ipython
# typing-inspect
typing-inspect==0.9.0
# via juju
-urllib3==1.26.17
+urllib3==2.2.2
# via
# kubernetes
# requests
-wcwidth==0.2.8
+wcwidth==0.2.13
# via prompt-toolkit
-websocket-client==1.6.4
+websocket-client==1.8.0
# via
# kubernetes
# ops
-websockets==8.1
+websockets==12.0
# via juju
-yarl==1.9.2
+yarl==1.9.4
# via aiohttp
-zipp==3.17.0
+zipp==3.19.2
# via importlib-resources
diff --git a/charms/kserve-controller/requirements-unit.txt b/charms/kserve-controller/requirements-unit.txt
index becc8ad..e4faf57 100644
--- a/charms/kserve-controller/requirements-unit.txt
+++ b/charms/kserve-controller/requirements-unit.txt
@@ -19,6 +19,8 @@ charmed-kubeflow-chisme==0.2.0
# -r requirements.in
charset-normalizer==3.3.0
# via requests
+cosl==0.0.12
+ # via -r requirements.in
coverage==7.3.2
# via -r requirements-unit.in
deepdiff==6.2.1
@@ -57,11 +59,12 @@ lightkube-models==1.28.1.4
# lightkube
markupsafe==2.1.3
# via jinja2
-ops==2.14.0
+ops==2.14.1
# via
# -r requirements-unit.in
# -r requirements.in
# charmed-kubeflow-chisme
+ # cosl
# serialized-data-interface
ordered-set==4.1.0
# via deepdiff
@@ -82,6 +85,7 @@ pytest-mock==3.11.1
pyyaml==6.0.1
# via
# -r requirements.in
+ # cosl
# lightkube
# ops
# serialized-data-interface
@@ -104,6 +108,8 @@ tenacity==8.2.3
# via charmed-kubeflow-chisme
tomli==2.0.1
# via pytest
+typing-extensions==4.12.2
+ # via cosl
urllib3==2.0.6
# via requests
websocket-client==1.6.4
diff --git a/charms/kserve-controller/requirements.in b/charms/kserve-controller/requirements.in
index dd79464..d246a88 100644
--- a/charms/kserve-controller/requirements.in
+++ b/charms/kserve-controller/requirements.in
@@ -4,3 +4,5 @@ lightkube-models
ops
pyyaml==6.0.1
serialized-data-interface
+# from loki_k8s.v1.loki_push_api.py
+cosl
diff --git a/charms/kserve-controller/requirements.txt b/charms/kserve-controller/requirements.txt
index ac304f6..efc2246 100644
--- a/charms/kserve-controller/requirements.txt
+++ b/charms/kserve-controller/requirements.txt
@@ -17,6 +17,8 @@ charmed-kubeflow-chisme==0.2.0
# via -r requirements.in
charset-normalizer==3.3.0
# via requests
+cosl==0.0.12
+ # via -r requirements.in
deepdiff==6.2.1
# via charmed-kubeflow-chisme
exceptiongroup==1.1.3
@@ -48,10 +50,11 @@ lightkube-models==1.28.1.4
# lightkube
markupsafe==2.1.3
# via jinja2
-ops==2.14.0
+ops==2.14.1
# via
# -r requirements.in
# charmed-kubeflow-chisme
+ # cosl
# serialized-data-interface
ordered-set==4.1.0
# via deepdiff
@@ -62,6 +65,7 @@ pyrsistent==0.19.3
pyyaml==6.0.1
# via
# -r requirements.in
+ # cosl
# lightkube
# ops
# serialized-data-interface
@@ -82,6 +86,8 @@ sniffio==1.3.0
# httpx
tenacity==8.2.3
# via charmed-kubeflow-chisme
+typing-extensions==4.12.2
+ # via cosl
urllib3==2.0.6
# via requests
websocket-client==1.6.4
diff --git a/charms/kserve-controller/src/charm.py b/charms/kserve-controller/src/charm.py
index 7242d46..e974574 100755
--- a/charms/kserve-controller/src/charm.py
+++ b/charms/kserve-controller/src/charm.py
@@ -28,6 +28,7 @@
GatewayRelationMissingError,
GatewayRequirer,
)
+from charms.loki_k8s.v1.loki_push_api import LogForwarder
from charms.resource_dispatcher.v0.kubernetes_manifests import (
KubernetesManifest,
KubernetesManifestRequirerWrapper,
@@ -163,6 +164,8 @@ def __init__(self, *args):
self._rbac_proxy_container_name = "kube-rbac-proxy"
self.rbac_proxy_container = self.unit.get_container(self._rbac_proxy_container_name)
+ self._logging = LogForwarder(charm=self)
+
@property
def _context(self):
"""Returns a dictionary containing context to be used for rendering."""
diff --git a/charms/kserve-controller/tests/integration/test_charm.py b/charms/kserve-controller/tests/integration/test_charm.py
index e30da83..8613f4f 100644
--- a/charms/kserve-controller/tests/integration/test_charm.py
+++ b/charms/kserve-controller/tests/integration/test_charm.py
@@ -15,6 +15,11 @@
import tenacity
import yaml
from charmed_kubeflow_chisme.kubernetes import KubernetesResourceHandler
+from charmed_kubeflow_chisme.testing import (
+ GRAFANA_AGENT_APP,
+ assert_logging,
+ deploy_and_assert_grafana_agent,
+)
from lightkube.core.exceptions import ApiError
from lightkube.models.meta_v1 import ObjectMeta
from lightkube.resources.core_v1 import (
@@ -189,7 +194,7 @@ async def test_build_and_deploy(ops_test: OpsTest):
config={"kind": "ingress"},
trust=True,
)
- await ops_test.model.add_relation("istio-pilot", "istio-ingressgateway")
+ await ops_test.model.integrate("istio-pilot", "istio-ingressgateway")
await ops_test.model.wait_for_idle(
["istio-pilot", "istio-ingressgateway"],
raise_on_blocked=False,
@@ -212,7 +217,7 @@ async def test_build_and_deploy(ops_test: OpsTest):
application_name=APP_NAME,
trust=True,
)
- await ops_test.model.add_relation("istio-pilot", "kserve-controller")
+ await ops_test.model.integrate("istio-pilot", APP_NAME)
# issuing dummy update_status just to trigger an event
async with ops_test.fast_forward():
@@ -224,6 +229,11 @@ async def test_build_and_deploy(ops_test: OpsTest):
)
assert ops_test.model.applications[APP_NAME].units[0].workload_status == "active"
+ # Deploy grafana-agent-k8s and add all required relations
+ await deploy_and_assert_grafana_agent(
+ ops_test.model, APP_NAME, metrics=False, dashboard=False, logging=True
+ )
+
@pytest.fixture()
def test_namespace(lightkube_client: lightkube.Client):
@@ -304,6 +314,12 @@ def assert_inf_svc_state():
assert_inf_svc_state()
+async def test_logging(ops_test: OpsTest):
+ """Test logging is defined in relation data bag."""
+ app = ops_test.model.applications[GRAFANA_AGENT_APP]
+ await assert_logging(app)
+
+
# # Remove the InferenceService deployed in RawDeployment mode
# lightkube_client.delete(
# inference_service_resource, name=inf_svc_name, namespace=rawdeployment_mode_namespace
@@ -350,15 +366,13 @@ async def test_deploy_knative_dependencies(ops_test: OpsTest):
)
# Relate kserve-controller and knative-serving
- await ops_test.model.add_relation("knative-serving", "kserve-controller")
+ await ops_test.model.integrate("knative-serving", APP_NAME)
# Change deployment mode to Serverless
- await ops_test.model.applications["kserve-controller"].set_config(
- {"deployment-mode": "serverless"}
- )
+ await ops_test.model.applications[APP_NAME].set_config({"deployment-mode": "serverless"})
await ops_test.model.wait_for_idle(
- ["kserve-controller"],
+ [APP_NAME],
raise_on_blocked=False,
status="active",
timeout=90 * 10,
@@ -446,11 +460,11 @@ async def test_configmap_changes_with_config(
lightkube_client (lightkube.Client): The Lightkube client to interact with Kubernetes.
ops_test (OpsTest): The Juju OpsTest fixture to interact with the deployed model.
"""
- await ops_test.model.applications["kserve-controller"].set_config(
+ await ops_test.model.applications[APP_NAME].set_config(
{"custom_images": '{"configmap__batcher": "custom:1.0"}'} # noqa: E501
)
await ops_test.model.wait_for_idle(
- apps=["kserve-controller"], status="active", raise_on_blocked=True, timeout=300
+ apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=300
)
inferenceservice_config = lightkube_client.get(
ConfigMap, CONFIGMAP_NAME, namespace=ops_test.model_name
@@ -461,7 +475,10 @@ async def test_configmap_changes_with_config(
async def test_relate_to_object_store(ops_test: OpsTest):
"""Test if the charm can relate to minio and stay in Active state"""
await ops_test.model.deploy(
- OBJECT_STORAGE_CHARM_NAME, channel="ckf-1.7/stable", config=OBJECT_STORAGE_CONFIG
+ OBJECT_STORAGE_CHARM_NAME,
+ channel="ckf-1.7/stable",
+ config=OBJECT_STORAGE_CONFIG,
+ trust=True,
)
await ops_test.model.wait_for_idle(
apps=[OBJECT_STORAGE_CHARM_NAME],
@@ -470,7 +487,7 @@ async def test_relate_to_object_store(ops_test: OpsTest):
raise_on_error=False,
timeout=600,
)
- await ops_test.model.relate(OBJECT_STORAGE_CHARM_NAME, CHARM_NAME)
+ await ops_test.model.integrate(OBJECT_STORAGE_CHARM_NAME, CHARM_NAME)
await ops_test.model.wait_for_idle(
apps=[CHARM_NAME],
status="active",
@@ -510,10 +527,10 @@ async def test_deploy_resource_dispatcher(ops_test: OpsTest):
idle_period=60,
)
- await ops_test.model.relate(
+ await ops_test.model.integrate(
f"{CHARM_NAME}:service-accounts", f"{RESOURCE_DISPATCHER_CHARM_NAME}:service-accounts"
)
- await ops_test.model.relate(
+ await ops_test.model.integrate(
f"{CHARM_NAME}:secrets", f"{RESOURCE_DISPATCHER_CHARM_NAME}:secrets"
)
@@ -553,8 +570,8 @@ async def test_blocked_on_invalid_config(ops_test: OpsTest):
Args:
ops_test (OpsTest): The Juju OpsTest fixture to interact with the deployed model.
"""
- await ops_test.model.applications["kserve-controller"].set_config({"custom_images": "{"})
+ await ops_test.model.applications[APP_NAME].set_config({"custom_images": "{"})
await ops_test.model.wait_for_idle(
- apps=["kserve-controller"], status="blocked", raise_on_blocked=False, timeout=300
+ apps=[APP_NAME], status="blocked", raise_on_blocked=False, timeout=300
)
- assert ops_test.model.applications["kserve-controller"].units[0].workload_status == "blocked"
+ assert ops_test.model.applications[APP_NAME].units[0].workload_status == "blocked"
diff --git a/charms/kserve-controller/tests/unit/test_charm.py b/charms/kserve-controller/tests/unit/test_charm.py
index 9d92414..0a21e09 100644
--- a/charms/kserve-controller/tests/unit/test_charm.py
+++ b/charms/kserve-controller/tests/unit/test_charm.py
@@ -91,6 +91,13 @@ def mocked_lightkube_client(mocker, mocked_resource_handler):
yield mocked_resource_handler.lightkube_client
+def test_log_forwarding(harness):
+ """Test LogForwarder initialization."""
+ with patch("charm.LogForwarder") as mock_logging:
+ harness.begin()
+ mock_logging.assert_called_once_with(charm=harness.charm)
+
+
def test_events(harness, mocked_resource_handler, mocker):
harness.begin()
on_event = mocker.patch("charm.KServeControllerCharm._on_event")