Skip to content

Commit

Permalink
New Relic Integration
Browse files Browse the repository at this point in the history
  • Loading branch information
msvolenski committed Oct 25, 2023
1 parent db5292c commit e9ed3bf
Show file tree
Hide file tree
Showing 15 changed files with 316 additions and 92 deletions.
46 changes: 46 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -598,8 +598,54 @@ The buildpack includes a variety of telemetry agents that can be configured to c

### New Relic

#### Set up New Relic integration

[Fluent Bit](https://docs.fluentbit.io/manual/) is used to collect Mendix Runtime logs and forward them to [New Relic](https://newrelic.com/).

The metrics are collected by the [New Relic Java Agent](https://docs.newrelic.com/docs/apm/agents/java-agent/features/jvms-page-java-view-app-server-metrics-jmx/) and an integration with the [Telegraf agent](https://docs.influxdata.com/telegraf/).

To enable the integration you must provide the following variables:

| Environment variable | Value example | Default | Description |
|-------------------------|------------------------------------------------|--------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
| `NEW_RELIC_LICENSE_KEY` | `api_key` | - | License Key or API Key ([docs](https://docs.newrelic.com/docs/apis/intro-apis/new-relic-api-keys/)) |
| `NEW_RELIC_METRICS_URI` | `https://metric-api.eu.newrelic.com/metric/v1` | - | Metrics endpoint API ([docs](https://docs.newrelic.com/docs/data-apis/ingest-apis/metric-api/report-metrics-metric-api/#api-endpoint)) |
| `NEW_RELIC_LOGS_URI` | `https://log-api.eu.newrelic.com/log/v1` | - | Logs endpoint API ([docs](https://docs.newrelic.com/docs/logs/log-api/introduction-log-api/)) |
| `NEW_RELIC_APP_NAME` | `MyApp` | application domain name | Optional. Mendix App name shown on New Relic |

:warning: For the first usage of the New Relic integration, the Mendix app should be redeployed after setting the variables up.

#### Tags/Metadata in metrics and logs

In addition to the runtime application logs, the following JSON-formatted metadata is automatically sent to New Relic, both for
the metrics collected by the agent and for the custom ones pushed by Telegraf:

* `environment_id` - unique identifier of the environment;
* `instance_index` - number of the application instance;
* `hostname` - name of the application host;
* `application_name` - default application name, retrieved from domain name;
* `model_version` - model version of the Mendix runtime;
* `runtime_version` - version of the Mendix runtime.

:information_source: `model_version` and `runtime_version` are only available for the custom metrics.

#### Custom tags

Metrics also support custom tags in the `key:value` format.
Some suggested tags that you might want to use are listed below:

* `app:{app_name}` – this enables you to identify all logs sent from your app (for example, **app:customermanagement**)
* `env:{environment_name}` – this enables you to identify logs sent from a particular environment so you can separate out production logs from test logs (for example, **env:accp**)

#### Service-based integration (on-prem only)

To enable New Relic, simply bind a New Relic service to this app and settings will be picked up automatically. Afterwards you have to restage your application to enable the New Relic agent.

This integration does not support logs or custom metrics.

:warning: The default `NEW_RELIC_APP_NAME` for this integration used to be the environment ID of the application. Now the default is the domain name of the application.
If you want to keep using the environment ID, you will have to set this variable to that value yourself.

### Splunk

#### Set up Splunk integration
Expand Down
2 changes: 1 addition & 1 deletion buildpack/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ def cleanup_dependency_cache(cached_dir, dependency_list):
appdynamics.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
dynatrace.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
splunk.stage()
fluentbit.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
newrelic.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
fluentbit.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
mx_java_agent.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR, runtime_version)
telegraf.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR, runtime_version)
datadog.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
Expand Down
104 changes: 64 additions & 40 deletions buildpack/telemetry/fluentbit.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,22 @@
import subprocess
import shutil
import socket
from typing import List, Tuple

import backoff

from buildpack import util
from buildpack.telemetry import splunk
from buildpack.telemetry import newrelic, splunk


NAMESPACE = "fluentbit"
CONF_FILENAME = f"{NAMESPACE}.conf"
FILTER_FILENAMES = ("redaction.lua", "metadata.lua")
# Default settings for Fluent Bit. Each value is read from the environment
# variable of the same name when this module is imported, falling back to
# the default shown here.
FLUENTBIT_ENV_VARS = {
# Port of the local logs endpoint; run() waits until it accepts connections
"FLUENTBIT_LOGS_PORT": os.getenv("FLUENTBIT_LOGS_PORT", default="5170"),
# Lower-cased so that the check against "debug" is case-insensitive
"FLUENTBIT_LOG_LEVEL": os.getenv(
"FLUENTBIT_LOG_LEVEL", default="info"
).lower(),
}


Expand All @@ -23,8 +27,20 @@ def _set_default_env(m2ee):
util.upsert_custom_environment_variable(m2ee, var_name, value)


def stage(buildpack_dir, destination_path, cache_path):
def _get_output_conf_filenames() -> List[str]:
    """
    Return the Fluent Bit output config file names to use.

    Only integrations that are currently enabled contribute a file,
    so only those output files end up in the container.
    """
    # (enabled-check, config file) pairs, evaluated in this order
    integrations = (
        (splunk.is_splunk_enabled, "output_splunk.conf"),
        (newrelic.is_enabled, "output_newrelic.conf"),
    )
    return [conf_name for enabled, conf_name in integrations if enabled()]


def stage(buildpack_dir, destination_path, cache_path):
if not is_fluentbit_enabled():
return

Expand All @@ -36,20 +52,19 @@ def stage(buildpack_dir, destination_path, cache_path):
cache_dir=cache_path,
)

for filename in (CONF_FILENAME, *FILTER_FILENAMES):
output_conf_files = _get_output_conf_filenames()

for filename in (
CONF_FILENAME, *FILTER_FILENAMES, *output_conf_files
):
shutil.copy(
os.path.join(buildpack_dir, "etc", NAMESPACE, filename),
os.path.join(
destination_path,
NAMESPACE,
),
os.path.join(destination_path, NAMESPACE),
)

logging.info("Fluent Bit has been installed successfully.")


def update_config(m2ee):

if not is_fluentbit_enabled():
return

Expand All @@ -68,80 +83,89 @@ def update_config(m2ee):


def run(model_version, runtime_version):

if not is_fluentbit_enabled():
return

fluentbit_dir = os.path.join(
os.path.abspath(".local"),
NAMESPACE,
)

fluentbit_bin_path = os.path.join(
fluentbit_dir,
"fluent-bit",
)

fluentbit_config_path = os.path.join(
fluentbit_dir,
CONF_FILENAME,
)
fluentbit_dir = os.path.join(os.path.abspath(".local"), NAMESPACE)
fluentbit_bin_path = os.path.join(fluentbit_dir, "fluent-bit")
fluentbit_config_path = os.path.join(fluentbit_dir, CONF_FILENAME)
print_logs = _print_logs()

if not os.path.exists(fluentbit_bin_path):
logging.warning(
"Fluent Bit is not installed yet. "
"Please redeploy your application to complete "
"Fluent Bit installation."
)
splunk.print_failed_message()
splunk.integration_complete(success=False)
newrelic.integration_complete(success=False)
return

agent_environment = _set_up_environment(model_version, runtime_version)

logging.info("Starting Fluent Bit...")

subprocess.Popen(
(fluentbit_bin_path, "-c", fluentbit_config_path), env=agent_environment
(fluentbit_bin_path, "-c", fluentbit_config_path, *print_logs),
env=agent_environment,
)

# The runtime does not handle a non-open logs endpoint socket
# gracefully, so wait until it's up
@backoff.on_predicate(backoff.expo, lambda x: x > 0, max_time=10)
@backoff.on_predicate(backoff.expo, lambda x: x > 0, max_time=120)
def _await_logging_endpoint():
return socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect_ex(
("localhost", int(FLUENTBIT_ENV_VARS["FLUENTBIT_LOGS_PORT"]))
)

logging.info("Awaiting Fluent Bit log subscriber...")
if _await_logging_endpoint() == 0:
success = True
if _await_logging_endpoint() != 0:
success = False

_integration_complete(success)
splunk.integration_complete(success)
newrelic.integration_complete(success)


def _integration_complete(success: bool) -> None:
"""Call when the setup is done."""
if success:
logging.info("Fluent Bit log subscriber is ready.")
splunk.print_ready_message()
else:
logging.error(
"Fluent Bit log subscriber was not initialized correctly."
"Fluent Bit log subscriber was not initialized correctly. "
"Application logs will not be shipped to Fluent Bit."
)
splunk.print_failed_message()


def _set_up_environment(model_version, runtime_version):
fluentbit_env_vars = FLUENTBIT_ENV_VARS

env_vars = dict(os.environ.copy())

env_vars["SPLUNK_APP_HOSTNAME"] = util.get_hostname()
env_vars["SPLUNK_APP_NAME"] = util.get_app_from_domain()
env_vars["SPLUNK_APP_RUNTIME_VERSION"] = str(runtime_version)
env_vars["SPLUNK_APP_MODEL_VERSION"] = model_version
env_vars["FLUENTBIT_APP_HOSTNAME"] = util.get_hostname()
env_vars["FLUENTBIT_APP_NAME"] = util.get_app_from_domain()
env_vars["FLUENTBIT_APP_RUNTIME_VERSION"] = str(runtime_version)
env_vars["FLUENTBIT_APP_MODEL_VERSION"] = model_version

return env_vars
fluentbit_env_vars.update(env_vars)
return fluentbit_env_vars


def is_fluentbit_enabled():
"""
The function checks if some modules which requires
Fluent Bit is configured.
"""

return any(
[splunk.is_splunk_enabled()]
[splunk.is_splunk_enabled(), newrelic.is_enabled()]
) # Add other modules, where Fluent Bit is used


def _print_logs() -> Tuple:
    """
    Return the extra command-line arguments controlling Fluent Bit's own logs.

    Fluent Bit currently does not support log rotation, so its logs are
    never written to a real file: with the "debug" log level no extra
    arguments are returned (logs go to stdout), otherwise the logs are
    discarded by pointing the log file at /dev/null.
    """
    debug_active = FLUENTBIT_ENV_VARS["FLUENTBIT_LOG_LEVEL"] == "debug"
    return () if debug_active else ("-l", "/dev/null")
3 changes: 2 additions & 1 deletion buildpack/telemetry/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from lib.m2ee.version import MXVersion
from lib.m2ee.util import strtobool

from . import datadog, appdynamics, dynatrace
from . import appdynamics, datadog, dynatrace, newrelic

METRICS_REGISTRIES_KEY = "Metrics.Registries"

Expand Down Expand Up @@ -136,6 +136,7 @@ def configure_metrics_registry(m2ee):
or get_appmetrics_target()
or appdynamics.machine_agent_enabled()
or dynatrace.is_telegraf_enabled()
or newrelic.is_enabled()
):
allow_list, deny_list = get_apm_filters()
paidapps_registries.append(get_statsd_registry(allow_list, deny_list))
Expand Down
92 changes: 79 additions & 13 deletions buildpack/telemetry/newrelic.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,34 @@
import logging
import os
from typing import Dict, Optional

from buildpack import util

NAMESPACE = "newrelic"
ROOT_DIR = ".local"

# Environment variables that must all be set (non-empty) for is_enabled()
# to report the environment-variable based New Relic integration as enabled.
REQUIRED_NEW_RELIC_ENV_VARS = [
"NEW_RELIC_LICENSE_KEY", "NEW_RELIC_LOGS_URI", "NEW_RELIC_METRICS_URI"
]
# Defaults that _set_default_env() upserts into the runtime's custom
# environment. Both values are computed once, when this module is imported.
NEW_RELIC_ENV_VARS = {
# Explicit NEW_RELIC_APP_NAME wins; otherwise util.get_app_from_domain()
"NEW_RELIC_APP_NAME": os.getenv(
"NEW_RELIC_APP_NAME", util.get_app_from_domain()
),
# Absolute path of <ROOT_DIR>/<NAMESPACE>/newrelic/agent.log, resolved
# against the working directory at import time
"NEW_RELIC_LOG": os.path.join(
os.path.abspath(os.path.join(ROOT_DIR, NAMESPACE)),
"newrelic",
"agent.log",
),
}


def _set_default_env(m2ee):
    """Upsert every entry of NEW_RELIC_ENV_VARS into the custom environment of m2ee."""
    for name in NEW_RELIC_ENV_VARS:
        default_value = NEW_RELIC_ENV_VARS[name]
        util.upsert_custom_environment_variable(m2ee, name, default_value)


def stage(buildpack_dir, install_path, cache_path):
if get_new_relic_license_key():
if _get_new_relic_license_key():
util.resolve_dependency(
f"{NAMESPACE}.agent",
_get_destination_dir(install_path),
Expand All @@ -22,29 +42,75 @@ def _get_destination_dir(dot_local=ROOT_DIR):


def update_config(m2ee, app_name):
if get_new_relic_license_key() is None:
if _get_new_relic_license_key() is None:
logging.debug("Skipping New Relic setup, no license key found in environment")
return
logging.info("Adding new relic")

util.upsert_custom_environment_variable(
m2ee, "NEW_RELIC_LICENSE_KEY", get_new_relic_license_key()
)
util.upsert_custom_environment_variable(m2ee, "NEW_RELIC_APP_NAME", app_name)
util.upsert_custom_environment_variable(
m2ee,
"NEW_RELIC_LOG",
os.path.join(_get_destination_dir(), "newrelic", "agent.log"),
m2ee, "NEW_RELIC_LICENSE_KEY", _get_new_relic_license_key()
)

_set_default_env(m2ee)

util.upsert_javaopts(
m2ee,
f"-javaagent:{os.path.join(_get_destination_dir(), 'newrelic', 'newrelic.jar')}", # noqa: line-too-long
[
f"-javaagent:{os.path.join(_get_destination_dir(), 'newrelic', 'newrelic.jar')}", # noqa: line-too-long
f"-Dnewrelic.config.labels=\"{_get_labels(app_name)}\"",
]
)


def get_new_relic_license_key():
def _get_new_relic_license_key() -> Optional[str]:
"""Get the New Relic's license key."""
# Service-binding based integration (on-prem only)
vcap_services = util.get_vcap_services_data()
if vcap_services and "newrelic" in vcap_services:
return vcap_services["newrelic"][0]["credentials"]["licenseKey"]
return None

return os.getenv("NEW_RELIC_LICENSE_KEY", None)


def is_enabled() -> bool:
    """
    Tell whether the environment-variable based New Relic integration is on.

    Returns True only when every variable listed in
    REQUIRED_NEW_RELIC_ENV_VARS is set to a non-empty value. The
    service-binding based integration (on-prem only) does not depend
    on this check.
    """
    return all(os.getenv(name) for name in REQUIRED_NEW_RELIC_ENV_VARS)


def get_metrics_config() -> Dict:
    """
    Build the New Relic settings consumed by telegraf.

    Returns a dict with "api_key" (from NEW_RELIC_LICENSE_KEY) and
    "metrics_base_url" (from NEW_RELIC_METRICS_URI); a variable that is
    not set yields an empty string.
    """
    license_key = os.getenv("NEW_RELIC_LICENSE_KEY", default="")
    metrics_uri = os.getenv("NEW_RELIC_METRICS_URI", default="")
    return {"api_key": license_key, "metrics_base_url": metrics_uri}


def _get_labels(app_name) -> str:
    """
    Render the metrics tags as a label string for the New Relic agent.

    Each tag from get_metrics_tags(app_name) becomes "key:value" and the
    pairs are joined with ";".
    """
    pairs = []
    for key, value in get_metrics_tags(app_name).items():
        pairs.append(f"{key}:{value}")
    return ";".join(pairs)


def get_metrics_tags(app_name) -> Dict:
    """
    Build the tags to be used by telegraf.

    app_name is reported as "environment_id"; "instance_index" is
    CF_INSTANCE_INDEX converted to int (0 when the variable is unset).
    """
    tags: Dict = {}
    tags["application_name"] = util.get_app_from_domain()
    tags["instance_index"] = int(os.getenv("CF_INSTANCE_INDEX", "0"))
    tags["environment_id"] = app_name
    tags["hostname"] = util.get_hostname()
    return tags


def integration_complete(success: bool) -> None:
    """
    Log the outcome of the New Relic setup.

    Does nothing when the integration is not enabled; otherwise logs an
    info message on success and an error message on failure.
    """
    if not is_enabled():
        return

    if not success:
        logging.error("Failed to configure New Relic.")
        return

    logging.info("New Relic has been configured successfully.")
Loading

0 comments on commit e9ed3bf

Please sign in to comment.