Merge pull request #685 from msvolenski/LM-1560_new_relic_integration
New Relic Integration
msvolenski authored Nov 2, 2023
2 parents bf9126c + b826b65 commit 36064b2
Showing 17 changed files with 357 additions and 107 deletions.
63 changes: 56 additions & 7 deletions README.md
@@ -598,8 +598,55 @@ The buildpack includes a variety of telemetry agents that can be configured to c

### New Relic

#### Set up New Relic integration

[Fluent Bit](https://docs.fluentbit.io/manual/) is used to collect Mendix Runtime logs and ship them to [New Relic](https://newrelic.com/).

Metrics are collected by the [New Relic Java Agent](https://docs.newrelic.com/docs/apm/agents/java-agent/features/jvms-page-java-view-app-server-metrics-jmx/) and through an integration with the [Telegraf agent](https://docs.influxdata.com/telegraf/).

To enable the integration, you must provide the following environment variables:

| Environment variable | Value example | Default | Description |
|-------------------------|------------------------------------------------|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
| `NEW_RELIC_LICENSE_KEY` | `api_key` | - | License Key or API Key ([docs](https://docs.newrelic.com/docs/apis/intro-apis/new-relic-api-keys/)) |
| `NEW_RELIC_METRICS_URI` | `https://metric-api.eu.newrelic.com/metric/v1` | - | Metrics endpoint API ([docs](https://docs.newrelic.com/docs/data-apis/ingest-apis/metric-api/report-metrics-metric-api/#api-endpoint)) |
| `NEW_RELIC_LOGS_URI` | `https://log-api.eu.newrelic.com/log/v1` | - | Logs endpoint API ([docs](https://docs.newrelic.com/docs/logs/log-api/introduction-log-api/)) |
| `NEW_RELIC_APP_NAME` | `MyApp` | application domain name | Optional. Mendix App name shown on New Relic |
| `LOGS_REDACTION` | `true` | `true` | Optional. Enables email address redaction from logs |

:warning: When using the New Relic integration for the first time, the Mendix app must be redeployed after setting up the variables.
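The buildpack only activates the integration when the required variables are present. A minimal sketch of such a check — the function name and exact conditions are illustrative assumptions, not the buildpack's actual `newrelic.is_enabled()` implementation:

```python
import os

# Variables the README above marks as required (the optional ones are
# not part of the enablement check in this sketch).
REQUIRED_VARS = (
    "NEW_RELIC_LICENSE_KEY",
    "NEW_RELIC_METRICS_URI",
    "NEW_RELIC_LOGS_URI",
)


def is_new_relic_enabled() -> bool:
    """Return True only when every required variable is set and non-empty."""
    return all(os.getenv(var) for var in REQUIRED_VARS)
```

With this check in place, leaving any one of the three variables unset keeps the integration dormant, which matches the table's lack of defaults for them.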

#### Tags/Metadata in metrics and logs

In addition to the runtime application logs, the following JSON-formatted metadata is automatically sent to New Relic, both for
the metrics collected by the agent and for the custom metrics pushed by Telegraf:

* `environment_id` - unique identifier of the environment;
* `instance_index` - number of the application instance;
* `hostname` - name of the application host;
* `application_name` - default application name, retrieved from domain name;
* `model_version` - model version of the Mendix runtime;
* `runtime_version` - version of the Mendix runtime.

:information_source: `model_version` and `runtime_version` are only available for custom metrics.
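Assembling that metadata can be sketched as follows; the helper and its signature are hypothetical, but the field names mirror the list above, including the restriction that the version fields only accompany custom metrics:

```python
import json


def build_metadata(environment_id, instance_index, hostname,
                   application_name, model_version=None, runtime_version=None):
    """Build the JSON metadata attached to logs and metrics (illustrative)."""
    metadata = {
        "environment_id": environment_id,
        "instance_index": instance_index,
        "hostname": hostname,
        "application_name": application_name,
    }
    # model_version / runtime_version are only attached to custom metrics,
    # so callers shipping plain logs leave them as None.
    if model_version is not None:
        metadata["model_version"] = model_version
    if runtime_version is not None:
        metadata["runtime_version"] = runtime_version
    return json.dumps(metadata)
```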

#### Custom tags

You can also set custom tags in the format `key:value`.
Some suggested tags that you might want to use:

* `app:{app_name}` – this enables you to identify all logs sent from your app (for example, **app:customermanagement**)
* `env:{environment_name}` – this enables you to identify logs sent from a particular environment so you can separate out production logs from test logs (for example, **env:accp**)
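Splitting such `key:value` strings into a tag mapping is straightforward; this helper is an illustrative sketch, not part of the buildpack's API:

```python
def parse_tags(raw_tags):
    """Split `key:value` tag strings into a dict; skip malformed entries."""
    tags = {}
    for raw in raw_tags:
        # partition keeps any further colons inside the value intact
        key, sep, value = raw.partition(":")
        if sep and key and value:
            tags[key] = value
    return tags
```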

#### Service-binding integration (on-prem only) - DEPRECATED

To enable New Relic, bind a New Relic service to this app; its settings are picked up automatically. Afterwards, restage your application to enable the New Relic agent.

This integration does not support logs or custom metrics.

:warning: The default `NEW_RELIC_APP_NAME` for this integration used to be the environment ID of the application. The value now defaults to the application's domain name.
If you want to keep using the environment ID, set this variable to that value yourself.
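The fallback described above amounts to an environment-variable override with a domain-name default; a minimal sketch, assuming a hypothetical `resolve_app_name` helper:

```python
import os


def resolve_app_name(domain_name):
    """Return NEW_RELIC_APP_NAME when set, else the app's domain name.

    Setting NEW_RELIC_APP_NAME to the environment ID restores the
    previous default behaviour.
    """
    return os.getenv("NEW_RELIC_APP_NAME") or domain_name
```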

### Splunk

#### Set up Splunk integration
@@ -611,12 +658,13 @@ To enable Splunk integration for a Mendix application, following environment var

:warning: When using the Splunk integration for the first time, the Mendix app must be **redeployed** after setting up the variables.

| Environment variable | Value example | Default | Description |
|-|-|-|-|
| `SPLUNK_HOST` | `test.splunkcloud.com` | - | Host of Splunk Cloud without 'http://' |
| `SPLUNK_PORT` | `8088` | `8088` | Port of Splunk Cloud |
| `SPLUNK_TOKEN`¹ | `uuid token` | - | Token from Splunk Cloud dashboard |
| `SPLUNK_LOGS_REDACTION` | `true` | `true` | If `true` emails in log message are redacted |
| Environment variable | Value example | Default | Description |
|-------------------------|------------------------|---------|--------------------------------------------------------------------------------------------|
| `SPLUNK_HOST` | `test.splunkcloud.com` | - | Host of Splunk Cloud without 'http://' |
| `SPLUNK_PORT` | `8088` | `8088` | Port of Splunk Cloud |
| `SPLUNK_TOKEN`¹ | `uuid token` | - | Token from Splunk Cloud dashboard |
| `SPLUNK_LOGS_REDACTION` | `true`                 | `true`  | **DEPRECATED** - use `LOGS_REDACTION` instead. If `true`, email addresses in log messages are redacted |
| `LOGS_REDACTION` | `true` | `true` | Enables email address redaction from logs |

1) To create a new token on the Splunk Cloud dashboard, go to `Settings -> Data Input -> HTTP Event Collector` and click
the `New Token` button in the top-right corner of the page.
@@ -724,7 +772,8 @@ Additionally, the following integration-specific variables are available:
| ------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
| `DATADOG_DATABASE_DISKSTORAGE_METRIC` | `true` | Enables a metric denoting the disk storage size available to the database. This metric is set in the `DATABASE_DISKSTORAGE` environment variable. |
| `DATADOG_DATABASE_RATE_COUNT_METRICS` | `false` | Enables additional rate / count database metrics currently not compatible with the Datadog PostgreSQL integration |
| `DATADOG_LOGS_REDACTION` | `true` | Enables email address redaction from logs |
| `DATADOG_LOGS_REDACTION`              | `true`        | **DEPRECATED** - use `LOGS_REDACTION` instead. Enables email address redaction from logs |
| `LOGS_REDACTION` | `true` | Enables email address redaction from logs |
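The precedence implied by the table — the shared `LOGS_REDACTION` variable wins, and the deprecated integration-specific variable is only consulted as a fallback that defaults to on — mirrors the buildpack's `_is_logs_redaction_enabled` helper shown further down this diff. A self-contained sketch, with a stand-in `strtobool` replacing `lib.m2ee.util.strtobool`:

```python
import os


def strtobool(value):
    # Minimal stand-in for lib.m2ee.util.strtobool
    return value.strip().lower() in ("y", "yes", "t", "true", "on", "1")


def is_logs_redaction_enabled():
    """Redaction is on unless explicitly disabled."""
    # The shared variable takes precedence when present
    logs_redaction = os.getenv("LOGS_REDACTION")
    if logs_redaction is not None:
        return strtobool(logs_redaction)
    # DEPRECATED fallback: Datadog-specific variable, on by default
    return strtobool(os.environ.get("DATADOG_LOGS_REDACTION", "true"))
```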

To receive metrics from the runtime, the Mendix Java Agent is added to the runtime as Java agent. This agent can be configured by passing a JSON in the environment variable `METRICS_AGENT_CONFIG` as described in [Datadog for v4 Mendix Cloud](https://docs.mendix.com/developerportal/operate/datadog-metrics).

2 changes: 1 addition & 1 deletion buildpack/stage.py
@@ -200,8 +200,8 @@ def cleanup_dependency_cache(cached_dir, dependency_list):
appdynamics.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
dynatrace.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
splunk.stage()
fluentbit.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
newrelic.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
fluentbit.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
mx_java_agent.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR, runtime_version)
telegraf.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR, runtime_version)
datadog.stage(BUILDPACK_DIR, DOT_LOCAL_LOCATION, CACHE_DIR)
9 changes: 9 additions & 0 deletions buildpack/telemetry/datadog.py
@@ -80,6 +80,15 @@ def _is_tracing_enabled():

# Toggles logs redaction (email addresses are replaced by a generic string)
def _is_logs_redaction_enabled():
"""Check if logs should be redacted."""

# Use this, if it is set
logs_redaction = os.getenv("LOGS_REDACTION")
if logs_redaction is not None:
return strtobool(logs_redaction)

# Turned on by default
# DEPRECATED - Datadog-specific LOGS_REDACTION variable
return strtobool(os.environ.get("DATADOG_LOGS_REDACTION", "true"))


124 changes: 83 additions & 41 deletions buildpack/telemetry/fluentbit.py
@@ -3,18 +3,22 @@
import subprocess
import shutil
import socket
from typing import List, Tuple

import backoff

from buildpack import util
from buildpack.telemetry import splunk

from buildpack.telemetry import newrelic, splunk
from lib.m2ee.util import strtobool

NAMESPACE = "fluentbit"
CONF_FILENAME = f"{NAMESPACE}.conf"
FILTER_FILENAMES = ("redaction.lua", "metadata.lua")
FLUENTBIT_ENV_VARS = {
"FLUENTBIT_LOGS_PORT": os.getenv("FLUENTBIT_LOGS_PORT", default="5170"),
"FLUENTBIT_LOG_LEVEL": os.getenv(
"FLUENTBIT_LOG_LEVEL", default="info"
).lower(),
}


@@ -23,8 +27,20 @@ def _set_default_env(m2ee):
util.upsert_custom_environment_variable(m2ee, var_name, value)


def stage(buildpack_dir, destination_path, cache_path):
def _get_output_conf_filenames() -> List[str]:
"""
Determine the output configs to use. Only enabled integrations
will have the output file in the container.
"""
output_conf_files: List[str] = []
if splunk.is_splunk_enabled():
output_conf_files.append("output_splunk.conf")
if newrelic.is_enabled():
output_conf_files.append("output_newrelic.conf")
return output_conf_files


def stage(buildpack_dir, destination_path, cache_path):
if not is_fluentbit_enabled():
return

@@ -36,20 +52,19 @@ def stage(buildpack_dir, destination_path, cache_path):
cache_dir=cache_path,
)

for filename in (CONF_FILENAME, *FILTER_FILENAMES):
output_conf_files = _get_output_conf_filenames()

for filename in (
CONF_FILENAME, *FILTER_FILENAMES, *output_conf_files
):
shutil.copy(
os.path.join(buildpack_dir, "etc", NAMESPACE, filename),
os.path.join(
destination_path,
NAMESPACE,
),
os.path.join(destination_path, NAMESPACE),
)

logging.info("Fluent Bit has been installed successfully.")


def update_config(m2ee):

if not is_fluentbit_enabled():
return

@@ -68,80 +83,107 @@


def run(model_version, runtime_version):

if not is_fluentbit_enabled():
return

fluentbit_dir = os.path.join(
os.path.abspath(".local"),
NAMESPACE,
)

fluentbit_bin_path = os.path.join(
fluentbit_dir,
"fluent-bit",
)

fluentbit_config_path = os.path.join(
fluentbit_dir,
CONF_FILENAME,
)
fluentbit_dir = os.path.join(os.path.abspath(".local"), NAMESPACE)
fluentbit_bin_path = os.path.join(fluentbit_dir, "fluent-bit")
fluentbit_config_path = os.path.join(fluentbit_dir, CONF_FILENAME)
print_logs = _print_logs()

if not os.path.exists(fluentbit_bin_path):
logging.warning(
"Fluent Bit is not installed yet. "
"Please redeploy your application to complete "
"Fluent Bit installation."
)
splunk.print_failed_message()
splunk.integration_complete(success=False)
newrelic.integration_complete(success=False)
return

agent_environment = _set_up_environment(model_version, runtime_version)

logging.info("Starting Fluent Bit...")

subprocess.Popen(
(fluentbit_bin_path, "-c", fluentbit_config_path), env=agent_environment
(fluentbit_bin_path, "-c", fluentbit_config_path, *print_logs),
env=agent_environment,
)

# The runtime does not handle a non-open logs endpoint socket
# gracefully, so wait until it's up
@backoff.on_predicate(backoff.expo, lambda x: x > 0, max_time=10)
@backoff.on_predicate(backoff.expo, lambda x: x > 0, max_time=120)
def _await_logging_endpoint():
return socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect_ex(
("localhost", int(FLUENTBIT_ENV_VARS["FLUENTBIT_LOGS_PORT"]))
)

logging.info("Awaiting Fluent Bit log subscriber...")
if _await_logging_endpoint() == 0:
success = True
if _await_logging_endpoint() != 0:
success = False

_integration_complete(success)
splunk.integration_complete(success)
newrelic.integration_complete(success)


def _integration_complete(success: bool) -> None:
"""Call when the setup is done."""
if success:
logging.info("Fluent Bit log subscriber is ready.")
splunk.print_ready_message()
else:
logging.error(
"Fluent Bit log subscriber was not initialized correctly."
"Fluent Bit log subscriber was not initialized correctly. "
"Application logs will not be shipped to Fluent Bit."
)
splunk.print_failed_message()


def _set_up_environment(model_version, runtime_version):
fluentbit_env_vars = FLUENTBIT_ENV_VARS

env_vars = dict(os.environ.copy())

env_vars["SPLUNK_APP_HOSTNAME"] = util.get_hostname()
env_vars["SPLUNK_APP_NAME"] = util.get_app_from_domain()
env_vars["SPLUNK_APP_RUNTIME_VERSION"] = str(runtime_version)
env_vars["SPLUNK_APP_MODEL_VERSION"] = model_version
env_vars["FLUENTBIT_APP_HOSTNAME"] = util.get_hostname()
env_vars["FLUENTBIT_APP_NAME"] = util.get_app_from_domain()
env_vars["FLUENTBIT_APP_RUNTIME_VERSION"] = str(runtime_version)
env_vars["FLUENTBIT_APP_MODEL_VERSION"] = model_version

env_vars["LOGS_REDACTION"] = str(_is_logs_redaction_enabled())

return env_vars
fluentbit_env_vars.update(env_vars)
return fluentbit_env_vars


def is_fluentbit_enabled():
"""
Check whether any integration that requires
Fluent Bit is configured.
"""

return any(
[splunk.is_splunk_enabled()]
[splunk.is_splunk_enabled(), newrelic.is_enabled()]
) # Add other modules, where Fluent Bit is used


def _print_logs() -> Tuple:
"""Discard logs unless debug is active."""
# Fluent Bit currently does not support log rotation, so logs are
# not written to a file. If debug is on, send them to stdout.
if FLUENTBIT_ENV_VARS["FLUENTBIT_LOG_LEVEL"] == "debug":
return tuple()
return "-l", "/dev/null"


def _is_logs_redaction_enabled() -> bool:
"""Check if logs should be redacted."""

# Use this, if it is set
logs_redaction = os.getenv("LOGS_REDACTION")
if logs_redaction is not None:
return bool(strtobool(logs_redaction))

# DEPRECATED - Splunk-specific LOGS_REDACTION variable
if splunk.is_splunk_enabled():
return bool(strtobool(os.getenv("SPLUNK_LOGS_REDACTION", "true")))

# Turned on by default
return True
3 changes: 2 additions & 1 deletion buildpack/telemetry/metrics.py
@@ -18,7 +18,7 @@
from lib.m2ee.version import MXVersion
from lib.m2ee.util import strtobool

from . import datadog, appdynamics, dynatrace
from . import appdynamics, datadog, dynatrace, newrelic

METRICS_REGISTRIES_KEY = "Metrics.Registries"

@@ -136,6 +136,7 @@ def configure_metrics_registry(m2ee):
or get_appmetrics_target()
or appdynamics.machine_agent_enabled()
or dynatrace.is_telegraf_enabled()
or newrelic.is_enabled()
):
allow_list, deny_list = get_apm_filters()
paidapps_registries.append(get_statsd_registry(allow_list, deny_list))
