[Issue #3127] Add a New Relic agent to the API (#3130)

## Summary Fixes #3127 ### Time to review: 10 mins ## Changes proposed Add a New Relic ini file, dependency, and lightweight utilities to send custom events. ## Context for reviewers New Relic monitoring is needed for the application and the .ini file is the starting point for configuration. Environment-specific configuration is available as well. The `NEW_RELIC_LICENSE_KEY` will be provided as an environment variable (command line?) to the application in non-local environments. ## Additional information Local env reports to a fake collector. Other envs will report properly to NR. <img width="940" alt="Screenshot 2024-12-06 at 2 32 34 PM" src="https://github.com/user-attachments/assets/4110dd94-7ff8-47d9-a88f-2a5185d757af"> Local env running gunicorn showing NewRelic initializing and sending events to the fake collector: <img width="1409" alt="Screenshot 2024-12-09 at 3 08 45 PM" src="https://github.com/user-attachments/assets/a940309c-702a-4144-8f63-f179db76c997"> <img width="1385" alt="Screenshot 2024-12-09 at 3 08 38 PM" src="https://github.com/user-attachments/assets/0fd795a5-1a4c-4d4f-b86c-6f6edc25363e">
HHS · Dec 9, 2024 · 21e5c05 · 21e5c05
1 parent 87519fd
commit 21e5c05
Show file tree

Hide file tree

Showing 9 changed files with 400 additions and 5 deletions.
diff --git a/.dockleconfig b/.dockleconfig
@@ -3,4 +3,4 @@
 # DOCKLE_ACCEPT_FILES="file1,path/to/file2,file3/path,etc"
 # https://github.com/goodwithtech/dockle#accept-suspicious-environment-variables--files--file-extensions
 # The apiflask/settings file is a stub file that apiflask creates, and has no sensitive data in. We are ignoring it since it is unused
-DOCKLE_ACCEPT_FILES=api/.venv/lib/python3.13/site-packages/apiflask/settings.py,analytics/.venv/lib/python3.13/site-packages/jedi/settings.py
+DOCKLE_ACCEPT_FILES=api/.venv/lib/python3.13/site-packages/newrelic/api/settings.py,api/.venv/lib/python3.13/site-packages/apiflask/settings.py,analytics/.venv/lib/python3.13/site-packages/jedi/settings.py
diff --git a/analytics/src/analytics/logs/ecs_background_task.py b/analytics/src/analytics/logs/ecs_background_task.py
@@ -63,8 +63,6 @@ def _ecs_background_task_impl(task_name: str) -> Generator[None, None, None]:
     start = time.perf_counter()
     _add_log_metadata(task_name)
 
-    # initialize new relic here when we add that
-
     logger.info("Starting ECS task %s", task_name)
 
     try:

diff --git a/api/newrelic.ini b/api/newrelic.ini
@@ -0,0 +1,270 @@
+# ---------------------------------------------------------------------------
+
+#
+# This file configures the New Relic Python Agent.
+#
+# The path to the configuration file should be supplied to the function
+# newrelic.agent.initialize() when the agent is being initialized.
+#
+# The configuration file follows a structure similar to what you would
+# find for Microsoft Windows INI files. For further information on the
+# configuration file format see the Python ConfigParser documentation at:
+#
+#    http://docs.python.org/library/configparser.html
+#
+# For further discussion on the behaviour of the Python agent that can
+# be configured via this configuration file see:
+#
+#    https://docs.newrelic.com/docs/apm/agents/python-agent/configuration/python-agent-configuration/
+#
+
+# ---------------------------------------------------------------------------
+
+# Here are the settings that are common to all environments.
+
+[newrelic]
+
+# You must specify the license key associated with your New
+# Relic account. This key binds the Python Agent's data to your
+# account in the New Relic service. For more information on
+# storing and generating license keys, see
+# https://docs.newrelic.com/docs/apis/intro-apis/new-relic-api-keys/#ingest-license-key
+# license_key = # Supplied through NEW_RELIC_LICENSE_KEY by AWS SSM.
+
+# The application name. Set this to be the name of your
+# application as you would like it to show up in New Relic UI.
+# The UI will then auto-map instances of your application into an
+# entry on your home dashboard page. You can also specify multiple
+# app names to group your aggregated data. For further details,
+# please see:
+# https://docs.newrelic.com/docs/apm/agents/manage-apm-agents/app-naming/use-multiple-names-app/
+# app_name = # Parameterized by environment at the end of this file.
+
+# When "true", the agent collects performance data about your
+# application and reports this data to the New Relic UI at
+# newrelic.com. This global switch is normally overridden for
+# each environment below.
+monitor_mode = false
+
+# Sets the name of a file to log agent messages to. Whatever you
+# set this to, you must ensure that the permissions for the
+# containing directory and the file itself are correct, and
+# that the user that your web application runs as can write out
+# to the file. If not able to write out a log file, it is also
+# possible to say "stderr" and output to standard error output.
+# This would normally result in output appearing in your web
+# server log.
+# TODO: Figure out where (in CloudWatch or Splunk) New Relic agent messages may need to be sent.
+log_file = stderr
+
+# Sets the level of detail of messages sent to the log file, if
+# a log file location has been provided. Possible values, in
+# increasing order of detail, are: "critical", "error", "warning",
+# "info" and "debug". When reporting any agent issues to New
+# Relic technical support, the most useful setting for the
+# support engineers is "debug". However, this can generate a lot
+# of information very quickly, so it is best not to keep the
+# agent at this level for longer than it takes to reproduce the
+# problem you are experiencing.
+log_level = info
+
+# High Security Mode enforces certain security settings, and prevents
+# them from being overridden, so that no sensitive data is sent to New
+# Relic. Enabling High Security Mode means that request parameters are
+# not collected and SQL can not be sent to New Relic in its raw form.
+# To activate High Security Mode, it must be set to 'true' in this
+# local .ini configuration file AND be set to 'true' in the
+# server-side configuration in the New Relic user interface. For
+# details, see
+# https://docs.newrelic.com/docs/subscriptions/high-security
+high_security = false
+
+# The Python Agent will attempt to connect directly to the New
+# Relic service. If there is an intermediate firewall between
+# your host and the New Relic service that requires you to use a
+# HTTP proxy, then you should set both the "proxy_host" and
+# "proxy_port" settings to the required values for the HTTP
+# proxy. The "proxy_user" and "proxy_pass" settings should
+# additionally be set if proxy authentication is implemented by
+# the HTTP proxy. The "proxy_scheme" setting dictates what
+# protocol scheme is used in talking to the HTTP proxy. This
+# would normally always be set as "http" which will result in the
+# agent then using a SSL tunnel through the HTTP proxy for end to
+# end encryption.
+# proxy_scheme = http
+# proxy_host = hostname
+# proxy_port = 8080
+# proxy_user =
+# proxy_pass =
+
+# Capturing request parameters is off by default. To enable the
+# capturing of request parameters, first ensure that the setting
+# "attributes.enabled" is set to "true" (the default value), and
+# then add "request.parameters.*" to the "attributes.include"
+# setting. For details about attributes configuration, please
+# consult the documentation.
+# TODO: Figure out if add'l attrs will be important to capture (e.g. in events or transaction traces) later on.
+# attributes.enabled = true
+# attributes.include = request.parameters.*
+
+# The transaction tracer captures deep information about slow
+# transactions and sends this to the UI on a periodic basis. The
+# transaction tracer is enabled by default. Set this to "false"
+# to turn it off.
+transaction_tracer.enabled = true
+
+# Threshold in seconds for when to collect a transaction trace.
+# When the response time of a controller action exceeds this
+# threshold, a transaction trace will be recorded and sent to
+# the UI. Valid values are any positive float value, or (default)
+# "apdex_f", which will use the threshold for a dissatisfying
+# Apdex controller action - four times the Apdex T value.
+transaction_tracer.transaction_threshold = apdex_f
+
+# When the transaction tracer is on, SQL statements can
+# optionally be recorded. The recorder has three modes, "off"
+# which sends no SQL, "raw" which sends the SQL statement in its
+# original form, and "obfuscated", which strips out numeric and
+# string literals.
+transaction_tracer.record_sql = obfuscated
+
+# Threshold in seconds for when to collect stack trace for a SQL
+# call. In other words, when SQL statements exceed this
+# threshold, then capture and send to the UI the current stack
+# trace. This is helpful for pinpointing where long SQL calls
+# originate from in an application.
+transaction_tracer.stack_trace_threshold = 0.5
+
+# Determines whether the agent will capture query plans for slow
+# SQL queries. Only supported in MySQL and PostgreSQL. Set this
+# to "false" to turn it off.
+transaction_tracer.explain_enabled = true
+
+# Threshold for query execution time below which query plans
+# will not be captured. Relevant only when "explain_enabled"
+# is true.
+transaction_tracer.explain_threshold = 0.5
+
+# Space separated list of function or method names in form
+# 'module:function' or 'module:class.function' for which
+# additional function timing instrumentation will be added.
+transaction_tracer.function_trace =
+
+# The error collector captures information about uncaught
+# exceptions or logged exceptions and sends them to UI for
+# viewing. The error collector is enabled by default. Set this
+# to "false" to turn it off. For more details on errors, see
+# https://docs.newrelic.com/docs/apm/agents/manage-apm-agents/agent-data/manage-errors-apm-collect-ignore-or-mark-expected/
+error_collector.enabled = true
+
+# To stop specific errors from reporting to the UI, set this to
+# a space separated list of the Python exception type names to
+# ignore. The exception name should be of the form 'module:class'.
+#
+# Explicitly not on the list for now:
+# - pydantic.error_wrappers:ValidationError (These seem like real coding issues)
+#
+# Note that most of these except for UnsupportedMediaTypeProblem are 400 responses.
+#
+error_collector.ignore_classes = 
+
+# Expected errors are reported to the UI but will not affect the
+# Apdex or error rate. To mark specific errors as expected, set this
+# to a space separated list of the Python exception type names to
+# treat as expected. The exception name should be of the form 'module:class'.
+error_collector.expected_classes =
+
+# Status codes ignored by default: 100-102 200-208 226 300-308 404
+
+# Additional status codes to ignore reporting:
+# 401: Unauthorized - Invalid or missing JWT
+# 402: Payment Required - Employer does not have withholding data
+# 403: Forbidden - User does not have access to endpoint or resource
+# 405: Method Not Allowed
+# 406: Not Acceptable - Invalid Accept header (API does not support it)
+# 415: Unsupported Media Type - Invalid media types for upload
+# 503: Service Unavailable - Temporary service unavailability. High volume should trigger a New Relic alarm.
+#
+# Status codes that we may want to ignore in the future:
+# 
+# - 400: Bad Request - Validation and extra parameter errors. Unclear if we want to catch pydantic ValidationErrors here so we're keeping them for now. Instead, selectively ignore specific error classes above.
+# - 422: Unprocessable Entity - Haven't seen these yet so we'll report them.
+# - 504: Gateway Timeout - We do not expect any 504s to be thrown from the API server itself. This should come from the API Gateway that sits in front of it.
+#
+error_collector.ignore_status_codes = 401 402 403 404 405 406 415 503
+
+# Browser monitoring is the Real User Monitoring feature of the UI.
+# For those Python web frameworks that are supported, this
+# setting enables the auto-insertion of the browser monitoring
+# JavaScript fragments.
+browser_monitoring.auto_instrument = false
+
+# A thread profiling session can be scheduled via the UI when
+# this option is enabled. The thread profiler will periodically
+# capture a snapshot of the call stack for each active thread in
+# the application to construct a statistically representative
+# call tree. For more details on the thread profiler tool, see
+# https://docs.newrelic.com/docs/apm/apm-ui-pages/events/thread-profiler-tool/
+thread_profiler.enabled = true
+
+# Your application deployments can be recorded through the
+# New Relic REST API. To use this feature provide your API key
+# below then use the `newrelic-admin record-deploy` command.
+# api_key =
+
+# Distributed tracing lets you see the path that a request takes
+# through your distributed system. For more information, please
+# consult our distributed tracing planning guide.
+# https://docs.newrelic.com/docs/transition-guide-distributed-tracing
+distributed_tracing.enabled = true
+
+# When an error occurs while storing data in the database, the tracing solution captures the
+# database query and sends the full, unscrubbed message to New Relic. Enabling this setting
+# ensures that no PII is captured in the messages sent to New Relic.
+strip_exception_messages.enabled = true
+
+# ---------------------------------------------------------------------------
+
+#
+# The application environments. These are specific settings which
+# override the common environment settings. The settings related to a
+# specific environment will be used when the environment argument to the
+# newrelic.agent.initialize() function has been defined to be one of
+# "local", "dev", "staging", or "prod".
+#
+
+[newrelic:local] 
+# Don't turn on data reporting by default when running the API locally.
+#
+# To enable New Relic locally, set the following variables:
+# - developer_mode: false
+# - monitor_mode: true
+# - license_key: retrieved from New Relic here: https://one.newrelic.com/launcher/api-keys-ui.launcher
+#
+# NOTE: DO NOT COMMIT THE LICENSE KEY IN A GIT COMMIT.
+#
+# Less scary note: do not wrap your license key in quotes, it should look like this:
+#   license_key=1234abcd
+#
+app_name = SIMPLER-GRANTS-API-LOCAL
+developer_mode = true
+monitor_mode = false
+license_key=replace_me
+
+application_logging.enabled = false
+application_logging.forwarding.enabled = false
+application_logging.local_decorating.enabled = false
+
+[newrelic:staging]
+app_name = SIMPLER-GRANTS-API-STAGING
+monitor_mode = true
+
+[newrelic:prod]
+app_name = SIMPLER-GRANTS-API-PROD
+monitor_mode = true
+
+[newrelic:dev]
+app_name = SIMPLER-GRANTS-API-DEV
+monitor_mode = true
+
+# ---------------------------------------------------------------------------
diff --git a/api/poetry.lock b/api/poetry.lock
diff --git a/api/pyproject.toml b/api/pyproject.toml
@@ -4,6 +4,7 @@ version = "0.1.0"
 description = "Back end API for simpler.grants.gov"
 packages = [{ include = "src" }]
 authors = ["Nava Engineering <[email protected]>"]
+include = ["newrelic.ini"]
 
 [tool.poetry.dependencies]
 # See /documentation/api/package-depedency-management.md#Upgrading Python
@@ -26,6 +27,7 @@ pydantic-settings = "^2.0.3"
 flask-cors = "^5.0.0"
 opensearch-py = "^2.5.0"
 pyjwt = "^2.9.0"
+newrelic = "10.3.1"
 
 [tool.poetry.group.dev.dependencies]
 black = "^24.0.0"

diff --git a/api/src/adapters/newrelic/__init__.py b/api/src/adapters/newrelic/__init__.py
@@ -0,0 +1,14 @@
+import logging
+import os
+
+import newrelic.agent
+
+logger = logging.getLogger(__name__)
+
+
+def init_newrelic() -> None:
+    logger.info("Initializing New Relic")
+    newrelic.agent.initialize(
+        config_file=os.path.join(os.path.dirname(__file__), "../../..", "newrelic.ini"),
+        environment=os.environ.get("ENVIRONMENT", "local"),
+    )