Skip to content

Commit

Permalink
chore(service-naming): improve inferred service naming algorithm [bac…
Browse files Browse the repository at this point in the history
…kport 2.17] (#11475)

Backport 110dcfa from #11458 to 2.17.

## Motivation

Updates inferred service naming algorithm. Previously, the service
naming algorithm would skip over any flag arguments (args starting with
`-`), as well as the following argument as it was assumed to be the
argument value. The update changes the algorithm to run again if no
service name was found the first time. The second time, the algorithm
will not skip arguments that were preceded by a flag argument.

Effect:
-- Previous Behavior: `pytest --no-cov my/file.py` -> service name =
`""`
-- New Behavior: `pytest --no-cov my/file.py` -> service name =
`"my.file"`

Additionally adds check to ensure a python module included after `-m`
module is importable before using it as the service name.

Also updates the service naming algorithm to use try-catches to prevent
any unforeseen errors.

Adds many more test cases, fixes a few snapshots.
  • Loading branch information
github-actions[bot] authored Nov 21, 2024
1 parent e97c293 commit 89fb1ca
Show file tree
Hide file tree
Showing 19 changed files with 228 additions and 86 deletions.
2 changes: 1 addition & 1 deletion ddtrace/contrib/internal/bottle/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class TracePlugin(object):
api = 2

def __init__(self, service="bottle", tracer=None, distributed_tracing=None):
self.service = config.service or service
self.service = config._get_service(default=service)
self.tracer = tracer or ddtrace.tracer
if distributed_tracing is not None:
config.bottle.distributed_tracing = distributed_tracing
Expand Down
123 changes: 77 additions & 46 deletions ddtrace/settings/_inferred_base_service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import fnmatch
import importlib.util
import os
import pathlib
import re
Expand All @@ -8,6 +9,11 @@
from typing import Optional
from typing import Tuple

from ..internal.logger import get_logger


log = get_logger(__name__)


INIT_PY = "__init__.py"
ALL_PY_FILES = "*.py"
Expand All @@ -33,7 +39,7 @@ def __init__(self, environ: Dict[str, str]):
# - Match /python, /python3.7, etc.
self.pattern = r"(^|/)(?!.*\.py$)(" + re.escape("python") + r"(\d+\.\d+)?$)"

def detect(self, args: List[str]) -> Optional[ServiceMetadata]:
def detect(self, args: List[str], skip_args_preceded_by_flags=True) -> Optional[ServiceMetadata]:
"""
Detects and returns service metadata based on the provided list of arguments.
Expand All @@ -59,22 +65,27 @@ def detect(self, args: List[str]) -> Optional[ServiceMetadata]:
has_flag_prefix = arg.startswith("-") and not arg.startswith("--ddtrace")
is_env_variable = "=" in arg

should_skip_arg = prev_arg_is_flag or has_flag_prefix or is_env_variable
should_skip_arg = (prev_arg_is_flag and skip_args_preceded_by_flags) or has_flag_prefix or is_env_variable

if module_flag:
return ServiceMetadata(arg)
if _module_exists(arg):
return ServiceMetadata(arg)

if not should_skip_arg:
abs_path = pathlib.Path(arg).resolve()
if not abs_path.exists():
continue
stripped = abs_path
if not stripped.is_dir():
stripped = stripped.parent
value, ok = self.deduce_package_name(stripped)
if ok:
return ServiceMetadata(value)
return ServiceMetadata(self.find_nearest_top_level(stripped))
try:
abs_path = pathlib.Path(arg).resolve()
if not abs_path.exists():
continue
stripped = abs_path
if not stripped.is_dir():
stripped = stripped.parent
value, ok = self.deduce_package_name(stripped)
if ok:
return ServiceMetadata(value)
return ServiceMetadata(self.find_nearest_top_level(stripped))
except Exception as ex:
# Catch any unexpected errors
log.debug("Unexpected error while processing argument: ", arg, "Exception: ", ex)

if has_flag_prefix and arg == "-m":
module_flag = True
Expand Down Expand Up @@ -145,39 +156,51 @@ def detect_service(args: List[str]) -> Optional[str]:
if cache_key in CACHE:
return CACHE.get(cache_key)

# Check both the included command args as well as the executable being run
possible_commands = [*args, sys.executable]
executable_args = set()

# List of detectors to try in order
detectors = {}
for detector_class in detector_classes:
detector_instance = detector_class(dict(os.environ))

for i, command in enumerate(possible_commands):
detector_name = detector_instance.name

if detector_instance.matches(command):
detectors.update({detector_name: detector_instance})
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)
continue
elif _is_executable(command):
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)

args_to_search = []
for i, arg in enumerate(args):
# skip any executable args
if i not in executable_args:
args_to_search.append(arg)

# Iterate through the matched detectors
for detector in detectors.values():
metadata = detector.detect(args_to_search)
if metadata and metadata.name:
CACHE[cache_key] = metadata.name
return metadata.name
try:
# Check both the included command args as well as the executable being run
possible_commands = [*args, sys.executable]
executable_args = set()

# List of detectors to try in order
detectors = {}
for detector_class in detector_classes:
detector_instance = detector_class(dict(os.environ))

for i, command in enumerate(possible_commands):
detector_name = detector_instance.name

if detector_instance.matches(command):
detectors.update({detector_name: detector_instance})
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)
continue
elif _is_executable(command):
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)

args_to_search = []
for i, arg in enumerate(args):
# skip any executable args
if i not in executable_args:
args_to_search.append(arg)

# Iterate through the matched detectors
for detector in detectors.values():
metadata = detector.detect(args_to_search)
if metadata and metadata.name:
CACHE[cache_key] = metadata.name
return metadata.name

# Iterate through the matched detectors again, this time not skipping args preceded by flag args
for detector in detectors.values():
metadata = detector.detect(args_to_search, skip_args_preceded_by_flags=False)
if metadata and metadata.name:
CACHE[cache_key] = metadata.name
return metadata.name
except Exception as ex:
# Catch any unexpected errors to be extra safe
log.warning("Unexpected error during inferred base service detection: ", ex)

CACHE[cache_key] = None
return None

Expand All @@ -195,3 +218,11 @@ def _is_executable(file_path: str) -> bool:
directory = os.path.dirname(directory)

return False


def _module_exists(module_name: str) -> bool:
"""Check if a module can be imported."""
try:
return importlib.util.find_spec(module_name) is not None
except ModuleNotFoundError:
return False
14 changes: 14 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,20 @@ def create_ddtrace_subprocess_dir_and_return_test_pyfile(tmpdir):
return pyfile


@pytest.fixture
def ddtrace_tmp_path(tmp_path):
# Create a test dir named `ddtrace_subprocess_dir` that will be used by the tracers
ddtrace_dir = tmp_path / DEFAULT_DDTRACE_SUBPROCESS_TEST_SERVICE_NAME
ddtrace_dir.mkdir(exist_ok=True) # Create the directory if it doesn't exist

# Check for __init__.py and create it if it doesn't exist
init_file = ddtrace_dir / "__init__.py"
if not init_file.exists():
init_file.write_text("") # Create an empty __init__.py file

return ddtrace_dir


@pytest.fixture
def run_python_code_in_subprocess(tmpdir):
def _run(code, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions tests/contrib/gunicorn/test_gunicorn.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def test_no_known_errors_occur(tmp_path):

@flaky(until=1706677200)
@pytest.mark.skipif(sys.version_info >= (3, 11), reason="Gunicorn is only supported up to 3.10")
def test_span_schematization(tmp_path):
def test_span_schematization(ddtrace_tmp_path):
for schema_version in [None, "v0", "v1"]:
for service_name in [None, "mysvc"]:
gunicorn_settings = _gunicorn_settings_factory(
Expand All @@ -201,7 +201,7 @@ def test_span_schematization(tmp_path):
),
ignores=["meta.result_class"],
):
with gunicorn_server(gunicorn_settings, tmp_path) as context:
with gunicorn_server(gunicorn_settings, ddtrace_tmp_path) as context:
_, client = context
response = client.get("/")
assert response.status_code == 200
105 changes: 99 additions & 6 deletions tests/internal/service_name/test_inferred_base_service.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import pathlib
import shlex
import tempfile
from unittest.mock import patch

import pytest

from ddtrace.settings._inferred_base_service import _module_exists
from ddtrace.settings._inferred_base_service import detect_service


Expand All @@ -21,7 +23,9 @@ def mock_file_system():
(base_path / "venv" / "bin" / "gunicorn").mkdir(parents=True)

# add a test dir
(base_path / "tests" / "contrib" / "aiohttp").mkdir(parents=True)
(base_path / "tests" / "contrib" / "aiohttp" / "app").mkdir(parents=True)

# other cases

(base_path / "modules" / "m1" / "first" / "nice" / "package").mkdir(parents=True)
(base_path / "modules" / "m2").mkdir(parents=True)
Expand All @@ -35,15 +39,19 @@ def mock_file_system():
(base_path / "venv" / "bin" / "python3.11" / "gunicorn" / "__init__.py").mkdir(parents=True)
(base_path / "venv" / "bin" / "gunicorn" / "__init__.py").touch()

# Create `__init__.py` files that indicate packages
(base_path / "modules" / "m1" / "first" / "nice" / "package" / "__init__.py").touch()
(base_path / "modules" / "m1" / "first" / "nice" / "__init__.py").touch()
(base_path / "modules" / "m1" / "first" / "nice" / "something.py").touch()
(base_path / "modules" / "m1" / "first" / "nice" / "app.py").touch()
(base_path / "modules" / "m1" / "first" / "__init__.py").touch()
(base_path / "modules" / "m1" / "__init__.py").touch()
(base_path / "modules" / "m2" / "__init__.py").touch()
(base_path / "apps" / "app1" / "__main__.py").touch()
(base_path / "apps" / "app2" / "cmd" / "run.py").touch()
(base_path / "apps" / "app2" / "setup.py").touch()

(base_path / "tests" / "contrib" / "aiohttp" / "app" / "web.py").touch()
(base_path / "tests" / "contrib" / "aiohttp" / "app" / "__init__.py").touch()
(base_path / "tests" / "contrib" / "aiohttp" / "test.py").touch()
(base_path / "tests" / "contrib" / "aiohttp" / "__init__.py").touch()
(base_path / "tests" / "contrib" / "__init__.py").touch()
Expand All @@ -59,9 +67,13 @@ def mock_file_system():
@pytest.mark.parametrize(
"cmd,expected",
[
("python tests/contrib/aiohttp/app/web.py", "tests.contrib.aiohttp.app"),
("python tests/contrib/aiohttp", "tests.contrib.aiohttp"),
("python tests/contrib", "tests.contrib"),
("python tests", "tests"),
("python modules/m1/first/nice/package", "m1.first.nice.package"),
("python modules/m1/first/nice", "m1.first.nice"),
("python modules/m1/first/nice/something.py", "m1.first.nice"),
("python modules/m1/first/nice/app.py", "m1.first.nice"),
("python modules/m1/first", "m1.first"),
("python modules/m2", "m2"),
("python apps/app1", "app1"),
Expand All @@ -75,17 +87,98 @@ def mock_file_system():
("venv/bin/python3.11/ddtrace-run python apps/app2/setup.py", "app2"),
("ddtrace-run python apps/app2/setup.py", "app2"),
("python3.12 apps/app2/cmd/run.py", "app2"),
("python -m m1.first.nice.package", "m1.first.nice.package"),
("python -m tests.contrib.aiohttp.app.web", "tests.contrib.aiohttp.app.web"),
("python -m tests.contrib.aiohttp.app", "tests.contrib.aiohttp.app"),
("python -m tests.contrib.aiohttp", "tests.contrib.aiohttp"),
("python -m tests.contrib", "tests.contrib"),
("python -m tests", "tests"),
("python -m http.server 8000", "http.server"),
("python --some-flag apps/app1", "app1"),
# pytest
("pytest tests/contrib/aiohttp", "tests.contrib.aiohttp"),
("pytest --ddtrace tests/contrib/aiohttp", "tests.contrib.aiohttp"),
("pytest --no-cov tests/contrib/aiohttp", "tests.contrib.aiohttp"),
],
)
def test_python_detector(cmd, expected, mock_file_system):
def test_python_detector_service_name_should_exist_file_exists(cmd, expected, mock_file_system):
# Mock the current working directory to the test_modules path
with patch("os.getcwd", return_value=str(mock_file_system)):
cmd_parts = cmd.split(" ")
cmd_parts = shlex.split(cmd)
detected_name = detect_service(cmd_parts)

assert detected_name == expected, f"Test failed for command: [{cmd}]"


@pytest.mark.parametrize(
"cmd,expected",
[
# Commands that should not produce a service name
("", None), # Empty command
("python non_existing_file.py", None), # Non-existing Python script
("python invalid_script.py", None), # Invalid script that isn't found
("gunicorn app:app", None), # Non-Python command
("ls -la", None), # Non-Python random command
("cat README.md", None), # Another random command
("python -m non_existing_module", None), # Non-existing Python module
("python -c 'print([])'", None), # Python inline code not producing a service
("python -m -c 'print([]])'", None), # Inline code with module flag
("echo 'Hello, World!'", None), # Not a Python service
("python3.11 /path/to/some/non_python_file.txt", None), # Non-Python file
("/usr/bin/ls", None), # Another system command
("some_executable --ddtrace hello", None),
("python version", None),
("python -m -v --hello=maam", None),
# error produced from a test, ensure an arg that is very long doesn't break stuff
(
"ddtrace-run pytest -k 'not test_reloader and not test_reload_listeners and not "
+ "test_no_exceptions_when_cancel_pending_request and not test_add_signal and not "
+ "test_ode_removes and not test_skip_touchup and not test_dispatch_signal_triggers"
+ " and not test_keep_alive_connection_context and not test_redirect_with_params and"
+ " not test_keep_alive_client_timeout and not test_logger_vhosts and not test_ssl_in_multiprocess_mode'",
None,
),
],
)
def test_no_service_name(cmd, expected, mock_file_system):
with patch("os.getcwd", return_value=str(mock_file_system)):
cmd_parts = shlex.split(cmd)
detected_name = detect_service(cmd_parts)

assert detected_name == expected, f"Test failed for command: [{cmd}]"


@pytest.mark.parametrize(
"cmd,expected",
[
# Command that is too long
("python " + " ".join(["arg"] * 1000), None), # Excessively long command
# Path with special characters
(r"python /path/with/special/characters/!@#$%^&*()_/some_script.py", None), # Special characters
# Path too deep
(f"python {'/'.join(['deep' * 50])}/script.py", None), # Excessively deep path
],
)
def test_chaos(cmd, expected, mock_file_system):
with patch("os.getcwd", return_value=str(mock_file_system)):
cmd_parts = shlex.split(cmd)
detected_name = detect_service(cmd_parts)

assert detected_name == expected, f"Chaos test failed for command: [{cmd}]"


@pytest.mark.parametrize(
"module_name,should_exist",
[
("tests.contrib.aiohttp.app.web", True),
("tests.contrib.aiohttp.app", True),
("tests.contrib.aiohttp", True),
("tests.contrib", True),
("tests", True),
("tests.releasenotes", False),
("non_existing_module", False),
],
)
def test_module_exists(module_name, should_exist):
exists = _module_exists(module_name)

assert exists == should_exist, f"Module {module_name} existence check failed."
Loading

0 comments on commit 89fb1ca

Please sign in to comment.