Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce a bootstrap command to auto-install packages #650

Merged
merged 1 commit into from
May 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions opentelemetry-auto-instrumentation/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Unreleased

- Add a new bootstrap command that enables automatic instrument installations.
([#650](https://github.com/open-telemetry/opentelemetry-python/pull/650))

## 0.7b1

Released 2020-05-12
Expand Down
1 change: 1 addition & 0 deletions opentelemetry-auto-instrumentation/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ where = src
[options.entry_points]
console_scripts =
opentelemetry-auto-instrumentation = opentelemetry.auto_instrumentation.auto_instrumentation:run
opentelemetry-bootstrap = opentelemetry.auto_instrumentation.bootstrap:run
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
# limitations under the License.

"""
Usage
-----

This package provides a command that automatically instruments a program:
This package provides a couple of commands that help automatically instruments a program:

opentelemetry-auto-instrumentation
-----------------------------------

::

Expand All @@ -25,4 +26,18 @@
The code in ``program.py`` needs to use one of the packages for which there is
an OpenTelemetry integration. For a list of the available integrations please
check :doc:`here <../../index>`.


opentelemetry-bootstrap
------------------------

::

opentelemetry-bootstrap --action=install|requirements

This commands inspects the active Python site-packages and figures out which
instrumentation packages the user might want to install. By default it prints out
a list of the suggested instrumentation packages which can be added to a requirements.txt
file. It also supports installing the suggested packages when run with :code:`--action=install`
flag.
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
#!/usr/bin/env python3

# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import pkgutil
import subprocess
import sys
from logging import getLogger

logger = getLogger(__file__)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be preferred to have a description of what this module does here as well as in the README so users will know what is going on and being installed automatically.


# target library to desired instrumentor path/versioned package name
instrumentations = {
"dbapi": "opentelemetry-ext-dbapi>=0.8b0",
"django": "opentelemetry-ext-django>=0.8b0",
"flask": "opentelemetry-ext-flask>=0.8b0",
"grpc": "opentelemetry-ext-grpc>=0.8b0",
"requests": "opentelemetry-ext-requests>=0.8b0",
"jinja2": "opentelemetry-ext-jinja2>=0.8b0",
"mysql": "opentelemetry-ext-mysql>=0.8b0",
"psycopg2": "opentelemetry-ext-psycopg2>=0.8b0",
"pymongo": "opentelemetry-ext-pymongo>=0.8b0",
"pymysql": "opentelemetry-ext-pymysql>=0.8b0",
"redis": "opentelemetry-ext-redis>=0.8b0",
"sqlalchemy": "opentelemetry-ext-sqlalchemy>=0.8b0",
"wsgi": "opentelemetry-ext-wsgi>=0.8b0",
}

# relevant instrumentors and tracers to uninstall and check for conflicts for target libraries
libraries = {
"dbapi": ("opentelemetry-ext-dbapi",),
"django": ("opentelemetry-ext-django",),
"flask": ("opentelemetry-ext-flask",),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that opentelemetry-ext-django is merged, can you add django as well?

"grpc": ("opentelemetry-ext-grpc",),
"requests": ("opentelemetry-ext-requests",),
"jinja2": ("opentelemetry-ext-jinja2",),
"mysql": ("opentelemetry-ext-mysql",),
"psycopg2": ("opentelemetry-ext-psycopg2",),
"pymongo": ("opentelemetry-ext-pymongo",),
"pymysql": ("opentelemetry-ext-pymysql",),
"redis": ("opentelemetry-ext-redis",),
"sqlalchemy": ("opentelemetry-ext-sqlalchemy",),
"wsgi": ("opentelemetry-ext-wsgi",),
}


def _install_package(library, instrumentation):
"""
Ensures that desired version is installed w/o upgrading its dependencies
by uninstalling where necessary (if `target` is not provided).


OpenTelemetry auto-instrumentation packages often have traced libraries
as instrumentation dependency (e.g. flask for opentelemetry-ext-flask),
so using -I on library could cause likely undesired Flask upgrade.
Using --no-dependencies alone would leave potential for nonfunctional
installations.
"""
pip_list = _sys_pip_freeze()
for package in libraries[library]:
if "{}==".format(package).lower() in pip_list:
logger.info(
"Existing %s installation detected. Uninstalling.", package
)
_sys_pip_uninstall(package)
_sys_pip_install(instrumentation)


def _syscall(func):
def wrapper(package=None):
try:
if package:
return func(package)
return func()
except subprocess.SubprocessError as exp:
cmd = getattr(exp, "cmd", None)
if cmd:
msg = 'Error calling system command "{0}"'.format(
" ".join(cmd)
)
if package:
msg = '{0} for package "{1}"'.format(msg, package)
raise RuntimeError(msg)

return wrapper


@_syscall
def _sys_pip_freeze():
return (
subprocess.check_output([sys.executable, "-m", "pip", "freeze"])
.decode()
.lower()
)


@_syscall
def _sys_pip_install(package):
# explicit upgrade strategy to override potential pip config
subprocess.check_call(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to have exception handling for this and uninstall

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can do that. What would you like the user error to be? Another custom exception that wraps the one raised by subprocess or just a plain error message? I like that the error from subprocess call to pip is surfaced to the user right now. We can wrap it and add a nicer error message on top.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wrapping the syscall errors now with a custom error message but I don't see a lot of value in doing this. The end result isn't any better IMO.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be useful to know which package had failed to install/uninstall.

[
sys.executable,
"-m",
"pip",
"install",
"-U",
"--upgrade-strategy",
"only-if-needed",
package,
]
)


@_syscall
def _sys_pip_uninstall(package):
subprocess.check_call(
[sys.executable, "-m", "pip", "uninstall", "-y", package]
)


def _pip_check():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we please refrain from single use functions? It forces the reader to look back in the file while reading this code.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think single use functions are good, when you are reading the code you find the function and you know what it is doing without having to understand how it is done. If you remove them, you'll end up with a very long function having to understand all the implementation details at once.

"""Ensures none of the instrumentations have dependency conflicts.
Clean check reported as:
'No broken requirements found.'
Dependency conflicts are reported as:
'opentelemetry-ext-flask 1.0.1 has requirement opentelemetry-sdk<2.0,>=1.0, but you have opentelemetry-sdk 0.5.'
To not be too restrictive, we'll only check for relevant packages.
"""
check_pipe = subprocess.Popen(
[sys.executable, "-m", "pip", "check"], stdout=subprocess.PIPE
)
pip_check = check_pipe.communicate()[0].decode()
pip_check_lower = pip_check.lower()
Comment on lines +146 to +147
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pip_check = check_pipe.communicate()[0].decode()
pip_check_lower = pip_check.lower()
pip_check_lower = check_pipe.communicate()[0].decode().lower()

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious as to why we can't simply string search for "opentelemetry-ext" here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to be more explicit, check for each library and to not encode the knowledge about libraries in multiple places. Right now the libraries mapping remains the single source of truth for all libraries we support. Adding sub-string matching on common prefix leaks that knowledge into this function. I don't much benefit in doing that. Rest of the program follows "configuration over convention" and I'd prefer to continue doing that but happy to hear other thoughts.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good reason. Would there be the case that some other library has a dependency conflict (not an integration) with one of the dependencies and then will generate a false positive?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is possible I suppose but not sure if it we should try to account for all possible conflicts globally in site-packages. Such conflicts could also be present independently of the bootstrap command so not sure if bootstrap should complain about them. In such case, users can use the default option to print out a list of dependencies instead of installing them and then figure out the conflicts as they would with any other library. Happy to hear other thoughts.

for package_tup in libraries.values():
for package in package_tup:
if package.lower() in pip_check_lower:
raise RuntimeError(
"Dependency conflict found: {}".format(pip_check)
)


def _is_installed(library):
return library in sys.modules or pkgutil.find_loader(library) is not None


def _find_installed_libraries():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single use function here

return {k: v for k, v in instrumentations.items() if _is_installed(k)}


def _run_requirements(packages):
print("\n".join(packages.values()), end="")


def _run_install(packages):
for pkg, inst in packages.items():
_install_package(pkg, inst)

_pip_check()


def run() -> None:
action_install = "install"
action_requirements = "requirements"

parser = argparse.ArgumentParser(
description="""
opentelemetry-bootstrap detects installed libraries and automatically
installs the relevant instrumentation packages for them.
"""
)
parser.add_argument(
"-a",
"--action",
choices=[action_install, action_requirements],
default=action_requirements,
help="""
install - uses pip to install the new requirements using to the
currently active site-package.
requirements - prints out the new requirements to stdout. Action can
be piped and appended to a requirements.txt file.
""",
)
args = parser.parse_args()

cmd = {
action_install: _run_install,
action_requirements: _run_requirements,
}[args.action]
cmd(_find_installed_libraries())
125 changes: 125 additions & 0 deletions opentelemetry-auto-instrumentation/tests/test_bootstrap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# type: ignore

from functools import reduce
from io import StringIO
from random import sample
from unittest import TestCase
from unittest.mock import call, patch

from opentelemetry.auto_instrumentation import bootstrap


def sample_packages(packages, rate):
sampled = sample(list(packages), int(len(packages) * rate),)
return {k: v for k, v in packages.items() if k in sampled}


class TestBootstrap(TestCase):

installed_libraries = {}
installed_instrumentations = {}

@classmethod
def setUpClass(cls):
# select random 60% of instrumentations
cls.installed_libraries = sample_packages(
bootstrap.instrumentations, 0.6
)

# treat 50% of sampled packages as pre-installed
cls.installed_instrumentations = sample_packages(
cls.installed_libraries, 0.5
)

cls.pkg_patcher = patch(
"opentelemetry.auto_instrumentation.bootstrap._find_installed_libraries",
return_value=cls.installed_libraries,
)

pip_freeze_output = []
for inst in cls.installed_instrumentations.values():
inst = inst.replace(">=", "==")
if "==" not in inst:
inst = "{}==x.y".format(inst)
pip_freeze_output.append(inst)

cls.pip_freeze_patcher = patch(
"opentelemetry.auto_instrumentation.bootstrap._sys_pip_freeze",
return_value="\n".join(pip_freeze_output),
)
cls.pip_install_patcher = patch(
"opentelemetry.auto_instrumentation.bootstrap._sys_pip_install",
)
cls.pip_uninstall_patcher = patch(
"opentelemetry.auto_instrumentation.bootstrap._sys_pip_uninstall",
)
cls.pip_check_patcher = patch(
"opentelemetry.auto_instrumentation.bootstrap._pip_check",
)

cls.pkg_patcher.start()
cls.mock_pip_freeze = cls.pip_freeze_patcher.start()
cls.mock_pip_install = cls.pip_install_patcher.start()
cls.mock_pip_uninstall = cls.pip_uninstall_patcher.start()
cls.mock_pip_check = cls.pip_check_patcher.start()

@classmethod
def tearDownClass(cls):
cls.pip_check_patcher.start()
cls.pip_uninstall_patcher.start()
cls.pip_install_patcher.start()
cls.pip_freeze_patcher.start()
cls.pkg_patcher.stop()

@patch("sys.argv", ["bootstrap", "-a", "pipenv"])
def test_run_unknown_cmd(self):
with self.assertRaises(SystemExit):
bootstrap.run()

@patch("sys.argv", ["bootstrap", "-a", "requirements"])
def test_run_cmd_print(self):
with patch("sys.stdout", new=StringIO()) as fake_out:
bootstrap.run()
self.assertEqual(
fake_out.getvalue(),
"\n".join(self.installed_libraries.values()),
)

@patch("sys.argv", ["bootstrap", "-a", "install"])
def test_run_cmd_install(self):
bootstrap.run()

self.assertEqual(
self.mock_pip_freeze.call_count, len(self.installed_libraries)
)

to_uninstall = reduce(
lambda x, y: x + y,
[
pkgs
for lib, pkgs in bootstrap.libraries.items()
if lib in self.installed_instrumentations
],
)
self.mock_pip_uninstall.assert_has_calls(
[call(i) for i in to_uninstall], any_order=True
)

self.mock_pip_install.assert_has_calls(
[call(i) for i in self.installed_libraries.values()],
any_order=True,
)
self.assertEqual(self.mock_pip_check.call_count, 1)