From 27bc7eeef9a15aff5eeb5c135cc29669128f45b6 Mon Sep 17 00:00:00 2001 From: Caden Marofke Date: Thu, 7 Sep 2023 19:08:23 +0000 Subject: [PATCH] feat!: Adds telemetry client calls to worker Signed-off-by: Caden Marofke --- pyproject.toml | 2 +- .../aws/deadline/__init__.py | 17 +++++- .../installer/__init__.py | 14 ++++- .../installer/install.sh | 58 +++++++++++++++---- .../startup/entrypoint.py | 7 +++ test/unit/install/test_install.py | 19 +++++- test/unit/startup/test_entrypoint.py | 6 ++ 7 files changed, 105 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 30c7cba8..11649657 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ dynamic = ["version"] dependencies = [ "requests ~= 2.29", "boto3 ~= 1.26", - "deadline == 0.28.*", + "deadline == 0.29.*", "openjd-sessions == 0.2.*", # tomli became tomllib in standard library in Python 3.11 "tomli >= 1.1.0 ; python_version<'3.11'", diff --git a/src/deadline_worker_agent/aws/deadline/__init__.py b/src/deadline_worker_agent/aws/deadline/__init__.py index a82ebf7a..3f9c66c9 100644 --- a/src/deadline_worker_agent/aws/deadline/__init__.py +++ b/src/deadline_worker_agent/aws/deadline/__init__.py @@ -9,7 +9,10 @@ from botocore.retries.standard import RetryContext from botocore.exceptions import ClientError -from ...startup.config import Configuration +from deadline.client.api import get_telemetry_client, TelemetryClient + +from ..._version import __version__ as version # noqa +from ...startup.config import Configuration, Capabilities from ...boto import DeadlineClient, NoOverflowExponentialBackoff as Backoff from ...api_models import ( AssumeFleetRoleForWorkerResponse, @@ -735,3 +738,15 @@ def update_worker_schedule( raise DeadlineRequestUnrecoverableError(e) return response + + +def _get_deadline_telemetry_client() -> TelemetryClient: + """Wrapper around the Deadline Client Library telemetry client, in order to set package-specific information""" + return
get_telemetry_client("deadline-cloud-worker-agent", version) + + +def record_worker_start_event(capabilities: Capabilities) -> None: + """Calls the telemetry client to record an event capturing generic machine information.""" + _get_deadline_telemetry_client().record_event( + event_type="com.amazon.rum.deadline.worker_agent.start", event_details=capabilities.dict() + ) diff --git a/src/deadline_worker_agent/installer/__init__.py b/src/deadline_worker_agent/installer/__init__.py index 3d0efbfa..bac5e399 100644 --- a/src/deadline_worker_agent/installer/__init__.py +++ b/src/deadline_worker_agent/installer/__init__.py @@ -23,7 +23,7 @@ def install() -> None: arg_parser = get_argument_parser() args = arg_parser.parse_args(namespace=ParsedCommandLineArguments) - worker_agent_program = Path(sysconfig.get_path("scripts")) / "deadline-worker-agent" + scripts_path = Path(sysconfig.get_path("scripts")) cmd = [ "sudo", @@ -36,8 +36,8 @@ def install() -> None: args.region, "--user", args.user, - "--worker-agent-program", - str(worker_agent_program), + "--scripts-path", + str(scripts_path), ] if args.group: cmd += ["--group", args.group] @@ -49,6 +49,8 @@ def install() -> None: cmd.append("--allow-shutdown") if not args.install_service: cmd.append("--no-install-service") + if args.telemetry_opt_out: + cmd.append("--telemetry-opt-out") try: run( @@ -72,6 +74,7 @@ class ParsedCommandLineArguments(Namespace): service_start: bool allow_shutdown: bool install_service: bool + telemetry_opt_out: bool def get_argument_parser() -> ArgumentParser: # pragma: no cover @@ -122,6 +125,11 @@ def get_argument_parser() -> ArgumentParser: # pragma: no cover action="store_false", dest="install_service", ) + parser.add_argument( + "--telemetry-opt-out", + help="Opts out of telemetry data collection", + action="store_true", + ) parser.add_argument( "--yes", "-y", diff --git a/src/deadline_worker_agent/installer/install.sh b/src/deadline_worker_agent/installer/install.sh index a0759044..c7a6f62d 
100755 --- a/src/deadline_worker_agent/installer/install.sh +++ b/src/deadline_worker_agent/installer/install.sh @@ -37,17 +37,20 @@ fleet_id=unset wa_user=$default_wa_user confirm="" region="us-west-2" -worker_agent_program="unset" +scripts_path="unset" +worker_agent_program="deadline-worker-agent" +client_library_program="deadline" allow_shutdown="no" no_install_service="no" start_service="no" +telemetry_opt_out="no" warning_lines=() usage() { echo "Usage: install.sh --farm-id FARM_ID --fleet-id FLEET_ID" echo " [--region REGION] [--user USER]" - echo " [--worker-agent-program WORKER_AGENT_PROGRAM]" + echo " [--scripts-path SCRIPTS_PATH]" echo " [-y]" echo "" echo "Arguments" @@ -64,15 +67,18 @@ usage() echo " A group name that the Worker Agent shares with the user(s) that Jobs will be running as." echo " Do not use the primary/effective group of the Worker Agent user specifeid in --user as" echo " this is not a secure configuration. Defaults to $default_job_group." - echo " --worker-agent-program WORKER_AGENT_PROGRAM" - echo " An optional path to the Worker Agent program. This is used as the program path" - echo " when creating the systemd service. If not specified, the first program named" - echo " deadline-worker-agent found in the search path will be used." + echo " --scripts-path SCRIPTS_PATH" + echo " An optional path to the directory that the Worker Agent and Deadline Cloud Library are" + echo " installed. This is used as the program path when creating the systemd service for the " + echo " Worker Agent. If not specified, the first program named 'deadline-worker-agent' and" + echo " 'deadline' found in the search path will be used." 
echo " --allow-shutdown" echo " Dictates whether a sudoers rule is created/deleted allowing the worker agent the" echo " ability to shutdown the host system" echo " --no-install-service" echo " Skips the worker agent systemd service installation" + echo " --telemetry-opt-out" + echo " Opts out of telemetry collection for the worker agent" echo " --start" echo " Starts the systemd service as part of the installation. By default, the systemd" echo " service is configured to start on system boot, but not started immediately." @@ -104,7 +110,7 @@ validate_deadline_id() { } # Validate arguments -PARSED_ARGUMENTS=$(getopt -n install.sh --longoptions farm-id:,fleet-id:,region:,user:,group:,worker-agent-program:,start,allow-shutdown,no-install-service -- "y" "$@") +PARSED_ARGUMENTS=$(getopt -n install.sh --longoptions farm-id:,fleet-id:,region:,user:,group:,scripts-path:,start,allow-shutdown,no-install-service,telemetry-opt-out -- "y" "$@") VALID_ARGUMENTS=$? if [ "${VALID_ARGUMENTS}" != "0" ]; then usage @@ -122,9 +128,10 @@ do --region) region="$2" ; shift 2 ;; --user) wa_user="$2" ; shift 2 ;; --group) job_group="$2" ; shift 2 ;; - --worker-agent-program) worker_agent_program="$2" ; shift 2 ;; + --scripts-path) scripts_path="$2" ; shift 2 ;; --allow-shutdown) allow_shutdown="yes" ; shift ;; --no-install-service) no_install_service="yes" ; shift ;; + --telemetry-opt-out) telemetry_opt_out="yes" ; shift ;; --start) start_service="yes" ; shift ;; -y) confirm="$1" ; shift ;; # -- means the end of the arguments; drop this, and break out of the while loop @@ -152,18 +159,38 @@ elif ! validate_deadline_id fleet "${fleet_id}"; then echo "ERROR: Non a valid value for --fleet-id: ${fleet_id}" usage fi -if [[ "${worker_agent_program}" == "unset" ]]; then +if [[ "${scripts_path}" == "unset" ]]; then set +e worker_agent_program=$(which deadline-worker-agent) if [[ "$?" 
!= "0" ]]; then echo "ERROR: Could not find deadline-worker-agent in search path" exit 1 fi + client_library_program=$(which deadline) + if [[ "$?" != "0" ]]; then + echo "ERROR: Could not find deadline in search path" + exit 1 + fi set -e -elif [[ ! -f "${worker_agent_program}" ]]; then - echo "ERROR: The specified Worker Agent path is not found: \"${worker_agent_program}\"" +elif [[ ! -d "${scripts_path}" ]]; then + echo "ERROR: The specified scripts path is not found: \"${scripts_path}\"" usage +else + set +e + # We have a provided scripts path, so we prepend it to the program names + worker_agent_program="${scripts_path}"/deadline-worker-agent + if [[ ! -f "${worker_agent_program}" ]]; then + echo "ERROR: Could not find deadline-worker-agent in scripts path: \"${worker_agent_program}\"" + exit 1 + fi + client_library_program="${scripts_path}"/deadline + if [[ ! -f "${client_library_program}" ]]; then + echo "ERROR: Could not find deadline in scripts path: \"${client_library_program}\"" + exit 1 + fi + set -e fi + if [[ ! -z "${region}" ]] && [[ !
"${region}" =~ ^[a-z]+-[a-z]+-[0-9]+$ ]]; then echo "ERROR: Not a valid value for --region: ${region}" usage @@ -196,9 +223,12 @@ echo "Fleet ID: ${fleet_id}" echo "Region: ${region}" echo "Worker agent user: ${wa_user}" echo "Worker job group: ${job_group}" +echo "Scripts path: ${scripts_path}" echo "Worker agent program path: ${worker_agent_program}" +echo "Deadline client program path: ${client_library_program}" echo "Allow worker agent shutdown: ${allow_shutdown}" echo "Start systemd service: ${start_service}" +echo "Telemetry opt-out: ${telemetry_opt_out}" # Confirmation prompt if [ -z "$confirm" ]; then @@ -352,6 +382,12 @@ EOF fi fi +if [[ "${telemetry_opt_out}" == "yes" ]]; then + # Set the Deadline Client Lib configuration setting + echo "Opting out of telemetry collection" + sudo -u "$wa_user" "$client_library_program" config set telemetry.opt_out true +fi + echo "Done" # Output warning lines if any diff --git a/src/deadline_worker_agent/startup/entrypoint.py b/src/deadline_worker_agent/startup/entrypoint.py index 55956d27..bf536a00 100644 --- a/src/deadline_worker_agent/startup/entrypoint.py +++ b/src/deadline_worker_agent/startup/entrypoint.py @@ -29,6 +29,12 @@ from .bootstrap import bootstrap_worker from .capabilities import AmountCapabilityName, AttributeCapabilityName, Capabilities from .config import Capabilities, Configuration, ConfigurationError +from ..aws.deadline import ( + DeadlineRequestError, + delete_worker, + update_worker, + record_worker_start_event, +) __all__ = ["entrypoint"] _logger = logging.getLogger(__name__) @@ -86,6 +92,7 @@ def entrypoint(cli_args: Optional[list[str]] = None) -> None: # if customer manually provided the capabilities (to be added in this function) # then we default to the customer provided ones system_capabilities = detect_system_capabilities() + record_worker_start_event(system_capabilities) config.capabilities = system_capabilities.merge(config.capabilities) # Log the configuration diff --git
a/test/unit/install/test_install.py b/test/unit/install/test_install.py index c1bd78b0..e708e130 100644 --- a/test/unit/install/test_install.py +++ b/test/unit/install/test_install.py @@ -72,6 +72,11 @@ def allow_shutdown() -> bool: return False +@pytest.fixture +def telemetry_opt_out() -> bool: + return True + + @pytest.fixture def install_service() -> bool: return True @@ -88,6 +93,7 @@ def parsed_args( confirmed: bool, allow_shutdown: bool, install_service: bool, + telemetry_opt_out: bool, ) -> ParsedCommandLineArguments: parsed_args = ParsedCommandLineArguments() parsed_args.farm_id = farm_id @@ -99,6 +105,7 @@ def parsed_args( parsed_args.confirmed = confirmed parsed_args.allow_shutdown = allow_shutdown parsed_args.install_service = install_service + parsed_args.telemetry_opt_out = telemetry_opt_out return parsed_args @@ -131,8 +138,8 @@ def expected_cmd( parsed_args.region, "--user", parsed_args.user, - "--worker-agent-program", - os.path.join(sysconfig.get_path("scripts"), "deadline-worker-agent"), + "--scripts-path", + sysconfig.get_path("scripts"), ] if parsed_args.group is not None: expected_cmd.extend(("--group", parsed_args.group)) @@ -142,6 +149,8 @@ def expected_cmd( expected_cmd.append("--start") if parsed_args.allow_shutdown: expected_cmd.append("--allow-shutdown") + if parsed_args.telemetry_opt_out: + expected_cmd.append("--telemetry-opt-out") return expected_cmd @@ -218,6 +227,12 @@ def user(self, request: pytest.FixtureRequest) -> str: def allow_shutdown(self, request: pytest.FixtureRequest) -> bool: return request.param + @pytest.fixture( + params=(True, False), + ) + def telemetry_opt_out(self, request: pytest.FixtureRequest) -> bool: + return request.param + @pytest.fixture( params=( True, diff --git a/test/unit/startup/test_entrypoint.py b/test/unit/startup/test_entrypoint.py index 964c16bd..df7dc597 100644 --- a/test/unit/startup/test_entrypoint.py +++ b/test/unit/startup/test_entrypoint.py @@ -148,6 +148,12 @@ def block_rich_import() -> 
Generator[None, None, None]: yield +@pytest.fixture(autouse=True) +def block_telemetry_client() -> Generator[MagicMock, None, None]: + with patch.object(entrypoint_mod, "record_worker_start_event") as telem_mock: + yield telem_mock + + def test_calls_worker_run( mock_worker_run: MagicMock, ) -> None: