From 499a26e03a3a0d2ffba6dbc7df92770532c1d4d2 Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Wed, 18 Oct 2023 11:23:48 -0700 Subject: [PATCH] Revert "add and emit pool owner metadata for alerting (#327)" This reverts commit 2595b5e6cba926421ee71846bf099e1d515c77f5. --- .../local-dev/default.kubernetes | 1 - .../local-dev/default.mesos | 1 - clusterman/autoscaler/autoscaler.py | 18 +++++++----------- clusterman/autoscaler/pool_manager.py | 1 - clusterman/simulator/simulated_pool_manager.py | 1 - examples/schemas/pool.json | 3 +-- itests/environment.py | 2 -- tests/autoscaler/autoscaler_test.py | 18 ++++-------------- tests/conftest.py | 2 -- 9 files changed, 12 insertions(+), 35 deletions(-) diff --git a/acceptance/srv-configs/clusterman-clusters/local-dev/default.kubernetes b/acceptance/srv-configs/clusterman-clusters/local-dev/default.kubernetes index f0a8f9792..11859c701 100644 --- a/acceptance/srv-configs/clusterman-clusters/local-dev/default.kubernetes +++ b/acceptance/srv-configs/clusterman-clusters/local-dev/default.kubernetes @@ -21,4 +21,3 @@ autoscaling: instance_loss_threshold: 3 alert_on_max_capacity: false -pool_owner: compute_infra diff --git a/acceptance/srv-configs/clusterman-clusters/local-dev/default.mesos b/acceptance/srv-configs/clusterman-clusters/local-dev/default.mesos index 6bb9f05a7..4fd9aec12 100644 --- a/acceptance/srv-configs/clusterman-clusters/local-dev/default.mesos +++ b/acceptance/srv-configs/clusterman-clusters/local-dev/default.mesos @@ -29,4 +29,3 @@ autoscale_signal: minute_range: 10 alert_on_max_capacity: false -pool_owner: compute_infra diff --git a/clusterman/autoscaler/autoscaler.py b/clusterman/autoscaler/autoscaler.py index 6eccbe58f..0424c7cba 100644 --- a/clusterman/autoscaler/autoscaler.py +++ b/clusterman/autoscaler/autoscaler.py @@ -178,9 +178,12 @@ def run(self, dry_run: bool = False, timestamp: Optional[arrow.Arrow] = None) -> else: capacity_offset = get_capacity_offset(self.cluster, self.pool, self.scheduler, timestamp) new_target_capacity = self._compute_target_capacity(resource_request) + capacity_offset - self.target_capacity_gauge.set(new_target_capacity, self.add_metric_labels(dry_run)) - self.max_capacity_gauge.set(self.pool_manager.max_capacity, self.add_metric_labels(dry_run)) - self.setpoint_gauge.set(self.autoscaling_config.setpoint, self.add_metric_labels(dry_run)) + self.target_capacity_gauge.set(new_target_capacity, {"dry_run": dry_run}) + self.max_capacity_gauge.set( + self.pool_manager.max_capacity, + {"dry_run": dry_run, "alert_on_max_capacity": self.pool_manager.alert_on_max_capacity}, + ) + self.setpoint_gauge.set(self.autoscaling_config.setpoint, {"dry_run": dry_run}) self._emit_requested_resource_metrics(resource_request, dry_run=dry_run) try: @@ -199,14 +202,7 @@ def run(self, dry_run: bool = False, timestamp: Optional[arrow.Arrow] = None) -> def _emit_requested_resource_metrics(self, resource_request: SignalResourceRequest, dry_run: bool) -> None: for resource_type, resource_gauge in self.resource_request_gauges.items(): if getattr(resource_request, resource_type) is not None: - resource_gauge.set(getattr(resource_request, resource_type), self.add_metric_labels(dry_run)) - - def add_metric_labels(self, dry_run): - return { - "dry_run": dry_run, - "alert_on_max_capacity": self.pool_manager.alert_on_max_capacity, - "team": self.pool_manager.pool_owner, - } + resource_gauge.set(getattr(resource_request, resource_type), {"dry_run": dry_run}) def _get_signal_for_app(self, app: str) -> Signal: """Load the signal object to use for autoscaling for a particular app diff --git a/clusterman/autoscaler/pool_manager.py b/clusterman/autoscaler/pool_manager.py index c06226c2c..6ca514dc3 100644 --- a/clusterman/autoscaler/pool_manager.py +++ b/clusterman/autoscaler/pool_manager.py @@ -86,7 +86,6 @@ def __init__( "autoscaling.killable_nodes_prioritizing_v2", default=False ) self.alert_on_max_capacity = self.pool_config.read_bool("alert_on_max_capacity", default=True) - self.pool_owner = self.pool_config.read_string("pool_owner", default="compute_infra") monitoring_info = {"cluster": cluster, "pool": pool} self.killable_nodes_counter = get_monitoring_client().create_counter(SFX_KILLABLE_NODES_COUNT, monitoring_info) diff --git a/clusterman/simulator/simulated_pool_manager.py b/clusterman/simulator/simulated_pool_manager.py index 05446c098..d796f8387 100644 --- a/clusterman/simulator/simulated_pool_manager.py +++ b/clusterman/simulator/simulated_pool_manager.py @@ -59,7 +59,6 @@ def __init__( MAX_MIN_NODE_SCALEIN_UPTIME_SECONDS, ) self.alert_on_max_capacity = self.pool_config.read_bool("alert_on_max_capacity", default=True) - self.pool_owner = self.pool_config.read_string("pool_owner", default="compute_infra") self.killable_nodes_prioritizing_v2 = self.pool_config.read_bool( "autoscaling.killable_nodes_prioritizing_v2", default=False ) diff --git a/examples/schemas/pool.json b/examples/schemas/pool.json index d8fb9a6c7..f4fd7fbf5 100644 --- a/examples/schemas/pool.json +++ b/examples/schemas/pool.json @@ -64,8 +64,7 @@ "additionalProperties": false }, "sensu_config": {"$ref": "definitions.json#sensu_config"}, - "alert_on_max_capacity": {"type": "boolean"}, - "pool_owner": {"type": "string"} + "alert_on_max_capacity": {"type": "boolean"} }, "additionalProperties": false } diff --git a/itests/environment.py b/itests/environment.py index a65546b34..eabe17174 100644 --- a/itests/environment.py +++ b/itests/environment.py @@ -121,7 +121,6 @@ def setup_configurations(context): ], }, "alert_on_max_capacity": True, - "pool_owner": "compute_infra", } kube_pool_config = { "resource_groups": [ @@ -145,7 +144,6 @@ def setup_configurations(context): "period_minutes": 7, }, "alert_on_max_capacity": True, - "pool_owner": "compute_infra", } with staticconf.testing.MockConfiguration( boto_config, namespace=CREDENTIALS_NAMESPACE diff --git a/tests/autoscaler/autoscaler_test.py b/tests/autoscaler/autoscaler_test.py index 335c13e92..c4555a152 100644 --- a/tests/autoscaler/autoscaler_test.py +++ b/tests/autoscaler/autoscaler_test.py @@ -49,7 +49,6 @@ def pool_configs(): "max_weight_to_remove": 10, }, "alert_on_max_capacity": True, - "pool_owner": "compute_infra", }, namespace=POOL_NAMESPACE.format(pool="bar", scheduler="mesos"), ): @@ -92,10 +91,6 @@ def mock_autoscaler(): "alert_on_max_capacity", namespace=POOL_NAMESPACE.format(pool="bar", scheduler="mesos"), ) - mock_autoscaler.pool_manager.pool_owner = staticconf.read_string( - "pool_owner", - namespace=POOL_NAMESPACE.format(pool="bar", scheduler="mesos"), - ) mock_autoscaler.pool_manager.non_orphan_fulfilled_capacity = 0 mock_autoscaler.target_capacity_gauge = mock.Mock(spec=GaugeProtocol) @@ -163,22 +158,17 @@ def test_autoscaler_run(dry_run, mock_autoscaler, run_timestamp): ), pytest.raises(ValueError): mock_autoscaler.run(dry_run=dry_run, timestamp=run_timestamp) - assert mock_autoscaler.target_capacity_gauge.set.call_args == mock.call( - 100, {"dry_run": dry_run, "alert_on_max_capacity": True, "team": "compute_infra"} - ) + assert mock_autoscaler.target_capacity_gauge.set.call_args == mock.call(100, {"dry_run": dry_run}) assert mock_autoscaler.max_capacity_gauge.set.call_args == mock.call( - mock_autoscaler.pool_manager.max_capacity, - {"dry_run": dry_run, "alert_on_max_capacity": True, "team": "compute_infra"}, - ) - assert mock_autoscaler.setpoint_gauge.set.call_args == mock.call( - 0.7, {"dry_run": dry_run, "alert_on_max_capacity": True, "team": "compute_infra"} + mock_autoscaler.pool_manager.max_capacity, {"dry_run": dry_run, "alert_on_max_capacity": True} ) + assert mock_autoscaler.setpoint_gauge.set.call_args == mock.call(0.7, {"dry_run": dry_run}) assert mock_autoscaler._compute_target_capacity.call_args == mock.call(resource_request) assert mock_autoscaler.pool_manager.modify_target_capacity.call_count == 1 assert mock_autoscaler.resource_request_gauges["cpus"].set.call_args == mock.call( resource_request.cpus, - {"dry_run": dry_run, "alert_on_max_capacity": True, "team": "compute_infra"}, + {"dry_run": dry_run}, ) assert mock_autoscaler.resource_request_gauges["mem"].set.call_count == 0 assert mock_autoscaler.resource_request_gauges["disk"].set.call_count == 0 diff --git a/tests/conftest.py b/tests/conftest.py index 9651b2516..55f5606f1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -145,7 +145,6 @@ def clusterman_pool_config(): ], }, "alert_on_max_capacity": True, - "pool_owner": "compute_infra", } with staticconf.testing.MockConfiguration(config, namespace="bar.mesos_config"): yield @@ -203,7 +202,6 @@ def clusterman_k8s_pool_config(): "disable_autoscaling": False, }, "alert_on_max_capacity": False, - "pool_owner": "foo", } with staticconf.testing.MockConfiguration(config, namespace="bar.kubernetes_config"): yield