Skip to content

Commit

Permalink
Merge branch 'master' into zhengda.lu/win
Browse files Browse the repository at this point in the history
  • Loading branch information
lu-zhengda authored Oct 22, 2024
2 parents bb91b6c + b84e4d8 commit 27d2079
Show file tree
Hide file tree
Showing 31 changed files with 2,345 additions and 1,286 deletions.
8 changes: 8 additions & 0 deletions cloudera/assets/configuration/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ files:
value:
example: <KEY_FILE_PATH>
type: string
- name: pools_size
description: |
Number of connection pools to cache before discarding the least recently used pool.
hidden: true
value:
default: 4
type: integer
example: 10
- name: max_parallel_requests
description: |
The maximum number of requests to Cloudera Manager that are allowed in parallel.
Expand Down
1 change: 1 addition & 0 deletions cloudera/changelog.d/18886.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[cloudera] Configure the number of connection pools to cache
1 change: 1 addition & 0 deletions cloudera/datadog_checks/cloudera/api/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def make_api(check) -> Api:
'api_url': check.config.api_url,
'workload_username': check.shared_config.workload_username,
'workload_password': check.shared_config.workload_password,
'pools_size': check.config.pools_size,
'max_parallel_requests': check.config.max_parallel_requests,
'verify_ssl': check.config.verify_ssl,
'ssl_ca_cert': check.config.ssl_ca_cert,
Expand Down
4 changes: 3 additions & 1 deletion cloudera/datadog_checks/cloudera/client/cm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ def __init__(self, log, **kwargs):
cm_client.configuration.ssl_ca_cert = kwargs.get('ssl_ca_cert')
cm_client.configuration.cert_file = kwargs.get('cert_file')
cm_client.configuration.key_file = kwargs.get('key_file')
self._client.rest_client = RESTClientObject(maxsize=kwargs.get('max_parallel_requests'))
self._client.rest_client = RESTClientObject(
pools_size=kwargs.get('pools_size'), maxsize=kwargs.get('max_parallel_requests')
)

def get_version(self) -> Version:
self._log.debug('getting version from cloudera')
Expand Down
4 changes: 4 additions & 0 deletions cloudera/datadog_checks/cloudera/config_models/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,9 @@ def instance_min_collection_interval():
return 15


def instance_pools_size():
    """Return the default value used for the ``pools_size`` instance option."""
    return 4


def instance_verify_ssl():
    """Return the default value used for the ``verify_ssl`` instance option."""
    return True
1 change: 1 addition & 0 deletions cloudera/datadog_checks/cloudera/config_models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class InstanceConfig(BaseModel):
max_parallel_requests: Optional[int] = None
metric_patterns: Optional[MetricPatterns] = None
min_collection_interval: Optional[float] = None
pools_size: Optional[int] = None
service: Optional[str] = None
ssl_ca_cert: Optional[str] = None
tags: Optional[tuple[str, ...]] = None
Expand Down
1 change: 1 addition & 0 deletions cloudera/tests/test_unit_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def test_client_ssl(dd_run_check, cloudera_check, cloudera_cm_client):
api_url='http://localhost:8080/api/v48/',
workload_username='~',
workload_password='~',
pools_size=4,
max_parallel_requests=4,
verify_ssl=True,
ssl_ca_cert='ssl_ca_cert_path',
Expand Down
20 changes: 20 additions & 0 deletions sqlserver/changelog.d/18883.added
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Migrate the following dynamic metrics to database_metrics for better maintainability and testability:
- SQLServer AlwaysOn metrics
- SQLServer FCI metrics
- SQLServer file stats metrics
- SQLServer primary log shipping metrics
- SQLServer secondary log shipping metrics
- SQLServer server state metrics
- SQLServer tempdb file space usage metrics
- SQLServer index usage metrics
- SQLServer database index fragmentation metrics
- SQLServer os tasks metrics
- SQLServer master files metrics
- SQLServer database files metrics
- SQLServer database stats metrics
- SQLServer database backup metrics
- SQLServer os schedulers metrics
- SQLServer database replication stats metrics
- SQLServer availability replicas metrics
- SQLServer availability groups metrics
Increase the collection interval of the database backup metrics and index fragmentation metrics to 5 minutes.
17 changes: 16 additions & 1 deletion sqlserver/datadog_checks/sqlserver/database_metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
# (C) Datadog, Inc. 2024-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
from .ao_metrics import SqlserverAoMetrics
from .availability_groups_metrics import SqlserverAvailabilityGroupsMetrics
from .availability_replicas_metrics import SqlserverAvailabilityReplicasMetrics
from .database_agent_metrics import SqlserverAgentMetrics
from .database_backup_metrics import SqlserverDatabaseBackupMetrics
from .database_files_metrics import SqlserverDatabaseFilesMetrics
from .database_replication_stats_metrics import SqlserverDatabaseReplicationStatsMetrics
from .database_stats_metrics import SqlserverDatabaseStatsMetrics
from .db_fragmentation_metrics import SqlserverDBFragmentationMetrics
from .fci_metrics import SqlserverFciMetrics
from .file_stats_metrics import SqlserverFileStatsMetrics
from .index_usage_metrics import SqlserverIndexUsageMetrics
from .database_agent_metrics import SqlserverAgentMetrics
from .master_files_metrics import SqlserverMasterFilesMetrics
from .os_schedulers_metrics import SqlserverOsSchedulersMetrics
from .os_tasks_metrics import SqlserverOsTasksMetrics
from .primary_log_shipping_metrics import SqlserverPrimaryLogShippingMetrics
from .secondary_log_shipping_metrics import SqlserverSecondaryLogShippingMetrics
from .server_state_metrics import SqlserverServerStateMetrics
from .tempdb_file_space_usage_metrics import SqlserverTempDBFileSpaceUsageMetrics
from .xe_session_metrics import SQLServerXESessionMetrics
221 changes: 221 additions & 0 deletions sqlserver/datadog_checks/sqlserver/database_metrics/ao_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
# (C) Datadog, Inc. 2024-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

from typing import List

from datadog_checks.base.config import is_affirmative
from datadog_checks.sqlserver.utils import is_azure_database

from .base import SqlserverDatabaseMetricsBase

# Query configuration for sys.dm_hadr_cluster: selects the quorum type, quorum
# state and cluster name, followed by two constant 1 values. The column specs
# below are listed in the same order as the SELECT list (three tags, then the
# two gauges that receive the constant 1s).
QUERY_AO_FAILOVER_CLUSTER = {
    "name": "sys.dm_hadr_cluster",
    "query": """
SELECT
LOWER(quorum_type_desc) AS quorum_type_desc,
LOWER(quorum_state_desc) AS quorum_state_desc,
cluster_name,
1,
1
FROM sys.dm_hadr_cluster
""".strip(),
    "columns": [
        {"name": column_name, "type": column_type}
        for column_name, column_type in (
            ("quorum_type", "tag"),
            ("quorum_state", "tag"),
            ("failover_cluster", "tag"),
            ("ao.quorum_type", "gauge"),
            ("ao.quorum_state", "gauge"),
        )
    ],
}

# Query configuration for sys.dm_hadr_cluster_members.
#
# sys.dm_hadr_cluster shares no column with sys.dm_hadr_cluster_members that
# could be used in a join condition, so the cluster name is attached to every
# member row through a CROSS JOIN against a single-row (TOP 1) subquery.
# The column specs are listed in the same order as the SELECT list.
QUERY_AO_FAILOVER_CLUSTER_MEMBER = {
    "name": "sys.dm_hadr_cluster_members",
    "query": """
SELECT
member_name,
LOWER(member_type_desc) AS member_type_desc,
LOWER(member_state_desc) AS member_state_desc,
FC.cluster_name,
1,
1,
number_of_quorum_votes
FROM sys.dm_hadr_cluster_members
CROSS JOIN (SELECT TOP 1 cluster_name FROM sys.dm_hadr_cluster) AS FC
""".strip(),
    "columns": [
        {"name": column_name, "type": column_type}
        for column_name, column_type in (
            ("member_name", "tag"),
            ("member_type", "tag"),
            ("member_state", "tag"),
            ("failover_cluster", "tag"),
            ("ao.member.type", "gauge"),
            ("ao.member.state", "gauge"),
            ("ao.member.number_of_quorum_votes", "gauge"),
        )
    ],
}


class SqlserverAoMetrics(SqlserverDatabaseMetricsBase):
    """Always On (AO) availability group and failover cluster metrics.

    Collection is opt-in through the ``include_ao_metrics`` instance option and
    additionally requires either an Azure engine edition or a SQL Server major
    version greater than 2012.
    """

    @property
    def include_ao_metrics(self) -> bool:
        """Whether the instance config opts in to AO metrics (defaults to off)."""
        return is_affirmative(self.instance_config.get('include_ao_metrics', False))

    @property
    def enabled(self) -> bool:
        """Whether the AO queries should run for this instance.

        :return: True when ``include_ao_metrics`` is set and the server is
            either an Azure database or reports a major version above 2012.
        """
        if not self.include_ao_metrics:
            return False
        # Test the engine edition first: an Azure database qualifies regardless
        # of the major version, and the version may be unset, in which case a
        # ``> 2012`` comparison would raise TypeError.
        if is_azure_database(self.engine_edition):
            return True
        major_version = self.major_version
        return bool(major_version) and major_version > 2012

    @property
    def queries(self) -> List[dict]:
        """Query configurations to execute: availability groups, cluster, cluster members."""
        return [
            self.__get_query_ao_availability_groups(),
            QUERY_AO_FAILOVER_CLUSTER,
            QUERY_AO_FAILOVER_CLUSTER_MEMBER,
        ]

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"enabled={self.enabled}, "
            f"major_version={self.major_version}, "
            f"engine_edition={self.engine_edition}, "
            f"include_ao_metrics={self.include_ao_metrics})"
        )

    def __get_query_ao_availability_groups(self) -> dict:
        """
        Construct the sys.availability_groups QueryExecutor configuration based on
        the SQL Server major version reported by ``self.major_version``.

        Version-gated columns are only selected when the major version is known
        and high enough; an unset version selects none of them.

        :return: a QueryExecutor query config object
        """
        column_definitions_tags = {
            # AG - sys.availability_groups
            "AG.group_id AS availability_group": {
                "name": "availability_group",
                "type": "tag",
            },
            "AG.name AS availability_group_name": {
                "name": "availability_group_name",
                "type": "tag",
            },
            # AR - sys.availability_replicas
            "AR.replica_server_name": {"name": "replica_server_name", "type": "tag"},
            "LOWER(AR.failover_mode_desc) AS failover_mode_desc": {
                "name": "failover_mode",
                "type": "tag",
            },
            "LOWER(AR.availability_mode_desc) AS availability_mode_desc": {
                "name": "availability_mode",
                "type": "tag",
            },
            # ADC - sys.availability_databases_cluster
            "ADC.database_name": {"name": "database_name", "type": "tag"},
            # DRS - sys.dm_hadr_database_replica_states
            "DRS.replica_id": {"name": "replica_id", "type": "tag"},
            "DRS.database_id": {"name": "database_id", "type": "tag"},
            "LOWER(DRS.database_state_desc) AS database_state_desc": {
                "name": "database_state",
                "type": "tag",
            },
            "LOWER(DRS.synchronization_state_desc) AS synchronization_state_desc": {
                "name": "synchronization_state",
                "type": "tag",
            },
            # FC - sys.dm_hadr_cluster
            "FC.cluster_name": {
                "name": "failover_cluster",
                "type": "tag",
            },
        }
        column_definitions_metrics = {
            "(DRS.log_send_queue_size * 1024) AS log_send_queue_size": {
                "name": "ao.log_send_queue_size",
                "type": "gauge",
            },
            "(DRS.log_send_rate * 1024) AS log_send_rate": {
                "name": "ao.log_send_rate",
                "type": "gauge",
            },
            "(DRS.redo_queue_size * 1024) AS redo_queue_size": {
                "name": "ao.redo_queue_size",
                "type": "gauge",
            },
            "(DRS.redo_rate * 1024) AS redo_rate": {
                "name": "ao.redo_rate",
                "type": "gauge",
            },
            "DRS.low_water_mark_for_ghosts": {
                "name": "ao.low_water_mark_for_ghosts",
                "type": "gauge",
            },
            "(DRS.filestream_send_rate * 1024) AS filestream_send_rate": {
                "name": "ao.filestream_send_rate",
                "type": "gauge",
            },
            # Other
            "1 AS replica_sync_topology_indicator": {
                "name": "ao.replica_status",
                "type": "gauge",
            },
        }

        # Include metrics based on version. The version may be unset (``enabled``
        # accepts Azure databases without one); treat that as 0 so the numeric
        # comparisons below cannot raise and no version-gated column is added.
        major_version = self.major_version or 0
        if major_version >= 2016:
            column_definitions_metrics["DRS.secondary_lag_seconds"] = {
                "name": "ao.secondary_lag_seconds",
                "type": "gauge",
            }
        if major_version >= 2014:
            column_definitions_metrics["DRS.is_primary_replica"] = {
                "name": "ao.is_primary_replica",
                "type": "gauge",
            }
            column_definitions_tags[
                """
CASE
WHEN DRS.is_primary_replica = 1 THEN 'primary'
WHEN DRS.is_primary_replica = 0 THEN 'secondary'
END AS replica_role_desc
"""
            ] = {"name": "replica_role", "type": "tag"}

        # Sort columns to ensure a static column order: all tag columns first,
        # then all metric columns, each group ordered by its SQL expression.
        sql_columns = []
        metric_columns = []
        for definitions in (column_definitions_tags, column_definitions_metrics):
            for column in sorted(definitions):
                sql_columns.append(column)
                metric_columns.append(definitions[column])

        return {
            "name": "sys.availability_groups",
            "query": """
SELECT
{sql_columns}
FROM
sys.availability_groups AS AG
INNER JOIN sys.availability_replicas AS AR ON AG.group_id = AR.group_id
INNER JOIN sys.availability_databases_cluster AS ADC ON AG.group_id = ADC.group_id
INNER JOIN sys.dm_hadr_database_replica_states AS DRS ON AG.group_id = DRS.group_id
AND ADC.group_database_id = DRS.group_database_id
AND AR.replica_id = DRS.replica_id
-- `sys.dm_hadr_cluster` does not have a related column to join on, this cross join will add the
-- `cluster_name` column to every row by multiplying all the rows in the left table against
-- all the rows in the right table. Note, there will only be one row from `sys.dm_hadr_cluster`.
CROSS JOIN (SELECT TOP 1 cluster_name FROM sys.dm_hadr_cluster) AS FC
""".strip().format(
                sql_columns=", ".join(sql_columns),
            ),
            "columns": metric_columns,
        }
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# (C) Datadog, Inc. 2024-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

from datadog_checks.base.config import is_affirmative

from .base import SqlserverDatabaseMetricsBase

# Query configuration joining sys.dm_hadr_availability_group_states with
# sys.availability_groups on group_id. The column specs are listed in the same
# order as the SELECT list: three tags followed by three health gauges.
# The statement deliberately ends without a WHERE clause.
AVAILABILITY_GROUPS_METRICS_QUERY = {
    "name": "sys.dm_hadr_availability_group_states",
    "query": """SELECT
resource_group_id,
name,
synchronization_health_desc,
synchronization_health,
primary_recovery_health,
secondary_recovery_health
from sys.dm_hadr_availability_group_states as dhdrcs
inner join sys.availability_groups as ag
on ag.group_id = dhdrcs.group_id
""".strip(),
    "columns": [
        {"name": column_name, "type": column_type}
        for column_name, column_type in (
            ("availability_group", "tag"),
            ("availability_group_name", "tag"),
            ("synchronization_health_desc", "tag"),
            ("ao.ag_sync_health", "gauge"),
            ("ao.primary_replica_health", "gauge"),
            ("ao.secondary_replica_health", "gauge"),
        )
    ],
}


class SqlserverAvailabilityGroupsMetrics(SqlserverDatabaseMetricsBase):
    """Availability group health metrics, gated on ``include_ao_metrics``."""

    # sys.dm_hadr_availability_group_states
    # Returns a row for each Always On availability group that possesses an availability replica on the local instance
    # of SQL Server. Each row displays the states that define the health of a given availability group.
    #
    # https://docs.microsoft.com/en-us/sql/relational-databases/system-dynamic-management-views/sys-dm-hadr-availability-group-states-transact-sql?view=sql-server-ver15
    @property
    def include_ao_metrics(self) -> bool:
        """Whether the instance config opts in to AO metrics (defaults to off)."""
        return is_affirmative(self.instance_config.get('include_ao_metrics', False))

    @property
    def availability_group(self):
        """The ``availability_group`` instance option, or None when it is not set."""
        return self.instance_config.get('availability_group')

    @property
    def enabled(self) -> bool:
        """Whether these queries should run; follows ``include_ao_metrics`` directly."""
        return self.include_ao_metrics

    @property
    def queries(self):
        """Return the query configuration, filtered to the configured group if any.

        The module-level query dict is copied before the WHERE clause is
        appended, so the shared constant is never modified.

        :return: a single-element list holding the query config dict
        """
        query = AVAILABILITY_GROUPS_METRICS_QUERY.copy()
        if self.availability_group:
            # The value is embedded in a quoted T-SQL literal; double any single
            # quote so a value containing one cannot break out of the literal.
            group_literal = str(self.availability_group).replace("'", "''")
            query['query'] += f" where resource_group_id = '{group_literal}'"
        return [query]

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"enabled={self.enabled}, "
            f"include_ao_metrics={self.include_ao_metrics}, "
            f"availability_group={self.availability_group})"
        )
Loading

0 comments on commit 27d2079

Please sign in to comment.