Skip to content

Commit

Permalink
Add process/cpu live metrics (Azure#34735)
Browse files Browse the repository at this point in the history
  • Loading branch information
lzchen authored Mar 13, 2024
1 parent c3c285e commit b3411ac
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 3 deletions.
2 changes: 2 additions & 0 deletions sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

- Add live metrics collection of requests/dependencies/exceptions
([#34673](https://github.com/Azure/azure-sdk-for-python/pull/34673))
- Add live metrics collection of cpu time/process memory
([#34735](https://github.com/Azure/azure-sdk-for-python/pull/34735))

### Breaking Changes

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ class _DocumentIngressDocumentType(Enum):
Event = "Event"
Trace = "Trace"

# cSpell:disable
# cSpell:enable
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# cSpell:disable

from typing import Any, Iterable, Optional

import platform
from typing import Any, Optional
import psutil

from opentelemetry.metrics import CallbackOptions, Observation
from opentelemetry.sdk._logs import LogData
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.resources import Resource
Expand All @@ -13,10 +18,12 @@

from azure.monitor.opentelemetry.exporter._generated.models import ContextTagKeys
from azure.monitor.opentelemetry.exporter._quickpulse._constants import (
_COMMITTED_BYTES_NAME,
_DEPENDENCY_DURATION_NAME,
_DEPENDENCY_FAILURE_RATE_NAME,
_DEPENDENCY_RATE_NAME,
_EXCEPTION_RATE_NAME,
_PROCESSOR_TIME_NAME,
_REQUEST_DURATION_NAME,
_REQUEST_FAILURE_RATE_NAME,
_REQUEST_RATE_NAME,
Expand All @@ -43,6 +50,8 @@
)


PROCESS = psutil.Process()

def enable_live_metrics(**kwargs: Any) -> None:
"""Live metrics entry point.
Expand Down Expand Up @@ -113,6 +122,14 @@ def __init__(self, connection_string: Optional[str], resource: Optional[Resource
"exc/sec",
"live metrics exception rate per second"
)
self._process_memory_gauge = self._meter.create_observable_gauge(
_COMMITTED_BYTES_NAME[0],
[_get_process_memory],
)
self._processor_time_gauge = self._meter.create_observable_gauge(
_PROCESSOR_TIME_NAME[0],
[_get_processor_time],
)

def _record_span(self, span: ReadableSpan) -> None:
# Only record if in post state
Expand Down Expand Up @@ -150,3 +167,23 @@ def _record_log_record(self, log_data: LogData) -> None:
exc_message = log_record.attributes.get(SpanAttributes.EXCEPTION_MESSAGE)
if exc_type is not None or exc_message is not None:
self._exception_rate_counter.add(1)


# pylint: disable=unused-argument
def _get_process_memory(options: CallbackOptions) -> Iterable[Observation]:
# rss is non-swapped physical memory a process has used
yield Observation(
PROCESS.memory_info().rss,
{},
)


# pylint: disable=unused-argument
def _get_processor_time(options: CallbackOptions) -> Iterable[Observation]:
# Processor time does not include idle time
yield Observation(
100 - psutil.cpu_times_percent().idle,
{},
)

# cSpell:enable
4 changes: 4 additions & 0 deletions sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# license information.
# --------------------------------------------------------------------------

# cSpell:disable

import os
import re
Expand Down Expand Up @@ -86,6 +87,7 @@
"msrest>=0.6.10",
"opentelemetry-api~=1.21",
"opentelemetry-sdk~=1.21",
"psutil>=5.9.8",
],
entry_points={
"opentelemetry_traces_exporter": [
Expand All @@ -102,3 +104,5 @@
]
}
)

# cSpell:enable
Original file line number Diff line number Diff line change
@@ -1,22 +1,44 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# cSpell:disable

import collections
import platform
import unittest
from unittest import mock

from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics import (
Counter,
Histogram,
Meter,
MeterProvider,
ObservableGauge,
)
from opentelemetry.sdk.resources import Resource, ResourceAttributes
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace import SpanKind

from azure.monitor.opentelemetry.exporter._generated.models import ContextTagKeys
from azure.monitor.opentelemetry.exporter._quickpulse._constants import (
_COMMITTED_BYTES_NAME,
_DEPENDENCY_DURATION_NAME,
_DEPENDENCY_FAILURE_RATE_NAME,
_DEPENDENCY_RATE_NAME,
_EXCEPTION_RATE_NAME,
_PROCESSOR_TIME_NAME,
_REQUEST_DURATION_NAME,
_REQUEST_FAILURE_RATE_NAME,
_REQUEST_RATE_NAME,
)
from azure.monitor.opentelemetry.exporter._quickpulse._exporter import (
_QuickpulseExporter,
_QuickpulseMetricReader,
)
from azure.monitor.opentelemetry.exporter._quickpulse._live_metrics import (
enable_live_metrics,
_get_process_memory,
_get_processor_time,
_QuickpulseManager,
)
from azure.monitor.opentelemetry.exporter._quickpulse._state import (
Expand Down Expand Up @@ -92,6 +114,28 @@ def test_init(self, generator_mock):
self.assertEqual(qpm._reader._base_monitoring_data_point, qpm._base_monitoring_data_point)
self.assertTrue(isinstance(qpm._meter_provider, MeterProvider))
self.assertEqual(qpm._meter_provider._sdk_config.metric_readers, [qpm._reader])
self.assertTrue(isinstance(qpm._meter, Meter))
self.assertEqual(qpm._meter.name, "azure_monitor_live_metrics")
self.assertTrue(isinstance(qpm._request_duration, Histogram))
self.assertEqual(qpm._request_duration.name, _REQUEST_DURATION_NAME[0])
self.assertTrue(isinstance(qpm._dependency_duration, Histogram))
self.assertEqual(qpm._dependency_duration.name, _DEPENDENCY_DURATION_NAME[0])
self.assertTrue(isinstance(qpm._request_rate_counter, Counter))
self.assertEqual(qpm._request_rate_counter.name, _REQUEST_RATE_NAME[0])
self.assertTrue(isinstance(qpm._request_failed_rate_counter, Counter))
self.assertEqual(qpm._request_failed_rate_counter.name, _REQUEST_FAILURE_RATE_NAME[0])
self.assertTrue(isinstance(qpm._dependency_rate_counter, Counter))
self.assertEqual(qpm._dependency_rate_counter.name, _DEPENDENCY_RATE_NAME[0])
self.assertTrue(isinstance(qpm._dependency_failure_rate_counter, Counter))
self.assertEqual(qpm._dependency_failure_rate_counter.name, _DEPENDENCY_FAILURE_RATE_NAME[0])
self.assertTrue(isinstance(qpm._exception_rate_counter, Counter))
self.assertEqual(qpm._exception_rate_counter.name, _EXCEPTION_RATE_NAME[0])
self.assertTrue(isinstance(qpm._process_memory_gauge, ObservableGauge))
self.assertEqual(qpm._process_memory_gauge.name, _COMMITTED_BYTES_NAME[0])
self.assertEqual(qpm._process_memory_gauge._callbacks, [_get_process_memory])
self.assertTrue(isinstance(qpm._processor_time_gauge, ObservableGauge))
self.assertEqual(qpm._processor_time_gauge.name, _PROCESSOR_TIME_NAME[0])
self.assertEqual(qpm._processor_time_gauge._callbacks, [_get_processor_time])


def test_singleton(self):
Expand Down Expand Up @@ -247,3 +291,23 @@ def test_record_log_exception(self, post_state_mock, log_doc_mock, append_doc_mo
qpm._record_log_record(log_data_mock)
append_doc_mock.assert_called_once_with(log_record_doc)
qpm._exception_rate_counter.add.assert_called_once_with(1)

def test_process_memory(self):
with mock.patch("azure.monitor.opentelemetry.exporter._quickpulse._live_metrics.PROCESS") as process_mock:
memory = collections.namedtuple('memory', 'rss')
pmem = memory(rss=40)
process_mock.memory_info.return_value = pmem
mem = _get_process_memory(None)
obs = next(mem)
self.assertEqual(obs.value, 40)

@mock.patch("psutil.cpu_times_percent")
def test_processor_time(self, processor_mock):
cpu = collections.namedtuple('cpu', 'idle')
cpu_times = cpu(idle=94.5)
processor_mock.return_value = cpu_times
time = _get_processor_time(None)
obs = next(time)
self.assertEqual(obs.value, 5.5)

# cSpell:enable
1 change: 1 addition & 0 deletions shared_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ azureml-telemetry
cryptography
msrestazure
requests
psutil
opencensus
opencensus-ext-azure
opencensus-ext-threading
Expand Down

0 comments on commit b3411ac

Please sign in to comment.