opentelemetry-sdk: speed up exemplars a bit (open-telemetry#4260)

* opentelemetry-sdk: speed up exemplars a bit Use a sparse dict to allocate ExemplarsBucket on demand instead of preallocating all of them in FixedSizeExemplarReservoirABC. Make the following return around 2X more loops for both trace_based and always_off exemplars filter: .tox/benchmark-opentelemetry-sdk/bin/pytest opentelemetry-sdk/benchmarks/metrics/ -k 'test_histogram_record_1000[7]' * Add benchmarks for steady state * Ignore benchmarks dirs for exported public symbols * Add changelog
fidelity-contributions · Dec 2, 2024 · 72fad82 · 72fad82
1 parent db4ef06
commit 72fad82
Show file tree

Hide file tree

Showing 4 changed files with 138 additions and 12 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#4324](https://github.com/open-telemetry/opentelemetry-python/pull/4324))
 - Remove `TestBase.assertEqualSpanInstrumentationInfo` method, use `assertEqualSpanInstrumentationScope` instead
   ([#4310](https://github.com/open-telemetry/opentelemetry-python/pull/4310))
+- sdk: instantiate lazily `ExemplarBucket`s in `ExemplarReservoir`s
+  ([#4260](https://github.com/open-telemetry/opentelemetry-python/pull/4260))
 
 ## Version 1.28.0/0.49b0 (2024-11-05)
 

diff --git a/opentelemetry-sdk/benchmarks/metrics/test_benchmark_metrics_histogram_steady.py b/opentelemetry-sdk/benchmarks/metrics/test_benchmark_metrics_histogram_steady.py
@@ -0,0 +1,111 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=invalid-name
+import itertools
+
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import InMemoryMetricReader
+from opentelemetry.sdk.metrics.view import (
+    ExplicitBucketHistogramAggregation,
+    View,
+)
+
+MAX_BOUND_VALUE = 10000
+
+
+def _generate_bounds(bound_count):
+    bounds = []
+    for i in range(bound_count):
+        bounds.append(i * MAX_BOUND_VALUE / bound_count)
+    return bounds
+
+
+hist_view_10 = View(
+    instrument_name="test_histogram_10_bound",
+    aggregation=ExplicitBucketHistogramAggregation(_generate_bounds(10)),
+)
+hist_view_49 = View(
+    instrument_name="test_histogram_49_bound",
+    aggregation=ExplicitBucketHistogramAggregation(_generate_bounds(49)),
+)
+hist_view_50 = View(
+    instrument_name="test_histogram_50_bound",
+    aggregation=ExplicitBucketHistogramAggregation(_generate_bounds(50)),
+)
+hist_view_1000 = View(
+    instrument_name="test_histogram_1000_bound",
+    aggregation=ExplicitBucketHistogramAggregation(_generate_bounds(1000)),
+)
+reader = InMemoryMetricReader()
+provider = MeterProvider(
+    metric_readers=[reader],
+    views=[
+        hist_view_10,
+        hist_view_49,
+        hist_view_50,
+        hist_view_1000,
+    ],
+)
+meter = provider.get_meter("sdk_meter_provider")
+hist = meter.create_histogram("test_histogram_default")
+hist10 = meter.create_histogram("test_histogram_10_bound")
+hist49 = meter.create_histogram("test_histogram_49_bound")
+hist50 = meter.create_histogram("test_histogram_50_bound")
+hist1000 = meter.create_histogram("test_histogram_1000_bound")
+
+
+def test_histogram_record(benchmark):
+    values = itertools.cycle(_generate_bounds(10))
+
+    def benchmark_histogram_record():
+        hist.record(next(values))
+
+    benchmark(benchmark_histogram_record)
+
+
+def test_histogram_record_10(benchmark):
+    values = itertools.cycle(_generate_bounds(10))
+
+    def benchmark_histogram_record_10():
+        hist10.record(next(values))
+
+    benchmark(benchmark_histogram_record_10)
+
+
+def test_histogram_record_49(benchmark):
+    values = itertools.cycle(_generate_bounds(49))
+
+    def benchmark_histogram_record_49():
+        hist49.record(next(values))
+
+    benchmark(benchmark_histogram_record_49)
+
+
+def test_histogram_record_50(benchmark):
+    values = itertools.cycle(_generate_bounds(50))
+
+    def benchmark_histogram_record_50():
+        hist50.record(next(values))
+
+    benchmark(benchmark_histogram_record_50)
+
+
+def test_histogram_record_1000(benchmark):
+    values = itertools.cycle(_generate_bounds(1000))
+
+    def benchmark_histogram_record_1000():
+        hist1000.record(next(values))
+
+    benchmark(benchmark_histogram_record_1000)
diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exemplar/exemplar_reservoir.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exemplar/exemplar_reservoir.py
@@ -13,8 +13,18 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
+from collections import defaultdict
 from random import randrange
-from typing import Any, Callable, Dict, List, Optional, Sequence, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Union,
+)
 
 from opentelemetry import trace
 from opentelemetry.context import Context
@@ -155,9 +165,9 @@ class FixedSizeExemplarReservoirABC(ExemplarReservoir):
     def __init__(self, size: int, **kwargs) -> None:
         super().__init__(**kwargs)
         self._size: int = size
-        self._reservoir_storage: List[ExemplarBucket] = [
-            ExemplarBucket() for _ in range(self._size)
-        ]
+        self._reservoir_storage: Mapping[int, ExemplarBucket] = defaultdict(
+            ExemplarBucket
+        )
 
     def collect(self, point_attributes: Attributes) -> List[Exemplar]:
         """Returns accumulated Exemplars and also resets the reservoir for the next
@@ -171,15 +181,16 @@ def collect(self, point_attributes: Attributes) -> List[Exemplar]:
             exemplars contain the attributes that were filtered out by the aggregator,
             but recorded alongside the original measurement.
         """
-        exemplars = filter(
-            lambda e: e is not None,
-            map(
-                lambda bucket: bucket.collect(point_attributes),
-                self._reservoir_storage,
-            ),
-        )
+        exemplars = [
+            e
+            for e in (
+                bucket.collect(point_attributes)
+                for _, bucket in sorted(self._reservoir_storage.items())
+            )
+            if e is not None
+        ]
         self._reset()
-        return [*exemplars]
+        return exemplars
 
     def offer(
         self,

diff --git a/scripts/public_symbols_checker.py b/scripts/public_symbols_checker.py
@@ -55,6 +55,8 @@ def get_symbols(change_type, diff_lines_getter, prefix):
                 and part[1] != "_"
                 # tests directories
                 or part == "tests"
+                # benchmarks directories
+                or part == "benchmarks"
                 for part in b_file_path_obj.parts
             )
         ):