Share some metrics between the Prometheus exporter and the phone home stats #13671
Changes from 5 commits
New file, `@@ -0,0 +1 @@`:

```
Introduce a `SharedUsageMetrics` class to share some usage metrics between the Prometheus exporter and the phone home stats.
```
New file, `@@ -0,0 +1,59 @@`:

```python
# Copyright 2022 The Matrix.org Foundation C.I.C
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from synapse.metrics.background_process_metrics import run_as_background_process

if TYPE_CHECKING:
    from synapse.server import HomeServer

from prometheus_client import Gauge

# Gauge to expose daily active users metrics
current_dau_gauge = Gauge(
    "synapse_admin_daily_active_users",
    "Current daily active users count",
)


class SharedUsageMetrics:
    """Usage metrics shared between the phone home stats and the prometheus exporter."""

    def __init__(self, hs: "HomeServer") -> None:
        self._store = hs.get_datastores().main
        self._clock = hs.get_clock()

        self.daily_active_users = -1

    async def setup(self) -> None:
        """Reads the current values for the shared usage metrics and starts a looping
        call to keep them updated.
        """
        await self.update()
        self._clock.looping_call(
            run_as_background_process,
            5 * 60 * 1000,
            desc="update_shared_usage_metrics",
            func=self.update,
        )

    async def update(self) -> None:
        """Updates the shared usage metrics."""
        await self.update_daily_active_users()
```
Inline review discussion on `update()`:

**Comment:** This feels very boilerplatey currently, but the idea is that we can add to this method if we add new metrics to this class.

**Reply:** I would be tempted to separate the stages of: …

One function per metric does feel a bit too boilerplatey to me. As an idea:

```python
async def _collect(self) -> SharedUsageMetrics:
    return SharedUsageMetrics(
        daily_active_users=await self._store.count_daily_users()
    )

async def _update_gauges(self, metrics: SharedUsageMetrics) -> None:
    current_dau_gauge.set(metrics.daily_active_users)
```

Just my opinion, but I like that a bit better.

**Reply:** So we would generate a new object each time we collect metrics? That sounds fairly inefficient to me.

**Reply:** I've implemented this idea in a slightly different way from your proposal, lmk what you think.

**Reply:** I think it's not a big cost for the clarity it gives. Remember this is Python, where basically everything, even an integer, is an object, so one more object isn't going to be much harm (especially if it uses slots). I would prefer the way I proposed, as each metric is more self-contained in the code; the current proposal has the metric being updated differently in two branches and stored in an intermediate variable, which feels clunkier. Even supposing objects were more inefficient, this runs so infrequently that I don't think it's worth premature optimisation; better to simplify the code to reduce footgun potential, I think.

**Reply:** I disagree. Even though, as you mention, the gain from optimising this is pretty negligible (for now at least; there's always the possibility that something else starts using it, such as modules), to me creating a new object every time something requests it is a bad code smell. Plus I think the code would be more confusing that way, because then we'd be updating the Prometheus gauge every 5 minutes by reading the count from the database, and then when the phone home stats ask for them we'd do the whole dance again. It feels both more logical and more efficient to me to have the …

**Reply:** Can you substantiate why you think it's a code smell? Creating an object or struct to hold associated values is such an accepted pattern elsewhere that I'm surprised it's objectionable. (In fact, mutating the existing objects that you've already handed out is usually considered an awful crime, to the point that some disciplines insist on immutability in many places.) I think the worse code smell here is not keeping the logic for one metric in one place. If you have metrics A, B and C, then the code locality looks like:

1st proposal:

```python
self.stats = Stats(
    a=count_a(),
    b=count_b(),
    c=count_c(),
)
```

2nd proposal:

```python
a = count_a()
b = count_b()
c = count_c()
if ..:
    self.stats = Stats(a=a, b=b, c=c)
else:
    self.stats.a = a
    self.stats.b = b
    self.stats.c = c
```

If we annotate the two proposals' lines with the metric they concern, you can see my point that the locality of behaviour is improved in the first suggestion (which I believe is better for its readability):

1st proposal:

```python
self.stats = Stats(
    a=count_a(),  # A
    b=count_b(),  # B
    c=count_c(),  # C
)
```

2nd proposal:

```python
a = count_a()  # A
b = count_b()  # B
c = count_c()  # C
if ..:
    self.stats = Stats(a=a, b=b, c=c)  # A, B, C
else:
    self.stats.a = a  # A
    self.stats.b = b  # B
    self.stats.c = c  # C
```

I also think the branch here is just more clutter than it's worth. Pessimistically, this is now twice as much to go wrong and twice as much to test.

I will note that I don't think the gain is even "pretty negligible": I think it's highly dubious. We create objects all the time in Synapse and in Python generally, whether they're integers, tuples, or instances of classes. Of course, if we really want to talk about such tiny optimisations, then we might instead consider whether introducing an …

I think you misunderstand my statement here (hopefully having spelled out the proposals above clarifies it?). You don't have to recalculate each and every time we request it; just the style of handing back the record can be different: always create a new record when updating the metrics, rather than mutating the previous one.

**Reply:** Ah, I was misunderstanding it indeed. Your initial proposal of …
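To make the shape of the reviewer's two-stage suggestion concrete, here is a minimal, self-contained sketch. `UsageMetricsSnapshot`, `FakeStore`, `FakeGauge`, and the fixed count of 42 are invented stand-ins for illustration; only the `count_daily_users` call and the `_collect`/`_update_gauges` split come from the discussion above, and the real code would use Synapse's datastore and a `prometheus_client` `Gauge`.

```python
import asyncio
from dataclasses import dataclass


@dataclass(frozen=True)
class UsageMetricsSnapshot:
    """Immutable snapshot of collected metrics (hypothetical name)."""

    daily_active_users: int


class FakeStore:
    """Stand-in for Synapse's datastore; returns a fixed DAU count."""

    async def count_daily_users(self) -> int:
        return 42


class FakeGauge:
    """Stand-in for a prometheus_client Gauge."""

    def __init__(self) -> None:
        self.value = 0.0

    def set(self, value: float) -> None:
        self.value = value


current_dau_gauge = FakeGauge()


class MetricsCollector:
    """Separates metric collection from gauge updates, per the suggestion."""

    def __init__(self, store: FakeStore) -> None:
        self._store = store

    async def _collect(self) -> UsageMetricsSnapshot:
        # Stage 1: read everything from the store into a fresh,
        # immutable snapshot rather than mutating a shared object.
        return UsageMetricsSnapshot(
            daily_active_users=await self._store.count_daily_users()
        )

    def _update_gauges(self, metrics: UsageMetricsSnapshot) -> None:
        # Stage 2: push the snapshot's values into the gauges.
        current_dau_gauge.set(metrics.daily_active_users)

    async def update(self) -> UsageMetricsSnapshot:
        metrics = await self._collect()
        self._update_gauges(metrics)
        return metrics


snapshot = asyncio.run(MetricsCollector(FakeStore()).update())
```

Because each metric appears exactly once (inside `_collect`), adding a new metric touches one constructor call and one gauge line, which is the code-locality argument made above.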
The diff continues:

```python
    async def update_daily_active_users(self) -> None:
        """Updates the daily active users count."""
        dau_count = await self._store.count_daily_users()
        current_dau_gauge.set(float(dau_count))
        self.daily_active_users = dau_count
```
Inline review comment on `update_daily_active_users()`:

**Comment:** I'm not a fan of having each field have a sentinel value; it gives the impression that we may end up reporting `-1` (although I see that you update upon initialisation). IMO we shouldn't report anything, rather than report negative values. I can see this being potentially troublesome for the phone-home stats when we sum DAUs.

I think I would be tempted to restructure this a bit:

- a `SharedUsageMetricsManager` or equivalent (not sure what the conventional term is in Synapse ... is it `Controller`?)
- `SharedUsageMetrics` as a plain dataclass/attrs object, with `daily_active_users` as an int field
- a `latest(): Optional[SharedUsageMetrics]` method (or field maybe?) on `SharedUsageMetricsManager`

(N.B. I was going to suggest that the metrics be lazily computed, either on-phone-home or on-prometheus-scrape, but I realise we don't want to block the prometheus scrape endpoint on Twisted's reactor in case everything is going awry.)

This sort of design prevents individual metrics having sentinel values like `-1`, which may escape.
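The restructure proposed in this comment can be sketched outside Synapse. In this minimal illustration, `SharedUsageMetricsManager`, `SharedUsageMetrics`, and `latest()` are the names suggested above, while the hard-coded count of 123 is a stand-in for the real database query and gauge update:

```python
import asyncio
from dataclasses import dataclass
from typing import Optional


@dataclass(frozen=True)
class SharedUsageMetrics:
    """Plain immutable record of the shared metrics."""

    daily_active_users: int


class SharedUsageMetricsManager:
    """Owns collection and hands out the latest complete snapshot."""

    def __init__(self) -> None:
        # No per-field -1 sentinels: before the first update there is
        # simply no snapshot at all.
        self._latest: Optional[SharedUsageMetrics] = None

    def latest(self) -> Optional[SharedUsageMetrics]:
        return self._latest

    async def update(self) -> None:
        # In Synapse this would await the datastore's count_daily_users()
        # and also set the Prometheus gauge; a constant stands in here.
        self._latest = SharedUsageMetrics(daily_active_users=123)


manager = SharedUsageMetricsManager()
before = manager.latest()  # None: nothing to report yet
asyncio.run(manager.update())
after = manager.latest()
```

Callers such as the phone-home stats either get a complete snapshot or `None`, so a partially initialised `-1` can never leak into reported numbers.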