From a1fff419616174f47d1b9ac72d4ce3bc84fa09e6 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Sun, 1 Nov 2020 01:11:28 +0100 Subject: [PATCH 1/6] Add an admin API for users' media statistics --- docs/admin_api/statistics.rst | 82 +++++ synapse/rest/admin/__init__.py | 2 + synapse/rest/admin/statistics.py | 127 +++++++ synapse/storage/databases/main/stats.py | 127 +++++++ tests/rest/admin/test_statistics.py | 463 ++++++++++++++++++++++++ 5 files changed, 801 insertions(+) create mode 100644 docs/admin_api/statistics.rst create mode 100644 synapse/rest/admin/statistics.py create mode 100644 tests/rest/admin/test_statistics.py diff --git a/docs/admin_api/statistics.rst b/docs/admin_api/statistics.rst new file mode 100644 index 000000000000..3a9279a7699b --- /dev/null +++ b/docs/admin_api/statistics.rst @@ -0,0 +1,82 @@ +Users' media usage statistics +----------------------------- +Gets information about all local media usage of all users. +Gives the possibility to filter them by time and user. + +The API is:: + + GET /_synapse/admin/v1/statistics/users/media + +To use it, you will need to authenticate by providing an ``access_token`` for a +server admin: see `README.rst `_. + +A response body like the following is returned: + +.. code:: json + + { + "users_media": [ + { + "displayname": "foo_user_0", + "media_count": 2, + "media_length": 134, + "user_id": "@foo_user_0:test" + }, + { + "displayname": "foo_user_1", + "media_count": 2, + "media_length": 134, + "user_id": "@foo_user_1:test" + } + ], + "next_token": 3, + "total": 10 + } + +To paginate, check for ``next_token`` and if present, call the endpoint again +with ``from`` set to the value of ``next_token``. This will return a new page. + +If the endpoint does not return a ``next_token`` then there are no more +reports to paginate through. + +**Parameters** + +The following parameters should be set in the URL: + +- ``limit``: string representing a positive integer - Is optional but is used for pagination, + denoting the maximum number of items to return in this call. Defaults to ``100``. +- ``from``: string representing a positive integer - Is optional but used for pagination, + denoting the offset in the returned results. This should be treated as an opaque value and + not explicitly set to anything other than the return value of ``next_token`` from a previous call. + Defaults to ``0``. +- ``order_by`` - string - The method in which to sort the returned list of users. Valid values are: + + - ``user_id`` - Users are ordered alphabetically by ``user_id``. This is the default. + - ``displayname`` - Users are ordered alphabetically by ``displayname``. + - ``media_length`` - Users are ordered by size of uploaded media. Smallest to largest. + - ``media_count`` - Users are ordered by number of uploaded media. Smallest to largest. + +- ``from_ts`` - string representing a positive integer - Considers only files created + later than this timestamp. Unix timestamp in ms. +- ``until_ts`` - string representing a positive integer - Considers only files created + earlier than this timestamp. Unix timestamp in ms. +- ``search_term`` - string - Filter users by their ``user_id`` or ``displayname``. Search term can be + contained in any part of the ``user_id`` or ``displayname``. Defaults to no filtering. +- ``dir`` - string - Direction of order. Either ``f`` for forwards or ``b`` for backwards. Setting + this value to ``b`` will reverse the above sort order. Defaults to ``f``. + + +**Response** + +The following fields are returned in the JSON response body: + +- ``users_media`` - An array of objects, each containing information about the user + and his local media. Objects contain the following fields: + + - ``displayname`` - string - Displayname of this user. + - ``media_count`` - integer - Number of uploaded media by this user. + - ``media_length`` - integer - Size of uploaded media by this user. + - ``user_id`` - string - fully-qualified user id: for example, @user:server.com. + +- ``next_token`` - integer - Indication for pagination. See above. +- ``total`` - integer - Total number of users. diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index fa7e9e404301..a41b42e29251 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -62,6 +62,7 @@ UsersRestServletV2, WhoisRestServlet, ) +from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet from synapse.types import RoomStreamToken from synapse.util.versionstring import get_version_string @@ -227,6 +228,7 @@ def register_servlets(hs, http_server): DeviceRestServlet(hs).register(http_server) DevicesRestServlet(hs).register(http_server) DeleteDevicesRestServlet(hs).register(http_server) + UserMediaStatisticsRestServlet(hs).register(http_server) EventReportDetailRestServlet(hs).register(http_server) EventReportsRestServlet(hs).register(http_server) PushersRestServlet(hs).register(http_server) diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py new file mode 100644 index 000000000000..eed217cc2b21 --- /dev/null +++ b/synapse/rest/admin/statistics.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Dirk Klimpel +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import Tuple + +from synapse.api.errors import Codes, SynapseError +from synapse.http.servlet import ( + RestServlet, + parse_integer, + parse_string, +) +from synapse.http.site import SynapseRequest +from synapse.rest.admin._base import ( + assert_requester_is_admin, + admin_patterns, +) +from synapse.server import HomeServer +from synapse.storage.databases.main.stats import UserSortOrder +from synapse.types import JsonDict + +logger = logging.getLogger(__name__) + + +class UserMediaStatisticsRestServlet(RestServlet): + """ + Get statistics about uploaded media by users. + """ + + PATTERNS = admin_patterns("/statistics/users/media$") + + def __init__(self, hs: HomeServer): + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self.auth, request) + + order_by = parse_string( + request, "order_by", default=UserSortOrder.USER_ID.value + ) + if order_by not in ( + UserSortOrder.MEDIA_LENGTH.value, + UserSortOrder.MEDIA_COUNT.value, + UserSortOrder.USER_ID.value, + UserSortOrder.DISPLAYNAME.value, + ): + raise SynapseError( + 400, + "Unknown value for order_by: %s" % (order_by,), + errcode=Codes.INVALID_PARAM, + ) + + start = parse_integer(request, "from", default=0) + if start < 0: + raise SynapseError( + 400, + "Query parameter from must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + limit = parse_integer(request, "limit", default=100) + if limit < 0: + raise SynapseError( + 400, + "Query parameter limit must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + from_ts = parse_integer(request, "from_ts", default=0) + if from_ts < 0: + raise SynapseError( + 400, + "Query parameter from_ts must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + until_ts = parse_integer(request, "until_ts") + if until_ts is not None: + if until_ts < 0: + raise SynapseError( + 400, + "Query parameter until_ts must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + if until_ts <= from_ts: + raise SynapseError( + 400, + "Query parameter until_ts must be greater than from_ts.", + errcode=Codes.INVALID_PARAM, + ) + + search_term = parse_string(request, "search_term") + if search_term == "": + raise SynapseError( + 400, + "Query parameter search_term cannot be an empty string.", + errcode=Codes.INVALID_PARAM, + ) + + direction = parse_string(request, "dir", default="f") + if direction not in ("f", "b"): + raise SynapseError( + 400, "Unknown direction: %s" % (direction,), errcode=Codes.INVALID_PARAM + ) + + users_media, total = await self.store.get_users_media_usage_paginate( + start, limit, from_ts, until_ts, order_by, direction, search_term + ) + ret = {"users_media": users_media, "total": total} + if (start + limit) < total: + ret["next_token"] = start + len(users_media) + + return 200, ret diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 5beb302be343..a4686fee853a 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -17,11 +17,13 @@ import logging from collections import Counter from itertools import chain +from enum import Enum from typing import Any, Dict, List, Optional, Tuple from twisted.internet.defer import DeferredLock from synapse.api.constants import EventTypes, Membership +from synapse.api.errors import StoreError from synapse.storage.database import DatabasePool from synapse.storage.databases.main.state_deltas import StateDeltasStore from synapse.storage.engines import PostgresEngine @@ -59,6 +61,23 @@ TYPE_TO_ORIGIN_TABLE = {"room": ("rooms", "room_id"), "user": ("users", "name")} +class UserSortOrder(Enum): + """ + Enum to define the sorting method used when returning users + with get_users_media_usage_paginate + + MEDIA_LENGTH = ordered by size of uploaded media. Smallest to largest. + MEDIA_COUNT = ordered by number of uploaded media. Smallest to largest. + USER_ID = ordered alphabetically by `user_id`. + DISPLAYNAME = ordered alphabetically by `displayname` + """ + + MEDIA_LENGTH = "media_length" + MEDIA_COUNT = "media_count" + USER_ID = "user_id" + DISPLAYNAME = "displayname" + + class StatsStore(StateDeltasStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) @@ -882,3 +901,111 @@ def _calculate_and_set_initial_state_for_user_txn(txn): complete_with_stream_id=pos, absolute_field_overrides={"joined_rooms": joined_rooms}, ) + + async def get_users_media_usage_paginate( + self, + start: int, + limit: int, + from_ts: Optional[int] = None, + until_ts: Optional[int] = None, + order_by: Optional[UserSortOrder] = UserSortOrder.USER_ID.value, + direction: Optional[str] = "f", + search_term: Optional[str] = None, + ) -> Tuple[List[Dict[str, Any]], Dict[str, int]]: + """Function to retrieve a paginated list of users and their uploaded local media + (size and number). This will return a json list of users and the + total number of users matching the filter criteria. + + Args: + start: start number to begin the query from + limit: number of rows to retrieve + from_ts: request only media that are created later than this timestamp (ms) + until_ts: request only media that are created earlier than this timestamp (ms) + order_by: the sort order of the returned list + direction: sort ascending or descending + search_term: a string to filter user names by + Returns: + A list of user dicts and an integer representing the total number of + users that exist given this query + """ + + def get_users_media_usage_paginate_txn(txn): + filters = [] + args = [self.hs.config.server_name] + + if search_term: + filters.append("(lmr.user_id LIKE ? OR displayname LIKE ?)") + args.extend(["@%" + search_term + "%:%", "%" + search_term + "%"]) + + if from_ts: + filters.append("created_ts >= ?") + args.extend([from_ts]) + if until_ts: + filters.append("created_ts <= ?") + args.extend([until_ts]) + + # Set ordering + if UserSortOrder(order_by) == UserSortOrder.MEDIA_LENGTH: + order_by_column = "media_length" + elif UserSortOrder(order_by) == UserSortOrder.MEDIA_COUNT: + order_by_column = "media_count" + elif UserSortOrder(order_by) == UserSortOrder.USER_ID: + order_by_column = "lmr.user_id" + elif UserSortOrder(order_by) == UserSortOrder.DISPLAYNAME: + order_by_column = "displayname" + else: + raise StoreError( + 500, "Incorrect value for order_by provided: %s" % order_by + ) + + if direction == "b": + order = "DESC" + else: + order = "ASC" + + sql_order = "ORDER BY %s %s" % (order_by_column, order) + where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else "" + + sql_base = """ + FROM local_media_repository as lmr + LEFT JOIN profiles AS p ON lmr.user_id = '@' || p.user_id || ':' || ? + {} + GROUP BY lmr.user_id, displayname + """.format( + where_clause + ) + + # SQlite does not support SELECT COUNT(*) OVER() + sql = """ + SELECT COUNT(*) FROM ( + SELECT lmr.user_id + {} + ) AS count_user_ids + """.format( + sql_base + ) + txn.execute(sql, args) + count = txn.fetchone()[0] + + sql = """ + SELECT + lmr.user_id, + displayname, + COUNT(lmr.user_id) as media_count, + SUM(media_length) as media_length + {sql_base} + ORDER BY {order_by_column} {order} + LIMIT ? OFFSET ? + """.format( + sql_base=sql_base, order_by_column=order_by_column, order=order, + ) + + args += [limit, start] + txn.execute(sql, args) + users = self.db_pool.cursor_to_dict(txn) + + return users, count + + return await self.db_pool.runInteraction( + "get_users_media_usage_paginate_txn", get_users_media_usage_paginate_txn + ) diff --git a/tests/rest/admin/test_statistics.py b/tests/rest/admin/test_statistics.py new file mode 100644 index 000000000000..98aed2f6d83c --- /dev/null +++ b/tests/rest/admin/test_statistics.py @@ -0,0 +1,463 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Dirk Klimpel +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from binascii import unhexlify +from typing import Any, Dict, List, Optional + +import synapse.rest.admin +from synapse.api.errors import Codes +from synapse.rest.client.v1 import login, profile + +from tests import unittest + + +class UserMediaStatisticsTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + profile.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.media_repo = hs.get_media_repository_resource() + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.other_user = self.register_user("user", "pass") + self.other_user_tok = self.login("user", "pass") + + self.url = "/_synapse/admin/v1/statistics/users/media" + + def test_no_auth(self): + """ + Try to list users without authentication. + """ + request, channel = self.make_request("GET", self.url, b"{}") + self.render(request) + + self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self): + """ + If the user is not a server admin, an error 403 is returned. + """ + request, channel = self.make_request( + "GET", self.url, json.dumps({}), access_token=self.other_user_tok, + ) + self.render(request) + + self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_invalid_parameter(self): + """ + If parameters are invalid, an error is returned. + """ + # unkown order_by + request, channel = self.make_request( + "GET", self.url + "?order_by=bar", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual("Unknown value for order_by: bar", channel.json_body["error"]) + + # negative from + request, channel = self.make_request( + "GET", self.url + "?from=-5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter from must be a string representing a positive integer.", + channel.json_body["error"], + ) + + # negative limit + request, channel = self.make_request( + "GET", self.url + "?limit=-5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter limit must be a string representing a positive integer.", + channel.json_body["error"], + ) + + # negative from_ts + request, channel = self.make_request( + "GET", self.url + "?from_ts=-1234", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter from_ts must be a string representing a positive integer.", + channel.json_body["error"], + ) + + # negative until_ts + request, channel = self.make_request( + "GET", self.url + "?until_ts=-1234", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter until_ts must be a string representing a positive integer.", + channel.json_body["error"], + ) + + # until_ts smaller from_ts + request, channel = self.make_request( + "GET", + self.url + "?from_ts=10&until_ts=5", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter until_ts must be greater than from_ts.", + channel.json_body["error"], + ) + + # empty search term + request, channel = self.make_request( + "GET", self.url + "?search_term=", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter search_term cannot be an empty string.", + channel.json_body["error"], + ) + + # invalid search order + request, channel = self.make_request( + "GET", self.url + "?dir=bar", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual("Unknown direction: bar", channel.json_body["error"]) + + def test_limit(self): + """ + Testing list of media with limit + """ + self._create_users_with_media(10, 2) + + request, channel = self.make_request( + "GET", self.url + "?limit=5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 10) + self.assertEqual(len(channel.json_body["users_media"]), 5) + self.assertEqual(channel.json_body["next_token"], 5) + self._check_fields(channel.json_body["users_media"]) + + def test_from(self): + """ + Testing list of media with a defined starting point (from) + """ + self._create_users_with_media(20, 2) + + request, channel = self.make_request( + "GET", self.url + "?from=5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 20) + self.assertEqual(len(channel.json_body["users_media"]), 15) + self.assertNotIn("next_token", channel.json_body) + self._check_fields(channel.json_body["users_media"]) + + def test_limit_and_from(self): + """ + Testing list of media with a defined starting point and limit + """ + self._create_users_with_media(20, 2) + + request, channel = self.make_request( + "GET", self.url + "?from=5&limit=10", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 20) + self.assertEqual(channel.json_body["next_token"], 15) + self.assertEqual(len(channel.json_body["users_media"]), 10) + self._check_fields(channel.json_body["users_media"]) + + def test_order_by(self): + """ + Testing order list with parameter `order_by` + """ + + # create users + userA = self.register_user("user_a", "pass") + userA_tok = self.login("user_a", "pass") + self._set_displayname(userA, userA_tok, "UserZ") + self._create_media(userA_tok, 1) + + userB = self.register_user("user_b", "pass") + userB_tok = self.login("user_b", "pass") + self._set_displayname(userB, userB_tok, "UserY") + self._create_media(userB_tok, 3) + + userC = self.register_user("user_c", "pass") + userC_tok = self.login("user_c", "pass") + self._set_displayname(userC, userC_tok, "UserX") + self._create_media(userC_tok, 2) + + # order by user_id + self._order_test("user_id", ["@user_a:test", "@user_b:test", "@user_c:test"]) + self._order_test( + "user_id", ["@user_a:test", "@user_b:test", "@user_c:test"], "f", + ) + self._order_test( + "user_id", ["@user_c:test", "@user_b:test", "@user_a:test"], "b", + ) + + # order by displayname + self._order_test( + "displayname", ["@user_c:test", "@user_b:test", "@user_a:test"] + ) + self._order_test( + "displayname", ["@user_c:test", "@user_b:test", "@user_a:test"], "f", + ) + self._order_test( + "displayname", ["@user_a:test", "@user_b:test", "@user_c:test"], "b", + ) + + # order by media_length + self._order_test( + "media_length", ["@user_a:test", "@user_c:test", "@user_b:test"], + ) + self._order_test( + "media_length", ["@user_a:test", "@user_c:test", "@user_b:test"], "f", + ) + self._order_test( + "media_length", ["@user_b:test", "@user_c:test", "@user_a:test"], "b", + ) + + # order by media_count + self._order_test( + "media_count", ["@user_a:test", "@user_c:test", "@user_b:test"], + ) + self._order_test( + "media_count", ["@user_a:test", "@user_c:test", "@user_b:test"], "f", + ) + self._order_test( + "media_count", ["@user_b:test", "@user_c:test", "@user_a:test"], "b", + ) + + def test_from_until_ts(self): + """ + Testing filter by time with parameters `from_ts` and `until_ts` + """ + # create media earlier than `ts1` to ensure that `from_ts` is working + self._create_media(self.other_user_tok, 3) + self.pump(1) + ts1 = self.clock.time_msec() + + # list all media when filter is not set + request, channel = self.make_request( + "GET", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["users_media"][0]["media_count"], 3) + + # filter media starting at `ts1` after creating first media + # result is 0 + request, channel = self.make_request( + "GET", self.url + "?from_ts=%s" % (ts1,), access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 0) + + self._create_media(self.other_user_tok, 3) + self.pump(1) + ts2 = self.clock.time_msec() + # create media after `ts2` to ensure that `until_ts` is working + self._create_media(self.other_user_tok, 3) + + # filter media between `ts1` and `ts2` + request, channel = self.make_request( + "GET", + self.url + "?from_ts=%s&until_ts=%s" % (ts1, ts2), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["users_media"][0]["media_count"], 3) + + # filter media until `ts2` and earlier than creating last media + request, channel = self.make_request( + "GET", self.url + "?until_ts=%s" % (ts2,), access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["users_media"][0]["media_count"], 6) + + def test_search_term(self): + self._create_users_with_media(20, 1) + + # check without filter get all users + request, channel = self.make_request( + "GET", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 20) + + # filter user 1 and 10-19 by `user_id` + request, channel = self.make_request( + "GET", + self.url + "?search_term=foo_user_1", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 11) + + # set a displayname for one user + self._create_media(self.other_user_tok, 1) + self._set_displayname(self.other_user, self.other_user_tok, "UserZ") + + # filter on this user in `displayname` + request, channel = self.make_request( + "GET", self.url + "?search_term=Z", access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["users_media"][0]["displayname"], "UserZ") + self.assertEqual(channel.json_body["total"], 1) + + # filter and get empty result + request, channel = self.make_request( + "GET", self.url + "?search_term=foobar", access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 0) + + def _create_users_with_media(self, number_users: int, media_per_user: int): + """ + Create a number of users with a number of media + Args: + number_users: Number of users to be created + media_per_user: Number of media to be created for each user + """ + for i in range(number_users): + self.register_user("foo_user_%s" % i, "pass") + user_tok = self.login("foo_user_%s" % i, "pass") + self._create_media(user_tok, media_per_user) + + def _create_media(self, user_token: str, number_media: int): + """ + Create a number of media for a specific user + Args: + user_token: Access token of the user + number_media: Number of media to be created for the user + """ + upload_resource = self.media_repo.children[b"upload"] + for i in range(number_media): + # file size is 67 Byte + image_data = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + # Upload some media into the room + self.helper.upload_media( + upload_resource, image_data, tok=user_token, expect_code=200 + ) + + def _check_fields(self, content: List[Dict[str, Any]]): + """Checks that all attributes are present in content + Args: + content: List that is checked for content + """ + for c in content: + self.assertIn("user_id", c) + self.assertIn("displayname", c) + self.assertIn("media_count", c) + self.assertIn("media_length", c) + + def _order_test( + self, order_type: str, expected_user_list: List[str], dir: Optional[str] = None + ): + """Request the list of users in a certain order. Assert that order is what + we expect + Args: + order_type: The type of ordering to give the server + expected_user_list: The list of user_ids in the order we expect to get + back from the server + dir: The direction of ordering to give the server + """ + + url = self.url + "?order_by=%s" % (order_type,) + if dir is not None: + if dir in ("b", "f"): + url += "&dir=%s" % (dir,) + request, channel = self.make_request( + "GET", url.encode("ascii"), access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], len(expected_user_list)) + + returned_order = [row["user_id"] for row in channel.json_body["users_media"]] + self.assertListEqual(expected_user_list, returned_order) + self._check_fields(channel.json_body["users_media"]) + + def _set_displayname(self, user_id: str, accesss_token: str, displayname: str): + """Set a displayname of a specific user + Args: + user_id: user ID of the user whose displayname should be set + access_token: Access token of the user whose displayname should be set + displayname: new displayname to be set + """ + request, channel = self.make_request( + "PUT", + "/profile/%s/displayname" % (user_id,), + content=json.dumps({"displayname": displayname}), + access_token=accesss_token, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) From 07e8a09e5f62eac776b839a595e91e745e98ae7e Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Sun, 1 Nov 2020 01:24:24 +0100 Subject: [PATCH 2/6] add newsfile --- changelog.d/8700.feature | 1 + synapse/rest/admin/__init__.py | 2 +- synapse/rest/admin/statistics.py | 11 ++--------- synapse/storage/databases/main/stats.py | 2 +- 4 files changed, 5 insertions(+), 11 deletions(-) create mode 100644 changelog.d/8700.feature diff --git a/changelog.d/8700.feature b/changelog.d/8700.feature new file mode 100644 index 000000000000..fd2201d6ffa2 --- /dev/null +++ b/changelog.d/8700.feature @@ -0,0 +1 @@ +Add an admin API for users' media statistics. Contributed by @dklimpel. \ No newline at end of file diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index a41b42e29251..2a4f7a1740b5 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -47,6 +47,7 @@ ShutdownRoomRestServlet, ) from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet +from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet from synapse.rest.admin.users import ( AccountValidityRenewServlet, DeactivateAccountRestServlet, @@ -62,7 +63,6 @@ UsersRestServletV2, WhoisRestServlet, ) -from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet from synapse.types import RoomStreamToken from synapse.util.versionstring import get_version_string diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py index eed217cc2b21..68e141f43dde 100644 --- a/synapse/rest/admin/statistics.py +++ b/synapse/rest/admin/statistics.py @@ -17,16 +17,9 @@ from typing import Tuple from synapse.api.errors import Codes, SynapseError -from synapse.http.servlet import ( - RestServlet, - parse_integer, - parse_string, -) +from synapse.http.servlet import RestServlet, parse_integer, parse_string from synapse.http.site import SynapseRequest -from synapse.rest.admin._base import ( - assert_requester_is_admin, - admin_patterns, -) +from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin from synapse.server import HomeServer from synapse.storage.databases.main.stats import UserSortOrder from synapse.types import JsonDict diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index a4686fee853a..a1d09426beb4 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -16,8 +16,8 @@ import logging from collections import Counter -from itertools import chain from enum import Enum +from itertools import chain from typing import Any, Dict, List, Optional, Tuple from twisted.internet.defer import DeferredLock From b83bede6b07aa4b0ccff37ded2b936fbb73c69a3 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Sun, 1 Nov 2020 12:36:12 +0100 Subject: [PATCH 3/6] small fixes --- synapse/rest/admin/statistics.py | 8 +++++--- synapse/storage/databases/main/stats.py | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py index 68e141f43dde..4362ac335285 100644 --- a/synapse/rest/admin/statistics.py +++ b/synapse/rest/admin/statistics.py @@ -14,16 +14,18 @@ # limitations under the License. import logging -from typing import Tuple +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import Codes, SynapseError from synapse.http.servlet import RestServlet, parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin -from synapse.server import HomeServer from synapse.storage.databases.main.stats import UserSortOrder from synapse.types import JsonDict +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -34,7 +36,7 @@ class UserMediaStatisticsRestServlet(RestServlet): PATTERNS = admin_patterns("/statistics/users/media$") - def __init__(self, hs: HomeServer): + def __init__(self, hs: "HomeServer"): self.hs = hs self.auth = hs.get_auth() self.store = hs.get_datastore() diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index a1d09426beb4..513bf7e49d40 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -963,7 +963,6 @@ def get_users_media_usage_paginate_txn(txn): else: order = "ASC" - sql_order = "ORDER BY %s %s" % (order_by_column, order) where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else "" sql_base = """ From 3916e5fd0cf11ddee3644cfa7cdeaf8ef73a629a Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 3 Nov 2020 21:41:43 +0100 Subject: [PATCH 4/6] Change documentation from rst to md, add tests --- docs/admin_api/statistics.md | 83 +++++++++++++++ docs/admin_api/statistics.rst | 82 --------------- synapse/rest/admin/statistics.py | 2 +- synapse/storage/databases/main/stats.py | 9 +- tests/rest/admin/test_statistics.py | 129 ++++++++++++++++-------- 5 files changed, 176 insertions(+), 129 deletions(-) create mode 100644 docs/admin_api/statistics.md delete mode 100644 docs/admin_api/statistics.rst diff --git a/docs/admin_api/statistics.md b/docs/admin_api/statistics.md new file mode 100644 index 000000000000..bdca5befd0c1 --- /dev/null +++ b/docs/admin_api/statistics.md @@ -0,0 +1,83 @@ +# Users' media usage statistics + +Gets information about all local media usage of all users. Gives the +possibility to filter them by time and user. + +The API is: + +``` +GET /_synapse/admin/v1/statistics/users/media +``` + +To use it, you will need to authenticate by providing an `access_token` +for a server admin: see [README.rst](README.rst). + +A response body like the following is returned: + +```json +{ + "users": [ + { + "displayname": "foo_user_0", + "media_count": 2, + "media_length": 134, + "user_id": "@foo_user_0:test" + }, + { + "displayname": "foo_user_1", + "media_count": 2, + "media_length": 134, + "user_id": "@foo_user_1:test" + } + ], + "next_token": 3, + "total": 10 +} +``` + +To paginate, check for `next_token` and if present, call the endpoint +again with `from` set to the value of `next_token`. This will return a new page. + +If the endpoint does not return a `next_token` then there are no more +reports to paginate through. + +**Parameters** + +The following parameters should be set in the URL: + +* `limit`: string representing a positive integer - Is optional but is + used for pagination, denoting the maximum number of items to return + in this call. Defaults to `100`. +* `from`: string representing a positive integer - Is optional but used for pagination, + denoting the offset in the returned results. This should be treated as an opaque value + and not explicitly set to anything other than the return value of `next_token` from a + previous call. Defaults to `0`. +* `order_by` - string - The method in which to sort the returned list of users. Valid values are: + - `user_id` - Users are ordered alphabetically by `user_id`. This is the default. + - `displayname` - Users are ordered alphabetically by `displayname`. + - `media_length` - Users are ordered by the total size of uploaded media in bytes. + Smallest to largest. + - `media_count` - Users are ordered by number of uploaded media. Smallest to largest. +* `from_ts` - string representing a positive integer - Considers only + files created at this timestamp or later. Unix timestamp in ms. +* `until_ts` - string representing a positive integer - Considers only + files created at this timestamp or earlier. Unix timestamp in ms. +* `search_term` - string - Filter users by their user ID localparts **or** `displayname`. + Search term can be contained in any part of the user ID localparts or `displayname`. + Defaults to no filtering. +* `dir` - string - Direction of order. Either `f` for forwards or `b` for backwards. + Setting this value to `b` will reverse the above sort order. Defaults to `f`. + + +**Response** + +The following fields are returned in the JSON response body: + +* `users` - An array of objects, each containing information + about the user and their local media. Objects contain the following fields: + - `displayname` - string - Displayname of this user. + - `media_count` - integer - Number of uploaded media by this user. + - `media_length` - integer - Size of uploaded media in bytes by this user. + - `user_id` - string - Fully-qualified user id: for example, @user:server.com. +* `next_token` - integer - Indication for pagination. See above. +* `total` - integer - Total number of users. diff --git a/docs/admin_api/statistics.rst b/docs/admin_api/statistics.rst deleted file mode 100644 index 3a9279a7699b..000000000000 --- a/docs/admin_api/statistics.rst +++ /dev/null @@ -1,82 +0,0 @@ -Users' media usage statistics ------------------------------ -Gets information about all local media usage of all users. -Gives the possibility to filter them by time and user. - -The API is:: - - GET /_synapse/admin/v1/statistics/users/media - -To use it, you will need to authenticate by providing an ``access_token`` for a -server admin: see `README.rst `_. - -A response body like the following is returned: - -.. code:: json - - { - "users_media": [ - { - "displayname": "foo_user_0", - "media_count": 2, - "media_length": 134, - "user_id": "@foo_user_0:test" - }, - { - "displayname": "foo_user_1", - "media_count": 2, - "media_length": 134, - "user_id": "@foo_user_1:test" - } - ], - "next_token": 3, - "total": 10 - } - -To paginate, check for ``next_token`` and if present, call the endpoint again -with ``from`` set to the value of ``next_token``. This will return a new page. - -If the endpoint does not return a ``next_token`` then there are no more -reports to paginate through. - -**Parameters** - -The following parameters should be set in the URL: - -- ``limit``: string representing a positive integer - Is optional but is used for pagination, - denoting the maximum number of items to return in this call. Defaults to ``100``. -- ``from``: string representing a positive integer - Is optional but used for pagination, - denoting the offset in the returned results. This should be treated as an opaque value and - not explicitly set to anything other than the return value of ``next_token`` from a previous call. - Defaults to ``0``. -- ``order_by`` - string - The method in which to sort the returned list of users. Valid values are: - - - ``user_id`` - Users are ordered alphabetically by ``user_id``. This is the default. - - ``displayname`` - Users are ordered alphabetically by ``displayname``. - - ``media_length`` - Users are ordered by size of uploaded media. Smallest to largest. - - ``media_count`` - Users are ordered by number of uploaded media. Smallest to largest. - -- ``from_ts`` - string representing a positive integer - Considers only files created - later than this timestamp. Unix timestamp in ms. -- ``until_ts`` - string representing a positive integer - Considers only files created - earlier than this timestamp. Unix timestamp in ms. -- ``search_term`` - string - Filter users by their ``user_id`` or ``displayname``. Search term can be - contained in any part of the ``user_id`` or ``displayname``. Defaults to no filtering. -- ``dir`` - string - Direction of order. Either ``f`` for forwards or ``b`` for backwards. Setting - this value to ``b`` will reverse the above sort order. Defaults to ``f``. - - -**Response** - -The following fields are returned in the JSON response body: - -- ``users_media`` - An array of objects, each containing information about the user - and his local media. Objects contain the following fields: - - - ``displayname`` - string - Displayname of this user. - - ``media_count`` - integer - Number of uploaded media by this user. - - ``media_length`` - integer - Size of uploaded media by this user. - - ``user_id`` - string - fully-qualified user id: for example, @user:server.com. - -- ``next_token`` - integer - Indication for pagination. See above. -- ``total`` - integer - Total number of users. diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py index 4362ac335285..f2490e382dcf 100644 --- a/synapse/rest/admin/statistics.py +++ b/synapse/rest/admin/statistics.py @@ -115,7 +115,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: users_media, total = await self.store.get_users_media_usage_paginate( start, limit, from_ts, until_ts, order_by, direction, search_term ) - ret = {"users_media": users_media, "total": total} + ret = {"users": users_media, "total": total} if (start + limit) < total: ret["next_token"] = start + len(users_media) diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 513bf7e49d40..1e2a89855b33 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -27,6 +27,7 @@ from synapse.storage.database import DatabasePool from synapse.storage.databases.main.state_deltas import StateDeltasStore from synapse.storage.engines import PostgresEngine +from synapse.types import JsonDict from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) @@ -911,7 +912,7 @@ async def get_users_media_usage_paginate( order_by: Optional[UserSortOrder] = UserSortOrder.USER_ID.value, direction: Optional[str] = "f", search_term: Optional[str] = None, - ) -> Tuple[List[Dict[str, Any]], Dict[str, int]]: + ) -> Tuple[List[JsonDict], Dict[str, int]]: """Function to retrieve a paginated list of users and their uploaded local media (size and number). This will return a json list of users and the total number of users matching the filter criteria. @@ -974,14 +975,14 @@ def get_users_media_usage_paginate_txn(txn): where_clause ) - # SQlite does not support SELECT COUNT(*) OVER() + # SQLite does not support SELECT COUNT(*) OVER() sql = """ SELECT COUNT(*) FROM ( SELECT lmr.user_id - {} + {sql_base} ) AS count_user_ids """.format( - sql_base + sql_base=sql_base, ) txn.execute(sql, args) count = txn.fetchone()[0] diff --git a/tests/rest/admin/test_statistics.py b/tests/rest/admin/test_statistics.py index 98aed2f6d83c..6f570ac7b5e8 100644 --- a/tests/rest/admin/test_statistics.py +++ b/tests/rest/admin/test_statistics.py @@ -77,7 +77,6 @@ def test_invalid_parameter(self): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) - self.assertEqual("Unknown value for order_by: bar", channel.json_body["error"]) # negative from request, channel = self.make_request( @@ -87,10 +86,6 @@ def test_invalid_parameter(self): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) - self.assertEqual( - "Query parameter from must be a string representing a positive integer.", - channel.json_body["error"], - ) # negative limit request, channel = self.make_request( @@ -100,10 +95,6 @@ def test_invalid_parameter(self): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) - self.assertEqual( - "Query parameter limit must be a string representing a positive integer.", - channel.json_body["error"], - ) # negative from_ts request, channel = self.make_request( @@ -113,10 +104,6 @@ def test_invalid_parameter(self): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) - self.assertEqual( - "Query parameter from_ts must be a string representing a positive integer.", - channel.json_body["error"], - ) # negative until_ts request, channel = self.make_request( @@ -126,10 +113,6 @@ def test_invalid_parameter(self): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) - self.assertEqual( - "Query parameter until_ts must be a string representing a positive integer.", - channel.json_body["error"], - ) # until_ts smaller from_ts request, channel = self.make_request( @@ -141,10 +124,6 @@ def test_invalid_parameter(self): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) - self.assertEqual( - "Query parameter until_ts must be greater than from_ts.", - channel.json_body["error"], - ) # empty search term request, channel = self.make_request( @@ -154,10 +133,6 @@ def test_invalid_parameter(self): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) - self.assertEqual( - "Query parameter search_term cannot be an empty string.", - channel.json_body["error"], - ) # invalid search order request, channel = self.make_request( @@ -167,7 +142,6 @@ def test_invalid_parameter(self): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) - self.assertEqual("Unknown direction: bar", channel.json_body["error"]) def test_limit(self): """ @@ -182,9 +156,9 @@ def test_limit(self): self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(channel.json_body["total"], 10) - self.assertEqual(len(channel.json_body["users_media"]), 5) + self.assertEqual(len(channel.json_body["users"]), 5) self.assertEqual(channel.json_body["next_token"], 5) - self._check_fields(channel.json_body["users_media"]) + self._check_fields(channel.json_body["users"]) def test_from(self): """ @@ -199,9 +173,9 @@ def test_from(self): self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(channel.json_body["total"], 20) - self.assertEqual(len(channel.json_body["users_media"]), 15) + self.assertEqual(len(channel.json_body["users"]), 15) self.assertNotIn("next_token", channel.json_body) - self._check_fields(channel.json_body["users_media"]) + self._check_fields(channel.json_body["users"]) def test_limit_and_from(self): """ @@ -217,8 +191,80 @@ def test_limit_and_from(self): self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(channel.json_body["total"], 20) self.assertEqual(channel.json_body["next_token"], 15) - self.assertEqual(len(channel.json_body["users_media"]), 10) - self._check_fields(channel.json_body["users_media"]) + self.assertEqual(len(channel.json_body["users"]), 10) + self._check_fields(channel.json_body["users"]) + + def test_next_token(self): + """ + Testing that `next_token` appears at the right place + """ + + number_users = 20 + self._create_users_with_media(number_users, 3) + + # `next_token` does not appear + # Number of results is the number of entries + request, channel = self.make_request( + "GET", self.url + "?limit=20", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_users) + self.assertEqual(len(channel.json_body["users"]), number_users) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does not appear + # Number of max results is larger than the number of entries + request, channel = self.make_request( + "GET", self.url + "?limit=21", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_users) + self.assertEqual(len(channel.json_body["users"]), number_users) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does appear + # Number of max results is smaller than the number of entries + request, channel = self.make_request( + "GET", self.url + "?limit=19", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_users) + self.assertEqual(len(channel.json_body["users"]), 19) + self.assertEqual(channel.json_body["next_token"], 19) + + # Check + # Set `from` to value of `next_token` for request remaining entries + # `next_token` does not appear + request, channel = self.make_request( + "GET", self.url + "?from=19", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_users) + self.assertEqual(len(channel.json_body["users"]), 1) + self.assertNotIn("next_token", channel.json_body) + + def test_no_media(self): + """ + Tests that a normal lookup for statistics is successfully + if users have no media created + """ + + request, channel = self.make_request( + "GET", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + self.assertEqual(0, len(channel.json_body["users"])) def test_order_by(self): """ @@ -298,7 +344,7 @@ def test_from_until_ts(self): ) self.render(request) self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(channel.json_body["users_media"][0]["media_count"], 3) + self.assertEqual(channel.json_body["users"][0]["media_count"], 3) # filter media starting at `ts1` after creating first media # result is 0 @@ -323,15 +369,15 @@ def test_from_until_ts(self): ) self.render(request) self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(channel.json_body["users_media"][0]["media_count"], 3) + self.assertEqual(channel.json_body["users"][0]["media_count"], 3) - # filter media until `ts2` and earlier than creating last media + # filter media until `ts2` and earlier request, channel = self.make_request( "GET", self.url + "?until_ts=%s" % (ts2,), access_token=self.admin_user_tok, ) self.render(request) self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(channel.json_body["users_media"][0]["media_count"], 6) + self.assertEqual(channel.json_body["users"][0]["media_count"], 6) def test_search_term(self): self._create_users_with_media(20, 1) @@ -364,7 +410,7 @@ def test_search_term(self): ) self.render(request) self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(channel.json_body["users_media"][0]["displayname"], "UserZ") + self.assertEqual(channel.json_body["users"][0]["displayname"], "UserZ") self.assertEqual(channel.json_body["total"], 1) # filter and get empty result @@ -432,9 +478,8 @@ def _order_test( """ url = self.url + "?order_by=%s" % (order_type,) - if dir is not None: - if dir in ("b", "f"): - url += "&dir=%s" % (dir,) + if dir is not None and dir in ("b", "f"): + url += "&dir=%s" % (dir,) request, channel = self.make_request( "GET", url.encode("ascii"), access_token=self.admin_user_tok, ) @@ -442,9 +487,9 @@ def _order_test( self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(channel.json_body["total"], len(expected_user_list)) - returned_order = [row["user_id"] for row in channel.json_body["users_media"]] + returned_order = [row["user_id"] for row in channel.json_body["users"]] self.assertListEqual(expected_user_list, returned_order) - self._check_fields(channel.json_body["users_media"]) + self._check_fields(channel.json_body["users"]) def _set_displayname(self, user_id: str, accesss_token: str, displayname: str): """Set a displayname of a specific user From c2b96b6eafa92579e624fd70d86b2601e1c55312 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 5 Nov 2020 15:59:33 +0100 Subject: [PATCH 5/6] change usage of set_displayname to new register user api --- tests/rest/admin/test_statistics.py | 40 +++++++---------------------- 1 file changed, 9 insertions(+), 31 deletions(-) diff --git a/tests/rest/admin/test_statistics.py b/tests/rest/admin/test_statistics.py index 6f570ac7b5e8..278c69be2fb8 100644 --- a/tests/rest/admin/test_statistics.py +++ b/tests/rest/admin/test_statistics.py @@ -19,7 +19,7 @@ import synapse.rest.admin from synapse.api.errors import Codes -from synapse.rest.client.v1 import login, profile +from synapse.rest.client.v1 import login from tests import unittest @@ -28,7 +28,6 @@ class UserMediaStatisticsTestCase(unittest.HomeserverTestCase): servlets = [ synapse.rest.admin.register_servlets, login.register_servlets, - profile.register_servlets, ] def prepare(self, reactor, clock, hs): @@ -272,19 +271,16 @@ def test_order_by(self): """ # create users - userA = self.register_user("user_a", "pass") + self.register_user("user_a", "pass", displayname="UserZ") userA_tok = self.login("user_a", "pass") - self._set_displayname(userA, userA_tok, "UserZ") self._create_media(userA_tok, 1) - userB = self.register_user("user_b", "pass") + self.register_user("user_b", "pass", displayname="UserY") userB_tok = self.login("user_b", "pass") - self._set_displayname(userB, userB_tok, "UserY") self._create_media(userB_tok, 3) - userC = self.register_user("user_c", "pass") + self.register_user("user_c", "pass", displayname="UserX") userC_tok = self.login("user_c", "pass") - self._set_displayname(userC, userC_tok, "UserX") self._create_media(userC_tok, 2) # order by user_id @@ -400,17 +396,15 @@ def test_search_term(self): self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(channel.json_body["total"], 11) - # set a displayname for one user - self._create_media(self.other_user_tok, 1) - self._set_displayname(self.other_user, self.other_user_tok, "UserZ") - # filter on this user in `displayname` request, channel = self.make_request( - "GET", self.url + "?search_term=Z", access_token=self.admin_user_tok, + "GET", + self.url + "?search_term=bar_user_10", + access_token=self.admin_user_tok, ) self.render(request) self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(channel.json_body["users"][0]["displayname"], "UserZ") + self.assertEqual(channel.json_body["users"][0]["displayname"], "bar_user_10") self.assertEqual(channel.json_body["total"], 1) # filter and get empty result @@ -429,7 +423,7 @@ def _create_users_with_media(self, number_users: int, media_per_user: int): media_per_user: Number of media to be created for each user """ for i in range(number_users): - self.register_user("foo_user_%s" % i, "pass") + self.register_user("foo_user_%s" % i, "pass", displayname="bar_user_%s" % i) user_tok = self.login("foo_user_%s" % i, "pass") self._create_media(user_tok, media_per_user) @@ -490,19 +484,3 @@ def _order_test( returned_order = [row["user_id"] for row in channel.json_body["users"]] self.assertListEqual(expected_user_list, returned_order) self._check_fields(channel.json_body["users"]) - - def _set_displayname(self, user_id: str, accesss_token: str, displayname: str): - """Set a displayname of a specific user - Args: - user_id: user ID of the user whose displayname should be set - access_token: Access token of the user whose displayname should be set - displayname: new displayname to be set - """ - request, channel = self.make_request( - "PUT", - "/profile/%s/displayname" % (user_id,), - content=json.dumps({"displayname": displayname}), - access_token=accesss_token, - ) - self.render(request) - self.assertEqual(200, channel.code, msg=channel.json_body) From a02faedb14b6a2bb1729b9dcec69ba1730be4a20 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 5 Nov 2020 18:57:34 +0000 Subject: [PATCH 6/6] wording changes --- changelog.d/8700.feature | 2 +- docs/admin_api/statistics.md | 12 ++++++------ synapse/storage/databases/main/stats.py | 2 +- tests/rest/admin/test_statistics.py | 3 +-- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/changelog.d/8700.feature b/changelog.d/8700.feature index fd2201d6ffa2..47d63dce028f 100644 --- a/changelog.d/8700.feature +++ b/changelog.d/8700.feature @@ -1 +1 @@ -Add an admin API for users' media statistics. Contributed by @dklimpel. \ No newline at end of file +Add an admin API for local user media statistics. Contributed by @dklimpel. diff --git a/docs/admin_api/statistics.md b/docs/admin_api/statistics.md index bdca5befd0c1..d398a120fb7e 100644 --- a/docs/admin_api/statistics.md +++ b/docs/admin_api/statistics.md @@ -1,6 +1,6 @@ # Users' media usage statistics -Gets information about all local media usage of all users. Gives the +Returns information about all local media usage of users. Gives the possibility to filter them by time and user. The API is: @@ -62,8 +62,8 @@ The following parameters should be set in the URL: files created at this timestamp or later. Unix timestamp in ms. * `until_ts` - string representing a positive integer - Considers only files created at this timestamp or earlier. Unix timestamp in ms. -* `search_term` - string - Filter users by their user ID localparts **or** `displayname`. - Search term can be contained in any part of the user ID localparts or `displayname`. +* `search_term` - string - Filter users by their user ID localpart **or** displayname. + The search term can be found in any part of the string. Defaults to no filtering. * `dir` - string - Direction of order. Either `f` for forwards or `b` for backwards. Setting this value to `b` will reverse the above sort order. Defaults to `f`. @@ -78,6 +78,6 @@ The following fields are returned in the JSON response body: - `displayname` - string - Displayname of this user. - `media_count` - integer - Number of uploaded media by this user. - `media_length` - integer - Size of uploaded media in bytes by this user. - - `user_id` - string - Fully-qualified user id: for example, @user:server.com. -* `next_token` - integer - Indication for pagination. See above. -* `total` - integer - Total number of users. + - `user_id` - string - Fully-qualified user ID (ex. `@user:server.com`). +* `next_token` - integer - Opaque value used for pagination. See above. +* `total` - integer - Total number of users after filtering. diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 1e2a89855b33..0cdb3ec1f7ff 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -918,7 +918,7 @@ async def get_users_media_usage_paginate( total number of users matching the filter criteria. Args: - start: start number to begin the query from + start: offset to begin the query from limit: number of rows to retrieve from_ts: request only media that are created later than this timestamp (ms) until_ts: request only media that are created earlier than this timestamp (ms) diff --git a/tests/rest/admin/test_statistics.py b/tests/rest/admin/test_statistics.py index 278c69be2fb8..816683a61235 100644 --- a/tests/rest/admin/test_statistics.py +++ b/tests/rest/admin/test_statistics.py @@ -237,9 +237,8 @@ def test_next_token(self): self.assertEqual(len(channel.json_body["users"]), 19) self.assertEqual(channel.json_body["next_token"], 19) - # Check # Set `from` to value of `next_token` for request remaining entries - # `next_token` does not appear + # Check `next_token` does not appear request, channel = self.make_request( "GET", self.url + "?from=19", access_token=self.admin_user_tok, )