diff --git a/src/tribler/core/components/gigachannel/community/sync_strategy.py b/src/tribler/core/components/gigachannel/community/sync_strategy.py
index 01caab05ec1..c6fc1b3a300 100644
--- a/src/tribler/core/components/gigachannel/community/sync_strategy.py
+++ b/src/tribler/core/components/gigachannel/community/sync_strategy.py
@@ -3,6 +3,9 @@
from ipv8.peerdiscovery.discovery import DiscoveryStrategy
+TARGET_PEERS_NUMBER = 20
+
+
class RemovePeers(DiscoveryStrategy):
"""
Synchronization strategy for remote query community.
@@ -10,8 +13,12 @@ class RemovePeers(DiscoveryStrategy):
Remove a random peer, if we have enough peers to walk to.
"""
+ def __init__(self, overlay, target_peers_number=TARGET_PEERS_NUMBER):
+ super().__init__(overlay)
+ self.target_peers_number = target_peers_number
+
def take_step(self):
with self.walk_lock:
peers = self.overlay.get_peers()
- if peers and len(peers) > 20:
+ if peers and len(peers) > self.target_peers_number:
self.overlay.network.remove_peer(choice(peers))
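The take_step() logic above is small enough to model directly. A toy sketch of the same behavior without IPv8 (illustrative names only, not Tribler code):

```python
# Toy model of RemovePeers.take_step(): once the overlay has more than
# target_peers_number peers, one randomly chosen peer is dropped per step.
from random import choice

peers = [f'peer-{i}' for i in range(25)]
TARGET_PEERS_NUMBER = 20

def take_step(peers, target=TARGET_PEERS_NUMBER):
    if peers and len(peers) > target:
        peers.remove(choice(peers))

for _ in range(10):
    take_step(peers)
print(len(peers))  # 20 - the walk settles at the target peer count
```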
diff --git a/src/tribler/core/components/metadata_store/db/store.py b/src/tribler/core/components/metadata_store/db/store.py
index 513ef6075ca..78089c42a55 100644
--- a/src/tribler/core/components/metadata_store/db/store.py
+++ b/src/tribler/core/components/metadata_store/db/store.py
@@ -10,6 +10,7 @@
from pony import orm
from pony.orm import db_session, desc, left_join, raw_sql, select
+from pony.orm.dbproviders.sqlite import keep_exception
from tribler.core import notifications
from tribler.core.components.metadata_store.db.orm_bindings import (
@@ -50,9 +51,11 @@
from tribler.core.utilities.notifier import Notifier
from tribler.core.utilities.path_util import Path
from tribler.core.utilities.pony_utils import get_max, get_or_create
+from tribler.core.utilities.search_utils import torrent_rank
from tribler.core.utilities.unicode import hexlify
from tribler.core.utilities.utilities import MEMORY_DB
+
BETA_DB_VERSIONS = [0, 1, 2, 3, 4, 5]
CURRENT_DB_VERSION = 14
@@ -167,7 +170,7 @@ def __init__(
# with the static analysis.
# pylint: disable=unused-variable
@self._db.on_connect(provider='sqlite')
- def sqlite_disable_sync(_, connection):
+ def on_connect(_, connection):
cursor = connection.cursor()
cursor.execute("PRAGMA journal_mode = WAL")
cursor.execute("PRAGMA synchronous = NORMAL")
@@ -180,6 +183,10 @@ def sqlite_disable_sync(_, connection):
# losing power during a write will corrupt the database.
cursor.execute("PRAGMA journal_mode = 0")
cursor.execute("PRAGMA synchronous = 0")
+
+ sqlite_rank = keep_exception(torrent_rank)
+ connection.create_function('search_rank', 5, sqlite_rank)
+
# pylint: enable=unused-variable
self.MiscData = misc.define_binding(self._db)
@@ -591,7 +598,7 @@ def torrent_exists_in_personal_channel(self, infohash):
)
# pylint: disable=unused-argument
- def search_keyword(self, query, lim=100):
+ def search_keyword(self, query):
# Requires FTS5 table "FtsIndex" to be generated and populated.
# FTS table is maintained automatically by SQL triggers.
# BM25 ranking is embedded in FTS5.
@@ -600,10 +607,11 @@ def search_keyword(self, query, lim=100):
if not query or query == "*":
return []
- fts_ids = raw_sql(
- """SELECT rowid FROM ChannelNode WHERE rowid IN (SELECT rowid FROM FtsIndex WHERE FtsIndex MATCH $query
- ORDER BY bm25(FtsIndex) LIMIT $lim) GROUP BY coalesce(infohash, rowid)"""
- )
+ fts_ids = raw_sql("""
+ SELECT rowid FROM ChannelNode
+ WHERE rowid IN (SELECT rowid FROM FtsIndex WHERE FtsIndex MATCH $query)
+ GROUP BY coalesce(infohash, rowid)
+ """)
return left_join(g for g in self.MetadataNode if g.rowid in fts_ids) # pylint: disable=E1135
@db_session
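The reworked query drops the inner `ORDER BY bm25(FtsIndex) LIMIT $lim` clause: ranking is no longer delegated to FTS5, so all matching rowids are returned and deduplicated by infohash. A standalone sketch of this query shape (toy schema, assuming an SQLite build with FTS5):

```python
import sqlite3

conn = sqlite3.connect(':memory:')
conn.executescript("""
    CREATE TABLE ChannelNode (infohash TEXT, title TEXT);
    CREATE VIRTUAL TABLE FtsIndex USING fts5(title);
    INSERT INTO ChannelNode (rowid, infohash, title) VALUES
        (1, 'abc', 'big buck bunny'),
        (2, 'abc', 'big buck bunny'),
        (3, NULL, 'sintel');
    INSERT INTO FtsIndex (rowid, title) SELECT rowid, title FROM ChannelNode;
""")
rows = conn.execute("""
    SELECT rowid FROM ChannelNode
    WHERE rowid IN (SELECT rowid FROM FtsIndex WHERE FtsIndex MATCH ?)
    GROUP BY coalesce(infohash, rowid)
""", ('bunny',)).fetchall()
print(rows)  # a single row for the duplicated infohash 'abc' (rowid 1 or 2)
```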
@@ -639,7 +647,7 @@ def get_entries_query(
if cls is None:
cls = self.ChannelNode
- pony_query = self.search_keyword(txt_filter, lim=1000) if txt_filter else left_join(g for g in cls)
+ pony_query = self.search_keyword(txt_filter) if txt_filter else left_join(g for g in cls)
infohash_set = infohash_set or ({infohash} if infohash else None)
if popular:
if metadata_type != REGULAR_TORRENT:
@@ -728,10 +736,49 @@ def get_entries_query(
if sort_by is None:
if txt_filter:
+ # pylint: disable=W0105
+ """
+ The following call to `sort_by` produces an ORDER BY expression that looks like this:
+
+ ORDER BY
+ case when "g"."metadata_type" = $CHANNEL_TORRENT then 1
+ when "g"."metadata_type" = $COLLECTION_NODE then 2
+ else 3 end,
+
+ search_rank(
+ $QUERY_STRING,
+ g.title,
+ torrentstate.seeders,
+ torrentstate.leechers,
+ $CURRENT_TIME - strftime('%s', g.torrent_date)
+ ) DESC,
+
+ "torrentstate"."last_check" DESC,
+
+ So, the channel torrents and channel folders are always on top if they are not filtered out.
+ Then regular torrents are selected in order of their relevance according to a search_rank() result.
+ If two torrents have the same search rank, they are ordered by the last time they were checked.
+
+ The search_rank() function is called directly from the SQLite query, but is implemented in Python,
+ it is actually the torrent_rank() function from core/utilities/search_utils.py, wrapped with
+ keep_exception() to return possible exception from SQLite to Python.
+
+ The search_rank() function receives the following arguments:
+ - the current query string (like "Big Buck Bunny");
+ - the title of the current torrent;
+ - the number of seeders;
+ - the number of leechers;
+ - the number of seconds since the torrent's creation time.
+ """
+
pony_query = pony_query.sort_by(
f"""
(1 if g.metadata_type == {CHANNEL_TORRENT} else 2 if g.metadata_type == {COLLECTION_NODE} else 3),
- desc(g.health.seeders), desc(g.health.leechers)
+ raw_sql('''search_rank(
+ $txt_filter, g.title, torrentstate.seeders, torrentstate.leechers,
+ $int(time()) - strftime('%s', g.torrent_date)
+ ) DESC'''),
+ desc(g.health.last_check) # just to trigger the TorrentState table inclusion into the left join
"""
)
elif popular:
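For context, `connection.create_function('search_rank', 5, ...)` registers a plain Python callable under the name `search_rank` inside SQLite, which is what lets the ORDER BY expression above call torrent_rank(). A minimal self-contained sketch of the mechanism; Pony's keep_exception wrapper is emulated here with a simple decorator, since its job is to surface the Python exception instead of losing it inside SQLite (toy_rank and the schema are illustrative only):

```python
import functools
import sqlite3

def keep_exception_like(func):
    # rough stand-in for pony.orm.dbproviders.sqlite.keep_exception:
    # report and re-raise Python errors raised inside an SQLite-registered function
    @functools.wraps(func)
    def wrapper(*args):
        try:
            return func(*args)
        except Exception as exc:
            print(f'error inside SQL function: {exc!r}')
            raise
    return wrapper

def toy_rank(query, title, seeders, leechers, age_seconds):
    # trivial placeholder for torrent_rank(); the real scoring lives in search_utils.py
    return (seeders or 0) + 0.1 * (leechers or 0) - (age_seconds or 0) / 1e9

conn = sqlite3.connect(':memory:')
conn.create_function('search_rank', 5, keep_exception_like(toy_rank))
conn.execute("CREATE TABLE t (title TEXT, seeders INT, leechers INT, created INT)")
conn.executemany("INSERT INTO t VALUES (?, ?, ?, ?)",
                 [('foo', 3, 1, 0), ('bar', 10, 5, 0)])
rows = conn.execute(
    "SELECT title FROM t ORDER BY "
    "search_rank(?, title, seeders, leechers, strftime('%s', 'now') - created) DESC",
    ('some query',)).fetchall()
print(rows)  # [('bar',), ('foo',)] - highest toy rank first
```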
diff --git a/src/tribler/core/components/metadata_store/remote_query_community/remote_query_community.py b/src/tribler/core/components/metadata_store/remote_query_community/remote_query_community.py
index cf443e8d47a..aa6fd817fef 100644
--- a/src/tribler/core/components/metadata_store/remote_query_community/remote_query_community.py
+++ b/src/tribler/core/components/metadata_store/remote_query_community/remote_query_community.py
@@ -1,8 +1,10 @@
import json
import struct
+import time
from asyncio import Future
from binascii import unhexlify
-from typing import List, Optional, Set
+from itertools import count
+from typing import Any, Dict, List, Optional, Set
from ipv8.lazy_community import lazy_wrapper
from ipv8.messaging.lazy_payload import VariablePayload, vp_compile
@@ -26,7 +28,7 @@
BINARY_FIELDS = ("infohash", "channel_pk")
-def sanitize_query(query_dict, cap=100):
+def sanitize_query(query_dict: Dict[str, Any], cap=100) -> Dict[str, Any]:
sanitized_dict = dict(query_dict)
# We impose a cap on max numbers of returned entries to prevent DDOS-like attacks
@@ -151,6 +153,8 @@ def __init__(self, my_peer, endpoint, network,
self.add_message_handler(SelectResponsePayload, self.on_remote_select_response)
self.eva = EVAProtocol(self, self.on_receive, self.on_send_complete, self.on_error)
+ self.remote_queries_in_progress = 0
+ self.next_remote_query_num = count().__next__ # generator of sequential numbers, for logging & debug purposes
async def on_receive(self, result: TransferResult):
self.logger.debug(f"EVA data received: peer {hexlify(result.peer.mid)}, info {result.info}")
@@ -183,16 +187,32 @@ def send_remote_select(self, peer, processing_callback=None, force_eva_response=
self.ez_send(peer, RemoteSelectPayload(*args))
return request
- async def process_rpc_query(self, json_bytes: bytes):
+ def should_limit_rate_for_query(self, sanitized_parameters: Dict[str, Any]) -> bool:
+ return 'txt_filter' in sanitized_parameters
+
+ async def process_rpc_query_rate_limited(self, sanitized_parameters: Dict[str, Any]) -> List:
+ query_num = self.next_remote_query_num()
+ if self.remote_queries_in_progress and self.should_limit_rate_for_query(sanitized_parameters):
+ self.logger.warning(f'Ignore remote query {query_num} as another one is already being processed. '
+ f'The ignored query: {sanitized_parameters}')
+ return []
+
+ self.logger.info(f'Process remote query {query_num}: {sanitized_parameters}')
+ self.remote_queries_in_progress += 1
+ t = time.time()
+ try:
+ return await self.process_rpc_query(sanitized_parameters)
+ finally:
+ self.remote_queries_in_progress -= 1
+ self.logger.info(f'Remote query {query_num} processed in {time.time()-t} seconds: {sanitized_parameters}')
+
+ async def process_rpc_query(self, sanitized_parameters: Dict[str, Any]) -> List:
"""
Retrieve the result of a database query from a third party, given a dictionary of sanitized query parameters.
:raises TypeError: if the parameters contain invalid keys.
:raises pony.orm.dbapiprovider.OperationalError: if an illegal query was performed.
"""
- parameters = json.loads(json_bytes)
- sanitized_parameters = sanitize_query(parameters, self.rqc_settings.max_response_size)
-
# tags should be extracted because `get_entries_threaded` doesn't expect them as a parameter
tags = sanitized_parameters.pop('tags', None)
@@ -237,9 +257,14 @@ async def on_remote_select_eva(self, peer, request_payload):
async def on_remote_select(self, peer, request_payload):
await self._on_remote_select_basic(peer, request_payload)
+ def parse_parameters(self, json_bytes: bytes) -> Dict[str, Any]:
+ parameters = json.loads(json_bytes)
+ return sanitize_query(parameters, self.rqc_settings.max_response_size)
+
async def _on_remote_select_basic(self, peer, request_payload, force_eva_response=False):
try:
- db_results = await self.process_rpc_query(request_payload.json)
+ sanitized_parameters = self.parse_parameters(request_payload.json)
+ db_results = await self.process_rpc_query_rate_limited(sanitized_parameters)
# When we send our response to a host, we open a window of opportunity
# for it to push back updates
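The rate limiting above is deliberately simple: a plain counter plus try/finally, with no queueing; a second expensive query is dropped rather than delayed. The pattern in isolation (hypothetical names, asyncio only):

```python
# Sketch of the guard used by process_rpc_query_rate_limited(): expensive
# full-text queries are dropped while another query is in flight.
import asyncio
from itertools import count

class RateLimitedHandler:
    def __init__(self):
        self.in_progress = 0
        self.next_num = count().__next__  # sequential ids, as in the PR

    async def handle(self, params: dict) -> list:
        num = self.next_num()
        if self.in_progress and 'txt_filter' in params:
            return []  # drop the expensive query while busy
        self.in_progress += 1
        try:
            await asyncio.sleep(0.1)  # stands in for the real database work
            return [params]
        finally:
            self.in_progress -= 1

async def main():
    h = RateLimitedHandler()
    r = await asyncio.gather(h.handle({'txt_filter': 'foo'}),
                             h.handle({'txt_filter': 'bar'}))
    print(r)  # one of the two result lists is empty

asyncio.run(main())
```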
diff --git a/src/tribler/core/components/metadata_store/remote_query_community/settings.py b/src/tribler/core/components/metadata_store/remote_query_community/settings.py
index ea380da426e..9fcd21bdbf5 100644
--- a/src/tribler/core/components/metadata_store/remote_query_community/settings.py
+++ b/src/tribler/core/components/metadata_store/remote_query_community/settings.py
@@ -5,7 +5,16 @@ class RemoteQueryCommunitySettings(TriblerConfigSection):
minimal_blob_size: int = 200
maximum_payload_size: int = 1300
max_entries: int = maximum_payload_size // minimal_blob_size
- max_query_peers: int = 5
+
+ # The next option is currently used by GigaChannelCommunity only. We should probably move it to the
+ # GigaChannelCommunity settings or to a dedicated search-related section. The value of this option corresponds
+ # to the TARGET_PEERS_NUMBER of src/tribler/core/components/gigachannel/community/sync_strategy.py, that is, to
+ # the number of peers that GigaChannelCommunity will have after a long run (initially, the number of peers in
+ # GigaChannelCommunity can rise to several hundred due to DiscoveryBooster). The number of parallel remote
+ # requests should be neither too small (so we get diverse results from remote peers) nor too big (to avoid
+ # flooding the network with an excessive number of queries). TARGET_PEERS_NUMBER looks like a good middle ground.
+ max_query_peers: int = 20
+
max_response_size: int = 100 # Max number of entries returned by SQL query
max_channel_query_back: int = 4 # Max number of entries to query back on receiving an unknown channel
push_updates_back_enabled = True
diff --git a/src/tribler/core/components/metadata_store/remote_query_community/tests/test_remote_query_community.py b/src/tribler/core/components/metadata_store/remote_query_community/tests/test_remote_query_community.py
index 2a82b8c666d..fbd6d25d5bb 100644
--- a/src/tribler/core/components/metadata_store/remote_query_community/tests/test_remote_query_community.py
+++ b/src/tribler/core/components/metadata_store/remote_query_community/tests/test_remote_query_community.py
@@ -1,11 +1,10 @@
import random
import string
+import time
from asyncio import sleep
from binascii import unhexlify
-from json import dumps
from operator import attrgetter
from os import urandom
-from time import time
from unittest.mock import Mock, patch
from ipv8.keyvault.crypto import default_eccrypto
@@ -112,7 +111,7 @@ async def test_remote_select(self):
channel=channel,
seeders=2 * i,
leechers=i,
- last_check=int(time()) + i,
+ last_check=int(time.time()) + i,
)
kwargs_dict = {"txt_filter": "ubuntu*", "metadata_type": [REGULAR_TORRENT]}
@@ -345,7 +344,7 @@ async def test_process_rpc_query_match_many(self):
channel = self.channel_metadata(0).create_channel("a channel", "")
add_random_torrent(self.torrent_metadata(0), name="a torrent", channel=channel)
- results = await self.overlay(0).process_rpc_query(dumps({}))
+ results = await self.overlay(0).process_rpc_query({})
self.assertEqual(2, len(results))
channel_md, torrent_md = results if isinstance(results[0], self.channel_metadata(0)) else results[::-1]
@@ -359,7 +358,7 @@ async def test_process_rpc_query_match_one(self):
with db_session:
self.channel_metadata(0).create_channel("a channel", "")
- results = await self.overlay(0).process_rpc_query(dumps({}))
+ results = await self.overlay(0).process_rpc_query({})
self.assertEqual(1, len(results))
(channel_md,) = results
@@ -369,22 +368,22 @@ async def test_process_rpc_query_match_none(self):
"""
Check if a correct query with no match in our database returns no result.
"""
- results = await self.overlay(0).process_rpc_query(dumps({}))
+ results = await self.overlay(0).process_rpc_query({})
self.assertEqual(0, len(results))
- async def test_process_rpc_query_match_empty_json(self):
+ def test_parse_parameters_match_empty_json(self):
"""
Check if parsing an empty request causes a ValueError (JSONDecodeError) to be raised.
"""
with self.assertRaises(ValueError):
- await self.overlay(0).process_rpc_query(b'')
+ self.overlay(0).parse_parameters(b'')
- async def test_process_rpc_query_match_illegal_json(self):
+ def test_parse_parameters_match_illegal_json(self):
"""
Check if parsing a request with illegal JSON causes a UnicodeDecodeError to be raised.
"""
with self.assertRaises(UnicodeDecodeError):
- await self.overlay(0).process_rpc_query(b'{"akey":\x80}')
+ self.overlay(0).parse_parameters(b'{"akey":\x80}')
async def test_process_rpc_query_match_invalid_json(self):
"""
@@ -394,21 +393,24 @@ async def test_process_rpc_query_match_invalid_json(self):
self.channel_metadata(0).create_channel("a channel", "")
query = b'{"id_":' + b'\x31' * 200 + b'}'
with self.assertRaises(ValueError):
- await self.overlay(0).process_rpc_query(query)
+ parameters = self.overlay(0).parse_parameters(query)
+ await self.overlay(0).process_rpc_query(parameters)
async def test_process_rpc_query_match_invalid_key(self):
"""
Check if processing a request with an invalid key causes a TypeError to be raised.
"""
with self.assertRaises(TypeError):
- await self.overlay(0).process_rpc_query(b'{"bla":":("}')
+ parameters = self.overlay(0).parse_parameters(b'{"bla":":("}')
+ await self.overlay(0).process_rpc_query(parameters)
async def test_process_rpc_query_no_column(self):
"""
Check if processing a request with no database columns causes an OperationalError.
"""
with self.assertRaises(OperationalError):
- await self.overlay(0).process_rpc_query(b'{"txt_filter":{"key":"bla"}}')
+ parameters = self.overlay(0).parse_parameters(b'{"txt_filter":{"key":"bla"}}')
+ await self.overlay(0).process_rpc_query(parameters)
async def test_remote_query_big_response(self):
@@ -574,3 +576,45 @@ async def test_remote_select_force_eva(self):
await self.deliver_messages(timeout=0.5)
self.nodes[1].overlay.eva.send_binary.assert_called_once()
+
+ async def test_multiple_parallel_request(self):
+ peer_a = self.nodes[0].my_peer
+ a = self.nodes[0].overlay
+ b = self.nodes[1].overlay
+
+ # Peer A has two torrents "foo" and "bar"
+ with db_session:
+ add_random_torrent(a.mds.TorrentMetadata, name="foo")
+ add_random_torrent(a.mds.TorrentMetadata, name="bar")
+
+ # Peer B sends two parallel full-text search queries, only one of them should be processed
+ callback1 = Mock()
+ kwargs1 = {"txt_filter": "foo", "metadata_type": [REGULAR_TORRENT]}
+ b.send_remote_select(peer_a, **kwargs1, processing_callback=callback1)
+
+ callback2 = Mock()
+ kwargs2 = {"txt_filter": "bar", "metadata_type": [REGULAR_TORRENT]}
+ b.send_remote_select(peer_a, **kwargs2, processing_callback=callback2)
+
+ original_get_entries = MetadataStore.get_entries
+ # Add a delay to ensure that the first query is still being processed when the second one arrives
+ # (the mds.get_entries() method is a synchronous one and is called from a worker thread)
+
+ def slow_get_entries(self, *args, **kwargs):
+ time.sleep(0.1)
+ return original_get_entries(self, *args, **kwargs)
+
+ with patch.object(a, 'logger') as logger, patch.object(MetadataStore, 'get_entries', slow_get_entries):
+ await self.deliver_messages(timeout=0.5)
+
+ torrents1 = list(b.mds.get_entries(**kwargs1))
+ torrents2 = list(b.mds.get_entries(**kwargs2))
+
+ # Both remote queries should return results to the peer B...
+ assert callback1.called and callback2.called
+ # ...but one of them should return an empty list, as the database query was not actually executed
+ assert bool(torrents1) != bool(torrents2)
+
+ # Check that on peer A there is exactly one warning about an ignored remote query
+ warnings = [call.args[0] for call in logger.warning.call_args_list]
+ assert len([msg for msg in warnings if msg.startswith('Ignore remote query')]) == 1
diff --git a/src/tribler/core/components/metadata_store/remote_query_community/tests/test_remote_search_by_tags.py b/src/tribler/core/components/metadata_store/remote_query_community/tests/test_remote_search_by_tags.py
index 95321f121cc..0ab2fdb645b 100644
--- a/src/tribler/core/components/metadata_store/remote_query_community/tests/test_remote_search_by_tags.py
+++ b/src/tribler/core/components/metadata_store/remote_query_community/tests/test_remote_search_by_tags.py
@@ -72,9 +72,7 @@ def test_search_for_tags_only_valid_tags(self, mocked_get_subjects_intersection:
async def test_process_rpc_query_no_tags(self, mocked_get_entries_threaded: AsyncMock):
# test that in case of missing tags, the remote search works like a normal remote search
parameters = {'first': 0, 'infohash_set': None, 'last': 100}
- json = dumps(parameters).encode('utf-8')
-
- await self.rqc.process_rpc_query(json)
+ await self.rqc.process_rpc_query(parameters)
expected_parameters = {'infohash_set': None}
expected_parameters.update(parameters)
@@ -117,10 +115,8 @@ def _add(infohash):
# Then we try to query search for the tag 'tag1'
parameters = {'first': 0, 'infohash_set': None, 'last': 100, 'tags': ['tag1']}
- json = dumps(parameters).encode('utf-8')
-
with db_session:
- query_results = [r.to_dict() for r in await self.rqc.process_rpc_query(json)]
+ query_results = [r.to_dict() for r in await self.rqc.process_rpc_query(parameters)]
# Expected results: only one infohash (b'infohash1') should be returned.
result_infohash_list = [r['infohash'] for r in query_results]
diff --git a/src/tribler/core/components/metadata_store/restapi/search_endpoint.py b/src/tribler/core/components/metadata_store/restapi/search_endpoint.py
index e0710d53db0..a7a1963871f 100644
--- a/src/tribler/core/components/metadata_store/restapi/search_endpoint.py
+++ b/src/tribler/core/components/metadata_store/restapi/search_endpoint.py
@@ -1,3 +1,4 @@
+import time
from collections import defaultdict
from typing import Dict, List
@@ -119,13 +120,27 @@ async def search(self, request):
def search_db():
with db_session:
+ t1 = time.time()
pony_query = mds.get_entries(**sanitized)
+ t2 = time.time()
search_results = [r.to_simple_dict() for r in pony_query]
+ t3 = time.time()
if include_total:
total = mds.get_total_count(**sanitized)
+ t4 = time.time()
max_rowid = mds.get_max_rowid()
+ t5 = time.time()
+ self._logger.info(f'Search performance for {sanitized}:\n'
+ f'Main query executed in {t2 - t1:.6} seconds;\n'
+ f'Result constructed in {t3 - t2:.6} seconds;\n'
+ f'Total rows count calculated in {t4 - t3:.6} seconds;\n'
+ f'Max rowid determined in {t5 - t4:.6} seconds.')
else:
total = max_rowid = None
+ self._logger.info(f'Search performance for {sanitized}:\n'
+ f'Main query executed in {t2 - t1:.6} seconds;\n'
+ f'Result constructed in {t3 - t2:.6} seconds.')
+
return search_results, total, max_rowid
try:
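The t1..t5 bookkeeping above is straightforward but repetitive; the same measurements could be collected with a small context-manager helper. A sketch of that alternative (hypothetical helper, not what the PR does):

```python
import time
from contextlib import contextmanager

@contextmanager
def timed(timings: dict, name: str):
    # record the wall-clock duration of the enclosed block under `name`
    start = time.time()
    try:
        yield
    finally:
        timings[name] = time.time() - start

timings = {}
with timed(timings, 'main query'):
    results = [x * x for x in range(100_000)]  # stands in for mds.get_entries(...)
with timed(timings, 'result construction'):
    simple = [str(r) for r in results[:100]]   # stands in for to_simple_dict()
print('; '.join(f'{name} executed in {sec:.6f} seconds' for name, sec in timings.items()))
```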
diff --git a/src/tribler/core/tests/test_search_utils.py b/src/tribler/core/tests/test_search_utils.py
index 0506d136f57..b1a1fe94a18 100644
--- a/src/tribler/core/tests/test_search_utils.py
+++ b/src/tribler/core/tests/test_search_utils.py
@@ -1,4 +1,12 @@
-from tribler.core.utilities.search_utils import filter_keywords, split_into_keywords
+from collections import deque
+
+import pytest
+
+from tribler.core.utilities.search_utils import filter_keywords, find_word_and_rotate_title, freshness_rank, item_rank,\
+ seeders_rank, split_into_keywords, torrent_rank, title_rank
+
+
+DAY = 60 * 60 * 24
def test_split_into_keywords():
@@ -15,3 +23,181 @@ def test_filter_keywords():
result = filter_keywords(["to", "be", "or", "not", "to", "be"])
assert isinstance(result, list)
assert len(result) == 4
+
+
+def test_title_rank_range():
+ assert title_rank('Big Buck Bunny', 'Big Buck Bunny') == 1
+
+ long_query = ' '.join(['foo'] * 1000)
+ long_title = ' '.join(['bar'] * 1000)
+ assert title_rank(long_query, long_title) == pytest.approx(0.03554968)
+
+
+def test_freshness_rank_range():
+ assert freshness_rank(-1) == 0
+ assert freshness_rank(0) == 0
+ assert freshness_rank(0.001) == pytest.approx(1.0)
+ assert freshness_rank(1000000000) == pytest.approx(0.0025852989)
+
+
+def test_seeders_rank_range():
+ assert seeders_rank(0) == 0
+ assert seeders_rank(1000000) == pytest.approx(0.9999)
+
+
+def test_torrent_rank_range():
+ assert torrent_rank('Big Buck Bunny', 'Big Buck Bunny', seeders=1000000, freshness=0.01) == pytest.approx(0.99999)
+
+ long_query = ' '.join(['foo'] * 1000)
+ long_title = ' '.join(['bar'] * 1000)
+ assert torrent_rank(long_query, long_title, freshness=1000000 * 365 * DAY) == pytest.approx(0.02879524)
+
+
+def test_torrent_rank():
+ query = 'Big Buck Bunny'
+ # The exact match is ranked pretty high
+ title_match = torrent_rank(query, 'Big Buck Bunny') # 0.81
+ assert title_match > 0.8
+
+ # Seeders are good for the rank
+ # The more seeders the better
+ # The fewer days have passed since the creation of the torrent, the higher its rank
+ assert torrent_rank(query, 'Big Buck Bunny', seeders=1000, freshness=1 * DAY) > \
+ torrent_rank(query, 'Big Buck Bunny', seeders=1000, freshness=100 * DAY) > \
+ torrent_rank(query, 'Big Buck Bunny', seeders=100, freshness=100 * DAY) > \
+ title_match
+
+ # If a title contains non-matching words that are missing from the query string, it is not as good as an exact match
+ # The closer the non-matching words are placed to the start of the title, the worse the rank
+ assert title_match > \
+ torrent_rank(query, 'Big Buck Bunny II') > \
+ torrent_rank(query, 'Big Buck Brown Bunny') > \
+ torrent_rank(query, 'Big Bad Buck Bunny') > \
+ torrent_rank(query, 'Boring Big Buck Bunny')
+
+ # The more non-matching words are in the title, the worse the rank
+ assert title_match > \
+ torrent_rank(query, 'Big Buck A Bunny') > \
+ torrent_rank(query, 'Big Buck A B Bunny') > \
+ torrent_rank(query, 'Big Buck A B C Bunny')
+
+ # Non-matching words close to the beginning of the title give a bigger penalty
+ assert title_match > \
+ torrent_rank(query, 'Big A Buck Bunny') > \
+ torrent_rank(query, 'Big A B Buck Bunny') > \
+ torrent_rank(query, 'Big A B C Buck Bunny')
+
+ assert title_match > \
+ torrent_rank(query, 'A Big Buck Bunny') > \
+ torrent_rank(query, 'A B Big Buck Bunny') > \
+ torrent_rank(query, 'A B C Big Buck Bunny')
+
+ assert torrent_rank(query, 'Big A Buck Bunny') > \
+ torrent_rank(query, 'A Big Buck Bunny')
+
+ assert torrent_rank(query, 'Big A B Buck Bunny') > \
+ torrent_rank(query, 'A B Big Buck Bunny')
+
+ assert torrent_rank(query, 'Big A B C Buck Bunny') > \
+ torrent_rank(query, 'A B C Big Buck Bunny')
+
+ # Wrong order of words in the title imposes a penalty on the rank
+ assert title_match > \
+ torrent_rank(query, 'Big Bunny Buck')
+
+ # Missing query words impose a really big penalty
+ assert torrent_rank(query, 'Big Buck') < 0.5
+
+ # The closer the missing words are to the beginning of the query, the worse the rank
+ assert torrent_rank(query, 'Big Buck') > \
+ torrent_rank(query, 'Big Bunny') > \
+ torrent_rank(query, 'Buck Bunny')
+
+ # More seeders is still better
+ assert torrent_rank(query, 'Buck Bunny', seeders=1000, freshness=5 * DAY) > \
+ torrent_rank(query, 'Buck Bunny', seeders=100, freshness=5 * DAY) > \
+ torrent_rank(query, 'Buck Bunny', seeders=10, freshness=5 * DAY) > \
+ torrent_rank(query, 'Buck Bunny')
+
+ # The more days have passed since the torrent's creation, the lower the rank
+ assert torrent_rank(query, 'Buck Bunny', freshness=5 * DAY) > \
+ torrent_rank(query, 'Buck Bunny', freshness=10 * DAY) > \
+ torrent_rank(query, 'Buck Bunny', freshness=20 * DAY)
+
+ # The exact match has a good rank
+ assert torrent_rank('Sintel', 'Sintel') > 0.8
+
+ # Non-matching words at the end of the title give slightly worse results
+ # Non-matching words at the beginning of the title are much worse
+ # Too many non-matching words give a bigger penalty
+ assert torrent_rank('Sintel', 'Sintel') > \
+ torrent_rank('Sintel', 'Sintel Part II') > \
+ torrent_rank('Sintel', 'Part of Sintel') > \
+ torrent_rank('Sintel', 'the.script.from.the.movie.Sintel.pdf')
+
+ # Some more examples
+ assert torrent_rank("Internet's Own Boy", "Internet's Own Boy") > \
+ torrent_rank("Internet's Own Boy", "Internet's very Own Boy") > \
+ torrent_rank("Internet's Own Boy", "Internet's very special Boy person")
+
+
+def test_title_rank():
+ # tests for better coverage of corner cases
+ assert title_rank("", "title") == pytest.approx(1.0)
+ assert title_rank("query", "") == pytest.approx(0.0)
+
+
+def test_item_rank():
+ item = dict(name="abc", num_seeders=10, num_leechers=20)
+ assert item_rank("abc", item) == pytest.approx(0.81978445)
+
+
+def test_find_word():
+ # To use the find_word_and_rotate_title function, call it once for each word of the query and see:
+ # - how many query words are missed in the title;
+ # - how many excess or out-of-place title words are found before each query word;
+ # - and how many title words are not mentioned in the query.
+
+ # Example 1, query "A B C", title "A B C"
+ title = deque(["A", "B", "C"])
+ assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["B", "C"])
+ assert find_word_and_rotate_title("B", title) == (True, 0) and title == deque(["C"])
+ assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque([])
+ # Conclusion: exact match.
+
+ # Example 2, query "A B C", title "A B C D"
+ title = deque(["A", "B", "C", "D"])
+ assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["B", "C", "D"])
+ assert find_word_and_rotate_title("B", title) == (True, 0) and title == deque(["C", "D"])
+ assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque(["D"])
+ # Conclusion: minor penalty for one excess word in the title that is not in the query.
+
+ # Example 3, query "A B C", title "X Y A B C"
+ title = deque(["X", "Y", "A", "B", "C"])
+ assert find_word_and_rotate_title("A", title) == (True, 2) and title == deque(["B", "C", "X", "Y"])
+ assert find_word_and_rotate_title("B", title) == (True, 0) and title == deque(["C", "X", "Y"])
+ assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque(["X", "Y"])
+ # Conclusion: major penalty for skipping two words at the beginning of the title plus a minor penalty for two
+ # excess words in the title that are not in the query.
+
+ # Example 4, query "A B C", title "A B X Y C"
+ title = deque(["A", "B", "X", "Y", "C"])
+ assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["B", "X", "Y", "C"])
+ assert find_word_and_rotate_title("B", title) == (True, 0) and title == deque(["X", "Y", "C"])
+ assert find_word_and_rotate_title("C", title) == (True, 2) and title == deque(["X", "Y"])
+ # Conclusion: average penalty for skipping two words in the middle of the title plus a minor penalty for two
+ # excess words in the title that are not in the query.
+
+ # Example 5, query "A B C", title "A C B"
+ title = deque(["A", "C", "B"])
+ assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["C", "B"])
+ assert find_word_and_rotate_title("B", title) == (True, 1) and title == deque(["C"])
+ assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque([])
+ # Conclusion: average penalty for skipping one word in the middle of the title.
+
+ # Example 6, query "A B C", title "A C X"
+ title = deque(["A", "C", "X"])
+ assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["C", "X"])
+ assert find_word_and_rotate_title("B", title) == (False, 0) and title == deque(["C", "X"])
+ assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque(["X"])
+ # Conclusion: huge penalty for missing one query word plus a minor penalty for one excess title word.
diff --git a/src/tribler/core/utilities/search_utils.py b/src/tribler/core/utilities/search_utils.py
index 6c4180a41b9..ca09b0398c3 100644
--- a/src/tribler/core/utilities/search_utils.py
+++ b/src/tribler/core/utilities/search_utils.py
@@ -1,13 +1,18 @@
"""
Search utilities.
-Author(s): Jelle Roozenburg, Arno Bakker
+Author(s): Jelle Roozenburg, Arno Bakker, Alexander Kozlovsky
"""
import re
+import time
+from collections import deque
+from typing import Deque, List, Optional, Tuple
RE_KEYWORD_SPLIT = re.compile(r"[\W_]", re.UNICODE)
DIALOG_STOPWORDS = {'an', 'and', 'by', 'for', 'from', 'of', 'the', 'to', 'with'}
+SECONDS_IN_DAY = 60 * 60 * 24
+
def split_into_keywords(string, to_filter_stopwords=False):
"""
@@ -27,3 +32,212 @@ def split_into_keywords(string, to_filter_stopwords=False):
def filter_keywords(keywords):
return [kw for kw in keywords if len(kw) > 0 and kw not in DIALOG_STOPWORDS]
+
+
+def item_rank(query: str, item: dict) -> float:
+ """
+ Calculates the torrent rank for an item received from a remote query.
+
+ :param query: a user-defined query string
+ :param item: a dict with torrent info.
+ Should include the key `name`; may include `num_seeders`, `num_leechers`, `updated`
+ :return: the torrent rank value in range [0, 1]
+ """
+
+ title = item['name']
+ seeders = item.get('num_seeders', 0)
+ leechers = item.get('num_leechers', 0)
+ freshness = time.time() - item.get('updated', 0)
+ return torrent_rank(query, title, seeders, leechers, freshness)
+
+
+def torrent_rank(query: str, title: str, seeders: int = 0, leechers: int = 0, freshness: Optional[float] = 0) -> float:
+ """
+ Calculates search rank for a torrent.
+
+ :param query: a user-defined query string
+ :param title: a torrent name
+ :param seeders: the number of seeders
+ :param leechers: the number of leechers
+ :param freshness: the number of seconds since the torrent's creation. A zero or negative value means the torrent
+ creation date is unknown. Passing an age instead of a timestamp is more convenient, as it avoids
+ calling the `time()` function inside the ranking code and simplifies testing.
+ :return: the torrent rank value in range [0, 1]
+
+ Takes into account:
+ - similarity of the title to the query string;
+ - the reported number of seeders;
+ - how long ago the torrent file was created.
+ """
+ tr = title_rank(query or '', title or '')
+ sr = (seeders_rank(seeders or 0, leechers or 0) + 9) / 10 # range [0.9, 1]
+ fr = (freshness_rank(freshness) + 9) / 10 # range [0.9, 1]
+ result = tr * sr * fr
+
+ # uncomment the next line to debug the function inside an SQL query:
+ # print(f'*** {result} : {seeders}/{freshness} ({freshness / SECONDS_IN_DAY} days)/{title} | {query}')
+
+ return result
+
+
+ LEECHERS_COEFF = 0.1 # How much less important leechers are compared to seeders (ten times less important)
+SEEDERS_HALF_RANK = 100 # The number of seeders at which the seeders rank is 0.5
+
+
+def seeders_rank(seeders: int, leechers: int = 0) -> float:
+ """
+ Calculates rank based on the number of torrent's seeders and leechers
+
+ :param seeders: the number of seeders for the torrent. It is a non-negative value, usually in the range [0, 1000]
+ :param leechers: the number of leechers for the torrent. It is a non-negative value, usually in the range [0, 1000]
+ :return: the torrent rank based on seeders and leechers, normalized to the range [0, 1]
+ """
+
+ # The leechers are treated as less capable seeders
+ sl = seeders + leechers * LEECHERS_COEFF # Seeders and leechers combined
+
+ # The function result has the desired properties:
+ # * zero rank for zero seeders;
+ # * 0.5 rank for SEEDERS_HALF_RANK seeders;
+ # * 1.0 rank for an infinite number of seeders;
+ # * a soft curve.
+ # It is possible to use different curves with a similar shape, for example:
+ # * 2 * arctan(x / SEEDERS_HALF_RANK) / PI,
+ # * 1 - exp(x * ln(0.5) / SEEDERS_HALF_RANK)
+ # but it does not actually matter in practice
+ return sl / (SEEDERS_HALF_RANK + sl)
+
+
+def freshness_rank(freshness: Optional[float] = 0) -> float:
+ """
+ Calculates a rank value based on the torrent freshness. The result is normalized to the range [0, 1]
+
+ :param freshness: number of seconds since the torrent creation.
+ Zero or negative values means the actual torrent creation date is unknown.
+ :return: the torrent rank based on freshness. The result is normalized to the range [0, 1]
+
+ Example results:
+ 0 seconds since torrent creation -> the actual torrent creation date is unknown, freshness rank 0
+ 1 second since torrent creation -> freshness rank 0.999
+ 1 day since torrent creation -> freshness rank 0.967
+ 30 days since torrent creation -> freshness rank 0.5
+ 1 year since torrent creation -> freshness rank 0.0759
+ """
+ freshness = max(0, freshness or 0)
+ if not freshness:
+ return 0 # for freshness <= 0 the rank value is 0 because of an incorrect freshness value
+
+ # The function declines from 1.0 to 0.0 on the range (0..Infinity), with the following properties:
+ # * for just-created torrents the rank value is close to 1.0
+ # * for 30-days-old torrents the rank value is 0.5
+ # * for very old torrents the rank value approaches zero
+ # Other formulas with the same properties could be used (for example, exponent-based);
+ # the exact curve shape is not really important.
+ days = freshness / SECONDS_IN_DAY
+ return 1 / (1 + days / 30)
+
+
+word_re = re.compile(r'\w+', re.UNICODE)
+
+
+def title_rank(query: str, title: str) -> float:
+ """
+ Calculate the similarity of the title string to a query string as a float value in range [0, 1]
+
+ :param query: a user-defined query string
+ :param title: a torrent name
+ :return: the similarity of the title string to a query string as a float value in range [0, 1]
+ """
+ query = word_re.findall(query.lower())
+ title = word_re.findall(title.lower())
+ return calculate_rank(query, title)
+
+
+# These coefficients are found empirically. Their exact values are not very important for a relative ranking of results
+
+ # The first word in a query is considered more important than the next one, and so on.
+ # With a value of 5, the sixth word in a query is half as important as the first one
+POSITION_COEFF = 5
+
+# Some big value for a penalty if a query word is totally missed from a torrent title
+MISSED_WORD_PENALTY = 10
+
+# If a torrent title contains some words at the very end that are not mentioned in a query, we add a very slight
+ # penalty for them. The *bigger* the REMAINDER_COEFF is, the *smaller* the penalty we add for these excess words
+REMAINDER_COEFF = 10
+
+ # The exact value of this coefficient is not important. It is used to convert the total_error value to a rank value.
+ # The total_error value is a non-negative number. We want the resulting rank to be in the range [0, 1].
+RANK_NORMALIZATION_COEFF = 10
+
+
+def calculate_rank(query: List[str], title: List[str]) -> float:
+ """
+ Calculates the similarity of the title to the query as a float value in range [0, 1].
+
+ :param query: list of query words
+ :param title: list of title words
+ :return: the similarity of the title to the query as a float value in range [0, 1]
+ """
+ if not query:
+ return 1.0
+
+ if not title:
+ return 0.0
+
+ title = deque(title)
+ total_error = 0
+ for i, word in enumerate(query):
+ # The first word is more important than the second word, and so on
+ word_weight = POSITION_COEFF / (POSITION_COEFF + i)
+
+ # Read the description of the `find_word_and_rotate_title` function to understand what is going on.
+ # Basically, we are trying to find each query word among the title words, calculate a penalty if the query word
+ # is not found or if there are some title words before it, and then rotate the skipped title words to the end
+ # of the title. This way, the smallest penalty goes to a title that has the query words in the proper order
+ # at the beginning of the title.
+ found, skipped = find_word_and_rotate_title(word, title)
+ if found:
+ # if the query word is found in the title, add a penalty for the title words skipped before it
+ total_error += skipped * word_weight
+ else:
+ # if the query word is not found in the title, add a big penalty for it
+ total_error += MISSED_WORD_PENALTY * word_weight
+
+ # a small penalty for excess words in the title that were not mentioned in the search phrase
+ remainder_weight = 1 / (REMAINDER_COEFF + len(query))
+ remained_words_error = len(title) * remainder_weight
+ total_error += remained_words_error
+
+ # a search rank should be between 0 and 1
+ return RANK_NORMALIZATION_COEFF / (RANK_NORMALIZATION_COEFF + total_error)
+
+
+def find_word_and_rotate_title(word: str, title: Deque[str]) -> Tuple[bool, int]:
+ """
+ Finds the query word in the title. Returns whether it was found or not and the number of skipped words in the title.
+
+ :param word: a word from the user-defined query string
+ :param title: a deque of words in the title
+ :return: a two-elements tuple, whether the word was found in the title and the number of skipped words
+
+ This is a helper function to efficiently answer a question of how close a query string and a title string are,
+ taking into account the ordering of words in both strings.
+
+ For efficiency reasons, the function modifies the `title` deque in place by removing the first occurrence
+ of the found word and rotating all leading non-matching words to the end of the deque. This allows multiple
+ calls of the `find_word_and_rotate_title` function for subsequent words of the same query string to be efficient.
+
+ An example: find_word_and_rotate_title('A', deque(['X', 'Y', 'A', 'B', 'C'])) returns `(True, 2)`, where True means
+ that the word 'A' was found in the `title` deque, and 2 is the number of skipped words ('X', 'Y'). It also modifies
+ the `title` deque, which now looks like deque(['B', 'C', 'X', 'Y']): the found word 'A' was removed, and
+ the leading non-matching words ('X', 'Y') were moved to the end of the deque.
+ """
+ try:
+ skipped = title.index(word) # find the query word placement in the title and the number of preceding words
+ except ValueError:
+ return False, 0
+
+ title.rotate(-skipped) # rotate skipped words to the end
+ title.popleft() # remove found word
+ return True, skipped
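A worked example of how the pieces above combine (the values follow directly from the formulas; the 0.81 baseline matches the comment in test_torrent_rank):

```python
from tribler.core.utilities.search_utils import title_rank, torrent_rank

DAY = 60 * 60 * 24

# Title similarity alone is 1.0 for an exact match...
print(title_rank('big buck bunny', 'big buck bunny'))  # 1.0

# ...but torrent_rank compresses seeders_rank and freshness_rank into [0.9, 1.0] each:
# torrent_rank = title_rank * (seeders_rank + 9)/10 * (freshness_rank + 9)/10
# With unknown seeders and creation date both factors are 0.9, hence 1.0 * 0.9 * 0.9:
print(torrent_rank('big buck bunny', 'big buck bunny'))  # 0.81

# 100 seeders (seeders_rank 0.5 -> factor 0.95) and a one-day-old torrent
# (freshness_rank ~0.968 -> factor ~0.997) lift the result to ~0.947:
print(torrent_rank('big buck bunny', 'big buck bunny', seeders=100, freshness=1 * DAY))
```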
diff --git a/src/tribler/gui/qt_resources/search_results.ui b/src/tribler/gui/qt_resources/search_results.ui
index 7ef8639c575..c610b6c2063 100644
--- a/src/tribler/gui/qt_resources/search_results.ui
+++ b/src/tribler/gui/qt_resources/search_results.ui
@@ -110,7 +110,38 @@
 [The XML of this hunk was garbled during extraction. Recoverable content: the hunk replaces
 the old progress-bar item with an item holding the new SearchProgressBar widget
 (horizontal/vertical stretch 0/0, maximum size 16777215x10, initial value 0, alignment
 Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter), followed by one more layout item.]
@@ -129,6 +160,12 @@
<header>tribler.gui.widgets.timeoutprogressbar.h</header>
<container>1</container>
+ <customwidget>
+ <class>SearchProgressBar</class>
+ <extends>QProgressBar</extends>
+ <header>tribler.gui.widgets.search_progress_bar.h</header>
+ <container>1</container>
+ </customwidget>
diff --git a/src/tribler/gui/tests/test_gui.py b/src/tribler/gui/tests/test_gui.py
index ad057b8b787..622c622a64d 100644
--- a/src/tribler/gui/tests/test_gui.py
+++ b/src/tribler/gui/tests/test_gui.py
@@ -384,19 +384,17 @@ def test_search_suggestions(window):
def test_search(window):
window.top_search_bar.setText("a") # This is likely to trigger some search results
QTest.keyClick(window.top_search_bar, Qt.Key_Enter)
- wait_for_variable(window, "search_results_page.search_request")
+ QTest.qWait(100)
screenshot(window, name="search_loading_page")
- QTest.mouseClick(window.search_results_page.show_results_button, Qt.LeftButton)
tst_channels_widget(
window,
- window.search_results_page.results_page,
+ window.search_results_page.results_page_content,
"search_results",
sort_column=2,
test_filter=False,
test_subscribe=False,
)
-
@pytest.mark.guitest
def test_add_download_url(window):
go_to_and_wait_for_downloads(window)
diff --git a/src/tribler/gui/widgets/channelcontentswidget.py b/src/tribler/gui/widgets/channelcontentswidget.py
index 34bebc5824a..2d150fc7413 100644
--- a/src/tribler/gui/widgets/channelcontentswidget.py
+++ b/src/tribler/gui/widgets/channelcontentswidget.py
@@ -1,7 +1,7 @@
from base64 import b64encode
from PyQt5 import uic
-from PyQt5.QtCore import QDir, QTimer, Qt
+from PyQt5.QtCore import QDir, QTimer, Qt, pyqtSignal
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import QAction, QFileDialog
@@ -39,6 +39,9 @@
# pylint: disable=too-many-instance-attributes, too-many-public-methods
class ChannelContentsWidget(AddBreadcrumbOnShowMixin, widget_form, widget_class):
+
+ model_query_completed = pyqtSignal()
+
def __init__(self, parent=None):
widget_class.__init__(self, parent=parent)
@@ -110,6 +113,10 @@ def personal_channel_model(self):
def model(self):
return self.channels_stack[-1] if self.channels_stack else None
+ @property
+ def root_model(self):
+ return self.channels_stack[0] if self.channels_stack else None
+
def on_channel_committed(self, response):
if not response or not response.get("success", False):
return
@@ -260,6 +267,9 @@ def on_model_info_changed(self, changed_entries):
self.model.channel_info["dirty"] = dirty
self.update_labels()
+ def on_model_query_completed(self):
+ self.model_query_completed.emit()
+
def initialize_root_model_from_channel_info(self, channel_info):
if channel_info.get("state") == CHANNEL_STATE.PERSONAL.value:
self.default_channel_model = self.personal_channel_model
@@ -293,10 +303,13 @@ def reset_view(self, text_filter=None, category_filter=None):
def disconnect_current_model(self):
disconnect(self.window().core_manager.events_manager.node_info_updated, self.model.update_node_info)
disconnect(self.model.info_changed, self.on_model_info_changed)
+ disconnect(self.model.query_complete, self.on_model_query_completed)
+
self.controller.unset_model() # Disconnect the selectionChanged signal
def connect_current_model(self):
connect(self.model.info_changed, self.on_model_info_changed)
+ connect(self.model.query_complete, self.on_model_query_completed)
connect(self.window().core_manager.events_manager.node_info_updated, self.model.update_node_info)
@property
@@ -318,6 +331,10 @@ def on_breadcrumb_clicked(self, tgt_level):
# Reset the view if the user clicks on the last part of the breadcrumb
self.reset_view()
+ def format_search_title(self):
+ text = self.model.format_title()
+ self.channel_name_label.setText(text)
+
def _set_filter_controls_from_model(self):
# This should typically be called under freeze_controls context manager
content_category = ContentCategories.get(self.model.category_filter)
diff --git a/src/tribler/gui/widgets/channelsmenulistwidget.py b/src/tribler/gui/widgets/channelsmenulistwidget.py
index 910fee64050..5c62b6818ce 100644
--- a/src/tribler/gui/widgets/channelsmenulistwidget.py
+++ b/src/tribler/gui/widgets/channelsmenulistwidget.py
@@ -133,7 +133,7 @@ def on_query_results(self, response):
self.items_set = frozenset(entry_to_tuple(channel_info) for channel_info in channels)
- def load_channels(self, request=None):
+ def load_channels(self):
TriblerNetworkRequest(self.base_url, self.on_query_results, url_params={"subscribed": True, "last": 1000})
def reload_if_necessary(self, changed_entries):
diff --git a/src/tribler/gui/widgets/search_progress_bar.py b/src/tribler/gui/widgets/search_progress_bar.py
new file mode 100644
index 00000000000..b6747cae675
--- /dev/null
+++ b/src/tribler/gui/widgets/search_progress_bar.py
@@ -0,0 +1,104 @@
+import time
+
+from PyQt5.QtCore import QTimer, pyqtSignal
+from PyQt5.QtWidgets import QProgressBar
+
+from tribler.gui.utilities import connect
+
+MAX_VALUE = 10000
+UPDATE_DELAY = 0.5
+REMOTE_DELAY = 0.25
+
+
+class SearchProgressBar(QProgressBar):
+ ready_to_update_results = pyqtSignal()
+
+ def __init__(self, parent=None, timeout=20):
+ super().__init__(parent)
+ self.timeout_interval = timeout
+ self.timer = QTimer()
+ self.timer.setSingleShot(False)
+ self.timer.setInterval(100) # update the progress bar every 100 ms
+
+ self.start_time = None
+ self.last_update_time = None
+ self.last_remote_result_time = None
+ self.has_new_remote_results = False
+ self.peers_total = 0
+ self.peers_responded = 0
+ self.new_remote_items_count = 0
+ self.total_remote_items_count = 0
+
+ self._value = 0
+ self.setValue(0)
+ self.setMaximum(MAX_VALUE)
+
+ connect(self.timer.timeout, self._update)
+
+ def start(self):
+ self.start_time = time.time()
+ # reset the state left over from a possible previous search
+ self.last_update_time = None
+ self.last_remote_result_time = None
+ self.has_new_remote_results = False
+ self.peers_total = 0
+ self.peers_responded = 0
+ self.new_remote_items_count = 0
+ self.total_remote_items_count = 0
+ self.setToolTip('')
+ self.setValue(0)
+ self.timer.start()
+ self.show()
+
+ def _update(self):
+ if self.start_time is None:
+ return
+
+ t = time.time()
+
+ time_progress = (t - self.start_time) / self.timeout_interval
+ response_progress = (self.peers_responded / self.peers_total) if self.peers_total else 0
+ scale = 1 - ((1 - time_progress) * (1 - response_progress)) ** 2
+ value = int(scale * MAX_VALUE)
+ self.setValue(value)
+
+ timeout = time_progress >= 1
+ most_peers_responded = self.peers_total > 0 and self.peers_responded / self.peers_total >= 0.8
+ active_transfers_finished = self.last_remote_result_time and t - self.last_remote_result_time > REMOTE_DELAY
+
+ should_stop = timeout or (most_peers_responded and active_transfers_finished)
+
+ if self.last_update_time is not None and self.has_new_remote_results \
+ and ((t - self.last_update_time > UPDATE_DELAY and active_transfers_finished) or should_stop):
+ self.last_update_time = t
+ self.has_new_remote_results = False
+ self.new_remote_items_count = 0
+ self.ready_to_update_results.emit()
+
+ if should_stop:
+ self.stop()
+
+ def stop(self):
+ self.start_time = None
+ self.timer.stop()
+ self.hide()
+
+ def mousePressEvent(self, _):
+ self.stop()
+
+ def on_local_results(self):
+ self.last_update_time = time.time()
+ self.has_new_remote_results = False
+ self._update()
+
+ def set_remote_total(self, total: int):
+ self.peers_total = total
+ self.setToolTip(f'0/{total} peers responded')
+ self._update()
+
+ def on_remote_results(self, new_items_count, peers_responded):
+ self.last_remote_result_time = time.time()
+ # update the counters first so the tooltip reflects the results just received
+ self.has_new_remote_results = True
+ self.new_remote_items_count += new_items_count
+ self.total_remote_items_count += new_items_count
+ self.peers_responded = peers_responded
+ tool_tip = f'{peers_responded}/{self.peers_total} peers responded'
+ if self.total_remote_items_count:
+ tool_tip += f', {self.total_remote_items_count} new results'
+ self.setToolTip(tool_tip)
+ self._update()
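The progress value computed in _update() blends elapsed time with the share of peers that responded, so the bar accelerates as either factor grows. The curve in isolation:

```python
# scale = 1 - ((1 - time_progress) * (1 - response_progress)) ** 2, inputs in [0, 1]
def scale(time_progress: float, response_progress: float) -> float:
    return 1 - ((1 - time_progress) * (1 - response_progress)) ** 2

print(scale(0.0, 0.0))  # 0.0    - search just started, no responses yet
print(scale(0.5, 0.0))  # 0.75   - half the timeout elapsed, still no responses
print(scale(0.5, 0.5))  # 0.9375 - half the peers responded as well
print(scale(1.0, 0.3))  # 1.0    - timeout reached, the bar is full regardless
```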
diff --git a/src/tribler/gui/widgets/searchresultswidget.py b/src/tribler/gui/widgets/searchresultswidget.py
index 9ce466c53f4..5eb972d0696 100644
--- a/src/tribler/gui/widgets/searchresultswidget.py
+++ b/src/tribler/gui/widgets/searchresultswidget.py
@@ -60,44 +60,18 @@ def __init__(self, parent=None):
self.hide_xxx = None
self.search_request = None
+ connect(self.results_page_content.model_query_completed, self.on_local_query_completed)
+ connect(self.search_progress_bar.ready_to_update_results, self.on_ready_to_update_results)
+
def initialize(self, hide_xxx=False):
self.hide_xxx = hide_xxx
- self.results_page.initialize_content_page(hide_xxx=hide_xxx)
- self.results_page.channel_torrents_filter_input.setHidden(True)
- connect(self.timeout_progress_bar.timeout, self.show_results)
- connect(self.show_results_button.clicked, self.show_results)
+ self.results_page_content.initialize_content_page(hide_xxx=hide_xxx)
+ self.results_page_content.channel_torrents_filter_input.setHidden(True)
@property
def has_results(self):
return self.last_search_query is not None
- def show_results(self, *_):
- if self.search_request is None:
- # Fixes a race condition where the user clicks the show_results button before the search request
- # has been registered by the Core
- return
- self.timeout_progress_bar.stop()
- query = self.search_request.query
- self.results_page.initialize_root_model(
- SearchResultsModel(
- channel_info={
- "name": (tr("Search results for %s") % query.original_query)
- if len(query.original_query) < 50
- else f"{query.original_query[:50]}..."
- },
- endpoint_url="search",
- hide_xxx=self.results_page.hide_xxx,
- text_filter=to_fts_query(query.fts_text),
- tags=list(query.tags),
- type_filter=[REGULAR_TORRENT],
- )
- )
- self.setCurrentWidget(self.results_page)
-
- # After transitioning to the page with search results, we refresh the viewport since some rows might have been
- # rendered already with an incorrect row height.
- self.results_page.run_brain_dead_refresh()
-
def check_can_show(self, query):
if (
self.last_search_query == query
@@ -119,32 +93,54 @@ def search(self, query: Query) -> bool:
self.last_search_query = query.original_query
self.last_search_time = time.time()
- # Trigger remote search
+ model = SearchResultsModel(
+ endpoint_url="search",
+ hide_xxx=self.results_page_content.hide_xxx,
+ original_query=query.original_query,
+ text_filter=to_fts_query(query.fts_text),
+ tags=list(query.tags),
+ type_filter=[REGULAR_TORRENT],
+ exclude_deleted=True,
+ )
+ self.results_page_content.initialize_root_model(model)
+ self.setCurrentWidget(self.results_page)
+ self.results_page_content.format_search_title()
+ self.search_progress_bar.start()
+
+ # After transitioning to the page with search results, we refresh the viewport since some rows might have been
+ # rendered already with an incorrect row height.
+ self.results_page_content.run_brain_dead_refresh()
+
def register_request(response):
- self._logger.info(f'Request registered: {response}')
- self.search_request = SearchRequest(response["request_uuid"], query, set(response["peers"]))
- self.state_label.setText(format_search_loading_label(self.search_request))
- self.timeout_progress_bar.start()
- self.setCurrentWidget(self.loading_page)
+ peers = set(response["peers"])
+ self.search_request = SearchRequest(response["request_uuid"], query, peers)
+ self.search_progress_bar.set_remote_total(len(peers))
- params = {'txt_filter': fts_query, 'hide_xxx': self.hide_xxx, 'tags': list(query.tags)}
+ params = {'txt_filter': fts_query, 'hide_xxx': self.hide_xxx, 'tags': list(query.tags),
+ 'metadata_type': REGULAR_TORRENT, 'exclude_deleted': True}
TriblerNetworkRequest('remote_query', register_request, method="PUT", url_params=params)
+
return True
+ def on_local_query_completed(self):
+ self.search_progress_bar.on_local_results()
+
def reset(self):
if self.currentWidget() == self.results_page:
- self.results_page.go_back_to_level(0)
+ self.results_page_content.go_back_to_level(0)
def update_loading_page(self, remote_results):
- if (
- not self.search_request
- or remote_results.get("uuid") != self.search_request.uuid
- or self.currentWidget() == self.results_page
- ):
+ if not self.search_request or self.search_request.uuid != remote_results.get("uuid"):
return
+
peer = remote_results["peer"]
+ results = remote_results.get("results", [])
+
self.search_request.peers_complete.add(peer)
- self.search_request.remote_results.append(remote_results.get("results", []))
- self.state_label.setText(format_search_loading_label(self.search_request))
- if self.search_request.complete:
- self.show_results()
+ self.search_request.remote_results.append(results)
+
+ new_items = self.results_page_content.model.add_remote_results(results)
+ self.search_progress_bar.on_remote_results(len(new_items), len(self.search_request.peers_complete))
+
+ def on_ready_to_update_results(self):
+ self.results_page_content.root_model.show_remote_results()
diff --git a/src/tribler/gui/widgets/tablecontentdelegate.py b/src/tribler/gui/widgets/tablecontentdelegate.py
index c9fd42ce1e7..47553016cd5 100644
--- a/src/tribler/gui/widgets/tablecontentdelegate.py
+++ b/src/tribler/gui/widgets/tablecontentdelegate.py
@@ -35,7 +35,7 @@
)
from tribler.gui.utilities import format_votes, get_color, get_gui_setting, get_health, get_image_path, tr, \
get_objects_with_predicate
-from tribler.gui.widgets.tablecontentmodel import Column
+from tribler.gui.widgets.tablecontentmodel import Column, RemoteTableModel
from tribler.gui.widgets.tableiconbuttons import DownloadIconButton
PROGRESS_BAR_BACKGROUND = QColor("#444444")
@@ -268,18 +268,20 @@ def split_rect_into_squares(r, buttons):
yield QRect(x, y, w, h), button
def paint(self, painter, option, index):
- # Draw 'hover' state highlight for every cell of a row
- if index.row() == self.hover_index.row():
+ model: RemoteTableModel = index.model()
+ data_item = model.data_items[index.row()]
+ if index.row() == self.hover_index.row() or model.should_highlight_item(data_item):
+ # Draw 'hover' state highlight for every cell of a row
option.state |= QStyle.State_MouseOver
- if not self.paint_exact(painter, option, index):
+ if not self.paint_exact(painter, option, index, data_item):
# Draw the rest of the columns
super().paint(painter, option, index)
- def paint_exact(self, painter, option, index):
- data_item = index.model().data_items[index.row()]
+ def paint_exact(self, painter, option, index, data_item):
for column, drawing_action in self.column_drawing_actions:
if column in index.model().column_position and index.column() == index.model().column_position[column]:
return drawing_action(painter, option, index, data_item)
+ return False
def editorEvent(self, event, model, option, index):
for control in self.controls:
@@ -372,9 +374,22 @@ class TagsMixin:
edit_tags_icon = QIcon(get_image_path("edit_white.png"))
edit_tags_icon_hover = QIcon(get_image_path("edit_orange.png"))
- def draw_title_and_tags(
- self, painter: QPainter, option: QStyleOptionViewItem, index: QModelIndex, data_item: Dict
- ) -> None:
+ def draw_title_and_tags(self, painter: QPainter, option: QStyleOptionViewItem, index: QModelIndex,
+ data_item: Dict) -> None:
+ debug = False # change to True to see the search rank of items and to highlight remote items
+ item_name = data_item["name"]
+
+ group = data_item.get("group")
+ if group:
+ has_remote_items = any(group_item.get('remote') for group_item in group.values())
+ item_name += f" (+ {len(group)} similar{' *' if debug and has_remote_items else ''})"
+
+ if debug:
+ rank = data_item.get("rank")
+ if rank is not None:
+ item_name += f' rank: {rank:.6}'
+ if data_item.get('remote'):
+ item_name = '* ' + item_name
painter.setRenderHint(QPainter.Antialiasing, True)
title_text_pos = option.rect.topLeft()
title_text_height = 60 if data_item["type"] == SNIPPET else 28
@@ -391,7 +406,7 @@ def draw_title_and_tags(
painter.drawText(
QRectF(title_text_x, title_text_y, option.rect.width() - 6, title_text_height),
Qt.AlignVCenter,
- data_item["name"],
+ item_name,
)
if data_item["type"] == SNIPPET:
diff --git a/src/tribler/gui/widgets/tablecontentmodel.py b/src/tribler/gui/widgets/tablecontentmodel.py
index 00df68041c4..e498e910725 100644
--- a/src/tribler/gui/widgets/tablecontentmodel.py
+++ b/src/tribler/gui/widgets/tablecontentmodel.py
@@ -1,15 +1,18 @@
import json
import logging
+import time
import uuid
+from collections import deque
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import Callable, Dict, List
-from PyQt5.QtCore import QAbstractTableModel, QModelIndex, QRectF, QSize, Qt, pyqtSignal
+from PyQt5.QtCore import QAbstractTableModel, QModelIndex, QRectF, QSize, QTimerEvent, Qt, pyqtSignal
from tribler.core.components.metadata_store.db.orm_bindings.channel_node import NEW
from tribler.core.components.metadata_store.db.serialization import CHANNEL_TORRENT, COLLECTION_NODE, REGULAR_TORRENT, \
SNIPPET
+from tribler.core.utilities.search_utils import item_rank
from tribler.core.utilities.simpledefs import CHANNELS_VIEW_UUID, CHANNEL_STATE
from tribler.core.utilities.utilities import to_fts_query
@@ -18,6 +21,8 @@
from tribler.gui.utilities import connect, format_size, format_votes, get_votes_rating_description, pretty_date, tr
EXPANDING = 0
+HIGHLIGHTING_PERIOD_SECONDS = 1.0
+HIGHLIGHTING_TIMER_INTERVAL_MILLISECONDS = 100
class Column(Enum):
@@ -98,7 +103,6 @@ def __init__(self, parent=None):
self.columns_dict = define_columns()
self.data_items = []
- self.remote_items = []
self.max_rowid = None
self.local_total = None
self.item_load_batch = 50
@@ -108,6 +112,14 @@ def __init__(self, parent=None):
self.saved_scroll_state = None
self.qt_object_destroyed = False
+ self.group_by_name = False
+ self.sort_by_rank = False
+ self.text_filter = ''
+
+ self.highlight_remote_results = False
+ self.highlighted_items = deque()
+ self.highlight_timer = self.startTimer(HIGHLIGHTING_TIMER_INTERVAL_MILLISECONDS)
+
connect(self.destroyed, self.on_destroy)
# Every remote query must be attributed to its specific model to avoid updating wrong models
# on receiving a result. We achieve this by maintaining a set of in-flight remote queries.
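The comment above describes the attribution mechanism; a minimal sketch of the idea, with hypothetical names (`remote_queries`, `issue_remote_query`, `on_remote_response` are illustrative, not the actual API):

    import uuid

    class InFlightQueryTracking:
        """Sketch: attribute remote responses to the model that issued the query."""

        def __init__(self):
            self.remote_queries = set()  # UUIDs of queries this model is waiting for

        def issue_remote_query(self):
            query_uuid = uuid.uuid4()
            self.remote_queries.add(query_uuid)
            return query_uuid  # sent along with the network request

        def on_remote_response(self, query_uuid, results):
            if query_uuid not in self.remote_queries:
                return  # response belongs to another (or an outdated) model
            # ... apply results to this model ...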
@@ -138,13 +150,31 @@ def reset(self):
self.beginResetModel()
self.loaded = False
self.data_items = []
- self.remote_items = []
self.max_rowid = None
self.local_total = None
self.item_uid_map = {}
self.endResetModel()
self.perform_query()
+ def should_highlight_item(self, data_item):
+ return (self.highlight_remote_results and data_item.get('remote')
+ and data_item['item_added_at'] > time.time() - HIGHLIGHTING_PERIOD_SECONDS)
+
+ def timerEvent(self, event: QTimerEvent) -> None:
+ if self.highlight_remote_results and event.timerId() == self.highlight_timer:
+ self.stop_highlighting_old_items()
+
+ def stop_highlighting_old_items(self):
+ now = time.time()
+ then = now - HIGHLIGHTING_PERIOD_SECONDS
+ last_column_offset = len(self.columns_dict) - 1
+ while self.highlighted_items and self.highlighted_items[0]['item_added_at'] < then:
+ item = self.highlighted_items.popleft()
+ uid = get_item_uid(item)
+ row = self.item_uid_map.get(uid)
+ if row is not None:
+ self.dataChanged.emit(self.index(row, 0), self.index(row, last_column_offset))
+
def sort(self, column_index, order):
if not self.columns[column_index].sortable:
return
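The expiry loop above works because items enter `highlighted_items` strictly in arrival order, so the deque stays sorted by `item_added_at` and a single popleft pass removes everything older than the highlighting period. A self-contained sketch of the same pattern (simplified, no Qt):

    import time
    from collections import deque

    HIGHLIGHTING_PERIOD_SECONDS = 1.0
    highlighted_items = deque()

    def highlight(item):
        # append-only usage keeps the deque ordered by item_added_at
        item['item_added_at'] = time.time()
        highlighted_items.append(item)

    def expire_old_highlights():
        then = time.time() - HIGHLIGHTING_PERIOD_SECONDS
        while highlighted_items and highlighted_items[0]['item_added_at'] < then:
            item = highlighted_items.popleft()  # leftmost entry is always the oldest
            # the model emits dataChanged for this item's row here, forcing a repaint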
@@ -168,28 +198,71 @@ def add_items(self, new_items, on_top=False, remote=False):
if not new_items:
return
- if remote and not self.all_local_entries_loaded:
- self.remote_items.extend(new_items)
- return
-
# Note: If we want to block the signal like itemChanged, we must use QSignalBlocker object or blockSignals
# Only add unique items to the table model and reverse mapping from unique ids to rows is built.
insert_index = 0 if on_top else len(self.data_items)
unique_new_items = []
+ name_mapping = {item['name']: item for item in self.data_items} if self.group_by_name else {}
+ now = time.time()
for item in new_items:
+ if remote:
+ item['remote'] = True
+ item['item_added_at'] = now
+ if self.highlight_remote_results:
+ self.highlighted_items.append(item)
+ if self.sort_by_rank:
+ if 'rank' not in item:
+ item['rank'] = item_rank(self.text_filter, item)
+
item_uid = get_item_uid(item)
if item_uid not in self.item_uid_map:
- self.item_uid_map[item_uid] = insert_index
- if 'infohash' in item:
- self.item_uid_map[item['infohash']] = insert_index
- unique_new_items.append(item)
- insert_index += 1
+
+ prev_item = name_mapping.get(item['name'])
+ if self.group_by_name and prev_item is not None and not on_top and prev_item['type'] == REGULAR_TORRENT:
+ group = prev_item.setdefault('group', {})
+ if item_uid not in group:
+ group[item_uid] = item
+ else:
+ self.item_uid_map[item_uid] = insert_index
+ if 'infohash' in item:
+ self.item_uid_map[item['infohash']] = insert_index
+ unique_new_items.append(item)
+
+ if self.group_by_name and item['type'] == REGULAR_TORRENT and prev_item is None:
+ name_mapping[item['name']] = item
+
+ insert_index += 1
# If no new items are found, skip
if not unique_new_items:
return
+ if remote and self.sort_by_rank:
+ torrents = [item for item in self.data_items if item['type'] == REGULAR_TORRENT]
+ non_torrents = [item for item in self.data_items if item['type'] != REGULAR_TORRENT]
+
+ new_torrents = [item for item in unique_new_items if item['type'] == REGULAR_TORRENT]
+ new_non_torrents = [item for item in unique_new_items if item['type'] != REGULAR_TORRENT]
+
+ torrents += new_torrents
+ non_torrents += new_non_torrents
+
+            torrents.sort(key=lambda item: item['rank'], reverse=True)
+ new_data_items = non_torrents + torrents
+
+ new_item_uid_map = {}
+            for insert_index, item in enumerate(new_data_items):
+ item_uid = get_item_uid(item)
+ new_item_uid_map[item_uid] = insert_index
+ if 'infohash' in item:
+ new_item_uid_map[item['infohash']] = insert_index
+ self.beginResetModel()
+ self.data_items = new_data_items
+ self.item_uid_map = new_item_uid_map
+ self.endResetModel()
+ return
+
# Otherwise, if the new items go on top, shift the existing items to make space for them
if on_top and insert_index > 0:
new_items_map = {}
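The re-sort branch above keeps non-torrent rows (e.g. snippets and channels) in front, orders torrents by descending rank, and rebuilds the uid map via a model reset. A condensed sketch of the ordering step (names and the `REGULAR_TORRENT` value are assumptions for illustration):

    REGULAR_TORRENT = 300  # assumed value, for this sketch only

    def merge_and_rank(data_items, new_items):
        combined = data_items + new_items
        torrents = [item for item in combined if item['type'] == REGULAR_TORRENT]
        non_torrents = [item for item in combined if item['type'] != REGULAR_TORRENT]
        torrents.sort(key=lambda item: item['rank'], reverse=True)  # best matches first
        return non_torrents + torrents  # non-torrent rows always stay on top

    old = [{'type': REGULAR_TORRENT, 'rank': 0.2, 'name': 'a'}]
    new = [{'type': REGULAR_TORRENT, 'rank': 0.9, 'name': 'b'}]
    assert [i['name'] for i in merge_and_rank(old, new)] == ['b', 'a']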
@@ -211,11 +284,6 @@ def add_items(self, new_items, on_top=False, remote=False):
self.data_items.extend(unique_new_items)
self.endInsertRows()
- if self.all_local_entries_loaded:
- remote_items = self.remote_items
- self.remote_items = []
- self.add_items(remote_items, remote=True) # to filter non-unique entries
-
def remove_items(self, items):
uids_to_remove = []
rows_to_remove = []
@@ -258,6 +326,9 @@ def remove_items(self, items):
self.info_changed.emit(items)
+ def perform_initial_query(self):
+ self.perform_query()
+
def perform_query(self, **kwargs):
"""
Fetch results for a given query.
@@ -304,7 +375,7 @@ def on_query_results(self, response, remote=False, on_top=False):
if not remote:
if "total" in response:
self.local_total = response["total"]
- self.channel_info["total"] = self.local_total + len(self.remote_items)
+ self.channel_info["total"] = self.local_total
elif self.channel_info.get("total"):
self.channel_info["total"] += len(response["results"])
@@ -316,8 +387,8 @@ def on_query_results(self, response, remote=False, on_top=False):
if update_labels:
self.info_changed.emit(response['results'])
- self.query_complete.emit()
self.loaded = True
+ self.query_complete.emit()
return True
@@ -360,7 +431,7 @@ def __init__(
self.endpoint_url_override = endpoint_url
# Load the initial batch of entries
- self.perform_query()
+ self.perform_initial_query()
@property
def edit_enabled(self):
@@ -548,7 +619,59 @@ def perform_query(self, **kwargs):
class SearchResultsModel(ChannelContentModel):
- pass
+ def __init__(self, original_query, **kwargs):
+ self.original_query = original_query
+ self.remote_results = {}
+ title = self.format_title()
+ super().__init__(channel_info={"name": title}, **kwargs)
+ self.remote_results_received = False
+ self.postponed_remote_results = []
+ self.highlight_remote_results = True
+ self.group_by_name = True
+ self.sort_by_rank = True
+
+ def format_title(self):
+ q = self.original_query
+ q = q if len(q) < 50 else q[:50] + '...'
+ return f'Search results for {q}'
+
+ def perform_initial_query(self):
+ return self.perform_query(first=1, last=200)
+
+ def on_query_results(self, response, remote=False, on_top=False):
+ super().on_query_results(response, remote=remote, on_top=on_top)
+ self.add_remote_results([]) # to trigger adding postponed results
+ self.show_remote_results()
+
+ @property
+ def all_local_entries_loaded(self):
+ return self.loaded
+
+ def add_remote_results(self, results):
+ if not self.all_local_entries_loaded:
+ self.postponed_remote_results.extend(results)
+ return []
+
+ results = self.postponed_remote_results + results
+ self.postponed_remote_results = []
+ new_items = []
+ for item in results:
+ uid = get_item_uid(item)
+ if uid not in self.item_uid_map and uid not in self.remote_results:
+ self.remote_results_received = True
+ new_items.append(item)
+ self.remote_results[uid] = item
+ return new_items
+
+ def show_remote_results(self):
+ if not self.all_local_entries_loaded:
+ return
+
+ remote_items = list(self.remote_results.values())
+ self.remote_results.clear()
+ self.remote_results_received = False
+ if remote_items:
+ self.add_items(remote_items, remote=True)
class PopularTorrentsModel(ChannelContentModel):
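SearchResultsModel postpones remote results until every local entry is loaded, deduplicates them against both the table contents and earlier remote batches, and then flushes them in a single add_items call. The control flow, reduced to its essentials (a hypothetical sketch using a plain 'uid' dict key in place of get_item_uid):

    class PostponedResultsSketch:
        def __init__(self):
            self.loaded = False            # set once the local query completes
            self.item_uid_map = {}         # uids already shown in the table
            self.remote_results = {}       # deduplicated, not yet displayed
            self.postponed_remote_results = []

        def add_remote_results(self, results):
            if not self.loaded:
                # local query still running: buffer the remote batch for later
                self.postponed_remote_results.extend(results)
                return []
            results = self.postponed_remote_results + results
            self.postponed_remote_results = []
            new_items = []
            for item in results:
                uid = item['uid']
                if uid not in self.item_uid_map and uid not in self.remote_results:
                    new_items.append(item)
                    self.remote_results[uid] = item  # dedupe within and across batches
            return new_items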