From 73d3148a261fab72598c41a63242c99e2c23ee5f Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Thu, 28 Oct 2021 09:06:29 +0200 Subject: [PATCH 1/3] Fixes #6455: incorrect search results when a query contains spaces --- .../restapi/tests/test_search_endpoint.py | 28 +++++++++++++++++++ src/tribler-gui/tribler_gui/tribler_window.py | 3 +- src/tribler-gui/tribler_gui/utilities.py | 11 +++----- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/tribler-core/tribler_core/components/metadata_store/restapi/tests/test_search_endpoint.py b/src/tribler-core/tribler_core/components/metadata_store/restapi/tests/test_search_endpoint.py index 4f5d81da570..d3364a0fc03 100644 --- a/src/tribler-core/tribler_core/components/metadata_store/restapi/tests/test_search_endpoint.py +++ b/src/tribler-core/tribler_core/components/metadata_store/restapi/tests/test_search_endpoint.py @@ -8,6 +8,7 @@ from tribler_core.components.restapi.rest.base_api_test import do_request from tribler_core.utilities.random_utils import random_infohash +from tribler_gui.utilities import to_fts_query # pylint: disable=unused-argument, redefined-outer-name @@ -123,3 +124,30 @@ async def test_completions(rest_api): """ json_response = await do_request(rest_api, 'search/completions?q=tribler', expected_code=200) assert json_response['completions'] == [] + + +async def test_search_with_space(rest_api, metadata_store): + with db_session: + _ = metadata_store.ChannelMetadata(title='test', tags='test', subscribed=True, infohash=random_infohash()) + metadata_store.TorrentMetadata(title='abc', infohash=random_infohash()) + metadata_store.TorrentMetadata(title='abc.def', infohash=random_infohash()) + metadata_store.TorrentMetadata(title='abc def', infohash=random_infohash()) + metadata_store.TorrentMetadata(title='abcxyz def', infohash=random_infohash()) + metadata_store.TorrentMetadata(title='abc defxyz', infohash=random_infohash()) + + s1 = to_fts_query("abc") + assert s1 == '"abc"*' + + s2 = to_fts_query("abc def") + assert s2 == '"abc" "def"*' + + ss2 = to_fts_query(s2) + assert ss2 == s2 + + parsed = await do_request(rest_api, f'search?txt_filter={s1}', expected_code=200) + results = {item["name"] for item in parsed["results"]} + assert results == {'abc', 'abc.def', 'abc def', 'abc defxyz', 'abcxyz def'} + + parsed = await do_request(rest_api, f'search?txt_filter={s2}', expected_code=200) + results = {item["name"] for item in parsed["results"]} + assert results == {'abc.def', 'abc def', 'abc defxyz'} # but not 'abcxyz def' diff --git a/src/tribler-gui/tribler_gui/tribler_window.py b/src/tribler-gui/tribler_gui/tribler_window.py index bacc4e8a3d3..9d4a7f0e9ea 100644 --- a/src/tribler-gui/tribler_gui/tribler_window.py +++ b/src/tribler-gui/tribler_gui/tribler_window.py @@ -77,7 +77,6 @@ get_image_path, get_ui_file_path, is_dir_writable, - sanitize_for_fts, tr, ) from tribler_gui.widgets.channelsmenulistwidget import ChannelsMenuListWidget @@ -648,7 +647,7 @@ def on_search_text_change(self, text): if len(text) < 2: return TriblerNetworkRequest( - "search/completions", self.on_received_search_completions, url_params={'q': sanitize_for_fts(text)} + "search/completions", self.on_received_search_completions, url_params={'q': text} ) def on_received_search_completions(self, completions): diff --git a/src/tribler-gui/tribler_gui/utilities.py b/src/tribler-gui/tribler_gui/utilities.py index dfdc76207da..a89481098e4 100644 --- a/src/tribler-gui/tribler_gui/utilities.py +++ b/src/tribler-gui/tribler_gui/utilities.py @@ -2,6 +2,7 @@ import logging import math import os +import re import sys import traceback import types @@ -428,14 +429,10 @@ def get_translator(language=None): translator.load(locale, filename, directory=TRANSLATIONS_DIR) return translator - -def sanitize_for_fts(text): - return text.translate({ord("\""): "\"\"", ord("\'"): "\'\'"}) - +fts_query_re = re.compile(r'\w+', re.UNICODE) def to_fts_query(text): if not text: return "" - words = text.strip().split(" ") - query_list = ['\"' + sanitize_for_fts(word) + '\"*' for word in words] - return " AND ".join(query_list) + words = fts_query_re.findall(text) + return ' '.join(f'"{word}"' for word in words) + '*' From 330d0208116e6cf98ee0e53b399924e6a36fd16f Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Thu, 28 Oct 2021 09:08:02 +0200 Subject: [PATCH 2/3] Fix search autosuggestions --- .../components/metadata_store/db/store.py | 53 +++++++++++++------ .../db/tests/test_torrent_metadata.py | 52 +++++++++++++++--- 2 files changed, 82 insertions(+), 23 deletions(-) diff --git a/src/tribler-core/tribler_core/components/metadata_store/db/store.py b/src/tribler-core/tribler_core/components/metadata_store/db/store.py index 6faf599d31f..9453b6c073e 100644 --- a/src/tribler-core/tribler_core/components/metadata_store/db/store.py +++ b/src/tribler-core/tribler_core/components/metadata_store/db/store.py @@ -1,4 +1,5 @@ import logging +import re import threading from asyncio import get_event_loop from datetime import datetime, timedelta @@ -762,22 +763,44 @@ def get_entries_count(self, **kwargs): def get_max_rowid(self): return select(max(obj.rowid) for obj in self.ChannelNode).get() or 0 - def get_auto_complete_terms(self, keyword, max_terms, limit=10): - if not keyword: + fts_keyword_search_re = re.compile(r'\w+', re.UNICODE) + + def get_auto_complete_terms(self, text, max_terms, limit=10): + if not text: return [] + words = self.fts_keyword_search_re.findall(text) + if not words: + return "" + + fts_query = '"%s"*' % ' '.join(f'{word}' for word in words) + suggestion_pattern = r'\W+'.join(word for word in words) + r'(\W*)((?:[.-]?\w)*)' + suggestion_re = re.compile(suggestion_pattern, re.UNICODE) + with db_session: - result = self.search_keyword("\"" + keyword + "\"*", lim=limit)[:] - titles = [g.title.lower() for g in result] + titles = self._db.select(""" + cn.title + FROM ChannelNode cn + INNER JOIN FtsIndex ON cn.rowid = FtsIndex.rowid + LEFT JOIN TorrentState ts ON cn.health = ts.rowid + WHERE FtsIndex MATCH $fts_query + ORDER BY coalesce(ts.seeders, 0) DESC + LIMIT $limit + """) - # Copy-pasted from the old DBHandler (almost) completely - all_terms = set() - for line in titles: - if len(all_terms) >= max_terms: - break - i1 = line.find(keyword) - i2 = line.find(' ', i1 + len(keyword)) - term = line[i1:i2] if i2 >= 0 else line[i1:] - if term != keyword: - all_terms.add(term) - return list(all_terms) + result = [] + for title in titles: + title = title.lower() + match = suggestion_re.search(title) + if match: + # group(2) is the ending of the last word (if the word is not finished) or the next word + continuation = match.group(2) + if re.match(r'^.*\w$', text) and match.group(1): # group(1) is non-word symbols (spaces, commas, etc.) + continuation = match.group(1) + continuation + suggestion = text + continuation + if suggestion not in result: + result.append(suggestion) + if len(result) >= max_terms: + break + + return result diff --git a/src/tribler-core/tribler_core/components/metadata_store/db/tests/test_torrent_metadata.py b/src/tribler-core/tribler_core/components/metadata_store/db/tests/test_torrent_metadata.py index cac281e11f9..80b6b011e0c 100644 --- a/src/tribler-core/tribler_core/components/metadata_store/db/tests/test_torrent_metadata.py +++ b/src/tribler-core/tribler_core/components/metadata_store/db/tests/test_torrent_metadata.py @@ -160,17 +160,53 @@ def test_get_autocomplete_terms(metadata_store): """ Test fetching autocompletion terms from the database """ - metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="mountains sheep", tags="video")) - metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="regular sheepish guy", tags="video")) + metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="foo: bar baz", tags="video")) + metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="foo - bar, xyz", tags="video")) + metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="barbarian xyz!", tags="video")) + metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="n.a.m.e: foobar", tags="video")) + metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="xyz n.a.m.e", tags="video")) + + autocomplete_terms = metadata_store.get_auto_complete_terms("", 10) + assert autocomplete_terms == [] - autocomplete_terms = metadata_store.get_auto_complete_terms("shee", 10) - assert 'sheep' in autocomplete_terms + autocomplete_terms = metadata_store.get_auto_complete_terms("foo", 10) + assert set(autocomplete_terms) == {"foo: bar", "foo - bar", "foobar"} - autocomplete_terms = metadata_store.get_auto_complete_terms("shee", 10) - assert 'sheepish' in autocomplete_terms + autocomplete_terms = metadata_store.get_auto_complete_terms("foo: bar", 10) + assert set(autocomplete_terms) == {"foo: bar baz", "foo: bar, xyz"} - autocomplete_terms = metadata_store.get_auto_complete_terms("", 10) - assert [] == autocomplete_terms + autocomplete_terms = metadata_store.get_auto_complete_terms("foo ", 10) + assert set(autocomplete_terms) == {"foo bar"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("bar", 10) + assert set(autocomplete_terms) == {"bar baz", "bar, xyz", "barbarian"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("barb", 10) + assert set(autocomplete_terms) == {"barbarian"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("barbarian", 10) + assert set(autocomplete_terms) == {"barbarian xyz"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("barbarian ", 10) + assert set(autocomplete_terms) == {"barbarian xyz"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("barbarian x", 10) + assert set(autocomplete_terms) == {"barbarian xyz"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m", 10) + assert set(autocomplete_terms) == {"n.a.m.e"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m.", 10) + assert set(autocomplete_terms) == {"n.a.m.e"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m.e", 10) + assert set(autocomplete_terms) == {"n.a.m.e", "n.a.m.e: foobar"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m.e ", 10) + assert set(autocomplete_terms) == {"n.a.m.e ", "n.a.m.e foobar"} + + autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m.e f", 10) + assert set(autocomplete_terms) == {"n.a.m.e foobar"} @db_session From de95754c2ec06d1ce94ecfffbe1b13013323a40f Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Thu, 28 Oct 2021 09:32:07 +0200 Subject: [PATCH 3/3] Satisfy linter --- .../tribler_core/components/metadata_store/db/store.py | 6 +++--- .../metadata_store/db/tests/test_torrent_metadata.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tribler-core/tribler_core/components/metadata_store/db/store.py b/src/tribler-core/tribler_core/components/metadata_store/db/store.py index 9453b6c073e..c4a57b041c3 100644 --- a/src/tribler-core/tribler_core/components/metadata_store/db/store.py +++ b/src/tribler-core/tribler_core/components/metadata_store/db/store.py @@ -13,7 +13,6 @@ from tribler_common.simpledefs import NTFY -from tribler_core.exceptions import InvalidSignatureException from tribler_core.components.metadata_store.db.orm_bindings import ( binary_node, channel_description, @@ -34,7 +33,6 @@ from tribler_core.components.metadata_store.db.orm_bindings.channel_metadata import get_mdblob_sequence_number from tribler_core.components.metadata_store.db.orm_bindings.channel_node import LEGACY_ENTRY, TODELETE from tribler_core.components.metadata_store.db.orm_bindings.torrent_metadata import NULL_KEY_SUBST -from tribler_core.components.metadata_store.remote_query_community.payload_checker import process_payload from tribler_core.components.metadata_store.db.serialization import ( BINARY_NODE, CHANNEL_DESCRIPTION, @@ -48,6 +46,8 @@ REGULAR_TORRENT, read_payload_with_offset, ) +from tribler_core.components.metadata_store.remote_query_community.payload_checker import process_payload +from tribler_core.exceptions import InvalidSignatureException from tribler_core.utilities.path_util import Path from tribler_core.utilities.unicode import hexlify from tribler_core.utilities.utilities import MEMORY_DB @@ -773,7 +773,7 @@ def get_auto_complete_terms(self, text, max_terms, limit=10): if not words: return "" - fts_query = '"%s"*' % ' '.join(f'{word}' for word in words) + fts_query = '"%s"*' % ' '.join(f'{word}' for word in words) # pylint: disable=unused-variable suggestion_pattern = r'\W+'.join(word for word in words) + r'(\W*)((?:[.-]?\w)*)' suggestion_re = re.compile(suggestion_pattern, re.UNICODE) diff --git a/src/tribler-core/tribler_core/components/metadata_store/db/tests/test_torrent_metadata.py b/src/tribler-core/tribler_core/components/metadata_store/db/tests/test_torrent_metadata.py index 80b6b011e0c..594129927c1 100644 --- a/src/tribler-core/tribler_core/components/metadata_store/db/tests/test_torrent_metadata.py +++ b/src/tribler-core/tribler_core/components/metadata_store/db/tests/test_torrent_metadata.py @@ -9,8 +9,8 @@ import pytest from tribler_core.components.libtorrent.torrentdef import TorrentDef -from tribler_core.components.metadata_store.db.orm_bindings.discrete_clock import clock from tribler_core.components.metadata_store.db.orm_bindings.channel_node import TODELETE +from tribler_core.components.metadata_store.db.orm_bindings.discrete_clock import clock from tribler_core.components.metadata_store.db.orm_bindings.torrent_metadata import tdef_to_metadata_dict from tribler_core.components.metadata_store.db.serialization import CHANNEL_TORRENT, REGULAR_TORRENT from tribler_core.tests.tools.common import TORRENT_UBUNTU_FILE