Skip to content

Commit

Permalink
Merge pull request #6508 from kozlovsky/search_fixes
Browse files Browse the repository at this point in the history
Fix full-text search autosuggestions & results
  • Loading branch information
kozlovsky authored Oct 28, 2021
2 parents b77b642 + de95754 commit be01b34
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 35 deletions.
57 changes: 40 additions & 17 deletions src/tribler-core/tribler_core/components/metadata_store/db/store.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
import threading
from asyncio import get_event_loop
from datetime import datetime, timedelta
Expand All @@ -12,7 +13,6 @@

from tribler_common.simpledefs import NTFY

from tribler_core.exceptions import InvalidSignatureException
from tribler_core.components.metadata_store.db.orm_bindings import (
binary_node,
channel_description,
Expand All @@ -33,7 +33,6 @@
from tribler_core.components.metadata_store.db.orm_bindings.channel_metadata import get_mdblob_sequence_number
from tribler_core.components.metadata_store.db.orm_bindings.channel_node import LEGACY_ENTRY, TODELETE
from tribler_core.components.metadata_store.db.orm_bindings.torrent_metadata import NULL_KEY_SUBST
from tribler_core.components.metadata_store.remote_query_community.payload_checker import process_payload
from tribler_core.components.metadata_store.db.serialization import (
BINARY_NODE,
CHANNEL_DESCRIPTION,
Expand All @@ -47,6 +46,8 @@
REGULAR_TORRENT,
read_payload_with_offset,
)
from tribler_core.components.metadata_store.remote_query_community.payload_checker import process_payload
from tribler_core.exceptions import InvalidSignatureException
from tribler_core.utilities.path_util import Path
from tribler_core.utilities.unicode import hexlify
from tribler_core.utilities.utilities import MEMORY_DB
Expand Down Expand Up @@ -762,22 +763,44 @@ def get_entries_count(self, **kwargs):
def get_max_rowid(self):
return select(max(obj.rowid) for obj in self.ChannelNode).get() or 0

def get_auto_complete_terms(self, keyword, max_terms, limit=10):
if not keyword:
fts_keyword_search_re = re.compile(r'\w+', re.UNICODE)

def get_auto_complete_terms(self, text, max_terms, limit=10):
if not text:
return []

words = self.fts_keyword_search_re.findall(text)
if not words:
return ""

fts_query = '"%s"*' % ' '.join(f'{word}' for word in words) # pylint: disable=unused-variable
suggestion_pattern = r'\W+'.join(word for word in words) + r'(\W*)((?:[.-]?\w)*)'
suggestion_re = re.compile(suggestion_pattern, re.UNICODE)

with db_session:
result = self.search_keyword("\"" + keyword + "\"*", lim=limit)[:]
titles = [g.title.lower() for g in result]
titles = self._db.select("""
cn.title
FROM ChannelNode cn
INNER JOIN FtsIndex ON cn.rowid = FtsIndex.rowid
LEFT JOIN TorrentState ts ON cn.health = ts.rowid
WHERE FtsIndex MATCH $fts_query
ORDER BY coalesce(ts.seeders, 0) DESC
LIMIT $limit
""")

# Copy-pasted from the old DBHandler (almost) completely
all_terms = set()
for line in titles:
if len(all_terms) >= max_terms:
break
i1 = line.find(keyword)
i2 = line.find(' ', i1 + len(keyword))
term = line[i1:i2] if i2 >= 0 else line[i1:]
if term != keyword:
all_terms.add(term)
return list(all_terms)
result = []
for title in titles:
title = title.lower()
match = suggestion_re.search(title)
if match:
# group(2) is the ending of the last word (if the word is not finished) or the next word
continuation = match.group(2)
if re.match(r'^.*\w$', text) and match.group(1): # group(1) is non-word symbols (spaces, commas, etc.)
continuation = match.group(1) + continuation
suggestion = text + continuation
if suggestion not in result:
result.append(suggestion)
if len(result) >= max_terms:
break

return result
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import pytest

from tribler_core.components.libtorrent.torrentdef import TorrentDef
from tribler_core.components.metadata_store.db.orm_bindings.discrete_clock import clock
from tribler_core.components.metadata_store.db.orm_bindings.channel_node import TODELETE
from tribler_core.components.metadata_store.db.orm_bindings.discrete_clock import clock
from tribler_core.components.metadata_store.db.orm_bindings.torrent_metadata import tdef_to_metadata_dict
from tribler_core.components.metadata_store.db.serialization import CHANNEL_TORRENT, REGULAR_TORRENT
from tribler_core.tests.tools.common import TORRENT_UBUNTU_FILE
Expand Down Expand Up @@ -160,17 +160,53 @@ def test_get_autocomplete_terms(metadata_store):
"""
Test fetching autocompletion terms from the database
"""
metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="mountains sheep", tags="video"))
metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="regular sheepish guy", tags="video"))
metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="foo: bar baz", tags="video"))
metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="foo - bar, xyz", tags="video"))
metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="barbarian xyz!", tags="video"))
metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="n.a.m.e: foobar", tags="video"))
metadata_store.TorrentMetadata.from_dict(dict(rnd_torrent(), title="xyz n.a.m.e", tags="video"))

autocomplete_terms = metadata_store.get_auto_complete_terms("", 10)
assert autocomplete_terms == []

autocomplete_terms = metadata_store.get_auto_complete_terms("shee", 10)
assert 'sheep' in autocomplete_terms
autocomplete_terms = metadata_store.get_auto_complete_terms("foo", 10)
assert set(autocomplete_terms) == {"foo: bar", "foo - bar", "foobar"}

autocomplete_terms = metadata_store.get_auto_complete_terms("shee", 10)
assert 'sheepish' in autocomplete_terms
autocomplete_terms = metadata_store.get_auto_complete_terms("foo: bar", 10)
assert set(autocomplete_terms) == {"foo: bar baz", "foo: bar, xyz"}

autocomplete_terms = metadata_store.get_auto_complete_terms("", 10)
assert [] == autocomplete_terms
autocomplete_terms = metadata_store.get_auto_complete_terms("foo ", 10)
assert set(autocomplete_terms) == {"foo bar"}

autocomplete_terms = metadata_store.get_auto_complete_terms("bar", 10)
assert set(autocomplete_terms) == {"bar baz", "bar, xyz", "barbarian"}

autocomplete_terms = metadata_store.get_auto_complete_terms("barb", 10)
assert set(autocomplete_terms) == {"barbarian"}

autocomplete_terms = metadata_store.get_auto_complete_terms("barbarian", 10)
assert set(autocomplete_terms) == {"barbarian xyz"}

autocomplete_terms = metadata_store.get_auto_complete_terms("barbarian ", 10)
assert set(autocomplete_terms) == {"barbarian xyz"}

autocomplete_terms = metadata_store.get_auto_complete_terms("barbarian x", 10)
assert set(autocomplete_terms) == {"barbarian xyz"}

autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m", 10)
assert set(autocomplete_terms) == {"n.a.m.e"}

autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m.", 10)
assert set(autocomplete_terms) == {"n.a.m.e"}

autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m.e", 10)
assert set(autocomplete_terms) == {"n.a.m.e", "n.a.m.e: foobar"}

autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m.e ", 10)
assert set(autocomplete_terms) == {"n.a.m.e ", "n.a.m.e foobar"}

autocomplete_terms = metadata_store.get_auto_complete_terms("n.a.m.e f", 10)
assert set(autocomplete_terms) == {"n.a.m.e foobar"}


@db_session
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from tribler_core.components.restapi.rest.base_api_test import do_request
from tribler_core.utilities.random_utils import random_infohash

from tribler_gui.utilities import to_fts_query

# pylint: disable=unused-argument, redefined-outer-name

Expand Down Expand Up @@ -123,3 +124,30 @@ async def test_completions(rest_api):
"""
json_response = await do_request(rest_api, 'search/completions?q=tribler', expected_code=200)
assert json_response['completions'] == []


async def test_search_with_space(rest_api, metadata_store):
with db_session:
_ = metadata_store.ChannelMetadata(title='test', tags='test', subscribed=True, infohash=random_infohash())
metadata_store.TorrentMetadata(title='abc', infohash=random_infohash())
metadata_store.TorrentMetadata(title='abc.def', infohash=random_infohash())
metadata_store.TorrentMetadata(title='abc def', infohash=random_infohash())
metadata_store.TorrentMetadata(title='abcxyz def', infohash=random_infohash())
metadata_store.TorrentMetadata(title='abc defxyz', infohash=random_infohash())

s1 = to_fts_query("abc")
assert s1 == '"abc"*'

s2 = to_fts_query("abc def")
assert s2 == '"abc" "def"*'

ss2 = to_fts_query(s2)
assert ss2 == s2

parsed = await do_request(rest_api, f'search?txt_filter={s1}', expected_code=200)
results = {item["name"] for item in parsed["results"]}
assert results == {'abc', 'abc.def', 'abc def', 'abc defxyz', 'abcxyz def'}

parsed = await do_request(rest_api, f'search?txt_filter={s2}', expected_code=200)
results = {item["name"] for item in parsed["results"]}
assert results == {'abc.def', 'abc def', 'abc defxyz'} # but not 'abcxyz def'
3 changes: 1 addition & 2 deletions src/tribler-gui/tribler_gui/tribler_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@
get_image_path,
get_ui_file_path,
is_dir_writable,
sanitize_for_fts,
tr,
)
from tribler_gui.widgets.channelsmenulistwidget import ChannelsMenuListWidget
Expand Down Expand Up @@ -648,7 +647,7 @@ def on_search_text_change(self, text):
if len(text) < 2:
return
TriblerNetworkRequest(
"search/completions", self.on_received_search_completions, url_params={'q': sanitize_for_fts(text)}
"search/completions", self.on_received_search_completions, url_params={'q': text}
)

def on_received_search_completions(self, completions):
Expand Down
11 changes: 4 additions & 7 deletions src/tribler-gui/tribler_gui/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import math
import os
import re
import sys
import traceback
import types
Expand Down Expand Up @@ -428,14 +429,10 @@ def get_translator(language=None):
translator.load(locale, filename, directory=TRANSLATIONS_DIR)
return translator


def sanitize_for_fts(text):
return text.translate({ord("\""): "\"\"", ord("\'"): "\'\'"})

fts_query_re = re.compile(r'\w+', re.UNICODE)

def to_fts_query(text):
if not text:
return ""
words = text.strip().split(" ")
query_list = ['\"' + sanitize_for_fts(word) + '\"*' for word in words]
return " AND ".join(query_list)
words = fts_query_re.findall(text)
return ' '.join(f'"{word}"' for word in words) + '*'

0 comments on commit be01b34

Please sign in to comment.