Skip to content

Commit

Permalink
Add reprocess feature for KnowledgeRulesProcessor
Browse files Browse the repository at this point in the history
  • Loading branch information
drew2a committed Apr 20, 2023
1 parent 7d3562f commit 64dd187
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from tribler.core.components.key.key_component import KeyComponent
from tribler.core.components.knowledge.community.knowledge_community import KnowledgeCommunity
from tribler.core.components.knowledge.db.knowledge_db import KnowledgeDatabase
from tribler.core.components.knowledge.rules.tag_rules_processor import KnowledgeRulesProcessor
from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor
from tribler.core.components.metadata_store.utils import generate_test_channels
from tribler.core.utilities.simpledefs import STATEDIR_DB_DIR

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
DEFAULT_BATCH_SIZE = 1000

LAST_PROCESSED_TORRENT_ID = 'last_processed_torrent_id'
RULES_PROCESSOR_VERSION = 'rules_processor_version'


class KnowledgeRulesProcessor(TaskManager):
# this value must be incremented in the case of new rules set has been applied
version: int = 2
version: int = 3

def __init__(self, notifier: Notifier, db: KnowledgeDatabase, mds: MetadataStore,
batch_size: int = DEFAULT_BATCH_SIZE, interval: float = DEFAULT_INTERVAL):
Expand All @@ -45,14 +46,18 @@ def __init__(self, notifier: Notifier, db: KnowledgeDatabase, mds: MetadataStore
def start(self):
self.logger.info('Start')

rules_processor_version = self.get_rules_processor_version()
if rules_processor_version < self.version:
# the database was processed by the previous version of the rules processor
self.set_last_processed_torrent_id(0) # reset the last processed torrent id
self.set_rules_processor_version(self.version)

max_row_id = self.mds.get_max_rowid()
is_finished = self.get_last_processed_torrent_id() >= max_row_id

if not is_finished:
self.logger.info(f'Register process_batch task with interval: {self.interval} sec')
self.register_task(name=self.process_batch.__name__,
interval=self.interval,
task=self.process_batch)
self.register_task(name=self.process_batch.__name__, interval=self.interval, task=self.process_batch)

async def shutdown(self):
await self.shutdown_task_manager()
Expand All @@ -77,7 +82,7 @@ def query(_start, _end):
torrent.tag_processor_version = self.version
processed += 1

self.mds.set_value(LAST_PROCESSED_TORRENT_ID, str(end))
self.set_last_processed_torrent_id(end)
self.logger.info(f'Processed: {processed} titles. Added {added} tags.')

is_finished = end >= max_row_id
Expand Down Expand Up @@ -108,3 +113,12 @@ def save_statements(self, subject_type: ResourceType, subject: str, predicate: R

def get_last_processed_torrent_id(self) -> int:
return int(self.mds.get_value(LAST_PROCESSED_TORRENT_ID, default='0'))

def set_last_processed_torrent_id(self, value: int):
self.mds.set_value(LAST_PROCESSED_TORRENT_ID, str(value))

def get_rules_processor_version(self) -> int:
return int(self.mds.get_value(RULES_PROCESSOR_VERSION, default='0'))

def set_rules_processor_version(self, version: int):
self.mds.set_value(RULES_PROCESSOR_VERSION, str(version))
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from tribler.core import notifications
from tribler.core.components.knowledge.db.knowledge_db import ResourceType
from tribler.core.components.knowledge.rules.tag_rules_processor import KnowledgeRulesProcessor, \
from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor, \
LAST_PROCESSED_TORRENT_ID

TEST_BATCH_SIZE = 100
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from tribler.core import notifications
from tribler.core.components.component import Component
from tribler.core.components.key.key_component import KeyComponent
from tribler.core.components.knowledge.rules.tag_rules_processor import KnowledgeRulesProcessor
from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor
from tribler.core.components.metadata_store.db.store import MetadataStore
from tribler.core.utilities.simpledefs import STATEDIR_DB_DIR

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pony.orm import db_session

from tribler.core.components.knowledge.db.knowledge_db import KnowledgeDatabase, ResourceType
from tribler.core.components.knowledge.rules.tag_rules_processor import KnowledgeRulesProcessor
from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor
from tribler.core.components.metadata_store.category_filter.family_filter import default_xxx_filter
from tribler.core.components.metadata_store.db.serialization import CHANNEL_TORRENT, COLLECTION_NODE, REGULAR_TORRENT
from tribler.core.components.metadata_store.db.store import MetadataStore
Expand Down

0 comments on commit 64dd187

Please sign in to comment.