From 64dd187f29f4956b86775f58dbe5dc3f2cd038d5 Mon Sep 17 00:00:00 2001 From: drew2a Date: Thu, 20 Apr 2023 14:54:02 +0200 Subject: [PATCH] Add reprocess feature for KnowledgeRulesProcessor --- .../knowledge/knowledge_component.py | 2 +- ...cessor.py => knowledge_rules_processor.py} | 24 +++++++++++++++---- ...r.py => test_knowledge_rules_processor.py} | 2 +- .../metadata_store_component.py | 2 +- .../restapi/metadata_endpoint_base.py | 2 +- 5 files changed, 23 insertions(+), 9 deletions(-) rename src/tribler/core/components/knowledge/rules/{tag_rules_processor.py => knowledge_rules_processor.py} (83%) rename src/tribler/core/components/knowledge/rules/tests/{test_tag_rules_processor.py => test_knowledge_rules_processor.py} (98%) diff --git a/src/tribler/core/components/knowledge/knowledge_component.py b/src/tribler/core/components/knowledge/knowledge_component.py index 5c343c8a01d..d5c8d9745ec 100644 --- a/src/tribler/core/components/knowledge/knowledge_component.py +++ b/src/tribler/core/components/knowledge/knowledge_component.py @@ -4,7 +4,7 @@ from tribler.core.components.key.key_component import KeyComponent from tribler.core.components.knowledge.community.knowledge_community import KnowledgeCommunity from tribler.core.components.knowledge.db.knowledge_db import KnowledgeDatabase -from tribler.core.components.knowledge.rules.tag_rules_processor import KnowledgeRulesProcessor +from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor from tribler.core.components.metadata_store.utils import generate_test_channels from tribler.core.utilities.simpledefs import STATEDIR_DB_DIR diff --git a/src/tribler/core/components/knowledge/rules/tag_rules_processor.py b/src/tribler/core/components/knowledge/rules/knowledge_rules_processor.py similarity index 83% rename from src/tribler/core/components/knowledge/rules/tag_rules_processor.py rename to src/tribler/core/components/knowledge/rules/knowledge_rules_processor.py index 5ba8642d5c7..299292391e0 100644 --- a/src/tribler/core/components/knowledge/rules/tag_rules_processor.py +++ b/src/tribler/core/components/knowledge/rules/knowledge_rules_processor.py @@ -18,11 +18,12 @@ DEFAULT_BATCH_SIZE = 1000 LAST_PROCESSED_TORRENT_ID = 'last_processed_torrent_id' +RULES_PROCESSOR_VERSION = 'rules_processor_version' class KnowledgeRulesProcessor(TaskManager): # this value must be incremented in the case of new rules set has been applied - version: int = 2 + version: int = 3 def __init__(self, notifier: Notifier, db: KnowledgeDatabase, mds: MetadataStore, batch_size: int = DEFAULT_BATCH_SIZE, interval: float = DEFAULT_INTERVAL): @@ -45,14 +46,18 @@ def __init__(self, notifier: Notifier, db: KnowledgeDatabase, mds: MetadataStore def start(self): self.logger.info('Start') + rules_processor_version = self.get_rules_processor_version() + if rules_processor_version < self.version: + # the database was processed by the previous version of the rules processor + self.set_last_processed_torrent_id(0) # reset the last processed torrent id + self.set_rules_processor_version(self.version) + max_row_id = self.mds.get_max_rowid() is_finished = self.get_last_processed_torrent_id() >= max_row_id if not is_finished: self.logger.info(f'Register process_batch task with interval: {self.interval} sec') - self.register_task(name=self.process_batch.__name__, - interval=self.interval, - task=self.process_batch) + self.register_task(name=self.process_batch.__name__, interval=self.interval, task=self.process_batch) async def shutdown(self): await self.shutdown_task_manager() @@ -77,7 +82,7 @@ def query(_start, _end): torrent.tag_processor_version = self.version processed += 1 - self.mds.set_value(LAST_PROCESSED_TORRENT_ID, str(end)) + self.set_last_processed_torrent_id(end) self.logger.info(f'Processed: {processed} titles. Added {added} tags.') is_finished = end >= max_row_id @@ -108,3 +113,12 @@ def save_statements(self, subject_type: ResourceType, subject: str, predicate: R def get_last_processed_torrent_id(self) -> int: return int(self.mds.get_value(LAST_PROCESSED_TORRENT_ID, default='0')) + + def set_last_processed_torrent_id(self, value: int): + self.mds.set_value(LAST_PROCESSED_TORRENT_ID, str(value)) + + def get_rules_processor_version(self) -> int: + return int(self.mds.get_value(RULES_PROCESSOR_VERSION, default='0')) + + def set_rules_processor_version(self, version: int): + self.mds.set_value(RULES_PROCESSOR_VERSION, str(version)) diff --git a/src/tribler/core/components/knowledge/rules/tests/test_tag_rules_processor.py b/src/tribler/core/components/knowledge/rules/tests/test_knowledge_rules_processor.py similarity index 98% rename from src/tribler/core/components/knowledge/rules/tests/test_tag_rules_processor.py rename to src/tribler/core/components/knowledge/rules/tests/test_knowledge_rules_processor.py index ff6c7bd2a72..67f12aff249 100644 --- a/src/tribler/core/components/knowledge/rules/tests/test_tag_rules_processor.py +++ b/src/tribler/core/components/knowledge/rules/tests/test_knowledge_rules_processor.py @@ -5,7 +5,7 @@ from tribler.core import notifications from tribler.core.components.knowledge.db.knowledge_db import ResourceType -from tribler.core.components.knowledge.rules.tag_rules_processor import KnowledgeRulesProcessor, \ +from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor, \ LAST_PROCESSED_TORRENT_ID TEST_BATCH_SIZE = 100 diff --git a/src/tribler/core/components/metadata_store/metadata_store_component.py b/src/tribler/core/components/metadata_store/metadata_store_component.py index 477d682480b..4c16e328c6e 100644 --- a/src/tribler/core/components/metadata_store/metadata_store_component.py +++ b/src/tribler/core/components/metadata_store/metadata_store_component.py @@ -1,7 +1,7 @@ from tribler.core import notifications from tribler.core.components.component import Component from tribler.core.components.key.key_component import KeyComponent -from tribler.core.components.knowledge.rules.tag_rules_processor import KnowledgeRulesProcessor +from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor from tribler.core.components.metadata_store.db.store import MetadataStore from tribler.core.utilities.simpledefs import STATEDIR_DB_DIR diff --git a/src/tribler/core/components/metadata_store/restapi/metadata_endpoint_base.py b/src/tribler/core/components/metadata_store/restapi/metadata_endpoint_base.py index 73b3a389544..77f0b6e18e3 100644 --- a/src/tribler/core/components/metadata_store/restapi/metadata_endpoint_base.py +++ b/src/tribler/core/components/metadata_store/restapi/metadata_endpoint_base.py @@ -4,7 +4,7 @@ from pony.orm import db_session from tribler.core.components.knowledge.db.knowledge_db import KnowledgeDatabase, ResourceType -from tribler.core.components.knowledge.rules.tag_rules_processor import KnowledgeRulesProcessor +from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor from tribler.core.components.metadata_store.category_filter.family_filter import default_xxx_filter from tribler.core.components.metadata_store.db.serialization import CHANNEL_TORRENT, COLLECTION_NODE, REGULAR_TORRENT from tribler.core.components.metadata_store.db.store import MetadataStore