Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pluggable Sources #15

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@ dependencies = [
"aiohttp~=3.9.5",
"aiosqlite~=0.20.0",
"beautifulsoup4~=4.12.3",
"cron-descriptor~=1.4.3",
"elasticsearch[async]~=8.13.1",
"fasthx~=0.2403.1",
"fastapi~=0.111.0",
"humanize~=4.9.0",
"ijson~=3.2.3",
"pydantic-settings~=2.2.1",
"python-magic~=0.4.27",
"SQLAlchemy[asyncio]~=2.0.30"
"rocketry~=2.5.1",
"SQLAlchemy[asyncio]~=2.0.30",
"websockets~=12.0"
]
dynamic = ["version", "description", "authors"]

Expand All @@ -38,6 +42,9 @@ Repository = "https://github.com/Sidneys1/Memoria"
AiohttpDownloader = "memoria.plugins.builtin.aiohttp_downloader:AiohttpDownloader"
HtmlContentFinder = "memoria.plugins.builtin.html_content_finder:HtmlContentFinder"
HtmlExtractor = "memoria.plugins.builtin.html_extractor:HtmlExtractor"
PrefixAllowlistRule = "memoria.plugins.builtin.prefix_allowlistrule:PrefixAllowlistRule"
RegexAllowlistRule = "memoria.plugins.builtin.regex_allowlistrule:RegexAllowlistRule"
FirefoxSyncClientSource = "memoria.plugins.builtin.firefox_sync_client_source:FirefoxSyncClientSource"

[project.optional-dependencies]
dev = [
Expand Down
68 changes: 68 additions & 0 deletions src/memoria/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import asyncio
from logging import basicConfig, DEBUG, INFO

class importer:
async def add_many(self, items) -> None:
""""""
async for x in items:
# print("\t", x, sep='')
print('.', end='', flush=True)
print()

async def add(self, item) -> None:
""""""

async def flush(self) -> None:
""""""

async def main() -> None:
from memoria.model.orm.configured_source import ConfiguredSource
from memoria.plugins import PluginSchedule
from memoria.plugins._source_manager import SourcePluginManager
from memoria.db_clients import create_sql_client
from memoria.plugins.source import PluginSchedule

basicConfig(level=INFO)

async with create_sql_client() as session:
found: ConfiguredSource = await ConfiguredSource.find_one(session, ConfiguredSource.id == 1) # type: ignore
# found.display_name = 'Firefox Sync (<code>[email protected]</code>)'
found.schedule = PluginSchedule.Scheduled.value
found.schedule_value = '0 0 * * 0'
# await session.delete(found)

# new = ConfiguredSource(plugin_id='memoria.plugins.builtin.firefox_sync_client_source:FirefoxSyncClientSource', display_name='Firefox Sync: <samp>[email protected]</samp>', config='{}')
# session.add(new)
await session.commit()

async for x in await ConfiguredSource.find_all(session):
print(x.id, x.plugin_id, x.display_name, x.config, x.schedule, x.schedule_value, x.enabled, sep=', ')

# async with SourcePluginManager() as manager:
# print(manager._instances)
# print(manager._config)
# print(manager._schedules)


# plugin_id: str = None # type: ignore
# plugin_type: type[Source] = None # type: ignore
# for pid, plugin in suite.get_plugins_of_type(Source):
# # print(plugin.__name__, ','.join(x.name for x in plugin.SUPPORTED_SCHEDULES))
# plugin_type = plugin
# plugin_id = pid

# config: JsonValue = None



# print("Before 'with'")
# async with plugin_type(config) as plugin:
# return
# i = importer()
# print("Before 'run'")
# await plugin.run(i)
# print("After 'run'")
# print("After 'with'")

if __name__ == '__main__':
asyncio.run(main())
5 changes: 3 additions & 2 deletions src/memoria/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from queue import Queue

from .model.imported_history import ImportedHistory
from .plugins import ProcessingPluginManager, PluginSuite, Result
from .plugins._processing_manager import ProcessingPluginManager
from .plugins.processing import Result
from .settings import SETTINGS

_LOG = getLogger(__spec__.name)
Expand Down Expand Up @@ -86,7 +87,7 @@ async def check_exists(result: Result):

async def worker(log: Logger, queue: 'Queue[ImportedHistory]', no_more: 'Event', canceled: 'Event') -> None:
from .db_clients import create_elasticsearch_client, create_sql_client
processor = PluginSuite().create_processing_manager()
processor = ProcessingPluginManager()
es = await create_elasticsearch_client(SETTINGS.elastic_host,
basic_auth=(SETTINGS.elastic_user, SETTINGS.elastic_password))
async with create_sql_client() as sql_session, es, processor:
Expand Down
10 changes: 10 additions & 0 deletions src/memoria/logic/source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from typing import TYPE_CHECKING, Coroutine, AsyncIterable

from ..model.source import Source
from ..model.orm.configured_source import ConfiguredSource

if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncSession

def get_sources(session: 'AsyncSession', skip: int = 0, limit: int|None = None) -> Coroutine[None, None, AsyncIterable[ConfiguredSource]]:
return ConfiguredSource.find_all(session, skip=skip, limit=limit, order_by=ConfiguredSource.id.desc())
12 changes: 4 additions & 8 deletions src/memoria/model/allowlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,12 @@ def color(self) -> str|None:

@staticmethod
@lru_cache
def _get_options(plugin_id: str) -> AllowlistPlugin.DisplayOptions|None:
def _get_options(plugin_id: str) -> AllowlistPlugin.DisplayOptions:
from ..plugins._plugin_suite import PluginSuite
from ..plugins.allowlist import AllowlistRule
suite = PluginSuite()
for plugin in suite._plugins.values():
if not issubclass(plugin, AllowlistRule): continue
if plugin.identifier != plugin_id: continue
if (options := plugin.DISPLAY_OPTIONS) is None:
return None
return options
plugin = PluginSuite().get_plugin_by_short_name(plugin_id + "AllowlistRule")
assert plugin is not None and issubclass(plugin.type, AllowlistRule), f"{plugin_id}"
return plugin.type.DISPLAY_OPTIONS

class Config:
from_attributes = True
Expand Down
1 change: 1 addition & 0 deletions src/memoria/model/orm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,6 @@ async def find_all(cls,
from .history import *
from .page import *
from .allowlist import *
from .configured_source import *

__all__ = tuple()
18 changes: 18 additions & 0 deletions src/memoria/model/orm/configured_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from sqlalchemy import Boolean, Integer, String

from . import Column, CrudBase


class ConfiguredSource(CrudBase):
__tablename__ = 'configured_sources'

id = Column(Integer, primary_key=True)
plugin_id = Column(String)
display_name = Column(String)
config = Column(String, default="null")
schedule = Column(Integer, default=0)
schedule_value = Column(String, default="")
enabled = Column(Boolean, default=False)


__all__ = tuple()
9 changes: 9 additions & 0 deletions src/memoria/model/plugins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from pydantic import BaseModel

class Plugin(BaseModel):
id: str
display_name: str
description: str|None

class SourcePlugin(Plugin):
...
71 changes: 71 additions & 0 deletions src/memoria/model/source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from functools import lru_cache

from pydantic import BaseModel, computed_field

from ..plugins.source import PluginSchedule, Source as SourcePlugin


class Source(BaseModel):
id: int
plugin_id: str
display_name: str
config: str
schedule: PluginSchedule
schedule_value: str|None
enabled: bool

@computed_field
def can_run_on_demand(self) -> bool:
return PluginSchedule.OnDemand in self._get_plugin(self.plugin_id).SUPPORTED_SCHEDULES

@computed_field
def display_schedule(self) -> str:
match self.schedule:
case PluginSchedule.Disabled:
return "Only on demand" if self.can_run_on_demand else "Disabled"
case PluginSchedule.Continuous:
return "Always running"
case PluginSchedule.Intermittent:
assert isinstance(self.schedule_value, str)
value = float(self.schedule_value)
if value == 60:
return "Hourly"
elif value == 1440:
return "Daily"
import humanize
return "Every " + humanize.naturaldelta(60.0 * value)
case PluginSchedule.Scheduled:
assert isinstance(self.schedule_value, str)
value = self.schedule_value
try:
from cron_descriptor import get_description
return get_description(value)
except Exception:
from logging import getLogger
getLogger(__spec__.name).exception('Failed to translate cron value %r.', value)
return f"Cron: {value}"
case PluginSchedule.OnDemand:
return "On Demand"

# @computed_field
# def display_name(self) -> str:
# config = self._get_options(self.plugin_id)
# ret = config.display_name or self.plugin_id
# print('!!!!!!! DIsplay name of', self.plugin_id, 'is', ret)
# return ret

@staticmethod
@lru_cache
def _get_options(plugin_id: str) -> SourcePlugin.UxConfig:
return Source._get_plugin(plugin_id).UX_CONFIG

@staticmethod
@lru_cache
def _get_plugin(plugin_id: str) -> type[SourcePlugin]:
from ..plugins._plugin_suite import PluginSuite, PluginId
plugin = PluginSuite().get_plugin_by_id(PluginId(plugin_id))
assert plugin is not None and issubclass(plugin.type, SourcePlugin), f"{plugin_id}"
return plugin.type

class Config:
from_attributes = True
24 changes: 22 additions & 2 deletions src/memoria/plugins/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
from abc import ABC
from contextlib import AbstractAsyncContextManager
from typing import TypeAlias
from enum import IntFlag, auto
from typing import NewType

type Html = str
PluginId = NewType('PluginId', str)


class PluginSchedule(IntFlag):
Disabled = 0
"""The plugin will not be run."""

Continuous = auto()
"""The plugin will manage its own lifecycle (running continuously) through `__aenter__` and `__aexit__`."""

Intermittent = auto()
"""The plugin can be configured to run at specific intervals."""

Scheduled = auto()
"""The plugin can be configured to run at specific cron schedules."""

OnDemand = auto()
"""The plugin can be run on-demand, even if other schedules are selected."""

Html: TypeAlias = str

class Plugin(AbstractAsyncContextManager, ABC):
async def __aexit__(self, *_, **__):
Expand Down
Loading
Loading