Skip to content

Commit

Permalink
add simple scheduler 📆 (#505)
Browse files Browse the repository at this point in the history
  • Loading branch information
jacksund authored Aug 21, 2023
1 parent 67623c0 commit fedfb90
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/change_log.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ There is one key exception to the rules above -- and that is with `MAJOR`=0 rele
- allow "internal only" access to server via environment variables (disabled by default)
- add `django-simple-history` support to track user changes on specific models
- add `@check_db_conn` decorator to help with database connection closures/timeouts
- add `simmate engine start-schedules` which lets you configure periodic tasks for individual apps (e.g. check a table for updates every 5 minutes). Includes error handling and email alerts. (Note: this is a quick alternative to the full Prefect system)

**Fixes**

Expand Down
1 change: 1 addition & 0 deletions envs/conda/dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ dependencies:
- requests >=2.28.1, <2.28.3
- rich >=11.0, <=13.5.2
- scikit-learn >=1.1.1, <1.2.2
- schedule >=1.2.0, <=1.2.0
- toml >=0.10.2, <=0.10.2
- typer >=0.6.0, <0.7.1
# Recommended IDE for development -- We leave this as a separate install
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,11 @@ dependencies=[
"plotly >=5.4.0, <5.15.0", # for interactive plots and visualization
"markdown >=3.4.1, <=3.4.3", # for docs and markdown rendering in html templates
"requests >=2.28.1, <2.28.3", # for quick web queries and downloads
"scikit-learn >=1.1.1, <1.2.2", # machine-learning methods (may become core dep)
"scikit-learn >=1.1.1, <1.2.2", # machine-learning methods
"cloudpickle >=2.1.0, <=2.2.0", # for serializing complex python objects
"rich >=11.0, <13.5.2", # for coloring and styling the CLI outputs
"toml >=0.10.2, <=0.10.2", # for toml configuration files
"schedule >=1.2.0, <=1.2.0", # for running periodic tasks (prefect alternative)
#
# These are from the MP stack and I want to phase them out over time
"pymatgen >=2022.1.9, <2023.5.32",
Expand Down
10 changes: 10 additions & 0 deletions src/simmate/command_line/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ def engine():
pass


@engine_app.command()
def start_schedules():
    """
    Starts the main process for periodic tasks in each app's "schedules" module.
    """
    # The import lives inside the function body, so the scheduler module is
    # only loaded when this command is actually invoked.
    from simmate.engine import scheduler

    # Hand control over to the scheduler's entry point.
    scheduler.SimmateScheduler.start()


@engine_app.command()
def start_worker(
nitems_max: int = None,
Expand Down
132 changes: 132 additions & 0 deletions src/simmate/engine/scheduler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-

import datetime
import importlib
import logging
import time
from traceback import format_exc

from django.core.mail import EmailMessage
from rich import print
from schedule import Scheduler

from simmate.configuration.django.settings import ADMINS, SIMMATE_APPS
from simmate.utilities import get_app_submodule

# Decorative banner that is displayed in the console when the scheduler
# process starts up. It is a raw string so that the backslashes in the ASCII
# art are kept literally instead of being read as escape sequences.
# This uses "Small Slant" from https://patorjk.com/software/taag/
HEADER_ART = r"""
=========================================================================
_____ __ ____ __ __ __
/ __(_)_ _ __ _ ___ _/ /____ / __/___/ / ___ ___/ /_ __/ /__ ____
_\ \/ / ' \/ ' \/ _ `/ __/ -_) _\ \/ __/ _ \/ -_) _ / // / / -_) __/
/___/_/_/_/_/_/_/_/\_,_/\__/\__/ /___/\__/_//_/\__/\_,_/\_,_/_/\__/_/
=========================================================================
"""


class SimmateScheduler(Scheduler):
    """
    Runs the periodic tasks defined in each app's "schedules" module.

    NOTE: This is a "basic" alternative to scheduler systems such as Prefect.
    Here, only 1 task is run at a time. So if you have a >1 hr task, this will
    block tasks scheduled to run every minute until the longer task finishes.
    Furthermore, if you schedule a task to run at an exact date/time, this
    scheduler may miss the start time due to other running tasks in front of it.
    Lastly, we "sleep" the scheduler every second, so scheduling tasks to run
    every <1s will not work as intended. The 1s sleep also means start times
    will have an error of up to 1s -- even when there's only one scheduled task.
    """

    def __init__(self, reschedule_on_failure=True):
        """
        If reschedule_on_failure is True, jobs will be rescheduled for their
        next run as if they had completed successfully. If False, they'll run
        on the next run_pending() tick.

        NOTE: with reschedule_on_failure=False, a job that keeps failing is
        retried on every tick, and each failure is logged and triggers an
        alert email (when admin emails are configured).
        """
        self.reschedule_on_failure = reschedule_on_failure
        super().__init__()

    @classmethod
    def start(cls, sleep_step: float = 1, reschedule_on_failure: bool = True):
        """
        Registers every app's schedules and then runs them in an infinite loop.

        This call blocks forever. See the class docstring for the limitations
        of this scheduler (one task at a time, ~`sleep_step` timing error).

        #### Parameters

        - `sleep_step`:
            Number of seconds to sleep between checks for pending jobs.

        - `reschedule_on_failure`:
            Passed to the scheduler instance. When True (the default), a job
            that raises is rescheduled for its next regular run. When False,
            it is left pending and retried on the next tick.
        """

        # TODO: consider parallel runs using threads, dask, or workers...
        # https://schedule.readthedocs.io/en/stable/parallel-execution.html

        # TODO: for error handling, read more at...
        # https://schedule.readthedocs.io/en/stable/exception-handling.html

        # print the header in the console to let the user know the worker started
        print("[bold cyan]" + HEADER_ART)

        # HACK-FIX:
        # Jobs will register to the default scheduler. We instead want to use
        # this custom class, so we patch the schedule module
        # https://github.com/dbader/schedule/blob/master/schedule/__init__.py#L801C1-L881C31
        import schedule

        schedule.default_scheduler = cls(
            reschedule_on_failure=reschedule_on_failure,
        )

        # Now run the registration where the scheduler shortcuts will work
        cls._register_app_schedules()

        # And now run the infinite loop of schedules
        logging.info("Starting schedules...")
        while True:  # Run indefinitely
            schedule.run_pending()
            # save some CPU time by sleeping between checks
            time.sleep(sleep_step)

    @staticmethod
    def _register_app_schedules(apps_to_search: list[str] = SIMMATE_APPS):
        """
        Goes through a list of apps and imports the "schedules" module in each.
        By default, this will grab all installed SIMMATE_APPs
        """
        logging.info("Searching for registrations...")
        for app_name in apps_to_search:
            # check if there is a schedule module for this app and load it if so
            schedule_path = get_app_submodule(app_name, "schedules")
            if not schedule_path:
                continue  # skip to the next app
            # We use the python 'schedule' package, so simply importing these
            # modules is enough to register them.
            importlib.import_module(schedule_path)
            logging.info(f"Registered schedules for '{schedule_path}'")
        logging.info("Completed registrations :sparkles:")

    def _run_job(self, job):
        """
        Runs a single job, catching any failure so that the main loop (and
        all other jobs) keep going. Failures are logged and, if admin emails
        are configured, reported via an email alert.
        """
        try:
            super()._run_job(job)
        except Exception:
            # log errors but still continue
            error_msg = format_exc()
            logging.critical(error_msg)

            # When the job function raises, the job never reaches the code
            # that records the run and computes the next run time, so it is
            # still "pending". Only move it to its next slot if requested;
            # otherwise leave it to be retried on the next run_pending() tick.
            if self.reschedule_on_failure:
                job.last_run = datetime.datetime.now()
                job._schedule_next_run()

            # if emails are configured, send an alert of the failure
            recipients = [a[1] for a in ADMINS]  # get admin emails
            if recipients:
                email = EmailMessage(
                    subject="[SIMMATE] Scheduled job failure",
                    body=error_msg,
                    to=recipients,
                )
                # a broken mail setup must never take down the scheduler
                email.send(fail_silently=True)

0 comments on commit fedfb90

Please sign in to comment.