From 9965e38cd7d31d1798f16b3d1bc8263f6be89a0b Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Thu, 30 Mar 2023 19:37:51 -0700 Subject: [PATCH 01/18] Add flask-debugtoolbar --- ambuda/__init__.py | 4 ++++ pyproject.toml | 9 +++++---- requirements.txt | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ambuda/__init__.py b/ambuda/__init__.py index c5c62c4f..a32ef6a9 100644 --- a/ambuda/__init__.py +++ b/ambuda/__init__.py @@ -142,6 +142,10 @@ def get_locale(): # Debug-only routes for local development. if app.debug: + from flask_debugtoolbar import DebugToolbarExtension + + toolbar = DebugToolbarExtension(app) + from ambuda.views.debug import bp as debug_bp app.register_blueprint(debug_bp, url_prefix="/debug") diff --git a/pyproject.toml b/pyproject.toml index 0942c876..5d8566b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -158,15 +158,16 @@ zipp = "3.8.0" [tool.poetry.dev-dependencies] -pytest = "^7.1.2" black = "^22.3.0" -setuptools = "^57.5.0" -swig = "^4.0.2" -isort = "^5.10.1" flake8 = "5.0.4" +flask-debugtoolbar = "0.13.1" +isort = "^5.10.1" +pytest = "^7.1.2" pytest-cov = "4.0.0" pytest-cover = "3.0.0" pytest-coverage = "0.0" +setuptools = "^57.5.0" +swig = "^4.0.2" [build-system] diff --git a/requirements.txt b/requirements.txt index 40eedcc1..ec2f7834 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,6 +35,7 @@ Flask-Bcrypt==1.0.1 Flask-Login==0.6.1 Flask-Mail==0.9.1 Flask-WTF==1.0.1 +flask-debugtoolbar==0.13.1 fonttools==4.33.3 gevent==21.12.0 google-api-core==2.8.2 From d3d8921e36e61b34b3bf89c7c253490fd406449f Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Thu, 30 Mar 2023 21:44:58 -0700 Subject: [PATCH 02/18] WIP --- Makefile | 2 +- ambuda/__init__.py | 42 ++++---------- ambuda/admin.py | 4 +- ambuda/checks.py | 4 +- ambuda/database.py | 1 - ambuda/models/auth.py | 10 ++-- ambuda/models/base.py | 7 +-- ambuda/models/blog.py | 4 +- ambuda/models/dictionaries.py | 6 +- ambuda/models/parse.py | 4 +- ambuda/models/proofing.py | 10 ++-- ambuda/models/site.py | 4 +- ambuda/models/talk.py | 8 +-- ambuda/models/texts.py | 8 +-- ambuda/queries.py | 44 +-------------- ambuda/tasks/__init__.py | 51 +++++++++++------ ambuda/tasks/ocr.py | 72 ++++++++++-------------- ambuda/tasks/projects.py | 30 ++++------ ambuda/views/proofing/main.py | 1 - ambuda/views/proofing/project.py | 7 ++- ambuda/views/reader/texts.py | 4 +- cli.py | 1 - config.py | 8 ++- make_celery.py | 12 ++++ pyproject.toml | 4 +- requirements.txt | 16 +++++- test/ambuda/conftest.py | 11 ++-- test/ambuda/tasks/test_projects_tasks.py | 1 - 28 files changed, 167 insertions(+), 209 deletions(-) create mode 100644 make_celery.py diff --git a/Makefile b/Makefile index d35afc79..9a746398 100644 --- a/Makefile +++ b/Makefile @@ -135,7 +135,7 @@ devserver: py-venv-check # Run a local Celery instance for background tasks. celery: - celery -A ambuda.tasks worker --loglevel=INFO + celery -A make_celery worker --loglevel=INFO # Docker commands diff --git a/ambuda/__init__.py b/ambuda/__init__.py index a32ef6a9..35308360 100644 --- a/ambuda/__init__.py +++ b/ambuda/__init__.py @@ -13,17 +13,18 @@ from flask import Flask, session from flask_babel import Babel, pgettext from sentry_sdk.integrations.flask import FlaskIntegration -from sqlalchemy import exc import config from ambuda import admin as admin_manager from ambuda import auth as auth_manager -from ambuda import checks, filters, queries +from ambuda import checks, filters from ambuda.consts import LOCALES from ambuda.mail import mailer +from ambuda.tasks import celery_init_app from ambuda.utils import assets from ambuda.utils.json_serde import AmbudaJSONEncoder from ambuda.utils.url_converters import ListConverter +from ambuda.models.base import db from ambuda.views.about import bp as about from ambuda.views.api import bp as api from ambuda.views.auth import bp as auth @@ -42,32 +43,6 @@ def _initialize_sentry(sentry_dsn: str): ) -def _initialize_db_session(app, config_name: str): - """Ensure that our SQLAlchemy session behaves well. - - The Flask-SQLAlchemy library manages all of this boilerplate for us - automatically, but Flask-SQLAlchemy has relatively poor support for using - our models outside of the application context, e.g. when running seed - scripts or other batch jobs. So instead of using that extension, we manage - the boilerplate ourselves. - """ - - @app.teardown_appcontext - def shutdown_session(exception=None): - """Reset session state to prevent caching and memory leaks.""" - queries.get_session_class().remove() - - if config_name == config.PRODUCTION: - # The hook below hides database errors. So, install the hook only if - # we're in production. - - @app.errorhandler(exc.SQLAlchemyError) - def handle_db_exceptions(error): - """Rollback errors so that the db can handle future requests.""" - session = queries.get_session() - session.rollback() - - def _initialize_logger(log_level: int) -> None: """Initialize a simple logger for all requests.""" handler = logging.StreamHandler(sys.stderr) @@ -99,6 +74,12 @@ def create_app(config_env: str): # Config app.config.from_object(config_spec) + # Database + db.init_app(app) + + # Celery + celery_init_app(app) + # Sanity checks assert config_env == config_spec.AMBUDA_ENVIRONMENT if config_env != config.TESTING: @@ -108,9 +89,6 @@ def create_app(config_env: str): # Logger _initialize_logger(config_spec.LOG_LEVEL) - # Database - _initialize_db_session(app, config_env) - # Extensions babel = Babel(app) @@ -144,7 +122,7 @@ def get_locale(): if app.debug: from flask_debugtoolbar import DebugToolbarExtension - toolbar = DebugToolbarExtension(app) + DebugToolbarExtension(app) from ambuda.views.debug import bp as debug_bp diff --git a/ambuda/admin.py b/ambuda/admin.py index 62d68bbf..5dc4994e 100644 --- a/ambuda/admin.py +++ b/ambuda/admin.py @@ -6,7 +6,7 @@ from flask_login import current_user import ambuda.database as db -import ambuda.queries as q +from ambuda.models.base import db as flask_sqla class AmbudaIndexView(AdminIndexView): @@ -68,7 +68,7 @@ class SponsorshipView(ModeratorBaseView): def create_admin_manager(app): - session = q.get_session_class() + session = flask_sqla.session admin = Admin( app, name="Ambuda", diff --git a/ambuda/checks.py b/ambuda/checks.py index 0c3a2d39..3968f623 100644 --- a/ambuda/checks.py +++ b/ambuda/checks.py @@ -9,7 +9,7 @@ from ambuda import consts, enums from ambuda import database as db from ambuda import queries as q -from ambuda.models.base import Base +from ambuda.models.base import db as fsqa def _warn(text: str = ""): @@ -70,7 +70,7 @@ def _check_app_schema_matches_db_schema(database_uri: str) -> list[str]: errors = [] - for table_name, table in Base.metadata.tables.items(): + for table_name, table in fsqa.Model.metadata.tables.items(): app_columns = table.columns db_columns = {c["name"]: c for c in inspector.get_columns(table_name)} diff --git a/ambuda/database.py b/ambuda/database.py index ea5b7e45..35d64f80 100644 --- a/ambuda/database.py +++ b/ambuda/database.py @@ -4,7 +4,6 @@ from ambuda.enums import SiteRole # NOQA F401 from ambuda.models.auth import * # NOQA F401,F403 -from ambuda.models.base import Base # NOQA F401,F403 from ambuda.models.blog import * # NOQA F401,F403 from ambuda.models.dictionaries import * # NOQA F401,F403 from ambuda.models.parse import * # NOQA F401,F403 diff --git a/ambuda/models/auth.py b/ambuda/models/auth.py index a811581a..450107f8 100644 --- a/ambuda/models/auth.py +++ b/ambuda/models/auth.py @@ -7,11 +7,11 @@ from sqlalchemy.orm import relationship from werkzeug.security import check_password_hash, generate_password_hash -from ambuda.models.base import Base, foreign_key, pk +from ambuda.models.base import db, foreign_key, pk from ambuda.utils.user_mixins import AmbudaUserMixin -class User(AmbudaUserMixin, Base): +class User(AmbudaUserMixin, db.Model): """A user.""" __tablename__ = "users" @@ -63,7 +63,7 @@ def check_password(self, raw_password: str) -> bool: return check_password_hash(self.password_hash, raw_password) -class Role(Base): +class Role(db.Model): """A role. @@ -83,7 +83,7 @@ def __repr__(self): return f"" -class UserRoles(Base): +class UserRoles(db.Model): """Secondary table for users and roles.""" @@ -95,7 +95,7 @@ class UserRoles(Base): role_id = Column(Integer, ForeignKey("roles.id"), primary_key=True, index=True) -class PasswordResetToken(Base): +class PasswordResetToken(db.Model): """Models a "forgot password" recovery token.""" diff --git a/ambuda/models/base.py b/ambuda/models/base.py index c8004e71..a8cd907f 100644 --- a/ambuda/models/base.py +++ b/ambuda/models/base.py @@ -1,11 +1,10 @@ """Base model and utilities.""" +from flask_sqlalchemy import SQLAlchemy from sqlalchemy import Column, ForeignKey, Integer -from sqlalchemy.orm import declarative_base -#: The base class for all of Ambuda's models. All new models should inherit -#: from this class. -Base = declarative_base() + +db = SQLAlchemy() def pk(): diff --git a/ambuda/models/blog.py b/ambuda/models/blog.py index 8968f8b4..8a01c744 100644 --- a/ambuda/models/blog.py +++ b/ambuda/models/blog.py @@ -4,10 +4,10 @@ from sqlalchemy import Text as Text_ from sqlalchemy.orm import relationship -from ambuda.models.base import Base, foreign_key, pk, same_as +from ambuda.models.base import db, foreign_key, pk, same_as -class BlogPost(Base): +class BlogPost(db.Model): """A blog post.""" diff --git a/ambuda/models/dictionaries.py b/ambuda/models/dictionaries.py index 9787680b..915c6d6d 100644 --- a/ambuda/models/dictionaries.py +++ b/ambuda/models/dictionaries.py @@ -1,10 +1,10 @@ from sqlalchemy import Column, String from sqlalchemy.orm import relationship -from ambuda.models.base import Base, foreign_key, pk +from ambuda.models.base import db, foreign_key, pk -class Dictionary(Base): +class Dictionary(db.Model): """A dictionary that maps Sanskrit expressions to definitions in various languages.""" @@ -21,7 +21,7 @@ class Dictionary(Base): entries = relationship("DictionaryEntry", backref="dictionary", cascade="delete") -class DictionaryEntry(Base): +class DictionaryEntry(db.Model): """Dictionary definitions for a specific entry key. diff --git a/ambuda/models/parse.py b/ambuda/models/parse.py index 447997d8..c91abc7d 100644 --- a/ambuda/models/parse.py +++ b/ambuda/models/parse.py @@ -3,10 +3,10 @@ from sqlalchemy import Column from sqlalchemy import Text as _Text -from ambuda.models.base import Base, foreign_key, pk +from ambuda.models.base import db, foreign_key, pk -class BlockParse(Base): +class BlockParse(db.Model): """Parse data for a `TextBlock`.""" __tablename__ = "block_parses" diff --git a/ambuda/models/proofing.py b/ambuda/models/proofing.py index e2dfebcd..ec7152c9 100644 --- a/ambuda/models/proofing.py +++ b/ambuda/models/proofing.py @@ -6,7 +6,7 @@ from sqlalchemy import Text as Text_ from sqlalchemy.orm import relationship -from ambuda.models.base import Base, foreign_key, pk, same_as +from ambuda.models.base import db, foreign_key, pk, same_as def string(): @@ -19,7 +19,7 @@ def text(): return Column(Text_, nullable=False, default="") -class Project(Base): +class Project(db.Model): """A proofreading project. @@ -69,7 +69,7 @@ class Project(Base): ) -class Page(Base): +class Page(db.Model): """A page in a proofreading project. @@ -115,7 +115,7 @@ class Page(Base): ) -class PageStatus(Base): +class PageStatus(db.Model): """The transcription status of a given page. @@ -130,7 +130,7 @@ class PageStatus(Base): name = Column(String, nullable=False, unique=True) -class Revision(Base): +class Revision(db.Model): """A specific page revision. diff --git a/ambuda/models/site.py b/ambuda/models/site.py index 7b97fd68..d6948872 100644 --- a/ambuda/models/site.py +++ b/ambuda/models/site.py @@ -8,10 +8,10 @@ from sqlalchemy import Column, Integer, String from sqlalchemy import Text as Text_ -from ambuda.models.base import Base, pk +from ambuda.models.base import db, pk -class ProjectSponsorship(Base): +class ProjectSponsorship(db.Model): """A project that a donor can sponsor.""" diff --git a/ambuda/models/talk.py b/ambuda/models/talk.py index 45133103..eaaaffc9 100644 --- a/ambuda/models/talk.py +++ b/ambuda/models/talk.py @@ -6,7 +6,7 @@ from sqlalchemy import Text as Text_ from sqlalchemy.orm import relationship -from ambuda.models.base import Base, foreign_key, pk, same_as +from ambuda.models.base import db, foreign_key, pk, same_as def string(): @@ -14,7 +14,7 @@ def string(): return Column(String, nullable=False, default="") -class Board(Base): +class Board(db.Model): """A list of threads.""" @@ -34,7 +34,7 @@ class Board(Base): ) -class Thread(Base): +class Thread(db.Model): """A list of posts.""" @@ -59,7 +59,7 @@ class Thread(Base): posts = relationship("Post", order_by=lambda: Post.created_at, backref="thread") -class Post(Base): +class Post(db.Model): """A post.""" diff --git a/ambuda/models/texts.py b/ambuda/models/texts.py index 38d8a1be..09d5cc1d 100644 --- a/ambuda/models/texts.py +++ b/ambuda/models/texts.py @@ -11,10 +11,10 @@ from sqlalchemy import Text as _Text from sqlalchemy.orm import relationship -from ambuda.models.base import Base, foreign_key, pk +from ambuda.models.base import db, foreign_key, pk -class Text(Base): +class Text(db.Model): """A text with its metadata.""" @@ -32,7 +32,7 @@ class Text(Base): sections = relationship("TextSection", backref="text", cascade="delete") -class TextSection(Base): +class TextSection(db.Model): """Ordered divisions of text content. This represent divisions like kāṇḍas, sargas, etc. @@ -63,7 +63,7 @@ class TextSection(Base): ) -class TextBlock(Base): +class TextBlock(db.Model): """A verse or paragraph. A TextBlock is the "unit of reuse." When we make cross-references between diff --git a/ambuda/queries.py b/ambuda/queries.py index ec7e9122..ad5440ed 100644 --- a/ambuda/queries.py +++ b/ambuda/queries.py @@ -6,52 +6,14 @@ import functools -from flask import current_app -from sqlalchemy import create_engine -from sqlalchemy.orm import load_only, scoped_session, selectinload, sessionmaker +from sqlalchemy.orm import load_only, selectinload import ambuda.database as db - -# NOTE: this logic is copied from Flask-SQLAlchemy. We avoid Flask-SQLAlchemy -# because we also need to access the database from a non-Flask context when -# we run database seed scripts. -# ~~~ -# Scope the session to the current greenlet if greenlet is available, -# otherwise fall back to the current thread. -try: - from greenlet import getcurrent as _ident_func -except ImportError: - from threading import get_ident as _ident_func - - -# functools.cache makes this return value a singleton. -@functools.cache -def get_engine(): - database_uri = current_app.config["SQLALCHEMY_DATABASE_URI"] - return create_engine(database_uri) - - -# functools.cache makes this return value a singleton. -@functools.cache -def get_session_class(): - # Scoped sessions remove various kinds of errors, e.g. when using database - # objects created on different threads. - # - # For details, see: - # - https://stackoverflow.com/questions/12223335 - # - https://flask.palletsprojects.com/en/2.1.x/patterns/sqlalchemy/ - session_factory = sessionmaker(bind=get_engine(), autoflush=False, autocommit=False) - return scoped_session(session_factory, scopefunc=_ident_func) +from ambuda.models.base import db as flask_sqla def get_session(): - """Instantiate a scoped session. - - If we implemented this right, there should be exactly one unique session - per request. - """ - Session = get_session_class() - return Session() + return flask_sqla.session def texts() -> list[db.Text]: diff --git a/ambuda/tasks/__init__.py b/ambuda/tasks/__init__.py index cbf78c6c..f25ecbab 100644 --- a/ambuda/tasks/__init__.py +++ b/ambuda/tasks/__init__.py @@ -13,23 +13,42 @@ import os -from celery import Celery +from celery import Celery, Task +from flask import Flask # For context on why we use Redis for both the backend and the broker, see the # "Background tasks with Celery" doc. redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") -app = Celery( - "ambuda-tasks", - backend=redis_url, - broker=redis_url, - include=[ - "ambuda.tasks.projects", - "ambuda.tasks.ocr", - ], -) -app.conf.update( - # Run all tasks asynchronously by default. - task_always_eager=False, - # Force arguments to be plain data by requiring them to be JSON-compatible. - task_serializer="json", -) + + +def celery_init_app(app: Flask) -> Celery: + """Initialize the Celery app against our Flask instance. + + Source: https://flask.palletsprojects.com/en/2.2.x/patterns/celery/ + """ + + class FlaskTask(Task): + def __call__(self, *args: object, **kwargs: object) -> object: + with flask_app.app_context(): + return self.run(*args, **kwargs) + + celery_app = Celery( + "ambuda-tasks", + task_cls=FlaskTask, + backend=redis_url, + broker=redis_url, + include=[ + "ambuda.tasks.projects", + "ambuda.tasks.ocr", + ], + ) + celery_app.set_default() + celery_app.conf.update( + # Run all tasks asynchronously by default. + task_always_eager=False, + # Force arguments to be plain data by requiring them to be JSON-compatible. + task_serializer="json", + ) + + app.extensions["celery"] = celery_app + return celery_app diff --git a/ambuda/tasks/ocr.py b/ambuda/tasks/ocr.py index d165c843..2c788972 100644 --- a/ambuda/tasks/ocr.py +++ b/ambuda/tasks/ocr.py @@ -1,80 +1,69 @@ """Background tasks for proofing projects.""" -from celery import group +from celery import group, shared_task from celery.result import GroupResult from ambuda import consts from ambuda import database as db from ambuda import queries as q from ambuda.enums import SitePageStatus -from ambuda.tasks import app from ambuda.utils import google_ocr from ambuda.utils.assets import get_page_image_filepath from ambuda.utils.revisions import add_revision -from config import create_config_only_app def _run_ocr_for_page_inner( - app_env: str, project_slug: str, page_slug: str, ) -> int: """Must run in the application context.""" + bot_user = q.user(consts.BOT_USERNAME) + if bot_user is None: + raise ValueError(f'User "{consts.BOT_USERNAME}" is not defined.') - flask_app = create_config_only_app(app_env) - with flask_app.app_context(): - bot_user = q.user(consts.BOT_USERNAME) - if bot_user is None: - raise ValueError(f'User "{consts.BOT_USERNAME}" is not defined.') + # The actual API call. + image_path = get_page_image_filepath(project_slug, page_slug) + ocr_response = google_ocr.run(image_path) - # The actual API call. - image_path = get_page_image_filepath(project_slug, page_slug) - ocr_response = google_ocr.run(image_path) + session = q.get_session() + project = q.project(project_slug) + page = q.page(project.id, page_slug) - session = q.get_session() - project = q.project(project_slug) - page = q.page(project.id, page_slug) - - page.ocr_bounding_boxes = google_ocr.serialize_bounding_boxes( - ocr_response.bounding_boxes + page.ocr_bounding_boxes = google_ocr.serialize_bounding_boxes( + ocr_response.bounding_boxes + ) + session.add(page) + session.commit() + + summary = "Run OCR" + try: + return add_revision( + page=page, + summary=summary, + content=ocr_response.text_content, + status=SitePageStatus.R0, + version=0, + author_id=bot_user.id, ) - session.add(page) - session.commit() - - summary = "Run OCR" - try: - return add_revision( - page=page, - summary=summary, - content=ocr_response.text_content, - status=SitePageStatus.R0, - version=0, - author_id=bot_user.id, - ) - except Exception as e: - raise ValueError( - f'OCR failed for page "{project.slug}/{page.slug}".' - ) from e + except Exception: + raise ValueError(f'OCR failed for page "{project.slug}/{page.slug}".') -@app.task(bind=True) +@shared_task(bind=True) def run_ocr_for_page( self, *, - app_env: str, project_slug: str, page_slug: str, ): _run_ocr_for_page_inner( - app_env, project_slug, page_slug, ) def run_ocr_for_project( - app_env: str, project: db.Project, ) -> GroupResult | None: """Create a `group` task to run OCR on a project. @@ -86,14 +75,11 @@ def run_ocr_for_project( :return: the Celery result, or ``None`` if no tasks were run. """ - flask_app = create_config_only_app(app_env) - with flask_app.app_context(): - unedited_pages = [p for p in project.pages if p.version == 0] + unedited_pages = [p for p in project.pages if p.version == 0] if unedited_pages: tasks = group( run_ocr_for_page.s( - app_env=app_env, project_slug=project.slug, page_slug=p.slug, ) diff --git a/ambuda/tasks/projects.py b/ambuda/tasks/projects.py index 68f4e874..a83ee8dc 100644 --- a/ambuda/tasks/projects.py +++ b/ambuda/tasks/projects.py @@ -9,11 +9,10 @@ import fitz from slugify import slugify +from celery import shared_task from ambuda import database as db from ambuda import queries as q -from ambuda.tasks import app from ambuda.tasks.utils import CeleryTaskStatus, TaskStatus -from config import create_config_only_app def _split_pdf_into_pages( @@ -75,7 +74,6 @@ def create_project_inner( title: str, pdf_path: str, output_dir: str, - app_environment: str, creator_id: int, task_status: TaskStatus, ): @@ -87,18 +85,15 @@ def create_project_inner( :param title: the project title. :param pdf_path: local path to the source PDF. :param output_dir: local path where page images will be stored. - :param app_environment: the app environment, e.g. `"development"`. :param creator_id: the user that created this project. :param task_status: tracks progress on the task. """ logging.info(f'Received upload task "{title}" for path {pdf_path}.') # Tasks must be idempotent. Exit if the project already exists. - app = create_config_only_app(app_environment) - with app.app_context(): - session = q.get_session() - slug = slugify(title) - project = session.query(db.Project).filter_by(slug=slug).first() + session = q.get_session() + slug = slugify(title) + project = session.query(db.Project).filter_by(slug=slug).first() if project: raise ValueError( @@ -109,25 +104,23 @@ def create_project_inner( pages_dir = Path(output_dir) num_pages = _split_pdf_into_pages(Path(pdf_path), Path(pages_dir), task_status) - with app.app_context(): - _add_project_to_database( - title=title, - slug=slug, - num_pages=num_pages, - creator_id=creator_id, - ) + _add_project_to_database( + title=title, + slug=slug, + num_pages=num_pages, + creator_id=creator_id, + ) task_status.success(num_pages, slug) -@app.task(bind=True) +@shared_task(bind=True) def create_project( self, *, title: str, pdf_path: str, output_dir: str, - app_environment: str, creator_id: int, ): """Split the given PDF into pages and register the project on the database. @@ -139,7 +132,6 @@ def create_project( title=title, pdf_path=pdf_path, output_dir=output_dir, - app_environment=app_environment, creator_id=creator_id, task_status=task_status, ) diff --git a/ambuda/views/proofing/main.py b/ambuda/views/proofing/main.py index 39b2f135..0c3672bb 100644 --- a/ambuda/views/proofing/main.py +++ b/ambuda/views/proofing/main.py @@ -199,7 +199,6 @@ def create_project(): title=title, pdf_path=str(pdf_path), output_dir=str(page_image_dir), - app_environment=current_app.config["AMBUDA_ENVIRONMENT"], creator_id=current_user.id, ) return render_template( diff --git a/ambuda/views/proofing/project.py b/ambuda/views/proofing/project.py index 185f5582..8f761e8f 100644 --- a/ambuda/views/proofing/project.py +++ b/ambuda/views/proofing/project.py @@ -32,7 +32,6 @@ from ambuda import database as db from ambuda import queries as q -from ambuda.tasks import app as celery_app from ambuda.tasks import ocr as ocr_tasks from ambuda.utils import project_utils, proofing_utils from ambuda.utils.revisions import add_revision @@ -42,6 +41,10 @@ LOG = logging.getLogger(__name__) +def get_celery_app(): + return current_app.extensions["celery"] + + def _is_valid_page_number_spec(_, field): try: _ = project_utils.parse_page_number_spec(field.data) @@ -632,7 +635,7 @@ def batch_ocr(slug): @bp.route("/batch-ocr-status/") def batch_ocr_status(task_id): - r = GroupResult.restore(task_id, app=celery_app) + r = GroupResult.restore(task_id, app=get_celery_app()) assert r, task_id if r.results: diff --git a/ambuda/views/reader/texts.py b/ambuda/views/reader/texts.py index d67ca71c..4200c568 100644 --- a/ambuda/views/reader/texts.py +++ b/ambuda/views/reader/texts.py @@ -150,9 +150,7 @@ def section(text_slug, section_slug): # Fetch with content blocks cur = q.text_section(text_.id, section_slug) - - with q.get_session() as _: - _ = cur.blocks + _ = cur.blocks blocks = [] for block in cur.blocks: diff --git a/cli.py b/cli.py index 608928b9..54c6e5b0 100755 --- a/cli.py +++ b/cli.py @@ -101,7 +101,6 @@ def create_project(title, pdf_path): title=title, pdf_path=pdf_path, output_dir=str(page_image_dir), - app_environment=current_app.config["AMBUDA_ENVIRONMENT"], creator_id=arbitrary_user.id, task_status=LocalTaskStatus(), ) diff --git a/config.py b/config.py index 056ad7f8..7be66124 100644 --- a/config.py +++ b/config.py @@ -80,6 +80,9 @@ class BaseConfig: #: https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls SQLALCHEMY_DATABASE_URI = _env("SQLALCHEMY_DATABASE_URI") + #: If set, record queries during a request. + SQLALCHEMY_RECORD_QUERIES = False + #: Where to store user uploads (PDFs, images, etc.). UPLOAD_FOLDER = _env("FLASK_UPLOAD_FOLDER") @@ -173,6 +176,9 @@ class UnitTestConfig(BaseConfig): class DevelopmentConfig(BaseConfig): """For local development.""" + #: Disable redirect intercepts when using flask-debugtoolbar + DEBUG_TB_INTERCEPT_REDIRECTS = False + AMBUDA_ENVIRONMENT = DEVELOPMENT DEBUG = True #: If set, automatically reload Flask templates (including imports) when @@ -269,7 +275,7 @@ def _validate_config(config: BaseConfig): assert Path(google_creds).exists() -def load_config_object(name: str): +def load_config_object(name: str) -> BaseConfig: """Load and validate an application config.""" config_map = { TESTING: UnitTestConfig, diff --git a/make_celery.py b/make_celery.py new file mode 100644 index 00000000..4773270e --- /dev/null +++ b/make_celery.py @@ -0,0 +1,12 @@ +"""Entrypoint for Celery runner.""" + + +import os + +from dotenv import load_dotenv +from ambuda import create_app + +load_dotenv(".env") +config_env = os.environ["FLASK_ENV"] +flask_app = create_app(config_env) +celery = flask_app.extensions["celery"] diff --git a/pyproject.toml b/pyproject.toml index 5d8566b7..ca7dc67f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,11 +42,11 @@ dnspython = "2.2.1" docutils = "0.17.1" email-validator = "1.2.1" fabric = "2.7.0" -Flask = "2.1.2" +Flask = "2.2.3" Flask-Admin = "1.6.0" Flask-Babel = "2.0.0" Flask-Bcrypt = "1.0.1" -Flask-Login = "0.6.1" +Flask-Login = "0.6.2" Flask-Mail = "0.9.1" Flask-WTF = "1.0.1" fonttools = "4.33.3" diff --git a/requirements.txt b/requirements.txt index ec2f7834..8835c6fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,13 +27,16 @@ Deprecated==1.2.13 dnspython==2.2.1 docutils==0.17.1 email-validator==1.2.1 +exceptiongroup==1.1.1 fabric==2.7.0 -Flask==2.1.2 +Flask==2.2.3 Flask-Admin==1.6.0 Flask-Babel==2.0.0 Flask-Bcrypt==1.0.1 -Flask-Login==0.6.1 +flask-debugToolbar==0.13.1 +Flask-Login==0.6.2 Flask-Mail==0.9.1 +Flask-SQLAlchemy==3.0.3 Flask-WTF==1.0.1 flask-debugtoolbar==0.13.1 fonttools==4.33.3 @@ -88,6 +91,11 @@ pyasn1==0.4.8 pyasn1-modules==0.2.8 pycodestyle==2.9.1 pycparser==2.21 +<<<<<<< HEAD +======= +pyee==9.0.4 +pyflakes==2.5.0 +>>>>>>> WIP Pygments==2.12.0 pykakasi==2.2.1 PyMuPDF==1.20.2 @@ -108,6 +116,7 @@ requests==2.27.1 roman==3.3 ruff==0.0.260 rsa==4.8 +ruff==0.0.260 selenium==4.2.0 sentry-sdk==1.6.0 six==1.16.0 @@ -123,6 +132,7 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 SQLAlchemy==1.4.37 +swig==4.0.2 text-unidecode==1.3 toml==0.10.2 tomli==2.0.1 @@ -137,7 +147,7 @@ vine==5.0.0 watchdog==2.1.9 watchdog-gevent==0.1.1 wcwidth==0.2.5 -Werkzeug==2.1.2 +Werkzeug==2.2.3 wrapt==1.14.1 wsproto==1.1.0 WTForms==3.0.1 diff --git a/test/ambuda/conftest.py b/test/ambuda/conftest.py index b8d12dc9..3b8df966 100644 --- a/test/ambuda/conftest.py +++ b/test/ambuda/conftest.py @@ -3,8 +3,8 @@ import ambuda.database as db from ambuda import create_app +from ambuda.models.base import db as flask_sqla from ambuda.consts import BOT_USERNAME, TEXT_CATEGORIES -from ambuda.queries import get_engine, get_session def _add_dictionaries(session): @@ -21,13 +21,10 @@ def _add_dictionaries(session): def initialize_test_db(): - engine = get_engine() - assert ":memory:" in engine.url + flask_sqla.drop_all() + flask_sqla.create_all() - db.Base.metadata.drop_all(engine) - db.Base.metadata.create_all(engine) - - session = get_session() + session = flask_sqla.session # Text and parse data text = db.Text(slug="pariksha", title="parIkSA") diff --git a/test/ambuda/tasks/test_projects_tasks.py b/test/ambuda/tasks/test_projects_tasks.py index d178ef3b..841583ff 100644 --- a/test/ambuda/tasks/test_projects_tasks.py +++ b/test/ambuda/tasks/test_projects_tasks.py @@ -29,7 +29,6 @@ def test_create_project_inner(flask_app): title="Cool project", pdf_path=f.name, output_dir=flask_app.config["UPLOAD_FOLDER"], - app_environment=flask_app.config["AMBUDA_ENVIRONMENT"], creator_id=1, task_status=ambuda.tasks.utils.LocalTaskStatus(), ) From c01ef3efde9f39dff37e390f7f29c11732c3f820 Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 18:00:19 -0700 Subject: [PATCH 03/18] Model works --- ambuda/models/base.py | 2 +- ambuda/queries.py | 4 +++- pyproject.toml | 2 +- requirements.txt | 2 +- test/ambuda/conftest.py | 9 +++++---- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/ambuda/models/base.py b/ambuda/models/base.py index a8cd907f..769c963d 100644 --- a/ambuda/models/base.py +++ b/ambuda/models/base.py @@ -4,7 +4,7 @@ from sqlalchemy import Column, ForeignKey, Integer -db = SQLAlchemy() +db = SQLAlchemy(session_options=dict(autoflush=False, autocommit=False)) def pk(): diff --git a/ambuda/queries.py b/ambuda/queries.py index ad5440ed..343bc091 100644 --- a/ambuda/queries.py +++ b/ambuda/queries.py @@ -6,6 +6,7 @@ import functools +from flask import current_app from sqlalchemy.orm import load_only, selectinload import ambuda.database as db @@ -13,7 +14,8 @@ def get_session(): - return flask_sqla.session + with current_app.app_context(): + return flask_sqla.session def texts() -> list[db.Text]: diff --git a/pyproject.toml b/pyproject.toml index ca7dc67f..95c64c62 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,7 +120,7 @@ requests = "2.27.1" roman = "3.3" rsa = "4.8" selenium = "4.2.0" -sentry-sdk = "1.6.0" +sentry-sdk = "1.18.0" six = "1.16.0" sniffio = "1.2.0" snowballstemmer = "2.2.0" diff --git a/requirements.txt b/requirements.txt index 8835c6fe..8652d7cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -118,7 +118,7 @@ ruff==0.0.260 rsa==4.8 ruff==0.0.260 selenium==4.2.0 -sentry-sdk==1.6.0 +sentry-sdk==1.18.0 six==1.16.0 sniffio==1.2.0 snowballstemmer==2.2.0 diff --git a/test/ambuda/conftest.py b/test/ambuda/conftest.py index 3b8df966..7dfe9db8 100644 --- a/test/ambuda/conftest.py +++ b/test/ambuda/conftest.py @@ -57,13 +57,13 @@ def initialize_test_db(): _add_dictionaries(session) - # Bot + # Bot user bot = db.User(username=BOT_USERNAME, email="ambuda-bot@ambuda.org") bot.set_password("password") session.add(bot) session.flush() - # Auth + # Basic user rama = db.User(username="ramacandra", email="rama@ayodhya.com") rama.set_password("maithili") session.add(rama) @@ -117,7 +117,7 @@ def initialize_test_db(): session.add(banned) session.flush() - # Blog + # Blog posts post = db.BlogPost( title="Sample post", slug="sample-post", @@ -176,7 +176,8 @@ def flask_app(): @pytest.fixture() def client(flask_app): - return flask_app.test_client() + with flask_app.app_context(): + yield flask_app.test_client() @pytest.fixture() From 49e903168c993c6d2305dbf30f30d9f4a9cece80 Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 18:03:52 -0700 Subject: [PATCH 04/18] All tests pass --- test/ambuda/conftest.py | 44 +++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/test/ambuda/conftest.py b/test/ambuda/conftest.py index 7dfe9db8..21d9ce9d 100644 --- a/test/ambuda/conftest.py +++ b/test/ambuda/conftest.py @@ -3,8 +3,9 @@ import ambuda.database as db from ambuda import create_app -from ambuda.models.base import db as flask_sqla from ambuda.consts import BOT_USERNAME, TEXT_CATEGORIES +from ambuda.models.base import db as flask_sqla +from ambuda.queries import get_session def _add_dictionaries(session): @@ -70,7 +71,7 @@ def initialize_test_db(): session.flush() # Moderator - moderator = db.User(username="user-mod", email="mod@ambuda.org") + moderator = db.User(username="u-mod", email="mod@ambuda.org") moderator.set_password("secret password") session.add(moderator) session.flush() @@ -82,11 +83,11 @@ def initialize_test_db(): session.flush() # Deleted and Banned - deleted_admin = db.User(username="sandrocottus-deleted", email="cgm@ambuda.org") + deleted_admin = db.User(username="u-deleted-banned", email="cgm@ambuda.org") deleted_admin.set_password("maurya") deleted_admin.set_is_deleted(True) - banned = db.User(username="sikander-banned", email="alex@ambuda.org") + banned = db.User(username="u-banned", email="alex@ambuda.org") banned.set_password("onesicritus") banned.set_is_banned(True) @@ -182,34 +183,39 @@ def client(flask_app): @pytest.fixture() def rama_client(flask_app): - session = get_session() - user = session.query(db.User).filter_by(username="ramacandra").first() - return flask_app.test_client(user=user) + with flask_app.app_context(): + session = get_session() + user = session.query(db.User).filter_by(username="ramacandra").first() + return flask_app.test_client(user=user) @pytest.fixture() def moderator_client(flask_app): - session = get_session() - moderator = session.query(db.User).filter_by(username="user-mod").first() - return flask_app.test_client(user=moderator) + with flask_app.app_context(): + session = get_session() + moderator = session.query(db.User).filter_by(username="u-mod").first() + return flask_app.test_client(user=moderator) @pytest.fixture() def admin_client(flask_app): - session = get_session() - user = session.query(db.User).filter_by(username="u-admin").first() - return flask_app.test_client(user=user) + with flask_app.app_context(): + session = get_session() + user = session.query(db.User).filter_by(username="u-admin").first() + return flask_app.test_client(user=user) @pytest.fixture() def deleted_client(flask_app): - session = get_session() - user = session.query(db.User).filter_by(username="sandrocottus-deleted").first() - return flask_app.test_client(user=user) + with flask_app.app_context(): + session = get_session() + user = session.query(db.User).filter_by(username="u-deleted-banned").first() + return flask_app.test_client(user=user) @pytest.fixture() def banned_client(flask_app): - session = get_session() - user = session.query(db.User).filter_by(username="sikander-banned").first() - return flask_app.test_client(user=user) + with flask_app.app_context(): + session = get_session() + user = session.query(db.User).filter_by(username="u-banned").first() + return flask_app.test_client(user=user) From 5b660568d0caa60139f4b16fff43d91985ed76a0 Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 18:09:00 -0700 Subject: [PATCH 05/18] 2 warnings to go --- ambuda/__init__.py | 2 -- ambuda/utils/json_serde.py | 12 ------------ ambuda/views/reader/texts.py | 7 ++----- pyproject.toml | 2 +- requirements.txt | 2 +- test/ambuda/utils/test_json_serde.py | 15 --------------- 6 files changed, 4 insertions(+), 36 deletions(-) delete mode 100644 test/ambuda/utils/test_json_serde.py diff --git a/ambuda/__init__.py b/ambuda/__init__.py index 35308360..68513a56 100644 --- a/ambuda/__init__.py +++ b/ambuda/__init__.py @@ -22,7 +22,6 @@ from ambuda.mail import mailer from ambuda.tasks import celery_init_app from ambuda.utils import assets -from ambuda.utils.json_serde import AmbudaJSONEncoder from ambuda.utils.url_converters import ListConverter from ambuda.models.base import db from ambuda.views.about import bp as about @@ -149,5 +148,4 @@ def get_locale(): } ) - app.json_encoder = AmbudaJSONEncoder return app diff --git a/ambuda/utils/json_serde.py b/ambuda/utils/json_serde.py index 966d401b..e69de29b 100644 --- a/ambuda/utils/json_serde.py +++ b/ambuda/utils/json_serde.py @@ -1,12 +0,0 @@ -import dataclasses - -from flask import json - - -class AmbudaJSONEncoder(json.JSONEncoder): - """Extend Flask's default encoder to support dataclasses.""" - - def default(self, o): - if dataclasses.is_dataclass(o): - return dataclasses.asdict(o) - return super().default(o) diff --git a/ambuda/views/reader/texts.py b/ambuda/views/reader/texts.py index 4200c568..7b044a00 100644 --- a/ambuda/views/reader/texts.py +++ b/ambuda/views/reader/texts.py @@ -1,15 +1,12 @@ """Views related to texts: title pages, sections, verses, etc.""" -import json - -from flask import Blueprint, abort, jsonify, render_template, url_for +from flask import Blueprint, abort, json, jsonify, render_template, url_for from indic_transliteration import sanscript import ambuda.database as db import ambuda.queries as q from ambuda.consts import TEXT_CATEGORIES from ambuda.utils import xml -from ambuda.utils.json_serde import AmbudaJSONEncoder from ambuda.views.api import bp as api from ambuda.views.reader.schema import Block, Section @@ -168,7 +165,7 @@ def section(text_slug, section_slug): prev_url=_make_section_url(text_, prev), next_url=_make_section_url(text_, next_), ) - json_payload = json.dumps(data, cls=AmbudaJSONEncoder) + json_payload = json.dumps(data) return render_template( "texts/section.html", diff --git a/pyproject.toml b/pyproject.toml index 95c64c62..84f5c9d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ docutils = "0.17.1" email-validator = "1.2.1" fabric = "2.7.0" Flask = "2.2.3" -Flask-Admin = "1.6.0" +Flask-Admin = "1.6.1" Flask-Babel = "2.0.0" Flask-Bcrypt = "1.0.1" Flask-Login = "0.6.2" diff --git a/requirements.txt b/requirements.txt index 8652d7cb..674c1ff1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,7 +30,7 @@ email-validator==1.2.1 exceptiongroup==1.1.1 fabric==2.7.0 Flask==2.2.3 -Flask-Admin==1.6.0 +Flask-Admin==1.6.1 Flask-Babel==2.0.0 Flask-Bcrypt==1.0.1 flask-debugToolbar==0.13.1 diff --git a/test/ambuda/utils/test_json_serde.py b/test/ambuda/utils/test_json_serde.py deleted file mode 100644 index 2b529623..00000000 --- a/test/ambuda/utils/test_json_serde.py +++ /dev/null @@ -1,15 +0,0 @@ -import json -from dataclasses import dataclass - -from ambuda.utils.json_serde import AmbudaJSONEncoder - - -@dataclass -class Dummy: - foo: str - bar: str - - -def test_encode(): - dummy = Dummy(foo="oof", bar="rab") - assert json.dumps(dummy, cls=AmbudaJSONEncoder) == '{"foo": "oof", "bar": "rab"}' From 2e7a13c07632eda1b730a31d5697cefc015ffeda Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 18:10:53 -0700 Subject: [PATCH 06/18] Update google cloud vision version --- pyproject.toml | 7 +------ requirements.txt | 6 +----- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 84f5c9d9..77810919 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,10 +51,7 @@ Flask-Mail = "0.9.1" Flask-WTF = "1.0.1" fonttools = "4.33.3" gevent = "21.12.0" -google-api-core = "2.8.2" -google-auth = "2.9.0" -google-cloud-vision = "3.1.1" -googleapis-common-protos = "1.56.3" +google-cloud-vision = "3.4.1" greenlet = "1.1.2" grpcio = "1.47.0" grpcio-status = "1.47.0" @@ -93,8 +90,6 @@ platformdirs = "2.5.2" pluggy = "1.0.0" prometheus-client = "0.14.1" prompt-toolkit = "3.0.30" -proto-plus = "1.22.0" -protobuf = "4.21.6" psutil = "5.9.1" py = "1.11.0" pyasn1 = "0.4.8" diff --git a/requirements.txt b/requirements.txt index 674c1ff1..7ad715bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -41,10 +41,7 @@ Flask-WTF==1.0.1 flask-debugtoolbar==0.13.1 fonttools==4.33.3 gevent==21.12.0 -google-api-core==2.8.2 -google-auth==2.9.0 -google-cloud-vision==3.1.1 -googleapis-common-protos==1.56.3 +google-cloud-vision==3.4.1 greenlet==1.1.2 grpcio==1.47.0 grpcio-status==1.47.0 @@ -84,7 +81,6 @@ pluggy==1.0.0 prometheus-client==0.14.1 prompt-toolkit==3.0.30 proto-plus==1.22.0 -protobuf==3.20.1 psutil==5.9.1 py==1.11.0 pyasn1==0.4.8 From 8eaf5311d2f38604467a1c7f548f327962b5918c Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 18:11:14 -0700 Subject: [PATCH 07/18] WIP --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7ad715bd..51ffb78e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -80,7 +80,6 @@ platformdirs==2.5.2 pluggy==1.0.0 prometheus-client==0.14.1 prompt-toolkit==3.0.30 -proto-plus==1.22.0 psutil==5.9.1 py==1.11.0 pyasn1==0.4.8 From 3800c2f83cc2cb37c88ce86defbb743eda81b8b4 Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 18:22:43 -0700 Subject: [PATCH 08/18] WIP What works: - unit tests - flask and celery - upload What's broken: - sqlalchemy in toolbar --- ambuda/tasks/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ambuda/tasks/__init__.py b/ambuda/tasks/__init__.py index f25ecbab..f6300c21 100644 --- a/ambuda/tasks/__init__.py +++ b/ambuda/tasks/__init__.py @@ -29,7 +29,7 @@ def celery_init_app(app: Flask) -> Celery: class FlaskTask(Task): def __call__(self, *args: object, **kwargs: object) -> object: - with flask_app.app_context(): + with app.app_context(): return self.run(*args, **kwargs) celery_app = Celery( From 87febc21d1394658efada4da500d2559e7d7bcd6 Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 18:35:37 -0700 Subject: [PATCH 09/18] Sort imports and lint --- ambuda/__init__.py | 2 +- ambuda/models/base.py | 1 - ambuda/tasks/projects.py | 2 +- cli.py | 58 +++++++++++++++++++--------------------- config.py | 2 ++ make_celery.py | 1 + 6 files changed, 33 insertions(+), 33 deletions(-) diff --git a/ambuda/__init__.py b/ambuda/__init__.py index 68513a56..c0ad5665 100644 --- a/ambuda/__init__.py +++ b/ambuda/__init__.py @@ -20,10 +20,10 @@ from ambuda import checks, filters from ambuda.consts import LOCALES from ambuda.mail import mailer +from ambuda.models.base import db from ambuda.tasks import celery_init_app from ambuda.utils import assets from ambuda.utils.url_converters import ListConverter -from ambuda.models.base import db from ambuda.views.about import bp as about from ambuda.views.api import bp as api from ambuda.views.auth import bp as auth diff --git a/ambuda/models/base.py b/ambuda/models/base.py index 769c963d..1de8b862 100644 --- a/ambuda/models/base.py +++ b/ambuda/models/base.py @@ -3,7 +3,6 @@ from flask_sqlalchemy import SQLAlchemy from sqlalchemy import Column, ForeignKey, Integer - db = SQLAlchemy(session_options=dict(autoflush=False, autocommit=False)) diff --git a/ambuda/tasks/projects.py b/ambuda/tasks/projects.py index a83ee8dc..0cc5ba74 100644 --- a/ambuda/tasks/projects.py +++ b/ambuda/tasks/projects.py @@ -7,9 +7,9 @@ # package called `fitz` (https://pypi.org/project/fitz/) that is completely # unrelated to PDF parsing. import fitz +from celery import shared_task from slugify import slugify -from celery import shared_task from ambuda import database as db from ambuda import queries as q from ambuda.tasks.utils import CeleryTaskStatus, TaskStatus diff --git a/cli.py b/cli.py index 54c6e5b0..8aa53cac 100755 --- a/cli.py +++ b/cli.py @@ -15,8 +15,6 @@ from ambuda.tasks.projects import create_project_inner from ambuda.tasks.utils import LocalTaskStatus -engine = create_db() - @click.group() def cli(): @@ -33,22 +31,22 @@ def create_user(): raw_password = getpass.getpass("Password: ") email = input("Email: ") - with Session(engine) as session: - u = ( - session.query(db.User) - .where(or_(db.User.username == username, db.User.email == email)) - .first() - ) - if u is not None: - if u.username == username: - raise click.ClickException(f'User "{username}" already exists.') - else: - raise click.ClickException(f'Email "{email}" already exists.') + session = q.get_session() + u = ( + session.query(db.User) + .where(or_(db.User.username == username, db.User.email == email)) + .first() + ) + if u is not None: + if u.username == username: + raise click.ClickException(f'User "{username}" already exists.') + else: + raise click.ClickException(f'Email "{email}" already exists.') - user = db.User(username=username, email=email) - user.set_password(raw_password) - session.add(user) - session.commit() + user = db.User(username=username, email=email) + user.set_password(raw_password) + session.add(user) + session.commit() @cli.command() @@ -60,19 +58,19 @@ def add_role(username, role): In particular, `add-role admin` will give a user administrator privileges and grant them full access to Ambuda's data and content. """ - with Session(engine) as session: - u = session.query(db.User).where(db.User.username == username).first() - if u is None: - raise click.ClickException(f'User "{username}" does not exist.') - r = session.query(db.Role).where(db.Role.name == role).first() - if r is None: - raise click.ClickException(f'Role "{role}" does not exist.') - if r in u.roles: - raise click.ClickException(f'User "{username}" already has role "{role}".') - - u.roles.append(r) - session.add(u) - session.commit() + session = q.get_session() + u = session.query(db.User).where(db.User.username == username).first() + if u is None: + raise click.ClickException(f'User "{username}" does not exist.') + r = session.query(db.Role).where(db.Role.name == role).first() + if r is None: + raise click.ClickException(f'Role "{role}" does not exist.') + if r in u.roles: + raise click.ClickException(f'User "{username}" already has role "{role}".') + + u.roles.append(r) + session.add(u) + session.commit() print(f'Added role "{role}" to user "{username}".') diff --git a/config.py b/config.py index 7be66124..2b22d08e 100644 --- a/config.py +++ b/config.py @@ -178,6 +178,8 @@ class DevelopmentConfig(BaseConfig): #: Disable redirect intercepts when using flask-debugtoolbar DEBUG_TB_INTERCEPT_REDIRECTS = False + #: Record queries for debugging + SQLALCHEMY_RECORD_QUERIES = True AMBUDA_ENVIRONMENT = DEVELOPMENT DEBUG = True diff --git a/make_celery.py b/make_celery.py index 4773270e..19b81783 100644 --- a/make_celery.py +++ b/make_celery.py @@ -4,6 +4,7 @@ import os from dotenv import load_dotenv + from ambuda import create_app load_dotenv(".env") From 427c264a10bb1522fce18a1293ff907cff18e883 Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 19:45:13 -0700 Subject: [PATCH 10/18] Fix OCR task --- ambuda/views/proofing/project.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ambuda/views/proofing/project.py b/ambuda/views/proofing/project.py index 8f761e8f..e3315f9b 100644 --- a/ambuda/views/proofing/project.py +++ b/ambuda/views/proofing/project.py @@ -610,10 +610,7 @@ def batch_ocr(slug): abort(404) if request.method == "POST": - task = ocr_tasks.run_ocr_for_project( - app_env=current_app.config["AMBUDA_ENVIRONMENT"], - project=project_, - ) + task = ocr_tasks.run_ocr_for_project(project=project_) if task: return render_template( "proofing/projects/batch-ocr-post.html", From 7091dd8217a08444fa35978c2ad8d291236d0d03 Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 20:36:42 -0700 Subject: [PATCH 11/18] Add paginated recent changes --- ambuda/queries.py | 1 - ambuda/tasks/ocr.py | 4 +- ambuda/templates/macros/proofing.html | 16 ++---- ambuda/templates/proofing/recent-changes.html | 42 ++++++++++++++- ambuda/views/proofing/main.py | 53 +++++++------------ cli.py | 2 - 6 files changed, 65 insertions(+), 53 deletions(-) diff --git a/ambuda/queries.py b/ambuda/queries.py index 343bc091..6eaf15be 100644 --- a/ambuda/queries.py +++ b/ambuda/queries.py @@ -4,7 +4,6 @@ For simple or adhoc queries, you can just write them in their corresponding view. """ -import functools from flask import current_app from sqlalchemy.orm import load_only, selectinload diff --git a/ambuda/tasks/ocr.py b/ambuda/tasks/ocr.py index 2c788972..38da9da4 100644 --- a/ambuda/tasks/ocr.py +++ b/ambuda/tasks/ocr.py @@ -46,8 +46,8 @@ def _run_ocr_for_page_inner( version=0, author_id=bot_user.id, ) - except Exception: - raise ValueError(f'OCR failed for page "{project.slug}/{page.slug}".') + except Exception as e: + raise ValueError(f'OCR failed for page "{project.slug}/{page.slug}".') from e @shared_task(bind=True) diff --git a/ambuda/templates/macros/proofing.html b/ambuda/templates/macros/proofing.html index f19679da..f6b9b695 100644 --- a/ambuda/templates/macros/proofing.html +++ b/ambuda/templates/macros/proofing.html @@ -170,22 +170,12 @@

{% endmacro %} -{# List various activity (edits, new projects, etc.) #} -{% macro activity_log(activity) %} -
    -{% for type_, _, obj in activity %} - {% if type_ == 'revision' %}{{ revision_li(obj) }} - {% elif type_ == 'project' %}{{ project_li(obj) }} - {% endif %} -{% endfor %} -
-{% endmacro %} - - {# List the given revisions. Revisions might correspond to multiple pages. #} {% macro revision_list(revisions) %}
    - {% for r in revisions %}{{ revision_li(r) }}{% endfor %} + {% for r in revisions -%} + {{ revision_li(r) }} + {%- endfor %}
{% endmacro %} diff --git a/ambuda/templates/proofing/recent-changes.html b/ambuda/templates/proofing/recent-changes.html index 1882b417..06304456 100644 --- a/ambuda/templates/proofing/recent-changes.html +++ b/ambuda/templates/proofing/recent-changes.html @@ -2,6 +2,16 @@ {% import "macros/proofing.html" as m %} +{% macro page_link(text, page) %} +{% if page %} + {% set url = url_for("proofing.recent_changes", page=page) %} +
  • {{ text|safe }}
  • +{% else %} +
  • {{ text|safe }}
  • +{% endif %} +{% endmacro %} + + {% block title %}Recent changes | Ambuda{% endblock %} @@ -13,8 +23,36 @@ _("Recent changes"), "Recent edits and revisions made by people like you!") }} -{% if recent_activity -%} -{{ m.activity_log(recent_activity) }} +{% if recent_revisions -%} +
      + {% for r in recent_revisions -%} + {{ m.revision_li(r) }} + {%- endfor %} +
    + + {# Page links #} + {% set revs = recent_revisions %} + {% else %}

    No changes found.

    {% endif %} diff --git a/ambuda/views/proofing/main.py b/ambuda/views/proofing/main.py index 0c3672bb..e9e567e3 100644 --- a/ambuda/views/proofing/main.py +++ b/ambuda/views/proofing/main.py @@ -13,9 +13,10 @@ from wtforms.widgets import TextArea from ambuda import consts -from ambuda import database as db from ambuda import queries as q +from ambuda.database import Page, Project, Revision, User from ambuda.enums import SitePageStatus +from ambuda.models.base import db from ambuda.tasks import projects as project_tasks from ambuda.views.proofing.decorators import moderator_required, p2_required @@ -97,11 +98,9 @@ def index(): # Fetch all project data in a single query for better performance. session = q.get_session() projects = ( - session.query(db.Project) + session.query(Project) .options( - orm.joinedload(db.Project.pages) - .load_only(db.Page.id) - .joinedload(db.Page.status) + orm.joinedload(Project.pages).load_only(Page.id).joinedload(Page.status) ) .all() ) @@ -241,35 +240,25 @@ def create_project_status(task_id): @bp.route("/recent-changes") def recent_changes(): """Show recent changes across all projects.""" - num_per_page = 100 + per_page = 100 # Exclude bot edits, which overwhelm all other edits on the site. bot_user = q.user(consts.BOT_USERNAME) assert bot_user, "Bot user not defined" - session = q.get_session() - recent_revisions = ( - session.query(db.Revision) - .options(orm.defer(db.Revision.content)) - .filter(db.Revision.author_id != bot_user.id) - .order_by(db.Revision.created.desc()) - .limit(num_per_page) - .all() - ) - recent_activity = [("revision", r.created, r) for r in recent_revisions] - - recent_projects = ( - session.query(db.Project) - .order_by(db.Project.created_at.desc()) - .limit(num_per_page) - .all() + q.get_session() + recent_revisions = db.paginate( + db.select(Revision) + # Defer slow columns + .options(orm.defer(Revision.content)) + # .filter(Revision.author_id != bot_user.id) + .order_by(Revision.created.desc()), + per_page=per_page, + max_per_page=per_page, ) - recent_activity += [("project", p.created_at, p) for p in recent_projects] - recent_activity.sort(key=lambda x: x[1], reverse=True) - recent_activity = recent_activity[:num_per_page] return render_template( - "proofing/recent-changes.html", recent_activity=recent_activity + "proofing/recent-changes.html", recent_revisions=recent_revisions ) @@ -294,16 +283,14 @@ def dashboard(): days_ago_1d = now - timedelta(days=1) session = q.get_session() - bot = session.query(db.User).filter_by(username=consts.BOT_USERNAME).one() + bot = session.query(User).filter_by(username=consts.BOT_USERNAME).one() bot_id = bot.id revisions_30d = ( - session.query(db.Revision) - .filter( - (db.Revision.created >= days_ago_30d) & (db.Revision.author_id != bot_id) - ) - .options(orm.load_only(db.Revision.created, db.Revision.author_id)) - .order_by(db.Revision.created) + session.query(Revision) + .filter((Revision.created >= days_ago_30d) & (Revision.author_id != bot_id)) + .options(orm.load_only(Revision.created, Revision.author_id)) + .order_by(Revision.created) .all() ) revisions_7d = [x for x in revisions_30d if x.created >= days_ago_7d] diff --git a/cli.py b/cli.py index 8aa53cac..2b46c633 100755 --- a/cli.py +++ b/cli.py @@ -6,12 +6,10 @@ import click from slugify import slugify from sqlalchemy import or_ -from sqlalchemy.orm import Session import ambuda from ambuda import database as db from ambuda import queries as q -from ambuda.seed.utils.data_utils import create_db from ambuda.tasks.projects import create_project_inner from ambuda.tasks.utils import LocalTaskStatus From d49afbe2d6cb9db732f4e2f6b9baa540df5d141d Mon Sep 17 00:00:00 2001 From: Arun Prasad Date: Fri, 31 Mar 2023 20:38:00 -0700 Subject: [PATCH 12/18] Fix showing text --- ambuda/templates/proofing/recent-changes.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ambuda/templates/proofing/recent-changes.html b/ambuda/templates/proofing/recent-changes.html index 06304456..b59cbf35 100644 --- a/ambuda/templates/proofing/recent-changes.html +++ b/ambuda/templates/proofing/recent-changes.html @@ -33,7 +33,9 @@ {# Page links #} {% set revs = recent_revisions %}