diff --git a/Makefile b/Makefile index ede40895..d2b7b839 100644 --- a/Makefile +++ b/Makefile @@ -209,6 +209,9 @@ test: py-venv-check coverage: pytest --cov=ambuda --cov-report=html test/ +coverage-report: coverage + coverage report --fail-under=80 + # Generate Ambuda's technical documentation. # After the command completes, open "docs/_build/index.html". docs: py-venv-check @@ -275,6 +278,8 @@ babel-update: py-venv-check babel-compile: py-venv-check pybabel compile -d ambuda/translations +# Clean up +# =============================================== clean: @rm -rf deploy/data/ @rm -rf ambuda/translations/* diff --git a/ambuda/templates/proofing/projects/confirm_changes.html b/ambuda/templates/proofing/projects/confirm_changes.html new file mode 100644 index 00000000..19b52656 --- /dev/null +++ b/ambuda/templates/proofing/projects/confirm_changes.html @@ -0,0 +1,44 @@ +{% extends 'proofing/base.html' %} +{% from "macros/forms.html" import field %} +{% import "macros/proofing.html" as m %} + +{% block title %} Search and Replace | {{ project.title }}{% endblock %} + +{% block content %} + +{{ m.project_header_nested('Review and Confirm Changes', project) }} +{{ m.project_nav(project=project, active='edit') }} + +
+

Confirm Changes

+ {% macro sp(s, p, n) %}{% if n == 1 %}{{ s }}{% else %}{{ p }}{% endif %}{% endmacro %} +

Please carefully review and confirm the changes you selected:

+
+ {{ form.csrf_token }} + + + + {% set match_counts = results|map(attribute='matches')|map('length')|list %} + {% set nr = match_counts|sum %} +

Confirm changes on {{ nr }} {{ sp("match", "matches", nr) }} that {{ sp("contains", "contain", nr) }} {{ query }} to be replaced by {{ replace }}.

+ + {% for result in results %} + {% set page = result.page %} + {% set page_url = url_for("proofing.page.edit", project_slug=project.slug, page_slug=page.slug) %} + {% set matches = result.matches %} + {% for match in matches %} +
+

Page {{ project.title }}/{{ page.slug }}: Line {{ match.line_num }}

+ + +
+ + +
+ {% endfor %} + {% endfor %} + + +
+
+{% endblock %} diff --git a/ambuda/templates/proofing/projects/replace.html b/ambuda/templates/proofing/projects/replace.html index 3bbb8f98..ebb151c2 100644 --- a/ambuda/templates/proofing/projects/replace.html +++ b/ambuda/templates/proofing/projects/replace.html @@ -10,38 +10,95 @@ {{ m.project_nav(project=project, active='edit') }}
-

Use this simple search and replace form to make edits across this project.

+

Replace

+

Use this simple search and replace form to make edits across this project. The search supports regular expressions.

+
+ + {{ field(form.query) }} + {{ field(form.replace) }} + +
-
- {{ field(form.query) }} - {{ field(form.replace) }} - -
- -{% if query %} +{% if results %}
+

Matches

+ {% macro sp(s, p, n) %}{% if n == 1 %}{{ s }}{% else %}{{ p }}{% endif %}{% endmacro %} +
+ {{ submit_changes_form.csrf_token }} + + -{% macro sp(s, p, n) %}{% if n == 1 %}{{ s }}{% else %}{{ p }}{% endif %}{% endmacro %} - -{% set nr = results|length %} -

Found {{ nr }} {{ sp("page", "pages", nr) }} that {{ sp("contains", "contain", nr) }} {{ query }}.

- -
+
{% endif %} {% endblock %} \ No newline at end of file diff --git a/ambuda/views/proofing/project.py b/ambuda/views/proofing/project.py index 9a375790..5df87c1e 100644 --- a/ambuda/views/proofing/project.py +++ b/ambuda/views/proofing/project.py @@ -1,3 +1,6 @@ +import logging +import re + from celery.result import GroupResult from flask import ( Blueprint, @@ -9,13 +12,21 @@ url_for, ) from flask_babel import lazy_gettext as _l -from flask_login import login_required +from flask_login import current_user, login_required from flask_wtf import FlaskForm from markupsafe import Markup, escape from sqlalchemy import orm from werkzeug.exceptions import abort from werkzeug.utils import redirect -from wtforms import StringField +from wtforms import ( + BooleanField, + FieldList, + Form, + FormField, + HiddenField, + StringField, + SubmitField, +) from wtforms.validators import DataRequired, ValidationError from wtforms.widgets import TextArea @@ -24,9 +35,11 @@ from ambuda.tasks import app as celery_app from ambuda.tasks import ocr as ocr_tasks from ambuda.utils import project_utils, proofing_utils +from ambuda.utils.revisions import add_revision from ambuda.views.proofing.decorators import moderator_required, p2_required bp = Blueprint("project", __name__) +LOG = logging.getLogger(__name__) def _is_valid_page_number_spec(_, field): @@ -85,6 +98,11 @@ class EditMetadataForm(FlaskForm): ) +class MatchForm(Form): + selected = BooleanField() + replace = HiddenField(validators=[DataRequired()]) + + class SearchForm(FlaskForm): class Meta: csrf = False @@ -103,6 +121,28 @@ class Meta: replace = StringField(_l("Replace"), validators=[DataRequired()]) +def validate_matches(form, field): + for match_form in field: + if match_form.errors: + raise ValidationError("Invalid match form values.") + + +class SubmitChangesForm(ReplaceForm): + class Meta: + csrf = False + + matches = FieldList(FormField(MatchForm), validators=[validate_matches]) + submit = SubmitField("Submit Changes") + 
+ +class ConfirmChangesForm(ReplaceForm): + class Meta: + csrf = False + + confirm = SubmitField("Confirm") + cancel = SubmitField("Cancel") + + @bp.route("//") def summary(slug): """Show basic information about the project.""" @@ -286,7 +326,68 @@ def search(slug): ) -@bp.route("//replace") +def _replace_text(project_, replace_form: ReplaceForm, query: str, replace: str): + """ + Gather all matches for the "query" regex and pair them with the "replace" string. + """ + + results = [] + + query_pattern = re.compile( + query, re.UNICODE + ) # Compile the regex pattern with Unicode support + + LOG.debug(f"Search/Replace text with {query} and {replace}") + for page_ in project_.pages: + if not page_.revisions: + continue + matches = [] + latest = page_.revisions[-1] + LOG.debug(f"{__name__}: {page_.slug}") + for line_num, line in enumerate(latest.content.splitlines()): + if query_pattern.search(line): + try: + marked_query = query_pattern.sub( + lambda m: Markup(f"{escape(m.group(0))}"), line + ) + marked_replace = query_pattern.sub( + Markup(f"{escape(replace)}"), line + ) + LOG.debug(f"Search/Replace > marked query: {marked_query}") + LOG.debug(f"Search/Replace > marked replace: {marked_replace}") + matches.append( + { + "query": marked_query, + "replace": marked_replace, + "checked": False, + "line_num": line_num, + } + ) + except TimeoutError: + # NOTE(review): stdlib `re` has no timeout, so this handler is presumably never reached; it only logs a warning and skips the line + LOG.warning( + f"Regex operation timed out for line {line_num}: {line}" + ) + + if matches: + results.append( + { + "slug": page_.slug, + "matches": matches, + } + ) + return render_template( + "proofing/projects/replace.html", + project=project_, + form=replace_form, + submit_changes_form=SubmitChangesForm(), + query=query, + replace=replace, + results=results, + ) + + +@bp.route("//replace", methods=["GET", "POST"]) @login_required def replace(slug): """Search and replace a string across all of the project's pages. 
@@ -297,53 +398,207 @@ def replace(slug): if project_ is None: abort(404) - form = ReplaceForm(request.args) + form = ReplaceForm(request.form) if not form.validate(): + invalid_keys = list(form.errors.keys()) + LOG.debug(f"Invalid form - {request.method}, invalid keys: {invalid_keys}") return render_template( - "proofing/projects/replace.html", project=project_, form=form + "proofing/projects/replace.html", project=project_, form=ReplaceForm() ) # search for "query" string and replace with "update" string query = form.query.data - update = form.replace.data + replace = form.replace.data + render = _replace_text(project_, replace_form=form, query=query, replace=replace) + return render + +def _select_changes(project_, selected_keys, query: str, replace: str): + """ + Collect the selected "query" matches and render the confirmation page + """ results = [] + LOG.debug(f"{__name__}: Mark changes with {query} and {replace}") + query_pattern = re.compile( + query, re.UNICODE + ) # Compile the regex pattern with Unicode support for page_ in project_.pages: if not page_.revisions: continue + latest = page_.revisions[-1] matches = [] + for line_num, line in enumerate(latest.content.splitlines()): - latest = page_.revisions[-1] - for line in latest.content.splitlines(): - if query in line: + form_key = f"match{page_.slug}-{line_num}" + replace_form_key = f"match{page_.slug}-{line_num}-replace" + + if selected_keys.get(form_key) == "selected": + LOG.debug(f"{__name__}: {form_key}: {selected_keys.get(form_key)}") + LOG.debug( + f"{__name__}: {replace_form_key}: {request.form.get(replace_form_key)}" + ) + LOG.debug(f"{__name__}: {form_key}: Appended") + replaced_line = query_pattern.sub(replace, line) matches.append( { - "query": escape(line).replace( - query, Markup(f"{escape(query)}") - ), - "update": escape(line).replace( - query, Markup(f"{escape(update)}") - ), + "query": line, + "replace": replaced_line, + "line_num": line_num, } ) - if matches: - results.append( - { - "slug": page_.slug, - "matches": matches, - } - ) 
+ + results.append({"page": page_, "matches": matches}) + LOG.debug(f"{__name__}: Total matches appended: {len(matches)}") + + selected_count = sum(value == "selected" for value in selected_keys.values()) + LOG.debug(f"{__name__} > Number of selected changes = {selected_count}") + return render_template( - "proofing/projects/replace.html", + "proofing/projects/confirm_changes.html", project=project_, - form=form, + form=ConfirmChangesForm(), query=query, - update=update, + replace=replace, results=results, ) +@bp.route("//submit_changes", methods=["GET", "POST"]) +@login_required +def submit_changes(slug): + """Submit selected changes across all of the project's pages. + + This is useful to replace a string across the project in one shot. + """ + + project_ = q.project(slug) + if project_ is None: + abort(404) + + LOG.debug( + f"{__name__}: SUBMIT_CHANGES --- {request.method} > {list(request.form.keys())}" + ) + + # FIXME: find a way to validate this form; the `matches` field list currently gets in the way of the validators. 
+ form = SubmitChangesForm(request.form) + # if not form.validate(): + # # elif request.form.get("form_submitted") is None: + # invalid_keys = list(form.errors.keys()) + # LOG.debug(f'{__name__}: Invalid form values - {request.method}, invalid keys: {invalid_keys}') + # return redirect(url_for("proofing.project.replace", slug=slug)) + + render = None + # read the "query" pattern and its "replace" string from the submitted form + query = form.query.data + replace = form.replace.data + + LOG.debug( + f"{__name__}: ({request.method})> Got to submit method with {query}->{replace} " + ) + LOG.debug(f"{__name__}: {request.method} > {list(request.form.keys())}") + selected_keys = { + key: value + for key, value in request.form.items() + if key.startswith("match") and not key.endswith("replace") + } + render = _select_changes(project_, selected_keys, query=query, replace=replace) + + return render + + +@bp.route("//confirm_changes", methods=["GET", "POST"]) +@login_required +def confirm_changes(slug): + """Confirm changes to replace a string across all of the project's pages.""" + project_ = q.project(slug) + if project_ is None: + abort(404) + LOG.debug( + f"{__name__}: confirm_changes {request.method} > Keys: {list(request.form.keys())}, Items: {list(request.form.items())}" + ) + form = ConfirmChangesForm(request.form) + if not form.validate(): + flash("Invalid input.", "danger") + invalid_keys = list(form.errors.keys()) + LOG.error( + f"{__name__}: Invalid form - {request.method}, invalid keys: {invalid_keys}" + ) + return redirect(url_for("proofing.project.replace", slug=slug)) + + if form.confirm.data: + LOG.debug(f"{__name__}: {request.method} > Confirmed!") + query = form.query.data + replace = form.replace.data + + # Group the submitted changes by page slug: {page_slug: {line_num: new_line}} + pages = {} + + # Iterate over the dictionary `request.form` + for key, value in request.form.items(): + # Check if key matches the pattern + match = re.match(r"match(\d+)-(\d+)-replace", key) + if match: + # 
Extract page_slug and line_num from the key + page_slug = match.group(1) + line_num = int(match.group(2)) + if page_slug not in pages: + pages[page_slug] = {} + pages[page_slug][line_num] = value + + for page_slug, changed_lines in pages.items(): + # Get the corresponding `Page` object + LOG.debug(f"{__name__}: Project - {project_.slug}, Page : {page_slug}") + + # Page query needs id for project and slug for page + page = q.page(project_.id, page_slug) + if not page: + LOG.error( + f"{__name__}: Page not found for project - {project_.slug}, page : {page_slug}" + ) + return redirect(url_for("proofing.project.replace", slug=slug)) + + latest = page.revisions[-1] + current_lines = latest.content.splitlines() + # Iterate over the `changed_lines` dictionary + for line_num, replace_value in changed_lines.items(): + # Bounds-check first: indexing current_lines before this test would raise IndexError + if line_num < len(current_lines): + LOG.debug( + f"{__name__}: Current - {current_lines[line_num]}, Length of lines = {len(current_lines)}" + ) + # Replace the line with the replacement value + current_lines[line_num] = replace_value + else: + LOG.error( + f"{__name__}: Invalid line number {line_num} in {page_slug} which has only {len(current_lines)}" + ) + continue + # Join the lines into a single string + new_content = "\n".join(current_lines) + # Check if the page content has changed + if new_content != latest.content: + # Add a new revision to the page + new_summary = f'Replaced "{query}" with "{replace}" on page {page.slug}' + new_revision = add_revision( + page=page, + summary=new_summary, + content=new_content, + status=page.status.name, + version=page.version, + author_id=current_user.id, + ) + LOG.debug(f"{__name__}: New reviion > {page_slug}: {new_revision}") + + flash("Changes applied.", "success") + return redirect(url_for("proofing.project.activity", slug=slug)) + elif form.cancel.data: + LOG.debug(f"{__name__}: confirm_changes Cancelled") + return 
redirect(url_for("proofing.project.edit", slug=slug)) + + return redirect(url_for("proofing.project.edit", slug=slug)) + + @bp.route("//batch-ocr", methods=["GET", "POST"]) @p2_required def batch_ocr(slug): diff --git a/test/ambuda/views/proofing/test_project.py b/test/ambuda/views/proofing/test_project.py index 5dc3c0f9..f4178d23 100644 --- a/test/ambuda/views/proofing/test_project.py +++ b/test/ambuda/views/proofing/test_project.py @@ -105,6 +105,71 @@ def test_search__bad_project(rama_client): assert resp.status_code == 404 +def test_replace(moderator_client): + resp = moderator_client.get("/proofing/test-project/replace") + assert "Replace:" in resp.text + + +def test_replace_post(moderator_client): + resp = moderator_client.post( + "/proofing/test-project/replace", + data={ + "query": "the", + "replace": "the", + }, + ) + assert resp.status_code == 200 + + +def test_replace__unauth(client): + resp = client.get("/proofing/test-project/replace") + assert resp.status_code == 302 + + +def test_replace__bad_project(rama_client): + resp = rama_client.get("/proofing/unknown/replace") + assert resp.status_code == 404 + + +def test_submit_changes(moderator_client): + query = "test_query" + replace = "test_replace" + form_data = {"query": query, "replace": replace} + resp = moderator_client.post( + "/proofing/test-project/submit_changes", data=form_data + ) + assert "Changes:" in resp.text + + +def test_submit_changes_post(moderator_client): + resp = moderator_client.post( + "/proofing/test-project/submit_changes", + data={ + "query": "the", + "replace": "the", + "matches": [], + "submit": True, + }, + ) + + assert resp.status_code == 200 + + +def test_submit_unauth(client): + resp = client.get("/proofing/test-project/submit_changes") + assert resp.status_code == 302 + + +def test_confirm_changes(moderator_client): + resp = moderator_client.get("/proofing/test-project/confirm_changes") + assert "replace" in resp.text + + +def test_confirm_unauth(client): + resp = 
client.get("/proofing/test-project/confirm_changes") + assert resp.status_code == 302 + + def test_admin__unauth(client): resp = client.get("/proofing/test-project/admin") assert resp.status_code == 302 @@ -130,3 +195,13 @@ def test_admin__has_admin_role(admin_client): def test_admin__has_moderator_role__bad_project(admin_client): resp = admin_client.get("/proofing/unknown/admin") assert resp.status_code == 404 + + +def test_batch_ocr(moderator_client): + resp = moderator_client.get("/proofing/test-project/batch-ocr") + assert resp.status_code == 200 + + +def test_batch_ocr__unauth(client): + resp = client.get("/proofing/test-project/batch-ocr") + assert resp.status_code == 302