diff --git a/app/__init__.py b/app/__init__.py index 159543c..2c9dc54 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -3,6 +3,7 @@ from dotenv import load_dotenv from flask import Flask from flask_bootstrap import Bootstrap +from flask_htmx import HTMX from flask_migrate import Migrate from app.filters import usa_icon @@ -19,6 +20,8 @@ def create_app(): app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False app.config["SECRET_KEY"] = os.getenv("FLASK_APP_SECRET_KEY") Bootstrap(app) + global htmx + htmx = HTMX(app) db.init_app(app) diff --git a/app/forms.py b/app/forms.py index 6d0ef98..281d1cd 100644 --- a/app/forms.py +++ b/app/forms.py @@ -35,11 +35,6 @@ class HarvestSourceForm(FlaskForm): choices=["manual", "daily", "weekly", "biweekly", "monthly"], validators=[DataRequired()], ) - size = SelectField( - "Size", - choices=["small", "medium", "large"], - validators=[DataRequired()], - ) schema_type = SelectField( "Schema Type", choices=["strict", "other"], validators=[DataRequired()] ) diff --git a/app/paginate.py b/app/paginate.py new file mode 100644 index 0000000..af291b7 --- /dev/null +++ b/app/paginate.py @@ -0,0 +1,28 @@ +import math + +from database.interface import PAGINATE_ENTRIES_PER_PAGE + + +class Pagination: + def __init__(self, current: int = 1, count: int = 1): + self.current = current + self.count = count + self.page_count = math.ceil(count / PAGINATE_ENTRIES_PER_PAGE) + self.per_page = PAGINATE_ENTRIES_PER_PAGE + + def to_dict(self): + return { + "current": self.current, + "count": self.count, + "page_count": self.page_count, + "page_label": "Page", + "per_page": self.per_page, + "next": {"label": "Next"}, + "previous": {"label": "Previous"}, + "last_item": { + "label": "Last page", + }, + } + + def update_current(self, current: int) -> dict: + self.current = int(current) diff --git a/app/routes.py b/app/routes.py index 49f78c3..3a24133 100644 --- a/app/routes.py +++ b/app/routes.py @@ -12,11 +12,14 @@ from cryptography.hazmat.primitives.serialization import load_pem_private_key from dotenv import load_dotenv from flask import Blueprint, flash, redirect, render_template, request, session, url_for +from jinja2_fragments.flask import render_block from app.scripts.load_manager import schedule_first_job, trigger_manual_job from database.interface import HarvesterDBInterface +from . import htmx from .forms import HarvestSourceForm, OrganizationForm +from .paginate import Pagination logger = logging.getLogger("harvest_admin") @@ -24,6 +27,7 @@ mod = Blueprint("harvest", __name__) source = Blueprint("harvest_source", __name__) org = Blueprint("org", __name__) +testdata = Blueprint("testdata", __name__) db = HarvesterDBInterface() @@ -245,6 +249,31 @@ def cli_remove_harvest_source(id): print("Failed to delete harvest source") +## Load Test Data +# TODO move this into its own file when you break up routes +@testdata.cli.command("load_test_data") +def fixtures(): + """Load database fixtures from JSON.""" + import json + + file = "./tests/fixtures.json" + click.echo(f"Loading fixtures at `{file}`.") + with open(file, "r") as file: + fixture = json.load(file) + for item in fixture["organization"]: + db.add_organization(item) + for item in fixture["source"]: + db.add_harvest_source(item) + for item in fixture["job"]: + db.add_harvest_job(item) + for item in fixture["record"]: + db.add_harvest_record(item) + for item in fixture["record_error"]: + db.add_harvest_record_error(item) + + click.echo("Done.") + + # Helper Functions def make_new_source_contract(form): return { @@ -434,7 +463,7 @@ def view_harvest_source_data(source_id: str): jobs = db.get_all_harvest_jobs_by_filter({"harvest_source_id": source.id}) records = db.get_harvest_record_by_source(source.id) ckan_records = [record for record in records if record.ckan_id is not None] - error_records = [record for record in records if record.status == 'error'] + error_records = [record for record in records if record.status == "error"] jobs = db.get_all_harvest_jobs_by_filter({"harvest_source_id": source.id}) next_job = "N/A" future_jobs = db.get_new_harvest_jobs_by_source_in_future(source.id) @@ -556,6 +585,7 @@ def clear_harvest_source(source_id): flash("Failed to clear harvest source") return {"message": "failed"} + # Delete Source @mod.route("/harvest_source/config/delete/", methods=["POST"]) @login_required @@ -596,6 +626,32 @@ def add_harvest_job(): @mod.route("/harvest_job/", methods=["GET"]) @mod.route("/harvest_job/", methods=["GET"]) def get_harvest_job(job_id=None): + record_error_count = db.get_harvest_record_errors_by_job( + job_id, count=True, skip_pagination=True + ) + htmx_vars = { + "target_div": "#error_results_pagination", + "endpoint_url": f"/harvest_job/{job_id}", + } + + pagination = Pagination(count=record_error_count) + + if htmx: + page = request.args.get("page") + db_page = int(page) - 1 + record_errors = db.get_harvest_record_errors_by_job(job_id, page=db_page) + data = { + "harvest_job_id": job_id, + "record_errors": db._to_dict(record_errors), + "htmx_vars": htmx_vars, + } + pagination.update_current(page) + return render_block( + "view_job_data.html", + "record_errors_table", + data=data, + pagination=pagination.to_dict(), + ) if job_id: job = db.get_harvest_job(job_id) record_errors = db.get_harvest_record_errors_by_job(job_id) @@ -603,11 +659,15 @@ def get_harvest_job(job_id=None): return db._to_dict(job) if job else ("Not Found", 404) else: data = { + "harvest_job_id": job_id, "harvest_job": job, "harvest_job_dict": db._to_dict(job), "record_errors": db._to_dict(record_errors), + "htmx_vars": htmx_vars, } - return render_template("view_job_data.html", data=data) + return render_template( + "view_job_data.html", data=data, pagination=pagination.to_dict() + ) source_id = request.args.get("harvest_source_id") if source_id: @@ -667,7 +727,7 @@ def get_harvest_record(record_id=None): return "No harvest records found for this harvest source", 404 else: # TODO for test, will remove later - record = db.get_all_harvest_records() + record = db.pget_harvest_records() return db._to_dict(record) @@ -744,3 +804,4 @@ def register_routes(app): app.register_blueprint(user) app.register_blueprint(org) app.register_blueprint(source) + app.register_blueprint(testdata) diff --git a/app/static/_scss/_uswds-theme-custom-styles.scss b/app/static/_scss/_uswds-theme-custom-styles.scss index 8a739e2..09c28e9 100644 --- a/app/static/_scss/_uswds-theme-custom-styles.scss +++ b/app/static/_scss/_uswds-theme-custom-styles.scss @@ -56,3 +56,9 @@ ul.menu { .usa-card__img img { padding: 10px; } + +.usa-pagination { + &__item { + cursor: pointer; + } +} diff --git a/app/static/package-lock.json b/app/static/package-lock.json index 0a11864..49c78c4 100644 --- a/app/static/package-lock.json +++ b/app/static/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@uswds/uswds": "3.8.0", "chart.js": "^4.4.2", + "htmx.org": "^2.0.2", "rollup": "^4.18.0" }, "devDependencies": { @@ -4408,6 +4409,11 @@ "entities": "^4.4.0" } }, + "node_modules/htmx.org": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/htmx.org/-/htmx.org-2.0.2.tgz", + "integrity": "sha512-eUPIpQaWKKstX393XNCRCMJTrqPzikh36Y9RceqsUZLTtlFjFaVDgwZLUsrFk8J2uzZxkkfiy0TE359j2eN6hA==" + }, "node_modules/iconv-lite": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", diff --git a/app/static/package.json b/app/static/package.json index 19b5019..cf26c1f 100644 --- a/app/static/package.json +++ b/app/static/package.json @@ -25,6 +25,7 @@ "dependencies": { "@uswds/uswds": "3.8.0", "chart.js": "^4.4.2", + "htmx.org": "^2.0.2", "rollup": "^4.18.0" }, "devDependencies": { diff --git a/app/static/rollup.config.mjs b/app/static/rollup.config.mjs index 964a928..9f0af73 100644 --- a/app/static/rollup.config.mjs +++ b/app/static/rollup.config.mjs @@ -8,6 +8,7 @@ export default { targets: [ { src: './node_modules/chart.js/dist/chart.umd.js', dest: './assets/chartjs/' }, { src: './node_modules/chart.js/dist/chart.umd.js.map', dest: './assets/chartjs/' }, + { src: './node_modules/htmx.org/dist/htmx.min.js', dest: './assets/htmx/' }, ] }) ] diff --git a/app/templates/base.html b/app/templates/base.html index 72859d9..ec55345 100644 --- a/app/templates/base.html +++ b/app/templates/base.html @@ -8,6 +8,7 @@ + {% block script_head %} {% endblock %} diff --git a/app/templates/components/pagination/pagination.html b/app/templates/components/pagination/pagination.html new file mode 100644 index 0000000..a2cd88b --- /dev/null +++ b/app/templates/components/pagination/pagination.html @@ -0,0 +1,23 @@ +{% from 'components/pagination/pagination_arrow.html' import pagination_arrow %} + +{% set overflow %} +
  • + + … + +
  • +{% endset %} + + diff --git a/app/templates/components/pagination/pagination_arrow.html b/app/templates/components/pagination/pagination_arrow.html new file mode 100644 index 0000000..7292f4a --- /dev/null +++ b/app/templates/components/pagination/pagination_arrow.html @@ -0,0 +1,37 @@ +{# + The full pagination data object is passed so we can access current state, aria labels, and text labels. + #} + {% macro pagination_arrow(direction, pagination, htmx_vars) %} + {% set page_var = ((pagination.current - 1) if direction == 'previous' else (pagination.current + 1)) | string() %} + {% set placeholder_link = htmx_vars.endpoint_url + "?page=" + page_var %} + + {% set link_attrs = { + 'class': 'usa-pagination__link usa-pagination__' ~ direction ~ '-page', + 'aria_label': pagination[direction]['label'] ~ ' ' ~ pagination.page_label | lower + } %} + +
  • + + {% if direction == 'previous' %} + + {% endif %} + + {{ pagination[direction]['label'] }} + + {% if direction == 'next' %} + + {% endif %} + +
  • + {% endmacro %} diff --git a/app/templates/components/pagination/pagination_button.html b/app/templates/components/pagination/pagination_button.html new file mode 100644 index 0000000..e2c8544 --- /dev/null +++ b/app/templates/components/pagination/pagination_button.html @@ -0,0 +1,23 @@ +{% macro pagination_button(item, pager_opts, htmx_vars) %} + {% set is_current = (item == pager_opts.current) %} + {% set is_last = (item == pager_opts.total) %} + {% set labels = pager_opts.aria_labels %} + {# HTMX page vars#} + {% set item_str = item | string() %} + {% set placeholder_link = htmx_vars.endpoint_url + "?page=" + item_str %} + + {# Display: "Last page, page X" if last item. Otherwise "Page X" #} + {% set aria_label = (labels.last ~ " " ~ labels.page_label | lower if is_last else labels.page_label) ~ " " ~ item %} + +
  • + {# Global variable placeholder_link doesn't work for some reason. #} + + {{ item }} + +
  • +{% endmacro %} diff --git a/app/templates/components/pagination/pagination_numbers.html b/app/templates/components/pagination/pagination_numbers.html new file mode 100644 index 0000000..fb84cb1 --- /dev/null +++ b/app/templates/components/pagination/pagination_numbers.html @@ -0,0 +1,65 @@ +{% from "components/pagination/pagination_button.html" import pagination_button %} + +{# Add +1 to first_five / last_five due to how ranges work in jinja #} +{% set pager_ranges = { + 'default': range(pagination.current - 1, pagination.current + 1), + 'last_item': pagination.page_count, + 'first_five': range(1, 5 + 1), + 'last_five': range(pagination.page_count - 4, pagination.page_count + 1), + } +%} + +{% set pager_button_opts = { + 'current': pagination.current, + 'total': pagination.page_count, + 'aria_labels': { + 'page_label': pagination.page_label, + 'previous': pagination.previous.label, + 'next': pagination.next.label, + 'last': pagination.last_item.label + } +} %} + +{# Page numbers #} +{# List all items if less than 7 #} +{% if pagination.page_count <= 7 %} + {% for item in range(1, pagination.page_count) %} + {{ pagination_button(item, pager_button_opts, data.htmx_vars) }} + {% endfor %} +{# User is at the start of a long dataset #} +{# Example: 1, 2, 3, *4*, 5 … 8 #} +{# Doesn't apply when user gets to 5 of 8 #} +{% elif pagination.current <= 4 and pagination.page_count >= 7 %} + {% for item in pager_ranges.first_five %} + {{ pagination_button(item, pager_button_opts, data.htmx_vars) }} + {% endfor %} + + {{ overflow | trim | safe }} + + {{ pagination_button(pager_ranges.last_item, pager_button_opts, data.htmx_vars) }} + +{# When user is close to the end of dataset #} +{# Example: 1 … 4, *5*, 6, 7, 8 #} +{% elif pagination.current >= pagination.page_count - 3 %} + {{ pagination_button(1, pager_button_opts, data.htmx_vars) }} + + {{ overflow | trim | safe }} + {% for item in pager_ranges.last_five %} + {{ pagination_button(item, pager_button_opts, data.htmx_vars) }} + {% endfor %} +{# Default case: Current - 1, Current, Current + 1 #} +{# Example: 1 … 21, *22*, 23 … 50 #} +{# Example: 1 … 4, *5*, 6 … 9 #} +{% else %} + {{ pagination_button(1, pager_button_opts, data.htmx_vars) }} + + {{ overflow | trim | safe }} + + {% for item in pager_ranges.default %} + {{ pagination_button(item, pager_button_opts, data.htmx_vars) }} + {% endfor %} + + {{ overflow | trim | safe }} + + {{ pagination_button(pager_ranges.last_item, pager_button_opts, data.htmx_vars) }} +{% endif %} diff --git a/app/templates/view_job_data.html b/app/templates/view_job_data.html index 4b5a168..718bb14 100644 --- a/app/templates/view_job_data.html +++ b/app/templates/view_job_data.html @@ -6,11 +6,11 @@ {% block content %}
    - {% if not data.harvest_job.id %} + {% if not data.harvest_job_id %}

    Whooops!

    Looks like you navigated to a job that doesn't exist.

    {% else %} -

    Details for Harvest Job Id: {{data.harvest_job.id}}

    +

    Details for Harvest Job Id: {{data.harvest_job_id}}

    For Harvest Source Id: {{data.harvest_job.harvest_source_id}} @@ -34,7 +34,7 @@

    Job Error Table

    {% else %}
    - + @@ -64,32 +64,50 @@

    Record Error Table

    No record errors found {% else %}
    -
    Harvest Job Errors for {{data.harvest_job.id}} Harvest Job Errors for {{data.harvest_job_id}}
    Date Created
    - - - - - - - - - - - - {% for errors in data.record_errors %} - - - - - - - - {% endfor %} - -
    Harvest Error Info for {{data.harvest_job.id}}
    Date CreatedIdHarvest Record IdMessageType
    {{errors.date_created}}{{errors.id}} - - {{errors.harvest_record_id}} {{errors.message}}{{errors.type}}
    + {% block record_errors_table %} +
    + + + + + + + + + + + + + + {% for errors in data.record_errors %} + + + + + + + + {% endfor %} + {% if pagination.per_page > data.record_errors|count and pagination.count > data.record_errors|count %} + {% for number in range(pagination.per_page - data.record_errors|count) %} + + {% for number in range(5) %} + + {% endfor %} + + {% endfor %} + {% endif %} + +
    Harvest Record Errors for {{data.harvest_job_id}}
    Date CreatedIdHarvest Record IdMessageType
    {{errors.date_created}}{{errors.id}} + + {{errors.harvest_record_id}} {{errors.message}}{{errors.type}}
     
    + {% if pagination.count > data.record_errors|count %} + {% include '/components/pagination/pagination.html' %} + {%endif%} +
    + + {% endblock %}
    {% endif %} diff --git a/app/templates/view_source_data.html b/app/templates/view_source_data.html index 09da4bc..2ad8f8c 100644 --- a/app/templates/view_source_data.html +++ b/app/templates/view_source_data.html @@ -1,7 +1,8 @@ {% extends 'base.html' %} {% block script_head %} - + + {% endblock %} {% block title %} diff --git a/database/interface.py b/database/interface.py index 405eadc..7bf2213 100644 --- a/database/interface.py +++ b/database/interface.py @@ -1,14 +1,15 @@ +import logging import os +import time import uuid from datetime import datetime, timezone +from functools import wraps -from sqlalchemy import create_engine, inspect, or_, text +import ckanapi +from ckanapi import RemoteCKAN +from sqlalchemy import create_engine, func, inspect, or_, select, text from sqlalchemy.exc import NoResultFound from sqlalchemy.orm import scoped_session, sessionmaker -from ckanapi import RemoteCKAN -import ckanapi -import time -import logging from .models import ( HarvestJob, @@ -21,6 +22,47 @@ ) DATABASE_URI = os.getenv("DATABASE_URI") +PAGINATE_ENTRIES_PER_PAGE = 20 +PAGINATE_START_PAGE = 0 + + +def paginate(fn): + @wraps(fn) + def _impl(self, *args, **kwargs): + query = fn(self, *args, **kwargs) + if kwargs.get("skip_pagination") is True: + return query + elif kwargs.get("paginate") is False: + return query.all() + else: + per_page = kwargs.get("per_page") or PAGINATE_ENTRIES_PER_PAGE + page = kwargs.get("page") or PAGINATE_START_PAGE + query = query.limit(per_page) + query = query.offset(page * per_page) + return query.all() + + return _impl + + +# notes on the flag `maintain_column_froms`: +# https://github.com/sqlalchemy/sqlalchemy/discussions/6807#discussioncomment-1043732 +# docs: https://docs.sqlalchemy.org/en/14/core/selectable.html#sqlalchemy.sql.expression.Select.with_only_columns.params.maintain_column_froms +# +def count(fn): + @wraps(fn) + def _impl(self, *args, **kwargs): + query = fn(self, *args, **kwargs) + if kwargs.get("count") is True: + count_q = query.statement.with_only_columns( + func.count(), maintain_column_froms=True + ).order_by(None) + count = query.session.execute(count_q).scalar() + return count + else: + return query + + return _impl + logging.basicConfig(level=logging.INFO) logger = logging.getLogger() @@ -158,13 +200,15 @@ def clear_harvest_source(self, source_id): # delete all HarvestRecords and related HarvestRecordErrors def _clear_harvest_records(): - self.db.query(HarvestRecordError).filter( - HarvestRecordError.harvest_record_id.in_( - self.db.query(HarvestRecord.id).filter_by(harvest_source_id=source_id) + self.db.query(HarvestRecordError).filter( + HarvestRecordError.harvest_record_id.in_( + self.db.query(HarvestRecord.id).filter_by( + harvest_source_id=source_id ) - ).delete(synchronize_session=False) - self.db.query(HarvestRecord).filter_by(harvest_source_id=source_id).delete() - self.db.commit() + ) + ).delete(synchronize_session=False) + self.db.query(HarvestRecord).filter_by(harvest_source_id=source_id).delete() + self.db.commit() source = self.db.get(HarvestSource, source_id) if source is None: @@ -173,50 +217,53 @@ def _clear_harvest_records(): organization_id = source.organization_id records = ( - self.db.query(HarvestRecord) - .filter_by(harvest_source_id=source_id).all() + self.db.query(HarvestRecord).filter_by(harvest_source_id=source_id).all() ) if not records: return "Harvest source has no records to clear." ckan_ids = [record.ckan_id for record in records if record.ckan_id is not None] - error_records = [record for record in records if record.status == 'error'] + error_records = [record for record in records if record.status == "error"] jobs_in_progress = self.get_all_harvest_jobs_by_filter( {"harvest_source_id": source.id, "status": "in_progress"} ) # Ensure no jobs are in progress if jobs_in_progress: - return ("Error: A harvest job is currently in progress. " - "Cannot clear datasets.") + return ( + "Error: A harvest job is currently in progress. " + "Cannot clear datasets." + ) # Ensure (error_records + ckan_ids) = total records if len(error_records) + len(ckan_ids) != len(records): - return ("Error: Not all records are either in an error state " - "or have a CKAN ID. Cannot proceed without clearing the dataset.") + return ( + "Error: Not all records are either in an error state " + "or have a CKAN ID. Cannot proceed without clearing the dataset." + ) - if not ckan_ids: + if not ckan_ids: _clear_harvest_records() return "Harvest source cleared successfully." - ckan = RemoteCKAN( - os.getenv("CKAN_API_URL"), apikey=os.getenv("CKAN_API_TOKEN") - ) + ckan = RemoteCKAN(os.getenv("CKAN_API_URL"), apikey=os.getenv("CKAN_API_TOKEN")) - result = ckan.action.package_search(fq=f'owner_org:{organization_id}') - ckan_datasets = result['count'] + result = ckan.action.package_search(fq=f"owner_org:{organization_id}") + ckan_datasets = result["count"] start = datetime.now(timezone.utc) retry_count = 0 retry_max = 20 - # Retry loop to handle timeouts from cloud.gov and CKAN's Solr backend, + # Retry loop to handle timeouts from cloud.gov and CKAN's Solr backend, # ensuring datasets are cleared despite possible interruptions. while ckan_datasets > 0 and retry_count < retry_max: - result = ckan.action.package_search(fq=f'owner_org:{organization_id}') - ckan_datasets = result['count'] - logger.info(f"Attempt {retry_count + 1}: " - f"{ckan_datasets} datasets remaining in CKAN") + result = ckan.action.package_search(fq=f"owner_org:{organization_id}") + ckan_datasets = result["count"] + logger.info( + f"Attempt {retry_count + 1}: " + f"{ckan_datasets} datasets remaining in CKAN" + ) try: ckan.action.bulk_update_delete( datasets=ckan_ids, org_id=organization_id @@ -237,8 +284,10 @@ def _clear_harvest_records(): logger.info(f"Total time: {datetime.now(timezone.utc) - start}") return "Harvest source cleared successfully." else: - fail_message = (f"Harvest source clearance failed after {retry_count} " - f"attempts. {ckan_datasets} datasets still exist in CKAN.") + fail_message = ( + f"Harvest source clearance failed after {retry_count} " + f"attempts. {ckan_datasets} datasets still exist in CKAN." + ) logger.error(fail_message) return fail_message @@ -248,18 +297,18 @@ def delete_harvest_source(self, source_id): return "Harvest source not found" records = ( - self.db.query(HarvestRecord) - .filter_by(harvest_source_id=source_id).all() + self.db.query(HarvestRecord).filter_by(harvest_source_id=source_id).all() ) - + if len(records) == 0: self.db.delete(source) self.db.commit() return "Harvest source deleted successfully" else: - return (f"Failed: {len(records)} records in the Harvest source, " - "please Clear it first.") - + return ( + f"Failed: {len(records)} records in the Harvest source, " + "please Clear it first." + ) ## HARVEST JOB def add_harvest_job(self, job_data): @@ -372,9 +421,19 @@ def get_harvest_job_errors_by_job(self, job_id: str) -> list[dict]: job = self.get_harvest_job(job_id) return [error for error in job.errors or []] - def get_harvest_record_errors_by_job(self, job_id: str): - job = self.get_harvest_job(job_id) - return [error for record in job.records or [] for error in record.errors or []] + @count + @paginate + def get_harvest_record_errors_by_job(self, job_id: str, **kwargs): + subquery = ( + self.db.query(HarvestRecord.id) + .filter(HarvestRecord.status == "error") + .filter(HarvestRecord.harvest_job_id == job_id) + .subquery() + ) + query = self.db.query(HarvestRecordError).filter( + HarvestRecordError.harvest_record_id.in_(select(subquery)) + ) + return query def get_harvest_error(self, error_id: str) -> dict: job_query = self.db.query(HarvestJobError).filter_by(id=error_id).first() @@ -387,7 +446,6 @@ def get_harvest_error(self, error_id: str) -> dict: return None def get_harvest_record_errors_by_record(self, record_id: str): - # TODO: paginate this errors = self.db.query(HarvestRecordError).filter_by( harvest_record_id=record_id ) @@ -552,6 +610,27 @@ def verify_user(self, usr_data): print("Error:", e) return False + #### PAGINATED QUERIES + @count + @paginate + def pget_harvest_jobs(self, filter=text(""), **kwargs): + return self.db.query(HarvestJob).filter(filter) + + @count + @paginate + def pget_harvest_records(self, filter=text(""), **kwargs): + return self.db.query(HarvestRecord).filter(filter) + + @count + @paginate + def pget_harvest_job_errors(self, filter=text(""), **kwargs): + return self.db.query(HarvestJobError).filter(filter) + + @count + @paginate + def pget_harvest_record_errors(self, filter=text(""), **kwargs): + return self.db.query(HarvestRecordError).filter(filter) + ##### TEST INTERFACES BELOW ##### ######## TO BE REMOVED ########## def get_all_harvest_jobs(self): @@ -559,10 +638,6 @@ def get_all_harvest_jobs(self): harvest_jobs_data = [job for job in harvest_jobs] return harvest_jobs_data - def get_all_harvest_records(self): - harvest_records = self.db.query(HarvestRecord).all() - return [record for record in harvest_records or []] - def get_all_harvest_errors(self): job_errors = self.db.query(HarvestJobError).all() record_errors = self.db.query(HarvestRecordError).all() diff --git a/harvester/lib/cf_handler.py b/harvester/lib/cf_handler.py index f5d2eef..a19b7c9 100644 --- a/harvester/lib/cf_handler.py +++ b/harvester/lib/cf_handler.py @@ -1,4 +1,5 @@ import os + from cloudfoundry_client.client import CloudFoundryClient from cloudfoundry_client.v3.tasks import TaskManager diff --git a/manifest.yml b/manifest.yml index 65114cf..e5c1587 100644 --- a/manifest.yml +++ b/manifest.yml @@ -8,7 +8,8 @@ applications: services: - ((app_name))-db - ((app_name))-secrets - instances: 1 + instances: ((admin_instances)) + memory: ((admin_memory_quota)) env: FLASK_APP: run.py CF_API_URL: ((CF_API_URL)) diff --git a/poetry.lock b/poetry.lock index 2026c48..8022876 100644 --- a/poetry.lock +++ b/poetry.lock @@ -769,6 +769,20 @@ dominate = "*" Flask = ">=0.8" visitor = "*" +[[package]] +name = "flask-htmx" +version = "0.3.2" +description = "A Flask extension to work with HTMX." +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "flask_htmx-0.3.2-py3-none-any.whl", hash = "sha256:a1e0071216349197d6669662c2c35a1ab849b6fca28c89dc90932761f7e73c05"}, + {file = "flask_htmx-0.3.2.tar.gz", hash = "sha256:8def77bb292369ff77513ff7b76d27b06f83e1d8c21165b6714c06c1cc2b9275"}, +] + +[package.dependencies] +Flask = ">=2.0.2,<4.0.0" + [[package]] name = "flask-migrate" version = "4.0.7" @@ -1068,6 +1082,24 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jinja2-fragments" +version = "1.6.0" +description = "Render Jinja2 template block as HTML page fragments on Python web frameworks." +optional = false +python-versions = "*" +files = [ + {file = "jinja2_fragments-1.6.0-py3-none-any.whl", hash = "sha256:49a5815cd7210edf234bf137b6005abf1ad336ff93c65519deddc41d91713986"}, + {file = "jinja2_fragments-1.6.0.tar.gz", hash = "sha256:5bd7dd49b7dbfa174d45d6a991cfe3cba4a08a4b66f87cf248aa739eebc435f8"}, +] + +[package.dependencies] +jinja2 = ">=3.1.0" + +[package.extras] +dev = ["pre-commit"] +tests = ["fastapi", "flask (>=2.1.0)", "litestar[standard]", "pytest", "pytest-asyncio", "quart (>=0.18.0)", "sanic", "sanic-ext", "sanic-testing", "starlette[full]"] + [[package]] name = "jsonschema" version = "4.21.1" @@ -2347,5 +2379,5 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" -python-versions = ">=3.10" -content-hash = "d81c4af263f6891c477453ddc74ed90cc7da9d3cdac3e945c1ced15930a757ff" +python-versions = ">=3.10,<4.0" +content-hash = "998c720a492a425387d626eaa210b0c479e6683e2a977c0f6f598c239dcb2df6" diff --git a/pyproject.toml b/pyproject.toml index bbb3c66..6f7c32c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ license = "LICENSE.md" # TODO fix me repository = "https://github.com/GSA/datagov-harvester" [tool.poetry.dependencies] -python = ">=3.10" +python = ">=3.10,<4.0" jsonschema = ">=4" python-dotenv = ">=1" deepdiff = ">=6" @@ -32,6 +32,8 @@ cloudfoundry-client = "^1.36.0" pyjwt = "^2.8.0" cryptography = "^42.0.8" boltons = "^24.0.0" +jinja2-fragments = "^1.6.0" +flask-htmx = "^0.3.2" [tool.poetry.group.dev.dependencies] pytest = ">=7.3.2" diff --git a/requirements-dev.txt b/requirements-dev.txt index 08a8dad..256fac5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,30 +1,30 @@ -black==24.4.2 ; python_version >= "3.10" -cfgv==3.4.0 ; python_version >= "3.10" -click==8.1.7 ; python_version >= "3.10" -colorama==0.4.6 ; python_version >= "3.10" and (sys_platform == "win32" or platform_system == "Windows") -coverage[toml]==7.5.0 ; python_version >= "3.10" -debugpy==1.8.1 ; python_version >= "3.10" -distlib==0.3.8 ; python_version >= "3.10" -exceptiongroup==1.2.1 ; python_version < "3.11" and python_version >= "3.10" -filelock==3.14.0 ; python_version >= "3.10" -freezegun==1.5.1 ; python_version >= "3.10" -identify==2.5.36 ; python_version >= "3.10" -iniconfig==2.0.0 ; python_version >= "3.10" -isort==5.13.2 ; python_version >= "3.10" -mypy-extensions==1.0.0 ; python_version >= "3.10" -nodeenv==1.8.0 ; python_version >= "3.10" -packaging==24.0 ; python_version >= "3.10" -pathspec==0.12.1 ; python_version >= "3.10" -platformdirs==4.2.1 ; python_version >= "3.10" -pluggy==1.5.0 ; python_version >= "3.10" -pre-commit==3.7.1 ; python_version >= "3.10" -pytest-cov==4.1.0 ; python_version >= "3.10" -pytest==8.1.1 ; python_version >= "3.10" -python-dateutil==2.9.0.post0 ; python_version >= "3.10" -pyyaml==6.0.1 ; python_version >= "3.10" -ruff==0.0.291 ; python_version >= "3.10" -setuptools==69.5.1 ; python_version >= "3.10" -six==1.16.0 ; python_version >= "3.10" -tomli==2.0.1 ; python_full_version <= "3.11.0a6" and python_version >= "3.10" -typing-extensions==4.11.0 ; python_version < "3.11" and python_version >= "3.10" -virtualenv==20.26.2 ; python_version >= "3.10" +black==24.4.2 ; python_version >= "3.10" and python_version < "4.0" +cfgv==3.4.0 ; python_version >= "3.10" and python_version < "4.0" +click==8.1.7 ; python_version >= "3.10" and python_version < "4.0" +colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" and (sys_platform == "win32" or platform_system == "Windows") +coverage[toml]==7.5.0 ; python_version >= "3.10" and python_version < "4.0" +debugpy==1.8.1 ; python_version >= "3.10" and python_version < "4.0" +distlib==0.3.8 ; python_version >= "3.10" and python_version < "4.0" +exceptiongroup==1.2.1 ; python_version >= "3.10" and python_version < "3.11" +filelock==3.14.0 ; python_version >= "3.10" and python_version < "4.0" +freezegun==1.5.1 ; python_version >= "3.10" and python_version < "4.0" +identify==2.5.36 ; python_version >= "3.10" and python_version < "4.0" +iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "4.0" +isort==5.13.2 ; python_version >= "3.10" and python_version < "4.0" +mypy-extensions==1.0.0 ; python_version >= "3.10" and python_version < "4.0" +nodeenv==1.8.0 ; python_version >= "3.10" and python_version < "4.0" +packaging==24.0 ; python_version >= "3.10" and python_version < "4.0" +pathspec==0.12.1 ; python_version >= "3.10" and python_version < "4.0" +platformdirs==4.2.1 ; python_version >= "3.10" and python_version < "4.0" +pluggy==1.5.0 ; python_version >= "3.10" and python_version < "4.0" +pre-commit==3.7.1 ; python_version >= "3.10" and python_version < "4.0" +pytest-cov==4.1.0 ; python_version >= "3.10" and python_version < "4.0" +pytest==8.1.1 ; python_version >= "3.10" and python_version < "4.0" +python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4.0" +pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "4.0" +ruff==0.0.291 ; python_version >= "3.10" and python_version < "4.0" +setuptools==69.5.1 ; python_version >= "3.10" and python_version < "4.0" +six==1.16.0 ; python_version >= "3.10" and python_version < "4.0" +tomli==2.0.1 ; python_version >= "3.10" and python_full_version <= "3.11.0a6" +typing-extensions==4.11.0 ; python_version >= "3.10" and python_version < "3.11" +virtualenv==20.26.2 ; python_version >= "3.10" and python_version < "4.0" diff --git a/requirements.txt b/requirements.txt index 2a49425..de8125f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,61 +1,63 @@ -aiohttp==3.9.5 ; python_version >= "3.10" -aiosignal==1.3.1 ; python_version >= "3.10" -alembic==1.13.1 ; python_version >= "3.10" -async-timeout==4.0.3 ; python_version < "3.11" and python_version >= "3.10" -attrs==23.2.0 ; python_version >= "3.10" -beautifulsoup4==4.12.3 ; python_version >= "3.10" -blinker==1.7.0 ; python_version >= "3.10" -boltons==24.0.0 ; python_version >= "3.10" -certifi==2024.2.2 ; python_version >= "3.10" -cffi==1.16.0 ; platform_python_implementation != "PyPy" and python_version >= "3.10" -charset-normalizer==3.3.2 ; python_version >= "3.10" -ckanapi==4.8 ; python_version >= "3.10" -click==8.1.7 ; python_version >= "3.10" -cloudfoundry-client==1.36.0 ; python_version >= "3.10" -colorama==0.4.6 ; python_version >= "3.10" and platform_system == "Windows" -cryptography==42.0.8 ; python_version >= "3.10" -deepdiff==7.0.1 ; python_version >= "3.10" -docopt==0.6.2 ; python_version >= "3.10" -dominate==2.9.1 ; python_version >= "3.10" -flask-bootstrap==3.3.7.1 ; python_version >= "3.10" -flask-migrate==4.0.7 ; python_version >= "3.10" -flask-sqlalchemy==3.1.1 ; python_version >= "3.10" -flask-wtf==1.2.1 ; python_version >= "3.10" -flask==3.0.3 ; python_version >= "3.10" -frozenlist==1.4.1 ; python_version >= "3.10" -greenlet==3.0.3 ; (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version >= "3.10" -idna==3.7 ; python_version >= "3.10" -itsdangerous==2.2.0 ; python_version >= "3.10" -jinja2==3.1.3 ; python_version >= "3.10" -jsonschema-specifications==2023.12.1 ; python_version >= "3.10" -jsonschema==4.21.1 ; python_version >= "3.10" -mako==1.3.3 ; python_version >= "3.10" -markupsafe==2.1.5 ; python_version >= "3.10" -multidict==6.0.5 ; python_version >= "3.10" -oauth2-client==1.4.2 ; python_version >= "3.10" -ordered-set==4.1.0 ; python_version >= "3.10" -polling2==0.5.0 ; python_version >= "3.10" -protobuf==4.25.3 ; python_version >= "3.10" -psycopg2-binary==2.9.9 ; python_version >= "3.10" -pycparser==2.22 ; platform_python_implementation != "PyPy" and python_version >= "3.10" -pyjwt==2.8.0 ; python_version >= "3.10" -python-dotenv==1.0.1 ; python_version >= "3.10" -python-slugify==8.0.4 ; python_version >= "3.10" -pyyaml==6.0.1 ; python_version >= "3.10" -referencing==0.35.0 ; python_version >= "3.10" -requests==2.31.0 ; python_version >= "3.10" -rpds-py==0.18.0 ; python_version >= "3.10" -sansjson==0.3.0 ; python_version >= "3.10" -setuptools==69.5.1 ; python_version >= "3.10" -simplejson==3.19.2 ; python_version >= "3.10" -six==1.16.0 ; python_version >= "3.10" -soupsieve==2.5 ; python_version >= "3.10" -sqlalchemy==2.0.29 ; python_version >= "3.10" -text-unidecode==1.3 ; python_version >= "3.10" -typing-extensions==4.11.0 ; python_version >= "3.10" -urllib3==2.2.1 ; python_version >= "3.10" -visitor==0.1.3 ; python_version >= "3.10" -websocket-client==1.7.0 ; python_version >= "3.10" -werkzeug==3.0.2 ; python_version >= "3.10" -wtforms==3.1.2 ; python_version >= "3.10" -yarl==1.9.4 ; python_version >= "3.10" +aiohttp==3.9.5 ; python_version >= "3.10" and python_version < "4.0" +aiosignal==1.3.1 ; python_version >= "3.10" and python_version < "4.0" +alembic==1.13.1 ; python_version >= "3.10" and python_version < "4.0" +async-timeout==4.0.3 ; python_version >= "3.10" and python_version < "3.11" +attrs==23.2.0 ; python_version >= "3.10" and python_version < "4.0" +beautifulsoup4==4.12.3 ; python_version >= "3.10" and python_version < "4.0" +blinker==1.7.0 ; python_version >= "3.10" and python_version < "4.0" +boltons==24.0.0 ; python_version >= "3.10" and python_version < "4.0" +certifi==2024.2.2 ; python_version >= "3.10" and python_version < "4.0" +cffi==1.16.0 ; python_version >= "3.10" and python_version < "4.0" and platform_python_implementation != "PyPy" +charset-normalizer==3.3.2 ; python_version >= "3.10" and python_version < "4.0" +ckanapi==4.8 ; python_version >= "3.10" and python_version < "4.0" +click==8.1.7 ; python_version >= "3.10" and python_version < "4.0" +cloudfoundry-client==1.36.0 ; python_version >= "3.10" and python_version < "4.0" +colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Windows" +cryptography==42.0.8 ; python_version >= "3.10" and python_version < "4.0" +deepdiff==7.0.1 ; python_version >= "3.10" and python_version < "4.0" +docopt==0.6.2 ; python_version >= "3.10" and python_version < "4.0" +dominate==2.9.1 ; python_version >= "3.10" and python_version < "4.0" +flask-bootstrap==3.3.7.1 ; python_version >= "3.10" and python_version < "4.0" +flask-htmx==0.3.2 ; python_version >= "3.10" and python_version < "4.0" +flask-migrate==4.0.7 ; python_version >= "3.10" and python_version < "4.0" +flask-sqlalchemy==3.1.1 ; python_version >= "3.10" and python_version < "4.0" +flask-wtf==1.2.1 ; python_version >= "3.10" and python_version < "4.0" +flask==3.0.3 ; python_version >= "3.10" and python_version < "4.0" +frozenlist==1.4.1 ; python_version >= "3.10" and python_version < "4.0" +greenlet==3.0.3 ; python_version >= "3.10" and python_version < "4.0" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") +idna==3.7 ; python_version >= "3.10" and python_version < "4.0" +itsdangerous==2.2.0 ; python_version >= "3.10" and python_version < "4.0" +jinja2-fragments==1.6.0 ; python_version >= "3.10" and python_version < "4.0" +jinja2==3.1.3 ; python_version >= "3.10" and python_version < "4.0" +jsonschema-specifications==2023.12.1 ; python_version >= "3.10" and python_version < "4.0" +jsonschema==4.21.1 ; python_version >= "3.10" and python_version < "4.0" +mako==1.3.3 ; python_version >= "3.10" and python_version < "4.0" +markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "4.0" +multidict==6.0.5 ; python_version >= "3.10" and python_version < "4.0" +oauth2-client==1.4.2 ; python_version >= "3.10" and python_version < "4.0" +ordered-set==4.1.0 ; python_version >= "3.10" and python_version < "4.0" +polling2==0.5.0 ; python_version >= "3.10" and python_version < "4.0" +protobuf==4.25.3 ; python_version >= "3.10" and python_version < "4.0" +psycopg2-binary==2.9.9 ; python_version >= "3.10" and python_version < "4.0" +pycparser==2.22 ; python_version >= "3.10" and python_version < "4.0" and platform_python_implementation != "PyPy" +pyjwt==2.8.0 ; python_version >= "3.10" and python_version < "4.0" +python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "4.0" +python-slugify==8.0.4 ; python_version >= "3.10" and python_version < "4.0" +pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "4.0" +referencing==0.35.0 ; python_version >= "3.10" and python_version < "4.0" +requests==2.31.0 ; python_version >= "3.10" and python_version < "4.0" +rpds-py==0.18.0 ; python_version >= "3.10" and python_version < "4.0" +sansjson==0.3.0 ; python_version >= "3.10" and python_version < "4.0" +setuptools==69.5.1 ; python_version >= "3.10" and python_version < "4.0" +simplejson==3.19.2 ; python_version >= "3.10" and python_version < "4.0" +six==1.16.0 ; python_version >= "3.10" and python_version < "4.0" +soupsieve==2.5 ; python_version >= "3.10" and python_version < "4.0" +sqlalchemy==2.0.29 ; python_version >= "3.10" and python_version < "4.0" +text-unidecode==1.3 ; python_version >= "3.10" and python_version < "4.0" +typing-extensions==4.11.0 ; python_version >= "3.10" and python_version < "4.0" +urllib3==2.2.1 ; python_version >= "3.10" and python_version < "4.0" +visitor==0.1.3 ; python_version >= "3.10" and python_version < "4.0" +websocket-client==1.7.0 ; python_version >= "3.10" and python_version < "4.0" +werkzeug==3.0.2 ; python_version >= "3.10" and python_version < "4.0" +wtforms==3.1.2 ; python_version >= "3.10" and python_version < "4.0" +yarl==1.9.4 ; python_version >= "3.10" and python_version < "4.0" diff --git a/tests/conftest.py b/tests/conftest.py index b55217f..e95550a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,8 @@ +import json import logging import os from pathlib import Path -from typing import Any, Generator +from typing import Any, Generator, List from unittest.mock import patch import pytest @@ -80,34 +81,33 @@ def default_function_fixture(interface): logger.info("Patching complete. Unpatching") +@pytest.fixture +def fixtures_json(): + file = Path(__file__).parents[0] / "fixtures.json" + with open(file, "r") as file: + return json.load(file) + + ## ORGS @pytest.fixture -def organization_data() -> dict: - return { - "name": "Test Org", - "logo": "https://example.com/logo.png", - "id": "d925f84d-955b-4cb7-812f-dcfd6681a18f", - } +def organization_data(fixtures_json) -> dict: + return fixtures_json["organization"][0] @pytest.fixture -def organization_orm(organization_data: dict) -> Organization: +def organization_data_orm(organization_data: dict) -> Organization: return Organization(**organization_data) ## HARVEST SOURCES @pytest.fixture -def source_data_dcatus(organization_data: dict) -> dict: - return { - "id": "2f2652de-91df-4c63-8b53-bfced20b276b", - "name": "Test Source", - "notification_emails": "email@example.com", - "organization_id": organization_data["id"], - "frequency": "daily", - "url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus.json", - "schema_type": "type1", - "source_type": "dcatus", - } +def source_data_dcatus(fixtures_json) -> dict: + return fixtures_json["source"][0] + + +@pytest.fixture +def source_data_dcatus_orm(source_data_dcatus: dict) -> HarvestSource: + return HarvestSource(**source_data_dcatus) @pytest.fixture @@ -138,11 +138,6 @@ def source_data_dcatus_same_title(organization_data: dict) -> dict: } -@pytest.fixture -def source_orm_dcatus(source_data_dcatus: dict) -> HarvestSource: - return HarvestSource(**source_data_dcatus) - - @pytest.fixture def source_data_waf(organization_data: dict) -> dict: return { @@ -172,43 +167,73 @@ def source_data_dcatus_invalid(organization_data: dict) -> dict: @pytest.fixture -def job_data_dcatus(source_data_dcatus: dict) -> dict: +def source_data_dcatus_single_record(organization_data: dict) -> dict: return { - "id": "6bce761c-7a39-41c1-ac73-94234c139c76", - "status": "new", - "harvest_source_id": source_data_dcatus["id"], + "id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "name": "Single Record Test Source", + "notification_emails": "email@example.com", + "organization_id": organization_data["id"], + "frequency": "daily", + "url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus_single_record.json", + "schema_type": "type1", + "source_type": "dcatus", } @pytest.fixture -def job_data_dcatus_2(source_data_dcatus: dict) -> dict: +def source_data_dcatus_bad_url(organization_data: dict) -> dict: return { - "id": "392ac4b3-79a6-414b-a2b3-d6c607d3b8d4", - "status": "new", - "harvest_source_id": source_data_dcatus["id"], + "id": "b059e587-a4a1-422e-825a-830b4913dbfb", + "name": "Bad URL Source", + "notification_emails": "bad@example.com", + "organization_id": organization_data["id"], + "frequency": "daily", + "url": f"{HARVEST_SOURCE_URL}/dcatus/bad_url.json", + "schema_type": "type1", + "source_type": "dcatus", } @pytest.fixture -def job_orm_dcatus(job_data_dcatus: dict) -> HarvestJob: +def source_data_dcatus_invalid_records(organization_data) -> dict: + return { + "id": "8e7f539b-0a83-43ad-950e-3976bb11a425", + "name": "Invalid Record Source", + "notification_emails": "invalid_record@example.com", + "organization_id": organization_data["id"], + "frequency": "daily", + "url": "http://localhost/dcatus/missing_title.json", + "schema_type": "type1", + "source_type": "dcatus", + } + + +## HARVEST JOBS +@pytest.fixture +def job_data_dcatus(fixtures_json) -> dict: + return fixtures_json["job"][0] + + +@pytest.fixture +def job_data_dcatus_orm(job_data_dcatus: dict) -> HarvestJob: return HarvestJob(**job_data_dcatus) @pytest.fixture -def job_data_waf(source_data_waf: dict) -> dict: +def job_data_dcatus_2(source_data_dcatus_2: dict) -> dict: return { - "id": "963cdc51-94d5-425d-a688-e0a57e0c5dd2", + "id": "392ac4b3-79a6-414b-a2b3-d6c607d3b8d4", "status": "new", - "harvest_source_id": source_data_waf["id"], + "harvest_source_id": source_data_dcatus_2["id"], } @pytest.fixture -def job_error_data(job_data_dcatus) -> dict: +def job_data_waf(source_data_waf: dict) -> dict: return { - "harvest_job_id": job_data_dcatus["id"], - "message": "error reading records from harvest database", - "type": "ExtractInternalException", + "id": "963cdc51-94d5-425d-a688-e0a57e0c5dd2", + "status": "new", + "harvest_source_id": source_data_waf["id"], } @@ -221,89 +246,68 @@ def job_data_dcatus_invalid(source_data_dcatus_invalid: dict) -> dict: } -## HARVEST RECORDS @pytest.fixture -def source_data_dcatus_single_record(organization_data: dict) -> dict: +def job_data_dcatus_bad_url(source_data_dcatus_bad_url: dict) -> dict: return { - "id": "2f2652de-91df-4c63-8b53-bfced20b276b", - "name": "Single Record Test Source", - "notification_emails": "email@example.com", - "organization_id": organization_data["id"], - "frequency": "daily", - "url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus_single_record.json", - "schema_type": "type1", - "source_type": "dcatus", + "id": "707aee7b-bf72-4e07-a5fc-68980765b214", + "status": "new", + "harvest_source_id": source_data_dcatus_bad_url["id"], } @pytest.fixture -def record_data_dcatus(job_data_dcatus: dict) -> dict: +def source_data_dcatus_invalid_records_job( + source_data_dcatus_invalid_records: dict, +) -> dict: return { - "id": "0779c855-df20-49c8-9108-66359d82b77c", - "identifier": "test_identifier", - "harvest_job_id": job_data_dcatus["id"], - "harvest_source_id": job_data_dcatus["harvest_source_id"], - "action": "create", - "status": "success", - "source_raw": "example data", + "id": "2b57046b-cfda-4a37-bf84-a4766a54a743", + "status": "new", + "harvest_source_id": source_data_dcatus_invalid_records["id"], } +## HARVEST JOB ERRORS @pytest.fixture -def record_error_data(record_data_dcatus) -> dict: - return { - "harvest_record_id": record_data_dcatus["id"], - "message": "record is invalid", - "type": "ValidationException", - } +def job_error_data(fixtures_json) -> dict: + return fixtures_json["job_error"][0] +## HARVEST RECORDS @pytest.fixture -def source_data_dcatus_bad_url(organization_data: dict) -> dict: - return { - "id": "b059e587-a4a1-422e-825a-830b4913dbfb", - "name": "Bad URL Source", - "notification_emails": "bad@example.com", - "organization_id": organization_data["id"], - "frequency": "daily", - "url": f"{HARVEST_SOURCE_URL}/dcatus/bad_url.json", - "schema_type": "type1", - "source_type": "dcatus", - } +def record_data_dcatus(fixtures_json) -> List[dict]: + return fixtures_json["record"] @pytest.fixture -def job_data_dcatus_bad_url(source_data_dcatus_bad_url: dict) -> dict: - return { - "id": "707aee7b-bf72-4e07-a5fc-68980765b214", - "status": "new", - "harvest_source_id": source_data_dcatus_bad_url["id"], - } +def record_data_dcatus_2(job_data_dcatus_2): + return [ + { + "id": "72bae4b2-336e-49df-bc4c-410dc73dc316", + "identifier": "test_identifier-2", + "harvest_job_id": job_data_dcatus_2["id"], + "harvest_source_id": job_data_dcatus_2["harvest_source_id"], + "action": "create", + "status": "error", + "source_raw": "example data 2", + } + ] +## HARVEST RECORD ERRORS @pytest.fixture -def source_data_dcatus_invalid_records(organization_data) -> dict: - return { - "id": "8e7f539b-0a83-43ad-950e-3976bb11a425", - "name": "Invalid Record Source", - "notification_emails": "invalid_record@example.com", - "organization_id": organization_data["id"], - "frequency": "daily", - "url": "http://localhost/dcatus/missing_title.json", - "schema_type": "type1", - "source_type": "dcatus", - } +def record_error_data(fixtures_json) -> List[dict]: + return fixtures_json["record_error"] @pytest.fixture -def source_data_dcatus_invalid_records_job( - source_data_dcatus_invalid_records: dict, -) -> dict: - return { - "id": "2b57046b-cfda-4a37-bf84-a4766a54a743", - "status": "new", - "harvest_source_id": source_data_dcatus_invalid_records["id"], - } +def record_error_data_2(record_data_dcatus_2) -> dict: + return [ + { + "harvest_record_id": record_data_dcatus_2[0]["id"], + "message": "record is invalid", + "type": "ValidationException", + } + ] @pytest.fixture @@ -314,6 +318,43 @@ def interface_no_jobs(interface, organization_data, source_data_dcatus): return interface +@pytest.fixture +def interface_with_fixture_json( + interface_no_jobs, + job_data_dcatus, + job_error_data, + record_data_dcatus, + record_error_data, +): + interface_no_jobs.add_harvest_job(job_data_dcatus) + interface_no_jobs.add_harvest_job_error(job_error_data) + for record in record_data_dcatus: + interface_no_jobs.add_harvest_record(record) + for error in record_error_data: + interface_no_jobs.add_harvest_record_error(error) + + return interface_no_jobs + + +@pytest.fixture +def interface_with_multiple_sources( + interface_with_fixture_json, + source_data_dcatus_2, + job_data_dcatus_2, + record_data_dcatus_2, + record_error_data_2, +): + interface_with_fixture_json.add_harvest_source(source_data_dcatus_2) + interface_with_fixture_json.add_harvest_job(job_data_dcatus_2) + for record in record_data_dcatus_2: + interface_with_fixture_json.add_harvest_record(record) + for error in record_error_data_2: + interface_with_fixture_json.add_harvest_record_error(error) + + return interface_with_fixture_json + + +## MISC @pytest.fixture def interface_with_multiple_jobs(interface_no_jobs, source_data_dcatus): statuses = ["new", "in_progress", "complete", "error"] diff --git a/tests/fixtures.json b/tests/fixtures.json new file mode 100644 index 0000000..2cd74fb --- /dev/null +++ b/tests/fixtures.json @@ -0,0 +1,179 @@ +{ + "organization": [ + { + "name": "Test Org", + "logo": "https://example.com/logo.png", + "id": "d925f84d-955b-4cb7-812f-dcfd6681a18f" + } + ], + "source": [ + { + "id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "name": "Test Source", + "notification_emails": "email@example.com", + "organization_id": "d925f84d-955b-4cb7-812f-dcfd6681a18f", + "frequency": "daily", + "url": "http://localhost:80/dcatus/dcatus.json", + "schema_type": "type1", + "source_type": "dcatus" + } + ], + "job": [ + { + "id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "status": "new", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b" + } + ], + "job_error": [ + { + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "message": "error reading records from harvest database", + "type": "ExtractInternalException" + } + ], + "record": [ + { + "id": "0779c855-df20-49c8-9108-66359d82b77c", + "identifier": "test_identifier-1", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "c218c965-3670-45c8-bfcd-f852d71ed917", + "identifier": "test_identifier-2", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "e1f603cc-8b6b-483f-beb4-86bda5462b79", + "identifier": "test_identifier-3", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "1c004473-0802-4f22-a16d-7a2d7559719e", + "identifier": "test_identifier-4", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "deb12fa0-d812-4d6e-98f4-d4f7d776c6b3", + "identifier": "test_identifier-5", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "27b5d5d6-808b-4a8c-ae4a-99f118e282dd", + "identifier": "test_identifier-6", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "c232a2ca-6344-4692-adc2-29f618a2eff3", + "identifier": "test_identifier-7", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "95021355-bad0-442b-98e9-475ecd849033", + "identifier": "test_identifier-8", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "09f073b3-00e3-4147-ba69-a5d0fd7ce027", + "identifier": "test_identifier-9", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "97492788-5d62-4feb-8641-6f6692aec026", + "identifier": "test_identifier-10", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + } + ], + "record_error": [ + { + "harvest_record_id": "0779c855-df20-49c8-9108-66359d82b77c", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "c218c965-3670-45c8-bfcd-f852d71ed917", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "e1f603cc-8b6b-483f-beb4-86bda5462b79", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "1c004473-0802-4f22-a16d-7a2d7559719e", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "deb12fa0-d812-4d6e-98f4-d4f7d776c6b3", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "27b5d5d6-808b-4a8c-ae4a-99f118e282dd", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "c232a2ca-6344-4692-adc2-29f618a2eff3", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "95021355-bad0-442b-98e9-475ecd849033", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "09f073b3-00e3-4147-ba69-a5d0fd7ce027", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "97492788-5d62-4feb-8641-6f6692aec026", + "message": "record is invalid", + "type": "ValidationException" + } + ] +} diff --git a/tests/integration/app/test_load_manager.py b/tests/integration/app/test_load_manager.py index 8252c31..91a2550 100644 --- a/tests/integration/app/test_load_manager.py +++ b/tests/integration/app/test_load_manager.py @@ -27,13 +27,18 @@ class TestLoadManager: @patch("harvester.lib.cf_handler.CloudFoundryClient") @patch("harvester.lib.cf_handler.TaskManager") def test_load_manager_invokes_tasks( - self, TMMock, CFCMock, interface_no_jobs, source_orm_dcatus, mock_good_cf_index + self, + TMMock, + CFCMock, + interface_no_jobs, + source_data_dcatus_orm, + mock_good_cf_index, ): intervals = [-1, -2] jobs = [ { "status": "new", - "harvest_source_id": source_orm_dcatus.id, + "harvest_source_id": source_data_dcatus_orm.id, "date_created": datetime.now() + timedelta(days=interval), } for interval in intervals diff --git a/tests/integration/app/test_login_required.py b/tests/integration/app/test_login_required.py index 60fe4e5..09af18e 100644 --- a/tests/integration/app/test_login_required.py +++ b/tests/integration/app/test_login_required.py @@ -56,9 +56,7 @@ def test_org_edit_buttons__logged_in( res = client.get(f"/organization/{organization_data['id']}") button_string_text = '
    ' org_edit_text = f'