From b03f0d572efa9bc88788a95560a89f1477f44309 Mon Sep 17 00:00:00 2001 From: MarkCalvert <37602611+MarkCalvert@users.noreply.github.com> Date: Thu, 1 Feb 2024 09:46:23 +1300 Subject: [PATCH] Feature/6374 qa extension upgrade (#2) * Installed QA extension Enabled plugins archiver & report Added archiver config values Initialise db for archiver and report Added bulk & priority background jobs for archiver Added cron job for report generation Added nginx config to serve archiver cached resources Added ckan_storage volume to nginx to access cached resources * Install qsv dependency for extension ckanext-qa * Simplified installation of qsv * Fixed issues with env variable CKAN__PLUGINS using the value from base image * Added qa dependency file package Updated qa extension to develop branch --- .gitignore | 1 + .vscode/launch.json | 7 ++-- ckan/Dockerfile | 2 +- ckan/Dockerfile.dev | 3 +- ckan/config/dbca.ini | 10 ++++- ckan/docker-entrypoint.d/02_setup_dbca.sh | 29 +++++++------ ckan/setup/dbca_ckan_cron_jobs | 5 ++- ckan/setup/dbca_requirements.sh | 30 +++++++++---- ckan/setup/dbca_start_ckan.sh.override | 4 +- .../dbca_start_ckan_development.sh.override | 10 ++--- ckan/supervisor/ckan_worker_bulk.conf | 42 +++++++++++++++++++ ckan/supervisor/ckan_worker_default.conf | 2 +- ckan/supervisor/ckan_worker_priority.conf | 42 +++++++++++++++++++ docker-compose.yml | 2 + nginx/Dockerfile | 2 +- nginx/setup/default.conf | 4 ++ src/dbca_install_extensions.sh | 11 ++--- 17 files changed, 157 insertions(+), 49 deletions(-) create mode 100644 ckan/supervisor/ckan_worker_bulk.conf create mode 100644 ckan/supervisor/ckan_worker_priority.conf diff --git a/.gitignore b/.gitignore index a3d88d50..b21eec73 100755 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ _solr/schema.xml _src/* local/* .env +dbca/ \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index 9d24adba..faac609b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -32,8 +32,7 @@ "--config", "/srv/app/config/dbca.ini", "jobs", - "worker", - "priority" + "worker" ], "justMyCode": true }, @@ -44,9 +43,9 @@ "module": "pdb", "args": [ "-c continue", - "/srv/app/config/dbca.ini", + "/usr/bin/ckan", "--config", - "/srv/app/dbca.ini", + "/srv/app/config/dbca.ini", "" ], "justMyCode": true diff --git a/ckan/Dockerfile b/ckan/Dockerfile index 937c3f2a..1b0176da 100644 --- a/ckan/Dockerfile +++ b/ckan/Dockerfile @@ -32,4 +32,4 @@ RUN chmod +x ${APP_DIR}/start_ckan.sh COPY config/*.ini ${APP_DIR}/config/ # Override default CKAN config file to use dbca.ini -ENV CKAN_INI=${APP_DIR}/config/dbca.ini \ No newline at end of file +ENV CKAN_INI=${APP_DIR}/config/dbca.ini diff --git a/ckan/Dockerfile.dev b/ckan/Dockerfile.dev index bb877c04..0fb294fa 100644 --- a/ckan/Dockerfile.dev +++ b/ckan/Dockerfile.dev @@ -31,7 +31,6 @@ FROM ckan/ckan-dev:2.10.3 #RUN pip3 install -e git+https://github.com/ckan/ckanext-dcat.git@v0.0.6#egg=ckanext-dcat && \ # pip3 install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/v0.0.6/requirements.txt - # Install any extensions needed by your CKAN instance COPY setup/dbca_requirements.sh ${APP_DIR} RUN pip3 install pip --upgrade && \ @@ -66,4 +65,4 @@ RUN chmod +x ${APP_DIR}/start_ckan_development.sh COPY config/*.ini ${APP_DIR}/config/ # Override default CKAN config file to use dbca.ini -ENV CKAN_INI=${APP_DIR}/config/dbca.ini \ No newline at end of file +ENV CKAN_INI=${APP_DIR}/config/dbca.ini diff --git a/ckan/config/dbca.ini b/ckan/config/dbca.ini index a800c93c..b6e6174f 100644 --- a/ckan/config/dbca.ini +++ b/ckan/config/dbca.ini @@ -10,7 +10,7 @@ use = config:/srv/app/ckan.ini ckan.devserver.watch_patterns = /srv/app/ckan.ini ## Plugins Settings ############################################################ -ckan.plugins = dbca image_view text_view datatables_view pdf_view datastore xloader pages showcase hierarchy_display hierarchy_form hierarchy_group_form dcat scheming_datasets spatial_metadata spatial_query doi resource_proxy geo_view shp_view envvars +ckan.plugins = dbca image_view text_view datatables_view pdf_view datastore xloader pages showcase hierarchy_display hierarchy_form hierarchy_group_form dcat scheming_datasets spatial_metadata spatial_query doi qa archiver report resource_proxy geo_view shp_view envvars ## Resource Views Settings ##################################################### ckan.views.default_views = text_view datatables_view pdf_view geo_view shp_view @@ -44,6 +44,14 @@ ckanext.showcase.editor = ckeditor ckanext.pages.allow_html = True ckanext.pages.editor = ckeditor +# ckanext-archiver +ckanext-archiver.archive_dir=/var/lib/ckan/archiver +ckanext-archiver.cache_url_root=/resource_cache/ + +# ckanext-qa +ckanext.qa.qsv_bin=/usr/local/bin/qsv + + ## Logging configuration [loggers] keys = root, ckan, ckanext, werkzeug diff --git a/ckan/docker-entrypoint.d/02_setup_dbca.sh b/ckan/docker-entrypoint.d/02_setup_dbca.sh index 7176537b..f4741616 100644 --- a/ckan/docker-entrypoint.d/02_setup_dbca.sh +++ b/ckan/docker-entrypoint.d/02_setup_dbca.sh @@ -1,7 +1,5 @@ #!/bin/bash -# Get the ckan plugins values from the DBCA CKAN config file -CKAN__PLUGINS=$(grep '^ckan\.plugins' $APP_DIR/config/dbca.ini | cut -d'=' -f2) echo "CKAN__PLUGINS: $CKAN__PLUGINS" if [[ $CKAN__PLUGINS == *"xloader"* ]]; then @@ -10,22 +8,19 @@ if [[ $CKAN__PLUGINS == *"xloader"* ]]; then echo "Setting a temporary value for ckanext.xloader.api_token" ckan config-tool $CKAN_INI "ckanext.xloader.api_token=$(ckan -c $CKAN_INI user token add $CKAN_SYSADMIN_NAME xloader | tail -n 1 | tr -d '\t')" fi +CKAN_INI=$APP_DIR/config/dbca.ini -# Use the DBCA CKAN config file for the CKAN config file -CKAN_INI=$APP_DIR/config/dbca.ini - -## Examples of how to initialise DB for the extensions -# if [[ $CKAN__PLUGINS == *"archiver"* ]]; then -# ckan -c $CKAN_INI archiver init -# fi +if [[ $CKAN__PLUGINS == *"archiver"* ]]; then + ckan -c $CKAN_INI archiver init +fi -# if [[ $CKAN__PLUGINS == *"report"* ]]; then -# ckan -c $CKAN_INI report initdb -# fi +if [[ $CKAN__PLUGINS == *"report"* ]]; then + ckan -c $CKAN_INI report initdb +fi -# if [[ $CKAN__PLUGINS == *"harvest"* ]]; then -# ckan -c $CKAN_INI db upgrade -p harvest -# fi +if [[ $CKAN__PLUGINS == *"qa"* ]]; then + ckan -c $CKAN_INI qa init +fi if [[ $CKAN__PLUGINS == *"pages"* ]]; then ckan -c $CKAN_INI pages initdb @@ -34,3 +29,7 @@ fi if [[ $CKAN__PLUGINS == *"doi"* ]]; then ckan -c $CKAN_INI doi initdb fi + +# if [[ $CKAN__PLUGINS == *"harvest"* ]]; then +# ckan -c $CKAN_INI db upgrade -p harvest +# fi \ No newline at end of file diff --git a/ckan/setup/dbca_ckan_cron_jobs b/ckan/setup/dbca_ckan_cron_jobs index 442bf281..5c9442ff 100644 --- a/ckan/setup/dbca_ckan_cron_jobs +++ b/ckan/setup/dbca_ckan_cron_jobs @@ -1,4 +1,7 @@ # Crontab for CKAN cron jobs # Example cron job runs the harvester run command every 15 mins -#*/15 * * * * /usr/bin/ckan -c /srv/app/config/dbca/ckan.ini harvester run >> $APP_DIR/logs/ckan-cron-jobs.log 2>&1 +#*/15 * * * * /usr/bin/ckan -c /srv/app/ckan.ini harvester run >> $APP_DIR/logs/ckan-cron-jobs.log 2>&1 +# Midnight task to schedule embargo datasets to public visibility 0 0 * * * /usr/bin/ckan -c /srv/app/config/dbca/ckan.ini dbca scheduled_datasets >> $APP_DIR/logs/ckan-cron-jobs.log 2>&1 +# Midnight report generation for archiver broken links +0 0 * * * /usr/bin/ckan -c /srv/app/ckan.ini report generate >> $APP_DIR/logs/ckan-cron-jobs.log 2>&1 \ No newline at end of file diff --git a/ckan/setup/dbca_requirements.sh b/ckan/setup/dbca_requirements.sh index 2ffb4799..eb0d7e81 100644 --- a/ckan/setup/dbca_requirements.sh +++ b/ckan/setup/dbca_requirements.sh @@ -1,8 +1,9 @@ #!/bin/sh -## Must Have ## +## CKAN Core extensions ## + # Archiver -pip3 install -e 'git+https://github.com/ckan/ckanext-archiver.git@master#egg=ckanext-archiver' +pip3 install -e git+https://github.com/ckan/ckanext-archiver.git@master#egg=ckanext-archiver pip3 install -r ${SRC_DIR}/ckanext-archiver/requirements.txt # DCAT @@ -10,7 +11,7 @@ pip3 install -e git+https://github.com/ckan/ckanext-dcat.git@v1.5.1#egg=ckanext- pip3 install -r ${SRC_DIR}/ckanext-dcat/requirements.txt # Harvester -pip3 install -e 'git+https://github.com/ckan/ckanext-harvest.git@v1.5.6#egg=ckanext-harvest' +pip3 install -e git+https://github.com/ckan/ckanext-harvest.git@v1.5.6#egg=ckanext-harvest pip3 install -r ${SRC_DIR}/ckanext-harvest/requirements.txt # Hierarchy @@ -32,7 +33,7 @@ pip3 install -e git+https://github.com/ckan/ckanext-showcase.git@v1.6.1#egg=ckan pip3 install -r ${SRC_DIR}/ckanext-showcase/requirements.txt # Scheming -pip3 install -e 'git+https://github.com/ckan/ckanext-scheming.git@release-3.0.0#egg=ckanext-scheming' +pip3 install -e git+https://github.com/ckan/ckanext-scheming.git@release-3.0.0#egg=ckanext-scheming # Spatial # dependencies @@ -46,15 +47,28 @@ pip3 install -e git+https://github.com/ckan/ckanext-spatial.git@v2.1.1#egg=ckane pip3 install -r ${SRC_DIR}/ckanext-spatial/requirements.txt # XLoader -pip3 install -e 'git+https://github.com/ckan/ckanext-xloader.git@1.0.1#egg=ckanext-xloader' +pip3 install -e git+https://github.com/ckan/ckanext-xloader.git@1.0.1#egg=ckanext-xloader pip3 install -r ${SRC_DIR}/ckanext-xloader/requirements.txt # Geoview -pip3 install -e 'git+https://github.com/ckan/ckanext-geoview.git@v0.1.0#egg=ckanext-geoview' +pip3 install -e git+https://github.com/ckan/ckanext-geoview.git@v0.1.0#egg=ckanext-geoview + -# 3rd Party # +## 3rd Party ## # DOI pip3 install -e git+https://github.com/NaturalHistoryMuseum/ckanext-doi@v3.1.10#egg=ckanext-doi -# DBCA Project + +## DBCA Project ## + +# DBCA pip3 install -e git+https://github.com/dbca-wa/ckanext-dbca.git@develop#egg=ckanext-dbca + +# QA +# Install qsv dependency for extension ckanext-qa +wget -O /tmp/qsv.zip https://github.com/jqnatividad/qsv/releases/download/0.110.0/qsv-0.110.0-x86_64-unknown-linux-musl.zip +unzip /tmp/qsv.zip -d /usr/local/bin +rm /tmp/qsv.zip +apk add file +pip3 install -e git+https://github.com/dbca-wa/ckanext-qa.git@develop#egg=ckanext-qa +pip3 install -r ${SRC_DIR}/ckanext-qa/requirements.txt diff --git a/ckan/setup/dbca_start_ckan.sh.override b/ckan/setup/dbca_start_ckan.sh.override index 174cd06d..f4b19028 100755 --- a/ckan/setup/dbca_start_ckan.sh.override +++ b/ckan/setup/dbca_start_ckan.sh.override @@ -1,5 +1,7 @@ #!/bin/bash +# Set the CKAN plugins variable to the plugins value from the dbca.ini config file +export CKAN__PLUGINS=$(grep '^ckan\.plugins' $APP_DIR/config/dbca.ini | cut -d'=' -f2) # Update the default ckan config file export CKAN_INI=$APP_DIR/ckan.ini @@ -32,8 +34,6 @@ fi # Revert back touse DBCA config file export CKAN_INI=$APP_DIR/config/dbca.ini -# Unset CKAN__PLUGINS to stop the extension envvars overriding the plugins value set in the dbca config file -unset CKAN__PLUGINS if [ $? -eq 0 ] then diff --git a/ckan/setup/dbca_start_ckan_development.sh.override b/ckan/setup/dbca_start_ckan_development.sh.override index 75cce24e..d5fdafeb 100644 --- a/ckan/setup/dbca_start_ckan_development.sh.override +++ b/ckan/setup/dbca_start_ckan_development.sh.override @@ -1,5 +1,8 @@ #!/bin/sh +# Set the CKAN plugins variable to the plugins value from the dbca.ini config file +export CKAN__PLUGINS=$(grep '^ckan\.plugins' $APP_DIR/config/dbca.ini | cut -d'=' -f2) + # Only run these start up scripts the first time the container is created if [ ! -f /tmp/container_ready ]; then # Install any local extensions in the src_extensions volume @@ -60,10 +63,6 @@ if [ ! -f /tmp/container_ready ]; then ckan config-tool $CKAN_INI "api_token.jwt.decode.secret=${JWT_SECRET}" fi - # Update the plugins setting in the ini file with the values defined in the env var - echo "Loading the following plugins: $CKAN__PLUGINS" - ckan config-tool $CKAN_INI "ckan.plugins = $CKAN__PLUGINS" - # Update test-core.ini DB, SOLR & Redis settings echo "Loading test settings into test-core.ini" ckan config-tool $SRC_DIR/ckan/test-core.ini \ @@ -91,8 +90,7 @@ if [ ! -f /tmp/container_ready ]; then # Revert back touse DBCA config file export CKAN_INI=$APP_DIR/config/dbca.ini - # Unset CKAN__PLUGINS to stop the extension envvars overriding the plugins value set in the dbca config file - unset CKAN__PLUGINS + # Set the container as ready so the startup scripts are not run again touch /tmp/container_ready fi diff --git a/ckan/supervisor/ckan_worker_bulk.conf b/ckan/supervisor/ckan_worker_bulk.conf new file mode 100644 index 00000000..1aaa2ed4 --- /dev/null +++ b/ckan/supervisor/ckan_worker_bulk.conf @@ -0,0 +1,42 @@ +; ======================================================= +; Supervisor configuration for CKAN background job worker +; ======================================================= + +; 1. Copy this file to /etc/supervisor/conf.d +; 2. Make sure the paths below match your setup + + +[program:ckan-worker-bulk] + +; Use the full paths to the virtualenv and your configuration file here. +command=/bin/bash -c "export CKAN__PLUGINS=$(grep '^ckan\.plugins' $APP_DIR/config/dbca.ini | cut -d'=' -f2); exec /usr/bin/ckan -c /srv/app/config/dbca.ini jobs worker bulk" + + +; User the worker runs as. +user=ckan + + +; Start just a single worker. Increase this number if you have many or +; particularly long running background jobs. +numprocs=1 +process_name=%(program_name)s-%(process_num)02d + + +; Log files. +stdout_logfile=/srv/app/logs/ckan-worker.stdout.log +stderr_logfile=/srv/app/logs/ckan-worker.stderr.log + + +; Make sure that the worker is started on system start and automatically +; restarted if it crashes unexpectedly. +autostart=true +autorestart=true + + +; Number of seconds the process has to run before it is considered to have +; started successfully. +startsecs=10 + +; Need to wait for currently executing tasks to finish at shutdown. +; Increase this if you have very long running tasks. +stopwaitsecs = 600 diff --git a/ckan/supervisor/ckan_worker_default.conf b/ckan/supervisor/ckan_worker_default.conf index 6d8d7303..30378a35 100644 --- a/ckan/supervisor/ckan_worker_default.conf +++ b/ckan/supervisor/ckan_worker_default.conf @@ -9,7 +9,7 @@ [program:ckan-worker-default] ; Use the full paths to the virtualenv and your configuration file here. -command=/usr/bin/ckan -c /srv/app/config/dbca.ini jobs worker +command=/bin/bash -c "export CKAN__PLUGINS=$(grep '^ckan\.plugins' $APP_DIR/config/dbca.ini | cut -d'=' -f2); exec /usr/bin/ckan -c /srv/app/config/dbca.ini jobs worker" ; User the worker runs as. diff --git a/ckan/supervisor/ckan_worker_priority.conf b/ckan/supervisor/ckan_worker_priority.conf new file mode 100644 index 00000000..bd348274 --- /dev/null +++ b/ckan/supervisor/ckan_worker_priority.conf @@ -0,0 +1,42 @@ +; ======================================================= +; Supervisor configuration for CKAN background job worker +; ======================================================= + +; 1. Copy this file to /etc/supervisor/conf.d +; 2. Make sure the paths below match your setup + + +[program:ckan-worker-priority] + +; Use the full paths to the virtualenv and your configuration file here. +command=/bin/bash -c "export CKAN__PLUGINS=$(grep '^ckan\.plugins' $APP_DIR/config/dbca.ini | cut -d'=' -f2); exec /usr/bin/ckan -c /srv/app/config/dbca.ini jobs worker priority" + + +; User the worker runs as. +user=ckan + + +; Start just a single worker. Increase this number if you have many or +; particularly long running background jobs. +numprocs=1 +process_name=%(program_name)s-%(process_num)02d + + +; Log files. +stdout_logfile=/srv/app/logs/ckan-worker.stdout.log +stderr_logfile=/srv/app/logs/ckan-worker.stderr.log + + +; Make sure that the worker is started on system start and automatically +; restarted if it crashes unexpectedly. +autostart=true +autorestart=true + + +; Number of seconds the process has to run before it is considered to have +; started successfully. +startsecs=10 + +; Need to wait for currently executing tasks to finish at shutdown. +; Increase this if you have very long running tasks. +stopwaitsecs = 600 diff --git a/docker-compose.yml b/docker-compose.yml index 0eaad072..29d12c04 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,6 +23,8 @@ services: condition: service_healthy ports: - "0.0.0.0:${NGINX_SSLPORT_HOST}:${NGINX_SSLPORT}" + volumes: + - ckan_storage:/var/lib/ckan ckan: container_name: ${CKAN_CONTAINER_NAME} diff --git a/nginx/Dockerfile b/nginx/Dockerfile index 2e781263..8e6bbce5 100644 --- a/nginx/Dockerfile +++ b/nginx/Dockerfile @@ -21,4 +21,4 @@ ENTRYPOINT \ -keyout ${NGINX_DIR}/certs/ckan-local.key \ -out ${NGINX_DIR}/certs/ckan-local.crt \ -days 365 && \ - /docker-entrypoint.sh nginx -g 'daemon off;' \ No newline at end of file + /docker-entrypoint.sh nginx -g 'daemon off;' diff --git a/nginx/setup/default.conf b/nginx/setup/default.conf index a628619f..c93e5ccd 100644 --- a/nginx/setup/default.conf +++ b/nginx/setup/default.conf @@ -32,6 +32,10 @@ server { proxy_cache_key $host$scheme$proxy_host$request_uri; } + # archived files from ckanext-archiver + location /resource_cache/ { + alias /var/lib/ckan/archiver/; + } error_page 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 421 422 423 424 425 426 428 429 431 451 500 501 502 503 504 505 506 507 508 510 511 /error.html; # redirect server error pages to the static page /error.html diff --git a/src/dbca_install_extensions.sh b/src/dbca_install_extensions.sh index 1ee8b573..a4db0509 100644 --- a/src/dbca_install_extensions.sh +++ b/src/dbca_install_extensions.sh @@ -5,16 +5,13 @@ # Uncomment the following lines to install these extension you are working on to upgrade to CKAN 2.10 cd src/ - -## Must Have ## +# DBCA git clone https://github.com/dbca-wa/ckanext-dbca.git - # QA -# git clone https://github.com/dbca-wa/ckanext-qa.git -# These extensions will be installed by default, but we don't want them -# sed -i".$(date +%Y%m%d_%H%M%S).bak" -e '/ckanext-report/d' -e '/ckanext-archiver/d' ckanext-qa/dev-requirements.txt +git clone https://github.com/dbca-wa/ckanext-qa.git +#These extensions will be installed by default, but we don't want them +sed -i".$(date +%Y%m%d_%H%M%S).bak" -e '/ckanext-report/d' -e '/ckanext-archiver/d' ckanext-qa/dev-requirements.txt # Office Docs # git clone https://github.com/dbca-wa/ckanext-officedocs - echo "Ready to build project: ahoy build"