diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..7ef4f64
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,22 @@
+# Generated from:
+# https://github.com/plone/meta/tree/master/config/default
+# See the inline comments on how to expand/tweak this configuration file
+[flake8]
+doctests = 1
+ignore =
+ # black takes care of line length
+ E501,
+ # black takes care of where to break lines
+ W503,
+ # black takes care of spaces within slicing (list[:])
+ E203,
+ # black takes care of spaces after commas
+ E231,
+
+##
+# Add extra configuration options in .meta.toml:
+# [flake8]
+# extra_lines = """
+# _your own configuration lines_
+# """
+##
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..a2d669b
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,25 @@
+name: Python package CI
+
+on:
+ - push
+ - pull_request
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ['3.10', '3.11', '3.12']
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install tox tox-gh-actions
+ - name: Test with tox
+ run: tox
diff --git a/.gitignore b/.gitignore
index 79628c7..df70903 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,7 @@ output.xml
pip-selfcheck.json
report.html
.vscode/
+.tox
.python-version
reports/
# excludes
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..02a90f5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,42 @@
+ci:
+ autofix_prs: false
+ autoupdate_schedule: monthly
+
+repos:
+- repo: https://github.com/asottile/pyupgrade
+ rev: v3.15.0
+ hooks:
+ - id: pyupgrade
+- repo: https://github.com/pycqa/isort
+ rev: 5.12.0
+ hooks:
+ - id: isort
+- repo: https://github.com/psf/black
+ rev: 23.9.1
+ hooks:
+ - id: black
+- repo: https://github.com/PyCQA/flake8
+ rev: 6.1.0
+ hooks:
+ - id: flake8
+- repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v1.5.1
+ hooks:
+ - id: mypy
+ additional_dependencies:
+ - "types-requests"
+ - "pytest-stub"
+# - repo: https://github.com/codespell-project/codespell
+# rev: v2.2.5
+# hooks:
+# - id: codespell
+# additional_dependencies:
+# - tomli
+- repo: https://github.com/mgedmin/check-manifest
+ rev: "0.49"
+ hooks:
+ - id: check-manifest
+- repo: https://github.com/regebro/pyroma
+ rev: "4.2"
+ hooks:
+ - id: pyroma
diff --git a/CHANGES.rst b/CHANGES.rst
index 0910700..ed57bf3 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -15,6 +15,8 @@ Changelog
[jensens]
- Add docker-compose file to start OpensSearch to example directory and move `.env` to example too.
[jensens]
+- Rename `ELASTIC_*` environment variables to have a consistent naming scheme, see README for details. [jensens]
+- Add tox, GitHub Actions, CI and CD. [jensens]
diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst
index aa9e86c..87cff98 100644
--- a/CONTRIBUTORS.rst
+++ b/CONTRIBUTORS.rst
@@ -1,5 +1,8 @@
Contributors
============
-- Jens W. Klein, jk@kleinundpartner.at
-- Katja Süss, Rohberg, @ksuess
+- Peter Holzer - Initiative, idea and testing.
+- Jens W. Klein, jk@kleinundpartner.at - Concept & code.
+- Katja Süss, Rohberg, @ksuess - Text analysis code and configuration.
+
+Initial implementation was made possible by `Evangelisch-reformierte Landeskirche des Kantons Zürich `_.
diff --git a/README.rst b/README.rst
index f7759de..a7a8624 100644
--- a/README.rst
+++ b/README.rst
@@ -41,27 +41,27 @@ Configuration is done via environment variables and JSON files.
Environment variables are:
-ELASTICSEARCH_INGEST_SERVER
+INGEST_SERVER
The URL of the ElasticSearch or OpenSearch server.
Default: localhost:9200
-ELASTICSEARCH_INGEST_USE_SSL
- Wether to use a secure connection or not.
+INGEST_USE_SSL
+ Whether to use a secure connection or not.
Default: 0
-OPENSEARCH
- Wether to use OpenSearch or ElasticSearch.
+INGEST_OPENSEARCH
+ Whether to use OpenSearch or ElasticSearch.
Default: 1
-ELASTICSEARCH_INGEST_LOGIN
+INGEST_LOGIN
Username for the ElasticSearch 8+ or OpenSearch server.
Default: admin
-ELASTICSEARCH_INGEST_PASSWORD
+INGEST_PASSWORD
Password for the ElasticSearch 8+ or OpenSearch server.
Default: admin
@@ -139,8 +139,7 @@ A docker-compose file ``docker-compose.yml`` to start an OpenSearch server is pr
Precondition:
- Docker and docker-compose are installed.
-- Max virtual memory map needs increase to run this: `sudo sysctl -w vm.max_map_count=262144`
- (not permanent, `see StackOverflow post `_).
+- The maximum virtual memory map count must be increased to run this: ``sudo sysctl -w vm.max_map_count=262144`` (not permanent, `see StackOverflow post `_).
Enter the directory ``examples`` and start the server with ``docker-compose up``.
Now you have an OpenSearch server running on ``http://localhost:9200`` and an OpenSearch Dashboard running on ``http://localhost:5601`` (user/pass: admin/admin).
@@ -245,20 +244,6 @@ We appreciate any contribution and if a release is needed to be done on pypi, pl
We also offer commercial support if any training, coaching, integration or adaptions are needed.
--------------
-Contributions
--------------
-
-Initial implementation was made possible by `Evangelisch-reformierte Landeskirche des Kantons Zürich `_.
-
-Idea and testing by Peter Holzer
-
-Concept & code by Jens W. Klein
-
-Text analysis code and configuration by Katja Süss
-
-
-
----------------------------
Installation for development
----------------------------
@@ -270,14 +255,6 @@ Installation for development
- load environment configuration ``source examples/.env``.
-----
-Todo
-----
-
-- query status of a task
-- simple statistics about tasks-count: pending, done, errored
-- celery retry on failure, i.e. restart of ElasticSearch, Plone, ...
-
-------
License
-------
diff --git a/constraints.txt b/constraints.txt
deleted file mode 100644
index f145db9..0000000
--- a/constraints.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-amqp==2.6.1
-billiard==3.6.4.0
-CacheControl==0.12.6
-celery==4.4.7
-certifi==2021.5.30
-chardet==4.0.0
-elasticsearch==7.13.1
-idna==2.10
-kombu==4.6.11
-msgpack==1.0.2
-pytz==2021.1
-redis==3.5.3
-requests==2.25.1
-urllib3==1.26.5
-wcwidth==0.2.5
diff --git a/examples/.env b/examples/.env
index 769e757..3c5ded4 100644
--- a/examples/.env
+++ b/examples/.env
@@ -5,11 +5,11 @@
# Then `source .env` and start the ingest-service with: celery -A collective.elastic.ingest.celery.app worker -l debug
export CELERY_BROKER=redis://localhost:6379/0
-export ELASTICSEARCH_INGEST_SERVER=localhost:9200
-export ELASTICSEARCH_INGEST_USE_SSL=1
-export OPENSEARCH=1
-export ELASTICSEARCH_INGEST_LOGIN=admin
-export ELASTICSEARCH_INGEST_PASSWORD=admin
+export INGEST_SERVER=localhost:9200
+export INGEST_USE_SSL=1
+export INGEST_OPENSEARCH=1
+export INGEST_LOGIN=admin
+export INGEST_PASSWORD=admin
export PLONE_SERVICE=http://localhost:8080
export PLONE_PATH=Plone
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..f21d8ff
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,2 @@
+[mypy]
+mypy_path=./src
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..6d34897
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,90 @@
+[project]
+name = "collective.elastic.ingest"
+version = "2.0.0dev0"
+description = "Ingestion service queue runner between Plone RestAPI and ElasticSearch or OpenSearch."
+keywords = ["elasticsearch", "opensearch", "plone", "celery", "search", "indexer"]
+readme = "README.rst"
+
+authors = [
+ {name = "Jens Klein", email = "jk@kleinundpartner.at"},
+ {name = "Katja Süss"},
+ {name = "Peter Holzer"},
+]
+requires-python = ">=3.8"
+license = { text = "GPL 2.0" }
+classifiers = [
+ "Environment :: Web Environment",
+ "Framework :: Plone",
+ "Framework :: Plone :: Addon",
+ "Framework :: Plone :: 5.2",
+ "Framework :: Plone :: 6.0",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Operating System :: OS Independent",
+ "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
+ "Development Status :: 5 - Production/Stable",
+]
+dependencies = [
+ "CacheControl",
+ "celery",
+ "requests",
+ "setuptools",
+]
+
+[project.urls]
+PyPI = "https://pypi.python.org/pypi/collective.elastic.ingest"
+Changelog = "https://github.com/collective/collective.elastic.ingest/blob/main/CHANGES.rst"
+Source = "https://github.com/collective/collective.elastic.ingest"
+Issues = "https://github.com/collective/collective.elastic.ingest/issues"
+
+[project.optional-dependencies]
+redis = ["celery[redis]"]
+rabbitmq = ["celery[librabbitmq]"]
+opensearch = ["opensearch-py"]
+elasticsearch = ["elasticsearch>=8.0"]
+test = [
+ "pytest",
+ "requests-mock",
+ "pdbpp",
+]
+
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+testpaths = [
+ "src",
+]
+
+[tool.isort]
+profile = "plone"
+
+[tool.black]
+include = "src"
+
+[tool.codespell]
+ignore-words-list = "discreet,"
+skip = './examples/*,./venv/*'
+
+[tool.check-manifest]
+ignore = [
+ ".editorconfig",
+ ".pre-commit-config.yaml",
+ "tox.ini",
+ "mypy.ini",
+ ".flake8",
+ "mx.ini",
+
+]
+
+[tool.zest-releaser]
+create-wheel = true
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index e4ff5a0..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,11 +0,0 @@
-[mypy]
-ignore_missing_imports = True
-
-[bdist_wheel]
-universal=1
-
-[isort]
-profile = plone
-
-[zest.releaser]
-create-wheel = yes
\ No newline at end of file
diff --git a/setup.py b/setup.py
deleted file mode 100644
index dadfb20..0000000
--- a/setup.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-from setuptools import find_packages
-from setuptools import setup
-
-
-long_description = "\n\n".join(
- [
- open("README.rst").read(),
- open("CHANGES.rst").read(),
- open("CONTRIBUTORS.rst").read(),
- ]
-)
-
-
-setup(
- name="collective.elastic.ingest",
- version="1.4.1.dev0",
- project_urls={
- "PyPI": "https://pypi.python.org/pypi/collective.elastic.ingest",
- "Source": "https://github.com/collective/collective.elastic.ingest",
- "Tracker": "https://github.com/collective/collective.elastic.ingest/issues",
- },
- description="Addon for ElasticSearch integration with Plone",
- long_description=long_description,
- # Get more from https://pypi.org/classifiers/
- classifiers=[
- "Environment :: Web Environment",
- "Framework :: Plone",
- "Framework :: Plone :: Addon",
- "Framework :: Plone :: 5.2",
- "Framework :: Plone :: 6.0",
- "Programming Language :: Python",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: 3.11",
- "Operating System :: OS Independent",
- "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
- ],
- keywords="Python Plone",
- packages=find_packages("src"),
- namespace_packages=["collective", "collective.elastic"],
- package_dir={"": "src"},
- include_package_data=True,
- zip_safe=False,
- python_requires=">=3.7",
- install_requires=[
- "CacheControl",
- "celery",
- "requests",
- "setuptools",
- ],
- extras_require={
- "redis": ["celery[redis]"],
- "rabbitmq": ["celery[librabbitmq]"],
- "opensearch": ["opensearch-py"],
- "elasticsearch7": ["elasticsearch~=7.0"],
- "elasticsearch8": ["elasticsearch~=8.0"],
- },
-)
diff --git a/src/collective/elastic/ingest/__init__.py b/src/collective/elastic/ingest/__init__.py
index 3557620..c8930d4 100644
--- a/src/collective/elastic/ingest/__init__.py
+++ b/src/collective/elastic/ingest/__init__.py
@@ -5,8 +5,15 @@
-OPENSEARCH = os.environ.get("OPENSEARCH") == "1"
+# INGEST_OPENSEARCH is the documented switch (see README); the pre-rename
+# OPENSEARCH variable is still honoured so existing deployments keep working.
+OPENSEARCH = os.environ.get("INGEST_OPENSEARCH", os.environ.get("OPENSEARCH")) == "1"
-version_elasticsearch = version("elasticsearch")
-ELASTICSEARCH_7 = int(version_elasticsearch[0]) <= 7
-
-version_opensearchpy = version("opensearch-py")
-OPENSEARCH_2 = int(version_opensearchpy[0]) <= 2
+# Only the client library of the selected backend has to be installed, so
+# look up just that distribution; the other backend's flag is fixed to False.
+if OPENSEARCH:
+    version_opensearchpy = version("opensearch-py")
+    OPENSEARCH_2 = int(version_opensearchpy.split(".")[0]) <= 2
+    ELASTICSEARCH_7 = False
+else:
+    version_elasticsearch = version("elasticsearch")
+    ELASTICSEARCH_7 = int(version_elasticsearch.split(".")[0]) <= 7
+    OPENSEARCH_2 = False
diff --git a/src/collective/elastic/ingest/analysis/analysis.py b/src/collective/elastic/ingest/analysis.py
similarity index 96%
rename from src/collective/elastic/ingest/analysis/analysis.py
rename to src/collective/elastic/ingest/analysis.py
index e750015..9473222 100644
--- a/src/collective/elastic/ingest/analysis/analysis.py
+++ b/src/collective/elastic/ingest/analysis.py
@@ -1,5 +1,5 @@
-from ..elastic import get_ingest_client
-from ..logging import logger
+from .elastic import get_ingest_client
+from .logging import logger
from collective.elastic.ingest import ELASTICSEARCH_7
from collective.elastic.ingest import OPENSEARCH
from collective.elastic.ingest import OPENSEARCH_2
diff --git a/src/collective/elastic/ingest/analysis/__init__.py b/src/collective/elastic/ingest/analysis/__init__.py
deleted file mode 100644
index 4665f5c..0000000
--- a/src/collective/elastic/ingest/analysis/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .analysis import update_analysis
diff --git a/src/collective/elastic/ingest/elastic.py b/src/collective/elastic/ingest/elastic.py
index 312efda..07a84cd 100644
--- a/src/collective/elastic/ingest/elastic.py
+++ b/src/collective/elastic/ingest/elastic.py
@@ -1,37 +1,36 @@
-# -*- coding: utf-8 -*-
from . import ELASTICSEARCH_7
from . import OPENSEARCH
-from . import version_elasticsearch
from .logging import logger
import os
+
if OPENSEARCH:
from opensearchpy import OpenSearch
else:
from elasticsearch import Elasticsearch
-
def get_ingest_client(elasticsearch_server_baseurl=None):
"""return elasticsearch client for.ingest"""
raw_addr = elasticsearch_server_baseurl or os.environ.get(
- "ELASTICSEARCH_INGEST_SERVER", "http://localhost:9200"
+ "INGEST_SERVER", "http://localhost:9200"
)
- use_ssl = os.environ.get("ELASTICSEARCH_INGEST_USE_SSL", "0")
+ use_ssl = os.environ.get("INGEST_USE_SSL", "0")
use_ssl = bool(int(use_ssl))
addresses = [x for x in raw_addr.split(",") if x.strip()]
if not addresses:
addresses.append("127.0.0.1:9200")
+
if OPENSEARCH:
hosts = []
for address in addresses:
host, port = address.rsplit(":", 1)
hosts.append({"host": host, "port": port})
auth = (
- os.environ.get("ELASTICSEARCH_INGEST_LOGIN", "admin"),
- os.environ.get("ELASTICSEARCH_INGEST_PASSWORD", "admin"),
+ os.environ.get("INGEST_LOGIN", "admin"),
+ os.environ.get("INGEST_PASSWORD", "admin"),
)
client = OpenSearch(
hosts=hosts,
@@ -43,6 +42,8 @@ def get_ingest_client(elasticsearch_server_baseurl=None):
logger.info(f"OpenSearch client info: {info}")
return client
elif ELASTICSEARCH_7:
+ from . import version_elasticsearch
+
logger.info(f"ElasticSearch version {version_elasticsearch} installed")
return Elasticsearch(
addresses,
diff --git a/src/collective/elastic/ingest/ingest/section.py b/src/collective/elastic/ingest/ingest/section.py
index cebb812..3511a08 100644
--- a/src/collective/elastic/ingest/ingest/section.py
+++ b/src/collective/elastic/ingest/ingest/section.py
@@ -1,5 +1,3 @@
-from ..logging import logger
-
import os
diff --git a/src/collective/elastic/ingest/ingest/vocabularyfields.py b/src/collective/elastic/ingest/ingest/vocabularyfields.py
index 7d2b671..4d596bc 100644
--- a/src/collective/elastic/ingest/ingest/vocabularyfields.py
+++ b/src/collective/elastic/ingest/ingest/vocabularyfields.py
@@ -1,17 +1,14 @@
-from ..logging import logger
-
-
def stripVocabularyTermTitles(content):
"""If field with vocabulary: Convert field value to token or list of tokens."""
for fieldname in content.keys():
- if type(content[fieldname]) == dict:
+ if type(content[fieldname]) is dict:
if sorted(list(content[fieldname].keys())) == ["title", "token"]:
content[fieldname] = content[fieldname]["token"]
- if type(content[fieldname]) == list:
+ if type(content[fieldname]) is list:
if (
len(content[fieldname]) > 0
- and type(content[fieldname][0]) == dict
+ and type(content[fieldname][0]) is dict
and sorted(list(content[fieldname][0].keys())) == ["title", "token"]
):
content[fieldname] = [el["token"] for el in content[fieldname]]
diff --git a/src/collective/elastic/ingest/logging.py b/src/collective/elastic/ingest/logging.py
index 4ce52b0..bb800c2 100644
--- a/src/collective/elastic/ingest/logging.py
+++ b/src/collective/elastic/ingest/logging.py
@@ -1,5 +1,5 @@
try:
- import collective.elastic.plone # noqa: W291
+ import collective.elastic.plone # noqa: W291,F401
import logging
logger = logging.getLogger("collective.elastic.ingest")
diff --git a/src/collective/elastic/ingest/mapping.py b/src/collective/elastic/ingest/mapping.py
index cd57767..78c7f99 100644
--- a/src/collective/elastic/ingest/mapping.py
+++ b/src/collective/elastic/ingest/mapping.py
@@ -1,3 +1,4 @@
+from . import ELASTICSEARCH_7
from .elastic import get_ingest_client
from .logging import logger
from copy import deepcopy
@@ -6,6 +7,7 @@
import operator
import os
import pprint
+import typing
pp = pprint.PrettyPrinter(indent=4)
@@ -16,7 +18,7 @@
STATE = {"initial": True}
-DETECTOR_METHODS = {}
+DETECTOR_METHODS: typing.Dict[str, typing.Callable] = {}
_mappings_file = os.environ.get(
"MAPPINGS_FILE", os.path.join(os.path.dirname(__file__), "mappings.json")
@@ -65,7 +67,7 @@ def map_field(field, properties, fqfieldname, seen):
seen.add(field["name"])
logger.debug(f"Map field name {field['name']} to definition {definition}")
if "type" in definition:
- # simple defintion
+ # simple definition
properties[field["name"]] = definition
return
# complex definition
diff --git a/src/collective/elastic/ingest/preprocessing.py b/src/collective/elastic/ingest/preprocessing.py
index 14b3bb2..876f90a 100644
--- a/src/collective/elastic/ingest/preprocessing.py
+++ b/src/collective/elastic/ingest/preprocessing.py
@@ -12,7 +12,7 @@
with open(_preprocessings_file) as fp:
PREPROCESSOR_CONFIGS = json.load(fp)
-### MATCHERS
+# MATCHERS
MATCHING_FUNCTIONS = {}
@@ -36,7 +36,7 @@ def match_content_exists(content, full_schema, config):
MATCHING_FUNCTIONS["content_exists"] = match_content_exists
-### ACTIONS
+# ACTIONS
ACTION_FUNCTIONS = {}
@@ -44,7 +44,7 @@ def match_content_exists(content, full_schema, config):
def action_additional_schema(content, full_schema, config):
"""add additional fields to a full_schema as fetched from Plone"""
if full_schema is None:
- # case: in subsequent calls theres no need to modify schema b/c of caching
+ # case: in subsequent calls there is no need to modify schema b/c of caching
return
if "additional" not in full_schema:
full_schema["additional"] = {}
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..56e00b1
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,56 @@
+# Generated from:
+# https://github.com/plone/meta/tree/master/config/default
+# See the inline comments on how to expand/tweak this configuration file
+[tox]
+# We need 4.4.0 for constrain_package_deps.
+min_version = 4.4.0
+envlist =
+    py38
+    py39
+    py310
+    py311
+    py312
+    test
+    lint
+
+# Read by tox-gh-actions: maps the runner's Python version to tox environments.
+[gh-actions]
+python =
+    3.8: py38
+    3.9: py39
+    3.10: py310
+    3.11: py311
+    3.12: py312, lint
+
+[testenv]
+use_develop = true
+skip_install = false
+constrain_package_deps = true
+
+commands =
+    pytest {posargs}
+# Extras are the names declared under [project.optional-dependencies] in
+# pyproject.toml, not distribution names: "opensearch" installs opensearch-py.
+extras =
+    test
+    elasticsearch
+    opensearch
+
+[testenv:format]
+description = automatically reformats code
+skip_install = true
+deps =
+    pre-commit
+commands =
+    pre-commit run -a pyupgrade
+    pre-commit run -a isort
+    pre-commit run -a black
+
+[testenv:lint]
+description = run linters that will help improve the code style
+skip_install = true
+deps =
+    pre-commit
+commands =
+    pre-commit run -a
+