From 26d703b0a20dec4569a9011b0a4b24714a85bcc2 Mon Sep 17 00:00:00 2001 From: "Jens W. Klein" Date: Mon, 13 Nov 2023 19:30:14 +0100 Subject: [PATCH] add how to use the ingest-attachment plugin --- README.rst | 19 ++++++++++++++++--- examples/Dockerfile | 2 ++ examples/docker-compose.yml | 4 ++-- 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 examples/Dockerfile diff --git a/README.rst b/README.rst index f173781..7e3558f 100644 --- a/README.rst +++ b/README.rst @@ -135,21 +135,34 @@ Example configuration files are provided in the ``/examples`` directory. OpenSearch with Docker Compose ------------------------------ -A docker-compose file ``docker-compose.yml`` to start an OpenSearch server is provided. +A docker-compose file ``docker-compose.yml`` and a ``Dockerfile`` to start an OpenSearch server are provided. Precondition: + - Docker and docker-compose are installed. - Max virtual memory map needs increase to run this: `sudo sysctl -w vm.max_map_count=262144` (not permanent, `see StackOverflow post `_). -Enter the directory ``examples`` and start the server with ``docker-compose up``. +Steps to start the example OpenSearch server with the ``ingest-attachment`` plugin: + +- enter the directory ``cd examples`` +- build the docker image with + + ```bash + docker buildx use default + docker buildx build --tag opensearch-ingest-attachment:latest . + ``` +- start the server with ``docker-compose up``. + Now you have an OpenSearch server running on ``http://localhost:9200`` and an OpenSearch Dashboard running on ``http://localhost:5601`` (user/pass: admin/admin). +The OpenSearch server has the ``ingest-attachment`` plugin installed. +The plugin enables OpenSearch to extract text from binary files like PDFs. Open another terminal. An `.env` file is provided with the environment variables ready to use with the docker-compose file. Run ``source examples/.env`` to load the environment variables. 
- Then start the celery worker with ``celery -A collective.elastic.ingest.celery.app worker -l debug``. + In another terminal window `run a Plone backend `_ at ``http://localhost:8080/Plone`` with the add-on `collective.elastic.plone` installed. There, create an item or modify an existing one. You should see the indexing task in the celery worker terminal window. diff --git a/examples/Dockerfile b/examples/Dockerfile new file mode 100644 index 0000000..c7a56a4 --- /dev/null +++ b/examples/Dockerfile @@ -0,0 +1,2 @@ +FROM opensearchproject/opensearch:latest +RUN /usr/share/opensearch/bin/opensearch-plugin install --batch ingest-attachment \ No newline at end of file diff --git a/examples/docker-compose.yml b/examples/docker-compose.yml index a2af590..4c3507a 100644 --- a/examples/docker-compose.yml +++ b/examples/docker-compose.yml @@ -3,7 +3,7 @@ version: '3' services: opensearch-node1: # This is also the hostname of the container within the Docker network (i.e. https://opensearch-node1/) - image: opensearchproject/opensearch:latest # Specifying the latest available image - modify if you want a specific version + image: opensearch-ingest-attachment:latest # Specifying the freshly build image container_name: opensearch-node1 environment: - cluster.name=opensearch-cluster # Name the cluster @@ -27,7 +27,7 @@ services: networks: - opensearch-net # All of the containers will join the same Docker bridge network opensearch-node2: - image: opensearchproject/opensearch:latest # This should be the same image used for opensearch-node1 to avoid issues + image: opensearch-ingest-attachment:latest # Specifying the freshly build image container_name: opensearch-node2 environment: - cluster.name=opensearch-cluster