From 1f4fd45f88d5bc937a323b5a567e4d95df3b0c4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 14 Nov 2024 06:26:56 +0100 Subject: [PATCH] feat: meilisearch backend for notes search (#444) * feat: introduce "make compile-requirement" target This is convenient to compile dependencies without upgrading them. * chore: simplify tox/make test commands This makes it possible to run the make commands directly without going through tox (though tox targets keep working, of course). * chore: more convenient unit test running Previously, it was not possible to run unit tests locally without manually creating mysql & elasticsearch containers. Here, we create a `make pytest` target that automatically starts the required containers. * chore: refactor views for better mysql/es separation Instead of checking a boolean flag in multiple different places, we use class inheritance. This makes it possible to later override the view and implement our own using a different search backend, such as Meilisearch. * feat: meilisearch backend for notes search This is a very simple and basic backend. It is based on Django signals, just like the Elasticsearch backend. But it is much simpler, in the sense that there are just two signals: one for saving documents and one for deletion. This backend is limited, in the sense that it does not support highlighting -- but that's probably not such a big deal. To start using this backend, define the following settings: ES_DISABLED = True MEILISEARCH_ENABLED = True MEILISEARCH_URL = "http://meilisearch:7700" MEILISEARCH_API_KEY = "s3cr3t" MEILISEARCH_INDEX = "tutor_student_notes" --- Makefile | 41 +- README.rst | 17 +- notesapi/v1/tests/test_meilisearch.py | 96 ++++ notesapi/v1/urls.py | 8 +- notesapi/v1/views.py | 610 -------------------------- notesapi/v1/views/__init__.py | 26 ++ notesapi/v1/views/common.py | 560 +++++++++++++++++++++++ notesapi/v1/views/elasticsearch.py | 142 ++++++ notesapi/v1/views/exceptions.py | 2 + notesapi/v1/views/meilisearch.py | 193 ++++++++ notesapi/v1/views/mysql.py | 0 notesserver/docker-compose.test.yml | 18 + notesserver/test_views.py | 6 +- notesserver/views.py | 41 +- requirements/base.in | 1 + requirements/base.txt | 14 +- requirements/quality.txt | 27 ++ requirements/test.txt | 21 + tox.ini | 4 - 19 files changed, 1167 insertions(+), 660 deletions(-) create mode 100644 notesapi/v1/tests/test_meilisearch.py delete mode 100644 notesapi/v1/views.py create mode 100644 notesapi/v1/views/__init__.py create mode 100644 notesapi/v1/views/common.py create mode 100644 notesapi/v1/views/elasticsearch.py create mode 100644 notesapi/v1/views/exceptions.py create mode 100644 notesapi/v1/views/meilisearch.py create mode 100644 notesapi/v1/views/mysql.py create mode 100644 notesserver/docker-compose.test.yml diff --git a/Makefile b/Makefile index b3d9a6ad..2082bb82 100644 --- a/Makefile +++ b/Makefile @@ -3,15 +3,23 @@ PACKAGES = notesserver notesapi validate: test.requirements test +pytest: test-start-services test test-stop-services + test: clean python -Wd -m pytest +test-start-services: + docker compose -f notesserver/docker-compose.test.yml --project-name=edxnotesapi_test up -d --remove-orphans + +test-stop-services: + docker compose -f notesserver/docker-compose.test.yml --project-name=edxnotesapi_test stop + pii_check: test.requirements pii_clean - code_annotations django_find_annotations --config_file .pii_annotations.yml \ + DJANGO_SETTINGS_MODULE=notesserver.settings.test code_annotations django_find_annotations --config_file .pii_annotations.yml \ --lint --report --coverage check_keywords: ## Scan the Django models in all installed apps in this project for restricted field names - python manage.py check_reserved_keywords --override_file db_keyword_overrides.yml + DJANGO_SETTINGS_MODULE=notesserver.settings.test python manage.py check_reserved_keywords --override_file db_keyword_overrides.yml run: ./manage.py runserver 0.0.0.0:8120 @@ -26,9 +34,13 @@ pii_clean: rm -rf pii_report mkdir -p pii_report -quality: - pycodestyle --config=.pycodestyle $(PACKAGES) - pylint $(PACKAGES) +quality: pycodestyle pylint + +pycodestyle: + pycodestyle --config=.pycodestyle $(PACKAGES) + +pylint: + DJANGO_SETTINGS_MODULE=notesserver.settings.test pylint $(PACKAGES) diff-coverage: diff-cover build/coverage/coverage.xml --html-report build/coverage/diff_cover.html @@ -59,18 +71,21 @@ develop: requirements test.requirements piptools: ## install pinned version of pip-compile and pip-sync pip install -r requirements/pip-tools.txt -upgrade: export CUSTOM_COMPILE_COMMAND=make upgrade -upgrade: piptools ## update the requirements/*.txt files with the latest packages satisfying requirements/*.in +compile-requirements: export CUSTOM_COMPILE_COMMAND=make upgrade +compile-requirements: piptools ## Re-compile *.in requirements to *.txt (without upgrading) # Make sure to compile files after any other files they include! - pip-compile --upgrade --rebuild --allow-unsafe -o requirements/pip.txt requirements/pip.in - pip-compile --upgrade -o requirements/pip-tools.txt requirements/pip-tools.in + pip-compile ${COMPILE_OPTS} --rebuild --allow-unsafe -o requirements/pip.txt requirements/pip.in + pip-compile ${COMPILE_OPTS} -o requirements/pip-tools.txt requirements/pip-tools.in pip install -qr requirements/pip.txt pip install -qr requirements/pip-tools.txt - pip-compile --upgrade --allow-unsafe -o requirements/base.txt requirements/base.in - pip-compile --upgrade --allow-unsafe -o requirements/test.txt requirements/test.in - pip-compile --upgrade --allow-unsafe -o requirements/ci.txt requirements/ci.in - pip-compile --upgrade --allow-unsafe -o requirements/quality.txt requirements/quality.in + pip-compile ${COMPILE_OPTS} --allow-unsafe -o requirements/base.txt requirements/base.in + pip-compile ${COMPILE_OPTS} --allow-unsafe -o requirements/test.txt requirements/test.in + pip-compile ${COMPILE_OPTS} --allow-unsafe -o requirements/ci.txt requirements/ci.in + pip-compile ${COMPILE_OPTS} --allow-unsafe -o requirements/quality.txt requirements/quality.in # Let tox control the Django version for tests grep -e "^django==" requirements/base.txt > requirements/django.txt sed '/^[dD]jango==/d' requirements/test.txt > requirements/test.tmp mv requirements/test.tmp requirements/test.txt + +upgrade: ## update the requirements/*.txt files with the latest packages satisfying requirements/*.in + $(MAKE) compile-requirements COMPILE_OPTS="--upgrade" diff --git a/README.rst b/README.rst index fbaaec7e..321452c0 100644 --- a/README.rst +++ b/README.rst @@ -47,8 +47,21 @@ Configuration Running Tests ************* -Run ``make validate`` install the requirements, run the tests, and run -lint. +Install requirements:: + + make test.requirements + +Start mysql/elasticsearch services:: + + make test-start-services + +Run unit tests:: + + make test + +Run quality checks:: + + make quality How To Resync The Index *********************** diff --git a/notesapi/v1/tests/test_meilisearch.py b/notesapi/v1/tests/test_meilisearch.py new file mode 100644 index 00000000..abcc2d1f --- /dev/null +++ b/notesapi/v1/tests/test_meilisearch.py @@ -0,0 +1,96 @@ +from unittest.mock import Mock, patch + +from django.test import TestCase + +from notesapi.v1.models import Note +from notesapi.v1.views import meilisearch + + +class MeilisearchTest(TestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + meilisearch.connect_signals() + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + meilisearch.disconnect_signals() + + def setUp(self): + self.enterContext( + patch.object(meilisearch.Client, "meilisearch_client", Mock()) + ) + self.enterContext(patch.object(meilisearch.Client, "meilisearch_index", Mock())) + + @property + def note_dict(self): + return { + "user": "test_user_id", + "usage_id": "i4x://org/course/html/52aa9816425a4ce98a07625b8cb70811", + "course_id": "org/course/run", + "text": "test note text", + "quote": "test note quote", + "ranges": [ + { + "start": "/p[1]", + "end": "/p[1]", + "startOffset": 0, + "endOffset": 10, + } + ], + "tags": ["apple", "pear"], + } + + def test_save_delete_note(self): + note = Note.create(self.note_dict) + note.save() + note_id = note.id + + meilisearch.Client.meilisearch_index.add_documents.assert_called_with( + [ + { + "id": note_id, + "user_id": "test_user_id", + "course_id": "org/course/run", + "text": "test note text", + } + ] + ) + + note.delete() + meilisearch.Client.meilisearch_index.delete_document.assert_called_with(note_id) + + def test_get_queryset_no_result(self): + queryset = meilisearch.AnnotationSearchView().get_queryset() + assert not queryset.all() + + def test_get_queryset_one_match(self): + note1 = Note.create(self.note_dict) + note2 = Note.create(self.note_dict) + note1.save() + note2.save() + view = meilisearch.AnnotationSearchView() + view.params = { + "text": "dummy text", + "user": "someuser", + "course_id": "course/id", + "page_size": 10, + "page": 2, + } + with patch.object( + meilisearch.Client.meilisearch_index, + "search", + Mock(return_value={"hits": [{"id": note2.id}]}), + ) as mock_search: + queryset = view.get_queryset() + mock_search.assert_called_once_with( + "dummy text", + { + "offset": 10, + "limit": 10, + "filter": ["user_id = 'someuser'", "course_id = 'course/id'"], + }, + ) + assert [note2.id] == [note.id for note in queryset] diff --git a/notesapi/v1/urls.py b/notesapi/v1/urls.py index 543ba7a7..cacdb9dd 100644 --- a/notesapi/v1/urls.py +++ b/notesapi/v1/urls.py @@ -1,7 +1,7 @@ from django.urls import path, re_path from notesapi.v1.views import (AnnotationDetailView, AnnotationListView, - AnnotationRetireView, AnnotationSearchView) + AnnotationRetireView, get_annotation_search_view_class) app_name = "notesapi.v1" urlpatterns = [ path('annotations/', AnnotationListView.as_view(), name='annotations'), @@ -11,5 +11,9 @@ AnnotationDetailView.as_view(), name='annotations_detail' ), - path('search/', AnnotationSearchView.as_view(), name='annotations_search'), + path( + 'search/', + get_annotation_search_view_class().as_view(), + name='annotations_search' + ), ] diff --git a/notesapi/v1/views.py b/notesapi/v1/views.py deleted file mode 100644 index 7f479f18..00000000 --- a/notesapi/v1/views.py +++ /dev/null @@ -1,610 +0,0 @@ -# pylint:disable=possibly-used-before-assignment -import json -import logging -import newrelic.agent -from django.conf import settings -from django.core.exceptions import ValidationError -from django.db.models import Q -from django.urls import reverse -from django.utils.translation import gettext as _ -from rest_framework import status -from rest_framework.generics import GenericAPIView, ListAPIView -from rest_framework.response import Response -from rest_framework.views import APIView - -from notesapi.v1.models import Note -from notesapi.v1.search_indexes.documents import NoteDocument -from notesapi.v1.serializers import NoteSerializer - -if not settings.ES_DISABLED: - from elasticsearch_dsl import Search - from elasticsearch_dsl.connections import connections - from django_elasticsearch_dsl_drf.filter_backends import DefaultOrderingFilterBackend, HighlightBackend - from django_elasticsearch_dsl_drf.constants import ( - LOOKUP_FILTER_TERM, - LOOKUP_QUERY_IN, - SEPARATOR_LOOKUP_COMPLEX_VALUE, - ) - from notesapi.v1.search_indexes.paginators import NotesPagination as ESNotesPagination - from notesapi.v1.search_indexes.backends import CompoundSearchFilterBackend, FilteringFilterBackend - from notesapi.v1.search_indexes.serializers import NoteDocumentSerializer as NotesElasticSearchSerializer - -log = logging.getLogger(__name__) - - -class AnnotationsLimitReachedError(Exception): - """ - Exception when trying to create more than allowed annotations - """ - - -class AnnotationSearchView(ListAPIView): - """ - **Use Case** - - * Search and return a list of annotations for a user. - - The annotations are always sorted in descending order by updated date. - - Response is paginated by default except usage_id based search. - - Each page in the list contains 25 annotations by default. The page - size can be altered by passing parameter "page_size=". - - Http400 is returned if the format of the request is not correct. - - **Search Types** - - * There are two types of searches one can perform - - * Database - - If ElasticSearch is disabled or text query param is not present. - - * ElasticSearch - - **Example Requests** - - GET /api/v1/search/ - GET /api/v1/search/?course_id={course_id}&user={user_id} - GET /api/v1/search/?course_id={course_id}&user={user_id}&usage_id={usage_id} - GET /api/v1/search/?course_id={course_id}&user={user_id}&usage_id={usage_id}&usage_id={usage_id} ... - - **Query Parameters for GET** - - All the parameters are optional. - - * course_id: Id of the course. - - * user: Anonymized user id. - - * usage_id: The identifier string of the annotations XBlock. - - * text: Student's thoughts on the quote - - * highlight: dict. Only used when search from ElasticSearch. It contains two keys: - - * highlight_tag: String. HTML tag to be used for highlighting the text. Default is "em" - - * highlight_class: String. CSS class to be used for highlighting the text. - - **Response Values for GET** - - * count: The number of annotations in a course. - - * next: The URI to the next page of annotations. - - * previous: The URI to the previous page of annotations. - - * current: Current page number. - - * num_pages: The number of pages listing annotations. - - * results: A list of annotations returned. Each collection in the list contains these fields. - - * id: String. The primary key of the note. - - * user: String. Anonymized id of the user. - - * course_id: String. The identifier string of the annotations course. - - * usage_id: String. The identifier string of the annotations XBlock. - - * quote: String. Quoted text. - - * text: String. Student's thoughts on the quote. - - * ranges: List. Describes position of quote. - - * tags: List. Comma separated tags. - - * created: DateTime. Creation datetime of annotation. - - * updated: DateTime. When was the last time annotation was updated. - """ - - action = '' - params = {} - query_params = {} - search_with_usage_id = False - document = NoteDocument - search_fields = ('text', 'tags') - filter_fields = { - 'course_id': 'course_id', - 'user': 'user', - 'usage_id': { - 'field': 'usage_id', - 'lookups': [ - LOOKUP_QUERY_IN, - LOOKUP_FILTER_TERM, - ], - }, - - } - highlight_fields = { - 'text': { - 'enabled': True, - 'options': { - 'pre_tags': ['{elasticsearch_highlight_start}'], - 'post_tags': ['{elasticsearch_highlight_end}'], - 'number_of_fragments': 0, - }, - }, - 'tags': { - 'enabled': True, - 'options': { - 'pre_tags': ['{elasticsearch_highlight_start}'], - 'post_tags': ['{elasticsearch_highlight_end}'], - 'number_of_fragments': 0, - }, - }, - } - ordering = ('-updated',) - - def __init__(self, *args, **kwargs): - self.initiate_es_specific_state_if_is_enabled() - super().__init__(*args, **kwargs) - - def initiate_es_specific_state_if_is_enabled(self): - """ - Initiates elasticsearch specific state if elasticsearch is enabled. - - Should be called in the class `__init__` method. - """ - if not settings.ES_DISABLED: - self.client = connections.get_connection(self.document._get_using()) # pylint: disable=protected-access - self.index = self.document._index._name # pylint: disable=protected-access - self.mapping = self.document._doc_type.mapping.properties.name # pylint: disable=protected-access - # pylint: disable=protected-access - self.search = Search(using=self.client, index=self.index, doc_type=self.document._doc_type.name) - - @property - def is_es_disabled(self): - """ - Predicate instance method. - - Search in DB when ES is not available or there is no need to bother it - """ - - return settings.ES_DISABLED or 'text' not in self.params - - def get_queryset(self): - if self.is_es_disabled: - queryset = Note.objects.filter(**self.query_params).order_by('-updated') - if 'text' in self.params: - queryset = queryset.filter( - Q(text__icontains=self.params['text']) | Q(tags__icontains=self.params['text']) - ) - else: - queryset = self.search.query() - queryset.model = self.document.Django.model - - return queryset - - def get_serializer_class(self): - """ - Return the class to use for the serializer. - - Defaults to using `NoteSerializer` if elasticsearch is disabled - or `NotesElasticSearchSerializer` if elasticsearch is enabled - """ - - return NoteSerializer if self.is_es_disabled else NotesElasticSearchSerializer - - @property - def paginator(self): - """ - The paginator instance associated with the view and used data source, or `None`. - """ - if not hasattr(self, '_paginator'): - if self.pagination_class is None: - self._paginator = None # pylint: disable=attribute-defined-outside-init - else: - self._paginator = ( # pylint: disable=attribute-defined-outside-init - self.pagination_class() - if self.is_es_disabled - else ESNotesPagination() - ) - - return self._paginator - - def filter_queryset(self, queryset): - """ - Given a queryset, filter it with whichever filter backend is in use. - - Do not filter additionally if mysql db used or use `CompoundSearchFilterBackend` - and `HighlightBackend` if elasticsearch is the data source. - """ - filter_backends = [] - if not self.is_es_disabled: - filter_backends = [ - FilteringFilterBackend, - CompoundSearchFilterBackend, - DefaultOrderingFilterBackend, - ] - if self.params.get('highlight'): - filter_backends.append(HighlightBackend) - - for backend in filter_backends: - queryset = backend().filter_queryset(self.request, queryset, view=self) - return queryset - - def list(self, *args, **kwargs): - """ - Returns list of students notes. - """ - # Do not send paginated result if usage id based search. - if self.search_with_usage_id: - queryset = self.filter_queryset(self.get_queryset()) - serializer = self.get_serializer(queryset, many=True) - return Response(serializer.data, status=status.HTTP_200_OK) - return super().list(*args, **kwargs) - - def build_query_params_state(self): - """ - Builds a custom query params. - - Use them in order to search annotations in most appropriate storage. - """ - self.query_params = {} - self.params = self.request.query_params.dict() - usage_ids = self.request.query_params.getlist('usage_id') - if usage_ids: - self.search_with_usage_id = True - if not self.is_es_disabled: - usage_ids = SEPARATOR_LOOKUP_COMPLEX_VALUE.join(usage_ids) - - self.query_params['usage_id__in'] = usage_ids - - if 'course_id' in self.params: - self.query_params['course_id'] = self.params['course_id'] - - if 'user' in self.params: - if self.is_es_disabled: - self.query_params['user_id'] = self.params['user'] - else: - self.query_params['user'] = self.params['user'] - - def get(self, *args, **kwargs): - """ - Search annotations in most appropriate storage - """ - self.search_with_usage_id = False - self.build_query_params_state() - - return super().get(*args, **kwargs) - - -class AnnotationRetireView(GenericAPIView): - """ - Administrative functions for the notes service. - """ - - def post(self, *args, **kwargs): - """ - Delete all annotations for a user. - """ - params = self.request.data - if 'user' not in params: - return Response(status=status.HTTP_400_BAD_REQUEST) - - Note.objects.filter(user_id=params['user']).delete() - return Response(status=status.HTTP_204_NO_CONTENT) - - -class AnnotationListView(GenericAPIView): - """ - **Use Case** - - * Get a paginated list of annotations for a user. - - The annotations are always sorted in descending order by updated date. - - Each page in the list contains 25 annotations by default. The page - size can be altered by passing parameter "page_size=". - - HTTP 400 Bad Request: The format of the request is not correct. - - * Create a new annotation for a user. - - HTTP 400 Bad Request: The format of the request is not correct, or the maximum number of notes for a - user has been reached. - - HTTP 201 Created: Success. - - * Delete all annotations for a user. - - HTTP 400 Bad Request: The format of the request is not correct. - - HTTP 200 OK: Either annotations from the user were deleted, or no annotations for the user were found. - - **Example Requests** - - GET /api/v1/annotations/?course_id={course_id}&user={user_id} - - POST /api/v1/annotations/ - user={user_id}&course_id={course_id}&usage_id={usage_id}&ranges={ranges}"e={quote} - - DELETE /api/v1/annotations/ - user={user_id} - - **Query Parameters for GET** - - Both the course_id and user must be provided. - - * course_id: Id of the course. - - * user: Anonymized user id. - - **Response Values for GET** - - * count: The number of annotations in a course. - - * next: The URI to the next page of annotations. - - * previous: The URI to the previous page of annotations. - - * current: Current page number. - - * num_pages: The number of pages listing annotations. - - * results: A list of annotations returned. Each collection in the list contains these fields. - - * id: String. The primary key of the note. - - * user: String. Anonymized id of the user. - - * course_id: String. The identifier string of the annotations course. - - * usage_id: String. The identifier string of the annotations XBlock. - - * quote: String. Quoted text. - - * text: String. Student's thoughts on the quote. - - * ranges: List. Describes position of quote. - - * tags: List. Comma separated tags. - - * created: DateTime. Creation datetime of annotation. - - * updated: DateTime. When was the last time annotation was updated. - - **Form-encoded data for POST** - - user, course_id, usage_id, ranges and quote fields must be provided. - - **Response Values for POST** - - * id: String. The primary key of the note. - - * user: String. Anonymized id of the user. - - * course_id: String. The identifier string of the annotations course. - - * usage_id: String. The identifier string of the annotations XBlock. - - * quote: String. Quoted text. - - * text: String. Student's thoughts on the quote. - - * ranges: List. Describes position of quote in the source text. - - * tags: List. Comma separated tags. - - * created: DateTime. Creation datetime of annotation. - - * updated: DateTime. When was the last time annotation was updated. - - **Form-encoded data for DELETE** - - * user: Anonymized user id. - - **Response Values for DELETE** - - * no content. - - """ - - serializer_class = NoteSerializer - - def get(self, *args, **kwargs): - """ - Get paginated list of all annotations. - """ - params = self.request.query_params.dict() - - if 'course_id' not in params: - return Response(status=status.HTTP_400_BAD_REQUEST) - - if 'user' not in params: - return Response(status=status.HTTP_400_BAD_REQUEST) - - notes = Note.objects.filter(course_id=params['course_id'], user_id=params['user']).order_by('-updated') - page = self.paginate_queryset(notes) - serializer = self.get_serializer(page, many=True) - response = self.get_paginated_response(serializer.data) - return response - - def post(self, *args, **kwargs): - """ - Create a new annotation. - - Returns 400 request if bad payload is sent or it was empty object. - """ - if not self.request.data or 'id' in self.request.data: - return Response(status=status.HTTP_400_BAD_REQUEST) - - try: - total_notes = Note.objects.filter( - user_id=self.request.data['user'], course_id=self.request.data['course_id'] - ).count() - if total_notes >= settings.MAX_NOTES_PER_COURSE: - raise AnnotationsLimitReachedError - - note = Note.create(self.request.data) - note.full_clean() - - # Gather metrics for New Relic so we can slice data in New Relic Insights - newrelic.agent.add_custom_parameter('notes.count', total_notes) - except ValidationError as error: - log.debug(error, exc_info=True) - return Response(status=status.HTTP_400_BAD_REQUEST) - except AnnotationsLimitReachedError: - error_message = _( - 'You can create up to {max_num_annotations_per_course} notes.' - ' You must remove some notes before you can add new ones.' - ).format(max_num_annotations_per_course=settings.MAX_NOTES_PER_COURSE) - log.info('Attempted to create more than %s annotations', settings.MAX_NOTES_PER_COURSE) - - return Response({'error_msg': error_message}, status=status.HTTP_400_BAD_REQUEST) - - note.save() - - location = reverse('api:v1:annotations_detail', kwargs={'annotation_id': note.id}) - serializer = NoteSerializer(note) - return Response(serializer.data, status=status.HTTP_201_CREATED, headers={'Location': location}) - - -class AnnotationDetailView(APIView): - """ - **Use Case** - - * Get a single annotation. - - * Update an annotation. - - * Delete an annotation. - - **Example Requests** - - GET /api/v1/annotations/ - PUT /api/v1/annotations/ - DELETE /api/v1/annotations/ - - **Query Parameters for GET** - - HTTP404 is returned if annotation_id is missing. - - * annotation_id: Annotation id - - **Query Parameters for PUT** - - HTTP404 is returned if annotation_id is missing and HTTP400 is returned if text and tags are missing. - - * annotation_id: String. Annotation id - - * text: String. Text to be updated - - * tags: List. Tags to be updated - - **Query Parameters for DELETE** - - HTTP404 is returned if annotation_id is missing. - - * annotation_id: Annotation id - - **Response Values for GET** - - * id: String. The primary key of the note. - - * user: String. Anonymized id of the user. - - * course_id: String. The identifier string of the annotations course. - - * usage_id: String. The identifier string of the annotations XBlock. - - * quote: String. Quoted text. - - * text: String. Student's thoughts on the quote. - - * ranges: List. Describes position of quote. - - * tags: List. Comma separated tags. - - * created: DateTime. Creation datetime of annotation. - - * updated: DateTime. When was the last time annotation was updated. - - **Response Values for PUT** - - * same as GET with updated values - - **Response Values for DELETE** - - * HTTP_204_NO_CONTENT is returned - """ - - def get(self, *args, **kwargs): - """ - Get an existing annotation. - """ - note_id = self.kwargs.get('annotation_id') - - try: - note = Note.objects.get(id=note_id) - except Note.DoesNotExist: - return Response('Annotation not found!', status=status.HTTP_404_NOT_FOUND) - - serializer = NoteSerializer(note) - return Response(serializer.data) - - def put(self, *args, **kwargs): - """ - Update an existing annotation. - """ - note_id = self.kwargs.get('annotation_id') - - try: - note = Note.objects.get(id=note_id) - except Note.DoesNotExist: - return Response('Annotation not found! No update performed.', status=status.HTTP_404_NOT_FOUND) - - try: - note.text = self.request.data['text'] - note.tags = json.dumps(self.request.data['tags']) - note.full_clean() - except KeyError as error: - log.debug(error, exc_info=True) - return Response(status=status.HTTP_400_BAD_REQUEST) - - note.save() - - serializer = NoteSerializer(note) - return Response(serializer.data) - - def delete(self, *args, **kwargs): - """ - Delete an annotation. - """ - note_id = self.kwargs.get('annotation_id') - - try: - note = Note.objects.get(id=note_id) - except Note.DoesNotExist: - return Response('Annotation not found! No update performed.', status=status.HTTP_404_NOT_FOUND) - - note.delete() - - # Annotation deleted successfully. - return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/notesapi/v1/views/__init__.py b/notesapi/v1/views/__init__.py new file mode 100644 index 00000000..297cb9d5 --- /dev/null +++ b/notesapi/v1/views/__init__.py @@ -0,0 +1,26 @@ +import typing as t +from django.conf import settings + +from .common import ( + AnnotationDetailView, + AnnotationListView, + AnnotationRetireView, + AnnotationSearchView +) + +from .exceptions import SearchViewRuntimeError + + +# pylint: disable=import-outside-toplevel +def get_annotation_search_view_class() -> t.Type[AnnotationSearchView]: + """ + Import views from either mysql, elasticsearch or meilisearch backend + """ + if settings.ES_DISABLED: + if getattr(settings, "MEILISEARCH_ENABLED", False): + from . import meilisearch + return meilisearch.AnnotationSearchView + else: + return AnnotationSearchView + from . import elasticsearch + return elasticsearch.AnnotationSearchView diff --git a/notesapi/v1/views/common.py b/notesapi/v1/views/common.py new file mode 100644 index 00000000..0e4e308a --- /dev/null +++ b/notesapi/v1/views/common.py @@ -0,0 +1,560 @@ +# pylint:disable=possibly-used-before-assignment +import json +import logging +import newrelic.agent +from django.conf import settings +from django.core.exceptions import ValidationError +from django.db.models import Q +from django.urls import reverse +from django.utils.translation import gettext as _ +from rest_framework import status +from rest_framework.generics import GenericAPIView, ListAPIView +from rest_framework.response import Response +from rest_framework.views import APIView + +from notesapi.v1.models import Note +from notesapi.v1.serializers import NoteSerializer + + +log = logging.getLogger(__name__) + + +class AnnotationsLimitReachedError(Exception): + """ + Exception when trying to create more than allowed annotations + """ + + +class AnnotationSearchView(ListAPIView): + """ + **Use Case** + + * Search and return a list of annotations for a user. + + The annotations are always sorted in descending order by updated date. + + Response is paginated by default except usage_id based search. + + Each page in the list contains 25 annotations by default. The page + size can be altered by passing parameter "page_size=". + + Http400 is returned if the format of the request is not correct. + + **Search Types** + + * There are two types of searches one can perform + + * Database + + If ElasticSearch is disabled or text query param is not present. + + * ElasticSearch + + **Example Requests** + + GET /api/v1/search/ + GET /api/v1/search/?course_id={course_id}&user={user_id} + GET /api/v1/search/?course_id={course_id}&user={user_id}&usage_id={usage_id} + GET /api/v1/search/?course_id={course_id}&user={user_id}&usage_id={usage_id}&usage_id={usage_id} ... + + **Query Parameters for GET** + + All the parameters are optional. + + * course_id: Id of the course. + + * user: Anonymized user id. + + * usage_id: The identifier string of the annotations XBlock. + + * text: Student's thoughts on the quote + + * highlight: dict. Only used when search from ElasticSearch. It contains two keys: + + * highlight_tag: String. HTML tag to be used for highlighting the text. Default is "em" + + * highlight_class: String. CSS class to be used for highlighting the text. + + **Response Values for GET** + + * count: The number of annotations in a course. + + * next: The URI to the next page of annotations. + + * previous: The URI to the previous page of annotations. + + * current: Current page number. + + * num_pages: The number of pages listing annotations. + + * results: A list of annotations returned. Each collection in the list contains these fields. + + * id: String. The primary key of the note. + + * user: String. Anonymized id of the user. + + * course_id: String. The identifier string of the annotations course. + + * usage_id: String. The identifier string of the annotations XBlock. + + * quote: String. Quoted text. + + * text: String. Student's thoughts on the quote. + + * ranges: List. Describes position of quote. + + * tags: List. Comma separated tags. + + * created: DateTime. Creation datetime of annotation. + + * updated: DateTime. When was the last time annotation was updated. + """ + + action = "" + params = {} + query_params = {} + search_with_usage_id = False + search_fields = ("text", "tags") + ordering = ("-updated",) + + @property + def is_text_search(self): + """ + We identify text search by the presence of a "text" parameter. Subclasses may + want to have a different behaviour in such cases. + """ + return "text" in self.params + + def get_queryset(self): + queryset = Note.objects.filter(**self.query_params).order_by("-updated") + if "text" in self.params: + qs_filter = Q(text__icontains=self.params["text"]) | Q( + tags__icontains=self.params["text"] + ) + queryset = queryset.filter(qs_filter) + return queryset + + def get_serializer_class(self): + """ + Return the class to use for the serializer. + + Defaults to `NoteSerializer`. + """ + return NoteSerializer + + @property + def paginator(self): + """ + The paginator instance associated with the view and used data source, or `None`. + """ + if not hasattr(self, "_paginator"): + # pylint: disable=attribute-defined-outside-init + self._paginator = self.pagination_class() if self.pagination_class else None + + return self._paginator + + def filter_queryset(self, queryset): + """ + Given a queryset, filter it with whichever filter backend is in use. + + Do not filter additionally if mysql db used or use `CompoundSearchFilterBackend` + and `HighlightBackend` if elasticsearch is the data source. + """ + filter_backends = self.get_filter_backends() + for backend in filter_backends: + queryset = backend().filter_queryset(self.request, queryset, view=self) + return queryset + + def get_filter_backends(self): + """ + List of filter backends, each with a `filter_queryset` method. + """ + return [] + + def list(self, *args, **kwargs): + """ + Returns list of students notes. + """ + # Do not send paginated result if usage id based search. + if self.search_with_usage_id: + queryset = self.filter_queryset(self.get_queryset()) + serializer = self.get_serializer(queryset, many=True) + return Response(serializer.data, status=status.HTTP_200_OK) + return super().list(*args, **kwargs) + + def build_query_params_state(self): + """ + Builds a custom query params. + + Use them in order to search annotations in most appropriate storage. + """ + self.query_params = {} + self.params = self.request.query_params.dict() + usage_ids = self.request.query_params.getlist("usage_id") + if usage_ids: + self.search_with_usage_id = True + self.query_params["usage_id__in"] = usage_ids + + if "course_id" in self.params: + self.query_params["course_id"] = self.params["course_id"] + + if "user" in self.params: + self.query_params["user_id"] = self.params["user"] + + def get(self, *args, **kwargs): + """ + Search annotations in most appropriate storage + """ + self.search_with_usage_id = False + self.build_query_params_state() + + return super().get(*args, **kwargs) + + @classmethod + def selftest(cls): + """ + No-op. + """ + return {} + + @classmethod + def heartbeat(cls): + """ + No-op + """ + return + + +class AnnotationRetireView(GenericAPIView): + """ + Administrative functions for the notes service. + """ + + def post(self, *args, **kwargs): + """ + Delete all annotations for a user. + """ + params = self.request.data + if "user" not in params: + return Response(status=status.HTTP_400_BAD_REQUEST) + + Note.objects.filter(user_id=params["user"]).delete() + return Response(status=status.HTTP_204_NO_CONTENT) + + +class AnnotationListView(GenericAPIView): + """ + **Use Case** + + * Get a paginated list of annotations for a user. + + The annotations are always sorted in descending order by updated date. + + Each page in the list contains 25 annotations by default. The page + size can be altered by passing parameter "page_size=". + + HTTP 400 Bad Request: The format of the request is not correct. + + * Create a new annotation for a user. + + HTTP 400 Bad Request: The format of the request is not correct, or the maximum number of notes for a + user has been reached. + + HTTP 201 Created: Success. + + * Delete all annotations for a user. + + HTTP 400 Bad Request: The format of the request is not correct. + + HTTP 200 OK: Either annotations from the user were deleted, or no annotations for the user were found. + + **Example Requests** + + GET /api/v1/annotations/?course_id={course_id}&user={user_id} + + POST /api/v1/annotations/ + user={user_id}&course_id={course_id}&usage_id={usage_id}&ranges={ranges}"e={quote} + + DELETE /api/v1/annotations/ + user={user_id} + + **Query Parameters for GET** + + Both the course_id and user must be provided. + + * course_id: Id of the course. + + * user: Anonymized user id. + + **Response Values for GET** + + * count: The number of annotations in a course. + + * next: The URI to the next page of annotations. + + * previous: The URI to the previous page of annotations. + + * current: Current page number. + + * num_pages: The number of pages listing annotations. + + * results: A list of annotations returned. Each collection in the list contains these fields. + + * id: String. The primary key of the note. + + * user: String. Anonymized id of the user. + + * course_id: String. The identifier string of the annotations course. + + * usage_id: String. The identifier string of the annotations XBlock. + + * quote: String. Quoted text. + + * text: String. Student's thoughts on the quote. + + * ranges: List. Describes position of quote. + + * tags: List. Comma separated tags. + + * created: DateTime. Creation datetime of annotation. + + * updated: DateTime. When was the last time annotation was updated. + + **Form-encoded data for POST** + + user, course_id, usage_id, ranges and quote fields must be provided. + + **Response Values for POST** + + * id: String. The primary key of the note. + + * user: String. Anonymized id of the user. + + * course_id: String. The identifier string of the annotations course. + + * usage_id: String. The identifier string of the annotations XBlock. + + * quote: String. Quoted text. + + * text: String. Student's thoughts on the quote. + + * ranges: List. Describes position of quote in the source text. + + * tags: List. Comma separated tags. + + * created: DateTime. Creation datetime of annotation. + + * updated: DateTime. When was the last time annotation was updated. + + **Form-encoded data for DELETE** + + * user: Anonymized user id. + + **Response Values for DELETE** + + * no content. + + """ + + serializer_class = NoteSerializer + + def get(self, *args, **kwargs): + """ + Get paginated list of all annotations. + """ + params = self.request.query_params.dict() + + if "course_id" not in params: + return Response(status=status.HTTP_400_BAD_REQUEST) + + if "user" not in params: + return Response(status=status.HTTP_400_BAD_REQUEST) + + notes = Note.objects.filter( + course_id=params["course_id"], user_id=params["user"] + ).order_by("-updated") + page = self.paginate_queryset(notes) + serializer = self.get_serializer(page, many=True) + response = self.get_paginated_response(serializer.data) + return response + + def post(self, *args, **kwargs): + """ + Create a new annotation. + + Returns 400 request if bad payload is sent or it was empty object. + """ + if not self.request.data or "id" in self.request.data: + return Response(status=status.HTTP_400_BAD_REQUEST) + + try: + total_notes = Note.objects.filter( + user_id=self.request.data["user"], + course_id=self.request.data["course_id"], + ).count() + if total_notes >= settings.MAX_NOTES_PER_COURSE: + raise AnnotationsLimitReachedError + + note = Note.create(self.request.data) + note.full_clean() + + # Gather metrics for New Relic so we can slice data in New Relic Insights + newrelic.agent.add_custom_parameter("notes.count", total_notes) + except ValidationError as error: + log.debug(error, exc_info=True) + return Response(status=status.HTTP_400_BAD_REQUEST) + except AnnotationsLimitReachedError: + error_message = _( + "You can create up to {max_num_annotations_per_course} notes." + " You must remove some notes before you can add new ones." + ).format(max_num_annotations_per_course=settings.MAX_NOTES_PER_COURSE) + log.info( + "Attempted to create more than %s annotations", + settings.MAX_NOTES_PER_COURSE, + ) + + return Response( + {"error_msg": error_message}, status=status.HTTP_400_BAD_REQUEST + ) + + note.save() + + location = reverse( + "api:v1:annotations_detail", kwargs={"annotation_id": note.id} + ) + serializer = NoteSerializer(note) + return Response( + serializer.data, + status=status.HTTP_201_CREATED, + headers={"Location": location}, + ) + + +class AnnotationDetailView(APIView): + """ + **Use Case** + + * Get a single annotation. + + * Update an annotation. + + * Delete an annotation. + + **Example Requests** + + GET /api/v1/annotations/ + PUT /api/v1/annotations/ + DELETE /api/v1/annotations/ + + **Query Parameters for GET** + + HTTP404 is returned if annotation_id is missing. + + * annotation_id: Annotation id + + **Query Parameters for PUT** + + HTTP404 is returned if annotation_id is missing and HTTP400 is returned if text and tags are missing. + + * annotation_id: String. Annotation id + + * text: String. Text to be updated + + * tags: List. Tags to be updated + + **Query Parameters for DELETE** + + HTTP404 is returned if annotation_id is missing. + + * annotation_id: Annotation id + + **Response Values for GET** + + * id: String. The primary key of the note. + + * user: String. Anonymized id of the user. + + * course_id: String. The identifier string of the annotations course. + + * usage_id: String. The identifier string of the annotations XBlock. + + * quote: String. Quoted text. + + * text: String. Student's thoughts on the quote. + + * ranges: List. Describes position of quote. + + * tags: List. Comma separated tags. + + * created: DateTime. Creation datetime of annotation. + + * updated: DateTime. When was the last time annotation was updated. + + **Response Values for PUT** + + * same as GET with updated values + + **Response Values for DELETE** + + * HTTP_204_NO_CONTENT is returned + """ + + def get(self, *args, **kwargs): + """ + Get an existing annotation. + """ + note_id = self.kwargs.get("annotation_id") + + try: + note = Note.objects.get(id=note_id) + except Note.DoesNotExist: + return Response("Annotation not found!", status=status.HTTP_404_NOT_FOUND) + + serializer = NoteSerializer(note) + return Response(serializer.data) + + def put(self, *args, **kwargs): + """ + Update an existing annotation. + """ + note_id = self.kwargs.get("annotation_id") + + try: + note = Note.objects.get(id=note_id) + except Note.DoesNotExist: + return Response( + "Annotation not found! No update performed.", + status=status.HTTP_404_NOT_FOUND, + ) + + try: + note.text = self.request.data["text"] + note.tags = json.dumps(self.request.data["tags"]) + note.full_clean() + except KeyError as error: + log.debug(error, exc_info=True) + return Response(status=status.HTTP_400_BAD_REQUEST) + + note.save() + + serializer = NoteSerializer(note) + return Response(serializer.data) + + def delete(self, *args, **kwargs): + """ + Delete an annotation. + """ + note_id = self.kwargs.get("annotation_id") + + try: + note = Note.objects.get(id=note_id) + except Note.DoesNotExist: + return Response( + "Annotation not found! No update performed.", + status=status.HTTP_404_NOT_FOUND, + ) + + note.delete() + + # Annotation deleted successfully. + return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/notesapi/v1/views/elasticsearch.py b/notesapi/v1/views/elasticsearch.py new file mode 100644 index 00000000..32c7207c --- /dev/null +++ b/notesapi/v1/views/elasticsearch.py @@ -0,0 +1,142 @@ +import logging +import traceback + +from django_elasticsearch_dsl_drf.constants import ( + LOOKUP_FILTER_TERM, + LOOKUP_QUERY_IN, + SEPARATOR_LOOKUP_COMPLEX_VALUE, +) +from django_elasticsearch_dsl_drf.filter_backends import ( + DefaultOrderingFilterBackend, + HighlightBackend, +) +from elasticsearch.exceptions import TransportError +from elasticsearch_dsl import Search +from elasticsearch_dsl.connections import connections + +from notesapi.v1.search_indexes.backends import ( + CompoundSearchFilterBackend, + FilteringFilterBackend, +) +from notesapi.v1.search_indexes.documents import NoteDocument +from notesapi.v1.search_indexes.paginators import NotesPagination as ESNotesPagination +from notesapi.v1.search_indexes.serializers import ( + NoteDocumentSerializer as NotesElasticSearchSerializer, +) + +from .common import AnnotationSearchView as BaseAnnotationSearchView +from .exceptions import SearchViewRuntimeError + +logger = logging.getLogger(__name__) + + +class AnnotationSearchView(BaseAnnotationSearchView): + + # https://django-elasticsearch-dsl-drf.readthedocs.io/en/latest/advanced_usage_examples.html + filter_fields = { + "course_id": "course_id", + "user": "user", + "usage_id": { + "field": "usage_id", + "lookups": [ + LOOKUP_QUERY_IN, + LOOKUP_FILTER_TERM, + ], + }, + } + highlight_fields = { + "text": { + "enabled": True, + "options": { + "pre_tags": ["{elasticsearch_highlight_start}"], + "post_tags": ["{elasticsearch_highlight_end}"], + "number_of_fragments": 0, + }, + }, + "tags": { + "enabled": True, + "options": { + "pre_tags": ["{elasticsearch_highlight_start}"], + "post_tags": ["{elasticsearch_highlight_end}"], + "number_of_fragments": 0, + }, + }, + } + + def __init__(self, *args, **kwargs): + self.client = connections.get_connection( + NoteDocument._get_using() + ) # pylint: disable=protected-access + self.index = NoteDocument._index._name # pylint: disable=protected-access + self.mapping = ( + NoteDocument._doc_type.mapping.properties.name + ) # pylint: disable=protected-access + # pylint: disable=protected-access + self.search = Search( + using=self.client, index=self.index, doc_type=NoteDocument._doc_type.name + ) + super().__init__(*args, **kwargs) + + def get_serializer_class(self): + """ + Use Elasticsearch-specific serializer. + """ + if not self.is_text_search: + return super().get_serializer_class() + return NotesElasticSearchSerializer + + def get_queryset(self): + """ + Hackish method that doesn't quite return a Django queryset. + """ + if not self.is_text_search: + return super().get_queryset() + queryset = self.search.query() + queryset.model = NoteDocument.Django.model + return queryset + + def get_filter_backends(self): + if not self.is_text_search: + return super().get_filter_backends() + filter_backends = [ + FilteringFilterBackend, + CompoundSearchFilterBackend, + DefaultOrderingFilterBackend, + ] + if self.params.get("highlight"): + filter_backends.append(HighlightBackend) + return filter_backends + + @property + def pagination_class(self): + if not self.is_text_search: + return super().pagination_class + return ESNotesPagination + + def build_query_params_state(self): + super().build_query_params_state() + if not self.is_text_search: + return + if "usage_id__in" in self.query_params: + usage_ids = self.query_params["usage_id__in"] + usage_ids = SEPARATOR_LOOKUP_COMPLEX_VALUE.join(usage_ids) + self.query_params["usage_id__in"] = usage_ids + + if "user" in self.params: + self.query_params["user"] = self.query_params.pop("user_id") + + @classmethod + def heartbeat(cls): + if not get_es().ping(): + raise SearchViewRuntimeError("es") + + @classmethod + def selftest(cls): + try: + return {"es": get_es().info()} + except TransportError as e: + raise SearchViewRuntimeError({"es_error": traceback.format_exc()}) from e + + +def get_es(): + return connections.get_connection() diff --git a/notesapi/v1/views/exceptions.py b/notesapi/v1/views/exceptions.py new file mode 100644 index 00000000..835bd7f6 --- /dev/null +++ b/notesapi/v1/views/exceptions.py @@ -0,0 +1,2 @@ +class SearchViewRuntimeError(RuntimeError): + pass diff --git a/notesapi/v1/views/meilisearch.py b/notesapi/v1/views/meilisearch.py new file mode 100644 index 00000000..504d327c --- /dev/null +++ b/notesapi/v1/views/meilisearch.py @@ -0,0 +1,193 @@ +""" +Meilisearch views to search for annotations. + +To enable this backend, define the following settings: + +ES_DISABLED = True +MEILISEARCH_ENABLED = True + +Then check the Client class for more information about Meilisearch credential settings. + +When you start using this backend, you might want to re-index all your content. To do that, run: + + ./manage.py shell -c "from notesapi.v1.views.meilisearch import reindex; reindex()" +""" + +import traceback + +import meilisearch +from django.conf import settings +from django.core.paginator import Paginator +from django.db.models import signals + +from notesapi.v1.models import Note + +from .common import AnnotationSearchView as BaseAnnotationSearchView +from .exceptions import SearchViewRuntimeError + + +class Client: + """ + Simple Meilisearch client class + + It depends on the following Django settings: + + - MEILISEARCH_URL + - MEILISEARCH_API_KEY + - MEILISEARCH_INDEX + """ + + _CLIENT = None + _INDEX = None + FILTERABLES = ["user_id", "course_id"] + + @property + def meilisearch_client(self) -> meilisearch.Client: + """ + Return a meilisearch client. + """ + if self._CLIENT is None: + self._CLIENT = meilisearch.Client( + getattr(settings, "MEILISEARCH_URL", "http://meilisearch:7700"), + getattr(settings, "MEILISEARCH_API_KEY", ""), + ) + return self._CLIENT + + @property + def meilisearch_index(self) -> meilisearch.index.Index: + """ + Return the meilisearch index used to store annotations. + + If the index does not exist, it is created. And if it does not have the right + filterable fields, then it is updated. + """ + if self._INDEX is None: + index_name = getattr(settings, "MEILISEARCH_INDEX", "student_notes") + try: + self._INDEX = self.meilisearch_client.get_index(index_name) + except meilisearch.errors.MeilisearchApiError: + task = self.meilisearch_client.create_index( + index_name, {"primaryKey": "id"} + ) + self.meilisearch_client.wait_for_task(task.task_uid, timeout_in_ms=2000) + self._INDEX = self.meilisearch_client.get_index(index_name) + + # Checking filterable attributes + existing_filterables = set(self._INDEX.get_filterable_attributes()) + if not set(self.FILTERABLES).issubset(existing_filterables): + all_filterables = list(existing_filterables.union(self.FILTERABLES)) + self._INDEX.update_filterable_attributes(all_filterables) + + return self._INDEX + + +class AnnotationSearchView(BaseAnnotationSearchView): + def get_queryset(self): + """ + Simple result filtering method based on test search. + + We simply include in the query only those that match the text search query. Note + that this backend does not support highlighting (yet). + """ + if not self.is_text_search: + return super().get_queryset() + + queryset = Note.objects.filter(**self.query_params).order_by("-updated") + + # Define meilisearch params + filters = [ + f"user_id = '{self.params['user']}'", + f"course_id = '{self.params['course_id']}'", + ] + page_size = int(self.params["page_size"]) + offset = (int(self.params["page"]) - 1) * page_size + + # Perform search + search_results = Client().meilisearch_index.search( + self.params["text"], + {"offset": offset, "limit": page_size, "filter": filters}, + ) + + # Limit to these ID + queryset = queryset.filter(id__in=[r["id"] for r in search_results["hits"]]) + return queryset + + @classmethod + def heartbeat(cls): + """ + Check that the meilisearch client is healthy. + """ + if not Client().meilisearch_client.is_healthy(): + raise SearchViewRuntimeError("meilisearch") + + @classmethod + def selftest(cls): + """ + Check that we can access the meilisearch index. + """ + try: + return {"meilisearch": Client().meilisearch_index.created_at} + except meilisearch.errors.MeilisearchError as e: + raise SearchViewRuntimeError( + {"meilisearch_error": traceback.format_exc()} + ) from e + + +def on_note_save(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Create or update a document. + """ + add_documents([instance]) + + +def on_note_delete(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Delete a document. + """ + Client().meilisearch_index.delete_document(instance.id) + + +def connect_signals() -> None: + """ + Connect Django signal to meilisearch indexing. + """ + signals.post_save.connect(on_note_save, sender=Note) + signals.post_delete.connect(on_note_delete, sender=Note) + + +def disconnect_signals() -> None: + """ + Disconnect Django signals: this is necessary in unit tests. + """ + signals.post_save.disconnect(on_note_save, sender=Note) + signals.post_delete.disconnect(on_note_delete, sender=Note) + + +connect_signals() + + +def reindex(): + """ + Re-index all notes, in batches of 100. + """ + paginator = Paginator(Note.objects.all(), 100) + for page_number in paginator.page_range: + page = paginator.page(page_number) + add_documents(page.object_list) + + +def add_documents(notes): + """ + Convert some Note objects and insert them in the index. + """ + documents_to_add = [ + { + "id": note.id, + "user_id": note.user_id, + "course_id": note.course_id, + "text": note.text, + } + for note in notes + ] + if documents_to_add: + Client().meilisearch_index.add_documents(documents_to_add) diff --git a/notesapi/v1/views/mysql.py b/notesapi/v1/views/mysql.py new file mode 100644 index 00000000..e69de29b diff --git a/notesserver/docker-compose.test.yml b/notesserver/docker-compose.test.yml new file mode 100644 index 00000000..0dc73b6c --- /dev/null +++ b/notesserver/docker-compose.test.yml @@ -0,0 +1,18 @@ +services: + mysql: + image: mysql:8.0 + environment: + MYSQL_ROOT_PASSWORD: + MYSQL_ALLOW_EMPTY_PASSWORD: "yes" + MYSQL_DATABASE: "edx_notes_api" + ports: + - 127.0.0.1:3306:3306 + + elasticsearch: + image: elasticsearch:7.13.4 + environment: + discovery.type: single-node + bootstrap.memory_lock: "true" + ES_JAVA_OPTS: "-Xms512m -Xmx512m" + ports: + - 127.0.0.1:9200:9200 diff --git a/notesserver/test_views.py b/notesserver/test_views.py index 23a8dee9..301724e7 100644 --- a/notesserver/test_views.py +++ b/notesserver/test_views.py @@ -22,7 +22,7 @@ def test_heartbeat(self): self.assertEqual(json.loads(bytes.decode(response.content, 'utf-8')), {"OK": True}) @skipIf(settings.ES_DISABLED, "Do not test if Elasticsearch service is disabled.") - @patch('notesserver.views.get_es') + @patch("notesapi.v1.views.elasticsearch.get_es") def test_heartbeat_failure_es(self, mocked_get_es): """ Elasticsearch is not reachable. @@ -65,7 +65,7 @@ def test_selftest_status(self): @skipIf(settings.ES_DISABLED, "Do not test if Elasticsearch service is disabled.") @patch('notesserver.views.datetime', datetime=Mock(wraps=datetime.datetime)) - @patch('notesserver.views.get_es') + @patch("notesapi.v1.views.elasticsearch.get_es") def test_selftest_data(self, mocked_get_es, mocked_datetime): """ Test returned data on success. @@ -101,7 +101,7 @@ def test_selftest_data_es_disabled(self, mocked_datetime): ) @skipIf(settings.ES_DISABLED, "Do not test if Elasticsearch service is disabled.") - @patch('notesserver.views.get_es') + @patch("notesapi.v1.views.elasticsearch.get_es") def test_selftest_failure_es(self, mocked_get_es): """ Elasticsearch is not reachable on selftest. diff --git a/notesserver/views.py b/notesserver/views.py index 994cd8fc..bf2026fb 100644 --- a/notesserver/views.py +++ b/notesserver/views.py @@ -1,11 +1,9 @@ import datetime import traceback -from django.conf import settings from django.db import connection from django.http import JsonResponse from django.http import HttpResponse -from elasticsearch.exceptions import TransportError from rest_framework import status from rest_framework.decorators import api_view, permission_classes from rest_framework.permissions import AllowAny @@ -15,11 +13,9 @@ import newrelic.agent except ImportError: # pragma: no cover newrelic = None # pylint: disable=invalid-name -if not settings.ES_DISABLED: - from elasticsearch_dsl.connections import connections - def get_es(): - return connections.get_connection() +from notesapi.v1.views import get_annotation_search_view_class +from notesapi.v1.views import SearchViewRuntimeError @api_view(['GET']) @@ -56,8 +52,10 @@ def heartbeat(request): except Exception: # pylint: disable=broad-exception-caught return JsonResponse({"OK": False, "check": "db"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - if not settings.ES_DISABLED and not get_es().ping(): - return JsonResponse({"OK": False, "check": "es"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + try: + get_annotation_search_view_class().heartbeat() + except SearchViewRuntimeError as e: + return JsonResponse({"OK": False, "check": e.args[0]}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) return JsonResponse({"OK": True}, status=status.HTTP_200_OK) @@ -70,18 +68,18 @@ def selftest(request): """ start = datetime.datetime.now() - if not settings.ES_DISABLED: - try: - es_status = get_es().info() - except TransportError: - return Response( - {"es_error": traceback.format_exc()}, - status=status.HTTP_500_INTERNAL_SERVER_ERROR - ) + response = {} + try: + response.update(get_annotation_search_view_class().selftest()) + except SearchViewRuntimeError as e: + return Response( + e.args[0], + status=status.HTTP_500_INTERNAL_SERVER_ERROR + ) try: db_status() - database = "OK" + response["db"] = "OK" except Exception: # pylint: disable=broad-exception-caught return Response( {"db_error": traceback.format_exc()}, @@ -90,14 +88,7 @@ def selftest(request): end = datetime.datetime.now() delta = end - start - - response = { - "db": database, - "time_elapsed": int(delta.total_seconds() * 1000) # In milliseconds. - } - - if not settings.ES_DISABLED: - response['es'] = es_status + response["time_elapsed"] = int(delta.total_seconds() * 1000) # In milliseconds. return Response(response) diff --git a/requirements/base.in b/requirements/base.in index dc2ab72b..0a9b7105 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -10,6 +10,7 @@ elasticsearch-dsl django-elasticsearch-dsl django-elasticsearch-dsl-drf django-cors-headers +meilisearch mysqlclient PyJWT gunicorn # MIT diff --git a/requirements/base.txt b/requirements/base.txt index 30bc5a40..f6470daa 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,6 +4,8 @@ # # make upgrade # +annotated-types==0.7.0 + # via pydantic asgiref==3.8.1 # via # django @@ -12,6 +14,8 @@ attrs==24.2.0 # via # jsonschema # referencing +camel-converter[pydantic]==4.0.1 + # via meilisearch certifi==2024.8.30 # via # elasticsearch @@ -100,6 +104,8 @@ jsonschema==4.23.0 # via drf-spectacular jsonschema-specifications==2024.10.1 # via jsonschema +meilisearch==0.31.5 + # via -r requirements/base.in mysqlclient==2.2.5 # via -r requirements/base.in newrelic==10.2.0 @@ -120,6 +126,10 @@ psutil==6.1.0 # via edx-django-utils pycparser==2.22 # via cffi +pydantic==2.9.2 + # via camel-converter +pydantic-core==2.23.4 + # via pydantic pyjwt[crypto]==2.9.0 # via # -r requirements/base.in @@ -147,6 +157,7 @@ requests==2.32.3 # via # -r requirements/base.in # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # jsonschema @@ -169,7 +180,8 @@ stevedore==5.3.0 typing-extensions==4.12.2 # via # edx-opaque-keys - # elasticsearch-dsl + # pydantic + # pydantic-core uritemplate==4.1.1 # via drf-spectacular urllib3==1.26.20 diff --git a/requirements/quality.txt b/requirements/quality.txt index 7f6ba23f..be859b79 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -4,6 +4,11 @@ # # make upgrade # +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # pydantic asgiref==3.8.1 # via # -r requirements/base.txt @@ -25,6 +30,11 @@ cachetools==5.5.0 # via # -r requirements/test.txt # tox +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # meilisearch certifi==2024.8.30 # via # -r requirements/base.txt @@ -242,6 +252,10 @@ mccabe==0.7.0 # via # -r requirements/test.txt # pylint +meilisearch==0.31.5 + # via + # -r requirements/base.txt + # -r requirements/test.txt more-itertools==10.5.0 # via -r requirements/test.txt mysqlclient==2.2.5 @@ -302,6 +316,16 @@ pycparser==2.22 # -r requirements/base.txt # -r requirements/test.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # pydantic pygments==2.18.0 # via # -r requirements/test.txt @@ -383,6 +407,7 @@ requests==2.32.3 # -r requirements/base.txt # -r requirements/test.txt # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # -r requirements/base.txt @@ -432,6 +457,8 @@ typing-extensions==4.12.2 # -r requirements/test.txt # edx-opaque-keys # faker + # pydantic + # pydantic-core uritemplate==4.1.1 # via # -r requirements/base.txt diff --git a/requirements/test.txt b/requirements/test.txt index d8412d3f..6251f94c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,6 +4,10 @@ # # make upgrade # +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # pydantic asgiref==3.8.1 # via # -r requirements/base.txt @@ -20,6 +24,10 @@ attrs==24.2.0 # referencing cachetools==5.5.0 # via tox +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/base.txt + # meilisearch certifi==2024.8.30 # via # -r requirements/base.txt @@ -175,6 +183,8 @@ markupsafe==3.0.2 # via jinja2 mccabe==0.7.0 # via pylint +meilisearch==0.31.5 + # via -r requirements/base.txt more-itertools==10.5.0 # via -r requirements/test.in mysqlclient==2.2.5 @@ -221,6 +231,14 @@ pycparser==2.22 # via # -r requirements/base.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/base.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/base.txt + # pydantic pygments==2.18.0 # via diff-cover pyjwt[crypto]==2.9.0 @@ -273,6 +291,7 @@ requests==2.32.3 # via # -r requirements/base.txt # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # -r requirements/base.txt @@ -311,6 +330,8 @@ typing-extensions==4.12.2 # -r requirements/base.txt # edx-opaque-keys # faker + # pydantic + # pydantic-core uritemplate==4.1.1 # via # -r requirements/base.txt diff --git a/tox.ini b/tox.ini index a9688cfb..2c64d0f9 100644 --- a/tox.ini +++ b/tox.ini @@ -24,8 +24,6 @@ commands = [testenv:quality] envdir = {toxworkdir}/{envname} -setenv = - DJANGO_SETTINGS_MODULE = notesserver.settings.test allowlist_externals = make deps = @@ -35,8 +33,6 @@ commands = [testenv:pii_check] envdir = {toxworkdir}/{envname} -setenv = - DJANGO_SETTINGS_MODULE = notesserver.settings.test allowlist_externals = make deps =