PRMP-1342: Create lambda to handle deleted document references #487

Open · wants to merge 3 commits into main
14 changes: 14 additions & 0 deletions .github/workflows/base-lambdas-reusable-deploy-all.yml
@@ -94,6 +94,20 @@ jobs:
    secrets:
      AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}

  deploy_delete_document_object_handler:
    name: Deploy delete_document_object_handler
    uses: ./.github/workflows/base-lambdas-reusable-deploy.yml
    with:
      environment: ${{ inputs.environment }}
      python_version: ${{ inputs.python_version }}
      build_branch: ${{ inputs.build_branch }}
      sandbox: ${{ inputs.sandbox }}
      lambda_handler_name: delete_document_object_handler
      lambda_aws_name: DeleteDocumentObjectS3
      lambda_layer_names: 'core_lambda_layer'
    secrets:
      AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}

  deploy_document_manifest_job_lambda:
    name: Deploy document_manifest_job_lambda
    uses: ./.github/workflows/base-lambdas-reusable-deploy.yml
6 changes: 6 additions & 0 deletions lambdas/enums/document_retention.py
@@ -0,0 +1,6 @@
from enum import IntEnum


class DocumentRetentionDays(IntEnum):
    SOFT_DELETE = 56
    DEATH = 3650
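
These retention periods feed the DynamoDB TTL that delete_document_references sets further down. A minimal sketch of the conversion, assuming the IntEnum members are used directly in the arithmetic (a standalone illustration, not code from this PR):

from datetime import datetime, timezone

from enums.document_retention import DocumentRetentionDays


def soft_delete_ttl() -> int:
    # IntEnum members behave as plain ints, so the day count multiplies directly
    ttl_seconds = DocumentRetentionDays.SOFT_DELETE * 24 * 60 * 60
    # DynamoDB TTL expects an integer epoch timestamp; this one lands 56 days out
    return int(datetime.now(timezone.utc).timestamp() + ttl_seconds)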
12 changes: 12 additions & 0 deletions lambdas/enums/lambda_error.py
@@ -232,6 +232,14 @@ def to_str(self) -> str:
"""
Errors for DocumentDeletionServiceException
"""
DocDelInvalidStreamEvent = {
"err_code": "DDS_4001",
"message": "Failed to delete document object",
}
DocDelObjectFailure = {
"err_code": "DDS_4002",
"message": "Failed to delete document object",
}
DocDelClient = {
"err_code": "DDS_5001",
"message": "Failed to delete documents",
@@ -470,6 +478,10 @@ def to_str(self) -> str:
"err_code": "LGL_400",
"message": "Incomplete record, Failed to create document manifest",
}
DynamoInvalidStreamEvent = {
"err_code": "DBS_4001",
"message": "Failed to parse DynamoDb event stream",
}

MockError = {
"message": "Client error",
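A quick sketch of how the new deletion errors are meant to surface, assuming DocumentDeletionServiceException takes an HTTP status plus a LambdaError member (which is how the handler below uses it):

from enums.lambda_error import LambdaError
from utils.lambda_exceptions import DocumentDeletionServiceException


def reject_invalid_stream_event() -> None:
    # The 400 status pairs with the DDS_4001 err_code/message payload
    raise DocumentDeletionServiceException(400, LambdaError.DocDelInvalidStreamEvent)
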
12 changes: 0 additions & 12 deletions lambdas/enums/s3_lifecycle_tags.py

This file was deleted.

59 changes: 59 additions & 0 deletions lambdas/handlers/delete_document_object_handler.py
@@ -0,0 +1,59 @@
from enums.lambda_error import LambdaError
from enums.logging_app_interaction import LoggingAppInteraction
from models.document_reference import DocumentReference
from pydantic import ValidationError
from services.document_deletion_service import DocumentDeletionService
from utils.audit_logging_setup import LoggingService
from utils.decorators.handle_lambda_exceptions import handle_lambda_exceptions
from utils.decorators.override_error_check import override_error_check
from utils.decorators.set_audit_arg import set_request_context_for_logging
from utils.decorators.validate_dynamo_stream_event import validate_dynamo_stream
from utils.dynamo_utils import parse_dynamo_record
from utils.lambda_exceptions import DocumentDeletionServiceException
from utils.lambda_response import ApiGatewayResponse
from utils.request_context import request_context

logger = LoggingService(__name__)


@set_request_context_for_logging
@override_error_check
@handle_lambda_exceptions
@validate_dynamo_stream
def lambda_handler(event, context):
    request_context.app_interaction = LoggingAppInteraction.DELETE_RECORD.value

    logger.info(
        "Delete Document Object handler has been triggered by DynamoDb REMOVE event"
    )
    try:
        event_record = event["Records"][0]

        event_type = event_record.get("eventName")
        deleted_dynamo_reference = event_record.get("dynamodb").get("OldImage", {})

        if event_type != "REMOVE" or not deleted_dynamo_reference:
            logger.error(
                "Failed to extract deleted record from DynamoDb stream",
                {"Results": "Failed to delete document"},
            )
            raise DocumentDeletionServiceException(
                400, LambdaError.DynamoInvalidStreamEvent
            )
        parsed_dynamo_record = parse_dynamo_record(deleted_dynamo_reference)
        document = DocumentReference.model_validate(parsed_dynamo_record)

        deletion_service = DocumentDeletionService()
        deletion_service.handle_object_delete(deleted_reference=document)
    except (ValueError, ValidationError) as e:
        logger.error(
            f"Failed to parse Document Reference from deleted record: {str(e)}",
            {"Results": "Failed to delete document"},
        )
        raise DocumentDeletionServiceException(
            400, LambdaError.DynamoInvalidStreamEvent
        )

    return ApiGatewayResponse(
[Contributor comment on this line: gateway integration set to false in terraform, do we need it for returning gateway responses?]
        200, "Successfully deleted Document Reference object", "GET"
    ).create_api_gateway_response()
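
For reference, a minimal shape of the DynamoDB stream event this handler accepts. The attribute names inside OldImage are illustrative, and parse_dynamo_record presumably unwraps DynamoDB's typed attribute-value encoding before pydantic validation:

remove_event = {
    "Records": [
        {
            "eventName": "REMOVE",
            "dynamodb": {
                # OldImage carries the item state before deletion, in
                # DynamoDB's typed attribute-value format
                "OldImage": {
                    "ID": {"S": "3d8683b9-1665-40d2-8499-6e8302d507ff"},
                    "FileName": {"S": "1of1_Lloyd_George_Record.pdf"},
                    "FileLocation": {"S": "s3://test-lg-bucket/9000000009/test-key"},
                }
            },
        }
    ]
}

Any other eventName, or an empty OldImage, raises DynamoInvalidStreamEvent with a 400.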
2 changes: 1 addition & 1 deletion lambdas/handlers/delete_document_reference_handler.py
@@ -43,7 +43,7 @@ def lambda_handler(event, context):

    deletion_service = DocumentDeletionService()

    files_deleted = deletion_service.handle_delete(nhs_number, document_types)
    files_deleted = deletion_service.handle_reference_delete(nhs_number, document_types)
    if files_deleted:
        logger.info(
            "Documents were deleted successfully", {"Result": "Successful deletion"}
39 changes: 34 additions & 5 deletions lambdas/services/document_deletion_service.py
@@ -1,11 +1,12 @@
import os
import uuid
from typing import Literal
from urllib.parse import urlparse

from botocore.exceptions import ClientError
from enums.document_retention import DocumentRetentionDays
from enums.lambda_error import LambdaError
from enums.nrl_sqs_upload import NrlActionTypes
from enums.s3_lifecycle_tags import S3LifecycleTags
from enums.snomed_codes import SnomedCodesCategory, SnomedCodesType
from enums.supported_document_types import SupportedDocumentTypes
from models.document_reference import DocumentReference
@@ -15,7 +16,7 @@
from services.lloyd_george_stitch_job_service import LloydGeorgeStitchJobService
from utils.audit_logging_setup import LoggingService
from utils.common_query_filters import NotDeleted
from utils.exceptions import DynamoServiceException
from utils.exceptions import DocumentServiceException, DynamoServiceException
from utils.lambda_exceptions import DocumentDeletionServiceException

logger = LoggingService(__name__)
@@ -27,7 +28,7 @@ def __init__(self):
        self.stitch_service = LloydGeorgeStitchJobService()
        self.sqs_service = SQSService()

    def handle_delete(
    def handle_reference_delete(
        self, nhs_number: str, doc_types: list[SupportedDocumentTypes]
    ) -> list[DocumentReference]:
        files_deleted = []
@@ -38,6 +39,34 @@ def handle_delete(
        self.send_sqs_message_to_remove_pointer(nhs_number)
        return files_deleted

    def handle_object_delete(self, deleted_reference: DocumentReference):
        try:
            s3_uri = deleted_reference.file_location

            parsed_uri = urlparse(s3_uri)
            bucket_name = parsed_uri.netloc
            object_key = parsed_uri.path.lstrip("/")

            if not bucket_name or not object_key:
                raise DocumentDeletionServiceException(
                    400, LambdaError.DocDelObjectFailure
                )

            self.document_service.delete_document_object(
                bucket=bucket_name, key=object_key
            )

            logger.info(
                "Successfully deleted Document Reference S3 Object",
                {"Result": "Successful deletion"},
            )
        except DocumentServiceException as e:
            logger.error(
                str(e),
                {"Results": "Failed to delete document"},
            )
            raise DocumentDeletionServiceException(400, LambdaError.DocDelObjectFailure)

    def get_documents_references_in_storage(
        self,
        nhs_number: str,
@@ -69,10 +98,10 @@ def delete_specific_doc_type(
        try:
            results = self.get_documents_references_in_storage(nhs_number, doc_type)
            if results:
                self.document_service.delete_documents(
                self.document_service.delete_document_references(
                    table_name=doc_type.get_dynamodb_table_name(),
                    document_references=results,
                    type_of_delete=str(S3LifecycleTags.SOFT_DELETE.value),
                    document_ttl_days=DocumentRetentionDays.SOFT_DELETE,
                )

                logger.info(
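handle_object_delete relies on urlparse to split the stored file_location: for an s3:// URI the bucket is the netloc and the key is the path. A quick illustration with a made-up location:

from urllib.parse import urlparse

parsed_uri = urlparse("s3://test-lg-bucket/9000000009/abc-123")
bucket_name = parsed_uri.netloc           # "test-lg-bucket"
object_key = parsed_uri.path.lstrip("/")  # "9000000009/abc-123"
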
47 changes: 28 additions & 19 deletions lambdas/services/document_service.py
@@ -2,14 +2,17 @@

from boto3.dynamodb.conditions import Attr, ConditionBase
from enums.metadata_field_names import DocumentReferenceMetadataFields
from enums.s3_lifecycle_tags import S3LifecycleDays, S3LifecycleTags
from enums.supported_document_types import SupportedDocumentTypes
from models.document_reference import DocumentReference
from services.base.dynamo_service import DynamoDBService
from services.base.s3_service import S3Service
from utils.audit_logging_setup import LoggingService
from utils.dynamo_utils import filter_uploaded_docs_and_recently_uploading_docs
from utils.exceptions import FileUploadInProgress, NoAvailableDocument
from utils.exceptions import (
    DocumentServiceException,
    FileUploadInProgress,
    NoAvailableDocument,
)

logger = LoggingService(__name__)

@@ -67,22 +70,15 @@ def fetch_documents_from_table_with_filter(
            documents.append(document)
        return documents

    def delete_documents(
    def delete_document_references(
        self,
        table_name: str,
        document_references: list[DocumentReference],
        type_of_delete: str,
        document_ttl_days: int,
    ):
        deletion_date = datetime.now(timezone.utc)

        if type_of_delete == S3LifecycleTags.DEATH_DELETE.value:
            ttl_days = S3LifecycleDays.DEATH_DELETE
            tag_key = str(S3LifecycleTags.DEATH_DELETE.value)
        else:
            ttl_days = S3LifecycleDays.SOFT_DELETE
            tag_key = str(S3LifecycleTags.SOFT_DELETE.value)

        ttl_seconds = ttl_days * 24 * 60 * 60
        ttl_seconds = document_ttl_days * 24 * 60 * 60
        document_reference_ttl = int(deletion_date.timestamp() + ttl_seconds)

        update_fields = {
@@ -95,17 +91,30 @@
        logger.info(f"Deleting items in table: {table_name}")

        for reference in document_references:
            self.s3_service.create_object_tag(
                file_key=reference.get_file_key(),
                s3_bucket_name=reference.get_file_bucket(),
                tag_key=tag_key,
                tag_value=str(S3LifecycleTags.ENABLE_TAG.value),
            )

            self.dynamo_service.update_item(
                table_name, reference.id, updated_fields=update_fields
            )

    def delete_document_object(self, bucket: str, key: str):
        file_exists = self.s3_service.file_exist_on_s3(
            s3_bucket_name=bucket, file_key=key
        )

        if not file_exists:
            raise DocumentServiceException("Document does not exist in S3")

        logger.info(
            f"Located file `{key}` in `{bucket}`, attempting S3 object deletion"
        )
        self.s3_service.delete_object(s3_bucket_name=bucket, file_key=key)

        file_exists = self.s3_service.file_exist_on_s3(
            s3_bucket_name=bucket, file_key=key
        )

        if file_exists:
            raise DocumentServiceException("Document located in S3 after deletion")

    def update_documents(
        self,
        table_name: str,
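The new delete_document_object follows a check-delete-verify pattern. Assuming S3Service wraps boto3's head_object and delete_object in the usual way (the wrapper itself is not shown in this diff), the equivalent raw-boto3 flow is roughly:

import boto3
from botocore.exceptions import ClientError
from utils.exceptions import DocumentServiceException

s3 = boto3.client("s3")


def object_exists(bucket: str, key: str) -> bool:
    try:
        s3.head_object(Bucket=bucket, Key=key)
        return True
    except ClientError as e:
        # head_object reports a missing object as a "404" client error
        if e.response["Error"]["Code"] == "404":
            return False
        raise


def delete_and_verify(bucket: str, key: str) -> None:
    if not object_exists(bucket, key):
        raise DocumentServiceException("Document does not exist in S3")
    s3.delete_object(Bucket=bucket, Key=key)
    if object_exists(bucket, key):
        raise DocumentServiceException("Document located in S3 after deletion")
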
6 changes: 6 additions & 0 deletions lambdas/tests/unit/conftest.py
@@ -6,6 +6,7 @@
from unittest import mock

import pytest
from botocore.exceptions import ClientError
from models.document_reference import DocumentReference
from models.pds_models import Patient, PatientDetails
from pydantic import ValidationError
@@ -290,6 +291,11 @@ class MockError(Enum):
    }


MOCK_CLIENT_ERROR = ClientError(
    {"Error": {"Code": 500, "Message": "Test error message"}}, "Query"
)


@pytest.fixture
def mock_temp_folder(mocker):
    temp_folder = tempfile.mkdtemp()
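
MOCK_CLIENT_ERROR gives tests a canned boto3 failure. A sketch of how it might be used with pytest-mock, assuming DocumentService can be constructed in the unit-test environment as the other fixtures here suggest:

import pytest
from botocore.exceptions import ClientError
from services.document_service import DocumentService
from tests.unit.conftest import MOCK_CLIENT_ERROR


def test_delete_document_object_propagates_client_error(mocker):
    service = DocumentService()
    # Force the S3 existence check to fail with the canned 500 error
    mocker.patch.object(
        service.s3_service, "file_exist_on_s3", side_effect=MOCK_CLIENT_ERROR
    )
    with pytest.raises(ClientError):
        service.delete_document_object(bucket="test-bucket", key="test-key")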