Skip to content

Commit

Permalink
Check if file exists before processing
Browse files Browse the repository at this point in the history
  • Loading branch information
daniel-va committed Oct 16, 2024
1 parent a0bea39 commit 9b7e728
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
17 changes: 13 additions & 4 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import os
import shutil
import uuid
from random import randint
from time import sleep
from typing import Annotated

from fastapi import FastAPI, Depends, status, HTTPException, BackgroundTasks, Response
Expand Down Expand Up @@ -41,7 +39,18 @@ def start(
detail={"message": "input must be a PDF file"}
)

task.start(payload.file, background_tasks, lambda: process(payload, settings))
# Connect once here; the same client is reused by the background task below.
aws_client = aws.connect(settings)
# Check for the input object up front so a missing file is reported to the
# caller immediately instead of surfacing later as a failed background task.
has_file = aws_client.exists_file(
    settings.s3_input_bucket,
    f'{settings.s3_input_folder}{payload.file}',
)
if not has_file:
    raise HTTPException(
        # The status module has no `HTTP_422_BAD_REQUEST` constant; referencing
        # it raises AttributeError and turns this into a 500. The 422 constant
        # is named HTTP_422_UNPROCESSABLE_ENTITY.
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        detail={"message": "file does not exist"}
    )

task.start(payload.file, background_tasks, lambda: process(payload, aws_client, settings))
return Response(status_code=status.HTTP_204_NO_CONTENT)


Expand Down Expand Up @@ -84,6 +93,7 @@ def collect(

def process(
payload: StartPayload,
aws_client: aws.Client,
settings: Annotated[ApiSettings, Depends(api_settings)],
):
if settings.skip_processing:
Expand All @@ -98,7 +108,6 @@ def process(
input_path = os.path.join(tmp_dir, "input.pdf")
output_path = os.path.join(tmp_dir, "output.pdf")

aws_client = aws.connect(settings)
aws.load_file(
aws_client.bucket(settings.s3_input_bucket),
f'{settings.s3_input_folder}{payload.file}',
Expand Down
11 changes: 11 additions & 0 deletions aws/aws.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass

import boto3
from botocore.exceptions import ClientError
from mypy_boto3_s3 import S3ServiceResource
from mypy_boto3_s3.service_resource import Bucket
from mypy_boto3_textract import TextractClient as Textractor
Expand All @@ -18,6 +19,16 @@ class Client:
def bucket(self, name: str) -> Bucket:
return self.s3.Bucket(name)

def exists_file(self, bucket_name: str, key: str) -> bool:
    """Return whether an object with the given key exists in the bucket.

    Calls ``load()`` on the S3 object resource and interprets a
    "not found" client error as absence.

    Args:
        bucket_name: Name of the S3 bucket to look in.
        key: Full object key, including any folder prefix.

    Returns:
        True if ``load()`` succeeds, False if S3 reports the object as
        not found.

    Raises:
        ClientError: For any other client error (for example a permission
            failure); it is re-raised unchanged.
    """
    # Error codes that all mean "the object is not there". A failed load()
    # normally reports the bare HTTP status "404"; the named codes are
    # accepted as well so equivalent responses are not treated as failures.
    not_found_codes = ('404', 'NoSuchKey', 'NotFound')
    try:
        self.s3.Object(bucket_name, key).load()
    except ClientError as e:
        # Look the code up defensively: a malformed error payload must not
        # replace the original ClientError with a KeyError.
        code = str(e.response.get('Error', {}).get('Code', ''))
        if code in not_found_codes:
            return False
        # Bare raise propagates the original exception and traceback as-is.
        raise
    return True


def connect(settings: ApiSettings) -> Client:
has_profile = is_set(settings.aws_profile)
Expand Down
2 changes: 1 addition & 1 deletion utils/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def run(file: str, target: typing.Callable[[], Result]):
value = target()
result = Output(ok=True, value=value)
logging.info(f"Task for file '{file}' has been completed.")
except RuntimeError as e:
except Exception as e:
logging.exception(f"Processing of '{file}' failed")
result = Output(ok=False, value=e)

Expand Down

0 comments on commit 9b7e728

Please sign in to comment.