Skip to content

Commit

Permalink
fixes & tests
Browse files Browse the repository at this point in the history
  • Loading branch information
KonstantAnxiety committed Dec 24, 2024
1 parent c195125 commit 2cccae9
Show file tree
Hide file tree
Showing 10 changed files with 136 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def create_app(self, app_version: str) -> web.Application:

app.router.add_route("post", "/api/v2/files", files_views.FilesView)
app.router.add_route("post", "/api/v2/make_presigned_url", files_views.MakePresignedUrlView)
app.router.add_route("post", "/api/v2/download_presigned_url", files_views.DownloadPresignedUrlView)
app.router.add_route("post", "/api/v2/links", files_views.LinksView)
app.router.add_route("post", "/api/v2/documents", files_views.DocumentsView)
app.router.add_route("post", "/api/v2/update_connection_data", files_views.UpdateConnectionDataView)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@

LOGGER = logging.getLogger(__name__)

S3_KEY_PARTS_SEPARATOR = "--" # used to separate author user_id from the rest of the s3 object key to sign it
S3_KEY_PARTS_SEPARATOR = "/" # used to separate author user_id from the rest of the s3 object key to sign it


def get_file_type_from_name(
Expand Down Expand Up @@ -134,6 +134,10 @@ async def _chunk_iter(chunk_size: int = 10 * 1024 * 1024) -> AsyncGenerator[byte


class MakePresignedUrlView(FileUploaderBaseView):
PRESIGNED_URL_EXPIRATION_SECONDS: ClassVar[int] = 60 * 60 # 1 hour
PRESIGNED_URL_MIN_BYTES: ClassVar[int] = 1
PRESIGNED_URL_MAX_BYTES: ClassVar[int] = 200 * 1024 ** 2 # 200 MB

async def post(self) -> web.StreamResponse:
req_data = await self._load_post_request_schema_data(files_schemas.MakePresignedUrlRequestSchema)
content_md5: str = req_data["content_md5"]
Expand All @@ -147,9 +151,9 @@ async def post(self) -> web.StreamResponse:
url = await s3.client.generate_presigned_post(
Bucket=s3.tmp_bucket_name,
Key=s3_key,
ExpiresIn=60 * 60, # 1 hour
ExpiresIn=self.PRESIGNED_URL_EXPIRATION_SECONDS,
Conditions=[
["content-length-range", 1, 200 * 1024 * 1024], # 1B .. 200MB # TODO use constant from DataSink
["content-length-range", self.PRESIGNED_URL_MIN_BYTES, self.PRESIGNED_URL_MAX_BYTES],
{"Content-MD5": content_md5},
],
)
Expand All @@ -164,22 +168,23 @@ class DownloadPresignedUrlView(FileUploaderBaseView):
async def post(self) -> web.StreamResponse:
req_data = await self._load_post_request_schema_data(files_schemas.DownloadPresignedUrlRequestSchema)
filename: str = req_data["filename"]
key: str = req_data["key"]
s3_key: str = req_data["key"]

file_type = get_file_type_from_name(filename=filename, allow_xlsx=self.request.app["ALLOW_XLSX"])

s3 = self.dl_request.get_s3_service()

file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, key)
file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, s3_key)
if not file_exists:
raise exc.DocumentNotFound()

user_id_from_key = key.split(S3_KEY_PARTS_SEPARATOR)[0]
if user_id_from_key != self.dl_request.rci.user_id:
s3_key_parts = s3_key.split(S3_KEY_PARTS_SEPARATOR)
if len(s3_key_parts) != 2 or s3_key_parts[0] != self.dl_request.rci.user_id:
exc.PermissionDenied()

rmm = self.dl_request.get_redis_model_manager()
dfile = DataFile(
s3_key=s3_key,
manager=rmm,
filename=filename,
file_type=file_type,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@
pass


_TESTS_USER_ID = "_the_tests_asyncapp_user_id_"
_TESTS_USER_NAME = "_the_tests_asyncapp_user_name_"


def pytest_configure(config: Any) -> None: # noqa
common_pytest_configure(tracing_service_name="tests_bi_file_uploader")

Expand Down Expand Up @@ -215,8 +219,8 @@ class TestingFileUploaderApiAppFactory(FileUploaderApiAppFactory[FileUploaderAPI
def get_auth_middlewares(self) -> list[Middleware]:
    """Return auth middlewares that trust a fixed test user identity.

    Uses the module-level test constants so every test app factory agrees
    on the same fake user.
    """
    return [
        auth_trust_middleware(
            fake_user_id=_TESTS_USER_ID,
            fake_user_name=_TESTS_USER_NAME,
        )
    ]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import asyncio
import base64
import hashlib
import logging
import os

from dl_s3.utils import upload_to_s3_by_presigned
import pytest

from dl_api_commons.client.common import Req
Expand Down Expand Up @@ -64,21 +67,39 @@ def upload_file_req_12mb() -> Req:


@pytest.fixture(scope="function")
async def uploaded_file_id(s3_tmp_bucket, fu_client, csv_data) -> str:
    """Upload ``csv_data`` through the presigned-URL flow and return the file_id.

    Steps: request a presigned POST, upload the payload directly to S3,
    then register the uploaded object via the download_presigned_url endpoint.
    """
    # Base64-encoded MD5 digest, as required by the presigned POST policy (Content-MD5 condition).
    content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8")

    presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5))
    assert presigned_url_resp.status == 200, presigned_url_resp.json

    upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data)
    assert upload_resp.status == 204

    download_resp = await fu_client.make_request(
        ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv")
    )
    # Removed the duplicated status assertion that followed this one.
    assert download_resp.status == 201, download_resp.json

    # Give background processing a moment to pick the file up — presumably
    # an async worker consumes it; confirm the exact wait requirement.
    await asyncio.sleep(3)
    return download_resp.json["file_id"]


@pytest.fixture(scope="function")
async def uploaded_excel_id(
    s3_tmp_bucket,
    fu_client,
    excel_data,
    reader_app,
) -> str:
    """Upload ``excel_data`` (bytes) through the presigned-URL flow and return the file_id."""
    # excel_data is already bytes, so no encode step before hashing.
    content_md5 = base64.b64encode(hashlib.md5(excel_data).digest()).decode("utf-8")

    presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5))
    assert presigned_url_resp.status == 200, presigned_url_resp.json

    upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, excel_data)
    assert upload_resp.status == 204

    download_resp = await fu_client.make_request(
        ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "data.xlsx")
    )
    # Removed the duplicated status assertion that followed this one.
    assert download_resp.status == 201, download_resp.json

    await asyncio.sleep(3)
    return download_resp.json["file_id"]
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import asyncio
import http
import io
import json
import uuid
import hashlib
from urllib.parse import urlparse
import base64

import aiohttp
import attr
from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR
from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR, MakePresignedUrlView
from dl_s3.utils import upload_to_s3_by_presigned
import pytest

from dl_api_commons.base_models import RequestContextInfo
Expand Down Expand Up @@ -44,8 +50,42 @@ async def test_make_presigned_url(fu_client, s3_tmp_bucket, rci):


@pytest.mark.asyncio
async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci, csv_data):
    """Happy path: presign, upload to S3, then register the object for import."""
    content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8")

    presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5))
    assert presigned_url_resp.status == 200, presigned_url_resp.json

    upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data)
    upload_resp_data = await upload_resp.read()
    assert upload_resp.status == 204, upload_resp_data

    download_resp = await fu_client.make_request(
        ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv")
    )
    assert download_resp.status == 201, download_resp.json


@pytest.mark.asyncio  # added for consistency with sibling tests — harmless if asyncio_mode=auto
async def test_upload_presigned_too_large_file(monkeypatch, fu_client, s3_tmp_bucket, rci, csv_data):
    """S3 must reject an upload exceeding the signed content-length-range policy."""
    # Shrink the allowed max size below the payload size so the policy triggers.
    monkeypatch.setattr(MakePresignedUrlView, "PRESIGNED_URL_MAX_BYTES", 32)

    content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8")

    presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5))
    assert presigned_url_resp.status == 200, presigned_url_resp.json

    # raise_for_status inside the helper turns S3's policy rejection into an exception.
    with pytest.raises(aiohttp.ClientResponseError):
        await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data)


@pytest.mark.asyncio  # added for consistency with sibling tests — harmless if asyncio_mode=auto
async def test_upload_presigned_bad_key(fu_client, s3_tmp_bucket, rci, csv_data):
    """Tampering with the signed object key must make S3 reject the upload."""
    # NOTE(review): removed the unused `monkeypatch` fixture parameter.
    content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8")

    presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5))
    assert presigned_url_resp.status == 200, presigned_url_resp.json

    # Substitute a key the POST policy was not signed for.
    presigned_url_resp.json["fields"]["key"] = "hacker/file"

    with pytest.raises(aiohttp.ClientResponseError):
        await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data)


@pytest.mark.asyncio
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ def presigned_url(cls, content_md5: str, *, require_ok: bool = True) -> Req:
require_ok=require_ok,
)

@classmethod
def presigned_url_download(cls, key: str, filename: str, *, require_ok: bool = True) -> Req:
    """Build the POST request for ``/api/v2/download_presigned_url``.

    :param key: S3 object key produced by the presigned-upload step.
    :param filename: original file name sent alongside the key.
    :param require_ok: whether the client should fail on non-2xx responses.
    """
    payload = {
        "key": key,
        "filename": filename,
    }
    return Req(
        method="post",
        url="/api/v2/download_presigned_url",
        data_json=payload,
        require_ok=require_ok,
    )

@classmethod
def file_status(cls, file_id: str) -> Req:
return Req(
Expand Down
13 changes: 6 additions & 7 deletions lib/dl_file_uploader_api_lib/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,14 @@ services:
- 51404:6379

s3-storage:
build:
context: ../testenv-common/images
dockerfile: Dockerfile.s3-storage
command: bash /data/entrypoint.sh
image: minio/minio:RELEASE.2024-12-18T13-15-44Z@sha256:1dce27c494a16bae114774f1cec295493f3613142713130c2d22dd5696be6ad3
environment:
S3BACKEND: "mem"
REMOTE_MANAGEMENT_DISABLE: 1
MINIO_ROOT_USER: accessKey1
MINIO_ROOT_PASSWORD: verySecretKey1
MINIO_DOMAIN: local
command: server /export
ports:
- 51420:8000
- "51420:9000"

init-db:
depends_on:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,7 @@ class DataFile(RedisModelUserIdAuth):
def s3_key_old(self) -> str:
    """Return the stored ``s3_key``, falling back to ``self.id`` for legacy records.

    Transition from s3_key generated by self.id to stored self.s3_key,
    to be removed in future releases. See also: DataFileSchema.
    """
    # Explicit None check: ``self.s3_key or self.id`` would also fall back on
    # an empty-string key, silently changing which S3 object is addressed.
    if self.s3_key is not None:
        return self.s3_key
    return self.id

def get_secret_keys(self) -> set[DataKey]:
if self.user_source_properties is None:
Expand Down
2 changes: 1 addition & 1 deletion lib/dl_s3/dl_s3/s3_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ async def initialize(self) -> None:
aws_access_key_id=self._access_key_id,
aws_secret_access_key=self._secret_access_key,
endpoint_url=self._endpoint_url,
config=AioConfig(signature_version="s3v4"),
config=AioConfig(signature_version="s3v4"), # v4 signature is required to generate presigned URLs with restriction policies
)

session = get_session()
Expand Down
27 changes: 25 additions & 2 deletions lib/dl_s3/dl_s3/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,36 @@
from __future__ import annotations

from typing import TYPE_CHECKING
import typing

import aiohttp
import botocore.exceptions

if TYPE_CHECKING:
if typing.TYPE_CHECKING:
from types_aiobotocore_s3 import S3Client as AsyncS3Client


async def upload_to_s3_by_presigned(presigned_url: dict[str, typing.Any], content_md5: str, data: str) -> aiohttp.ClientResponse:
    """Upload ``data`` to S3 using a presigned POST.

    :param presigned_url: result of ``generate_presigned_post`` — a dict with
        "url" and "fields" keys.
    :param content_md5: base64-encoded MD5 digest of ``data``; must match the
        value the URL policy was signed with.
    :param data: payload to upload.
    :return: the aiohttp response (S3 replies 204 on success).
    :raises aiohttp.ClientResponseError: if S3 rejects the upload
        (policy violation, tampered key, etc.).
    """
    upload_url = presigned_url["url"]
    # Copy so the caller's dict is not mutated as a side effect.
    upload_url_fields = dict(presigned_url["fields"])
    upload_url_fields["content-md5"] = content_md5

    async with aiohttp.ClientSession() as session:
        with aiohttp.MultipartWriter("form-data") as mpwriter:
            # Presigned-POST policy fields go first, then the file part.
            # set_content_disposition fully rewrites the header, so we no longer
            # pass a throwaway "attachment" disposition in the append() call.
            for field_name, field_value in upload_url_fields.items():
                part = mpwriter.append(field_value, {"Content-Type": "text/plain"})
                part.set_content_disposition("form-data", name=field_name)

            file_part = mpwriter.append(data, {"Content-Type": "text/plain"})
            file_part.set_content_disposition("form-data", name="file")

            async with session.post(
                url=upload_url,
                data=mpwriter,
            ) as resp:
                resp.raise_for_status()
                # Read (and cache) the body while the session is still open so
                # callers can inspect it after this function returns.
                await resp.read()
                return resp


async def s3_file_exists(s3_client: AsyncS3Client, bucket: str, key: str) -> bool:
try:
s3_resp = await s3_client.head_object(
Expand Down

0 comments on commit 2cccae9

Please sign in to comment.