From 2cccae9ccefb9d0a72c4ef49fa6b8b0c37902272 Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Tue, 24 Dec 2024 19:49:42 +0300 Subject: [PATCH] fixes & tests --- .../dl_file_uploader_api_lib/app.py | 1 + .../dl_file_uploader_api_lib/views/files.py | 19 +++++--- .../conftest.py | 8 +++- .../db/conftest.py | 37 +++++++++++---- .../db/test_files_api.py | 46 +++++++++++++++++-- .../req_builder.py | 12 +++++ .../docker-compose.yml | 13 +++--- .../redis_model/models/models.py | 4 +- lib/dl_s3/dl_s3/s3_service.py | 2 +- lib/dl_s3/dl_s3/utils.py | 27 ++++++++++- 10 files changed, 136 insertions(+), 33 deletions(-) diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/app.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/app.py index 06fc4723e..6a98e65ef 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/app.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/app.py @@ -137,6 +137,7 @@ def create_app(self, app_version: str) -> web.Application: app.router.add_route("post", "/api/v2/files", files_views.FilesView) app.router.add_route("post", "/api/v2/make_presigned_url", files_views.MakePresignedUrlView) + app.router.add_route("post", "/api/v2/download_presigned_url", files_views.DownloadPresignedUrlView) app.router.add_route("post", "/api/v2/links", files_views.LinksView) app.router.add_route("post", "/api/v2/documents", files_views.DocumentsView) app.router.add_route("post", "/api/v2/update_connection_data", files_views.UpdateConnectionDataView) diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py index 8932e50d4..283e15771 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py @@ -54,7 +54,7 @@ LOGGER = logging.getLogger(__name__) -S3_KEY_PARTS_SEPARATOR = "--" # used to separate author user_id from the rest of the s3 object key to sign it +S3_KEY_PARTS_SEPARATOR = "/" # used to separate author user_id from the rest of the s3 object key to sign it def get_file_type_from_name( @@ -134,6 +134,10 @@ async def _chunk_iter(chunk_size: int = 10 * 1024 * 1024) -> AsyncGenerator[byte class MakePresignedUrlView(FileUploaderBaseView): + PRESIGNED_URL_EXPIRATION_SECONDS: ClassVar[int] = 60 * 60 # 1 hour + PRESIGNED_URL_MIN_BYTES: ClassVar[int] = 1 + PRESIGNED_URL_MAX_BYTES: ClassVar[int] = 200 * 1024 ** 2 # 200 MB + async def post(self) -> web.StreamResponse: req_data = await self._load_post_request_schema_data(files_schemas.MakePresignedUrlRequestSchema) content_md5: str = req_data["content_md5"] @@ -147,9 +151,9 @@ async def post(self) -> web.StreamResponse: url = await s3.client.generate_presigned_post( Bucket=s3.tmp_bucket_name, Key=s3_key, - ExpiresIn=60 * 60, # 1 hour + ExpiresIn=self.PRESIGNED_URL_EXPIRATION_SECONDS, Conditions=[ - ["content-length-range", 1, 200 * 1024 * 1024], # 1B .. 200MB # TODO use constant from DataSink + ["content-length-range", self.PRESIGNED_URL_MIN_BYTES, self.PRESIGNED_URL_MAX_BYTES], {"Content-MD5": content_md5}, ], ) @@ -164,22 +168,23 @@ class DownloadPresignedUrlView(FileUploaderBaseView): async def post(self) -> web.StreamResponse: req_data = await self._load_post_request_schema_data(files_schemas.DownloadPresignedUrlRequestSchema) filename: str = req_data["filename"] - key: str = req_data["key"] + s3_key: str = req_data["key"] file_type = get_file_type_from_name(filename=filename, allow_xlsx=self.request.app["ALLOW_XLSX"]) s3 = self.dl_request.get_s3_service() - file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, key) + file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, s3_key) if not file_exists: raise exc.DocumentNotFound() - user_id_from_key = key.split(S3_KEY_PARTS_SEPARATOR)[0] - if user_id_from_key != self.dl_request.rci.user_id: + s3_key_parts = s3_key.split(S3_KEY_PARTS_SEPARATOR) + if len(s3_key_parts) != 2 or s3_key_parts[0] != self.dl_request.rci.user_id: exc.PermissionDenied() rmm = self.dl_request.get_redis_model_manager() dfile = DataFile( + s3_key=s3_key, manager=rmm, filename=filename, file_type=file_type, diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py index 0c844d977..546f74c9d 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py @@ -98,6 +98,10 @@ pass +_TESTS_USER_ID = "_the_tests_asyncapp_user_id_" +_TESTS_USER_NAME = "_the_tests_asyncapp_user_name_" + + def pytest_configure(config: Any) -> None: # noqa common_pytest_configure(tracing_service_name="tests_bi_file_uploader") @@ -215,8 +219,8 @@ class TestingFileUploaderApiAppFactory(FileUploaderApiAppFactory[FileUploaderAPI def get_auth_middlewares(self) -> list[Middleware]: return [ auth_trust_middleware( - fake_user_id="_the_tests_file_uploader_api_user_id_", - fake_user_name="_the_tests_file_uploader_api_user_name_", + fake_user_id=_TESTS_USER_ID, + fake_user_name=_TESTS_USER_NAME, ) ] diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py index ae254b5c7..30ce34216 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py @@ -1,7 +1,10 @@ import asyncio +import base64 +import hashlib import logging import os +from dl_s3.utils import upload_to_s3_by_presigned import pytest from dl_api_commons.client.common import Req @@ -64,21 +67,39 @@ def upload_file_req_12mb() -> Req: @pytest.fixture(scope="function") -async def uploaded_file_id(s3_tmp_bucket, fu_client, upload_file_req) -> str: - resp = await fu_client.make_request(upload_file_req) - assert resp.status == 201 +async def uploaded_file_id(s3_tmp_bucket, fu_client, csv_data) -> str: + content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) + assert upload_resp.status == 204 + + download_resp = await fu_client.make_request(ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv")) + assert download_resp.status == 201, download_resp.json + + assert download_resp.status == 201 await asyncio.sleep(3) - return resp.json["file_id"] + return download_resp.json["file_id"] @pytest.fixture(scope="function") async def uploaded_excel_id( s3_tmp_bucket, fu_client, - upload_excel_req, + excel_data, reader_app, ) -> str: - resp = await fu_client.make_request(upload_excel_req) - assert resp.status == 201 + content_md5 = base64.b64encode(hashlib.md5(excel_data).digest()).decode("utf-8") + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, excel_data) + assert upload_resp.status == 204 + + download_resp = await fu_client.make_request(ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "data.xlsx")) + assert download_resp.status == 201, download_resp.json + + assert download_resp.status == 201 await asyncio.sleep(3) - return resp.json["file_id"] + return download_resp.json["file_id"] diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py index 0ee5740eb..d92db464e 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py @@ -1,10 +1,16 @@ import asyncio import http +import io import json import uuid +import hashlib +from urllib.parse import urlparse +import base64 +import aiohttp import attr -from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR +from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR, MakePresignedUrlView +from dl_s3.utils import upload_to_s3_by_presigned import pytest from dl_api_commons.base_models import RequestContextInfo @@ -44,8 +50,42 @@ async def test_make_presigned_url(fu_client, s3_tmp_bucket, rci): @pytest.mark.asyncio -async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci): - # TODO test +async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci, csv_data): + content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") + + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) + upload_resp_data = await upload_resp.read() + assert upload_resp.status == 204, upload_resp_data + + download_resp = await fu_client.make_request(ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv")) + assert download_resp.status == 201, download_resp.json + + +async def test_upload_presigned_too_large_file(monkeypatch, fu_client, s3_tmp_bucket, rci, csv_data): + monkeypatch.setattr(MakePresignedUrlView, "PRESIGNED_URL_MAX_BYTES", 32) + + content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") + + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + with pytest.raises(aiohttp.ClientResponseError): + await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) + + +async def test_upload_presigned_bad_key(monkeypatch, fu_client, s3_tmp_bucket, rci, csv_data): + content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") + + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + presigned_url_resp.json["fields"]["key"] = "hacker/file" + + with pytest.raises(aiohttp.ClientResponseError): + await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) @pytest.mark.asyncio diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/req_builder.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/req_builder.py index 6c24ee012..96f8ab7ac 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/req_builder.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/req_builder.py @@ -109,6 +109,18 @@ def presigned_url(cls, content_md5: str, *, require_ok: bool = True) -> Req: require_ok=require_ok, ) + @classmethod + def presigned_url_download(cls, key: str, filename: str, *, require_ok: bool = True) -> Req: + return Req( + method="post", + url="/api/v2/download_presigned_url", + data_json={ + "key": key, + "filename": filename, + }, + require_ok=require_ok, + ) + @classmethod def file_status(cls, file_id: str) -> Req: return Req( diff --git a/lib/dl_file_uploader_api_lib/docker-compose.yml b/lib/dl_file_uploader_api_lib/docker-compose.yml index 6f13199cf..ab9402f0e 100644 --- a/lib/dl_file_uploader_api_lib/docker-compose.yml +++ b/lib/dl_file_uploader_api_lib/docker-compose.yml @@ -11,15 +11,14 @@ services: - 51404:6379 s3-storage: - build: - context: ../testenv-common/images - dockerfile: Dockerfile.s3-storage - command: bash /data/entrypoint.sh + image: minio/minio:RELEASE.2024-12-18T13-15-44Z@sha256:1dce27c494a16bae114774f1cec295493f3613142713130c2d22dd5696be6ad3 environment: - S3BACKEND: "mem" - REMOTE_MANAGEMENT_DISABLE: 1 + MINIO_ROOT_USER: accessKey1 + MINIO_ROOT_PASSWORD: verySecretKey1 + MINIO_DOMAIN: local + command: server /export ports: - - 51420:8000 + - "51420:9000" init-db: depends_on: diff --git a/lib/dl_file_uploader_lib/dl_file_uploader_lib/redis_model/models/models.py b/lib/dl_file_uploader_lib/dl_file_uploader_lib/redis_model/models/models.py index 468e2ddf5..790a7f2d2 100644 --- a/lib/dl_file_uploader_lib/dl_file_uploader_lib/redis_model/models/models.py +++ b/lib/dl_file_uploader_lib/dl_file_uploader_lib/redis_model/models/models.py @@ -202,9 +202,7 @@ class DataFile(RedisModelUserIdAuth): def s3_key_old(self) -> str: # transition from s3_key generated by self.id to stored self.s3_key, to be removed in future releases # see also: DataFileSchema - if self.s3_key is not None: - return self.s3_key - return self.id + return self.s3_key or self.id def get_secret_keys(self) -> set[DataKey]: if self.user_source_properties is None: diff --git a/lib/dl_s3/dl_s3/s3_service.py b/lib/dl_s3/dl_s3/s3_service.py index eec46e4da..20a01a2ba 100644 --- a/lib/dl_s3/dl_s3/s3_service.py +++ b/lib/dl_s3/dl_s3/s3_service.py @@ -60,7 +60,7 @@ async def initialize(self) -> None: aws_access_key_id=self._access_key_id, aws_secret_access_key=self._secret_access_key, endpoint_url=self._endpoint_url, - config=AioConfig(signature_version="s3v4"), + config=AioConfig(signature_version="s3v4"), # v4 signature is required to generate presigned URLs with restriction policies ) session = get_session() diff --git a/lib/dl_s3/dl_s3/utils.py b/lib/dl_s3/dl_s3/utils.py index 9f06913eb..3a073f161 100644 --- a/lib/dl_s3/dl_s3/utils.py +++ b/lib/dl_s3/dl_s3/utils.py @@ -1,13 +1,36 @@ from __future__ import annotations -from typing import TYPE_CHECKING +import typing +import aiohttp import botocore.exceptions -if TYPE_CHECKING: +if typing.TYPE_CHECKING: from types_aiobotocore_s3 import S3Client as AsyncS3Client +async def upload_to_s3_by_presigned(presigned_url: dict[str, typing.Any], content_md5: str, data: str) -> aiohttp.ClientResponse: + upload_url = presigned_url["url"] + upload_url_fields = presigned_url["fields"] + upload_url_fields["content-md5"] = content_md5 + + async with aiohttp.ClientSession() as session: + with aiohttp.MultipartWriter("form-data") as mpwriter: + for k, v in upload_url_fields.items(): + part = mpwriter.append(v, {'Content-Type': 'text/plain', 'Content-Disposition': f'attachment; name="{k}"'}) + part.set_content_disposition("form-data", name=k) + + part = mpwriter.append(data, {'Content-Type': 'text/plain', 'Content-Disposition': f'attachment; filename="mydata"'}) + part.set_content_disposition("form-data", name="file") + + async with session.post( + url=upload_url, + data=mpwriter, + ) as resp: + resp.raise_for_status() + return resp + + async def s3_file_exists(s3_client: AsyncS3Client, bucket: str, key: str) -> bool: try: s3_resp = await s3_client.head_object(