diff --git a/examples/quickstart/model_pipeline.py b/examples/quickstart/model_pipeline.py index 1900470e5f4..1584d9efc2e 100644 --- a/examples/quickstart/model_pipeline.py +++ b/examples/quickstart/model_pipeline.py @@ -1,8 +1,9 @@ -import bentoml_io as bentoml import joblib from sklearn import datasets from sklearn import svm +import bentoml + if __name__ == "__main__": # Load training data iris = datasets.load_iris() @@ -13,8 +14,6 @@ clf.fit(X, y) # Save model to BentoML local model store - with bentoml.models.create( - "iris_clf", - ) as bento_model: + with bentoml.models.create("iris_clf") as bento_model: joblib.dump(clf, bento_model.path_of("model.pkl")) print(f"Model saved: {bento_model}") diff --git a/examples/quickstart/service.py b/examples/quickstart/service.py index 2b1e953e049..25e83b9dc55 100644 --- a/examples/quickstart/service.py +++ b/examples/quickstart/service.py @@ -1,6 +1,7 @@ -import bentoml_io as bentoml import numpy as np +import bentoml + @bentoml.service(resources={"cpu": "200m", "memory": "512Mi"}) class Preprocessing: diff --git a/examples/sentence-embedding/service.py b/examples/sentence-embedding/service.py index 4160e90686b..24ad0be0b45 100644 --- a/examples/sentence-embedding/service.py +++ b/examples/sentence-embedding/service.py @@ -2,18 +2,14 @@ import os -import bentoml_io import numpy as np import torch -from pydantic import Field import bentoml +from bentoml import Field -@bentoml_io.service( - resources={"memory": "500MiB"}, - traffic={"timeout": 1}, -) +@bentoml.service(resources={"memory": "500MiB"}, traffic={"timeout": 1}) class SentenceEmbedding: model_ref = bentoml.models.get("all-MiniLM-L6-v2") @@ -43,7 +39,7 @@ def mean_pooling(model_output, attention_mask): input_mask_expanded.sum(1), min=1e-9 ) - @bentoml_io.api(batchable=True) + @bentoml.api(batchable=True) def encode( self, sentences: list[str] = Field(["hello world"], description="input sentences"), diff --git a/pyproject.toml b/pyproject.toml index 1a876a511a7..c7838019f57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -133,9 +133,9 @@ fallback_version = "0.0.0" [tool.hatch.metadata] allow-direct-references = true [tool.hatch.build.targets.sdist] -only-include = ["src/bentoml", "src/bentoml_cli", "src/bentoml_io"] +only-include = ["src/bentoml", "src/bentoml_cli", "src/_bentoml_sdk", "src/_bentoml_impl"] [tool.hatch.build.targets.wheel] -packages = ["src/bentoml", "src/bentoml_cli", "src/bentoml_io"] +packages = ["src/bentoml", "src/bentoml_cli", "src/_bentoml_sdk", "src/_bentoml_impl"] [[tool.pdm.source]] url = "https://download.pytorch.org/whl/cpu" @@ -378,7 +378,7 @@ convention = "google" [tool.ruff.isort] force-single-line = true -known-first-party = ["bentoml"] +known-first-party = ["bentoml", "bentoml_cli", "_bentoml_sdk", "_bentoml_impl"] [tool.pyright] pythonVersion = "3.12" diff --git a/src/bentoml_io/worker/__init__.py b/src/_bentoml_impl/__init__.py similarity index 100% rename from src/bentoml_io/worker/__init__.py rename to src/_bentoml_impl/__init__.py diff --git a/src/bentoml_io/arrow.py b/src/_bentoml_impl/arrow.py similarity index 98% rename from src/bentoml_io/arrow.py rename to src/_bentoml_impl/arrow.py index ded26ff4182..d64362e71f1 100644 --- a/src/bentoml_io/arrow.py +++ b/src/_bentoml_impl/arrow.py @@ -5,7 +5,7 @@ import pyarrow as pa from pydantic import BaseModel -from .types import arrow_serialization +from _bentoml_sdk.schema import arrow_serialization SchemaDict: t.TypeAlias = t.Dict[str, t.Any] diff --git a/src/bentoml_io/client/__init__.py b/src/_bentoml_impl/client/__init__.py similarity index 100% rename from src/bentoml_io/client/__init__.py rename to src/_bentoml_impl/client/__init__.py diff --git a/src/bentoml_io/client/base.py b/src/_bentoml_impl/client/base.py similarity index 100% rename from src/bentoml_io/client/base.py rename to src/_bentoml_impl/client/base.py diff --git a/src/bentoml_io/client/http.py b/src/_bentoml_impl/client/http.py similarity index 86% rename from src/bentoml_io/client/http.py rename to src/_bentoml_impl/client/http.py index 4cd54a368d6..666be48e78b 100644 --- a/src/bentoml_io/client/http.py +++ b/src/_bentoml_impl/client/http.py @@ -5,6 +5,8 @@ import inspect import io import logging +import pathlib +import tempfile import typing as t from http import HTTPStatus from urllib.parse import urljoin @@ -14,12 +16,12 @@ import attr from pydantic import RootModel +from _bentoml_sdk import IODescriptor +from _bentoml_sdk.typing_utils import is_file_like +from _bentoml_sdk.typing_utils import is_image_type from bentoml._internal.utils.uri import uri_to_path from bentoml.exceptions import BentoMLException -from ..types import File -from ..typing_utils import is_file_like -from ..typing_utils import is_image_type from .base import AbstractClient if t.TYPE_CHECKING: @@ -28,8 +30,7 @@ from aiohttp import ClientSession from aiohttp import MultipartWriter - from ..factory import Service - from ..io_models import IODescriptor + from _bentoml_sdk import Service T = t.TypeVar("T", bound="HTTPClient") @@ -37,6 +38,10 @@ MAX_RETRIES = 3 +def _is_file(obj: t.Any) -> bool: + return isinstance(obj, pathlib.PurePath) or is_file_like(obj) + + @attr.define(slots=True) class ClientEndpoint: name: str @@ -59,6 +64,14 @@ class HTTPClient(AbstractClient): token: str | None = None _client: ClientSession | None = attr.field(init=False, default=None) _loop: asyncio.AbstractEventLoop | None = attr.field(init=False, default=None) + _opened_files: list[io.BufferedReader] = attr.field(init=False, factory=list) + _temp_dir: tempfile.TemporaryDirectory[str] = attr.field(init=False) + + @_temp_dir.default + def default_temp_dir(self) -> tempfile.TemporaryDirectory[str]: + return tempfile.TemporaryDirectory( + prefix="bentoml-client-", ignore_cleanup_errors=True + ) def __init__( self, @@ -201,17 +214,22 @@ async def _call( endpoint = self.endpoints[name] except KeyError: raise BentoMLException(f"Endpoint {name} not found") from None - data = await self._prepare_request(endpoint, args, kwargs) - resp = await self._request(endpoint.route, data, headers=headers) - if endpoint.stream_output: - return self._parse_stream_response(endpoint, resp) - elif ( - endpoint.output.get("type") == "file" - and self.media_type == "application/json" - ): - return await self._parse_file_response(endpoint, resp) - else: - return await self._parse_response(endpoint, resp) + try: + data = await self._prepare_request(endpoint, args, kwargs) + resp = await self._request(endpoint.route, data, headers=headers) + if endpoint.stream_output: + return self._parse_stream_response(endpoint, resp) + elif ( + endpoint.output.get("type") == "file" + and self.media_type == "application/json" + ): + return await self._parse_file_response(endpoint, resp) + else: + return await self._parse_response(endpoint, resp) + finally: + for f in self._opened_files: + f.close() + self._opened_files.clear() async def _request( self, @@ -262,10 +280,10 @@ def is_file_field(k: str) -> bool: if isinstance(model, IODescriptor): return k in model.multipart_fields return ( - is_file_like(value := model[k]) + _is_file(value := model[k]) or isinstance(value, t.Sequence) and len(value) > 0 - and is_file_like(value[0]) + and _is_file(value[0]) ) if isinstance(model, dict): @@ -287,6 +305,10 @@ def is_file_field(k: str) -> bool: getattr(v, "_fp", v.fp), headers={"Content-Type": f"image/{v.format.lower()}"}, ) + elif isinstance(v, pathlib.PurePath): + file = open(v, "rb") + part = mp.append(file) + self._opened_files.append(file) else: part = mp.append(v) part.set_content_disposition( @@ -294,9 +316,6 @@ def is_file_field(k: str) -> bool: ) return mp elif isinstance(model, dict): - for k, v in data.items(): - if is_file_field(v) and not isinstance(v, File): - data[k] = File(v) return self.serde.serialize(data) else: return self.serde.serialize_model(model) @@ -309,10 +328,10 @@ async def _prepare_request( ) -> bytes | MultipartWriter: if endpoint.input_spec is not None: model = endpoint.input_spec.from_inputs(*args, **kwargs) - if not model.multipart_fields: - return self.serde.serialize_model(model) - else: + if model.multipart_fields: return self._build_multipart(model) + else: + return self.serde.serialize_model(model) for name, value in zip(endpoint.input["properties"], args): if name in kwargs: @@ -334,7 +353,7 @@ async def _prepare_request( multipart_fields = { k for k, v in kwargs.items() - if is_file_like(v) or isinstance(v, t.Sequence) and v and is_file_like(v[0]) + if _is_file(v) or isinstance(v, t.Sequence) and v and _is_file(v[0]) } if multipart_fields: return self._build_multipart(kwargs) @@ -371,21 +390,21 @@ async def _parse_stream_response( async def _parse_file_response( self, endpoint: ClientEndpoint, resp: ClientResponse - ) -> File: - from ..types import Audio - from ..types import Image - from ..types import Video - - content_type = resp.headers.get("Content-Type") - cls = File - if content_type: - if content_type.startswith("image/"): - cls = Image - elif content_type.startswith("audio/"): - cls = Audio - elif content_type.startswith("video/"): - cls = Video - return cls(io.BytesIO(await resp.read()), media_type=content_type) + ) -> pathlib.Path: + from multipart.multipart import parse_options_header + + content_disposition = resp.headers.get("content-disposition") + filename: str | None = None + if content_disposition: + _, options = parse_options_header(content_disposition) + if b"filename" in options: + filename = str(options[b"filename"], "utf-8", errors="ignore") + + with tempfile.NamedTemporaryFile( + "wb", suffix=filename, dir=self._temp_dir.name, delete=False + ) as f: + f.write(await resp.read()) + return pathlib.Path(f.name) async def close(self) -> None: if self._client is not None and not self._client.closed: diff --git a/src/bentoml_io/client/proxy.py b/src/_bentoml_impl/client/proxy.py similarity index 97% rename from src/bentoml_io/client/proxy.py rename to src/_bentoml_impl/client/proxy.py index fb29ecf7896..4094a54dfe4 100644 --- a/src/bentoml_io/client/proxy.py +++ b/src/_bentoml_impl/client/proxy.py @@ -6,10 +6,10 @@ import logging import typing as t +from _bentoml_sdk import Service +from _bentoml_sdk.api import APIMethod from bentoml._internal.utils import async_gen_to_sync from bentoml.exceptions import BentoMLException -from bentoml_io.api import APIMethod -from bentoml_io.factory import Service from .http import ClientEndpoint from .http import HTTPClient diff --git a/src/bentoml_io/serde.py b/src/_bentoml_impl/serde.py similarity index 97% rename from src/bentoml_io/serde.py rename to src/_bentoml_impl/serde.py index 7672047655d..ad761ac4e17 100644 --- a/src/bentoml_io/serde.py +++ b/src/_bentoml_impl/serde.py @@ -8,12 +8,12 @@ from starlette.datastructures import UploadFile -from .typing_utils import is_list_type +from _bentoml_sdk.typing_utils import is_list_type if t.TYPE_CHECKING: from starlette.requests import Request - from .io_models import IODescriptor + from _bentoml_sdk import IODescriptor T = t.TypeVar("T", bound="IODescriptor") diff --git a/src/bentoml_io/server/.gitignore b/src/_bentoml_impl/server/.gitignore similarity index 100% rename from src/bentoml_io/server/.gitignore rename to src/_bentoml_impl/server/.gitignore diff --git a/src/bentoml_io/server/__init__.py b/src/_bentoml_impl/server/__init__.py similarity index 51% rename from src/bentoml_io/server/__init__.py rename to src/_bentoml_impl/server/__init__.py index c3957f029f8..b4e68d862c8 100644 --- a/src/bentoml_io/server/__init__.py +++ b/src/_bentoml_impl/server/__init__.py @@ -1,8 +1,10 @@ """ - bentoml_io.server - ~~~~~~~~~~~~~~~~~ + _bentoml_impl.server + ~~~~~~~~~~~~~~~~~~~~ A reference implementation of serving a BentoML service. This will be eventually migrated to Rust. """ -from ..factory import Service as Service +from .serving import serve_http + +__all__ = ["serve_http"] diff --git a/src/bentoml_io/server/allocator.py b/src/_bentoml_impl/server/allocator.py similarity index 99% rename from src/bentoml_io/server/allocator.py rename to src/_bentoml_impl/server/allocator.py index 3b05a6366f0..c888bdbfd9c 100644 --- a/src/bentoml_io/server/allocator.py +++ b/src/_bentoml_impl/server/allocator.py @@ -5,12 +5,11 @@ from simple_di import Provide from simple_di import inject +from _bentoml_sdk import Service from bentoml._internal.configuration.containers import BentoMLContainer from bentoml._internal.resource import system_resources from bentoml.exceptions import BentoMLConfigException -from ..factory import Service - class ResourceUnavailable(BentoMLConfigException): pass diff --git a/src/bentoml_io/server/app.py b/src/_bentoml_impl/server/app.py similarity index 72% rename from src/bentoml_io/server/app.py rename to src/_bentoml_impl/server/app.py index a6b700d74d9..28d41f19af8 100644 --- a/src/bentoml_io/server/app.py +++ b/src/_bentoml_impl/server/app.py @@ -13,14 +13,14 @@ from starlette.middleware import Middleware from starlette.staticfiles import StaticFiles +from _bentoml_sdk import Service from bentoml._internal.container import BentoMLContainer from bentoml._internal.marshal.dispatcher import CorkDispatcher from bentoml._internal.server.base_app import BaseAppFactory from bentoml._internal.server.http_app import log_exception +from bentoml.exceptions import BentoMLException from bentoml.exceptions import ServiceUnavailable -from ..factory import Service - if t.TYPE_CHECKING: from opentelemetry.sdk.trace import Span from starlette.applications import Starlette @@ -28,11 +28,31 @@ from starlette.responses import Response from starlette.routing import BaseRoute + from bentoml._internal import external_typing as ext + from bentoml._internal.context import ServiceContext from bentoml._internal.types import LifecycleHook R = t.TypeVar("R") +class ContextMiddleware: + def __init__(self, app: ext.ASGIApp, context: ServiceContext) -> None: + self.app = app + self.context = context + + async def __call__( + self, scope: ext.ASGIScope, receive: ext.ASGIReceive, send: ext.ASGISend + ) -> None: + from starlette.requests import Request + + if scope["type"] not in ("http", "websocket"): + return await self.app(scope, receive, send) + + req = Request(scope, receive, send) + with self.context.in_request(req): + await self.app(scope, receive, send) + + class ServiceAppFactory(BaseAppFactory): @inject def __init__( @@ -87,8 +107,34 @@ async def openapi_spec_view(self, req: Request) -> Response: log_exception(req, sys.exc_info()) raise + async def handle_uncaught_exception(self, req: Request, exc: Exception) -> Response: + from starlette.responses import JSONResponse + + log_exception(req, sys.exc_info()) + return JSONResponse( + {"error": "An unexpected error has occurred, please try again later."}, + status_code=500, + ) + + async def handle_bentoml_exception(self, req: Request, exc: Exception) -> Response: + from starlette.responses import JSONResponse + + log_exception(req, sys.exc_info()) + assert isinstance(exc, BentoMLException) + status = exc.error_code.value + if 400 <= status < 500 and status not in (401, 403): + return JSONResponse( + content="BentoService error handling API request: %s" % str(exc), + status_code=status, + ) + else: + return JSONResponse("", status_code=status) + def __call__(self, is_main: bool = False) -> Starlette: app = super().__call__() + + app.add_exception_handler(BentoMLException, self.handle_bentoml_exception) + app.add_exception_handler(Exception, self.handle_uncaught_exception) app.add_route("/schema.json", self.schema_view, name="schema") if is_main: if BentoMLContainer.new_index: @@ -119,7 +165,9 @@ def middlewares(self) -> list[Middleware]: from bentoml._internal.container import BentoMLContainer - middlewares = super().middlewares + middlewares = super().middlewares + [ + Middleware(ContextMiddleware, context=self.service.context) + ] for middleware_cls, options in self.service.middlewares: middlewares.append(Middleware(middleware_cls, **options)) @@ -261,16 +309,14 @@ async def inner_infer( async def api_endpoint(self, name: str, request: Request) -> Response: from starlette.concurrency import run_in_threadpool - from starlette.responses import JSONResponse + from _bentoml_sdk.io_models import ARGS + from _bentoml_sdk.io_models import KWARGS from bentoml._internal.container import BentoMLContainer from bentoml._internal.context import trace_context from bentoml._internal.utils import is_async_callable from bentoml._internal.utils.http import set_cookies - from bentoml.exceptions import BentoMLException - from ..io_models import ARGS - from ..io_models import KWARGS from ..serde import ALL_SERDE media_type = request.headers.get("Content-Type", "application/json") @@ -278,68 +324,41 @@ async def api_endpoint(self, name: str, request: Request) -> Response: method = self.service.apis[name] func = getattr(self._service_instance, name) - - with self.service.context.in_request(request) as ctx: - try: - serde = ALL_SERDE[media_type]() - input_data = await method.input_spec.from_http_request(request, serde) - input_args: tuple[t.Any, ...] = () - input_params = { - k: getattr(input_data, k) for k in input_data.model_fields - } - if method.ctx_param is not None: - input_params[method.ctx_param] = ctx - if ARGS in input_params: - input_args = tuple(input_params.pop(ARGS)) - if KWARGS in input_params: - input_params.update(input_params.pop(KWARGS)) - if method.batchable: - output = await self.batch_infer(name, input_args, input_params) - elif is_async_callable(func): - output = await func(*input_args, **input_params) - elif inspect.isasyncgenfunction(func): - output = func(*input_args, **input_params) - else: - output = await run_in_threadpool(func, *input_args, **input_params) - - response = await method.output_spec.to_http_response(output, serde) - response.headers.update( - {"Server": f"BentoML Service/{self.service.name}"} - ) - - if method.ctx_param is not None: - response.status_code = ctx.response.status_code - response.headers.update(ctx.response.metadata) - set_cookies(response, ctx.response.cookies) - if trace_context.request_id is not None: - response.headers["X-BentoML-Request-ID"] = str( - trace_context.request_id - ) - if ( - BentoMLContainer.http.response.trace_id.get() - and trace_context.trace_id is not None - ): - response.headers["X-BentoML-Trace-ID"] = str(trace_context.trace_id) - except BentoMLException as e: - log_exception(request, sys.exc_info()) - - status = e.error_code.value - if 400 <= status < 500 and status not in (401, 403): - response = JSONResponse( - content="BentoService error handling API request: %s" % str(e), - status_code=status, - ) - else: - response = JSONResponse("", status_code=status) - except Exception: # pylint: disable=broad-except - # For all unexpected error, return 500 by default. For example, - # if users' model raises an error of division by zero. - log_exception(request, sys.exc_info()) - - response = JSONResponse( - "An error has occurred in BentoML user code when handling this request, find the error details in server logs", - status_code=500, - ) - finally: - await request.close() - return response + ctx = self.service.context + try: + serde = ALL_SERDE[media_type]() + input_data = await method.input_spec.from_http_request(request, serde) + input_args: tuple[t.Any, ...] = () + input_params = {k: getattr(input_data, k) for k in input_data.model_fields} + if method.ctx_param is not None: + input_params[method.ctx_param] = ctx + if ARGS in input_params: + input_args = tuple(input_params.pop(ARGS)) + if KWARGS in input_params: + input_params.update(input_params.pop(KWARGS)) + if method.batchable: + output = await self.batch_infer(name, input_args, input_params) + elif is_async_callable(func): + output = await func(*input_args, **input_params) + elif inspect.isasyncgenfunction(func): + output = func(*input_args, **input_params) + else: + output = await run_in_threadpool(func, *input_args, **input_params) + + response = await method.output_spec.to_http_response(output, serde) + response.headers.update({"Server": f"BentoML Service/{self.service.name}"}) + + if method.ctx_param is not None: + response.status_code = ctx.response.status_code + response.headers.update(ctx.response.metadata) + set_cookies(response, ctx.response.cookies) + if trace_context.request_id is not None: + response.headers["X-BentoML-Request-ID"] = str(trace_context.request_id) + if ( + BentoMLContainer.http.response.trace_id.get() + and trace_context.trace_id is not None + ): + response.headers["X-BentoML-Trace-ID"] = str(trace_context.trace_id) + finally: + await request.close() + return response diff --git a/src/bentoml_io/server/main-openapi.html b/src/_bentoml_impl/server/main-openapi.html similarity index 100% rename from src/bentoml_io/server/main-openapi.html rename to src/_bentoml_impl/server/main-openapi.html diff --git a/src/bentoml_io/server/main-ui.html b/src/_bentoml_impl/server/main-ui.html similarity index 100% rename from src/bentoml_io/server/main-ui.html rename to src/_bentoml_impl/server/main-ui.html diff --git a/src/bentoml_io/server/serving.py b/src/_bentoml_impl/server/serving.py similarity index 98% rename from src/bentoml_io/server/serving.py rename to src/_bentoml_impl/server/serving.py index 3c7ff511506..2e07fbc813c 100644 --- a/src/bentoml_io/server/serving.py +++ b/src/_bentoml_impl/server/serving.py @@ -14,11 +14,10 @@ from simple_di import Provide from simple_di import inject +from _bentoml_sdk import Service from bentoml._internal.container import BentoMLContainer from bentoml.exceptions import BentoMLConfigException -from ..factory import Service - AnyService = Service[t.Any] if t.TYPE_CHECKING: @@ -92,7 +91,7 @@ def _get_server_socket( raise BentoMLException("Unsupported platform") -SERVICE_WORKER_SCRIPT = "bentoml_io.worker.service" +SERVICE_WORKER_SCRIPT = "_bentoml_impl.worker.service" def create_service_watchers( @@ -128,7 +127,7 @@ def create_service_watchers( "--fd", f"$(circus.sockets.{svc.name})", "--working-dir", - svc._working_dir, + svc.working_dir, "--worker-id", "$(CIRCUS.WID)", ] diff --git a/src/_bentoml_impl/worker/__init__.py b/src/_bentoml_impl/worker/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/bentoml_io/worker/service.py b/src/_bentoml_impl/worker/service.py similarity index 97% rename from src/bentoml_io/worker/service.py rename to src/_bentoml_impl/worker/service.py index db2c95f26eb..f91bcc437ac 100644 --- a/src/bentoml_io/worker/service.py +++ b/src/_bentoml_impl/worker/service.py @@ -6,6 +6,9 @@ import click +if t.TYPE_CHECKING: + from _bentoml_sdk import Service + @click.command() @click.argument("bento_identifier", type=click.STRING, required=False, default=".") @@ -143,14 +146,13 @@ def main( from bentoml._internal.log import configure_server_logging from bentoml._internal.service import load - from ..factory import Service from ..server.app import ServiceAppFactory if runner_map: BentoMLContainer.remote_runner_mapping.set( t.cast(t.Dict[str, str], json.loads(runner_map)) ) - service = t.cast(Service[t.Any], load(bento_identifier, working_dir=working_dir)) + service = t.cast("Service[t.Any]", load(bento_identifier, working_dir=working_dir)) service.inject_config() component_context.component_type = "api_server" diff --git a/src/_bentoml_sdk/__init__.py b/src/_bentoml_sdk/__init__.py new file mode 100644 index 00000000000..15050b193dc --- /dev/null +++ b/src/_bentoml_sdk/__init__.py @@ -0,0 +1,34 @@ +from bentoml._internal.utils.pkg import pkg_version_info + +if (ver := pkg_version_info("pydantic")) < (2,): + raise ImportError( + f"The new SDK runs on pydantic>=2.0.0, but the you have {'.'.join(map(str, ver))}. " + "Please upgrade it." + ) + +from ._pydantic import add_custom_preparers + +add_custom_preparers() +del add_custom_preparers +# ruff: noqa + +from .api import api +from .service import depends +from .service import Service +from .service import service +from .service import runner_service +from .io_models import IODescriptor +from .schema import ContentType, DType, Shape + +__all__ = [ + "api", + "depends", + "Service", + "service", + "runner_service", + # io descriptors + "IODescriptor", + "ContentType", + "DType", + "Shape", +] diff --git a/src/bentoml_io/_pydantic.py b/src/_bentoml_sdk/_pydantic.py similarity index 58% rename from src/bentoml_io/_pydantic.py rename to src/_bentoml_sdk/_pydantic.py index 56229628f8a..62f6d814c22 100644 --- a/src/bentoml_io/_pydantic.py +++ b/src/_bentoml_sdk/_pydantic.py @@ -6,9 +6,13 @@ from typing_extensions import get_args from typing_extensions import get_origin -from .types import DataframeSchema -from .types import PILImageEncoder -from .types import TensorSchema +from .schema import ContentType +from .schema import DataframeSchema +from .schema import DType +from .schema import FileSchema +from .schema import PILImageEncoder +from .schema import Shape +from .schema import TensorSchema if t.TYPE_CHECKING: from pydantic import ConfigDict @@ -44,18 +48,19 @@ def numpy_prepare_pydantic_annotations( args = get_args(source) dtype = np.dtype(args[1]).name if args else None + shape: tuple[int, ...] | None = None _, remaining_annotations = _known_annotated_metadata.collect_known_metadata( annotations ) - schema = next( - (a for a in remaining_annotations if isinstance(a, TensorSchema)), None - ) - if schema is None: - remaining_annotations.insert(0, TensorSchema("numpy-array", dtype=dtype)) - elif schema.dtype is None: - schema.dtype = dtype - return source, remaining_annotations + for i, annotation in enumerate(remaining_annotations[:]): + if isinstance(annotation, Shape): + shape = annotation.dimensions + del remaining_annotations[i] + elif isinstance(annotation, DType): + dtype = annotation.dtype + del remaining_annotations[i] + return source, [TensorSchema("numpy-array", dtype, shape), *remaining_annotations] def torch_prepare_pydantic_annotations( @@ -69,12 +74,20 @@ def torch_prepare_pydantic_annotations( if not issubclass(origin, torch.Tensor): return None + dtype: str | None = None + shape: tuple[int, ...] | None = None + _, remaining_annotations = _known_annotated_metadata.collect_known_metadata( annotations ) - if not any(isinstance(a, TensorSchema) for a in remaining_annotations): - remaining_annotations.insert(0, TensorSchema("torch-tensor")) - return origin, remaining_annotations + for i, annotation in enumerate(remaining_annotations[:]): + if isinstance(annotation, Shape): + shape = annotation.dimensions + del remaining_annotations[i] + elif isinstance(annotation, DType): + dtype = annotation.dtype + del remaining_annotations[i] + return source, [TensorSchema("torch-tensor", dtype, shape), *remaining_annotations] def tf_prepare_pydantic_annotations( @@ -87,12 +100,20 @@ def tf_prepare_pydantic_annotations( if not issubclass(origin, tf.Tensor): return None + dtype: str | None = None + shape: tuple[int, ...] | None = None + _, remaining_annotations = _known_annotated_metadata.collect_known_metadata( annotations ) - if not any(isinstance(a, TensorSchema) for a in remaining_annotations): - remaining_annotations.insert(0, TensorSchema("tf-tensor")) - return origin, remaining_annotations + for i, annotation in enumerate(remaining_annotations[:]): + if isinstance(annotation, Shape): + shape = annotation.dimensions + del remaining_annotations[i] + elif isinstance(annotation, DType): + dtype = annotation.dtype + del remaining_annotations[i] + return source, [TensorSchema("tf-tensor", dtype, shape), *remaining_annotations] def pandas_prepare_pydantic_annotations( @@ -114,7 +135,7 @@ def pandas_prepare_pydantic_annotations( def pil_prepare_pydantic_annotations( - source: t.Any, annotations: t.Iterable[t.Any], config: ConfigDict + source: t.Any, annotations: t.Iterable[t.Any], _config: ConfigDict ) -> tuple[t.Any, list[t.Any]] | None: if not getattr(source, "__module__", "").startswith("PIL."): return None @@ -126,9 +147,33 @@ def pil_prepare_pydantic_annotations( _, remaining_annotations = _known_annotated_metadata.collect_known_metadata( annotations ) - if not any(isinstance(a, PILImageEncoder) for a in remaining_annotations): - remaining_annotations.insert(0, PILImageEncoder()) - return origin, remaining_annotations + return origin, [PILImageEncoder(), *remaining_annotations] + + +def pathlib_prepare_pydantic_annotations( + source: t.Any, annotations: t.Iterable[t.Any], _config: ConfigDict +) -> tuple[t.Any, list[t.Any]] | None: + import pathlib + + if source not in { + pathlib.PurePath, + pathlib.PurePosixPath, + pathlib.PureWindowsPath, + pathlib.Path, + pathlib.PosixPath, + pathlib.WindowsPath, + }: + return None + + _, remaining_annotations = _known_annotated_metadata.collect_known_metadata( + annotations + ) + content_type: str | None = None + for i, annotation in enumerate(remaining_annotations[:]): + if isinstance(annotation, ContentType): + content_type = annotation.content_type + del remaining_annotations[i] + return source, [FileSchema(content_type=content_type), *remaining_annotations] def add_custom_preparers(): @@ -137,6 +182,9 @@ def add_custom_preparers(): except ModuleNotFoundError: return _std_types_schema.PREPARE_METHODS = ( + # pathlib + pathlib_prepare_pydantic_annotations, + # inherit from pydantic *_std_types_schema.PREPARE_METHODS, # tensors numpy_prepare_pydantic_annotations, diff --git a/src/bentoml_io/api.py b/src/_bentoml_sdk/api.py similarity index 98% rename from src/bentoml_io/api.py rename to src/_bentoml_sdk/api.py index 19b1ca4cf3c..6d526ee600a 100644 --- a/src/bentoml_io/api.py +++ b/src/_bentoml_sdk/api.py @@ -12,7 +12,6 @@ from bentoml._internal.utils import dict_filter_none from .io_models import IODescriptor -from .openapi import REF_TEMPLATE R = t.TypeVar("R") T = t.TypeVar("T", bound="APIMethod[..., t.Any]") @@ -121,6 +120,8 @@ def schema(self) -> dict[str, t.Any]: ) def openapi_request(self) -> dict[str, t.Any]: + from .service.openapi import REF_TEMPLATE + return { "content": { self.input_spec.mime_type(): MediaType( @@ -137,6 +138,8 @@ def openapi_request(self) -> dict[str, t.Any]: } def openapi_response(self) -> dict[str, t.Any]: + from .service.openapi import REF_TEMPLATE + return { "description": SUCCESS_DESCRIPTION, "content": { diff --git a/src/bentoml_io/io_models.py b/src/_bentoml_sdk/io_models.py similarity index 85% rename from src/bentoml_io/io_models.py rename to src/_bentoml_sdk/io_models.py index 2a64f08902c..4d53a9d8af8 100644 --- a/src/bentoml_io/io_models.py +++ b/src/_bentoml_sdk/io_models.py @@ -1,10 +1,10 @@ from __future__ import annotations import inspect +import pathlib import sys import typing as t from typing import ClassVar -from urllib.parse import quote from pydantic import BaseModel from pydantic import Field @@ -14,8 +14,6 @@ from bentoml._internal.service.openapi.specification import Schema -from .types import File -from .typing_utils import is_file_like from .typing_utils import is_image_type from .typing_utils import is_iterator_type from .typing_utils import is_list_type @@ -24,7 +22,7 @@ from starlette.requests import Request from starlette.responses import Response - from .serde import Serde + from _bentoml_impl.serde import Serde DEFAULT_TEXT_MEDIA_TYPE = "text/plain" @@ -36,7 +34,7 @@ class IOMixin: @classmethod def openapi_components(cls, name: str) -> dict[str, Schema]: - from .openapi import REF_TEMPLATE + from .service.openapi import REF_TEMPLATE if issubclass(cls, RootModel): return {} @@ -67,6 +65,8 @@ def mime_type(cls) -> str: if json_schema.get("type") == "string": return DEFAULT_TEXT_MEDIA_TYPE elif json_schema.get("type") == "file": + if "content_type" in json_schema: + return json_schema["content_type"] if (format := json_schema.get("format")) == "image": return "image/*" elif format == "audio": @@ -87,7 +87,7 @@ def __pydantic_init_subclass__(cls) -> None: annotation = get_args(annotation)[0] if is_annotated(annotation): annotation = get_args(annotation)[0] - if is_file_like(annotation) or is_image_type(annotation): + if issubclass(annotation, pathlib.PurePath) or is_image_type(annotation): cls.multipart_fields.append(k) @classmethod @@ -117,12 +117,14 @@ async def from_http_request(cls, request: Request, serde: Serde) -> IODescriptor @classmethod async def to_http_response(cls, obj: t.Any, serde: Serde) -> Response: """Convert a output value to HTTP response""" + import mimetypes + from pydantic import RootModel from starlette.responses import FileResponse from starlette.responses import Response from starlette.responses import StreamingResponse - from .serde import JSONSerde + from _bentoml_impl.serde import JSONSerde structured_media_type = cls.media_type or serde.media_type @@ -157,32 +159,17 @@ def content_stream() -> t.Generator[str | bytes, None, None]: return StreamingResponse(content_stream(), media_type=cls.mime_type()) else: - if isinstance(obj, File) and isinstance(serde, JSONSerde): - media_type = obj.media_type or "application/octet-stream" - should_inline = obj.media_type and obj.media_type.startswith( - ("image/", "audio/", "video/") - ) + if isinstance(obj, pathlib.PurePath) and isinstance(serde, JSONSerde): + media_type = mimetypes.guess_type(obj)[0] or "application/octet-stream" + should_inline = media_type.startswith(("image/", "audio/", "video/")) content_disposition_type = "inline" if should_inline else "attachment" - if obj.path: - return FileResponse( - obj.path, - filename=obj.filename, - media_type=media_type, - content_disposition_type=content_disposition_type, - ) - else: - headers = None - if obj.filename: - if (quoted := quote(obj.filename)) != obj.filename: - content_disposition = ( - f"{content_disposition_type}; filename*=utf-8''{quoted}" - ) - else: - content_disposition = ( - f'{content_disposition_type}; filename="{obj.filename}"' - ) - headers = {"Content-Disposition": content_disposition} - return Response(obj.read(), media_type=media_type, headers=headers) + return FileResponse( + obj, + filename=obj.name, + media_type=media_type, + content_disposition_type=content_disposition_type, + ) + if not isinstance(obj, RootModel): ins: IODescriptor = t.cast(IODescriptor, cls(obj)) else: diff --git a/src/bentoml_io/types.py b/src/_bentoml_sdk/schema.py similarity index 61% rename from src/bentoml_io/types.py rename to src/_bentoml_sdk/schema.py index 03b9b86fdf2..3525ebd682a 100644 --- a/src/bentoml_io/types.py +++ b/src/_bentoml_sdk/schema.py @@ -1,17 +1,20 @@ from __future__ import annotations import contextlib +import fnmatch import functools import io import operator +import os +import tempfile import typing as t -from mimetypes import guess_type from pathlib import Path +from pathlib import PurePath import attrs +from annotated_types import BaseMetadata from pydantic_core import core_schema from starlette.datastructures import UploadFile -from typing_extensions import Annotated from bentoml._internal.utils import dict_filter_none @@ -28,6 +31,7 @@ from typing_extensions import Literal TensorType = t.Union[np.ndarray[t.Any, t.Any], tf.Tensor, torch.Tensor] + TensorFormat = Literal["numpy-array", "tf-tensor", "torch-tensor"] from PIL import Image as PILImage else: from bentoml._internal.utils.lazy_loader import LazyLoader @@ -41,7 +45,6 @@ T = t.TypeVar("T") -__all__ = ["File", "Image", "Audio", "Video", "Tensor", "Dataframe"] # This is an internal global state that is True when the model is being serialized for arrow __in_arrow_serialization__ = False @@ -65,11 +68,11 @@ def decode( if isinstance(obj, UploadFile): formats = None if obj.headers.get("Content-Type", "").startswith("image/"): - formats = [obj.headers.get("Content-Type")[6:].upper()] + formats = [obj.headers["Content-Type"][6:].upper()] return PILImage.open(obj.file, formats=formats) if is_file_like(obj): return PILImage.open(obj) - return PILImage.open(io.BytesIO(obj)) + return PILImage.open(io.BytesIO(t.cast(bytes, obj))) def encode(self, obj: PILImage.Image) -> bytes: buffer = io.BytesIO() @@ -97,142 +100,73 @@ def __get_pydantic_json_schema__( @attrs.define -class File(t.BinaryIO): - format: t.ClassVar[str] = "binary" +class FileSchema: + format: str = "binary" + content_type: str | None = None - _fp: t.BinaryIO | None = attrs.field(default=None, repr=False) - filename: str | None = None - media_type: str | None = None - path: Path | None = None + def __attrs_post_init__(self) -> None: + if self.content_type is not None: + self.format = self.content_type.split("/")[0] - def __attrs_post_int__(self) -> None: - if self.filename is None: - if self._fp is not None and hasattr(self._fp, "name"): - self.filename = self._fp.name - - @property - def fp(self) -> t.BinaryIO: - if self._fp is None: - if self.path is None: - raise ValueError("File is not initialized") - self._fp = open(self.path, "rb") - return self._fp - - @classmethod - def encode(cls, obj: t.BinaryIO) -> bytes: - obj.seek(0) - return obj.read() - - @classmethod def __get_pydantic_json_schema__( - cls, schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler + self, schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler ) -> dict[str, t.Any]: value = handler(schema) if handler.mode == "validation": - value.update({"type": "file", "format": cls.format}) + value.update({"type": "file", "format": self.format}) + if self.content_type is not None: + value.update({"content_type": self.content_type}) else: value.update({"type": "string", "format": "binary"}) return value - @classmethod - def decode(cls, obj: bytes | t.BinaryIO | UploadFile) -> File: + def encode(self, obj: Path) -> bytes: + return obj.read_bytes() + + def decode(self, obj: bytes | t.BinaryIO | UploadFile | PurePath) -> Path: + from bentoml._internal.context import request_directory + + media_type: str | None = None + if isinstance(obj, PurePath): + return Path(obj) if isinstance(obj, UploadFile): - return cls( - obj.file, - filename=obj.filename, - media_type=obj.content_type, + body = obj.file.read() + filename = obj.filename + media_type = obj.content_type + elif is_file_like(obj): + body = obj.read() + filename = ( + os.path.basename(fn) + if (fn := getattr(obj, "name", None)) is not None + else None ) - if is_file_like(obj): - return cls(obj) - return cls(io.BytesIO(t.cast(bytes, obj))) + else: + body = t.cast(bytes, obj) + filename = None + if media_type is not None and self.content_type is not None: + if not fnmatch.fnmatch(media_type, self.content_type): + raise ValueError( + f"Invalid content type {media_type}, expected {self.content_type}" + ) + with tempfile.NamedTemporaryFile( + suffix=filename, dir=request_directory.get(), delete=False + ) as f: + f.write(body) + return Path(f.name) - @classmethod def __get_pydantic_core_schema__( - cls, source: type[t.Any], handler: t.Callable[[t.Any], core_schema.CoreSchema] + self, source: type[t.Any], handler: t.Callable[[t.Any], core_schema.CoreSchema] ) -> core_schema.CoreSchema: return core_schema.no_info_after_validator_function( - function=cls.decode, + function=self.decode, schema=core_schema.any_schema(), - serialization=core_schema.plain_serializer_function_ser_schema(cls.encode), - ) - - @classmethod - def from_path(cls, path: str | Path, filename: str | None = None) -> File: - if filename is None: - filename = Path(path).name - return cls( - open(path, "rb"), - filename=filename, - media_type=guess_type(filename)[0], - path=Path(path), + serialization=core_schema.plain_serializer_function_ser_schema(self.encode), ) - def __enter__(self) -> t.BinaryIO: - return self - - def __exit__(self, *args: t.Any) -> None: - self.close() - - def close(self) -> None: - if self._fp is not None: - self._fp.close() - - def read(self, __n: int = -1) -> bytes: - return self.fp.read(__n) - - def readlines(self, __hint: int = -1) -> list[bytes]: - return self.fp.readlines(__hint) - - def __iter__(self) -> t.Iterator[bytes]: - return self.fp.__iter__() - - def seek(self, __offset: int, __whence: int = 0) -> int: - return self.fp.seek(__offset, __whence) - - def fileno(self) -> int: - return self.fp.fileno() - - def tell(self) -> int: - return self.fp.tell() - - def __getstate__(self) -> dict[str, t.Any]: - d = attrs.asdict(self) - if self._fp is not None: - d["_pos"] = self._fp.tell() - self._fp.seek(0) - d["_fp"] = self._fp.read() - self._fp.seek(d["_pos"]) - return d - - def __setstate__(self, d: dict[str, t.Any]) -> None: - if d["_fp"] is not None: - fp = d["_fp"] = io.BytesIO(d["_fp"]) - fp.seek(d.pop("_pos", 0)) - for k, v in d.items(): - setattr(self, k, v) - - -class Image(File): - format: t.ClassVar[str] = "image" - - def to_pil_image(self) -> PILImage.Image: - formats = None - if self.media_type and self.media_type.startswith("image/"): - formats = [self.media_type[6:].upper()] - return PILImage.open(self, formats=formats) - - -class Audio(File): - format: t.ClassVar[str] = "audio" - - -class Video(File): - format: t.ClassVar[str] = "video" - @attrs.frozen(unsafe_hash=True) class TensorSchema: - format: str + format: TensorFormat dtype: t.Optional[str] = None shape: t.Optional[t.Tuple[int, ...]] = None @@ -353,43 +287,16 @@ def _validate(self, obj: t.Any) -> pd.DataFrame: return pd.DataFrame(obj, columns=self.columns) -@t.overload -def Tensor( - format: Literal["numpy-array"], dtype: str, shape: tuple[int, ...] -) -> t.Type[np.ndarray[t.Any, t.Any]]: - ... - - -@t.overload -def Tensor( - format: Literal["tf-tensor"], dtype: str, shape: tuple[int, ...] -) -> t.Type[tf.Tensor]: - ... - - -@t.overload -def Tensor( - format: Literal["torch-tensor"], dtype: str, shape: tuple[int, ...] -) -> t.Type[torch.Tensor]: - ... +@attrs.frozen +class ContentType(BaseMetadata): + content_type: str -def Tensor( - format: Literal["numpy-array", "torch-tensor", "tf-tensor"], - dtype: str | None = None, - shape: tuple[int, ...] | None = None, -) -> type: - if format == "numpy-array": - annotation = np.ndarray[t.Any, t.Any] - elif format == "torch-tensor": - annotation = torch.Tensor - else: - annotation = tf.Tensor - return Annotated[annotation, TensorSchema(format, dtype, shape)] +@attrs.frozen +class Shape(BaseMetadata): + dimensions: tuple[int, ...] -def Dataframe( - orient: t.Literal["records", "columns"] = "records", - columns: list[str] | None = None, -) -> t.Type[pd.DataFrame]: - return Annotated[pd.DataFrame, DataframeSchema(orient, columns)] +@attrs.frozen +class DType(BaseMetadata): + dtype: str diff --git a/src/_bentoml_sdk/service/__init__.py b/src/_bentoml_sdk/service/__init__.py new file mode 100644 index 00000000000..d8da0ec9879 --- /dev/null +++ b/src/_bentoml_sdk/service/__init__.py @@ -0,0 +1,7 @@ +from .config import ServiceConfig +from .dependency import depends +from .factory import Service +from .factory import runner_service +from .factory import service + +__all__ = ["Service", "service", "runner_service", "depends", "ServiceConfig"] diff --git a/src/bentoml_io/config.py b/src/_bentoml_sdk/service/config.py similarity index 100% rename from src/bentoml_io/config.py rename to src/_bentoml_sdk/service/config.py diff --git a/src/bentoml_io/dependency.py b/src/_bentoml_sdk/service/dependency.py similarity index 90% rename from src/bentoml_io/dependency.py rename to src/_bentoml_sdk/service/dependency.py index cda78b11ff3..7bdf01b6eed 100644 --- a/src/bentoml_io/dependency.py +++ b/src/_bentoml_sdk/service/dependency.py @@ -30,7 +30,7 @@ def get( BentoMLContainer.remote_runner_mapping ], ) -> T: - from .client.proxy import RemoteProxy + from _bentoml_impl.client.proxy import RemoteProxy key = self.cache_key() if key not in _dependent_cache: @@ -60,7 +60,5 @@ def __getattr__(self, name: str) -> t.Any: def depends(on: Service[T]) -> Dependency[T]: if not isinstance(on, Service): - raise TypeError( - "depends() expects a class decorated with @bentoml_io.service()" - ) + raise TypeError("depends() expects a class decorated with @bentoml.service()") return Dependency(on) diff --git a/src/bentoml_io/factory.py b/src/_bentoml_sdk/service/factory.py similarity index 98% rename from src/bentoml_io/factory.py rename to src/_bentoml_sdk/service/factory.py index 2b25ef38ec8..c9f55c5f696 100644 --- a/src/bentoml_io/factory.py +++ b/src/_bentoml_sdk/service/factory.py @@ -21,7 +21,7 @@ from bentoml._internal.utils import dict_filter_none from bentoml.exceptions import BentoMLException -from .api import APIMethod +from ..api import APIMethod from .config import ServiceConfig as Config from .config import validate @@ -75,7 +75,7 @@ class Service(t.Generic[T]): ) # service context context: ServiceContext = attrs.field(init=False, factory=ServiceContext) - _working_dir: str = attrs.field(init=False, factory=os.getcwd) + working_dir: str = attrs.field(init=False, factory=os.getcwd) # import info _caller_module: str = attrs.field(init=False) _import_str: str | None = attrs.field(init=False, default=None) @@ -260,14 +260,13 @@ def serve_http( development_mode: bool = False, reload: bool = False, ) -> None: + from _bentoml_impl.server import serve_http from bentoml._internal.log import configure_logging - from .server.serving import serve_http - configure_logging() if working_dir is None: - working_dir = self._working_dir + working_dir = self.working_dir serve_http( self, working_dir=working_dir, diff --git a/src/bentoml_io/openapi.py b/src/_bentoml_sdk/service/openapi.py similarity index 100% rename from src/bentoml_io/openapi.py rename to src/_bentoml_sdk/service/openapi.py diff --git a/src/bentoml_io/typing_utils.py b/src/_bentoml_sdk/typing_utils.py similarity index 100% rename from src/bentoml_io/typing_utils.py rename to src/_bentoml_sdk/typing_utils.py diff --git a/src/bentoml/__init__.py b/src/bentoml/__init__.py index 8b08164e7e4..1cbc610e842 100644 --- a/src/bentoml/__init__.py +++ b/src/bentoml/__init__.py @@ -15,6 +15,7 @@ """ from typing import TYPE_CHECKING +from typing import Any from ._internal.configuration import BENTOML_VERSION as __version__ from ._internal.configuration import load_config @@ -24,6 +25,8 @@ # Inject dependencies and configurations load_config() +from pydantic import Field + # BentoML built-in types from ._internal.bento import Bento from ._internal.cloud import YataiClient @@ -98,6 +101,15 @@ from . import cloud # Cloud API # isort: on + from _bentoml_impl.client import AsyncHTTPClient + from _bentoml_impl.client import SyncHTTPClient + from _bentoml_sdk import ContentType + from _bentoml_sdk import DType + from _bentoml_sdk import Shape + from _bentoml_sdk import api + from _bentoml_sdk import depends + from _bentoml_sdk import runner_service + from _bentoml_sdk import service else: from ._internal.utils import LazyLoader as _LazyLoader @@ -157,6 +169,30 @@ del _LazyLoader + _NEW_SDK_ATTRS = [ + "service", + "runner_service", + "api", + "depends", + "ContentType", + "Dtype", + "Shape", + ] + _NEW_CLIENTS = ["SyncHTTPClient", "AsyncHTTPClient"] + + def __getattr__(name: str) -> Any: + if name not in _NEW_SDK_ATTRS + _NEW_CLIENTS: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + import _bentoml_sdk + + if name in _NEW_CLIENTS: + import _bentoml_impl.client + + return getattr(_bentoml_impl.client, name) + else: + return getattr(_bentoml_sdk, name) + __all__ = [ "__version__", @@ -229,4 +265,16 @@ "set_serialization_strategy", "Strategy", "Resource", + # new SDK + "service", + "runner_service", + "api", + "depends", + "ContentType", + "DType", + "Shape", + "Field", + # new implementation + "SyncHTTPClient", + "AsyncHTTPClient", ] diff --git a/src/bentoml/_internal/bento/bento.py b/src/bentoml/_internal/bento/bento.py index 6bde11e3d7f..56b2605e380 100644 --- a/src/bentoml/_internal/bento/bento.py +++ b/src/bentoml/_internal/bento/bento.py @@ -45,11 +45,12 @@ from .build_config import PythonOptions if TYPE_CHECKING: - from bentoml_io.api import APIMethod - from bentoml_io.config import ServiceConfig - from bentoml_io.server import Service as NewService from fs.base import FS + from _bentoml_sdk import Service as NewService + from _bentoml_sdk.api import APIMethod + from _bentoml_sdk.service import ServiceConfig + from ..models import Model from ..service import Service from ..service.inference_api import InferenceAPI @@ -558,19 +559,14 @@ def from_dependency( ) -def get_service_import_str(svc: Service | str): +def get_service_import_str(svc: Service | NewService[t.Any] | str) -> str: from ..service import Service if isinstance(svc, Service): return svc.get_service_import_origin()[0] - try: - from bentoml_io.server import Service as NewService - - if isinstance(svc, NewService): - return svc.import_string - except ImportError: - pass - return svc + if isinstance(svc, str): + return svc + return svc.import_string @attr.frozen(repr=False) diff --git a/src/bentoml/_internal/cloud/schemas.py b/src/bentoml/_internal/cloud/schemas.py index d60a55a7cf5..c7a4aeb48d7 100644 --- a/src/bentoml/_internal/cloud/schemas.py +++ b/src/bentoml/_internal/cloud/schemas.py @@ -15,7 +15,7 @@ T = t.TypeVar("T") if TYPE_CHECKING: - from bentoml_io.config import ServiceConfig + from _bentoml_impl.config import ServiceConfig else: ServiceConfig = t.Dict[str, t.Any] diff --git a/src/bentoml/_internal/context.py b/src/bentoml/_internal/context.py index 548f4fbf687..6392043dfcb 100644 --- a/src/bentoml/_internal/context.py +++ b/src/bentoml/_internal/context.py @@ -3,6 +3,7 @@ import contextlib import contextvars import os +import tempfile import typing as t from abc import ABC from abc import abstractmethod @@ -17,6 +18,11 @@ import starlette.requests import starlette.responses +# A request-unique directory for storing temporary files +request_directory: contextvars.ContextVar[str] = contextvars.ContextVar( + "request_directory" +) + class Metadata(t.Mapping[str, str], ABC): @abstractmethod @@ -92,11 +98,14 @@ def in_request( ServiceContext.RequestContext.from_http(request) ) response_token = self._response_var.set(ServiceContext.ResponseContext()) - try: - yield self - finally: - self._request_var.reset(request_token) - self._response_var.reset(response_token) + with tempfile.TemporaryDirectory(prefix="bentoml-request-") as temp_dir: + dir_token = request_directory.set(temp_dir) + try: + yield self + finally: + self._request_var.reset(request_token) + self._response_var.reset(response_token) + request_directory.reset(dir_token) @property def request(self) -> RequestContext: @@ -106,6 +115,10 @@ def request(self) -> RequestContext: def response(self) -> ResponseContext: return self._response_var.get() + @property + def directory(self) -> str: + return request_directory.get() + @attr.define class RequestContext: metadata: Metadata diff --git a/src/bentoml/_internal/service/loader.py b/src/bentoml/_internal/service/loader.py index e1b729de055..8b0e4cf4e85 100644 --- a/src/bentoml/_internal/service/loader.py +++ b/src/bentoml/_internal/service/loader.py @@ -26,7 +26,7 @@ from .service import on_load_bento if TYPE_CHECKING: - from bentoml_io.server import Service as NewService + from _bentoml_sdk import Service as NewService from ..bento import BentoStore from .service import Service @@ -66,7 +66,7 @@ def import_service( service_types: list[type] = [Service] try: - from bentoml_io import Service as NewService + from _bentoml_sdk import Service as NewService service_types.append(NewService) except ImportError: diff --git a/src/bentoml/_internal/service/service.py b/src/bentoml/_internal/service/service.py index 1acfaff5b9a..c41dfed797b 100644 --- a/src/bentoml/_internal/service/service.py +++ b/src/bentoml/_internal/service/service.py @@ -28,9 +28,9 @@ if t.TYPE_CHECKING: import grpc - from bentoml_io.server import Service as NewService import bentoml + from _bentoml_sdk import Service as NewService from bentoml.grpc.types import AddServicerFn from bentoml.grpc.types import ServicerClass from bentoml.triton import _TritonRunner diff --git a/src/bentoml/_internal/utils/analytics/usage_stats.py b/src/bentoml/_internal/utils/analytics/usage_stats.py index 06aa25b28c3..08253bedd05 100644 --- a/src/bentoml/_internal/utils/analytics/usage_stats.py +++ b/src/bentoml/_internal/utils/analytics/usage_stats.py @@ -31,9 +31,9 @@ T = t.TypeVar("T") AsyncFunc = t.Callable[P, t.Coroutine[t.Any, t.Any, t.Any]] - from bentoml_io import Service as NewService from prometheus_client.samples import Sample + from _bentoml_sdk import Service as NewService from bentoml import Service from ...server.metrics.prometheus import PrometheusClient diff --git a/src/bentoml/bentos.py b/src/bentoml/bentos.py index 950392f990a..f8f044c522c 100644 --- a/src/bentoml/bentos.py +++ b/src/bentoml/bentos.py @@ -501,7 +501,7 @@ def serve( max_concurrent_streams: int | None = Provide[BentoMLContainer.grpc.max_concurrent_streams], grpc_protocol_version: str | None = None, -) -> Server: +) -> Server[t.Any]: logger.warning( "bentoml.serve and bentoml.bentos.serve are deprecated; use bentoml.Server instead." ) diff --git a/src/bentoml/server.py b/src/bentoml/server.py index 48dd3101401..14716d7d1b7 100644 --- a/src/bentoml/server.py +++ b/src/bentoml/server.py @@ -30,6 +30,8 @@ if TYPE_CHECKING: from types import TracebackType + from _bentoml_sdk import Service as NewService + _FILE: t.TypeAlias = None | int | t.IO[t.Any] @@ -43,7 +45,7 @@ class Server(ABC, t.Generic[ClientType]): - servable: str | Bento | Tag | Service + servable: str | Bento | Tag | Service | NewService[t.Any] host: str port: int @@ -54,7 +56,7 @@ class Server(ABC, t.Generic[ClientType]): def __init__( self, - servable: str | Bento | Tag | Service, + servable: str | Bento | Tag | Service | NewService[t.Any], serve_cmd: str, reload: bool, production: bool, @@ -94,6 +96,9 @@ def __init__( "Cannot use 'bentoml.Service' as a server if it is defined in interactive session or Jupyter Notebooks." ) bento_str, working_dir = servable.get_service_import_origin() + elif not isinstance(servable, str): + bento_str = servable.import_string + working_dir = servable.working_dir else: bento_str = servable @@ -219,13 +224,15 @@ def get_client(self) -> ClientType: if self.process.stdout is not None and not self.process.stdout.closed: s = self.process.stdout.read() logs += textwrap.indent( - s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 # type: ignore # may be string + s.decode("utf-8") if isinstance(s, bytes) else s, + " " * 4, # type: ignore # may be string ) if self.process.stderr is not None and not self.process.stderr.closed: logs += "\nServer Error:\n" s = self.process.stderr.read() logs += textwrap.indent( - s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 # type: ignore # may be string + s.decode("utf-8") if isinstance(s, bytes) else s, + " " * 4, # type: ignore # may be string ) raise ServerStateException(logs) return self._get_client() @@ -253,13 +260,15 @@ def stop(self) -> None: if self.process.stdout is not None and not self.process.stdout.closed: s = self.process.stdout.read() logs += textwrap.indent( - s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 # type: ignore # may be string + s.decode("utf-8") if isinstance(s, bytes) else s, + " " * 4, # type: ignore # may be string ) if self.process.stderr is not None and not self.process.stderr.closed: logs += "\nServer Error:\n" s = self.process.stderr.read() logs += textwrap.indent( - s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 # type: ignore # may be string + s.decode("utf-8") if isinstance(s, bytes) else s, + " " * 4, # type: ignore # may be string ) logger.warning(logs) return diff --git a/src/bentoml/types.py b/src/bentoml/types.py index c380486d9c1..ca9e427a92c 100644 --- a/src/bentoml/types.py +++ b/src/bentoml/types.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from ._internal.models.model import ModelSignature from ._internal.types import ModelSignatureDict diff --git a/src/bentoml_cli/serve.py b/src/bentoml_cli/serve.py index 0c2d96b08d7..ef0fc24c693 100644 --- a/src/bentoml_cli/serve.py +++ b/src/bentoml_cli/serve.py @@ -279,7 +279,7 @@ def serve( # type: ignore (unused warning) development_mode=False, ) else: - from bentoml_io.server.serving import serve_http + from _bentoml_impl.server import serve_http svc.inject_config() serve_http( diff --git a/src/bentoml_cli/start.py b/src/bentoml_cli/start.py index cb6fa3652ab..59aed2a314b 100644 --- a/src/bentoml_cli/start.py +++ b/src/bentoml_cli/start.py @@ -194,7 +194,7 @@ def start_http_server( # type: ignore (unused warning) ssl_ciphers=ssl_ciphers, ) else: - from bentoml_io.server.serving import serve_http + from _bentoml_impl.server import serve_http svc.inject_config() serve_http( diff --git a/src/bentoml_io/__init__.py b/src/bentoml_io/__init__.py deleted file mode 100644 index ea5d34ac243..00000000000 --- a/src/bentoml_io/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -from bentoml._internal.utils.pkg import pkg_version_info - -if (ver := pkg_version_info("pydantic")) < (2,): - raise ImportError( - f"bentoml_io runs on pydantic>=2.0.0, but the you have {'.'.join(map(str, ver))}. " - "Please upgrade it." - ) - -from ._pydantic import add_custom_preparers - -add_custom_preparers() -del add_custom_preparers -# ruff: noqa -# Re-export models for compatibility -from bentoml import models as models - -from .api import api as api -from .dependency import depends as depends -from .factory import Service as Service -from .factory import service as service -from .factory import runner_service as runner_service -from .io_models import IODescriptor as IODescriptor diff --git a/tests/unit/bentoml_cli/test_env_manager.py b/tests/unit/bentoml_cli/test_env_manager.py index b6b1f9dd248..fdb166e6f66 100644 --- a/tests/unit/bentoml_cli/test_env_manager.py +++ b/tests/unit/bentoml_cli/test_env_manager.py @@ -1,6 +1,7 @@ from __future__ import annotations import pytest + from bentoml_cli.env_manager import remove_env_arg testdata = [ diff --git a/tests/unit/bentoml_io/test_allocator.py b/tests/unit/bentoml_io/test_allocator.py index 8e55a12313e..07dd34b7919 100644 --- a/tests/unit/bentoml_io/test_allocator.py +++ b/tests/unit/bentoml_io/test_allocator.py @@ -1,14 +1,15 @@ from unittest import mock import pytest -from bentoml_io import service -from bentoml_io.server.allocator import BentoMLConfigException -from bentoml_io.server.allocator import ResourceAllocator -from bentoml_io.server.allocator import ResourceUnavailable + +from _bentoml_impl.server.allocator import BentoMLConfigException +from _bentoml_impl.server.allocator import ResourceAllocator +from _bentoml_impl.server.allocator import ResourceUnavailable +from _bentoml_sdk import service @mock.patch( - "bentoml_io.server.allocator.system_resources", + "_bentoml_impl.server.allocator.system_resources", return_value={"cpu": 8, "nvidia.com/gpu": list(range(4))}, ) def test_assign_gpus(_): @@ -28,7 +29,7 @@ def test_assign_gpus(_): @mock.patch( - "bentoml_io.server.allocator.system_resources", + "_bentoml_impl.server.allocator.system_resources", return_value={"cpu": 8, "nvidia.com/gpu": list(range(4))}, ) def test_assign_gpus_float(_): @@ -48,7 +49,7 @@ def test_assign_gpus_float(_): @mock.patch( - "bentoml_io.server.allocator.system_resources", + "_bentoml_impl.server.allocator.system_resources", return_value={"cpu": 8, "nvidia.com/gpu": list(range(4))}, ) def test_get_worker_env_gpu(_): @@ -71,7 +72,7 @@ class Foo: @mock.patch( - "bentoml_io.server.allocator.system_resources", + "_bentoml_impl.server.allocator.system_resources", return_value={"cpu": 8, "nvidia.com/gpu": list(range(4))}, ) def test_get_worker_env_gpu_float(_): @@ -101,7 +102,7 @@ class Bar: @mock.patch( - "bentoml_io.server.allocator.system_resources", + "_bentoml_impl.server.allocator.system_resources", return_value={"cpu": 8, "nvidia.com/gpu": list(range(4))}, ) def test_get_worker_env_cpu_count(_): @@ -118,7 +119,7 @@ class Foo: @mock.patch( - "bentoml_io.server.allocator.system_resources", + "_bentoml_impl.server.allocator.system_resources", return_value={"cpu": 8, "nvidia.com/gpu": list(range(4))}, ) def test_get_worker_env_worker_number(_): @@ -150,7 +151,7 @@ class Bar: @mock.patch( - "bentoml_io.server.allocator.system_resources", + "_bentoml_impl.server.allocator.system_resources", return_value={"cpu": 8, "nvidia.com/gpu": list(range(4))}, ) def test_get_worker_env_worker_gpu(_): @@ -180,7 +181,7 @@ class Bar: @mock.patch( - "bentoml_io.server.allocator.system_resources", + "_bentoml_impl.server.allocator.system_resources", return_value={"cpu": 8, "nvidia.com/gpu": list(range(4))}, ) def test_get_worker_env_gpu_id(_): diff --git a/webui/scripts/copy-lib.cjs b/webui/scripts/copy-lib.cjs index 67570abfc60..8904c8afae6 100644 --- a/webui/scripts/copy-lib.cjs +++ b/webui/scripts/copy-lib.cjs @@ -1,7 +1,7 @@ const path = require('node:path') const fs = require('fs-extra') -const targetPath = path.resolve(__dirname, '../../src/bentoml_io/server/assets') +const targetPath = path.resolve(__dirname, '../../src/_bentoml_impl/server/assets') const outDir = path.resolve(__dirname, '../lib/assets') async function main() { diff --git a/webui/src/components/code/Python.tsx b/webui/src/components/code/Python.tsx index 3d7e07e76d3..6eea5b80491 100644 --- a/webui/src/components/code/Python.tsx +++ b/webui/src/components/code/Python.tsx @@ -1,6 +1,6 @@ import { StyledLink } from 'baseui/link' import isEmpty from 'lodash/isEmpty' -import { isFileField } from '../../hooks/useQuery' +import { isFileField, hasFileInSchema } from '../../hooks/useQuery' import type { DataType, TObject } from '../../types' import { useMountOptions } from '../../hooks/useMountOptions' import type { IClientProps } from './Base' @@ -16,11 +16,11 @@ function formatValue(value: unknown, schema?: DataType, indent = 4) { return '[]' return `[\n${(value as File[]).map( - item => `${' '.repeat(indent + 4)}open("${item.name}", "rb")`, + item => `${' '.repeat(indent + 4)}Path("${item.name}")`, ).join(',\n')},\n${' '.repeat(indent)}]` } else { - return `open("${(value as File).name}", "rb")` + return `Path("${(value as File).name}")` } } else { @@ -43,9 +43,9 @@ function formatQuery(data: object, schema: TObject, indent = 4) { function generateCode(data: object, path = '/', schema?: TObject, needAuth?: boolean) { const auth = needAuth ? `, token="******"` : '' - return `from bentoml_io.client import SyncHTTPClient - -with SyncHTTPClient("http://localhost:3000"${auth}) as client: + return `import bentoml +${hasFileInSchema(schema ? { schema } : {}) ? 'from pathlib import Path\n' : ''} +with bentoml.SyncHTTPClient("http://localhost:3000"${auth}) as client: result = client.${path.slice(1)}(${formatQuery(data, schema!, 4)}) ` }