Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add uvicorn timeouts options #4682

Merged
merged 14 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docs/source/guides/configurations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,22 @@ BentoML parses all the available fields directly to `Uvicorn <https://www.uvicor
class MyService:
# Service implementation

``timeouts``
^^^^^^^^^^^^

``timeouts`` allows you to set the keep-alive timeout and the graceful shutdown timeout, both in seconds.

BentoML passes all the available fields directly to `Uvicorn <https://www.uvicorn.org/settings/#timeouts>`_. Here is an example:

.. code-block:: python

@bentoml.service(timeouts={
"keep_alive": 60,
"graceful_shutdown": 120
})
class MyService:
# Service implementation

``http``
^^^^^^^^

Expand Down
10 changes: 10 additions & 0 deletions src/_bentoml_impl/server/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ def serve_http(
ssl_cert_reqs: int | None = Provide[BentoMLContainer.ssl.cert_reqs],
ssl_ca_certs: str | None = Provide[BentoMLContainer.ssl.ca_certs],
ssl_ciphers: str | None = Provide[BentoMLContainer.ssl.ciphers],
timeout_keep_alive: int | None = Provide[BentoMLContainer.timeouts.keep_alive],
timeout_graceful_shutdown: int | None = Provide[
BentoMLContainer.timeouts.graceful_shutdown
],
bentoml_home: str = Provide[BentoMLContainer.bentoml_home],
development_mode: bool = False,
reload: bool = False,
Expand All @@ -167,6 +171,7 @@ def serve_http(
from bentoml._internal.utils.analytics.usage_stats import track_serve
from bentoml._internal.utils.circus import create_standalone_arbiter
from bentoml.serve import construct_ssl_args
from bentoml.serve import construct_timeouts_args
from bentoml.serve import create_watcher
from bentoml.serve import ensure_prometheus_dir
from bentoml.serve import make_reload_plugin
Expand Down Expand Up @@ -257,6 +262,10 @@ def serve_http(
ssl_ca_certs=ssl_ca_certs,
ssl_ciphers=ssl_ciphers,
)
timeouts_args = construct_timeouts_args(
timeout_keep_alive=timeout_keep_alive,
timeout_graceful_shutdown=timeout_graceful_shutdown,
)
timeout_args = ["--timeout", str(timeout)] if timeout else []

server_args = [
Expand All @@ -274,6 +283,7 @@ def serve_http(
"--prometheus-dir",
prometheus_dir,
*ssl_args,
*timeouts_args,
*timeout_args,
]
if worker_envs:
Expand Down
16 changes: 16 additions & 0 deletions src/_bentoml_impl/worker/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,18 @@
default=None,
help="Ciphers to use (see stdlib 'ssl' module)",
)
@click.option(
"--timeout-keep-alive",
type=int,
default=5,
help="Close Keep-Alive connections if no new data is received within this timeout. Default: 5",
)
@click.option(
"--timeout-graceful-shutdown",
type=int,
default=None,
help="Maximum number of seconds to wait for graceful shutdown. After this timeout, the server will start terminating requests.",
)
@click.option(
"--development-mode",
type=click.BOOL,
Expand Down Expand Up @@ -111,6 +123,8 @@ def main(
ssl_cert_reqs: int | None,
ssl_ca_certs: str | None,
ssl_ciphers: str | None,
timeout_keep_alive: int,
timeout_graceful_shutdown: int | None,
development_mode: bool,
timeout: int,
):
Expand Down Expand Up @@ -192,6 +206,8 @@ def main(
ssl_keyfile=ssl_keyfile,
ssl_keyfile_password=ssl_keyfile_password,
ssl_ca_certs=ssl_ca_certs,
timeout_keep_alive=timeout_keep_alive,
timeout_graceful_shutdown=timeout_graceful_shutdown,
server_header=False,
**uvicorn_extra_options,
)
Expand Down
6 changes: 6 additions & 0 deletions src/_bentoml_sdk/service/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ class SSLSchema(TypedDict, total=False):
ciphers: str


class TimeoutsSchema(TypedDict, total=False):
    """Timeout settings accepted under the ``timeouts`` key of a service config.

    Declared with ``total=False``, so both keys may be omitted.
    """

    # Keep-alive timeout for HTTP connections.
    # NOTE(review): presumably in seconds and forwarded to the server's
    # ``--timeout-keep-alive`` option -- confirm against the serving code.
    keep_alive: Posint
    # Graceful shutdown timeout.
    # NOTE(review): presumably the maximum number of seconds to wait before
    # in-flight requests are terminated -- confirm against the serving code.
    graceful_shutdown: Posint


class HTTPCorsSchema(TypedDict, total=False):
enabled: bool
access_control_allow_origins: Union[str, List[str]]
Expand Down Expand Up @@ -221,6 +226,7 @@ class ServiceConfig(TypedDict, total=False):
metrics: MetricSchema
logging: LoggingSchema
ssl: SSLSchema
timeouts: TimeoutsSchema
http: HTTPSchema
grpc: GRPCSchema
runner_probe: RunnerProbeSchema
Expand Down
6 changes: 6 additions & 0 deletions src/_bentoml_sdk/service/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@ def serve_http(
ssl_cert_reqs: int | None = Provide[BentoMLContainer.ssl.cert_reqs],
ssl_ca_certs: str | None = Provide[BentoMLContainer.ssl.ca_certs],
ssl_ciphers: str | None = Provide[BentoMLContainer.ssl.ciphers],
timeout_keep_alive: int | None = Provide[BentoMLContainer.timeouts.keep_alive],
timeout_graceful_shutdown: int | None = Provide[
BentoMLContainer.timeouts.graceful_shutdown
],
bentoml_home: str = Provide[BentoMLContainer.bentoml_home],
development_mode: bool = False,
reload: bool = False,
Expand All @@ -319,6 +323,8 @@ def serve_http(
ssl_cert_reqs=ssl_cert_reqs,
ssl_ca_certs=ssl_ca_certs,
ssl_ciphers=ssl_ciphers,
timeout_keep_alive=timeout_keep_alive,
timeout_graceful_shutdown=timeout_graceful_shutdown,
bentoml_home=bentoml_home,
development_mode=development_mode,
reload=reload,
Expand Down
1 change: 1 addition & 0 deletions src/bentoml/_internal/configuration/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ def cloud_config(bentoml_home: str = Provide[bentoml_home]) -> Path:
grpc = api_server_config.grpc
http = api_server_config.http
ssl = api_server_config.ssl
timeouts = api_server_config.timeouts

development_mode = providers.Static(True)
serialization_strategy: providers.Static[SerializationStrategy] = providers.Static(
Expand Down
4 changes: 4 additions & 0 deletions src/bentoml/_internal/configuration/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@
s.Optional("ca_certs"): s.Or(str, None),
s.Optional("ciphers"): s.Or(str, None),
},
s.Optional("timeouts"): {
s.Optional("keep_alive"): s.Or(int, None),
s.Optional("graceful_shutdown"): s.Or(int, None),
},
"runner_probe": {
"enabled": bool,
"timeout": int,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ api_server:
version: 17 # ssl.PROTOCOL_TLS_SERVER
cert_reqs: 0 # ssl.CERT_NONE
ciphers: TLSv1 # default ciphers
timeouts:
keep_alive: 5
graceful_shutdown: ~
http:
host: 0.0.0.0
port: 3000
Expand Down
4 changes: 4 additions & 0 deletions src/bentoml/_internal/configuration/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@
s.Optional("ca_certs"): s.Or(str, None),
s.Optional("ciphers"): s.Or(str, None),
},
s.Optional("timeouts"): {
s.Optional("keep_alive"): s.Or(int, None),
s.Optional("graceful_shutdown"): s.Or(int, None),
},
s.Optional("runner_probe"): {
"enabled": bool,
"timeout": int,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ services:
version: 17 # ssl.PROTOCOL_TLS_SERVER
cert_reqs: 0 # ssl.CERT_NONE
ciphers: TLSv1 # default ciphers
timeouts:
keep_alive: 5
graceful_shutdown: ~
http:
host: 0.0.0.0
port: 3000
Expand Down
6 changes: 6 additions & 0 deletions src/bentoml/bentos.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,10 @@ def serve(
ssl_cert_reqs: int | None = Provide[BentoMLContainer.ssl.cert_reqs],
ssl_ca_certs: str | None = Provide[BentoMLContainer.ssl.ca_certs],
ssl_ciphers: str | None = Provide[BentoMLContainer.ssl.ciphers],
timeout_keep_alive: int | None = Provide[BentoMLContainer.timeouts.keep_alive],
timeout_graceful_shutdown: int | None = Provide[
BentoMLContainer.timeouts.graceful_shutdown
],
enable_reflection: bool = Provide[BentoMLContainer.grpc.reflection.enabled],
enable_channelz: bool = Provide[BentoMLContainer.grpc.channelz.enabled],
max_concurrent_streams: int | None = Provide[
Expand Down Expand Up @@ -534,6 +538,8 @@ def serve(
ssl_cert_reqs=ssl_cert_reqs,
ssl_ca_certs=ssl_ca_certs,
ssl_ciphers=ssl_ciphers,
timeout_keep_alive=timeout_keep_alive,
timeout_graceful_shutdown=timeout_graceful_shutdown,
)
elif server_type == "grpc":
from .server import GrpcServer
Expand Down
30 changes: 30 additions & 0 deletions src/bentoml/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,20 @@ def construct_ssl_args(
return args


def construct_timeouts_args(
    timeout_keep_alive: int | None,
    timeout_graceful_shutdown: int | None,
) -> list[str]:
    """Build the command-line flags that carry the HTTP timeout settings.

    Args:
        timeout_keep_alive: Keep-alive timeout to forward via
            ``--timeout-keep-alive``, or ``None`` to omit the flag.
        timeout_graceful_shutdown: Graceful shutdown timeout to forward via
            ``--timeout-graceful-shutdown``, or ``None`` to omit the flag.

    Returns:
        A flat list of ``[flag, value, ...]`` strings, empty when both
        arguments are ``None``.
    """
    args: list[str] = []

    # Compare against None instead of relying on truthiness: an explicit 0 is
    # a deliberate setting and must still be forwarded, otherwise the flag is
    # dropped and the receiving process falls back to its own default.
    if timeout_keep_alive is not None:
        args.extend(["--timeout-keep-alive", str(timeout_keep_alive)])
    if timeout_graceful_shutdown is not None:
        args.extend(["--timeout-graceful-shutdown", str(timeout_graceful_shutdown)])

    return args


def find_triton_binary():
binary = shutil.which("tritonserver")
if binary is None:
Expand Down Expand Up @@ -218,6 +232,10 @@ def serve_http_development(
ssl_cert_reqs: int | None = Provide[BentoMLContainer.ssl.cert_reqs],
ssl_ca_certs: str | None = Provide[BentoMLContainer.ssl.ca_certs],
ssl_ciphers: str | None = Provide[BentoMLContainer.ssl.ciphers],
timeout_keep_alive: int | None = Provide[BentoMLContainer.timeouts.keep_alive],
timeout_graceful_shutdown: int | None = Provide[
BentoMLContainer.timeouts.graceful_shutdown
],
reload: bool = False,
) -> None:
logger.warning(
Expand All @@ -238,6 +256,8 @@ def serve_http_development(
ssl_cert_reqs=ssl_cert_reqs,
ssl_ca_certs=ssl_ca_certs,
ssl_ciphers=ssl_ciphers,
timeout_keep_alive=timeout_keep_alive,
timeout_graceful_shutdown=timeout_graceful_shutdown,
reload=reload,
api_workers=1,
development_mode=True,
Expand Down Expand Up @@ -298,6 +318,10 @@ def serve_http_production(
ssl_cert_reqs: int | None = Provide[BentoMLContainer.ssl.cert_reqs],
ssl_ca_certs: str | None = Provide[BentoMLContainer.ssl.ca_certs],
ssl_ciphers: str | None = Provide[BentoMLContainer.ssl.ciphers],
timeout_keep_alive: int | None = Provide[BentoMLContainer.timeouts.keep_alive],
timeout_graceful_shutdown: int | None = Provide[
BentoMLContainer.timeouts.graceful_shutdown
],
bentoml_home: str = Provide[BentoMLContainer.bentoml_home],
development_mode: bool = False,
reload: bool = False,
Expand Down Expand Up @@ -428,6 +452,11 @@ def serve_http_production(
ssl_ciphers=ssl_ciphers,
)

timeouts_args = construct_timeouts_args(
timeout_keep_alive=timeout_keep_alive,
timeout_graceful_shutdown=timeout_graceful_shutdown,
)

api_server_args = [
"-m",
SCRIPT_API_SERVER,
Expand All @@ -445,6 +474,7 @@ def serve_http_production(
"--prometheus-dir",
prometheus_dir,
*ssl_args,
*timeouts_args,
*timeout_args,
]

Expand Down
12 changes: 12 additions & 0 deletions src/bentoml/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,14 @@ def __init__(
ssl_cert_reqs: int | None = Provide[BentoMLContainer.ssl.cert_reqs],
ssl_ca_certs: str | None = Provide[BentoMLContainer.ssl.ca_certs],
ssl_ciphers: str | None = Provide[BentoMLContainer.ssl.ciphers],
timeout_keep_alive: int | None = Provide[BentoMLContainer.timeouts.keep_alive],
timeout_graceful_shutdown: int | None = Provide[
BentoMLContainer.timeouts.graceful_shutdown
],
):
# hacky workaround to prevent bentoml.serve being overwritten immediately
from .serve import construct_ssl_args
from .serve import construct_timeouts_args

super().__init__(
bento,
Expand Down Expand Up @@ -369,6 +374,13 @@ def __init__(

self.args.extend(construct_ssl_args(**ssl_args))

timeouts_args = {
"timeout_keep_alive": timeout_keep_alive,
"timeout_graceful_shutdown": timeout_graceful_shutdown,
}

self.args.extend(construct_timeouts_args(**timeouts_args))

def get_client(self) -> HTTPClient:
return super().get_client()

Expand Down
10 changes: 10 additions & 0 deletions src/bentoml/start.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ def start_http_server(
ssl_cert_reqs: int | None = Provide[BentoMLContainer.ssl.cert_reqs],
ssl_ca_certs: str | None = Provide[BentoMLContainer.ssl.ca_certs],
ssl_ciphers: str | None = Provide[BentoMLContainer.ssl.ciphers],
timeout_keep_alive: int | None = Provide[BentoMLContainer.timeouts.keep_alive],
timeout_graceful_shutdown: int | None = Provide[
BentoMLContainer.timeouts.graceful_shutdown
],
) -> None:
from .serve import ensure_prometheus_dir

Expand All @@ -173,6 +177,7 @@ def start_http_server(
from .serve import API_SERVER_NAME
from .serve import PROMETHEUS_MESSAGE
from .serve import construct_ssl_args
from .serve import construct_timeouts_args
from .serve import create_watcher

working_dir = os.path.realpath(os.path.expanduser(working_dir))
Expand Down Expand Up @@ -201,6 +206,10 @@ def start_http_server(
ssl_ca_certs=ssl_ca_certs,
ssl_ciphers=ssl_ciphers,
)
timeouts_args = construct_timeouts_args(
timeout_keep_alive=timeout_keep_alive,
timeout_graceful_shutdown=timeout_graceful_shutdown,
)
scheme = "https" if BentoMLContainer.ssl.enabled.get() else "http"
watchers.append(
create_watcher(
Expand All @@ -222,6 +231,7 @@ def start_http_server(
"--prometheus-dir",
prometheus_dir,
*ssl_args,
*timeouts_args,
*timeout_args,
],
working_dir=working_dir,
Expand Down
Loading
Loading