-
Notifications
You must be signed in to change notification settings - Fork 5.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[serve] Reduce cardinality for the route
metric tag
#48290
Changes from 14 commits
c5bdf84
cf486f2
8886330
44ebd7f
997f0ed
6e87140
4c9408d
7c7d04f
ad9cbcb
2822980
af64132
e401fbb
e7d9c22
a287b78
6b03f92
2cde057
5b67bdb
fcb7e77
a9f36ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -384,7 +384,10 @@ def _get_response_handler_info( | |
return ResponseHandlerInfo( | ||
response_generator=self.not_found_response(proxy_request), | ||
metadata=HandlerMetadata( | ||
route=proxy_request.route_path, | ||
# Don't include the invalid route prefix because it can blow up our | ||
# metrics' cardinality. | ||
# See: https://github.com/ray-project/ray/issues/47999 | ||
route="", | ||
), | ||
should_record_access_log=True, | ||
should_increment_ongoing_requests=False, | ||
|
@@ -404,11 +407,19 @@ def _get_response_handler_info( | |
if version.parse(starlette.__version__) < version.parse("0.33.0"): | ||
proxy_request.set_path(route_path.replace(route_prefix, "", 1)) | ||
|
||
# NOTE(edoakes): we use the route_prefix instead of the full HTTP path | ||
# for logs & metrics to avoid high cardinality. | ||
# See: https://github.com/ray-project/ray/issues/47999 | ||
logs_and_metrics_route = ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-blocker: We can probably refactor this into There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah I was highly confused at the fact that our "route" was just an app name for gRPC. the "method" field also seems to be getting overwritten to something else that actually looks like a route.. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea, I think it depends on the context. HTTP we have methods such as "GET", "POST"...etc. gRPC has concept of service methods so we used in those in metrics/ logs. Also, in gRPC we use the app name to route so that's why the equivalent |
||
route_prefix | ||
if self.protocol == RequestProtocol.HTTP | ||
else handle.deployment_id.app_name | ||
) | ||
internal_request_id = generate_request_id() | ||
handle, request_id = self.setup_request_context_and_handle( | ||
app_name=handle.deployment_id.app_name, | ||
handle=handle, | ||
route_path=route_path, | ||
route=logs_and_metrics_route, | ||
proxy_request=proxy_request, | ||
internal_request_id=internal_request_id, | ||
) | ||
|
@@ -426,7 +437,7 @@ def _get_response_handler_info( | |
metadata=HandlerMetadata( | ||
application_name=handle.deployment_id.app_name, | ||
deployment_name=handle.deployment_id.name, | ||
route=route_path, | ||
route=logs_and_metrics_route, | ||
), | ||
should_record_access_log=True, | ||
should_increment_ongoing_requests=True, | ||
|
@@ -486,8 +497,8 @@ async def proxy_request(self, proxy_request: ProxyRequest) -> ResponseGenerator: | |
self.processing_latency_tracker.observe( | ||
latency_ms, | ||
tags={ | ||
"method": proxy_request.method, | ||
"route": response_handler_info.metadata.route, | ||
"method": proxy_request.method, | ||
"application": response_handler_info.metadata.application_name, | ||
"status_code": str(status.code), | ||
}, | ||
|
@@ -496,18 +507,18 @@ async def proxy_request(self, proxy_request: ProxyRequest) -> ResponseGenerator: | |
self.request_error_counter.inc( | ||
tags={ | ||
"route": response_handler_info.metadata.route, | ||
"error_code": str(status.code), | ||
"method": proxy_request.method, | ||
"application": response_handler_info.metadata.application_name, | ||
"error_code": str(status.code), | ||
} | ||
) | ||
self.deployment_request_error_counter.inc( | ||
tags={ | ||
"deployment": response_handler_info.metadata.deployment_name, | ||
"error_code": str(status.code), | ||
"method": proxy_request.method, | ||
"route": response_handler_info.metadata.route, | ||
"method": proxy_request.method, | ||
"application": response_handler_info.metadata.application_name, | ||
"error_code": str(status.code), | ||
"deployment": response_handler_info.metadata.deployment_name, | ||
} | ||
) | ||
|
||
|
@@ -516,7 +527,7 @@ def setup_request_context_and_handle( | |
self, | ||
app_name: str, | ||
handle: DeploymentHandle, | ||
route_path: str, | ||
route: str, | ||
proxy_request: ProxyRequest, | ||
internal_request_id: str, | ||
) -> Tuple[DeploymentHandle, str]: | ||
|
@@ -672,7 +683,7 @@ def setup_request_context_and_handle( | |
self, | ||
app_name: str, | ||
handle: DeploymentHandle, | ||
route_path: str, | ||
route: str, | ||
proxy_request: ProxyRequest, | ||
internal_request_id: str, | ||
) -> Tuple[DeploymentHandle, str]: | ||
|
@@ -694,7 +705,7 @@ def setup_request_context_and_handle( | |
) | ||
|
||
request_context_info = { | ||
"route": route_path, | ||
"route": route, | ||
"request_id": request_id, | ||
"_internal_request_id": internal_request_id, | ||
"app_name": app_name, | ||
|
@@ -902,7 +913,7 @@ def setup_request_context_and_handle( | |
self, | ||
app_name: str, | ||
handle: DeploymentHandle, | ||
route_path: str, | ||
route: str, | ||
proxy_request: ProxyRequest, | ||
internal_request_id: str, | ||
) -> Tuple[DeploymentHandle, str]: | ||
|
@@ -912,7 +923,7 @@ def setup_request_context_and_handle( | |
handle. | ||
""" | ||
request_context_info = { | ||
"route": route_path, | ||
"route": route, | ||
"app_name": app_name, | ||
"_internal_request_id": internal_request_id, | ||
"is_http_request": True, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Non-blocker: We can probably create a named tuple for state object to ensure those required keys exist and accessible.