Skip to content

Commit

Permalink
Propagate custom_info Dict through agent Resource
Browse files Browse the repository at this point in the history
 - The agent defines a Resource return type with values:

   * outputs
   * message
   * log_links
   * phase

   These are all a part of the underlying protobuf contract defined in
   flyteidl.

   However, the protobuf field custom_info is missing from that Resource type:

   google.protobuf.Struct custom_info

   https://github.com/flyteorg/flyte/blob/519080b6e4e53fc0e216b5715ad9b5b5270f35c0/flyteidl/protos/flyteidl/admin/agent.proto#L140

   This field was added in flyteorg/flyte#4874
   but never made it into the corresponding flytekit PR
   flyteorg#2146

 - It's useful for agents to return additional metadata about the job,
   and it looks like custom_info is the intended location

 - Make a minor refactor to how the agent responds to requests that
   return Resource by implementing to_flyte_idl / from_flyte_idl
   directly

Signed-off-by: ddl-ebrown <[email protected]>
  • Loading branch information
ddl-ebrown committed May 19, 2024
1 parent 14ed018 commit b6ac1af
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 26 deletions.
27 changes: 3 additions & 24 deletions flytekit/extend/backend/agent_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
GetTaskResponse,
ListAgentsRequest,
ListAgentsResponse,
Resource,
)
from flyteidl.service.agent_pb2_grpc import (
AgentMetadataServiceServicer,
Expand All @@ -25,8 +24,7 @@
)
from prometheus_client import Counter, Summary

from flytekit import FlyteContext, logger
from flytekit.core.type_engine import TypeEngine
from flytekit import logger
from flytekit.exceptions.system import FlyteAgentNotFound
from flytekit.extend.backend.base_agent import AgentRegistry, SyncAgentBase, mirror_async_methods
from flytekit.models.literals import LiteralMap
Expand Down Expand Up @@ -136,16 +134,7 @@ async def GetTask(self, request: GetTaskRequest, context: grpc.ServicerContext)
logger.info(f"{agent.name} start checking the status of the job")
res = await mirror_async_methods(agent.get, resource_meta=agent.metadata_type.decode(request.resource_meta))

if res.outputs is None:
outputs = None
elif isinstance(res.outputs, LiteralMap):
outputs = res.outputs.to_flyte_idl()
else:
ctx = FlyteContext.current_context()
outputs = TypeEngine.dict_to_literal_map_pb(ctx, res.outputs)
return GetTaskResponse(
resource=Resource(phase=res.phase, log_links=res.log_links, message=res.message, outputs=outputs)
)
return GetTaskResponse(resource=res.to_flyte_idl())

@record_agent_metrics
async def DeleteTask(self, request: DeleteTaskRequest, context: grpc.ServicerContext) -> DeleteTaskResponse:
Expand Down Expand Up @@ -175,17 +164,7 @@ async def ExecuteTaskSync(
literal_map = LiteralMap.from_flyte_idl(request.inputs) if request.inputs else None
res = await mirror_async_methods(agent.do, task_template=template, inputs=literal_map)

if res.outputs is None:
outputs = None
elif isinstance(res.outputs, LiteralMap):
outputs = res.outputs.to_flyte_idl()
else:
ctx = FlyteContext.current_context()
outputs = TypeEngine.dict_to_literal_map_pb(ctx, res.outputs)

header = ExecuteTaskSyncResponseHeader(
resource=Resource(phase=res.phase, log_links=res.log_links, message=res.message, outputs=outputs)
)
header = ExecuteTaskSyncResponseHeader(resource=res.to_flyte_idl())
yield ExecuteTaskSyncResponse(header=header)
request_success_count.labels(task_type=task_type, operation=do_operation).inc()
except Exception as e:
Expand Down
35 changes: 35 additions & 0 deletions flytekit/extend/backend/base_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@
from typing import Any, Dict, List, Optional, Union

from flyteidl.admin.agent_pb2 import Agent
from flyteidl.admin.agent_pb2 import Resource as _Resource
from flyteidl.admin.agent_pb2 import TaskCategory as _TaskCategory
from flyteidl.core import literals_pb2
from flyteidl.core.execution_pb2 import TaskExecution, TaskLog
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Struct
from rich.logging import RichHandler
from rich.progress import Progress

Expand Down Expand Up @@ -90,6 +93,38 @@ class Resource:
message: Optional[str] = None
log_links: Optional[List[TaskLog]] = None
outputs: Optional[Union[LiteralMap, typing.Dict[str, Any]]] = None
custom_info: Optional[typing.Dict[str, Any]] = None

def to_flyte_idl(self) -> _Resource:
    """Convert this Resource into its flyteidl protobuf representation.

    ``outputs`` is passed through as-is when it is ``None``, serialized via
    ``LiteralMap.to_flyte_idl()`` when it is a ``LiteralMap``, and otherwise
    converted with ``TypeEngine.dict_to_literal_map_pb`` using the current
    ``FlyteContext``. ``custom_info`` is round-tripped through JSON into a
    ``google.protobuf.Struct``; a missing or empty dict leaves the field unset.

    :return: The populated ``flyteidl.admin.agent_pb2.Resource`` message.
    """
    if self.outputs is None:
        outputs = None
    elif isinstance(self.outputs, LiteralMap):
        outputs = self.outputs.to_flyte_idl()
    else:
        ctx = FlyteContext.current_context()
        outputs = TypeEngine.dict_to_literal_map_pb(ctx, self.outputs)

    # ``Struct`` is already the message class (imported from struct_pb2), so it
    # is instantiated directly; ``Struct.Struct`` does not exist.
    custom_info = json_format.Parse(json.dumps(self.custom_info), Struct()) if self.custom_info else None

    return _Resource(
        phase=self.phase,
        message=self.message,
        log_links=self.log_links,
        outputs=outputs,
        custom_info=custom_info,
    )

@classmethod
def from_flyte_idl(cls, pb2_object: _Resource) -> "Resource":
    """Build a Resource from its flyteidl protobuf representation.

    Fields that are unset/empty on the protobuf message are mapped to ``None``
    so they match the defaults of this class.

    :param pb2_object: The ``flyteidl.admin.agent_pb2.Resource`` message to convert.
    :return: A new instance populated from ``pb2_object``.
    """
    # ``HasField`` raises ValueError for repeated fields and for proto3 scalar
    # fields without explicit presence, so ``message`` and ``log_links`` are
    # checked by value; ``HasField`` is only used for sub-message fields.
    message = pb2_object.message or None
    log_links = list(pb2_object.log_links) or None
    outputs = LiteralMap.from_flyte_idl(pb2_object.outputs) if pb2_object.HasField("outputs") else None
    custom_info = (
        json_format.MessageToDict(pb2_object.custom_info) if pb2_object.HasField("custom_info") else None
    )

    return cls(
        phase=pb2_object.phase,
        message=message,
        log_links=log_links,
        outputs=outputs,
        custom_info=custom_info,
    )


class AgentBase(ABC):
Expand Down
14 changes: 12 additions & 2 deletions tests/flytekit/unit/extend/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def create(self, task_template: TaskTemplate, inputs: typing.Optional[LiteralMap
return DummyMetadata(job_id=dummy_id)

def get(self, resource_meta: DummyMetadata, **kwargs) -> Resource:
return Resource(phase=TaskExecution.SUCCEEDED, log_links=[TaskLog(name="console", uri="localhost:3000")])
return Resource(
phase=TaskExecution.SUCCEEDED,
log_links=[TaskLog(name="console", uri="localhost:3000")],
custom_info={"custom": "info", "num": 1},
)

def delete(self, resource_meta: DummyMetadata, **kwargs):
...
Expand All @@ -96,7 +100,11 @@ async def create(
return DummyMetadata(job_id=dummy_id, output_path=output_path, task_name=task_name)

async def get(self, resource_meta: DummyMetadata, **kwargs) -> Resource:
return Resource(phase=TaskExecution.SUCCEEDED, log_links=[TaskLog(name="console", uri="localhost:3000")])
return Resource(
phase=TaskExecution.SUCCEEDED,
log_links=[TaskLog(name="console", uri="localhost:3000")],
custom_info={"custom": "info", "num": 1},
)

async def delete(self, resource_meta: DummyMetadata, **kwargs):
...
Expand Down Expand Up @@ -174,6 +182,8 @@ def test_dummy_agent():
assert resource.phase == TaskExecution.SUCCEEDED
assert resource.log_links[0].name == "console"
assert resource.log_links[0].uri == "localhost:3000"
assert resource.custom_info["custom"] == "info"
assert resource.custom_info["num"] == 1
assert agent.delete(metadata) is None

class DummyTask(AsyncAgentExecutorMixin, PythonFunctionTask):
Expand Down

0 comments on commit b6ac1af

Please sign in to comment.