From ab67b4e5f95bae98814bb7b1ba0e3d5a3893544d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bu=C4=9Fra=20Gedik?= Date: Tue, 5 Nov 2024 09:45:21 -0800 Subject: [PATCH] Add multi file error aggregation strategy (#5795) Signed-off-by: Yee Hing Tong --- flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts | 18 +- .../gen/pb-es/flyteidl/core/execution_pb.ts | 18 +- flyteidl/gen/pb-go/flyteidl/core/errors.pb.go | 104 ++++--- .../gen/pb-go/flyteidl/core/execution.pb.go | 238 ++++++++------- .../flyteidl/service/admin.swagger.json | 9 + flyteidl/gen/pb-js/flyteidl.d.ts | 24 ++ flyteidl/gen/pb-js/flyteidl.js | 72 +++++ .../gen/pb_python/flyteidl/core/errors_pb2.py | 15 +- .../pb_python/flyteidl/core/errors_pb2.pyi | 9 +- .../pb_python/flyteidl/core/execution_pb2.py | 55 ++-- .../pb_python/flyteidl/core/execution_pb2.pyi | 9 +- flyteidl/gen/pb_rust/flyteidl.core.rs | 12 + flyteidl/protos/flyteidl/core/errors.proto | 7 + flyteidl/protos/flyteidl/core/execution.proto | 5 + .../flytek8s/k8s_resource_adds.go | 3 + .../go/tasks/pluginmachinery/io/iface.go | 11 +- .../ioutils/remote_file_output_reader.go | 281 ++++++++++++++++-- .../ioutils/remote_file_output_reader_test.go | 145 ++++++++- .../go/tasks/pluginmachinery/k8s/plugin.go | 23 ++ .../go/tasks/plugins/array/outputs_test.go | 1 + .../k8s/kfoperators/pytorch/pytorch.go | 25 +- .../k8s/kfoperators/pytorch/pytorch_test.go | 26 +- .../nodes/task/k8s/plugin_manager.go | 7 +- .../nodes/task/k8s/plugin_manager_test.go | 5 + .../pkg/controller/workflow/executor_test.go | 3 +- flytestdlib/storage/storage.go | 5 + flytestdlib/storage/storage_test.go | 8 + flytestdlib/storage/stow_store.go | 10 +- flytestdlib/storage/stow_store_test.go | 6 +- 29 files changed, 913 insertions(+), 241 deletions(-) diff --git a/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts b/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts index 42b70dec5b..4c508574c7 100644 --- a/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts +++ b/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts @@ -4,7 
+4,7 @@ // @ts-nocheck import type { BinaryReadOptions, FieldList, JsonReadOptions, JsonValue, PartialMessage, PlainMessage } from "@bufbuild/protobuf"; -import { Message, proto3 } from "@bufbuild/protobuf"; +import { Message, proto3, Timestamp } from "@bufbuild/protobuf"; import { ExecutionError_ErrorKind } from "./execution_pb.js"; /** @@ -42,6 +42,20 @@ export class ContainerError extends Message { */ origin = ExecutionError_ErrorKind.UNKNOWN; + /** + * Timestamp of the error + * + * @generated from field: google.protobuf.Timestamp timestamp = 5; + */ + timestamp?: Timestamp; + + /** + * Worker that generated the error + * + * @generated from field: string worker = 6; + */ + worker = ""; + constructor(data?: PartialMessage) { super(); proto3.util.initPartial(data, this); @@ -54,6 +68,8 @@ export class ContainerError extends Message { { no: 2, name: "message", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 3, name: "kind", kind: "enum", T: proto3.getEnumType(ContainerError_Kind) }, { no: 4, name: "origin", kind: "enum", T: proto3.getEnumType(ExecutionError_ErrorKind) }, + { no: 5, name: "timestamp", kind: "message", T: Timestamp }, + { no: 6, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, ]); static fromBinary(bytes: Uint8Array, options?: Partial): ContainerError { diff --git a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts index 5283936b1f..d9d0a71718 100644 --- a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts +++ b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts @@ -4,7 +4,7 @@ // @ts-nocheck import type { BinaryReadOptions, FieldList, JsonReadOptions, JsonValue, PartialMessage, PlainMessage } from "@bufbuild/protobuf"; -import { Duration, Message, proto3 } from "@bufbuild/protobuf"; +import { Duration, Message, proto3, Timestamp } from "@bufbuild/protobuf"; /** * Indicates various phases of Workflow Execution @@ -341,6 +341,20 @@ export class ExecutionError extends Message { */ 
kind = ExecutionError_ErrorKind.UNKNOWN; + /** + * Timestamp of the error + * + * @generated from field: google.protobuf.Timestamp timestamp = 5; + */ + timestamp?: Timestamp; + + /** + * Worker that generated the error + * + * @generated from field: string worker = 6; + */ + worker = ""; + constructor(data?: PartialMessage) { super(); proto3.util.initPartial(data, this); @@ -353,6 +367,8 @@ export class ExecutionError extends Message { { no: 2, name: "message", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 3, name: "error_uri", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 4, name: "kind", kind: "enum", T: proto3.getEnumType(ExecutionError_ErrorKind) }, + { no: 5, name: "timestamp", kind: "message", T: Timestamp }, + { no: 6, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, ]); static fromBinary(bytes: Uint8Array, options?: Partial): ExecutionError { diff --git a/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go b/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go index 61e833ed1d..cb7640d053 100644 --- a/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go +++ b/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go @@ -9,6 +9,7 @@ package core import ( protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" + timestamppb "google.golang.org/protobuf/types/known/timestamppb" reflect "reflect" sync "sync" ) @@ -82,6 +83,10 @@ type ContainerError struct { Kind ContainerError_Kind `protobuf:"varint,3,opt,name=kind,proto3,enum=flyteidl.core.ContainerError_Kind" json:"kind,omitempty"` // Defines the origin of the error (system, user, unknown). 
Origin ExecutionError_ErrorKind `protobuf:"varint,4,opt,name=origin,proto3,enum=flyteidl.core.ExecutionError_ErrorKind" json:"origin,omitempty"` + // Timestamp of the error + Timestamp *timestamppb.Timestamp `protobuf:"bytes,5,opt,name=timestamp,proto3" json:"timestamp,omitempty"` + // Worker that generated the error + Worker string `protobuf:"bytes,6,opt,name=worker,proto3" json:"worker,omitempty"` } func (x *ContainerError) Reset() { @@ -144,6 +149,20 @@ func (x *ContainerError) GetOrigin() ExecutionError_ErrorKind { return ExecutionError_UNKNOWN } +func (x *ContainerError) GetTimestamp() *timestamppb.Timestamp { + if x != nil { + return x.Timestamp + } + return nil +} + +func (x *ContainerError) GetWorker() string { + if x != nil { + return x.Worker + } + return "" +} + // Defines the errors.pb file format the container can produce to communicate // failure reasons to the execution engine. type ErrorDocument struct { @@ -201,38 +220,45 @@ var file_flyteidl_core_errors_proto_rawDesc = []byte{ 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0d, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x1a, 0x1d, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0x2f, 0x65, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xe5, 0x01, 0x0a, 0x0e, 0x43, - 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, 0x0a, - 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, 0x64, - 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x36, 0x0a, 0x04, 0x6b, - 0x69, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x22, 0x2e, 0x66, 0x6c, 0x79, 0x74, - 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, - 0x6e, 0x65, 
0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x04, 0x6b, - 0x69, 0x6e, 0x64, 0x12, 0x3f, 0x0a, 0x06, 0x6f, 0x72, 0x69, 0x67, 0x69, 0x6e, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, - 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, - 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x06, 0x6f, 0x72, - 0x69, 0x67, 0x69, 0x6e, 0x22, 0x2c, 0x0a, 0x04, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x13, 0x0a, 0x0f, - 0x4e, 0x4f, 0x4e, 0x5f, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x41, 0x42, 0x4c, 0x45, 0x10, - 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x41, 0x42, 0x4c, 0x45, - 0x10, 0x01, 0x22, 0x44, 0x0a, 0x0d, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x44, 0x6f, 0x63, 0x75, 0x6d, - 0x65, 0x6e, 0x74, 0x12, 0x33, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, - 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, - 0x72, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x42, 0xb1, 0x01, 0x0a, 0x11, 0x63, 0x6f, 0x6d, - 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0b, - 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x3a, 0x67, - 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x6f, - 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, - 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, 0x6f, 0x2f, 0x66, 0x6c, 0x79, 0x74, - 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, 0x02, 0x03, 0x46, 0x43, 0x58, 0xaa, - 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x43, 0x6f, 0x72, 0x65, 0xca, - 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 
0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0xe2, - 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0x5c, - 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x0e, 0x46, 0x6c, - 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x1f, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, 0x6d, 0x65, + 0x73, 0x74, 0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xb7, 0x02, 0x0a, 0x0e, + 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, + 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, + 0x64, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x36, 0x0a, 0x04, + 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x22, 0x2e, 0x66, 0x6c, 0x79, + 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, + 0x69, 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x04, + 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x3f, 0x0a, 0x06, 0x6f, 0x72, 0x69, 0x67, 0x69, 0x6e, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, + 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, + 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x06, 0x6f, + 0x72, 0x69, 0x67, 0x69, 0x6e, 0x12, 0x38, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, + 0x6d, 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 
0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, + 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, + 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2c, 0x0a, 0x04, 0x4b, 0x69, 0x6e, 0x64, 0x12, + 0x13, 0x0a, 0x0f, 0x4e, 0x4f, 0x4e, 0x5f, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x41, 0x42, + 0x4c, 0x45, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x41, + 0x42, 0x4c, 0x45, 0x10, 0x01, 0x22, 0x44, 0x0a, 0x0d, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x44, 0x6f, + 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x33, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, + 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x45, + 0x72, 0x72, 0x6f, 0x72, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x42, 0xb1, 0x01, 0x0a, 0x11, + 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, + 0x65, 0x42, 0x0b, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, + 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x6c, 0x79, + 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, 0x6c, 0x79, 0x74, + 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, 0x6f, 0x2f, 0x66, + 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, 0x02, 0x03, 0x46, + 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x43, 0x6f, + 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, + 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, + 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, + 
0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, 0x72, 0x65, 0x62, + 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -254,16 +280,18 @@ var file_flyteidl_core_errors_proto_goTypes = []interface{}{ (*ContainerError)(nil), // 1: flyteidl.core.ContainerError (*ErrorDocument)(nil), // 2: flyteidl.core.ErrorDocument (ExecutionError_ErrorKind)(0), // 3: flyteidl.core.ExecutionError.ErrorKind + (*timestamppb.Timestamp)(nil), // 4: google.protobuf.Timestamp } var file_flyteidl_core_errors_proto_depIdxs = []int32{ 0, // 0: flyteidl.core.ContainerError.kind:type_name -> flyteidl.core.ContainerError.Kind 3, // 1: flyteidl.core.ContainerError.origin:type_name -> flyteidl.core.ExecutionError.ErrorKind - 1, // 2: flyteidl.core.ErrorDocument.error:type_name -> flyteidl.core.ContainerError - 3, // [3:3] is the sub-list for method output_type - 3, // [3:3] is the sub-list for method input_type - 3, // [3:3] is the sub-list for extension type_name - 3, // [3:3] is the sub-list for extension extendee - 0, // [0:3] is the sub-list for field type_name + 4, // 2: flyteidl.core.ContainerError.timestamp:type_name -> google.protobuf.Timestamp + 1, // 3: flyteidl.core.ErrorDocument.error:type_name -> flyteidl.core.ContainerError + 4, // [4:4] is the sub-list for method output_type + 4, // [4:4] is the sub-list for method input_type + 4, // [4:4] is the sub-list for extension type_name + 4, // [4:4] is the sub-list for extension extendee + 0, // [0:4] is the sub-list for field type_name } func init() { file_flyteidl_core_errors_proto_init() } diff --git a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go index 7befaca1ac..a17e94eba1 100644 --- a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go +++ b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go @@ -10,6 +10,7 @@ import ( protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" durationpb 
"google.golang.org/protobuf/types/known/durationpb" + timestamppb "google.golang.org/protobuf/types/known/timestamppb" reflect "reflect" sync "sync" ) @@ -514,6 +515,10 @@ type ExecutionError struct { // Full error contents accessible via a URI ErrorUri string `protobuf:"bytes,3,opt,name=error_uri,json=errorUri,proto3" json:"error_uri,omitempty"` Kind ExecutionError_ErrorKind `protobuf:"varint,4,opt,name=kind,proto3,enum=flyteidl.core.ExecutionError_ErrorKind" json:"kind,omitempty"` + // Timestamp of the error + Timestamp *timestamppb.Timestamp `protobuf:"bytes,5,opt,name=timestamp,proto3" json:"timestamp,omitempty"` + // Worker that generated the error + Worker string `protobuf:"bytes,6,opt,name=worker,proto3" json:"worker,omitempty"` } func (x *ExecutionError) Reset() { @@ -576,6 +581,20 @@ func (x *ExecutionError) GetKind() ExecutionError_ErrorKind { return ExecutionError_UNKNOWN } +func (x *ExecutionError) GetTimestamp() *timestamppb.Timestamp { + if x != nil { + return x.Timestamp + } + return nil +} + +func (x *ExecutionError) GetWorker() string { + if x != nil { + return x.Worker + } + return "" +} + // Log information for the task that is specific to a log sink // When our log story is flushed out, we may have more metadata here like log link expiry type TaskLog struct { @@ -803,102 +822,109 @@ var file_flyteidl_core_execution_proto_rawDesc = []byte{ 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0d, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x1a, 0x1e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, - 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xa7, - 0x01, 0x0a, 0x11, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x45, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x91, 0x01, 0x0a, 0x05, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, + 0x64, 0x75, 0x72, 0x61, 0x74, 
0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x1f, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, + 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, + 0xa7, 0x01, 0x0a, 0x11, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x45, 0x78, 0x65, 0x63, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x91, 0x01, 0x0a, 0x05, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, + 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, + 0x0a, 0x06, 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x55, + 0x4e, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x55, 0x43, 0x43, 0x45, + 0x45, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, + 0x45, 0x44, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0b, 0x0a, 0x07, 0x46, 0x41, 0x49, 0x4c, 0x49, 0x4e, + 0x47, 0x10, 0x05, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x06, 0x12, + 0x0b, 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0x07, 0x12, 0x0d, 0x0a, 0x09, + 0x54, 0x49, 0x4d, 0x45, 0x44, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x08, 0x12, 0x0c, 0x0a, 0x08, 0x41, + 0x42, 0x4f, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x09, 0x22, 0xb6, 0x01, 0x0a, 0x0d, 0x4e, 0x6f, + 0x64, 0x65, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0xa4, 0x01, 0x0a, 0x05, + 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, + 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, + 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0d, 0x0a, + 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, + 0x46, 0x41, 0x49, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, + 0x4c, 0x45, 0x44, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x42, 
0x4f, 0x52, 0x54, 0x45, 0x44, + 0x10, 0x06, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x4b, 0x49, 0x50, 0x50, 0x45, 0x44, 0x10, 0x07, 0x12, + 0x0d, 0x0a, 0x09, 0x54, 0x49, 0x4d, 0x45, 0x44, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x08, 0x12, 0x13, + 0x0a, 0x0f, 0x44, 0x59, 0x4e, 0x41, 0x4d, 0x49, 0x43, 0x5f, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, + 0x47, 0x10, 0x09, 0x12, 0x0d, 0x0a, 0x09, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x45, 0x44, + 0x10, 0x0a, 0x22, 0x96, 0x01, 0x0a, 0x0d, 0x54, 0x61, 0x73, 0x6b, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x84, 0x01, 0x0a, 0x05, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x55, 0x4e, - 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, - 0x44, 0x49, 0x4e, 0x47, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, - 0x44, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0b, 0x0a, 0x07, 0x46, 0x41, 0x49, 0x4c, 0x49, 0x4e, 0x47, - 0x10, 0x05, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x06, 0x12, 0x0b, - 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0x07, 0x12, 0x0d, 0x0a, 0x09, 0x54, - 0x49, 0x4d, 0x45, 0x44, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x08, 0x12, 0x0c, 0x0a, 0x08, 0x41, 0x42, - 0x4f, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x09, 0x22, 0xb6, 0x01, 0x0a, 0x0d, 0x4e, 0x6f, 0x64, - 0x65, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0xa4, 0x01, 0x0a, 0x05, 0x50, - 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, - 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, 0x12, - 0x0b, 0x0a, 0x07, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, - 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x46, - 0x41, 0x49, 
0x4c, 0x49, 0x4e, 0x47, 0x10, 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, - 0x45, 0x44, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, - 0x06, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x4b, 0x49, 0x50, 0x50, 0x45, 0x44, 0x10, 0x07, 0x12, 0x0d, - 0x0a, 0x09, 0x54, 0x49, 0x4d, 0x45, 0x44, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x08, 0x12, 0x13, 0x0a, - 0x0f, 0x44, 0x59, 0x4e, 0x41, 0x4d, 0x49, 0x43, 0x5f, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, 0x47, - 0x10, 0x09, 0x12, 0x0d, 0x0a, 0x09, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x45, 0x44, 0x10, - 0x0a, 0x22, 0x96, 0x01, 0x0a, 0x0d, 0x54, 0x61, 0x73, 0x6b, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, - 0x69, 0x6f, 0x6e, 0x22, 0x84, 0x01, 0x0a, 0x05, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, 0x0a, - 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, - 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x55, 0x4e, 0x4e, - 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, 0x44, - 0x45, 0x44, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, - 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x05, 0x12, 0x10, 0x0a, - 0x0c, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x49, 0x4e, 0x47, 0x10, 0x06, 0x12, - 0x19, 0x0a, 0x15, 0x57, 0x41, 0x49, 0x54, 0x49, 0x4e, 0x47, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x52, - 0x45, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x53, 0x10, 0x07, 0x22, 0xc8, 0x01, 0x0a, 0x0e, 0x45, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, 0x0a, - 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, 0x64, - 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x5f, 0x75, 0x72, 0x69, 
0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, - 0x65, 0x72, 0x72, 0x6f, 0x72, 0x55, 0x72, 0x69, 0x12, 0x3b, 0x0a, 0x04, 0x6b, 0x69, 0x6e, 0x64, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, - 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, - 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x22, 0x2e, 0x0a, 0x09, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, - 0x6e, 0x64, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, - 0x08, 0x0a, 0x04, 0x55, 0x53, 0x45, 0x52, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x59, 0x53, - 0x54, 0x45, 0x4d, 0x10, 0x02, 0x22, 0xb2, 0x02, 0x0a, 0x07, 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, - 0x67, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x75, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4b, 0x0a, 0x0e, 0x6d, 0x65, 0x73, 0x73, 0x61, - 0x67, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, - 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, - 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, - 0x72, 0x6d, 0x61, 0x74, 0x12, 0x2b, 0x0a, 0x03, 0x74, 0x74, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x03, 0x74, 0x74, - 0x6c, 0x12, 0x2a, 0x0a, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, - 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x53, 
0x68, 0x6f, - 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x2a, 0x0a, - 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, - 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, - 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x22, 0x2f, 0x0a, 0x0d, 0x4d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, - 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x43, 0x53, 0x56, 0x10, 0x01, - 0x12, 0x08, 0x0a, 0x04, 0x4a, 0x53, 0x4f, 0x4e, 0x10, 0x02, 0x22, 0x5a, 0x0a, 0x14, 0x51, 0x75, - 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, - 0x65, 0x63, 0x12, 0x42, 0x0a, 0x0f, 0x71, 0x75, 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, 0x5f, 0x62, - 0x75, 0x64, 0x67, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, - 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, - 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, 0x71, 0x75, 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, - 0x42, 0x75, 0x64, 0x67, 0x65, 0x74, 0x22, 0xce, 0x01, 0x0a, 0x10, 0x51, 0x75, 0x61, 0x6c, 0x69, - 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x3a, 0x0a, 0x04, 0x74, - 0x69, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, - 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, - 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x54, 0x69, 0x65, 0x72, 0x48, - 0x00, 0x52, 0x04, 0x74, 0x69, 0x65, 0x72, 0x12, 0x39, 0x0a, 0x04, 0x73, 0x70, 0x65, 0x63, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, - 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, - 0x65, 0x72, 0x76, 0x69, 
0x63, 0x65, 0x53, 0x70, 0x65, 0x63, 0x48, 0x00, 0x52, 0x04, 0x73, 0x70, - 0x65, 0x63, 0x22, 0x34, 0x0a, 0x04, 0x54, 0x69, 0x65, 0x72, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, - 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x48, 0x49, 0x47, - 0x48, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4d, 0x45, 0x44, 0x49, 0x55, 0x4d, 0x10, 0x02, 0x12, - 0x07, 0x0a, 0x03, 0x4c, 0x4f, 0x57, 0x10, 0x03, 0x42, 0x0d, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x69, - 0x67, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0xb4, 0x01, 0x0a, 0x11, 0x63, 0x6f, 0x6d, 0x2e, - 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0e, 0x45, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, - 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x6c, 0x79, 0x74, - 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, - 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, 0x6f, 0x2f, 0x66, 0x6c, - 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, 0x02, 0x03, 0x46, 0x43, - 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x43, 0x6f, 0x72, - 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, - 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, - 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x0e, - 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, 0x72, 0x65, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, + 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, + 0x10, 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x05, 0x12, 0x10, + 0x0a, 0x0c, 0x49, 0x4e, 
0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x49, 0x4e, 0x47, 0x10, 0x06, + 0x12, 0x19, 0x0a, 0x15, 0x57, 0x41, 0x49, 0x54, 0x49, 0x4e, 0x47, 0x5f, 0x46, 0x4f, 0x52, 0x5f, + 0x52, 0x45, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x53, 0x10, 0x07, 0x22, 0x9a, 0x02, 0x0a, 0x0e, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, + 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, + 0x64, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1b, 0x0a, 0x09, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x5f, 0x75, 0x72, 0x69, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x08, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x55, 0x72, 0x69, 0x12, 0x3b, 0x0a, 0x04, 0x6b, 0x69, 0x6e, + 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, + 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, + 0x52, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x38, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, + 0x61, 0x6d, 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, + 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, + 0x12, 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2e, 0x0a, 0x09, 0x45, 0x72, 0x72, 0x6f, + 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, + 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x55, 0x53, 0x45, 0x52, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, + 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, 0x10, 0x02, 0x22, 0xb2, 
0x02, 0x0a, 0x07, 0x54, 0x61, 0x73, + 0x6b, 0x4c, 0x6f, 0x67, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4b, 0x0a, 0x0e, 0x6d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, + 0x72, 0x65, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, + 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, + 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x2b, 0x0a, 0x03, 0x74, 0x74, 0x6c, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, + 0x03, 0x74, 0x74, 0x6c, 0x12, 0x2a, 0x0a, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, + 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, + 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, + 0x12, 0x2a, 0x0a, 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, 0x65, 0x46, 0x69, 0x6e, 0x69, + 0x73, 0x68, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x48, 0x69, 0x64, 0x65, + 0x4f, 0x6e, 0x63, 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x22, 0x2f, 0x0a, 0x0d, + 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x0b, 0x0a, + 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x43, 0x53, + 0x56, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x4a, 0x53, 0x4f, 0x4e, 0x10, 0x02, 0x22, 0x5a, 0x0a, + 0x14, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 
0x65, 0x53, 0x70, 0x65, 0x63, 0x12, 0x42, 0x0a, 0x0f, 0x71, 0x75, 0x65, 0x75, 0x65, 0x69, 0x6e, + 0x67, 0x5f, 0x62, 0x75, 0x64, 0x67, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, + 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, 0x71, 0x75, 0x65, 0x75, 0x65, + 0x69, 0x6e, 0x67, 0x42, 0x75, 0x64, 0x67, 0x65, 0x74, 0x22, 0xce, 0x01, 0x0a, 0x10, 0x51, 0x75, + 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x3a, + 0x0a, 0x04, 0x74, 0x69, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x66, + 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, + 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x54, 0x69, + 0x65, 0x72, 0x48, 0x00, 0x52, 0x04, 0x74, 0x69, 0x65, 0x72, 0x12, 0x39, 0x0a, 0x04, 0x73, 0x70, + 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, + 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, + 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, 0x65, 0x63, 0x48, 0x00, 0x52, + 0x04, 0x73, 0x70, 0x65, 0x63, 0x22, 0x34, 0x0a, 0x04, 0x54, 0x69, 0x65, 0x72, 0x12, 0x0d, 0x0a, + 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, + 0x48, 0x49, 0x47, 0x48, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4d, 0x45, 0x44, 0x49, 0x55, 0x4d, + 0x10, 0x02, 0x12, 0x07, 0x0a, 0x03, 0x4c, 0x4f, 0x57, 0x10, 0x03, 0x42, 0x0d, 0x0a, 0x0b, 0x64, + 0x65, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0xb4, 0x01, 0x0a, 0x11, 0x63, + 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, + 0x42, 0x0e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x50, 0x72, 0x6f, 0x74, 0x6f, + 0x50, 0x01, 0x5a, 0x3a, 0x67, 0x69, 
0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, + 0x6c, 0x79, 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, 0x6c, + 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, 0x6f, + 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, 0x02, + 0x03, 0x46, 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, + 0x43, 0x6f, 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, + 0x43, 0x6f, 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, + 0x43, 0x6f, 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0xea, 0x02, 0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, 0x72, + 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -929,20 +955,22 @@ var file_flyteidl_core_execution_proto_goTypes = []interface{}{ (*TaskLog)(nil), // 10: flyteidl.core.TaskLog (*QualityOfServiceSpec)(nil), // 11: flyteidl.core.QualityOfServiceSpec (*QualityOfService)(nil), // 12: flyteidl.core.QualityOfService - (*durationpb.Duration)(nil), // 13: google.protobuf.Duration + (*timestamppb.Timestamp)(nil), // 13: google.protobuf.Timestamp + (*durationpb.Duration)(nil), // 14: google.protobuf.Duration } var file_flyteidl_core_execution_proto_depIdxs = []int32{ 3, // 0: flyteidl.core.ExecutionError.kind:type_name -> flyteidl.core.ExecutionError.ErrorKind - 4, // 1: flyteidl.core.TaskLog.message_format:type_name -> flyteidl.core.TaskLog.MessageFormat - 13, // 2: flyteidl.core.TaskLog.ttl:type_name -> google.protobuf.Duration - 13, // 3: flyteidl.core.QualityOfServiceSpec.queueing_budget:type_name -> google.protobuf.Duration - 5, // 4: flyteidl.core.QualityOfService.tier:type_name -> flyteidl.core.QualityOfService.Tier - 11, // 5: flyteidl.core.QualityOfService.spec:type_name -> flyteidl.core.QualityOfServiceSpec - 6, // 
[6:6] is the sub-list for method output_type - 6, // [6:6] is the sub-list for method input_type - 6, // [6:6] is the sub-list for extension type_name - 6, // [6:6] is the sub-list for extension extendee - 0, // [0:6] is the sub-list for field type_name + 13, // 1: flyteidl.core.ExecutionError.timestamp:type_name -> google.protobuf.Timestamp + 4, // 2: flyteidl.core.TaskLog.message_format:type_name -> flyteidl.core.TaskLog.MessageFormat + 14, // 3: flyteidl.core.TaskLog.ttl:type_name -> google.protobuf.Duration + 14, // 4: flyteidl.core.QualityOfServiceSpec.queueing_budget:type_name -> google.protobuf.Duration + 5, // 5: flyteidl.core.QualityOfService.tier:type_name -> flyteidl.core.QualityOfService.Tier + 11, // 6: flyteidl.core.QualityOfService.spec:type_name -> flyteidl.core.QualityOfServiceSpec + 7, // [7:7] is the sub-list for method output_type + 7, // [7:7] is the sub-list for method input_type + 7, // [7:7] is the sub-list for extension type_name + 7, // [7:7] is the sub-list for extension extendee + 0, // [0:7] is the sub-list for field type_name } func init() { file_flyteidl_core_execution_proto_init() } diff --git a/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json b/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json index 241baeb53c..c4f6f3ef7f 100644 --- a/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json +++ b/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json @@ -7153,6 +7153,15 @@ }, "kind": { "$ref": "#/definitions/ExecutionErrorErrorKind" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "title": "Timestamp of the error" + }, + "worker": { + "type": "string", + "title": "Worker that generated the error" } }, "description": "Represents the error message from the execution." 
diff --git a/flyteidl/gen/pb-js/flyteidl.d.ts b/flyteidl/gen/pb-js/flyteidl.d.ts index 0ff2422577..73b5a73eaa 100644 --- a/flyteidl/gen/pb-js/flyteidl.d.ts +++ b/flyteidl/gen/pb-js/flyteidl.d.ts @@ -5748,6 +5748,12 @@ export namespace flyteidl { /** ExecutionError kind */ kind?: (flyteidl.core.ExecutionError.ErrorKind|null); + + /** ExecutionError timestamp */ + timestamp?: (google.protobuf.ITimestamp|null); + + /** ExecutionError worker */ + worker?: (string|null); } /** Represents an ExecutionError. */ @@ -5771,6 +5777,12 @@ export namespace flyteidl { /** ExecutionError kind. */ public kind: flyteidl.core.ExecutionError.ErrorKind; + /** ExecutionError timestamp. */ + public timestamp?: (google.protobuf.ITimestamp|null); + + /** ExecutionError worker. */ + public worker: string; + /** * Creates a new ExecutionError instance using the specified properties. * @param [properties] Properties to set @@ -7555,6 +7567,12 @@ export namespace flyteidl { /** ContainerError origin */ origin?: (flyteidl.core.ExecutionError.ErrorKind|null); + + /** ContainerError timestamp */ + timestamp?: (google.protobuf.ITimestamp|null); + + /** ContainerError worker */ + worker?: (string|null); } /** Represents a ContainerError. */ @@ -7578,6 +7596,12 @@ export namespace flyteidl { /** ContainerError origin. */ public origin: flyteidl.core.ExecutionError.ErrorKind; + /** ContainerError timestamp. */ + public timestamp?: (google.protobuf.ITimestamp|null); + + /** ContainerError worker. */ + public worker: string; + /** * Creates a new ContainerError instance using the specified properties. 
* @param [properties] Properties to set diff --git a/flyteidl/gen/pb-js/flyteidl.js b/flyteidl/gen/pb-js/flyteidl.js index 042343eecf..970a69229c 100644 --- a/flyteidl/gen/pb-js/flyteidl.js +++ b/flyteidl/gen/pb-js/flyteidl.js @@ -13823,6 +13823,8 @@ * @property {string|null} [message] ExecutionError message * @property {string|null} [errorUri] ExecutionError errorUri * @property {flyteidl.core.ExecutionError.ErrorKind|null} [kind] ExecutionError kind + * @property {google.protobuf.ITimestamp|null} [timestamp] ExecutionError timestamp + * @property {string|null} [worker] ExecutionError worker */ /** @@ -13872,6 +13874,22 @@ */ ExecutionError.prototype.kind = 0; + /** + * ExecutionError timestamp. + * @member {google.protobuf.ITimestamp|null|undefined} timestamp + * @memberof flyteidl.core.ExecutionError + * @instance + */ + ExecutionError.prototype.timestamp = null; + + /** + * ExecutionError worker. + * @member {string} worker + * @memberof flyteidl.core.ExecutionError + * @instance + */ + ExecutionError.prototype.worker = ""; + /** * Creates a new ExecutionError instance using the specified properties. 
* @function create @@ -13904,6 +13922,10 @@ writer.uint32(/* id 3, wireType 2 =*/26).string(message.errorUri); if (message.kind != null && message.hasOwnProperty("kind")) writer.uint32(/* id 4, wireType 0 =*/32).int32(message.kind); + if (message.timestamp != null && message.hasOwnProperty("timestamp")) + $root.google.protobuf.Timestamp.encode(message.timestamp, writer.uint32(/* id 5, wireType 2 =*/42).fork()).ldelim(); + if (message.worker != null && message.hasOwnProperty("worker")) + writer.uint32(/* id 6, wireType 2 =*/50).string(message.worker); return writer; }; @@ -13937,6 +13959,12 @@ case 4: message.kind = reader.int32(); break; + case 5: + message.timestamp = $root.google.protobuf.Timestamp.decode(reader, reader.uint32()); + break; + case 6: + message.worker = reader.string(); + break; default: reader.skipType(tag & 7); break; @@ -13974,6 +14002,14 @@ case 2: break; } + if (message.timestamp != null && message.hasOwnProperty("timestamp")) { + var error = $root.google.protobuf.Timestamp.verify(message.timestamp); + if (error) + return "timestamp." + error; + } + if (message.worker != null && message.hasOwnProperty("worker")) + if (!$util.isString(message.worker)) + return "worker: string expected"; return null; }; @@ -18268,6 +18304,8 @@ * @property {string|null} [message] ContainerError message * @property {flyteidl.core.ContainerError.Kind|null} [kind] ContainerError kind * @property {flyteidl.core.ExecutionError.ErrorKind|null} [origin] ContainerError origin + * @property {google.protobuf.ITimestamp|null} [timestamp] ContainerError timestamp + * @property {string|null} [worker] ContainerError worker */ /** @@ -18317,6 +18355,22 @@ */ ContainerError.prototype.origin = 0; + /** + * ContainerError timestamp. + * @member {google.protobuf.ITimestamp|null|undefined} timestamp + * @memberof flyteidl.core.ContainerError + * @instance + */ + ContainerError.prototype.timestamp = null; + + /** + * ContainerError worker. 
+ * @member {string} worker + * @memberof flyteidl.core.ContainerError + * @instance + */ + ContainerError.prototype.worker = ""; + /** * Creates a new ContainerError instance using the specified properties. * @function create @@ -18349,6 +18403,10 @@ writer.uint32(/* id 3, wireType 0 =*/24).int32(message.kind); if (message.origin != null && message.hasOwnProperty("origin")) writer.uint32(/* id 4, wireType 0 =*/32).int32(message.origin); + if (message.timestamp != null && message.hasOwnProperty("timestamp")) + $root.google.protobuf.Timestamp.encode(message.timestamp, writer.uint32(/* id 5, wireType 2 =*/42).fork()).ldelim(); + if (message.worker != null && message.hasOwnProperty("worker")) + writer.uint32(/* id 6, wireType 2 =*/50).string(message.worker); return writer; }; @@ -18382,6 +18440,12 @@ case 4: message.origin = reader.int32(); break; + case 5: + message.timestamp = $root.google.protobuf.Timestamp.decode(reader, reader.uint32()); + break; + case 6: + message.worker = reader.string(); + break; default: reader.skipType(tag & 7); break; @@ -18424,6 +18488,14 @@ case 2: break; } + if (message.timestamp != null && message.hasOwnProperty("timestamp")) { + var error = $root.google.protobuf.Timestamp.verify(message.timestamp); + if (error) + return "timestamp." 
+ error; + } + if (message.worker != null && message.hasOwnProperty("worker")) + if (!$util.isString(message.worker)) + return "worker: string expected"; return null; }; diff --git a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py index 68182fd259..fe1be689e4 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py +++ b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py @@ -12,9 +12,10 @@ from flyteidl.core import execution_pb2 as flyteidl_dot_core_dot_execution__pb2 +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x66lyteidl/core/errors.proto\x12\rflyteidl.core\x1a\x1d\x66lyteidl/core/execution.proto\"\xe5\x01\n\x0e\x43ontainerError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x36\n\x04kind\x18\x03 \x01(\x0e\x32\".flyteidl.core.ContainerError.KindR\x04kind\x12?\n\x06origin\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x06origin\",\n\x04Kind\x12\x13\n\x0fNON_RECOVERABLE\x10\x00\x12\x0f\n\x0bRECOVERABLE\x10\x01\"D\n\rErrorDocument\x12\x33\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x1d.flyteidl.core.ContainerErrorR\x05\x65rrorB\xb1\x01\n\x11\x63om.flyteidl.coreB\x0b\x45rrorsProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x66lyteidl/core/errors.proto\x12\rflyteidl.core\x1a\x1d\x66lyteidl/core/execution.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\xb7\x02\n\x0e\x43ontainerError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x36\n\x04kind\x18\x03 \x01(\x0e\x32\".flyteidl.core.ContainerError.KindR\x04kind\x12?\n\x06origin\x18\x04 
\x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x06origin\x12\x38\n\ttimestamp\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\ttimestamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\",\n\x04Kind\x12\x13\n\x0fNON_RECOVERABLE\x10\x00\x12\x0f\n\x0bRECOVERABLE\x10\x01\"D\n\rErrorDocument\x12\x33\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x1d.flyteidl.core.ContainerErrorR\x05\x65rrorB\xb1\x01\n\x11\x63om.flyteidl.coreB\x0b\x45rrorsProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -23,10 +24,10 @@ DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b'\n\021com.flyteidl.coreB\013ErrorsProtoP\001Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\242\002\003FCX\252\002\rFlyteidl.Core\312\002\rFlyteidl\\Core\342\002\031Flyteidl\\Core\\GPBMetadata\352\002\016Flyteidl::Core' - _globals['_CONTAINERERROR']._serialized_start=77 - _globals['_CONTAINERERROR']._serialized_end=306 - _globals['_CONTAINERERROR_KIND']._serialized_start=262 - _globals['_CONTAINERERROR_KIND']._serialized_end=306 - _globals['_ERRORDOCUMENT']._serialized_start=308 - _globals['_ERRORDOCUMENT']._serialized_end=376 + _globals['_CONTAINERERROR']._serialized_start=110 + _globals['_CONTAINERERROR']._serialized_end=421 + _globals['_CONTAINERERROR_KIND']._serialized_start=377 + _globals['_CONTAINERERROR_KIND']._serialized_end=421 + _globals['_ERRORDOCUMENT']._serialized_start=423 + _globals['_ERRORDOCUMENT']._serialized_end=491 # @@protoc_insertion_point(module_scope) diff --git a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi index b13aa40915..c0566c73ad 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi +++ b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi 
@@ -1,4 +1,5 @@ from flyteidl.core import execution_pb2 as _execution_pb2 +from google.protobuf import timestamp_pb2 as _timestamp_pb2 from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message @@ -7,7 +8,7 @@ from typing import ClassVar as _ClassVar, Mapping as _Mapping, Optional as _Opti DESCRIPTOR: _descriptor.FileDescriptor class ContainerError(_message.Message): - __slots__ = ["code", "message", "kind", "origin"] + __slots__ = ["code", "message", "kind", "origin", "timestamp", "worker"] class Kind(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = [] NON_RECOVERABLE: _ClassVar[ContainerError.Kind] @@ -18,11 +19,15 @@ class ContainerError(_message.Message): MESSAGE_FIELD_NUMBER: _ClassVar[int] KIND_FIELD_NUMBER: _ClassVar[int] ORIGIN_FIELD_NUMBER: _ClassVar[int] + TIMESTAMP_FIELD_NUMBER: _ClassVar[int] + WORKER_FIELD_NUMBER: _ClassVar[int] code: str message: str kind: ContainerError.Kind origin: _execution_pb2.ExecutionError.ErrorKind - def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., kind: _Optional[_Union[ContainerError.Kind, str]] = ..., origin: _Optional[_Union[_execution_pb2.ExecutionError.ErrorKind, str]] = ...) -> None: ... + timestamp: _timestamp_pb2.Timestamp + worker: str + def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., kind: _Optional[_Union[ContainerError.Kind, str]] = ..., origin: _Optional[_Union[_execution_pb2.ExecutionError.ErrorKind, str]] = ..., timestamp: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ..., worker: _Optional[str] = ...) -> None: ... 
class ErrorDocument(_message.Message): __slots__ = ["error"] diff --git a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py index 2d59497e3a..2ff8f47010 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py +++ b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py @@ -12,9 +12,10 @@ from google.protobuf import duration_pb2 as google_dot_protobuf_dot_duration__pb2 +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\xc8\x01\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 
\x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 \x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 \x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 \x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\x9a\x02\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\x12\x38\n\ttimestamp\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\ttimestamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 
\x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 \x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 \x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -23,30 +24,30 @@ DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b'\n\021com.flyteidl.coreB\016ExecutionProtoP\001Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\242\002\003FCX\252\002\rFlyteidl.Core\312\002\rFlyteidl\\Core\342\002\031Flyteidl\\Core\\GPBMetadata\352\002\016Flyteidl::Core' - _globals['_WORKFLOWEXECUTION']._serialized_start=81 - _globals['_WORKFLOWEXECUTION']._serialized_end=248 - _globals['_WORKFLOWEXECUTION_PHASE']._serialized_start=103 - _globals['_WORKFLOWEXECUTION_PHASE']._serialized_end=248 - _globals['_NODEEXECUTION']._serialized_start=251 - _globals['_NODEEXECUTION']._serialized_end=433 - _globals['_NODEEXECUTION_PHASE']._serialized_start=269 - _globals['_NODEEXECUTION_PHASE']._serialized_end=433 - _globals['_TASKEXECUTION']._serialized_start=436 - _globals['_TASKEXECUTION']._serialized_end=586 - 
_globals['_TASKEXECUTION_PHASE']._serialized_start=454 - _globals['_TASKEXECUTION_PHASE']._serialized_end=586 - _globals['_EXECUTIONERROR']._serialized_start=589 - _globals['_EXECUTIONERROR']._serialized_end=789 - _globals['_EXECUTIONERROR_ERRORKIND']._serialized_start=743 - _globals['_EXECUTIONERROR_ERRORKIND']._serialized_end=789 - _globals['_TASKLOG']._serialized_start=792 - _globals['_TASKLOG']._serialized_end=1098 - _globals['_TASKLOG_MESSAGEFORMAT']._serialized_start=1051 - _globals['_TASKLOG_MESSAGEFORMAT']._serialized_end=1098 - _globals['_QUALITYOFSERVICESPEC']._serialized_start=1100 - _globals['_QUALITYOFSERVICESPEC']._serialized_end=1190 - _globals['_QUALITYOFSERVICE']._serialized_start=1193 - _globals['_QUALITYOFSERVICE']._serialized_end=1399 - _globals['_QUALITYOFSERVICE_TIER']._serialized_start=1332 - _globals['_QUALITYOFSERVICE_TIER']._serialized_end=1384 + _globals['_WORKFLOWEXECUTION']._serialized_start=114 + _globals['_WORKFLOWEXECUTION']._serialized_end=281 + _globals['_WORKFLOWEXECUTION_PHASE']._serialized_start=136 + _globals['_WORKFLOWEXECUTION_PHASE']._serialized_end=281 + _globals['_NODEEXECUTION']._serialized_start=284 + _globals['_NODEEXECUTION']._serialized_end=466 + _globals['_NODEEXECUTION_PHASE']._serialized_start=302 + _globals['_NODEEXECUTION_PHASE']._serialized_end=466 + _globals['_TASKEXECUTION']._serialized_start=469 + _globals['_TASKEXECUTION']._serialized_end=619 + _globals['_TASKEXECUTION_PHASE']._serialized_start=487 + _globals['_TASKEXECUTION_PHASE']._serialized_end=619 + _globals['_EXECUTIONERROR']._serialized_start=622 + _globals['_EXECUTIONERROR']._serialized_end=904 + _globals['_EXECUTIONERROR_ERRORKIND']._serialized_start=858 + _globals['_EXECUTIONERROR_ERRORKIND']._serialized_end=904 + _globals['_TASKLOG']._serialized_start=907 + _globals['_TASKLOG']._serialized_end=1213 + _globals['_TASKLOG_MESSAGEFORMAT']._serialized_start=1166 + _globals['_TASKLOG_MESSAGEFORMAT']._serialized_end=1213 + 
_globals['_QUALITYOFSERVICESPEC']._serialized_start=1215 + _globals['_QUALITYOFSERVICESPEC']._serialized_end=1305 + _globals['_QUALITYOFSERVICE']._serialized_start=1308 + _globals['_QUALITYOFSERVICE']._serialized_end=1514 + _globals['_QUALITYOFSERVICE_TIER']._serialized_start=1447 + _globals['_QUALITYOFSERVICE_TIER']._serialized_end=1499 # @@protoc_insertion_point(module_scope) diff --git a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi index 5c28a55418..08f1937c08 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi +++ b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi @@ -1,4 +1,5 @@ from google.protobuf import duration_pb2 as _duration_pb2 +from google.protobuf import timestamp_pb2 as _timestamp_pb2 from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message @@ -83,7 +84,7 @@ class TaskExecution(_message.Message): def __init__(self) -> None: ... class ExecutionError(_message.Message): - __slots__ = ["code", "message", "error_uri", "kind"] + __slots__ = ["code", "message", "error_uri", "kind", "timestamp", "worker"] class ErrorKind(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = [] UNKNOWN: _ClassVar[ExecutionError.ErrorKind] @@ -96,11 +97,15 @@ class ExecutionError(_message.Message): MESSAGE_FIELD_NUMBER: _ClassVar[int] ERROR_URI_FIELD_NUMBER: _ClassVar[int] KIND_FIELD_NUMBER: _ClassVar[int] + TIMESTAMP_FIELD_NUMBER: _ClassVar[int] + WORKER_FIELD_NUMBER: _ClassVar[int] code: str message: str error_uri: str kind: ExecutionError.ErrorKind - def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., error_uri: _Optional[str] = ..., kind: _Optional[_Union[ExecutionError.ErrorKind, str]] = ...) -> None: ... 
+ timestamp: _timestamp_pb2.Timestamp + worker: str + def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., error_uri: _Optional[str] = ..., kind: _Optional[_Union[ExecutionError.ErrorKind, str]] = ..., timestamp: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ..., worker: _Optional[str] = ...) -> None: ... class TaskLog(_message.Message): __slots__ = ["uri", "name", "message_format", "ttl", "ShowWhilePending", "HideOnceFinished"] diff --git a/flyteidl/gen/pb_rust/flyteidl.core.rs b/flyteidl/gen/pb_rust/flyteidl.core.rs index bfbf82203d..a97a209a47 100644 --- a/flyteidl/gen/pb_rust/flyteidl.core.rs +++ b/flyteidl/gen/pb_rust/flyteidl.core.rs @@ -2129,6 +2129,12 @@ pub struct ExecutionError { pub error_uri: ::prost::alloc::string::String, #[prost(enumeration="execution_error::ErrorKind", tag="4")] pub kind: i32, + /// Timestamp of the error + #[prost(message, optional, tag="5")] + pub timestamp: ::core::option::Option<::prost_types::Timestamp>, + /// Worker that generated the error + #[prost(string, tag="6")] + pub worker: ::prost::alloc::string::String, } /// Nested message and enum types in `ExecutionError`. pub mod execution_error { @@ -3091,6 +3097,12 @@ pub struct ContainerError { /// Defines the origin of the error (system, user, unknown). #[prost(enumeration="execution_error::ErrorKind", tag="4")] pub origin: i32, + /// Timestamp of the error + #[prost(message, optional, tag="5")] + pub timestamp: ::core::option::Option<::prost_types::Timestamp>, + /// Worker that generated the error + #[prost(string, tag="6")] + pub worker: ::prost::alloc::string::String, } /// Nested message and enum types in `ContainerError`. 
pub mod container_error { diff --git a/flyteidl/protos/flyteidl/core/errors.proto b/flyteidl/protos/flyteidl/core/errors.proto index 4d25389349..71ecd1de84 100644 --- a/flyteidl/protos/flyteidl/core/errors.proto +++ b/flyteidl/protos/flyteidl/core/errors.proto @@ -5,6 +5,7 @@ package flyteidl.core; option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core"; import "flyteidl/core/execution.proto"; +import "google/protobuf/timestamp.proto"; // Error message to propagate detailed errors from container executions to the execution // engine. @@ -25,6 +26,12 @@ message ContainerError { // Defines the origin of the error (system, user, unknown). ExecutionError.ErrorKind origin = 4; + + // Timestamp of the error + google.protobuf.Timestamp timestamp = 5; + + // Worker that generated the error + string worker = 6; } // Defines the errors.pb file format the container can produce to communicate diff --git a/flyteidl/protos/flyteidl/core/execution.proto b/flyteidl/protos/flyteidl/core/execution.proto index 4d55198955..3b9bfbbbb7 100644 --- a/flyteidl/protos/flyteidl/core/execution.proto +++ b/flyteidl/protos/flyteidl/core/execution.proto @@ -5,6 +5,7 @@ package flyteidl.core; option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core"; import "google/protobuf/duration.proto"; +import "google/protobuf/timestamp.proto"; // Indicates various phases of Workflow Execution message WorkflowExecution { @@ -73,6 +74,10 @@ message ExecutionError { SYSTEM = 2; } ErrorKind kind = 4; + // Timestamp of the error + google.protobuf.Timestamp timestamp = 5; + // Worker that generated the error + string worker = 6; } // Log information for the task that is specific to a log sink diff --git a/flyteplugins/go/tasks/pluginmachinery/flytek8s/k8s_resource_adds.go b/flyteplugins/go/tasks/pluginmachinery/flytek8s/k8s_resource_adds.go index b77615120a..3cd000dd40 100644 --- a/flyteplugins/go/tasks/pluginmachinery/flytek8s/k8s_resource_adds.go +++ 
b/flyteplugins/go/tasks/pluginmachinery/flytek8s/k8s_resource_adds.go @@ -17,6 +17,9 @@ import ( const ( flyteExecutionURL = "FLYTE_EXECUTION_URL" + + FlyteInternalWorkerNameEnvVarKey = "_F_WN" // "FLYTE_INTERNAL_WORKER_NAME" + FlyteInternalDistErrorStrategyEnvVarKey = "_F_DES" // "FLYTE_INTERNAL_DIST_ERROR_STRATEGY" ) func GetContextEnvVars(ownerCtx context.Context) []v1.EnvVar { diff --git a/flyteplugins/go/tasks/pluginmachinery/io/iface.go b/flyteplugins/go/tasks/pluginmachinery/io/iface.go index f876defe5a..1f32717812 100644 --- a/flyteplugins/go/tasks/pluginmachinery/io/iface.go +++ b/flyteplugins/go/tasks/pluginmachinery/io/iface.go @@ -27,13 +27,18 @@ type InputReader interface { Get(ctx context.Context) (*core.LiteralMap, error) } -// OutputReader provides an abstracted OutputReader interface. The plugins are responsible to provide -// the implementations for the interface. Some helper implementations can be found in ioutils -type OutputReader interface { +// ErrorReader provides an abstracted error reading interface, which is part of OutputReader below. +type ErrorReader interface { // IsError returns true if an error was detected when reading the output and false if no error was detected IsError(ctx context.Context) (bool, error) // ReadError returns the error as type ExecutionError ReadError(ctx context.Context) (ExecutionError, error) +} + +// OutputReader provides an abstracted OutputReader interface. The plugins are responsible to provide +// the implementations for the interface. Some helper implementations can be found in ioutils +type OutputReader interface { + ErrorReader // IsFile returns true if the outputs are using the OutputFilePaths specified files. 
If so it allows the system to // optimize the reads of the files IsFile(ctx context.Context) bool diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 4f8d678c14..ae880f3640 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -3,77 +3,265 @@ package ioutils import ( "context" "fmt" + "path/filepath" + "strings" + "time" "github.com/pkg/errors" "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/io" + "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/k8s" "github.com/flyteorg/flyte/flytestdlib/storage" ) -type RemoteFileOutputReader struct { - outPath io.OutputFilePaths +type baseErrorReader struct { store storage.ComposedProtobufStore maxPayloadSize int64 } +type singleFileErrorReader struct { + baseErrorReader + errorFilePath storage.DataReference +} + +type earliestFileErrorReader struct { + baseErrorReader + errorDirPath storage.DataReference + errorFilePathPrefix storage.DataReference + errorFileExtension string +} + +/* + We have a 'single file error reader' and 'earliest file error reader' as two + different strategies for reading task error files. + + Single file error reader is used to check for a single error.pb file uploaded + by a task, and is the default strategy. Earliest file error reader is used to check for + multiple error-.pb files and pick the one that has the earliest error timestamp. + It is used when a distributed task requests earliest timestamp error aggregation + strategy. To support backward compatibility, the earliest file error reader also handles + cases when there is a single error.pb file uploaded by the task. The earliest file + error reader is currently used for the PyTorch plugin. 
+ + A few notes: + + - While the earliest file error reader handles the single error file scenario as well, + it is not set as the default, because its implementation depends on doing a listing operation + on remote storage. We do not want the listing overhead to be paid for the more common case of + having a single error file. + - Under the multiple error aggregation scenario, it is possible that the error aggregation + is performed before all the errors are reported. For the PyTorch plugin specifically, the + training operator will mark the job as 'done' when it detects one of the pods as failing. + Once Propeller detects this, it will perform the error aggregation. There is a rare scenario + where the pod that has the earliest error gets delayed in uploading its error file to + remote storage, and the pod that has a later error ends up completing first. If the + training operator's detection of job completion and Propeller's error aggregation happen so + fast that the pod with the earliest error has not yet uploaded its error to remote storage, + we may end up reporting the wrong error. This is highly unlikely in practice. The implementation + we have here is significantly better than the prior behavior of reporting the latest written + error.pb file (as there was a race condition on overwriting error files), which is almost always + not the earliest error. + - The training operator does not have any error aggregation strategy implemented. PyTorch + distributed itself aggregates errors from the trainers running under the same elastic agent, + and reports the earliest error. The aggregation we perform here extends that across pods. 
+*/ + +const errorFileNotFoundErrorCode = "ErrorFileNotFound" + var ErrRemoteFileExceedsMaxSize = errors.New("remote file exceeds max size") -func (r RemoteFileOutputReader) IsError(ctx context.Context) (bool, error) { - metadata, err := r.store.Head(ctx, r.outPath.GetErrorPath()) - if err != nil { - return false, errors.Wrapf(err, "failed to read error file @[%s]", r.outPath.GetErrorPath()) +func newSingleFileErrorReader(errorFilePath storage.DataReference, store storage.ComposedProtobufStore, maxPayloadSize int64) *singleFileErrorReader { + return &singleFileErrorReader{ + baseErrorReader: baseErrorReader{ + store: store, + maxPayloadSize: maxPayloadSize, + }, + errorFilePath: errorFilePath, } +} + +func (b *baseErrorReader) validatePayloadSize(filePath storage.DataReference, metadata storage.Metadata) error { if metadata.Exists() { - if metadata.Size() > r.maxPayloadSize { - return false, errors.Wrapf(err, "error file @[%s] is too large [%d] bytes, max allowed [%d] bytes", r.outPath.GetErrorPath(), metadata.Size(), r.maxPayloadSize) + if metadata.Size() > b.maxPayloadSize { + return errors.Wrapf(ErrRemoteFileExceedsMaxSize, + "output file @[%s] is too large [%d] bytes, max allowed [%d] bytes", + filePath, metadata.Size(), b.maxPayloadSize) } - return true, nil } - return false, nil + return nil } -func (r RemoteFileOutputReader) ReadError(ctx context.Context) (io.ExecutionError, error) { +func (s *singleFileErrorReader) IsError(ctx context.Context) (bool, error) { + metadata, err := s.store.Head(ctx, s.errorFilePath) + if err != nil { + return false, errors.Wrapf(err, "failed to read error file @[%s]", s.errorFilePath) + } + err = s.validatePayloadSize(s.errorFilePath, metadata) + if err != nil { + return false, err + } + return metadata.Exists(), nil +} + +func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage.DataReference) io.ExecutionError { + if errorDoc.Error == nil { + return io.ExecutionError{ + IsRecoverable: true, + 
ExecutionError: &core.ExecutionError{ + Code: "ErrorFileBadFormat", + Message: fmt.Sprintf("error not formatted correctly, nil error @path [%s]", errorFilePath), + Kind: core.ExecutionError_SYSTEM, + }, + } + } + executionError := io.ExecutionError{ + ExecutionError: &core.ExecutionError{ + Code: errorDoc.Error.Code, + Message: errorDoc.Error.Message, + Kind: errorDoc.Error.Origin, + Timestamp: errorDoc.Error.Timestamp, + Worker: errorDoc.Error.Worker, + }, + } + + if errorDoc.Error.Kind == core.ContainerError_RECOVERABLE { + executionError.IsRecoverable = true + } + + return executionError +} + +func (s *singleFileErrorReader) ReadError(ctx context.Context) (io.ExecutionError, error) { errorDoc := &core.ErrorDocument{} - err := r.store.ReadProtobuf(ctx, r.outPath.GetErrorPath(), errorDoc) + err := s.store.ReadProtobuf(ctx, s.errorFilePath, errorDoc) if err != nil { if storage.IsNotFound(err) { return io.ExecutionError{ IsRecoverable: true, ExecutionError: &core.ExecutionError{ - Code: "ErrorFileNotFound", + Code: errorFileNotFoundErrorCode, Message: err.Error(), Kind: core.ExecutionError_SYSTEM, }, }, nil } - return io.ExecutionError{}, errors.Wrapf(err, "failed to read error data from task @[%s]", r.outPath.GetErrorPath()) + return io.ExecutionError{}, errors.Wrapf(err, "failed to read error data from task @[%s]", s.errorFilePath) } - if errorDoc.Error == nil { - return io.ExecutionError{ - IsRecoverable: true, - ExecutionError: &core.ExecutionError{ - Code: "ErrorFileBadFormat", - Message: fmt.Sprintf("error not formatted correctly, nil error @path [%s]", r.outPath.GetErrorPath()), - Kind: core.ExecutionError_SYSTEM, - }, - }, nil + return errorDoc2ExecutionError(errorDoc, s.errorFilePath), nil +} + +func (e *earliestFileErrorReader) IsError(ctx context.Context) (bool, error) { + hasError := false + const maxItems = 1000 + cursor := storage.NewCursorAtStart() + for cursor != storage.NewCursorAtEnd() { + var err error + var errorFilePaths []storage.DataReference 
+ errorFilePaths, cursor, err = e.store.List(ctx, e.errorFilePathPrefix, maxItems, cursor) + if err != nil { + return false, errors.Wrapf(err, "failed to list error files @[%s]", e.errorDirPath) + } + for _, errorFilePath := range errorFilePaths { + if strings.HasSuffix(errorFilePath.String(), e.errorFileExtension) { + metadata, err := e.store.Head(ctx, errorFilePath) + if err != nil { + return false, errors.Wrapf(err, "failed to read error file @[%s]", errorFilePath) + } + err = e.validatePayloadSize(errorFilePath, metadata) + if err != nil { + return false, err + } + hasError = true + } + } } + return hasError, nil +} - ee := io.ExecutionError{ - ExecutionError: &core.ExecutionError{ - Code: errorDoc.Error.Code, - Message: errorDoc.Error.Message, - Kind: errorDoc.Error.Origin, - }, +func (e *earliestFileErrorReader) ReadError(ctx context.Context) (io.ExecutionError, error) { + var earliestTimestamp *time.Time = nil + earliestExecutionError := io.ExecutionError{} + const maxItems = 1000 + cursor := storage.NewCursorAtStart() + for cursor != storage.NewCursorAtEnd() { + var err error + var errorFilePaths []storage.DataReference + errorFilePaths, cursor, err = e.store.List(ctx, e.errorFilePathPrefix, maxItems, cursor) + if err != nil { + return io.ExecutionError{}, errors.Wrapf(err, "failed to list error files @[%s]", e.errorDirPath) + } + for _, errorFilePath := range errorFilePaths { + if !strings.HasSuffix(errorFilePath.String(), e.errorFileExtension) { + continue + } + errorDoc := &core.ErrorDocument{} + err := e.store.ReadProtobuf(ctx, errorFilePath, errorDoc) + if err != nil { + return io.ExecutionError{}, errors.Wrapf(err, "failed to read error file @[%s]", errorFilePath.String()) + } + timestamp := errorDoc.Error.GetTimestamp().AsTime() + if earliestTimestamp == nil || earliestTimestamp.After(timestamp) { + earliestExecutionError = errorDoc2ExecutionError(errorDoc, errorFilePath) + earliestTimestamp = &timestamp + } + } } + return earliestExecutionError, nil +} - 
if errorDoc.Error.Kind == core.ContainerError_RECOVERABLE { - ee.IsRecoverable = true +func newEarliestFileErrorReader(errorDirPath storage.DataReference, canonicalErrorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (*earliestFileErrorReader, error) { + // If the canonical error file name is error.pb, we expect multiple error files + // to have name error.pb + pieces := strings.Split(canonicalErrorFilename, ".") + if len(pieces) != 2 { + return nil, errors.Errorf("expected canonical error filename to have a single dot (.), got %d", len(pieces)) + } + errorFilePrefix := pieces[0] + scheme, container, key, _ := errorDirPath.Split() + errorFilePathPrefix := storage.NewDataReference(scheme, container, filepath.Join(key, errorFilePrefix)) + errorFileExtension := fmt.Sprintf(".%s", pieces[1]) + + return &earliestFileErrorReader{ + baseErrorReader: baseErrorReader{ + store: store, + maxPayloadSize: maxPayloadSize, + }, + errorDirPath: errorDirPath, + errorFilePathPrefix: errorFilePathPrefix, + errorFileExtension: errorFileExtension, + }, nil +} + +func newErrorReader(errorAggregationStrategy k8s.ErrorAggregationStrategy, errorDirPath storage.DataReference, errorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (io.ErrorReader, error) { + if errorAggregationStrategy == k8s.DefaultErrorAggregationStrategy { + scheme, container, key, err := errorDirPath.Split() + if err != nil { + return nil, errors.Wrapf(err, "invalid error dir path %s", errorDirPath) + } + errorFilePath := storage.NewDataReference(scheme, container, filepath.Join(key, errorFilename)) + return newSingleFileErrorReader(errorFilePath, store, maxPayloadSize), nil } + if errorAggregationStrategy == k8s.EarliestErrorAggregationStrategy { + return newEarliestFileErrorReader(errorDirPath, errorFilename, store, maxPayloadSize) + } + return nil, errors.Errorf("unknown error aggregation strategy: %v", errorAggregationStrategy) +} + +type RemoteFileOutputReader 
struct { + outPath io.OutputFilePaths + store storage.ComposedProtobufStore + maxPayloadSize int64 + errorReader io.ErrorReader +} + +func (r RemoteFileOutputReader) IsError(ctx context.Context) (bool, error) { + return r.errorReader.IsError(ctx) +} - return ee, nil +func (r RemoteFileOutputReader) ReadError(ctx context.Context) (io.ExecutionError, error) { + return r.errorReader.ReadError(ctx) } func (r RemoteFileOutputReader) Exists(ctx context.Context) (bool, error) { @@ -124,16 +312,43 @@ func (r RemoteFileOutputReader) DeckExists(ctx context.Context) (bool, error) { return md.Exists(), nil } -func NewRemoteFileOutputReader(_ context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64) RemoteFileOutputReader { +func getMaxPayloadSize(maxDatasetSize int64) int64 { // Note: even though the data store retrieval checks against GetLimitMegabytes, there might be external // storage implementations, so we keep this check here as well. maxPayloadSize := maxDatasetSize if maxPayloadSize == 0 { maxPayloadSize = storage.GetConfig().Limits.GetLimitMegabytes * 1024 * 1024 } + return maxPayloadSize +} + +func NewRemoteFileOutputReader(context context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64) RemoteFileOutputReader { + maxPayloadSize := getMaxPayloadSize(maxDatasetSize) + errorReader := newSingleFileErrorReader(outPaths.GetErrorPath(), store, maxPayloadSize) return RemoteFileOutputReader{ outPath: outPaths, store: store, maxPayloadSize: maxPayloadSize, + errorReader: errorReader, + } +} + +func NewRemoteFileOutputReaderWithErrorAggregationStrategy(_ context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64, errorAggregationStrategy k8s.ErrorAggregationStrategy) (*RemoteFileOutputReader, error) { + maxPayloadSize := getMaxPayloadSize(maxDatasetSize) + scheme, container, key, err := outPaths.GetErrorPath().Split() + if err != nil { + 
return nil, errors.Wrapf(err, "failed to parse error path %s", outPaths.GetErrorPath()) } + errorFilename := filepath.Base(key) + errorDirPath := storage.NewDataReference(scheme, container, filepath.Dir(key)) + errorReader, err := newErrorReader(errorAggregationStrategy, errorDirPath, errorFilename, store, maxPayloadSize) + if err != nil { + return nil, errors.Wrapf(err, "failed to create remote output reader with error aggregation strategy %v", errorAggregationStrategy) + } + return &RemoteFileOutputReader{ + outPath: outPaths, + store: store, + maxPayloadSize: maxPayloadSize, + errorReader: errorReader, + }, nil } diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go index a1393de7e9..1cd7099f78 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go @@ -2,14 +2,20 @@ package ioutils import ( "context" + "fmt" + "strconv" + "strings" "testing" + "time" regErrors "github.com/pkg/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "google.golang.org/protobuf/types/known/timestamppb" "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core" pluginsIOMock "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/io/mocks" + "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/k8s" "github.com/flyteorg/flyte/flytestdlib/storage" storageMocks "github.com/flyteorg/flyte/flytestdlib/storage/mocks" ) @@ -92,11 +98,13 @@ func TestReadOrigin(t *testing.T) { exists: true, }, nil) - r := RemoteFileOutputReader{ - outPath: opath, - store: store, - maxPayloadSize: 0, - } + maxPayloadSize := int64(0) + r := NewRemoteFileOutputReader( + ctx, + store, + opath, + maxPayloadSize, + ) ee, err := r.ReadError(ctx) assert.NoError(t, err) @@ -124,15 +132,132 @@ func TestReadOrigin(t *testing.T) { 
casted.Error = errorDoc.Error }).Return(nil) - r := RemoteFileOutputReader{ - outPath: opath, - store: store, - maxPayloadSize: 0, - } + maxPayloadSize := int64(0) + r := NewRemoteFileOutputReader( + ctx, + store, + opath, + maxPayloadSize, + ) ee, err := r.ReadError(ctx) assert.NoError(t, err) assert.Equal(t, core.ExecutionError_SYSTEM, ee.Kind) assert.True(t, ee.IsRecoverable) }) + + t.Run("multi-user-error", func(t *testing.T) { + outputPaths := &pluginsIOMock.OutputFilePaths{} + outputPaths.OnGetErrorPath().Return("s3://errors/error.pb") + + store := &storageMocks.ComposedProtobufStore{} + store.OnReadProtobufMatch(mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + errorFilePath := args.Get(1).(storage.DataReference) + workerIdx, err := strconv.Atoi(strings.Split(strings.Split(errorFilePath.String(), "-")[1], ".")[0]) + assert.NoError(t, err) + errorDoc := &core.ErrorDocument{ + Error: &core.ContainerError{ + Code: "red", + Message: fmt.Sprintf("hi-%d", workerIdx), + Kind: core.ContainerError_NON_RECOVERABLE, + Origin: core.ExecutionError_USER, + Worker: fmt.Sprintf("worker-%d", workerIdx), + Timestamp: timestamppb.New(time.Unix(int64(100-workerIdx%2), 0)), + }, + } + incomingErrorDoc := args.Get(2) + assert.NotNil(t, incomingErrorDoc) + casted := incomingErrorDoc.(*core.ErrorDocument) + casted.Error = errorDoc.Error + }).Return(nil) + + store.OnList(ctx, storage.DataReference("s3://errors/error"), 1000, storage.NewCursorAtStart()).Return( + []storage.DataReference{"error-0.pb", "error-1.pb", "error-2.pb"}, storage.NewCursorAtEnd(), nil) + + store.OnHead(ctx, storage.DataReference("error-0.pb")).Return(MemoryMetadata{ + exists: true, + }, nil) + + store.OnHead(ctx, storage.DataReference("error-1.pb")).Return(MemoryMetadata{ + exists: true, + }, nil) + + store.OnHead(ctx, storage.DataReference("error-2.pb")).Return(MemoryMetadata{ + exists: true, + }, nil) + + maxPayloadSize := int64(0) + r, err := 
NewRemoteFileOutputReaderWithErrorAggregationStrategy( + ctx, + store, + outputPaths, + maxPayloadSize, + k8s.EarliestErrorAggregationStrategy, + ) + assert.NoError(t, err) + + hasError, err := r.IsError(ctx) + assert.NoError(t, err) + assert.True(t, hasError) + + executionError, err := r.ReadError(ctx) + assert.NoError(t, err) + assert.Equal(t, core.ExecutionError_USER, executionError.Kind) + assert.Equal(t, "red", executionError.Code) + assert.Equal(t, "hi-1", executionError.Message) + assert.Equal(t, "worker-1", executionError.Worker) + assert.Equal(t, timestamppb.New(time.Unix(99, 0)), executionError.Timestamp) + assert.False(t, executionError.IsRecoverable) + }) + + t.Run("multi-user-error-backward-compat", func(t *testing.T) { + outputPaths := &pluginsIOMock.OutputFilePaths{} + outputPaths.OnGetErrorPath().Return("s3://errors/error.pb") + + store := &storageMocks.ComposedProtobufStore{} + store.OnReadProtobufMatch(mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + errorDoc := &core.ErrorDocument{ + Error: &core.ContainerError{ + Code: "red", + Message: "hi", + Kind: core.ContainerError_NON_RECOVERABLE, + Origin: core.ExecutionError_USER, + }, + } + incomingErrorDoc := args.Get(2) + assert.NotNil(t, incomingErrorDoc) + casted := incomingErrorDoc.(*core.ErrorDocument) + casted.Error = errorDoc.Error + }).Return(nil) + + store.OnList(ctx, storage.DataReference("s3://errors/error"), 1000, storage.NewCursorAtStart()).Return( + []storage.DataReference{"error.pb"}, storage.NewCursorAtEnd(), nil) + + store.OnHead(ctx, storage.DataReference("error.pb")).Return(MemoryMetadata{ + exists: true, + }, nil) + + maxPayloadSize := int64(0) + r, err := NewRemoteFileOutputReaderWithErrorAggregationStrategy( + ctx, + store, + outputPaths, + maxPayloadSize, + k8s.EarliestErrorAggregationStrategy, + ) + assert.NoError(t, err) + + hasError, err := r.IsError(ctx) + assert.NoError(t, err) + assert.True(t, hasError) + + executionError, err := 
r.ReadError(ctx) + assert.NoError(t, err) + assert.Equal(t, core.ExecutionError_USER, executionError.Kind) + assert.Equal(t, "red", executionError.Code) + assert.Equal(t, "hi", executionError.Message) + assert.Equal(t, "", executionError.Worker) + assert.Nil(t, executionError.Timestamp) + assert.False(t, executionError.IsRecoverable) + }) } diff --git a/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go b/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go index 38a84f9b2b..8b2124e1cd 100644 --- a/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go +++ b/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go @@ -30,6 +30,27 @@ type PluginEntry struct { CustomKubeClient func(ctx context.Context) (pluginsCore.KubeClient, error) } +type ErrorAggregationStrategy int + +const ( + // Single error file from a single container + DefaultErrorAggregationStrategy ErrorAggregationStrategy = iota + + // Earliest error from potentially multiple error files + EarliestErrorAggregationStrategy +) + +func (e ErrorAggregationStrategy) String() string { + switch e { + case DefaultErrorAggregationStrategy: + return "Default" + case EarliestErrorAggregationStrategy: + return "Earliest" + default: + panic("Unknown enum value, cannot happen") + } +} + // System level properties that this Plugin supports type PluginProperties struct { // Disables the inclusion of OwnerReferences in kubernetes resources that this plugin is responsible for. @@ -45,6 +66,8 @@ type PluginProperties struct { // override that behavior unless the resource that gets created for this plugin does not consume resources (cluster's // cpu/memory... etc. or external resources) once the plugin's Plugin.GetTaskPhase() returns a terminal phase. 
DisableDeleteResourceOnFinalize bool + // Specifies how errors are aggregated + ErrorAggregationStrategy ErrorAggregationStrategy } // Special context passed in to plugins when checking task phase diff --git a/flyteplugins/go/tasks/plugins/array/outputs_test.go b/flyteplugins/go/tasks/plugins/array/outputs_test.go index 529eba0429..eb1e874bc5 100644 --- a/flyteplugins/go/tasks/plugins/array/outputs_test.go +++ b/flyteplugins/go/tasks/plugins/array/outputs_test.go @@ -353,6 +353,7 @@ func TestAssembleFinalOutputs(t *testing.T) { ow := &mocks2.OutputWriter{} ow.OnGetOutputPrefixPath().Return("/prefix/") ow.OnGetOutputPath().Return("/prefix/outputs.pb") + ow.OnGetErrorPath().Return("/prefix/error.pb") ow.On("Put", mock.Anything, mock.Anything).Return(func(ctx context.Context, or io.OutputReader) error { m, ee, err := or.Read(ctx) assert.NoError(t, err) diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go index 8084b75b4c..6d7c80a7fd 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go @@ -7,6 +7,7 @@ import ( commonOp "github.com/kubeflow/common/pkg/apis/common/v1" kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" + apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client" @@ -16,6 +17,7 @@ import ( flyteerr "github.com/flyteorg/flyte/flyteplugins/go/tasks/errors" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery" pluginsCore "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/core" + pluginsK8s "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/flytek8s" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/k8s" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/utils" 
"github.com/flyteorg/flyte/flyteplugins/go/tasks/plugins/k8s/kfoperators/common" @@ -28,7 +30,9 @@ type pytorchOperatorResourceHandler struct { var _ k8s.Plugin = pytorchOperatorResourceHandler{} func (pytorchOperatorResourceHandler) GetProperties() k8s.PluginProperties { - return k8s.PluginProperties{} + return k8s.PluginProperties{ + ErrorAggregationStrategy: k8s.EarliestErrorAggregationStrategy, + } } // Defines a func to create a query object (typically just object and type meta portions) that's used to query k8s @@ -99,6 +103,25 @@ func (pytorchOperatorResourceHandler) BuildResource(ctx context.Context, taskCtx return nil, flyteerr.Errorf(flyteerr.BadTaskSpecification, "Unable to create worker replica spec: [%v]", err.Error()) } + updateEnvVars := func(container *apiv1.Container) { + if container.Env == nil { + container.Env = make([]apiv1.EnvVar, 0, 2) + } + container.Env = append(container.Env, apiv1.EnvVar{ + Name: pluginsK8s.FlyteInternalWorkerNameEnvVarKey, + ValueFrom: &apiv1.EnvVarSource{ + FieldRef: &apiv1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }) + container.Env = append(container.Env, apiv1.EnvVar{ + Name: pluginsK8s.FlyteInternalDistErrorStrategyEnvVarKey, + Value: k8s.EarliestErrorAggregationStrategy.String(), + }) + } + updateEnvVars(&workerReplicaSpec.Template.Spec.Containers[0]) + if kfPytorchTaskExtraArgs.GetRunPolicy() != nil { runPolicy = common.ParseRunPolicy(*kfPytorchTaskExtraArgs.GetRunPolicy()) } diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go index 546b42d7df..814b340fe6 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go @@ -12,6 +12,7 @@ import ( kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + apiv1 
"k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -23,6 +24,7 @@ import ( pluginsCore "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/core" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/core/mocks" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/flytek8s" + pluginsK8s "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/flytek8s" flytek8sConfig "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/flytek8s/config" pluginIOMocks "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/io/mocks" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/k8s" @@ -712,7 +714,9 @@ func TestGetLogsElastic(t *testing.T) { func TestGetProperties(t *testing.T) { pytorchResourceHandler := pytorchOperatorResourceHandler{} - expected := k8s.PluginProperties{} + expected := k8s.PluginProperties{ + ErrorAggregationStrategy: k8s.EarliestErrorAggregationStrategy, + } assert.Equal(t, expected, pytorchResourceHandler.GetProperties()) } @@ -876,6 +880,26 @@ func TestBuildResourcePytorchV1(t *testing.T) { assert.Nil(t, pytorchJob.Spec.RunPolicy.ActiveDeadlineSeconds) assert.Nil(t, pytorchJob.Spec.ElasticPolicy) + + // validate plugin specific environment variables + workerContainerEnv := pytorchJob.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeWorker].Template.Spec.Containers[0].Env + assert.Equal(t, + []apiv1.EnvVar{ + { + Name: pluginsK8s.FlyteInternalWorkerNameEnvVarKey, + ValueFrom: &apiv1.EnvVarSource{ + FieldRef: &apiv1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + { + Name: pluginsK8s.FlyteInternalDistErrorStrategyEnvVarKey, + Value: "Earliest", + }, + }, + workerContainerEnv[len(workerContainerEnv)-2:], + ) } } diff --git a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go index 
c9c9167146..431824dad2 100644 --- a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go +++ b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go @@ -294,7 +294,12 @@ func (e *PluginManager) checkResourcePhase(ctx context.Context, tCtx pluginsCore var opReader io.OutputReader if pCtx.ow == nil { logger.Infof(ctx, "Plugin [%s] returned no outputReader, assuming file based outputs", e.id) - opReader = ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) + opReader, err = ioutils.NewRemoteFileOutputReaderWithErrorAggregationStrategy( + ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0, + e.plugin.GetProperties().ErrorAggregationStrategy) + if err != nil { + return pluginsCore.UnknownTransition, err + } } else { logger.Infof(ctx, "Plugin [%s] returned outputReader", e.id) opReader = pCtx.ow.GetReader() diff --git a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go index a2bcb57014..1d8d5064d9 100644 --- a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go +++ b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go @@ -33,6 +33,7 @@ import ( "github.com/flyteorg/flyte/flytestdlib/contextutils" "github.com/flyteorg/flyte/flytestdlib/promutils" "github.com/flyteorg/flyte/flytestdlib/promutils/labeled" + "github.com/flyteorg/flyte/flytestdlib/storage" ) type extendedFakeClient struct { @@ -163,6 +164,10 @@ func (d *dummyOutputWriter) Put(ctx context.Context, reader io.OutputReader) err return nil } +func (d *dummyOutputWriter) GetErrorPath() storage.DataReference { + return "s3://errors/error.pb" +} + func getMockTaskContext(initPhase PluginPhase, wantPhase PluginPhase) pluginsCore.TaskExecutionContext { taskExecutionContext := &pluginsCoreMock.TaskExecutionContext{} taskExecutionContext.OnTaskExecutionMetadata().Return(getMockTaskExecutionMetadata()) diff --git 
a/flytepropeller/pkg/controller/workflow/executor_test.go b/flytepropeller/pkg/controller/workflow/executor_test.go index f691a0028c..2be7238dbb 100644 --- a/flytepropeller/pkg/controller/workflow/executor_test.go +++ b/flytepropeller/pkg/controller/workflow/executor_test.go @@ -100,7 +100,8 @@ func (f fakeRemoteWritePlugin) Handle(ctx context.Context, tCtx pluginCore.TaskE o.Literals[k] = l } assert.NoError(f.t, tCtx.DataStore().WriteProtobuf(ctx, tCtx.OutputWriter().GetOutputPath(), storage.Options{}, o)) - assert.NoError(f.t, tCtx.OutputWriter().Put(ctx, ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0))) + reader := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) + assert.NoError(f.t, tCtx.OutputWriter().Put(ctx, reader)) } return trns, err } diff --git a/flytestdlib/storage/storage.go b/flytestdlib/storage/storage.go index 52e6905513..3d53a4d25f 100644 --- a/flytestdlib/storage/storage.go +++ b/flytestdlib/storage/storage.go @@ -8,6 +8,7 @@ package storage import ( "context" + "fmt" "io" "net/url" "strings" @@ -171,3 +172,7 @@ func (r DataReference) Split() (scheme, container, key string, err error) { func (r DataReference) String() string { return string(r) } + +func NewDataReference(scheme string, container string, key string) DataReference { + return DataReference(fmt.Sprintf("%s://%s/%s", scheme, container, key)) +} diff --git a/flytestdlib/storage/storage_test.go b/flytestdlib/storage/storage_test.go index d4896f274b..3f369bab55 100644 --- a/flytestdlib/storage/storage_test.go +++ b/flytestdlib/storage/storage_test.go @@ -11,6 +11,14 @@ import ( "github.com/flyteorg/flyte/flytestdlib/promutils" ) +func TestDataReference_New(t *testing.T) { + scheme := "s3" + container := "container" + key := "path/to/file" + dataReference := NewDataReference(scheme, container, key) + assert.Equal(t, DataReference("s3://container/path/to/file"), dataReference) +} + func TestDataReference_Split(t *testing.T) { 
input := DataReference("s3://container/path/to/file") scheme, container, key, err := input.Split() diff --git a/flytestdlib/storage/stow_store.go b/flytestdlib/storage/stow_store.go index 4b8089b502..e86a199bda 100644 --- a/flytestdlib/storage/stow_store.go +++ b/flytestdlib/storage/stow_store.go @@ -263,13 +263,13 @@ func (s *StowStore) Head(ctx context.Context, reference DataReference) (Metadata } func (s *StowStore) List(ctx context.Context, reference DataReference, maxItems int, cursor Cursor) ([]DataReference, Cursor, error) { - _, c, k, err := reference.Split() + scheme, containerName, key, err := reference.Split() if err != nil { s.metrics.BadReference.Inc(ctx) return nil, NewCursorAtEnd(), err } - container, err := s.getContainer(ctx, locationIDMain, c) + container, err := s.getContainer(ctx, locationIDMain, containerName) if err != nil { return nil, NewCursorAtEnd(), err } @@ -284,14 +284,14 @@ func (s *StowStore) List(ctx context.Context, reference DataReference, maxItems } else { stowCursor = cursor.customPosition } - items, stowCursor, err := container.Items(k, stowCursor, maxItems) + items, stowCursor, err := container.Items(key, stowCursor, maxItems) t1.Stop() t2.Stop() if err == nil { results := make([]DataReference, len(items)) for index, item := range items { - results[index] = DataReference(item.URL().String()) + results[index] = DataReference(fmt.Sprintf("%s://%s/%s", scheme, containerName, item.URL().String())) } if stow.IsCursorEnd(stowCursor) { cursor = NewCursorAtEnd() @@ -302,7 +302,7 @@ func (s *StowStore) List(ctx context.Context, reference DataReference, maxItems } incFailureCounterForError(ctx, s.metrics.ListFailure, err) - return nil, NewCursorAtEnd(), errs.Wrapf(err, "path:%v", k) + return nil, NewCursorAtEnd(), errs.Wrapf(err, "path:%v", key) } func (s *StowStore) ReadRaw(ctx context.Context, reference DataReference) (io.ReadCloser, error) { diff --git a/flytestdlib/storage/stow_store_test.go b/flytestdlib/storage/stow_store_test.go 
index 4de273dd93..aec59051f3 100644 --- a/flytestdlib/storage/stow_store_test.go +++ b/flytestdlib/storage/stow_store_test.go @@ -419,7 +419,7 @@ func TestStowStore_List(t *testing.T) { items, cursor, err := s.List(ctx, dataReference, maxResults, NewCursorAtStart()) assert.NoError(t, err) assert.Equal(t, NewCursorAtEnd(), cursor) - assert.Equal(t, []DataReference{"a/1", "a/2"}, items) + assert.Equal(t, []DataReference{"s3://container/a/1", "s3://container/a/2"}, items) }) t.Run("Listing with pagination", func(t *testing.T) { @@ -446,10 +446,10 @@ func TestStowStore_List(t *testing.T) { var dataReference DataReference = "s3://container/a" items, cursor, err := s.List(ctx, dataReference, maxResults, NewCursorAtStart()) assert.NoError(t, err) - assert.Equal(t, []DataReference{"a/1"}, items) + assert.Equal(t, []DataReference{"s3://container/a/1"}, items) items, _, err = s.List(ctx, dataReference, maxResults, cursor) assert.NoError(t, err) - assert.Equal(t, []DataReference{"a/2"}, items) + assert.Equal(t, []DataReference{"s3://container/a/2"}, items) }) }