From d74d7a2e52e9e2e4b07d8c0f5ee5a7b8a972462f Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 1 Oct 2024 21:46:16 +0000 Subject: [PATCH 01/29] Add multi file error aggregation strategy --- .../ioutils/remote_file_output_reader.go | 228 +++++++++++++++--- .../ioutils/remote_file_output_reader_test.go | 72 +++++- .../go/tasks/pluginmachinery/k8s/plugin.go | 12 + .../nodes/task/k8s/plugin_manager.go | 4 +- flytestdlib/storage/storage.go | 5 + flytestdlib/storage/stow_store.go | 10 +- flytestdlib/storage/stow_store_test.go | 6 +- 7 files changed, 290 insertions(+), 47 deletions(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 27d7748701..02439e2daf 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -3,37 +3,90 @@ package ioutils import ( "context" "fmt" + "math" + "path/filepath" + "strings" "github.com/pkg/errors" "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/io" + "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/k8s" "github.com/flyteorg/flyte/flytestdlib/storage" ) -type RemoteFileOutputReader struct { - outPath io.OutputFilePaths +type ErrorRetriever interface { + HasError(ctx context.Context) (bool, error) + GetError(ctx context.Context) (io.ExecutionError, error) +} + +type ErrorRetrieverBase struct { store storage.ComposedProtobufStore maxPayloadSize int64 } -func (r RemoteFileOutputReader) IsError(ctx context.Context) (bool, error) { - metadata, err := r.store.Head(ctx, r.outPath.GetErrorPath()) +type SingleFileErrorRetriever struct { + ErrorRetrieverBase + errorFilePath storage.DataReference +} + +func NewSingleFileErrorRetriever(errorFilePath storage.DataReference, store storage.ComposedProtobufStore, maxPayloadSize 
int64) *SingleFileErrorRetriever { + return &SingleFileErrorRetriever{ + ErrorRetrieverBase: ErrorRetrieverBase{ + store: store, + maxPayloadSize: maxPayloadSize, + }, + errorFilePath: errorFilePath, + } +} + +func (s *SingleFileErrorRetriever) HasError(ctx context.Context) (bool, error) { + metadata, err := s.store.Head(ctx, s.errorFilePath) if err != nil { - return false, errors.Wrapf(err, "failed to read error file @[%s]", r.outPath.GetErrorPath()) + return false, errors.Wrapf(err, "failed to read error file @[%s]", s.errorFilePath) } if metadata.Exists() { - if metadata.Size() > r.maxPayloadSize { - return false, errors.Wrapf(err, "error file @[%s] is too large [%d] bytes, max allowed [%d] bytes", r.outPath.GetErrorPath(), metadata.Size(), r.maxPayloadSize) + if metadata.Size() > s.maxPayloadSize { + return false, errors.Wrapf(err, "error file @[%s] is too large [%d] bytes, max allowed [%d] bytes", s.errorFilePath, metadata.Size(), s.maxPayloadSize) } return true, nil } return false, nil } -func (r RemoteFileOutputReader) ReadError(ctx context.Context) (io.ExecutionError, error) { +func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage.DataReference) io.ExecutionError { + if errorDoc.Error == nil { + return io.ExecutionError{ + IsRecoverable: true, + ExecutionError: &core.ExecutionError{ + Code: "ErrorFileBadFormat", + Message: fmt.Sprintf("error not formatted correctly, nil error @path [%s]", errorFilePath), + Kind: core.ExecutionError_SYSTEM, + }, + } + } + executionError := io.ExecutionError{ + ExecutionError: &core.ExecutionError{ + Code: errorDoc.Error.Code, + Message: errorDoc.Error.Message, + Kind: errorDoc.Error.Origin, + }, + } + + if errorDoc.Error.Kind == core.ContainerError_RECOVERABLE { + executionError.IsRecoverable = true + } + + if errorDoc.Error.Kind == core.ContainerError_RECOVERABLE { + executionError.IsRecoverable = true + } + + return executionError +} + +func (s *SingleFileErrorRetriever) GetError(ctx 
context.Context) (io.ExecutionError, error) { errorDoc := &core.ErrorDocument{} - err := r.store.ReadProtobuf(ctx, r.outPath.GetErrorPath(), errorDoc) + err := s.store.ReadProtobuf(ctx, storage.DataReference(s.errorFilePath), errorDoc) if err != nil { if storage.IsNotFound(err) { return io.ExecutionError{ @@ -45,33 +98,143 @@ func (r RemoteFileOutputReader) ReadError(ctx context.Context) (io.ExecutionErro }, }, nil } - return io.ExecutionError{}, errors.Wrapf(err, "failed to read error data from task @[%s]", r.outPath.GetErrorPath()) + return io.ExecutionError{}, errors.Wrapf(err, "failed to read error data from task @[%s]", s.errorFilePath) } - if errorDoc.Error == nil { - return io.ExecutionError{ - IsRecoverable: true, - ExecutionError: &core.ExecutionError{ - Code: "ErrorFileBadFormat", - Message: fmt.Sprintf("error not formatted correctly, nil error @path [%s]", r.outPath.GetErrorPath()), - Kind: core.ExecutionError_SYSTEM, - }, - }, nil + return errorDoc2ExecutionError(errorDoc, s.errorFilePath), nil +} + +type EarliestFileErrorRetriever struct { + ErrorRetrieverBase + errorDirPath storage.DataReference + canonicalErrorFilename string +} + +func (e *EarliestFileErrorRetriever) parseErrorFilename() (errorFilePathPrefix storage.DataReference, errorFileExtension string, err error) { + // If the canonical error file name is error.pb, we expect multiple error files + // to have name error.pb + pieces := strings.Split(e.canonicalErrorFilename, ".") + if len(pieces) != 2 { + err = errors.Errorf("expected canoncal error filename to have a single ., got %d", len(pieces)) + return } + errorFilePrefix := pieces[0] + scheme, container, key, _ := e.errorDirPath.Split() + errorFilePathPrefix = storage.NewDataReference(scheme, container, filepath.Join(key, errorFilePrefix)) + errorFileExtension = fmt.Sprintf(".%s", pieces[1]) + return +} - ee := io.ExecutionError{ - ExecutionError: &core.ExecutionError{ - Code: errorDoc.Error.Code, - Message: errorDoc.Error.Message, - Kind: 
errorDoc.Error.Origin, +func (e *EarliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) { + errorFilePathPrefix, errorFileExtension, err := e.parseErrorFilename() + if err != nil { + return false, errors.Wrapf(err, "failed to parse canonical error filename @[%s]", e.canonicalErrorFilename) + } + const maxItems = 1000 + cursor := storage.NewCursorAtStart() + for cursor != storage.NewCursorAtEnd() { + var err error + var errorFilePaths []storage.DataReference + errorFilePaths, cursor, err = e.store.List(ctx, errorFilePathPrefix, maxItems, cursor) + if err != nil { + return false, errors.Wrapf(err, "failed to list error files @[%s]", e.errorDirPath) + } + for _, errorFilePath := range errorFilePaths { + if strings.HasSuffix(errorFilePath.String(), errorFileExtension) { + return true, nil + } + } + } + return false, nil +} + +func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { + errorFilePathPrefix, errorFileExtension, err := e.parseErrorFilename() + if err != nil { + return io.ExecutionError{}, errors.Wrapf(err, "failed to parse canonical error filename @[%s]", e.canonicalErrorFilename) + } + const maxItems = 1000 + cursor := storage.NewCursorAtStart() + type ErrorFileAndDocument struct { + errorFilePath storage.DataReference + errorDoc *core.ErrorDocument + } + var errorFileAndDocs []ErrorFileAndDocument + for cursor != storage.NewCursorAtEnd() { + var err error + var errorFilePaths []storage.DataReference + errorFilePaths, cursor, err = e.store.List(ctx, errorFilePathPrefix, maxItems, cursor) + if err != nil { + return io.ExecutionError{}, errors.Wrapf(err, "failed to list error files @[%s]", e.errorDirPath) + } + for _, errorFilePath := range errorFilePaths { + if strings.HasSuffix(errorFilePath.String(), errorFileExtension) { + errorDoc := &core.ErrorDocument{} + err := e.store.ReadProtobuf(ctx, errorFilePath, errorDoc) + if err != nil { + return io.ExecutionError{}, errors.Wrapf(err, "failed to read 
error file @[%s]", errorFilePath.String()) + } + errorFileAndDocs = append(errorFileAndDocs, ErrorFileAndDocument{errorFilePath: errorFilePath, errorDoc: errorDoc}) + } + } + } + + extractTimestampFromErrorDoc := func(errorDoc *core.ErrorDocument) int64 { + // TODO: add optional timestamp to ErrorDocument + if errorDoc == nil { + panic("") + } + return 0 + } + + var earliestTimestamp int64 = math.MaxInt64 + earliestExecutionError := io.ExecutionError{} + for _, errorFileAndDoc := range errorFileAndDocs { + timestamp := extractTimestampFromErrorDoc(errorFileAndDoc.errorDoc) + if earliestTimestamp >= timestamp { + earliestExecutionError = errorDoc2ExecutionError(errorFileAndDoc.errorDoc, errorFileAndDoc.errorFilePath) + earliestTimestamp = timestamp + } + } + return earliestExecutionError, nil +} + +func NewEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonicalErrorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) *EarliestFileErrorRetriever { + return &EarliestFileErrorRetriever{ + ErrorRetrieverBase: ErrorRetrieverBase{ + store: store, + maxPayloadSize: maxPayloadSize, }, + errorDirPath: errorDirPath, + canonicalErrorFilename: canonicalErrorFilename, } +} - if errorDoc.Error.Kind == core.ContainerError_RECOVERABLE { - ee.IsRecoverable = true +func NewErrorRetriever(errorAggregationStrategy k8s.ErrorAggregationStrategy, errorDirPath storage.DataReference, errorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) ErrorRetriever { + if errorAggregationStrategy == k8s.DefaultErrorAggregationStrategy { + scheme, container, key, _ := errorDirPath.Split() + errorFilePath := storage.NewDataReference(scheme, container, filepath.Join(key, errorFilename)) + return NewSingleFileErrorRetriever(errorFilePath, store, maxPayloadSize) + } + if errorAggregationStrategy == k8s.EarliestErrorAggregationStrategy { + return NewEarliestFileErrorRetriever(errorDirPath, errorFilename, store, maxPayloadSize) } + return nil +} 
+ +type RemoteFileOutputReader struct { + outPath io.OutputFilePaths + store storage.ComposedProtobufStore + maxPayloadSize int64 + errorRetriever ErrorRetriever +} - return ee, nil +func (r RemoteFileOutputReader) IsError(ctx context.Context) (bool, error) { + return r.errorRetriever.HasError(ctx) +} + +func (r RemoteFileOutputReader) ReadError(ctx context.Context) (io.ExecutionError, error) { + return r.errorRetriever.GetError(ctx) } func (r RemoteFileOutputReader) Exists(ctx context.Context) (bool, error) { @@ -122,16 +285,25 @@ func (r RemoteFileOutputReader) DeckExists(ctx context.Context) (bool, error) { return md.Exists(), nil } -func NewRemoteFileOutputReader(_ context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64) RemoteFileOutputReader { +func NewRemoteFileOutputReader(context context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64) RemoteFileOutputReader { + return NewRemoteFileOutputReaderWithErrorAggregationStrategy(context, store, outPaths, maxDatasetSize, k8s.DefaultErrorAggregationStrategy) +} + +func NewRemoteFileOutputReaderWithErrorAggregationStrategy(_ context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64, errorAggregationStrategy k8s.ErrorAggregationStrategy) RemoteFileOutputReader { // Note: even though the data store retrieval checks against GetLimitMegabytes, there might be external // storage implementations, so we keep this check here as well. 
maxPayloadSize := maxDatasetSize if maxPayloadSize == 0 { maxPayloadSize = storage.GetConfig().Limits.GetLimitMegabytes * 1024 * 1024 } + scheme, container, key, _ := outPaths.GetErrorPath().Split() + errorFilename := filepath.Base(key) + errorDirPath := storage.NewDataReference(scheme, container, filepath.Dir(key)) + errorRetriever := NewErrorRetriever(errorAggregationStrategy, errorDirPath, errorFilename, store, maxPayloadSize) return RemoteFileOutputReader{ outPath: outPaths, store: store, maxPayloadSize: maxPayloadSize, + errorRetriever: errorRetriever, } } diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go index 251a3adc55..d699403601 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go @@ -2,6 +2,7 @@ package ioutils import ( "context" + "fmt" "testing" "github.com/stretchr/testify/assert" @@ -9,6 +10,7 @@ import ( "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core" pluginsIOMock "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/io/mocks" + "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/k8s" "github.com/flyteorg/flyte/flytestdlib/storage" storageMocks "github.com/flyteorg/flyte/flytestdlib/storage/mocks" ) @@ -65,11 +67,13 @@ func TestReadOrigin(t *testing.T) { exists: true, }, nil) - r := RemoteFileOutputReader{ - outPath: opath, - store: store, - maxPayloadSize: 0, - } + maxPayloadSize := int64(0) + r := NewRemoteFileOutputReader( + ctx, + store, + opath, + maxPayloadSize, + ) ee, err := r.ReadError(ctx) assert.NoError(t, err) @@ -97,15 +101,63 @@ func TestReadOrigin(t *testing.T) { casted.Error = errorDoc.Error }).Return(nil) - r := RemoteFileOutputReader{ - outPath: opath, - store: store, - maxPayloadSize: 0, - } + maxPayloadSize := int64(0) + r := 
NewRemoteFileOutputReader( + ctx, + store, + opath, + maxPayloadSize, + ) ee, err := r.ReadError(ctx) assert.NoError(t, err) assert.Equal(t, core.ExecutionError_SYSTEM, ee.Kind) assert.True(t, ee.IsRecoverable) }) + + t.Run("multi-user-error", func(t *testing.T) { + outputPaths := &pluginsIOMock.OutputFilePaths{} + outputPaths.OnGetErrorPath().Return("s3://errors/error.pb") + + store := &storageMocks.ComposedProtobufStore{} + store.OnReadProtobufMatch(mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + errorDoc := &core.ErrorDocument{ + Error: &core.ContainerError{ + Code: "red", + Message: "hi", + Kind: core.ContainerError_NON_RECOVERABLE, + Origin: core.ExecutionError_USER, + }, + } + errorFilePath := args.Get(1) + incomingErrorDoc := args.Get(2) + assert.NotNil(t, incomingErrorDoc) + casted := incomingErrorDoc.(*core.ErrorDocument) + casted.Error = errorDoc.Error + casted.Error.Message = fmt.Sprintf("%s-%s", casted.Error.Message, errorFilePath) + }).Return(nil) + + store.OnList(ctx, storage.DataReference("s3://errors/error"), 1000, storage.NewCursorAtStart()).Return( + []storage.DataReference{"error-0.pb", "error-1.pb", "error-2.pb"}, storage.NewCursorAtEnd(), nil) + + maxPayloadSize := int64(0) + r := NewRemoteFileOutputReaderWithErrorAggregationStrategy( + ctx, + store, + outputPaths, + maxPayloadSize, + k8s.EarliestErrorAggregationStrategy, + ) + + hasError, err := r.IsError(ctx) + assert.NoError(t, err) + assert.True(t, hasError) + + executionError, err := r.ReadError(ctx) + assert.NoError(t, err) + assert.Equal(t, core.ExecutionError_USER, executionError.Kind) + assert.Equal(t, "red", executionError.Code) + assert.Equal(t, "hi-error-2.pb", executionError.Message) + assert.False(t, executionError.IsRecoverable) + }) } diff --git a/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go b/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go index 38a84f9b2b..2e4a531fd1 100644 --- a/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go +++ 
b/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go @@ -30,6 +30,16 @@ type PluginEntry struct { CustomKubeClient func(ctx context.Context) (pluginsCore.KubeClient, error) } +type ErrorAggregationStrategy int + +const ( + // Single error file from a single container + DefaultErrorAggregationStrategy ErrorAggregationStrategy = iota + + // Earliest error from potentially multiple error files + EarliestErrorAggregationStrategy +) + // System level properties that this Plugin supports type PluginProperties struct { // Disables the inclusion of OwnerReferences in kubernetes resources that this plugin is responsible for. @@ -45,6 +55,8 @@ type PluginProperties struct { // override that behavior unless the resource that gets created for this plugin does not consume resources (cluster's // cpu/memory... etc. or external resources) once the plugin's Plugin.GetTaskPhase() returns a terminal phase. DisableDeleteResourceOnFinalize bool + // Specifies how errors are aggregated + ErrorAggregationStrategy ErrorAggregationStrategy } // Special context passed in to plugins when checking task phase diff --git a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go index 42d3ad9b85..17935a89e7 100644 --- a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go +++ b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go @@ -290,7 +290,9 @@ func (e *PluginManager) checkResourcePhase(ctx context.Context, tCtx pluginsCore var opReader io.OutputReader if pCtx.ow == nil { logger.Infof(ctx, "Plugin [%s] returned no outputReader, assuming file based outputs", e.id) - opReader = ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) + opReader = ioutils.NewRemoteFileOutputReaderWithErrorAggregationStrategy( + ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0, + e.plugin.GetProperties().ErrorAggregationStrategy) } else { logger.Infof(ctx, "Plugin [%s] returned outputReader", e.id) 
opReader = pCtx.ow.GetReader() diff --git a/flytestdlib/storage/storage.go b/flytestdlib/storage/storage.go index 52e6905513..3d53a4d25f 100644 --- a/flytestdlib/storage/storage.go +++ b/flytestdlib/storage/storage.go @@ -8,6 +8,7 @@ package storage import ( "context" + "fmt" "io" "net/url" "strings" @@ -171,3 +172,7 @@ func (r DataReference) Split() (scheme, container, key string, err error) { func (r DataReference) String() string { return string(r) } + +func NewDataReference(scheme string, container string, key string) DataReference { + return DataReference(fmt.Sprintf("%s://%s/%s", scheme, container, key)) +} diff --git a/flytestdlib/storage/stow_store.go b/flytestdlib/storage/stow_store.go index 6b731b9c86..c1950c10de 100644 --- a/flytestdlib/storage/stow_store.go +++ b/flytestdlib/storage/stow_store.go @@ -255,13 +255,13 @@ func (s *StowStore) Head(ctx context.Context, reference DataReference) (Metadata } func (s *StowStore) List(ctx context.Context, reference DataReference, maxItems int, cursor Cursor) ([]DataReference, Cursor, error) { - _, c, k, err := reference.Split() + scheme, containerName, key, err := reference.Split() if err != nil { s.metrics.BadReference.Inc(ctx) return nil, NewCursorAtEnd(), err } - container, err := s.getContainer(ctx, locationIDMain, c) + container, err := s.getContainer(ctx, locationIDMain, containerName) if err != nil { return nil, NewCursorAtEnd(), err } @@ -275,11 +275,11 @@ func (s *StowStore) List(ctx context.Context, reference DataReference, maxItems } else { stowCursor = cursor.customPosition } - items, stowCursor, err := container.Items(k, stowCursor, maxItems) + items, stowCursor, err := container.Items(key, stowCursor, maxItems) if err == nil { results := make([]DataReference, len(items)) for index, item := range items { - results[index] = DataReference(item.URL().String()) + results[index] = DataReference(fmt.Sprintf("%s://%s/%s", scheme, containerName, item.URL().String())) } if stow.IsCursorEnd(stowCursor) { cursor 
= NewCursorAtEnd() @@ -291,7 +291,7 @@ func (s *StowStore) List(ctx context.Context, reference DataReference, maxItems } incFailureCounterForError(ctx, s.metrics.ListFailure, err) - return nil, NewCursorAtEnd(), errs.Wrapf(err, "path:%v", k) + return nil, NewCursorAtEnd(), errs.Wrapf(err, "path:%v", key) } func (s *StowStore) ReadRaw(ctx context.Context, reference DataReference) (io.ReadCloser, error) { diff --git a/flytestdlib/storage/stow_store_test.go b/flytestdlib/storage/stow_store_test.go index 4de273dd93..aec59051f3 100644 --- a/flytestdlib/storage/stow_store_test.go +++ b/flytestdlib/storage/stow_store_test.go @@ -419,7 +419,7 @@ func TestStowStore_List(t *testing.T) { items, cursor, err := s.List(ctx, dataReference, maxResults, NewCursorAtStart()) assert.NoError(t, err) assert.Equal(t, NewCursorAtEnd(), cursor) - assert.Equal(t, []DataReference{"a/1", "a/2"}, items) + assert.Equal(t, []DataReference{"s3://container/a/1", "s3://container/a/2"}, items) }) t.Run("Listing with pagination", func(t *testing.T) { @@ -446,10 +446,10 @@ func TestStowStore_List(t *testing.T) { var dataReference DataReference = "s3://container/a" items, cursor, err := s.List(ctx, dataReference, maxResults, NewCursorAtStart()) assert.NoError(t, err) - assert.Equal(t, []DataReference{"a/1"}, items) + assert.Equal(t, []DataReference{"s3://container/a/1"}, items) items, _, err = s.List(ctx, dataReference, maxResults, cursor) assert.NoError(t, err) - assert.Equal(t, []DataReference{"a/2"}, items) + assert.Equal(t, []DataReference{"s3://container/a/2"}, items) }) } From b95197f3fb3c8dea53f716b619e37047da97f90f Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 1 Oct 2024 22:06:20 +0000 Subject: [PATCH 02/29] Add multi file error aggregation strategy --- .../ioutils/remote_file_output_reader.go | 42 +++++++++++++------ .../ioutils/remote_file_output_reader_test.go | 12 ++++++ 2 files changed, 41 insertions(+), 13 deletions(-) diff --git 
a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 02439e2daf..efb9e449dc 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -20,19 +20,19 @@ type ErrorRetriever interface { GetError(ctx context.Context) (io.ExecutionError, error) } -type ErrorRetrieverBase struct { +type BaseErrorRetriever struct { store storage.ComposedProtobufStore maxPayloadSize int64 } type SingleFileErrorRetriever struct { - ErrorRetrieverBase + BaseErrorRetriever errorFilePath storage.DataReference } func NewSingleFileErrorRetriever(errorFilePath storage.DataReference, store storage.ComposedProtobufStore, maxPayloadSize int64) *SingleFileErrorRetriever { return &SingleFileErrorRetriever{ - ErrorRetrieverBase: ErrorRetrieverBase{ + BaseErrorRetriever: BaseErrorRetriever{ store: store, maxPayloadSize: maxPayloadSize, }, @@ -40,18 +40,25 @@ func NewSingleFileErrorRetriever(errorFilePath storage.DataReference, store stor } } +func (b *BaseErrorRetriever) validatePayloadSize(metadata storage.Metadata) error { + if metadata.Exists() { + if metadata.Size() > b.maxPayloadSize { + return errors.Errorf("file is too large [%d] bytes, max allowed [%d] bytes", metadata.Size(), b.maxPayloadSize) + } + } + return nil +} + func (s *SingleFileErrorRetriever) HasError(ctx context.Context) (bool, error) { metadata, err := s.store.Head(ctx, s.errorFilePath) if err != nil { return false, errors.Wrapf(err, "failed to read error file @[%s]", s.errorFilePath) } - if metadata.Exists() { - if metadata.Size() > s.maxPayloadSize { - return false, errors.Wrapf(err, "error file @[%s] is too large [%d] bytes, max allowed [%d] bytes", s.errorFilePath, metadata.Size(), s.maxPayloadSize) - } - return true, nil + err = s.validatePayloadSize(metadata) + if err != nil { + return false, 
errors.Wrapf(err, "error file @[%s] failed payload size validation", s.errorFilePath) } - return false, nil + return metadata.Exists(), nil } func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage.DataReference) io.ExecutionError { @@ -105,7 +112,7 @@ func (s *SingleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionEr } type EarliestFileErrorRetriever struct { - ErrorRetrieverBase + BaseErrorRetriever errorDirPath storage.DataReference canonicalErrorFilename string } @@ -130,6 +137,7 @@ func (e *EarliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) if err != nil { return false, errors.Wrapf(err, "failed to parse canonical error filename @[%s]", e.canonicalErrorFilename) } + hasError := false const maxItems = 1000 cursor := storage.NewCursorAtStart() for cursor != storage.NewCursorAtEnd() { @@ -141,11 +149,19 @@ func (e *EarliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) } for _, errorFilePath := range errorFilePaths { if strings.HasSuffix(errorFilePath.String(), errorFileExtension) { - return true, nil + metadata, err := e.store.Head(ctx, errorFilePath) + if err != nil { + return false, errors.Wrapf(err, "failed to read error file @[%s]", errorFilePath) + } + err = e.validatePayloadSize(metadata) + if err != nil { + return false, errors.Wrapf(err, "error file @[%s] failed payload size validation", errorFilePath) + } + hasError = true } } } - return false, nil + return hasError, nil } func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { @@ -201,7 +217,7 @@ func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.Execution func NewEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonicalErrorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) *EarliestFileErrorRetriever { return &EarliestFileErrorRetriever{ - ErrorRetrieverBase: ErrorRetrieverBase{ + BaseErrorRetriever: BaseErrorRetriever{ store: 
store, maxPayloadSize: maxPayloadSize, }, diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go index d699403601..a30960da93 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go @@ -140,6 +140,18 @@ func TestReadOrigin(t *testing.T) { store.OnList(ctx, storage.DataReference("s3://errors/error"), 1000, storage.NewCursorAtStart()).Return( []storage.DataReference{"error-0.pb", "error-1.pb", "error-2.pb"}, storage.NewCursorAtEnd(), nil) + store.OnHead(ctx, storage.DataReference("error-0.pb")).Return(MemoryMetadata{ + exists: true, + }, nil) + + store.OnHead(ctx, storage.DataReference("error-1.pb")).Return(MemoryMetadata{ + exists: true, + }, nil) + + store.OnHead(ctx, storage.DataReference("error-2.pb")).Return(MemoryMetadata{ + exists: true, + }, nil) + maxPayloadSize := int64(0) r := NewRemoteFileOutputReaderWithErrorAggregationStrategy( ctx, From 4f019ba4d2e0afe97b0ca92f08fafa1c6970e2f1 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 1 Oct 2024 22:07:12 +0000 Subject: [PATCH 03/29] Add multi file error aggregation strategy --- .../ioutils/remote_file_output_reader.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index efb9e449dc..e2bde43bc4 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -20,19 +20,19 @@ type ErrorRetriever interface { GetError(ctx context.Context) (io.ExecutionError, error) } -type BaseErrorRetriever struct { +type baseErrorRetriever struct { store storage.ComposedProtobufStore maxPayloadSize 
int64 } type SingleFileErrorRetriever struct { - BaseErrorRetriever + baseErrorRetriever errorFilePath storage.DataReference } func NewSingleFileErrorRetriever(errorFilePath storage.DataReference, store storage.ComposedProtobufStore, maxPayloadSize int64) *SingleFileErrorRetriever { return &SingleFileErrorRetriever{ - BaseErrorRetriever: BaseErrorRetriever{ + baseErrorRetriever: baseErrorRetriever{ store: store, maxPayloadSize: maxPayloadSize, }, @@ -40,7 +40,7 @@ func NewSingleFileErrorRetriever(errorFilePath storage.DataReference, store stor } } -func (b *BaseErrorRetriever) validatePayloadSize(metadata storage.Metadata) error { +func (b *baseErrorRetriever) validatePayloadSize(metadata storage.Metadata) error { if metadata.Exists() { if metadata.Size() > b.maxPayloadSize { return errors.Errorf("file is too large [%d] bytes, max allowed [%d] bytes", metadata.Size(), b.maxPayloadSize) @@ -112,7 +112,7 @@ func (s *SingleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionEr } type EarliestFileErrorRetriever struct { - BaseErrorRetriever + baseErrorRetriever errorDirPath storage.DataReference canonicalErrorFilename string } @@ -217,7 +217,7 @@ func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.Execution func NewEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonicalErrorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) *EarliestFileErrorRetriever { return &EarliestFileErrorRetriever{ - BaseErrorRetriever: BaseErrorRetriever{ + baseErrorRetriever: baseErrorRetriever{ store: store, maxPayloadSize: maxPayloadSize, }, From 35a33324b928df213a2bec8f14d969f8bdbe24fa Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 1 Oct 2024 22:15:20 +0000 Subject: [PATCH 04/29] Add multi file error aggregation strategy --- .../pluginmachinery/ioutils/remote_file_output_reader.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git 
a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index e2bde43bc4..260e0e624e 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -79,11 +79,7 @@ func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage Kind: errorDoc.Error.Origin, }, } - - if errorDoc.Error.Kind == core.ContainerError_RECOVERABLE { - executionError.IsRecoverable = true - } - + if errorDoc.Error.Kind == core.ContainerError_RECOVERABLE { executionError.IsRecoverable = true } From 275ab7497e9a48a7843f7faee7f41b13aa70a986 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 1 Oct 2024 22:18:27 +0000 Subject: [PATCH 05/29] Add multi file error aggregation strategy --- .../pluginmachinery/ioutils/remote_file_output_reader.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 260e0e624e..30bc616bd4 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -79,7 +79,7 @@ func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage Kind: errorDoc.Error.Origin, }, } - + if errorDoc.Error.Kind == core.ContainerError_RECOVERABLE { executionError.IsRecoverable = true } @@ -118,7 +118,7 @@ func (e *EarliestFileErrorRetriever) parseErrorFilename() (errorFilePathPrefix s // to have name error.pb pieces := strings.Split(e.canonicalErrorFilename, ".") if len(pieces) != 2 { - err = errors.Errorf("expected canoncal error filename to have a single ., got %d", len(pieces)) + err = errors.Errorf("expected canonical error filename to have a single dot (.), got %d", 
len(pieces)) return } errorFilePrefix := pieces[0] From eb479caf157d722a5a1b46083e68758ad14f7e1e Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 1 Oct 2024 22:58:22 +0000 Subject: [PATCH 06/29] Add multi file error aggregation strategy --- flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts | 18 ++- .../gen/pb-es/flyteidl/core/execution_pb.ts | 8 ++ flyteidl/gen/pb-go/flyteidl/core/errors.pb.go | 63 ++++++---- .../gen/pb-go/flyteidl/core/execution.pb.go | 116 ++++++++++-------- .../flyteidl/service/admin.swagger.json | 4 + flyteidl/gen/pb-js/flyteidl.d.ts | 18 +++ flyteidl/gen/pb-js/flyteidl.js | 51 ++++++++ .../gen/pb_python/flyteidl/core/errors_pb2.py | 12 +- .../pb_python/flyteidl/core/errors_pb2.pyi | 8 +- .../pb_python/flyteidl/core/execution_pb2.py | 28 ++--- .../pb_python/flyteidl/core/execution_pb2.pyi | 6 +- flyteidl/gen/pb_rust/flyteidl.core.rs | 9 ++ flyteidl/protos/flyteidl/core/errors.proto | 6 + flyteidl/protos/flyteidl/core/execution.proto | 2 + .../ioutils/remote_file_output_reader.go | 11 +- .../ioutils/remote_file_output_reader_test.go | 11 +- 16 files changed, 259 insertions(+), 112 deletions(-) diff --git a/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts b/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts index 42b70dec5b..dac74e7e8f 100644 --- a/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts +++ b/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts @@ -4,7 +4,7 @@ // @ts-nocheck import type { BinaryReadOptions, FieldList, JsonReadOptions, JsonValue, PartialMessage, PlainMessage } from "@bufbuild/protobuf"; -import { Message, proto3 } from "@bufbuild/protobuf"; +import { Message, proto3, protoInt64 } from "@bufbuild/protobuf"; import { ExecutionError_ErrorKind } from "./execution_pb.js"; /** @@ -42,6 +42,20 @@ export class ContainerError extends Message { */ origin = ExecutionError_ErrorKind.UNKNOWN; + /** + * Timestamp of the error + * + * @generated from field: int64 timetsamp = 5; + */ + timetsamp = protoInt64.zero; + + /** + * Worker that generated the 
error + * + * @generated from field: string worker = 6; + */ + worker = ""; + constructor(data?: PartialMessage) { super(); proto3.util.initPartial(data, this); @@ -54,6 +68,8 @@ export class ContainerError extends Message { { no: 2, name: "message", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 3, name: "kind", kind: "enum", T: proto3.getEnumType(ContainerError_Kind) }, { no: 4, name: "origin", kind: "enum", T: proto3.getEnumType(ExecutionError_ErrorKind) }, + { no: 5, name: "timetsamp", kind: "scalar", T: 3 /* ScalarType.INT64 */ }, + { no: 6, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, ]); static fromBinary(bytes: Uint8Array, options?: Partial): ContainerError { diff --git a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts index 5283936b1f..d8bb872738 100644 --- a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts +++ b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts @@ -341,6 +341,13 @@ export class ExecutionError extends Message { */ kind = ExecutionError_ErrorKind.UNKNOWN; + /** + * Worker that generated the error + * + * @generated from field: string worker = 6; + */ + worker = ""; + constructor(data?: PartialMessage) { super(); proto3.util.initPartial(data, this); @@ -353,6 +360,7 @@ export class ExecutionError extends Message { { no: 2, name: "message", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 3, name: "error_uri", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 4, name: "kind", kind: "enum", T: proto3.getEnumType(ExecutionError_ErrorKind) }, + { no: 6, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, ]); static fromBinary(bytes: Uint8Array, options?: Partial): ExecutionError { diff --git a/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go b/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go index 61e833ed1d..392c40d462 100644 --- a/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go +++ b/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go @@ -82,6 +82,10 @@ type 
ContainerError struct { Kind ContainerError_Kind `protobuf:"varint,3,opt,name=kind,proto3,enum=flyteidl.core.ContainerError_Kind" json:"kind,omitempty"` // Defines the origin of the error (system, user, unknown). Origin ExecutionError_ErrorKind `protobuf:"varint,4,opt,name=origin,proto3,enum=flyteidl.core.ExecutionError_ErrorKind" json:"origin,omitempty"` + // Timestamp of the error + Timetsamp int64 `protobuf:"varint,5,opt,name=timetsamp,proto3" json:"timetsamp,omitempty"` + // Worker that generated the error + Worker string `protobuf:"bytes,6,opt,name=worker,proto3" json:"worker,omitempty"` } func (x *ContainerError) Reset() { @@ -144,6 +148,20 @@ func (x *ContainerError) GetOrigin() ExecutionError_ErrorKind { return ExecutionError_UNKNOWN } +func (x *ContainerError) GetTimetsamp() int64 { + if x != nil { + return x.Timetsamp + } + return 0 +} + +func (x *ContainerError) GetWorker() string { + if x != nil { + return x.Worker + } + return "" +} + // Defines the errors.pb file format the container can produce to communicate // failure reasons to the execution engine. 
type ErrorDocument struct { @@ -201,7 +219,7 @@ var file_flyteidl_core_errors_proto_rawDesc = []byte{ 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0d, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x1a, 0x1d, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0x2f, 0x65, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xe5, 0x01, 0x0a, 0x0e, 0x43, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x9b, 0x02, 0x0a, 0x0e, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, @@ -213,26 +231,29 @@ var file_flyteidl_core_errors_proto_rawDesc = []byte{ 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x06, 0x6f, 0x72, - 0x69, 0x67, 0x69, 0x6e, 0x22, 0x2c, 0x0a, 0x04, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x13, 0x0a, 0x0f, - 0x4e, 0x4f, 0x4e, 0x5f, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x41, 0x42, 0x4c, 0x45, 0x10, - 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x41, 0x42, 0x4c, 0x45, - 0x10, 0x01, 0x22, 0x44, 0x0a, 0x0d, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x44, 0x6f, 0x63, 0x75, 0x6d, - 0x65, 0x6e, 0x74, 0x12, 0x33, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, - 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, - 0x72, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x42, 0xb1, 0x01, 0x0a, 
0x11, 0x63, 0x6f, 0x6d, - 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0b, - 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x3a, 0x67, - 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x6f, - 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, - 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, 0x6f, 0x2f, 0x66, 0x6c, 0x79, 0x74, - 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, 0x02, 0x03, 0x46, 0x43, 0x58, 0xaa, - 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x43, 0x6f, 0x72, 0x65, 0xca, - 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0xe2, - 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0x5c, - 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x0e, 0x46, 0x6c, - 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x69, 0x67, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x74, 0x73, 0x61, 0x6d, + 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x74, 0x73, 0x61, + 0x6d, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, 0x06, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2c, 0x0a, 0x04, 0x4b, 0x69, + 0x6e, 0x64, 0x12, 0x13, 0x0a, 0x0f, 0x4e, 0x4f, 0x4e, 0x5f, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, + 0x52, 0x41, 0x42, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x52, 0x45, 0x43, 0x4f, 0x56, + 0x45, 0x52, 0x41, 0x42, 0x4c, 0x45, 0x10, 0x01, 0x22, 0x44, 0x0a, 0x0d, 0x45, 0x72, 0x72, 0x6f, + 0x72, 0x44, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x33, 0x0a, 0x05, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x66, 0x6c, 0x79, 
0x74, 0x65, + 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, + 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x42, 0xb1, + 0x01, 0x0a, 0x11, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, + 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0b, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x50, 0x72, 0x6f, 0x74, + 0x6f, 0x50, 0x01, 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, + 0x66, 0x6c, 0x79, 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, + 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, + 0x6f, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, + 0x02, 0x03, 0x46, 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, + 0x2e, 0x43, 0x6f, 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, + 0x5c, 0x43, 0x6f, 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, + 0x5c, 0x43, 0x6f, 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0xea, 0x02, 0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, + 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go index 7befaca1ac..c103b83852 100644 --- a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go +++ b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go @@ -514,6 +514,8 @@ type ExecutionError struct { // Full error contents accessible via a URI ErrorUri string `protobuf:"bytes,3,opt,name=error_uri,json=errorUri,proto3" json:"error_uri,omitempty"` Kind ExecutionError_ErrorKind `protobuf:"varint,4,opt,name=kind,proto3,enum=flyteidl.core.ExecutionError_ErrorKind" json:"kind,omitempty"` + // Worker that generated the error + Worker 
string `protobuf:"bytes,6,opt,name=worker,proto3" json:"worker,omitempty"` } func (x *ExecutionError) Reset() { @@ -576,6 +578,13 @@ func (x *ExecutionError) GetKind() ExecutionError_ErrorKind { return ExecutionError_UNKNOWN } +func (x *ExecutionError) GetWorker() string { + if x != nil { + return x.Worker + } + return "" +} + // Log information for the task that is specific to a log sink // When our log story is flushed out, we may have more metadata here like log link expiry type TaskLog struct { @@ -835,7 +844,7 @@ var file_flyteidl_core_execution_proto_rawDesc = []byte{ 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x05, 0x12, 0x10, 0x0a, 0x0c, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x49, 0x4e, 0x47, 0x10, 0x06, 0x12, 0x19, 0x0a, 0x15, 0x57, 0x41, 0x49, 0x54, 0x49, 0x4e, 0x47, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x52, - 0x45, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x53, 0x10, 0x07, 0x22, 0xc8, 0x01, 0x0a, 0x0e, 0x45, + 0x45, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x53, 0x10, 0x07, 0x22, 0xe0, 0x01, 0x0a, 0x0e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, @@ -845,60 +854,61 @@ var file_flyteidl_core_execution_proto_rawDesc = []byte{ 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, - 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x22, 0x2e, 0x0a, 0x09, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, - 0x6e, 0x64, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, - 0x08, 0x0a, 0x04, 0x55, 0x53, 0x45, 0x52, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 
0x53, 0x59, 0x53, - 0x54, 0x45, 0x4d, 0x10, 0x02, 0x22, 0xb2, 0x02, 0x0a, 0x07, 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, - 0x67, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x75, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4b, 0x0a, 0x0e, 0x6d, 0x65, 0x73, 0x73, 0x61, - 0x67, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, - 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, - 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, - 0x72, 0x6d, 0x61, 0x74, 0x12, 0x2b, 0x0a, 0x03, 0x74, 0x74, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x03, 0x74, 0x74, - 0x6c, 0x12, 0x2a, 0x0a, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, - 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x53, 0x68, 0x6f, - 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x2a, 0x0a, + 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, + 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2e, 0x0a, + 0x09, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, + 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x55, 0x53, 0x45, 0x52, 0x10, + 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, 0x10, 0x02, 0x22, 0xb2, 0x02, + 0x0a, 0x07, 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, + 0x18, 0x01, 0x20, 
0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x6e, + 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, + 0x4b, 0x0a, 0x0e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, + 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, + 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x2e, + 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, + 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x2b, 0x0a, 0x03, + 0x74, 0x74, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x03, 0x74, 0x74, 0x6c, 0x12, 0x2a, 0x0a, 0x10, 0x53, 0x68, 0x6f, + 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, + 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x2a, 0x0a, 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, + 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, - 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, - 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x22, 0x2f, 0x0a, 0x0d, 0x4d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, - 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x43, 0x53, 0x56, 0x10, 0x01, - 0x12, 0x08, 0x0a, 0x04, 0x4a, 0x53, 0x4f, 0x4e, 0x10, 0x02, 0x22, 0x5a, 0x0a, 0x14, 0x51, 0x75, + 0x64, 0x22, 0x2f, 0x0a, 0x0d, 0x4d, 0x65, 0x73, 0x73, 0x61, 
0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, + 0x61, 0x74, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, + 0x07, 0x0a, 0x03, 0x43, 0x53, 0x56, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x4a, 0x53, 0x4f, 0x4e, + 0x10, 0x02, 0x22, 0x5a, 0x0a, 0x14, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, + 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, 0x65, 0x63, 0x12, 0x42, 0x0a, 0x0f, 0x71, 0x75, + 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, 0x5f, 0x62, 0x75, 0x64, 0x67, 0x65, 0x74, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, + 0x71, 0x75, 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x64, 0x67, 0x65, 0x74, 0x22, 0xce, + 0x01, 0x0a, 0x10, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, + 0x69, 0x63, 0x65, 0x12, 0x3a, 0x0a, 0x04, 0x74, 0x69, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0e, 0x32, 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, + 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, + 0x63, 0x65, 0x2e, 0x54, 0x69, 0x65, 0x72, 0x48, 0x00, 0x52, 0x04, 0x74, 0x69, 0x65, 0x72, 0x12, + 0x39, 0x0a, 0x04, 0x73, 0x70, 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, + 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, - 0x65, 0x63, 0x12, 0x42, 0x0a, 0x0f, 0x71, 0x75, 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, 0x5f, 0x62, - 0x75, 0x64, 0x67, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, - 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, - 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, 0x71, 0x75, 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, - 
0x42, 0x75, 0x64, 0x67, 0x65, 0x74, 0x22, 0xce, 0x01, 0x0a, 0x10, 0x51, 0x75, 0x61, 0x6c, 0x69, - 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x3a, 0x0a, 0x04, 0x74, - 0x69, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, - 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, - 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x54, 0x69, 0x65, 0x72, 0x48, - 0x00, 0x52, 0x04, 0x74, 0x69, 0x65, 0x72, 0x12, 0x39, 0x0a, 0x04, 0x73, 0x70, 0x65, 0x63, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, - 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, - 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, 0x65, 0x63, 0x48, 0x00, 0x52, 0x04, 0x73, 0x70, - 0x65, 0x63, 0x22, 0x34, 0x0a, 0x04, 0x54, 0x69, 0x65, 0x72, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, - 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x48, 0x49, 0x47, - 0x48, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4d, 0x45, 0x44, 0x49, 0x55, 0x4d, 0x10, 0x02, 0x12, - 0x07, 0x0a, 0x03, 0x4c, 0x4f, 0x57, 0x10, 0x03, 0x42, 0x0d, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x69, - 0x67, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0xb4, 0x01, 0x0a, 0x11, 0x63, 0x6f, 0x6d, 0x2e, - 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0e, 0x45, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, - 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x6c, 0x79, 0x74, - 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, - 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, 0x6f, 0x2f, 0x66, 0x6c, - 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, 0x02, 0x03, 0x46, 0x43, - 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 
0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x43, 0x6f, 0x72, - 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, - 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, - 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x0e, - 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, 0x72, 0x65, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x65, 0x63, 0x48, 0x00, 0x52, 0x04, 0x73, 0x70, 0x65, 0x63, 0x22, 0x34, 0x0a, 0x04, 0x54, 0x69, + 0x65, 0x72, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, + 0x00, 0x12, 0x08, 0x0a, 0x04, 0x48, 0x49, 0x47, 0x48, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4d, + 0x45, 0x44, 0x49, 0x55, 0x4d, 0x10, 0x02, 0x12, 0x07, 0x0a, 0x03, 0x4c, 0x4f, 0x57, 0x10, 0x03, + 0x42, 0x0d, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, + 0xb4, 0x01, 0x0a, 0x11, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, + 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, + 0x74, 0x65, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, + 0x70, 0x62, 0x2d, 0x67, 0x6f, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, + 0x6f, 0x72, 0x65, 0xa2, 0x02, 0x03, 0x46, 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, + 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x43, 0x6f, 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, + 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, + 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 
0x02, 0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, + 0x3a, 0x3a, 0x43, 0x6f, 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json b/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json index ef81380d1e..0ff728e9a6 100644 --- a/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json +++ b/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json @@ -7151,6 +7151,10 @@ }, "kind": { "$ref": "#/definitions/ExecutionErrorErrorKind" + }, + "worker": { + "type": "string", + "title": "Worker that generated the error" } }, "description": "Represents the error message from the execution." diff --git a/flyteidl/gen/pb-js/flyteidl.d.ts b/flyteidl/gen/pb-js/flyteidl.d.ts index 0ff2422577..34196b4ec5 100644 --- a/flyteidl/gen/pb-js/flyteidl.d.ts +++ b/flyteidl/gen/pb-js/flyteidl.d.ts @@ -5748,6 +5748,9 @@ export namespace flyteidl { /** ExecutionError kind */ kind?: (flyteidl.core.ExecutionError.ErrorKind|null); + + /** ExecutionError worker */ + worker?: (string|null); } /** Represents an ExecutionError. */ @@ -5771,6 +5774,9 @@ export namespace flyteidl { /** ExecutionError kind. */ public kind: flyteidl.core.ExecutionError.ErrorKind; + /** ExecutionError worker. */ + public worker: string; + /** * Creates a new ExecutionError instance using the specified properties. * @param [properties] Properties to set @@ -7555,6 +7561,12 @@ export namespace flyteidl { /** ContainerError origin */ origin?: (flyteidl.core.ExecutionError.ErrorKind|null); + + /** ContainerError timetsamp */ + timetsamp?: (Long|null); + + /** ContainerError worker */ + worker?: (string|null); } /** Represents a ContainerError. */ @@ -7578,6 +7590,12 @@ export namespace flyteidl { /** ContainerError origin. */ public origin: flyteidl.core.ExecutionError.ErrorKind; + /** ContainerError timetsamp. */ + public timetsamp: Long; + + /** ContainerError worker. 
*/ + public worker: string; + /** * Creates a new ContainerError instance using the specified properties. * @param [properties] Properties to set diff --git a/flyteidl/gen/pb-js/flyteidl.js b/flyteidl/gen/pb-js/flyteidl.js index 042343eecf..5d2a58ecce 100644 --- a/flyteidl/gen/pb-js/flyteidl.js +++ b/flyteidl/gen/pb-js/flyteidl.js @@ -13823,6 +13823,7 @@ * @property {string|null} [message] ExecutionError message * @property {string|null} [errorUri] ExecutionError errorUri * @property {flyteidl.core.ExecutionError.ErrorKind|null} [kind] ExecutionError kind + * @property {string|null} [worker] ExecutionError worker */ /** @@ -13872,6 +13873,14 @@ */ ExecutionError.prototype.kind = 0; + /** + * ExecutionError worker. + * @member {string} worker + * @memberof flyteidl.core.ExecutionError + * @instance + */ + ExecutionError.prototype.worker = ""; + /** * Creates a new ExecutionError instance using the specified properties. * @function create @@ -13904,6 +13913,8 @@ writer.uint32(/* id 3, wireType 2 =*/26).string(message.errorUri); if (message.kind != null && message.hasOwnProperty("kind")) writer.uint32(/* id 4, wireType 0 =*/32).int32(message.kind); + if (message.worker != null && message.hasOwnProperty("worker")) + writer.uint32(/* id 6, wireType 2 =*/50).string(message.worker); return writer; }; @@ -13937,6 +13948,9 @@ case 4: message.kind = reader.int32(); break; + case 6: + message.worker = reader.string(); + break; default: reader.skipType(tag & 7); break; @@ -13974,6 +13988,9 @@ case 2: break; } + if (message.worker != null && message.hasOwnProperty("worker")) + if (!$util.isString(message.worker)) + return "worker: string expected"; return null; }; @@ -18268,6 +18285,8 @@ * @property {string|null} [message] ContainerError message * @property {flyteidl.core.ContainerError.Kind|null} [kind] ContainerError kind * @property {flyteidl.core.ExecutionError.ErrorKind|null} [origin] ContainerError origin + * @property {Long|null} [timetsamp] ContainerError timetsamp + * 
@property {string|null} [worker] ContainerError worker */ /** @@ -18317,6 +18336,22 @@ */ ContainerError.prototype.origin = 0; + /** + * ContainerError timetsamp. + * @member {Long} timetsamp + * @memberof flyteidl.core.ContainerError + * @instance + */ + ContainerError.prototype.timetsamp = $util.Long ? $util.Long.fromBits(0,0,false) : 0; + + /** + * ContainerError worker. + * @member {string} worker + * @memberof flyteidl.core.ContainerError + * @instance + */ + ContainerError.prototype.worker = ""; + /** * Creates a new ContainerError instance using the specified properties. * @function create @@ -18349,6 +18384,10 @@ writer.uint32(/* id 3, wireType 0 =*/24).int32(message.kind); if (message.origin != null && message.hasOwnProperty("origin")) writer.uint32(/* id 4, wireType 0 =*/32).int32(message.origin); + if (message.timetsamp != null && message.hasOwnProperty("timetsamp")) + writer.uint32(/* id 5, wireType 0 =*/40).int64(message.timetsamp); + if (message.worker != null && message.hasOwnProperty("worker")) + writer.uint32(/* id 6, wireType 2 =*/50).string(message.worker); return writer; }; @@ -18382,6 +18421,12 @@ case 4: message.origin = reader.int32(); break; + case 5: + message.timetsamp = reader.int64(); + break; + case 6: + message.worker = reader.string(); + break; default: reader.skipType(tag & 7); break; @@ -18424,6 +18469,12 @@ case 2: break; } + if (message.timetsamp != null && message.hasOwnProperty("timetsamp")) + if (!$util.isInteger(message.timetsamp) && !(message.timetsamp && $util.isInteger(message.timetsamp.low) && $util.isInteger(message.timetsamp.high))) + return "timetsamp: integer|Long expected"; + if (message.worker != null && message.hasOwnProperty("worker")) + if (!$util.isString(message.worker)) + return "worker: string expected"; return null; }; diff --git a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py index 68182fd259..cdd55fb6e3 100644 --- 
a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py +++ b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py @@ -14,7 +14,7 @@ from flyteidl.core import execution_pb2 as flyteidl_dot_core_dot_execution__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x66lyteidl/core/errors.proto\x12\rflyteidl.core\x1a\x1d\x66lyteidl/core/execution.proto\"\xe5\x01\n\x0e\x43ontainerError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x36\n\x04kind\x18\x03 \x01(\x0e\x32\".flyteidl.core.ContainerError.KindR\x04kind\x12?\n\x06origin\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x06origin\",\n\x04Kind\x12\x13\n\x0fNON_RECOVERABLE\x10\x00\x12\x0f\n\x0bRECOVERABLE\x10\x01\"D\n\rErrorDocument\x12\x33\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x1d.flyteidl.core.ContainerErrorR\x05\x65rrorB\xb1\x01\n\x11\x63om.flyteidl.coreB\x0b\x45rrorsProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x66lyteidl/core/errors.proto\x12\rflyteidl.core\x1a\x1d\x66lyteidl/core/execution.proto\"\x9b\x02\n\x0e\x43ontainerError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x36\n\x04kind\x18\x03 \x01(\x0e\x32\".flyteidl.core.ContainerError.KindR\x04kind\x12?\n\x06origin\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x06origin\x12\x1c\n\ttimetsamp\x18\x05 \x01(\x03R\ttimetsamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\",\n\x04Kind\x12\x13\n\x0fNON_RECOVERABLE\x10\x00\x12\x0f\n\x0bRECOVERABLE\x10\x01\"D\n\rErrorDocument\x12\x33\n\x05\x65rror\x18\x01 
\x01(\x0b\x32\x1d.flyteidl.core.ContainerErrorR\x05\x65rrorB\xb1\x01\n\x11\x63om.flyteidl.coreB\x0b\x45rrorsProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -24,9 +24,9 @@ DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b'\n\021com.flyteidl.coreB\013ErrorsProtoP\001Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\242\002\003FCX\252\002\rFlyteidl.Core\312\002\rFlyteidl\\Core\342\002\031Flyteidl\\Core\\GPBMetadata\352\002\016Flyteidl::Core' _globals['_CONTAINERERROR']._serialized_start=77 - _globals['_CONTAINERERROR']._serialized_end=306 - _globals['_CONTAINERERROR_KIND']._serialized_start=262 - _globals['_CONTAINERERROR_KIND']._serialized_end=306 - _globals['_ERRORDOCUMENT']._serialized_start=308 - _globals['_ERRORDOCUMENT']._serialized_end=376 + _globals['_CONTAINERERROR']._serialized_end=360 + _globals['_CONTAINERERROR_KIND']._serialized_start=316 + _globals['_CONTAINERERROR_KIND']._serialized_end=360 + _globals['_ERRORDOCUMENT']._serialized_start=362 + _globals['_ERRORDOCUMENT']._serialized_end=430 # @@protoc_insertion_point(module_scope) diff --git a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi index b13aa40915..5ab22ddc47 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi +++ b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi @@ -7,7 +7,7 @@ from typing import ClassVar as _ClassVar, Mapping as _Mapping, Optional as _Opti DESCRIPTOR: _descriptor.FileDescriptor class ContainerError(_message.Message): - __slots__ = ["code", "message", "kind", "origin"] + __slots__ = ["code", "message", "kind", "origin", "timetsamp", "worker"] class Kind(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = [] 
NON_RECOVERABLE: _ClassVar[ContainerError.Kind] @@ -18,11 +18,15 @@ class ContainerError(_message.Message): MESSAGE_FIELD_NUMBER: _ClassVar[int] KIND_FIELD_NUMBER: _ClassVar[int] ORIGIN_FIELD_NUMBER: _ClassVar[int] + TIMETSAMP_FIELD_NUMBER: _ClassVar[int] + WORKER_FIELD_NUMBER: _ClassVar[int] code: str message: str kind: ContainerError.Kind origin: _execution_pb2.ExecutionError.ErrorKind - def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., kind: _Optional[_Union[ContainerError.Kind, str]] = ..., origin: _Optional[_Union[_execution_pb2.ExecutionError.ErrorKind, str]] = ...) -> None: ... + timetsamp: int + worker: str + def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., kind: _Optional[_Union[ContainerError.Kind, str]] = ..., origin: _Optional[_Union[_execution_pb2.ExecutionError.ErrorKind, str]] = ..., timetsamp: _Optional[int] = ..., worker: _Optional[str] = ...) -> None: ... class ErrorDocument(_message.Message): __slots__ = ["error"] diff --git a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py index 2d59497e3a..44fe17c6ac 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py +++ b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py @@ -14,7 +14,7 @@ from google.protobuf import duration_pb2 as google_dot_protobuf_dot_duration__pb2 -DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\xc8\x01\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 \x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 \x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 
\x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x
10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\xe0\x01\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 \x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 \x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 \x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -36,17 +36,17 @@ _globals['_TASKEXECUTION_PHASE']._serialized_start=454 _globals['_TASKEXECUTION_PHASE']._serialized_end=586 
_globals['_EXECUTIONERROR']._serialized_start=589 - _globals['_EXECUTIONERROR']._serialized_end=789 - _globals['_EXECUTIONERROR_ERRORKIND']._serialized_start=743 - _globals['_EXECUTIONERROR_ERRORKIND']._serialized_end=789 - _globals['_TASKLOG']._serialized_start=792 - _globals['_TASKLOG']._serialized_end=1098 - _globals['_TASKLOG_MESSAGEFORMAT']._serialized_start=1051 - _globals['_TASKLOG_MESSAGEFORMAT']._serialized_end=1098 - _globals['_QUALITYOFSERVICESPEC']._serialized_start=1100 - _globals['_QUALITYOFSERVICESPEC']._serialized_end=1190 - _globals['_QUALITYOFSERVICE']._serialized_start=1193 - _globals['_QUALITYOFSERVICE']._serialized_end=1399 - _globals['_QUALITYOFSERVICE_TIER']._serialized_start=1332 - _globals['_QUALITYOFSERVICE_TIER']._serialized_end=1384 + _globals['_EXECUTIONERROR']._serialized_end=813 + _globals['_EXECUTIONERROR_ERRORKIND']._serialized_start=767 + _globals['_EXECUTIONERROR_ERRORKIND']._serialized_end=813 + _globals['_TASKLOG']._serialized_start=816 + _globals['_TASKLOG']._serialized_end=1122 + _globals['_TASKLOG_MESSAGEFORMAT']._serialized_start=1075 + _globals['_TASKLOG_MESSAGEFORMAT']._serialized_end=1122 + _globals['_QUALITYOFSERVICESPEC']._serialized_start=1124 + _globals['_QUALITYOFSERVICESPEC']._serialized_end=1214 + _globals['_QUALITYOFSERVICE']._serialized_start=1217 + _globals['_QUALITYOFSERVICE']._serialized_end=1423 + _globals['_QUALITYOFSERVICE_TIER']._serialized_start=1356 + _globals['_QUALITYOFSERVICE_TIER']._serialized_end=1408 # @@protoc_insertion_point(module_scope) diff --git a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi index 5c28a55418..09b01791da 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi +++ b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi @@ -83,7 +83,7 @@ class TaskExecution(_message.Message): def __init__(self) -> None: ... 
class ExecutionError(_message.Message): - __slots__ = ["code", "message", "error_uri", "kind"] + __slots__ = ["code", "message", "error_uri", "kind", "worker"] class ErrorKind(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = [] UNKNOWN: _ClassVar[ExecutionError.ErrorKind] @@ -96,11 +96,13 @@ class ExecutionError(_message.Message): MESSAGE_FIELD_NUMBER: _ClassVar[int] ERROR_URI_FIELD_NUMBER: _ClassVar[int] KIND_FIELD_NUMBER: _ClassVar[int] + WORKER_FIELD_NUMBER: _ClassVar[int] code: str message: str error_uri: str kind: ExecutionError.ErrorKind - def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., error_uri: _Optional[str] = ..., kind: _Optional[_Union[ExecutionError.ErrorKind, str]] = ...) -> None: ... + worker: str + def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., error_uri: _Optional[str] = ..., kind: _Optional[_Union[ExecutionError.ErrorKind, str]] = ..., worker: _Optional[str] = ...) -> None: ... class TaskLog(_message.Message): __slots__ = ["uri", "name", "message_format", "ttl", "ShowWhilePending", "HideOnceFinished"] diff --git a/flyteidl/gen/pb_rust/flyteidl.core.rs b/flyteidl/gen/pb_rust/flyteidl.core.rs index 441609be89..707279c65f 100644 --- a/flyteidl/gen/pb_rust/flyteidl.core.rs +++ b/flyteidl/gen/pb_rust/flyteidl.core.rs @@ -2127,6 +2127,9 @@ pub struct ExecutionError { pub error_uri: ::prost::alloc::string::String, #[prost(enumeration="execution_error::ErrorKind", tag="4")] pub kind: i32, + /// Worker that generated the error + #[prost(string, tag="6")] + pub worker: ::prost::alloc::string::String, } /// Nested message and enum types in `ExecutionError`. pub mod execution_error { @@ -3089,6 +3092,12 @@ pub struct ContainerError { /// Defines the origin of the error (system, user, unknown). 
#[prost(enumeration="execution_error::ErrorKind", tag="4")] pub origin: i32, + /// Timestamp of the error + #[prost(int64, tag="5")] + pub timetsamp: i64, + /// Worker that generated the error + #[prost(string, tag="6")] + pub worker: ::prost::alloc::string::String, } /// Nested message and enum types in `ContainerError`. pub mod container_error { diff --git a/flyteidl/protos/flyteidl/core/errors.proto b/flyteidl/protos/flyteidl/core/errors.proto index 4d25389349..b3243e72e4 100644 --- a/flyteidl/protos/flyteidl/core/errors.proto +++ b/flyteidl/protos/flyteidl/core/errors.proto @@ -25,6 +25,12 @@ message ContainerError { // Defines the origin of the error (system, user, unknown). ExecutionError.ErrorKind origin = 4; + + // Timestamp of the error + int64 timetsamp = 5; + + // Worker that generated the error + string worker = 6; } // Defines the errors.pb file format the container can produce to communicate diff --git a/flyteidl/protos/flyteidl/core/execution.proto b/flyteidl/protos/flyteidl/core/execution.proto index 4d55198955..e11a5c7d51 100644 --- a/flyteidl/protos/flyteidl/core/execution.proto +++ b/flyteidl/protos/flyteidl/core/execution.proto @@ -73,6 +73,8 @@ message ExecutionError { SYSTEM = 2; } ErrorKind kind = 4; + // Worker that generated the error + string worker = 6; } // Log information for the task that is specific to a log sink diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 30bc616bd4..b98365cf84 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -77,6 +77,7 @@ func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage Code: errorDoc.Error.Code, Message: errorDoc.Error.Message, Kind: errorDoc.Error.Origin, + Worker: errorDoc.Error.Worker, }, } @@ -191,18 +192,10 @@ func (e 
*EarliestFileErrorRetriever) GetError(ctx context.Context) (io.Execution } } - extractTimestampFromErrorDoc := func(errorDoc *core.ErrorDocument) int64 { - // TODO: add optional timestamp to ErrorDocument - if errorDoc == nil { - panic("") - } - return 0 - } - var earliestTimestamp int64 = math.MaxInt64 earliestExecutionError := io.ExecutionError{} for _, errorFileAndDoc := range errorFileAndDocs { - timestamp := extractTimestampFromErrorDoc(errorFileAndDoc.errorDoc) + timestamp := errorFileAndDoc.errorDoc.Error.GetTimetsamp() if earliestTimestamp >= timestamp { earliestExecutionError = errorDoc2ExecutionError(errorFileAndDoc.errorDoc, errorFileAndDoc.errorFilePath) earliestTimestamp = timestamp diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go index a30960da93..f7e8ac102a 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go @@ -3,6 +3,7 @@ package ioutils import ( "context" "fmt" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -121,20 +122,21 @@ func TestReadOrigin(t *testing.T) { store := &storageMocks.ComposedProtobufStore{} store.OnReadProtobufMatch(mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + errorFilePath := args.Get(1).(storage.DataReference) + workerIdx := strings.Split(strings.Split(errorFilePath.String(), "-")[1], ".")[0] errorDoc := &core.ErrorDocument{ Error: &core.ContainerError{ Code: "red", - Message: "hi", + Message: fmt.Sprintf("hi-%s", workerIdx), Kind: core.ContainerError_NON_RECOVERABLE, Origin: core.ExecutionError_USER, + Worker: fmt.Sprintf("worker-%s", workerIdx), }, } - errorFilePath := args.Get(1) incomingErrorDoc := args.Get(2) assert.NotNil(t, incomingErrorDoc) casted := incomingErrorDoc.(*core.ErrorDocument) casted.Error = errorDoc.Error - 
casted.Error.Message = fmt.Sprintf("%s-%s", casted.Error.Message, errorFilePath) }).Return(nil) store.OnList(ctx, storage.DataReference("s3://errors/error"), 1000, storage.NewCursorAtStart()).Return( @@ -169,7 +171,8 @@ func TestReadOrigin(t *testing.T) { assert.NoError(t, err) assert.Equal(t, core.ExecutionError_USER, executionError.Kind) assert.Equal(t, "red", executionError.Code) - assert.Equal(t, "hi-error-2.pb", executionError.Message) + assert.Equal(t, "hi-2", executionError.Message) + assert.Equal(t, "worker-2", executionError.Worker) assert.False(t, executionError.IsRecoverable) }) } From 17d3db4051dae61d05cade90c570ebc906aa3941 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 1 Oct 2024 23:46:30 +0000 Subject: [PATCH 07/29] Add multi file error aggregation strategy --- .../tasks/pluginmachinery/ioutils/remote_file_output_reader.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index b98365cf84..42197e220c 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -90,7 +90,7 @@ func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage func (s *SingleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { errorDoc := &core.ErrorDocument{} - err := s.store.ReadProtobuf(ctx, storage.DataReference(s.errorFilePath), errorDoc) + err := s.store.ReadProtobuf(ctx, s.errorFilePath, errorDoc) if err != nil { if storage.IsNotFound(err) { return io.ExecutionError{ From fa982c3046af8afcc7f92f7d72ea99e8641401cf Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Wed, 9 Oct 2024 05:59:21 +0000 Subject: [PATCH 08/29] Add multi file error aggregation strategy --- .../pkg/controller/nodes/task/k8s/plugin_manager_test.go | 5 +++++ 1 file changed, 5 
insertions(+) diff --git a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go index a2bcb57014..af8442880e 100644 --- a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go +++ b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go @@ -33,6 +33,7 @@ import ( "github.com/flyteorg/flyte/flytestdlib/contextutils" "github.com/flyteorg/flyte/flytestdlib/promutils" "github.com/flyteorg/flyte/flytestdlib/promutils/labeled" + "github.com/flyteorg/flyte/flytestdlib/storage" ) type extendedFakeClient struct { @@ -163,6 +164,10 @@ func (d *dummyOutputWriter) Put(ctx context.Context, reader io.OutputReader) err return nil } +func (d *dummyOutputWriter) GetErrorPath() storage.DataReference { + return "" +} + func getMockTaskContext(initPhase PluginPhase, wantPhase PluginPhase) pluginsCore.TaskExecutionContext { taskExecutionContext := &pluginsCoreMock.TaskExecutionContext{} taskExecutionContext.OnTaskExecutionMetadata().Return(getMockTaskExecutionMetadata()) From 7128bb700499b093eb38bde1aa00f110394870bb Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Wed, 9 Oct 2024 06:08:04 +0000 Subject: [PATCH 09/29] Add multi file error aggregation strategy --- flyteplugins/go/tasks/plugins/array/outputs_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/flyteplugins/go/tasks/plugins/array/outputs_test.go b/flyteplugins/go/tasks/plugins/array/outputs_test.go index 529eba0429..0998ba0047 100644 --- a/flyteplugins/go/tasks/plugins/array/outputs_test.go +++ b/flyteplugins/go/tasks/plugins/array/outputs_test.go @@ -353,6 +353,7 @@ func TestAssembleFinalOutputs(t *testing.T) { ow := &mocks2.OutputWriter{} ow.OnGetOutputPrefixPath().Return("/prefix/") ow.OnGetOutputPath().Return("/prefix/outputs.pb") + ow.OnGetErrorPath().Return("/location/prefix/error.pb") ow.On("Put", mock.Anything, mock.Anything).Return(func(ctx context.Context, or io.OutputReader) error { m, ee, err := 
or.Read(ctx) assert.NoError(t, err) From 8f4629fe50295634611caf6751c0e0795ffd9fd6 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Wed, 9 Oct 2024 06:14:37 +0000 Subject: [PATCH 10/29] Add multi file error aggregation strategy --- flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts | 4 ++-- flyteidl/gen/pb-go/flyteidl/core/execution.pb.go | 4 ++-- flyteidl/gen/pb-js/flyteidl.js | 4 ++-- flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py | 2 +- flyteidl/gen/pb_rust/flyteidl.core.rs | 2 +- flyteidl/protos/flyteidl/core/execution.proto | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts index d8bb872738..aaf3969007 100644 --- a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts +++ b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts @@ -344,7 +344,7 @@ export class ExecutionError extends Message { /** * Worker that generated the error * - * @generated from field: string worker = 6; + * @generated from field: string worker = 5; */ worker = ""; @@ -360,7 +360,7 @@ export class ExecutionError extends Message { { no: 2, name: "message", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 3, name: "error_uri", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 4, name: "kind", kind: "enum", T: proto3.getEnumType(ExecutionError_ErrorKind) }, - { no: 6, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, + { no: 5, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, ]); static fromBinary(bytes: Uint8Array, options?: Partial): ExecutionError { diff --git a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go index c103b83852..d3b58f9f69 100644 --- a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go +++ b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go @@ -515,7 +515,7 @@ type ExecutionError struct { ErrorUri string `protobuf:"bytes,3,opt,name=error_uri,json=errorUri,proto3" 
json:"error_uri,omitempty"` Kind ExecutionError_ErrorKind `protobuf:"varint,4,opt,name=kind,proto3,enum=flyteidl.core.ExecutionError_ErrorKind" json:"kind,omitempty"` // Worker that generated the error - Worker string `protobuf:"bytes,6,opt,name=worker,proto3" json:"worker,omitempty"` + Worker string `protobuf:"bytes,5,opt,name=worker,proto3" json:"worker,omitempty"` } func (x *ExecutionError) Reset() { @@ -855,7 +855,7 @@ var file_flyteidl_core_execution_proto_rawDesc = []byte{ 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, - 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2e, 0x0a, + 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2e, 0x0a, 0x09, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x55, 0x53, 0x45, 0x52, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, 0x10, 0x02, 0x22, 0xb2, 0x02, diff --git a/flyteidl/gen/pb-js/flyteidl.js b/flyteidl/gen/pb-js/flyteidl.js index 5d2a58ecce..2c2a2798f6 100644 --- a/flyteidl/gen/pb-js/flyteidl.js +++ b/flyteidl/gen/pb-js/flyteidl.js @@ -13914,7 +13914,7 @@ if (message.kind != null && message.hasOwnProperty("kind")) writer.uint32(/* id 4, wireType 0 =*/32).int32(message.kind); if (message.worker != null && message.hasOwnProperty("worker")) - writer.uint32(/* id 6, wireType 2 =*/50).string(message.worker); + writer.uint32(/* id 5, wireType 2 =*/42).string(message.worker); return writer; }; @@ -13948,7 +13948,7 @@ case 4: message.kind = reader.int32(); break; - case 6: + case 5: message.worker = reader.string(); break; default: diff --git 
a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py index 44fe17c6ac..7165379912 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py +++ b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py @@ -14,7 +14,7 @@ from google.protobuf import duration_pb2 as google_dot_protobuf_dot_duration__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\xe0\x01\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\x12\x16\n\x06worker\x18\x06 
\x01(\tR\x06worker\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 \x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 \x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 \x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\xe0\x01\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\x12\x16\n\x06worker\x18\x05 \x01(\tR\x06worker\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 \x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 \x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 
\x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) diff --git a/flyteidl/gen/pb_rust/flyteidl.core.rs b/flyteidl/gen/pb_rust/flyteidl.core.rs index 782d131c57..1f027ce308 100644 --- a/flyteidl/gen/pb_rust/flyteidl.core.rs +++ b/flyteidl/gen/pb_rust/flyteidl.core.rs @@ -2130,7 +2130,7 @@ pub struct ExecutionError { #[prost(enumeration="execution_error::ErrorKind", tag="4")] pub kind: i32, /// Worker that generated the error - #[prost(string, tag="6")] + #[prost(string, tag="5")] pub worker: ::prost::alloc::string::String, } /// Nested message and enum types in `ExecutionError`. 
diff --git a/flyteidl/protos/flyteidl/core/execution.proto b/flyteidl/protos/flyteidl/core/execution.proto index e11a5c7d51..81fec315c2 100644 --- a/flyteidl/protos/flyteidl/core/execution.proto +++ b/flyteidl/protos/flyteidl/core/execution.proto @@ -74,7 +74,7 @@ message ExecutionError { } ErrorKind kind = 4; // Worker that generated the error - string worker = 6; + string worker = 5; } // Log information for the task that is specific to a log sink From 2df9e4d9974441044c134c2b794b4ed01a555c84 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Wed, 9 Oct 2024 06:23:18 +0000 Subject: [PATCH 11/29] Add multi file error aggregation strategy --- .../go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go | 4 +++- .../go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go index 8084b75b4c..359393459f 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go @@ -28,7 +28,9 @@ type pytorchOperatorResourceHandler struct { var _ k8s.Plugin = pytorchOperatorResourceHandler{} func (pytorchOperatorResourceHandler) GetProperties() k8s.PluginProperties { - return k8s.PluginProperties{} + return k8s.PluginProperties{ + ErrorAggregationStrategy: k8s.EarliestErrorAggregationStrategy, + } } // Defines a func to create a query object (typically just object and type meta portions) that's used to query k8s diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go index 546b42d7df..b5f13ade2e 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go @@ -712,7 +712,9 @@ func TestGetLogsElastic(t *testing.T) 
{ func TestGetProperties(t *testing.T) { pytorchResourceHandler := pytorchOperatorResourceHandler{} - expected := k8s.PluginProperties{} + expected := k8s.PluginProperties{ + ErrorAggregationStrategy: k8s.EarliestErrorAggregationStrategy, + } assert.Equal(t, expected, pytorchResourceHandler.GetProperties()) } From 43afb54c6e0fe07ed71408e8b3f03b82608fad0b Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Wed, 9 Oct 2024 19:18:34 +0000 Subject: [PATCH 12/29] Add multi file error aggregation strategy --- flytestdlib/storage/storage_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/flytestdlib/storage/storage_test.go b/flytestdlib/storage/storage_test.go index d4896f274b..3f369bab55 100644 --- a/flytestdlib/storage/storage_test.go +++ b/flytestdlib/storage/storage_test.go @@ -11,6 +11,14 @@ import ( "github.com/flyteorg/flyte/flytestdlib/promutils" ) +func TestDataReference_New(t *testing.T) { + scheme := "s3" + container := "container" + key := "path/to/file" + dataReference := NewDataReference(scheme, container, key) + assert.Equal(t, DataReference("s3://container/path/to/file"), dataReference) +} + func TestDataReference_Split(t *testing.T) { input := DataReference("s3://container/path/to/file") scheme, container, key, err := input.Split() From 22f39cd23675582d840d759d64cfdaeae9bf2c34 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Thu, 10 Oct 2024 05:23:31 +0000 Subject: [PATCH 13/29] Add multi error file support --- flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts | 6 +++--- flyteidl/gen/pb-go/flyteidl/core/errors.pb.go | 10 +++++----- flyteidl/gen/pb-js/flyteidl.d.ts | 8 ++++---- flyteidl/gen/pb-js/flyteidl.js | 20 +++++++++---------- .../gen/pb_python/flyteidl/core/errors_pb2.py | 2 +- .../pb_python/flyteidl/core/errors_pb2.pyi | 8 ++++---- flyteidl/gen/pb_rust/flyteidl.core.rs | 2 +- flyteidl/protos/flyteidl/core/errors.proto | 2 +- .../ioutils/remote_file_output_reader.go | 2 +- 9 files changed, 30 insertions(+), 30 deletions(-) diff --git 
a/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts b/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts index dac74e7e8f..892b885d4a 100644 --- a/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts +++ b/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts @@ -45,9 +45,9 @@ export class ContainerError extends Message { /** * Timestamp of the error * - * @generated from field: int64 timetsamp = 5; + * @generated from field: int64 timestamp = 5; */ - timetsamp = protoInt64.zero; + timestamp = protoInt64.zero; /** * Worker that generated the error @@ -68,7 +68,7 @@ export class ContainerError extends Message { { no: 2, name: "message", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 3, name: "kind", kind: "enum", T: proto3.getEnumType(ContainerError_Kind) }, { no: 4, name: "origin", kind: "enum", T: proto3.getEnumType(ExecutionError_ErrorKind) }, - { no: 5, name: "timetsamp", kind: "scalar", T: 3 /* ScalarType.INT64 */ }, + { no: 5, name: "timestamp", kind: "scalar", T: 3 /* ScalarType.INT64 */ }, { no: 6, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, ]); diff --git a/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go b/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go index 392c40d462..3c999b4bc6 100644 --- a/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go +++ b/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go @@ -83,7 +83,7 @@ type ContainerError struct { // Defines the origin of the error (system, user, unknown). 
Origin ExecutionError_ErrorKind `protobuf:"varint,4,opt,name=origin,proto3,enum=flyteidl.core.ExecutionError_ErrorKind" json:"origin,omitempty"` // Timestamp of the error - Timetsamp int64 `protobuf:"varint,5,opt,name=timetsamp,proto3" json:"timetsamp,omitempty"` + Timestamp int64 `protobuf:"varint,5,opt,name=timestamp,proto3" json:"timestamp,omitempty"` // Worker that generated the error Worker string `protobuf:"bytes,6,opt,name=worker,proto3" json:"worker,omitempty"` } @@ -148,9 +148,9 @@ func (x *ContainerError) GetOrigin() ExecutionError_ErrorKind { return ExecutionError_UNKNOWN } -func (x *ContainerError) GetTimetsamp() int64 { +func (x *ContainerError) GetTimestamp() int64 { if x != nil { - return x.Timetsamp + return x.Timestamp } return 0 } @@ -231,8 +231,8 @@ var file_flyteidl_core_errors_proto_rawDesc = []byte{ 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x06, 0x6f, 0x72, - 0x69, 0x67, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x74, 0x73, 0x61, 0x6d, - 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x74, 0x73, 0x61, + 0x69, 0x67, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, + 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2c, 0x0a, 0x04, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x13, 0x0a, 0x0f, 0x4e, 0x4f, 0x4e, 0x5f, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, diff --git a/flyteidl/gen/pb-js/flyteidl.d.ts b/flyteidl/gen/pb-js/flyteidl.d.ts index 34196b4ec5..0c161d2a89 100644 --- a/flyteidl/gen/pb-js/flyteidl.d.ts +++ b/flyteidl/gen/pb-js/flyteidl.d.ts @@ 
-7562,8 +7562,8 @@ export namespace flyteidl { /** ContainerError origin */ origin?: (flyteidl.core.ExecutionError.ErrorKind|null); - /** ContainerError timetsamp */ - timetsamp?: (Long|null); + /** ContainerError timestamp */ + timestamp?: (Long|null); /** ContainerError worker */ worker?: (string|null); @@ -7590,8 +7590,8 @@ export namespace flyteidl { /** ContainerError origin. */ public origin: flyteidl.core.ExecutionError.ErrorKind; - /** ContainerError timetsamp. */ - public timetsamp: Long; + /** ContainerError timestamp. */ + public timestamp: Long; /** ContainerError worker. */ public worker: string; diff --git a/flyteidl/gen/pb-js/flyteidl.js b/flyteidl/gen/pb-js/flyteidl.js index 2c2a2798f6..0906e12692 100644 --- a/flyteidl/gen/pb-js/flyteidl.js +++ b/flyteidl/gen/pb-js/flyteidl.js @@ -18285,7 +18285,7 @@ * @property {string|null} [message] ContainerError message * @property {flyteidl.core.ContainerError.Kind|null} [kind] ContainerError kind * @property {flyteidl.core.ExecutionError.ErrorKind|null} [origin] ContainerError origin - * @property {Long|null} [timetsamp] ContainerError timetsamp + * @property {Long|null} [timestamp] ContainerError timestamp * @property {string|null} [worker] ContainerError worker */ @@ -18337,12 +18337,12 @@ ContainerError.prototype.origin = 0; /** - * ContainerError timetsamp. - * @member {Long} timetsamp + * ContainerError timestamp. + * @member {Long} timestamp * @memberof flyteidl.core.ContainerError * @instance */ - ContainerError.prototype.timetsamp = $util.Long ? $util.Long.fromBits(0,0,false) : 0; + ContainerError.prototype.timestamp = $util.Long ? $util.Long.fromBits(0,0,false) : 0; /** * ContainerError worker. 
@@ -18384,8 +18384,8 @@ writer.uint32(/* id 3, wireType 0 =*/24).int32(message.kind); if (message.origin != null && message.hasOwnProperty("origin")) writer.uint32(/* id 4, wireType 0 =*/32).int32(message.origin); - if (message.timetsamp != null && message.hasOwnProperty("timetsamp")) - writer.uint32(/* id 5, wireType 0 =*/40).int64(message.timetsamp); + if (message.timestamp != null && message.hasOwnProperty("timestamp")) + writer.uint32(/* id 5, wireType 0 =*/40).int64(message.timestamp); if (message.worker != null && message.hasOwnProperty("worker")) writer.uint32(/* id 6, wireType 2 =*/50).string(message.worker); return writer; @@ -18422,7 +18422,7 @@ message.origin = reader.int32(); break; case 5: - message.timetsamp = reader.int64(); + message.timestamp = reader.int64(); break; case 6: message.worker = reader.string(); @@ -18469,9 +18469,9 @@ case 2: break; } - if (message.timetsamp != null && message.hasOwnProperty("timetsamp")) - if (!$util.isInteger(message.timetsamp) && !(message.timetsamp && $util.isInteger(message.timetsamp.low) && $util.isInteger(message.timetsamp.high))) - return "timetsamp: integer|Long expected"; + if (message.timestamp != null && message.hasOwnProperty("timestamp")) + if (!$util.isInteger(message.timestamp) && !(message.timestamp && $util.isInteger(message.timestamp.low) && $util.isInteger(message.timestamp.high))) + return "timestamp: integer|Long expected"; if (message.worker != null && message.hasOwnProperty("worker")) if (!$util.isString(message.worker)) return "worker: string expected"; diff --git a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py index cdd55fb6e3..1ea22c214a 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py +++ b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py @@ -14,7 +14,7 @@ from flyteidl.core import execution_pb2 as flyteidl_dot_core_dot_execution__pb2 -DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x66lyteidl/core/errors.proto\x12\rflyteidl.core\x1a\x1d\x66lyteidl/core/execution.proto\"\x9b\x02\n\x0e\x43ontainerError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x36\n\x04kind\x18\x03 \x01(\x0e\x32\".flyteidl.core.ContainerError.KindR\x04kind\x12?\n\x06origin\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x06origin\x12\x1c\n\ttimetsamp\x18\x05 \x01(\x03R\ttimetsamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\",\n\x04Kind\x12\x13\n\x0fNON_RECOVERABLE\x10\x00\x12\x0f\n\x0bRECOVERABLE\x10\x01\"D\n\rErrorDocument\x12\x33\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x1d.flyteidl.core.ContainerErrorR\x05\x65rrorB\xb1\x01\n\x11\x63om.flyteidl.coreB\x0b\x45rrorsProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x66lyteidl/core/errors.proto\x12\rflyteidl.core\x1a\x1d\x66lyteidl/core/execution.proto\"\x9b\x02\n\x0e\x43ontainerError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x36\n\x04kind\x18\x03 \x01(\x0e\x32\".flyteidl.core.ContainerError.KindR\x04kind\x12?\n\x06origin\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x06origin\x12\x1c\n\ttimestamp\x18\x05 \x01(\x03R\ttimestamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\",\n\x04Kind\x12\x13\n\x0fNON_RECOVERABLE\x10\x00\x12\x0f\n\x0bRECOVERABLE\x10\x01\"D\n\rErrorDocument\x12\x33\n\x05\x65rror\x18\x01 
\x01(\x0b\x32\x1d.flyteidl.core.ContainerErrorR\x05\x65rrorB\xb1\x01\n\x11\x63om.flyteidl.coreB\x0b\x45rrorsProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) diff --git a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi index 5ab22ddc47..707ad7e3cb 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi +++ b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi @@ -7,7 +7,7 @@ from typing import ClassVar as _ClassVar, Mapping as _Mapping, Optional as _Opti DESCRIPTOR: _descriptor.FileDescriptor class ContainerError(_message.Message): - __slots__ = ["code", "message", "kind", "origin", "timetsamp", "worker"] + __slots__ = ["code", "message", "kind", "origin", "timestamp", "worker"] class Kind(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = [] NON_RECOVERABLE: _ClassVar[ContainerError.Kind] @@ -18,15 +18,15 @@ class ContainerError(_message.Message): MESSAGE_FIELD_NUMBER: _ClassVar[int] KIND_FIELD_NUMBER: _ClassVar[int] ORIGIN_FIELD_NUMBER: _ClassVar[int] - TIMETSAMP_FIELD_NUMBER: _ClassVar[int] + TIMESTAMP_FIELD_NUMBER: _ClassVar[int] WORKER_FIELD_NUMBER: _ClassVar[int] code: str message: str kind: ContainerError.Kind origin: _execution_pb2.ExecutionError.ErrorKind - timetsamp: int + timestamp: int worker: str - def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., kind: _Optional[_Union[ContainerError.Kind, str]] = ..., origin: _Optional[_Union[_execution_pb2.ExecutionError.ErrorKind, str]] = ..., timetsamp: _Optional[int] = ..., worker: _Optional[str] = ...) -> None: ... 
+ def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., kind: _Optional[_Union[ContainerError.Kind, str]] = ..., origin: _Optional[_Union[_execution_pb2.ExecutionError.ErrorKind, str]] = ..., timestamp: _Optional[int] = ..., worker: _Optional[str] = ...) -> None: ... class ErrorDocument(_message.Message): __slots__ = ["error"] diff --git a/flyteidl/gen/pb_rust/flyteidl.core.rs b/flyteidl/gen/pb_rust/flyteidl.core.rs index 1f027ce308..36fa35054b 100644 --- a/flyteidl/gen/pb_rust/flyteidl.core.rs +++ b/flyteidl/gen/pb_rust/flyteidl.core.rs @@ -3096,7 +3096,7 @@ pub struct ContainerError { pub origin: i32, /// Timestamp of the error #[prost(int64, tag="5")] - pub timetsamp: i64, + pub timestamp: i64, /// Worker that generated the error #[prost(string, tag="6")] pub worker: ::prost::alloc::string::String, diff --git a/flyteidl/protos/flyteidl/core/errors.proto b/flyteidl/protos/flyteidl/core/errors.proto index b3243e72e4..c0c9cadc2a 100644 --- a/flyteidl/protos/flyteidl/core/errors.proto +++ b/flyteidl/protos/flyteidl/core/errors.proto @@ -27,7 +27,7 @@ message ContainerError { ExecutionError.ErrorKind origin = 4; // Timestamp of the error - int64 timetsamp = 5; + int64 timestamp = 5; // Worker that generated the error string worker = 6; diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 42197e220c..fb5f52452e 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -195,7 +195,7 @@ func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.Execution var earliestTimestamp int64 = math.MaxInt64 earliestExecutionError := io.ExecutionError{} for _, errorFileAndDoc := range errorFileAndDocs { - timestamp := errorFileAndDoc.errorDoc.Error.GetTimetsamp() + timestamp := 
errorFileAndDoc.errorDoc.Error.GetTimestamp() if earliestTimestamp >= timestamp { earliestExecutionError = errorDoc2ExecutionError(errorFileAndDoc.errorDoc, errorFileAndDoc.errorFilePath) earliestTimestamp = timestamp From 218b11a9b60262821ca43df00f9ee4f140149030 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 15 Oct 2024 20:57:49 +0000 Subject: [PATCH 14/29] Code reviews --- .../gen/pb-es/flyteidl/core/execution_pb.ts | 14 +- .../gen/pb-go/flyteidl/core/execution.pb.go | 125 +++++++++-------- .../flyteidl/service/admin.swagger.json | 5 + flyteidl/gen/pb-js/flyteidl.d.ts | 6 + flyteidl/gen/pb-js/flyteidl.js | 19 ++- .../pb_python/flyteidl/core/execution_pb2.py | 28 ++-- .../pb_python/flyteidl/core/execution_pb2.pyi | 6 +- flyteidl/gen/pb_rust/flyteidl.core.rs | 5 +- flyteidl/protos/flyteidl/core/execution.proto | 4 +- .../ioutils/remote_file_output_reader.go | 127 ++++++++---------- .../ioutils/remote_file_output_reader_test.go | 9 +- .../go/tasks/plugins/array/catalog.go | 6 +- .../go/tasks/plugins/array/outputs.go | 5 +- flyteplugins/go/tasks/plugins/testing/echo.go | 5 +- .../go/tasks/plugins/webapi/agent/plugin.go | 5 +- .../tasks/plugins/webapi/databricks/plugin.go | 5 +- .../pkg/controller/nodes/array/handler.go | 13 +- flytepropeller/pkg/controller/nodes/cache.go | 5 +- .../pkg/controller/workflow/executor_test.go | 6 +- 19 files changed, 237 insertions(+), 161 deletions(-) diff --git a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts index aaf3969007..5accf3a1b4 100644 --- a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts +++ b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts @@ -4,7 +4,7 @@ // @ts-nocheck import type { BinaryReadOptions, FieldList, JsonReadOptions, JsonValue, PartialMessage, PlainMessage } from "@bufbuild/protobuf"; -import { Duration, Message, proto3 } from "@bufbuild/protobuf"; +import { Duration, Message, proto3, protoInt64 } from "@bufbuild/protobuf"; /** * Indicates various 
phases of Workflow Execution @@ -341,10 +341,17 @@ export class ExecutionError extends Message { */ kind = ExecutionError_ErrorKind.UNKNOWN; + /** + * Timestamp of the error + * + * @generated from field: int64 timestamp = 5; + */ + timestamp = protoInt64.zero; + /** * Worker that generated the error * - * @generated from field: string worker = 5; + * @generated from field: string worker = 6; */ worker = ""; @@ -360,7 +367,8 @@ export class ExecutionError extends Message { { no: 2, name: "message", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 3, name: "error_uri", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 4, name: "kind", kind: "enum", T: proto3.getEnumType(ExecutionError_ErrorKind) }, - { no: 5, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, + { no: 5, name: "timestamp", kind: "scalar", T: 3 /* ScalarType.INT64 */ }, + { no: 6, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, ]); static fromBinary(bytes: Uint8Array, options?: Partial): ExecutionError { diff --git a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go index d3b58f9f69..ebe077194e 100644 --- a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go +++ b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go @@ -514,8 +514,10 @@ type ExecutionError struct { // Full error contents accessible via a URI ErrorUri string `protobuf:"bytes,3,opt,name=error_uri,json=errorUri,proto3" json:"error_uri,omitempty"` Kind ExecutionError_ErrorKind `protobuf:"varint,4,opt,name=kind,proto3,enum=flyteidl.core.ExecutionError_ErrorKind" json:"kind,omitempty"` + // Timestamp of the error + Timestamp int64 `protobuf:"varint,5,opt,name=timestamp,proto3" json:"timestamp,omitempty"` // Worker that generated the error - Worker string `protobuf:"bytes,5,opt,name=worker,proto3" json:"worker,omitempty"` + Worker string `protobuf:"bytes,6,opt,name=worker,proto3" json:"worker,omitempty"` } func (x *ExecutionError) Reset() { @@ -578,6 +580,13 @@ 
func (x *ExecutionError) GetKind() ExecutionError_ErrorKind { return ExecutionError_UNKNOWN } +func (x *ExecutionError) GetTimestamp() int64 { + if x != nil { + return x.Timestamp + } + return 0 +} + func (x *ExecutionError) GetWorker() string { if x != nil { return x.Worker @@ -844,7 +853,7 @@ var file_flyteidl_core_execution_proto_rawDesc = []byte{ 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x05, 0x12, 0x10, 0x0a, 0x0c, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x49, 0x4e, 0x47, 0x10, 0x06, 0x12, 0x19, 0x0a, 0x15, 0x57, 0x41, 0x49, 0x54, 0x49, 0x4e, 0x47, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x52, - 0x45, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x53, 0x10, 0x07, 0x22, 0xe0, 0x01, 0x0a, 0x0e, 0x45, + 0x45, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x53, 0x10, 0x07, 0x22, 0xfe, 0x01, 0x0a, 0x0e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, @@ -854,61 +863,63 @@ var file_flyteidl_core_execution_proto_rawDesc = []byte{ 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, - 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2e, 0x0a, - 0x09, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, - 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x55, 0x53, 0x45, 0x52, 0x10, - 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, 0x10, 0x02, 0x22, 0xb2, 0x02, - 0x0a, 0x07, 
0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x6e, - 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, - 0x4b, 0x0a, 0x0e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, - 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, - 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x2e, - 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, - 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x2b, 0x0a, 0x03, - 0x74, 0x74, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, - 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, - 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x03, 0x74, 0x74, 0x6c, 0x12, 0x2a, 0x0a, 0x10, 0x53, 0x68, 0x6f, - 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, - 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x2a, 0x0a, 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, - 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, - 0x64, 0x22, 0x2f, 0x0a, 0x0d, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, - 0x61, 0x74, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, - 0x07, 0x0a, 0x03, 0x43, 0x53, 0x56, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x4a, 0x53, 0x4f, 0x4e, - 0x10, 0x02, 0x22, 0x5a, 0x0a, 0x14, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, - 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, 
0x65, 0x63, 0x12, 0x42, 0x0a, 0x0f, 0x71, 0x75, - 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, 0x5f, 0x62, 0x75, 0x64, 0x67, 0x65, 0x74, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, - 0x71, 0x75, 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x64, 0x67, 0x65, 0x74, 0x22, 0xce, - 0x01, 0x0a, 0x10, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, - 0x69, 0x63, 0x65, 0x12, 0x3a, 0x0a, 0x04, 0x74, 0x69, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0e, 0x32, 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, - 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, - 0x63, 0x65, 0x2e, 0x54, 0x69, 0x65, 0x72, 0x48, 0x00, 0x52, 0x04, 0x74, 0x69, 0x65, 0x72, 0x12, - 0x39, 0x0a, 0x04, 0x73, 0x70, 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, - 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, - 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, - 0x65, 0x63, 0x48, 0x00, 0x52, 0x04, 0x73, 0x70, 0x65, 0x63, 0x22, 0x34, 0x0a, 0x04, 0x54, 0x69, - 0x65, 0x72, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, - 0x00, 0x12, 0x08, 0x0a, 0x04, 0x48, 0x49, 0x47, 0x48, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4d, - 0x45, 0x44, 0x49, 0x55, 0x4d, 0x10, 0x02, 0x12, 0x07, 0x0a, 0x03, 0x4c, 0x4f, 0x57, 0x10, 0x03, - 0x42, 0x0d, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, - 0xb4, 0x01, 0x0a, 0x11, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, - 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 
0x62, 0x2e, - 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, - 0x74, 0x65, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, - 0x70, 0x62, 0x2d, 0x67, 0x6f, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, - 0x6f, 0x72, 0x65, 0xa2, 0x02, 0x03, 0x46, 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, - 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x43, 0x6f, 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, - 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, - 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, - 0x3a, 0x3a, 0x43, 0x6f, 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, + 0x6d, 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, + 0x61, 0x6d, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2e, 0x0a, 0x09, 0x45, + 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, + 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x55, 0x53, 0x45, 0x52, 0x10, 0x01, 0x12, + 0x0a, 0x0a, 0x06, 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, 0x10, 0x02, 0x22, 0xb2, 0x02, 0x0a, 0x07, + 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4b, 0x0a, + 0x0e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 
0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, + 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x2e, 0x4d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, 0x65, 0x73, + 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x2b, 0x0a, 0x03, 0x74, 0x74, + 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x52, 0x03, 0x74, 0x74, 0x6c, 0x12, 0x2a, 0x0a, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, + 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x08, 0x52, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, + 0x69, 0x6e, 0x67, 0x12, 0x2a, 0x0a, 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, 0x65, 0x46, + 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x48, + 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x22, + 0x2f, 0x0a, 0x0d, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, + 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, + 0x03, 0x43, 0x53, 0x56, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x4a, 0x53, 0x4f, 0x4e, 0x10, 0x02, + 0x22, 0x5a, 0x0a, 0x14, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, + 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, 0x65, 0x63, 0x12, 0x42, 0x0a, 0x0f, 0x71, 0x75, 0x65, 0x75, + 0x65, 0x69, 0x6e, 0x67, 0x5f, 0x62, 0x75, 0x64, 0x67, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, 0x71, 0x75, + 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x64, 0x67, 0x65, 0x74, 
0x22, 0xce, 0x01, 0x0a, + 0x10, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x12, 0x3a, 0x0a, 0x04, 0x74, 0x69, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, + 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, + 0x2e, 0x54, 0x69, 0x65, 0x72, 0x48, 0x00, 0x52, 0x04, 0x74, 0x69, 0x65, 0x72, 0x12, 0x39, 0x0a, + 0x04, 0x73, 0x70, 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x66, 0x6c, + 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, + 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, 0x65, 0x63, + 0x48, 0x00, 0x52, 0x04, 0x73, 0x70, 0x65, 0x63, 0x22, 0x34, 0x0a, 0x04, 0x54, 0x69, 0x65, 0x72, + 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, + 0x08, 0x0a, 0x04, 0x48, 0x49, 0x47, 0x48, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4d, 0x45, 0x44, + 0x49, 0x55, 0x4d, 0x10, 0x02, 0x12, 0x07, 0x0a, 0x03, 0x4c, 0x4f, 0x57, 0x10, 0x03, 0x42, 0x0d, + 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0xb4, 0x01, + 0x0a, 0x11, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, + 0x6f, 0x72, 0x65, 0x42, 0x0e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x50, 0x72, + 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, + 0x6d, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, + 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, + 0x2d, 0x67, 0x6f, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, + 0x65, 0xa2, 0x02, 0x03, 0x46, 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, + 0x64, 0x6c, 
0x2e, 0x43, 0x6f, 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, + 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, + 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0xea, 0x02, 0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, + 0x43, 0x6f, 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json b/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json index 7084c0b7f6..bc33080a5f 100644 --- a/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json +++ b/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json @@ -7154,6 +7154,11 @@ "kind": { "$ref": "#/definitions/ExecutionErrorErrorKind" }, + "timestamp": { + "type": "string", + "format": "int64", + "title": "Timestamp of the error" + }, "worker": { "type": "string", "title": "Worker that generated the error" diff --git a/flyteidl/gen/pb-js/flyteidl.d.ts b/flyteidl/gen/pb-js/flyteidl.d.ts index 0c161d2a89..ee68f70557 100644 --- a/flyteidl/gen/pb-js/flyteidl.d.ts +++ b/flyteidl/gen/pb-js/flyteidl.d.ts @@ -5749,6 +5749,9 @@ export namespace flyteidl { /** ExecutionError kind */ kind?: (flyteidl.core.ExecutionError.ErrorKind|null); + /** ExecutionError timestamp */ + timestamp?: (Long|null); + /** ExecutionError worker */ worker?: (string|null); } @@ -5774,6 +5777,9 @@ export namespace flyteidl { /** ExecutionError kind. */ public kind: flyteidl.core.ExecutionError.ErrorKind; + /** ExecutionError timestamp. */ + public timestamp: Long; + /** ExecutionError worker. 
*/ public worker: string; diff --git a/flyteidl/gen/pb-js/flyteidl.js b/flyteidl/gen/pb-js/flyteidl.js index 0906e12692..58689003b1 100644 --- a/flyteidl/gen/pb-js/flyteidl.js +++ b/flyteidl/gen/pb-js/flyteidl.js @@ -13823,6 +13823,7 @@ * @property {string|null} [message] ExecutionError message * @property {string|null} [errorUri] ExecutionError errorUri * @property {flyteidl.core.ExecutionError.ErrorKind|null} [kind] ExecutionError kind + * @property {Long|null} [timestamp] ExecutionError timestamp * @property {string|null} [worker] ExecutionError worker */ @@ -13873,6 +13874,14 @@ */ ExecutionError.prototype.kind = 0; + /** + * ExecutionError timestamp. + * @member {Long} timestamp + * @memberof flyteidl.core.ExecutionError + * @instance + */ + ExecutionError.prototype.timestamp = $util.Long ? $util.Long.fromBits(0,0,false) : 0; + /** * ExecutionError worker. * @member {string} worker @@ -13913,8 +13922,10 @@ writer.uint32(/* id 3, wireType 2 =*/26).string(message.errorUri); if (message.kind != null && message.hasOwnProperty("kind")) writer.uint32(/* id 4, wireType 0 =*/32).int32(message.kind); + if (message.timestamp != null && message.hasOwnProperty("timestamp")) + writer.uint32(/* id 5, wireType 0 =*/40).int64(message.timestamp); if (message.worker != null && message.hasOwnProperty("worker")) - writer.uint32(/* id 5, wireType 2 =*/42).string(message.worker); + writer.uint32(/* id 6, wireType 2 =*/50).string(message.worker); return writer; }; @@ -13949,6 +13960,9 @@ message.kind = reader.int32(); break; case 5: + message.timestamp = reader.int64(); + break; + case 6: message.worker = reader.string(); break; default: @@ -13988,6 +14002,9 @@ case 2: break; } + if (message.timestamp != null && message.hasOwnProperty("timestamp")) + if (!$util.isInteger(message.timestamp) && !(message.timestamp && $util.isInteger(message.timestamp.low) && $util.isInteger(message.timestamp.high))) + return "timestamp: integer|Long expected"; if (message.worker != null && 
message.hasOwnProperty("worker")) if (!$util.isString(message.worker)) return "worker: string expected"; diff --git a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py index 7165379912..7b1d94c774 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py +++ b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py @@ -14,7 +14,7 @@ from google.protobuf import duration_pb2 as google_dot_protobuf_dot_duration__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\xe0\x01\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\x12\x16\n\x06worker\x18\x05 
\x01(\tR\x06worker\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 \x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 \x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 \x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\xfe\x01\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\x12\x1c\n\ttimestamp\x18\x05 \x01(\x03R\ttimestamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 \x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 
\x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 \x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -36,17 +36,17 @@ _globals['_TASKEXECUTION_PHASE']._serialized_start=454 _globals['_TASKEXECUTION_PHASE']._serialized_end=586 _globals['_EXECUTIONERROR']._serialized_start=589 - _globals['_EXECUTIONERROR']._serialized_end=813 - _globals['_EXECUTIONERROR_ERRORKIND']._serialized_start=767 - _globals['_EXECUTIONERROR_ERRORKIND']._serialized_end=813 - _globals['_TASKLOG']._serialized_start=816 - _globals['_TASKLOG']._serialized_end=1122 - _globals['_TASKLOG_MESSAGEFORMAT']._serialized_start=1075 - _globals['_TASKLOG_MESSAGEFORMAT']._serialized_end=1122 - _globals['_QUALITYOFSERVICESPEC']._serialized_start=1124 - _globals['_QUALITYOFSERVICESPEC']._serialized_end=1214 - _globals['_QUALITYOFSERVICE']._serialized_start=1217 - _globals['_QUALITYOFSERVICE']._serialized_end=1423 - _globals['_QUALITYOFSERVICE_TIER']._serialized_start=1356 - _globals['_QUALITYOFSERVICE_TIER']._serialized_end=1408 + _globals['_EXECUTIONERROR']._serialized_end=843 + 
_globals['_EXECUTIONERROR_ERRORKIND']._serialized_start=797 + _globals['_EXECUTIONERROR_ERRORKIND']._serialized_end=843 + _globals['_TASKLOG']._serialized_start=846 + _globals['_TASKLOG']._serialized_end=1152 + _globals['_TASKLOG_MESSAGEFORMAT']._serialized_start=1105 + _globals['_TASKLOG_MESSAGEFORMAT']._serialized_end=1152 + _globals['_QUALITYOFSERVICESPEC']._serialized_start=1154 + _globals['_QUALITYOFSERVICESPEC']._serialized_end=1244 + _globals['_QUALITYOFSERVICE']._serialized_start=1247 + _globals['_QUALITYOFSERVICE']._serialized_end=1453 + _globals['_QUALITYOFSERVICE_TIER']._serialized_start=1386 + _globals['_QUALITYOFSERVICE_TIER']._serialized_end=1438 # @@protoc_insertion_point(module_scope) diff --git a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi index 09b01791da..ed246353cf 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi +++ b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi @@ -83,7 +83,7 @@ class TaskExecution(_message.Message): def __init__(self) -> None: ... class ExecutionError(_message.Message): - __slots__ = ["code", "message", "error_uri", "kind", "worker"] + __slots__ = ["code", "message", "error_uri", "kind", "timestamp", "worker"] class ErrorKind(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = [] UNKNOWN: _ClassVar[ExecutionError.ErrorKind] @@ -96,13 +96,15 @@ class ExecutionError(_message.Message): MESSAGE_FIELD_NUMBER: _ClassVar[int] ERROR_URI_FIELD_NUMBER: _ClassVar[int] KIND_FIELD_NUMBER: _ClassVar[int] + TIMESTAMP_FIELD_NUMBER: _ClassVar[int] WORKER_FIELD_NUMBER: _ClassVar[int] code: str message: str error_uri: str kind: ExecutionError.ErrorKind + timestamp: int worker: str - def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., error_uri: _Optional[str] = ..., kind: _Optional[_Union[ExecutionError.ErrorKind, str]] = ..., worker: _Optional[str] = ...) -> None: ... 
+ def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., error_uri: _Optional[str] = ..., kind: _Optional[_Union[ExecutionError.ErrorKind, str]] = ..., timestamp: _Optional[int] = ..., worker: _Optional[str] = ...) -> None: ... class TaskLog(_message.Message): __slots__ = ["uri", "name", "message_format", "ttl", "ShowWhilePending", "HideOnceFinished"] diff --git a/flyteidl/gen/pb_rust/flyteidl.core.rs b/flyteidl/gen/pb_rust/flyteidl.core.rs index 36fa35054b..ab2bcdfb9d 100644 --- a/flyteidl/gen/pb_rust/flyteidl.core.rs +++ b/flyteidl/gen/pb_rust/flyteidl.core.rs @@ -2129,8 +2129,11 @@ pub struct ExecutionError { pub error_uri: ::prost::alloc::string::String, #[prost(enumeration="execution_error::ErrorKind", tag="4")] pub kind: i32, + /// Timestamp of the error + #[prost(int64, tag="5")] + pub timestamp: i64, /// Worker that generated the error - #[prost(string, tag="5")] + #[prost(string, tag="6")] pub worker: ::prost::alloc::string::String, } /// Nested message and enum types in `ExecutionError`. 
diff --git a/flyteidl/protos/flyteidl/core/execution.proto b/flyteidl/protos/flyteidl/core/execution.proto index 81fec315c2..c92fcbfba8 100644 --- a/flyteidl/protos/flyteidl/core/execution.proto +++ b/flyteidl/protos/flyteidl/core/execution.proto @@ -73,8 +73,10 @@ message ExecutionError { SYSTEM = 2; } ErrorKind kind = 4; + // Timestamp of the error + int64 timestamp = 5; // Worker that generated the error - string worker = 5; + string worker = 6; } // Log information for the task that is specific to a log sink diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index fb5f52452e..a45373f625 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -30,6 +30,8 @@ type SingleFileErrorRetriever struct { errorFilePath storage.DataReference } +const errorFileNotFoundErrorCode = "ErrorFileNotFound" + func NewSingleFileErrorRetriever(errorFilePath storage.DataReference, store storage.ComposedProtobufStore, maxPayloadSize int64) *SingleFileErrorRetriever { return &SingleFileErrorRetriever{ baseErrorRetriever: baseErrorRetriever{ @@ -74,10 +76,11 @@ func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage } executionError := io.ExecutionError{ ExecutionError: &core.ExecutionError{ - Code: errorDoc.Error.Code, - Message: errorDoc.Error.Message, - Kind: errorDoc.Error.Origin, - Worker: errorDoc.Error.Worker, + Code: errorDoc.Error.Code, + Message: errorDoc.Error.Message, + Kind: errorDoc.Error.Origin, + Timestamp: errorDoc.Error.Timestamp, + Worker: errorDoc.Error.Worker, }, } @@ -96,7 +99,7 @@ func (s *SingleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionEr return io.ExecutionError{ IsRecoverable: true, ExecutionError: &core.ExecutionError{ - Code: "ErrorFileNotFound", + Code: errorFileNotFoundErrorCode, 
Message: err.Error(), Kind: core.ExecutionError_SYSTEM, }, @@ -110,42 +113,24 @@ func (s *SingleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionEr type EarliestFileErrorRetriever struct { baseErrorRetriever - errorDirPath storage.DataReference - canonicalErrorFilename string -} - -func (e *EarliestFileErrorRetriever) parseErrorFilename() (errorFilePathPrefix storage.DataReference, errorFileExtension string, err error) { - // If the canonical error file name is error.pb, we expect multiple error files - // to have name error.pb - pieces := strings.Split(e.canonicalErrorFilename, ".") - if len(pieces) != 2 { - err = errors.Errorf("expected canonical error filename to have a single dot (.), got %d", len(pieces)) - return - } - errorFilePrefix := pieces[0] - scheme, container, key, _ := e.errorDirPath.Split() - errorFilePathPrefix = storage.NewDataReference(scheme, container, filepath.Join(key, errorFilePrefix)) - errorFileExtension = fmt.Sprintf(".%s", pieces[1]) - return + errorDirPath storage.DataReference + errorFilePathPrefix storage.DataReference + errorFileExtension string } func (e *EarliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) { - errorFilePathPrefix, errorFileExtension, err := e.parseErrorFilename() - if err != nil { - return false, errors.Wrapf(err, "failed to parse canonical error filename @[%s]", e.canonicalErrorFilename) - } hasError := false const maxItems = 1000 cursor := storage.NewCursorAtStart() for cursor != storage.NewCursorAtEnd() { var err error var errorFilePaths []storage.DataReference - errorFilePaths, cursor, err = e.store.List(ctx, errorFilePathPrefix, maxItems, cursor) + errorFilePaths, cursor, err = e.store.List(ctx, e.errorFilePathPrefix, maxItems, cursor) if err != nil { return false, errors.Wrapf(err, "failed to list error files @[%s]", e.errorDirPath) } for _, errorFilePath := range errorFilePaths { - if strings.HasSuffix(errorFilePath.String(), errorFileExtension) { + if 
strings.HasSuffix(errorFilePath.String(), e.errorFileExtension) { metadata, err := e.store.Head(ctx, errorFilePath) if err != nil { return false, errors.Wrapf(err, "failed to read error file @[%s]", errorFilePath) @@ -162,69 +147,69 @@ func (e *EarliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) } func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { - errorFilePathPrefix, errorFileExtension, err := e.parseErrorFilename() - if err != nil { - return io.ExecutionError{}, errors.Wrapf(err, "failed to parse canonical error filename @[%s]", e.canonicalErrorFilename) - } + var earliestTimestamp int64 = math.MaxInt64 + earliestExecutionError := io.ExecutionError{} const maxItems = 1000 cursor := storage.NewCursorAtStart() - type ErrorFileAndDocument struct { - errorFilePath storage.DataReference - errorDoc *core.ErrorDocument - } - var errorFileAndDocs []ErrorFileAndDocument for cursor != storage.NewCursorAtEnd() { var err error var errorFilePaths []storage.DataReference - errorFilePaths, cursor, err = e.store.List(ctx, errorFilePathPrefix, maxItems, cursor) + errorFilePaths, cursor, err = e.store.List(ctx, e.errorFilePathPrefix, maxItems, cursor) if err != nil { return io.ExecutionError{}, errors.Wrapf(err, "failed to list error files @[%s]", e.errorDirPath) } for _, errorFilePath := range errorFilePaths { - if strings.HasSuffix(errorFilePath.String(), errorFileExtension) { - errorDoc := &core.ErrorDocument{} - err := e.store.ReadProtobuf(ctx, errorFilePath, errorDoc) - if err != nil { - return io.ExecutionError{}, errors.Wrapf(err, "failed to read error file @[%s]", errorFilePath.String()) - } - errorFileAndDocs = append(errorFileAndDocs, ErrorFileAndDocument{errorFilePath: errorFilePath, errorDoc: errorDoc}) + if !strings.HasSuffix(errorFilePath.String(), e.errorFileExtension) { + continue + } + errorDoc := &core.ErrorDocument{} + err := e.store.ReadProtobuf(ctx, errorFilePath, errorDoc) + if err != nil { + 
return io.ExecutionError{}, errors.Wrapf(err, "failed to read error file @[%s]", errorFilePath.String()) + } + timestamp := errorDoc.Error.GetTimestamp() + if earliestTimestamp >= timestamp { + earliestExecutionError = errorDoc2ExecutionError(errorDoc, errorFilePath) + earliestTimestamp = timestamp } - } - } - - var earliestTimestamp int64 = math.MaxInt64 - earliestExecutionError := io.ExecutionError{} - for _, errorFileAndDoc := range errorFileAndDocs { - timestamp := errorFileAndDoc.errorDoc.Error.GetTimestamp() - if earliestTimestamp >= timestamp { - earliestExecutionError = errorDoc2ExecutionError(errorFileAndDoc.errorDoc, errorFileAndDoc.errorFilePath) - earliestTimestamp = timestamp } } return earliestExecutionError, nil } -func NewEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonicalErrorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) *EarliestFileErrorRetriever { +func NewEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonicalErrorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (*EarliestFileErrorRetriever, error) { + // If the canonical error file name is error.pb, we expect multiple error files + // to have name error.pb + pieces := strings.Split(canonicalErrorFilename, ".") + if len(pieces) != 2 { + return nil, errors.Errorf("expected canonical error filename to have a single dot (.), got %d", len(pieces)) + } + errorFilePrefix := pieces[0] + scheme, container, key, _ := errorDirPath.Split() + errorFilePathPrefix := storage.NewDataReference(scheme, container, filepath.Join(key, errorFilePrefix)) + errorFileExtension := fmt.Sprintf(".%s", pieces[1]) + return &EarliestFileErrorRetriever{ baseErrorRetriever: baseErrorRetriever{ store: store, maxPayloadSize: maxPayloadSize, }, - errorDirPath: errorDirPath, - canonicalErrorFilename: canonicalErrorFilename, - } + errorDirPath: errorDirPath, + errorFilePathPrefix: errorFilePathPrefix, + errorFileExtension: 
errorFileExtension, + }, nil } -func NewErrorRetriever(errorAggregationStrategy k8s.ErrorAggregationStrategy, errorDirPath storage.DataReference, errorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) ErrorRetriever { +func NewErrorRetriever(errorAggregationStrategy k8s.ErrorAggregationStrategy, errorDirPath storage.DataReference, errorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (ErrorRetriever, error) { if errorAggregationStrategy == k8s.DefaultErrorAggregationStrategy { scheme, container, key, _ := errorDirPath.Split() errorFilePath := storage.NewDataReference(scheme, container, filepath.Join(key, errorFilename)) - return NewSingleFileErrorRetriever(errorFilePath, store, maxPayloadSize) + return NewSingleFileErrorRetriever(errorFilePath, store, maxPayloadSize), nil } if errorAggregationStrategy == k8s.EarliestErrorAggregationStrategy { return NewEarliestFileErrorRetriever(errorDirPath, errorFilename, store, maxPayloadSize) } - return nil + return nil, errors.Errorf("unknown error aggregation strategy: %v", errorAggregationStrategy) } type RemoteFileOutputReader struct { @@ -290,25 +275,31 @@ func (r RemoteFileOutputReader) DeckExists(ctx context.Context) (bool, error) { return md.Exists(), nil } -func NewRemoteFileOutputReader(context context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64) RemoteFileOutputReader { +func NewRemoteFileOutputReader(context context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64) (*RemoteFileOutputReader, error) { return NewRemoteFileOutputReaderWithErrorAggregationStrategy(context, store, outPaths, maxDatasetSize, k8s.DefaultErrorAggregationStrategy) } -func NewRemoteFileOutputReaderWithErrorAggregationStrategy(_ context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64, errorAggregationStrategy k8s.ErrorAggregationStrategy) 
RemoteFileOutputReader { +func NewRemoteFileOutputReaderWithErrorAggregationStrategy(_ context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64, errorAggregationStrategy k8s.ErrorAggregationStrategy) (*RemoteFileOutputReader, error) { // Note: even though the data store retrieval checks against GetLimitMegabytes, there might be external // storage implementations, so we keep this check here as well. maxPayloadSize := maxDatasetSize if maxPayloadSize == 0 { maxPayloadSize = storage.GetConfig().Limits.GetLimitMegabytes * 1024 * 1024 } - scheme, container, key, _ := outPaths.GetErrorPath().Split() + scheme, container, key, err := outPaths.GetErrorPath().Split() + if err != nil { + return nil, errors.Wrapf(err, "failed to parse error path %s", outPaths.GetErrorPath()) + } errorFilename := filepath.Base(key) errorDirPath := storage.NewDataReference(scheme, container, filepath.Dir(key)) - errorRetriever := NewErrorRetriever(errorAggregationStrategy, errorDirPath, errorFilename, store, maxPayloadSize) - return RemoteFileOutputReader{ + errorRetriever, err := NewErrorRetriever(errorAggregationStrategy, errorDirPath, errorFilename, store, maxPayloadSize) + if err != nil { + return nil, errors.Wrapf(err, "failed to create remote output reader with error aggregation strategy %v", errorAggregationStrategy) + } + return &RemoteFileOutputReader{ outPath: outPaths, store: store, maxPayloadSize: maxPayloadSize, errorRetriever: errorRetriever, - } + }, nil } diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go index f7e8ac102a..5f9652cb32 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go @@ -69,12 +69,13 @@ func TestReadOrigin(t *testing.T) { }, nil) maxPayloadSize := int64(0) - r := 
NewRemoteFileOutputReader( + r, err := NewRemoteFileOutputReader( ctx, store, opath, maxPayloadSize, ) + assert.NoError(t, err) ee, err := r.ReadError(ctx) assert.NoError(t, err) @@ -103,12 +104,13 @@ func TestReadOrigin(t *testing.T) { }).Return(nil) maxPayloadSize := int64(0) - r := NewRemoteFileOutputReader( + r, err := NewRemoteFileOutputReader( ctx, store, opath, maxPayloadSize, ) + assert.NoError(t, err) ee, err := r.ReadError(ctx) assert.NoError(t, err) @@ -155,13 +157,14 @@ func TestReadOrigin(t *testing.T) { }, nil) maxPayloadSize := int64(0) - r := NewRemoteFileOutputReaderWithErrorAggregationStrategy( + r, err := NewRemoteFileOutputReaderWithErrorAggregationStrategy( ctx, store, outputPaths, maxPayloadSize, k8s.EarliestErrorAggregationStrategy, ) + assert.NoError(t, err) hasError, err := r.IsError(ctx) assert.NoError(t, err) diff --git a/flyteplugins/go/tasks/plugins/array/catalog.go b/flyteplugins/go/tasks/plugins/array/catalog.go index d6bf5e8820..70ba9db78f 100644 --- a/flyteplugins/go/tasks/plugins/array/catalog.go +++ b/flyteplugins/go/tasks/plugins/array/catalog.go @@ -567,5 +567,9 @@ func ConstructOutputReader(ctx context.Context, dataStore *storage.DataStore, ou // checkpoint paths are not computed here because this function is only called when writing // existing cached outputs. if this functionality changes this will need to be revisited. 
outputPath := ioutils.NewCheckpointRemoteFilePaths(ctx, dataStore, dataReference, ioutils.NewRawOutputPaths(ctx, outputSandbox), "") - return ioutils.NewRemoteFileOutputReader(ctx, dataStore, outputPath, int64(999999999)), nil + reader, err := ioutils.NewRemoteFileOutputReader(ctx, dataStore, outputPath, int64(999999999)) + if err != nil { + return nil, err + } + return reader, nil } diff --git a/flyteplugins/go/tasks/plugins/array/outputs.go b/flyteplugins/go/tasks/plugins/array/outputs.go index cb07fb0de1..1c074581c0 100644 --- a/flyteplugins/go/tasks/plugins/array/outputs.go +++ b/flyteplugins/go/tasks/plugins/array/outputs.go @@ -225,7 +225,10 @@ func AssembleFinalOutputs(ctx context.Context, assemblyQueue OutputAssembler, tC switch w.Status() { case workqueue.WorkStatusSucceeded: - or := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) + or, err := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) + if err != nil { + return nil, err + } if err = tCtx.OutputWriter().Put(ctx, or); err != nil { return nil, err } diff --git a/flyteplugins/go/tasks/plugins/testing/echo.go b/flyteplugins/go/tasks/plugins/testing/echo.go index 09c4dc53b1..96c7f1c031 100644 --- a/flyteplugins/go/tasks/plugins/testing/echo.go +++ b/flyteplugins/go/tasks/plugins/testing/echo.go @@ -116,7 +116,10 @@ func copyInputsToOutputs(ctx context.Context, tCtx core.TaskExecutionContext) (c return core.UnknownTransition, err } - or := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) + or, err := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) + if err != nil { + return core.UnknownTransition, err + } if err = tCtx.OutputWriter().Put(ctx, or); err != nil { return core.UnknownTransition, err } diff --git a/flyteplugins/go/tasks/plugins/webapi/agent/plugin.go b/flyteplugins/go/tasks/plugins/webapi/agent/plugin.go index a7b2a3d1d4..403e49c20b 100644 --- 
a/flyteplugins/go/tasks/plugins/webapi/agent/plugin.go +++ b/flyteplugins/go/tasks/plugins/webapi/agent/plugin.go @@ -370,7 +370,10 @@ func writeOutput(ctx context.Context, taskCtx webapi.StatusContext, outputs *fly opReader = ioutils.NewInMemoryOutputReader(outputs, nil, nil) } else { logger.Debugf(ctx, "AgentDeployment didn't return any output, assuming file based outputs.") - opReader = ioutils.NewRemoteFileOutputReader(ctx, taskCtx.DataStore(), taskCtx.OutputWriter(), 0) + opReader, err = ioutils.NewRemoteFileOutputReader(ctx, taskCtx.DataStore(), taskCtx.OutputWriter(), 0) + if err != nil { + return err + } } return taskCtx.OutputWriter().Put(ctx, opReader) } diff --git a/flyteplugins/go/tasks/plugins/webapi/databricks/plugin.go b/flyteplugins/go/tasks/plugins/webapi/databricks/plugin.go index 6ae9a1dbe5..1390be11ab 100644 --- a/flyteplugins/go/tasks/plugins/webapi/databricks/plugin.go +++ b/flyteplugins/go/tasks/plugins/webapi/databricks/plugin.go @@ -304,7 +304,10 @@ func writeOutput(ctx context.Context, taskCtx webapi.StatusContext) error { return nil } - outputReader := ioutils.NewRemoteFileOutputReader(ctx, taskCtx.DataStore(), taskCtx.OutputWriter(), 0) + outputReader, err := ioutils.NewRemoteFileOutputReader(ctx, taskCtx.DataStore(), taskCtx.OutputWriter(), 0) + if err != nil { + return err + } return taskCtx.OutputWriter().Put(ctx, outputReader) } diff --git a/flytepropeller/pkg/controller/nodes/array/handler.go b/flytepropeller/pkg/controller/nodes/array/handler.go index 834a016cb2..1ca8e03c23 100644 --- a/flytepropeller/pkg/controller/nodes/array/handler.go +++ b/flytepropeller/pkg/controller/nodes/array/handler.go @@ -500,13 +500,18 @@ func (a *arrayNodeHandler) Handle(ctx context.Context, nCtx interfaces.NodeExecu }{nil, err} continue } - // checkpoint paths are not computed here because this function is only called when writing // existing cached outputs. if this functionality changes this will need to be revisited. 
outputPaths := ioutils.NewCheckpointRemoteFilePaths(ctx, nCtx.DataStore(), subOutputDir, ioutils.NewRawOutputPaths(ctx, subDataDir), "") - reader := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) - - gatherOutputsRequest.reader = &reader + reader, err := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) + if err != nil { + gatherOutputsRequest.responseChannel <- struct { + literalMap map[string]*idlcore.Literal + error + }{nil, err} + continue + } + gatherOutputsRequest.reader = reader a.gatherOutputsRequestChannel <- gatherOutputsRequest } diff --git a/flytepropeller/pkg/controller/nodes/cache.go b/flytepropeller/pkg/controller/nodes/cache.go index e8e7fc3720..5ec0871bd6 100644 --- a/flytepropeller/pkg/controller/nodes/cache.go +++ b/flytepropeller/pkg/controller/nodes/cache.go @@ -216,7 +216,10 @@ func (n *nodeExecutor) WriteCatalogCache(ctx context.Context, nCtx interfaces.No catalogKey.Identifier.Domain, catalogKey.Identifier.Name, catalogKey.Identifier.Version) outputPaths := ioutils.NewReadOnlyOutputFilePaths(ctx, nCtx.DataStore(), nCtx.NodeStatus().GetOutputDir()) - outputReader := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) + outputReader, err := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) + if err != nil { + return catalog.NewStatus(core.CatalogCacheStatus_CACHE_DISABLED, nil), errors.Wrapf(err, "failed to initialize the remote output file reader") + } metadata := catalog.Metadata{ TaskExecutionIdentifier: task.GetTaskExecutionIdentifier(nCtx), } diff --git a/flytepropeller/pkg/controller/workflow/executor_test.go b/flytepropeller/pkg/controller/workflow/executor_test.go index a3d028e94b..74cdcebd96 100644 --- a/flytepropeller/pkg/controller/workflow/executor_test.go +++ b/flytepropeller/pkg/controller/workflow/executor_test.go @@ -99,7 +99,11 @@ func (f fakeRemoteWritePlugin) Handle(ctx context.Context, tCtx pluginCore.TaskE o.Literals[k] = l } 
assert.NoError(f.t, tCtx.DataStore().WriteProtobuf(ctx, tCtx.OutputWriter().GetOutputPath(), storage.Options{}, o)) - assert.NoError(f.t, tCtx.OutputWriter().Put(ctx, ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0))) + reader, err := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) + if err != nil { + return trns, err + } + assert.NoError(f.t, tCtx.OutputWriter().Put(ctx, reader)) } return trns, err } From b6e40d0be7bad4b0ab899d93c960a43f4cdcad53 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 15 Oct 2024 21:10:59 +0000 Subject: [PATCH 15/29] Resolve merge conflicts --- .../pkg/controller/nodes/task/k8s/plugin_manager.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go index 17935a89e7..7496fb0d8a 100644 --- a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go +++ b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager.go @@ -290,9 +290,12 @@ func (e *PluginManager) checkResourcePhase(ctx context.Context, tCtx pluginsCore var opReader io.OutputReader if pCtx.ow == nil { logger.Infof(ctx, "Plugin [%s] returned no outputReader, assuming file based outputs", e.id) - opReader = ioutils.NewRemoteFileOutputReaderWithErrorAggregationStrategy( + opReader, err = ioutils.NewRemoteFileOutputReaderWithErrorAggregationStrategy( ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0, e.plugin.GetProperties().ErrorAggregationStrategy) + if err != nil { + return pluginsCore.UnknownTransition, err + } } else { logger.Infof(ctx, "Plugin [%s] returned outputReader", e.id) opReader = pCtx.ow.GetReader() From 819f871d28b73e7c4e74886d2b73f0a76ba157f1 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 15 Oct 2024 21:16:17 +0000 Subject: [PATCH 16/29] Resolve merge conflicts --- flytepropeller/pkg/controller/nodes/dynamic/handler.go | 5 ++++- 1 file changed, 4 
insertions(+), 1 deletion(-) diff --git a/flytepropeller/pkg/controller/nodes/dynamic/handler.go b/flytepropeller/pkg/controller/nodes/dynamic/handler.go index e23f145bb3..fc9e7ccf82 100644 --- a/flytepropeller/pkg/controller/nodes/dynamic/handler.go +++ b/flytepropeller/pkg/controller/nodes/dynamic/handler.go @@ -141,7 +141,10 @@ func (d dynamicNodeTaskNodeHandler) handleDynamicSubNodes(ctx context.Context, n // These outputPaths only reads the output metadata. So the sandbox is completely optional here and hence it is nil. // The sandbox creation as it uses hashing can be expensive and we skip that expense. outputPaths := ioutils.NewReadOnlyOutputFilePaths(ctx, nCtx.DataStore(), nCtx.NodeStatus().GetOutputDir()) - outputReader := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) + outputReader, err := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) + if err != nil { + return handler.UnknownTransition, prevState, err + } ee, err := d.TaskNodeHandler.ValidateOutput(ctx, nCtx.NodeID(), nCtx.InputReader(), outputReader, nil, nCtx.ExecutionContext().GetExecutionConfig(), nCtx.TaskReader()) From fcb4b7360467faefb8a9ba48575a50556391be6c Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Tue, 15 Oct 2024 21:52:16 +0000 Subject: [PATCH 17/29] More accurate timestamps --- flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts | 8 +- .../gen/pb-es/flyteidl/core/execution_pb.ts | 8 +- flyteidl/gen/pb-go/flyteidl/core/errors.pb.go | 95 ++++---- .../gen/pb-go/flyteidl/core/execution.pb.go | 229 +++++++++--------- .../flyteidl/service/admin.swagger.json | 2 +- flyteidl/gen/pb-js/flyteidl.d.ts | 8 +- flyteidl/gen/pb-js/flyteidl.js | 36 +-- .../gen/pb_python/flyteidl/core/errors_pb2.py | 15 +- .../pb_python/flyteidl/core/errors_pb2.pyi | 5 +- .../pb_python/flyteidl/core/execution_pb2.py | 55 ++--- .../pb_python/flyteidl/core/execution_pb2.pyi | 5 +- flyteidl/gen/pb_rust/flyteidl.core.rs | 8 +- flyteidl/protos/flyteidl/core/errors.proto | 3 +- 
flyteidl/protos/flyteidl/core/execution.proto | 3 +- .../ioutils/remote_file_output_reader.go | 8 +- .../ioutils/remote_file_output_reader_test.go | 22 +- 16 files changed, 270 insertions(+), 240 deletions(-) diff --git a/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts b/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts index 892b885d4a..4c508574c7 100644 --- a/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts +++ b/flyteidl/gen/pb-es/flyteidl/core/errors_pb.ts @@ -4,7 +4,7 @@ // @ts-nocheck import type { BinaryReadOptions, FieldList, JsonReadOptions, JsonValue, PartialMessage, PlainMessage } from "@bufbuild/protobuf"; -import { Message, proto3, protoInt64 } from "@bufbuild/protobuf"; +import { Message, proto3, Timestamp } from "@bufbuild/protobuf"; import { ExecutionError_ErrorKind } from "./execution_pb.js"; /** @@ -45,9 +45,9 @@ export class ContainerError extends Message { /** * Timestamp of the error * - * @generated from field: int64 timestamp = 5; + * @generated from field: google.protobuf.Timestamp timestamp = 5; */ - timestamp = protoInt64.zero; + timestamp?: Timestamp; /** * Worker that generated the error @@ -68,7 +68,7 @@ export class ContainerError extends Message { { no: 2, name: "message", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 3, name: "kind", kind: "enum", T: proto3.getEnumType(ContainerError_Kind) }, { no: 4, name: "origin", kind: "enum", T: proto3.getEnumType(ExecutionError_ErrorKind) }, - { no: 5, name: "timestamp", kind: "scalar", T: 3 /* ScalarType.INT64 */ }, + { no: 5, name: "timestamp", kind: "message", T: Timestamp }, { no: 6, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, ]); diff --git a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts index 5accf3a1b4..d9d0a71718 100644 --- a/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts +++ b/flyteidl/gen/pb-es/flyteidl/core/execution_pb.ts @@ -4,7 +4,7 @@ // @ts-nocheck import type { BinaryReadOptions, FieldList, 
JsonReadOptions, JsonValue, PartialMessage, PlainMessage } from "@bufbuild/protobuf"; -import { Duration, Message, proto3, protoInt64 } from "@bufbuild/protobuf"; +import { Duration, Message, proto3, Timestamp } from "@bufbuild/protobuf"; /** * Indicates various phases of Workflow Execution @@ -344,9 +344,9 @@ export class ExecutionError extends Message { /** * Timestamp of the error * - * @generated from field: int64 timestamp = 5; + * @generated from field: google.protobuf.Timestamp timestamp = 5; */ - timestamp = protoInt64.zero; + timestamp?: Timestamp; /** * Worker that generated the error @@ -367,7 +367,7 @@ export class ExecutionError extends Message { { no: 2, name: "message", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 3, name: "error_uri", kind: "scalar", T: 9 /* ScalarType.STRING */ }, { no: 4, name: "kind", kind: "enum", T: proto3.getEnumType(ExecutionError_ErrorKind) }, - { no: 5, name: "timestamp", kind: "scalar", T: 3 /* ScalarType.INT64 */ }, + { no: 5, name: "timestamp", kind: "message", T: Timestamp }, { no: 6, name: "worker", kind: "scalar", T: 9 /* ScalarType.STRING */ }, ]); diff --git a/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go b/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go index 3c999b4bc6..cb7640d053 100644 --- a/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go +++ b/flyteidl/gen/pb-go/flyteidl/core/errors.pb.go @@ -9,6 +9,7 @@ package core import ( protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" + timestamppb "google.golang.org/protobuf/types/known/timestamppb" reflect "reflect" sync "sync" ) @@ -83,7 +84,7 @@ type ContainerError struct { // Defines the origin of the error (system, user, unknown). 
Origin ExecutionError_ErrorKind `protobuf:"varint,4,opt,name=origin,proto3,enum=flyteidl.core.ExecutionError_ErrorKind" json:"origin,omitempty"` // Timestamp of the error - Timestamp int64 `protobuf:"varint,5,opt,name=timestamp,proto3" json:"timestamp,omitempty"` + Timestamp *timestamppb.Timestamp `protobuf:"bytes,5,opt,name=timestamp,proto3" json:"timestamp,omitempty"` // Worker that generated the error Worker string `protobuf:"bytes,6,opt,name=worker,proto3" json:"worker,omitempty"` } @@ -148,11 +149,11 @@ func (x *ContainerError) GetOrigin() ExecutionError_ErrorKind { return ExecutionError_UNKNOWN } -func (x *ContainerError) GetTimestamp() int64 { +func (x *ContainerError) GetTimestamp() *timestamppb.Timestamp { if x != nil { return x.Timestamp } - return 0 + return nil } func (x *ContainerError) GetWorker() string { @@ -219,41 +220,45 @@ var file_flyteidl_core_errors_proto_rawDesc = []byte{ 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0d, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x1a, 0x1d, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0x2f, 0x65, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x9b, 0x02, 0x0a, 0x0e, 0x43, - 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, 0x0a, - 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, 0x64, - 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x36, 0x0a, 0x04, 0x6b, - 0x69, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x22, 0x2e, 0x66, 0x6c, 0x79, 0x74, - 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, - 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x04, 0x6b, - 0x69, 0x6e, 0x64, 
0x12, 0x3f, 0x0a, 0x06, 0x6f, 0x72, 0x69, 0x67, 0x69, 0x6e, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, - 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, - 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x06, 0x6f, 0x72, - 0x69, 0x67, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, - 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, - 0x6d, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, 0x06, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2c, 0x0a, 0x04, 0x4b, 0x69, - 0x6e, 0x64, 0x12, 0x13, 0x0a, 0x0f, 0x4e, 0x4f, 0x4e, 0x5f, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, - 0x52, 0x41, 0x42, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x52, 0x45, 0x43, 0x4f, 0x56, - 0x45, 0x52, 0x41, 0x42, 0x4c, 0x45, 0x10, 0x01, 0x22, 0x44, 0x0a, 0x0d, 0x45, 0x72, 0x72, 0x6f, - 0x72, 0x44, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x33, 0x0a, 0x05, 0x65, 0x72, 0x72, - 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, - 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, - 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x42, 0xb1, - 0x01, 0x0a, 0x11, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, - 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0b, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x50, 0x72, 0x6f, 0x74, - 0x6f, 0x50, 0x01, 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, - 0x66, 0x6c, 0x79, 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, - 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, - 0x6f, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 
0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, - 0x02, 0x03, 0x46, 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, - 0x2e, 0x43, 0x6f, 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, - 0x5c, 0x43, 0x6f, 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, - 0x5c, 0x43, 0x6f, 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0xea, 0x02, 0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, - 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x1f, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, 0x6d, 0x65, + 0x73, 0x74, 0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xb7, 0x02, 0x0a, 0x0e, + 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, + 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, + 0x64, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x36, 0x0a, 0x04, + 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x22, 0x2e, 0x66, 0x6c, 0x79, + 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, + 0x69, 0x6e, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x04, + 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x3f, 0x0a, 0x06, 0x6f, 0x72, 0x69, 0x67, 0x69, 0x6e, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, + 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, + 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, 0x06, 0x6f, + 0x72, 0x69, 0x67, 0x69, 0x6e, 
0x12, 0x38, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, + 0x6d, 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, + 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, + 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2c, 0x0a, 0x04, 0x4b, 0x69, 0x6e, 0x64, 0x12, + 0x13, 0x0a, 0x0f, 0x4e, 0x4f, 0x4e, 0x5f, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x41, 0x42, + 0x4c, 0x45, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x41, + 0x42, 0x4c, 0x45, 0x10, 0x01, 0x22, 0x44, 0x0a, 0x0d, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x44, 0x6f, + 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x33, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, + 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x45, + 0x72, 0x72, 0x6f, 0x72, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x42, 0xb1, 0x01, 0x0a, 0x11, + 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, + 0x65, 0x42, 0x0b, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, + 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x6c, 0x79, + 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, 0x6c, 0x79, 0x74, + 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, 0x6f, 0x2f, 0x66, + 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, 0x02, 0x03, 0x46, + 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x43, 0x6f, + 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 
0x64, 0x6c, 0x5c, 0x43, 0x6f, + 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, 0x43, 0x6f, + 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, + 0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, 0x72, 0x65, 0x62, + 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -275,16 +280,18 @@ var file_flyteidl_core_errors_proto_goTypes = []interface{}{ (*ContainerError)(nil), // 1: flyteidl.core.ContainerError (*ErrorDocument)(nil), // 2: flyteidl.core.ErrorDocument (ExecutionError_ErrorKind)(0), // 3: flyteidl.core.ExecutionError.ErrorKind + (*timestamppb.Timestamp)(nil), // 4: google.protobuf.Timestamp } var file_flyteidl_core_errors_proto_depIdxs = []int32{ 0, // 0: flyteidl.core.ContainerError.kind:type_name -> flyteidl.core.ContainerError.Kind 3, // 1: flyteidl.core.ContainerError.origin:type_name -> flyteidl.core.ExecutionError.ErrorKind - 1, // 2: flyteidl.core.ErrorDocument.error:type_name -> flyteidl.core.ContainerError - 3, // [3:3] is the sub-list for method output_type - 3, // [3:3] is the sub-list for method input_type - 3, // [3:3] is the sub-list for extension type_name - 3, // [3:3] is the sub-list for extension extendee - 0, // [0:3] is the sub-list for field type_name + 4, // 2: flyteidl.core.ContainerError.timestamp:type_name -> google.protobuf.Timestamp + 1, // 3: flyteidl.core.ErrorDocument.error:type_name -> flyteidl.core.ContainerError + 4, // [4:4] is the sub-list for method output_type + 4, // [4:4] is the sub-list for method input_type + 4, // [4:4] is the sub-list for extension type_name + 4, // [4:4] is the sub-list for extension extendee + 0, // [0:4] is the sub-list for field type_name } func init() { file_flyteidl_core_errors_proto_init() } diff --git a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go index ebe077194e..a17e94eba1 100644 --- 
a/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go +++ b/flyteidl/gen/pb-go/flyteidl/core/execution.pb.go @@ -10,6 +10,7 @@ import ( protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" durationpb "google.golang.org/protobuf/types/known/durationpb" + timestamppb "google.golang.org/protobuf/types/known/timestamppb" reflect "reflect" sync "sync" ) @@ -515,7 +516,7 @@ type ExecutionError struct { ErrorUri string `protobuf:"bytes,3,opt,name=error_uri,json=errorUri,proto3" json:"error_uri,omitempty"` Kind ExecutionError_ErrorKind `protobuf:"varint,4,opt,name=kind,proto3,enum=flyteidl.core.ExecutionError_ErrorKind" json:"kind,omitempty"` // Timestamp of the error - Timestamp int64 `protobuf:"varint,5,opt,name=timestamp,proto3" json:"timestamp,omitempty"` + Timestamp *timestamppb.Timestamp `protobuf:"bytes,5,opt,name=timestamp,proto3" json:"timestamp,omitempty"` // Worker that generated the error Worker string `protobuf:"bytes,6,opt,name=worker,proto3" json:"worker,omitempty"` } @@ -580,11 +581,11 @@ func (x *ExecutionError) GetKind() ExecutionError_ErrorKind { return ExecutionError_UNKNOWN } -func (x *ExecutionError) GetTimestamp() int64 { +func (x *ExecutionError) GetTimestamp() *timestamppb.Timestamp { if x != nil { return x.Timestamp } - return 0 + return nil } func (x *ExecutionError) GetWorker() string { @@ -821,105 +822,109 @@ var file_flyteidl_core_execution_proto_rawDesc = []byte{ 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0d, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x1a, 0x1e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, - 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xa7, - 0x01, 0x0a, 0x11, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x45, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x91, 
0x01, 0x0a, 0x05, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, + 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x1f, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, + 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, + 0xa7, 0x01, 0x0a, 0x11, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x45, 0x78, 0x65, 0x63, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x91, 0x01, 0x0a, 0x05, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, + 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, + 0x0a, 0x06, 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x55, + 0x4e, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x55, 0x43, 0x43, 0x45, + 0x45, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, + 0x45, 0x44, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0b, 0x0a, 0x07, 0x46, 0x41, 0x49, 0x4c, 0x49, 0x4e, + 0x47, 0x10, 0x05, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x06, 0x12, + 0x0b, 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0x07, 0x12, 0x0d, 0x0a, 0x09, + 0x54, 0x49, 0x4d, 0x45, 0x44, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x08, 0x12, 0x0c, 0x0a, 0x08, 0x41, + 0x42, 0x4f, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x09, 0x22, 0xb6, 0x01, 0x0a, 0x0d, 0x4e, 0x6f, + 0x64, 0x65, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0xa4, 0x01, 0x0a, 0x05, + 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, + 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, + 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0d, 0x0a, + 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, + 0x46, 0x41, 0x49, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x04, 0x12, 0x0a, 0x0a, 
0x06, 0x46, 0x41, 0x49, + 0x4c, 0x45, 0x44, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, + 0x10, 0x06, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x4b, 0x49, 0x50, 0x50, 0x45, 0x44, 0x10, 0x07, 0x12, + 0x0d, 0x0a, 0x09, 0x54, 0x49, 0x4d, 0x45, 0x44, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x08, 0x12, 0x13, + 0x0a, 0x0f, 0x44, 0x59, 0x4e, 0x41, 0x4d, 0x49, 0x43, 0x5f, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, + 0x47, 0x10, 0x09, 0x12, 0x0d, 0x0a, 0x09, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x45, 0x44, + 0x10, 0x0a, 0x22, 0x96, 0x01, 0x0a, 0x0d, 0x54, 0x61, 0x73, 0x6b, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x84, 0x01, 0x0a, 0x05, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x55, 0x4e, - 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, - 0x44, 0x49, 0x4e, 0x47, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, - 0x44, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0b, 0x0a, 0x07, 0x46, 0x41, 0x49, 0x4c, 0x49, 0x4e, 0x47, - 0x10, 0x05, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x06, 0x12, 0x0b, - 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0x07, 0x12, 0x0d, 0x0a, 0x09, 0x54, - 0x49, 0x4d, 0x45, 0x44, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x08, 0x12, 0x0c, 0x0a, 0x08, 0x41, 0x42, - 0x4f, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x09, 0x22, 0xb6, 0x01, 0x0a, 0x0d, 0x4e, 0x6f, 0x64, - 0x65, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0xa4, 0x01, 0x0a, 0x05, 0x50, - 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, - 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, 0x12, - 0x0b, 0x0a, 0x07, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, - 0x53, 0x55, 0x43, 
0x43, 0x45, 0x45, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x46, - 0x41, 0x49, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, - 0x45, 0x44, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, - 0x06, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x4b, 0x49, 0x50, 0x50, 0x45, 0x44, 0x10, 0x07, 0x12, 0x0d, - 0x0a, 0x09, 0x54, 0x49, 0x4d, 0x45, 0x44, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x08, 0x12, 0x13, 0x0a, - 0x0f, 0x44, 0x59, 0x4e, 0x41, 0x4d, 0x49, 0x43, 0x5f, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, 0x47, - 0x10, 0x09, 0x12, 0x0d, 0x0a, 0x09, 0x52, 0x45, 0x43, 0x4f, 0x56, 0x45, 0x52, 0x45, 0x44, 0x10, - 0x0a, 0x22, 0x96, 0x01, 0x0a, 0x0d, 0x54, 0x61, 0x73, 0x6b, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, - 0x69, 0x6f, 0x6e, 0x22, 0x84, 0x01, 0x0a, 0x05, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0d, 0x0a, - 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, - 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x55, 0x4e, 0x4e, - 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, 0x44, - 0x45, 0x44, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, - 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x05, 0x12, 0x10, 0x0a, - 0x0c, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x49, 0x4e, 0x47, 0x10, 0x06, 0x12, - 0x19, 0x0a, 0x15, 0x57, 0x41, 0x49, 0x54, 0x49, 0x4e, 0x47, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x52, - 0x45, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x53, 0x10, 0x07, 0x22, 0xfe, 0x01, 0x0a, 0x0e, 0x45, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, 0x0a, - 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, 0x64, - 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 
0x67, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x5f, 0x75, 0x72, 0x69, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, - 0x65, 0x72, 0x72, 0x6f, 0x72, 0x55, 0x72, 0x69, 0x12, 0x3b, 0x0a, 0x04, 0x6b, 0x69, 0x6e, 0x64, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, - 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x52, - 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, - 0x6d, 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, - 0x61, 0x6d, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2e, 0x0a, 0x09, 0x45, - 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, - 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x55, 0x53, 0x45, 0x52, 0x10, 0x01, 0x12, - 0x0a, 0x0a, 0x06, 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, 0x10, 0x02, 0x22, 0xb2, 0x02, 0x0a, 0x07, - 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4b, 0x0a, - 0x0e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, - 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x2e, 0x4d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x2b, 0x0a, 0x03, 0x74, 
0x74, - 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x52, 0x03, 0x74, 0x74, 0x6c, 0x12, 0x2a, 0x0a, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, - 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x08, 0x52, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, - 0x69, 0x6e, 0x67, 0x12, 0x2a, 0x0a, 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, 0x65, 0x46, - 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x48, - 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x22, - 0x2f, 0x0a, 0x0d, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, - 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, - 0x03, 0x43, 0x53, 0x56, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x4a, 0x53, 0x4f, 0x4e, 0x10, 0x02, - 0x22, 0x5a, 0x0a, 0x14, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, - 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, 0x65, 0x63, 0x12, 0x42, 0x0a, 0x0f, 0x71, 0x75, 0x65, 0x75, - 0x65, 0x69, 0x6e, 0x67, 0x5f, 0x62, 0x75, 0x64, 0x67, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, 0x71, 0x75, - 0x65, 0x75, 0x65, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x64, 0x67, 0x65, 0x74, 0x22, 0xce, 0x01, 0x0a, - 0x10, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x12, 0x3a, 0x0a, 0x04, 0x74, 0x69, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, - 0x51, 0x75, 0x61, 0x6c, 0x69, 
0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, - 0x2e, 0x54, 0x69, 0x65, 0x72, 0x48, 0x00, 0x52, 0x04, 0x74, 0x69, 0x65, 0x72, 0x12, 0x39, 0x0a, - 0x04, 0x73, 0x70, 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x66, 0x6c, - 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, - 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, 0x65, 0x63, - 0x48, 0x00, 0x52, 0x04, 0x73, 0x70, 0x65, 0x63, 0x22, 0x34, 0x0a, 0x04, 0x54, 0x69, 0x65, 0x72, - 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, - 0x08, 0x0a, 0x04, 0x48, 0x49, 0x47, 0x48, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4d, 0x45, 0x44, - 0x49, 0x55, 0x4d, 0x10, 0x02, 0x12, 0x07, 0x0a, 0x03, 0x4c, 0x4f, 0x57, 0x10, 0x03, 0x42, 0x0d, - 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0xb4, 0x01, - 0x0a, 0x11, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, - 0x6f, 0x72, 0x65, 0x42, 0x0e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x50, 0x72, - 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, - 0x6d, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, - 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, - 0x2d, 0x67, 0x6f, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, - 0x65, 0xa2, 0x02, 0x03, 0x46, 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, - 0x64, 0x6c, 0x2e, 0x43, 0x6f, 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, - 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, - 0x64, 0x6c, 0x5c, 0x43, 0x6f, 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0xea, 0x02, 0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 
0x69, 0x64, 0x6c, 0x3a, 0x3a, - 0x43, 0x6f, 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x55, 0x43, 0x43, 0x45, 0x45, + 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x42, 0x4f, 0x52, 0x54, 0x45, 0x44, + 0x10, 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x05, 0x12, 0x10, + 0x0a, 0x0c, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x49, 0x4e, 0x47, 0x10, 0x06, + 0x12, 0x19, 0x0a, 0x15, 0x57, 0x41, 0x49, 0x54, 0x49, 0x4e, 0x47, 0x5f, 0x46, 0x4f, 0x52, 0x5f, + 0x52, 0x45, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x53, 0x10, 0x07, 0x22, 0x9a, 0x02, 0x0a, 0x0e, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x12, + 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, + 0x64, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1b, 0x0a, 0x09, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x5f, 0x75, 0x72, 0x69, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x08, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x55, 0x72, 0x69, 0x12, 0x3b, 0x0a, 0x04, 0x6b, 0x69, 0x6e, + 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x27, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, + 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4b, 0x69, 0x6e, 0x64, + 0x52, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x38, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, + 0x61, 0x6d, 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, + 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, + 0x12, 0x16, 0x0a, 0x06, 0x77, 
0x6f, 0x72, 0x6b, 0x65, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x06, 0x77, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x22, 0x2e, 0x0a, 0x09, 0x45, 0x72, 0x72, 0x6f, + 0x72, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, + 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x55, 0x53, 0x45, 0x52, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, + 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, 0x10, 0x02, 0x22, 0xb2, 0x02, 0x0a, 0x07, 0x54, 0x61, 0x73, + 0x6b, 0x4c, 0x6f, 0x67, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4b, 0x0a, 0x0e, 0x6d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, + 0x72, 0x65, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x4c, 0x6f, 0x67, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, + 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, + 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x2b, 0x0a, 0x03, 0x74, 0x74, 0x6c, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, + 0x03, 0x74, 0x74, 0x6c, 0x12, 0x2a, 0x0a, 0x10, 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, + 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, + 0x53, 0x68, 0x6f, 0x77, 0x57, 0x68, 0x69, 0x6c, 0x65, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, + 0x12, 0x2a, 0x0a, 0x10, 0x48, 0x69, 0x64, 0x65, 0x4f, 0x6e, 0x63, 0x65, 0x46, 0x69, 0x6e, 0x69, + 0x73, 0x68, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x48, 0x69, 0x64, 0x65, + 0x4f, 0x6e, 0x63, 0x65, 0x46, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 
0x64, 0x22, 0x2f, 0x0a, 0x0d, + 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x0b, 0x0a, + 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x43, 0x53, + 0x56, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x4a, 0x53, 0x4f, 0x4e, 0x10, 0x02, 0x22, 0x5a, 0x0a, + 0x14, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x53, 0x70, 0x65, 0x63, 0x12, 0x42, 0x0a, 0x0f, 0x71, 0x75, 0x65, 0x75, 0x65, 0x69, 0x6e, + 0x67, 0x5f, 0x62, 0x75, 0x64, 0x67, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, + 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, 0x71, 0x75, 0x65, 0x75, 0x65, + 0x69, 0x6e, 0x67, 0x42, 0x75, 0x64, 0x67, 0x65, 0x74, 0x22, 0xce, 0x01, 0x0a, 0x10, 0x51, 0x75, + 0x61, 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x3a, + 0x0a, 0x04, 0x74, 0x69, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x66, + 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, + 0x6c, 0x69, 0x74, 0x79, 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x54, 0x69, + 0x65, 0x72, 0x48, 0x00, 0x52, 0x04, 0x74, 0x69, 0x65, 0x72, 0x12, 0x39, 0x0a, 0x04, 0x73, 0x70, + 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, + 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x51, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, + 0x4f, 0x66, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x70, 0x65, 0x63, 0x48, 0x00, 0x52, + 0x04, 0x73, 0x70, 0x65, 0x63, 0x22, 0x34, 0x0a, 0x04, 0x54, 0x69, 0x65, 0x72, 0x12, 0x0d, 0x0a, + 0x09, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, + 0x48, 0x49, 0x47, 0x48, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4d, 0x45, 0x44, 0x49, 0x55, 0x4d, + 0x10, 
0x02, 0x12, 0x07, 0x0a, 0x03, 0x4c, 0x4f, 0x57, 0x10, 0x03, 0x42, 0x0d, 0x0a, 0x0b, 0x64, + 0x65, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0xb4, 0x01, 0x0a, 0x11, 0x63, + 0x6f, 0x6d, 0x2e, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, 0x63, 0x6f, 0x72, 0x65, + 0x42, 0x0e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x50, 0x72, 0x6f, 0x74, 0x6f, + 0x50, 0x01, 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, + 0x6c, 0x79, 0x74, 0x65, 0x6f, 0x72, 0x67, 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x2f, 0x66, 0x6c, + 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x62, 0x2d, 0x67, 0x6f, + 0x2f, 0x66, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2f, 0x63, 0x6f, 0x72, 0x65, 0xa2, 0x02, + 0x03, 0x46, 0x43, 0x58, 0xaa, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x2e, + 0x43, 0x6f, 0x72, 0x65, 0xca, 0x02, 0x0d, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, + 0x43, 0x6f, 0x72, 0x65, 0xe2, 0x02, 0x19, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x5c, + 0x43, 0x6f, 0x72, 0x65, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0xea, 0x02, 0x0e, 0x46, 0x6c, 0x79, 0x74, 0x65, 0x69, 0x64, 0x6c, 0x3a, 0x3a, 0x43, 0x6f, 0x72, + 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -950,20 +955,22 @@ var file_flyteidl_core_execution_proto_goTypes = []interface{}{ (*TaskLog)(nil), // 10: flyteidl.core.TaskLog (*QualityOfServiceSpec)(nil), // 11: flyteidl.core.QualityOfServiceSpec (*QualityOfService)(nil), // 12: flyteidl.core.QualityOfService - (*durationpb.Duration)(nil), // 13: google.protobuf.Duration + (*timestamppb.Timestamp)(nil), // 13: google.protobuf.Timestamp + (*durationpb.Duration)(nil), // 14: google.protobuf.Duration } var file_flyteidl_core_execution_proto_depIdxs = []int32{ 3, // 0: flyteidl.core.ExecutionError.kind:type_name -> flyteidl.core.ExecutionError.ErrorKind - 4, // 1: 
flyteidl.core.TaskLog.message_format:type_name -> flyteidl.core.TaskLog.MessageFormat - 13, // 2: flyteidl.core.TaskLog.ttl:type_name -> google.protobuf.Duration - 13, // 3: flyteidl.core.QualityOfServiceSpec.queueing_budget:type_name -> google.protobuf.Duration - 5, // 4: flyteidl.core.QualityOfService.tier:type_name -> flyteidl.core.QualityOfService.Tier - 11, // 5: flyteidl.core.QualityOfService.spec:type_name -> flyteidl.core.QualityOfServiceSpec - 6, // [6:6] is the sub-list for method output_type - 6, // [6:6] is the sub-list for method input_type - 6, // [6:6] is the sub-list for extension type_name - 6, // [6:6] is the sub-list for extension extendee - 0, // [0:6] is the sub-list for field type_name + 13, // 1: flyteidl.core.ExecutionError.timestamp:type_name -> google.protobuf.Timestamp + 4, // 2: flyteidl.core.TaskLog.message_format:type_name -> flyteidl.core.TaskLog.MessageFormat + 14, // 3: flyteidl.core.TaskLog.ttl:type_name -> google.protobuf.Duration + 14, // 4: flyteidl.core.QualityOfServiceSpec.queueing_budget:type_name -> google.protobuf.Duration + 5, // 5: flyteidl.core.QualityOfService.tier:type_name -> flyteidl.core.QualityOfService.Tier + 11, // 6: flyteidl.core.QualityOfService.spec:type_name -> flyteidl.core.QualityOfServiceSpec + 7, // [7:7] is the sub-list for method output_type + 7, // [7:7] is the sub-list for method input_type + 7, // [7:7] is the sub-list for extension type_name + 7, // [7:7] is the sub-list for extension extendee + 0, // [0:7] is the sub-list for field type_name } func init() { file_flyteidl_core_execution_proto_init() } diff --git a/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json b/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json index bc33080a5f..c4f6f3ef7f 100644 --- a/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json +++ b/flyteidl/gen/pb-go/gateway/flyteidl/service/admin.swagger.json @@ -7156,7 +7156,7 @@ }, "timestamp": { "type": "string", - "format": "int64", + 
"format": "date-time", "title": "Timestamp of the error" }, "worker": { diff --git a/flyteidl/gen/pb-js/flyteidl.d.ts b/flyteidl/gen/pb-js/flyteidl.d.ts index ee68f70557..73b5a73eaa 100644 --- a/flyteidl/gen/pb-js/flyteidl.d.ts +++ b/flyteidl/gen/pb-js/flyteidl.d.ts @@ -5750,7 +5750,7 @@ export namespace flyteidl { kind?: (flyteidl.core.ExecutionError.ErrorKind|null); /** ExecutionError timestamp */ - timestamp?: (Long|null); + timestamp?: (google.protobuf.ITimestamp|null); /** ExecutionError worker */ worker?: (string|null); @@ -5778,7 +5778,7 @@ export namespace flyteidl { public kind: flyteidl.core.ExecutionError.ErrorKind; /** ExecutionError timestamp. */ - public timestamp: Long; + public timestamp?: (google.protobuf.ITimestamp|null); /** ExecutionError worker. */ public worker: string; @@ -7569,7 +7569,7 @@ export namespace flyteidl { origin?: (flyteidl.core.ExecutionError.ErrorKind|null); /** ContainerError timestamp */ - timestamp?: (Long|null); + timestamp?: (google.protobuf.ITimestamp|null); /** ContainerError worker */ worker?: (string|null); @@ -7597,7 +7597,7 @@ export namespace flyteidl { public origin: flyteidl.core.ExecutionError.ErrorKind; /** ContainerError timestamp. */ - public timestamp: Long; + public timestamp?: (google.protobuf.ITimestamp|null); /** ContainerError worker. 
*/ public worker: string; diff --git a/flyteidl/gen/pb-js/flyteidl.js b/flyteidl/gen/pb-js/flyteidl.js index 58689003b1..970a69229c 100644 --- a/flyteidl/gen/pb-js/flyteidl.js +++ b/flyteidl/gen/pb-js/flyteidl.js @@ -13823,7 +13823,7 @@ * @property {string|null} [message] ExecutionError message * @property {string|null} [errorUri] ExecutionError errorUri * @property {flyteidl.core.ExecutionError.ErrorKind|null} [kind] ExecutionError kind - * @property {Long|null} [timestamp] ExecutionError timestamp + * @property {google.protobuf.ITimestamp|null} [timestamp] ExecutionError timestamp * @property {string|null} [worker] ExecutionError worker */ @@ -13876,11 +13876,11 @@ /** * ExecutionError timestamp. - * @member {Long} timestamp + * @member {google.protobuf.ITimestamp|null|undefined} timestamp * @memberof flyteidl.core.ExecutionError * @instance */ - ExecutionError.prototype.timestamp = $util.Long ? $util.Long.fromBits(0,0,false) : 0; + ExecutionError.prototype.timestamp = null; /** * ExecutionError worker. 
@@ -13923,7 +13923,7 @@ if (message.kind != null && message.hasOwnProperty("kind")) writer.uint32(/* id 4, wireType 0 =*/32).int32(message.kind); if (message.timestamp != null && message.hasOwnProperty("timestamp")) - writer.uint32(/* id 5, wireType 0 =*/40).int64(message.timestamp); + $root.google.protobuf.Timestamp.encode(message.timestamp, writer.uint32(/* id 5, wireType 2 =*/42).fork()).ldelim(); if (message.worker != null && message.hasOwnProperty("worker")) writer.uint32(/* id 6, wireType 2 =*/50).string(message.worker); return writer; @@ -13960,7 +13960,7 @@ message.kind = reader.int32(); break; case 5: - message.timestamp = reader.int64(); + message.timestamp = $root.google.protobuf.Timestamp.decode(reader, reader.uint32()); break; case 6: message.worker = reader.string(); @@ -14002,9 +14002,11 @@ case 2: break; } - if (message.timestamp != null && message.hasOwnProperty("timestamp")) - if (!$util.isInteger(message.timestamp) && !(message.timestamp && $util.isInteger(message.timestamp.low) && $util.isInteger(message.timestamp.high))) - return "timestamp: integer|Long expected"; + if (message.timestamp != null && message.hasOwnProperty("timestamp")) { + var error = $root.google.protobuf.Timestamp.verify(message.timestamp); + if (error) + return "timestamp." + error; + } if (message.worker != null && message.hasOwnProperty("worker")) if (!$util.isString(message.worker)) return "worker: string expected"; @@ -18302,7 +18304,7 @@ * @property {string|null} [message] ContainerError message * @property {flyteidl.core.ContainerError.Kind|null} [kind] ContainerError kind * @property {flyteidl.core.ExecutionError.ErrorKind|null} [origin] ContainerError origin - * @property {Long|null} [timestamp] ContainerError timestamp + * @property {google.protobuf.ITimestamp|null} [timestamp] ContainerError timestamp * @property {string|null} [worker] ContainerError worker */ @@ -18355,11 +18357,11 @@ /** * ContainerError timestamp. 
- * @member {Long} timestamp + * @member {google.protobuf.ITimestamp|null|undefined} timestamp * @memberof flyteidl.core.ContainerError * @instance */ - ContainerError.prototype.timestamp = $util.Long ? $util.Long.fromBits(0,0,false) : 0; + ContainerError.prototype.timestamp = null; /** * ContainerError worker. @@ -18402,7 +18404,7 @@ if (message.origin != null && message.hasOwnProperty("origin")) writer.uint32(/* id 4, wireType 0 =*/32).int32(message.origin); if (message.timestamp != null && message.hasOwnProperty("timestamp")) - writer.uint32(/* id 5, wireType 0 =*/40).int64(message.timestamp); + $root.google.protobuf.Timestamp.encode(message.timestamp, writer.uint32(/* id 5, wireType 2 =*/42).fork()).ldelim(); if (message.worker != null && message.hasOwnProperty("worker")) writer.uint32(/* id 6, wireType 2 =*/50).string(message.worker); return writer; @@ -18439,7 +18441,7 @@ message.origin = reader.int32(); break; case 5: - message.timestamp = reader.int64(); + message.timestamp = $root.google.protobuf.Timestamp.decode(reader, reader.uint32()); break; case 6: message.worker = reader.string(); @@ -18486,9 +18488,11 @@ case 2: break; } - if (message.timestamp != null && message.hasOwnProperty("timestamp")) - if (!$util.isInteger(message.timestamp) && !(message.timestamp && $util.isInteger(message.timestamp.low) && $util.isInteger(message.timestamp.high))) - return "timestamp: integer|Long expected"; + if (message.timestamp != null && message.hasOwnProperty("timestamp")) { + var error = $root.google.protobuf.Timestamp.verify(message.timestamp); + if (error) + return "timestamp." 
+ error; + } if (message.worker != null && message.hasOwnProperty("worker")) if (!$util.isString(message.worker)) return "worker: string expected"; diff --git a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py index 1ea22c214a..fe1be689e4 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py +++ b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.py @@ -12,9 +12,10 @@ from flyteidl.core import execution_pb2 as flyteidl_dot_core_dot_execution__pb2 +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x66lyteidl/core/errors.proto\x12\rflyteidl.core\x1a\x1d\x66lyteidl/core/execution.proto\"\x9b\x02\n\x0e\x43ontainerError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x36\n\x04kind\x18\x03 \x01(\x0e\x32\".flyteidl.core.ContainerError.KindR\x04kind\x12?\n\x06origin\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x06origin\x12\x1c\n\ttimestamp\x18\x05 \x01(\x03R\ttimestamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\",\n\x04Kind\x12\x13\n\x0fNON_RECOVERABLE\x10\x00\x12\x0f\n\x0bRECOVERABLE\x10\x01\"D\n\rErrorDocument\x12\x33\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x1d.flyteidl.core.ContainerErrorR\x05\x65rrorB\xb1\x01\n\x11\x63om.flyteidl.coreB\x0b\x45rrorsProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x66lyteidl/core/errors.proto\x12\rflyteidl.core\x1a\x1d\x66lyteidl/core/execution.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\xb7\x02\n\x0e\x43ontainerError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x36\n\x04kind\x18\x03 
\x01(\x0e\x32\".flyteidl.core.ContainerError.KindR\x04kind\x12?\n\x06origin\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x06origin\x12\x38\n\ttimestamp\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\ttimestamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\",\n\x04Kind\x12\x13\n\x0fNON_RECOVERABLE\x10\x00\x12\x0f\n\x0bRECOVERABLE\x10\x01\"D\n\rErrorDocument\x12\x33\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x1d.flyteidl.core.ContainerErrorR\x05\x65rrorB\xb1\x01\n\x11\x63om.flyteidl.coreB\x0b\x45rrorsProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -23,10 +24,10 @@ DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b'\n\021com.flyteidl.coreB\013ErrorsProtoP\001Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\242\002\003FCX\252\002\rFlyteidl.Core\312\002\rFlyteidl\\Core\342\002\031Flyteidl\\Core\\GPBMetadata\352\002\016Flyteidl::Core' - _globals['_CONTAINERERROR']._serialized_start=77 - _globals['_CONTAINERERROR']._serialized_end=360 - _globals['_CONTAINERERROR_KIND']._serialized_start=316 - _globals['_CONTAINERERROR_KIND']._serialized_end=360 - _globals['_ERRORDOCUMENT']._serialized_start=362 - _globals['_ERRORDOCUMENT']._serialized_end=430 + _globals['_CONTAINERERROR']._serialized_start=110 + _globals['_CONTAINERERROR']._serialized_end=421 + _globals['_CONTAINERERROR_KIND']._serialized_start=377 + _globals['_CONTAINERERROR_KIND']._serialized_end=421 + _globals['_ERRORDOCUMENT']._serialized_start=423 + _globals['_ERRORDOCUMENT']._serialized_end=491 # @@protoc_insertion_point(module_scope) diff --git a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi index 707ad7e3cb..c0566c73ad 100644 --- 
a/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi +++ b/flyteidl/gen/pb_python/flyteidl/core/errors_pb2.pyi @@ -1,4 +1,5 @@ from flyteidl.core import execution_pb2 as _execution_pb2 +from google.protobuf import timestamp_pb2 as _timestamp_pb2 from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message @@ -24,9 +25,9 @@ class ContainerError(_message.Message): message: str kind: ContainerError.Kind origin: _execution_pb2.ExecutionError.ErrorKind - timestamp: int + timestamp: _timestamp_pb2.Timestamp worker: str - def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., kind: _Optional[_Union[ContainerError.Kind, str]] = ..., origin: _Optional[_Union[_execution_pb2.ExecutionError.ErrorKind, str]] = ..., timestamp: _Optional[int] = ..., worker: _Optional[str] = ...) -> None: ... + def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., kind: _Optional[_Union[ContainerError.Kind, str]] = ..., origin: _Optional[_Union[_execution_pb2.ExecutionError.ErrorKind, str]] = ..., timestamp: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ..., worker: _Optional[str] = ...) -> None: ... 
class ErrorDocument(_message.Message): __slots__ = ["error"] diff --git a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py index 7b1d94c774..2ff8f47010 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py +++ b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.py @@ -12,9 +12,10 @@ from google.protobuf import duration_pb2 as google_dot_protobuf_dot_duration__pb2 +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\xfe\x01\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 
\x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\x12\x1c\n\ttimestamp\x18\x05 \x01(\x03R\ttimestamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 \x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 \x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 \x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x66lyteidl/core/execution.proto\x12\rflyteidl.core\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\xa7\x01\n\x11WorkflowExecution\"\x91\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\x0e\n\nSUCCEEDING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\x0b\n\x07\x46\x41ILING\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x12\x0b\n\x07\x41\x42ORTED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x0c\n\x08\x41\x42ORTING\x10\t\"\xb6\x01\n\rNodeExecution\"\xa4\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x46\x41ILING\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x0b\n\x07\x41\x42ORTED\x10\x06\x12\x0b\n\x07SKIPPED\x10\x07\x12\r\n\tTIMED_OUT\x10\x08\x12\x13\n\x0f\x44YNAMIC_RUNNING\x10\t\x12\r\n\tRECOVERED\x10\n\"\x96\x01\n\rTaskExecution\"\x84\x01\n\x05Phase\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06QUEUED\x10\x01\x12\x0b\n\x07RUNNING\x10\x02\x12\r\n\tSUCCEEDED\x10\x03\x12\x0b\n\x07\x41\x42ORTED\x10\x04\x12\n\n\x06\x46\x41ILED\x10\x05\x12\x10\n\x0cINITIALIZING\x10\x06\x12\x19\n\x15WAITING_FOR_RESOURCES\x10\x07\"\x9a\x02\n\x0e\x45xecutionError\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1b\n\terror_uri\x18\x03 \x01(\tR\x08\x65rrorUri\x12;\n\x04kind\x18\x04 \x01(\x0e\x32\'.flyteidl.core.ExecutionError.ErrorKindR\x04kind\x12\x38\n\ttimestamp\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\ttimestamp\x12\x16\n\x06worker\x18\x06 \x01(\tR\x06worker\".\n\tErrorKind\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x08\n\x04USER\x10\x01\x12\n\n\x06SYSTEM\x10\x02\"\xb2\x02\n\x07TaskLog\x12\x10\n\x03uri\x18\x01 \x01(\tR\x03uri\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12K\n\x0emessage_format\x18\x03 \x01(\x0e\x32$.flyteidl.core.TaskLog.MessageFormatR\rmessageFormat\x12+\n\x03ttl\x18\x04 
\x01(\x0b\x32\x19.google.protobuf.DurationR\x03ttl\x12*\n\x10ShowWhilePending\x18\x05 \x01(\x08R\x10ShowWhilePending\x12*\n\x10HideOnceFinished\x18\x06 \x01(\x08R\x10HideOnceFinished\"/\n\rMessageFormat\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03\x43SV\x10\x01\x12\x08\n\x04JSON\x10\x02\"Z\n\x14QualityOfServiceSpec\x12\x42\n\x0fqueueing_budget\x18\x01 \x01(\x0b\x32\x19.google.protobuf.DurationR\x0equeueingBudget\"\xce\x01\n\x10QualityOfService\x12:\n\x04tier\x18\x01 \x01(\x0e\x32$.flyteidl.core.QualityOfService.TierH\x00R\x04tier\x12\x39\n\x04spec\x18\x02 \x01(\x0b\x32#.flyteidl.core.QualityOfServiceSpecH\x00R\x04spec\"4\n\x04Tier\x12\r\n\tUNDEFINED\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\n\n\x06MEDIUM\x10\x02\x12\x07\n\x03LOW\x10\x03\x42\r\n\x0b\x64\x65signationB\xb4\x01\n\x11\x63om.flyteidl.coreB\x0e\x45xecutionProtoP\x01Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\xa2\x02\x03\x46\x43X\xaa\x02\rFlyteidl.Core\xca\x02\rFlyteidl\\Core\xe2\x02\x19\x46lyteidl\\Core\\GPBMetadata\xea\x02\x0e\x46lyteidl::Coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -23,30 +24,30 @@ DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b'\n\021com.flyteidl.coreB\016ExecutionProtoP\001Z:github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core\242\002\003FCX\252\002\rFlyteidl.Core\312\002\rFlyteidl\\Core\342\002\031Flyteidl\\Core\\GPBMetadata\352\002\016Flyteidl::Core' - _globals['_WORKFLOWEXECUTION']._serialized_start=81 - _globals['_WORKFLOWEXECUTION']._serialized_end=248 - _globals['_WORKFLOWEXECUTION_PHASE']._serialized_start=103 - _globals['_WORKFLOWEXECUTION_PHASE']._serialized_end=248 - _globals['_NODEEXECUTION']._serialized_start=251 - _globals['_NODEEXECUTION']._serialized_end=433 - _globals['_NODEEXECUTION_PHASE']._serialized_start=269 - _globals['_NODEEXECUTION_PHASE']._serialized_end=433 - _globals['_TASKEXECUTION']._serialized_start=436 - _globals['_TASKEXECUTION']._serialized_end=586 - 
_globals['_TASKEXECUTION_PHASE']._serialized_start=454 - _globals['_TASKEXECUTION_PHASE']._serialized_end=586 - _globals['_EXECUTIONERROR']._serialized_start=589 - _globals['_EXECUTIONERROR']._serialized_end=843 - _globals['_EXECUTIONERROR_ERRORKIND']._serialized_start=797 - _globals['_EXECUTIONERROR_ERRORKIND']._serialized_end=843 - _globals['_TASKLOG']._serialized_start=846 - _globals['_TASKLOG']._serialized_end=1152 - _globals['_TASKLOG_MESSAGEFORMAT']._serialized_start=1105 - _globals['_TASKLOG_MESSAGEFORMAT']._serialized_end=1152 - _globals['_QUALITYOFSERVICESPEC']._serialized_start=1154 - _globals['_QUALITYOFSERVICESPEC']._serialized_end=1244 - _globals['_QUALITYOFSERVICE']._serialized_start=1247 - _globals['_QUALITYOFSERVICE']._serialized_end=1453 - _globals['_QUALITYOFSERVICE_TIER']._serialized_start=1386 - _globals['_QUALITYOFSERVICE_TIER']._serialized_end=1438 + _globals['_WORKFLOWEXECUTION']._serialized_start=114 + _globals['_WORKFLOWEXECUTION']._serialized_end=281 + _globals['_WORKFLOWEXECUTION_PHASE']._serialized_start=136 + _globals['_WORKFLOWEXECUTION_PHASE']._serialized_end=281 + _globals['_NODEEXECUTION']._serialized_start=284 + _globals['_NODEEXECUTION']._serialized_end=466 + _globals['_NODEEXECUTION_PHASE']._serialized_start=302 + _globals['_NODEEXECUTION_PHASE']._serialized_end=466 + _globals['_TASKEXECUTION']._serialized_start=469 + _globals['_TASKEXECUTION']._serialized_end=619 + _globals['_TASKEXECUTION_PHASE']._serialized_start=487 + _globals['_TASKEXECUTION_PHASE']._serialized_end=619 + _globals['_EXECUTIONERROR']._serialized_start=622 + _globals['_EXECUTIONERROR']._serialized_end=904 + _globals['_EXECUTIONERROR_ERRORKIND']._serialized_start=858 + _globals['_EXECUTIONERROR_ERRORKIND']._serialized_end=904 + _globals['_TASKLOG']._serialized_start=907 + _globals['_TASKLOG']._serialized_end=1213 + _globals['_TASKLOG_MESSAGEFORMAT']._serialized_start=1166 + _globals['_TASKLOG_MESSAGEFORMAT']._serialized_end=1213 + 
_globals['_QUALITYOFSERVICESPEC']._serialized_start=1215 + _globals['_QUALITYOFSERVICESPEC']._serialized_end=1305 + _globals['_QUALITYOFSERVICE']._serialized_start=1308 + _globals['_QUALITYOFSERVICE']._serialized_end=1514 + _globals['_QUALITYOFSERVICE_TIER']._serialized_start=1447 + _globals['_QUALITYOFSERVICE_TIER']._serialized_end=1499 # @@protoc_insertion_point(module_scope) diff --git a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi index ed246353cf..08f1937c08 100644 --- a/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi +++ b/flyteidl/gen/pb_python/flyteidl/core/execution_pb2.pyi @@ -1,4 +1,5 @@ from google.protobuf import duration_pb2 as _duration_pb2 +from google.protobuf import timestamp_pb2 as _timestamp_pb2 from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message @@ -102,9 +103,9 @@ class ExecutionError(_message.Message): message: str error_uri: str kind: ExecutionError.ErrorKind - timestamp: int + timestamp: _timestamp_pb2.Timestamp worker: str - def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., error_uri: _Optional[str] = ..., kind: _Optional[_Union[ExecutionError.ErrorKind, str]] = ..., timestamp: _Optional[int] = ..., worker: _Optional[str] = ...) -> None: ... + def __init__(self, code: _Optional[str] = ..., message: _Optional[str] = ..., error_uri: _Optional[str] = ..., kind: _Optional[_Union[ExecutionError.ErrorKind, str]] = ..., timestamp: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ..., worker: _Optional[str] = ...) -> None: ... 
class TaskLog(_message.Message): __slots__ = ["uri", "name", "message_format", "ttl", "ShowWhilePending", "HideOnceFinished"] diff --git a/flyteidl/gen/pb_rust/flyteidl.core.rs b/flyteidl/gen/pb_rust/flyteidl.core.rs index ab2bcdfb9d..a97a209a47 100644 --- a/flyteidl/gen/pb_rust/flyteidl.core.rs +++ b/flyteidl/gen/pb_rust/flyteidl.core.rs @@ -2130,8 +2130,8 @@ pub struct ExecutionError { #[prost(enumeration="execution_error::ErrorKind", tag="4")] pub kind: i32, /// Timestamp of the error - #[prost(int64, tag="5")] - pub timestamp: i64, + #[prost(message, optional, tag="5")] + pub timestamp: ::core::option::Option<::prost_types::Timestamp>, /// Worker that generated the error #[prost(string, tag="6")] pub worker: ::prost::alloc::string::String, @@ -3098,8 +3098,8 @@ pub struct ContainerError { #[prost(enumeration="execution_error::ErrorKind", tag="4")] pub origin: i32, /// Timestamp of the error - #[prost(int64, tag="5")] - pub timestamp: i64, + #[prost(message, optional, tag="5")] + pub timestamp: ::core::option::Option<::prost_types::Timestamp>, /// Worker that generated the error #[prost(string, tag="6")] pub worker: ::prost::alloc::string::String, diff --git a/flyteidl/protos/flyteidl/core/errors.proto b/flyteidl/protos/flyteidl/core/errors.proto index c0c9cadc2a..71ecd1de84 100644 --- a/flyteidl/protos/flyteidl/core/errors.proto +++ b/flyteidl/protos/flyteidl/core/errors.proto @@ -5,6 +5,7 @@ package flyteidl.core; option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core"; import "flyteidl/core/execution.proto"; +import "google/protobuf/timestamp.proto"; // Error message to propagate detailed errors from container executions to the execution // engine. 
@@ -27,7 +28,7 @@ message ContainerError { ExecutionError.ErrorKind origin = 4; // Timestamp of the error - int64 timestamp = 5; + google.protobuf.Timestamp timestamp = 5; // Worker that generated the error string worker = 6; diff --git a/flyteidl/protos/flyteidl/core/execution.proto b/flyteidl/protos/flyteidl/core/execution.proto index c92fcbfba8..3b9bfbbbb7 100644 --- a/flyteidl/protos/flyteidl/core/execution.proto +++ b/flyteidl/protos/flyteidl/core/execution.proto @@ -5,6 +5,7 @@ package flyteidl.core; option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core"; import "google/protobuf/duration.proto"; +import "google/protobuf/timestamp.proto"; // Indicates various phases of Workflow Execution message WorkflowExecution { @@ -74,7 +75,7 @@ message ExecutionError { } ErrorKind kind = 4; // Timestamp of the error - int64 timestamp = 5; + google.protobuf.Timestamp timestamp = 5; // Worker that generated the error string worker = 6; } diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index d25bfd7b3d..daa9585623 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -3,9 +3,9 @@ package ioutils import ( "context" "fmt" - "math" "path/filepath" "strings" + "time" "github.com/pkg/errors" @@ -151,7 +151,7 @@ func (e *EarliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) } func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { - var earliestTimestamp int64 = math.MaxInt64 + var earliestTimestamp time.Time = time.Now() earliestExecutionError := io.ExecutionError{} const maxItems = 1000 cursor := storage.NewCursorAtStart() @@ -171,8 +171,8 @@ func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.Execution if err != nil { return io.ExecutionError{}, 
errors.Wrapf(err, "failed to read error file @[%s]", errorFilePath.String()) } - timestamp := errorDoc.Error.GetTimestamp() - if earliestTimestamp >= timestamp { + timestamp := errorDoc.Error.GetTimestamp().AsTime() + if earliestTimestamp.After(timestamp) { earliestExecutionError = errorDoc2ExecutionError(errorDoc, errorFilePath) earliestTimestamp = timestamp } diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go index 00863293e1..d1ec153c26 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go @@ -3,12 +3,15 @@ package ioutils import ( "context" "fmt" + "strconv" "strings" "testing" + "time" regErrors "github.com/pkg/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "google.golang.org/protobuf/types/known/timestamppb" "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/core" pluginsIOMock "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/io/mocks" @@ -152,14 +155,16 @@ func TestReadOrigin(t *testing.T) { store := &storageMocks.ComposedProtobufStore{} store.OnReadProtobufMatch(mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { errorFilePath := args.Get(1).(storage.DataReference) - workerIdx := strings.Split(strings.Split(errorFilePath.String(), "-")[1], ".")[0] + workerIdx, err := strconv.Atoi(strings.Split(strings.Split(errorFilePath.String(), "-")[1], ".")[0]) + assert.NoError(t, err) errorDoc := &core.ErrorDocument{ Error: &core.ContainerError{ - Code: "red", - Message: fmt.Sprintf("hi-%s", workerIdx), - Kind: core.ContainerError_NON_RECOVERABLE, - Origin: core.ExecutionError_USER, - Worker: fmt.Sprintf("worker-%s", workerIdx), + Code: "red", + Message: fmt.Sprintf("hi-%d", workerIdx), + Kind: core.ContainerError_NON_RECOVERABLE, + Origin: 
core.ExecutionError_USER, + Worker: fmt.Sprintf("worker-%d", workerIdx), + Timestamp: timestamppb.New(time.Unix(int64(100+workerIdx%2), 0)), }, } incomingErrorDoc := args.Get(2) @@ -201,8 +206,9 @@ func TestReadOrigin(t *testing.T) { assert.NoError(t, err) assert.Equal(t, core.ExecutionError_USER, executionError.Kind) assert.Equal(t, "red", executionError.Code) - assert.Equal(t, "hi-2", executionError.Message) - assert.Equal(t, "worker-2", executionError.Worker) + assert.Equal(t, "hi-1", executionError.Message) + assert.Equal(t, "worker-1", executionError.Worker) + assert.Equal(t, timestamppb.New(time.Unix(101, 0)), executionError.Timestamp) assert.False(t, executionError.IsRecoverable) }) } From 6ef46f650df6334192c1f11bfdf28dee67fcf8dd Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Wed, 16 Oct 2024 17:30:47 +0000 Subject: [PATCH 18/29] Fix timestamp logic --- .../pluginmachinery/ioutils/remote_file_output_reader_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go index d1ec153c26..298baf1915 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go @@ -164,7 +164,7 @@ func TestReadOrigin(t *testing.T) { Kind: core.ContainerError_NON_RECOVERABLE, Origin: core.ExecutionError_USER, Worker: fmt.Sprintf("worker-%d", workerIdx), - Timestamp: timestamppb.New(time.Unix(int64(100+workerIdx%2), 0)), + Timestamp: timestamppb.New(time.Unix(int64(100-workerIdx%2), 0)), }, } incomingErrorDoc := args.Get(2) @@ -208,7 +208,7 @@ func TestReadOrigin(t *testing.T) { assert.Equal(t, "red", executionError.Code) assert.Equal(t, "hi-1", executionError.Message) assert.Equal(t, "worker-1", executionError.Worker) - assert.Equal(t, timestamppb.New(time.Unix(101, 0)), 
executionError.Timestamp) + assert.Equal(t, timestamppb.New(time.Unix(99, 0)), executionError.Timestamp) assert.False(t, executionError.IsRecoverable) }) } From dcb3a396bf27f88266f32d7e63a274b688eb04b6 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Wed, 16 Oct 2024 18:21:38 +0000 Subject: [PATCH 19/29] Fix timestamp logic --- .../flytek8s/k8s_resource_adds.go | 3 +++ .../k8s/kfoperators/pytorch/pytorch.go | 21 ++++++++++++++++++ .../k8s/kfoperators/pytorch/pytorch_test.go | 22 +++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/flyteplugins/go/tasks/pluginmachinery/flytek8s/k8s_resource_adds.go b/flyteplugins/go/tasks/pluginmachinery/flytek8s/k8s_resource_adds.go index b77615120a..3cd000dd40 100644 --- a/flyteplugins/go/tasks/pluginmachinery/flytek8s/k8s_resource_adds.go +++ b/flyteplugins/go/tasks/pluginmachinery/flytek8s/k8s_resource_adds.go @@ -17,6 +17,9 @@ import ( const ( flyteExecutionURL = "FLYTE_EXECUTION_URL" + + FlyteInternalWorkerNameEnvVarKey = "_F_WN" // "FLYTE_INTERNAL_WORKER_NAME" + FlyteInternalDistErrorStrategyEnvVarKey = "_F_DES" // "FLYTE_INTERNAL_DIST_ERROR_STRATEGY" ) func GetContextEnvVars(ownerCtx context.Context) []v1.EnvVar { diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go index 359393459f..84c0ed887a 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go @@ -7,6 +7,7 @@ import ( commonOp "github.com/kubeflow/common/pkg/apis/common/v1" kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" + apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client" @@ -16,6 +17,7 @@ import ( flyteerr "github.com/flyteorg/flyte/flyteplugins/go/tasks/errors" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery" pluginsCore 
"github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/core" + pluginsK8s "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/flytek8s" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/k8s" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/utils" "github.com/flyteorg/flyte/flyteplugins/go/tasks/plugins/k8s/kfoperators/common" @@ -101,6 +103,25 @@ func (pytorchOperatorResourceHandler) BuildResource(ctx context.Context, taskCtx return nil, flyteerr.Errorf(flyteerr.BadTaskSpecification, "Unable to create worker replica spec: [%v]", err.Error()) } + updateEnvVars := func(container *apiv1.Container) { + if container.Env == nil { + container.Env = make([]apiv1.EnvVar, 0, 2) + } + container.Env = append(container.Env, apiv1.EnvVar{ + Name: pluginsK8s.FlyteInternalWorkerNameEnvVarKey, + ValueFrom: &apiv1.EnvVarSource{ + FieldRef: &apiv1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }) + container.Env = append(container.Env, apiv1.EnvVar{ + Name: pluginsK8s.FlyteInternalDistErrorStrategyEnvVarKey, + Value: "1", + }) + } + updateEnvVars(&workerReplicaSpec.Template.Spec.Containers[0]) + if kfPytorchTaskExtraArgs.GetRunPolicy() != nil { runPolicy = common.ParseRunPolicy(*kfPytorchTaskExtraArgs.GetRunPolicy()) } diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go index b5f13ade2e..0fc913b289 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go @@ -12,6 +12,7 @@ import ( kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + apiv1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -23,6 +24,7 @@ import ( pluginsCore 
"github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/core" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/core/mocks" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/flytek8s" + pluginsK8s "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/flytek8s" flytek8sConfig "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/flytek8s/config" pluginIOMocks "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/io/mocks" "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/k8s" @@ -878,6 +880,26 @@ func TestBuildResourcePytorchV1(t *testing.T) { assert.Nil(t, pytorchJob.Spec.RunPolicy.ActiveDeadlineSeconds) assert.Nil(t, pytorchJob.Spec.ElasticPolicy) + + // validate plugin specific environment variables + workerContainerEnv := pytorchJob.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeWorker].Template.Spec.Containers[0].Env + assert.Equal(t, + []apiv1.EnvVar{ + { + Name: pluginsK8s.FlyteInternalWorkerNameEnvVarKey, + ValueFrom: &apiv1.EnvVarSource{ + FieldRef: &apiv1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + { + Name: pluginsK8s.FlyteInternalDistErrorStrategyEnvVarKey, + Value: "1", + }, + }, + workerContainerEnv[len(workerContainerEnv)-2:], + ) } } From 5170dd16d6b439c364444005bcf60d17a1f27ec6 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Thu, 17 Oct 2024 04:35:42 +0000 Subject: [PATCH 20/29] Review comments --- flyteplugins/go/tasks/plugins/array/outputs_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flyteplugins/go/tasks/plugins/array/outputs_test.go b/flyteplugins/go/tasks/plugins/array/outputs_test.go index 0998ba0047..eb1e874bc5 100644 --- a/flyteplugins/go/tasks/plugins/array/outputs_test.go +++ b/flyteplugins/go/tasks/plugins/array/outputs_test.go @@ -353,7 +353,7 @@ func TestAssembleFinalOutputs(t *testing.T) { ow := &mocks2.OutputWriter{} ow.OnGetOutputPrefixPath().Return("/prefix/") 
ow.OnGetOutputPath().Return("/prefix/outputs.pb") - ow.OnGetErrorPath().Return("/location/prefix/error.pb") + ow.OnGetErrorPath().Return("/prefix/error.pb") ow.On("Put", mock.Anything, mock.Anything).Return(func(ctx context.Context, or io.OutputReader) error { m, ee, err := or.Read(ctx) assert.NoError(t, err) From a14f5257afe5c1f8a9f1e35e9f321f1f02fb7de6 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Thu, 17 Oct 2024 04:35:42 +0000 Subject: [PATCH 21/29] Review comments --- flyteplugins/go/tasks/plugins/array/outputs_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flyteplugins/go/tasks/plugins/array/outputs_test.go b/flyteplugins/go/tasks/plugins/array/outputs_test.go index 0998ba0047..eb1e874bc5 100644 --- a/flyteplugins/go/tasks/plugins/array/outputs_test.go +++ b/flyteplugins/go/tasks/plugins/array/outputs_test.go @@ -353,7 +353,7 @@ func TestAssembleFinalOutputs(t *testing.T) { ow := &mocks2.OutputWriter{} ow.OnGetOutputPrefixPath().Return("/prefix/") ow.OnGetOutputPath().Return("/prefix/outputs.pb") - ow.OnGetErrorPath().Return("/location/prefix/error.pb") + ow.OnGetErrorPath().Return("/prefix/error.pb") ow.On("Put", mock.Anything, mock.Anything).Return(func(ctx context.Context, or io.OutputReader) error { m, ee, err := or.Read(ctx) assert.NoError(t, err) From 84934e5e132c43ee5e572002d367d2d7a592dd5b Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Thu, 17 Oct 2024 05:14:26 +0000 Subject: [PATCH 22/29] Review comments --- .../ioutils/remote_file_output_reader.go | 59 ++++++++++++------- .../ioutils/remote_file_output_reader_test.go | 6 +- .../go/tasks/pluginmachinery/k8s/plugin.go | 11 ++++ .../go/tasks/plugins/array/catalog.go | 6 +- .../go/tasks/plugins/array/outputs.go | 5 +- .../k8s/kfoperators/pytorch/pytorch.go | 2 +- .../k8s/kfoperators/pytorch/pytorch_test.go | 2 +- flyteplugins/go/tasks/plugins/testing/echo.go | 5 +- .../go/tasks/plugins/webapi/agent/plugin.go | 5 +- .../tasks/plugins/webapi/databricks/plugin.go | 5 +- 
.../pkg/controller/nodes/array/handler.go | 11 +--- flytepropeller/pkg/controller/nodes/cache.go | 5 +- .../pkg/controller/workflow/executor_test.go | 5 +- 13 files changed, 61 insertions(+), 66 deletions(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index daa9585623..2e81b8ecec 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -15,7 +15,7 @@ import ( "github.com/flyteorg/flyte/flytestdlib/storage" ) -type ErrorRetriever interface { +type errorRetriever interface { HasError(ctx context.Context) (bool, error) GetError(ctx context.Context) (io.ExecutionError, error) } @@ -25,7 +25,7 @@ type baseErrorRetriever struct { maxPayloadSize int64 } -type SingleFileErrorRetriever struct { +type singleFileErrorRetriever struct { baseErrorRetriever errorFilePath storage.DataReference } @@ -34,8 +34,8 @@ const errorFileNotFoundErrorCode = "ErrorFileNotFound" var ErrRemoteFileExceedsMaxSize = errors.New("remote file exceeds max size") -func NewSingleFileErrorRetriever(errorFilePath storage.DataReference, store storage.ComposedProtobufStore, maxPayloadSize int64) *SingleFileErrorRetriever { - return &SingleFileErrorRetriever{ +func newSingleFileErrorRetriever(errorFilePath storage.DataReference, store storage.ComposedProtobufStore, maxPayloadSize int64) *singleFileErrorRetriever { + return &singleFileErrorRetriever{ baseErrorRetriever: baseErrorRetriever{ store: store, maxPayloadSize: maxPayloadSize, @@ -55,7 +55,7 @@ func (b *baseErrorRetriever) validatePayloadSize(filePath storage.DataReference, return nil } -func (s *SingleFileErrorRetriever) HasError(ctx context.Context) (bool, error) { +func (s *singleFileErrorRetriever) HasError(ctx context.Context) (bool, error) { metadata, err := s.store.Head(ctx, s.errorFilePath) if err != nil { 
return false, errors.Wrapf(err, "failed to read error file @[%s]", s.errorFilePath) @@ -95,7 +95,7 @@ func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage return executionError } -func (s *SingleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { +func (s *singleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { errorDoc := &core.ErrorDocument{} err := s.store.ReadProtobuf(ctx, s.errorFilePath, errorDoc) if err != nil { @@ -115,14 +115,14 @@ func (s *SingleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionEr return errorDoc2ExecutionError(errorDoc, s.errorFilePath), nil } -type EarliestFileErrorRetriever struct { +type earliestFileErrorRetriever struct { baseErrorRetriever errorDirPath storage.DataReference errorFilePathPrefix storage.DataReference errorFileExtension string } -func (e *EarliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) { +func (e *earliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) { hasError := false const maxItems = 1000 cursor := storage.NewCursorAtStart() @@ -150,7 +150,7 @@ func (e *EarliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) return hasError, nil } -func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { +func (e *earliestFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { var earliestTimestamp time.Time = time.Now() earliestExecutionError := io.ExecutionError{} const maxItems = 1000 @@ -181,7 +181,7 @@ func (e *EarliestFileErrorRetriever) GetError(ctx context.Context) (io.Execution return earliestExecutionError, nil } -func NewEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonicalErrorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (*EarliestFileErrorRetriever, error) { +func newEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonicalErrorFilename 
string, store storage.ComposedProtobufStore, maxPayloadSize int64) (*earliestFileErrorRetriever, error) { // If the canonical error file name is error.pb, we expect multiple error files // to have name error.pb pieces := strings.Split(canonicalErrorFilename, ".") @@ -193,7 +193,7 @@ func NewEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonical errorFilePathPrefix := storage.NewDataReference(scheme, container, filepath.Join(key, errorFilePrefix)) errorFileExtension := fmt.Sprintf(".%s", pieces[1]) - return &EarliestFileErrorRetriever{ + return &earliestFileErrorRetriever{ baseErrorRetriever: baseErrorRetriever{ store: store, maxPayloadSize: maxPayloadSize, @@ -204,14 +204,17 @@ func NewEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonical }, nil } -func NewErrorRetriever(errorAggregationStrategy k8s.ErrorAggregationStrategy, errorDirPath storage.DataReference, errorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (ErrorRetriever, error) { +func newErrorRetriever(errorAggregationStrategy k8s.ErrorAggregationStrategy, errorDirPath storage.DataReference, errorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (errorRetriever, error) { if errorAggregationStrategy == k8s.DefaultErrorAggregationStrategy { - scheme, container, key, _ := errorDirPath.Split() + scheme, container, key, err := errorDirPath.Split() + if err != nil { + return nil, errors.Wrapf(err, "invalid error dir path %s", errorDirPath) + } errorFilePath := storage.NewDataReference(scheme, container, filepath.Join(key, errorFilename)) - return NewSingleFileErrorRetriever(errorFilePath, store, maxPayloadSize), nil + return newSingleFileErrorRetriever(errorFilePath, store, maxPayloadSize), nil } if errorAggregationStrategy == k8s.EarliestErrorAggregationStrategy { - return NewEarliestFileErrorRetriever(errorDirPath, errorFilename, store, maxPayloadSize) + return newEarliestFileErrorRetriever(errorDirPath, errorFilename, 
store, maxPayloadSize) } return nil, errors.Errorf("unknown error aggregation strategy: %v", errorAggregationStrategy) } @@ -220,7 +223,7 @@ type RemoteFileOutputReader struct { outPath io.OutputFilePaths store storage.ComposedProtobufStore maxPayloadSize int64 - errorRetriever ErrorRetriever + errorRetriever errorRetriever } func (r RemoteFileOutputReader) IsError(ctx context.Context) (bool, error) { @@ -279,24 +282,36 @@ func (r RemoteFileOutputReader) DeckExists(ctx context.Context) (bool, error) { return md.Exists(), nil } -func NewRemoteFileOutputReader(context context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64) (*RemoteFileOutputReader, error) { - return NewRemoteFileOutputReaderWithErrorAggregationStrategy(context, store, outPaths, maxDatasetSize, k8s.DefaultErrorAggregationStrategy) -} - -func NewRemoteFileOutputReaderWithErrorAggregationStrategy(_ context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64, errorAggregationStrategy k8s.ErrorAggregationStrategy) (*RemoteFileOutputReader, error) { +func getMaxPayloadSize(maxDatasetSize int64) int64 { // Note: even though the data store retrieval checks against GetLimitMegabytes, there might be external // storage implementations, so we keep this check here as well. 
maxPayloadSize := maxDatasetSize if maxPayloadSize == 0 { maxPayloadSize = storage.GetConfig().Limits.GetLimitMegabytes * 1024 * 1024 } + return maxPayloadSize +} + +func NewRemoteFileOutputReader(context context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64) RemoteFileOutputReader { + maxPayloadSize := getMaxPayloadSize(maxDatasetSize) + errorRetriever := newSingleFileErrorRetriever(outPaths.GetErrorPath(), store, maxPayloadSize) + return RemoteFileOutputReader{ + outPath: outPaths, + store: store, + maxPayloadSize: maxPayloadSize, + errorRetriever: errorRetriever, + } +} + +func NewRemoteFileOutputReaderWithErrorAggregationStrategy(_ context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64, errorAggregationStrategy k8s.ErrorAggregationStrategy) (*RemoteFileOutputReader, error) { + maxPayloadSize := getMaxPayloadSize(maxDatasetSize) scheme, container, key, err := outPaths.GetErrorPath().Split() if err != nil { return nil, errors.Wrapf(err, "failed to parse error path %s", outPaths.GetErrorPath()) } errorFilename := filepath.Base(key) errorDirPath := storage.NewDataReference(scheme, container, filepath.Dir(key)) - errorRetriever, err := NewErrorRetriever(errorAggregationStrategy, errorDirPath, errorFilename, store, maxPayloadSize) + errorRetriever, err := newErrorRetriever(errorAggregationStrategy, errorDirPath, errorFilename, store, maxPayloadSize) if err != nil { return nil, errors.Wrapf(err, "failed to create remote output reader with error aggregation strategy %v", errorAggregationStrategy) } diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go index 298baf1915..3e8bd81db2 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go +++ 
b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go @@ -99,13 +99,12 @@ func TestReadOrigin(t *testing.T) { }, nil) maxPayloadSize := int64(0) - r, err := NewRemoteFileOutputReader( + r := NewRemoteFileOutputReader( ctx, store, opath, maxPayloadSize, ) - assert.NoError(t, err) ee, err := r.ReadError(ctx) assert.NoError(t, err) @@ -134,13 +133,12 @@ func TestReadOrigin(t *testing.T) { }).Return(nil) maxPayloadSize := int64(0) - r, err := NewRemoteFileOutputReader( + r := NewRemoteFileOutputReader( ctx, store, opath, maxPayloadSize, ) - assert.NoError(t, err) ee, err := r.ReadError(ctx) assert.NoError(t, err) diff --git a/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go b/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go index 2e4a531fd1..8b2124e1cd 100644 --- a/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go +++ b/flyteplugins/go/tasks/pluginmachinery/k8s/plugin.go @@ -40,6 +40,17 @@ const ( EarliestErrorAggregationStrategy ) +func (e ErrorAggregationStrategy) String() string { + switch e { + case DefaultErrorAggregationStrategy: + return "Default" + case EarliestErrorAggregationStrategy: + return "Earliest" + default: + panic("Unknown enum value, cannot happen") + } +} + // System level properties that this Plugin supports type PluginProperties struct { // Disables the inclusion of OwnerReferences in kubernetes resources that this plugin is responsible for. diff --git a/flyteplugins/go/tasks/plugins/array/catalog.go b/flyteplugins/go/tasks/plugins/array/catalog.go index 70ba9db78f..d6bf5e8820 100644 --- a/flyteplugins/go/tasks/plugins/array/catalog.go +++ b/flyteplugins/go/tasks/plugins/array/catalog.go @@ -567,9 +567,5 @@ func ConstructOutputReader(ctx context.Context, dataStore *storage.DataStore, ou // checkpoint paths are not computed here because this function is only called when writing // existing cached outputs. if this functionality changes this will need to be revisited. 
outputPath := ioutils.NewCheckpointRemoteFilePaths(ctx, dataStore, dataReference, ioutils.NewRawOutputPaths(ctx, outputSandbox), "") - reader, err := ioutils.NewRemoteFileOutputReader(ctx, dataStore, outputPath, int64(999999999)) - if err != nil { - return nil, err - } - return reader, nil + return ioutils.NewRemoteFileOutputReader(ctx, dataStore, outputPath, int64(999999999)), nil } diff --git a/flyteplugins/go/tasks/plugins/array/outputs.go b/flyteplugins/go/tasks/plugins/array/outputs.go index 1c074581c0..cb07fb0de1 100644 --- a/flyteplugins/go/tasks/plugins/array/outputs.go +++ b/flyteplugins/go/tasks/plugins/array/outputs.go @@ -225,10 +225,7 @@ func AssembleFinalOutputs(ctx context.Context, assemblyQueue OutputAssembler, tC switch w.Status() { case workqueue.WorkStatusSucceeded: - or, err := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) - if err != nil { - return nil, err - } + or := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) if err = tCtx.OutputWriter().Put(ctx, or); err != nil { return nil, err } diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go index 84c0ed887a..6d7c80a7fd 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch.go @@ -117,7 +117,7 @@ func (pytorchOperatorResourceHandler) BuildResource(ctx context.Context, taskCtx }) container.Env = append(container.Env, apiv1.EnvVar{ Name: pluginsK8s.FlyteInternalDistErrorStrategyEnvVarKey, - Value: "1", + Value: k8s.EarliestErrorAggregationStrategy.String(), }) } updateEnvVars(&workerReplicaSpec.Template.Spec.Containers[0]) diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go index 0fc913b289..814b340fe6 100644 --- 
a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go @@ -895,7 +895,7 @@ func TestBuildResourcePytorchV1(t *testing.T) { }, { Name: pluginsK8s.FlyteInternalDistErrorStrategyEnvVarKey, - Value: "1", + Value: "Earliest", }, }, workerContainerEnv[len(workerContainerEnv)-2:], diff --git a/flyteplugins/go/tasks/plugins/testing/echo.go b/flyteplugins/go/tasks/plugins/testing/echo.go index 96c7f1c031..09c4dc53b1 100644 --- a/flyteplugins/go/tasks/plugins/testing/echo.go +++ b/flyteplugins/go/tasks/plugins/testing/echo.go @@ -116,10 +116,7 @@ func copyInputsToOutputs(ctx context.Context, tCtx core.TaskExecutionContext) (c return core.UnknownTransition, err } - or, err := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) - if err != nil { - return core.UnknownTransition, err - } + or := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) if err = tCtx.OutputWriter().Put(ctx, or); err != nil { return core.UnknownTransition, err } diff --git a/flyteplugins/go/tasks/plugins/webapi/agent/plugin.go b/flyteplugins/go/tasks/plugins/webapi/agent/plugin.go index 403e49c20b..a7b2a3d1d4 100644 --- a/flyteplugins/go/tasks/plugins/webapi/agent/plugin.go +++ b/flyteplugins/go/tasks/plugins/webapi/agent/plugin.go @@ -370,10 +370,7 @@ func writeOutput(ctx context.Context, taskCtx webapi.StatusContext, outputs *fly opReader = ioutils.NewInMemoryOutputReader(outputs, nil, nil) } else { logger.Debugf(ctx, "AgentDeployment didn't return any output, assuming file based outputs.") - opReader, err = ioutils.NewRemoteFileOutputReader(ctx, taskCtx.DataStore(), taskCtx.OutputWriter(), 0) - if err != nil { - return err - } + opReader = ioutils.NewRemoteFileOutputReader(ctx, taskCtx.DataStore(), taskCtx.OutputWriter(), 0) } return taskCtx.OutputWriter().Put(ctx, opReader) } diff --git a/flyteplugins/go/tasks/plugins/webapi/databricks/plugin.go 
b/flyteplugins/go/tasks/plugins/webapi/databricks/plugin.go index 1390be11ab..6ae9a1dbe5 100644 --- a/flyteplugins/go/tasks/plugins/webapi/databricks/plugin.go +++ b/flyteplugins/go/tasks/plugins/webapi/databricks/plugin.go @@ -304,10 +304,7 @@ func writeOutput(ctx context.Context, taskCtx webapi.StatusContext) error { return nil } - outputReader, err := ioutils.NewRemoteFileOutputReader(ctx, taskCtx.DataStore(), taskCtx.OutputWriter(), 0) - if err != nil { - return err - } + outputReader := ioutils.NewRemoteFileOutputReader(ctx, taskCtx.DataStore(), taskCtx.OutputWriter(), 0) return taskCtx.OutputWriter().Put(ctx, outputReader) } diff --git a/flytepropeller/pkg/controller/nodes/array/handler.go b/flytepropeller/pkg/controller/nodes/array/handler.go index 1ca8e03c23..4fa7b4ab5f 100644 --- a/flytepropeller/pkg/controller/nodes/array/handler.go +++ b/flytepropeller/pkg/controller/nodes/array/handler.go @@ -503,15 +503,8 @@ func (a *arrayNodeHandler) Handle(ctx context.Context, nCtx interfaces.NodeExecu // checkpoint paths are not computed here because this function is only called when writing // existing cached outputs. if this functionality changes this will need to be revisited. 
outputPaths := ioutils.NewCheckpointRemoteFilePaths(ctx, nCtx.DataStore(), subOutputDir, ioutils.NewRawOutputPaths(ctx, subDataDir), "") - reader, err := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) - if err != nil { - gatherOutputsRequest.responseChannel <- struct { - literalMap map[string]*idlcore.Literal - error - }{nil, err} - continue - } - gatherOutputsRequest.reader = reader + reader := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) + gatherOutputsRequest.reader = &reader a.gatherOutputsRequestChannel <- gatherOutputsRequest } diff --git a/flytepropeller/pkg/controller/nodes/cache.go b/flytepropeller/pkg/controller/nodes/cache.go index 5ec0871bd6..e8e7fc3720 100644 --- a/flytepropeller/pkg/controller/nodes/cache.go +++ b/flytepropeller/pkg/controller/nodes/cache.go @@ -216,10 +216,7 @@ func (n *nodeExecutor) WriteCatalogCache(ctx context.Context, nCtx interfaces.No catalogKey.Identifier.Domain, catalogKey.Identifier.Name, catalogKey.Identifier.Version) outputPaths := ioutils.NewReadOnlyOutputFilePaths(ctx, nCtx.DataStore(), nCtx.NodeStatus().GetOutputDir()) - outputReader, err := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) - if err != nil { - return catalog.NewStatus(core.CatalogCacheStatus_CACHE_DISABLED, nil), errors.Wrapf(err, "failed to initialize the remote output file reader") - } + outputReader := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) metadata := catalog.Metadata{ TaskExecutionIdentifier: task.GetTaskExecutionIdentifier(nCtx), } diff --git a/flytepropeller/pkg/controller/workflow/executor_test.go b/flytepropeller/pkg/controller/workflow/executor_test.go index 74cdcebd96..1a804d1e4b 100644 --- a/flytepropeller/pkg/controller/workflow/executor_test.go +++ b/flytepropeller/pkg/controller/workflow/executor_test.go @@ -99,10 +99,7 @@ func (f fakeRemoteWritePlugin) Handle(ctx context.Context, tCtx pluginCore.TaskE o.Literals[k] = l } 
assert.NoError(f.t, tCtx.DataStore().WriteProtobuf(ctx, tCtx.OutputWriter().GetOutputPath(), storage.Options{}, o)) - reader, err := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) - if err != nil { - return trns, err - } + reader := ioutils.NewRemoteFileOutputReader(ctx, tCtx.DataStore(), tCtx.OutputWriter(), 0) assert.NoError(f.t, tCtx.OutputWriter().Put(ctx, reader)) } return trns, err From 7d9dfe20c6017fdfb6402f8aafb1a6a8fe121226 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Thu, 17 Oct 2024 05:16:10 +0000 Subject: [PATCH 23/29] Review comments --- flytepropeller/pkg/controller/nodes/array/handler.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/flytepropeller/pkg/controller/nodes/array/handler.go b/flytepropeller/pkg/controller/nodes/array/handler.go index 4fa7b4ab5f..834a016cb2 100644 --- a/flytepropeller/pkg/controller/nodes/array/handler.go +++ b/flytepropeller/pkg/controller/nodes/array/handler.go @@ -500,10 +500,12 @@ func (a *arrayNodeHandler) Handle(ctx context.Context, nCtx interfaces.NodeExecu }{nil, err} continue } + // checkpoint paths are not computed here because this function is only called when writing // existing cached outputs. if this functionality changes this will need to be revisited. 
outputPaths := ioutils.NewCheckpointRemoteFilePaths(ctx, nCtx.DataStore(), subOutputDir, ioutils.NewRawOutputPaths(ctx, subDataDir), "") reader := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) + gatherOutputsRequest.reader = &reader a.gatherOutputsRequestChannel <- gatherOutputsRequest } From 36e68cd916e9acea166aa334e777a721fdda3a61 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Thu, 17 Oct 2024 05:16:55 +0000 Subject: [PATCH 24/29] Review comments --- flytepropeller/pkg/controller/nodes/dynamic/handler.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/flytepropeller/pkg/controller/nodes/dynamic/handler.go b/flytepropeller/pkg/controller/nodes/dynamic/handler.go index fc9e7ccf82..e23f145bb3 100644 --- a/flytepropeller/pkg/controller/nodes/dynamic/handler.go +++ b/flytepropeller/pkg/controller/nodes/dynamic/handler.go @@ -141,10 +141,7 @@ func (d dynamicNodeTaskNodeHandler) handleDynamicSubNodes(ctx context.Context, n // These outputPaths only reads the output metadata. So the sandbox is completely optional here and hence it is nil. // The sandbox creation as it uses hashing can be expensive and we skip that expense. 
outputPaths := ioutils.NewReadOnlyOutputFilePaths(ctx, nCtx.DataStore(), nCtx.NodeStatus().GetOutputDir()) - outputReader, err := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) - if err != nil { - return handler.UnknownTransition, prevState, err - } + outputReader := ioutils.NewRemoteFileOutputReader(ctx, nCtx.DataStore(), outputPaths, 0) ee, err := d.TaskNodeHandler.ValidateOutput(ctx, nCtx.NodeID(), nCtx.InputReader(), outputReader, nil, nCtx.ExecutionContext().GetExecutionConfig(), nCtx.TaskReader()) From 0ea8096dbdfa64287df408686e25bd514f2fefca Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Thu, 17 Oct 2024 05:50:35 +0000 Subject: [PATCH 25/29] Review comments --- .../pkg/controller/nodes/task/k8s/plugin_manager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go index af8442880e..1d8d5064d9 100644 --- a/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go +++ b/flytepropeller/pkg/controller/nodes/task/k8s/plugin_manager_test.go @@ -165,7 +165,7 @@ func (d *dummyOutputWriter) Put(ctx context.Context, reader io.OutputReader) err } func (d *dummyOutputWriter) GetErrorPath() storage.DataReference { - return "" + return "s3://errors/error.pb" } func getMockTaskContext(initPhase PluginPhase, wantPhase PluginPhase) pluginsCore.TaskExecutionContext { From 0a12e95f4768a57f9770b3f36ec7849827acfe5a Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Fri, 18 Oct 2024 05:06:00 +0000 Subject: [PATCH 26/29] Review comments --- .../go/tasks/pluginmachinery/io/iface.go | 11 +++- .../ioutils/remote_file_output_reader.go | 57 +++++++++---------- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/io/iface.go b/flyteplugins/go/tasks/pluginmachinery/io/iface.go index f876defe5a..1f32717812 100644 --- 
a/flyteplugins/go/tasks/pluginmachinery/io/iface.go +++ b/flyteplugins/go/tasks/pluginmachinery/io/iface.go @@ -27,13 +27,18 @@ type InputReader interface { Get(ctx context.Context) (*core.LiteralMap, error) } -// OutputReader provides an abstracted OutputReader interface. The plugins are responsible to provide -// the implementations for the interface. Some helper implementations can be found in ioutils -type OutputReader interface { +// ErrorReader provides an abstracted error reading interface, which is part of OutputReader below. +type ErrorReader interface { // IsError returns true if an error was detected when reading the output and false if no error was detected IsError(ctx context.Context) (bool, error) // ReadError returns the error as type ExecutionError ReadError(ctx context.Context) (ExecutionError, error) +} + +// OutputReader provides an abstracted OutputReader interface. The plugins are responsible to provide +// the implementations for the interface. Some helper implementations can be found in ioutils +type OutputReader interface { + ErrorReader // IsFile returns true if the outputs are using the OutputFilePaths specified files. 
If so it allows the system to // optimize the reads of the files IsFile(ctx context.Context) bool diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 2e81b8ecec..077b51c5c7 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -15,18 +15,13 @@ import ( "github.com/flyteorg/flyte/flytestdlib/storage" ) -type errorRetriever interface { - HasError(ctx context.Context) (bool, error) - GetError(ctx context.Context) (io.ExecutionError, error) -} - -type baseErrorRetriever struct { +type baseErrorReader struct { store storage.ComposedProtobufStore maxPayloadSize int64 } -type singleFileErrorRetriever struct { - baseErrorRetriever +type singleFileErrorReader struct { + baseErrorReader errorFilePath storage.DataReference } @@ -34,9 +29,9 @@ const errorFileNotFoundErrorCode = "ErrorFileNotFound" var ErrRemoteFileExceedsMaxSize = errors.New("remote file exceeds max size") -func newSingleFileErrorRetriever(errorFilePath storage.DataReference, store storage.ComposedProtobufStore, maxPayloadSize int64) *singleFileErrorRetriever { - return &singleFileErrorRetriever{ - baseErrorRetriever: baseErrorRetriever{ +func newSingleFileErrorReader(errorFilePath storage.DataReference, store storage.ComposedProtobufStore, maxPayloadSize int64) *singleFileErrorReader { + return &singleFileErrorReader{ + baseErrorReader: baseErrorReader{ store: store, maxPayloadSize: maxPayloadSize, }, @@ -44,7 +39,7 @@ func newSingleFileErrorRetriever(errorFilePath storage.DataReference, store stor } } -func (b *baseErrorRetriever) validatePayloadSize(filePath storage.DataReference, metadata storage.Metadata) error { +func (b *baseErrorReader) validatePayloadSize(filePath storage.DataReference, metadata storage.Metadata) error { if metadata.Exists() { if metadata.Size() > 
b.maxPayloadSize { return errors.Wrapf(ErrRemoteFileExceedsMaxSize, @@ -55,7 +50,7 @@ func (b *baseErrorRetriever) validatePayloadSize(filePath storage.DataReference, return nil } -func (s *singleFileErrorRetriever) HasError(ctx context.Context) (bool, error) { +func (s *singleFileErrorReader) IsError(ctx context.Context) (bool, error) { metadata, err := s.store.Head(ctx, s.errorFilePath) if err != nil { return false, errors.Wrapf(err, "failed to read error file @[%s]", s.errorFilePath) @@ -95,7 +90,7 @@ func errorDoc2ExecutionError(errorDoc *core.ErrorDocument, errorFilePath storage return executionError } -func (s *singleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { +func (s *singleFileErrorReader) ReadError(ctx context.Context) (io.ExecutionError, error) { errorDoc := &core.ErrorDocument{} err := s.store.ReadProtobuf(ctx, s.errorFilePath, errorDoc) if err != nil { @@ -115,14 +110,14 @@ func (s *singleFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionEr return errorDoc2ExecutionError(errorDoc, s.errorFilePath), nil } -type earliestFileErrorRetriever struct { - baseErrorRetriever +type earliestFileErrorReader struct { + baseErrorReader errorDirPath storage.DataReference errorFilePathPrefix storage.DataReference errorFileExtension string } -func (e *earliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) { +func (e *earliestFileErrorReader) IsError(ctx context.Context) (bool, error) { hasError := false const maxItems = 1000 cursor := storage.NewCursorAtStart() @@ -150,7 +145,7 @@ func (e *earliestFileErrorRetriever) HasError(ctx context.Context) (bool, error) return hasError, nil } -func (e *earliestFileErrorRetriever) GetError(ctx context.Context) (io.ExecutionError, error) { +func (e *earliestFileErrorReader) ReadError(ctx context.Context) (io.ExecutionError, error) { var earliestTimestamp time.Time = time.Now() earliestExecutionError := io.ExecutionError{} const maxItems = 1000 @@ -181,7 +176,7 @@ 
func (e *earliestFileErrorRetriever) GetError(ctx context.Context) (io.Execution return earliestExecutionError, nil } -func newEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonicalErrorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (*earliestFileErrorRetriever, error) { +func newEarliestFileErrorReader(errorDirPath storage.DataReference, canonicalErrorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (*earliestFileErrorReader, error) { // If the canonical error file name is error.pb, we expect multiple error files // to have name error.pb pieces := strings.Split(canonicalErrorFilename, ".") @@ -193,8 +188,8 @@ func newEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonical errorFilePathPrefix := storage.NewDataReference(scheme, container, filepath.Join(key, errorFilePrefix)) errorFileExtension := fmt.Sprintf(".%s", pieces[1]) - return &earliestFileErrorRetriever{ - baseErrorRetriever: baseErrorRetriever{ + return &earliestFileErrorReader{ + baseErrorReader: baseErrorReader{ store: store, maxPayloadSize: maxPayloadSize, }, @@ -204,17 +199,17 @@ func newEarliestFileErrorRetriever(errorDirPath storage.DataReference, canonical }, nil } -func newErrorRetriever(errorAggregationStrategy k8s.ErrorAggregationStrategy, errorDirPath storage.DataReference, errorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (errorRetriever, error) { +func newErrorReader(errorAggregationStrategy k8s.ErrorAggregationStrategy, errorDirPath storage.DataReference, errorFilename string, store storage.ComposedProtobufStore, maxPayloadSize int64) (io.ErrorReader, error) { if errorAggregationStrategy == k8s.DefaultErrorAggregationStrategy { scheme, container, key, err := errorDirPath.Split() if err != nil { return nil, errors.Wrapf(err, "invalid error dir path %s", errorDirPath) } errorFilePath := storage.NewDataReference(scheme, container, filepath.Join(key, errorFilename)) - return 
newSingleFileErrorRetriever(errorFilePath, store, maxPayloadSize), nil + return newSingleFileErrorReader(errorFilePath, store, maxPayloadSize), nil } if errorAggregationStrategy == k8s.EarliestErrorAggregationStrategy { - return newEarliestFileErrorRetriever(errorDirPath, errorFilename, store, maxPayloadSize) + return newEarliestFileErrorReader(errorDirPath, errorFilename, store, maxPayloadSize) } return nil, errors.Errorf("unknown error aggregation strategy: %v", errorAggregationStrategy) } @@ -223,15 +218,15 @@ type RemoteFileOutputReader struct { outPath io.OutputFilePaths store storage.ComposedProtobufStore maxPayloadSize int64 - errorRetriever errorRetriever + errorReader io.ErrorReader } func (r RemoteFileOutputReader) IsError(ctx context.Context) (bool, error) { - return r.errorRetriever.HasError(ctx) + return r.errorReader.IsError(ctx) } func (r RemoteFileOutputReader) ReadError(ctx context.Context) (io.ExecutionError, error) { - return r.errorRetriever.GetError(ctx) + return r.errorReader.ReadError(ctx) } func (r RemoteFileOutputReader) Exists(ctx context.Context) (bool, error) { @@ -294,12 +289,12 @@ func getMaxPayloadSize(maxDatasetSize int64) int64 { func NewRemoteFileOutputReader(context context.Context, store storage.ComposedProtobufStore, outPaths io.OutputFilePaths, maxDatasetSize int64) RemoteFileOutputReader { maxPayloadSize := getMaxPayloadSize(maxDatasetSize) - errorRetriever := newSingleFileErrorRetriever(outPaths.GetErrorPath(), store, maxPayloadSize) + errorReader := newSingleFileErrorReader(outPaths.GetErrorPath(), store, maxPayloadSize) return RemoteFileOutputReader{ outPath: outPaths, store: store, maxPayloadSize: maxPayloadSize, - errorRetriever: errorRetriever, + errorReader: errorReader, } } @@ -311,7 +306,7 @@ func NewRemoteFileOutputReaderWithErrorAggregationStrategy(_ context.Context, st } errorFilename := filepath.Base(key) errorDirPath := storage.NewDataReference(scheme, container, filepath.Dir(key)) - errorRetriever, err := 
newErrorRetriever(errorAggregationStrategy, errorDirPath, errorFilename, store, maxPayloadSize) + errorReader, err := newErrorReader(errorAggregationStrategy, errorDirPath, errorFilename, store, maxPayloadSize) if err != nil { return nil, errors.Wrapf(err, "failed to create remote output reader with error aggregation strategy %v", errorAggregationStrategy) } @@ -319,6 +314,6 @@ func NewRemoteFileOutputReaderWithErrorAggregationStrategy(_ context.Context, st outPath: outPaths, store: store, maxPayloadSize: maxPayloadSize, - errorRetriever: errorRetriever, + errorReader: errorReader, }, nil } From 4d0ee5d6761593484540122ef433fb2488ba99a2 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Fri, 18 Oct 2024 17:25:44 +0000 Subject: [PATCH 27/29] Review comments --- .../pluginmachinery/ioutils/remote_file_output_reader.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 077b51c5c7..42eee4550a 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -146,7 +146,7 @@ func (e *earliestFileErrorReader) IsError(ctx context.Context) (bool, error) { } func (e *earliestFileErrorReader) ReadError(ctx context.Context) (io.ExecutionError, error) { - var earliestTimestamp time.Time = time.Now() + var earliestTimestamp *time.Time = nil earliestExecutionError := io.ExecutionError{} const maxItems = 1000 cursor := storage.NewCursorAtStart() @@ -167,9 +167,9 @@ func (e *earliestFileErrorReader) ReadError(ctx context.Context) (io.ExecutionEr return io.ExecutionError{}, errors.Wrapf(err, "failed to read error file @[%s]", errorFilePath.String()) } timestamp := errorDoc.Error.GetTimestamp().AsTime() - if earliestTimestamp.After(timestamp) { + if earliestTimestamp == nil || 
earliestTimestamp.After(timestamp) { earliestExecutionError = errorDoc2ExecutionError(errorDoc, errorFilePath) - earliestTimestamp = timestamp + earliestTimestamp = &timestamp } } } From cb24656188aa49aa8a3b7a16dc71f69b7affbe34 Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Fri, 1 Nov 2024 11:55:52 -0700 Subject: [PATCH 28/29] Additional comments --- .../ioutils/remote_file_output_reader.go | 49 ++++++++++++++++--- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 42eee4550a..4fdb6cf92b 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -25,6 +25,48 @@ type singleFileErrorReader struct { errorFilePath storage.DataReference } +type earliestFileErrorReader struct { + baseErrorReader + errorDirPath storage.DataReference + errorFilePathPrefix storage.DataReference + errorFileExtension string +} + +/* + We have a 'single file error reader' and 'earliest file error reader' as two + different strategies for reading task error files. + + Single file error reader is used to check for a single error.pb file uploaded + by a task, and is the default strategy. Earliest file error reader is used to check for + multiple error-.pb files and pick the one that has the earliest error timestamp. + It is used when a distributed task requests earliest timestamp error aggregation + strategy. To support backward compatibility, the earliest file error reader also handles + cases when there is a single error.pb file uploaded by the task. The earliest file + error reader is currently used for the PyTorch plugin. 
+ + A few notes: + + - While the earliest file error reader handles the single error file scenario as well, + it is not set as the default, because its implementation depends on doing a listing operation + on remote storage. We do not want the listing overhead to be paid for the more common case of + having a single error file. + - Under the multiple error aggregation scenario, it is possible that the error aggregation + is performed before all the errors are reported. For PyTorch plugin specifically, the + training operator will mark the job as 'done' when it detects one of the pods as failing. + Once Propeller detects this, it will perform the error aggregation. There is a rare scenario + where the pod that has the earliest error gets delayed in uploading its error file to + remote storage, and the pod that has a later error ends up completing first. If the + training operator's detection of job completion and Propeller's error aggregation happen so + fast that the pod with the earliest error has not yet uploaded its error to remote storage, + we may end up reporting the wrong error. This is highly unlikely in practice. The implementation + we have here is significantly better than the prior behavior of reporting the latest written + error.pb file (as there was a race condition on overwriting error files), which is almost always + not the earliest error. + - The training operator does not have any error aggregation strategy implemented. PyTorch + distributed itself aggregates errors from the trainers running under the same elastic agent, + and reports the earliest error. The aggregation we perform here extends that across pods. 
+*/ + const errorFileNotFoundErrorCode = "ErrorFileNotFound" var ErrRemoteFileExceedsMaxSize = errors.New("remote file exceeds max size") @@ -110,13 +152,6 @@ func (s *singleFileErrorReader) ReadError(ctx context.Context) (io.ExecutionErro return errorDoc2ExecutionError(errorDoc, s.errorFilePath), nil } -type earliestFileErrorReader struct { - baseErrorReader - errorDirPath storage.DataReference - errorFilePathPrefix storage.DataReference - errorFileExtension string -} - func (e *earliestFileErrorReader) IsError(ctx context.Context) (bool, error) { hasError := false const maxItems = 1000 From 2fa02ec1c0a748ade2367921748cd4074e9e992e Mon Sep 17 00:00:00 2001 From: Bugra Gedik Date: Sat, 2 Nov 2024 04:22:04 -0700 Subject: [PATCH 29/29] Backward compatibility test --- .../ioutils/remote_file_output_reader.go | 6 +-- .../ioutils/remote_file_output_reader_test.go | 51 +++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go index 4fdb6cf92b..ae880f3640 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader.go @@ -48,7 +48,7 @@ type earliestFileErrorReader struct { - While the earliest file error reader handles the single error file scenario as well, it is not set as the default, because its implementation depends on doing a listing operation - on remote storage. We do not want the listing overhead to be paid for the more common case of + on remote storage. We do not want the listing overhead to be paid for the more common case of having a single error file. - Under the multiple error aggregation scenario, it is possible that the error aggregation is performed before all the errors are reported. 
For PyTorch plugin specifically, the @@ -57,8 +57,8 @@ type earliestFileErrorReader struct { where the pod that has the earliest error gets delayed in uploading its error file to remote storage, and the pod that has a later error ends up completing first. If the training operator's detection of job completion and Propeller's error aggregation happen so - fast that the pod with the earliest error has not yet uploaded its error to remote storage, - we may end up reporting the wrong error. This is highly unlikely in practice. The implementation + fast that the pod with the earliest error has not yet uploaded its error to remote storage, + we may end up reporting the wrong error. This is highly unlikely in practice. The implementation we have here is significantly better than the prior behavior of reporting the latest written error.pb file (as there was a race condition on overwriting error files), which is almost always not the earliest error. diff --git a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go index 3e8bd81db2..1cd7099f78 100644 --- a/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go +++ b/flyteplugins/go/tasks/pluginmachinery/ioutils/remote_file_output_reader_test.go @@ -209,4 +209,55 @@ func TestReadOrigin(t *testing.T) { assert.Equal(t, timestamppb.New(time.Unix(99, 0)), executionError.Timestamp) assert.False(t, executionError.IsRecoverable) }) + + t.Run("multi-user-error-backward-compat", func(t *testing.T) { + outputPaths := &pluginsIOMock.OutputFilePaths{} + outputPaths.OnGetErrorPath().Return("s3://errors/error.pb") + + store := &storageMocks.ComposedProtobufStore{} + store.OnReadProtobufMatch(mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + errorDoc := &core.ErrorDocument{ + Error: &core.ContainerError{ + Code: "red", + Message: "hi", + Kind: 
core.ContainerError_NON_RECOVERABLE, + Origin: core.ExecutionError_USER, + }, + } + incomingErrorDoc := args.Get(2) + assert.NotNil(t, incomingErrorDoc) + casted := incomingErrorDoc.(*core.ErrorDocument) + casted.Error = errorDoc.Error + }).Return(nil) + + store.OnList(ctx, storage.DataReference("s3://errors/error"), 1000, storage.NewCursorAtStart()).Return( + []storage.DataReference{"error.pb"}, storage.NewCursorAtEnd(), nil) + + store.OnHead(ctx, storage.DataReference("error.pb")).Return(MemoryMetadata{ + exists: true, + }, nil) + + maxPayloadSize := int64(0) + r, err := NewRemoteFileOutputReaderWithErrorAggregationStrategy( + ctx, + store, + outputPaths, + maxPayloadSize, + k8s.EarliestErrorAggregationStrategy, + ) + assert.NoError(t, err) + + hasError, err := r.IsError(ctx) + assert.NoError(t, err) + assert.True(t, hasError) + + executionError, err := r.ReadError(ctx) + assert.NoError(t, err) + assert.Equal(t, core.ExecutionError_USER, executionError.Kind) + assert.Equal(t, "red", executionError.Code) + assert.Equal(t, "hi", executionError.Message) + assert.Equal(t, "", executionError.Worker) + assert.Nil(t, executionError.Timestamp) + assert.False(t, executionError.IsRecoverable) + }) }