From b933238228e09d07cabb2f98ed0f9d83bf727da7 Mon Sep 17 00:00:00 2001 From: Ketan Umare <16888709+kumare3@users.noreply.github.com> Date: Mon, 25 Oct 2021 10:21:04 -0700 Subject: [PATCH] Visibility: Better error message when resource quota is exceeded (#353) --- go.sum | 2 -- .../fakeplugins/next_phase_state_plugin.go | 2 +- .../nodes/task/k8s/plugin_manager.go | 4 ++-- pkg/controller/nodes/task/transformer_test.go | 20 +++++++++++-------- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/go.sum b/go.sum index 7a999ed406..0f9178beb1 100644 --- a/go.sum +++ b/go.sum @@ -77,7 +77,6 @@ github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZ github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbtp2fGCgRFtBroKn4Dk= github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo= github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= -github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DiSiqueira/GoTree v1.0.1-0.20180907134536-53a8e837f295 h1:xJ0dAkuxJXfwdH7IaSzBEbSQxEDz36YUmt7+CB4zoNA= @@ -1261,7 +1260,6 @@ honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -honnef.co/go/tools v0.0.1-2020.1.4 h1:UoveltGrhghAA7ePc+e+QYDHXrBps2PqFZiHkGR/xK8= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= k8s.io/api v0.0.0-20210217171935-8e2decd92398/go.mod h1:60tmSUpHxGPFerNHbo/ayI2lKxvtrhbxFyXuEIWJd78= k8s.io/api v0.18.2/go.mod h1:SJCWI7OLzhZSvbY7U8zwNl9UA4o1fizoug34OV/2r78= diff --git a/pkg/controller/nodes/task/fakeplugins/next_phase_state_plugin.go b/pkg/controller/nodes/task/fakeplugins/next_phase_state_plugin.go index c345d29701..ae8bda577b 100644 --- a/pkg/controller/nodes/task/fakeplugins/next_phase_state_plugin.go +++ b/pkg/controller/nodes/task/fakeplugins/next_phase_state_plugin.go @@ -69,7 +69,7 @@ func (n NextPhaseStatePlugin) Handle(ctx context.Context, tCtx pluginCore.TaskEx case pluginCore.PhaseRunning: return pluginCore.DoTransition(pluginCore.PhaseInfoRunning(s.PhaseVersion, s.TaskInfo)), nil case pluginCore.PhaseWaitingForResources: - return pluginCore.DoTransition(pluginCore.PhaseInfoWaitingForResources(time.Now(), s.PhaseVersion, "waiting")), nil + return pluginCore.DoTransition(pluginCore.PhaseInfoWaitingForResourcesInfo(time.Now(), s.PhaseVersion, "waiting", nil)), nil } return pluginCore.UnknownTransition, nil } diff --git a/pkg/controller/nodes/task/k8s/plugin_manager.go b/pkg/controller/nodes/task/k8s/plugin_manager.go index a25897558b..f14fdf397e 100644 --- a/pkg/controller/nodes/task/k8s/plugin_manager.go +++ b/pkg/controller/nodes/task/k8s/plugin_manager.go @@ -227,11 +227,11 @@ func (e *PluginManager) LaunchResource(ctx context.Context, tCtx pluginsCore.Tas if err != nil && !k8serrors.IsAlreadyExists(err) { if backoff.IsBackoffError(err) { logger.Warnf(ctx, "Failed to launch job, resource quota exceeded. err: %v", err) - return pluginsCore.DoTransition(pluginsCore.PhaseInfoWaitingForResources(time.Now(), pluginsCore.DefaultPhaseVersion, "failed to launch job, resource quota exceeded.")), nil + return pluginsCore.DoTransition(pluginsCore.PhaseInfoWaitingForResourcesInfo(time.Now(), pluginsCore.DefaultPhaseVersion, fmt.Sprintf("Exceeded resourcequota: %s", err.Error()), nil)), nil } else if k8serrors.IsForbidden(err) { if e.backOffController == nil && strings.Contains(err.Error(), "exceeded quota") { logger.Warnf(ctx, "Failed to launch job, resource quota exceeded and the operation is not guarded by back-off. err: %v", err) - return pluginsCore.DoTransition(pluginsCore.PhaseInfoWaitingForResources(time.Now(), pluginsCore.DefaultPhaseVersion, "failed to launch job, resource quota exceeded.")), nil + return pluginsCore.DoTransition(pluginsCore.PhaseInfoWaitingForResourcesInfo(time.Now(), pluginsCore.DefaultPhaseVersion, fmt.Sprintf("Exceeded resourcequota: %s", err.Error()), nil)), nil } return pluginsCore.DoTransition(pluginsCore.PhaseInfoRetryableFailure("RuntimeFailure", err.Error(), nil)), nil } else if k8serrors.IsBadRequest(err) || k8serrors.IsInvalid(err) { diff --git a/pkg/controller/nodes/task/transformer_test.go b/pkg/controller/nodes/task/transformer_test.go index 7add0281c9..4519ad626a 100644 --- a/pkg/controller/nodes/task/transformer_test.go +++ b/pkg/controller/nodes/task/transformer_test.go @@ -103,10 +103,12 @@ func TestToTaskExecutionEvent(t *testing.T) { } tev, err := ToTaskExecutionEvent(ToTaskExecutionEventInputs{ - TaskExecContext: tCtx, - InputReader: in, - OutputWriter: out, - Info: pluginCore.PhaseInfoWaitingForResources(n, 0, "reason"), + TaskExecContext: tCtx, + InputReader: in, + OutputWriter: out, + Info: pluginCore.PhaseInfoWaitingForResourcesInfo(n, 0, "reason", &pluginCore.TaskInfo{ + OccurredAt: &n, + }), NodeExecutionMetadata: &nodeExecutionMetadata, ExecContext: mockExecContext, TaskType: containerTaskType, @@ -254,10 +256,12 @@ func TestToTaskExecutionEventWithParent(t *testing.T) { } tev, err := ToTaskExecutionEvent(ToTaskExecutionEventInputs{ - TaskExecContext: tCtx, - InputReader: in, - OutputWriter: out, - Info: pluginCore.PhaseInfoWaitingForResources(n, 0, "reason"), + TaskExecContext: tCtx, + InputReader: in, + OutputWriter: out, + Info: pluginCore.PhaseInfoWaitingForResourcesInfo(n, 0, "reason", &pluginCore.TaskInfo{ + OccurredAt: &n, + }), NodeExecutionMetadata: &nodeExecutionMetadata, ExecContext: mockExecContext, TaskType: containerTaskType,