Skip to content

Commit

Permalink
Merge pull request #4390 from tejal29/add_infra_errors
Browse files Browse the repository at this point in the history
Refactor Add proto.ActionableErr to diag.Resource and deploy.Resource.Status
  • Loading branch information
tejal29 authored Jun 25, 2020
2 parents 4459eea + 270899b commit fc1b5da
Show file tree
Hide file tree
Showing 16 changed files with 515 additions and 425 deletions.
72 changes: 48 additions & 24 deletions docs/content/en/api/skaffold.swagger.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions docs/content/en/docs/references/api/grpc.md
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,8 @@ skip 408 as STATUSCHECK_UNHEALTH code renumbered as 357 |
| DEVINIT_REGISTER_TEST_DEPS | 702 | Failed to configure watcher for test dependencies in dev loop |
| DEVINIT_REGISTER_DEPLOY_DEPS | 703 | Failed to configure watcher for deploy dependencies in dev loop |
| DEVINIT_REGISTER_CONFIG_DEP | 704 | Failed to configure watcher for Skaffold configuration file. |
| STATUSCHECK_CONTEXT_CANCELLED | 800 | User cancelled the skaffold dev run |
| STATUSCHECK_DEADLINE_EXCEEDED | 801 | Deadline for status check exceeded |



Expand Down
53 changes: 31 additions & 22 deletions pkg/diag/validator/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (p *PodValidator) Validate(ctx context.Context, ns string, opts metav1.List
if po.Kind == "" {
po.Kind = podKind
}
rs = append(rs, NewResourceFromObject(&po, Status(ps.phase), ps.err, ps.statusCode, ps.logs))
rs = append(rs, NewResourceFromObject(&po, Status(ps.phase), ps.ae, ps.logs))
}
return rs, nil
}
Expand Down Expand Up @@ -209,45 +209,52 @@ func processPodEvents(e corev1.EventInterface, pod v1.Pod, ps *podStatus) {
}
switch recentEvent.Reason {
case failedScheduling:
ps.statusCode = proto.StatusCode_STATUSCHECK_FAILED_SCHEDULING
ps.err = fmt.Errorf(recentEvent.Message)
ps.updateAE(proto.StatusCode_STATUSCHECK_FAILED_SCHEDULING, recentEvent.Message)
case unhealthy:
ps.statusCode = proto.StatusCode_STATUSCHECK_UNHEALTHY
ps.err = fmt.Errorf(recentEvent.Message)
ps.updateAE(proto.StatusCode_STATUSCHECK_UNHEALTHY, recentEvent.Message)
default:
// TODO: Add unique error codes for reasons
ps.statusCode = proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT
ps.err = fmt.Errorf("%s: %s", recentEvent.Reason, recentEvent.Message)
ps.updateAE(
proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT,
fmt.Sprintf("%s: %s", recentEvent.Reason, recentEvent.Message),
)
}
}

type podStatus struct {
name string
namespace string
phase string
logs []string
err error
statusCode proto.StatusCode
name string
namespace string
phase string
logs []string
ae proto.ActionableErr
}

func (p *podStatus) isStable() bool {
return p.phase == success || (p.phase == running && p.err == nil)
return p.phase == success || (p.phase == running && p.ae.Message == "")
}

func (p *podStatus) withErrAndLogs(errCode proto.StatusCode, l []string, err error) *podStatus {
p.err = err
p.statusCode = errCode
var msg string
if err != nil {
msg = err.Error()
}
p.updateAE(errCode, msg)
p.logs = l
return p
}

func (p *podStatus) updateAE(errCode proto.StatusCode, msg string) {
p.ae.ErrCode = errCode
p.ae.Message = msg
}

func (p *podStatus) String() string {
switch {
case p.isStable():
return ""
default:
if p.err != nil {
return fmt.Sprintf("%s", p.err)
if p.ae.Message != "" {
return p.ae.Message
}
}
return fmt.Sprintf(actionableMessage, p.namespace, p.name)
Expand Down Expand Up @@ -275,10 +282,12 @@ func extractErrorMessageFromWaitingContainerStatus(po *v1.Pod, c v1.ContainerSta

func newPodStatus(n string, ns string, p string) *podStatus {
return &podStatus{
name: n,
namespace: ns,
phase: p,
statusCode: proto.StatusCode_STATUSCHECK_SUCCESS,
name: n,
namespace: ns,
phase: p,
ae: proto.ActionableErr{
ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS,
},
}
}

Expand Down
75 changes: 54 additions & 21 deletions pkg/diag/validator/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("container foo-container is waiting to start: foo-image can't be pulled"),
proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR, nil)},
proto.ActionableErr{
Message: "container foo-container is waiting to start: foo-image can't be pulled",
ErrCode: proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR,
}, nil)},
},
{
description: "pod is Waiting condition due to ErrImageBackOffPullErr",
Expand Down Expand Up @@ -113,8 +115,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("container foo-container is waiting to start: foo-image can't be pulled"),
proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR, nil)},
proto.ActionableErr{
Message: "container foo-container is waiting to start: foo-image can't be pulled",
ErrCode: proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR,
}, nil)},
},
{
description: "pod is Waiting due to Image Backoff Pull error",
Expand Down Expand Up @@ -142,8 +146,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("container foo-container is waiting to start: foo-image can't be pulled"),
proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR, nil)},
proto.ActionableErr{
Message: "container foo-container is waiting to start: foo-image can't be pulled",
ErrCode: proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR,
}, nil)},
},
{
description: "pod is in Terminated State",
Expand All @@ -158,8 +164,11 @@ func TestRun(t *testing.T) {
Conditions: []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionTrue}},
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Succeeded", nil,
proto.StatusCode_STATUSCHECK_SUCCESS, nil)},
expected: []Resource{NewResource("test", "Pod", "foo", "Succeeded",
proto.ActionableErr{
Message: "",
ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS,
}, nil)},
},
{
description: "pod is in Stable State",
Expand All @@ -180,8 +189,11 @@ func TestRun(t *testing.T) {
},
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Running", nil,
proto.StatusCode_STATUSCHECK_SUCCESS, nil)},
expected: []Resource{NewResource("test", "Pod", "foo", "Running",
proto.ActionableErr{
Message: "",
ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS,
}, nil)},
},
{
description: "pod condition unknown",
Expand All @@ -201,7 +213,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("could not determine"), proto.StatusCode_STATUSCHECK_UNKNOWN, nil)},
proto.ActionableErr{
Message: "could not determine",
ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN,
}, nil)},
},
{
description: "pod could not be scheduled",
Expand All @@ -222,8 +237,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("Unschedulable: 0/2 nodes available: 1 node has disk pressure, 1 node is unreachable"),
proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE, nil)},
proto.ActionableErr{
Message: "Unschedulable: 0/2 nodes available: 1 node has disk pressure, 1 node is unreachable",
ErrCode: proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
}, nil)},
},
{
description: "pod is running but container terminated",
Expand All @@ -248,8 +265,10 @@ func TestRun(t *testing.T) {
output: []byte("main.go:57 \ngo panic"),
},
expected: []Resource{NewResource("test", "Pod", "foo", "Running",
fmt.Errorf("container foo-container terminated with exit code 1"),
proto.StatusCode_STATUSCHECK_CONTAINER_TERMINATED, []string{
proto.ActionableErr{
Message: "container foo-container terminated with exit code 1",
ErrCode: proto.StatusCode_STATUSCHECK_CONTAINER_TERMINATED,
}, []string{
"[foo foo-container] main.go:57 ",
"[foo foo-container] go panic"},
)},
Expand All @@ -276,8 +295,10 @@ func TestRun(t *testing.T) {
err: fmt.Errorf("error"),
},
expected: []Resource{NewResource("test", "pod", "foo", "Running",
fmt.Errorf("container foo-container terminated with exit code 1"),
proto.StatusCode_STATUSCHECK_CONTAINER_TERMINATED, []string{
proto.ActionableErr{
Message: "container foo-container terminated with exit code 1",
ErrCode: proto.StatusCode_STATUSCHECK_CONTAINER_TERMINATED,
}, []string{
"Error retrieving logs for pod foo. Try `kubectl logs foo -n test -c foo-container`"},
)},
},
Expand Down Expand Up @@ -306,7 +327,10 @@ func TestRun(t *testing.T) {
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT, nil)},
proto.ActionableErr{
Message: "eventCode: dummy event",
ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT,
}, nil)},
},
{
description: "pod condition a warning event followed up normal event",
Expand Down Expand Up @@ -338,7 +362,10 @@ func TestRun(t *testing.T) {
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT, nil)},
proto.ActionableErr{
Message: "eventCode: dummy event",
ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT,
}, nil)},
},
{
description: "pod condition a normal event followed by a warning event",
Expand Down Expand Up @@ -370,7 +397,10 @@ func TestRun(t *testing.T) {
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT, nil)},
proto.ActionableErr{
Message: "eventCode: dummy event",
ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT,
}, nil)},
},
{
description: "pod condition a warning event followed up by warning adds last warning seen",
Expand Down Expand Up @@ -402,7 +432,10 @@ func TestRun(t *testing.T) {
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("0/1 nodes are available: 1 node(s) had taint {key: value}, that the pod didn't tolerate"), proto.StatusCode_STATUSCHECK_FAILED_SCHEDULING, nil)},
proto.ActionableErr{
Message: "0/1 nodes are available: 1 node(s) had taint {key: value}, that the pod didn't tolerate",
ErrCode: proto.StatusCode_STATUSCHECK_FAILED_SCHEDULING,
}, nil)},
},
}

Expand Down
28 changes: 16 additions & 12 deletions pkg/diag/validator/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,31 +26,35 @@ import (
)

type Resource struct {
namespace string
kind string
name string
logs []string
status Status
err error
StatusCode proto.StatusCode
namespace string
kind string
name string
logs []string
status Status
ae proto.ActionableErr
}

func (r Resource) Kind() string { return r.kind }
func (r Resource) Name() string { return r.name }
func (r Resource) Namespace() string { return r.namespace }
func (r Resource) Status() Status { return r.status }
func (r Resource) Error() error { return r.err }
func (r Resource) Logs() []string { return r.logs }
func (r Resource) String() string {
if r.namespace == "default" {
return fmt.Sprintf("%s/%s", r.kind, r.name)
}
return fmt.Sprintf("%s:%s/%s", r.namespace, r.kind, r.name)
}
func (r Resource) ActionableError() proto.ActionableErr {
return r.ae
}
func (r Resource) StatusUpdated(another Resource) bool {
return r.ae.ErrCode != another.ae.ErrCode
}

// NewResource creates new Resource of kind
func NewResource(namespace, kind, name string, status Status, err error, statusCode proto.StatusCode, logs []string) Resource {
return Resource{namespace: namespace, kind: kind, name: name, status: status, err: err, StatusCode: statusCode, logs: logs}
func NewResource(namespace, kind, name string, status Status, ae proto.ActionableErr, logs []string) Resource {
return Resource{namespace: namespace, kind: kind, name: name, status: status, ae: ae, logs: logs}
}

// objectWithMetadata is any k8s object that has kind and object metadata.
Expand All @@ -60,6 +64,6 @@ type objectWithMetadata interface {
}

// NewResourceFromObject creates new Resource with fields populated from object metadata.
func NewResourceFromObject(object objectWithMetadata, status Status, err error, statusCode proto.StatusCode, logs []string) Resource {
return NewResource(object.GetNamespace(), object.GetObjectKind().GroupVersionKind().Kind, object.GetName(), status, err, statusCode, logs)
func NewResourceFromObject(object objectWithMetadata, status Status, ae proto.ActionableErr, logs []string) Resource {
return NewResource(object.GetNamespace(), object.GetObjectKind().GroupVersionKind().Kind, object.GetName(), status, ae, logs)
}
7 changes: 4 additions & 3 deletions pkg/diag/validator/resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/GoogleContainerTools/skaffold/proto"
"github.com/GoogleContainerTools/skaffold/testutil"
)

Expand All @@ -42,7 +43,7 @@ func TestNewResource(t *testing.T) {
Name: "foo",
},
},
expected: Resource{"default", "pod", "foo", nil, Status(""), nil, 0},
expected: Resource{"default", "pod", "foo", nil, Status(""), proto.ActionableErr{}},
expectedName: "pod/foo",
},
{
Expand All @@ -54,13 +55,13 @@ func TestNewResource(t *testing.T) {
Name: "bar",
},
},
expected: Resource{"test", "pod", "bar", nil, Status(""), nil, 0},
expected: Resource{"test", "pod", "bar", nil, Status(""), proto.ActionableErr{}},
expectedName: "test:pod/bar",
},
}
for _, test := range tests {
testutil.Run(t, test.description, func(t *testutil.T) {
actual := NewResourceFromObject(test.resource, Status(""), nil, 0, nil)
actual := NewResourceFromObject(test.resource, Status(""), proto.ActionableErr{}, nil)
t.CheckDeepEqual(test.expected, actual, cmp.AllowUnexported(Resource{}))
t.CheckDeepEqual(test.expectedName, actual.String(), cmp.AllowUnexported(Resource{}))
})
Expand Down
Loading

0 comments on commit fc1b5da

Please sign in to comment.