Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix prepare timeout issue #8486

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelogs/unreleased/8486-Lyndon-Li
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix issue #8485, add an accepted time so as to count the prepare timeout
6 changes: 6 additions & 0 deletions pkg/builder/data_download_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ func (d *DataDownloadBuilder) Labels(labels map[string]string) *DataDownloadBuil
return d
}

// Annotations sets the DataDownload's Annotations.
func (d *DataDownloadBuilder) Annotations(annotations map[string]string) *DataDownloadBuilder {
d.object.Annotations = annotations
return d
}

// StartTimestamp sets the DataDownload's StartTimestamp.
func (d *DataDownloadBuilder) StartTimestamp(startTime *metav1.Time) *DataDownloadBuilder {
d.object.Status.StartTimestamp = startTime
Expand Down
6 changes: 6 additions & 0 deletions pkg/builder/data_upload_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@ func (d *DataUploadBuilder) Labels(labels map[string]string) *DataUploadBuilder
return d
}

// Annotations sets the DataUpload's Annotations.
func (d *DataUploadBuilder) Annotations(annotations map[string]string) *DataUploadBuilder {
d.object.Annotations = annotations
return d
}

// Progress sets the DataUpload's Progress.
func (d *DataUploadBuilder) Progress(progress shared.DataMoveOperationProgress) *DataUploadBuilder {
d.object.Status.Progress = progress
Expand Down
19 changes: 11 additions & 8 deletions pkg/controller/data_download_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,11 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
} else if peekErr := r.restoreExposer.PeekExposed(ctx, getDataDownloadOwnerObject(dd)); peekErr != nil {
r.tryCancelAcceptedDataDownload(ctx, dd, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", dd.Namespace, dd.Name, peekErr))
log.Errorf("Cancel dd %s/%s because of expose error %s", dd.Namespace, dd.Name, peekErr)
} else if dd.Status.StartTimestamp != nil {
if time.Since(dd.Status.StartTimestamp.Time) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, dd)
} else if at, found := dd.Annotations[acceptTimeAnnoKey]; found {
if t, err := time.Parse(time.RFC3339, at); err == nil {
if time.Since(t) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, dd)
}
}
}

Expand Down Expand Up @@ -634,12 +636,13 @@ func (r *DataDownloadReconciler) acceptDataDownload(ctx context.Context, dd *vel

updateFunc := func(datadownload *velerov2alpha1api.DataDownload) {
datadownload.Status.Phase = velerov2alpha1api.DataDownloadPhaseAccepted
labels := datadownload.GetLabels()
if labels == nil {
labels = make(map[string]string)
annotations := datadownload.GetAnnotations()
if annotations == nil {
annotations = make(map[string]string)
}
labels[acceptNodeLabelKey] = r.nodeName
datadownload.SetLabels(labels)
annotations[acceptNodeAnnoKey] = r.nodeName
annotations[acceptTimeAnnoKey] = r.Clock.Now().Format(time.RFC3339)
datadownload.SetAnnotations(annotations)
}

succeeded, err := r.exclusiveUpdateDataDownload(ctx, updated, updateFunc)
Expand Down
14 changes: 7 additions & 7 deletions pkg/controller/data_download_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ func TestDataDownloadReconcile(t *testing.T) {
},
{
name: "prepare timeout",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).StartTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Annotations(map[string]string{acceptTimeAnnoKey: (time.Now().Add(-time.Minute * 5)).Format(time.RFC3339)}).Result(),
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Result(),
},
{
Expand Down Expand Up @@ -496,7 +496,7 @@ func TestDataDownloadReconcile(t *testing.T) {

if test.expected != nil {
require.NoError(t, err)
assert.Equal(t, dd.Status.Phase, test.expected.Status.Phase)
assert.Equal(t, test.expected.Status.Phase, dd.Status.Phase)
}

if test.isGetExposeErr {
Expand Down Expand Up @@ -1003,22 +1003,22 @@ func TestAttemptDataDownloadResume(t *testing.T) {
},
{
name: "accepted DataDownload in the current node",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Result(),
cancelledDataDownloads: []string{dataDownloadName},
acceptedDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but is canceled",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Cancel(true).Labels(map[string]string{
acceptNodeLabelKey: "node-1",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Cancel(true).Annotations(map[string]string{
acceptNodeAnnoKey: "node-1",
}).Result(),
acceptedDataDownloads: []string{dataDownloadName},
cancelledDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but cancel fail",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Labels(map[string]string{
acceptNodeLabelKey: "node-1",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Annotations(map[string]string{
acceptNodeAnnoKey: "node-1",
}).Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataDownloads: []string{dataDownloadName},
Expand Down
22 changes: 13 additions & 9 deletions pkg/controller/data_upload_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ import (

const (
dataUploadDownloadRequestor = "snapshot-data-upload-download"
acceptNodeLabelKey = "velero.io/accepted-by"
acceptNodeAnnoKey = "velero.io/accepted-by"
acceptTimeAnnoKey = "velero.io/accepted-at"
DataUploadDownloadFinalizer = "velero.io/data-upload-download-finalizer"
preparingMonitorFrequency = time.Minute
)
Expand Down Expand Up @@ -255,9 +256,11 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
} else if peekErr := ep.PeekExposed(ctx, getOwnerObject(du)); peekErr != nil {
r.tryCancelAcceptedDataUpload(ctx, du, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", du.Namespace, du.Name, peekErr))
log.Errorf("Cancel du %s/%s because of expose error %s", du.Namespace, du.Name, peekErr)
} else if du.Status.StartTimestamp != nil {
if time.Since(du.Status.StartTimestamp.Time) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, du)
} else if at, found := du.Annotations[acceptTimeAnnoKey]; found {
if t, err := time.Parse(time.RFC3339, at); err == nil {
if time.Since(t) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, du)
}
}
}

Expand Down Expand Up @@ -701,12 +704,13 @@ func (r *DataUploadReconciler) acceptDataUpload(ctx context.Context, du *velerov

updateFunc := func(dataUpload *velerov2alpha1api.DataUpload) {
dataUpload.Status.Phase = velerov2alpha1api.DataUploadPhaseAccepted
labels := dataUpload.GetLabels()
if labels == nil {
labels = make(map[string]string)
annotations := dataUpload.GetAnnotations()
if annotations == nil {
annotations = make(map[string]string)
}
labels[acceptNodeLabelKey] = r.nodeName
dataUpload.SetLabels(labels)
annotations[acceptNodeAnnoKey] = r.nodeName
annotations[acceptTimeAnnoKey] = r.Clock.Now().Format(time.RFC3339)
dataUpload.SetAnnotations(annotations)
}

succeeded, err := r.exclusiveUpdateDataUpload(ctx, updated, updateFunc)
Expand Down
10 changes: 5 additions & 5 deletions pkg/controller/data_upload_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ func TestReconcile(t *testing.T) {
},
{
name: "prepare timeout",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).StartTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).Annotations(map[string]string{acceptTimeAnnoKey: (time.Now().Add(-time.Minute * 5)).Format(time.RFC3339)}).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
},
{
Expand Down Expand Up @@ -607,7 +607,7 @@ func TestReconcile(t *testing.T) {
require.Error(t, err)
} else {
require.NoError(t, err)
assert.Equal(t, du.Status.Phase, test.expected.Status.Phase)
assert.Equal(t, test.expected.Status.Phase, du.Status.Phase)
}

if test.expectedProcessed {
Expand Down Expand Up @@ -1071,19 +1071,19 @@ func TestAttemptDataUploadResume(t *testing.T) {
},
{
name: "accepted DataUpload in the current node",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but canceled",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Cancel(true).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Cancel(true).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but update error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataUploads: []string{dataUploadName},
},
Expand Down
Loading