Skip to content

Commit

Permalink
Make restore controller adopting the controller-runtime framework.
Browse files Browse the repository at this point in the history
Signed-off-by: Xun Jiang <[email protected]>
  • Loading branch information
Xun Jiang committed Feb 15, 2023
1 parent ef05af1 commit 4edb17e
Show file tree
Hide file tree
Showing 4 changed files with 325 additions and 373 deletions.
138 changes: 77 additions & 61 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ import (
"time"

logrusr "github.com/bombsimon/logrusr/v3"
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
snapshotv1client "github.com/kubernetes-csi/external-snapshotter/client/v4/clientset/versioned"
snapshotv1informers "github.com/kubernetes-csi/external-snapshotter/client/v4/informers/externalversions"
snapshotv1listers "github.com/kubernetes-csi/external-snapshotter/client/v4/listers/volumesnapshot/v1"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus"
Expand All @@ -36,57 +40,46 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
kubeerrs "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/discovery"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

"github.com/vmware-tanzu/velero/pkg/uploader"

snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
snapshotv1client "github.com/kubernetes-csi/external-snapshotter/client/v4/clientset/versioned"
snapshotv1informers "github.com/kubernetes-csi/external-snapshotter/client/v4/informers/externalversions"
snapshotv1listers "github.com/kubernetes-csi/external-snapshotter/client/v4/listers/volumesnapshot/v1"
"k8s.io/utils/clock"
ctrl "sigs.k8s.io/controller-runtime"
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/manager"

"github.com/vmware-tanzu/velero/internal/credentials"
"github.com/vmware-tanzu/velero/internal/storage"
"github.com/vmware-tanzu/velero/internal/util/managercontroller"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/backup"
"github.com/vmware-tanzu/velero/pkg/buildinfo"
"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd"
"github.com/vmware-tanzu/velero/pkg/cmd/util/flag"
"github.com/vmware-tanzu/velero/pkg/cmd/util/signals"

"github.com/vmware-tanzu/velero/pkg/controller"
velerodiscovery "github.com/vmware-tanzu/velero/pkg/discovery"
"github.com/vmware-tanzu/velero/pkg/features"
clientset "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned"
informers "github.com/vmware-tanzu/velero/pkg/generated/informers/externalversions"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/persistence"
"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt"
"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt/process"
"github.com/vmware-tanzu/velero/pkg/podexec"
"github.com/vmware-tanzu/velero/pkg/restore"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/logging"

ctrl "sigs.k8s.io/controller-runtime"

ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/manager"

"github.com/vmware-tanzu/velero/internal/util/managercontroller"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/podvolume"
"github.com/vmware-tanzu/velero/pkg/repository"
repokey "github.com/vmware-tanzu/velero/pkg/repository/keys"

"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/restore"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/logging"
)

const (
Expand Down Expand Up @@ -672,50 +665,12 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
}
}

restoreControllerRunInfo := func() controllerRunInfo {
restorer, err := restore.NewKubernetesRestorer(
s.discoveryHelper,
client.NewDynamicFactory(s.dynamicClient),
s.config.restoreResourcePriorities,
s.kubeClient.CoreV1().Namespaces(),
podvolume.NewRestorerFactory(s.repoLocker, s.repoEnsurer, s.veleroClient, s.kubeClient.CoreV1(),
s.kubeClient.CoreV1(), s.kubeClient, s.sharedInformerFactory.Velero().V1().BackupRepositories().Informer().HasSynced, s.logger),
s.config.podVolumeOperationTimeout,
s.config.resourceTerminatingTimeout,
s.logger,
podexec.NewPodCommandExecutor(s.kubeClientConfig, s.kubeClient.CoreV1().RESTClient()),
s.kubeClient.CoreV1().RESTClient(),
s.credentialFileStore,
s.mgr.GetClient(),
)
cmd.CheckError(err)

restoreController := controller.NewRestoreController(
s.namespace,
s.sharedInformerFactory.Velero().V1().Restores(),
restorer,
s.mgr.GetClient(),
s.logger,
s.logLevel,
newPluginManager,
backupStoreGetter,
s.metrics,
s.config.formatFlag.Parse(),
)

return controllerRunInfo{
controller: restoreController,
numWorkers: defaultControllerWorkers,
}
}

// By far, PodVolumeBackup, PodVolumeRestore, BackupStorageLocation controllers
// are not included in --disable-controllers list.
// This is because of PVB and PVR are used by node agent DaemonSet,
// and BSL controller is mandatory for Velero to work.
enabledControllers := map[string]func() controllerRunInfo{
controller.Backup: backupControllerRunInfo,
controller.Restore: restoreControllerRunInfo,
controller.Backup: backupControllerRunInfo,
}
// Note: all runtime type controllers that can be disabled are grouped separately, below:
enabledRuntimeControllers := map[string]struct{}{
Expand All @@ -726,6 +681,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
controller.BackupDeletion: {},
controller.GarbageCollection: {},
controller.BackupSync: {},
controller.Restore: {},
}

if s.config.restoreOnly {
Expand Down Expand Up @@ -868,6 +824,42 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
}
}

if _, ok := enabledRuntimeControllers[controller.Restore]; ok {
restorer, err := restore.NewKubernetesRestorer(
s.discoveryHelper,
client.NewDynamicFactory(s.dynamicClient),
s.config.restoreResourcePriorities,
s.kubeClient.CoreV1().Namespaces(),
podvolume.NewRestorerFactory(s.repoLocker, s.repoEnsurer, s.veleroClient, s.kubeClient.CoreV1(),
s.kubeClient.CoreV1(), s.kubeClient, s.sharedInformerFactory.Velero().V1().BackupRepositories().Informer().HasSynced, s.logger),
s.config.podVolumeOperationTimeout,
s.config.resourceTerminatingTimeout,
s.logger,
podexec.NewPodCommandExecutor(s.kubeClientConfig, s.kubeClient.CoreV1().RESTClient()),
s.kubeClient.CoreV1().RESTClient(),
s.credentialFileStore,
s.mgr.GetClient(),
)

cmd.CheckError(err)

r := controller.NewRestoreReconciler(
s.namespace,
restorer,
s.mgr.GetClient(),
s.logger,
s.logLevel,
newPluginManager,
backupStoreGetter,
s.metrics,
s.config.formatFlag.Parse(),
)

if err = r.SetupWithManager(s.mgr); err != nil {

}
}

// TODO(2.0): presuming all controllers and resources are converted to runtime-controller
// by v2.0, the block from this line and including the `s.mgr.Start() will be
// deprecated, since the manager auto-starts all the caches. Until then, we need to start the
Expand All @@ -879,6 +871,13 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.mgr.Add(managercontroller.Runnable(controllerRunInfo.controller, controllerRunInfo.numWorkers))
}

// Move the periodical backup and restore metrics computing logic from controllers to here.
// This is due to, after controllers using controller-runtime, controllers doesn't have a
// timer as the before generic-controller, and the backup and restore controller only have
// one length queue, furthermore the backup and restore process could last for a long time.
// Compute the metric here is a better choice.
s.updateTotalRestoreMetric()

s.logger.Info("Server starting...")

if err := s.mgr.Start(s.ctx); err != nil {
Expand All @@ -887,6 +886,23 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
return nil
}

func (s *server) updateTotalRestoreMetric() {
go wait.Until(
func() {
// recompute restore_total metric
restoreList := &velerov1api.RestoreList{}
err := s.mgr.GetClient().List(context.Background(), restoreList, &ctrlclient.ListOptions{})
if err != nil {
s.logger.Error(err, "Error computing restore_total metric")
} else {
s.metrics.SetRestoreTotal(int64(len(restoreList.Items)))
}
},
1*time.Minute,
s.ctx.Done(),
)
}

// removeControllers will remove any controller listed to be disabled from the list
// of controllers to be initialized. First it will check the legacy list of controllers,
// then it will check the new runtime controllers. If both checks fail a match
Expand Down
Loading

0 comments on commit 4edb17e

Please sign in to comment.