From a787ee35295e990e748f8537a06a1ede1ebd2ba0 Mon Sep 17 00:00:00 2001
From: Shawn Sun <32376495+ssz1997@users.noreply.github.com>
Date: Tue, 12 Apr 2022 12:56:49 -0700
Subject: [PATCH] SPDI-40020. Backport: Support CSI starting AlluxioFuse process in a separate pod

Make CSI launch a separate pod running the AlluxioFuse process, instead of
launching the AlluxioFuse process inside the CSI nodeserver container.

If the nodeserver container or the node-plugin pod goes down for any reason,
the AlluxioFuse process is lost and it is cumbersome to bring it back. With a
separate Fuse pod, the CSI pod no longer affects the Fuse process.

Solves #14917

1. Removed `javaOptions` from the `csi` section in `values.yaml`. Alluxio
   properties in the Helm chart should be organized in one place, not split
   between `properties` and `csi`.
2. Added the property `mountInPod` to the `csi` section. If set to `true`,
   the Fuse process is launched in a separate pod.

pr-link: Alluxio/alluxio#15221
change-id: cid-b6897172e11f80618decbfdc0758423e71aa387e
---
 .../deploy/Running-Alluxio-On-Kubernetes.md   |  16 +-
 .../docker/csi/alluxio/controllerserver.go    |   5 +-
 integration/docker/csi/alluxio/driver.go      |  11 +-
 integration/docker/csi/alluxio/nodeserver.go  | 262 ++++++++++++++++--
 integration/docker/csi/main.go                |  28 +-
 integration/kubernetes/CSI_README.md          |  90 ------
 .../helm-chart/alluxio/CHANGELOG.md           |  29 ++
 .../kubernetes/helm-chart/alluxio/Chart.yaml  |   4 +-
 .../templates/csi/controller-rbac.yaml        |   3 +
 .../alluxio/templates/csi/controller.yaml     |   1 +
 .../alluxio/templates/csi/fuse-configmap.yaml |  90 ++++++
 .../alluxio/templates/csi/nodeplugin.yaml     |  21 +-
 .../helm-chart/alluxio/templates/csi/pv.yaml  |   1 +
 .../alluxio/templates/csi/storage-class.yaml  |   1 +
 .../kubernetes/helm-chart/alluxio/values.yaml |  12 +-
 integration/kubernetes/helm-generate.sh       |   1 +
 16 files changed, 447 insertions(+), 128 deletions(-)
 delete mode 100644 integration/kubernetes/CSI_README.md
 create mode 100644 integration/kubernetes/helm-chart/alluxio/templates/csi/fuse-configmap.yaml

diff --git a/docs/en/deploy/Running-Alluxio-On-Kubernetes.md b/docs/en/deploy/Running-Alluxio-On-Kubernetes.md
index 8c8239c42b53..033b3af6b3df 100644
--- a/docs/en/deploy/Running-Alluxio-On-Kubernetes.md
+++ b/docs/en/deploy/Running-Alluxio-On-Kubernetes.md
@@ -1406,12 +1406,12 @@ Here are some common properties that you can customize:
     The path in Alluxio which will be mounted
-    mountPath
-    The path that Alluxio will be mounted to in the application container
+    mountInPod
+    Set to true to launch the Fuse process in a separate alluxio-fuse pod.
+    Otherwise, the Fuse process runs in the same container as the CSI nodeserver.
-    javaOptions
-    The customized options which will be passes to fuse daemon
+    mountPath
+    The path that Alluxio will be mounted to in the application container
     mountOptions
@@ -1429,11 +1429,12 @@ Modify or add any configuration properties as required, then create the respecti
 $ mv alluxio-csi-controller-rbac.yaml.template alluxio-csi-controller-rbac.yaml
 $ mv alluxio-csi-controller.yaml.template alluxio-csi-controller.yaml
 $ mv alluxio-csi-driver.yaml.template alluxio-csi-driver.yaml
+$ mv alluxio-csi-fuse-configmap.yaml.template alluxio-csi-fuse-configmap.yaml
 $ mv alluxio-csi-nodeplugin.yaml.template alluxio-csi-nodeplugin.yaml
 ```
 Then run
 ```console
-$ kubectl apply -f alluxio-csi-controller-rbac.yaml -f alluxio-csi-controller.yaml -f alluxio-csi-driver.yaml -f alluxio-csi-nodeplugin.yaml
+$ kubectl apply -f alluxio-csi-controller-rbac.yaml -f alluxio-csi-controller.yaml -f alluxio-csi-driver.yaml -f alluxio-csi-fuse-configmap.yaml -f alluxio-csi-nodeplugin.yaml
 ```
 to deploy CSI-related services.
@@ -1460,6 +1461,11 @@ $ kubectl apply -f alluxio-pvc-static.yaml
 ```
 to deploy the resources.
+Note: If `mountInPod` is set to `true`, the value of `spec.csi.volumeHandle` in `alluxio-pv.yaml`
+needs to be unique so that CSI can identify different volumes. If two PVs have the same
+`volumeHandle`, CSI regards them as the same volume and thus may not launch a Fuse pod for
+one of them, which affects the application pods using that volume.
+
 {% endnavtab %}
 {% navtab Dynamic Volume Provisioning %}

diff --git a/integration/docker/csi/alluxio/controllerserver.go b/integration/docker/csi/alluxio/controllerserver.go
index 61fc9b388c29..8b2ac11ccbbe 100644
--- a/integration/docker/csi/alluxio/controllerserver.go
+++ b/integration/docker/csi/alluxio/controllerserver.go
@@ -30,6 +30,10 @@ type controllerServer struct {
 	*csicommon.DefaultControllerServer
 }
 
+/*
+ * When using dynamic provisioning, CreateVolume() is called when a PVC is created and matches one of the storage classes.
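+ * With static provisioning (a pre-created PersistentVolume), CreateVolume() is not invoked;
+ * the volume is defined directly by the user in the PV spec.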
+ */
+
 func (cs *controllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error) {
 	volumeID := sanitizeVolumeID(req.GetName())
@@ -122,7 +126,6 @@ func (cs *controllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
 		glog.V(3).Infof("Invalid delete volume req: %v", req)
 		return nil, err
 	}
-	glog.V(4).Infof("Deleting volume %s", volumeID)
 	return &csi.DeleteVolumeResponse{}, nil
 }

diff --git a/integration/docker/csi/alluxio/driver.go b/integration/docker/csi/alluxio/driver.go
index d457e82f5e50..0b042c25e6fa 100644
--- a/integration/docker/csi/alluxio/driver.go
+++ b/integration/docker/csi/alluxio/driver.go
@@ -15,6 +15,7 @@ import (
 	"github.com/container-storage-interface/spec/lib/go/csi"
 	"github.com/golang/glog"
 	csicommon "github.com/kubernetes-csi/drivers/pkg/csi-common"
+	"k8s.io/client-go/kubernetes"
 )
 
 const (
@@ -23,11 +24,13 @@ const (
 )
 
 type driver struct {
-	csiDriver        *csicommon.CSIDriver
-	nodeId, endpoint string
+	csiDriver *csicommon.CSIDriver
+	endpoint  string
+	client    kubernetes.Clientset
+	nodeId    string
 }
 
-func NewDriver(nodeID, endpoint string) *driver {
+func NewDriver(nodeID, endpoint string, client kubernetes.Clientset) *driver {
 	glog.Infof("Driver: %v version: %v", driverName, version)
 	csiDriver := csicommon.NewCSIDriver(driverName, version, nodeID)
 	csiDriver.AddControllerServiceCapabilities([]csi.ControllerServiceCapability_RPC_Type{csi.ControllerServiceCapability_RPC_CREATE_DELETE_VOLUME})
@@ -37,6 +40,7 @@ func NewDriver(nodeID, endpoint string) *driver {
 		nodeId:    nodeID,
 		endpoint:  endpoint,
 		csiDriver: csiDriver,
+		client:    client,
 	}
 }
 
@@ -49,6 +53,7 @@ func (d *driver) newNodeServer() *nodeServer {
 	return &nodeServer{
 		nodeId:            d.nodeId,
 		DefaultNodeServer: csicommon.NewDefaultNodeServer(d.csiDriver),
+		client:            d.client,
 	}
 }

diff --git a/integration/docker/csi/alluxio/nodeserver.go b/integration/docker/csi/alluxio/nodeserver.go
index aeb2349ce157..971ffd3ceda4 100644
--- a/integration/docker/csi/alluxio/nodeserver.go
+++ b/integration/docker/csi/alluxio/nodeserver.go
@@ -12,9 +12,14 @@ package alluxio
 
 import (
+	"fmt"
+	"io/ioutil"
 	"os"
 	"os/exec"
+	"strconv"
 	"strings"
+	"sync"
+	"time"
 
 	"github.com/container-storage-interface/spec/lib/go/csi"
 	"github.com/golang/glog"
@@ -22,27 +27,48 @@ import (
 	"golang.org/x/net/context"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/kubernetes/scheme"
 	mount "k8s.io/mount-utils"
 )
 
 type nodeServer struct {
-	nodeId string
+	client kubernetes.Clientset
 	*csicommon.DefaultNodeServer
+	nodeId string
+	mutex  sync.Mutex
 }
 
+/*
+ * When no app pod is using the PV yet, the first app pod that uses it triggers NodeStageVolume().
+ * Only after a successful return is NodePublishVolume() called.
+ * When the PV is already in use and a new app pod mounts it, only NodePublishVolume() is triggered.
+ *
+ * NodeUnpublishVolume() and NodeUnstageVolume() are the opposites of NodePublishVolume() and NodeStageVolume().
+ * When the PV is still in use by other pods after an app pod terminates, only NodeUnpublishVolume() is called.
+ * When the PV is no longer in use after an app pod terminates, NodeUnpublishVolume() is called. Only after a successful
+ * return is NodeUnstageVolume() called.
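+ *
+ * An illustrative sequence for two app pods on the same node sharing one PV:
+ *   pod A starts -> NodeStageVolume() -> NodePublishVolume() for pod A
+ *   pod B starts -> NodePublishVolume() for pod B
+ *   pod A stops  -> NodeUnpublishVolume() for pod A
+ *   pod B stops  -> NodeUnpublishVolume() for pod B -> NodeUnstageVolume()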
+ * + * For more detailed CSI doc, refer to https://github.com/container-storage-interface/spec/blob/master/spec.md + */ + func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) { + if req.GetVolumeContext()["mountInPod"] == "true" { + glog.V(4).Infoln("Bind mount staging path (global mount point) to target path (pod volume path).") + return bindMountGlobalMountPointToPodVolPath(req) + } + glog.V(4).Infoln("Mount Alluxio to target path (pod volume path) with AlluxioFuse in CSI node server.") + return newFuseProcessInNodeServer(req) +} + +func newFuseProcessInNodeServer(req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) { targetPath := req.GetTargetPath() - notMnt, err := mount.New("").IsLikelyNotMountPoint(targetPath) + notMnt, err := ensureMountPoint(targetPath) if err != nil { - if os.IsNotExist(err) { - if err := os.MkdirAll(targetPath, 0750); err != nil { - return nil, status.Error(codes.Internal, err.Error()) - } - notMnt = true - } else { - return nil, status.Error(codes.Internal, err.Error()) - } + return nil, status.Error(codes.Internal, err.Error()) } if !notMnt { @@ -94,34 +120,230 @@ func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis return &csi.NodePublishVolumeResponse{}, nil } -func (ns *nodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) { +func bindMountGlobalMountPointToPodVolPath(req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) { targetPath := req.GetTargetPath() + stagingPath := req.GetStagingTargetPath() + + notMnt, err := ensureMountPoint(targetPath) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + if !notMnt { + glog.V(4).Infoln("target path is already mounted") + return &csi.NodePublishVolumeResponse{}, nil + } - command := exec.Command("/opt/alluxio/integration/fuse/bin/alluxio-fuse", - "unmount", targetPath, - ) + args := []string{"--bind", stagingPath, targetPath} + command := exec.Command("mount", args...) 
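+	// This bind mount exposes the Alluxio Fuse mount at the staging path (the volume's global mount
+	// point, typically under /var/lib/kubelet/plugins/kubernetes.io/csi/) at the target path, i.e. the
+	// pod's volume directory. The exact kubelet paths are illustrative and depend on the kubelet setup.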
+	glog.V(4).Infoln(command)
+	stdoutStderr, err := command.CombinedOutput()
+	glog.V(4).Infoln(string(stdoutStderr))
+	if err != nil {
+		if os.IsPermission(err) {
+			return nil, status.Error(codes.PermissionDenied, err.Error())
+		}
+		if strings.Contains(err.Error(), "invalid argument") {
+			return nil, status.Error(codes.InvalidArgument, err.Error())
+		}
+		return nil, status.Error(codes.Internal, err.Error())
+	}
+	return &csi.NodePublishVolumeResponse{}, nil
+}
+
+func (ns *nodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) {
+	targetPath := req.GetTargetPath()
+	command := exec.Command("/opt/alluxio/integration/fuse/bin/alluxio-fuse", "umount", targetPath)
+	glog.V(4).Infoln(command)
 	stdoutStderr, err := command.CombinedOutput()
 	if err != nil {
 		glog.V(3).Infoln(err)
 	}
 	glog.V(4).Infoln(string(stdoutStderr))
 
-	err = mount.CleanupMountPoint(req.GetTargetPath(), mount.New(""), false)
+	err = mount.CleanupMountPoint(targetPath, mount.New(""), false)
 	if err != nil {
 		glog.V(3).Infoln(err)
+	} else {
+		glog.V(4).Infof("Succeeded in unmounting %s", targetPath)
 	}
-
 	return &csi.NodeUnpublishVolumeResponse{}, nil
 }
 
-func (ns *nodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error) {
-	return &csi.NodeUnstageVolumeResponse{}, nil
+func (ns *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error) {
+	if req.GetVolumeContext()["mountInPod"] != "true" {
+		return &csi.NodeStageVolumeResponse{}, nil
+	}
+	ns.mutex.Lock()
+	defer ns.mutex.Unlock()
+
+	glog.V(4).Infoln("Creating Alluxio-fuse pod and mounting Alluxio to global mount point.")
+	fusePod, err := getAndCompleteFusePodObj(ns.nodeId, req)
+	if err != nil {
+		return nil, err
+	}
+	if _, err := ns.client.CoreV1().Pods(os.Getenv("NAMESPACE")).Create(fusePod); err != nil {
+		if strings.Contains(err.Error(), "already exists") {
+			glog.V(4).Infof("Fuse pod %s already exists.", fusePod.Name)
+			return &csi.NodeStageVolumeResponse{}, nil
+		}
+		return nil, status.Errorf(codes.Internal, "Failed to launch Fuse Pod at %v.\n%v", ns.nodeId, err.Error())
+	}
+	glog.V(4).Infoln("Successfully created Fuse pod.")
+
+	// Wait for the alluxio-fuse pod to finish mounting to the global mount point
+	retry, err := strconv.Atoi(os.Getenv("FAILURE_THRESHOLD"))
+	if err != nil {
+		return nil, status.Errorf(codes.InvalidArgument, "Cannot convert failure threshold %v to int.", os.Getenv("FAILURE_THRESHOLD"))
+	}
+	timeout, err := strconv.Atoi(os.Getenv("PERIOD_SECONDS"))
+	if err != nil {
+		return nil, status.Errorf(codes.InvalidArgument, "Cannot convert period seconds %v to int.", os.Getenv("PERIOD_SECONDS"))
+	}
+	for i := 0; i < retry; i++ {
+		time.Sleep(time.Duration(timeout) * time.Second)
+		command := exec.Command("bash", "-c", fmt.Sprintf("mount | grep %v | grep alluxio-fuse", req.GetStagingTargetPath()))
+		stdout, err := command.CombinedOutput()
+		if err != nil {
+			glog.V(3).Infoln(fmt.Sprintf("Alluxio is not mounted in %v seconds.", i*timeout))
+		}
+		if len(stdout) > 0 {
+			return &csi.NodeStageVolumeResponse{}, nil
+		}
+	}
+	glog.V(3).Infoln(fmt.Sprintf("Timed out. 
Alluxio-fuse is not mounted to global mount point in %vs.", (retry-1)*timeout)) + return nil, status.Error(codes.DeadlineExceeded, fmt.Sprintf("alluxio-fuse is not mounted to global mount point in %vs", (retry-1)*timeout)) } -func (ns *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error) { - return &csi.NodeStageVolumeResponse{}, nil +func getAndCompleteFusePodObj(nodeId string, req *csi.NodeStageVolumeRequest) (*v1.Pod, error) { + csiFusePodObj, err := getFusePodObj() + if err != nil { + return nil, err + } + + // Append volumeId to pod name for uniqueness + csiFusePodObj.Name = csiFusePodObj.Name + "-" + req.GetVolumeId() + + // Set node name for scheduling + csiFusePodObj.Spec.NodeName = nodeId + + // Set Alluxio path to be mounted + targetPath := req.GetVolumeContext()["alluxioPath"] + if targetPath == "" { + targetPath = "/" + } + source := v1.EnvVar{Name: "FUSE_ALLUXIO_PATH", Value: targetPath} + csiFusePodObj.Spec.Containers[0].Env = append(csiFusePodObj.Spec.Containers[0].Env, source) + + // Set mount path provided by CSI + mountPoint := v1.EnvVar{Name: "MOUNT_POINT", Value: req.GetStagingTargetPath()} + csiFusePodObj.Spec.Containers[0].Env = append(csiFusePodObj.Spec.Containers[0].Env, mountPoint) + + // Set pre-stop command (umount) in pod lifecycle + lifecycle := &v1.Lifecycle{ + PreStop: &v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"/opt/alluxio/integration/fuse/bin/alluxio-fuse", "unmount", req.GetStagingTargetPath()}, + }, + }, + } + csiFusePodObj.Spec.Containers[0].Lifecycle = lifecycle + + // Set fuse mount options + fuseOptsStr := strings.Join(req.GetVolumeCapability().GetMount().GetMountFlags(), ",") + csiFusePodObj.Spec.Containers[0].Args = append(csiFusePodObj.Spec.Containers[0].Args, "--fuse-opts="+fuseOptsStr) + + // Update ALLUXIO_FUSE_JAVA_OPTS to include csi client java options + alluxioCSIFuseJavaOpts := + strings.Join([]string{os.Getenv("ALLUXIO_FUSE_JAVA_OPTS"), req.GetVolumeContext()["javaOptions"]}, " ") + alluxioFuseJavaOptsEnv := v1.EnvVar{Name: "ALLUXIO_FUSE_JAVA_OPTS", Value: alluxioCSIFuseJavaOpts} + csiFusePodObj.Spec.Containers[0].Env = append(csiFusePodObj.Spec.Containers[0].Env, alluxioFuseJavaOptsEnv) + + return csiFusePodObj, nil +} + +func (ns *nodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error) { + podName := "alluxio-fuse-" + req.GetVolumeId() + if err := ns.client.CoreV1().Pods(os.Getenv("NAMESPACE")).Delete(podName, &metav1.DeleteOptions{}); err != nil { + if strings.Contains(err.Error(), "not found") { + // Pod not found. Try to clean up the mount point. 
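+			// The Fuse pod may already be gone (e.g. deleted out of band or evicted), so make a
+			// best-effort unmount of the staging path to avoid leaving a stale mount behind.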
+ command := exec.Command("umount", req.GetStagingTargetPath()) + glog.V(4).Infoln(command) + stdoutStderr, err := command.CombinedOutput() + if err != nil { + glog.V(3).Infoln(err) + } + glog.V(4).Infoln(string(stdoutStderr)) + return &csi.NodeUnstageVolumeResponse{}, nil + } + return nil, status.Error(codes.Internal, fmt.Sprintf("Error deleting fuse pod %v\n%v", podName, err.Error())) + } + return &csi.NodeUnstageVolumeResponse{}, nil } func (ns *nodeServer) NodeExpandVolume(ctx context.Context, req *csi.NodeExpandVolumeRequest) (*csi.NodeExpandVolumeResponse, error) { return nil, status.Error(codes.Unimplemented, "") } + +func (ns *nodeServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCapabilitiesRequest) (*csi.NodeGetCapabilitiesResponse, error) { + return &csi.NodeGetCapabilitiesResponse{ + Capabilities: []*csi.NodeServiceCapability{ + { + Type: &csi.NodeServiceCapability_Rpc{ + Rpc: &csi.NodeServiceCapability_RPC{ + Type: csi.NodeServiceCapability_RPC_STAGE_UNSTAGE_VOLUME, + }, + }, + }, + }, + }, nil +} + +func isCorruptedDir(dir string) bool { + pathExists, pathErr := mount.PathExists(dir) + glog.V(3).Infoln("isCorruptedDir(%s) returned with error: (%v, %v)\\n", dir, pathExists, pathErr) + return pathErr != nil && mount.IsCorruptedMnt(pathErr) +} + +func ensureMountPoint(targetPath string) (bool, error) { + mounter := mount.New(targetPath) + notMnt, err := mounter.IsLikelyNotMountPoint(targetPath) + + if err == nil { + return notMnt, nil + } + if err != nil && os.IsNotExist(err) { + if err := os.MkdirAll(targetPath, 0750); err != nil { + return notMnt, err + } + return true, nil + } + if isCorruptedDir(targetPath) { + glog.V(3).Infoln("detected corrupted mount for targetPath [%s]", targetPath) + if err := mounter.Unmount(targetPath); err != nil { + glog.V(3).Infoln("failed to umount corrupted path [%s]", targetPath) + return false, err + } + return true, nil + } + return notMnt, err +} + +func getFusePodObj() (*v1.Pod, error) { + csiFuseYaml, err := ioutil.ReadFile("/opt/alluxio/integration/kubernetes/csi/alluxio-csi-fuse.yaml") + if err != nil { + glog.V(3).Info("csi-fuse config yaml file not found") + return nil, status.Errorf(codes.NotFound, "csi-fuse config yaml file not found: %v", err.Error()) + } + csiFuseObj, grpVerKind, err := scheme.Codecs.UniversalDeserializer().Decode(csiFuseYaml, nil, nil) + if err != nil { + glog.V(3).Info("Failed to decode csi-fuse config yaml file") + return nil, status.Errorf(codes.Internal, "Failed to decode csi-fuse config yaml file.\n", err.Error()) + } + // Only support Fuse Pod + if grpVerKind.Kind != "Pod" { + glog.V(3).Info("csi-fuse only support pod. %v found.") + return nil, status.Errorf(codes.InvalidArgument, "csi-fuse only support Pod. 
%v found.\n%v", grpVerKind.Kind, err.Error()) + } + return csiFuseObj.(*v1.Pod), nil +} diff --git a/integration/docker/csi/main.go b/integration/docker/csi/main.go index 333bb9f5944e..9ad1899f1757 100644 --- a/integration/docker/csi/main.go +++ b/integration/docker/csi/main.go @@ -14,6 +14,10 @@ package main import ( "flag" "fmt" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" "os" "os/signal" "syscall" @@ -63,8 +67,12 @@ func main() { func handle() { startReaper() - - d := alluxio.NewDriver(nodeID, endpoint) + client, err := newKubeClient() + if err != nil { + glog.Fatalf("Error starting kubeClient") + return + } + d := alluxio.NewDriver(nodeID, endpoint, *client) d.Run() } @@ -94,3 +102,19 @@ func startReaper() { } }() } + +func newKubeClient() (*kubernetes.Clientset, error) { + // Use the inClusterConfig because k8s worker machines may not have .kube config file + config, err := clientcmd.BuildConfigFromFlags("", "") + if err != nil { + return nil, status.Errorf(codes.Internal, "error getting inClusterConfig file.\n %v", err.Error()) + } + + // create the clientset + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, status.Errorf(codes.Internal, "error creating clientset through inClusterConfig file.\n %v", err.Error()) + } + + return clientset, nil +} diff --git a/integration/kubernetes/CSI_README.md b/integration/kubernetes/CSI_README.md deleted file mode 100644 index 922721be3bdf..000000000000 --- a/integration/kubernetes/CSI_README.md +++ /dev/null @@ -1,90 +0,0 @@ -# Alluxio CSI - -This module implement container storage interface(https://github.com/container-storage-interface/spec) for Alluxio. -The related source code is at `${ALLUXIO_HOME}/integration/docker/csi`. - -## Requirements - -Kubernetes 1.14 or higher, RBAC enabled in API server(https://kubernetes.io/docs/reference/access-authn-authz/rbac/). - -## Usage - - -### Docker image - -The official `alluxio/alluxio` docker image now supports CSI. By default Kubernetes will pull the latest published `alluxio/alluxio` image on Dockerhub. - -Alternatively you can build the docker image yourself following the instructions in `${ALLUXIO_HOME}/integration/docker/README.md`, -section `Building docker image for production` or `Building docker image for development` depending on your needs. Make sure you refer to the -built image in the CSI deploying yaml files. - -### Deploy - -Please use `helm-generate.sh` to generate related templates. All CSI related templates will be under `${ALLUXIO_HOME}/integration/kubernetes//csi` folder. - -You need to deploy `alluxio-csi-controller`, `alluxio-csi-nodeplugin`, `alluxio-csi-driver` before mounting volume via CSI. - -The generated templates provide two types of provisioning methods. For static provisioning, you need to deploy `alluxio-pv.yaml` (a persistent volume) and -`alluxio-pvc-static.yaml` (a persistent volume claim) first. For dynamic provisioning, you need to deploy `alluxio-storage-class.yaml` and `alluxio-pvc.yaml` first. - -To deploy any service, run `kubectl apply -f /file/path` - -### Configuration - -You can customize alluxio volumes via several configurations. 
- -The options you can customize: -| Options | Description | -| --- | --- | -| `alluxioPath` | The path in alluxio | -| `javaOptions` | The customized options which will be passes to fuse daemon | -| `mountOptions` | Alluxio fuse mount options | - -If you use dynamic provisioning, please put your cutomized parameters under `StorageClass.parameters` and `StorageClass.mountOptions`. - -An example of Alluxio StorageClass Spec: -```yaml -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: alluxio -provisioner: alluxio -parameters: - alluxioPath: /data - javaOptions: "-Dalluxio.user.metadata.cache.enabled=true " -volumeBindingMode: Immediate -mountOptions: - - kernel_cache - - allow_other - - entry_timeout=36000 - - attr_timeout=36000 - - max_readahead=0 -``` - -If you use static provisioning, you can customize these options in `PersistentVolume.spec.csi.volumeAttributes` and `PersistentVolume.spec.mountOptions` - -An example of Alluxio PersistentVolume Spec: -```yaml -apiVersion: v1 -kind: PersistentVolume -metadata: - name: alluxio-pv - labels: - name: alluxio-pv -spec: - accessModes: - - ReadWriteMany - capacity: - storage: 100Gi - csi: - driver: alluxio - volumeHandle: alluxio - volumeAttributes: - alluxioPath: /data - javaOptions: "-Dalluxio.user.metadata.cache.enabled=true " - mountOptions: - - kernel_cache - - allow_other - - entry_timeout=36000 - - attr_timeout=36000 -``` diff --git a/integration/kubernetes/helm-chart/alluxio/CHANGELOG.md b/integration/kubernetes/helm-chart/alluxio/CHANGELOG.md index f03756a9f657..5dd035c0de48 100644 --- a/integration/kubernetes/helm-chart/alluxio/CHANGELOG.md +++ b/integration/kubernetes/helm-chart/alluxio/CHANGELOG.md @@ -233,3 +233,32 @@ - Enable mounting a specific directory in Alluxio through Fuse +0.6.35 + +- Remove usage of Helm hook annotations in Charts + +0.6.36 + +- Fix volumeMounts indentations in master statefulset + +0.6.37 + +- Fix jobMaster.env indentations in master statefulset + +0.6.38 + +- Fix MOUNT_POINT env in fuse daemonset + +0.6.39 + +- Fix CSI controller rbac rule not specifying namespace. +- Fix CSI driver compatibility issue under kubernetes 18+ version. + +0.6.40 + +- Fix incorrect directory when mounting & formatting master journal volume + +0.6.41 + +- Add property to enable launching Fuse process in a separate pod in CSI +- Fix default CSI accessMode diff --git a/integration/kubernetes/helm-chart/alluxio/Chart.yaml b/integration/kubernetes/helm-chart/alluxio/Chart.yaml index f3043a151537..2511fc8b7f12 100644 --- a/integration/kubernetes/helm-chart/alluxio/Chart.yaml +++ b/integration/kubernetes/helm-chart/alluxio/Chart.yaml @@ -12,7 +12,7 @@ name: alluxio apiVersion: v1 description: Open source data orchestration for analytics and machine learning in any cloud. 
-version: 0.6.33 +version: 0.6.41 home: https://www.alluxio.io/ maintainers: - name: Adit Madan @@ -21,5 +21,7 @@ maintainers: email: czhu@alluxio.com - name: Jiacheng Liu email: jiacheng@alluxio.com +- name: Shawn Sun + email: shawn.sun@alluxio.com - name: Yang Che email: cheyang@163.com diff --git a/integration/kubernetes/helm-chart/alluxio/templates/csi/controller-rbac.yaml b/integration/kubernetes/helm-chart/alluxio/templates/csi/controller-rbac.yaml index 1f3b45e51124..e7111cf39e8b 100644 --- a/integration/kubernetes/helm-chart/alluxio/templates/csi/controller-rbac.yaml +++ b/integration/kubernetes/helm-chart/alluxio/templates/csi/controller-rbac.yaml @@ -42,6 +42,9 @@ rules: - apiGroups: [""] resources: ["nodes"] verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch", "create", "delete"] --- kind: ClusterRoleBinding diff --git a/integration/kubernetes/helm-chart/alluxio/templates/csi/controller.yaml b/integration/kubernetes/helm-chart/alluxio/templates/csi/controller.yaml index e56cde233dc6..813544effc91 100644 --- a/integration/kubernetes/helm-chart/alluxio/templates/csi/controller.yaml +++ b/integration/kubernetes/helm-chart/alluxio/templates/csi/controller.yaml @@ -43,6 +43,7 @@ spec: heritage: {{ .Release.Service }} role: alluxio-csi-controller spec: + serviceAccount: csi-controller-sa hostNetwork: {{ .Values.csi.controllerPlugin.hostNetwork }} dnsPolicy: {{ .Values.csi.controllerPlugin.dnsPolicy }} serviceAccountName: csi-controller-sa diff --git a/integration/kubernetes/helm-chart/alluxio/templates/csi/fuse-configmap.yaml b/integration/kubernetes/helm-chart/alluxio/templates/csi/fuse-configmap.yaml new file mode 100644 index 000000000000..54408fc35303 --- /dev/null +++ b/integration/kubernetes/helm-chart/alluxio/templates/csi/fuse-configmap.yaml @@ -0,0 +1,90 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +{{ if .Values.csi.enabled -}} +{{- $name := include "alluxio.name" . }} +{{- $fullName := include "alluxio.fullname" . }} +{{- $chart := include "alluxio.chart" . 
}} + +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: {{ $fullName }}-csi-fuse-config + labels: + name: {{ $fullName }}-csi-fuse-config + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + alluxio-csi-fuse-config: | + kind: Pod + apiVersion: v1 + metadata: + name: {{ $fullName }}-fuse + labels: + name: {{ $fullName }}-fuse + app: {{ $name }} + role: alluxio-fuse + spec: + nodeName: + hostNetwork: {{ .Values.fuse.hostNetwork }} + hostPID: {{ .Values.fuse.hostID }} + dnsPolicy: {{ .Values.fuse.dnsPolicy }} + securityContext: + runAsUser: 0 + runAsGroup: 0 + fsGroup: 0 + containers: + - name: alluxio-fuse + image: {{ .Values.image }}:{{ .Values.imageTag }} + imagePullPolicy: {{ .Values.imagePullPolicy }} + {{- if .Values.fuse.resources }} + resources: + {{- if .Values.fuse.resources.limits }} + limits: + cpu: {{ .Values.fuse.resources.limits.cpu }} + memory: {{ .Values.fuse.resources.limits.memory }} + {{- end }} + {{- if .Values.fuse.resources.requests }} + cpu: {{ .Values.fuse.resources.requests.cpu }} + memory: {{ .Values.fuse.resources.requests.memory }} + {{- end }} + {{- end }} + command: [ "/entrypoint.sh" ] + args: + - fuse + env: + {{- range $key, $value := .Values.fuse.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + securityContext: + privileged: true + capabilities: + add: + # SYS_ADMIN is needed for run `mount` command in the container + - SYS_ADMIN + envFrom: + - configMapRef: + name: {{ $fullName }}-config + volumeMounts: + - name: pods-mount-dir + mountPath: /var/lib/kubelet + mountPropagation: "Bidirectional" + restartPolicy: Always + volumes: + - name: pods-mount-dir + hostPath: + path: /var/lib/kubelet + type: Directory +{{- end }} diff --git a/integration/kubernetes/helm-chart/alluxio/templates/csi/nodeplugin.yaml b/integration/kubernetes/helm-chart/alluxio/templates/csi/nodeplugin.yaml index 6b22709b2be5..1651f77607d1 100644 --- a/integration/kubernetes/helm-chart/alluxio/templates/csi/nodeplugin.yaml +++ b/integration/kubernetes/helm-chart/alluxio/templates/csi/nodeplugin.yaml @@ -41,6 +41,7 @@ spec: heritage: {{ .Release.Service }} role: alluxio-csi-nodeplugin spec: + serviceAccount: csi-controller-sa hostNetwork: {{ .Values.csi.nodePlugin.hostNetwork }} dnsPolicy: {{ .Values.csi.nodePlugin.dnsPolicy }} {{- if .Values.imagePullSecrets }} @@ -120,6 +121,14 @@ spec: fieldPath: spec.nodeName - name: CSI_ENDPOINT value: unix://plugin/csi.sock + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: PERIOD_SECONDS + value: "{{ .Values.csi.periodSeconds }}" + - name: FAILURE_THRESHOLD + value: "{{ .Values.csi.failureThreshold }}" envFrom: - configMapRef: name: {{ template "alluxio.fullname" . 
}}-config @@ -127,8 +136,10 @@ spec: - name: plugin-dir mountPath: /plugin - name: pods-mount-dir - mountPath: /var/lib/kubelet/pods + mountPath: /var/lib/kubelet mountPropagation: "Bidirectional" + - name: csi-fuse-config + mountPath: /opt/alluxio/integration/kubernetes/csi volumes: - name: plugin-dir hostPath: @@ -136,10 +147,16 @@ spec: type: DirectoryOrCreate - name: pods-mount-dir hostPath: - path: /var/lib/kubelet/pods + path: /var/lib/kubelet type: Directory - hostPath: path: /var/lib/kubelet/plugins_registry type: Directory name: registration-dir + - name: csi-fuse-config + configMap: + name: {{ $fullName }}-csi-fuse-config + items: + - key: alluxio-csi-fuse-config + path: alluxio-csi-fuse.yaml {{- end }} diff --git a/integration/kubernetes/helm-chart/alluxio/templates/csi/pv.yaml b/integration/kubernetes/helm-chart/alluxio/templates/csi/pv.yaml index bf2e2d525e35..4eafbf930680 100644 --- a/integration/kubernetes/helm-chart/alluxio/templates/csi/pv.yaml +++ b/integration/kubernetes/helm-chart/alluxio/templates/csi/pv.yaml @@ -26,6 +26,7 @@ spec: volumeHandle: alluxio volumeAttributes: alluxioPath: {{ .Values.csi.alluxioPath }} + mountInPod: "{{ .Values.csi.mountInPod }}" javaOptions: {{ .Values.csi.javaOptions }} mountOptions: {{- range .Values.csi.mountOptions }} diff --git a/integration/kubernetes/helm-chart/alluxio/templates/csi/storage-class.yaml b/integration/kubernetes/helm-chart/alluxio/templates/csi/storage-class.yaml index 24f0b0283a77..8a4c8d589250 100644 --- a/integration/kubernetes/helm-chart/alluxio/templates/csi/storage-class.yaml +++ b/integration/kubernetes/helm-chart/alluxio/templates/csi/storage-class.yaml @@ -17,6 +17,7 @@ metadata: provisioner: alluxio parameters: alluxioPath: {{ .Values.csi.alluxioPath }} + mountInPod: "{{ .Values.csi.mountInPod }}" javaOptions: {{ .Values.csi.javaOptions }} volumeBindingMode: Immediate mountOptions: diff --git a/integration/kubernetes/helm-chart/alluxio/values.yaml b/integration/kubernetes/helm-chart/alluxio/values.yaml index f33e89e38163..efa559df6185 100644 --- a/integration/kubernetes/helm-chart/alluxio/values.yaml +++ b/integration/kubernetes/helm-chart/alluxio/values.yaml @@ -630,13 +630,18 @@ csi: requests: cpu: 10m memory: 20Mi - # Will run fuse daemon inside csi nodeserver + # Run alluxio fuse process inside csi nodeserver container if mountInPod = false + # Run alluxio fuse process inside a separate pod if mountInPod = true + mountInPod: false + # If mountInPod, use a timeout for waiting until the fuse process is ready + periodSeconds: 15 + failureThreshold: 10 nodePlugin: hostNetwork: true dnsPolicy: ClusterFirstWithHostNet nodeserver: resources: - # The default xmx is 8G + # fuse in nodeserver container needs more resources limits: cpu: "4" memory: "8G" @@ -652,11 +657,10 @@ csi: requests: cpu: 10m memory: 20Mi - # for csi client clientEnabled: false accessModes: - - ReadWriteMany + - ReadWriteOnce quota: 100Gi mountPath: /data alluxioPath: / diff --git a/integration/kubernetes/helm-generate.sh b/integration/kubernetes/helm-generate.sh index 7a8308236ea8..3e84880dcbed 100755 --- a/integration/kubernetes/helm-generate.sh +++ b/integration/kubernetes/helm-generate.sh @@ -114,6 +114,7 @@ function generateCsiTemplates { helm template --name-template ${RELEASE_NAME} helm-chart/alluxio/ --set csi.enabled=true --show-only templates/csi/controller.yaml -f $dir/config.yaml > "$dir/csi/alluxio-csi-controller.yaml.template" helm template --name-template ${RELEASE_NAME} helm-chart/alluxio/ --set csi.enabled=true --show-only 
templates/csi/driver.yaml -f $dir/config.yaml > "$dir/csi/alluxio-csi-driver.yaml.template" helm template --name-template ${RELEASE_NAME} helm-chart/alluxio/ --set csi.enabled=true --show-only templates/csi/nodeplugin.yaml -f $dir/config.yaml > "$dir/csi/alluxio-csi-nodeplugin.yaml.template" + helm template --name-template ${RELEASE_NAME} helm-chart/alluxio/ --set csi.enabled=true --show-only templates/csi/fuse-configmap.yaml -f $dir/config.yaml > "$dir/csi/alluxio-csi-fuse-configmap.yaml.template" helm template --name-template ${RELEASE_NAME} helm-chart/alluxio/ --set csi.clientEnabled=true --show-only templates/csi/storage-class.yaml -f $dir/config.yaml > "$dir/csi/alluxio-storage-class.yaml.template" helm template --name-template ${RELEASE_NAME} helm-chart/alluxio/ --set csi.clientEnabled=true --show-only templates/csi/pvc.yaml -f $dir/config.yaml > "$dir/csi/alluxio-pvc.yaml.template" helm template --name-template ${RELEASE_NAME} helm-chart/alluxio/ --set csi.clientEnabled=true --show-only templates/csi/pvc-static.yaml -f $dir/config.yaml > "$dir/csi/alluxio-pvc-static.yaml.template"
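
For reference, below is a minimal sketch of a statically provisioned PV that exercises the new `mountInPod` path. It is illustrative only and not part of the patch: the PV name, capacity, `alluxioPath`, and mount options are assumptions. The important details, per the documentation and templates above, are `mountInPod: "true"` in `volumeAttributes` and a `volumeHandle` that is unique per PV.

```yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: alluxio-pv-a                  # assumed name, for illustration only
spec:
  accessModes:
    - ReadWriteOnce
  capacity:
    storage: 100Gi
  csi:
    driver: alluxio
    # Must be unique per PV: PVs sharing a volumeHandle are treated as the same
    # volume, and the Fuse pod may not be launched for the duplicate.
    volumeHandle: alluxio-pv-a
    volumeAttributes:
      alluxioPath: /data              # Alluxio path to mount (assumed)
      mountInPod: "true"              # launch AlluxioFuse in a separate pod
      javaOptions: "-Dalluxio.user.metadata.cache.enabled=true "
  mountOptions:
    - allow_other
    - kernel_cache
```

When `mountInPod` is enabled, NodeStageVolume creates the Fuse pod on the node and then polls the staging path until the mount appears, controlled by the `csi.periodSeconds` and `csi.failureThreshold` Helm values, which the nodeplugin template exposes to the container as the `PERIOD_SECONDS` and `FAILURE_THRESHOLD` environment variables.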