diff --git a/api/v1beta1/elfmachine_types.go b/api/v1beta1/elfmachine_types.go index 600e6f5a..ee50e679 100644 --- a/api/v1beta1/elfmachine_types.go +++ b/api/v1beta1/elfmachine_types.go @@ -318,8 +318,16 @@ func (m *ElfMachine) GetVMDisconnectionTimestamp() *metav1.Time { return nil } +func (m *ElfMachine) RequiresGPUOrVGPUDevices() bool { + return m.RequiresGPUDevices() || m.RequiresVGPUDevices() +} + func (m *ElfMachine) RequiresGPUDevices() bool { - return len(m.Spec.GPUDevices) > 0 || len(m.Spec.VGPUDevices) > 0 + return len(m.Spec.GPUDevices) > 0 +} + +func (m *ElfMachine) RequiresVGPUDevices() bool { + return len(m.Spec.VGPUDevices) > 0 } //+kubebuilder:object:root=true diff --git a/controllers/elfmachine_controller.go b/controllers/elfmachine_controller.go index dc64c8fb..096bfd64 100644 --- a/controllers/elfmachine_controller.go +++ b/controllers/elfmachine_controller.go @@ -308,7 +308,7 @@ func (r *ElfMachineReconciler) reconcileDelete(ctx *context.MachineContext) (rec // locked by the virtual machine may not be unlocked. // For example, the Cluster or ElfMachine was deleted during a pause. if !ctrlutil.ContainsFinalizer(ctx.ElfMachine, infrav1.MachineFinalizer) && - ctx.ElfMachine.RequiresGPUDevices() { + ctx.ElfMachine.RequiresGPUOrVGPUDevices() { unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) } }() @@ -532,7 +532,7 @@ func (r *ElfMachineReconciler) reconcileVM(ctx *context.MachineContext) (*models } var hostID *string - var gpuDevices []*models.GpuDevice + var gpuDeviceInfos []*service.GPUDeviceInfo // The virtual machine of the Control Plane does not support GPU Devices. if machineutil.IsControlPlaneMachine(ctx.Machine) { hostID, err = r.preCheckPlacementGroup(ctx) @@ -540,7 +540,7 @@ func (r *ElfMachineReconciler) reconcileVM(ctx *context.MachineContext) (*models return nil, false, err } } else { - hostID, gpuDevices, err = r.selectHostAndGPUsForVM(ctx, "") + hostID, gpuDeviceInfos, err = r.selectHostAndGPUsForVM(ctx, "") if err != nil || hostID == nil { return nil, false, err } @@ -548,7 +548,7 @@ func (r *ElfMachineReconciler) reconcileVM(ctx *context.MachineContext) (*models ctx.Logger.Info("Create VM for ElfMachine") - withTaskVM, err := ctx.VMService.Clone(ctx.ElfCluster, ctx.ElfMachine, bootstrapData, *hostID, gpuDevices) + withTaskVM, err := ctx.VMService.Clone(ctx.ElfCluster, ctx.ElfMachine, bootstrapData, *hostID, gpuDeviceInfos) if err != nil { releaseTicketForCreateVM(ctx.ElfMachine.Name) @@ -561,7 +561,7 @@ func (r *ElfMachineReconciler) reconcileVM(ctx *context.MachineContext) (*models ctx.ElfMachine.SetVM(util.GetVMRef(vm)) } else { // Duplicate VM error does not require unlocking GPU devices. 
- if ctx.ElfMachine.RequiresGPUDevices() { + if ctx.ElfMachine.RequiresGPUOrVGPUDevices() { unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) } @@ -907,11 +907,11 @@ func (r *ElfMachineReconciler) reconcileVMTask(ctx *context.MachineContext, vm * setVMDuplicate(ctx.ElfMachine.Name) } - if ctx.ElfMachine.RequiresGPUDevices() { + if ctx.ElfMachine.RequiresGPUOrVGPUDevices() { unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) } case service.IsPowerOnVMTask(task) || service.IsUpdateVMTask(task): - if ctx.ElfMachine.RequiresGPUDevices() { + if ctx.ElfMachine.RequiresGPUOrVGPUDevices() { unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) } case service.IsMemoryInsufficientError(errorMessage): @@ -933,7 +933,7 @@ func (r *ElfMachineReconciler) reconcileVMTask(ctx *context.MachineContext, vm * ctx.Logger.Info("VM task succeeded", "vmRef", vmRef, "taskRef", taskRef, "taskDescription", service.GetTowerString(task.Description)) if service.IsCloneVMTask(task) || service.IsUpdateVMTask(task) { - if ctx.ElfMachine.RequiresGPUDevices() { + if ctx.ElfMachine.RequiresGPUOrVGPUDevices() { unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) } } diff --git a/controllers/elfmachine_controller_gpu.go b/controllers/elfmachine_controller_gpu.go index cc264fc0..17042f03 100644 --- a/controllers/elfmachine_controller_gpu.go +++ b/controllers/elfmachine_controller_gpu.go @@ -43,8 +43,8 @@ import ( // 3. A non-empty string indicates that the specified host ID was returned. // // The return gpudevices: the GPU devices for virtual machine. -func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx *context.MachineContext, preferredHostID string) (rethost *string, gpudevices []*models.GpuDevice, reterr error) { - if !ctx.ElfMachine.RequiresGPUDevices() { +func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx *context.MachineContext, preferredHostID string) (rethost *string, gpudevices []*service.GPUDeviceInfo, reterr error) { + if !ctx.ElfMachine.RequiresGPUOrVGPUDevices() { return pointer.String(""), nil, nil } @@ -58,12 +58,12 @@ func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx *context.MachineContex // If the GPU devices locked by the virtual machine still exist, use them directly. if lockedVMGPUs := getGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name); lockedVMGPUs != nil { - if ok, gpuDevices, err := r.checkGPUsCanBeUsedForVM(ctx, lockedVMGPUs.GPUDeviceIDs, ctx.ElfMachine.Name); err != nil { + if ok, err := r.checkGPUsCanBeUsedForVM(ctx, lockedVMGPUs.GetGPUIDs()); err != nil { return nil, nil, err } else if ok { ctx.Logger.V(1).Info("Found locked VM GPU devices, so skip allocation", "lockedVMGPUs", lockedVMGPUs) - return &lockedVMGPUs.HostID, gpuDevices, nil + return &lockedVMGPUs.HostID, lockedVMGPUs.GetGPUDeviceInfos(), nil } // If the GPU devices returned by Tower is inconsistent with the locked GPU, @@ -84,30 +84,46 @@ func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx *context.MachineContex } // Get all GPU devices of available hosts. 
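// The replacement below swaps the single FindGPUDevicesByHostIDs call for a
// multi-step lookup, roughly:
//  1. FindGPUDevicesByHostIDs(hostIDs, usage): list devices of the required
//     usage (PASSTHROUGH for GPU passthrough requests, VGPU for vGPU requests).
//  2. FindGPUDeviceInfos(gpuDeviceIDs): fetch allocation details (VMs,
//     allocated/available counts) for those devices.
//  3. AggregateUnusedGPUDevicesToGPUDeviceInfos: add the devices not yet used
//     by any VM, since step 2 only reports devices already referenced by VMs.
// The resulting infos are then filtered by available count and by the
// cluster-level GPU locks before being grouped by host.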
- gpuDevices, err := ctx.VMService.FindGPUDevicesByHostIDs(availableHosts.IDs()) + gpuDeviceUsage := models.GpuDeviceUsagePASSTHROUGH + if ctx.ElfMachine.RequiresVGPUDevices() { + gpuDeviceUsage = models.GpuDeviceUsageVGPU + } + gpuDevices, err := ctx.VMService.FindGPUDevicesByHostIDs(availableHosts.IDs(), gpuDeviceUsage) + if err != nil || len(gpuDevices) == 0 { + return nil, nil, err + } + + gpuDeviceIDs := make([]string, len(gpuDevices)) + for i := 0; i < len(gpuDevices); i++ { + gpuDeviceIDs[i] = *gpuDevices[i].ID + } + // Get GPU devices with VMs and allocation details. + gpuDeviceInfos, err := ctx.VMService.FindGPUDeviceInfos(gpuDeviceIDs) if err != nil { return nil, nil, err } - lockedClusterGPUIDs := getLockedClusterGPUIDs(ctx.ElfCluster.Spec.Cluster) + service.AggregateUnusedGPUDevicesToGPUDeviceInfos(gpuDeviceInfos, gpuDevices) - // Group GPU devices by host. - hostGPUDeviceMap := make(map[string][]*models.GpuDevice) - hostIDSet := sets.NewString() - for i := 0; i < len(gpuDevices); i++ { - // Filter already used or locked GPU devices. - if !service.GPUCanBeUsedForVM(gpuDevices[i], ctx.ElfMachine.Name) || - lockedClusterGPUIDs.Has(*gpuDevices[i].ID) { - continue - } + // Filter already used GPU devices. + gpuDeviceInfos = gpuDeviceInfos.Filter(func(g *service.GPUDeviceInfo) bool { + return g.AvailableCount > 0 + }) - hostIDSet.Insert(*gpuDevices[i].Host.ID) - if gpus, ok := hostGPUDeviceMap[*gpuDevices[i].Host.ID]; !ok { - hostGPUDeviceMap[*gpuDevices[i].Host.ID] = []*models.GpuDevice{gpuDevices[i]} + // Filter locked GPU devices. + gpuDeviceInfos = filterGPUDeviceInfosByLockGPUDevices(ctx.ElfCluster.Spec.Cluster, gpuDeviceInfos) + + // Group GPU deviceInfos by host. + hostGPUDeviceInfoMap := make(map[string]service.GPUDeviceInfos) + hostIDSet := sets.NewString() + gpuDeviceInfos.Iterate(func(gpuDeviceInfo *service.GPUDeviceInfo) { + hostIDSet.Insert(gpuDeviceInfo.HostID) + if gpuInfos, ok := hostGPUDeviceInfoMap[gpuDeviceInfo.HostID]; !ok { + hostGPUDeviceInfoMap[gpuDeviceInfo.HostID] = service.NewGPUDeviceInfos(gpuDeviceInfo) } else { - hostGPUDeviceMap[*gpuDevices[i].Host.ID] = append(gpus, gpuDevices[i]) + gpuInfos.Insert(gpuDeviceInfo) } - } + }) // Choose a host that meets ElfMachine GPU needs. // Use a random host list to reduce the probability of the same host being selected at the same time. @@ -122,25 +138,29 @@ func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx *context.MachineContex } for i := 0; i < len(unsortedHostIDs); i++ { - if hostGPUDevices, ok := hostGPUDeviceMap[unsortedHostIDs[i]]; ok { - selectedGPUDevices := selectGPUDevicesForVM(hostGPUDevices, ctx.ElfMachine.Spec.GPUDevices) - if len(selectedGPUDevices) > 0 { - gpuDeviceIDs := make([]string, len(selectedGPUDevices)) - for i := 0; i < len(selectedGPUDevices); i++ { - gpuDeviceIDs[i] = *selectedGPUDevices[i].ID - } - - // Lock the selected GPU devices to prevent it from being allocated to multiple virtual machines. - if !lockGPUDevicesForVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name, unsortedHostIDs[i], gpuDeviceIDs) { - // Lock failure indicates that the GPU devices are locked by another virtual machine. - // Just trying other hosts. 
- continue - } - - ctx.Logger.Info("Selected host and GPU devices for VM", "hostId", unsortedHostIDs[i], "gpuDeviceIds", gpuDeviceIDs) - - return &unsortedHostIDs[i], selectedGPUDevices, nil + hostGPUDeviceInfos, ok := hostGPUDeviceInfoMap[unsortedHostIDs[i]] + if !ok { + continue + } + + var selectedGPUDeviceInfos []*service.GPUDeviceInfo + if ctx.ElfMachine.RequiresGPUDevices() { + selectedGPUDeviceInfos = selectGPUDevicesForVM(hostGPUDeviceInfos, ctx.ElfMachine.Spec.GPUDevices) + } else { + selectedGPUDeviceInfos = selectVGPUDevicesForVM(hostGPUDeviceInfos, ctx.ElfMachine.Spec.VGPUDevices) + } + + if len(selectedGPUDeviceInfos) > 0 { + // Lock the selected GPU devices to prevent it from being allocated to multiple virtual machines. + if !lockGPUDevicesForVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name, unsortedHostIDs[i], selectedGPUDeviceInfos) { + // Lock failure indicates that the GPU devices are locked by another virtual machine. + // Just trying other hosts. + continue } + + ctx.Logger.Info("Selected host and GPU devices for VM", "hostId", unsortedHostIDs[i], "gpuDevices", selectedGPUDeviceInfos) + + return &unsortedHostIDs[i], selectedGPUDeviceInfos, nil } } @@ -149,38 +169,86 @@ func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx *context.MachineContex // selectGPUDevicesForVM selects the GPU devices required by the virtual machine from the host's GPU devices. // Empty GPU devices indicates that the host's GPU devices cannot meet the GPU requirements of the virtual machine. -func selectGPUDevicesForVM(hostGPUDevices []*models.GpuDevice, requiredGPUDevices []infrav1.GPUPassthroughDeviceSpec) []*models.GpuDevice { +func selectGPUDevicesForVM(hostGPUDeviceInfos service.GPUDeviceInfos, requiredGPUDevices []infrav1.GPUPassthroughDeviceSpec) []*service.GPUDeviceInfo { // Group GPU devices by model. - modelGPUDeviceMap := make(map[string][]*models.GpuDevice) - for i := 0; i < len(hostGPUDevices); i++ { - if gpus, ok := modelGPUDeviceMap[*hostGPUDevices[i].Model]; !ok { - modelGPUDeviceMap[*hostGPUDevices[i].Model] = []*models.GpuDevice{hostGPUDevices[i]} + modelGPUDeviceMap := make(map[string][]*service.GPUDeviceInfo) + hostGPUDeviceInfos.Iterate(func(gpuDeviceInfo *service.GPUDeviceInfo) { + if gpuInfos, ok := modelGPUDeviceMap[gpuDeviceInfo.Model]; !ok { + modelGPUDeviceMap[gpuDeviceInfo.Model] = []*service.GPUDeviceInfo{gpuDeviceInfo} } else { - modelGPUDeviceMap[*hostGPUDevices[i].Model] = append(gpus, hostGPUDevices[i]) + modelGPUDeviceMap[gpuDeviceInfo.Model] = append(gpuInfos, gpuDeviceInfo) } - } + }) - var selectedGPUDevices []*models.GpuDevice + var selectedGPUDeviceInfos []*service.GPUDeviceInfo for i := 0; i < len(requiredGPUDevices); i++ { - if gpus, ok := modelGPUDeviceMap[requiredGPUDevices[i].Model]; !ok { + gpuDevices, ok := modelGPUDeviceMap[requiredGPUDevices[i].Model] + if !ok || len(gpuDevices) < int(requiredGPUDevices[i].Count) { return nil + } + + gpuInfos := gpuDevices[:int(requiredGPUDevices[i].Count)] + for j := 0; j < len(gpuInfos); j++ { + selectedGPUDeviceInfos = append(selectedGPUDeviceInfos, &service.GPUDeviceInfo{ID: gpuInfos[j].ID, AllocatedCount: 1, AvailableCount: 1}) + } + } + + return selectedGPUDeviceInfos +} + +// selectVGPUDevicesForVM selects the vGPU devices required by the virtual machine from the host's vGPU devices. +// Empty vGPU devices indicates that the host's vGPU devices cannot meet the vGPU requirements of the virtual machine. 
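// For example (hypothetical numbers): a request of {Type: "V100", Count: 3}
// against two devices of that type with AvailableCount 1 and 3 is satisfied
// greedily, allocating 1 instance from the first device and 2 from the second;
// if the devices' combined available count cannot cover the request, nil is
// returned.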
+func selectVGPUDevicesForVM(hostGPUDeviceInfos service.GPUDeviceInfos, requiredVGPUDevices []infrav1.VGPUDeviceSpec) []*service.GPUDeviceInfo { + // Group vGPU devices by vGPU type. + typeVGPUDeviceInfoMap := make(map[string][]*service.GPUDeviceInfo) + hostGPUDeviceInfos.Iterate(func(gpuDeviceInfo *service.GPUDeviceInfo) { + if gpuInfos, ok := typeVGPUDeviceInfoMap[gpuDeviceInfo.VGPUType]; !ok { + typeVGPUDeviceInfoMap[gpuDeviceInfo.VGPUType] = []*service.GPUDeviceInfo{gpuDeviceInfo} } else { - if len(gpus) < int(requiredGPUDevices[i].Count) { - return nil + typeVGPUDeviceInfoMap[gpuDeviceInfo.VGPUType] = append(gpuInfos, gpuDeviceInfo) + } + }) + + var selectedGPUDeviceInfos []*service.GPUDeviceInfo + for i := 0; i < len(requiredVGPUDevices); i++ { + gpuDeviceInfos, ok := typeVGPUDeviceInfoMap[requiredVGPUDevices[i].Type] + if !ok { + return nil + } + + var gpuInfos []*service.GPUDeviceInfo + requiredCount := requiredVGPUDevices[i].Count + for j := 0; j < len(gpuDeviceInfos); j++ { + if gpuDeviceInfos[j].AvailableCount <= 0 { + continue + } + + if gpuDeviceInfos[j].AvailableCount >= requiredCount { + gpuInfos = append(gpuInfos, &service.GPUDeviceInfo{ID: gpuDeviceInfos[j].ID, AllocatedCount: requiredCount, AvailableCount: gpuDeviceInfos[j].AvailableCount}) + requiredCount = 0 + + break + } else { + gpuInfos = append(gpuInfos, &service.GPUDeviceInfo{ID: gpuDeviceInfos[j].ID, AllocatedCount: gpuDeviceInfos[j].AvailableCount, AvailableCount: gpuDeviceInfos[j].AvailableCount}) + requiredCount -= gpuDeviceInfos[j].AvailableCount } + } - selectedGPUDevices = append(selectedGPUDevices, gpus[:int(requiredGPUDevices[i].Count)]...) - // Remove selected GPU devices. - modelGPUDeviceMap[requiredGPUDevices[i].Model] = gpus[int(requiredGPUDevices[i].Count):] + // If requiredCount is greater than 0, it means there are not enough vGPUs, + // just return directly. + if requiredCount > 0 { + return nil } + + selectedGPUDeviceInfos = append(selectedGPUDeviceInfos, gpuInfos...) } - return selectedGPUDevices + return selectedGPUDeviceInfos } // reconcileGPUDevices ensures that the virtual machine has the expected GPU devices. func (r *ElfMachineReconciler) reconcileGPUDevices(ctx *context.MachineContext, vm *models.VM) (bool, error) { - if !ctx.ElfMachine.RequiresGPUDevices() { + if !ctx.ElfMachine.RequiresGPUOrVGPUDevices() { return true, nil } @@ -213,7 +281,7 @@ func (r *ElfMachineReconciler) reconcileGPUDevices(ctx *context.MachineContext, gpuIDs[i] = *vm.GpuDevices[i].ID } - if ok, _, err := r.checkGPUsCanBeUsedForVM(ctx, gpuIDs, ctx.ElfMachine.Name); err != nil { + if ok, err := r.checkGPUsCanBeUsedForVM(ctx, gpuIDs); err != nil { return false, err } else if !ok { // If the GPU devices are already in use, @@ -228,7 +296,7 @@ func (r *ElfMachineReconciler) reconcileGPUDevices(ctx *context.MachineContext, // addGPUDevicesForVM adds expected GPU devices to the virtual machine. 
func (r *ElfMachineReconciler) addGPUDevicesForVM(ctx *context.MachineContext, vm *models.VM) (bool, error) { - hostID, gpuDevices, err := r.selectHostAndGPUsForVM(ctx, *vm.Host.ID) + hostID, gpuDeviceInfos, err := r.selectHostAndGPUsForVM(ctx, *vm.Host.ID) if err != nil || hostID == nil { return false, err } @@ -244,15 +312,7 @@ func (r *ElfMachineReconciler) addGPUDevicesForVM(ctx *context.MachineContext, v return ok, err } - gpus := make([]*models.VMGpuOperationParams, len(gpuDevices)) - for i := 0; i < len(gpuDevices); i++ { - gpus[i] = &models.VMGpuOperationParams{ - GpuID: gpuDevices[i].ID, - Amount: service.TowerInt32(1), - } - } - - task, err := ctx.VMService.AddGPUDevices(ctx.ElfMachine.Status.VMRef, gpus) + task, err := ctx.VMService.AddGPUDevices(ctx.ElfMachine.Status.VMRef, gpuDeviceInfos) if err != nil { conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.AttachingGPUFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) @@ -298,19 +358,22 @@ func (r *ElfMachineReconciler) removeVMGPUDevices(ctx *context.MachineContext, v // checkGPUsCanBeUsedForVM checks whether GPU devices can be used by the specified virtual machine. // The return true means the GPU devices can be used for the virtual machine. -func (r *ElfMachineReconciler) checkGPUsCanBeUsedForVM(ctx *context.MachineContext, gpuDeviceIDs []string, vm string) (bool, []*models.GpuDevice, error) { +func (r *ElfMachineReconciler) checkGPUsCanBeUsedForVM(ctx *context.MachineContext, gpuDeviceIDs []string) (bool, error) { gpuDevices, err := ctx.VMService.FindGPUDevicesByIDs(gpuDeviceIDs) - if err != nil { - return false, nil, err + if err != nil || len(gpuDevices) != len(gpuDeviceIDs) { + return false, err } - if len(gpuDevices) != len(gpuDeviceIDs) { - return false, nil, nil + gpuDeviceInfos, err := ctx.VMService.FindGPUDeviceInfos(gpuDeviceIDs) + if err != nil { + return false, err } - if len(service.FilterOutGPUsCanNotBeUsedForVM(gpuDevices, vm)) != len(gpuDeviceIDs) { - return false, nil, nil + service.AggregateUnusedGPUDevicesToGPUDeviceInfos(gpuDeviceInfos, gpuDevices) + + if service.HasGPUsCanNotBeUsedForVM(gpuDeviceInfos, ctx.ElfMachine) { + return false, nil } - return true, gpuDevices, nil + return true, nil } diff --git a/controllers/elfmachine_controller_gpu_test.go b/controllers/elfmachine_controller_gpu_test.go index 720a32f0..af976658 100644 --- a/controllers/elfmachine_controller_gpu_test.go +++ b/controllers/elfmachine_controller_gpu_test.go @@ -110,30 +110,47 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { gpu.Model = service.TowerString(gpuModel) gpuIDs := []string{*gpu.ID} gpusDevices := []*models.GpuDevice{gpu} + gpusDeviceInfos := service.NewGPUDeviceInfos(&service.GPUDeviceInfo{ + ID: *gpu.ID, + HostID: *host.ID, + Model: *gpu.Model, + AllocatedCount: 0, + AvailableCount: 1, + }) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) - mockVMService.EXPECT().FindGPUDevicesByHostIDs([]string{*host.ID}).Return(gpusDevices, nil) + mockVMService.EXPECT().FindGPUDevicesByHostIDs([]string{*host.ID}, models.GpuDeviceUsagePASSTHROUGH).Return(gpusDevices, nil) + mockVMService.EXPECT().FindGPUDeviceInfos(gpuIDs).Return(gpusDeviceInfos, nil) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) reconciler := 
&ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} hostID, gpus, err := reconciler.selectHostAndGPUsForVM(machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(*hostID).To(Equal(*host.ID)) - Expect(gpus).To(Equal(gpusDevices)) + Expect(gpus).To(HaveLen(1)) + Expect(gpus[0].ID).To(Equal(*gpu.ID)) + Expect(gpus[0].AllocatedCount).To(Equal(int32(1))) mockVMService.EXPECT().FindGPUDevicesByIDs(gpuIDs).Return(gpusDevices, nil) + mockVMService.EXPECT().FindGPUDeviceInfos(gpuIDs).Return(gpusDeviceInfos, nil) hostID, gpus, err = reconciler.selectHostAndGPUsForVM(machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(*hostID).To(Equal(*host.ID)) - Expect(gpus).To(Equal(gpusDevices)) + Expect(gpus).To(HaveLen(1)) + Expect(gpus[0].ID).To(Equal(*gpu.ID)) + Expect(gpus[0].AllocatedCount).To(Equal(int32(1))) Expect(logBuffer.String()).To(ContainSubstring("Found locked VM GPU devices")) logBuffer.Reset() gpu.Vms = []*models.NestedVM{{ID: service.TowerString("id"), Name: service.TowerString("vm")}} + gpusDeviceInfo := gpusDeviceInfos.Get(*gpu.ID) + gpusDeviceInfo.AllocatedCount = 1 + gpusDeviceInfo.AvailableCount = 0 + gpusDeviceInfo.VMs = []service.GPUDeviceVM{{ID: "id", Name: "vm"}} mockVMService.EXPECT().FindGPUDevicesByIDs(gpuIDs).Return(gpusDevices, nil) - mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) - mockVMService.EXPECT().FindGPUDevicesByHostIDs([]string{*host.ID}).Return(gpusDevices, nil) + mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(nil, nil) + mockVMService.EXPECT().FindGPUDeviceInfos(gpuIDs).Return(gpusDeviceInfos, nil) hostID, gpus, err = reconciler.selectHostAndGPUsForVM(machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(hostID).To(BeNil()) @@ -154,17 +171,33 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { preferredGPU.Host = &models.NestedHost{ID: preferredHost.ID} preferredGPU.Model = service.TowerString(gpuModel) gpusDevices := []*models.GpuDevice{gpu, preferredGPU} + gpusDeviceInfos := service.NewGPUDeviceInfos(&service.GPUDeviceInfo{ + ID: *gpu.ID, + HostID: *host.ID, + Model: *gpu.Model, + AllocatedCount: 0, + AvailableCount: 1, + }, &service.GPUDeviceInfo{ + ID: *preferredGPU.ID, + HostID: *preferredHost.ID, + Model: *preferredGPU.Model, + AllocatedCount: 0, + AvailableCount: 1, + }) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host, preferredHost), nil) - mockVMService.EXPECT().FindGPUDevicesByHostIDs(gomock.InAnyOrder([]string{*host.ID, *preferredHost.ID})).Return(gpusDevices, nil) + mockVMService.EXPECT().FindGPUDevicesByHostIDs(gomock.InAnyOrder([]string{*host.ID, *preferredHost.ID}), models.GpuDeviceUsagePASSTHROUGH).Return(gpusDevices, nil) + mockVMService.EXPECT().FindGPUDeviceInfos(gomock.InAnyOrder([]string{*gpu.ID, *preferredGPU.ID})).Return(gpusDeviceInfos, nil) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} hostID, gpus, err := reconciler.selectHostAndGPUsForVM(machineContext, *preferredHost.ID) Expect(err).NotTo(HaveOccurred()) Expect(*hostID).To(Equal(*preferredHost.ID)) - Expect(gpus).To(Equal([]*models.GpuDevice{preferredGPU})) + Expect(gpus).To(HaveLen(1)) 
+ Expect(gpus[0].ID).To(Equal(*preferredGPU.ID)) + Expect(gpus[0].AllocatedCount).To(Equal(int32(1))) }) }) @@ -244,6 +277,14 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { gpu.Host = &models.NestedHost{ID: host.ID} gpu.Model = service.TowerString(gpuModel) gpu.Vms = []*models.NestedVM{{ID: service.TowerString("id"), Name: service.TowerString("vm")}} + gpusDeviceInfos := service.NewGPUDeviceInfos(&service.GPUDeviceInfo{ + ID: *gpu.ID, + HostID: *host.ID, + Model: *gpu.Model, + AllocatedCount: 0, + AvailableCount: 1, + VMs: []service.GPUDeviceVM{{Name: "name", AllocatedCount: 1}}, + }) vm := fake.NewTowerVMFromElfMachine(elfMachine) vm.Host = &models.NestedHost{ID: host.ID} vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) @@ -251,6 +292,7 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().FindGPUDevicesByIDs([]string{*gpu.ID}).Times(2).Return([]*models.GpuDevice{gpu}, nil) + mockVMService.EXPECT().FindGPUDeviceInfos([]string{*gpu.ID}).Return(gpusDeviceInfos, nil) mockVMService.EXPECT().RemoveGPUDevices(elfMachine.Status.VMRef, gomock.Len(1)).Return(nil, unexpectedError) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) @@ -261,7 +303,8 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { Expect(ok).To(BeFalse()) Expect(logBuffer.String()).To(ContainSubstring("GPU devices of VM are already in use, so remove and reallocate")) - gpu.Vms = []*models.NestedVM{{ID: vm.ID, Name: vm.Name}} + gpusDeviceInfos.Get(*gpu.ID).VMs = []service.GPUDeviceVM{{Name: *vm.Name, AllocatedCount: 1}} + mockVMService.EXPECT().FindGPUDeviceInfos([]string{*gpu.ID}).Return(gpusDeviceInfos, nil) ok, err = reconciler.reconcileGPUDevices(machineContext, vm) Expect(err).NotTo(HaveOccurred()) Expect(ok).To(BeTrue()) @@ -281,12 +324,20 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { gpu := fake.NewTowerGPU() gpu.Host = &models.NestedHost{ID: host.ID} gpu.Model = service.TowerString(gpuModel) + gpusDeviceInfos := service.NewGPUDeviceInfos(&service.GPUDeviceInfo{ + ID: *gpu.ID, + HostID: *host.ID, + Model: *gpu.Model, + AllocatedCount: 0, + AvailableCount: 1, + }) task := fake.NewTowerTask() withTaskVM := fake.NewWithTaskVM(vm, task) ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Times(2).Return(service.NewHosts(host), nil) - mockVMService.EXPECT().FindGPUDevicesByHostIDs([]string{*host.ID}).Times(2).Return([]*models.GpuDevice{gpu}, nil) + mockVMService.EXPECT().FindGPUDevicesByHostIDs([]string{*host.ID}, models.GpuDeviceUsagePASSTHROUGH).Times(2).Return([]*models.GpuDevice{gpu}, nil) + mockVMService.EXPECT().FindGPUDeviceInfos([]string{*gpu.ID}).Times(2).Return(gpusDeviceInfos, nil) mockVMService.EXPECT().Migrate(*vm.ID, *host.ID).Return(withTaskVM, nil) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) @@ -315,11 +366,19 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { gpu := fake.NewTowerGPU() gpu.Host = &models.NestedHost{ID: host.ID} gpu.Model = service.TowerString(gpuModel) + gpusDeviceInfos := service.NewGPUDeviceInfos(&service.GPUDeviceInfo{ + ID: *gpu.ID, + HostID: *host.ID, + Model: *gpu.Model, + 
AllocatedCount: 0, + AvailableCount: 1, + }) task := fake.NewTowerTask() ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Times(2).Return(service.NewHosts(host), nil) - mockVMService.EXPECT().FindGPUDevicesByHostIDs([]string{*host.ID}).Times(2).Return([]*models.GpuDevice{gpu}, nil) + mockVMService.EXPECT().FindGPUDevicesByHostIDs([]string{*host.ID}, models.GpuDeviceUsagePASSTHROUGH).Times(2).Return([]*models.GpuDevice{gpu}, nil) + mockVMService.EXPECT().FindGPUDeviceInfos([]string{*gpu.ID}).Times(2).Return(gpusDeviceInfos, nil) mockVMService.EXPECT().AddGPUDevices(elfMachine.Status.VMRef, gomock.Any()).Return(task, nil) machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) @@ -431,4 +490,90 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { }, timeout).Should(BeTrue()) }) }) + + It("checkGPUsCanBeUsedForVM", func() { + host := fake.NewTowerGPU() + gpu := fake.NewTowerGPU() + gpu.Host = &models.NestedHost{ID: host.ID} + gpuIDs := []string{*gpu.ID} + gpusDevices := []*models.GpuDevice{gpu} + gpusDeviceInfos := service.NewGPUDeviceInfos() + elfMachine.Spec.GPUDevices = append(elfMachine.Spec.GPUDevices, infrav1.GPUPassthroughDeviceSpec{Model: "A16", Count: 1}) + ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + mockVMService.EXPECT().FindGPUDevicesByIDs(gpuIDs).Return(nil, nil) + + machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ok, err := reconciler.checkGPUsCanBeUsedForVM(machineContext, gpuIDs) + Expect(err).NotTo(HaveOccurred()) + Expect(ok).To(BeFalse()) + + mockVMService.EXPECT().FindGPUDevicesByIDs(gpuIDs).Return(gpusDevices, nil) + mockVMService.EXPECT().FindGPUDeviceInfos(gpuIDs).Return(gpusDeviceInfos, nil) + ok, err = reconciler.checkGPUsCanBeUsedForVM(machineContext, gpuIDs) + Expect(err).NotTo(HaveOccurred()) + Expect(ok).To(BeTrue()) + + gpusDeviceInfos.Insert(&service.GPUDeviceInfo{ + VMs: []service.GPUDeviceVM{{ID: "vm1", Name: "vm1"}}, + }) + mockVMService.EXPECT().FindGPUDevicesByIDs(gpuIDs).Return(gpusDevices, nil) + mockVMService.EXPECT().FindGPUDeviceInfos(gpuIDs).Return(gpusDeviceInfos, nil) + ok, err = reconciler.checkGPUsCanBeUsedForVM(machineContext, gpuIDs) + Expect(err).NotTo(HaveOccurred()) + Expect(ok).To(BeFalse()) + }) + + It("selectVGPUDevicesForVM", func() { + host := &models.NestedHost{ID: service.TowerString("host")} + vGPU1 := fake.NewTowerVGPU(1) + vGPU1.Host = host + vGPU2 := fake.NewTowerVGPU(2) + vGPU2.Host = host + vGPUType := "V100" + requiredVGPUDevice := infrav1.VGPUDeviceSpec{Type: vGPUType, Count: 1} + requiredVGPUDevices := []infrav1.VGPUDeviceSpec{requiredVGPUDevice} + gpuDeviceInfos := service.NewGPUDeviceInfos() + gpus := selectVGPUDevicesForVM(gpuDeviceInfos, requiredVGPUDevices) + Expect(gpus).To(BeEmpty()) + + gpuDeviceInfo1 := &service.GPUDeviceInfo{ + ID: *vGPU1.ID, + HostID: *vGPU1.Host.ID, + Model: *vGPU1.Model, + VGPUType: vGPUType, + AllocatedCount: 1, + AvailableCount: 0, + } + gpuDeviceInfos = service.NewGPUDeviceInfos(gpuDeviceInfo1) + gpus = selectVGPUDevicesForVM(gpuDeviceInfos, requiredVGPUDevices) + 
Expect(gpus).To(BeEmpty()) + + gpuDeviceInfo1.AvailableCount = 1 + gpuDeviceInfos = service.NewGPUDeviceInfos(gpuDeviceInfo1) + gpus = selectVGPUDevicesForVM(gpuDeviceInfos, requiredVGPUDevices) + Expect(gpus).To(Equal([]*service.GPUDeviceInfo{{ID: gpuDeviceInfo1.ID, AllocatedCount: requiredVGPUDevice.Count, AvailableCount: gpuDeviceInfo1.AvailableCount}})) + + requiredVGPUDevice.Count = 3 + requiredVGPUDevices[0] = requiredVGPUDevice + gpus = selectVGPUDevicesForVM(gpuDeviceInfos, requiredVGPUDevices) + Expect(gpus).To(BeEmpty()) + + gpuDeviceInfo2 := &service.GPUDeviceInfo{ + ID: *vGPU2.ID, + HostID: *vGPU2.Host.ID, + Model: *vGPU2.Model, + VGPUType: vGPUType, + AllocatedCount: 1, + AvailableCount: 3, + } + gpuDeviceInfos.Insert(gpuDeviceInfo2) + gpus = selectVGPUDevicesForVM(gpuDeviceInfos, requiredVGPUDevices) + Expect(gpus).To(Equal([]*service.GPUDeviceInfo{ + {ID: gpuDeviceInfo1.ID, AllocatedCount: 1, AvailableCount: gpuDeviceInfo1.AvailableCount}, + {ID: gpuDeviceInfo2.ID, AllocatedCount: 2, AvailableCount: gpuDeviceInfo2.AvailableCount}, + })) + Expect(gpus[0].AllocatedCount + gpus[1].AllocatedCount).To(Equal(requiredVGPUDevice.Count)) + }) }) diff --git a/controllers/vm_limiter.go b/controllers/vm_limiter.go index 2aa67df5..db59b22d 100644 --- a/controllers/vm_limiter.go +++ b/controllers/vm_limiter.go @@ -22,9 +22,9 @@ import ( "time" "github.com/patrickmn/go-cache" - "k8s.io/apimachinery/pkg/util/sets" "github.com/smartxworks/cluster-api-provider-elf/pkg/config" + "github.com/smartxworks/cluster-api-provider-elf/pkg/service" ) const ( @@ -142,10 +142,33 @@ func getKeyForVMDuplicate(name string) string { /* GPU */ +type lockedGPUDevice struct { + ID string `json:"id"` + Count int32 `json:"count"` +} + type lockedVMGPUs struct { - HostID string `json:"hostId"` - GPUDeviceIDs []string `json:"gpuDeviceIds"` - LockedAt time.Time `json:"lockedAt"` + HostID string `json:"hostId"` + GPUDevices []lockedGPUDevice `json:"gpuDevices"` + LockedAt time.Time `json:"lockedAt"` +} + +func (g *lockedVMGPUs) GetGPUIDs() []string { + ids := make([]string, len(g.GPUDevices)) + for i := 0; i < len(g.GPUDevices); i++ { + ids[i] = g.GPUDevices[i].ID + } + + return ids +} + +func (g *lockedVMGPUs) GetGPUDeviceInfos() []*service.GPUDeviceInfo { + gpuDeviceInfos := make([]*service.GPUDeviceInfo, len(g.GPUDevices)) + for i := 0; i < len(g.GPUDevices); i++ { + gpuDeviceInfos[i] = &service.GPUDeviceInfo{ID: g.GPUDevices[i].ID, AllocatedCount: g.GPUDevices[i].Count} + } + + return gpuDeviceInfos } type lockedClusterGPUMap map[string]lockedVMGPUs @@ -158,42 +181,53 @@ var lockedGPUMap = make(map[string]lockedClusterGPUMap) // lockGPUDevicesForVM locks the GPU devices required to create or start a virtual machine. // The GPU devices will be unlocked when the task is completed or times out. // This prevents multiple virtual machines from being allocated the same GPU. 
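// With vGPU support the lock is count-based rather than ID-based: each VM
// records how many instances it locks per GPU device (lockedGPUDevice.Count),
// and a new lock request is rejected when the instances already locked for a
// device plus the requested allocation would exceed that device's available
// count. A passthrough GPU therefore remains an exclusive lock (1 of 1), while
// a vGPU device can be locked by several VMs until its capacity is exhausted.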
-func lockGPUDevicesForVM(clusterID, vmName, hostID string, gpuDeviceIDs []string) bool { +func lockGPUDevicesForVM(clusterID, vmName, hostID string, gpuDeviceInfos []*service.GPUDeviceInfo) bool { gpuLock.Lock() defer gpuLock.Unlock() - lockedClusterGPUIDs := getLockedClusterGPUIDsWithoutLock(clusterID) - for i := 0; i < len(gpuDeviceIDs); i++ { - if lockedClusterGPUIDs.Has(gpuDeviceIDs[i]) { - return false - } + availableCountMap := make(map[string]int32) + lockedGPUs := lockedVMGPUs{HostID: hostID, LockedAt: time.Now(), GPUDevices: make([]lockedGPUDevice, len(gpuDeviceInfos))} + for i := 0; i < len(gpuDeviceInfos); i++ { + availableCountMap[gpuDeviceInfos[i].ID] = gpuDeviceInfos[i].AvailableCount - gpuDeviceInfos[i].AllocatedCount + lockedGPUs.GPUDevices[i] = lockedGPUDevice{ID: gpuDeviceInfos[i].ID, Count: gpuDeviceInfos[i].AllocatedCount} } - lockedClusterGPUs := getLockedClusterGPUs(clusterID) - lockedClusterGPUs[vmName] = lockedVMGPUs{ - HostID: hostID, - GPUDeviceIDs: gpuDeviceIDs, - LockedAt: time.Now(), + lockedClusterGPUs := getLockedClusterGPUsWithoutLock(clusterID) + lockedCountMap := getLockedCountMapWithoutLock(lockedClusterGPUs) + + for gpuID, availableCount := range availableCountMap { + if lockedCount, ok := lockedCountMap[gpuID]; ok && lockedCount > availableCount { + return false + } } + lockedClusterGPUs[vmName] = lockedGPUs lockedGPUMap[clusterID] = lockedClusterGPUs return true } -// getLockedClusterGPUIDs returns the locked GPU devices of the specified cluster. -func getLockedClusterGPUIDs(clusterID string) sets.Set[string] { +func filterGPUDeviceInfosByLockGPUDevices(clusterID string, gpuDeviceInfos service.GPUDeviceInfos) service.GPUDeviceInfos { gpuLock.Lock() defer gpuLock.Unlock() - return getLockedClusterGPUIDsWithoutLock(clusterID) + lockedClusterGPUs := getLockedClusterGPUsWithoutLock(clusterID) + lockedCountMap := getLockedCountMapWithoutLock(lockedClusterGPUs) + + return gpuDeviceInfos.Filter(func(g *service.GPUDeviceInfo) bool { + if lockedCount, ok := lockedCountMap[g.ID]; ok && lockedCount >= g.AvailableCount { + return false + } + + return true + }) } func getGPUDevicesLockedByVM(clusterID, vmName string) *lockedVMGPUs { gpuLock.Lock() defer gpuLock.Unlock() - lockedClusterGPUs := getLockedClusterGPUs(clusterID) + lockedClusterGPUs := getLockedClusterGPUsWithoutLock(clusterID) if vmGPUs, ok := lockedClusterGPUs[vmName]; ok { if time.Now().Before(vmGPUs.LockedAt.Add(gpuLockTimeout)) { return &vmGPUs @@ -210,7 +244,7 @@ func unlockGPUDevicesLockedByVM(clusterID, vmName string) { gpuLock.Lock() defer gpuLock.Unlock() - lockedClusterGPUs := getLockedClusterGPUs(clusterID) + lockedClusterGPUs := getLockedClusterGPUsWithoutLock(clusterID) delete(lockedClusterGPUs, vmName) if len(lockedClusterGPUs) == 0 { @@ -220,25 +254,34 @@ func unlockGPUDevicesLockedByVM(clusterID, vmName string) { } } -func getLockedClusterGPUs(clusterID string) lockedClusterGPUMap { - if _, ok := lockedGPUMap[clusterID]; ok { - return lockedGPUMap[clusterID] +func getLockedClusterGPUsWithoutLock(clusterID string) lockedClusterGPUMap { + if _, ok := lockedGPUMap[clusterID]; !ok { + return make(map[string]lockedVMGPUs) } - return make(map[string]lockedVMGPUs) -} - -func getLockedClusterGPUIDsWithoutLock(clusterID string) sets.Set[string] { - gpuIDs := sets.Set[string]{} - - lockedClusterGPUs := getLockedClusterGPUs(clusterID) + lockedClusterGPUs := lockedGPUMap[clusterID] for vmName, lockedGPUs := range lockedClusterGPUs { - if time.Now().Before(lockedGPUs.LockedAt.Add(gpuLockTimeout)) { 
- gpuIDs.Insert(lockedGPUs.GPUDeviceIDs...) - } else { + if !time.Now().Before(lockedGPUs.LockedAt.Add(gpuLockTimeout)) { + // Delete expired data delete(lockedClusterGPUs, vmName) } } - return gpuIDs + return lockedClusterGPUs +} + +// getLockedCountMapWithoutLock counts and returns the number of locks for each GPU. +func getLockedCountMapWithoutLock(lockedClusterGPUs lockedClusterGPUMap) map[string]int32 { + lockedCountMap := make(map[string]int32) + for _, lockedGPUs := range lockedClusterGPUs { + for i := 0; i < len(lockedGPUs.GPUDevices); i++ { + if count, ok := lockedCountMap[lockedGPUs.GPUDevices[i].ID]; ok { + lockedCountMap[lockedGPUs.GPUDevices[i].ID] = count + lockedGPUs.GPUDevices[i].Count + } else { + lockedCountMap[lockedGPUs.GPUDevices[i].ID] = lockedGPUs.GPUDevices[i].Count + } + } + } + + return lockedCountMap } diff --git a/controllers/vm_limiter_test.go b/controllers/vm_limiter_test.go index 53e529db..6df3c1bf 100644 --- a/controllers/vm_limiter_test.go +++ b/controllers/vm_limiter_test.go @@ -24,6 +24,7 @@ import ( . "github.com/onsi/gomega" "github.com/smartxworks/cluster-api-provider-elf/pkg/config" + "github.com/smartxworks/cluster-api-provider-elf/pkg/service" "github.com/smartxworks/cluster-api-provider-elf/test/fake" ) @@ -137,47 +138,55 @@ var _ = Describe("Lock GPU devices for VM", func() { }) It("lockGPUDevicesForVM", func() { - gpuIDs := []string{gpuID} + lockedGPUID := fake.UUID() + gpuID = fake.UUID() + lockedGPUDeviceInfo := &service.GPUDeviceInfo{ID: lockedGPUID, AllocatedCount: 1, AvailableCount: 1} + gpuDeviceInfo := &service.GPUDeviceInfo{ID: gpuID, AllocatedCount: 0, AvailableCount: 1} + lockedGPUDeviceInfos := []*service.GPUDeviceInfo{lockedGPUDeviceInfo} + filteredGPUDeviceInfos := []*service.GPUDeviceInfo{gpuDeviceInfo, lockedGPUDeviceInfo} lockedVMGPUs := getGPUDevicesLockedByVM(clusterID, vmName) Expect(lockedVMGPUs).To(BeNil()) - lockedClusterGPUIDs := getLockedClusterGPUIDs(clusterID) - Expect(lockedClusterGPUIDs.Len()).To(Equal(0)) + filteredGPUs := filterGPUDeviceInfosByLockGPUDevices(clusterID, service.NewGPUDeviceInfos(filteredGPUDeviceInfos...)) + Expect(filteredGPUs).To(HaveLen(2)) - Expect(lockGPUDevicesForVM(clusterID, vmName, hostID, gpuIDs)).To(BeTrue()) + Expect(lockGPUDevicesForVM(clusterID, vmName, hostID, lockedGPUDeviceInfos)).To(BeTrue()) lockedVMGPUs = getGPUDevicesLockedByVM(clusterID, vmName) Expect(lockedVMGPUs.HostID).To(Equal(hostID)) - Expect(lockedVMGPUs.GPUDeviceIDs).To(Equal(gpuIDs)) + Expect(lockedVMGPUs.GPUDevices).To(HaveLen(1)) + Expect(lockedVMGPUs.GPUDevices[0].ID).To(Equal(lockedGPUID)) + Expect(lockedVMGPUs.GPUDevices[0].Count).To(Equal(int32(1))) Expect(lockedVMGPUs.LockedAt.Unix()).To(Equal(time.Now().Unix())) - lockedClusterGPUIDs = getLockedClusterGPUIDs(clusterID) - Expect(lockedClusterGPUIDs.Len()).To(Equal(1)) - Expect(lockedClusterGPUIDs.Has(gpuID)).To(BeTrue()) + filteredGPUs = filterGPUDeviceInfosByLockGPUDevices(clusterID, service.NewGPUDeviceInfos(filteredGPUDeviceInfos...)) + Expect(filteredGPUs).To(HaveLen(1)) + Expect(filteredGPUs.Contains(gpuDeviceInfo.ID)).To(BeTrue()) - Expect(lockGPUDevicesForVM(clusterID, vmName, hostID, gpuIDs)).To(BeFalse()) + Expect(lockGPUDevicesForVM(clusterID, vmName, hostID, lockedGPUDeviceInfos)).To(BeFalse()) + lockedVMGPUs = getGPUDevicesLockedByVM(clusterID, vmName) + Expect(lockedVMGPUs.GPUDevices).To(HaveLen(1)) unlockGPUDevicesLockedByVM(clusterID, vmName) lockedVMGPUs = getGPUDevicesLockedByVM(clusterID, vmName) Expect(lockedVMGPUs).To(BeNil()) - 
lockedClusterGPUIDs = getLockedClusterGPUIDs(clusterID) - Expect(lockedClusterGPUIDs.Len()).To(Equal(0)) + filteredGPUs = filterGPUDeviceInfosByLockGPUDevices(clusterID, service.NewGPUDeviceInfos(filteredGPUDeviceInfos...)) + Expect(filteredGPUs).To(HaveLen(2)) - Expect(lockGPUDevicesForVM(clusterID, vmName, hostID, gpuIDs)).To(BeTrue()) + Expect(lockGPUDevicesForVM(clusterID, vmName, hostID, lockedGPUDeviceInfos)).To(BeTrue()) vmGPUs := lockedGPUMap[clusterID][vmName] vmGPUs.LockedAt = vmGPUs.LockedAt.Add(-gpuLockTimeout) lockedGPUMap[clusterID][vmName] = vmGPUs lockedVMGPUs = getGPUDevicesLockedByVM(clusterID, vmName) Expect(lockedVMGPUs).To(BeNil()) - lockedClusterGPUIDs = getLockedClusterGPUIDs(clusterID) - Expect(lockedClusterGPUIDs.Len()).To(Equal(0)) - - Expect(lockGPUDevicesForVM(clusterID, vmName, hostID, gpuIDs)).To(BeTrue()) - vmGPUs = lockedGPUMap[clusterID][vmName] - vmGPUs.LockedAt = vmGPUs.LockedAt.Add(-gpuLockTimeout) - lockedGPUMap[clusterID][vmName] = vmGPUs - lockedClusterGPUIDs = getLockedClusterGPUIDs(clusterID) - Expect(lockedClusterGPUIDs.Len()).To(Equal(0)) - lockedVMGPUs = getGPUDevicesLockedByVM(clusterID, vmName) - Expect(lockedVMGPUs).To(BeNil()) + filteredGPUs = filterGPUDeviceInfosByLockGPUDevices(clusterID, service.NewGPUDeviceInfos(filteredGPUDeviceInfos...)) + Expect(filteredGPUs).To(HaveLen(2)) + + lockedGPUDeviceInfo.AvailableCount = 2 + Expect(lockGPUDevicesForVM(clusterID, vmName, hostID, lockedGPUDeviceInfos)).To(BeTrue()) + Expect(lockGPUDevicesForVM(clusterID, fake.UUID(), hostID, lockedGPUDeviceInfos)).To(BeTrue()) + Expect(lockGPUDevicesForVM(clusterID, fake.UUID(), hostID, lockedGPUDeviceInfos)).To(BeFalse()) + Expect(lockedGPUMap[clusterID]).To(HaveLen(2)) + filteredGPUs = filterGPUDeviceInfosByLockGPUDevices(clusterID, service.NewGPUDeviceInfos(filteredGPUDeviceInfos...)) + Expect(filteredGPUs).To(HaveLen(1)) }) }) diff --git a/go.mod b/go.mod index 53023d2d..a52131c3 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/onsi/gomega v1.27.10 github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pkg/errors v0.9.1 - github.com/smartxworks/cloudtower-go-sdk/v2 v2.11.1-rc-2023-09-14 + github.com/smartxworks/cloudtower-go-sdk/v2 v2.12.1-0.20231102021857-ae16239443e2 golang.org/x/mod v0.12.0 k8s.io/api v0.27.2 k8s.io/apiextensions-apiserver v0.27.2 diff --git a/go.sum b/go.sum index d21438c3..ae9e834e 100644 --- a/go.sum +++ b/go.sum @@ -547,8 +547,8 @@ github.com/sirupsen/logrus v1.4.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/smartxworks/cloudtower-go-sdk/v2 v2.11.1-rc-2023-09-14 h1:CHJLqIwjPHMKpnlR7wXmKUr9n2Ba7KhR5LG63S4TqQY= -github.com/smartxworks/cloudtower-go-sdk/v2 v2.11.1-rc-2023-09-14/go.mod h1:X6R9+L438SMnLJXykSCV3fJ+AZul0hlyjITsZgrSRtM= +github.com/smartxworks/cloudtower-go-sdk/v2 v2.12.1-0.20231102021857-ae16239443e2 h1:UXS2xA1dmSdR5B9BPmArlKHDAmpjGytY7XVPbVadBqU= +github.com/smartxworks/cloudtower-go-sdk/v2 v2.12.1-0.20231102021857-ae16239443e2/go.mod h1:X6R9+L438SMnLJXykSCV3fJ+AZul0hlyjITsZgrSRtM= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= 
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= diff --git a/pkg/service/collections.go b/pkg/service/collections.go index bf9d7cef..7d25ec7b 100644 --- a/pkg/service/collections.go +++ b/pkg/service/collections.go @@ -168,3 +168,86 @@ func (s Hosts) IDs() []string { } return res } + +// GPUDeviceInfos is a set of GPUDeviceInfos. +type GPUDeviceInfos map[string]*GPUDeviceInfo + +// NewGPUDeviceInfos creates a GPUDeviceInfos. from a list of values. +func NewGPUDeviceInfos(gpuDeviceInfo ...*GPUDeviceInfo) GPUDeviceInfos { + ss := make(GPUDeviceInfos, len(gpuDeviceInfo)) + ss.Insert(gpuDeviceInfo...) + return ss +} + +func (s GPUDeviceInfos) Insert(gpuDeviceInfos ...*GPUDeviceInfo) { + for i := range gpuDeviceInfos { + if gpuDeviceInfos[i] != nil { + g := gpuDeviceInfos[i] + s[g.ID] = g + } + } +} + +// UnsortedList returns the slice with contents in random order. +func (s GPUDeviceInfos) UnsortedList() []*GPUDeviceInfo { + res := make([]*GPUDeviceInfo, 0, len(s)) + for _, value := range s { + res = append(res, value) + } + return res +} + +// Get returns a GPUDeviceInfo of the specified gpuID. +func (s GPUDeviceInfos) Get(gpuID string) *GPUDeviceInfo { + if gpuDeviceInfo, ok := s[gpuID]; ok { + return gpuDeviceInfo + } + return nil +} + +func (s GPUDeviceInfos) Contains(gpuID string) bool { + _, ok := s[gpuID] + return ok +} + +func (s GPUDeviceInfos) Len() int { + return len(s) +} + +func (s GPUDeviceInfos) Iterate(fn func(*GPUDeviceInfo)) { + for _, g := range s { + fn(g) + } +} + +// Filter returns a GPUDeviceInfos containing only the GPUDeviceInfos that match all of the given GPUDeviceInfoFilters. +func (s GPUDeviceInfos) Filter(filters ...GPUDeviceInfoFilterFunc) GPUDeviceInfos { + return newFilteredGPUDeviceInfoCollection(GPUDeviceInfoFilterAnd(filters...), s.UnsortedList()...) +} + +// newFilteredGPUDeviceInfoCollection creates a GPUDeviceInfos from a filtered list of values. +func newFilteredGPUDeviceInfoCollection(filter GPUDeviceInfoFilterFunc, gpuDeviceInfos ...*GPUDeviceInfo) GPUDeviceInfos { + ss := make(GPUDeviceInfos, len(gpuDeviceInfos)) + for i := range gpuDeviceInfos { + g := gpuDeviceInfos[i] + if filter(g) { + ss.Insert(g) + } + } + return ss +} + +// GPUDeviceInfoFilterFunc is the functon definition for a filter. +type GPUDeviceInfoFilterFunc func(*GPUDeviceInfo) bool + +// GPUDeviceInfoFilterAnd returns a filter that returns true if all of the given filters returns true. 
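// Combined with GPUDeviceInfos.Filter above, a hypothetical composite filter
//
//	infos.Filter(
//		func(g *GPUDeviceInfo) bool { return g.AvailableCount > 0 },
//		func(g *GPUDeviceInfo) bool { return g.Model == "A16" },
//	)
//
// keeps only the entries for which every predicate returns true.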
+func GPUDeviceInfoFilterAnd(filters ...GPUDeviceInfoFilterFunc) GPUDeviceInfoFilterFunc { + return func(g *GPUDeviceInfo) bool { + for _, f := range filters { + if !f(g) { + return false + } + } + return true + } +} diff --git a/pkg/service/collections_test.go b/pkg/service/collections_test.go index 7fe4fa0f..ab6c55d4 100644 --- a/pkg/service/collections_test.go +++ b/pkg/service/collections_test.go @@ -81,3 +81,36 @@ func TestHostCollection(t *testing.T) { g.Expect(NewHosts(host1, host2).Difference(NewHosts(host2)).Contains(*host1.ID)).To(gomega.BeTrue()) }) } + +func TestGPUDeviceInfoCollection(t *testing.T) { + g := gomega.NewGomegaWithT(t) + + t.Run("Find", func(t *testing.T) { + gpuDeviceInfo1 := &GPUDeviceInfo{ID: "gpu1"} + gpuDeviceInfo2 := &GPUDeviceInfo{ID: "gpu2"} + + gpuDeviceInfos := NewGPUDeviceInfos() + g.Expect(gpuDeviceInfos.Get("404")).To(gomega.BeNil()) + g.Expect(gpuDeviceInfos.Len()).To(gomega.Equal(0)) + + gpuDeviceInfos.Insert(gpuDeviceInfo1) + g.Expect(gpuDeviceInfos.Contains("gpu1")).To(gomega.BeTrue()) + g.Expect(gpuDeviceInfos.Get("gpu1")).To(gomega.Equal(gpuDeviceInfo1)) + g.Expect(gpuDeviceInfos.UnsortedList()).To(gomega.Equal([]*GPUDeviceInfo{gpuDeviceInfo1})) + count := 0 + gpuID := gpuDeviceInfo1.ID + gpuDeviceInfos.Iterate(func(g *GPUDeviceInfo) { + count += 1 + gpuID = g.ID + }) + g.Expect(count).To(gomega.Equal(1)) + g.Expect(gpuID).To(gomega.Equal(gpuDeviceInfo1.ID)) + + gpuDeviceInfos = NewGPUDeviceInfos(gpuDeviceInfo1, gpuDeviceInfo2) + filteredGPUDeviceInfos := gpuDeviceInfos.Filter(func(g *GPUDeviceInfo) bool { + return g.ID != gpuDeviceInfo1.ID + }) + g.Expect(filteredGPUDeviceInfos.Len()).To(gomega.Equal(1)) + g.Expect(filteredGPUDeviceInfos.Contains(gpuDeviceInfo2.ID)).To(gomega.BeTrue()) + }) +} diff --git a/pkg/service/mock_services/vm_mock.go b/pkg/service/mock_services/vm_mock.go index 8e30db92..fa7eedb8 100644 --- a/pkg/service/mock_services/vm_mock.go +++ b/pkg/service/mock_services/vm_mock.go @@ -39,18 +39,18 @@ func (m *MockVMService) EXPECT() *MockVMServiceMockRecorder { } // AddGPUDevices mocks base method. -func (m *MockVMService) AddGPUDevices(id string, gpus []*models.VMGpuOperationParams) (*models.Task, error) { +func (m *MockVMService) AddGPUDevices(id string, gpuDeviceInfo []*service.GPUDeviceInfo) (*models.Task, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddGPUDevices", id, gpus) + ret := m.ctrl.Call(m, "AddGPUDevices", id, gpuDeviceInfo) ret0, _ := ret[0].(*models.Task) ret1, _ := ret[1].(error) return ret0, ret1 } // AddGPUDevices indicates an expected call of AddGPUDevices. -func (mr *MockVMServiceMockRecorder) AddGPUDevices(id, gpus interface{}) *gomock.Call { +func (mr *MockVMServiceMockRecorder) AddGPUDevices(id, gpuDeviceInfo interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddGPUDevices", reflect.TypeOf((*MockVMService)(nil).AddGPUDevices), id, gpus) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddGPUDevices", reflect.TypeOf((*MockVMService)(nil).AddGPUDevices), id, gpuDeviceInfo) } // AddLabelsToVM mocks base method. @@ -84,7 +84,7 @@ func (mr *MockVMServiceMockRecorder) AddVMsToPlacementGroup(placementGroup, vmID } // Clone mocks base method. 
-func (m *MockVMService) Clone(elfCluster *v1beta1.ElfCluster, elfMachine *v1beta1.ElfMachine, bootstrapData, host string, machineGPUDevices []*models.GpuDevice) (*models.WithTaskVM, error) { +func (m *MockVMService) Clone(elfCluster *v1beta1.ElfCluster, elfMachine *v1beta1.ElfMachine, bootstrapData, host string, machineGPUDevices []*service.GPUDeviceInfo) (*models.WithTaskVM, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Clone", elfCluster, elfMachine, bootstrapData, host, machineGPUDevices) ret0, _ := ret[0].(*models.WithTaskVM) @@ -188,19 +188,34 @@ func (mr *MockVMServiceMockRecorder) FindByIDs(ids interface{}) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FindByIDs", reflect.TypeOf((*MockVMService)(nil).FindByIDs), ids) } +// FindGPUDeviceInfos mocks base method. +func (m *MockVMService) FindGPUDeviceInfos(gpuIDs []string) (service.GPUDeviceInfos, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "FindGPUDeviceInfos", gpuIDs) + ret0, _ := ret[0].(service.GPUDeviceInfos) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// FindGPUDeviceInfos indicates an expected call of FindGPUDeviceInfos. +func (mr *MockVMServiceMockRecorder) FindGPUDeviceInfos(gpuIDs interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FindGPUDeviceInfos", reflect.TypeOf((*MockVMService)(nil).FindGPUDeviceInfos), gpuIDs) +} + // FindGPUDevicesByHostIDs mocks base method. -func (m *MockVMService) FindGPUDevicesByHostIDs(hostIDs []string) ([]*models.GpuDevice, error) { +func (m *MockVMService) FindGPUDevicesByHostIDs(hostIDs []string, gpuDeviceUsage models.GpuDeviceUsage) ([]*models.GpuDevice, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "FindGPUDevicesByHostIDs", hostIDs) + ret := m.ctrl.Call(m, "FindGPUDevicesByHostIDs", hostIDs, gpuDeviceUsage) ret0, _ := ret[0].([]*models.GpuDevice) ret1, _ := ret[1].(error) return ret0, ret1 } // FindGPUDevicesByHostIDs indicates an expected call of FindGPUDevicesByHostIDs. -func (mr *MockVMServiceMockRecorder) FindGPUDevicesByHostIDs(hostIDs interface{}) *gomock.Call { +func (mr *MockVMServiceMockRecorder) FindGPUDevicesByHostIDs(hostIDs, gpuDeviceUsage interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FindGPUDevicesByHostIDs", reflect.TypeOf((*MockVMService)(nil).FindGPUDevicesByHostIDs), hostIDs) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FindGPUDevicesByHostIDs", reflect.TypeOf((*MockVMService)(nil).FindGPUDevicesByHostIDs), hostIDs, gpuDeviceUsage) } // FindGPUDevicesByIDs mocks base method. diff --git a/pkg/service/types.go b/pkg/service/types.go new file mode 100644 index 00000000..906abf14 --- /dev/null +++ b/pkg/service/types.go @@ -0,0 +1,60 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package service + +import "fmt" + +type GPUDeviceVM struct { + ID string `json:"id"` + Name string `json:"name"` + AllocatedCount int32 `json:"allocatedCount"` +} + +type GPUDeviceInfo struct { + ID string `json:"id"` + HostID string `json:"hostId"` + Model string `json:"model"` + VGPUType string `json:"vGpuType"` + // AllocatedCount the number that has been allocated. + // For GPU devices, can be 0 or larger than 0. + // For vGPU devices, can larger than vgpuInstanceNum. + AllocatedCount int32 `json:"allocatedCount"` + // AvailableCount is the number of GPU that can be allocated. + // For GPU devices, can be 0 or 1. + // For vGPU devices, can be 0 - vgpuInstanceNum. + AvailableCount int32 `json:"availableCount"` + // VMs(including STOPPED) allocated to the current GPU. + VMs []GPUDeviceVM `json:"vms"` +} + +func (g *GPUDeviceInfo) GetVMCount() int { + return len(g.VMs) +} + +func (g *GPUDeviceInfo) ContainsVM(vm string) bool { + for i := 0; i < len(g.VMs); i++ { + if g.VMs[i].ID == vm || g.VMs[i].Name == vm { + return true + } + } + + return false +} + +func (g *GPUDeviceInfo) String() string { + return fmt.Sprintf("{id:%s, hostId:%s, model:%s, vGPUType:%s, allocatedCount:%d, availableCount:%d}", g.ID, g.HostID, g.Model, g.VGPUType, g.AllocatedCount, g.AvailableCount) +} diff --git a/pkg/service/util.go b/pkg/service/util.go index f6794562..7b1e598e 100644 --- a/pkg/service/util.go +++ b/pkg/service/util.go @@ -201,24 +201,134 @@ func IsPlacementGroupTask(task *models.Task) bool { return strings.Contains(GetTowerString(task.Description), "VM placement group") // Update VM placement group } -// GPUCanBeUsedForVM returns whether the virtual machine can use the specified GPU. -func GPUCanBeUsedForVM(gpuDevice *models.GpuDevice, vm string) bool { - if len(gpuDevice.Vms) == 0 || - *gpuDevice.Vms[0].ID == vm || - *gpuDevice.Vms[0].Name == vm { - return true +// HasGPUsCanNotBeUsedForVM returns whether the specified GPUs contains GPU +// that cannot be used by the specified VM. +func HasGPUsCanNotBeUsedForVM(gpuDeviceInfos GPUDeviceInfos, elfMachine *infrav1.ElfMachine) bool { + if elfMachine.RequiresGPUDevices() { + for gpuID := range gpuDeviceInfos { + gpuInfo := gpuDeviceInfos[gpuID] + if gpuInfo.GetVMCount() > 1 || (gpuInfo.GetVMCount() == 1 && !gpuInfo.ContainsVM(elfMachine.Name)) { + return true + } + } + + return false + } + + gpuCountUsedByVM := 0 + availableCountMap := make(map[string]int32) + for gpuID := range gpuDeviceInfos { + gpuInfo := gpuDeviceInfos[gpuID] + + if gpuInfo.ContainsVM(elfMachine.Name) { + gpuCountUsedByVM += 1 + } + + if count, ok := availableCountMap[gpuInfo.ID]; ok { + availableCountMap[gpuInfo.VGPUType] = count + gpuInfo.AvailableCount + } else { + availableCountMap[gpuInfo.VGPUType] = gpuInfo.AvailableCount + } + } + + if gpuCountUsedByVM > 0 { + return gpuCountUsedByVM != gpuDeviceInfos.Len() + } + + vGPUDevices := elfMachine.Spec.VGPUDevices + for i := 0; i < len(vGPUDevices); i++ { + if count, ok := availableCountMap[vGPUDevices[i].Type]; !ok || vGPUDevices[i].Count > count { + return true + } } return false } -func FilterOutGPUsCanNotBeUsedForVM(gpuDevices []*models.GpuDevice, vm string) []*models.GpuDevice { - var gpus []*models.GpuDevice +// AggregateUnusedGPUDevicesToGPUDeviceInfos selects the GPU device +// that gpuDeviceInfos does not have from the specified GPU devices and add to it. +// It should be used in conjunction with FindGPUDeviceInfos. 
+//
+// FindGPUDeviceInfos only returns the GPUs that have been used by virtual machines,
+// so we need to aggregate the unused GPUs as well.
+func AggregateUnusedGPUDevicesToGPUDeviceInfos(gpuDeviceInfos GPUDeviceInfos, gpuDevices []*models.GpuDevice) {
 	for i := 0; i < len(gpuDevices); i++ {
-		if GPUCanBeUsedForVM(gpuDevices[i], vm) {
-			gpus = append(gpus, gpuDevices[i])
+		if !gpuDeviceInfos.Contains(*gpuDevices[i].ID) {
+			gpuDeviceInfo := &GPUDeviceInfo{
+				ID:       *gpuDevices[i].ID,
+				HostID:   *gpuDevices[i].Host.ID,
+				Model:    *gpuDevices[i].Model,
+				VGPUType: *gpuDevices[i].UserVgpuTypeName,
+				// Not yet allocated to a VM, so the value is 0.
+				AllocatedCount: 0,
+				// Not yet allocated to a VM: the value is 1 for GPU passthrough
+				// and availableVgpusNum for vGPU.
+				AvailableCount: 1,
+			}
+			if *gpuDevices[i].UserUsage == models.GpuDeviceUsageVGPU {
+				gpuDeviceInfo.AvailableCount = *gpuDevices[i].AvailableVgpusNum
+			}
+
+			gpuDeviceInfos.Insert(gpuDeviceInfo)
+		}
 	}
+}
+
+// ConvertVMGpuInfosToGPUDeviceInfos converts Tower's VMGpuInfo type to GPUDeviceInfos.
+// It should be used in conjunction with FindGPUDeviceInfos.
+//
+// Tower does not provide an API to obtain the details of the VMs that a GPU device
+// has been allocated to, so we need to derive GPUDeviceInfos in reverse from VMGpuInfo.
+func ConvertVMGpuInfosToGPUDeviceInfos(vmGPUInfos []*models.VMGpuInfo) GPUDeviceInfos {
+	gpuDeviceInfos := NewGPUDeviceInfos()
+	for i := 0; i < len(vmGPUInfos); i++ {
+		gpuDevices := vmGPUInfos[i].GpuDevices
+		for j := 0; j < len(gpuDevices); j++ {
+			allocatedCount := int32(1)
+			if *gpuDevices[j].UserUsage == models.GpuDeviceUsageVGPU {
+				allocatedCount = *gpuDevices[j].VgpuInstanceOnVMNum
+			}
+
+			gpuDeviceVM := GPUDeviceVM{
+				ID:             *vmGPUInfos[i].ID,
+				Name:           *vmGPUInfos[i].Name,
+				AllocatedCount: allocatedCount,
+			}
+
+			if gpuDeviceInfos.Contains(*gpuDevices[j].ID) {
+				gpuDeviceInfo := gpuDeviceInfos.Get(*gpuDevices[j].ID)
+				gpuDeviceInfo.VMs = append(gpuDeviceInfo.VMs, gpuDeviceVM)
+				gpuDeviceInfo.AllocatedCount += gpuDeviceVM.AllocatedCount
+				if *gpuDevices[j].UserUsage == models.GpuDeviceUsageVGPU {
+					gpuDeviceInfo.AvailableCount = calGPUAvailableCount(gpuDeviceInfo.AvailableCount, gpuDeviceVM.AllocatedCount)
+				}
+			} else {
+				availableCount := int32(0)
+				if *gpuDevices[j].UserUsage == models.GpuDeviceUsageVGPU {
+					availableCount = calGPUAvailableCount(*gpuDevices[j].VgpuInstanceNum, gpuDeviceVM.AllocatedCount)
+				}
+
+				gpuDeviceInfos.Insert(&GPUDeviceInfo{
+					ID:             *gpuDevices[j].ID,
+					HostID:         *gpuDevices[j].Host.ID,
+					Model:          *gpuDevices[j].Model,
+					VGPUType:       *gpuDevices[j].UserVgpuTypeName,
+					AllocatedCount: gpuDeviceVM.AllocatedCount,
+					AvailableCount: availableCount,
+					VMs:            []GPUDeviceVM{gpuDeviceVM},
+				})
+			}
+		}
+	}
+
+	return gpuDeviceInfos
+}
+
+func calGPUAvailableCount(availableCount, allocatedCount int32) int32 {
+	count := availableCount - allocatedCount
+	if count < 0 {
+		count = 0
+	}
 
-	return gpus
+	return count
 }
diff --git a/pkg/service/util_test.go b/pkg/service/util_test.go
index 98e167e8..9c2412fc 100644
--- a/pkg/service/util_test.go
+++ b/pkg/service/util_test.go
@@ -22,6 +22,8 @@ import (
 	"github.com/onsi/gomega"
 	"github.com/smartxworks/cloudtower-go-sdk/v2/models"
 	"k8s.io/utils/pointer"
+
+	infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1"
 )
 
 func TestIsAvailableHost(t *testing.T) {
@@ -101,26 +103,190 @@ func TestIsAvailableHost(t *testing.T) {
 	})
 }
 
-func TestGPUCanBeUsedForVM(t *testing.T) {
+// func TestGPUCanBeUsedForVM(t *testing.T) {
+// 	g := gomega.NewGomegaWithT(t)
+
+// 	t.Run("should return false when GPU can not be used for VM", func(t *testing.T) {
+// 		g.Expect(GPUCanBeUsedForVM(&models.GpuDevice{Vms: []*models.NestedVM{{ID: TowerString("id2"), Name: TowerString("vm2")}, {ID: TowerString("id"), Name: TowerString("vm")}}}, "vm")).To(gomega.BeFalse())
+// 	})
+
+// 	t.Run("should return false when GPU can not be used for VM", func(t *testing.T) {
+// 		g.Expect(GPUCanBeUsedForVM(&models.GpuDevice{}, "vm")).To(gomega.BeTrue())
+// 		g.Expect(GPUCanBeUsedForVM(&models.GpuDevice{Vms: []*models.NestedVM{{ID: TowerString("vm")}}}, "vm")).To(gomega.BeTrue())
+// 		g.Expect(GPUCanBeUsedForVM(&models.GpuDevice{Vms: []*models.NestedVM{{ID: TowerString("id"), Name: TowerString("vm")}}}, "vm")).To(gomega.BeTrue())
+// 	})
+// }
+
+// func TestFilterOutGPUsCanNotBeUsedForVM(t *testing.T) {
+// 	g := gomega.NewGomegaWithT(t)
+
+// 	t.Run("should filter GPUs", func(t *testing.T) {
+// 		g.Expect(FilterOutGPUsCanNotBeUsedForVM([]*models.GpuDevice{}, "vm")).To(gomega.BeEmpty())
+// 		g.Expect(FilterOutGPUsCanNotBeUsedForVM([]*models.GpuDevice{{Vms: []*models.NestedVM{{ID: TowerString("id2"), Name: TowerString("vm2")}}}}, "vm")).To(gomega.BeEmpty())
+// 		g.Expect(FilterOutGPUsCanNotBeUsedForVM([]*models.GpuDevice{{Vms: []*models.NestedVM{{ID: TowerString("id"), Name: TowerString("vm")}}}}, "vm")).To(gomega.HaveLen(1))
+// 	})
+// }
+
+func TestHasGPUsCanNotBeUsedForVM(t *testing.T) {
 	g := gomega.NewGomegaWithT(t)
+	elfMachine := &infrav1.ElfMachine{}
+	elfMachine.Name = "test"
+	elfMachine.Spec.GPUDevices = append(elfMachine.Spec.GPUDevices, infrav1.GPUPassthroughDeviceSpec{Model: "A16", Count: 1})
 
-	t.Run("should return false when GPU can not be used for VM", func(t *testing.T) {
-		g.Expect(GPUCanBeUsedForVM(&models.GpuDevice{Vms: []*models.NestedVM{{ID: TowerString("id2"), Name: TowerString("vm2")}, {ID: TowerString("id"), Name: TowerString("vm")}}}, "vm")).To(gomega.BeFalse())
+	t.Run("GPU", func(t *testing.T) {
+		g.Expect(HasGPUsCanNotBeUsedForVM(NewGPUDeviceInfos(), elfMachine)).To(gomega.BeFalse())
+		g.Expect(HasGPUsCanNotBeUsedForVM(NewGPUDeviceInfos(&GPUDeviceInfo{
+			VMs: []GPUDeviceVM{{ID: "vm1", Name: elfMachine.Name}},
+		}), elfMachine)).To(gomega.BeFalse())
+		g.Expect(HasGPUsCanNotBeUsedForVM(NewGPUDeviceInfos(&GPUDeviceInfo{
+			VMs: []GPUDeviceVM{{ID: "vm1", Name: "vm1"}},
+		}), elfMachine)).To(gomega.BeTrue())
+		g.Expect(HasGPUsCanNotBeUsedForVM(NewGPUDeviceInfos(&GPUDeviceInfo{
+			VMs: []GPUDeviceVM{
+				{ID: "vm1", Name: "vm1"},
+				{ID: "vm2", Name: elfMachine.Name},
+			},
+		}), elfMachine)).To(gomega.BeTrue())
 	})
+}
+
+func TestAggregateUnusedGPUDevicesToGPUDeviceInfos(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	host := &models.NestedHost{ID: TowerString("host1")}
 
-	t.Run("should return false when GPU can not be used for VM", func(t *testing.T) {
-		g.Expect(GPUCanBeUsedForVM(&models.GpuDevice{}, "vm")).To(gomega.BeTrue())
-		g.Expect(GPUCanBeUsedForVM(&models.GpuDevice{Vms: []*models.NestedVM{{ID: TowerString("vm")}}}, "vm")).To(gomega.BeTrue())
-		g.Expect(GPUCanBeUsedForVM(&models.GpuDevice{Vms: []*models.NestedVM{{ID: TowerString("id"), Name: TowerString("vm")}}}, "vm")).To(gomega.BeTrue())
+	t.Run("GPU", func(t *testing.T) {
+		gpuDevice := &models.GpuDevice{ID: TowerString("gpu1"), Host: host, Model: TowerString("A16"), UserUsage: models.NewGpuDeviceUsage(models.GpuDeviceUsagePASSTHROUGH), UserVgpuTypeName: TowerString("")}
+		gpuDevices := []*models.GpuDevice{gpuDevice}
+		gpuDeviceInfos := NewGPUDeviceInfos()
+
+		AggregateUnusedGPUDevicesToGPUDeviceInfos(gpuDeviceInfos, gpuDevices)
+		g.Expect(gpuDeviceInfos.Len()).To(gomega.Equal(1))
+		g.Expect(*gpuDeviceInfos.Get(*gpuDevice.ID)).To(gomega.Equal(GPUDeviceInfo{
+			ID:             *gpuDevice.ID,
+			HostID:         *gpuDevice.Host.ID,
+			Model:          *gpuDevice.Model,
+			VGPUType:       *gpuDevice.UserVgpuTypeName,
+			AllocatedCount: 0,
+			AvailableCount: 1,
+		}))
+
+		AggregateUnusedGPUDevicesToGPUDeviceInfos(gpuDeviceInfos, gpuDevices)
+		g.Expect(gpuDeviceInfos.Len()).To(gomega.Equal(1))
+		g.Expect(gpuDeviceInfos.Contains(*gpuDevice.ID)).To(gomega.BeTrue())
+	})
+
+	t.Run("vGPU", func(t *testing.T) {
+		gpuDevice := &models.GpuDevice{ID: TowerString("gpu1"), Host: host, Model: TowerString("V100"), UserUsage: models.NewGpuDeviceUsage(models.GpuDeviceUsageVGPU), UserVgpuTypeName: TowerString(""), AvailableVgpusNum: TowerInt32(6)}
+		gpuDevices := []*models.GpuDevice{gpuDevice}
+		gpuDeviceInfos := NewGPUDeviceInfos()
+
+		AggregateUnusedGPUDevicesToGPUDeviceInfos(gpuDeviceInfos, gpuDevices)
+		g.Expect(gpuDeviceInfos.Len()).To(gomega.Equal(1))
+		g.Expect(*gpuDeviceInfos.Get(*gpuDevice.ID)).To(gomega.Equal(GPUDeviceInfo{
+			ID:             *gpuDevice.ID,
+			HostID:         *gpuDevice.Host.ID,
+			Model:          *gpuDevice.Model,
+			VGPUType:       *gpuDevice.UserVgpuTypeName,
+			AllocatedCount: 0,
+			AvailableCount: *gpuDevice.AvailableVgpusNum,
+		}))
+
+		AggregateUnusedGPUDevicesToGPUDeviceInfos(gpuDeviceInfos, gpuDevices)
+		g.Expect(gpuDeviceInfos.Len()).To(gomega.Equal(1))
+		g.Expect(gpuDeviceInfos.Contains(*gpuDevice.ID)).To(gomega.BeTrue())
 	})
 }
 
-func TestFilterOutGPUsCanNotBeUsedForVM(t *testing.T) {
+func TestConvertVMGpuInfosToGPUDeviceInfos(t *testing.T) {
 	g := gomega.NewGomegaWithT(t)
+	host := &models.NestedHost{ID: TowerString("host1")}
+
+	t.Run("GPU", func(t *testing.T) {
+		vmGpuDetail := &models.VMGpuDetail{ID: TowerString("gpu1"), Host: host, Model: TowerString("A16"), UserUsage: models.NewGpuDeviceUsage(models.GpuDeviceUsagePASSTHROUGH), UserVgpuTypeName: TowerString("")}
+		vmGpuInfo1 := &models.VMGpuInfo{ID: TowerString("1"), Name: TowerString("vm1"), GpuDevices: []*models.VMGpuDetail{vmGpuDetail}}
+		vmGpuInfo2 := &models.VMGpuInfo{ID: TowerString("2"), Name: TowerString("vm2"), GpuDevices: []*models.VMGpuDetail{vmGpuDetail}}
+
+		g.Expect(ConvertVMGpuInfosToGPUDeviceInfos(
+			[]*models.VMGpuInfo{},
+		)).To(gomega.BeEmpty())
+
+		g.Expect(ConvertVMGpuInfosToGPUDeviceInfos(
+			[]*models.VMGpuInfo{vmGpuInfo1},
+		)).To(gomega.Equal(NewGPUDeviceInfos(&GPUDeviceInfo{
+			ID:             *vmGpuDetail.ID,
+			HostID:         *vmGpuDetail.Host.ID,
+			Model:          *vmGpuDetail.Model,
+			VGPUType:       *vmGpuDetail.UserVgpuTypeName,
+			AllocatedCount: 1,
+			AvailableCount: 0,
+			VMs:            []GPUDeviceVM{{ID: *vmGpuInfo1.ID, Name: *vmGpuInfo1.Name, AllocatedCount: 1}},
+		})))
+
+		g.Expect(ConvertVMGpuInfosToGPUDeviceInfos(
+			[]*models.VMGpuInfo{vmGpuInfo1, vmGpuInfo2},
+		)).To(gomega.Equal(NewGPUDeviceInfos(&GPUDeviceInfo{
+			ID:             *vmGpuDetail.ID,
+			HostID:         *vmGpuDetail.Host.ID,
+			Model:          *vmGpuDetail.Model,
+			VGPUType:       *vmGpuDetail.UserVgpuTypeName,
+			AllocatedCount: 2,
+			AvailableCount: 0,
+			VMs: []GPUDeviceVM{
+				{ID: *vmGpuInfo1.ID, Name: *vmGpuInfo1.Name, AllocatedCount: 1},
+				{ID: *vmGpuInfo2.ID, Name: *vmGpuInfo2.Name, AllocatedCount: 1},
+			},
+		})))
+	})
+
+	t.Run("vGPU", func(t *testing.T) {
+		vmGpuDetail1 := &models.VMGpuDetail{ID: TowerString("gpu1"), Host: host, Model: TowerString("V100"), UserUsage: models.NewGpuDeviceUsage(models.GpuDeviceUsageVGPU), UserVgpuTypeName: TowerString("GRID V100-4C"), VgpuInstanceNum: TowerInt32(3), VgpuInstanceOnVMNum: TowerInt32(1)}
+		vmGpuDetail2 := &models.VMGpuDetail{ID: TowerString("gpu1"), Host: host, Model: TowerString("V100"), UserUsage: models.NewGpuDeviceUsage(models.GpuDeviceUsageVGPU), UserVgpuTypeName: TowerString("GRID V100-4C"), VgpuInstanceNum: TowerInt32(3), VgpuInstanceOnVMNum: TowerInt32(2)}
+		vmGpuDetail3 := &models.VMGpuDetail{ID: TowerString("gpu1"), Host: host, Model: TowerString("V100"), UserUsage: models.NewGpuDeviceUsage(models.GpuDeviceUsageVGPU), UserVgpuTypeName: TowerString("GRID V100-4C"), VgpuInstanceNum: TowerInt32(3), VgpuInstanceOnVMNum: TowerInt32(1)}
+		vmGpuInfo1 := &models.VMGpuInfo{ID: TowerString("1"), Name: TowerString("vm1"), GpuDevices: []*models.VMGpuDetail{vmGpuDetail1}}
+		vmGpuInfo2 := &models.VMGpuInfo{ID: TowerString("2"), Name: TowerString("vm2"), GpuDevices: []*models.VMGpuDetail{vmGpuDetail2}}
+		vmGpuInfo3 := &models.VMGpuInfo{ID: TowerString("1"), Name: TowerString("vm2"), GpuDevices: []*models.VMGpuDetail{vmGpuDetail3}}
+
+		g.Expect(ConvertVMGpuInfosToGPUDeviceInfos(
+			[]*models.VMGpuInfo{vmGpuInfo1},
+		)).To(gomega.Equal(NewGPUDeviceInfos(&GPUDeviceInfo{
+			ID:             *vmGpuDetail1.ID,
+			HostID:         *vmGpuDetail1.Host.ID,
+			Model:          *vmGpuDetail1.Model,
+			VGPUType:       *vmGpuDetail1.UserVgpuTypeName,
+			AllocatedCount: 1,
+			AvailableCount: 2,
+			VMs:            []GPUDeviceVM{{ID: *vmGpuInfo1.ID, Name: *vmGpuInfo1.Name, AllocatedCount: 1}},
+		})))
+
+		g.Expect(ConvertVMGpuInfosToGPUDeviceInfos(
+			[]*models.VMGpuInfo{vmGpuInfo1, vmGpuInfo2},
+		)).To(gomega.Equal(NewGPUDeviceInfos(&GPUDeviceInfo{
+			ID:             *vmGpuDetail1.ID,
+			HostID:         *vmGpuDetail1.Host.ID,
+			Model:          *vmGpuDetail1.Model,
+			VGPUType:       *vmGpuDetail1.UserVgpuTypeName,
+			AllocatedCount: 3,
+			AvailableCount: 0,
+			VMs: []GPUDeviceVM{
+				{ID: *vmGpuInfo1.ID, Name: *vmGpuInfo1.Name, AllocatedCount: 1},
+				{ID: *vmGpuInfo2.ID, Name: *vmGpuInfo2.Name, AllocatedCount: 2},
+			},
+		})))
 
-	t.Run("should filter GPUs", func(t *testing.T) {
-		g.Expect(FilterOutGPUsCanNotBeUsedForVM([]*models.GpuDevice{}, "vm")).To(gomega.BeEmpty())
-		g.Expect(FilterOutGPUsCanNotBeUsedForVM([]*models.GpuDevice{{Vms: []*models.NestedVM{{ID: TowerString("id2"), Name: TowerString("vm2")}}}}, "vm")).To(gomega.BeEmpty())
-		g.Expect(FilterOutGPUsCanNotBeUsedForVM([]*models.GpuDevice{{Vms: []*models.NestedVM{{ID: TowerString("id"), Name: TowerString("vm")}}}}, "vm")).To(gomega.HaveLen(1))
+		g.Expect(ConvertVMGpuInfosToGPUDeviceInfos(
+			[]*models.VMGpuInfo{vmGpuInfo1, vmGpuInfo2, vmGpuInfo3},
+		)).To(gomega.Equal(NewGPUDeviceInfos(&GPUDeviceInfo{
+			ID:             *vmGpuDetail1.ID,
+			HostID:         *vmGpuDetail1.Host.ID,
+			Model:          *vmGpuDetail1.Model,
+			VGPUType:       *vmGpuDetail1.UserVgpuTypeName,
+			AllocatedCount: 4,
+			AvailableCount: 0,
+			VMs: []GPUDeviceVM{
+				{ID: *vmGpuInfo1.ID, Name: *vmGpuInfo1.Name, AllocatedCount: 1},
+				{ID: *vmGpuInfo2.ID, Name: *vmGpuInfo2.Name, AllocatedCount: 2},
+				{ID: *vmGpuInfo3.ID, Name: *vmGpuInfo3.Name, AllocatedCount: 1},
+			},
+		})))
 	})
 }
diff --git a/pkg/service/vm.go b/pkg/service/vm.go
index 1e0e69be..33dbf3c7 100644
--- a/pkg/service/vm.go
+++ b/pkg/service/vm.go
@@ -43,7 +43,7 @@ import (
 type VMService interface {
 	Clone(
 		elfCluster *infrav1.ElfCluster, elfMachine *infrav1.ElfMachine, bootstrapData,
-		host string, machineGPUDevices []*models.GpuDevice) (*models.WithTaskVM, error)
+		host string, machineGPUDevices []*GPUDeviceInfo) (*models.WithTaskVM, error)
 	UpdateVM(vm *models.VM, elfMachine *infrav1.ElfMachine) (*models.WithTaskVM, error)
 	Migrate(vmID, hostID string) (*models.WithTaskVM, error)
 	Delete(uuid string) (*models.Task, error)
@@ -51,7 +51,7 @@ type VMService interface {
 	PowerOn(uuid string) (*models.Task, error)
 	ShutDown(uuid string) (*models.Task, error)
 	RemoveGPUDevices(id string, gpus []*models.VMGpuOperationParams) (*models.Task, error)
-	AddGPUDevices(id string, gpus []*models.VMGpuOperationParams) (*models.Task, error)
+	AddGPUDevices(id string, gpuDeviceInfo []*GPUDeviceInfo) (*models.Task, error)
 	Get(id string) (*models.VM, error)
 	GetByName(name string) (*models.VM, error)
 	FindByIDs(ids []string) ([]*models.VM, error)
@@ -72,8 +72,9 @@ type VMService interface {
 	AddVMsToPlacementGroup(placementGroup *models.VMPlacementGroup, vmIDs []string) (*models.Task, error)
 	DeleteVMPlacementGroupByID(ctx goctx.Context, id string) (bool, error)
 	DeleteVMPlacementGroupsByNamePrefix(ctx goctx.Context, placementGroupName string) (int, error)
-	FindGPUDevicesByHostIDs(hostIDs []string) ([]*models.GpuDevice, error)
+	FindGPUDevicesByHostIDs(hostIDs []string, gpuDeviceUsage models.GpuDeviceUsage) ([]*models.GpuDevice, error)
 	FindGPUDevicesByIDs(gpuIDs []string) ([]*models.GpuDevice, error)
+	FindGPUDeviceInfos(gpuIDs []string) (GPUDeviceInfos, error)
 }
 
 type NewVMServiceFunc func(ctx goctx.Context, auth infrav1.Tower, logger logr.Logger) (VMService, error)
@@ -118,7 +119,7 @@ func (svr *TowerVMService) UpdateVM(vm *models.VM, elfMachine *infrav1.ElfMachin
 // Clone kicks off a clone operation on Elf to create a new virtual machine using VM template.
 func (svr *TowerVMService) Clone(
 	elfCluster *infrav1.ElfCluster, elfMachine *infrav1.ElfMachine, bootstrapData,
-	host string, machineGPUDevices []*models.GpuDevice) (*models.WithTaskVM, error) {
+	host string, gpuDeviceInfos []*GPUDeviceInfo) (*models.WithTaskVM, error) {
 	cluster, err := svr.GetCluster(elfCluster.Spec.Cluster)
 	if err != nil {
 		return nil, err
@@ -133,11 +134,11 @@ func (svr *TowerVMService) Clone(
 	cpuCores := TowerCPUCores(*vCPU, elfMachine.Spec.NumCoresPerSocket)
 	cpuSockets := TowerCPUSockets(*vCPU, *cpuCores)
 
-	gpuDevices := make([]*models.VMGpuOperationParams, len(machineGPUDevices))
-	for i := 0; i < len(machineGPUDevices); i++ {
+	gpuDevices := make([]*models.VMGpuOperationParams, len(gpuDeviceInfos))
+	for i := 0; i < len(gpuDeviceInfos); i++ {
 		gpuDevices[i] = &models.VMGpuOperationParams{
-			GpuID:  machineGPUDevices[i].ID,
-			Amount: TowerInt32(1),
+			GpuID:  TowerString(gpuDeviceInfos[i].ID),
+			Amount: TowerInt32(int(gpuDeviceInfos[i].AllocatedCount)),
 		}
 	}
 
@@ -403,7 +404,15 @@ func (svr *TowerVMService) RemoveGPUDevices(id string, gpus []*models.VMGpuOpera
 	return &models.Task{ID: temoveVMGPUDeviceResp.Payload[0].TaskID}, nil
 }
 
-func (svr *TowerVMService) AddGPUDevices(id string, gpus []*models.VMGpuOperationParams) (*models.Task, error) {
+func (svr *TowerVMService) AddGPUDevices(id string, gpuDeviceInfos []*GPUDeviceInfo) (*models.Task, error) {
+	gpus := make([]*models.VMGpuOperationParams, len(gpuDeviceInfos))
+	for i := 0; i < len(gpuDeviceInfos); i++ {
+		gpus[i] = &models.VMGpuOperationParams{
+			GpuID:  TowerString(gpuDeviceInfos[i].ID),
+			Amount: TowerInt32(int(gpuDeviceInfos[i].AllocatedCount)),
+		}
+	}
+
 	addVMGpuDeviceParams := clientvm.NewAddVMGpuDeviceParams()
 	addVMGpuDeviceParams.RequestBody = &models.VMAddGpuDeviceParams{
 		Data: gpus,
@@ -927,21 +936,26 @@ func (svr *TowerVMService) DeleteVMPlacementGroupsByNamePrefix(ctx goctx.Context
 	return len(getVMPlacementGroupsResp.Payload), nil
 }
 
-func (svr *TowerVMService) FindGPUDevicesByHostIDs(hostIDs []string) ([]*models.GpuDevice, error) {
+func (svr *TowerVMService) FindGPUDevicesByHostIDs(hostIDs []string, gpuDeviceUsage models.GpuDeviceUsage) ([]*models.GpuDevice, error) {
 	if len(hostIDs) == 0 {
 		return nil, nil
 	}
 
-	getGpuDevicesParams := clientgpu.NewGetGpuDevicesParams()
-	getGpuDevicesParams.RequestBody = &models.GetGpuDevicesRequestBody{
-		Where: &models.GpuDeviceWhereInput{
-			UserUsage: models.NewGpuDeviceUsage(models.GpuDeviceUsagePASSTHROUGH),
-			Host: &models.HostWhereInput{
-				IDIn: hostIDs,
-			},
+	where := &models.GpuDeviceWhereInput{
+		UserUsage: models.NewGpuDeviceUsage(gpuDeviceUsage),
+		Host: &models.HostWhereInput{
+			IDIn: hostIDs,
 		},
 	}
+	// Filter out GPU devices whose vGPUs have been fully used.
+	if gpuDeviceUsage == models.GpuDeviceUsageVGPU {
+		where.AvailableVgpusNumGt = TowerInt32(0)
+	}
+
+	getGpuDevicesParams := clientgpu.NewGetGpuDevicesParams()
+	getGpuDevicesParams.RequestBody = &models.GetGpuDevicesRequestBody{Where: where}
+
 	getGpuDevicesResp, err := svr.Session.GpuDevice.GetGpuDevices(getGpuDevicesParams)
 	if err != nil {
 		return nil, err
@@ -958,8 +972,7 @@ func (svr *TowerVMService) FindGPUDevicesByIDs(gpuIDs []string) ([]*models.GpuDe
 	getGpuDevicesParams := clientgpu.NewGetGpuDevicesParams()
 	getGpuDevicesParams.RequestBody = &models.GetGpuDevicesRequestBody{
 		Where: &models.GpuDeviceWhereInput{
-			UserUsage: models.NewGpuDeviceUsage(models.GpuDeviceUsagePASSTHROUGH),
-			IDIn:      gpuIDs,
+			IDIn: gpuIDs,
 		},
 	}
 
@@ -970,3 +983,21 @@ func (svr *TowerVMService) FindGPUDevicesByIDs(gpuIDs []string) ([]*models.GpuDe
 
 	return getGpuDevicesResp.Payload, nil
 }
+
+func (svr *TowerVMService) FindGPUDeviceInfos(gpuIDs []string) (GPUDeviceInfos, error) {
+	getVMGpuDeviceInfoParams := clientvm.NewGetVMGpuDeviceInfoParams()
+	getVMGpuDeviceInfoParams.RequestBody = &models.GetVmsRequestBody{
+		Where: &models.VMWhereInput{
+			GpuDevicesSome: &models.GpuDeviceWhereInput{
+				IDIn: gpuIDs,
+			},
+		},
+	}
+
+	getVMGpuDeviceInfoResp, err := svr.Session.VM.GetVMGpuDeviceInfo(getVMGpuDeviceInfoParams)
+	if err != nil {
+		return nil, err
+	}
+
+	return ConvertVMGpuInfosToGPUDeviceInfos(getVMGpuDeviceInfoResp.Payload), nil
+}
diff --git a/test/fake/tower.go b/test/fake/tower.go
index 3b6329f5..c6a2953c 100644
--- a/test/fake/tower.go
+++ b/test/fake/tower.go
@@ -153,9 +153,24 @@ func NewWithTaskVMPlacementGroup(placementGroup *models.VMPlacementGroup, task *
 
 func NewTowerGPU() *models.GpuDevice {
 	return &models.GpuDevice{
-		ID:      pointer.String(ID()),
-		LocalID: pointer.String(UUID()),
-		Name:    pointer.String(ID()),
-		Model:   pointer.String("A16"),
+		ID:               pointer.String(ID()),
+		LocalID:          pointer.String(UUID()),
+		Name:             pointer.String(ID()),
+		Model:            pointer.String("A16"),
+		UserUsage:        models.NewGpuDeviceUsage(models.GpuDeviceUsagePASSTHROUGH),
+		UserVgpuTypeName: pointer.String(""),
+	}
+}
+
+func NewTowerVGPU(vGPUCount int32) *models.GpuDevice {
+	return &models.GpuDevice{
+		ID:                pointer.String(ID()),
+		LocalID:           pointer.String(UUID()),
+		Name:              pointer.String(ID()),
+		UserVgpuTypeName:  pointer.String("V100"),
+		UserUsage:         models.NewGpuDeviceUsage(models.GpuDeviceUsageVGPU),
+		AvailableVgpusNum: pointer.Int32(vGPUCount),
+		AssignedVgpusNum:  pointer.Int32(0),
+		Model:             pointer.String(""),
 	}
 }
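Reviewer note: the following is a minimal, illustrative sketch (not part of this diff) of how the new service helpers are intended to compose when building a per-host view of GPU availability. The function name buildGPUDeviceInfoView and the example package are assumptions; error handling, device locking, and the ElfMachine-specific filtering done by the controller are omitted.

// Hypothetical example (not in this PR): combine the new VMService calls into a
// single GPUDeviceInfos view. FindGPUDeviceInfos only covers GPUs already held by
// VMs, so the still-unused devices are folded in afterwards.
package example

import (
	"github.com/smartxworks/cloudtower-go-sdk/v2/models"

	"github.com/smartxworks/cluster-api-provider-elf/pkg/service"
)

func buildGPUDeviceInfoView(vmSvc service.VMService, hostIDs []string, usage models.GpuDeviceUsage) (service.GPUDeviceInfos, error) {
	// 1. Candidate GPU devices on the available hosts; for vGPU, Tower already
	//    filters out devices whose vGPU instances are fully used.
	gpuDevices, err := vmSvc.FindGPUDevicesByHostIDs(hostIDs, usage)
	if err != nil {
		return nil, err
	}

	gpuIDs := make([]string, 0, len(gpuDevices))
	for i := range gpuDevices {
		gpuIDs = append(gpuIDs, *gpuDevices[i].ID)
	}

	// 2. Allocation details of GPUs that are already attached to VMs,
	//    derived in reverse from Tower's VMGpuInfo.
	gpuDeviceInfos, err := vmSvc.FindGPUDeviceInfos(gpuIDs)
	if err != nil {
		return nil, err
	}

	// 3. Aggregate the GPUs that no VM uses yet into the same view.
	service.AggregateUnusedGPUDevicesToGPUDeviceInfos(gpuDeviceInfos, gpuDevices)

	return gpuDeviceInfos, nil
}

A caller such as selectHostAndGPUsForVM could then pick devices whose AvailableCount still satisfies the ElfMachine's requested GPU or vGPU counts.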
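For reviewers unfamiliar with the vGPU accounting, here is a self-contained toy walk-through of the clamp-at-zero rule applied by calGPUAvailableCount. The helper name availableAfter is local to this example, but the numbers mirror the vGPU test case above (VgpuInstanceNum = 3, with VMs consuming 1, 2, and 1 instances).

// Toy demonstration of the availability bookkeeping: allocated counts keep
// accumulating, while the remaining availability is clamped at zero once the
// device is over-committed.
package main

import "fmt"

// availableAfter mirrors the clamp-at-zero behaviour of calGPUAvailableCount.
func availableAfter(available, allocated int32) int32 {
	count := available - allocated
	if count < 0 {
		count = 0
	}
	return count
}

func main() {
	var available int32 = 3 // VgpuInstanceNum of the V100 in the test above
	var allocated int32

	for _, onVM := range []int32{1, 2, 1} { // VgpuInstanceOnVMNum per VM
		allocated += onVM
		available = availableAfter(available, onVM)
	}

	// Prints "allocated=4 available=0", matching AllocatedCount: 4 and
	// AvailableCount: 0 expected by TestConvertVMGpuInfosToGPUDeviceInfos.
	fmt.Printf("allocated=%d available=%d\n", allocated, available)
}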