diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchFrame.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchFrame.java index db946b90c..781401165 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchFrame.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchFrame.java @@ -43,7 +43,9 @@ public class DispatchFrame extends FrameEntity implements FrameInterface { public int maxCores; public boolean threadable; public long minMemory; - public long minGpu; + public int minGpus; + public int maxGpus; + public long minGpuMemory; public String services; } diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index e1b3cc8f2..d76d98464 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -35,13 +35,16 @@ public class DispatchHost extends Entity public int cores; public int idleCores; + public int gpus; + public int idleGpus; + // Basically an 0 = auto, 1 = all. public int threadMode; public long memory; public long idleMemory; - public long gpu; - public long idleGpu; + public long gpuMemory; + public long idleGpuMemory; public String tags; public String os; @@ -53,11 +56,13 @@ public class DispatchHost extends Entity * booked to this host. 
*/ public int strandedCores = 0; + public int strandedGpus = 0; // To reserve resources for future gpu job long idleMemoryOrig = 0; int idleCoresOrig = 0; - long idleGpuOrig = 0; + long idleGpuMemoryOrig = 0; + int idleGpusOrig = 0; public String getHostId() { return id; @@ -72,7 +77,7 @@ public String getFacilityId() { } @Override - public boolean hasAdditionalResources(int minCores, long minMemory, long minGpu) { + public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, long minGpuMemory) { if (idleCores < minCores) { return false; @@ -80,7 +85,10 @@ public boolean hasAdditionalResources(int minCores, long minMemory, long minGpu) else if (idleMemory < minMemory) { return false; } - else if (idleGpu < minGpu) { + else if (idleGpus < minGpus) { + return false; + } + else if (idleGpuMemory < minGpuMemory) { return false; } @@ -88,10 +96,11 @@ else if (idleGpu < minGpu) { } @Override - public void useResources(int coreUnits, long memory, long gpu) { + public void useResources(int coreUnits, long memory, int gpuUnits, long gpuMemory) { idleCores = idleCores - coreUnits; idleMemory = idleMemory - memory; - idleGpu = idleGpu - gpu; + idleGpus = idleGpus - gpuUnits; + idleGpuMemory = idleGpuMemory - gpuMemory; } /** @@ -99,14 +108,16 @@ public void useResources(int coreUnits, long memory, long gpu) { * */ public void removeGpu() { - if (idleGpu > 0 && idleGpuOrig == 0) { + if (idleGpuMemory > 0 && idleGpuMemoryOrig == 0) { idleMemoryOrig = idleMemory; idleCoresOrig = idleCores; - idleGpuOrig = idleGpu; + idleGpuMemoryOrig = idleGpuMemory; + idleGpusOrig = idleGpus; idleMemory = idleMemory - Math.min(CueUtil.GB4, idleMemory); idleCores = idleCores - Math.min(100, idleCores); - idleGpu = 0; + idleGpuMemory = 0; + idleGpus = 0; } } @@ -115,14 +126,16 @@ public void removeGpu() { * */ public void restoreGpu() { - if (idleGpuOrig > 0) { + if (idleGpuMemoryOrig > 0) { idleMemory = idleMemoryOrig; idleCores = idleCoresOrig; - idleGpu = idleGpuOrig; + 
idleGpuMemory = idleGpuMemoryOrig; + idleGpus = idleGpusOrig; idleMemoryOrig = 0; idleCoresOrig = 0; - idleGpuOrig = 0; + idleGpuMemoryOrig = 0; + idleGpusOrig = 0; } } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/ExecutionSummary.java b/cuebot/src/main/java/com/imageworks/spcue/ExecutionSummary.java index a13529ad8..afe85121a 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/ExecutionSummary.java +++ b/cuebot/src/main/java/com/imageworks/spcue/ExecutionSummary.java @@ -28,6 +28,9 @@ public class ExecutionSummary { public long coreTime; public long coreTimeSuccess; public long coreTimeFail; + public long gpuTime; + public long gpuTimeSuccess; + public long gpuTimeFail; public long highMemoryKb; public long getHighMemoryKb() { @@ -69,5 +72,29 @@ public long getCoreTimeFail() { public void setCoreTimeFail(long coreTimeFail) { this.coreTimeFail = coreTimeFail; } + + public long getGpuTime() { + return gpuTime; + } + + public void setGpuTime(long gpuTime) { + this.gpuTime = gpuTime; + } + + public long getGpuTimeSuccess() { + return gpuTimeSuccess; + } + + public void setGpuTimeSuccess(long gpuTimeSuccess) { + this.gpuTimeSuccess = gpuTimeSuccess; + } + + public long getGpuTimeFail() { + return gpuTimeFail; + } + + public void setGpuTimeFail(long gpuTimeFail) { + this.gpuTimeFail = gpuTimeFail; + } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/GroupDetail.java b/cuebot/src/main/java/com/imageworks/spcue/GroupDetail.java index b67a53b77..cd9f8a998 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/GroupDetail.java +++ b/cuebot/src/main/java/com/imageworks/spcue/GroupDetail.java @@ -23,11 +23,16 @@ public class GroupDetail extends Entity implements GroupInterface, DepartmentInt public int jobMinCores = -1; public int jobMaxCores = -1; + public int jobMinGpus = -1; + public int jobMaxGpus = -1; public int jobPriority = -1; public int minCores = -1; public int maxCores = -1; + public int minGpus = -1; + public int maxGpus = -1; + public 
String parentId = null; public String showId; public String deptId; diff --git a/cuebot/src/main/java/com/imageworks/spcue/HostEntity.java b/cuebot/src/main/java/com/imageworks/spcue/HostEntity.java index e804b230f..9a9bb9475 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/HostEntity.java +++ b/cuebot/src/main/java/com/imageworks/spcue/HostEntity.java @@ -38,6 +38,8 @@ public class HostEntity extends Entity implements HostInterface { public int idleCores; public int memory; public int idleMemory; + public int gpus; + public int idleGpus; public int gpuMemory; public int idleGpuMemory; @@ -59,6 +61,8 @@ public HostEntity(Host grpcHost) { this.idleCores = (int) grpcHost.getIdleCores(); this.memory = (int) grpcHost.getMemory(); this.idleMemory = (int) grpcHost.getIdleMemory(); + this.gpus = (int) grpcHost.getGpus(); + this.idleGpus = (int) grpcHost.getIdleGpus(); this.gpuMemory = (int) grpcHost.getGpuMemory(); this.idleGpuMemory = (int) grpcHost.getIdleGpuMemory(); } diff --git a/cuebot/src/main/java/com/imageworks/spcue/Inherit.java b/cuebot/src/main/java/com/imageworks/spcue/Inherit.java index 73651c33d..1fdb23336 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/Inherit.java +++ b/cuebot/src/main/java/com/imageworks/spcue/Inherit.java @@ -28,6 +28,8 @@ public enum Inherit { Priority, MinCores, MaxCores, + MinGpus, + MaxGpus, All } diff --git a/cuebot/src/main/java/com/imageworks/spcue/JobDetail.java b/cuebot/src/main/java/com/imageworks/spcue/JobDetail.java index d97a05fb3..dad6f8a6d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/JobDetail.java +++ b/cuebot/src/main/java/com/imageworks/spcue/JobDetail.java @@ -46,12 +46,15 @@ public class JobDetail extends JobEntity implements JobInterface, DepartmentInte public int priority = 1; public int minCoreUnits = 100; public int maxCoreUnits = 200000; + public int minGpuUnits = 0; + public int maxGpuUnits = 1000; public boolean isLocal = false; public String localHostName; public int localMaxCores; - 
public int localMaxMemory; + public long localMaxMemory; public int localThreadNumber; - public int localMaxGpuMemory; + public int localMaxGpus; + public long localMaxGpuMemory; public String getDepartmentId() { return deptId; diff --git a/cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java b/cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java index 3b473f8c1..565995d9d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java @@ -32,9 +32,11 @@ public class LayerDetail extends LayerEntity implements LayerInterface { public LayerType type; public int minimumCores; public int maximumCores; + public int minimumGpus; + public int maximumGpus; public boolean isThreadable; public long minimumMemory; - public long minimumGpu; + public long minimumGpuMemory; public int chunkSize; public int timeout; public int timeout_llu; @@ -116,12 +118,20 @@ public void setMinimumMemory(long minimumMemory) { this.minimumMemory = minimumMemory; } - public long getMinimumGpu() { - return minimumGpu; + public int getMinimumGpus() { + return minimumGpus; } - public void setMinimumGpu(long minimumGpu) { - this.minimumGpu = minimumGpu; + public void setMinimumGpus(int minimumGpus) { + this.minimumGpus = minimumGpus; + } + + public long getMinimumGpuMemory() { + return minimumGpuMemory; + } + + public void setMinimumGpuMemory(long minimumGpuMemory) { + this.minimumGpuMemory = minimumGpuMemory; } public int getChunkSize() { diff --git a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java index 6d8bbf08d..3e073fa73 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java @@ -35,11 +35,13 @@ public class LocalHostAssignment extends Entity private int idleCoreUnits; private long idleMemory; - private long idleGpu; + private int 
idleGpuUnits; + private long idleGpuMemory; private long maxMemory; private long maxGpuMemory; private int maxCoreUnits; + private int maxGpuUnits; private int threads; @@ -52,15 +54,16 @@ public class LocalHostAssignment extends Entity public LocalHostAssignment() { } - public LocalHostAssignment(int maxCores, int threads, long maxMemory, long maxGpuMemory) { + public LocalHostAssignment(int maxCores, int threads, long maxMemory, int maxGpus, long maxGpuMemory) { this.maxCoreUnits = maxCores; this.threads = threads; this.maxMemory = maxMemory; + this.maxGpuUnits = maxGpus; this.maxGpuMemory = maxGpuMemory; } @Override - public boolean hasAdditionalResources(int minCores, long minMemory, long minGpu) { + public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, long minGpuMemory) { if (idleCoreUnits < minCores) { return false; @@ -68,7 +71,10 @@ public boolean hasAdditionalResources(int minCores, long minMemory, long minGpu) else if (idleMemory < minMemory) { return false; } - else if (idleGpu < minGpu) { + else if (idleGpuUnits < minGpus) { + return false; + } + else if (idleGpuMemory < minGpuMemory) { return false; } @@ -76,10 +82,11 @@ else if (idleGpu < minGpu) { } @Override - public void useResources(int coreUnits, long memory, long gpu) { + public void useResources(int coreUnits, long memory, int gpuUnits, long gpuMemory) { idleCoreUnits = idleCoreUnits - coreUnits; idleMemory = idleMemory - memory; - idleGpu = idleGpu - gpu; + idleGpuUnits = idleGpuUnits - gpuUnits; + idleGpuMemory = idleGpuMemory - gpuMemory; } public int getThreads() { @@ -110,6 +117,14 @@ public long getIdleMemory() { return this.idleMemory; } + public int getMaxGpuUnits() { + return maxGpuUnits; + } + + public void setMaxGpuUnits(int maxGpuUnits) { + this.maxGpuUnits = maxGpuUnits; + } + public long getMaxGpuMemory() { return maxGpuMemory; } @@ -118,8 +133,8 @@ public void setMaxGpuMemory(long maxGpuMemory) { this.maxGpuMemory = maxGpuMemory; } - public long 
getIdleGpu() { - return this.idleGpu; + public long getIdleGpuMemory() { + return this.idleGpuMemory; } public int getIdleCoreUnits() { @@ -134,8 +149,16 @@ public void setIdleMemory(long idleMemory) { this.idleMemory = idleMemory; } - public void setIdleGpu(long idleGpu) { - this.idleGpu = idleGpu; + public int getIdleGpuUnits() { + return this.idleGpuUnits; + } + + public void setIdleGpuUnits(int idleGpuUnits) { + this.idleGpuUnits = idleGpuUnits; + } + + public void setIdleGpuMemory(long idleGpuMemory) { + this.idleGpuMemory = idleGpuMemory; } public String getHostId() { diff --git a/cuebot/src/main/java/com/imageworks/spcue/ResourceUsage.java b/cuebot/src/main/java/com/imageworks/spcue/ResourceUsage.java index aae8921e4..b45af0838 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/ResourceUsage.java +++ b/cuebot/src/main/java/com/imageworks/spcue/ResourceUsage.java @@ -25,9 +25,10 @@ public class ResourceUsage { private final long coreTimeSeconds; + private final long gpuTimeSeconds; private final long clockTimeSeconds; - public ResourceUsage(long clockTime, int corePoints) { + public ResourceUsage(long clockTime, int corePoints, int gpuPoints) { if (clockTime < 1) { clockTime = 1; @@ -38,14 +39,21 @@ public ResourceUsage(long clockTime, int corePoints) { coreTime = 1; } + long gpuTime = clockTime * gpuPoints; + clockTimeSeconds = clockTime; coreTimeSeconds = coreTime; + gpuTimeSeconds = gpuTime; } public long getCoreTimeSeconds() { return coreTimeSeconds; } + public long getGpuTimeSeconds() { + return gpuTimeSeconds; + } + public long getClockTimeSeconds() { return clockTimeSeconds; } diff --git a/cuebot/src/main/java/com/imageworks/spcue/ServiceEntity.java b/cuebot/src/main/java/com/imageworks/spcue/ServiceEntity.java index 373877e69..16d03c5c5 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/ServiceEntity.java +++ b/cuebot/src/main/java/com/imageworks/spcue/ServiceEntity.java @@ -40,6 +40,17 @@ public class ServiceEntity extends Entity { */ public 
int maxCores = 0; + /** + * Determines the default minimum gpus per frame. + */ + public int minGpus = 0; + + /** + * Determines the default minimum gpus per frame. 0 indicates + * the feature is disabled. + */ + public int maxGpus = 0; + /** * Determines the default minimum memory per frame. */ @@ -48,7 +59,7 @@ public class ServiceEntity extends Entity { /** * Determines the default minimum gpu per frame. */ - public long minGpu = Dispatcher.GPU_RESERVED_DEFAULT; + public long minGpuMemory = Dispatcher.MEM_GPU_RESERVED_DEFAULT; /** * Determines the default tags. diff --git a/cuebot/src/main/java/com/imageworks/spcue/ShowEntity.java b/cuebot/src/main/java/com/imageworks/spcue/ShowEntity.java index 8a4d768af..1d2f675e1 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/ShowEntity.java +++ b/cuebot/src/main/java/com/imageworks/spcue/ShowEntity.java @@ -25,6 +25,8 @@ public class ShowEntity extends Entity implements ShowInterface { public boolean paused; public int defaultMinCores; public int defaultMaxCores; + public int defaultMinGpus; + public int defaultMaxGpus; public String[] commentMail; public String getShowId() { diff --git a/cuebot/src/main/java/com/imageworks/spcue/StrandedGpus.java b/cuebot/src/main/java/com/imageworks/spcue/StrandedGpus.java new file mode 100644 index 000000000..91b9ad76a --- /dev/null +++ b/cuebot/src/main/java/com/imageworks/spcue/StrandedGpus.java @@ -0,0 +1,44 @@ + +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +package com.imageworks.spcue; + +public final class StrandedGpus { + + /** + * The maximum time this object should be valid. + */ + private static final long MAX_AGE_MILLIS = 5000l; + + private final int gpus; + private final long expireTime = System.currentTimeMillis() + MAX_AGE_MILLIS; + + public StrandedGpus(int gpus) { + this.gpus = gpus; + } + + public int getGpus() { + return this.gpus; + } + + public boolean isExpired() { + return System.currentTimeMillis() > expireTime; + } +} + diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 4316b708d..28b54799d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -36,7 +36,11 @@ public class VirtualProc extends FrameEntity implements ProcInterface { public long memoryMax; public long virtualMemoryUsed; public long virtualMemoryMax; - public long gpuReserved; + + public int gpusReserved; + public long gpuMemoryReserved; + public long gpuMemoryUsed; + public long gpuMemoryMax; public boolean unbooked; public boolean usageRecorded = false; @@ -91,8 +95,8 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { proc.coresReserved = frame.minCores; proc.memoryReserved = frame.minMemory; - // This reserves all the gpu memory on a host for one frame - proc.gpuReserved = (frame.minGpu > 0) ? 
host.idleGpu : 0; + proc.gpusReserved = frame.minGpus; + proc.gpuMemoryReserved = frame.minGpuMemory; /* * Frames that are announcing cores less than 100 are not multi-threaded @@ -208,7 +212,8 @@ public static final VirtualProc build(DispatchHost host, proc.coresReserved = lja.getThreads() * 100; proc.memoryReserved = frame.minMemory; - proc.gpuReserved = frame.minGpu; + proc.gpusReserved = frame.minGpus; + proc.gpuMemoryReserved = frame.minGpuMemory; int wholeCores = (int) (Math.floor(host.idleCores / 100.0)); if (wholeCores == 0) { diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/BookingDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/BookingDao.java index 4b362f000..91afcd07b 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/BookingDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/BookingDao.java @@ -38,6 +38,15 @@ public interface BookingDao { */ boolean updateMaxCores(LocalHostAssignment l, int maxCoreUnits); + /** + * Updates the maximum number of gpus the given local + * host assignment should use. + * + * @param l + * @return + */ + boolean updateMaxGpus(LocalHostAssignment l, int gpus); + /** * Updates the maximum amount of memory a given local host * assignment should use. @@ -54,7 +63,7 @@ public interface BookingDao { * @param l * @return */ - boolean updateMaxGpu(LocalHostAssignment l, long maxGpu); + boolean updateMaxGpuMemory(LocalHostAssignment l, long maxGpuMemory); /** * Create a new LocalHostAssignment attached to the given job. @@ -158,6 +167,16 @@ void insertLocalHostAssignment(HostInterface host, FrameInterface frame, */ int getCoreUsageDifference(LocalHostAssignment l, int coreUnits); + /** + * Return the difference between the number of assigned gpus and + * the given gpuUnits. + * + * @param l + * @param gpuUnits + * @return + */ + int getGpuUsageDifference(LocalHostAssignment l, int gpuUnits); + /** * Allocate additional cores from the given host. 
* @@ -176,6 +195,24 @@ void insertLocalHostAssignment(HostInterface host, FrameInterface frame, */ boolean deallocateCoresFromHost(HostInterface h, int cores); + /** + * Allocate additional gpus from the given host. + * + * @param h + * @param gpus + * @return + */ + boolean allocateGpusFromHost(HostInterface h, int gpus); + + /** + * Deallocate gpu from the given host, returning them to its pool. + * + * @param h + * @param gpus + * @return + */ + boolean deallocateGpusFromHost(HostInterface h, int gpus); + /** * Return true if the Host has a resource deficit. A * deficit can occur if there are more resources in use than the diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/GroupDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/GroupDao.java index 181062df6..dfb49dd9c 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/GroupDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/GroupDao.java @@ -137,6 +137,40 @@ public interface GroupDao { */ public void updateMinCores(GroupInterface group, int value); + + /** + * Sets the maximum number of gpus the group should be running. + * + * @param group + * @param value + */ + void updateDefaultJobMaxGpus(GroupInterface group, int value); + + /** + * Sets the minimum number of gpus the group should be running. 
+ * + * @param group + * @param value + */ + void updateDefaultJobMinGpus(GroupInterface group, int value); + + /** + * Sets the maximum number of gpus for this group + * + * @param group + * @param value + */ + public void updateMaxGpus(GroupInterface group, int value); + + /** + * Set the minimum number of gpus for this group + * + * @param group + * @param value + */ + + public void updateMinGpus(GroupInterface group, int value); + /** * Renames the group * @@ -186,6 +220,14 @@ public interface GroupDao { */ boolean isOverMinCores(JobInterface job); + /** + * Returns true if the group of the specified job is at or over its min gpus + * + * @param job + * @return + */ + boolean isOverMinGpus(JobInterface job); + /** * Returns true if the group is managed. * diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java index 31b7d475d..9ca4c5298 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java @@ -258,8 +258,8 @@ public interface HostDao { * @param freeSwap long * @param totalMcp long * @param freeMcp long - * @param totalGpu long - * @param freeGpu long + * @param totalGpuMemory long + * @param freeGpuMemory long * @param load int * @param os String */ @@ -267,7 +267,7 @@ void updateHostStats(HostInterface host, long totalMemory, long freeMemory, long totalSwap, long freeSwap, long totalMcp, long freeMcp, - long totalGpu, long freeGpu, + long totalGpuMemory, long freeGpuMemory, int load, Timestamp bootTime, String os); /** @@ -288,6 +288,16 @@ void updateHostStats(HostInterface host, */ int getStrandedCoreUnits(HostInterface h); + /** + * Return the number of whole stranded gpus on this host. The must have + * less than Dispacher.MEM_STRANDED_THRESHHOLD for the gpus to be + * considered stranded. 
+ * + * @param h HostInterface + * @return int + */ + int getStrandedGpus(HostInterface h); + /** * Return true if the host is preferring a particular show. * diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/JobDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/JobDao.java index 4ffaf2f43..3882f95a7 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/JobDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/JobDao.java @@ -59,6 +59,24 @@ public interface JobDao { */ public void updateMinCores(GroupInterface g, int cores); + /** + * Updates all jobs in the speficed group to the + * max gpu value. + * + * @param g + * @param gpu + */ + public void updateMaxGpus(GroupInterface g, int gpus); + + /** + * Updates all jobs in the specifid group to the + * min gpu value. + * + * @param g + * @param gpu + */ + public void updateMinGpus(GroupInterface g, int gpus); + /** * Updates all jobs in the specified group to the * set priority. @@ -255,6 +273,39 @@ public interface JobDao { */ boolean isOverMaxCores(JobInterface job, int coreUnits); + /** + * reteurns true if job is over its minimum gpus + * + * @param job + * @return boolean + */ + boolean isOverMinGpus(JobInterface job); + + /** + * returns true if job is over max gpus + * + * @param job + * @return + */ + boolean isOverMaxGpus(JobInterface job); + + /** + * returns true if job is at its max gpus + * + * @param job + * @return + */ + boolean isAtMaxGpus(JobInterface job); + + /** + * Return true if adding given gpus to the job + * will set the job over its max gpus value. 
+ * + * @param job + * @param gpus + * @return + */ + boolean isOverMaxGpus(JobInterface job, int gpus); /** * sets the jobs new priority value @@ -280,6 +331,22 @@ public interface JobDao { */ void updateMaxCores(JobInterface j, int v); + /** + * sets the jobs new min gpu value + * + * @param j + * @param v + */ + void updateMinGpus(JobInterface j, int v); + + /** + * sets the jobs new max gpu value + * + * @param j + * @param v + */ + void updateMaxGpus(JobInterface j, int v); + /** * Update a job's paused state * diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java index 7843d8e8d..151ea8dff 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java @@ -131,6 +131,15 @@ public interface LayerDao { */ void updateLayerMinCores(LayerInterface layer, int val); + + /** + * update the number of gpus the layer requires + * + * @param layer + * @param val + */ + void updateLayerMinGpus(LayerInterface layer, int val); + /** * update the amount of memory required by all subsequent * running frames in the specified layer. @@ -147,7 +156,7 @@ public interface LayerDao { * @param layer * @param val */ - void updateLayerMinGpu(LayerInterface layer, long gpu); + void updateLayerMinGpuMemory(LayerInterface layer, long val); /** * Update a layer with new host tags. @@ -207,9 +216,9 @@ public interface LayerDao { * value is larger than the current value * * @param layer - * @param gpu + * @param val */ - void increaseLayerMinGpu(LayerInterface layer, long gpu); + void increaseLayerMinGpuMemory(LayerInterface layer, long val); /** * Tries to find a max RSS value for layer in the specified job. The @@ -256,10 +265,10 @@ public interface LayerDao { * job with the new gpu requirement. 
* * @param job - * @param gpu + * @param mem * @param type */ - void updateMinGpu(JobInterface job, long gpu, LayerType type); + void updateMinGpuMemory(JobInterface job, long mem, LayerType type); /** * Update all layers of the set type in the specified job @@ -271,6 +280,16 @@ public interface LayerDao { */ void updateMinCores(JobInterface job, int cores, LayerType type); + /** + * Update all layers of the set type in the specified job + * with the new min cores requirement. + * + * @param job + * @param gpus + * @param type + */ + void updateMinGpus(JobInterface job, int gpus, LayerType type); + /** * Update a layer's max cores value, which limits how * much threading can go on. @@ -395,6 +414,16 @@ public interface LayerDao { */ void updateLayerMaxCores(LayerInterface layer, int val); + /** + * Set the layer's max gpus value to the given int. The + * max gpu value will not allow the dispatcher to + * book over the given number of gpu. + * + * @param layer + * @param val + */ + void updateLayerMaxGpus(LayerInterface layer, int val); + /** * Add a limit to the given layer. * diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/ProcDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/ProcDao.java index 5efdd01d2..31e49a208 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/ProcDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/ProcDao.java @@ -54,7 +54,7 @@ public interface ProcDao { * @return */ - long getReservedGpu(ProcInterface proc); + long getReservedGpuMemory(ProcInterface proc); /** * Return the proc that has exceeded its reserved memory by the largest factor. 
diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/ShowDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/ShowDao.java index 1853662a5..f0cdcbba7 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/ShowDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/ShowDao.java @@ -81,6 +81,21 @@ public interface ShowDao { */ void updateShowDefaultMaxCores(ShowInterface s, int val); + /** + * + * @param s + * @param val + */ + void updateShowDefaultMinGpus(ShowInterface s, int val); + + /** + * + * @param s + * @param val + */ + void updateShowDefaultMaxGpus(ShowInterface s, int val); + + /** * Disabling this would stop new proc assignement. The show would get no new * procs, but any procs already assigned to a job would continue to diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/BookingDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/BookingDaoJdbc.java index 475073cb8..de447011d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/BookingDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/BookingDaoJdbc.java @@ -71,7 +71,7 @@ public void insertLocalHostAssignment(HostInterface h, JobInterface job, LocalHo l.setType(RenderPartitionType.JOB_PARTITION); l.setIdleCoreUnits(l.getMaxCoreUnits()); l.setIdleMemory(l.getMaxMemory()); - l.setIdleGpu(l.getMaxGpuMemory()); + l.setIdleGpuMemory(l.getMaxGpuMemory()); getJdbcTemplate().update( INSERT_LOCAL_JOB_ASSIGNMENT, @@ -100,7 +100,7 @@ public void insertLocalHostAssignment(HostInterface h, LayerInterface layer, Loc l.setType(RenderPartitionType.LAYER_PARTITION); l.setIdleCoreUnits(l.getMaxCoreUnits()); l.setIdleMemory(l.getMaxMemory()); - l.setIdleGpu(l.getMaxGpuMemory()); + l.setIdleGpuMemory(l.getMaxGpuMemory()); getJdbcTemplate().update( INSERT_LOCAL_JOB_ASSIGNMENT, @@ -130,7 +130,7 @@ public void insertLocalHostAssignment(HostInterface h, FrameInterface frame, Loc l.setType(RenderPartitionType.FRAME_PARTITION); 
l.setIdleCoreUnits(l.getMaxCoreUnits()); l.setIdleMemory(l.getMaxMemory()); - l.setIdleGpu(l.getMaxGpuMemory()); + l.setIdleGpuMemory(l.getMaxGpuMemory()); getJdbcTemplate().update( INSERT_LOCAL_JOB_ASSIGNMENT, @@ -159,7 +159,7 @@ public LocalHostAssignment mapRow(final ResultSet rs, int rowNum) throws SQLExce l.setThreads(rs.getInt("int_threads")); l.setIdleCoreUnits(rs.getInt("int_cores_idle")); l.setIdleMemory(rs.getLong("int_mem_idle")); - l.setIdleGpu(rs.getLong("int_gpu_idle")); + l.setIdleGpuMemory(rs.getLong("int_gpu_idle")); l.setJobId(rs.getString("pk_job")); l.setLayerId(rs.getString("pk_layer")); l.setFrameId(rs.getString("pk_frame")); @@ -312,6 +312,11 @@ public int getCoreUsageDifference(LocalHostAssignment l, int coreUnits) { Integer.class, coreUnits, l.getId()); } + @Override + public int getGpuUsageDifference(LocalHostAssignment l, int gpuUnits) { + return 0; + } + private static final String UPDATE_MAX_CORES = "UPDATE " + "host_local " + @@ -327,6 +332,11 @@ public boolean updateMaxCores(LocalHostAssignment l, int coreUnits) { coreUnits, coreUnits, l.getId()) > 0; } + @Override + public boolean updateMaxGpus(LocalHostAssignment l, int gpus) { + return false; + } + private static final String UPDATE_MAX_MEMORY = "UPDATE " + "host_local " + @@ -352,7 +362,7 @@ public boolean updateMaxMemory(LocalHostAssignment l, long maxMemory) { "pk_host_local = ? 
"; @Override - public boolean updateMaxGpu(LocalHostAssignment l, long maxGpu) { + public boolean updateMaxGpuMemory(LocalHostAssignment l, long maxGpu) { return getJdbcTemplate().update( UPDATE_MAX_GPU, maxGpu, maxGpu, l.getId()) > 0; } @@ -404,6 +414,16 @@ public boolean deallocateCoresFromHost(HostInterface h, int cores) { } } + @Override + public boolean allocateGpusFromHost(HostInterface h, int gpus) { + return false; + } + + @Override + public boolean deallocateGpusFromHost(HostInterface h, int gpus) { + return false; + } + @Override public boolean hasResourceDeficit(HostInterface host) { return getJdbcTemplate().queryForObject( diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/DispatcherDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/DispatcherDaoJdbc.java index ff56fe35a..c516e8220 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/DispatcherDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/DispatcherDaoJdbc.java @@ -192,11 +192,11 @@ private Set findDispatchJobs(DispatchHost host, int numJobs, boolean shu s.getShowId(), host.getFacilityId(), host.os, host.idleCores, host.idleMemory, threadMode(host.threadMode), - (host.idleGpu > 0) ? 1: 0, host.idleGpu, + (host.idleGpuMemory > 0) ? 1: 0, host.idleGpuMemory, hostString(host.getName()), numJobs * 10)); if (result.size() < 1) { - if (host.gpu == 0) { + if (host.gpuMemory == 0) { s.skip(host.tags, host.idleCores, host.idleMemory); } } @@ -227,7 +227,7 @@ public Set findDispatchJobs(DispatchHost host, GroupInterface g) { g.getGroupId(),host.getFacilityId(), host.os, host.idleCores, host.idleMemory, threadMode(host.threadMode), - (host.idleGpu > 0) ? 1: 0, host.idleGpu, + (host.idleGpuMemory > 0) ? 
1: 0, host.idleGpuMemory, hostString(host.getName()), 50)); return result; @@ -242,7 +242,7 @@ public List findNextDispatchFrames(JobInterface job, FIND_LOCAL_DISPATCH_FRAME_BY_JOB_AND_PROC, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, proc.memoryReserved, - proc.gpuReserved, + proc.gpuMemoryReserved, job.getJobId(), limit); } @@ -252,7 +252,7 @@ public List findNextDispatchFrames(JobInterface job, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, proc.coresReserved, proc.memoryReserved, - (proc.gpuReserved > 0) ? 1: 0, proc.gpuReserved, + (proc.gpuMemoryReserved > 0) ? 1: 0, proc.gpuMemoryReserved, job.getJobId(), job.getJobId(), hostString(proc.hostName), limit); } @@ -266,7 +266,7 @@ public List findNextDispatchFrames(JobInterface job, return getJdbcTemplate().query( FIND_LOCAL_DISPATCH_FRAME_BY_JOB_AND_HOST, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, - host.idleMemory, host.idleGpu, job.getJobId(), + host.idleMemory, host.idleGpuMemory, job.getJobId(), limit); } else { @@ -275,7 +275,7 @@ public List findNextDispatchFrames(JobInterface job, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, host.idleCores, host.idleMemory, threadMode(host.threadMode), - (host.idleGpu > 0) ? 1: 0, host.idleGpu, + (host.idleGpuMemory > 0) ? 
1: 0, host.idleGpuMemory, job.getJobId(), job.getJobId(), hostString(host.getName()), limit); } @@ -290,7 +290,7 @@ public List findNextDispatchFrames(LayerInterface layer, return getJdbcTemplate().query( FIND_LOCAL_DISPATCH_FRAME_BY_LAYER_AND_PROC, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, - proc.memoryReserved, proc.gpuReserved, + proc.memoryReserved, proc.gpuMemoryReserved, layer.getLayerId(), limit); } @@ -299,7 +299,7 @@ public List findNextDispatchFrames(LayerInterface layer, FIND_DISPATCH_FRAME_BY_LAYER_AND_PROC, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, proc.coresReserved, proc.memoryReserved, - proc.gpuReserved, + proc.gpuMemoryReserved, layer.getLayerId(), layer.getLayerId(), hostString(proc.hostName), limit); } @@ -313,7 +313,7 @@ public List findNextDispatchFrames(LayerInterface layer, return getJdbcTemplate().query( FIND_LOCAL_DISPATCH_FRAME_BY_LAYER_AND_HOST, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, - host.idleMemory, host.idleGpu, layer.getLayerId(), + host.idleMemory, host.idleGpuMemory, layer.getLayerId(), limit); } else { @@ -322,7 +322,7 @@ public List findNextDispatchFrames(LayerInterface layer, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, host.idleCores, host.idleMemory, threadMode(host.threadMode), - host.idleGpu, layer.getLayerId(), layer.getLayerId(), + host.idleGpuMemory, layer.getLayerId(), layer.getLayerId(), hostString(host.getName()), limit); } } @@ -347,7 +347,7 @@ public boolean findUnderProcedJob(JobInterface excludeJob, VirtualProc proc) { Integer.class, excludeJob.getShowId(), proc.getFacilityId(), proc.os, excludeJob.getShowId(), proc.getFacilityId(), proc.os, - proc.coresReserved, proc.memoryReserved, proc.gpuReserved, + proc.coresReserved, proc.memoryReserved, proc.gpuMemoryReserved, hostString(proc.hostName)) > 0; } catch (org.springframework.dao.EmptyResultDataAccessException e) { return false; @@ -365,7 +365,7 @@ public boolean higherPriorityJobExists(JobDetail baseJob, VirtualProc proc) { HIGHER_PRIORITY_JOB_BY_FACILITY_EXISTS, Boolean.class, 
baseJob.priority, proc.getFacilityId(), proc.os, proc.getFacilityId(), proc.os, - proc.coresReserved, proc.memoryReserved, proc.gpuReserved, + proc.coresReserved, proc.memoryReserved, proc.gpuMemoryReserved, hostString(proc.hostName)); } catch (org.springframework.dao.EmptyResultDataAccessException e) { return false; @@ -386,7 +386,7 @@ public Set findDispatchJobs(DispatchHost host, show.getShowId(), host.getFacilityId(), host.os, host.idleCores, host.idleMemory, threadMode(host.threadMode), - (host.idleGpu > 0) ? 1: 0, host.idleGpu, + (host.idleGpuMemory > 0) ? 1: 0, host.idleGpuMemory, hostString(host.getName()), numJobs * 10)); return result; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/FrameDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/FrameDaoJdbc.java index fd3a421d1..79d358170 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/FrameDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/FrameDaoJdbc.java @@ -198,7 +198,7 @@ public void updateFrameStarted(VirtualProc proc, FrameInterface frame) { int result = getJdbcTemplate().update(UPDATE_FRAME_STARTED, FrameState.RUNNING.toString(), proc.hostName, proc.coresReserved, - proc.memoryReserved, proc.gpuReserved, frame.getFrameId(), + proc.memoryReserved, proc.gpuMemoryReserved, frame.getFrameId(), FrameState.WAITING.toString(), frame.getVersion()); if (result == 0) { @@ -238,7 +238,7 @@ public void updateFrameStarted(VirtualProc proc, FrameInterface frame) { public boolean updateFrameFixed(VirtualProc proc, FrameInterface frame) { return getJdbcTemplate().update(UPDATE_FRAME_FIXED, FrameState.RUNNING.toString(), proc.hostName, proc.coresReserved, - proc.memoryReserved, proc.gpuReserved, frame.getFrameId()) == 1; + proc.memoryReserved, proc.gpuMemoryReserved, frame.getFrameId()) == 1; } @Override @@ -274,7 +274,7 @@ public DispatchFrame mapRow(ResultSet rs, int rowNum) throws SQLException { frame.maxCores = rs.getInt("int_cores_max"); 
frame.threadable = rs.getBoolean("b_threadable"); frame.minMemory = rs.getLong("int_mem_min"); - frame.minGpu = rs.getLong("int_gpu_min"); + frame.minGpuMemory = rs.getLong("int_gpu_min"); frame.version = rs.getInt("int_version"); frame.services = rs.getString("str_services"); return frame; @@ -968,7 +968,8 @@ public ResourceUsage mapRow(ResultSet rs, int rowNum) throws SQLException { return new ResourceUsage( rs.getLong("int_clock_time"), - rs.getInt("int_cores")); + rs.getInt("int_cores"), + 0); } }; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/GroupDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/GroupDaoJdbc.java index 1656da1f5..b54c7d7cd 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/GroupDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/GroupDaoJdbc.java @@ -213,6 +213,26 @@ public void updateMinCores(GroupInterface group, int value) { value, group.getId()); } + @Override + public void updateDefaultJobMaxGpus(GroupInterface group, int value) { + + } + + @Override + public void updateDefaultJobMinGpus(GroupInterface group, int value) { + + } + + @Override + public void updateMaxGpus(GroupInterface group, int value) { + + } + + @Override + public void updateMinGpus(GroupInterface group, int value) { + + } + private static final String IS_OVER_MIN_CORES = "SELECT " + "COUNT(1) " + @@ -232,6 +252,11 @@ public boolean isOverMinCores(JobInterface job) { Integer.class, job.getJobId()) > 0; } + @Override + public boolean isOverMinGpus(JobInterface job) { + return false; + } + @Override public void updateDefaultJobPriority(GroupInterface group, int value) { if (value < 0) { value = CueUtil.FEATURE_DISABLED; } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/HostDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/HostDaoJdbc.java index 4db673de8..d207bc1b8 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/HostDaoJdbc.java +++ 
b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/HostDaoJdbc.java @@ -201,10 +201,10 @@ public DispatchHost mapRow(ResultSet rs, int rowNum) throws SQLException { host.lockState = LockState.valueOf(rs.getString("str_lock_state")); host.memory = rs.getInt("int_mem"); host.cores = rs.getInt("int_cores"); - host.gpu= rs.getInt("int_gpu"); + host.gpus = rs.getInt("int_gpu"); host.idleMemory= rs.getInt("int_mem_idle"); host.idleCores = rs.getInt("int_cores_idle"); - host.idleGpu= rs.getInt("int_gpu_idle"); + host.idleGpus = rs.getInt("int_gpu_idle"); host.isNimby = rs.getBoolean("b_nimby"); host.threadMode = rs.getInt("int_thread_mode"); host.tags = rs.getString("str_tags"); @@ -599,6 +599,11 @@ public int getStrandedCoreUnits(HostInterface h) { } } + @Override + public int getStrandedGpus(HostInterface h) { + return 0; + } + private static final String IS_HOST_UP = "SELECT " + "COUNT(1) " + diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/JobDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/JobDaoJdbc.java index 0f591a350..18ba0bba7 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/JobDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/JobDaoJdbc.java @@ -340,6 +340,16 @@ public void updateMinCores(GroupInterface g, int v) { v, g.getGroupId()); } + @Override + public void updateMaxGpus(GroupInterface g, int gpus) { + + } + + @Override + public void updateMinGpus(GroupInterface g, int gpus) { + + } + @Override public void updateMaxCores(GroupInterface g, int v) { getJdbcTemplate().update("UPDATE job_resource SET int_max_cores=? WHERE " + @@ -359,6 +369,16 @@ public void updateMaxCores(JobInterface j, int v) { v, j.getJobId()); } + @Override + public void updateMinGpus(JobInterface j, int v) { + + } + + @Override + public void updateMaxGpus(JobInterface j, int v) { + + } + @Override public void updatePaused(JobInterface j, boolean b) { getJdbcTemplate().update("UPDATE job SET b_paused=? 
WHERE pk_job=?", @@ -611,6 +631,26 @@ public boolean isOverMaxCores(JobInterface job, int coreUnits) { Integer.class, job.getJobId(), coreUnits) > 0; } + @Override + public boolean isOverMinGpus(JobInterface job) { + return false; + } + + @Override + public boolean isOverMaxGpus(JobInterface job) { + return false; + } + + @Override + public boolean isAtMaxGpus(JobInterface job) { + return false; + } + + @Override + public boolean isOverMaxGpus(JobInterface job, int gpus) { + return false; + } + private static final String IS_JOB_AT_MAX_CORES = "SELECT " + diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/LayerDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/LayerDaoJdbc.java index f189af1d9..80f300ffc 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/LayerDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/LayerDaoJdbc.java @@ -206,7 +206,7 @@ public LayerDetail mapRow(ResultSet rs, int rowNum) throws SQLException { layer.range = rs.getString("str_range"); layer.minimumCores = rs.getInt("int_cores_min"); layer.minimumMemory = rs.getLong("int_mem_min"); - layer.minimumGpu = rs.getLong("int_gpu_min"); + layer.minimumGpuMemory = rs.getLong("int_gpu_min"); layer.type = LayerType.valueOf(rs.getString("str_type")); layer.tags = Sets.newHashSet( rs.getString("str_tags").replaceAll(" ", "").split("\\|")); @@ -323,7 +323,7 @@ public void insertLayerDetail(LayerDetail l) { l.range, l.chunkSize, l.dispatchOrder, StringUtils.join(l.tags," | "), l.type.toString(), l.minimumCores, l.maximumCores, l.isThreadable, - l.minimumMemory, l.minimumGpu, StringUtils.join(l.services,",")); + l.minimumMemory, l.minimumGpus, StringUtils.join(l.services,",")); } @Override @@ -336,9 +336,8 @@ public void updateLayerMinMemory(LayerInterface layer, long val) { } @Override - public void updateLayerMinGpu(LayerInterface layer, long gpu) { - getJdbcTemplate().update("UPDATE layer SET int_gpu_min=? 
WHERE pk_layer=?", - gpu, layer.getLayerId()); + public void updateLayerMinGpuMemory(LayerInterface layer, long val) { + } private static final String BALANCE_MEM = @@ -388,7 +387,7 @@ public void increaseLayerMinMemory(LayerInterface layer, long val) { } @Override - public void increaseLayerMinGpu(LayerInterface layer, long gpu) { + public void increaseLayerMinGpuMemory(LayerInterface layer, long gpu) { getJdbcTemplate().update("UPDATE layer SET int_gpu_min=? WHERE pk_layer=? AND int_gpu_min < ?", gpu, layer.getLayerId(), gpu); } @@ -402,12 +401,22 @@ public void updateLayerMinCores(LayerInterface layer, int val) { val, layer.getLayerId()); } + @Override + public void updateLayerMinGpus(LayerInterface layer, int val) { + + } + @Override public void updateLayerMaxCores(LayerInterface layer, int val) { getJdbcTemplate().update("UPDATE layer SET int_cores_max=? WHERE pk_layer=?", val, layer.getLayerId()); } + @Override + public void updateLayerMaxGpus(LayerInterface layer, int val) { + + } + private static final String UPDATE_LAYER_MAX_RSS = "UPDATE " + "layer_mem " + @@ -604,7 +613,7 @@ public void updateMinMemory(JobInterface job, long mem, LayerType type) { } @Override - public void updateMinGpu(JobInterface job, long gpu, LayerType type) { + public void updateMinGpuMemory(JobInterface job, long gpu, LayerType type) { getJdbcTemplate().update( "UPDATE layer SET int_gpu_min=? WHERE pk_job=? 
AND str_type=?", gpu, job.getJobId(), type.toString()); @@ -617,6 +626,11 @@ public void updateMinCores(JobInterface job, int cores, LayerType type) { cores, job.getJobId(), type.toString()); } + @Override + public void updateMinGpus(JobInterface job, int gpu, LayerType type) { + + } + @Override public void updateThreadable(LayerInterface layer, boolean threadable) { getJdbcTemplate().update( diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/NestedWhiteboardDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/NestedWhiteboardDaoJdbc.java index 693d99fbf..828d659aa 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/NestedWhiteboardDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/NestedWhiteboardDaoJdbc.java @@ -444,7 +444,7 @@ public NestedHost mapRow(ResultSet rs, int row) throws SQLException { proc = NestedProc.newBuilder() .setId(pid) .setName(CueUtil.buildProcName(host.getName(), - rs.getInt("proc_cores"))) + rs.getInt("proc_cores"), 0)) .setReservedCores(Convert.coreUnitsToCores( rs.getInt("proc_cores"))) .setReservedMemory(rs.getLong("proc_memory")) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ProcDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ProcDaoJdbc.java index f363bbc0f..8e6d5f848 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ProcDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ProcDaoJdbc.java @@ -122,7 +122,7 @@ public void insertVirtualProc(VirtualProc proc) { proc.getLayerId(), proc.getJobId(), proc.getFrameId(), proc.coresReserved, proc.memoryReserved, proc.memoryReserved, Dispatcher.MEM_RESERVED_MIN, - proc.gpuReserved, proc.isLocalDispatch); + proc.gpuMemoryReserved, proc.isLocalDispatch); // Update all of the resource counts procCreated(proc); @@ -280,7 +280,7 @@ public VirtualProc mapRow(ResultSet rs, int rowNum) throws SQLException { proc.coresReserved =rs.getInt("int_cores_reserved"); 
proc.memoryReserved = rs.getInt("int_mem_reserved"); proc.memoryMax = rs.getInt("int_mem_max_used"); - proc.gpuReserved = rs.getInt("int_gpu_reserved"); + proc.gpuMemoryReserved = rs.getInt("int_gpu_reserved"); proc.virtualMemoryMax = rs.getLong("int_virt_max_used"); proc.virtualMemoryUsed = rs.getLong("int_virt_used"); proc.memoryUsed = rs.getInt("int_mem_used"); @@ -581,7 +581,12 @@ public long getReservedMemory(ProcInterface proc) { Long.class, proc.getProcId()); } - public long getReservedGpu(ProcInterface proc) { + @Override + public long getReservedGpuMemory(ProcInterface proc) { + return 0; + } + + public long getReservedGpu(ProcInterface proc) { return getJdbcTemplate().queryForObject( "SELECT int_gpu_reserved FROM proc WHERE pk_proc=?", Long.class, proc.getProcId()); @@ -700,7 +705,7 @@ private void procDestroyed(VirtualProc proc) { "int_gpu_idle = int_gpu_idle + ? " + "WHERE " + "pk_host = ?", - proc.coresReserved, proc.memoryReserved, proc.gpuReserved, proc.getHostId()); + proc.coresReserved, proc.memoryReserved, proc.gpuMemoryReserved, proc.getHostId()); if (!proc.isLocalDispatch) { getJdbcTemplate().update( @@ -784,7 +789,7 @@ private void procDestroyed(VirtualProc proc) { "pk_host = ? ", proc.coresReserved, proc.memoryReserved, - proc.gpuReserved, + proc.gpuMemoryReserved, proc.getJobId(), proc.getHostId()); } @@ -808,7 +813,7 @@ private void procCreated(VirtualProc proc) { "int_gpu_idle = int_gpu_idle - ? 
" + "WHERE " + "pk_host = ?", - proc.coresReserved, proc.memoryReserved, proc.gpuReserved, proc.getHostId()); + proc.coresReserved, proc.memoryReserved, proc.gpuMemoryReserved, proc.getHostId()); /** diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ServiceDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ServiceDaoJdbc.java index 8b9225636..cc85beb44 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ServiceDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ServiceDaoJdbc.java @@ -60,7 +60,7 @@ public ServiceEntity mapRow(ResultSet rs, int rowNum) throws SQLException { s.minCores = rs.getInt("int_cores_min"); s.maxCores = rs.getInt("int_cores_max"); s.minMemory = rs.getLong("int_mem_min"); - s.minGpu = rs.getLong("int_gpu_min"); + s.minGpuMemory = rs.getLong("int_gpu_min"); s.threadable = rs.getBoolean("b_threadable"); s.tags = splitTags(rs.getString("str_tags")); return s; @@ -77,7 +77,7 @@ public ServiceOverrideEntity mapRow(ResultSet rs, int rowNum) s.minCores = rs.getInt("int_cores_min"); s.maxCores = rs.getInt("int_cores_max"); s.minMemory = rs.getLong("int_mem_min"); - s.minGpu = rs.getLong("int_gpu_min"); + s.minGpuMemory = rs.getLong("int_gpu_min"); s.threadable = rs.getBoolean("b_threadable"); s.tags = splitTags(rs.getString("str_tags")); s.showId = rs.getString("pk_show"); @@ -168,7 +168,7 @@ public void insert(ServiceEntity service) { service.id = SqlUtil.genKeyRandom(); getJdbcTemplate().update(INSERT_SERVICE, service.id, service.name, service.threadable, service.minCores, - service.maxCores, service.minMemory, service.minGpu, + service.maxCores, service.minMemory, service.minGpuMemory, StringUtils.join(service.tags.toArray(), " | ")); } @@ -193,7 +193,7 @@ public void insert(ServiceOverrideEntity service) { getJdbcTemplate().update(INSERT_SERVICE_WITH_SHOW, service.id, service.showId, service.name, service.threadable, service.minCores, service.maxCores, service.minMemory, - 
service.minGpu, joinTags(service.tags)); + service.minGpuMemory, joinTags(service.tags)); } private static final String UPDATE_SERVICE = @@ -214,7 +214,7 @@ public void insert(ServiceOverrideEntity service) { public void update(ServiceEntity service) { getJdbcTemplate().update(UPDATE_SERVICE, service.name, service.threadable, service.minCores, service.maxCores, - service.minMemory, service.minGpu, joinTags(service.tags), + service.minMemory, service.minGpuMemory, joinTags(service.tags), service.getId()); } @@ -236,7 +236,7 @@ service.minMemory, service.minGpu, joinTags(service.tags), public void update(ServiceOverrideEntity service) { getJdbcTemplate().update(UPDATE_SERVICE_WITH_SHOW, service.name, service.threadable, service.minCores, service.maxCores, - service.minMemory, service.minGpu, joinTags(service.tags), + service.minMemory, service.minGpuMemory, joinTags(service.tags), service.getId()); } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ShowDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ShowDaoJdbc.java index 0d6765e51..67a99303e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ShowDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/ShowDaoJdbc.java @@ -180,6 +180,16 @@ public void updateShowDefaultMaxCores(ShowInterface s, int val) { val, s.getShowId()); } + @Override + public void updateShowDefaultMinGpus(ShowInterface s, int val) { + + } + + @Override + public void updateShowDefaultMaxGpus(ShowInterface s, int val) { + + } + @Override public void updateBookingEnabled(ShowInterface s, boolean enabled) { getJdbcTemplate().update( diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/WhiteboardDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/WhiteboardDaoJdbc.java index 167d6c697..7178a3316 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/WhiteboardDaoJdbc.java +++ 
b/cuebot/src/main/java/com/imageworks/spcue/dao/oracle/WhiteboardDaoJdbc.java @@ -952,7 +952,7 @@ public Proc mapRow(ResultSet rs, int row) throws SQLException { return Proc.newBuilder() .setId(SqlUtil.getString(rs, "pk_proc")) .setName(CueUtil.buildProcName(SqlUtil.getString(rs, "host_name"), - rs.getInt("int_cores_reserved"))) + rs.getInt("int_cores_reserved"), 0)) .setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores_reserved"))) .setReservedMemory(rs.getLong("int_mem_reserved")) .setReservedGpuMemory(rs.getLong("int_gpu_reserved")) @@ -1370,7 +1370,7 @@ public Frame mapRow(ResultSet rs, int rowNum) throws SQLException { if (SqlUtil.getString(rs, "str_host") != null) { builder.setLastResource(CueUtil.buildProcName(SqlUtil.getString(rs, "str_host"), - rs.getInt("int_cores"))); + rs.getInt("int_cores"), 0)); } else { builder.setLastResource(""); } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/BookingDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/BookingDaoJdbc.java index 2e1552f4f..e8c7295fd 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/BookingDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/BookingDaoJdbc.java @@ -57,10 +57,12 @@ public class BookingDaoJdbc extends "int_cores_idle,"+ "int_gpu_mem_idle,"+ "int_gpu_mem_max,"+ + "int_gpus_max,"+ + "int_gpus_idle,"+ "int_threads "+ ") " + "VALUES " + - "(?,?,?,?,?,?,?,?,?,?,?,?,?)"; + "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; @Override public void insertLocalHostAssignment(HostInterface h, JobInterface job, LocalHostAssignment l) { @@ -71,7 +73,8 @@ public void insertLocalHostAssignment(HostInterface h, JobInterface job, LocalHo l.setType(RenderPartitionType.JOB_PARTITION); l.setIdleCoreUnits(l.getMaxCoreUnits()); l.setIdleMemory(l.getMaxMemory()); - l.setIdleGpu(l.getMaxGpuMemory()); + l.setIdleGpuUnits(l.getMaxGpuUnits()); + l.setIdleGpuMemory(l.getMaxGpuMemory()); getJdbcTemplate().update( INSERT_LOCAL_JOB_ASSIGNMENT, @@ 
-87,6 +90,8 @@ public void insertLocalHostAssignment(HostInterface h, JobInterface job, LocalHo l.getMaxCoreUnits(), l.getMaxGpuMemory(), l.getMaxGpuMemory(), + l.getMaxGpuUnits(), + l.getMaxGpuUnits(), l.getThreads()); } @@ -100,7 +105,8 @@ public void insertLocalHostAssignment(HostInterface h, LayerInterface layer, Loc l.setType(RenderPartitionType.LAYER_PARTITION); l.setIdleCoreUnits(l.getMaxCoreUnits()); l.setIdleMemory(l.getMaxMemory()); - l.setIdleGpu(l.getMaxGpuMemory()); + l.setIdleGpuUnits(l.getMaxGpuUnits()); + l.setIdleGpuMemory(l.getMaxGpuMemory()); getJdbcTemplate().update( INSERT_LOCAL_JOB_ASSIGNMENT, @@ -116,6 +122,8 @@ public void insertLocalHostAssignment(HostInterface h, LayerInterface layer, Loc l.getMaxCoreUnits(), l.getMaxGpuMemory(), l.getMaxGpuMemory(), + l.getMaxGpuUnits(), + l.getMaxGpuUnits(), l.getThreads()); } @@ -130,7 +138,8 @@ public void insertLocalHostAssignment(HostInterface h, FrameInterface frame, Loc l.setType(RenderPartitionType.FRAME_PARTITION); l.setIdleCoreUnits(l.getMaxCoreUnits()); l.setIdleMemory(l.getMaxMemory()); - l.setIdleGpu(l.getMaxGpuMemory()); + l.setIdleGpuUnits(l.getMaxGpuUnits()); + l.setIdleGpuMemory(l.getMaxGpuMemory()); getJdbcTemplate().update( INSERT_LOCAL_JOB_ASSIGNMENT, @@ -146,6 +155,8 @@ public void insertLocalHostAssignment(HostInterface h, FrameInterface frame, Loc l.getMaxCoreUnits(), l.getMaxGpuMemory(), l.getMaxGpuMemory(), + l.getMaxGpuUnits(), + l.getMaxGpuUnits(), l.getThreads()); } public static final RowMapper LJA_MAPPER = @@ -155,11 +166,13 @@ public LocalHostAssignment mapRow(final ResultSet rs, int rowNum) throws SQLExce l.id = rs.getString("pk_host_local"); l.setMaxCoreUnits(rs.getInt("int_cores_max")); l.setMaxMemory(rs.getLong("int_mem_max")); + l.setMaxGpuUnits(rs.getInt("int_gpus_max")); l.setMaxGpuMemory(rs.getLong("int_gpu_mem_max")); l.setThreads(rs.getInt("int_threads")); l.setIdleCoreUnits(rs.getInt("int_cores_idle")); l.setIdleMemory(rs.getLong("int_mem_idle")); - 
l.setIdleGpu(rs.getLong("int_gpu_mem_idle")); + l.setIdleGpuUnits(rs.getInt("int_gpus_idle")); + l.setIdleGpuMemory(rs.getLong("int_gpu_mem_idle")); l.setJobId(rs.getString("pk_job")); l.setLayerId(rs.getString("pk_layer")); l.setFrameId(rs.getString("pk_frame")); @@ -182,6 +195,8 @@ public LocalHostAssignment mapRow(final ResultSet rs, int rowNum) throws SQLExce "int_cores_max,"+ "int_gpu_mem_idle,"+ "int_gpu_mem_max,"+ + "int_gpus_idle,"+ + "int_gpus_max,"+ "int_threads, "+ "str_type " + "FROM " + @@ -312,6 +327,13 @@ public int getCoreUsageDifference(LocalHostAssignment l, int coreUnits) { Integer.class, coreUnits, l.getId()); } + @Override + public int getGpuUsageDifference(LocalHostAssignment l, int gpuUnits) { + return getJdbcTemplate().queryForObject( + "SELECT ? - int_gpus_max FROM host_local WHERE pk_host_local=?", + Integer.class, gpuUnits, l.getId()); + } + private static final String UPDATE_MAX_CORES = "UPDATE " + "host_local " + @@ -327,6 +349,21 @@ public boolean updateMaxCores(LocalHostAssignment l, int coreUnits) { coreUnits, coreUnits, l.getId()) > 0; } + private static final String UPDATE_MAX_GPUS = + "UPDATE " + + "host_local " + + "SET " + + "int_gpus_idle = int_gpus_idle + (? - int_gpus_max), " + + "int_gpus_max = ? "+ + "WHERE " + + "pk_host_local = ? "; + + @Override + public boolean updateMaxGpus(LocalHostAssignment l, int gpuUnits) { + return getJdbcTemplate().update(UPDATE_MAX_GPUS, + gpuUnits, gpuUnits, l.getId()) > 0; + } + private static final String UPDATE_MAX_MEMORY = "UPDATE " + "host_local " + @@ -342,7 +379,7 @@ public boolean updateMaxMemory(LocalHostAssignment l, long maxMemory) { UPDATE_MAX_MEMORY, maxMemory, maxMemory, l.getId()) > 0; } - private static final String UPDATE_MAX_GPU = + private static final String UPDATE_MAX_GPU_MEMORY = "UPDATE " + "host_local " + "SET " + @@ -352,9 +389,9 @@ public boolean updateMaxMemory(LocalHostAssignment l, long maxMemory) { "pk_host_local = ? 
"; @Override - public boolean updateMaxGpu(LocalHostAssignment l, long maxGpu) { + public boolean updateMaxGpuMemory(LocalHostAssignment l, long maxGpuMemory) { return getJdbcTemplate().update( - UPDATE_MAX_GPU, maxGpu, maxGpu, l.getId()) > 0; + UPDATE_MAX_GPU_MEMORY, maxGpuMemory, maxGpuMemory, l.getId()) > 0; } @Override @@ -386,6 +423,26 @@ public boolean allocateCoresFromHost(HostInterface h, int cores) { } + /** + * + * @param h HostInterface + * @param gpus int + * @return boolean + */ + @Override + public boolean allocateGpusFromHost(HostInterface h, int gpus) { + + try { + return getJdbcTemplate().update( + "UPDATE host SET int_gpus_idle = int_gpus_idle - ? " + + "WHERE pk_host = ?", + gpus, h.getHostId()) > 0; + } catch (DataAccessException e) { + throw new ResourceReservationFailureException("Failed to allocate " + + gpus + " GPU from host, " + e); + } + } + /** * * @param h HostInterface @@ -404,11 +461,30 @@ public boolean deallocateCoresFromHost(HostInterface h, int cores) { } } + /** + * + * @param h HostInterface + * @param gpus int + * @return boolean + */ + @Override + public boolean deallocateGpusFromHost(HostInterface h, int gpus) { + try { + return getJdbcTemplate().update( + "UPDATE host SET int_gpus_idle = int_gpus_idle + ? 
WHERE pk_host = ?", + gpus, h.getHostId()) > 0; + } catch (DataAccessException e) { + throw new ResourceReservationFailureException("Failed to de-allocate " + + gpus + " GPU from host, " + e); + } + } + @Override public boolean hasResourceDeficit(HostInterface host) { return getJdbcTemplate().queryForObject( "SELECT COUNT(1) FROM host_local WHERE " + "(int_cores_max < int_cores_max - int_cores_idle OR " + + "int_gpus_max < int_gpus_max - int_gpus_idle OR " + "int_gpu_mem_max < int_gpu_mem_max - int_gpu_mem_idle OR " + "int_mem_max < int_mem_max - int_mem_idle) AND " + "host_local.pk_host= ?", diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatchQuery.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatchQuery.java index 59cdca0c2..fb267ddbd 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatchQuery.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatchQuery.java @@ -52,6 +52,12 @@ public class DispatchQuery { "OR " + "folder_resource.int_cores < folder_resource.int_max_cores " + ") " + + "AND " + + "(" + + "folder_resource.int_max_gpus = -1 " + + "OR " + + "folder_resource.int_gpus < folder_resource.int_max_gpus " + + ") " + "AND job.str_state = 'PENDING' " + "AND job.b_paused = false " + "AND job.pk_show = ? " + @@ -66,8 +72,10 @@ public class DispatchQuery { "AND layer.int_cores_min <= ? " + "AND layer.int_mem_min <= ? " + "AND (CASE WHEN layer.b_threadable = true THEN 1 ELSE 0 END) >= ? " + + "AND layer.int_gpus_min <= ? " + "AND layer.int_gpu_mem_min BETWEEN ? AND ? " + "AND job_resource.int_cores + layer.int_cores_min < job_resource.int_max_cores " + + "AND job_resource.int_gpus + layer.int_gpus_min < job_resource.int_max_gpus " + "AND host.str_tags ~* ('(?x)' || layer.str_tags) " + "AND host.str_name = ? 
" + "AND layer.pk_layer IN (" + @@ -219,6 +227,8 @@ public class DispatchQuery { "folder.pk_folder = folder_resource.pk_folder " + "AND " + "(folder_resource.int_max_cores = -1 OR folder_resource.int_cores < folder_resource.int_max_cores) " + + "AND " + + "(folder_resource.int_max_gpus = -1 OR folder_resource.int_gpus < folder_resource.int_max_gpus) " + "AND " + "job_resource.float_tier < 1.00 " + "AND " + @@ -262,6 +272,8 @@ public class DispatchQuery { "l.int_cores_min <= ? " + "AND " + "l.int_mem_min <= ? " + + "AND " + + "l.int_gpus_min <= ? " + "AND " + "l.int_gpu_mem_min = ? " + "AND " + @@ -320,10 +332,14 @@ public class DispatchQuery { "folder.pk_folder = folder_resource.pk_folder " + "AND " + "(folder_resource.int_max_cores = -1 OR folder_resource.int_cores < folder_resource.int_max_cores) " + + "AND " + + "(folder_resource.int_max_gpus = -1 OR folder_resource.int_gpus < folder_resource.int_max_gpus) " + "AND " + "job_resource.int_priority > ?" + "AND " + "job_resource.int_cores < job_resource.int_max_cores " + + "AND " + + "job_resource.int_gpus < job_resource.int_max_gpus " + "AND " + "job.str_state = 'PENDING' " + "AND " + @@ -359,6 +375,8 @@ public class DispatchQuery { "l.int_cores_min <= ? " + "AND " + "l.int_mem_min <= ? " + + "AND " + + "l.int_gpus_min <= ? " + "AND " + "l.int_gpu_mem_min = ? " + "AND " + @@ -417,6 +435,8 @@ public class DispatchQuery { "int_cores_min, " + "int_cores_max, " + "int_mem_min, " + + "int_gpus_min, " + + "int_gpus_max, " + "int_gpu_mem_min, " + "str_cmd, " + "str_range, " + @@ -450,6 +470,8 @@ public class DispatchQuery { "layer.int_cores_min, " + "layer.int_cores_max, " + "layer.int_mem_min, " + + "layer.int_gpus_min, " + + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + "layer.str_cmd, " + "layer.str_range, " + @@ -467,6 +489,8 @@ public class DispatchQuery { "layer.int_cores_min <= ? " + "AND " + "layer.int_mem_min <= ? " + + "AND " + + "layer.int_gpus_min <= ? " + "AND " + "layer.int_gpu_mem_min BETWEEN ? AND ? 
" + "AND " + @@ -524,6 +548,8 @@ public class DispatchQuery { "layer_type, " + "int_cores_min, " + "int_cores_max, " + + "int_gpus_min, " + + "int_gpus_max, " + "b_threadable, " + "int_mem_min, " + "int_gpu_mem_min, " + @@ -557,6 +583,8 @@ public class DispatchQuery { "layer.str_type AS layer_type, " + "layer.int_cores_min, " + "layer.int_cores_max, " + + "layer.int_gpus_min, " + + "layer.int_gpus_max, " + "layer.b_threadable, " + "layer.int_mem_min, " + "layer.int_gpu_mem_min, " + @@ -578,6 +606,8 @@ public class DispatchQuery { "layer.int_mem_min <= ? " + "AND " + "(CASE WHEN layer.b_threadable = true THEN 1 ELSE 0 END) >= ? " + + "AND " + + "layer.int_gpus_min <= ? " + "AND " + "layer.int_gpu_mem_min BETWEEN ? AND ? " + "AND " + @@ -636,6 +666,8 @@ public class DispatchQuery { "int_cores_min, " + "int_cores_max, " + "int_mem_min, " + + "int_gpus_min, " + + "int_gpus_max, " + "int_gpu_mem_min, " + "str_cmd, " + "str_range, " + @@ -669,6 +701,8 @@ public class DispatchQuery { "layer.int_cores_min, " + "layer.int_cores_max, " + "layer.int_mem_min, " + + "layer.int_gpus_min, " + + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + "layer.str_cmd, " + "layer.str_range, " + @@ -739,6 +773,8 @@ public class DispatchQuery { "layer_type, " + "int_cores_min, " + "int_cores_max, " + + "int_gpus_min, " + + "int_gpus_max, " + "b_threadable, " + "int_mem_min, " + "int_gpu_mem_min, " + @@ -774,6 +810,8 @@ public class DispatchQuery { "layer.int_cores_max, " + "layer.b_threadable, " + "layer.int_mem_min, " + + "layer.int_gpus_min, " + + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + "layer.str_cmd, " + "layer.str_range, " + @@ -849,6 +887,8 @@ public class DispatchQuery { "int_cores_min, " + "int_cores_max, " + "int_mem_min, " + + "int_gpus_min, " + + "int_gpus_max, " + "int_gpu_mem_min, " + "str_cmd, " + "str_range, " + @@ -882,6 +922,8 @@ public class DispatchQuery { "layer.int_cores_min, " + "layer.int_cores_max, " + "layer.int_mem_min, " + + "layer.int_gpus_min, " + 
+ "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + "layer.str_cmd, " + "layer.str_range, " + @@ -900,7 +942,9 @@ public class DispatchQuery { "AND " + "layer.int_mem_min <= ? " + "AND " + - "layer.int_gpu_mem_min = ? " + + "layer.int_gpus_min <= ? " + + "AND " + + "layer.int_gpu_mem_min <= ? " + "AND " + "frame.str_state='WAITING' " + "AND " + @@ -958,6 +1002,8 @@ public class DispatchQuery { "int_cores_max, " + "b_threadable, " + "int_mem_min, " + + "int_gpus_min, " + + "int_gpus_max, " + "int_gpu_mem_min, " + "str_cmd, " + "str_range, " + @@ -991,6 +1037,8 @@ public class DispatchQuery { "layer.int_cores_max, " + "layer.b_threadable, " + "layer.int_mem_min, " + + "layer.int_gpus_min, " + + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + "layer.str_cmd, " + "layer.str_range, " + @@ -1010,6 +1058,8 @@ public class DispatchQuery { "layer.int_mem_min <= ? " + "AND " + "(CASE WHEN layer.b_threadable = true THEN 1 ELSE 0 END) >= ? " + + "AND " + + "layer.int_gpus_min <= ? " + "AND " + "layer.int_gpu_mem_min <= ? 
" + "AND " + @@ -1068,6 +1118,8 @@ public class DispatchQuery { "int_cores_min, " + "int_cores_max, " + "int_mem_min, " + + "int_gpus_min, " + + "int_gpus_max, " + "int_gpu_mem_min, " + "str_cmd, " + "str_range, " + @@ -1100,6 +1152,8 @@ public class DispatchQuery { "layer.b_threadable, " + "layer.int_cores_min, " + "layer.int_mem_min, " + + "layer.int_gpus_min, " + + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + "layer.int_cores_max, " + "layer.str_cmd, " + @@ -1173,6 +1227,8 @@ public class DispatchQuery { "int_cores_max, " + "b_threadable, " + "int_mem_min, " + + "int_gpus_min, " + + "int_gpus_max, " + "int_gpu_mem_min, " + "str_cmd, " + "str_range, " + @@ -1206,6 +1262,8 @@ public class DispatchQuery { "layer.int_cores_max, " + "layer.b_threadable, " + "layer.int_mem_min, " + + "layer.int_gpus_min, " + + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + "layer.str_cmd, " + "layer.str_range, " + diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatcherDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatcherDaoJdbc.java index d3e50525d..4b015e1af 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatcherDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatcherDaoJdbc.java @@ -190,11 +190,12 @@ private Set findDispatchJobs(DispatchHost host, int numJobs, boolean shu s.getShowId(), host.getFacilityId(), host.os, host.idleCores, host.idleMemory, threadMode(host.threadMode), - (host.idleGpu > 0) ? 1: 0, host.idleGpu, + host.idleGpus, + (host.idleGpuMemory > 0) ? 1: 0, host.idleGpuMemory, host.getName(), numJobs * 10)); if (result.size() < 1) { - if (host.gpu == 0) { + if (host.gpuMemory == 0) { s.skip(host.tags, host.idleCores, host.idleMemory); } } @@ -225,7 +226,8 @@ public Set findDispatchJobs(DispatchHost host, GroupInterface g) { g.getGroupId(),host.getFacilityId(), host.os, host.idleCores, host.idleMemory, threadMode(host.threadMode), - (host.idleGpu > 0) ? 
1: 0, host.idleGpu, + host.idleGpus, + (host.idleGpuMemory > 0) ? 1: 0, host.idleGpuMemory, host.getName(), 50)); return result; @@ -240,7 +242,7 @@ public List findNextDispatchFrames(JobInterface job, FIND_LOCAL_DISPATCH_FRAME_BY_JOB_AND_PROC, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, proc.memoryReserved, - proc.gpuReserved, + proc.gpuMemoryReserved, job.getJobId(), limit); } @@ -250,7 +252,8 @@ public List findNextDispatchFrames(JobInterface job, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, proc.coresReserved, proc.memoryReserved, - (proc.gpuReserved > 0) ? 1: 0, proc.gpuReserved, + proc.gpusReserved, + (proc.gpuMemoryReserved > 0) ? 1 : 0, proc.gpuMemoryReserved, job.getJobId(), proc.hostName, job.getJobId(), limit); } @@ -264,7 +267,7 @@ public List findNextDispatchFrames(JobInterface job, return getJdbcTemplate().query( FIND_LOCAL_DISPATCH_FRAME_BY_JOB_AND_HOST, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, - host.idleMemory, host.idleGpu, job.getJobId(), + host.idleMemory, host.idleGpuMemory, job.getJobId(), limit); } else { @@ -273,7 +276,8 @@ public List findNextDispatchFrames(JobInterface job, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, host.idleCores, host.idleMemory, threadMode(host.threadMode), - (host.idleGpu > 0) ? 1: 0, host.idleGpu, + host.idleGpus, + (host.idleGpuMemory > 0) ? 
1: 0, host.idleGpuMemory, job.getJobId(), host.getName(), job.getJobId(), limit); } @@ -288,7 +292,7 @@ public List findNextDispatchFrames(LayerInterface layer, return getJdbcTemplate().query( FIND_LOCAL_DISPATCH_FRAME_BY_LAYER_AND_PROC, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, - proc.memoryReserved, proc.gpuReserved, + proc.memoryReserved, proc.gpuMemoryReserved, layer.getLayerId(), limit); } @@ -297,7 +301,7 @@ public List findNextDispatchFrames(LayerInterface layer, FIND_DISPATCH_FRAME_BY_LAYER_AND_PROC, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, proc.coresReserved, proc.memoryReserved, - proc.gpuReserved, + proc.gpusReserved, proc.gpuMemoryReserved, layer.getLayerId(), layer.getLayerId(), proc.hostName, limit); } @@ -311,7 +315,7 @@ public List findNextDispatchFrames(LayerInterface layer, return getJdbcTemplate().query( FIND_LOCAL_DISPATCH_FRAME_BY_LAYER_AND_HOST, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, - host.idleMemory, host.idleGpu, layer.getLayerId(), + host.idleMemory, host.idleGpuMemory, layer.getLayerId(), limit); } else { @@ -320,7 +324,7 @@ public List findNextDispatchFrames(LayerInterface layer, FrameDaoJdbc.DISPATCH_FRAME_MAPPER, host.idleCores, host.idleMemory, threadMode(host.threadMode), - host.idleGpu, layer.getLayerId(), layer.getLayerId(), + host.idleGpus, host.idleGpuMemory, layer.getLayerId(), layer.getLayerId(), host.getName(), limit); } } @@ -345,7 +349,7 @@ public boolean findUnderProcedJob(JobInterface excludeJob, VirtualProc proc) { Integer.class, excludeJob.getShowId(), proc.getFacilityId(), proc.os, excludeJob.getShowId(), proc.getFacilityId(), proc.os, - proc.coresReserved, proc.memoryReserved, proc.gpuReserved, + proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved, proc.hostName) > 0; } catch (org.springframework.dao.EmptyResultDataAccessException e) { return false; @@ -363,7 +367,7 @@ public boolean higherPriorityJobExists(JobDetail baseJob, VirtualProc proc) { HIGHER_PRIORITY_JOB_BY_FACILITY_EXISTS, Boolean.class, 
baseJob.priority, proc.getFacilityId(), proc.os, proc.getFacilityId(), proc.os, - proc.coresReserved, proc.memoryReserved, proc.gpuReserved, + proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved, proc.hostName); } catch (org.springframework.dao.EmptyResultDataAccessException e) { return false; @@ -384,7 +388,8 @@ public Set findDispatchJobs(DispatchHost host, show.getShowId(), host.getFacilityId(), host.os, host.idleCores, host.idleMemory, threadMode(host.threadMode), - (host.idleGpu > 0) ? 1: 0, host.idleGpu, + host.idleGpus, + (host.idleGpuMemory > 0) ? 1: 0, host.idleGpuMemory, host.getName(), numJobs * 10)); return result; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/FrameDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/FrameDaoJdbc.java index a5cf87b3f..36ebb598b 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/FrameDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/FrameDaoJdbc.java @@ -66,7 +66,9 @@ public class FrameDaoJdbc extends JdbcDaoSupport implements FrameDao { "ts_updated = current_timestamp, " + "int_version = int_version + 1, " + "int_total_past_core_time = int_total_past_core_time + " + - "round(INTERVAL_TO_SECONDS(current_timestamp - ts_started) * int_cores / 100) " + + "round(INTERVAL_TO_SECONDS(current_timestamp - ts_started) * int_cores / 100)," + + "int_total_past_gpu_time = int_total_past_gpu_time + " + + "round(INTERVAL_TO_SECONDS(current_timestamp - ts_started) * int_gpus) " + "WHERE " + "frame.pk_frame = ? 
" + "AND " + @@ -93,7 +95,9 @@ public boolean updateFrameStopped(FrameInterface frame, FrameState state, "int_mem_max_used = ?, " + "int_version = int_version + 1, " + "int_total_past_core_time = int_total_past_core_time + " + - "round(INTERVAL_TO_SECONDS(current_timestamp + interval '1' second - ts_started) * int_cores / 100) " + + "round(INTERVAL_TO_SECONDS(current_timestamp + interval '1' second - ts_started) * int_cores / 100), " + + "int_total_past_gpu_time = int_total_past_gpu_time + " + + "round(INTERVAL_TO_SECONDS(current_timestamp + interval '1' second - ts_started) * int_gpus) " + "WHERE " + "frame.pk_frame = ? " + "AND " + @@ -149,6 +153,7 @@ public boolean updateFrameCleared(FrameInterface frame) { "str_host = ?, " + "int_cores = ?, " + "int_mem_reserved = ?, " + + "int_gpus = ?, " + "int_gpu_mem_reserved = ?, " + "ts_updated = current_timestamp, " + "ts_started = current_timestamp, " + @@ -200,7 +205,7 @@ public void updateFrameStarted(VirtualProc proc, FrameInterface frame) { int result = getJdbcTemplate().update(UPDATE_FRAME_STARTED, FrameState.RUNNING.toString(), proc.hostName, proc.coresReserved, - proc.memoryReserved, proc.gpuReserved, frame.getFrameId(), + proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved, frame.getFrameId(), FrameState.WAITING.toString(), frame.getVersion()); if (result == 0) { @@ -226,6 +231,7 @@ public void updateFrameStarted(VirtualProc proc, FrameInterface frame) { "str_host=?, " + "int_cores=?, "+ "int_mem_reserved = ?, " + + "int_gpus = ?, " + "int_gpu_mem_reserved = ?, " + "ts_updated = current_timestamp, " + "ts_started = current_timestamp, " + @@ -240,7 +246,7 @@ public void updateFrameStarted(VirtualProc proc, FrameInterface frame) { public boolean updateFrameFixed(VirtualProc proc, FrameInterface frame) { return getJdbcTemplate().update(UPDATE_FRAME_FIXED, FrameState.RUNNING.toString(), proc.hostName, proc.coresReserved, - proc.memoryReserved, proc.gpuReserved, frame.getFrameId()) == 1; + 
proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved, frame.getFrameId()) == 1; } @Override @@ -276,7 +282,9 @@ public DispatchFrame mapRow(ResultSet rs, int rowNum) throws SQLException { frame.maxCores = rs.getInt("int_cores_max"); frame.threadable = rs.getBoolean("b_threadable"); frame.minMemory = rs.getLong("int_mem_min"); - frame.minGpu = rs.getLong("int_gpu_mem_min"); + frame.minGpus = rs.getInt("int_gpus_min"); + frame.maxGpus = rs.getInt("int_gpus_max"); + frame.minGpuMemory = rs.getLong("int_gpu_mem_min"); frame.version = rs.getInt("int_version"); frame.services = rs.getString("str_services"); return frame; @@ -308,6 +316,8 @@ public DispatchFrame mapRow(ResultSet rs, int rowNum) throws SQLException { "layer.int_cores_max,"+ "layer.b_threadable,"+ "layer.int_mem_min, "+ + "layer.int_gpus_min,"+ + "layer.int_gpus_max,"+ "layer.int_gpu_mem_min, "+ "layer.str_range, "+ "layer.int_chunk_size, " + @@ -402,7 +412,7 @@ public FrameDetail mapRow(ResultSet rs, int rowNum) throws SQLException { frame.version = rs.getInt("int_version"); if (rs.getString("str_host") != null) { - frame.lastResource = String.format("%s/%d",rs.getString("str_host"),rs.getInt("int_cores")); + frame.lastResource = String.format("%s/%d/%d",rs.getString("str_host"),rs.getInt("int_cores"),rs.getInt("int_gpus")); } else { frame.lastResource = ""; @@ -946,7 +956,8 @@ public ResourceUsage mapRow(ResultSet rs, int rowNum) throws SQLException { return new ResourceUsage( rs.getLong("int_clock_time"), - rs.getInt("int_cores")); + rs.getInt("int_cores"), + rs.getInt("int_gpus")); } }; @@ -962,7 +973,8 @@ public ResourceUsage getResourceUsage(FrameInterface f) { "SELECT " + "COALESCE(interval_to_seconds(current_timestamp - ts_started), 1) " + "AS int_clock_time, " + - "COALESCE(int_cores, 100) AS int_cores " + + "COALESCE(int_cores, 100) AS int_cores," + + "int_gpus " + "FROM " + "frame " + "WHERE " + diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/GroupDaoJdbc.java
b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/GroupDaoJdbc.java index 9051131ea..b502bb680 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/GroupDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/GroupDaoJdbc.java @@ -232,6 +232,73 @@ public boolean isOverMinCores(JobInterface job) { Integer.class, job.getJobId()) > 0; } + @Override + public void updateDefaultJobMaxGpus(GroupInterface group, int value) { + if (value <= 0) { value = CueUtil.FEATURE_DISABLED; } + if (value < 1 && value != CueUtil.FEATURE_DISABLED) { + String msg = "The default max gpus for a job must " + + "be at least a single gpu"; + throw new IllegalArgumentException(msg); + } + getJdbcTemplate().update( + "UPDATE folder SET int_job_max_gpus=? WHERE pk_folder=?", + value, group.getId()); + } + + @Override + public void updateDefaultJobMinGpus(GroupInterface group, int value) { + if (value <= 0) { value = CueUtil.FEATURE_DISABLED; } + if (value < 1 && value != CueUtil.FEATURE_DISABLED) { + String msg = "The default min gpus for a job must " + + "be at least a single gpu"; + throw new IllegalArgumentException(msg); + } + getJdbcTemplate().update( + "UPDATE folder SET int_job_min_gpus=? WHERE pk_folder=?", + value, group.getId()); + } + + @Override + public void updateMaxGpus(GroupInterface group, int value) { + if (value < 0) { value = CueUtil.FEATURE_DISABLED; } + if (value < 1 && value != CueUtil.FEATURE_DISABLED) { + String msg = "The group max gpus feature must " + + "be a whole gpu or greater, pass in: " + value; + throw new IllegalArgumentException(msg); + } + + getJdbcTemplate().update( + "UPDATE folder_resource SET int_max_gpus=? WHERE pk_folder=?", + value, group.getId()); + } + + @Override + public void updateMinGpus(GroupInterface group, int value) { + if (value < 0) { value = 0; } + getJdbcTemplate().update( + "UPDATE folder_resource SET int_min_gpus=?
WHERE pk_folder=?", + value, group.getId()); + } + + private static final String IS_OVER_MIN_GPUS = + "SELECT " + + "COUNT(1) " + + "FROM " + + "job,"+ + "folder_resource fr "+ + "WHERE " + + "job.pk_folder = fr.pk_folder " + + "AND " + + "fr.int_gpus > fr.int_min_gpus " + + "AND "+ + "job.pk_job = ?"; + + @Override + public boolean isOverMinGpus(JobInterface job) { + return getJdbcTemplate().queryForObject(IS_OVER_MIN_GPUS, + Integer.class, job.getJobId()) > 0; + } + @Override public void updateDefaultJobPriority(GroupInterface group, int value) { if (value < 0) { value = CueUtil.FEATURE_DISABLED; } @@ -251,6 +318,8 @@ public void updateDefaultJobPriority(GroupInterface group, int value) { "folder.pk_folder, " + "folder.int_job_max_cores,"+ "folder.int_job_min_cores,"+ + "folder.int_job_max_gpus,"+ + "folder.int_job_min_gpus,"+ "folder.int_job_priority,"+ "folder.str_name,"+ "folder.pk_parent_folder,"+ @@ -258,7 +327,9 @@ public void updateDefaultJobPriority(GroupInterface group, int value) { "folder.pk_dept,"+ "folder_level.int_level, " + "folder_resource.int_min_cores,"+ - "folder_resource.int_max_cores " + + "folder_resource.int_max_cores," + + "folder_resource.int_min_gpus,"+ + "folder_resource.int_max_gpus " + "FROM " + "folder, "+ "folder_level, " + @@ -273,6 +344,8 @@ public void updateDefaultJobPriority(GroupInterface group, int value) { "folder.pk_folder, " + "folder.int_job_max_cores,"+ "folder.int_job_min_cores,"+ + "folder.int_job_max_gpus,"+ + "folder.int_job_min_gpus,"+ "folder.int_job_priority,"+ "folder.str_name,"+ "folder.pk_parent_folder,"+ @@ -280,7 +353,9 @@ public void updateDefaultJobPriority(GroupInterface group, int value) { "folder.pk_dept,"+ "folder_level.int_level, " + "folder_resource.int_min_cores,"+ - "folder_resource.int_max_cores " + + "folder_resource.int_max_cores," + + "folder_resource.int_min_gpus,"+ + "folder_resource.int_max_gpus " + "FROM " + "folder, "+ "folder_level, " + @@ -393,6 +468,8 @@ public GroupDetail 
mapRow(ResultSet rs, int rowNum) throws SQLException { group.id = rs.getString("pk_folder"); group.jobMaxCores = rs.getInt("int_job_max_cores"); group.jobMinCores = rs.getInt("int_job_min_cores"); + group.jobMaxGpus = rs.getInt("int_job_max_gpus"); + group.jobMinGpus = rs.getInt("int_job_min_gpus"); group.jobPriority = rs.getInt("int_job_priority"); group.name = rs.getString("str_name"); group.parentId = rs.getString("pk_parent_folder"); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java index 1236e6d38..60465407b 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java @@ -73,6 +73,8 @@ public HostEntity mapRow(ResultSet rs, int rowNum) throws SQLException { host.idleCores = rs.getInt("int_cores_idle"); host.memory = rs.getInt("int_mem"); host.idleMemory = rs.getInt("int_mem_idle"); + host.gpus = rs.getInt("int_gpus"); + host.idleGpus = rs.getInt("int_gpus_idle"); host.gpuMemory = rs.getInt("int_gpu_mem"); host.idleGpuMemory = rs.getInt("int_gpu_mem_idle"); host.dateBooted = rs.getDate("ts_booted"); @@ -110,6 +112,8 @@ public HostInterface mapRow(final ResultSet rs, int rowNum) throws SQLException "host.int_cores_idle,"+ "host.int_mem,"+ "host.int_mem_idle,"+ + "host.int_gpus,"+ + "host.int_gpus_idle,"+ "host.int_gpu_mem,"+ "host.int_gpu_mem_idle,"+ "host.ts_created,"+ @@ -201,10 +205,12 @@ public DispatchHost mapRow(ResultSet rs, int rowNum) throws SQLException { host.lockState = LockState.valueOf(rs.getString("str_lock_state")); host.memory = rs.getInt("int_mem"); host.cores = rs.getInt("int_cores"); - host.gpu= rs.getInt("int_gpu_mem"); + host.gpus = rs.getInt("int_gpus"); + host.gpuMemory = rs.getInt("int_gpu_mem"); host.idleMemory= rs.getInt("int_mem_idle"); host.idleCores = rs.getInt("int_cores_idle"); - host.idleGpu= rs.getInt("int_gpu_mem_idle"); 
+ host.idleGpuMemory = rs.getInt("int_gpu_mem_idle"); + host.idleGpus = rs.getInt("int_gpus_idle"); host.isNimby = rs.getBoolean("b_nimby"); host.threadMode = rs.getInt("int_thread_mode"); host.tags = rs.getString("str_tags"); @@ -225,6 +231,8 @@ public DispatchHost mapRow(ResultSet rs, int rowNum) throws SQLException { "host.int_cores_idle, " + "host.int_mem,"+ "host.int_mem_idle, "+ + "host.int_gpus, "+ + "host.int_gpus_idle, " + "host.int_gpu_mem,"+ "host.int_gpu_mem_idle, "+ "host.b_nimby, "+ @@ -276,12 +284,14 @@ public DispatchHost getDispatchHost(String id) { "int_cores_idle, " + "int_mem,"+ "int_mem_idle,"+ + "int_gpus, " + + "int_gpus_idle, " + "int_gpu_mem,"+ "int_gpu_mem_idle,"+ "str_fqdn, " + "int_thread_mode "+ ") " + - "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)", + "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", "INSERT INTO " + "host_stat " + @@ -347,28 +357,17 @@ public void insertRenderHost(RenderHost host, AllocationInterface a, boolean use os = Dispatcher.OS_DEFAULT; } - long totalGpu; - if (host.getAttributesMap().containsKey("totalGpu")) - totalGpu = Integer.parseInt(host.getAttributesMap().get("totalGpu")); - else - totalGpu = 0; - - long freeGpu; - if (host.getAttributesMap().containsKey("freeGpu")) - freeGpu = Integer.parseInt(host.getAttributesMap().get("freeGpu")); - else - freeGpu = 0; - - getJdbcTemplate().update(INSERT_HOST_DETAIL[0], hid, a.getAllocationId(), name, host.getNimbyEnabled(), LockState.OPEN.toString(), host.getNumProcs(), coreUnits, coreUnits, - memUnits, memUnits, totalGpu, totalGpu, + memUnits, memUnits, + host.getNumGpus(), host.getNumGpus(), + host.getTotalGpuMem(), host.getTotalGpuMem(), fqdn, threadMode.getNumber()); getJdbcTemplate().update(INSERT_HOST_DETAIL[1], hid, hid, host.getTotalMem(), host.getFreeMem(), - totalGpu, freeGpu, + host.getTotalGpuMem(), host.getFreeGpuMem(), host.getTotalSwap(), host.getFreeSwap(), host.getTotalMcp(), host.getFreeMcp(), host.getLoad(), new Timestamp(host.getBootTime() * 1000l), @@ -410,7 
+409,7 @@ public void updateHostStats(HostInterface host, long totalMemory, long freeMemory, long totalSwap, long freeSwap, long totalMcp, long freeMcp, - long totalGpu, long freeGpu, + long totalGpuMemory, long freeGpuMemory, int load, Timestamp bootTime, String os) { @@ -420,7 +419,7 @@ public void updateHostStats(HostInterface host, getJdbcTemplate().update(UPDATE_RENDER_HOST, totalMemory, freeMemory, totalSwap, - freeSwap, totalMcp, freeMcp, totalGpu, freeGpu, load, + freeSwap, totalMcp, freeMcp, totalGpuMemory, freeGpuMemory, load, bootTime, os, host.getHostId()); } @@ -440,12 +439,8 @@ public void updateHostResources(HostInterface host, HostReport report) { long memory = convertMemoryUnits(report.getHost()); int cores = report.getHost().getNumProcs() * report.getHost().getCoresPerProc(); - - long totalGpu; - if (report.getHost().getAttributesMap().containsKey("totalGpu")) - totalGpu = Integer.parseInt(report.getHost().getAttributesMap().get("totalGpu")); - else - totalGpu = 0; + long gpu_memory = report.getHost().getTotalGpuMem(); + int gpus = report.getHost().getNumGpus(); getJdbcTemplate().update( "UPDATE " + @@ -456,6 +451,8 @@ public void updateHostResources(HostInterface host, HostReport report) { "int_cores_idle=?," + "int_mem=?," + "int_mem_idle=?, " + + "int_gpus=?," + + "int_gpus_idle=?," + "int_gpu_mem=?," + "int_gpu_mem_idle=? 
" + "WHERE " + @@ -463,9 +460,11 @@ public void updateHostResources(HostInterface host, HostReport report) { "AND " + "int_cores = int_cores_idle " + "AND " + - "int_mem = int_mem_idle", + "int_mem = int_mem_idle " + + "AND " + + "int_gpus = int_gpus_idle", report.getHost().getNimbyEnabled(), cores, cores, - memory, memory, totalGpu, totalGpu, host.getId()); + memory, memory, gpus, gpus, gpu_memory, gpu_memory, host.getId()); } @Override @@ -600,6 +599,18 @@ public int getStrandedCoreUnits(HostInterface h) { } } + @Override + public int getStrandedGpus(HostInterface h) { + try { + int idle_gpus = getJdbcTemplate().queryForObject( + "SELECT int_gpus_idle FROM host WHERE pk_host = ?", + Integer.class, h.getHostId()); + return idle_gpus; + } catch (EmptyResultDataAccessException e) { + return 0; + } + } + private static final String IS_HOST_UP = "SELECT " + "COUNT(1) " + diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/JobDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/JobDaoJdbc.java index a3dab8f4e..b5c5ed7af 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/JobDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/JobDaoJdbc.java @@ -116,6 +116,8 @@ public JobDetail mapRow(ResultSet rs, int rowNum) throws SQLException { job.logDir = rs.getString("str_log_dir"); job.maxCoreUnits = rs.getInt("int_max_cores"); job.minCoreUnits = rs.getInt("int_min_cores"); + job.maxGpuUnits = rs.getInt("int_max_gpus"); + job.minGpuUnits = rs.getInt("int_min_gpus"); job.name = rs.getString("str_name"); job.priority = rs.getInt("int_priority"); job.shot = rs.getString("str_shot"); @@ -218,6 +220,8 @@ public boolean isJobComplete(JobInterface job) { "job.int_max_retries,"+ "job_resource.int_max_cores,"+ "job_resource.int_min_cores,"+ + "job_resource.int_max_gpus,"+ + "job_resource.int_min_gpus,"+ "job_resource.int_priority,"+ "show.str_name AS show_name, " + "dept.str_name AS dept_name, "+ @@ -364,6 +368,32 @@ 
public void updateMaxCores(JobInterface j, int v) { v, j.getJobId()); } + @Override + public void updateMinGpus(GroupInterface g, int v) { + getJdbcTemplate().update("UPDATE job_resource SET int_min_gpus=? WHERE " + + "pk_job IN (SELECT pk_job FROM job WHERE pk_folder=?)", + v, g.getGroupId()); + } + + @Override + public void updateMaxGpus(GroupInterface g, int v) { + getJdbcTemplate().update("UPDATE job_resource SET int_max_gpus=? WHERE " + + "pk_job IN (SELECT pk_job FROM job WHERE pk_folder=?)", + v, g.getGroupId()); + } + + @Override + public void updateMinGpus(JobInterface j, int v) { + getJdbcTemplate().update("UPDATE job_resource SET int_min_gpus=? WHERE pk_job=?", + v, j.getJobId()); + } + + @Override + public void updateMaxGpus(JobInterface j, int v) { + getJdbcTemplate().update("UPDATE job_resource SET int_max_gpus=? WHERE pk_job=?", + v, j.getJobId()); + } + @Override public void updatePaused(JobInterface j, boolean b) { getJdbcTemplate().update("UPDATE job SET b_paused=? WHERE pk_job=?", @@ -632,6 +662,60 @@ public boolean isAtMaxCores(JobInterface job) { Integer.class, job.getJobId()) > 0; } + private static final String IS_JOB_OVER_MIN_GPUS = + "SELECT " + + "COUNT(1) " + + "FROM " + + "job_resource " + + "WHERE " + + "job_resource.pk_job = ? " + + "AND " + + "job_resource.int_gpus > job_resource.int_min_gpus"; + + @Override + public boolean isOverMinGpus(JobInterface job) { + return getJdbcTemplate().queryForObject(IS_JOB_OVER_MIN_GPUS, + Integer.class, job.getJobId()) > 0; + } + + private static final String IS_JOB_OVER_MAX_GPUS = + "SELECT " + + "COUNT(1) " + + "FROM " + + "job_resource " + + "WHERE " + + "job_resource.pk_job = ? " + + "AND " + + "job_resource.int_gpus + ? 
> job_resource.int_max_gpus"; + + @Override + public boolean isOverMaxGpus(JobInterface job) { + return getJdbcTemplate().queryForObject(IS_JOB_OVER_MAX_GPUS, + Integer.class, job.getJobId(), 0) > 0; + } + + @Override + public boolean isOverMaxGpus(JobInterface job, int gpu) { + return getJdbcTemplate().queryForObject(IS_JOB_OVER_MAX_GPUS, + Integer.class, job.getJobId(), gpu) > 0; + } + + private static final String IS_JOB_AT_MAX_GPUS = + "SELECT " + + "COUNT(1) " + + "FROM " + + "job_resource " + + "WHERE " + + "job_resource.pk_job = ? " + + "AND " + + "job_resource.int_gpus >= job_resource.int_max_gpus "; + + @Override + public boolean isAtMaxGpus(JobInterface job) { + return getJdbcTemplate().queryForObject(IS_JOB_AT_MAX_GPUS, + Integer.class, job.getJobId()) > 0; + } + @Override public void updateMaxFrameRetries(JobInterface j, int max_retries) { if (max_retries < 0) { @@ -685,8 +769,10 @@ public FrameStateTotals mapRow(ResultSet rs, int rowNum) throws SQLException { private static final String GET_EXECUTION_SUMMARY = "SELECT " + "job_usage.int_core_time_success,"+ - "job_usage.int_core_time_fail," + - "job_mem.int_max_rss " + + "job_usage.int_core_time_fail,"+ + "job_usage.int_gpu_time_success,"+ + "job_usage.int_gpu_time_fail,"+ + "job_mem.int_max_rss " + "FROM " + "job," + "job_usage, "+ @@ -707,6 +793,9 @@ public ExecutionSummary mapRow(ResultSet rs, int rowNum) throws SQLException { e.coreTimeSuccess = rs.getLong("int_core_time_success"); e.coreTimeFail = rs.getLong("int_core_time_fail"); e.coreTime = e.coreTimeSuccess + e.coreTimeFail; + e.gpuTimeSuccess = rs.getLong("int_gpu_time_success"); + e.gpuTimeFail = rs.getLong("int_gpu_time_fail"); + e.gpuTime = e.gpuTimeSuccess + e.gpuTimeFail; e.highMemoryKb = rs.getLong("int_max_rss"); return e; @@ -795,6 +884,20 @@ public void updateParent(JobInterface job, GroupDetail dest, Inherit[] inherits) } break; + case MinGpus: + if (dest.jobMinGpus != CueUtil.FEATURE_DISABLED) { + query.append("int_min_gpus=?,"); + 
values.add(dest.jobMinGpus); + } + break; + + case MaxGpus: + if (dest.jobMaxGpus != CueUtil.FEATURE_DISABLED) { + query.append("int_max_gpus=?,"); + values.add(dest.jobMaxGpus); + } + break; + case All: if (dest.jobPriority != CueUtil.FEATURE_DISABLED) { query.append("int_priority=?,"); @@ -810,6 +913,16 @@ public void updateParent(JobInterface job, GroupDetail dest, Inherit[] inherits) query.append("int_max_cores=?,"); values.add(dest.jobMaxCores); } + + if (dest.jobMinGpus != CueUtil.FEATURE_DISABLED) { + query.append("int_min_gpus=?,"); + values.add(dest.jobMinGpus); + } + + if (dest.jobMaxGpus != CueUtil.FEATURE_DISABLED) { + query.append("int_max_gpus=?,"); + values.add(dest.jobMaxGpus); + } break; } } @@ -851,6 +964,8 @@ public void updateParent(JobInterface job, GroupDetail dest, Inherit[] inherits) "job_stat.int_waiting_count != 0" + "AND " + "job_resource.int_cores < job_resource.int_max_cores " + + "AND " + + "job_resource.int_gpus < job_resource.int_max_gpus " + "AND " + "job.pk_facility = ? " + "LIMIT 1"; @@ -922,11 +1037,13 @@ public void updateUsage(JobInterface job, ResourceUsage usage, int exitStatus) { "job_usage " + "SET " + "int_core_time_success = int_core_time_success + ?," + + "int_gpu_time_success = int_gpu_time_success + ?," + "int_clock_time_success = int_clock_time_success + ?,"+ "int_frame_success_count = int_frame_success_count + 1 " + "WHERE " + "pk_job = ? 
", usage.getCoreTimeSeconds(), + usage.getGpuTimeSeconds(), usage.getClockTimeSeconds(), job.getJobId()); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java index a029c16b8..212963519 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java @@ -205,7 +205,8 @@ public LayerDetail mapRow(ResultSet rs, int rowNum) throws SQLException { layer.range = rs.getString("str_range"); layer.minimumCores = rs.getInt("int_cores_min"); layer.minimumMemory = rs.getLong("int_mem_min"); - layer.minimumGpu = rs.getLong("int_gpu_mem_min"); + layer.minimumGpus = rs.getInt("int_gpus_min"); + layer.minimumGpuMemory = rs.getLong("int_gpu_mem_min"); layer.type = LayerType.valueOf(rs.getString("str_type")); layer.tags = Sets.newHashSet( rs.getString("str_tags").replaceAll(" ", "").split("\\|")); @@ -311,12 +312,14 @@ public LayerInterface getLayer(String id) { "int_cores_max, "+ "b_threadable, " + "int_mem_min, " + + "int_gpus_min, "+ + "int_gpus_max, "+ "int_gpu_mem_min, " + "str_services, " + "int_timeout," + "int_timeout_llu " + ") " + - "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; + "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; @Override public void insertLayerDetail(LayerDetail l) { @@ -326,7 +329,7 @@ public void insertLayerDetail(LayerDetail l) { l.range, l.chunkSize, l.dispatchOrder, StringUtils.join(l.tags," | "), l.type.toString(), l.minimumCores, l.maximumCores, l.isThreadable, - l.minimumMemory, l.minimumGpu, StringUtils.join(l.services,","), + l.minimumMemory, l.minimumGpus, l.maximumGpus, l.minimumGpuMemory, StringUtils.join(l.services,","), l.timeout, l.timeout_llu); } @@ -340,9 +343,9 @@ public void updateLayerMinMemory(LayerInterface layer, long val) { } @Override - public void updateLayerMinGpu(LayerInterface layer, long gpu) { + public void 
updateLayerMinGpuMemory(LayerInterface layer, long kb) { getJdbcTemplate().update("UPDATE layer SET int_gpu_mem_min=? WHERE pk_layer=?", - gpu, layer.getLayerId()); + kb, layer.getLayerId()); } private static final String BALANCE_MEM = @@ -392,9 +395,9 @@ public void increaseLayerMinMemory(LayerInterface layer, long val) { } @Override - public void increaseLayerMinGpu(LayerInterface layer, long gpu) { + public void increaseLayerMinGpuMemory(LayerInterface layer, long kb) { getJdbcTemplate().update("UPDATE layer SET int_gpu_mem_min=? WHERE pk_layer=? AND int_gpu_mem_min < ?", - gpu, layer.getLayerId(), gpu); + kb, layer.getLayerId(), kb); } @Override @@ -412,6 +415,18 @@ public void updateLayerMaxCores(LayerInterface layer, int val) { val, layer.getLayerId()); } + @Override + public void updateLayerMinGpus(LayerInterface layer, int val) { + getJdbcTemplate().update("UPDATE layer SET int_gpus_min=? WHERE pk_layer=?", + val, layer.getLayerId()); + } + + @Override + public void updateLayerMaxGpus(LayerInterface layer, int val) { + getJdbcTemplate().update("UPDATE layer SET int_gpus_max=? 
WHERE pk_layer=?", + val, layer.getLayerId()); + } + private static final String UPDATE_LAYER_MAX_RSS = "UPDATE " + "layer_mem " + @@ -489,6 +504,8 @@ public FrameStateTotals mapRow(ResultSet rs, int rowNum) throws SQLException { "SELECT " + "layer_usage.int_core_time_success,"+ "layer_usage.int_core_time_fail," + + "layer_usage.int_gpu_time_success,"+ + "layer_usage.int_gpu_time_fail," + "layer_usage.int_clock_time_success," + "layer_mem.int_max_rss " + "FROM " + @@ -512,6 +529,9 @@ public ExecutionSummary mapRow(ResultSet rs, int rowNum) throws SQLException { e.coreTimeSuccess = rs.getLong("int_core_time_success"); e.coreTimeFail = rs.getLong("int_core_time_fail"); e.coreTime = e.coreTimeSuccess + e.coreTimeFail; + e.gpuTimeSuccess = rs.getLong("int_gpu_time_success"); + e.gpuTimeFail = rs.getLong("int_gpu_time_fail"); + e.gpuTime = e.gpuTimeSuccess + e.gpuTimeFail; e.highMemoryKb = rs.getLong("int_max_rss"); return e; } @@ -608,10 +628,10 @@ public void updateMinMemory(JobInterface job, long mem, LayerType type) { } @Override - public void updateMinGpu(JobInterface job, long gpu, LayerType type) { + public void updateMinGpuMemory(JobInterface job, long kb, LayerType type) { getJdbcTemplate().update( "UPDATE layer SET int_gpu_mem_min=? WHERE pk_job=? AND str_type=?", - gpu, job.getJobId(), type.toString()); + kb, job.getJobId(), type.toString()); } @Override @@ -621,6 +641,13 @@ public void updateMinCores(JobInterface job, int cores, LayerType type) { cores, job.getJobId(), type.toString()); } + @Override + public void updateMinGpus(JobInterface job, int gpus, LayerType type) { + getJdbcTemplate().update( + "UPDATE layer SET int_gpus_min=? WHERE pk_job=? AND str_type=?", + gpus, job.getJobId(), type.toString()); + } + @Override public void updateThreadable(LayerInterface layer, boolean threadable) { getJdbcTemplate().update( @@ -664,6 +691,8 @@ public void enableMemoryOptimizer(LayerInterface layer, boolean value) { "layer.pk_layer = ? 
" + "AND " + "layer.int_cores_min = 100 " + + "AND " + + "layer.int_gpus_min = 0 " + "AND " + "str_tags LIKE '%general%' " + "AND " + @@ -686,7 +715,8 @@ public boolean isOptimizable(LayerInterface l, int succeeded, float avg) { private static final String THREAD_STATS = "SELECT " + "avg(interval_to_seconds(ts_stopped - ts_started)) AS avg, " + - "int_cores " + + "int_cores, " + + "int_gpus " + "FROM " + "frame " + "WHERE " + @@ -695,8 +725,11 @@ public boolean isOptimizable(LayerInterface l, int succeeded, float avg) { "frame.int_checkpoint_count = 0 " + "AND " + "int_cores > 0 " + + "AND " + + "int_gpus > 0 " + "GROUP BY " + - "int_cores " + + "int_cores, " + + "int_gpus " + "ORDER BY " + "int_cores DESC "; @@ -724,11 +757,13 @@ public void updateUsage(LayerInterface layer, ResourceUsage usage, int exitStatu "layer_usage " + "SET " + "int_core_time_success = int_core_time_success + ?," + + "int_gpu_time_success = int_gpu_time_success + ?," + "int_clock_time_success = int_clock_time_success + ?,"+ "int_frame_success_count = int_frame_success_count + 1 " + "WHERE " + "pk_layer = ? 
", usage.getCoreTimeSeconds(), + usage.getGpuTimeSeconds(), usage.getClockTimeSeconds(), layer.getLayerId()); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/NestedWhiteboardDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/NestedWhiteboardDaoJdbc.java index 66ebb105b..924a65a96 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/NestedWhiteboardDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/NestedWhiteboardDaoJdbc.java @@ -73,8 +73,12 @@ public CachedJobWhiteboardMapper(NestedJobWhiteboardMapper result) { "folder.int_job_priority as int_def_job_priority, " + "folder.int_job_min_cores as int_def_job_min_cores, " + "folder.int_job_max_cores as int_def_job_max_cores, " + + "folder.int_job_min_gpus as int_def_job_min_gpus, " + + "folder.int_job_max_gpus as int_def_job_max_gpus, " + "folder_resource.int_min_cores AS folder_min_cores, " + "folder_resource.int_max_cores AS folder_max_cores, " + + "folder_resource.int_min_gpus AS folder_min_gpus, " + + "folder_resource.int_max_gpus AS folder_max_gpus, " + "folder_level.int_level, " + "job.pk_job, " + "job.str_name, " + @@ -101,13 +105,18 @@ public CachedJobWhiteboardMapper(NestedJobWhiteboardMapper result) { "job_stat.int_succeeded_count, " + "job_usage.int_core_time_success, " + "job_usage.int_core_time_fail, " + + "job_usage.int_gpu_time_success, " + + "job_usage.int_gpu_time_fail, " + "job_usage.int_frame_success_count, " + "job_usage.int_frame_fail_count, " + "job_usage.int_clock_time_high, " + "job_usage.int_clock_time_success, " + "(job_resource.int_cores + job_resource.int_local_cores) AS int_cores, " + + "(job_resource.int_gpus + job_resource.int_local_gpus) AS int_gpus, " + "job_resource.int_min_cores, " + + "job_resource.int_min_gpus, " + "job_resource.int_max_cores, " + + "job_resource.int_max_gpus, " + "job_mem.int_max_rss " + "FROM " + "show, " + @@ -165,8 +174,12 @@ public NestedGroup mapRow(ResultSet rs, int rowNum) throws 
SQLException { .setDefaultJobPriority(rs.getInt("int_def_job_priority")) .setDefaultJobMinCores(Convert.coreUnitsToCores(rs.getInt("int_def_job_min_cores"))) .setDefaultJobMaxCores(Convert.coreUnitsToCores(rs.getInt("int_def_job_max_cores"))) + .setDefaultJobMinGpus(rs.getInt("int_def_job_min_gpus")) + .setDefaultJobMaxGpus(rs.getInt("int_def_job_max_gpus")) .setMaxCores(Convert.coreUnitsToCores(rs.getInt("folder_max_cores"))) .setMinCores(Convert.coreUnitsToCores(rs.getInt("folder_min_cores"))) + .setMaxGpus(rs.getInt("folder_max_gpus")) + .setMinGpus(rs.getInt("folder_min_gpus")) .setLevel(rs.getInt("int_level")) .setDepartment(rs.getString("dept_name")) .build(); @@ -254,6 +267,8 @@ private static final NestedJob mapResultSetToJob(ResultSet rs) throws SQLExcepti .setLogDir(rs.getString("str_log_dir")) .setMaxCores(Convert.coreUnitsToCores(rs.getInt("int_max_cores"))) .setMinCores(Convert.coreUnitsToCores(rs.getInt("int_min_cores"))) + .setMaxGpus(rs.getInt("int_max_gpus")) + .setMinGpus(rs.getInt("int_min_gpus")) .setName(rs.getString("str_name")) .setPriority(rs.getInt("int_priority")) .setShot(rs.getString("str_shot")) @@ -295,6 +310,8 @@ private static final NestedJob mapResultSetToJob(ResultSet rs) throws SQLExcepti "host_stat.ts_ping, " + "host.int_cores, " + "host.int_cores_idle, " + + "host.int_gpus, " + + "host.int_gpus_idle, " + "host.int_gpu_mem, " + "host.int_gpu_mem_idle, " + "host.int_mem, " + @@ -315,10 +332,11 @@ private static final NestedJob mapResultSetToJob(ResultSet rs) throws SQLExcepti "host_stat.int_load, " + "proc.pk_proc, " + "proc.int_cores_reserved AS proc_cores, " + + "proc.int_gpus_reserved AS proc_gpus, " + "proc.int_mem_reserved AS proc_memory, " + "proc.int_mem_used AS used_memory, " + "proc.int_mem_max_used AS max_memory, " + - "proc.int_gpu_mem_reserved AS proc_gpu, " + + "proc.int_gpu_mem_reserved AS proc_gpu_memory, " + "proc.ts_ping, " + "proc.ts_booked, " + "proc.ts_dispatched, " + @@ -445,10 +463,13 @@ public NestedHost
mapRow(ResultSet rs, int row) throws SQLException { proc = NestedProc.newBuilder() .setId(pid) .setName(CueUtil.buildProcName(host.getName(), - rs.getInt("proc_cores"))) + rs.getInt("proc_cores"), + rs.getInt("proc_gpus"))) .setReservedCores(Convert.coreUnitsToCores( rs.getInt("proc_cores"))) + .setReservedGpus(rs.getInt("proc_gpus")) .setReservedMemory(rs.getLong("proc_memory")) + .setReservedGpuMemory(rs.getLong("proc_gpu_memory")) .setUsedMemory(rs.getLong("used_memory")) .setFrameName(rs.getString("frame_name")) .setJobName(rs.getString("job_name")) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java index 8e004e571..ba9f33c1f 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java @@ -108,9 +108,12 @@ public boolean deleteVirtualProc(VirtualProc proc) { "int_mem_reserved, " + "int_mem_pre_reserved, " + "int_mem_used, "+ + "int_gpus_reserved, " + "int_gpu_mem_reserved, " + + "int_gpu_mem_pre_reserved, " + + "int_gpu_mem_used, " + "b_local " + - ") VALUES (?,?,?,?,?,?,?,?,?,?,?,?) "; + ") VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) 
"; public void insertVirtualProc(VirtualProc proc) { proc.id = SqlUtil.genKeyRandom(); @@ -121,7 +124,9 @@ public void insertVirtualProc(VirtualProc proc) { proc.getLayerId(), proc.getJobId(), proc.getFrameId(), proc.coresReserved, proc.memoryReserved, proc.memoryReserved, Dispatcher.MEM_RESERVED_MIN, - proc.gpuReserved, proc.isLocalDispatch); + proc.gpusReserved, proc.gpuMemoryReserved, + proc.gpuMemoryReserved, Dispatcher.MEM_GPU_RESERVED_MIN, + proc.isLocalDispatch); // Update all of the resource counts procCreated(proc); @@ -278,7 +283,9 @@ public VirtualProc mapRow(ResultSet rs, int rowNum) throws SQLException { proc.coresReserved =rs.getInt("int_cores_reserved"); proc.memoryReserved = rs.getLong("int_mem_reserved"); proc.memoryMax = rs.getLong("int_mem_max_used"); - proc.gpuReserved = rs.getLong("int_gpu_mem_reserved"); + proc.gpusReserved = rs.getInt("int_gpus_reserved"); + proc.gpuMemoryReserved = rs.getLong("int_gpu_mem_reserved"); + proc.gpuMemoryMax = rs.getLong("int_gpu_mem_max_used"); proc.virtualMemoryMax = rs.getLong("int_virt_max_used"); proc.virtualMemoryUsed = rs.getLong("int_virt_used"); proc.memoryUsed = rs.getLong("int_mem_used"); @@ -305,7 +312,10 @@ public VirtualProc mapRow(ResultSet rs, int rowNum) throws SQLException { "proc.int_mem_reserved,"+ "proc.int_mem_max_used,"+ "proc.int_mem_used,"+ + "proc.int_gpus_reserved,"+ "proc.int_gpu_mem_reserved,"+ + "proc.int_gpu_mem_max_used,"+ + "proc.int_gpu_mem_used,"+ "proc.int_virt_max_used,"+ "proc.int_virt_used,"+ "host.str_name AS host_name, " + @@ -551,7 +561,10 @@ public boolean increaseReservedMemory(ProcInterface p, long value) { "int_mem_reserved," + "int_mem_max_used,"+ "int_mem_used,"+ + "int_gpus_reserved," + "int_gpu_mem_reserved," + + "int_gpu_mem_max_used," + + "int_gpu_mem_used," + "int_virt_max_used,"+ "int_virt_used,"+ "host_name, " + @@ -578,7 +591,7 @@ public long getReservedMemory(ProcInterface proc) { Long.class, proc.getProcId()); } - public long getReservedGpu(ProcInterface 
proc) { + public long getReservedGpuMemory(ProcInterface proc) { return getJdbcTemplate().queryForObject( "SELECT int_gpu_mem_reserved FROM proc WHERE pk_proc=?", Long.class, proc.getProcId()); @@ -694,22 +707,24 @@ private void procDestroyed(VirtualProc proc) { "SET " + "int_cores_idle = int_cores_idle + ?," + "int_mem_idle = int_mem_idle + ?, " + + "int_gpus_idle = int_gpus_idle + ?," + "int_gpu_mem_idle = int_gpu_mem_idle + ? " + "WHERE " + "pk_host = ?", - proc.coresReserved, proc.memoryReserved, proc.gpuReserved, proc.getHostId()); + proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved, proc.getHostId()); if (!proc.isLocalDispatch) { getJdbcTemplate().update( "UPDATE " + "subscription " + "SET " + - "int_cores = int_cores - ? " + + "int_cores = int_cores - ?," + + "int_gpus = int_gpus - ? " + "WHERE " + "pk_show = ? " + "AND " + "pk_alloc = ?", - proc.coresReserved, proc.getShowId(), + proc.coresReserved, proc.gpusReserved, proc.getShowId(), proc.getAllocationId()); } @@ -717,10 +732,11 @@ private void procDestroyed(VirtualProc proc) { "UPDATE " + "layer_resource " + "SET " + - "int_cores = int_cores - ? " + + "int_cores = int_cores - ?," + + "int_gpus = int_gpus - ? " + "WHERE " + "pk_layer = ?", - proc.coresReserved, proc.getLayerId()); + proc.coresReserved, proc.gpusReserved, proc.getLayerId()); if (!proc.isLocalDispatch) { @@ -728,33 +744,36 @@ private void procDestroyed(VirtualProc proc) { "UPDATE " + "job_resource " + "SET " + - "int_cores = int_cores - ? " + + "int_cores = int_cores - ?," + + "int_gpus = int_gpus - ? " + "WHERE " + "pk_job = ?", - proc.coresReserved, proc.getJobId()); + proc.coresReserved, proc.gpusReserved, proc.getJobId()); getJdbcTemplate().update( "UPDATE " + "folder_resource " + "SET " + - "int_cores = int_cores - ? " + + "int_cores = int_cores - ?," + + "int_gpus = int_gpus - ? 
" + "WHERE " + "pk_folder = " + "(SELECT pk_folder FROM job WHERE pk_job=?)", - proc.coresReserved, proc.getJobId()); + proc.coresReserved, proc.gpusReserved, proc.getJobId()); getJdbcTemplate().update( "UPDATE " + "point " + "SET " + - "int_cores = int_cores - ? " + + "int_cores = int_cores - ?, " + + "int_gpus = int_gpus - ? " + "WHERE " + "pk_dept = " + "(SELECT pk_dept FROM job WHERE pk_job=?) " + "AND " + "pk_show = " + "(SELECT pk_show FROM job WHERE pk_job=?) ", - proc.coresReserved, proc.getJobId(), proc.getJobId()); + proc.coresReserved, proc.gpusReserved, proc.getJobId(), proc.getJobId()); } if (proc.isLocalDispatch) { @@ -763,10 +782,11 @@ private void procDestroyed(VirtualProc proc) { "UPDATE " + "job_resource " + "SET " + - "int_local_cores = int_local_cores - ? " + + "int_local_cores = int_local_cores - ?, " + + "int_local_gpus = int_local_gpus - ? " + "WHERE " + "pk_job = ?", - proc.coresReserved, proc.getJobId()); + proc.coresReserved, proc.gpusReserved, proc.getJobId()); getJdbcTemplate().update( "UPDATE " + @@ -774,6 +794,7 @@ private void procDestroyed(VirtualProc proc) { "SET " + "int_cores_idle = int_cores_idle + ?, " + "int_mem_idle = int_mem_idle + ?, " + + "int_gpus_idle = int_gpus_idle + ?, " + "int_gpu_mem_idle = int_gpu_mem_idle + ? " + "WHERE " + "pk_job = ? " + @@ -781,7 +802,8 @@ private void procDestroyed(VirtualProc proc) { "pk_host = ? ", proc.coresReserved, proc.memoryReserved, - proc.gpuReserved, + proc.gpusReserved, + proc.gpuMemoryReserved, proc.getJobId(), proc.getHostId()); } @@ -802,10 +824,11 @@ private void procCreated(VirtualProc proc) { "SET " + "int_cores_idle = int_cores_idle - ?," + "int_mem_idle = int_mem_idle - ?, " + + "int_gpus_idle = int_gpus_idle - ?," + "int_gpu_mem_idle = int_gpu_mem_idle - ? 
" + "WHERE " + "pk_host = ?", - proc.coresReserved, proc.memoryReserved, proc.gpuReserved, proc.getHostId()); + proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved, proc.getHostId()); /** @@ -817,12 +840,13 @@ private void procCreated(VirtualProc proc) { "UPDATE " + "subscription " + "SET " + - "int_cores = int_cores + ? " + + "int_cores = int_cores + ?," + + "int_gpus = int_gpus + ? " + "WHERE " + "pk_show = ? " + "AND " + "pk_alloc = ?", - proc.coresReserved, proc.getShowId(), + proc.coresReserved, proc.gpusReserved, proc.getShowId(), proc.getAllocationId()); } @@ -830,10 +854,11 @@ private void procCreated(VirtualProc proc) { "UPDATE " + "layer_resource " + "SET " + - "int_cores = int_cores + ? " + + "int_cores = int_cores + ?," + + "int_gpus = int_gpus + ? " + "WHERE " + "pk_layer = ?", - proc.coresReserved, proc.getLayerId()); + proc.coresReserved, proc.gpusReserved, proc.getLayerId()); if (!proc.isLocalDispatch) { @@ -841,33 +866,36 @@ private void procCreated(VirtualProc proc) { "UPDATE " + "job_resource " + "SET " + - "int_cores = int_cores + ? " + + "int_cores = int_cores + ?," + + "int_gpus = int_gpus + ? " + "WHERE " + "pk_job = ?", - proc.coresReserved, proc.getJobId()); + proc.coresReserved, proc.gpusReserved, proc.getJobId()); getJdbcTemplate().update( "UPDATE " + "folder_resource " + "SET " + - "int_cores = int_cores + ? " + + "int_cores = int_cores + ?," + + "int_gpus = int_gpus + ? " + "WHERE " + "pk_folder = " + "(SELECT pk_folder FROM job WHERE pk_job=?)", - proc.coresReserved, proc.getJobId()); + proc.coresReserved, proc.gpusReserved, proc.getJobId()); getJdbcTemplate().update( "UPDATE " + "point " + "SET " + - "int_cores = int_cores + ? " + + "int_cores = int_cores + ?," + + "int_gpus = int_gpus + ? " + "WHERE " + "pk_dept = " + "(SELECT pk_dept FROM job WHERE pk_job=?) " + "AND " + "pk_show = " + "(SELECT pk_show FROM job WHERE pk_job=?) 
", - proc.coresReserved, proc.getJobId(), proc.getJobId()); + proc.coresReserved, proc.gpusReserved, proc.getJobId(), proc.getJobId()); } if (proc.isLocalDispatch) { @@ -876,23 +904,28 @@ private void procCreated(VirtualProc proc) { "UPDATE " + "job_resource " + "SET " + - "int_local_cores = int_local_cores + ? " + + "int_local_cores = int_local_cores + ?," + + "int_local_gpus = int_local_gpus + ? " + "WHERE " + "pk_job = ?", - proc.coresReserved, proc.getJobId()); + proc.coresReserved, proc.gpusReserved, proc.getJobId()); getJdbcTemplate().update( "UPDATE " + "host_local " + "SET " + "int_cores_idle = int_cores_idle - ?, " + - "int_mem_idle = int_mem_idle - ? " + + "int_mem_idle = int_mem_idle - ?," + + "int_gpus_idle = int_gpus_idle - ?, " + + "int_gpu_mem_idle = int_gpu_mem_idle - ? " + "WHERE " + "pk_job = ? " + "AND " + "pk_host = ?", proc.coresReserved, proc.memoryReserved, + proc.gpusReserved, + proc.gpuMemoryReserved, proc.getJobId(), proc.getHostId()); } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ServiceDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ServiceDaoJdbc.java index 65617d941..6330cc8cb 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ServiceDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ServiceDaoJdbc.java @@ -60,7 +60,9 @@ public ServiceEntity mapRow(ResultSet rs, int rowNum) throws SQLException { s.minCores = rs.getInt("int_cores_min"); s.maxCores = rs.getInt("int_cores_max"); s.minMemory = rs.getLong("int_mem_min"); - s.minGpu = rs.getLong("int_gpu_mem_min"); + s.minGpus = rs.getInt("int_gpus_min"); + s.maxGpus = rs.getInt("int_gpus_max"); + s.minGpuMemory = rs.getLong("int_gpu_mem_min"); s.threadable = rs.getBoolean("b_threadable"); s.tags = splitTags(rs.getString("str_tags")); s.timeout = rs.getInt("int_timeout"); @@ -79,7 +81,9 @@ public ServiceOverrideEntity mapRow(ResultSet rs, int rowNum) s.minCores = rs.getInt("int_cores_min"); s.maxCores = 
rs.getInt("int_cores_max"); s.minMemory = rs.getLong("int_mem_min"); - s.minGpu = rs.getLong("int_gpu_mem_min"); + s.minGpus = rs.getInt("int_gpus_min"); + s.maxGpus = rs.getInt("int_gpus_max"); + s.minGpuMemory = rs.getLong("int_gpu_mem_min"); s.threadable = rs.getBoolean("b_threadable"); s.tags = splitTags(rs.getString("str_tags")); s.showId = rs.getString("pk_show"); @@ -97,6 +101,8 @@ public ServiceOverrideEntity mapRow(ResultSet rs, int rowNum) "service.int_cores_min," + "service.int_cores_max," + "service.int_mem_min," + + "service.int_gpus_min," + + "service.int_gpus_max," + "service.int_gpu_mem_min," + "service.str_tags, " + "service.int_timeout, " + @@ -119,6 +125,8 @@ public ServiceEntity get(String id) { "show_service.int_cores_min," + "show_service.int_cores_max, "+ "show_service.int_mem_min," + + "show_service.int_gpus_min," + + "show_service.int_gpus_max, "+ "show_service.int_gpu_mem_min," + "show_service.str_tags," + "show_service.int_timeout," + @@ -167,18 +175,21 @@ public boolean isOverridden(String service, String show) { "int_cores_min," + "int_cores_max, "+ "int_mem_min," + + "int_gpus_min," + + "int_gpus_max, "+ "int_gpu_mem_min," + "str_tags," + "int_timeout," + "int_timeout_llu " + - ") VALUES (?,?,?,?,?,?,?,?,?,?)"; + ") VALUES (?,?,?,?,?,?,?,?,?,?,?,?)"; @Override public void insert(ServiceEntity service) { service.id = SqlUtil.genKeyRandom(); getJdbcTemplate().update(INSERT_SERVICE, service.id, service.name, service.threadable, service.minCores, - service.maxCores, service.minMemory, service.minGpu, + service.maxCores, service.minMemory, + service.minGpus, service.maxGpus, service.minGpuMemory, StringUtils.join(service.tags.toArray(), " | "), service.timeout, service.timeout_llu); } @@ -194,11 +205,13 @@ public void insert(ServiceEntity service) { "int_cores_min," + "int_cores_max," + "int_mem_min," + + "int_gpus_min," + + "int_gpus_max," + "int_gpu_mem_min," + "str_tags," + "int_timeout," + "int_timeout_llu " + - ") VALUES 
(?,?,?,?,?,?,?,?,?,?,?)"; + ") VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)"; @Override public void insert(ServiceOverrideEntity service) { @@ -206,7 +219,7 @@ public void insert(ServiceOverrideEntity service) { getJdbcTemplate().update(INSERT_SERVICE_WITH_SHOW, service.id, service.showId, service.name, service.threadable, service.minCores, service.maxCores, service.minMemory, - service.minGpu, joinTags(service.tags), + service.minGpus, service.maxGpus, service.minGpuMemory, joinTags(service.tags), service.timeout, service.timeout_llu); } @@ -219,6 +232,8 @@ service.minGpu, joinTags(service.tags), "int_cores_min=?," + "int_cores_max=?,"+ "int_mem_min=?," + + "int_gpus_min=?," + + "int_gpus_max=?," + "int_gpu_mem_min=?," + "str_tags=?," + "int_timeout=?," + @@ -230,7 +245,7 @@ service.minGpu, joinTags(service.tags), public void update(ServiceEntity service) { getJdbcTemplate().update(UPDATE_SERVICE, service.name, service.threadable, service.minCores, service.maxCores, - service.minMemory, service.minGpu, joinTags(service.tags), + service.minMemory, service.minGpus, service.maxGpus, service.minGpuMemory, joinTags(service.tags), service.timeout, service.timeout_llu, service.getId()); } @@ -243,6 +258,8 @@ service.minMemory, service.minGpu, joinTags(service.tags), "int_cores_min=?," + "int_cores_max=?," + "int_mem_min=?," + + "int_gpus_min=?," + + "int_gpus_max=?," + "int_gpu_mem_min=?," + "str_tags=?," + "int_timeout=?," + @@ -254,7 +271,7 @@ service.minMemory, service.minGpu, joinTags(service.tags), public void update(ServiceOverrideEntity service) { getJdbcTemplate().update(UPDATE_SERVICE_WITH_SHOW, service.name, service.threadable, service.minCores, service.maxCores, - service.minMemory, service.minGpu, joinTags(service.tags), + service.minMemory, service.minGpus, service.maxGpus, service.minGpuMemory, joinTags(service.tags), service.timeout, service.timeout_llu, service.getId()); } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ShowDaoJdbc.java 
b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ShowDaoJdbc.java index 893455be3..add49a178 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ShowDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ShowDaoJdbc.java @@ -44,6 +44,8 @@ public ShowEntity mapRow(ResultSet rs, int rowNum) throws SQLException { show.id = rs.getString("pk_show"); show.defaultMaxCores = rs.getInt("int_default_max_cores"); show.defaultMinCores = rs.getInt("int_default_min_cores"); + show.defaultMaxGpus = rs.getInt("int_default_max_gpus"); + show.defaultMinGpus = rs.getInt("int_default_min_gpus"); show.active = rs.getBoolean("b_active"); if (rs.getString("str_comment_email") != null) { @@ -61,6 +63,8 @@ public ShowEntity mapRow(ResultSet rs, int rowNum) throws SQLException { "show.pk_show, " + "show.int_default_max_cores, " + "show.int_default_min_cores, " + + "show.int_default_max_gpus, " + + "show.int_default_min_gpus, " + "show.str_name, " + "show.b_active, " + "show.str_comment_email " + @@ -72,6 +76,8 @@ public ShowEntity mapRow(ResultSet rs, int rowNum) throws SQLException { "show.pk_show, " + "show.int_default_max_cores, " + "show.int_default_min_cores, " + + "show.int_default_max_gpus, " + + "show.int_default_min_gpus, " + "show_alias.str_name, " + "show.b_active, " + "show.str_comment_email " + @@ -101,6 +107,8 @@ public ShowEntity getShowDetail(String id) { "show.pk_show, " + "show.int_default_max_cores, " + "show.int_default_min_cores, " + + "show.int_default_max_gpus, " + + "show.int_default_min_gpus, " + "show.str_name, " + "show.b_active, " + "show.str_comment_email " + @@ -180,6 +188,18 @@ public void updateShowDefaultMaxCores(ShowInterface s, int val) { val, s.getShowId()); } + public void updateShowDefaultMinGpus(ShowInterface s, int val) { + getJdbcTemplate().update( + "UPDATE show SET int_default_min_gpus=? 
WHERE pk_show=?", + val, s.getShowId()); + } + + public void updateShowDefaultMaxGpus(ShowInterface s, int val) { + getJdbcTemplate().update( + "UPDATE show SET int_default_max_gpus=? WHERE pk_show=?", + val, s.getShowId()); + } + @Override public void updateBookingEnabled(ShowInterface s, boolean enabled) { getJdbcTemplate().update( diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java index 259164118..1df1607cf 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java @@ -898,6 +898,9 @@ public RenderPartition mapRow(ResultSet rs, int rowNum) throws SQLException { .setThreads(rs.getInt("int_threads")) .setMaxMemory(rs.getLong("int_mem_max")) .setMemory( rs.getLong("int_mem_max") - rs.getLong("int_mem_idle")) + .setGpus(rs.getInt("int_gpus_max") - rs.getInt("int_gpus_idle")) + .setMaxGpus(rs.getInt("int_gpus_max")) + .setGpuMemory(rs.getLong("int_gpu_mem_max") - rs.getLong("int_gpu_mem_idle")) .setMaxGpuMemory(rs.getLong("int_gpu_mem_max")) .setHost(SqlUtil.getString(rs,"str_host_name")) .setJob(SqlUtil.getString(rs,"str_job_name")) @@ -950,11 +953,13 @@ public Proc mapRow(ResultSet rs, int row) throws SQLException { return Proc.newBuilder() .setId(SqlUtil.getString(rs,"pk_proc")) .setName(CueUtil.buildProcName(SqlUtil.getString(rs,"host_name"), - rs.getInt("int_cores_reserved"))) + rs.getInt("int_cores_reserved"), rs.getInt("int_gpus_reserved"))) .setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores_reserved"))) .setReservedMemory(rs.getLong("int_mem_reserved")) + .setReservedGpus(rs.getInt("int_gpus_reserved")) .setReservedGpuMemory(rs.getLong("int_gpu_mem_reserved")) .setUsedMemory(rs.getLong("int_mem_used")) + .setUsedGpuMemory(rs.getLong("int_gpu_mem_used")) .setFrameName(SqlUtil.getString(rs, "frame_name")) 
.setJobName(SqlUtil.getString(rs,"job_name")) .setGroupName(SqlUtil.getString(rs,"folder_name")) @@ -1016,6 +1021,8 @@ public static NestedHost.Builder mapNestedHostBuilder(ResultSet rs) throws SQLEx .setIdleCores(Convert.coreUnitsToCores(rs.getInt("int_cores_idle"))) .setMemory(rs.getLong("int_mem")) .setIdleMemory(rs.getLong("int_mem_idle")) + .setGpus(rs.getInt("int_gpus")) + .setIdleGpus(rs.getInt("int_gpus_idle")) .setGpuMemory(rs.getLong("int_gpu_mem")) .setIdleGpuMemory(rs.getLong("int_gpu_mem_idle")) .setState(HardwareState.valueOf(SqlUtil.getString(rs,"host_state"))) @@ -1051,6 +1058,8 @@ public static Host.Builder mapHostBuilder(ResultSet rs) throws SQLException { builder.setIdleCores(Convert.coreUnitsToCores(rs.getInt("int_cores_idle"))); builder.setMemory(rs.getLong("int_mem")); builder.setIdleMemory(rs.getLong("int_mem_idle")); + builder.setGpus(rs.getInt("int_gpus")); + builder.setIdleGpus(rs.getInt("int_gpus_idle")); builder.setGpuMemory(rs.getLong("int_gpu_mem")); builder.setIdleGpuMemory(rs.getLong("int_gpu_mem_idle")); builder.setState(HardwareState.valueOf(SqlUtil.getString(rs,"host_state"))); @@ -1112,6 +1121,11 @@ public Allocation mapRow(ResultSet rs, int rowNum) throws SQLException { .setIdleCores(Convert.coreUnitsToCores(rs.getInt("int_idle_cores"))) .setRunningCores(Convert.coreUnitsToCores(rs.getInt("int_running_cores"))) .setLockedCores(Convert.coreUnitsToCores(rs.getInt("int_locked_cores"))) + .setGpus(rs.getInt("int_gpus")) + .setAvailableGpus(rs.getInt("int_available_gpus")) + .setIdleGpus(rs.getInt("int_idle_gpus")) + .setRunningGpus(rs.getInt("int_running_gpus")) + .setLockedGpus(rs.getInt("int_locked_gpus")) .setHosts(rs.getInt("int_hosts")) .setDownHosts(rs.getInt("int_down_hosts")) .setLockedHosts(rs.getInt("int_locked_hosts")) @@ -1131,6 +1145,7 @@ public Group mapRow(ResultSet rs, int rowNum) throws SQLException { .setDependFrames(rs.getInt("int_depend_count")) .setPendingJobs(rs.getInt("int_job_count")) 
.setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores"))) + .setReservedGpus(rs.getInt("int_gpus")) .build(); return Group.newBuilder() .setId(SqlUtil.getString(rs,"pk_folder")) @@ -1139,8 +1154,12 @@ public Group mapRow(ResultSet rs, int rowNum) throws SQLException { .setDefaultJobPriority(rs.getInt("int_job_priority")) .setDefaultJobMinCores(Convert.coreUnitsToCores(rs.getInt("int_job_min_cores"))) .setDefaultJobMaxCores(Convert.coreUnitsToCores(rs.getInt("int_job_max_cores"))) + .setDefaultJobMinGpus(rs.getInt("int_job_min_gpus")) + .setDefaultJobMaxGpus(rs.getInt("int_job_max_gpus")) .setMaxCores(Convert.coreUnitsToCores(rs.getInt("int_max_cores"))) .setMinCores(Convert.coreUnitsToCores(rs.getInt("int_min_cores"))) + .setMaxGpus(rs.getInt("int_max_gpus")) + .setMinGpus(rs.getInt("int_min_gpus")) .setLevel(rs.getInt("int_level")) .setParentId(SqlUtil.getString(rs, "pk_parent_folder")) .setGroupStats(stats) @@ -1156,6 +1175,8 @@ public Job mapRow(ResultSet rs, int rowNum) throws SQLException { .setLogDir(SqlUtil.getString(rs, "str_log_dir")) .setMaxCores(Convert.coreUnitsToCores(rs.getInt("int_max_cores"))) .setMinCores(Convert.coreUnitsToCores(rs.getInt("int_min_cores"))) + .setMaxGpus(rs.getInt("int_max_gpus")) + .setMinGpus(rs.getInt("int_min_gpus")) .setName(SqlUtil.getString(rs,"str_name")) .setPriority(rs.getInt("int_priority")) .setShot(SqlUtil.getString(rs,"str_shot")) @@ -1192,6 +1213,7 @@ public static JobStats mapJobStats(ResultSet rs) throws SQLException { JobStats.Builder statsBuilder = JobStats.newBuilder() .setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores"))) + .setReservedGpus(rs.getInt("int_gpus")) .setMaxRss(rs.getLong("int_max_rss")) .setTotalFrames(rs.getInt("int_frame_count")) .setTotalLayers(rs.getInt("int_layer_count")) @@ -1205,6 +1227,9 @@ public static JobStats mapJobStats(ResultSet rs) throws SQLException { .setFailedCoreSec(rs.getLong("int_core_time_fail")) 
.setRenderedCoreSec(rs.getLong("int_core_time_success")) .setTotalCoreSec( rs.getLong("int_core_time_fail") + rs.getLong("int_core_time_success")) + .setFailedGpuSec(rs.getLong("int_gpu_time_fail")) + .setRenderedGpuSec(rs.getLong("int_gpu_time_success")) + .setTotalGpuSec(rs.getLong("int_gpu_time_fail") + rs.getLong("int_gpu_time_success")) .setRenderedFrameCount( rs.getLong("int_frame_success_count")) .setFailedFrameCount(rs.getLong("int_frame_fail_count")) .setHighFrameSec(rs.getInt("int_clock_time_high")); @@ -1239,6 +1264,8 @@ public Layer mapRow(ResultSet rs, int rowNum) throws SQLException { .setMaxCores(Convert.coreUnitsToCores(rs.getInt("int_cores_max"))) .setIsThreadable(rs.getBoolean("b_threadable")) .setMinMemory(rs.getLong("int_mem_min")) + .setMinGpus(rs.getInt("int_gpus_min")) + .setMaxGpus(rs.getInt("int_gpus_max")) .setMinGpuMemory(rs.getLong("int_gpu_mem_min")) .setType(LayerType.valueOf(SqlUtil.getString(rs,"str_type"))) .addAllTags(Sets.newHashSet( @@ -1252,6 +1279,7 @@ public Layer mapRow(ResultSet rs, int rowNum) throws SQLException { LayerStats.Builder statsBuilder = LayerStats.newBuilder() .setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores"))) + .setReservedGpus(rs.getInt("int_gpus")) .setMaxRss(rs.getLong("int_max_rss")) .setTotalFrames(rs.getInt("int_total_count")) .setWaitingFrames(rs.getInt("int_waiting_count")) @@ -1266,6 +1294,9 @@ public Layer mapRow(ResultSet rs, int rowNum) throws SQLException { .setRenderedCoreSec(rs.getLong("int_core_time_success")) .setTotalCoreSec( rs.getLong("int_core_time_fail") + rs.getLong("int_core_time_success")) + .setFailedGpuSec(rs.getLong("int_gpu_time_fail")) + .setRenderedGpuSec(rs.getLong("int_gpu_time_success")) + .setTotalGpuSec(rs.getLong("int_gpu_time_fail") + rs.getLong("int_gpu_time_success")) .setRenderedFrameCount( rs.getLong("int_frame_success_count")) .setFailedFrameCount(rs.getLong("int_frame_fail_count")) .setHighFrameSec(rs.getInt("int_clock_time_high")) @@ -1304,6 +1335,7 
@@ public Subscription mapRow(ResultSet rs, int rowNum) throws SQLException { .setBurst(rs.getInt("int_burst")) .setName(rs.getString("name")) .setReservedCores(rs.getInt("int_cores")) + .setReservedGpus(rs.getInt("int_gpus")) .setSize(rs.getInt("int_size")) .setAllocationName(rs.getString("alloc_name")) .setShowName(rs.getString("show_name")) @@ -1324,9 +1356,10 @@ public UpdatedFrame mapRow(ResultSet rs, int rowNum) throws SQLException { .setUsedMemory(rs.getInt("int_mem_used")); if (SqlUtil.getString(rs, "str_host") != null) { - builder.setLastResource(String.format(Locale.ROOT, "%s/%2.2f", + builder.setLastResource(String.format(Locale.ROOT, "%s/%2.2f/%d", SqlUtil.getString(rs, "str_host"), - Convert.coreUnitsToCores(rs.getInt("int_cores")))); + Convert.coreUnitsToCores(rs.getInt("int_cores")), + rs.getInt("int_gpus"))); } else { builder.setLastResource(""); } @@ -1370,7 +1403,7 @@ public Frame mapRow(ResultSet rs, int rowNum) throws SQLException { if (SqlUtil.getString(rs,"str_host") != null) { builder.setLastResource(CueUtil.buildProcName(SqlUtil.getString(rs,"str_host"), - rs.getInt("int_cores"))); + rs.getInt("int_cores"), rs.getInt("int_gpus"))); } else { builder.setLastResource(""); } @@ -1391,9 +1424,12 @@ public Frame mapRow(ResultSet rs, int rowNum) throws SQLException { } builder.setTotalCoreTime(rs.getInt("int_total_past_core_time")); + builder.setTotalGpuTime(rs.getInt("int_total_past_gpu_time")); if (builder.getState() == FrameState.RUNNING) { builder.setTotalCoreTime(builder.getTotalCoreTime() + (int)(System.currentTimeMillis() / 1000 - builder.getStartTime()) * rs.getInt("int_cores") / 100); + builder.setTotalGpuTime(builder.getTotalGpuTime() + + (int)(System.currentTimeMillis() / 1000 - builder.getStartTime()) * rs.getInt("int_gpus")); } return builder.build(); } @@ -1409,6 +1445,8 @@ public Service mapRow(ResultSet rs, int rowNum) throws SQLException { .setMinCores(rs.getInt("int_cores_min")) .setMaxCores(rs.getInt("int_cores_max")) 
.setMinMemory(rs.getInt("int_mem_min")) + .setMinGpus(rs.getInt("int_gpus_min")) + .setMaxGpus(rs.getInt("int_gpus_max")) .setMinGpuMemory(rs.getInt("int_gpu_mem_min")) .addAllTags(Lists.newArrayList(ServiceDaoJdbc.splitTags( SqlUtil.getString(rs,"str_tags")))) @@ -1428,6 +1466,8 @@ public ServiceOverride mapRow(ResultSet rs, int rowNum) throws SQLException { .setMinCores(rs.getInt("int_cores_min")) .setMaxCores(rs.getInt("int_cores_max")) .setMinMemory(rs.getInt("int_mem_min")) + .setMinGpus(rs.getInt("int_gpus_min")) + .setMaxGpus(rs.getInt("int_gpus_max")) .setMinGpuMemory(rs.getInt("int_gpu_mem_min")) .addAllTags(Lists.newArrayList(ServiceDaoJdbc.splitTags( SqlUtil.getString(rs,"str_tags")))) @@ -1453,6 +1493,7 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { .setRenderedFrameCount(rs.getLong("int_frame_success_count")) .setFailedFrameCount(rs.getLong("int_frame_fail_count")) .setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores"))) + .setReservedGpus(rs.getInt("int_gpus")) .setPendingJobs(rs.getInt("int_job_count")) .build(); return Show.newBuilder() @@ -1461,6 +1502,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { .setActive(rs.getBoolean("b_active")) .setDefaultMaxCores(Convert.coreUnitsToCores(rs.getInt("int_default_max_cores"))) .setDefaultMinCores(Convert.coreUnitsToCores(rs.getInt("int_default_min_cores"))) + .setDefaultMaxGpus(rs.getInt("int_default_max_gpus")) + .setDefaultMinGpus(rs.getInt("int_default_min_gpus")) .setBookingEnabled(rs.getBoolean("b_booking_enabled")) .setDispatchEnabled(rs.getBoolean("b_dispatch_enabled")) .setCommentEmail(SqlUtil.getString(rs,"str_comment_email")) @@ -1516,6 +1559,7 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "frame.str_state,"+ "frame.str_host,"+ "frame.int_cores,"+ + "frame.int_gpus,"+ "frame.int_mem_max_used," + "frame.int_mem_used, " + "frame.int_mem_reserved, " + @@ -1523,6 +1567,7 @@ public Show mapRow(ResultSet rs, int rowNum) 
throws SQLException { "frame.str_checkpoint_state,"+ "frame.int_checkpoint_count,"+ "frame.int_total_past_core_time,"+ + "frame.int_total_past_gpu_time,"+ "layer.str_name AS layer_name," + "job.str_name AS job_name "+ "FROM "+ @@ -1559,7 +1604,10 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "proc.int_mem_reserved, " + "proc.int_mem_used, " + "proc.int_mem_max_used, " + + "proc.int_gpus_reserved, " + "proc.int_gpu_mem_reserved, " + + "proc.int_gpu_mem_used, " + + "proc.int_gpu_mem_max_used, " + "proc.ts_ping, " + "proc.ts_booked, " + "proc.ts_dispatched, " + @@ -1596,6 +1644,7 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "frame.str_state,"+ "frame.str_host,"+ "frame.int_cores,"+ + "frame.int_gpus,"+ "frame.ts_llu,"+ "COALESCE(proc.int_mem_max_used, frame.int_mem_max_used) AS int_mem_max_used," + "COALESCE(proc.int_mem_used, frame.int_mem_used) AS int_mem_used " + @@ -1620,6 +1669,11 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "vs_alloc_usage.int_running_cores,"+ "vs_alloc_usage.int_available_cores,"+ "vs_alloc_usage.int_locked_cores,"+ + "vs_alloc_usage.int_gpus,"+ + "vs_alloc_usage.int_idle_gpus,"+ + "vs_alloc_usage.int_running_gpus,"+ + "vs_alloc_usage.int_available_gpus,"+ + "vs_alloc_usage.int_locked_gpus,"+ "vs_alloc_usage.int_hosts,"+ "vs_alloc_usage.int_locked_hosts,"+ "vs_alloc_usage.int_down_hosts "+ @@ -1653,6 +1707,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "str_ti_task,"+ "int_cores,"+ "int_min_cores,"+ + "int_gpus,"+ + "int_min_gpus,"+ "b_managed " + "FROM " + "point," + @@ -1675,6 +1731,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "str_ti_task,"+ "int_cores,"+ "int_min_cores,"+ + "int_gpus,"+ + "int_min_gpus,"+ "b_managed " + "FROM " + "point," + @@ -1705,6 +1763,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "host_local.pk_host_local,"+ "host_local.int_cores_idle,"+ "host_local.int_cores_max,"+ + 
"host_local.int_gpus_idle,"+ + "host_local.int_gpus_max,"+ "host_local.int_threads,"+ "host_local.int_mem_idle,"+ "host_local.int_mem_max,"+ @@ -1778,6 +1838,10 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "folder.int_job_max_cores," + "folder_resource.int_min_cores,"+ "folder_resource.int_max_cores,"+ + "folder.int_job_min_gpus," + + "folder.int_job_max_gpus," + + "folder_resource.int_min_gpus,"+ + "folder_resource.int_max_gpus,"+ "folder.b_default, " + "folder_level.int_level, " + "c.int_waiting_count, " + @@ -1785,7 +1849,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "c.int_running_count,"+ "c.int_dead_count,"+ "c.int_job_count,"+ - "c.int_cores " + + "c.int_cores," + + "c.int_gpus " + "FROM " + "folder, " + "folder_level," + @@ -1820,6 +1885,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "job.str_log_dir," + "job_resource.int_max_cores," + "job_resource.int_min_cores," + + "job_resource.int_max_gpus," + + "job_resource.int_min_gpus," + "job.str_name," + "job.str_shot,"+ "job.str_state,"+ @@ -1846,12 +1913,15 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "job_stat.int_succeeded_count, "+ "job_usage.int_core_time_success, "+ "job_usage.int_core_time_fail, " + + "job_usage.int_gpu_time_success, "+ + "job_usage.int_gpu_time_fail, " + "job_usage.int_frame_success_count, "+ "job_usage.int_frame_fail_count, "+ "job_usage.int_clock_time_high,"+ "job_usage.int_clock_time_success,"+ "job_mem.int_max_rss,"+ - "(job_resource.int_cores + job_resource.int_local_cores) AS int_cores " + + "(job_resource.int_cores + job_resource.int_local_cores) AS int_cores," + + "(job_resource.int_gpus + job_resource.int_local_gpus) AS int_gpus " + "FROM " + "job,"+ "folder,"+ @@ -1888,6 +1958,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "layer_stat.int_succeeded_count," + "layer_usage.int_core_time_success," + "layer_usage.int_core_time_fail, "+ + 
"layer_usage.int_gpu_time_success," + + "layer_usage.int_gpu_time_fail, "+ "layer_usage.int_frame_success_count, "+ "layer_usage.int_frame_fail_count, "+ "layer_usage.int_clock_time_low, "+ @@ -1895,7 +1967,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "layer_usage.int_clock_time_success," + "layer_usage.int_clock_time_fail," + "layer_mem.int_max_rss,"+ - "layer_resource.int_cores " + + "layer_resource.int_cores," + + "layer_resource.int_gpus " + "FROM " + "layer, " + "job," + @@ -1926,6 +1999,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "layer_stat.int_succeeded_count, " + "layer_usage.int_core_time_success, " + "layer_usage.int_core_time_fail, " + + "layer_usage.int_gpu_time_success, " + + "layer_usage.int_gpu_time_fail, " + "layer_usage.int_frame_success_count, " + "layer_usage.int_frame_fail_count, " + "layer_usage.int_clock_time_low, " + @@ -1934,6 +2009,7 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "layer_usage.int_clock_time_fail, " + "layer_mem.int_max_rss, " + "layer_resource.int_cores, " + + "layer_resource.int_gpus, " + "limit_names.str_limit_names " + "FROM " + "layer " + @@ -1979,6 +2055,7 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "COALESCE(vs_show_stat.int_running_count,0) AS int_running_count," + "COALESCE(vs_show_stat.int_dead_count,0) AS int_dead_count," + "COALESCE(vs_show_resource.int_cores,0) AS int_cores, " + + "COALESCE(vs_show_resource.int_gpus,0) AS int_gpus, " + "COALESCE(vs_show_stat.int_job_count,0) AS int_job_count " + "FROM " + "show " + @@ -1995,6 +2072,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "service.int_cores_min," + "service.int_cores_max," + "service.int_mem_min," + + "service.int_gpus_min," + + "service.int_gpus_max," + "service.int_gpu_mem_min," + "service.str_tags," + "service.int_timeout," + @@ -2010,6 +2089,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { 
"show_service.int_cores_min," + "show_service.int_cores_max," + "show_service.int_mem_min," + + "show_service.int_gpus_min," + + "show_service.int_gpus_max," + "show_service.int_gpu_mem_min," + "show_service.str_tags," + "show_service.int_timeout," + @@ -2026,6 +2107,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "task.str_shot,"+ "task.int_min_cores + task.int_adjust_cores AS int_min_cores, "+ "task.int_adjust_cores, " + + "task.int_min_gpus + task.int_adjust_gpus AS int_min_gpus, "+ + "task.int_adjust_gpus, " + "dept.str_name AS str_dept "+ "FROM " + "task,"+ @@ -2048,6 +2131,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "host.int_cores_idle,"+ "host.int_mem,"+ "host.int_mem_idle,"+ + "host.int_gpus,"+ + "host.int_gpus_idle,"+ "host.int_gpu_mem,"+ "host.int_gpu_mem_idle,"+ "host.str_tags,"+ @@ -2100,6 +2185,7 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "subscription.int_burst, " + "subscription.int_size, " + "subscription.int_cores, " + + "subscription.int_gpus, " + "show.str_name AS show_name, " + "alloc.str_name AS alloc_name, " + "facility.str_name AS facility_name " + @@ -2138,10 +2224,14 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { "frame.int_mem_max_used," + "frame.int_mem_used, " + "frame.int_mem_reserved, " + + "frame.int_gpus,"+ + "frame.int_gpu_mem_max_used, " + + "frame.int_gpu_mem_used, " + "frame.int_gpu_mem_reserved, " + "frame.str_checkpoint_state,"+ "frame.int_checkpoint_count,"+ "frame.int_total_past_core_time,"+ + "frame.int_total_past_gpu_time,"+ "layer.str_name AS layer_name," + "job.str_name AS job_name, "+ "ROW_NUMBER() OVER " + diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/AbstractDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/AbstractDispatcher.java index ddf3b2a2b..73f5aef73 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/AbstractDispatcher.java +++ 
b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/AbstractDispatcher.java @@ -129,6 +129,7 @@ public boolean dispatchHost(DispatchFrame frame, VirtualProc proc) { dispatchSummary(proc, frame, "Booking"); DispatchSupport.bookedProcs.getAndIncrement(); DispatchSupport.bookedCores.addAndGet(proc.coresReserved); + DispatchSupport.bookedGpus.addAndGet(proc.gpusReserved); return true; } catch (FrameReservationException fre) { /* @@ -222,8 +223,10 @@ private static void dispatchSummary(VirtualProc p, DispatchFrame f, String type) " cores / " + CueUtil.KbToMb(p.memoryReserved) + " memory / " + - p.gpuReserved + - " gpu on " + + p.gpusReserved + + " gpus / " + + CueUtil.KbToMb(p.gpuMemoryReserved) + + " gpu memory " + p.getName() + " to " + f.show + "/" + f.shot; logger.info(msg); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index d57caf3e9..beacefd97 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -135,7 +135,8 @@ private List dispatchJobs(DispatchHost host, Set jobs) { if (!host.hasAdditionalResources( Dispatcher.CORE_POINTS_RESERVED_MIN, Dispatcher.MEM_RESERVED_MIN, - Dispatcher.GPU_RESERVED_MIN)) { + Dispatcher.GPU_UNITS_RESERVED_MIN, + Dispatcher.MEM_GPU_RESERVED_MIN)) { return procs; } @@ -179,7 +180,8 @@ private Set getGpuJobs(DispatchHost host, ShowInterface show) { if (host.hasAdditionalResources( Dispatcher.CORE_POINTS_RESERVED_DEFAULT, Dispatcher.MEM_RESERVED_MIN, - 1)) { + Dispatcher.GPU_UNITS_RESERVED_DEFAULT, + Dispatcher.MEM_GPU_RESERVED_DEFAULT)) { if (show == null) jobs = dispatchSupport.findDispatchJobs(host, getIntProperty("dispatcher.job_query_max")); @@ -262,11 +264,12 @@ public List dispatchHost(DispatchHost host, JobInterface job) { if (host.idleCores < frame.minCores || host.idleMemory < 
frame.minMemory || - host.idleGpu < frame.minGpu) { + host.idleGpus < frame.minGpus || + host.idleGpuMemory < frame.minGpuMemory) { break; } - if (!dispatchSupport.isJobBookable(job, proc.coresReserved)) { + if (!dispatchSupport.isJobBookable(job, proc.coresReserved, proc.gpusReserved)) { break; } @@ -289,17 +292,19 @@ public void wrapDispatchFrame() { DispatchSupport.bookedProcs.getAndIncrement(); DispatchSupport.bookedCores.addAndGet(proc.coresReserved); + DispatchSupport.bookedGpus.addAndGet(proc.gpusReserved); if (host.strandedCores > 0) { dispatchSupport.pickupStrandedCores(host); break; } - host.useResources(proc.coresReserved, proc.memoryReserved, proc.gpuReserved); + host.useResources(proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved); if (!host.hasAdditionalResources( Dispatcher.CORE_POINTS_RESERVED_MIN, Dispatcher.MEM_RESERVED_MIN, - Dispatcher.GPU_RESERVED_MIN)) { + Dispatcher.GPU_UNITS_RESERVED_MIN, + Dispatcher.MEM_GPU_RESERVED_MIN)) { break; } else if (procs.size() >= getIntProperty("dispatcher.job_frame_dispatch_max")) { @@ -398,8 +403,10 @@ private void dispatchSummary(VirtualProc p, DispatchFrame f, String type) { " cores / " + CueUtil.KbToMb(p.memoryReserved) + " memory / " + - p.gpuReserved + - " gpu on " + + p.gpusReserved + + " gpus / " + + CueUtil.KbToMb(p.gpuMemoryReserved) + + " gpu memory " + p.getName() + " to " + f.show + "/" + f.shot; logger.trace(msg); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupport.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupport.java index ebdd5082d..47dac264a 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupport.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupport.java @@ -77,6 +77,11 @@ public interface DispatchSupport { */ static final AtomicLong bookedCores = new AtomicLong(0); + /** + * Long for counting how many gpus have been booked + */ + static final AtomicLong 
bookedGpus = new AtomicLong(0); + /** * Long for counting how many procs have been booked */ @@ -122,6 +127,16 @@ public interface DispatchSupport { */ static final AtomicLong strandedCoresCount = new AtomicLong(0); + /** + * Count number of picked up gpus. + */ + static final AtomicLong pickedUpGpusCount = new AtomicLong(0); + + /** + * Count number of stranded gpus. + */ + static final AtomicLong strandedGpusCount = new AtomicLong(0); + /** * Set the proc's frame assignment to null; * @@ -456,7 +471,7 @@ void updateProcMemoryUsage(FrameInterface frame, long rss, long maxRss, long vsi * @param job * @return */ - boolean isJobBookable(JobInterface job, int coreUnits); + boolean isJobBookable(JobInterface job, int coreUnits, int gpuUnits); /** * Return true if the specified show is at or over its @@ -511,6 +526,40 @@ void updateProcMemoryUsage(FrameInterface frame, long rss, long maxRss, long vsi */ void determineIdleCores(DispatchHost host, int load); + /** + * Pickup any gpus that were stranded on the given host. + * + * @param host + */ + void pickupStrandedGpus(DispatchHost host); + + /** + * Return true if the host has stranded gpus. + * + * @param host + * @return + */ + boolean hasStrandedGpus(HostInterface host); + + /** + * Add stranded gpus for the given host. Stranded + * gpus will automatically be added to the next frame dispatched + * from the host to make up for gpus stranded with no memory. + * + * @param host + * @param gpus + */ + void strandGpus(DispatchHost host, int gpus); + + /** + * Lowers the perceived idle gpus on a machine if + * the load is over certain threshold. + * + * @param host + * @param load + */ + void determineIdleGpus(DispatchHost host, int load); + /** * Return a set of job IDs that can take the given host. 
* diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java index 3e3d82b2f..ad1d8196c 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java @@ -42,6 +42,7 @@ import com.imageworks.spcue.ResourceUsage; import com.imageworks.spcue.ShowInterface; import com.imageworks.spcue.StrandedCores; +import com.imageworks.spcue.StrandedGpus; import com.imageworks.spcue.VirtualProc; import com.imageworks.spcue.dao.BookingDao; import com.imageworks.spcue.dao.DispatcherDao; @@ -82,6 +83,9 @@ public class DispatchSupportService implements DispatchSupport { private ConcurrentHashMap strandedCores = new ConcurrentHashMap(); + private ConcurrentHashMap strandedGpus = + new ConcurrentHashMap(); + @Override public void pickupStrandedCores(DispatchHost host) { logger.info(host + "picked up stranded cores"); @@ -113,6 +117,35 @@ public void strandCores(DispatchHost host, int cores) { strandedCoresCount.getAndIncrement(); } + @Override + public void pickupStrandedGpus(DispatchHost host) { + logger.info(host + "picked up stranded gpu"); + pickedUpGpusCount.getAndIncrement(); + strandedGpus.remove(host.getHostId()); + } + + @Override + public boolean hasStrandedGpus(HostInterface host) { + StrandedGpus stranded = strandedGpus.get(host.getHostId()); + if (stranded == null) { + return false; + } + if (stranded.isExpired()) { + return false; + } + + return true; + } + + @Override + public void strandGpus(DispatchHost host, int gpus) { + logger.info(host + " found " + gpus + ", stranded gpu"); + host.strandedGpus = gpus; + strandedGpus.putIfAbsent(host.getHostId(), new StrandedGpus(gpus)); + strandedGpusCount.getAndIncrement(); + } + + @Transactional(readOnly = true) public List findNextDispatchFrames(JobInterface job, VirtualProc proc, int limit) { 
return dispatcherDao.findNextDispatchFrames(job, proc, limit); @@ -245,7 +278,7 @@ public boolean isJobBookable(JobInterface job) { @Override @Transactional(propagation = Propagation.REQUIRED, readOnly=true) - public boolean isJobBookable(JobInterface job, int coreUnits) { + public boolean isJobBookable(JobInterface job, int coreUnits, int gpuUnits) { if (!jobDao.hasPendingFrames(job)) { return false; @@ -255,6 +288,10 @@ public boolean isJobBookable(JobInterface job, int coreUnits) { return false; } + if (jobDao.isOverMaxGpus(job, gpuUnits)) { + return false; + } + return true; } @@ -363,6 +400,7 @@ public RunFrame prepareRqdRunFrame(VirtualProc proc, DispatchFrame frame) { .setLayerId(frame.getLayerId()) .setResourceId(proc.getProcId()) .setNumCores(proc.coresReserved) + .setNumGpus(proc.gpusReserved) .setStartTime(System.currentTimeMillis()) .setIgnoreNimby(proc.isLocalDispatch) .putAllEnvironment(jobDao.getEnvironment(frame)) @@ -370,6 +408,8 @@ public RunFrame prepareRqdRunFrame(VirtualProc proc, DispatchFrame frame) { .putEnvironment("CUE3", "1") .putEnvironment("CUE_THREADS", String.valueOf(threads)) .putEnvironment("CUE_MEMORY", String.valueOf(proc.memoryReserved)) + .putEnvironment("CUE_GPUS", String.valueOf(proc.gpusReserved)) + .putEnvironment("CUE_GPU_MEMORY", String.valueOf(proc.gpuMemoryReserved)) .putEnvironment("CUE_LOG_PATH", frame.logDir) .putEnvironment("CUE_RANGE", frame.range) .putEnvironment("CUE_CHUNK", String.valueOf(frame.chunkSize)) @@ -575,6 +615,14 @@ public void determineIdleCores(DispatchHost host, int load) { } } + @Override + public void determineIdleGpus(DispatchHost host, int load) { + int idleGpu = host.gpus - load; + if (idleGpu < host.idleGpus) { + host.idleGpus = idleGpu; + } + } + public DispatcherDao getDispatcherDao() { return dispatcherDao; } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java index d29c51f9c..3440fb595 
100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java @@ -44,6 +44,10 @@ public interface Dispatcher { // The minimum amount of core points you can assign to a frame. public static final int CORE_POINTS_RESERVED_MIN = 10; + // The minimum amount of gpu points you can assign to a frame. + public static final int GPU_UNITS_RESERVED_DEFAULT = 0; + public static final int GPU_UNITS_RESERVED_MIN = 0; + // Amount of load per core a host can have before the perceived // number of idle cores is modified to reflect load conditions // on the host. @@ -69,13 +73,13 @@ public interface Dispatcher { // The default amount of gpu memory reserved for a frame if no gpu memory // reservation settings are specified - public static final long GPU_RESERVED_DEFAULT = 0; + public static final long MEM_GPU_RESERVED_DEFAULT = 0; // The minimum amount of gpu memory that can be assigned to a frame. - public static final long GPU_RESERVED_MIN = 0; + public static final long MEM_GPU_RESERVED_MIN = 0; // The maximum amount of gpu memory that can be assigned to a frame. 
- public static final long GPU_RESERVED_MAX = CueUtil.GB4; + public static final long MEM_GPU_RESERVED_MAX = CueUtil.GB * 1024; // Return value for cleared frame public static final int EXIT_STATUS_FRAME_CLEARED = 299; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/FrameCompleteHandler.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/FrameCompleteHandler.java index fe2482720..20afd374c 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/FrameCompleteHandler.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/FrameCompleteHandler.java @@ -401,7 +401,7 @@ else if (report.getHost().getNimbyLocked()) { // Then check for higher priority jobs // If not, rebook this job if (job.autoUnbook && proc.coresReserved >= 100) { - if (jobManager.isOverMinCores(job)) { + if (jobManager.isOverMinCores(job) && jobManager.isOverMinGpus(job)) { try { boolean unbook = diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java index 815689bb4..b17eab87e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java @@ -135,18 +135,6 @@ public void handleHostReport(HostReport report, boolean isBoot) { long startTime = System.currentTimeMillis(); try { - long totalGpu; - if (report.getHost().getAttributesMap().containsKey("totalGpu")) - totalGpu = Integer.parseInt(report.getHost().getAttributesMap().get("totalGpu")); - else - totalGpu = 0; - - long freeGpu; - if (report.getHost().getAttributesMap().containsKey("freeGpu")) - freeGpu = Integer.parseInt(report.getHost().getAttributesMap().get("freeGpu")); - else - freeGpu = 0; - long swapOut = 0; if (report.getHost().getAttributesMap().containsKey("swapout")) { swapOut = Integer.parseInt(report.getHost().getAttributesMap().get("swapout")); @@ -163,7 +151,7 @@ public void 
handleHostReport(HostReport report, boolean isBoot) { rhost.getTotalMem(), rhost.getFreeMem(), rhost.getTotalSwap(), rhost.getFreeSwap(), rhost.getTotalMcp(), rhost.getFreeMcp(), - totalGpu, freeGpu, + rhost.getTotalGpuMem(), rhost.getFreeGpuMem(), rhost.getLoad(), new Timestamp(rhost.getBootTime() * 1000l), rhost.getAttributesMap().get("SP_OS")); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/LocalDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/LocalDispatcher.java index 55497b83e..23bf6f73a 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/LocalDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/LocalDispatcher.java @@ -111,7 +111,8 @@ private List dispatchHost(DispatchHost host, JobInterface job, */ if (!lha.hasAdditionalResources(lha.getThreads() * 100, frame.minMemory, - frame.minGpu)) { + frame.minGpus, + frame.minGpuMemory)) { continue; } @@ -141,10 +142,11 @@ private List dispatchHost(DispatchHost host, JobInterface job, * This should stay here and not go into VirtualProc * or else the count will be off if you fail to book. */ - lha.useResources(proc.coresReserved, proc.memoryReserved, proc.gpuReserved); + lha.useResources(proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved); if (!lha.hasAdditionalResources(lha.getThreads() * 100, Dispatcher.MEM_RESERVED_MIN, - Dispatcher.GPU_RESERVED_MIN)) { + Dispatcher.GPU_UNITS_RESERVED_MIN, + Dispatcher.MEM_GPU_RESERVED_MIN)) { break; } @@ -196,7 +198,8 @@ private List dispatchHost(DispatchHost host, LayerInterface layer, */ if (!lha.hasAdditionalResources(lha.getThreads() * 100, frame.minMemory, - frame.minGpu)) { + frame.minGpus, + frame.minGpuMemory)) { continue; } @@ -226,10 +229,11 @@ private List dispatchHost(DispatchHost host, LayerInterface layer, * This should stay here and not go into VirtualProc * or else the count will be off if you fail to book. 
*/ - lha.useResources(proc.coresReserved, proc.memoryReserved, proc.gpuReserved); + lha.useResources(proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved); if (!lha.hasAdditionalResources(100, Dispatcher.MEM_RESERVED_MIN, - Dispatcher.GPU_RESERVED_MIN)) { + Dispatcher.GPU_UNITS_RESERVED_MIN, + Dispatcher.MEM_GPU_RESERVED_MIN)) { break; } @@ -272,7 +276,8 @@ private List dispatchHost(DispatchHost host, FrameInterface frame, DispatchFrame dframe = jobManager.getDispatchFrame(frame.getId()); if (!lha.hasAdditionalResources(lha.getMaxCoreUnits(), dframe.minMemory, - dframe.minGpu)) { + lha.getMaxGpuUnits(), + dframe.minGpuMemory)) { return procs; } @@ -382,7 +387,8 @@ private void prepHost(DispatchHost host, LocalHostAssignment lha) { host.isLocalDispatch = true; host.idleCores = lha.getIdleCoreUnits(); host.idleMemory = lha.getIdleMemory(); - host.idleGpu = lha.getIdleGpu(); + host.idleGpus = lha.getIdleGpuUnits(); + host.idleGpuMemory = lha.getIdleGpuMemory(); } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/RedirectManager.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/RedirectManager.java index a3519f10e..24b1681e9 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/RedirectManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/RedirectManager.java @@ -258,6 +258,8 @@ public boolean addRedirect(VirtualProc proc, GroupInterface group, DispatchHost host = hostManager.getDispatchHost(proc.getHostId()); host.idleCores = proc.coresReserved; host.idleMemory = proc.memoryReserved; + host.idleGpus = proc.gpusReserved; + host.idleGpuMemory = proc.gpuMemoryReserved; if (dispatchSupport.findDispatchJobs(host, group).size() < 1) { logger.info("Failed to find a pending job in group: " + group.getName()); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/ResourceContainer.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/ResourceContainer.java index 
c829eb390..0d1141bc1 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/ResourceContainer.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/ResourceContainer.java @@ -27,19 +27,21 @@ public interface ResourceContainer { * * @param minCores * @param minMemory - * @param minGpu + * @param minGpus + * @param minGpuMemory * @return */ - public boolean hasAdditionalResources(int minCores, long minMemory, long minGpu); + public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, long minGpuMemory); /** * Subtract the given resources from the grand totals. * * @param coreUnits * @param memory - * @param gpu + * @param gpuUnits + * @param gpuMemory */ - public void useResources(int coreUnits, long memory, long gpu); + public void useResources(int coreUnits, long memory, int gpuUnits, long gpuMemory); } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/commands/DispatchBookHost.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/commands/DispatchBookHost.java index 6ddaa9a3b..0013171db 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/commands/DispatchBookHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/commands/DispatchBookHost.java @@ -82,14 +82,16 @@ else if (job != null) { if (host.hasAdditionalResources( Dispatcher.CORE_POINTS_RESERVED_MIN, Dispatcher.MEM_RESERVED_MIN, - Dispatcher.GPU_RESERVED_MIN)) { + Dispatcher.GPU_UNITS_RESERVED_MIN, + Dispatcher.MEM_GPU_RESERVED_MIN)) { dispatcher.dispatchHost(host); } if (host.hasAdditionalResources( Dispatcher.CORE_POINTS_RESERVED_MIN, Dispatcher.MEM_RESERVED_MIN, - Dispatcher.GPU_RESERVED_MIN)) { + Dispatcher.GPU_UNITS_RESERVED_MIN, + Dispatcher.MEM_GPU_RESERVED_MIN)) { dispatcher.dispatchHostToAllShows(host); } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageGroup.java b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageGroup.java index 8fd7e10ce..b8f3cd43e 100644 --- 
a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageGroup.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageGroup.java @@ -55,6 +55,10 @@ import com.imageworks.spcue.grpc.job.GroupSetDefJobMaxCoresResponse; import com.imageworks.spcue.grpc.job.GroupSetDefJobMinCoresRequest; import com.imageworks.spcue.grpc.job.GroupSetDefJobMinCoresResponse; +import com.imageworks.spcue.grpc.job.GroupSetDefJobMaxGpusRequest; +import com.imageworks.spcue.grpc.job.GroupSetDefJobMaxGpusResponse; +import com.imageworks.spcue.grpc.job.GroupSetDefJobMinGpusRequest; +import com.imageworks.spcue.grpc.job.GroupSetDefJobMinGpusResponse; import com.imageworks.spcue.grpc.job.GroupSetDefJobPriorityRequest; import com.imageworks.spcue.grpc.job.GroupSetDefJobPriorityResponse; import com.imageworks.spcue.grpc.job.GroupSetDeptRequest; @@ -65,6 +69,10 @@ import com.imageworks.spcue.grpc.job.GroupSetMaxCoresResponse; import com.imageworks.spcue.grpc.job.GroupSetMinCoresRequest; import com.imageworks.spcue.grpc.job.GroupSetMinCoresResponse; +import com.imageworks.spcue.grpc.job.GroupSetMaxGpusRequest; +import com.imageworks.spcue.grpc.job.GroupSetMaxGpusResponse; +import com.imageworks.spcue.grpc.job.GroupSetMinGpusRequest; +import com.imageworks.spcue.grpc.job.GroupSetMinGpusResponse; import com.imageworks.spcue.grpc.job.GroupSetNameRequest; import com.imageworks.spcue.grpc.job.GroupSetNameResponse; import com.imageworks.spcue.grpc.job.Job; @@ -189,6 +197,24 @@ public void setDefaultJobMinCores(GroupSetDefJobMinCoresRequest request, StreamO responseObserver.onCompleted(); } + @Override + public void setDefaultJobMaxGpus(GroupSetDefJobMaxGpusRequest request, + StreamObserver responseObserver) { + GroupInterface group = getGroupInterface(request.getGroup()); + groupManager.setGroupDefaultJobMaxGpus(group, request.getMaxGpus()); + responseObserver.onNext(GroupSetDefJobMaxGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + + @Override + public void 
setDefaultJobMinGpus(GroupSetDefJobMinGpusRequest request, + StreamObserver responseObserver) { + GroupInterface group = getGroupInterface(request.getGroup()); + groupManager.setGroupDefaultJobMinGpus(group, request.getMinGpus()); + responseObserver.onNext(GroupSetDefJobMinGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + @Override public void setName(GroupSetNameRequest request, StreamObserver responseObserver) { GroupInterface group = getGroupInterface(request.getGroup()); @@ -262,6 +288,24 @@ public void setMinCores(GroupSetMinCoresRequest request, responseObserver.onCompleted(); } + @Override + public void setMaxGpus(GroupSetMaxGpusRequest request, + StreamObserver responseObserver) { + GroupInterface group = getGroupInterface(request.getGroup()); + groupManager.setGroupMaxGpus(group, request.getMaxGpus()); + responseObserver.onNext(GroupSetMaxGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + + @Override + public void setMinGpus(GroupSetMinGpusRequest request, + StreamObserver responseObserver) { + GroupInterface group = getGroupInterface(request.getGroup()); + groupManager.setGroupMinGpus(group, request.getMinGpus()); + responseObserver.onNext(GroupSetMinGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + public GroupDao getGroupDao() { return groupDao; } diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageJob.java b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageJob.java index 6ef7715b6..c13177a74 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageJob.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageJob.java @@ -124,10 +124,14 @@ import com.imageworks.spcue.grpc.job.JobSetGroupResponse; import com.imageworks.spcue.grpc.job.JobSetMaxCoresRequest; import com.imageworks.spcue.grpc.job.JobSetMaxCoresResponse; +import com.imageworks.spcue.grpc.job.JobSetMaxGpusRequest; +import com.imageworks.spcue.grpc.job.JobSetMaxGpusResponse; 
import com.imageworks.spcue.grpc.job.JobSetMaxRetriesRequest; import com.imageworks.spcue.grpc.job.JobSetMaxRetriesResponse; import com.imageworks.spcue.grpc.job.JobSetMinCoresRequest; import com.imageworks.spcue.grpc.job.JobSetMinCoresResponse; +import com.imageworks.spcue.grpc.job.JobSetMinGpusRequest; +import com.imageworks.spcue.grpc.job.JobSetMinGpusResponse; import com.imageworks.spcue.grpc.job.JobSetPriorityRequest; import com.imageworks.spcue.grpc.job.JobSetPriorityResponse; import com.imageworks.spcue.grpc.job.JobStaggerFramesRequest; @@ -376,6 +380,36 @@ public void setMinCores(JobSetMinCoresRequest request, StreamObserver responseObserver) { + try{ + setupJobData(request.getJob()); + jobDao.updateMaxGpus(job, request.getVal()); + responseObserver.onNext(JobSetMaxGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + catch (EmptyResultDataAccessException e) { + responseObserver.onError(Status.INTERNAL + .withDescription("Failed to find job data") + .asRuntimeException()); + } + } + + @Override + public void setMinGpus(JobSetMinGpusRequest request, StreamObserver responseObserver) { + try{ + setupJobData(request.getJob()); + jobDao.updateMinGpus(job, request.getVal()); + responseObserver.onNext(JobSetMinGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + catch (EmptyResultDataAccessException e) { + responseObserver.onError(Status.INTERNAL + .withDescription("Failed to find job data") + .asRuntimeException()); + } + } + @Override public void setPriority(JobSetPriorityRequest request, StreamObserver responseObserver) { try{ @@ -772,6 +806,7 @@ public void addRenderPartition(JobAddRenderPartRequest request, StreamObserver responseObserver) { + updateLayer(request.getLayer()); + jobManager.setLayerMinGpus(layer, request.getMinGpus()); + responseObserver.onNext(LayerSetMinGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + @Override public void setMinMemory(LayerSetMinMemoryRequest request, 
StreamObserver responseObserver) { updateLayer(request.getLayer()); @@ -237,7 +249,7 @@ public void setMinMemory(LayerSetMinMemoryRequest request, StreamObserver responseObserver) { updateLayer(request.getLayer()); - layerDao.updateLayerMinGpu(layer, request.getGpuMemory()); + layerDao.updateLayerMinGpuMemory(layer, request.getGpuMemory()); responseObserver.onNext(LayerSetMinGpuMemoryResponse.newBuilder().build()); responseObserver.onCompleted(); } @@ -389,6 +401,7 @@ public void addRenderPartition(LayerAddRenderPartitionRequest request, lha.setThreads(request.getThreads()); lha.setMaxCoreUnits(request.getMaxCores() * 100); lha.setMaxMemory(request.getMaxMemory()); + lha.setMaxGpuUnits(request.getMaxGpus()); lha.setMaxGpuMemory(request.getMaxGpuMemory()); lha.setType(RenderPartitionType.LAYER_PARTITION); if (localBookingSupport.bookLocal(layer, request.getHost(), request.getUsername(), lha)) { @@ -450,6 +463,14 @@ public void setMaxCores(LayerSetMaxCoresRequest request, StreamObserver responseObserver) { + updateLayer(request.getLayer()); + jobManager.setLayerMaxGpus(layer, request.getMaxGpus()); + responseObserver.onNext(LayerSetMaxGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + public DependManager getDependManager() { return dependManager; } diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageRenderPartition.java b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageRenderPartition.java index 7c630c118..413f1982c 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageRenderPartition.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageRenderPartition.java @@ -45,7 +45,7 @@ public void delete(RenderPartDeleteRequest request, StreamObserver responseObserver) { LocalHostAssignment localJobAssign = getLocalHostAssignment(request.getRenderPartition()); - bookingManager.setMaxResources(localJobAssign, request.getCores(), request.getMemory(), request.getGpuMemory()); + 
bookingManager.setMaxResources(localJobAssign, request.getCores(), request.getMemory(), request.getGpus(), request.getGpuMemory()); responseObserver.onNext(RenderPartSetMaxResourcesResponse.newBuilder().build()); responseObserver.onCompleted(); } diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageService.java b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageService.java index e49fc4e9a..70a15f3bf 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageService.java @@ -55,7 +55,9 @@ public void createService(ServiceCreateServiceRequest request, service.minCores = request.getData().getMinCores(); service.maxCores = request.getData().getMaxCores(); service.minMemory = request.getData().getMinMemory(); - service.minGpu = request.getData().getMinGpuMemory(); + service.minGpus = request.getData().getMinGpus(); + service.maxGpus = request.getData().getMaxGpus(); + service.minGpuMemory = request.getData().getMinGpuMemory(); service.tags = Sets.newLinkedHashSet(request.getData().getTagsList()); service.threadable = request.getData().getThreadable(); service.timeout = request.getData().getTimeout(); @@ -129,7 +131,9 @@ private ServiceEntity toServiceEntity(Service service) { entity.minCores = service.getMinCores(); entity.maxCores = service.getMaxCores(); entity.minMemory = service.getMinMemory(); - entity.minGpu = service.getMinGpuMemory(); + entity.minGpus = service.getMinGpus(); + entity.maxGpus = service.getMaxGpus(); + entity.minGpuMemory = service.getMinGpuMemory(); entity.tags = new LinkedHashSet<> (service.getTagsList()); entity.threadable = service.getThreadable(); entity.timeout = service.getTimeout(); diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageServiceOverride.java b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageServiceOverride.java index 49cb60016..ed3d46107 100644 --- 
a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageServiceOverride.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageServiceOverride.java @@ -67,7 +67,9 @@ private ServiceEntity toServiceEntity(Service service) { entity.minCores = service.getMinCores(); entity.maxCores = service.getMaxCores(); entity.minMemory = service.getMinMemory(); - entity.minGpu = service.getMinGpuMemory(); + entity.minGpus = service.getMinGpus(); + entity.maxGpus = service.getMaxGpus(); + entity.minGpuMemory = service.getMinGpuMemory(); entity.tags = new LinkedHashSet<>(service.getTagsList()); entity.threadable = service.getThreadable(); entity.timeout = service.getTimeout(); diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageShow.java b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageShow.java index 934f301a0..6e5fbcbe8 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageShow.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageShow.java @@ -93,6 +93,10 @@ import com.imageworks.spcue.grpc.show.ShowSetDefaultMaxCoresResponse; import com.imageworks.spcue.grpc.show.ShowSetDefaultMinCoresRequest; import com.imageworks.spcue.grpc.show.ShowSetDefaultMinCoresResponse; +import com.imageworks.spcue.grpc.show.ShowSetDefaultMaxGpusRequest; +import com.imageworks.spcue.grpc.show.ShowSetDefaultMaxGpusResponse; +import com.imageworks.spcue.grpc.show.ShowSetDefaultMinGpusRequest; +import com.imageworks.spcue.grpc.show.ShowSetDefaultMinGpusResponse; import com.imageworks.spcue.grpc.subscription.Subscription; import com.imageworks.spcue.grpc.subscription.SubscriptionSeq; import com.imageworks.spcue.service.AdminManager; @@ -257,6 +261,24 @@ public void setDefaultMinCores(ShowSetDefaultMinCoresRequest request, responseObserver.onCompleted(); } + @Override + public void setDefaultMaxGpus(ShowSetDefaultMaxGpusRequest request, + StreamObserver responseObserver) { + ShowEntity show = getShowEntity(request.getShow()); + 
showDao.updateShowDefaultMaxGpus(show, request.getMaxGpus()); + responseObserver.onNext(ShowSetDefaultMaxGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + + @Override + public void setDefaultMinGpus(ShowSetDefaultMinGpusRequest request, + StreamObserver responseObserver) { + ShowEntity show = getShowEntity(request.getShow()); + showDao.updateShowDefaultMinGpus(show, request.getMinGpus()); + responseObserver.onNext(ShowSetDefaultMinGpusResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + @Override public void findFilter(ShowFindFilterRequest request, StreamObserver responseObserver) { @@ -361,7 +383,9 @@ public void createServiceOverride(ShowCreateServiceOverrideRequest request, service.minCores = requestService.getMinCores(); service.maxCores = requestService.getMaxCores(); service.minMemory = requestService.getMinMemory(); - service.minGpu = requestService.getMinGpuMemory(); + service.minGpus = requestService.getMinGpus(); + service.maxGpus = requestService.getMaxGpus(); + service.minGpuMemory = requestService.getMinGpuMemory(); service.tags = Sets.newLinkedHashSet(requestService.getTagsList()); service.threadable = requestService.getThreadable(); serviceManager.createService(service); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/BookingManager.java b/cuebot/src/main/java/com/imageworks/spcue/service/BookingManager.java index 51a04bfa3..11ff9fd46 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/BookingManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/BookingManager.java @@ -122,9 +122,10 @@ public void createLocalHostAssignment(DispatchHost host, * @param l * @param maxCoreUnits * @param maxMemory - * @param maxGpu + * @param maxGpuUnits + * @param maxGpuMemory */ - void setMaxResources(LocalHostAssignment l, int maxCoreUnits, long maxMemory, long maxGpu); + void setMaxResources(LocalHostAssignment l, int maxCoreUnits, long maxMemory, int maxGpuUnits, long 
maxGpuMemory); /** * Remove a LocalHostAssignment if there are no procs assigned to it. diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/BookingManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/BookingManagerService.java index 91abdab4b..092dbbb59 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/BookingManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/BookingManagerService.java @@ -72,7 +72,7 @@ public boolean hasActiveLocalFrames(HostInterface host) { @Override public void setMaxResources(LocalHostAssignment l, int maxCoreUnits, - long maxMemory, long maxGpu) { + long maxMemory, int maxGpuUnits, long maxGpuMemory) { HostInterface host = hostDao.getHost(l.getHostId()); @@ -84,8 +84,12 @@ public void setMaxResources(LocalHostAssignment l, int maxCoreUnits, bookingDao.updateMaxMemory(l, maxMemory); } - if (maxGpu > 0) { - bookingDao.updateMaxGpu(l, maxGpu); + if (maxGpuUnits > 0) { + bookingDao.updateMaxGpus(l, maxGpuUnits); + } + + if (maxGpuMemory > 0) { + bookingDao.updateMaxGpuMemory(l, maxGpuMemory); } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/GroupManager.java b/cuebot/src/main/java/com/imageworks/spcue/service/GroupManager.java index 9017304f8..2e3cf70be 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/GroupManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/GroupManager.java @@ -34,6 +34,10 @@ public interface GroupManager { void setGroupMinCores(GroupInterface g, int coreUnits); void setGroupDefaultJobMinCores(GroupInterface g, int coreUnits); void setGroupDefaultJobMaxCores(GroupInterface g, int coreUnits); + void setGroupMaxGpus(GroupInterface g, int gpuUnits); + void setGroupMinGpus(GroupInterface g, int gpuUnits); + void setGroupDefaultJobMinGpus(GroupInterface g, int gpuUnits); + void setGroupDefaultJobMaxGpus(GroupInterface g, int gpuUnits); void setGroupDefaultJobPriority(GroupInterface g, int priority); /** diff --git 
a/cuebot/src/main/java/com/imageworks/spcue/service/GroupManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/GroupManagerService.java index 7e785c0ea..89fc25193 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/GroupManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/GroupManagerService.java @@ -78,6 +78,32 @@ public void setGroupMinCores(GroupInterface g, int coreUnits) { groupDao.updateMinCores(g,coreUnits); } + @Override + public void setGroupDefaultJobMaxGpus(GroupInterface g, int gpuUnits) { + groupDao.updateDefaultJobMaxGpus(g,gpuUnits); + if (gpuUnits != CueUtil.FEATURE_DISABLED && !groupDao.isManaged(g)) { + jobDao.updateMaxGpus(g, gpuUnits); + } + } + + @Override + public void setGroupDefaultJobMinGpus(GroupInterface g, int gpuUnits) { + groupDao.updateDefaultJobMinGpus(g,gpuUnits); + if (gpuUnits != CueUtil.FEATURE_DISABLED && !groupDao.isManaged(g)) { + jobDao.updateMinGpus(g, gpuUnits); + } + } + + @Override + public void setGroupMaxGpus(GroupInterface g, int gpuUnits) { + groupDao.updateMaxGpus(g, gpuUnits); + } + + @Override + public void setGroupMinGpus(GroupInterface g, int gpuUnits) { + groupDao.updateMinGpus(g, gpuUnits); + } + @Override public void setGroupParent(GroupInterface group, GroupInterface newParent) { groupDao.updateGroupParent(group, newParent); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java index 19704e65e..8b176c77e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java @@ -112,8 +112,8 @@ public interface HostManager { * @param freeSwap * @param totalMcp * @param freeMcp - * @param totalGpu - * @param freeGpu + * @param totalGpuMemory + * @param freeGpuMemory * @param load * @param bootTime * @param os @@ -122,7 +122,7 @@ void setHostStatistics(HostInterface host, long 
totalMemory, long freeMemory, long totalSwap, long freeSwap, long totalMcp, long freeMcp, - long totalGpu, long freeGpu, + long totalGpuMemory, long freeGpuMemory, int load, Timestamp bootTime, String os); @@ -212,6 +212,11 @@ void setHostStatistics(HostInterface host, */ int getStrandedCoreUnits(HostInterface h); + /** + * Return the number of stranded gpu units on the host. + */ + int getStrandedGpuUnits(HostInterface h); + /** * Return true of the host prefers a particular show. * diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java index ee081ecc7..da3464ddf 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java @@ -123,7 +123,7 @@ public void setHostStatistics(HostInterface host, long totalMemory, long freeMemory, long totalSwap, long freeSwap, long totalMcp, long freeMcp, - long totalGpu, long freeGpu, + long totalGpuMemory, long freeGpuMemory, int load, Timestamp bootTime, String os) { @@ -131,7 +131,7 @@ public void setHostStatistics(HostInterface host, totalMemory, freeMemory, totalSwap, freeSwap, totalMcp, freeMcp, - totalGpu, freeGpu, + totalGpuMemory, freeGpuMemory, load, bootTime, os); } @@ -246,6 +246,12 @@ public int getStrandedCoreUnits(HostInterface h) { return hostDao.getStrandedCoreUnits(h); } + @Override + @Transactional(propagation = Propagation.REQUIRED, readOnly=true) + public int getStrandedGpuUnits(HostInterface h) { + return hostDao.getStrandedGpus(h); + } + @Override @Transactional(propagation = Propagation.REQUIRED, readOnly=true) public boolean verifyRunningProc(String procId, String frameId) { diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobLauncher.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobLauncher.java index 86a7090d9..14f2a5741 100644 ---
a/cuebot/src/main/java/com/imageworks/spcue/service/JobLauncher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobLauncher.java @@ -109,6 +109,7 @@ public void launch(final JobSpec spec) { lha.setThreads(d.localThreadNumber); lha.setMaxCoreUnits(d.localMaxCores * 100); lha.setMaxMemory(d.localMaxMemory); + lha.setMaxGpuUnits(d.localMaxGpus); lha.setMaxGpuMemory(d.localMaxGpuMemory); lha.setType(RenderPartitionType.JOB_PARTITION); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobManager.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobManager.java index 4ab1b2120..6ab4bb38e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobManager.java @@ -292,6 +292,14 @@ public interface JobManager { */ boolean isOverMinCores(JobInterface job); + /** + * Return true if the given job is booked greater than min gpus. + * + * @param job + * @return + */ + boolean isOverMinGpus(JobInterface job); + /** * Increase the layer memory requirement to given KB value. * @@ -453,6 +461,22 @@ public interface JobManager { */ void setLayerMinCores(LayerInterface layer, int coreUnits); + /** + * Update the max gpu value for the given layer. + * + * @param layer + * @param gpuUnits + */ + void setLayerMaxGpus(LayerInterface layer, int gpuUnits); + + /** + * Update the min gpu value for the given layer. + * + * @param layer + * @param gpuUnits + */ + void setLayerMinGpus(LayerInterface layer, int gpuUnits); + /** * Add a limit to the given layer. 
* diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java index 68821ed64..a4f6f1ebb 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java @@ -135,6 +135,12 @@ public boolean isOverMinCores(JobInterface job) { return jobDao.isOverMinCores(job); } + @Override + @Transactional(propagation = Propagation.REQUIRED, readOnly=true) + public boolean isOverMinGpus(JobInterface job) { + return jobDao.isOverMinGpus(job); + } + @Transactional(propagation = Propagation.REQUIRED, readOnly=true) public DispatchJob getDispatchJob(String id) { return jobDao.getDispatchJob(id); @@ -450,6 +456,16 @@ public void setLayerMaxCores(LayerInterface layer, int coreUnits) { layerDao.updateLayerMaxCores(layer, coreUnits); } + @Override + public void setLayerMinGpus(LayerInterface layer, int gpu) { + layerDao.updateLayerMinGpus(layer, gpu); + } + + @Override + public void setLayerMaxGpus(LayerInterface layer, int gpu) { + layerDao.updateLayerMaxGpus(layer, gpu); + } + @Override public void addLayerLimit(LayerInterface layer, String limitId) { layerDao.addLimit(layer, limitId); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index c46e4feec..30bf7cfd3 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -286,15 +286,17 @@ private BuildableJob handleJobTag(Element jobTag) { if (local.getAttributeValue("cores") != null) job.localMaxCores = Integer.parseInt(local.getAttributeValue("cores")); if (local.getAttributeValue("memory") != null) - job.localMaxMemory = Integer.parseInt(local.getAttributeValue("memory")); + job.localMaxMemory = Long.parseLong(local.getAttributeValue("memory")); if 
(local.getAttributeValue("threads") != null) job.localThreadNumber = Integer.parseInt(local.getAttributeValue("threads")); + if (local.getAttributeValue("gpus") != null) + job.localMaxGpus = Integer.parseInt(local.getAttributeValue("gpus")); if (local.getAttributeValue("gpu") != null) { logger.warn(job.name + " localbook has the deprecated gpu. Use gpu_memory."); - job.localMaxGpuMemory = Integer.parseInt(local.getAttributeValue("gpu")); + job.localMaxGpuMemory = Long.parseLong(local.getAttributeValue("gpu")); } if (local.getAttributeValue("gpu_memory") != null) - job.localMaxGpuMemory = Integer.parseInt(local.getAttributeValue("gpu_memory")); + job.localMaxGpuMemory = Long.parseLong(local.getAttributeValue("gpu_memory")); } job.maxCoreUnits = 20000; @@ -427,11 +429,12 @@ private void handleLayerTags(BuildableJob buildableJob, Element jobTag) { determineResourceDefaults(layerTag, buildableJob, layer); determineChunkSize(layerTag, layer); determineMinimumCores(layerTag, layer); + determineMinimumGpus(layerTag, layer); determineThreadable(layerTag, layer); determineTags(buildableJob, layer, layerTag); determineMinimumMemory(buildableJob, layerTag, layer, buildableLayer); - determineMinimumGpu(buildableJob, layerTag, layer); + determineMinimumGpuMemory(buildableJob, layerTag, layer); // set a timeout value on the layer if (layerTag.getChildTextTrim("timeout") != null) { @@ -525,12 +528,12 @@ else if (minMemory < Dispatcher.MEM_RESERVED_MIN) { } /** - * If the gpu_memory option is set, set minimumGpu to that supplied value + * If the gpu_memory option is set, set minimumGpuMemory to that supplied value * * @param layerTag * @param layer */ - private void determineMinimumGpu(BuildableJob buildableJob, Element layerTag, + private void determineMinimumGpuMemory(BuildableJob buildableJob, Element layerTag, LayerDetail layer) { String gpu = layerTag.getChildTextTrim("gpu"); @@ -548,30 +551,30 @@ private void determineMinimumGpu(BuildableJob buildableJob, Element layerTag, 
if (gpuMemory != null) memory = gpuMemory.toLowerCase(); - long minGpu; + long minGpuMemory; try { - minGpu = convertMemoryInput(memory); + minGpuMemory = convertMemoryInput(memory); // Some quick sanity checks to make sure gpu memory hasn't gone // over or under reasonable defaults. - if (minGpu > Dispatcher.GPU_RESERVED_MAX) { + if (minGpuMemory > Dispatcher.MEM_GPU_RESERVED_MAX) { throw new SpecBuilderException("Gpu memory requirements exceed " + "maximum. Are you specifying the correct units?"); } - else if (minGpu < Dispatcher.GPU_RESERVED_MIN) { + else if (minGpuMemory < Dispatcher.MEM_GPU_RESERVED_MIN) { logger.warn(buildableJob.detail.name + "/" + layer.name + "Specified too little gpu memory, defaulting to: " + - Dispatcher.GPU_RESERVED_MIN); - minGpu = Dispatcher.GPU_RESERVED_MIN; + Dispatcher.MEM_GPU_RESERVED_MIN); + minGpuMemory = Dispatcher.MEM_GPU_RESERVED_MIN; } - layer.minimumGpu = minGpu; + layer.minimumGpuMemory = minGpuMemory; } catch (Exception e) { logger.info("Error setting gpu memory for " + buildableJob.detail.name + "/" + layer.name + " failed, reason: " + e + ". Using default."); - layer.minimumGpu = Dispatcher.GPU_RESERVED_DEFAULT; + layer.minimumGpuMemory = Dispatcher.MEM_GPU_RESERVED_DEFAULT; } } @@ -611,6 +614,20 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { layer.minimumCores = corePoints; } + /** + * Gpu is a int. 
+ * + * If no gpus element is specified, layer.minimumGpus is left unchanged, + * keeping whatever value was assigned before this method ran. + */ + private void determineMinimumGpus(Element layerTag, LayerDetail layer) { + + String gpus = layerTag.getChildTextTrim("gpus"); + if (gpus != null) { + layer.minimumGpus = Integer.valueOf(gpus); + } + } + private void determineChunkSize(Element layerTag, LayerDetail layer) { layer.chunkSize = Integer.parseInt(layerTag.getChildTextTrim("chunk")); } @@ -715,7 +732,9 @@ private void determineResourceDefaults(Element layerTag, layer.maximumCores = primaryService.maxCores; layer.minimumCores = primaryService.minCores; layer.minimumMemory = primaryService.minMemory; - layer.minimumGpu = primaryService.minGpu; + layer.maximumGpus = primaryService.maxGpus; + layer.minimumGpus = primaryService.minGpus; + layer.minimumGpuMemory = primaryService.minGpuMemory; layer.tags.addAll(primaryService.tags); layer.services.addAll(services); layer.limits.addAll(limits); diff --git a/cuebot/src/main/java/com/imageworks/spcue/util/CueUtil.java b/cuebot/src/main/java/com/imageworks/spcue/util/CueUtil.java index a7d89e7ee..82d002967 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/util/CueUtil.java +++ b/cuebot/src/main/java/com/imageworks/spcue/util/CueUtil.java @@ -235,10 +235,10 @@ public final static String buildFrameName(LayerInterface layer, int num) { return String.format("%04d-%s", num, layer.getName()); } - public final static String buildProcName(String host, int cores) { - return String.format(Locale.ROOT, "%s/%4.2f", host, Convert.coreUnitsToCores(cores)); - + public final static String buildProcName(String host, int cores, int gpus) { + return String.format(Locale.ROOT, "%s/%4.2f/%d", host, Convert.coreUnitsToCores(cores), gpus); } + /** * for logging how long an operation took * diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/BookingDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/BookingDaoTests.java index
e04911bd8..1511bc7b5 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/BookingDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/BookingDaoTests.java @@ -444,7 +444,7 @@ public void updateMaxGpu() { assertEquals(CueUtil.GB2, lj2.getMaxMemory()); assertEquals(1, lj2.getMaxGpuMemory()); - bookingDao.updateMaxGpu(lja, 2); + bookingDao.updateMaxGpuMemory(lja, 2); lj2 = bookingDao.getLocalJobAssignment(lja.id); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/JobDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/JobDaoTests.java index 31f8dca66..bde9ee86f 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/JobDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/JobDaoTests.java @@ -639,7 +639,7 @@ public void testUpdateUsage() { JobInterface job = jobDao.findJob(spec.getJobs().get(0).detail.name); /** 60 seconds of 100 core units **/ - ResourceUsage usage = new ResourceUsage(60, 33); + ResourceUsage usage = new ResourceUsage(60, 33, 0); assertTrue(usage.getClockTimeSeconds() > 0); assertTrue(usage.getCoreTimeSeconds() > 0); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/LayerDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/LayerDaoTests.java index 61ee6e864..692570eb5 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/LayerDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/LayerDaoTests.java @@ -474,7 +474,7 @@ public void updateMinMemory() { public void updateMinGpu() { long gpu = CueUtil.GB; LayerDetail layer = getLayer(); - layerDao.updateMinGpu(layer, gpu, LayerType.RENDER); + layerDao.updateMinGpuMemory(layer, gpu, LayerType.RENDER); assertEquals(Long.valueOf(gpu),jdbcTemplate.queryForObject( "SELECT int_gpu_min FROM layer WHERE pk_layer=?", Long.class, layer.getLayerId())); @@ -590,7 +590,7 @@ public void testUpdateUsage() { 
Integer.class, layer.getId())); /** 60 seconds of 100 core units **/ - ResourceUsage usage = new ResourceUsage(60, 33); + ResourceUsage usage = new ResourceUsage(60, 33, 0); assertTrue(usage.getClockTimeSeconds() > 0); assertTrue(usage.getCoreTimeSeconds() > 0); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/ProcDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/ProcDaoTests.java index a069234f1..78e4c2e64 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/ProcDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/ProcDaoTests.java @@ -645,11 +645,11 @@ public void testGetReservedGpu() { procDao.insertVirtualProc(proc); VirtualProc _proc = procDao.findVirtualProc(frame); - assertEquals(Long.valueOf(Dispatcher.GPU_RESERVED_DEFAULT), jdbcTemplate.queryForObject( + assertEquals(Long.valueOf(Dispatcher.MEM_GPU_RESERVED_DEFAULT), jdbcTemplate.queryForObject( "SELECT int_gpu_reserved FROM proc WHERE pk_proc=?", Long.class, _proc.id)); - assertEquals(Dispatcher.GPU_RESERVED_DEFAULT, - procDao.getReservedGpu(_proc)); + assertEquals(Dispatcher.MEM_GPU_RESERVED_DEFAULT, + procDao.getReservedGpuMemory(_proc)); } @Test diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/ServiceDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/ServiceDaoTests.java index 5a90c256a..fcedb30b6 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/ServiceDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/ServiceDaoTests.java @@ -68,7 +68,7 @@ public void testInsertService() { s.name = "dillweed"; s.minCores = 100; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB; + s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); @@ -84,7 +84,7 @@ public void testUpdateService() { s.name = "dillweed"; s.minCores = 100; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB; + 
s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); @@ -94,7 +94,7 @@ public void testUpdateService() { s.name = "smacktest"; s.minCores = 200; s.minMemory = CueUtil.GB8; - s.minGpu = CueUtil.GB2; + s.minGpuMemory = CueUtil.GB2; s.threadable = true; s.tags = Sets.newLinkedHashSet(); s.tags.add("linux"); @@ -117,7 +117,7 @@ public void testDeleteService() { s.name = "dillweed"; s.minCores = 100; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB; + s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); @@ -139,7 +139,7 @@ public void testInsertServiceOverride() { s.name = "dillweed"; s.minCores = 100; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB; + s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); s.showId = "00000000-0000-0000-0000-000000000000"; @@ -156,7 +156,7 @@ public void testUpdateServiceOverride() { s.name = "dillweed"; s.minCores = 100; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB2; + s.minGpuMemory = CueUtil.GB2; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); s.showId = "00000000-0000-0000-0000-000000000000"; @@ -168,7 +168,7 @@ public void testUpdateServiceOverride() { s.name = "smacktest"; s.minCores = 200; s.minMemory = CueUtil.GB8; - s.minGpu = CueUtil.GB4; + s.minGpuMemory = CueUtil.GB4; s.threadable = true; s.tags = Sets.newLinkedHashSet(); s.tags.add("linux"); @@ -179,7 +179,7 @@ public void testUpdateServiceOverride() { assertEquals(s.name, s1.name); assertEquals(s.minCores, s1.minCores); assertEquals(s.minMemory, s1.minMemory); - assertEquals(s.minGpu, s1.minGpu); + assertEquals(s.minGpuMemory, s1.minGpuMemory); assertEquals(s.threadable, s1.threadable); assertEquals(s.tags.toArray()[0], s1.tags.toArray()[0]); } @@ -192,7 +192,7 @@ public void testDeleteServiceOverride() { s.name = "dillweed"; s.minCores = 100; s.minMemory = 
CueUtil.GB4; - s.minGpu = CueUtil.GB; + s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); s.showId = "00000000-0000-0000-0000-000000000000"; diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/WhiteboardDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/WhiteboardDaoTests.java index 294d22718..74144ad2f 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/WhiteboardDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/oracle/WhiteboardDaoTests.java @@ -1243,7 +1243,7 @@ public void getRenderPartition() { jobLauncher.launch(new File("src/test/resources/conf/jobspec/jobspec_dispatch_test.xml")); JobDetail job = jobManager.findJobDetail("pipe-dev.cue-testuser_shell_dispatch_test_v1"); - LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 1, 1); bookingManager.createLocalHostAssignment(hd, job, lba); whiteboardDao.getRenderPartition(lba); @@ -1260,7 +1260,7 @@ public void getRenderPartitionsByHost() { jobLauncher.launch(new File("src/test/resources/conf/jobspec/jobspec_dispatch_test.xml")); JobDetail job = jobManager.findJobDetail("pipe-dev.cue-testuser_shell_dispatch_test_v1"); - LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 1, 1); bookingManager.createLocalHostAssignment(hd, job, lba); assertEquals(1, whiteboardDao.getRenderPartitions(hd).getRenderPartitionsCount()); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/BookingDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/BookingDaoTests.java index 6105dbb00..c6c03d604 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/BookingDaoTests.java +++ 
b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/BookingDaoTests.java @@ -109,8 +109,8 @@ public DispatchHost createHost() { .setState(HardwareState.UP) .setFacility("spi") .addTags("general") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); DispatchHost dh = hostManager.createHost(host); hostManager.setAllocation(dh, @@ -424,7 +424,7 @@ public void updateMaxMemory() { @Test @Transactional @Rollback(true) - public void updateMaxGpu() { + public void updateMaxGpuMemory() { DispatchHost h = createHost(); JobDetail j = launchJob(); @@ -444,7 +444,7 @@ public void updateMaxGpu() { assertEquals(CueUtil.GB2, lj2.getMaxMemory()); assertEquals(1, lj2.getMaxGpuMemory()); - bookingDao.updateMaxGpu(lja, 2); + bookingDao.updateMaxGpuMemory(lja, 2); lj2 = bookingDao.getLocalJobAssignment(lja.id); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/CommentDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/CommentDaoTests.java index 886400823..668e666e9 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/CommentDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/CommentDaoTests.java @@ -153,8 +153,8 @@ public void testInsertCommentOnHost() { .addTags("linux") .setState(HardwareState.UP) .setFacility("spi") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); CommentDetail d = new CommentDetail(); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/DeedDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/DeedDaoTests.java index 3b06eb59c..e9c63764d 100644 --- 
a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/DeedDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/DeedDaoTests.java @@ -86,8 +86,8 @@ public DispatchHost createHost() { .addTags("general") .setState(HardwareState.UP) .setFacility("spi") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); DispatchHost dh = hostManager.createHost(host); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/FrameDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/FrameDaoTests.java index 602b68b96..c505116e9 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/FrameDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/FrameDaoTests.java @@ -60,6 +60,7 @@ import com.imageworks.spcue.service.JobLauncher; import com.imageworks.spcue.service.JobManager; import com.imageworks.spcue.test.AssumingPostgresEngine; +import com.imageworks.spcue.util.CueUtil; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -126,8 +127,8 @@ public void create() { .addAllTags(ImmutableList.of("mcore", "4core", "8g")) .setState(HardwareState.UP) .setFacility("spi") - .putAttributes("freeGpu", "512") - .putAttributes("totalGpu", "512") + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); hostManager.createHost(host); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java index 9327fc8dd..bf7ae6ca1 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java @@ -100,8 +100,8 @@ public static RenderHost 
buildRenderHost(String name) { .addAllTags(ImmutableList.of("linux", "64bit")) .setState(HardwareState.UP) .setFacility("spi") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); return host; diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/JobDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/JobDaoTests.java index 3c51f34c1..47bcff37f 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/JobDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/JobDaoTests.java @@ -639,7 +639,7 @@ public void testUpdateUsage() { JobInterface job = jobDao.findJob(spec.getJobs().get(0).detail.name); /** 60 seconds of 100 core units **/ - ResourceUsage usage = new ResourceUsage(60, 33); + ResourceUsage usage = new ResourceUsage(60, 33, 0); assertTrue(usage.getClockTimeSeconds() > 0); assertTrue(usage.getCoreTimeSeconds() > 0); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/LayerDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/LayerDaoTests.java index a09281239..06864a9bc 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/LayerDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/LayerDaoTests.java @@ -471,11 +471,11 @@ public void updateMinMemory() { @Test @Transactional @Rollback(true) - public void updateMinGpu() { - long gpu = CueUtil.GB; + public void updateMinGpuMemory() { + long mem = CueUtil.GB; LayerDetail layer = getLayer(); - layerDao.updateMinGpu(layer, gpu, LayerType.RENDER); - assertEquals(Long.valueOf(gpu),jdbcTemplate.queryForObject( + layerDao.updateMinGpuMemory(layer, mem, LayerType.RENDER); + assertEquals(Long.valueOf(mem),jdbcTemplate.queryForObject( "SELECT int_gpu_mem_min FROM layer WHERE pk_layer=?", 
Long.class, layer.getLayerId())); } @@ -590,7 +590,7 @@ public void testUpdateUsage() { Integer.class, layer.getId())); /** 60 seconds of 100 core units **/ - ResourceUsage usage = new ResourceUsage(60, 33); + ResourceUsage usage = new ResourceUsage(60, 33, 0); assertTrue(usage.getClockTimeSeconds() > 0); assertTrue(usage.getCoreTimeSeconds() > 0); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java index 60668b428..6c9efc3e5 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java @@ -633,7 +633,7 @@ public void testGetReservedMemory() { @Test @Transactional @Rollback(true) - public void testGetReservedGpu() { + public void testGetReservedGpuMemory() { DispatchHost host = createHost(); JobDetail job = launchJob(); @@ -645,11 +645,11 @@ public void testGetReservedGpu() { procDao.insertVirtualProc(proc); VirtualProc _proc = procDao.findVirtualProc(frame); - assertEquals(Long.valueOf(Dispatcher.GPU_RESERVED_DEFAULT), jdbcTemplate.queryForObject( + assertEquals(Long.valueOf(Dispatcher.MEM_GPU_RESERVED_DEFAULT), jdbcTemplate.queryForObject( "SELECT int_gpu_mem_reserved FROM proc WHERE pk_proc=?", Long.class, _proc.id)); - assertEquals(Dispatcher.GPU_RESERVED_DEFAULT, - procDao.getReservedGpu(_proc)); + assertEquals(Dispatcher.MEM_GPU_RESERVED_DEFAULT, + procDao.getReservedGpuMemory(_proc)); } @Test diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ServiceDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ServiceDaoTests.java index fee824fc1..16168f245 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ServiceDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ServiceDaoTests.java @@ -70,7 +70,7 @@ public void testInsertService() { s.timeout 
= 0; s.timeout_llu = 0; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB; + s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); @@ -88,7 +88,7 @@ public void testUpdateService() { s.timeout = 0; s.timeout_llu = 0; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB; + s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); @@ -100,7 +100,7 @@ public void testUpdateService() { s.timeout = 0; s.timeout_llu = 0; s.minMemory = CueUtil.GB8; - s.minGpu = CueUtil.GB2; + s.minGpuMemory = CueUtil.GB2; s.threadable = true; s.tags = Sets.newLinkedHashSet(); s.tags.add("linux"); @@ -125,7 +125,7 @@ public void testDeleteService() { s.timeout = 0; s.timeout_llu = 0; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB; + s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); @@ -149,7 +149,7 @@ public void testInsertServiceOverride() { s.timeout = 0; s.timeout_llu = 0; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB; + s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); s.showId = "00000000-0000-0000-0000-000000000000"; @@ -168,7 +168,7 @@ public void testUpdateServiceOverride() { s.timeout = 0; s.timeout_llu = 0; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB2; + s.minGpuMemory = CueUtil.GB2; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); s.showId = "00000000-0000-0000-0000-000000000000"; @@ -182,7 +182,7 @@ public void testUpdateServiceOverride() { s.timeout = 10; s.timeout_llu = 10; s.minMemory = CueUtil.GB8; - s.minGpu = CueUtil.GB4; + s.minGpuMemory = CueUtil.GB4; s.threadable = true; s.tags = Sets.newLinkedHashSet(); s.tags.add("linux"); @@ -195,7 +195,7 @@ public void testUpdateServiceOverride() { assertEquals(s.timeout, s1.timeout); assertEquals(s.timeout_llu, s1.timeout_llu); assertEquals(s.minMemory, 
s1.minMemory); - assertEquals(s.minGpu, s1.minGpu); + assertEquals(s.minGpuMemory, s1.minGpuMemory); assertEquals(s.threadable, s1.threadable); assertEquals(s.tags.toArray()[0], s1.tags.toArray()[0]); } @@ -210,7 +210,7 @@ public void testDeleteServiceOverride() { s.timeout = 0; s.timeout_llu = 0; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB; + s.minGpuMemory = CueUtil.GB; s.threadable = false; s.tags.addAll(Sets.newHashSet(new String[] { "general"})); s.showId = "00000000-0000-0000-0000-000000000000"; diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ShowDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ShowDaoTests.java index b27114554..d430ab3b0 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ShowDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ShowDaoTests.java @@ -84,8 +84,8 @@ public DispatchHost createHost() { .addTags("general") .setState(HardwareState.UP) .setFacility("spi") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); DispatchHost dh = hostManager.createHost(host); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/WhiteboardDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/WhiteboardDaoTests.java index 99449337b..8807514d4 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/WhiteboardDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/WhiteboardDaoTests.java @@ -278,8 +278,8 @@ public RenderHost getRenderHost() { .setCoresPerProc(400) .setState(HardwareState.DOWN) .setFacility("spi") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) 
CueUtil.MB512) .build(); return host; } @@ -1246,7 +1246,7 @@ public void getRenderPartition() { jobLauncher.launch(new File("src/test/resources/conf/jobspec/jobspec_dispatch_test.xml")); JobDetail job = jobManager.findJobDetail("pipe-dev.cue-testuser_shell_dispatch_test_v1"); - LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 1, 1); bookingManager.createLocalHostAssignment(hd, job, lba); whiteboardDao.getRenderPartition(lba); @@ -1263,7 +1263,7 @@ public void getRenderPartitionsByHost() { jobLauncher.launch(new File("src/test/resources/conf/jobspec/jobspec_dispatch_test.xml")); JobDetail job = jobManager.findJobDetail("pipe-dev.cue-testuser_shell_dispatch_test_v1"); - LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 1, 1); bookingManager.createLocalHostAssignment(hd, job, lba); assertEquals(1, whiteboardDao.getRenderPartitions(hd).getRenderPartitionsCount()); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/CoreUnitDispatcherGpuJobTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/CoreUnitDispatcherGpuJobTests.java index d99041b65..4cc1c1f03 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/CoreUnitDispatcherGpuJobTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/CoreUnitDispatcherGpuJobTests.java @@ -114,8 +114,8 @@ public void createHost() { .setState(HardwareState.UP) .setFacility("spi") .putAttributes("SP_OS", "Linux") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); hostManager.createHost(host, @@ -153,7 +153,7 @@ public void testDispatchGpuRemovedHostToNonGpuJob() { host.idleMemory = host.idleMemory - 
Math.min(CueUtil.GB4, host.idleMemory); host.idleCores = host.idleCores - Math.min(100, host.idleCores); - host.idleGpu = 0; + host.idleGpuMemory = 0; List procs = dispatcher.dispatchHost(host, job); assertEquals(0, procs.size()); } diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/CoreUnitDispatcherGpuTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/CoreUnitDispatcherGpuTests.java index 9318258ad..0a4f6b74a 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/CoreUnitDispatcherGpuTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/CoreUnitDispatcherGpuTests.java @@ -114,8 +114,8 @@ public void createHost() { .setState(HardwareState.UP) .setFacility("spi") .putAttributes("SP_OS", "Linux") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); hostManager.createHost(host, @@ -153,7 +153,7 @@ public void testDispatchGpuRemovedHostToNonGpuJob() { host.idleMemory = host.idleMemory - Math.min(CueUtil.GB4, host.idleMemory); host.idleCores = host.idleCores - Math.min(100, host.idleCores); - host.idleGpu = 0; + host.idleGpuMemory = 0; List procs = dispatcher.dispatchHost(host, job); assertEquals(1, procs.size()); } @@ -202,17 +202,20 @@ public void testDispatchHostRemoveRestoreGpu() { long idleMemoryOrig = host.idleMemory; int idleCoresOrig = host.idleCores; - long idleGpuOrig = host.idleGpu; + long idleGpuMemoryOrig = host.idleGpuMemory; + int idleGpusOrig = host.idleGpus; host.removeGpu(); - assertEquals(0, host.idleGpu); + assertEquals(0, host.idleGpuMemory); + assertEquals(0, host.idleGpus); assertEquals(idleMemoryOrig - CueUtil.GB4, host.idleMemory); assertEquals(idleCoresOrig - 100, host.idleCores); host.restoreGpu(); assertEquals(idleMemoryOrig, host.idleMemory); assertEquals(idleCoresOrig, host.idleCores); - 
assertEquals(idleGpuOrig, host.idleGpu); + assertEquals(idleGpuMemoryOrig, host.idleGpuMemory); + assertEquals(idleGpusOrig, host.idleGpus); } @Test @@ -222,7 +225,7 @@ public void dispatchProcToJob() { DispatchHost host = getHost(); JobDetail job = getJob(); - host.idleGpu = 0; + host.idleGpuMemory = 0; List procs = dispatcher.dispatchHost(host, job); VirtualProc proc = procs.get(0); dispatcher.dispatchProcToJob(proc, job); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/DispatchSupportTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/DispatchSupportTests.java index baa2353fb..98c60fd9c 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/DispatchSupportTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/DispatchSupportTests.java @@ -110,8 +110,8 @@ public void createHost() { .setState(HardwareState.UP) .setFacility("spi") .putAttributes("SP_OS", "Linux") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); hostManager.createHost(host, diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerTests.java index 81fe8fd68..8e89a09ad 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerTests.java @@ -101,8 +101,8 @@ private static RenderHost getRenderHost() { .setState(HardwareState.UP) .setFacility("spi") .putAttributes("SP_OS", "Linux") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); } diff --git 
a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/LocalDispatcherTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/LocalDispatcherTests.java index 88f195111..97a270085 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/LocalDispatcherTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/LocalDispatcherTests.java @@ -109,8 +109,8 @@ public void createHost() { .setFacility("spi") .addTags("test") .putAttributes("SP_OS", "Linux") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); hostManager.createHost(host, @@ -171,7 +171,7 @@ public void testDispatchHostAutoDetectLayer() { JobDetail job = getJob(); LayerInterface layer = jobManager.getLayers(job).get(0); - LocalHostAssignment lba = new LocalHostAssignment(300, 1, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(300, 1, CueUtil.GB8, 0, 0); bookingManager.createLocalHostAssignment(host, layer, lba); List procs = localDispatcher.dispatchHost(host); @@ -205,7 +205,7 @@ public void testDispatchHostAutoDetectFrame() { LayerInterface layer = jobManager.getLayers(job).get(0); FrameInterface frame = jobManager.findFrame(layer, 5); - LocalHostAssignment lba = new LocalHostAssignment(200, 1, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(200, 1, CueUtil.GB8, 0, 0); bookingManager.createLocalHostAssignment(host, frame, lba); List procs = localDispatcher.dispatchHost(host); @@ -228,7 +228,7 @@ public void testDispatchHostToLocalJob() { DispatchHost host = getHost(); JobDetail job = getJob(); - LocalHostAssignment lba = new LocalHostAssignment(200, 1, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(200, 1, CueUtil.GB8, 0, 0); bookingManager.createLocalHostAssignment(host, job, lba); List procs = localDispatcher.dispatchHost(host, job); 
@@ -258,7 +258,7 @@ public void testDispatchHostToLocalLayer() { JobDetail job = getJob(); LayerInterface layer = jobManager.getLayers(job).get(0); - LocalHostAssignment lba = new LocalHostAssignment(300, 1, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(300, 1, CueUtil.GB8, 0, 0); bookingManager.createLocalHostAssignment(host, layer, lba); List procs = localDispatcher.dispatchHost(host, layer); @@ -292,7 +292,7 @@ public void testDispatchHostToLocalFrame() { LayerInterface layer = jobManager.getLayers(job).get(0); FrameInterface frame = jobManager.findFrame(layer, 5); - LocalHostAssignment lba = new LocalHostAssignment(200, 1, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(200, 1, CueUtil.GB8, 0, 0); bookingManager.createLocalHostAssignment(host, frame, lba); List procs = localDispatcher.dispatchHost(host, frame); @@ -317,7 +317,7 @@ public void testDispatchHostToLocalFrameTwice() { LayerInterface layer = jobManager.getLayers(job).get(0); FrameInterface frame = jobManager.findFrame(layer, 5); - LocalHostAssignment lba = new LocalHostAssignment(200, 1, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(200, 1, CueUtil.GB8, 0, 0); bookingManager.createLocalHostAssignment(host, frame, lba); List procs = localDispatcher.dispatchHost(host, frame); @@ -345,7 +345,7 @@ public void testDispatchHostToLocalJobDeficit() { DispatchHost host = getHost(); JobDetail job = getJob(); - LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 1); + LocalHostAssignment lba = new LocalHostAssignment(800, 8, CueUtil.GB8, 0, 0); bookingManager.createLocalHostAssignment(host, job, lba); List procs = localDispatcher.dispatchHost(host, job); @@ -365,7 +365,7 @@ public void testDispatchHostToLocalJobDeficit() { * Now, lower our min cores to create a deficit. 
*/ assertFalse(bookingManager.hasResourceDeficit(host)); - bookingManager.setMaxResources(lba, 700, 0, 1); + bookingManager.setMaxResources(lba, 700, 0, 0, 0); assertTrue(bookingManager.hasResourceDeficit(host)); } } diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/TestBookingQueue.java b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/TestBookingQueue.java index a34cc1d3e..7502e0687 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/TestBookingQueue.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/TestBookingQueue.java @@ -74,8 +74,8 @@ public void create() { .setState(HardwareState.UP) .setFacility("spi") .addAllTags(ImmutableList.of("mcore", "4core", "8g")) - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); hostManager.createHost(host); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/service/BookingManagerTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/service/BookingManagerTests.java index 9352b0077..9b6813c33 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/service/BookingManagerTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/service/BookingManagerTests.java @@ -125,8 +125,8 @@ public DispatchHost createHost() { .setState(HardwareState.UP) .setFacility("spi") .addTags("general") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); DispatchHost dh = hostManager.createHost(host); @@ -319,7 +319,7 @@ public void setMaxResources() { /* * Lower the cores. 
*/ - bookingManager.setMaxResources(lja, 100, CueUtil.GB2, CueUtil.MB256); + bookingManager.setMaxResources(lja, 100, CueUtil.GB2, 1, CueUtil.MB256); LocalHostAssignment l2 = bookingManager.getLocalHostAssignment(lja.id); @@ -330,7 +330,7 @@ public void setMaxResources() { /* * Raise the values. */ - bookingManager.setMaxResources(lja, 200, CueUtil.GB4, CueUtil.MB512); + bookingManager.setMaxResources(lja, 200, CueUtil.GB4, 1, CueUtil.MB512); l2 = bookingManager.getLocalHostAssignment(lja.id); assertEquals(200, l2.getMaxCoreUnits()); @@ -359,7 +359,7 @@ public void setIllegalMaxResources() { /* * Raise the cores too high */ - bookingManager.setMaxResources(lja, 800, CueUtil.GB2, 0); + bookingManager.setMaxResources(lja, 800, CueUtil.GB2, 0, 0); } @Test diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/service/HostManagerTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/service/HostManagerTests.java index ed89219da..cf86e5362 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/service/HostManagerTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/service/HostManagerTests.java @@ -114,8 +114,8 @@ public DispatchHost createHost() { .setState(HardwareState.UP) .setFacility("spi") .addAllTags(ImmutableList.of("linux", "64bit")) - .putAttributes("freeGpu", "512") - .putAttributes("totalGpu", "512") + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); hostDao.insertRenderHost(host, diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/service/JobManagerTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/service/JobManagerTests.java index b2446fe20..3be56bf06 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/service/JobManagerTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/service/JobManagerTests.java @@ -463,7 +463,7 @@ public void optimizeLayer() { .stream() .limit(5) .forEach(frame -> frameDao.updateFrameState(frame, FrameState.SUCCEEDED)); - 
layerDao.updateUsage(layer, new ResourceUsage(100, 3500 * 5), 0); + layerDao.updateUsage(layer, new ResourceUsage(100, 3500 * 5, 0), 0); // Test to make sure our optimization jobManager.optimizeLayer(layer, 100, CueUtil.MB512, 120); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/service/OwnerManagerTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/service/OwnerManagerTests.java index 38d178444..f8060be46 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/service/OwnerManagerTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/service/OwnerManagerTests.java @@ -82,8 +82,8 @@ public DispatchHost createHost() { .setState(HardwareState.UP) .setFacility("spi") .addTags("general") - .putAttributes("freeGpu", String.format("%d", CueUtil.MB512)) - .putAttributes("totalGpu", String.format("%d", CueUtil.MB512)) + .setFreeGpuMem((int) CueUtil.MB512) + .setTotalGpuMem((int) CueUtil.MB512) .build(); DispatchHost dh = hostManager.createHost(host); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/service/ServiceManagerTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/service/ServiceManagerTests.java index 3573cbe59..5354d763e 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/service/ServiceManagerTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/service/ServiceManagerTests.java @@ -84,7 +84,7 @@ public void testCreateService() { s.name = "dillweed"; s.minCores = 100; s.minMemory = CueUtil.GB4; - s.minGpu = CueUtil.GB2; + s.minGpuMemory = CueUtil.GB2; s.threadable = false; s.timeout = 0; s.timeout_llu = 0; @@ -105,7 +105,7 @@ public void testOverrideExistingService() { s.timeout = 10; s.timeout_llu = 10; s.minMemory = CueUtil.GB8; - s.minGpu = CueUtil.GB2; + s.minGpuMemory = CueUtil.GB2; s.threadable = false; s.tags.addAll(Sets.newHashSet("general")); s.showId = "00000000-0000-0000-0000-000000000000"; @@ -118,7 +118,7 @@ public void testOverrideExistingService() { assertEquals(10, 
newService.timeout); assertEquals(10, newService.timeout_llu); assertEquals(CueUtil.GB8, newService.minMemory); - assertEquals(CueUtil.GB2, newService.minGpu); + assertEquals(CueUtil.GB2, newService.minGpuMemory); assertFalse(newService.threadable); assertTrue(s.tags.contains("general")); @@ -127,7 +127,7 @@ public void testOverrideExistingService() { // now check the original is back. newService = serviceManager.getService("arnold", s.showId); assertEquals(100, newService.minCores); - assertEquals(0, newService.minGpu); + assertEquals(0, newService.minGpuMemory); } @Test @@ -151,7 +151,7 @@ public void testJobLaunch() { assertEquals(shell.minCores, shellLayer.minimumCores); assertEquals(shell.minMemory, shellLayer.minimumMemory); - assertEquals(shell.minGpu, shellLayer.minimumGpu); + assertEquals(shell.minGpuMemory, shellLayer.minimumGpuMemory); assertFalse(shellLayer.isThreadable); assertEquals(shell.tags, shellLayer.tags); assertThat(shellLayer.services, contains("shell", "katana", "unknown")); @@ -164,7 +164,7 @@ public void testJobLaunch() { assertEquals(cuda.minCores, cudaLayer.minimumCores); assertEquals(cuda.minMemory, cudaLayer.minimumMemory); - assertEquals(cuda.minGpu, cudaLayer.minimumGpu); + assertEquals(cuda.minGpuMemory, cudaLayer.minimumGpuMemory); assertFalse(cudaLayer.isThreadable); assertEquals(cuda.tags, cudaLayer.tags); assertThat(cudaLayer.services, contains("cuda")); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/util/CueUtilTester.java b/cuebot/src/test/java/com/imageworks/spcue/test/util/CueUtilTester.java index 9bfc19e41..d3a4abe76 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/util/CueUtilTester.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/util/CueUtilTester.java @@ -154,9 +154,9 @@ public void testCoreUnitsToCoresWithScale() { @Test public void testBuildProcName() { - assertEquals("drack100/1.00", CueUtil.buildProcName("drack100",100)); - assertEquals("drack100/1.40", 
CueUtil.buildProcName("drack100",140)); - assertEquals("drack100/2.01", CueUtil.buildProcName("drack100",201)); + assertEquals("drack100/1.00/1", CueUtil.buildProcName("drack100",100,1)); + assertEquals("drack100/1.40/0", CueUtil.buildProcName("drack100",140,0)); + assertEquals("drack100/2.01/2", CueUtil.buildProcName("drack100",201,2)); } @Test