Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add multiple GPU support #760

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cueadmin/cueadmin/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def promptYesNo(prompt, force=False):
@param prompt: The question that the user can see
@type force: boolean
@param force: (Optional) If true, skips the prompt and returns true
@rtype: bool
:rtype: bool
@return: The user's response"""
try:
result = force or input("%s [y/n] " % prompt) in ("y", "Y")
Expand All @@ -68,7 +68,7 @@ def waitOnJobName(jobName, maxWaitForLaunch=None):
@type maxWaitForLaunch: int
@param maxWaitForLaunch: (Optional) The maximum number of seconds to wait
for the job to launch.
@rtype: bool
:rtype: bool
@return: Returns True if the job was found and is now Finished, False if
the job was not found before maxWaitForLaunch was reached"""
isLocated = False
Expand Down
17 changes: 17 additions & 0 deletions cuebot/.project
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>cuebot</name>
<comment>Project cuebot created by Buildship.</comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.buildship.core.gradleprojectbuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.buildship.core.gradleprojectnature</nature>
</natures>
</projectDescription>
2 changes: 1 addition & 1 deletion cuebot/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ COPY --chown=gradle:gradle ./proto /home/gradle/proto/

WORKDIR /home/gradle/cuebot

RUN gradle build --stacktrace
RUN gradle build --stacktrace --info

COPY --chown=gradle:gradle VERSION.in VERSIO[N] ./
RUN test -e VERSION || echo "$(cat VERSION.in)-custom" | tee VERSION
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ public class DispatchFrame extends FrameEntity implements FrameInterface {

public int minCores;
public int maxCores;
public int minGpu;
public int maxGpu;
public boolean threadable;
public long minMemory;
public long minGpu;
public long minGpuMemory;

public String services;
}
Expand Down
36 changes: 23 additions & 13 deletions cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,16 @@ public class DispatchHost extends Entity
public int cores;
public int idleCores;

public int gpu;
public int idleGpu;

// Basically 0 = auto, 1 = all.
public int threadMode;

public long memory;
public long idleMemory;
public long gpu;
public long idleGpu;
public long gpuMemory;
public long idleGpuMemory;
public String tags;
public String os;

Expand All @@ -53,11 +56,13 @@ public class DispatchHost extends Entity
* booked to this host.
*/
public int strandedCores = 0;
public int strandedGpu = 0;

// To reserve resources for future gpu job
long idleMemoryOrig = 0;
int idleCoresOrig = 0;
long idleGpuOrig = 0;
long idleMemoryOrig = 0;
int idleGpuOrig = 0;
long idleGpuMemoryOrig = 0;

public String getHostId() {
return id;
Expand All @@ -72,41 +77,44 @@ public String getFacilityId() {
}

@Override
public boolean hasAdditionalResources(int minCores, long minMemory, long minGpu) {
public boolean hasAdditionalResources(int minCores, int minGpu, long minMemory, long minGpuMemory) {

if (idleCores < minCores) {
return false;
}
else if (idleMemory < minMemory) {
} else if (idleGpu < minGpu) {
return false;
}
else if (idleGpu < minGpu) {
} else if (idleMemory < minMemory) {
return false;
} else if (idleGpuMemory < minGpuMemory) {
return false;
}

return true;
}

@Override
public void useResources(int coreUnits, long memory, long gpu) {
public void useResources(int coreUnits, int gpu, long memory, long gpuMemory) {
idleCores = idleCores - coreUnits;
idleMemory = idleMemory - memory;
idleGpu = idleGpu - gpu;
idleGpuMemory = idleGpuMemory - gpuMemory;
}

/**
* If host has idle gpu, remove enough resources to book a gpu frame later.
*
*/
public void removeGpu() {
if (idleGpu > 0 && idleGpuOrig == 0) {
if (idleGpuMemory > 0 && idleGpuMemoryOrig == 0) {
idleMemoryOrig = idleMemory;
idleCoresOrig = idleCores;
idleGpuOrig = idleGpu;
idleGpuMemoryOrig = idleGpuMemory;

idleMemory = (long) idleMemory - Math.min(CueUtil.GB4, idleMemory);
idleCores = (int) idleCores - Math.min(100, idleCores);
idleGpu = 0;
idleGpu = (int) idleGpu - idleGpu;
idleGpuMemory = 0;
}
}

Expand All @@ -115,14 +123,16 @@ public void removeGpu() {
*
*/
public void restoreGpu() {
if (idleGpuOrig > 0) {
if (idleGpuMemoryOrig > 0) {
idleMemory = idleMemoryOrig;
idleCores = idleCoresOrig;
idleGpu = idleGpuOrig;
idleGpuMemory = idleGpuMemoryOrig;

idleMemoryOrig = 0;
idleCoresOrig = 0;
idleGpuOrig = 0;
idleGpuMemoryOrig = 0;
}
}
}
Expand Down
27 changes: 27 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/ExecutionSummary.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ public class ExecutionSummary {
public long coreTime;
public long coreTimeSuccess;
public long coreTimeFail;
public long gpuTime;
public long gpuTimeSuccess;
public long gpuTimeFail;
public long highMemoryKb;

public long getHighMemoryKb() {
Expand Down Expand Up @@ -69,5 +72,29 @@ public long getCoreTimeFail() {
/**
 * Stores the given value in the {@code coreTimeFail} field.
 *
 * @param coreTimeFail the new value for {@code coreTimeFail}
 */
public void setCoreTimeFail(long coreTimeFail) {
this.coreTimeFail = coreTimeFail;
}

/**
 * Returns the current value of the {@code gpuTime} field.
 *
 * NOTE(review): the unit is not visible here; presumably it matches the
 * sibling {@code coreTime} field - confirm.
 *
 * @return the value of {@code gpuTime}
 */
public long getGpuTime() {
return gpuTime;
}

/**
 * Stores the given value in the {@code gpuTime} field.
 *
 * @param gpuTime the new value for {@code gpuTime}
 */
public void setGpuTime(long gpuTime) {
this.gpuTime = gpuTime;
}

/**
 * Returns the current value of the {@code gpuTimeSuccess} field.
 *
 * @return the value of {@code gpuTimeSuccess}
 */
public long getGpuTimeSuccess() {
return gpuTimeSuccess;
}

/**
 * Stores the given value in the {@code gpuTimeSuccess} field.
 *
 * @param gpuTimeSuccess the new value for {@code gpuTimeSuccess}
 */
public void setGpuTimeSuccess(long gpuTimeSuccess) {
this.gpuTimeSuccess = gpuTimeSuccess;
}

/**
 * Returns the current value of the {@code gpuTimeFail} field.
 *
 * @return the value of {@code gpuTimeFail}
 */
public long getGpuTimeFail() {
return gpuTimeFail;
}

/**
 * Stores the given value in the {@code gpuTimeFail} field.
 *
 * @param gpuTimeFail the new value for {@code gpuTimeFail}
 */
public void setGpuTimeFail(long gpuTimeFail) {
this.gpuTimeFail = gpuTimeFail;
}
}

5 changes: 5 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/GroupDetail.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@ public class GroupDetail extends Entity implements GroupInterface, DepartmentInt

public int jobMinCores = -1;
public int jobMaxCores = -1;
public int jobMinGpu = -1;
public int jobMaxGpu = -1;
public int jobPriority = -1;

public int minCores = -1;
public int maxCores = -1;

public int minGpu = -1;
public int maxGpu = -1;

public String parentId = null;
public String showId;
public String deptId;
Expand Down
12 changes: 8 additions & 4 deletions cuebot/src/main/java/com/imageworks/spcue/HostEntity.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ public class HostEntity extends Entity implements HostInterface {

public int procs;
public int cores;
public int gpu;
public int idleCores;
public int idleGpu;
public int memory;
public int idleMemory;
public int gpu;
public int idleGpu;
public int gpuMemory;
public int idleGpuMemory;

public boolean unlockAtBoot;

Expand All @@ -57,10 +59,12 @@ public HostEntity(Host grpcHost) {
this.nimbyEnabled = grpcHost.getNimbyEnabled();
this.cores = (int) grpcHost.getCores();
this.idleCores = (int) grpcHost.getIdleCores();
this.memory = (int) grpcHost.getMemory();
this.idleMemory = (int) grpcHost.getIdleMemory();
this.gpu = (int) grpcHost.getGpu();
this.idleGpu = (int) grpcHost.getIdleGpu();
this.memory = (int) grpcHost.getMemory();
this.idleMemory = (int) grpcHost.getIdleMemory();
this.gpuMemory = (int) grpcHost.getGpuMemory();
this.idleGpuMemory = (int) grpcHost.getIdleGpuMemory();
}

public String getHostId() {
Expand Down
2 changes: 2 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/Inherit.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public enum Inherit {
Priority,
MinCores,
MaxCores,
MinGpu,
MaxGpu,
All
}

5 changes: 4 additions & 1 deletion cuebot/src/main/java/com/imageworks/spcue/JobDetail.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,15 @@ public class JobDetail extends JobEntity implements JobInterface, DepartmentInte
public int priority = 1;
public int minCoreUnits = 100;
public int maxCoreUnits = 200000;
public int minGpuUnits = 0;
public int maxGpuUnits = 100;
public boolean isLocal = false;
public String localHostName;
public int localMaxCores;
public int localMaxGpu;
public int localMaxMemory;
public int localThreadNumber;
public int localMaxGpu;
public int localMaxGpuMemory;

public String getDepartmentId() {
return deptId;
Expand Down
20 changes: 15 additions & 5 deletions cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ public class LayerDetail extends LayerEntity implements LayerInterface {
public LayerType type;
public int minimumCores;
public int maximumCores;
public int minimumGpu;
public int maximumGpu;
public boolean isThreadable;
public long minimumMemory;
public long minimumGpu;
public long minimumGpuMemory;
public int chunkSize;
public int dispatchOrder;
public int totalFrameCount;
Expand Down Expand Up @@ -82,6 +84,14 @@ public void setMinimumCores(int minimumCores) {
this.minimumCores = minimumCores;
}

/**
 * Returns the current value of the {@code minimumGpu} field.
 *
 * NOTE(review): presumably a GPU count rather than an amount of GPU
 * memory - confirm against the field declaration and callers.
 *
 * @return the value of {@code minimumGpu}
 */
public int getMinimumGpu() {
return minimumGpu;
}

/**
 * Stores the given value in the {@code minimumGpu} field.
 *
 * @param minimumGpu the new value for {@code minimumGpu}
 */
public void setMinimumGpu(int minimumGpu) {
this.minimumGpu = minimumGpu;
}

/**
 * Returns the current value of the {@code isThreadable} flag.
 *
 * @return the value of {@code isThreadable}
 */
public boolean isThreadable() {
return isThreadable;
}
Expand All @@ -98,12 +108,12 @@ public void setMinimumMemory(long minimumMemory) {
this.minimumMemory = minimumMemory;
}

public long getMinimumGpu() {
return minimumGpu;
public long getMinimumGpuMemory() {
return minimumGpuMemory;
}

public void setMinimumGpu(long minimumGpu) {
this.minimumGpu = minimumGpu;
public void setMinimumGpuMemory(long minimumGpuMemory) {
this.minimumGpuMemory = minimumGpuMemory;
}

public int getChunkSize() {
Expand Down
Loading