Skip to content

Commit

Permalink
Replace gpu with gpus and gpu_memory
Browse files Browse the repository at this point in the history
Co-authored-by: Lars van der Bijl <[email protected]>
  • Loading branch information
splhack and larsbijl committed Feb 22, 2021
1 parent b4dbd14 commit 45cdc0f
Show file tree
Hide file tree
Showing 104 changed files with 1,796 additions and 394 deletions.
4 changes: 3 additions & 1 deletion cuebot/src/main/java/com/imageworks/spcue/DispatchFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ public class DispatchFrame extends FrameEntity implements FrameInterface {
public int maxCores;
public boolean threadable;
public long minMemory;
public long minGpu;
public int minGpus;
public int maxGpus;
public long minGpuMemory;

public String services;
}
Expand Down
39 changes: 26 additions & 13 deletions cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,16 @@ public class DispatchHost extends Entity
public int cores;
public int idleCores;

public int gpus;
public int idleGpus;

// Basically, 0 = auto and 1 = all.
public int threadMode;

public long memory;
public long idleMemory;
public long gpu;
public long idleGpu;
public long gpuMemory;
public long idleGpuMemory;
public String tags;
public String os;

Expand All @@ -53,11 +56,13 @@ public class DispatchHost extends Entity
* booked to this host.
*/
public int strandedCores = 0;
public int strandedGpus = 0;

// To reserve resources for future gpu job
long idleMemoryOrig = 0;
int idleCoresOrig = 0;
long idleGpuOrig = 0;
long idleGpuMemoryOrig = 0;
int idleGpusOrig = 0;

public String getHostId() {
return id;
Expand All @@ -72,41 +77,47 @@ public String getFacilityId() {
}

@Override
public boolean hasAdditionalResources(int minCores, long minMemory, long minGpu) {
public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, long minGpuMemory) {

if (idleCores < minCores) {
return false;
}
else if (idleMemory < minMemory) {
return false;
}
else if (idleGpu < minGpu) {
else if (idleGpus < minGpus) {
return false;
}
else if (idleGpuMemory < minGpuMemory) {
return false;
}

return true;
}

@Override
public void useResources(int coreUnits, long memory, long gpu) {
public void useResources(int coreUnits, long memory, int gpuUnits, long gpuMemory) {
idleCores = idleCores - coreUnits;
idleMemory = idleMemory - memory;
idleGpu = idleGpu - gpu;
idleGpus = idleGpus - gpuUnits;
idleGpuMemory = idleGpuMemory - gpuMemory;
}

/**
* If host has idle gpu, remove enough resources to book a gpu frame later.
*
*/
public void removeGpu() {
if (idleGpu > 0 && idleGpuOrig == 0) {
if (idleGpuMemory > 0 && idleGpuMemoryOrig == 0) {
idleMemoryOrig = idleMemory;
idleCoresOrig = idleCores;
idleGpuOrig = idleGpu;
idleGpuMemoryOrig = idleGpuMemory;
idleGpusOrig = idleGpus;

idleMemory = idleMemory - Math.min(CueUtil.GB4, idleMemory);
idleCores = idleCores - Math.min(100, idleCores);
idleGpu = 0;
idleGpuMemory = 0;
idleGpus = 0;
}
}

Expand All @@ -115,14 +126,16 @@ public void removeGpu() {
*
*/
public void restoreGpu() {
if (idleGpuOrig > 0) {
if (idleGpuMemoryOrig > 0) {
idleMemory = idleMemoryOrig;
idleCores = idleCoresOrig;
idleGpu = idleGpuOrig;
idleGpuMemory = idleGpuMemoryOrig;
idleGpus = idleGpusOrig;

idleMemoryOrig = 0;
idleCoresOrig = 0;
idleGpuOrig = 0;
idleGpuMemoryOrig = 0;
idleGpusOrig = 0;
}
}
}
Expand Down
27 changes: 27 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/ExecutionSummary.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ public class ExecutionSummary {
public long coreTime;
public long coreTimeSuccess;
public long coreTimeFail;
public long gpuTime;
public long gpuTimeSuccess;
public long gpuTimeFail;
public long highMemoryKb;

public long getHighMemoryKb() {
Expand Down Expand Up @@ -69,5 +72,29 @@ public long getCoreTimeFail() {
/**
 * Stores the given value in the {@code coreTimeFail} field of this summary.
 *
 * @param coreTimeFail new value for {@code coreTimeFail}
 */
public void setCoreTimeFail(long coreTimeFail) {
this.coreTimeFail = coreTimeFail;
}

/**
 * Reads the {@code gpuTime} field of this summary.
 *
 * @return the current value of {@code gpuTime}
 */
public long getGpuTime() {
    return this.gpuTime;
}

/**
 * Stores the given value in the {@code gpuTime} field of this summary.
 *
 * @param gpuTime new value for {@code gpuTime}
 */
public void setGpuTime(long gpuTime) {
this.gpuTime = gpuTime;
}

/**
 * Reads the {@code gpuTimeSuccess} field of this summary.
 *
 * @return the current value of {@code gpuTimeSuccess}
 */
public long getGpuTimeSuccess() {
    return this.gpuTimeSuccess;
}

/**
 * Stores the given value in the {@code gpuTimeSuccess} field of this summary.
 *
 * @param gpuTimeSuccess new value for {@code gpuTimeSuccess}
 */
public void setGpuTimeSuccess(long gpuTimeSuccess) {
this.gpuTimeSuccess = gpuTimeSuccess;
}

/**
 * Reads the {@code gpuTimeFail} field of this summary.
 *
 * @return the current value of {@code gpuTimeFail}
 */
public long getGpuTimeFail() {
    return this.gpuTimeFail;
}

/**
 * Stores the given value in the {@code gpuTimeFail} field of this summary.
 *
 * @param gpuTimeFail new value for {@code gpuTimeFail}
 */
public void setGpuTimeFail(long gpuTimeFail) {
this.gpuTimeFail = gpuTimeFail;
}
}

5 changes: 5 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/GroupDetail.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@ public class GroupDetail extends Entity implements GroupInterface, DepartmentInt

public int jobMinCores = -1;
public int jobMaxCores = -1;
public int jobMinGpus = -1;
public int jobMaxGpus = -1;
public int jobPriority = -1;

public int minCores = -1;
public int maxCores = -1;

public int minGpus = -1;
public int maxGpus = -1;

public String parentId = null;
public String showId;
public String deptId;
Expand Down
4 changes: 4 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/HostEntity.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ public class HostEntity extends Entity implements HostInterface {
public int idleCores;
public int memory;
public int idleMemory;
public int gpus;
public int idleGpus;
public int gpuMemory;
public int idleGpuMemory;

Expand All @@ -59,6 +61,8 @@ public HostEntity(Host grpcHost) {
this.idleCores = (int) grpcHost.getIdleCores();
this.memory = (int) grpcHost.getMemory();
this.idleMemory = (int) grpcHost.getIdleMemory();
this.gpus = (int) grpcHost.getGpus();
this.idleGpus = (int) grpcHost.getIdleGpus();
this.gpuMemory = (int) grpcHost.getGpuMemory();
this.idleGpuMemory = (int) grpcHost.getIdleGpuMemory();
}
Expand Down
2 changes: 2 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/Inherit.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public enum Inherit {
Priority,
MinCores,
MaxCores,
MinGpus,
MaxGpus,
All
}

7 changes: 5 additions & 2 deletions cuebot/src/main/java/com/imageworks/spcue/JobDetail.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,15 @@ public class JobDetail extends JobEntity implements JobInterface, DepartmentInte
public int priority = 1;
public int minCoreUnits = 100;
public int maxCoreUnits = 200000;
public int minGpuUnits = 0;
public int maxGpuUnits = 1000;
public boolean isLocal = false;
public String localHostName;
public int localMaxCores;
public int localMaxMemory;
public long localMaxMemory;
public int localThreadNumber;
public int localMaxGpuMemory;
public int localMaxGpus;
public long localMaxGpuMemory;

public String getDepartmentId() {
return deptId;
Expand Down
20 changes: 15 additions & 5 deletions cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ public class LayerDetail extends LayerEntity implements LayerInterface {
public LayerType type;
public int minimumCores;
public int maximumCores;
public int minimumGpus;
public int maximumGpus;
public boolean isThreadable;
public long minimumMemory;
public long minimumGpu;
public long minimumGpuMemory;
public int chunkSize;
public int timeout;
public int timeout_llu;
Expand Down Expand Up @@ -116,12 +118,20 @@ public void setMinimumMemory(long minimumMemory) {
this.minimumMemory = minimumMemory;
}

public long getMinimumGpu() {
return minimumGpu;
public int getMinimumGpus() {
return minimumGpus;
}

public void setMinimumGpu(long minimumGpu) {
this.minimumGpu = minimumGpu;
public void setMinimumGpus(int minimumGpus) {
this.minimumGpus = minimumGpus;
}

/**
 * Reads the {@code minimumGpuMemory} field of this layer.
 *
 * @return the current value of {@code minimumGpuMemory}
 */
public long getMinimumGpuMemory() {
    return this.minimumGpuMemory;
}

/**
 * Stores the given value in the {@code minimumGpuMemory} field of this layer.
 *
 * @param minimumGpuMemory new value for {@code minimumGpuMemory}
 */
public void setMinimumGpuMemory(long minimumGpuMemory) {
this.minimumGpuMemory = minimumGpuMemory;
}

public int getChunkSize() {
Expand Down
43 changes: 33 additions & 10 deletions cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ public class LocalHostAssignment extends Entity

private int idleCoreUnits;
private long idleMemory;
private long idleGpu;
private int idleGpuUnits;
private long idleGpuMemory;

private long maxMemory;
private long maxGpuMemory;
private int maxCoreUnits;
private int maxGpuUnits;

private int threads;

Expand All @@ -52,34 +54,39 @@ public class LocalHostAssignment extends Entity

/** Creates an assignment; this constructor performs no initialization of its own. */
public LocalHostAssignment() { }

public LocalHostAssignment(int maxCores, int threads, long maxMemory, long maxGpuMemory) {
public LocalHostAssignment(int maxCores, int threads, long maxMemory, int maxGpus, long maxGpuMemory) {
this.maxCoreUnits = maxCores;
this.threads = threads;
this.maxMemory = maxMemory;
this.maxGpuUnits = maxGpus;
this.maxGpuMemory = maxGpuMemory;
}

@Override
public boolean hasAdditionalResources(int minCores, long minMemory, long minGpu) {
public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, long minGpuMemory) {

if (idleCoreUnits < minCores) {
return false;
}
else if (idleMemory < minMemory) {
return false;
}
else if (idleGpu < minGpu) {
else if (idleGpuUnits < minGpus) {
return false;
}
else if (idleGpuMemory < minGpuMemory) {
return false;
}

return true;
}

@Override
public void useResources(int coreUnits, long memory, long gpu) {
public void useResources(int coreUnits, long memory, int gpuUnits, long gpuMemory) {
idleCoreUnits = idleCoreUnits - coreUnits;
idleMemory = idleMemory - memory;
idleGpu = idleGpu - gpu;
idleGpuUnits = idleGpuUnits - gpuUnits;
idleGpuMemory = idleGpuMemory - gpuMemory;
}

public int getThreads() {
Expand Down Expand Up @@ -110,6 +117,14 @@ public long getIdleMemory() {
return this.idleMemory;
}

/**
 * Reads the {@code maxGpuUnits} field of this assignment.
 *
 * @return the current value of {@code maxGpuUnits}
 */
public int getMaxGpuUnits() {
    return this.maxGpuUnits;
}

/**
 * Stores the given value in the {@code maxGpuUnits} field of this assignment.
 *
 * @param maxGpuUnits new value for {@code maxGpuUnits}
 */
public void setMaxGpuUnits(int maxGpuUnits) {
this.maxGpuUnits = maxGpuUnits;
}

/**
 * Reads the {@code maxGpuMemory} field of this assignment.
 *
 * @return the current value of {@code maxGpuMemory}
 */
public long getMaxGpuMemory() {
    return this.maxGpuMemory;
}
Expand All @@ -118,8 +133,8 @@ public void setMaxGpuMemory(long maxGpuMemory) {
this.maxGpuMemory = maxGpuMemory;
}

public long getIdleGpu() {
return this.idleGpu;
public long getIdleGpuMemory() {
return this.idleGpuMemory;
}

public int getIdleCoreUnits() {
Expand All @@ -134,8 +149,16 @@ public void setIdleMemory(long idleMemory) {
this.idleMemory = idleMemory;
}

public void setIdleGpu(long idleGpu) {
this.idleGpu = idleGpu;
public int getIdleGpuUnits() {
return this.idleGpuUnits;
}

/**
 * Stores the given value in the {@code idleGpuUnits} field of this assignment.
 *
 * @param idleGpuUnits new value for {@code idleGpuUnits}
 */
public void setIdleGpuUnits(int idleGpuUnits) {
this.idleGpuUnits = idleGpuUnits;
}

/**
 * Stores the given value in the {@code idleGpuMemory} field of this assignment.
 *
 * @param idleGpuMemory new value for {@code idleGpuMemory}
 */
public void setIdleGpuMemory(long idleGpuMemory) {
this.idleGpuMemory = idleGpuMemory;
}

public String getHostId() {
Expand Down
10 changes: 9 additions & 1 deletion cuebot/src/main/java/com/imageworks/spcue/ResourceUsage.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@
public class ResourceUsage {

private final long coreTimeSeconds;
private final long gpuTimeSeconds;
private final long clockTimeSeconds;

public ResourceUsage(long clockTime, int corePoints) {
public ResourceUsage(long clockTime, int corePoints, int gpuPoints) {

if (clockTime < 1) {
clockTime = 1;
Expand All @@ -38,14 +39,21 @@ public ResourceUsage(long clockTime, int corePoints) {
coreTime = 1;
}

long gpuTime = clockTime * gpuPoints;

clockTimeSeconds = clockTime;
coreTimeSeconds = coreTime;
gpuTimeSeconds = gpuTime;
}

/**
 * Reads the {@code coreTimeSeconds} value fixed when this object was constructed.
 *
 * @return the value of {@code coreTimeSeconds}
 */
public long getCoreTimeSeconds() {
    return this.coreTimeSeconds;
}

/**
 * Reads the {@code gpuTimeSeconds} value fixed when this object was constructed.
 *
 * @return the value of {@code gpuTimeSeconds}
 */
public long getGpuTimeSeconds() {
    return this.gpuTimeSeconds;
}

/**
 * Reads the {@code clockTimeSeconds} value fixed when this object was constructed.
 *
 * @return the value of {@code clockTimeSeconds}
 */
public long getClockTimeSeconds() {
    return this.clockTimeSeconds;
}
Expand Down
Loading

0 comments on commit 45cdc0f

Please sign in to comment.