From 4deedac76f0c8c0a93e7b1b7fa05b2dc20f4ad24 Mon Sep 17 00:00:00 2001 From: Kazuki Sakamoto Date: Sun, 12 Sep 2021 13:41:40 -0700 Subject: [PATCH] [cuebot] Update GPU memory usage in the database from host report. (#1032) --- .../main/java/com/imageworks/spcue/dao/ProcDao.java | 3 ++- .../com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java | 7 +++++-- .../imageworks/spcue/dispatcher/DispatchSupport.java | 4 +++- .../spcue/dispatcher/DispatchSupportService.java | 6 ++++-- .../imageworks/spcue/dispatcher/HostReportHandler.java | 3 ++- .../spcue/test/dao/postgres/ProcDaoTests.java | 10 +++++----- 6 files changed, 21 insertions(+), 12 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/ProcDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/ProcDao.java index 31e49a208..c96e8e28e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/ProcDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/ProcDao.java @@ -150,7 +150,8 @@ public interface ProcDao { * @param maxKb */ void updateProcMemoryUsage(FrameInterface f, long rss, long maxRss, - long vsize, long maxVsize); + long vsize, long maxVsize, + long usedGpuMemory, long maxUsedGpuMemory); /** * get aq virual proc from its unique id diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java index ba9f33c1f..43c7b081b 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java @@ -234,13 +234,15 @@ public boolean clearVirtualProcAssignment(FrameInterface frame) { "int_mem_max_used = ?," + "int_virt_used = ?, " + "int_virt_max_used = ?, " + + "int_gpu_mem_used = ?, " + + "int_gpu_mem_max_used = ?, " + "ts_ping = current_timestamp " + "WHERE " + "pk_frame = ?"; @Override public void updateProcMemoryUsage(FrameInterface f, long rss, long maxRss, - long vss, long maxVss) { + long vss, long maxVss, long usedGpuMemory, long maxUsedGpuMemory) { /* * This method is going to repeat for a proc every 1 minute, so * if the proc is being touched by another thread, then return @@ -256,7 +258,8 @@ public void updateProcMemoryUsage(FrameInterface f, long rss, long maxRss, String.class, f.getFrameId()).equals(f.getFrameId())) { getJdbcTemplate().update(UPDATE_PROC_MEMORY_USAGE, - rss, maxRss, vss, maxVss, f.getFrameId()); + rss, maxRss, vss, maxVss, + usedGpuMemory, maxUsedGpuMemory, f.getFrameId()); } } catch (DataAccessException dae) { logger.info("The proc for frame " + f + diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupport.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupport.java index 3aa9f3ab9..0fcfbef26 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupport.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupport.java @@ -442,9 +442,11 @@ List findNextDispatchFrames(LayerInterface layer, VirtualProc pro * @param maxRss * @param vsize * @param maxVsize + * @param usedGpuMemory + * @param maxUsedGpuMemory */ void updateProcMemoryUsage(FrameInterface frame, long rss, long maxRss, long vsize, - long maxVsize); + long maxVsize, long usedGpuMemory, long maxUsedGpuMemory); /** * Return true if adding the given core units would put the show diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java index 35fe506c4..53d882035 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java @@ -535,8 +535,10 @@ public void lostProc(VirtualProc proc, String reason, int exitStatus) { @Override @Transactional(propagation = Propagation.REQUIRED) public void updateProcMemoryUsage(FrameInterface frame, long rss, long maxRss, - long vsize, long maxVsize) { - procDao.updateProcMemoryUsage(frame, rss, maxRss, vsize, maxVsize); + long vsize, long maxVsize, + long usedGpuMemory, long maxUsedGpuMemory) { + procDao.updateProcMemoryUsage(frame, rss, maxRss, vsize, maxVsize, + usedGpuMemory, maxUsedGpuMemory); } @Override diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java index d9f670838..92f7335c6 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java @@ -572,7 +572,8 @@ private void updateMemoryUsage(List rFrames) { rf.getRss(), rf.getMaxRss()); dispatchSupport.updateProcMemoryUsage(frame, - rf.getRss(), rf.getMaxRss(), rf.getVsize(), rf.getMaxVsize()); + rf.getRss(), rf.getMaxRss(), rf.getVsize(), rf.getMaxVsize(), + rf.getUsedGpuMemory(), rf.getMaxUsedGpuMemory()); } updateJobMemoryUsage(rFrames); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java index 6c9efc3e5..dbecca54d 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java @@ -326,7 +326,7 @@ public void testUpdateProcMemoryUsage() { procDao.insertVirtualProc(proc); procDao.verifyRunningProc(proc.getId(), frame.getId()); - procDao.updateProcMemoryUsage(frame, 100, 100, 1000, 1000); + procDao.updateProcMemoryUsage(frame, 100, 100, 1000, 1000, 0, 0); } @@ -593,7 +593,7 @@ public void testFindReservedMemoryOffender() { // Increase the memory usage as frames are added procDao.updateProcMemoryUsage(frame, - 1000*i, 1000*i, 1000*i, 1000*i); + 1000*i, 1000*i, 1000*i, 1000*i, 0, 0); i++; } @@ -666,7 +666,7 @@ public void testBalanceUnderUtilizedProcs() { proc1.frameId = frame1.id; procDao.insertVirtualProc(proc1); - procDao.updateProcMemoryUsage(frame1, 250000, 250000, 250000, 250000); + procDao.updateProcMemoryUsage(frame1, 250000, 250000, 250000, 250000, 0, 0); layerDao.updateLayerMaxRSS(frame1, 250000, true); FrameDetail frameDetail2 = frameDao.findFrameDetail(job, "0002-pass_1"); @@ -676,7 +676,7 @@ public void testBalanceUnderUtilizedProcs() { proc2.frameId = frame2.id; procDao.insertVirtualProc(proc2); - procDao.updateProcMemoryUsage(frame2, 255000, 255000,255000, 255000); + procDao.updateProcMemoryUsage(frame2, 255000, 255000,255000, 255000, 0, 0); layerDao.updateLayerMaxRSS(frame2, 255000, true); FrameDetail frameDetail3 = frameDao.findFrameDetail(job, "0003-pass_1"); @@ -686,7 +686,7 @@ public void testBalanceUnderUtilizedProcs() { proc3.frameId = frame3.id; procDao.insertVirtualProc(proc3); - procDao.updateProcMemoryUsage(frame3, 3145728, 3145728,3145728, 3145728); + procDao.updateProcMemoryUsage(frame3, 3145728, 3145728,3145728, 3145728, 0, 0); layerDao.updateLayerMaxRSS(frame3,300000, true); procDao.balanceUnderUtilizedProcs(proc3, 100000);