From 1f1d67cea2575dc12b09ef899023d9f6fcdc27f7 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Thu, 14 Sep 2023 10:45:50 -0700 Subject: [PATCH] Change OOM protection logic The current logic relies on hardcoded values which are not suitable for large hosts. The new logic takes into account the size of hosts and also tries to be more aggressive with misbehaving frames. Prevent host from entering an OOM state where oom-killer might start killing important OS processes. The kill logic will kick in one of the following conditions is met: - Host has less than OOM_MEMORY_LEFT_THRESHOLD_PERCENT memory available - A frame is taking more than OOM_FRAME_OVERBOARD_PERCENT of what it had reserved For frames that are using more than they had reserved but not above the threshold, negotiate expanding the reservations with other frames on the same host (cherry picked from commit e88a5295f23bd927614de6d5af6a09d496d3e6ac) Signed-off-by: Diego Tavares --- .../imageworks/spcue/PrometheusMetrics.java | 57 +++ .../com/imageworks/spcue/dao/HostDao.java | 9 - .../spcue/dao/postgres/HostDaoJdbc.java | 9 - .../spcue/dao/postgres/ProcDaoJdbc.java | 2 +- .../spcue/dispatcher/Dispatcher.java | 10 +- .../spcue/dispatcher/HostReportHandler.java | 245 ++++++++---- .../imageworks/spcue/service/HostManager.java | 9 +- .../spcue/service/HostManagerService.java | 5 +- .../spcue/test/dao/postgres/HostDaoTests.java | 18 - .../dispatcher/HostReportHandlerGpuTests.java | 15 +- .../dispatcher/HostReportHandlerTests.java | 372 ++++++++++++++++-- .../conf/jobspec/jobspec_multiple_frames.xml | 48 +++ cuebot/src/test/resources/opencue.properties | 2 +- 13 files changed, 628 insertions(+), 173 deletions(-) create mode 100644 cuebot/src/main/java/com/imageworks/spcue/PrometheusMetrics.java create mode 100644 cuebot/src/test/resources/conf/jobspec/jobspec_multiple_frames.xml diff --git a/cuebot/src/main/java/com/imageworks/spcue/PrometheusMetrics.java b/cuebot/src/main/java/com/imageworks/spcue/PrometheusMetrics.java new file mode 100644 index 000000000..14f79cea5 --- /dev/null +++ b/cuebot/src/main/java/com/imageworks/spcue/PrometheusMetrics.java @@ -0,0 +1,57 @@ +package com.imageworks.spcue; + +import io.prometheus.client.Counter; +import io.prometheus.client.Gauge; +import io.prometheus.client.Histogram; + +public class PrometheusMetrics { + private static final Counter findJobsByShowQueryCountMetric = Counter.build() + .name("cue_find_jobs_by_show_count") + .help("Count the occurrences of the query FIND_JOBS_BY_SHOW.") + .labelNames("env", "cuebot_hosts") + .register(); + private static final Gauge bookingDurationMillisMetric = Gauge.build() + .name("cue_booking_durations_in_millis") + .help("Register duration of booking steps in milliseconds.") + .labelNames("env", "cuebot_host", "stage_desc") + .register(); + private static final Histogram bookingDurationMillisHistogramMetric = Histogram.build() + .name("cue_booking_durations_histogram_in_millis") + .help("Register a summary of duration of booking steps in milliseconds.") + .labelNames("env", "cuebot_host", "stage_desc") + .register(); + + private static final Counter frameOomKilledCounter = Counter.build() + .name("cue_frame_oom_killed_counter") + .help("Number of frames killed for being above memory on a host under OOM") + .labelNames("env", "cuebot_host", "render_node") + .register(); + + private String deployment_environment; + private String cuebot_host; + + public PrometheusMetrics() { + this.cuebot_host = System.getenv("NODE_HOSTNAME"); + if (this.cuebot_host == null) { + this.cuebot_host = "undefined"; + } + // Use the same environment set for SENTRY as the prometheus environment + this.deployment_environment = System.getenv("SENTRY_ENVIRONMENT"); + if (this.deployment_environment == null) { + this.deployment_environment = "undefined"; + } + } + + public void setBookingDurationMetric(String stage_desc, double value) { + bookingDurationMillisMetric.labels(this.deployment_environment, this.cuebot_host, stage_desc).set(value); + bookingDurationMillisHistogramMetric.labels(this.deployment_environment, this.cuebot_host, stage_desc).observe(value); + } + + public void incrementFindJobsByShowQueryCountMetric() { + findJobsByShowQueryCountMetric.labels(this.deployment_environment, this.cuebot_host).inc(); + } + + public void incrementFrameOomKilledCounter(String renderNode) { + frameOomKilledCounter.labels(this.deployment_environment, this.cuebot_host, renderNode).inc(); + } +} \ No newline at end of file diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java index 768bcdbd2..95036b15e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java @@ -244,15 +244,6 @@ public interface HostDao { */ void updateThreadMode(HostInterface host, ThreadMode mode); - /** - * When a host is in kill mode that means its 256MB+ into the swap and the - * the worst memory offender is killed. - * - * @param h HostInterface - * @return boolean - */ - boolean isKillMode(HostInterface h); - /** * Update the specified host's hardware information. * diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java index 5c106335c..8e626303d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java @@ -605,15 +605,6 @@ public void updateHostOs(HostInterface host, String os) { os, host.getHostId()); } - @Override - public boolean isKillMode(HostInterface h) { - return getJdbcTemplate().queryForObject( - "SELECT COUNT(1) FROM host_stat WHERE pk_host = ? " + - "AND int_swap_total - int_swap_free > ? AND int_mem_free < ?", - Integer.class, h.getHostId(), Dispatcher.KILL_MODE_SWAP_THRESHOLD, - Dispatcher.KILL_MODE_MEM_THRESHOLD) > 0; - } - @Override public int getStrandedCoreUnits(HostInterface h) { try { diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java index 5af292fb3..8f6322690 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java @@ -564,7 +564,7 @@ public boolean increaseReservedMemory(ProcInterface p, long value) { value, p.getProcId(), value) == 1; } catch (Exception e) { // check by trigger erify_host_resources - throw new ResourceReservationFailureException("failed to increase memory reserveration for proc " + throw new ResourceReservationFailureException("failed to increase memory reservation for proc " + p.getProcId() + " to " + value + ", proc does not have that much memory to spare."); } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java index 6ac703a41..fa7679830 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java @@ -108,13 +108,11 @@ public interface Dispatcher { // without being penalized for it. public static final long VIRTUAL_MEM_THRESHHOLD = CueUtil.GB2; - // The amount of swap that must be used before a host can go - // into kill mode. - public static final long KILL_MODE_SWAP_THRESHOLD = CueUtil.MB128; + // Percentage of used memory to consider a risk for triggering oom-killer + public static final double OOM_MAX_SAFE_USED_MEMORY_THRESHOLD = 0.95; - // When the amount of free memory drops below this point, the - // host can go into kill mode. - public static final long KILL_MODE_MEM_THRESHOLD = CueUtil.MB512; + // How much can a frame exceed its reserved memory + public static final double OOM_FRAME_OVERBOARD_ALLOWED_THRESHOLD = 0.25; // A higher number gets more deep booking but less spread on the cue. public static final int DEFAULT_MAX_FRAMES_PER_PASS = 4; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java index d763cce53..e9b163b0c 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java @@ -21,26 +21,19 @@ import java.sql.Timestamp; import java.util.ArrayList; -import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ThreadPoolExecutor; +import com.imageworks.spcue.*; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.task.TaskRejectedException; import org.springframework.dao.DataAccessException; import org.springframework.dao.EmptyResultDataAccessException; -import com.imageworks.spcue.DispatchHost; -import com.imageworks.spcue.FrameInterface; -import com.imageworks.spcue.JobEntity; -import com.imageworks.spcue.LayerEntity; -import com.imageworks.spcue.LayerDetail; -import com.imageworks.spcue.LocalHostAssignment; -import com.imageworks.spcue.Source; -import com.imageworks.spcue.VirtualProc; import com.imageworks.spcue.dao.JobDao; import com.imageworks.spcue.dao.LayerDao; import com.imageworks.spcue.dispatcher.commands.DispatchBookHost; @@ -63,6 +56,8 @@ import com.imageworks.spcue.util.CueExceptionUtil; import com.imageworks.spcue.util.CueUtil; +import static com.imageworks.spcue.dispatcher.Dispatcher.*; + public class HostReportHandler { private static final Logger logger = LogManager.getLogger(HostReportHandler.class); @@ -199,9 +194,9 @@ public void handleHostReport(HostReport report, boolean isBoot) { killTimedOutFrames(report); /* - * Increase/decreased reserved memory. + * Prevent OOM (Out-Of-Memory) issues on the host and manage frame reserved memory */ - handleMemoryReservations(host, report); + handleMemoryUsage(host, report); /* * The checks are done in order of least CPU intensive to @@ -389,103 +384,187 @@ private void changeLockState(DispatchHost host, CoreDetail coreInfo) { } /** - * Handle memory reservations for the given host. This will re-balance memory - * reservations on the machine and kill and frames that are out of control. - * + * Prevent host from entering an OOM state where oom-killer might start killing important OS processes. + * The kill logic will kick in one of the following conditions is met: + * - Host has less than OOM_MEMORY_LEFT_THRESHOLD_PERCENT memory available + * - A frame is taking more than OOM_FRAME_OVERBOARD_PERCENT of what it had reserved + * For frames that are using more than they had reserved but not above the threshold, negotiate expanding + * the reservations with other frames on the same host * @param host * @param report */ - private void handleMemoryReservations(final DispatchHost host, final HostReport report) { + private void handleMemoryUsage(final DispatchHost host, final HostReport report) { + RenderHost renderHost = report.getHost(); + List runningFrames = report.getFramesList(); + + boolean memoryWarning = renderHost.getTotalMem() > 0 && + ((double)renderHost.getFreeMem()/renderHost.getTotalMem() < + (1.0 - OOM_MAX_SAFE_USED_MEMORY_THRESHOLD)); + + if (memoryWarning) { + VirtualProc killedProc = killWorstMemoryOffender(host); + long memoryAvailable = renderHost.getFreeMem(); + long minSafeMemoryAvailable = (long)(renderHost.getTotalMem() * (1.0 - OOM_MAX_SAFE_USED_MEMORY_THRESHOLD)); + // Some extra protection for this possibly unbound loop + int unboundProtectionLimit = 10; + while (killedProc != null && unboundProtectionLimit > 0) { + memoryAvailable = memoryAvailable + killedProc.memoryUsed; + + // If killing this proc solved the memory issue, stop the attack + if (memoryAvailable >= minSafeMemoryAvailable) { + break; + } + killedProc = killWorstMemoryOffender(host); + unboundProtectionLimit -= 1; + } + } else { + // When no mass cleaning was required, check for frames going overboard + // if frames didn't go overboard, manage its reservations trying to increase + // them accordingly + for (final RunningFrameInfo frame: runningFrames) { + if (isFrameOverboard(frame)) { + if (!killFrame(frame, host.getName())) { + logger.warn("Frame " + frame.getJobName() + "." + frame.getFrameName() + + " is overboard but could not be killed"); + } + } else { + handleMemoryReservations(frame); + } + } + } + } - // TODO: GPU: Need to keep frames from growing into space reserved for GPU frames - // However all this is done in the database without a chance to edit the values here + private boolean killFrame(RunningFrameInfo frame, String hostName) { + try { + VirtualProc proc = hostManager.getVirtualProc(frame.getResourceId()); - /* - * Check to see if we enable kill mode to free up memory. - */ - boolean killMode = hostManager.isSwapping(host); + // Don't mess with localDispatch procs + if (proc.isLocalDispatch) { + return false; + } - for (final RunningFrameInfo f: report.getFramesList()) { + logger.info("Killing frame on " + frame.getJobName() + "." + frame.getFrameName() + + ", using too much memory."); + DispatchSupport.killedOffenderProcs.incrementAndGet(); - VirtualProc proc = null; - try { - proc = hostManager.getVirtualProc(f.getResourceId()); + if (!dispatcher.isTestMode()) { + jobManagerSupport.kill(proc, new Source("This frame is using way more than it had reserved.")); + } + return true; + } catch (EmptyResultDataAccessException e) { + return false; + } + } - // TODO: handle memory management for local dispatches - // Skip local dispatches for now. - if (proc.isLocalDispatch) { - continue; - } + /** + * Kill proc with the worst user/reserved memory ratio. + * + * @param host + * @return killed proc, or null if none could be found + */ + private VirtualProc killWorstMemoryOffender(final DispatchHost host) { + VirtualProc proc; + try { + proc = hostManager.getWorstMemoryOffender(host); + } + catch (EmptyResultDataAccessException e) { + logger.error(host.name + " is under OOM and no proc is running on it."); + return null; + } + logger.info("Killing frame on " + proc.getName() + ", host is under stress."); + DispatchSupport.killedOffenderProcs.incrementAndGet(); - if (f.getRss() > host.memory) { - try{ - logger.info("Killing frame " + f.getJobName() + "/" + f.getFrameName() + ", " - + proc.getName() + " was OOM"); - try { - killQueue.execute(new DispatchRqdKillFrame(proc, "The frame required " + - CueUtil.KbToMb(f.getRss()) + " but the machine only has " + - CueUtil.KbToMb(host.memory), rqdClient)); - } catch (TaskRejectedException e) { - logger.warn("Unable to queue RQD kill, task rejected, " + e); - } - DispatchSupport.killedOomProcs.incrementAndGet(); - } catch (Exception e) { - logger.info("failed to kill frame on " + proc.getName() + - "," + e); - } - } + if (!dispatcher.isTestMode()) { + jobManagerSupport.kill(proc, new Source("The host was dangerously low on memory and swapping.")); + prometheusMetrics.incrementFrameOomKilledCounter(host.getName()); + } - if (dispatchSupport.increaseReservedMemory(proc, f.getRss())) { - proc.memoryReserved = f.getRss(); - logger.info("frame " + f.getFrameName() + " on job " + f.getJobName() - + " increased its reserved memory to " + - CueUtil.KbToMb(f.getRss())); - } + return proc; + } - } catch (ResourceReservationFailureException e) { + /** + * Check frame memory usage comparing the amount used with the amount it had reserved + * @param frame + * @return + */ + private boolean isFrameOverboard(final RunningFrameInfo frame) { + double rss = (double)frame.getRss(); + double maxRss = (double)frame.getMaxRss(); + final double MAX_RSS_OVERBOARD_THRESHOLD = OOM_FRAME_OVERBOARD_ALLOWED_THRESHOLD * 2; + final double RSS_AVAILABLE_FOR_MAX_RSS_TRIGGER = 0.1; + + try { + VirtualProc proc = hostManager.getVirtualProc(frame.getResourceId()); + double reserved = (double)proc.memoryReserved; + + // Last memory report is higher than the threshold + if (isOverboard(rss, reserved, OOM_FRAME_OVERBOARD_ALLOWED_THRESHOLD)) { + return true; + } + // If rss is not overboard, handle the situation where the frame might be going overboard from + // time to time but the last report wasn't during a spike. For this case, consider a combination + // of rss and maxRss. maxRss > 2 * threshold and rss > 0.9 + else { + return isOverboard(maxRss, reserved, MAX_RSS_OVERBOARD_THRESHOLD) && + isOverboard(rss, reserved, -RSS_AVAILABLE_FOR_MAX_RSS_TRIGGER); + } + } catch (EmptyResultDataAccessException e) { + logger.info("HostReportHandler(isFrameOverboard): Virtual proc for frame " + + frame.getFrameName() + " on job " + frame.getJobName() + " doesn't exist on the database"); + // Not able to mark the frame overboard is it couldn't be found on the db. + // Proc accounting (verifyRunningProc) should take care of it + return false; + } + } + + private boolean isOverboard(double value, double total, double threshold) { + return value/total >= (1 + threshold); + } + + /** + * Handle memory reservations for the given frame + * + * @param frame + */ + private void handleMemoryReservations(final RunningFrameInfo frame) { + VirtualProc proc = null; + try { + proc = hostManager.getVirtualProc(frame.getResourceId()); - long memNeeded = f.getRss() - proc.memoryReserved; + if (proc.isLocalDispatch) { + return; + } - logger.info("frame " + f.getFrameName() + " on job " + f.getJobName() + if (dispatchSupport.increaseReservedMemory(proc, frame.getRss())) { + proc.memoryReserved = frame.getRss(); + logger.info("frame " + frame.getFrameName() + " on job " + frame.getJobName() + + " increased its reserved memory to " + + CueUtil.KbToMb(frame.getRss())); + } + } catch (ResourceReservationFailureException e) { + if (proc != null) { + long memNeeded = frame.getRss() - proc.memoryReserved; + logger.info("frame " + frame.getFrameName() + " on job " + frame.getJobName() + "was unable to reserve an additional " + CueUtil.KbToMb(memNeeded) + "on proc " + proc.getName() + ", " + e); - try { if (dispatchSupport.balanceReservedMemory(proc, memNeeded)) { - proc.memoryReserved = f.getRss(); + proc.memoryReserved = frame.getRss(); logger.info("was able to balance host: " + proc.getName()); - } - else { + } else { logger.info("failed to balance host: " + proc.getName()); } } catch (Exception ex) { logger.warn("failed to balance host: " + proc.getName() + ", " + e); } - } catch (EmptyResultDataAccessException e) { - logger.info("HostReportHandler: frame " + f.getFrameName() + - " on job " + f.getJobName() + - " was unable be processed" + - " because the proc could not be found"); - } - } - - if (killMode) { - VirtualProc proc; - try { - proc = hostManager.getWorstMemoryOffender(host); - } - catch (EmptyResultDataAccessException e) { - logger.info(host.name + " is swapping and no proc is running on it."); - return; + } else { + logger.info("frame " + frame.getFrameName() + " on job " + frame.getJobName() + + "was unable to reserve an additional memory. Proc could not be found"); } - - logger.info("Killing frame on " + - proc.getName() + ", host is distressed."); - - DispatchSupport.killedOffenderProcs.incrementAndGet(); - jobManagerSupport.kill(proc, new Source( - "The host was dangerously low on memory and swapping.")); + } catch (EmptyResultDataAccessException e) { + logger.info("HostReportHandler: Memory reservations for frame " + frame.getFrameName() + + " on job " + frame.getJobName() + " proc could not be found"); } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java index 8b176c77e..00b90654c 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java @@ -63,13 +63,12 @@ public interface HostManager { void setHostState(HostInterface host, HardwareState state); /** - * Return true if the host is swapping hard enough - * that killing frames will save the entire machine. + * Updates the freeMcp of a host. * - * @param host - * @return + * @param host HostInterface + * @param freeMcp Long */ - boolean isSwapping(HostInterface host); + void setHostFreeMcp(HostInterface host, Long freeMcp); DispatchHost createHost(HostReport report); DispatchHost createHost(RenderHost host); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java index a7c5b0729..da17458e2 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java @@ -94,9 +94,8 @@ public void setHostState(HostInterface host, HardwareState state) { } @Override - @Transactional(propagation = Propagation.REQUIRED, readOnly=true) - public boolean isSwapping(HostInterface host) { - return hostDao.isKillMode(host); + public void setHostFreeMcp(HostInterface host, Long freeMcp) { + hostDao.updateHostFreeMcp(host, freeMcp); } public void rebootWhenIdle(HostInterface host) { diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java index df965893b..ea9fbe0c2 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java @@ -330,24 +330,6 @@ public void testIsHostLocked() { assertEquals(hostDao.isHostLocked(host),true); } - @Test - @Transactional - @Rollback(true) - public void testIsKillMode() { - hostDao.insertRenderHost(buildRenderHost(TEST_HOST), - hostManager.getDefaultAllocationDetail(), - false); - - HostEntity host = hostDao.findHostDetail(TEST_HOST); - assertFalse(hostDao.isKillMode(host)); - - jdbcTemplate.update( - "UPDATE host_stat SET int_swap_free = ?, int_mem_free = ? WHERE pk_host = ?", - CueUtil.MB256, CueUtil.MB256, host.getHostId()); - - assertTrue(hostDao.isKillMode(host)); - } - @Test @Transactional @Rollback(true) diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerGpuTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerGpuTests.java index dee9d0792..5daf14fe8 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerGpuTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerGpuTests.java @@ -81,13 +81,14 @@ private static RenderHost getRenderHost() { return RenderHost.newBuilder() .setName(HOSTNAME) .setBootTime(1192369572) - .setFreeMcp(76020) - .setFreeMem(53500) - .setFreeSwap(20760) + // The minimum amount of free space in the /mcp directory to book a host. + .setFreeMcp(CueUtil.GB) + .setFreeMem(CueUtil.GB8) + .setFreeSwap(CueUtil.GB2) .setLoad(0) - .setTotalMcp(195430) - .setTotalMem(1048576L * 4096) - .setTotalSwap(20960) + .setTotalMcp(CueUtil.GB4) + .setTotalMem(CueUtil.GB8) + .setTotalSwap(CueUtil.GB2) .setNimbyEnabled(false) .setNumProcs(2) .setCoresPerProc(100) @@ -114,7 +115,7 @@ public void testHandleHostReport() { hostReportHandler.handleHostReport(report, true); DispatchHost host = getHost(); assertEquals(host.lockState, LockState.OPEN); - assertEquals(host.memory, 4294443008L); + assertEquals(host.memory, CueUtil.GB8 - 524288); assertEquals(host.gpus, 64); assertEquals(host.idleGpus, 64); assertEquals(host.gpuMemory, 1048576L * 2048); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerTests.java index d27f76c32..93b943a3d 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HostReportHandlerTests.java @@ -21,9 +21,12 @@ import java.io.File; import java.sql.Timestamp; +import java.util.Arrays; import java.util.List; import javax.annotation.Resource; +import com.imageworks.spcue.dispatcher.DispatchSupport; +import com.imageworks.spcue.dispatcher.HostReportQueue; import org.junit.Before; import org.junit.Test; import org.springframework.test.annotation.Rollback; @@ -31,6 +34,7 @@ import org.springframework.transaction.annotation.Transactional; import com.imageworks.spcue.AllocationEntity; +import com.imageworks.spcue.CommentDetail; import com.imageworks.spcue.DispatchHost; import com.imageworks.spcue.dispatcher.Dispatcher; import com.imageworks.spcue.dispatcher.HostReportHandler; @@ -43,6 +47,7 @@ import com.imageworks.spcue.grpc.report.RenderHost; import com.imageworks.spcue.grpc.report.RunningFrameInfo; import com.imageworks.spcue.service.AdminManager; +import com.imageworks.spcue.service.CommentManager; import com.imageworks.spcue.service.HostManager; import com.imageworks.spcue.service.JobLauncher; import com.imageworks.spcue.service.JobManager; @@ -50,7 +55,10 @@ import com.imageworks.spcue.util.CueUtil; import com.imageworks.spcue.VirtualProc; +import java.util.UUID; + import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; @ContextConfiguration public class HostReportHandlerTests extends TransactionalTest { @@ -73,8 +81,16 @@ public class HostReportHandlerTests extends TransactionalTest { @Resource JobManager jobManager; + @Resource + CommentManager commentManager; + private static final String HOSTNAME = "beta"; private static final String NEW_HOSTNAME = "gamma"; + private String hostname; + private String hostname2; + private static final String SUBJECT_COMMENT_FULL_MCP_DIR = "Host set to REPAIR for not having enough storage " + + "space on /mcp"; + private static final String CUEBOT_COMMENT_USER = "cuebot"; @Before public void setTestMode() { @@ -83,7 +99,11 @@ public void setTestMode() { @Before public void createHost() { - hostManager.createHost(getRenderHost(), + hostname = UUID.randomUUID().toString().substring(0, 8); + hostname2 = UUID.randomUUID().toString().substring(0, 8); + hostManager.createHost(getRenderHost(hostname), + adminManager.findAllocationDetail("spi","general")); + hostManager.createHost(getRenderHost(hostname2), adminManager.findAllocationDetail("spi","general")); } @@ -96,44 +116,50 @@ private static CoreDetail getCoreDetail(int total, int idle, int booked, int loc .build(); } - private DispatchHost getHost() { - return hostManager.findDispatchHost(HOSTNAME); + private DispatchHost getHost(String hostname) { + return hostManager.findDispatchHost(hostname); } - private static RenderHost getRenderHost() { + private static RenderHost.Builder getRenderHostBuilder(String hostname) { return RenderHost.newBuilder() - .setName(HOSTNAME) + .setName(hostname) .setBootTime(1192369572) - .setFreeMcp(76020) - .setFreeMem((int) CueUtil.GB8) - .setFreeSwap(20760) + // The minimum amount of free space in the /mcp directory to book a host. + .setFreeMcp(CueUtil.GB) + .setFreeMem(CueUtil.GB8) + .setFreeSwap(CueUtil.GB2) .setLoad(0) - .setTotalMcp(195430) + .setTotalMcp(CueUtil.GB4) .setTotalMem(CueUtil.GB8) .setTotalSwap(CueUtil.GB2) .setNimbyEnabled(false) - .setNumProcs(2) + .setNumProcs(16) .setCoresPerProc(100) .addTags("test") .setState(HardwareState.UP) .setFacility("spi") .putAttributes("SP_OS", "Linux") - .setFreeGpuMem((int) CueUtil.MB512) - .setTotalGpuMem((int) CueUtil.MB512) - .build(); + .setNumGpus(0) + .setFreeGpuMem(0) + .setTotalGpuMem(0); + } + + private static RenderHost getRenderHost(String hostname) { + return getRenderHostBuilder(hostname).build(); } private static RenderHost getNewRenderHost(String tags) { return RenderHost.newBuilder() .setName(NEW_HOSTNAME) .setBootTime(1192369572) - .setFreeMcp(76020) - .setFreeMem(53500) - .setFreeSwap(20760) + // The minimum amount of free space in the /mcp directory to book a host. + .setFreeMcp(CueUtil.GB) + .setFreeMem(CueUtil.GB8) + .setFreeSwap(CueUtil.GB2) .setLoad(0) .setTotalMcp(195430) - .setTotalMem(8173264) - .setTotalSwap(20960) + .setTotalMem(CueUtil.GB8) + .setTotalSwap(CueUtil.GB2) .setNimbyEnabled(false) .setNumProcs(2) .setCoresPerProc(100) @@ -149,17 +175,40 @@ private static RenderHost getNewRenderHost(String tags) { @Test @Transactional @Rollback(true) - public void testHandleHostReport() { - boolean isBoot = false; + public void testHandleHostReport() throws InterruptedException { CoreDetail cores = getCoreDetail(200, 200, 0, 0); - HostReport report = HostReport.newBuilder() - .setHost(getRenderHost()) + HostReport report1 = HostReport.newBuilder() + .setHost(getRenderHost(hostname)) .setCoreInfo(cores) .build(); + HostReport report2 = HostReport.newBuilder() + .setHost(getRenderHost(hostname2)) + .setCoreInfo(cores) + .build(); + HostReport report1_2 = HostReport.newBuilder() + .setHost(getRenderHost(hostname)) + .setCoreInfo(getCoreDetail(200, 200, 100, 0)) + .build(); - hostReportHandler.handleHostReport(report, isBoot); - DispatchHost host = getHost(); - assertEquals(host.lockState, LockState.OPEN); + hostReportHandler.handleHostReport(report1, false, System.currentTimeMillis()); + DispatchHost host = getHost(hostname); + assertEquals(LockState.OPEN, host.lockState); + assertEquals(HardwareState.UP, host.hardwareState); + hostReportHandler.handleHostReport(report1_2, false, System.currentTimeMillis()); + host = getHost(hostname); + assertEquals(HardwareState.UP, host.hardwareState); + + // Test Queue thread handling + HostReportQueue queue = hostReportHandler.getReportQueue(); + // Make sure jobs flow normally without any nullpointer exception + // Expecting results from a ThreadPool based class on JUnit is tricky + // A future test will be developed in the future to better address the behavior of + // this feature + hostReportHandler.queueHostReport(report1); // HOSTNAME + hostReportHandler.queueHostReport(report2); // HOSTNAME2 + hostReportHandler.queueHostReport(report1); // HOSTNAME + hostReportHandler.queueHostReport(report1); // HOSTNAME + hostReportHandler.queueHostReport(report1_2); // HOSTNAME } @Test @@ -183,7 +232,7 @@ public void testHandleHostReportWithNewAllocation() { .setCoreInfo(cores) .build(); - hostReportHandler.handleHostReport(report, isBoot); + hostReportHandler.handleHostReport(report, isBoot, System.currentTimeMillis()); DispatchHost host = hostManager.findDispatchHost(NEW_HOSTNAME); assertEquals(host.getAllocationId(), detail.id); } @@ -203,7 +252,7 @@ public void testHandleHostReportWithExistentAllocation() { .setCoreInfo(cores) .build(); - hostReportHandler.handleHostReport(report, isBoot); + hostReportHandler.handleHostReport(report, isBoot, System.currentTimeMillis()); DispatchHost host = hostManager.findDispatchHost(NEW_HOSTNAME); assertEquals(host.getAllocationId(), alloc.id); } @@ -223,11 +272,143 @@ public void testHandleHostReportWithNonExistentTags() { .setCoreInfo(cores) .build(); - hostReportHandler.handleHostReport(report, isBoot); + hostReportHandler.handleHostReport(report, isBoot, System.currentTimeMillis()); DispatchHost host = hostManager.findDispatchHost(NEW_HOSTNAME); assertEquals(host.getAllocationId(), alloc.id); } + @Test + @Transactional + @Rollback(true) + public void testHandleHostReportWithFullMCPDirectories() { + // Create CoreDetail + CoreDetail cores = getCoreDetail(200, 200, 0, 0); + + /* + * Test 1: + * Precondition: + * - HardwareState=UP + * Action: + * - Receives a HostReport with freeMCP < dispatcher.min_bookable_free_mcp_kb (opencue.properties) + * Postcondition: + * - Host hardwareState changes to REPAIR + * - A comment is created with subject=SUBJECT_COMMENT_FULL_MCP_DIR and user=CUEBOT_COMMENT_USER + * */ + // Create HostReport + HostReport report1 = HostReport.newBuilder() + .setHost(getRenderHostBuilder(hostname).setFreeMcp(1024L).build()) + .setCoreInfo(cores) + .build(); + // Call handleHostReport() => Create the comment with subject=SUBJECT_COMMENT_FULL_MCP_DIR and change the host's + // hardwareState to REPAIR + hostReportHandler.handleHostReport(report1, false, System.currentTimeMillis()); + // Get host + DispatchHost host = getHost(hostname); + // Get list of comments by host, user, and subject + List comments = commentManager.getCommentsByHostUserAndSubject(host, CUEBOT_COMMENT_USER, + SUBJECT_COMMENT_FULL_MCP_DIR); + // Check if there is 1 comment + assertEquals(comments.size(), 1); + // Get host comment + CommentDetail comment = comments.get(0); + // Check if the comment has the user = CUEBOT_COMMENT_USER + assertEquals(comment.user, CUEBOT_COMMENT_USER); + // Check if the comment has the subject = SUBJECT_COMMENT_FULL_MCP_DIR + assertEquals(comment.subject, SUBJECT_COMMENT_FULL_MCP_DIR); + // Check host lock state + assertEquals(LockState.OPEN, host.lockState); + // Check if host hardware state is REPAIR + assertEquals(HardwareState.REPAIR, host.hardwareState); + // Test Queue thread handling + HostReportQueue queue = hostReportHandler.getReportQueue(); + // Make sure jobs flow normally without any nullpointer exception + hostReportHandler.queueHostReport(report1); // HOSTNAME + hostReportHandler.queueHostReport(report1); // HOSTNAME + + /* + * Test 2: + * Precondition: + * - HardwareState=REPAIR + * - There is a comment for the host with subject=SUBJECT_COMMENT_FULL_MCP_DIR and user=CUEBOT_COMMENT_USER + * Action: + * - Receives a HostReport with freeMCP >= dispatcher.min_bookable_free_mcp_kb (opencue.properties) + * Postcondition: + * - Host hardwareState changes to UP + * - Comment with subject=SUBJECT_COMMENT_FULL_MCP_DIR and user=CUEBOT_COMMENT_USER gets deleted + * */ + // Set the host freeMcp to the minimum size required = 1GB (1048576 KB) + HostReport report2 = HostReport.newBuilder() + .setHost(getRenderHostBuilder(hostname).setFreeMcp(CueUtil.GB).build()) + .setCoreInfo(cores) + .build(); + // Call handleHostReport() => Delete the comment with subject=SUBJECT_COMMENT_FULL_MCP_DIR and change the host's + // hardwareState to UP + hostReportHandler.handleHostReport(report2, false, System.currentTimeMillis()); + // Get host + host = getHost(hostname); + // Get list of comments by host, user, and subject + comments = commentManager.getCommentsByHostUserAndSubject(host, CUEBOT_COMMENT_USER, + SUBJECT_COMMENT_FULL_MCP_DIR); + // Check if there is no comment associated with the host + assertEquals(comments.size(), 0); + // Check host lock state + assertEquals(LockState.OPEN, host.lockState); + // Check if host hardware state is UP + assertEquals(HardwareState.UP, host.hardwareState); + // Test Queue thread handling + queue = hostReportHandler.getReportQueue(); + // Make sure jobs flow normally without any nullpointer exception + hostReportHandler.queueHostReport(report1); // HOSTNAME + hostReportHandler.queueHostReport(report1); // HOSTNAME + } + + @Test + @Transactional + @Rollback(true) + public void testHandleHostReportWithHardwareStateRepairNotRelatedToFullMCPdirectories() { + // Create CoreDetail + CoreDetail cores = getCoreDetail(200, 200, 0, 0); + + /* + * Test if host.hardwareState == HardwareState.REPAIR + * (Not related to freeMcp < dispatcher.min_bookable_free_mcp_kb (opencue.properties)) + * + * - There is no comment with subject=SUBJECT_COMMENT_FULL_MCP_DIR and user=CUEBOT_COMMENT_USER associated with + * the host + * The host.hardwareState continue as HardwareState.REPAIR + * */ + // Create HostReport + HostReport report = HostReport.newBuilder() + .setHost(getRenderHostBuilder(hostname).setFreeMcp(CueUtil.GB).build()) + .setCoreInfo(cores) + .build(); + // Get host + DispatchHost host = getHost(hostname); + // Host's HardwareState set to REPAIR + hostManager.setHostState(host, HardwareState.REPAIR); + host.hardwareState = HardwareState.REPAIR; + // Get list of comments by host, user, and subject + List hostComments = commentManager.getCommentsByHostUserAndSubject(host, CUEBOT_COMMENT_USER, + SUBJECT_COMMENT_FULL_MCP_DIR); + // Check if there is no comment + assertEquals(hostComments.size(), 0); + // There is no comment to delete + boolean commentsDeleted = commentManager.deleteCommentByHostUserAndSubject(host, + CUEBOT_COMMENT_USER, SUBJECT_COMMENT_FULL_MCP_DIR); + assertFalse(commentsDeleted); + // Call handleHostReport() + hostReportHandler.handleHostReport(report, false, System.currentTimeMillis()); + // Check host lock state + assertEquals(LockState.OPEN, host.lockState); + // Check if host hardware state is REPAIR + assertEquals(HardwareState.REPAIR, host.hardwareState); + // Test Queue thread handling + HostReportQueue queueThread = hostReportHandler.getReportQueue(); + // Make sure jobs flow normally without any nullpointer exception + hostReportHandler.queueHostReport(report); // HOSTNAME + hostReportHandler.queueHostReport(report); // HOSTNAME + } + @Test @Transactional @Rollback(true) @@ -235,7 +416,7 @@ public void testMemoryAndLlu() { jobLauncher.testMode = true; jobLauncher.launch(new File("src/test/resources/conf/jobspec/jobspec_simple.xml")); - DispatchHost host = getHost(); + DispatchHost host = getHost(hostname); List procs = dispatcher.dispatchHost(host); assertEquals(1, procs.size()); VirtualProc proc = procs.get(0); @@ -252,16 +433,145 @@ public void testMemoryAndLlu() { .setMaxRss(420000) .build(); HostReport report = HostReport.newBuilder() - .setHost(getRenderHost()) + .setHost(getRenderHost(hostname)) .setCoreInfo(cores) .addFrames(info) .build(); - hostReportHandler.handleHostReport(report, false); + hostReportHandler.handleHostReport(report, false, System.currentTimeMillis()); FrameDetail frame = jobManager.getFrameDetail(proc.getFrameId()); assertEquals(frame.dateLLU, new Timestamp(now / 1000 * 1000)); assertEquals(420000, frame.maxRss); } + + @Test + @Transactional + @Rollback(true) + public void testMemoryAggressionRss() { + jobLauncher.testMode = true; + jobLauncher.launch(new File("src/test/resources/conf/jobspec/jobspec_simple.xml")); + + DispatchHost host = getHost(hostname); + List procs = dispatcher.dispatchHost(host); + assertEquals(1, procs.size()); + VirtualProc proc = procs.get(0); + + long memoryOverboard = (long) Math.ceil((double) proc.memoryReserved * + (1.0 + Dispatcher.OOM_FRAME_OVERBOARD_ALLOWED_THRESHOLD)); + + // Test rss overboard + RunningFrameInfo info = RunningFrameInfo.newBuilder() + .setJobId(proc.getJobId()) + .setLayerId(proc.getLayerId()) + .setFrameId(proc.getFrameId()) + .setResourceId(proc.getProcId()) + .setRss(memoryOverboard) + .setMaxRss(memoryOverboard) + .build(); + HostReport report = HostReport.newBuilder() + .setHost(getRenderHost(hostname)) + .setCoreInfo(getCoreDetail(200, 200, 0, 0)) + .addFrames(info) + .build(); + + long killCount = DispatchSupport.killedOffenderProcs.get(); + hostReportHandler.handleHostReport(report, false, System.currentTimeMillis()); + assertEquals(killCount + 1, DispatchSupport.killedOffenderProcs.get()); + } + + @Test + @Transactional + @Rollback(true) + public void testMemoryAggressionMaxRss() { + jobLauncher.testMode = true; + jobLauncher.launch(new File("src/test/resources/conf/jobspec/jobspec_simple.xml")); + + DispatchHost host = getHost(hostname); + List procs = dispatcher.dispatchHost(host); + assertEquals(1, procs.size()); + VirtualProc proc = procs.get(0); + + long memoryOverboard = (long) Math.ceil((double) proc.memoryReserved * + (1.0 + (2 * Dispatcher.OOM_FRAME_OVERBOARD_ALLOWED_THRESHOLD))); + + // Test rss>90% and maxRss overboard + RunningFrameInfo info = RunningFrameInfo.newBuilder() + .setJobId(proc.getJobId()) + .setLayerId(proc.getLayerId()) + .setFrameId(proc.getFrameId()) + .setResourceId(proc.getProcId()) + .setRss((long)Math.ceil(0.95 * proc.memoryReserved)) + .setMaxRss(memoryOverboard) + .build(); + HostReport report = HostReport.newBuilder() + .setHost(getRenderHost(hostname)) + .setCoreInfo(getCoreDetail(200, 200, 0, 0)) + .addFrames(info) + .build(); + + long killCount = DispatchSupport.killedOffenderProcs.get(); + hostReportHandler.handleHostReport(report, false, System.currentTimeMillis()); + assertEquals(killCount + 1, DispatchSupport.killedOffenderProcs.get()); + } + + @Test + @Transactional + @Rollback(true) + public void testMemoryAggressionMemoryWarning() { + jobLauncher.testMode = true; + dispatcher.setTestMode(true); + jobLauncher.launch(new File("src/test/resources/conf/jobspec/jobspec_multiple_frames.xml")); + + DispatchHost host = getHost(hostname); + List procs = dispatcher.dispatchHost(host); + assertEquals(3, procs.size()); + VirtualProc proc1 = procs.get(0); + VirtualProc proc2 = procs.get(1); + VirtualProc proc3 = procs.get(2); + + // Ok + RunningFrameInfo info1 = RunningFrameInfo.newBuilder() + .setJobId(proc1.getJobId()) + .setLayerId(proc1.getLayerId()) + .setFrameId(proc1.getFrameId()) + .setResourceId(proc1.getProcId()) + .setRss(CueUtil.GB2) + .setMaxRss(CueUtil.GB2) + .build(); + + // Overboard Rss + RunningFrameInfo info2 = RunningFrameInfo.newBuilder() + .setJobId(proc2.getJobId()) + .setLayerId(proc2.getLayerId()) + .setFrameId(proc2.getFrameId()) + .setResourceId(proc2.getProcId()) + .setRss(CueUtil.GB4) + .setMaxRss(CueUtil.GB4) + .build(); + + // Overboard Rss + RunningFrameInfo info3 = RunningFrameInfo.newBuilder() + .setJobId(proc3.getJobId()) + .setLayerId(proc3.getLayerId()) + .setFrameId(proc3.getFrameId()) + .setResourceId(proc3.getProcId()) + .setRss(CueUtil.GB4) + .setMaxRss(CueUtil.GB4) + .build(); + + RenderHost hostAfterUpdate = getRenderHostBuilder(hostname).setFreeMem(0).build(); + + HostReport report = HostReport.newBuilder() + .setHost(hostAfterUpdate) + .setCoreInfo(getCoreDetail(200, 200, 0, 0)) + .addAllFrames(Arrays.asList(info1, info2, info3)) + .build(); + + // In this case, killing one job should be enough to ge the machine to a safe state + long killCount = DispatchSupport.killedOffenderProcs.get(); + hostReportHandler.handleHostReport(report, false, System.currentTimeMillis()); + assertEquals(killCount + 1, DispatchSupport.killedOffenderProcs.get()); + } } diff --git a/cuebot/src/test/resources/conf/jobspec/jobspec_multiple_frames.xml b/cuebot/src/test/resources/conf/jobspec/jobspec_multiple_frames.xml new file mode 100644 index 000000000..3baa0b22b --- /dev/null +++ b/cuebot/src/test/resources/conf/jobspec/jobspec_multiple_frames.xml @@ -0,0 +1,48 @@ + + + + + + + + + spi + pipe + default + testuser + 9860 + + + False + 2 + False + + + + echo hello + 1-3 + 1 + 2gb + + + shell + + + + + + diff --git a/cuebot/src/test/resources/opencue.properties b/cuebot/src/test/resources/opencue.properties index 334408470..0203ee6f7 100644 --- a/cuebot/src/test/resources/opencue.properties +++ b/cuebot/src/test/resources/opencue.properties @@ -38,7 +38,7 @@ dispatcher.job_query_max=20 dispatcher.job_lock_expire_seconds=2 dispatcher.job_lock_concurrency_level=3 dispatcher.frame_query_max=10 -dispatcher.job_frame_dispatch_max=2 +dispatcher.job_frame_dispatch_max=3 dispatcher.host_frame_dispatch_max=12 dispatcher.launch_queue.core_pool_size=1