Skip to content

Commit

Permalink
pass the correct shingleSize to ThresholdedRandomCutForest
Browse files Browse the repository at this point in the history
Previously, I used shingleSize 1 for externally shingled ThresholdedRandomCutForest because of the double multiplication with shingle size in RCF. Now RCF has fixed the issue. This commit adds new RCF libraries from aws/random-cut-forest-by-aws#278 and passes the correct shingleSize to ThresholdedRandomCutForest.

This commit adds new RCF libraries from aws/random-cut-forest-by-aws#278 and passes the correct shingleSize to ThresholdedRandomCutForest.
  • Loading branch information
kaituo committed Sep 27, 2021
1 parent 0184e88 commit 2b93e24
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 10 deletions.
Binary file modified lib/randomcutforest-core-2.0.1.jar
Binary file not shown.
Binary file modified lib/randomcutforest-parkservices-2.0.1.jar
Binary file not shown.
6 changes: 2 additions & 4 deletions src/main/java/org/opensearch/ad/ml/ModelManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -469,8 +469,7 @@ private void trainModelForStep(
.compact(true)
.precision(Precision.FLOAT_32)
.boundingBoxCacheFraction(AnomalyDetectorSettings.REAL_TIME_BOUNDING_BOX_CACHE_RATIO)
// for external shingling, rcf does not recognize shingle
.shingleSize(1)
.shingleSize(detector.getShingleSize())
.anomalyRate(1 - thresholdMinPvalue)
.build();
Arrays.stream(dataPoints).forEach(s -> trcf.process(s, 0));
Expand Down Expand Up @@ -559,8 +558,7 @@ public List<ThresholdingResult> getPreviewResults(double[][] dataPoints, int shi
.compact(true)
.precision(Precision.FLOAT_32)
.boundingBoxCacheFraction(AnomalyDetectorSettings.BATCH_BOUNDING_BOX_CACHE_RATIO)
// for external shingling, rcf does not recognize shingle
.shingleSize(1)
.shingleSize(shingleSize)
.anomalyRate(1 - this.thresholdMinPvalue)
.build();
return Arrays.stream(dataPoints).map(point -> {
Expand Down
6 changes: 1 addition & 5 deletions src/main/java/org/opensearch/ad/task/ADBatchTaskCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,7 @@ protected ADBatchTaskCache(ADTask adTask) {
.compact(true)
.precision(Precision.FLOAT_32)
.boundingBoxCacheFraction(AnomalyDetectorSettings.BATCH_BOUNDING_BOX_CACHE_RATIO)
// for external shingling, rcf does not recognize shingle. Thus, shingle size
// is 1 here.
// shingle in detector config and shingle size here are different things.
// shingle size in detector config impacts dimensions.
.shingleSize(1)
.shingleSize(shingleSize)
.anomalyRate(1 - AnomalyDetectorSettings.THRESHOLD_MIN_PVALUE)
.build();

Expand Down
3 changes: 2 additions & 1 deletion src/test/java/org/opensearch/ad/ml/ModelManagerTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ public void setup() {
when(this.modelState.getModel()).thenReturn(this.entityModel);
when(this.entityModel.getTrcf()).thenReturn(Optional.of(this.trcf));
settings = Settings.builder().put("plugins.anomaly_detection.model_max_size_percent", modelMaxSizePercentage).build();

when(anomalyDetector.getShingleSize()).thenReturn(shingleSize);
}

private Object[] getDetectorIdForModelIdData() {
Expand Down Expand Up @@ -392,7 +394,6 @@ public void getRcfResult_throwToListener_whenHeapLimitExceed() {
}).when(checkpointDao).getTRCFModel(eq(rcfModelId), any(ActionListener.class));

when(jvmService.info().getMem().getHeapMax().getBytes()).thenReturn(1_000L);
when(anomalyDetector.getShingleSize()).thenReturn(shingleSize);
final Set<Setting<?>> settingsSet = Stream
.concat(
ClusterSettings.BUILT_IN_CLUSTER_SETTINGS.stream(),
Expand Down

0 comments on commit 2b93e24

Please sign in to comment.