-
Notifications
You must be signed in to change notification settings - Fork 36
not return estimated minutes remaining until cold start is finished #210
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,6 +76,9 @@ public AnomalyDetectorProfileRunner( | |
this.client = client; | ||
this.xContentRegistry = xContentRegistry; | ||
this.nodeFilter = nodeFilter; | ||
if (requiredSamples <= 0) { | ||
throw new IllegalArgumentException("required samples should be a positive number, but was " + requiredSamples); | ||
} | ||
this.requiredSamples = requiredSamples; | ||
} | ||
|
||
|
@@ -287,14 +290,7 @@ private ActionListener<GetResponse> onGetDetectorForInitProgress( | |
ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); | ||
AnomalyDetector detector = AnomalyDetector.parse(parser, detectorId); | ||
long intervalMins = ((IntervalTimeConfiguration) detector.getDetectionInterval()).toDuration().toMinutes(); | ||
float percent = (100.0f * totalUpdates) / requiredSamples; | ||
int neededPoints = (int) (requiredSamples - totalUpdates); | ||
InitProgressProfile initProgress = new InitProgressProfile( | ||
// rounding: 93.456 => 93%, 93.556 => 94% | ||
String.format("%.0f%%", percent), | ||
intervalMins * neededPoints, | ||
neededPoints | ||
); | ||
InitProgressProfile initProgress = computeInitProgressProfile(totalUpdates, intervalMins); | ||
|
||
listener.onResponse(new DetectorProfile.Builder().initProgress(initProgress).build()); | ||
} catch (Exception t) { | ||
|
@@ -308,6 +304,17 @@ private ActionListener<GetResponse> onGetDetectorForInitProgress( | |
}, exception -> { listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId, exception); }); | ||
} | ||
|
||
private InitProgressProfile computeInitProgressProfile(long totalUpdates, long intervalMins) { | ||
float percent = (100.0f * totalUpdates) / requiredSamples; | ||
ohltyler marked this conversation as resolved.
Show resolved
Hide resolved
|
||
int neededPoints = (int) (requiredSamples - totalUpdates); | ||
return new InitProgressProfile( | ||
// rounding: 93.456 => 93%, 93.556 => 94% | ||
String.format("%.0f%%", percent), | ||
intervalMins * neededPoints, | ||
neededPoints | ||
); | ||
} | ||
|
||
private void profileModels( | ||
String detectorId, | ||
Set<ProfileName> profiles, | ||
|
@@ -357,7 +364,7 @@ private ActionListener<RCFPollingResponse> onPollRCFUpdates( | |
return ActionListener.wrap(rcfPollResponse -> { | ||
long totalUpdates = rcfPollResponse.getTotalUpdates(); | ||
if (totalUpdates < requiredSamples) { | ||
processInitResponse(detectorId, profilesToCollect, listener, totalUpdates); | ||
processInitResponse(detectorId, profilesToCollect, listener, totalUpdates, false); | ||
} else { | ||
if (profilesToCollect.contains(ProfileName.STATE)) { | ||
listener.onResponse(new DetectorProfile.Builder().state(DetectorState.RUNNING).build()); | ||
|
@@ -379,7 +386,11 @@ private ActionListener<RCFPollingResponse> onPollRCFUpdates( | |
|| (causeException instanceof IndexNotFoundException | ||
&& causeException.getMessage().contains(CommonName.CHECKPOINT_INDEX_NAME))) { | ||
// cannot find checkpoint | ||
processInitResponse(detectorId, profilesToCollect, listener, 0L); | ||
// We don't want to show the estimated time remaining to initialize | ||
// a detector before cold start finishes, where the actual | ||
// initialization time may be much shorter if sufficient historical | ||
// data exists. | ||
processInitResponse(detectorId, profilesToCollect, listener, 0L, true); | ||
} else { | ||
logger.error(new ParameterizedMessage("Fail to get init progress through messaging for {}", detectorId), exception); | ||
listener.failImmediately(FAIL_TO_GET_PROFILE_MSG + detectorId, exception); | ||
|
@@ -391,19 +402,26 @@ private void processInitResponse( | |
String detectorId, | ||
Set<ProfileName> profilesToCollect, | ||
MultiResponsesDelegateActionListener<DetectorProfile> listener, | ||
long totalUpdates | ||
long totalUpdates, | ||
boolean hideMinutesLeft | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be hide estimate, which affects minutes left and data points needed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you meant the variable name should be hideEstimate? It only affect minutes left, not data points needed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see the data needed and time needed together. Before model training is done, they should be hidden together by a message such as "Model training is starting soon". Once that is done, the detailed information on needed data and time is now shown, such as "need x data and y time". It'd be confusing to see "need x data and no time". What does the ux look like in this case? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as ux goes, we will probably add an intermediate callout saying something like "attempting to initialize with existing/historical data", and show that callout until we can retrieve I guess it's kind of arbitrary on if we hide minutes left and/or data points left, but need some way to indicate on the frontend that the cold start process isn't finished. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The initial temporal call out is informative enough. Before a model is trained, both data and time that the (non-existent) model will need to wait for should be equally unknown. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we have a separate callout, both of these would be ignored as far as frontend is concerned. Wouldn't we want a different callout in this case? Seems that we wouldn't want to show 0% progress either, when that may immediately go to 100% and disappear if there is sufficient historical data. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The call out for the case before model training may just be "attempting to initialize with existing/historical data"/"Model training is starting soon", no data/time/progress needed to cause confusion or panic (yet). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. First, UX does not show needed shingles. They only look at progress and estimated time. Second, as an API user, I don't feel it causes confusion when we show needed shingles. For example, one minute ago, we need 128 shingles. After 1 minute, we need 0. It means we find enough training data. At that particular moment, the needed shingles are accurate. If cold start is stuck or retrying, the number also gives clarification on the state. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not apply a more consistent logic to make code more maintainable and understandable? that is before a model is created, the time and data it needs to wait is either both unknown or both guessed. Or, what's the benefit of having mixed results from different logic for each field? this is a question not a blocker There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The data it needs to wait is not guessed. The time is. Without a checkpoint, it is accurate to say we need 128 shingles. The benefit is provide transparency instead of guessing. Thanks for not blocking. |
||
) { | ||
if (profilesToCollect.contains(ProfileName.STATE)) { | ||
listener.onResponse(new DetectorProfile.Builder().state(DetectorState.INIT).build()); | ||
} | ||
|
||
if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { | ||
GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); | ||
client | ||
.get( | ||
getDetectorRequest, | ||
onGetDetectorForInitProgress(listener, detectorId, profilesToCollect, totalUpdates, requiredSamples) | ||
); | ||
if (hideMinutesLeft) { | ||
InitProgressProfile initProgress = computeInitProgressProfile(totalUpdates, 0); | ||
listener.onResponse(new DetectorProfile.Builder().initProgress(initProgress).build()); | ||
} else { | ||
GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); | ||
client | ||
.get( | ||
getDetectorRequest, | ||
onGetDetectorForInitProgress(listener, detectorId, profilesToCollect, totalUpdates, requiredSamples) | ||
); | ||
} | ||
|
||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
testing. the testing of this condition is missing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added