Skip to content

Commit

Permalink
Detector Param Tuning
Browse files Browse the repository at this point in the history
Summary: Minor changes to support metalearning for detection

Reviewed By: yangbk560

Differential Revision: D28313594

fbshipit-source-id: 9e21ed88b40ed4a9153508a9548652a0638903b9
  • Loading branch information
rohanfb authored and facebook-github-bot committed Jun 9, 2021
1 parent b84c91f commit 33d3d5c
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 47 deletions.
100 changes: 65 additions & 35 deletions kats/detectors/bocpd_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,22 @@
algorithm as a DetectorModel, to provide a common interface.
"""

from typing import Optional
import pandas as pd
import json
from typing import Optional

from kats.detectors.detector import DetectorModel

import pandas as pd
from kats.consts import TimeSeriesData

from kats.detectors.bocpd import (
BOCPDetector,
BOCPDModelType,
)

from kats.detectors.detector import DetectorModel
from kats.detectors.detector_consts import (
AnomalyResponse,
ConfidenceBand,
)


class BocpdDetectorModel(DetectorModel):
"""Implements the Bayesian Online Changepoint Detection as a DetectorModel.
Expand All @@ -43,16 +41,25 @@ class BocpdDetectorModel(DetectorModel):
>>> anom = bocpd_detector.fit_predict(data=level_ts)
"""

def __init__(self, serialized_model: Optional[bytes] = None, slow_drift: bool = False):
def __init__(
self,
serialized_model: Optional[bytes] = None,
slow_drift: bool = False,
threshold: Optional[float] = None,
):
if serialized_model is None:
self.slow_drift = slow_drift
self.threshold = threshold
else:
model_dict = json.loads(serialized_model)
if 'slow_drift' in model_dict:
self.slow_drift = model_dict['slow_drift']
if "slow_drift" in model_dict:
self.slow_drift = model_dict["slow_drift"]
else:
self.slow_drift = slow_drift

if "threshold" in model_dict:
self.threshold = model_dict["threshold"]
else:
self.threshold = threshold

def serialize(self) -> bytes:
"""Returns the serialzed model.
Expand All @@ -64,7 +71,7 @@ def serialize(self) -> bytes:
json containing information about serialized model.
"""

model_dict = {'slow_drift': self.slow_drift}
model_dict = {"slow_drift": self.slow_drift}
return json.dumps(model_dict).encode("utf-8")

def _handle_missing_data_extend(
Expand All @@ -76,10 +83,7 @@ def _handle_missing_data_extend(
# but we will remove the interpolated data when we
# evaluate, to make sure that the anomaly score is
# the same length as data
original_time_list = (
list(historical_data.time)
+ list(data.time)
)
original_time_list = list(historical_data.time) + list(data.time)

if historical_data.is_data_missing():
historical_data = historical_data.interpolate()
Expand All @@ -90,20 +94,27 @@ def _handle_missing_data_extend(

# extend has been done, now remove the interpolated data
data = TimeSeriesData(
pd.DataFrame({
'time':[
historical_data.time.iloc[i] for i in range(len(historical_data))
if historical_data.time.iloc[i] in original_time_list],
'value':[
historical_data.value.iloc[i] for i in range(len(historical_data))
if historical_data.time.iloc[i] in original_time_list]
}),
use_unix_time=True, unix_time_units="s", tz="US/Pacific"
pd.DataFrame(
{
"time": [
historical_data.time.iloc[i]
for i in range(len(historical_data))
if historical_data.time.iloc[i] in original_time_list
],
"value": [
historical_data.value.iloc[i]
for i in range(len(historical_data))
if historical_data.time.iloc[i] in original_time_list
],
}
),
use_unix_time=True,
unix_time_units="s",
tz="US/Pacific",
)

return data


# pyre-fixme[14]: `fit_predict` overrides method defined in `DetectorModel`
# inconsistently.
def fit_predict(
Expand All @@ -129,7 +140,7 @@ def fit_predict(
# pyre-fixme[16]: `BocpdDetectorModel` has no attribute `last_N`.
self.last_N = len(data)

#if there is historical data
# if there is historical data
# we prepend it to data, and run
# the detector as if we only saw data
if historical_data is not None:
Expand All @@ -138,20 +149,39 @@ def fit_predict(
bocpd_model = BOCPDetector(data=data)

if not self.slow_drift:
_ = bocpd_model.detector(
model=BOCPDModelType.NORMAL_KNOWN_MODEL, choose_priors=True,
agg_cp=True
)

if self.threshold is not None:
_ = bocpd_model.detector(
model=BOCPDModelType.NORMAL_KNOWN_MODEL,
choose_priors=True,
agg_cp=True,
threshold=self.threshold,
)
else:
_ = bocpd_model.detector(
model=BOCPDModelType.NORMAL_KNOWN_MODEL,
choose_priors=True,
agg_cp=True,
)
else:
_ = bocpd_model.detector(
model=BOCPDModelType.TREND_CHANGE_MODEL, choose_priors=False,
agg_cp=True
)
if self.threshold is not None:
_ = bocpd_model.detector(
model=BOCPDModelType.NORMAL_KNOWN_MODEL,
choose_priors=True,
agg_cp=True,
threshold=self.threshold,
)
else:
_ = bocpd_model.detector(
model=BOCPDModelType.TREND_CHANGE_MODEL,
choose_priors=False,
agg_cp=True,
)

change_prob_dict = bocpd_model.get_change_prob()
change_prob = list(change_prob_dict.values())[0]

#construct the object
# construct the object
N = len(data)
default_ts = TimeSeriesData(time=data.time, value=pd.Series(N * [0.0]))
score_ts = TimeSeriesData(time=data.time, value=pd.Series(change_prob))
Expand Down
2 changes: 0 additions & 2 deletions kats/detectors/changepoint_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,6 @@ def _parse_data(self, df_row: Any):
this_ts = df_row['time_series']
this_anno = df_row['annotation']

print(this_dataset)

this_anno_json_acc = this_anno.replace("'", "\"")
this_anno_dict = json.loads(this_anno_json_acc)

Expand Down
23 changes: 18 additions & 5 deletions kats/detectors/cusum_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import logging
from datetime import datetime
from enum import Enum
from typing import Any, List, Optional
from typing import Any, List, Optional, Union

import numpy as np
import pandas as pd
Expand All @@ -49,7 +49,6 @@
CHANGEPOINT_RETENTION = 7 * 24 * 60 * 60 # in seconds
MAX_CHANGEPOINT = 10


def percentage_change(
data: TimeSeriesData, pre_mean: float, **kwargs: Any
) -> TimeSeriesData:
Expand Down Expand Up @@ -94,12 +93,18 @@ class CusumScoreFunction(Enum):
percentage_change = "percentage_change"
z_score = "z_score"


# Score Function Constants
SCORE_FUNC_DICT = {
CusumScoreFunction.change.value: change,
CusumScoreFunction.percentage_change.value: percentage_change,
CusumScoreFunction.z_score.value: z_score,
}
DEFAULT_SCORE_FUNCTION = CusumScoreFunction.change
STR_TO_SCORE_FUNC = { # Used for param tuning
"change": CusumScoreFunction.change,
"percentage_change": CusumScoreFunction.percentage_change,
"z_score": CusumScoreFunction.z_score,
}


class CUSUMDetectorModel(DetectorModel):
Expand Down Expand Up @@ -139,7 +144,7 @@ def __init__(
magnitude_quantile: float = CUSUM_DEFAULT_ARGS["magnitude_quantile"],
magnitude_ratio: float = CUSUM_DEFAULT_ARGS["magnitude_ratio"],
change_directions: List[str] = CUSUM_DEFAULT_ARGS["change_directions"],
score_func: CusumScoreFunction = CusumScoreFunction.change,
score_func: Union[str, CusumScoreFunction] = DEFAULT_SCORE_FUNCTION,
remove_seasonality: bool = CUSUM_DEFAULT_ARGS["remove_seasonality"],
):
if serialized_model:
Expand Down Expand Up @@ -178,8 +183,16 @@ def __init__(
self.magnitude_quantile = magnitude_quantile
self.magnitude_ratio = magnitude_ratio
self.change_directions = change_directions
self.score_func = score_func.value
self.remove_seasonality = remove_seasonality

# We allow score_function to be a str for compatibility with param tuning
if isinstance(score_func, str):
if score_func in STR_TO_SCORE_FUNC:
score_func = STR_TO_SCORE_FUNC[score_func]
else:
score_func = DEFAULT_SCORE_FUNCTION
self.score_func = score_func.value

else:
raise ValueError(
"""
Expand Down
24 changes: 21 additions & 3 deletions kats/models/metalearner/metalearner_hpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@
],
"numerical_idx": [],
},
"cusum": {
"categorical_idx": ["score_func"],
"numerical_idx": ["delta_std_ratio", "scan_window", "historical_window"],
},
"statsig": {
"categorical_idx": [],
"numerical_idx": ["n_control", "n_test"],
}
}

default_model_networks = {
Expand Down Expand Up @@ -78,6 +86,16 @@
"n_hidden_cat_combo": [[5], [5], [2], [3], [5], [5], [5]],
"n_hidden_num": [],
},
"cusum": {
"n_hidden_shared": [20],
"n_hidden_cat_combo": [[3]],
"n_hidden_num": [5, 5, 5],
},
"statsig": {
"n_hidden_shared": [20],
"n_hidden_cat_combo": [],
"n_hidden_num": [5, 5],
},
}


Expand Down Expand Up @@ -163,7 +181,7 @@ def __init__(
numerical_idx = default_model_params[default_model]["numerical_idx"]

else:
msg = f"default_model={default_model} is not available! Please choose one from 'prophet', 'arima', 'sarima', 'holtwinters', stlf, 'theta'"
msg = f"default_model={default_model} is not available! Please choose one from 'prophet', 'arima', 'sarima', 'holtwinters', 'stlf', 'theta', 'cusum', 'statsig'"
logging.error(msg)
raise ValueError(msg)

Expand Down Expand Up @@ -385,7 +403,7 @@ def _prepare_data(
else None
)
y_num = (
torch.from_numpy(self._target_num[train_idx, :]).float()
torch.from_numpy(self._target_num[train_idx, :].astype('float')).float()
if self.numerical_idx
else None
)
Expand All @@ -398,7 +416,7 @@ def _prepare_data(
else None
)
y_num_val = (
torch.from_numpy(self._target_num[val_idx, :]).float()
torch.from_numpy(self._target_num[val_idx, :].astype('float')).float()
if self.numerical_idx
else None
)
Expand Down
11 changes: 9 additions & 2 deletions kats/models/metalearner/metalearner_modelselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,16 @@ class MetaLearnModelSelect:
def __init__(
self, metadata: Optional[List[Dict[str, Any]]] = None, load_model: bool = False
) -> None:
if not load_model and metadata is not None:
if not load_model:
# pyre-fixme[6]: Expected `Sized` for 1st param but got
# `Optional[List[typing.Any]]`.
if len(metadata) <= 30:
msg = f"metadata size should be greater than 30 but receives {len(metadata)}."
msg = "Dataset is too small to train a meta learner!"
logging.error(msg)
raise ValueError(msg)

if metadata is None:
msg = "Missing metadata!"
logging.error(msg)
raise ValueError(msg)

Expand Down

0 comments on commit 33d3d5c

Please sign in to comment.