Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support local xgboost regressor #1052

Merged
merged 5 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/developer_local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
go-version-file: go.mod
- name: Prepare environment
run: |
brew install cpuid
brew install cpuid xgboost
cd doc/ && sudo ./dev/prepare_dev_env.sh && cd -
git config --global --add safe.directory /kepler
- name: Run
Expand Down
7 changes: 6 additions & 1 deletion .github/workflows/golang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ jobs:
with:
go-version-file: go.mod
- name: Run go vet
run: go vet ./...
run: |
wget https://github.com/sustainable-computing-io/kepler-ci-artifacts/releases/download/v0.26.0/xgboost-2.0.1-Linux.sh.tar.gz
tar -zxvf xgboost-2.0.1-Linux.sh.tar.gz
sudo sh xgboost-2.0.1-Linux.sh --skip-license --prefix=/usr/local
sudo ldconfig
go vet ./...

vulnerability_detect:
runs-on: ubuntu-latest
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/unit_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ jobs:
GOPATH: /home/runner/go
GOBIN: /home/runner/go/bin
- name: install bcc
uses: sustainable-computing-io/[email protected].2
uses: sustainable-computing-io/[email protected].3
with:
ebpfprovider: bcc
xgboost_version: 2.0.1
- name: Prepare environment
run: |
sudo apt-get install -y cpuid
Expand Down
14 changes: 7 additions & 7 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,13 @@ var (
ProcessComponentsPowerKey = "PROCESS_COMPONENTS"

// model_parameter_attribute
RatioEnabledKey = "RATIO" // the default container power model is RATIO but ESTIMATOR or LINEAR_REGRESSION can be used
EstimatorEnabledKey = "ESTIMATOR"
LinearRegressionEnabledKey = "LINEAR_REGRESSION"
InitModelURLKey = "INIT_URL"
FixedTrainerNameKey = "TRAINER"
FixedNodeTypeKey = "NODE_TYPE"
ModelFiltersKey = "FILTERS"
RatioEnabledKey = "RATIO" // the default container power model is RATIO but ESTIMATOR or LOCAL_REGRESSION can be used
EstimatorEnabledKey = "ESTIMATOR"
LocalRegressorEnabledKey = "LOCAL_REGRESSION"
InitModelURLKey = "INIT_URL"
FixedTrainerNameKey = "TRAINER"
FixedNodeTypeKey = "NODE_TYPE"
ModelFiltersKey = "FILTERS"
////////////////////////////////////

// KubeConfig is used to start k8s client with the pod running outside the cluster
Expand Down
132 changes: 89 additions & 43 deletions pkg/model/estimator/local/lr.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,26 +59,34 @@ type ModelRequest struct {
/*
ModelWeights, AllWeight, CategoricalFeature, NormalizedNumericalFeature define structure of model weight
{
"All_Weights":

"All_Weights":
{
"Bias_Weight": 1.0,
"Categorical_Variables": {"cpu_architecture": {"Sky Lake": {"weight": 1.0}}},
"Numerical_Variables": {"cpu_cycles": {"mean": 0, "variance": 1.0, "weight": 1.0}}
}
}
},
"XGBoost_Weights": "base64_encoded_dmlc_xgboost_model_json"
}
*/

// ModelWeights is the weight document for a single model. Its embedded
// AllWeights is read from the "All_Weights" JSON key.
type ModelWeights struct {
AllWeights `json:"All_Weights"`
}

// AllWeights holds what predict needs for one model: the linear regression
// coefficients and, optionally, an XGBoost model.
type AllWeights struct {
// BiasWeight is the starting base power that predict adds the categorical
// weights to.
BiasWeight float64 `json:"Bias_Weight"`
// CategoricalVariables maps feature name -> feature value -> weight,
// e.g. {"cpu_architecture": {"Sky Lake": {"weight": 1.0}}}.
CategoricalVariables map[string]map[string]CategoricalFeature `json:"Categorical_Variables"`
// NumericalVariables maps a numerical feature name to its normalization
// and weight parameters.
NumericalVariables map[string]NormalizedNumericalFeature `json:"Numerical_Variables"`
// XGBoostWeight is the serialized XGBoost model read from the
// "XGBoost_Weights" JSON key. When non-empty, initModelWeight passes it to
// XGBoostModel.LoadFromBuffer.
XGBoostWeight string `json:"XGBoost_Weights"`
// XGBoostModel is the model that initModelWeight loads from XGBoostWeight
// and that predict queries through PredictFromData.
// NOTE(review): this field and RegressorType carry no json tag, so
// encoding/json will still match them by field name if such keys appear in
// the input — confirm whether `json:"-"` is wanted.
XGBoostModel XGBoostModelWeight
// RegressorType selects which branch predict takes
// (types.LinearRegressor or types.XGBoostRegressor).
RegressorType types.RegressorType
}

// CategoricalFeature is the weight attached to one value of a categorical
// feature.
type CategoricalFeature struct {
// Weight is added to the base power in predict when the feature has this value.
Weight float64 `json:"weight"`
}

type NormalizedNumericalFeature struct {
Scale float64 `json:"scale"` // to normalize the data
Weight float64 `json:"weight"`
Expand All @@ -96,24 +104,45 @@ func (weights ModelWeights) getIndexedWeights(usageMetrics, systemFeatures []str
return
}

// predict applies normalization and linear regression to usageMetricValues and systemMetaDataFeatureValues
// predict applies normalization and local regression to usageMetricValues and systemMetaDataFeatureValues
func (weights ModelWeights) predict(usageMetricNames []string, usageMetricValues [][]float64, systemMetaDataFeatureNames, systemMetaDataFeatureValues []string) []float64 {
categoricalWeights, numericalWeights := weights.getIndexedWeights(usageMetricNames, systemMetaDataFeatureNames)
basePower := weights.AllWeights.BiasWeight
for index, coeffMap := range categoricalWeights {
basePower += coeffMap[systemMetaDataFeatureValues[index]].Weight
}
var powers []float64
for _, vals := range usageMetricValues {
power := basePower
for index, coeff := range numericalWeights {
if coeff.Weight == 0 {
continue
switch weights.RegressorType {
case types.LinearRegressor:
for _, vals := range usageMetricValues {
power := basePower
for index, coeff := range numericalWeights {
if coeff.Weight == 0 {
continue
}
normalizedX := vals[index] / coeff.Scale
power += coeff.Weight * normalizedX
}
normalizedX := vals[index] / coeff.Scale
power += coeff.Weight * normalizedX
powers = append(powers, power)
}
powers = append(powers, power)
case types.XGBoostRegressor:
rootfs marked this conversation as resolved.
Show resolved Hide resolved
for _, vals := range usageMetricValues {
data := make([]float32, len(vals))
for index, coeff := range numericalWeights {
if coeff.Weight == 0 {
continue
}
data[index] = float32(vals[index] / coeff.Scale)
}
power, err := weights.XGBoostModel.PredictFromData(data)
if err != nil {
klog.Errorf("XGBoostModel.PredictFromData failed: %v", err)
return []float64{}
}
powers = append(powers, power[0])
}
default:
klog.Errorf("RegressorType %v is not supported", weights.RegressorType)
}
return powers
}
Expand Down Expand Up @@ -141,10 +170,11 @@ ComponentModelWeights defines structure for multiple (power component's) weights
}
}
*/

type ComponentModelWeights map[string]ModelWeights

// LinearRegressor defines power estimator with linear regression approach
type LinearRegressor struct {
// LocalRegressor defines power estimator with regression approach
type LocalRegressor struct {
ModelServerEndpoint string
OutputType types.ModelOutputType
EnergySource string
Expand All @@ -168,8 +198,25 @@ type LinearRegressor struct {
modelWeight *ComponentModelWeights
}

// initModelWeight stores content as the regressor's model weights and prepares
// every component model for prediction.
//
// For each component: a non-empty XGBoostWeight is loaded into XGBoostModel and
// the component is marked as types.XGBoostRegressor; otherwise it is marked as
// types.LinearRegressor. The updated value is written back to the map because
// ranging over a map of structs yields copies.
//
// It returns an error if content is nil or if any XGBoost model fails to load.
// On a load failure r.modelWeight still points at content, with the components
// visited so far updated.
func (r *LocalRegressor) initModelWeight(content *ComponentModelWeights) error {
	if content == nil {
		return fmt.Errorf("model weight is nil")
	}
	r.modelWeight = content
	for k, v := range *r.modelWeight {
		if v.XGBoostWeight != "" {
			if err := v.XGBoostModel.LoadFromBuffer(v.XGBoostWeight); err != nil {
				return fmt.Errorf("failed to load %v xgboost model: %w", k, err)
			}
			// Record the type explicitly so predict takes the XGBoost branch
			// regardless of RegressorType's zero value.
			v.RegressorType = types.XGBoostRegressor
		} else {
			v.RegressorType = types.LinearRegressor
		}
		(*r.modelWeight)[k] = v
	}
	return nil
}

// Start returns nil if model weight is obtainable
func (r *LinearRegressor) Start() error {
func (r *LocalRegressor) Start() error {
var err error
var weight *ComponentModelWeights
outputStr := r.OutputType.String()
Expand All @@ -186,8 +233,7 @@ func (r *LinearRegressor) Start() error {
}
if weight != nil {
r.enabled = true
r.modelWeight = weight
return nil
return r.initModelWeight(weight)
} else {
if err == nil {
err = fmt.Errorf("the model LR (%s): has no config", outputStr)
Expand All @@ -198,7 +244,7 @@ func (r *LinearRegressor) Start() error {
}

// getWeightFromServer tries getting weights for Kepler Model Server
func (r *LinearRegressor) getWeightFromServer() (*ComponentModelWeights, error) {
func (r *LocalRegressor) getWeightFromServer() (*ComponentModelWeights, error) {
modelRequest := ModelRequest{
MetricNames: append(r.FloatFeatureNames, r.SystemMetaDataFeatureNames...),
OutputType: r.OutputType.String(),
Expand Down Expand Up @@ -242,7 +288,7 @@ func (r *LinearRegressor) getWeightFromServer() (*ComponentModelWeights, error)

// loadWeightFromURLorLocal get weight from either local or URL
// if string start with '/', we take it as local file
func (r *LinearRegressor) loadWeightFromURLorLocal() (*ComponentModelWeights, error) {
func (r *LocalRegressor) loadWeightFromURLorLocal() (*ComponentModelWeights, error) {
var body []byte
var err error

Expand All @@ -262,7 +308,7 @@ func (r *LinearRegressor) loadWeightFromURLorLocal() (*ComponentModelWeights, er
}

// loadWeightFromLocal tries loading weights from local file given by r.ModelWeightsURL
func (r *LinearRegressor) loadWeightFromLocal() ([]byte, error) {
func (r *LocalRegressor) loadWeightFromLocal() ([]byte, error) {
data, err := os.ReadFile(r.ModelWeightsFilepath)
if err != nil {
return nil, err
Expand All @@ -271,7 +317,7 @@ func (r *LinearRegressor) loadWeightFromLocal() ([]byte, error) {
}

// loadWeightFromURL tries loading weights from initial model URL
func (r *LinearRegressor) loadWeightFromURL() ([]byte, error) {
func (r *LocalRegressor) loadWeightFromURL() ([]byte, error) {
if r.ModelWeightsURL == "" {
return nil, fmt.Errorf("ModelWeightsURL is empty")
}
Expand All @@ -295,7 +341,7 @@ func (r *LinearRegressor) loadWeightFromURL() ([]byte, error) {
}

// GetPlatformPower applies ModelWeight prediction and return a list of power associated to each process/container/pod
func (r *LinearRegressor) GetPlatformPower(isIdlePower bool) ([]float64, error) {
func (r *LocalRegressor) GetPlatformPower(isIdlePower bool) ([]float64, error) {
if !r.enabled {
return []float64{}, fmt.Errorf("disabled power model call: %s", r.OutputType.String())
}
Expand All @@ -316,7 +362,7 @@ func (r *LinearRegressor) GetPlatformPower(isIdlePower bool) ([]float64, error)
}

// GetComponentsPower applies each component's ModelWeight prediction and return a map of component power associated to each process/container/pod
func (r *LinearRegressor) GetComponentsPower(isIdlePower bool) ([]source.NodeComponentsEnergy, error) {
func (r *LocalRegressor) GetComponentsPower(isIdlePower bool) ([]source.NodeComponentsEnergy, error) {
if !r.enabled {
return []source.NodeComponentsEnergy{}, fmt.Errorf("disabled power model call: %s", r.OutputType.String())
}
Expand Down Expand Up @@ -349,11 +395,11 @@ func (r *LinearRegressor) GetComponentsPower(isIdlePower bool) ([]source.NodeCom
}

// GetGPUPower returns GPU Power in Watts associated to each process/container/pod
func (r *LinearRegressor) GetGPUPower(isIdlePower bool) ([]float64, error) {
func (r *LocalRegressor) GetGPUPower(isIdlePower bool) ([]float64, error) {
return []float64{}, fmt.Errorf("current power model does not support GPUs")
}

func (r *LinearRegressor) addFloatFeatureValues(x []float64) {
func (r *LocalRegressor) addFloatFeatureValues(x []float64) {
for i, feature := range x {
// floatFeatureValues is a cyclic list, where we only append a new value if it is necessary.
if r.xidx < len(r.floatFeatureValues) {
Expand All @@ -377,53 +423,53 @@ func (r *LinearRegressor) addFloatFeatureValues(x []float64) {
}

// AddContainerFeatureValues adds the x for prediction, which are the explanatory variables (or the independent variables) of regression.
// LinearRegressor is trained off-line then we cannot Add training samples. We might implement it in the future.
// The LinearRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LinearRegressor) AddContainerFeatureValues(x []float64) {
// LocalRegressor is trained off-line then we cannot Add training samples. We might implement it in the future.
// The LocalRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LocalRegressor) AddContainerFeatureValues(x []float64) {
r.addFloatFeatureValues(x)
}

// AddNodeFeatureValues adds the x for prediction, which is the variable used to calculate the ratio.
// LinearRegressor is not trained, then we cannot Add training samples, only samples for prediction.
// The LinearRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LinearRegressor) AddNodeFeatureValues(x []float64) {
// LocalRegressor is not trained, then we cannot Add training samples, only samples for prediction.
// The LocalRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LocalRegressor) AddNodeFeatureValues(x []float64) {
r.addFloatFeatureValues(x)
}

// AddDesiredOutValue adds the y, which is the response variable (or the dependent variable) of regression.
// LinearRegressor is trained off-line, so we do not add Y for training. We might implement it in the future.
func (r *LinearRegressor) AddDesiredOutValue(y float64) {
// LocalRegressor is trained off-line, so we do not add Y for training. We might implement it in the future.
func (r *LocalRegressor) AddDesiredOutValue(y float64) {
}

// ResetSampleIdx sets the sample vector index to 0 to overwrite the old samples with new ones for training or prediction.
func (r *LinearRegressor) ResetSampleIdx() {
func (r *LocalRegressor) ResetSampleIdx() {
r.xidx = 0
}

// Train triggers the regression fit after adding data points to create a new power model.
// LinearRegressor is trained off-line, so we cannot trigger the training. We might implement it in the future.
func (r *LinearRegressor) Train() error {
// LocalRegressor is trained off-line, so we cannot trigger the training. We might implement it in the future.
func (r *LocalRegressor) Train() error {
return nil
}

// IsEnabled returns true if the power model was trained and is active
func (r *LinearRegressor) IsEnabled() bool {
func (r *LocalRegressor) IsEnabled() bool {
return r.enabled
}

// GetModelType returns the model type
func (r *LinearRegressor) GetModelType() types.ModelType {
return types.LinearRegressor
func (r *LocalRegressor) GetModelType() types.ModelType {
return types.LocalRegressor
}

// GetContainerFeatureNamesList returns the list of float features that the model was configured to use
// The LinearRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LinearRegressor) GetContainerFeatureNamesList() []string {
// The LocalRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LocalRegressor) GetContainerFeatureNamesList() []string {
return r.FloatFeatureNames
}

// GetNodeFeatureNamesList returns the list of float features that the model was configured to use
// The LinearRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LinearRegressor) GetNodeFeatureNamesList() []string {
// The LocalRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LocalRegressor) GetNodeFeatureNamesList() []string {
return r.FloatFeatureNames
}
Loading
Loading