Skip to content

Commit

Permalink
Revert "Support local xgboost regressor (sustainable-computing-io#1052)…
Browse files Browse the repository at this point in the history
…" (sustainable-computing-io#1081)

This reverts commit c0d6a3f.

Signed-off-by: Sunil Thaha <sthaha@redhat.com>
  • Loading branch information
sthaha authored and vprashar2929 committed Dec 19, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 81349c0 commit ec3ab50
Showing 13 changed files with 84 additions and 501 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/developer_local.yml
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@ jobs:
go-version-file: go.mod
- name: Prepare environment
run: |
brew install cpuid xgboost
brew install cpuid
cd doc/ && sudo ./dev/prepare_dev_env.sh && cd -
git config --global --add safe.directory /kepler
- name: Run
7 changes: 1 addition & 6 deletions .github/workflows/golang.yml
Original file line number Diff line number Diff line change
@@ -48,12 +48,7 @@ jobs:
with:
go-version-file: go.mod
- name: Run go vet
run: |
wget https://github.com/sustainable-computing-io/kepler-ci-artifacts/releases/download/v0.26.0/xgboost-2.0.1-Linux.sh.tar.gz
tar -zxvf xgboost-2.0.1-Linux.sh.tar.gz
sudo sh xgboost-2.0.1-Linux.sh --skip-license --prefix=/usr/local
sudo ldconfig
go vet ./...
run: go vet ./...

vulnerability_detect:
runs-on: ubuntu-latest
1 change: 0 additions & 1 deletion .github/workflows/unit_test.yml
Original file line number Diff line number Diff line change
@@ -31,7 +31,6 @@ jobs:
uses: sustainable-computing-io/kepler-action@v0.0.3
with:
ebpfprovider: bcc
xgboost_version: 2.0.1
- name: Prepare environment
run: |
sudo apt-get install -y cpuid
14 changes: 7 additions & 7 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
@@ -121,13 +121,13 @@ var (
ProcessComponentsPowerKey = "PROCESS_COMPONENTS"

// model_parameter_attribute
RatioEnabledKey = "RATIO" // the default container power model is RATIO but ESTIMATOR or LINEAR_REGRESSION can be used
EstimatorEnabledKey = "ESTIMATOR"
LocalRegressorEnabledKey = "LOCAL_REGRESSION"
InitModelURLKey = "INIT_URL"
FixedTrainerNameKey = "TRAINER"
FixedNodeTypeKey = "NODE_TYPE"
ModelFiltersKey = "FILTERS"
RatioEnabledKey = "RATIO" // the default container power model is RATIO but ESTIMATOR or LINEAR_REGRESSION can be used
EstimatorEnabledKey = "ESTIMATOR"
LinearRegressionEnabledKey = "LINEAR_REGRESSION"
InitModelURLKey = "INIT_URL"
FixedTrainerNameKey = "TRAINER"
FixedNodeTypeKey = "NODE_TYPE"
ModelFiltersKey = "FILTERS"
////////////////////////////////////

// KubeConfig is used to start k8s client with the pod running outside the cluster
132 changes: 43 additions & 89 deletions pkg/model/estimator/local/lr.go
Original file line number Diff line number Diff line change
@@ -59,34 +59,26 @@ type ModelRequest struct {
/*
ModelWeights, AllWeight, CategoricalFeature, NormalizedNumericalFeature define structure of model weight
{
"All_Weights":
"All_Weights":
{
"Bias_Weight": 1.0,
"Categorical_Variables": {"cpu_architecture": {"Sky Lake": {"weight": 1.0}}},
"Numerical_Variables": {"cpu_cycles": {"mean": 0, "variance": 1.0, "weight": 1.0}}
}
},
"XGboost_Weights": "base64_encoded_dmlc_xgboost_model_json"
}
}
*/

// ModelWeights wraps the weights of one model; the embedded AllWeights is
// tagged with the "All_Weights" JSON key.
type ModelWeights struct {
AllWeights `json:"All_Weights"`
}

type AllWeights struct {
BiasWeight float64 `json:"Bias_Weight"`
CategoricalVariables map[string]map[string]CategoricalFeature `json:"Categorical_Variables"`
NumericalVariables map[string]NormalizedNumericalFeature `json:"Numerical_Variables"`
XGBoostWeight string `json:"XGBoost_Weights"`
XGBoostModel XGBoostModelWeight
RegressorType types.RegressorType
}

// CategoricalFeature holds the weight of a single categorical feature value,
// read from the "weight" JSON key.
type CategoricalFeature struct {
Weight float64 `json:"weight"`
}

type NormalizedNumericalFeature struct {
Scale float64 `json:"scale"` // to normalize the data
Weight float64 `json:"weight"`
@@ -104,45 +96,24 @@ func (weights ModelWeights) getIndexedWeights(usageMetrics, systemFeatures []str
return
}

// predict applies normalization and local regression to usageMetricValues and systemMetaDataFeatureValues
// predict applies normalization and linear regression to usageMetricValues and systemMetaDataFeatureValues
func (weights ModelWeights) predict(usageMetricNames []string, usageMetricValues [][]float64, systemMetaDataFeatureNames, systemMetaDataFeatureValues []string) []float64 {
categoricalWeights, numericalWeights := weights.getIndexedWeights(usageMetricNames, systemMetaDataFeatureNames)
basePower := weights.AllWeights.BiasWeight
for index, coeffMap := range categoricalWeights {
basePower += coeffMap[systemMetaDataFeatureValues[index]].Weight
}
var powers []float64
switch weights.RegressorType {
case types.LinearRegressor:
for _, vals := range usageMetricValues {
power := basePower
for index, coeff := range numericalWeights {
if coeff.Weight == 0 {
continue
}
normalizedX := vals[index] / coeff.Scale
power += coeff.Weight * normalizedX
for _, vals := range usageMetricValues {
power := basePower
for index, coeff := range numericalWeights {
if coeff.Weight == 0 {
continue
}
powers = append(powers, power)
normalizedX := vals[index] / coeff.Scale
power += coeff.Weight * normalizedX
}
case types.XGBoostRegressor:
for _, vals := range usageMetricValues {
data := make([]float32, len(vals))
for index, coeff := range numericalWeights {
if coeff.Weight == 0 {
continue
}
data[index] = float32(vals[index] / coeff.Scale)
}
power, err := weights.XGBoostModel.PredictFromData(data)
if err != nil {
klog.Errorf("XGBoostModel.PredictFromData failed: %v", err)
return []float64{}
}
powers = append(powers, power[0])
}
default:
klog.Errorf("RegressorType %v is not supported", weights.RegressorType)
powers = append(powers, power)
}
return powers
}
@@ -170,11 +141,10 @@ ComponentModelWeights defines structure for multiple (power component's) weights
}
}
*/

type ComponentModelWeights map[string]ModelWeights

// LocalRegressor defines power estimator with regression approach
type LocalRegressor struct {
// LinearRegressor defines power estimator with linear regression approach
type LinearRegressor struct {
ModelServerEndpoint string
OutputType types.ModelOutputType
EnergySource string
@@ -198,25 +168,8 @@ type LocalRegressor struct {
modelWeight *ComponentModelWeights
}

// init model weight
func (r *LocalRegressor) initModelWeight(content *ComponentModelWeights) error {
r.modelWeight = content
for k, v := range *r.modelWeight {
if v.XGBoostWeight != "" {
err := v.XGBoostModel.LoadFromBuffer(v.XGBoostWeight)
if err != nil {
return fmt.Errorf("failed to load %v xgboost model: %v", k, err)
}
} else {
v.RegressorType = types.LinearRegressor
}
(*r.modelWeight)[k] = v
}
return nil
}

// Start returns nil if model weight is obtainable
func (r *LocalRegressor) Start() error {
func (r *LinearRegressor) Start() error {
var err error
var weight *ComponentModelWeights
outputStr := r.OutputType.String()
@@ -233,7 +186,8 @@ func (r *LocalRegressor) Start() error {
}
if weight != nil {
r.enabled = true
return r.initModelWeight(weight)
r.modelWeight = weight
return nil
} else {
if err == nil {
err = fmt.Errorf("the model LR (%s): has no config", outputStr)
@@ -244,7 +198,7 @@ func (r *LocalRegressor) Start() error {
}

// getWeightFromServer tries getting weights from the Kepler Model Server
func (r *LocalRegressor) getWeightFromServer() (*ComponentModelWeights, error) {
func (r *LinearRegressor) getWeightFromServer() (*ComponentModelWeights, error) {
modelRequest := ModelRequest{
MetricNames: append(r.FloatFeatureNames, r.SystemMetaDataFeatureNames...),
OutputType: r.OutputType.String(),
@@ -288,7 +242,7 @@ func (r *LocalRegressor) getWeightFromServer() (*ComponentModelWeights, error) {

// loadWeightFromURLorLocal gets the weights from either a local file or a URL.
// If the string starts with '/', we take it as a local file.
func (r *LocalRegressor) loadWeightFromURLorLocal() (*ComponentModelWeights, error) {
func (r *LinearRegressor) loadWeightFromURLorLocal() (*ComponentModelWeights, error) {
var body []byte
var err error

@@ -308,7 +262,7 @@ func (r *LocalRegressor) loadWeightFromURLorLocal() (*ComponentModelWeights, err
}

// loadWeightFromLocal tries loading weights from the local file given by r.ModelWeightsFilepath
func (r *LocalRegressor) loadWeightFromLocal() ([]byte, error) {
func (r *LinearRegressor) loadWeightFromLocal() ([]byte, error) {
data, err := os.ReadFile(r.ModelWeightsFilepath)
if err != nil {
return nil, err
@@ -317,7 +271,7 @@ func (r *LocalRegressor) loadWeightFromLocal() ([]byte, error) {
}

// loadWeightFromURL tries loading weights from initial model URL
func (r *LocalRegressor) loadWeightFromURL() ([]byte, error) {
func (r *LinearRegressor) loadWeightFromURL() ([]byte, error) {
if r.ModelWeightsURL == "" {
return nil, fmt.Errorf("ModelWeightsURL is empty")
}
@@ -341,7 +295,7 @@ func (r *LocalRegressor) loadWeightFromURL() ([]byte, error) {
}

// GetPlatformPower applies ModelWeight prediction and returns a list of power associated to each process/container/pod
func (r *LocalRegressor) GetPlatformPower(isIdlePower bool) ([]float64, error) {
func (r *LinearRegressor) GetPlatformPower(isIdlePower bool) ([]float64, error) {
if !r.enabled {
return []float64{}, fmt.Errorf("disabled power model call: %s", r.OutputType.String())
}
@@ -362,7 +316,7 @@ func (r *LocalRegressor) GetPlatformPower(isIdlePower bool) ([]float64, error) {
}

// GetComponentsPower applies each component's ModelWeight prediction and returns a list of component power associated to each process/container/pod
func (r *LocalRegressor) GetComponentsPower(isIdlePower bool) ([]source.NodeComponentsEnergy, error) {
func (r *LinearRegressor) GetComponentsPower(isIdlePower bool) ([]source.NodeComponentsEnergy, error) {
if !r.enabled {
return []source.NodeComponentsEnergy{}, fmt.Errorf("disabled power model call: %s", r.OutputType.String())
}
@@ -395,11 +349,11 @@ func (r *LocalRegressor) GetComponentsPower(isIdlePower bool) ([]source.NodeComp
}

// GetGPUPower returns GPU power in Watts associated to each process/container/pod; this model does not support GPUs, so it always returns an error
func (r *LocalRegressor) GetGPUPower(isIdlePower bool) ([]float64, error) {
func (r *LinearRegressor) GetGPUPower(isIdlePower bool) ([]float64, error) {
return []float64{}, fmt.Errorf("current power model does not support GPUs")
}

func (r *LocalRegressor) addFloatFeatureValues(x []float64) {
func (r *LinearRegressor) addFloatFeatureValues(x []float64) {
for i, feature := range x {
// floatFeatureValues is a cyclic list, where we only append a new value if it is necessary.
if r.xidx < len(r.floatFeatureValues) {
@@ -423,53 +377,53 @@ func (r *LocalRegressor) addFloatFeatureValues(x []float64) {
}

// AddContainerFeatureValues adds the x for prediction, which are the explanatory variables (or the independent variables) of regression.
// LocalRegressor is trained off-line then we cannot Add training samples. We might implement it in the future.
// The LocalRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LocalRegressor) AddContainerFeatureValues(x []float64) {
// LinearRegressor is trained off-line then we cannot Add training samples. We might implement it in the future.
// The LinearRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LinearRegressor) AddContainerFeatureValues(x []float64) {
r.addFloatFeatureValues(x)
}

// AddNodeFeatureValues adds the x for prediction, which is the variable used to calculate the ratio.
// LocalRegressor is not trained, then we cannot Add training samples, only samples for prediction.
// The LocalRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LocalRegressor) AddNodeFeatureValues(x []float64) {
// LinearRegressor is not trained, then we cannot Add training samples, only samples for prediction.
// The LinearRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LinearRegressor) AddNodeFeatureValues(x []float64) {
r.addFloatFeatureValues(x)
}

// AddDesiredOutValue adds the y, which is the response variable (or the dependent variable) of regression.
// LocalRegressor is trained off-line then we do not add Y for trainning. We might implement it in the future.
func (r *LocalRegressor) AddDesiredOutValue(y float64) {
// LinearRegressor is trained off-line, so we do not add Y for training. We might implement it in the future.
func (r *LinearRegressor) AddDesiredOutValue(y float64) {
}

// ResetSampleIdx sets the sample vector index to 0 to overwrite the old samples with new ones for training or prediction.
func (r *LocalRegressor) ResetSampleIdx() {
func (r *LinearRegressor) ResetSampleIdx() {
r.xidx = 0
}

// Train triggers the regression fit after adding data points to create a new power model.
// LocalRegressor is trained off-line then we cannot trigger the trainning. We might implement it in the future.
func (r *LocalRegressor) Train() error {
// LinearRegressor is trained off-line, so we cannot trigger the training. We might implement it in the future.
func (r *LinearRegressor) Train() error {
return nil
}

// IsEnabled returns true if the power model was trained and is active
func (r *LocalRegressor) IsEnabled() bool {
func (r *LinearRegressor) IsEnabled() bool {
return r.enabled
}

// GetModelType returns the model type
func (r *LocalRegressor) GetModelType() types.ModelType {
return types.LocalRegressor
func (r *LinearRegressor) GetModelType() types.ModelType {
return types.LinearRegressor
}

// GetContainerFeatureNamesList returns the list of float features that the model was configured to use
// The LocalRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LocalRegressor) GetContainerFeatureNamesList() []string {
// The LinearRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LinearRegressor) GetContainerFeatureNamesList() []string {
return r.FloatFeatureNames
}

// GetNodeFeatureNamesList returns the list of float features that the model was configured to use
// The LocalRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LocalRegressor) GetNodeFeatureNamesList() []string {
// The LinearRegressor does not differentiate node or container power estimation, the difference will only be the amount of resource utilization
func (r *LinearRegressor) GetNodeFeatureNamesList() []string {
return r.FloatFeatureNames
}
Loading

0 comments on commit ec3ab50

Please sign in to comment.