-
Notifications
You must be signed in to change notification settings - Fork 2
/
interact_validation.py
77 lines (69 loc) · 2.36 KB
/
interact_validation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score
# Compare untuned-XGBoost cross-validation quality for three candidate
# feature sets on the cleaned SPECpower data, restricted to 2-chip machines.
df = pd.read_csv(f"{os.path.dirname(os.path.abspath(__file__))}/data/spec_data_cleaned.csv")
df_new = df[df.CPUChips == 2] # Fit a model for every amount of CPUChips

def _evaluate_feature_set(label, features, dummy_columns=None):
    """Cross-validate an untuned XGBRegressor on the given feature columns.

    label: tag prefixed to the printed score line.
    features: column names of df_new used as predictors.
    dummy_columns: subset of features to one-hot encode, or None.

    Prints the min/mean/max of the repeated k-fold CV scores
    (neg_mean_absolute_error, so values are negative and closer to 0 is better).
    """
    X = df_new[features]
    if dummy_columns:
        X = pd.get_dummies(X, columns=dummy_columns)
    y = df_new.power
    scores = cross_val_score(
        XGBRegressor(), X, y,
        cv=RepeatedKFold(),
        scoring='neg_mean_absolute_error',
    )
    print(f"[{label}] K-fold CV score range: "
          f"{scores.min():.2f} < {scores.mean():.2f} < {scores.max():.2f}")

# Numeric/ordinal predictors shared by all three variants.
_BASE_FEATURES = [
    'HW_MemAmountGB',
    'TDP',
    'utilization',
    'CPUCores',
    'CPUThreads',
    'HW_CPUFreq',
    'Hardware_Availability_Year',
]

# Variant 1: the original Interact DC variable set (adds form factor + vendor).
_evaluate_feature_set(
    'Interact DC Original (untuned)',
    _BASE_FEATURES + ['HW_FormFactor', 'HW_Vendor'],
    dummy_columns=['HW_FormFactor', 'HW_Vendor'],
)
# Variant 2: only variables typically discoverable in a cloud environment.
_evaluate_feature_set(
    'Interact DC cloud available variables (untuned)',
    _BASE_FEATURES,
)
# Variant 3: our selection (adds CPU architecture + manufacturer).
_evaluate_feature_set(
    'Our variable selection (untuned)',
    _BASE_FEATURES + ['Architecture', 'CPUMake'],
    dummy_columns=['Architecture', 'CPUMake'],
)

## Expected output from 07.12.2022 with the pre-interpolated data
## [Interact DC Original (untuned)] K-fold CV score range: -4.70 < -4.54 < -4.33
## [Interact DC cloud available variables (untuned)] K-fold CV score range: -8.00 < -7.88 < -7.80
## [Our variable selection (untuned)] K-fold CV score range: -8.13 < -8.02 < -7.93