Skip to content

Commit

Permalink
chore: use kepler_vm metrics as energy source
Browse files Browse the repository at this point in the history
Signed-off-by: Huamin Chen <[email protected]>
  • Loading branch information
rootfs committed Oct 28, 2024
1 parent 42a77cb commit 874abf2
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 7 deletions.
9 changes: 7 additions & 2 deletions src/kepler_model/cmd/cmd_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from kepler_model.util.prom_types import (
SOURCE_COL,
energy_component_to_query,
vm_energy_component_to_query,
node_info_column,
prom_responses_to_results,
)
Expand Down Expand Up @@ -99,7 +100,7 @@ def summary_validation(validate_df):
print("{} data: \t{}".format(metric, target_df[">0"].values))


def get_validate_df(data_path, benchmark_filename, query_response):
def get_validate_df(data_path, benchmark_filename, query_response, use_vm_metrics=False):
items = []
query_results = prom_responses_to_results(query_response)
container_queries = [query for query in query_results.keys() if "container" in query]
Expand Down Expand Up @@ -175,10 +176,14 @@ def get_validate_df(data_path, benchmark_filename, query_response):
item["total"] = filtered_df[query].max()
items += [item]
energy_queries = [query for query in query_results.keys() if "_joules" in query]
print("Energy Queries: ", container_queries)
print("Energy Queries: ", energy_queries)
for energy_source, energy_components in PowerSourceMap.items():
for component in energy_components:
query = energy_component_to_query(component)
if use_vm_metrics:
query = vm_energy_component_to_query(component)
else:
query = energy_component_to_query(component)
df = query_results[query]
df = df[df[SOURCE_COL] == energy_source]
if len(df) == 0:
Expand Down
4 changes: 3 additions & 1 deletion src/kepler_model/cmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def query(args):
if args.to_csv:
save_query_results(data_path, args.output, response)
# try validation if applicable
validate_df = get_validate_df(data_path, benchmark_filename, response)
validate_df = get_validate_df(data_path, benchmark_filename, response, use_vm_metrics=args.vm_train)
summary_validation(validate_df)
save_csv(path=data_path, name=args.output + "_validate_result", data=validate_df)

Expand Down Expand Up @@ -487,7 +487,9 @@ def train(args):
print("cannot get pipeline")
exit()
for energy_source in energy_sources:
print("energy source: ", energy_source)
energy_components = PowerSourceMap[energy_source]
print("energy components: ", energy_components)
for feature_group in valid_feature_groups:
success, abs_data, dyn_data = pipeline.process_multiple_query(
input_query_results_list, energy_components, energy_source, feature_group=feature_group.name, replace_node_type=node_type
Expand Down
7 changes: 3 additions & 4 deletions src/kepler_model/train/extractor/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ def get_workload_feature_data(self, query_results, features, use_vm_metrics=Fals
print("no data in ", query)
return None
aggr_query_data = query_results[query].copy()

if all(col in aggr_query_data.columns for col in cols_to_use):
if use_vm_metrics:
aggr_query_data = aggr_query_data.loc[aggr_query_data["job"] == VM_JOB_NAME]
Expand Down Expand Up @@ -256,9 +255,9 @@ def get_power_data(self, query_results, energy_components, source, use_vm_metric
return None
aggr_query_data = query_results[query].copy()
if not use_vm_metrics:
aggr_query_data = aggr_query_data.loc[aggr_query_data["job"] != VM_JOB_NAME]
# filter source
aggr_query_data = aggr_query_data[aggr_query_data[SOURCE_COL] == source]
# aggr_query_data = aggr_query_data.loc[aggr_query_data["job"] != VM_JOB_NAME]
# filter source
aggr_query_data = aggr_query_data[aggr_query_data[SOURCE_COL] == source]
if len(aggr_query_data) == 0:
return None
if unit_col is not None:
Expand Down

0 comments on commit 874abf2

Please sign in to comment.