Skip to content

Commit

Permalink
Make top candidates view as the default view in user-tools
Browse files Browse the repository at this point in the history
Signed-off-by: Ahmed Hussein (amahussein) <[email protected]>

Fixes NVIDIA#868

- Use Top-candidate view as default for user-tools qualification
- Revisit the spark_rapids CLI to verify that resetting the filter based
  on user-input does not override the default flag argument
- Fix unit-tests
  • Loading branch information
amahussein committed Mar 26, 2024
1 parent 9de5a38 commit 05d392c
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 50 deletions.
20 changes: 12 additions & 8 deletions user_tools/src/spark_rapids_pytools/rapids/qualification.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,18 @@ def _process_gpu_cluster_worker_node():
return gpu_cluster_obj is not None

def _process_offline_cluster_args(self):
offline_cluster_opts = self.wrapper_options.get('migrationClustersProps', {})
self._process_cpu_cluster_args(offline_cluster_opts)
if self.ctxt.get_ctxt('cpuClusterProxy') is None:
# if no cpu-cluster is defined, then we are not supposed to run cost calculations
enable_savings_flag = False
else:
# if no gpu-cluster is defined, then we are not supposed to run cost calculations
enable_savings_flag = self._process_gpu_cluster_args(offline_cluster_opts)
# read the wrapper option defined by the spark_rapids cmd if any.
enable_savings_flag = self.wrapper_options.get('savingsCalculations', True)
if enable_savings_flag:
offline_cluster_opts = self.wrapper_options.get('migrationClustersProps', {})
self._process_cpu_cluster_args(offline_cluster_opts)
if self.ctxt.get_ctxt('cpuClusterProxy') is None:
# if no cpu-cluster is defined, then we are not supposed to run cost calculations
enable_savings_flag = False
else:
# if no gpu-cluster is defined, then we are not supposed to run cost calculations
enable_savings_flag = self._process_gpu_cluster_args(offline_cluster_opts)

self._set_savings_calculations_flag(enable_savings_flag)

def _set_savings_calculations_flag(self, enable_flag: bool):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def qualification(cpu_cluster: str = None,
gpu_cluster: str = None,
tools_jar: str = None,
credentials_file: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
filter_apps: str = QualFilterApp.tostring(QualFilterApp.get_default()),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
estimation_model: str = None,
Expand Down Expand Up @@ -81,12 +81,12 @@ def qualification(cpu_cluster: str = None,
to provide the location of a credential file. The default credentials file exists as
"~/.databrickscfg" on Unix, Linux, or macOS
:param filter_apps: filtering criteria of the applications listed in the final STDOUT table
is one of the following (ALL, SPEEDUPS, savings, top_candidates).
is one of the following (all, speedups, savings, top_candidates).
Note that this filter does not affect the CSV report.
"ALL" means no filter applied. "SPEEDUPS" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "SAVINGS"
"all" means no filter applied. "speedups" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "savings"
lists all the apps that have positive estimated GPU savings except for the apps that
are "Not Applicable". "TOP_CANDIDATES" lists all apps that have unsupported operators
are "Not Applicable". "top_candidates" lists all apps that have unsupported operators
stage duration less than 25% of app duration and speedups greater than 1.3x.
:param gpu_cluster_recommendation: The type of GPU cluster recommendation to generate.
It accepts one of the following ("CLUSTER", "JOB" and the default value "MATCH").
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def qualification(cpu_cluster: str = None,
gpu_cluster: str = None,
tools_jar: str = None,
credentials_file: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
filter_apps: str = QualFilterApp.tostring(QualFilterApp.get_default()),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
estimation_model: str = None,
Expand Down Expand Up @@ -79,12 +79,12 @@ def qualification(cpu_cluster: str = None,
to provide the location of a credential file. The default credentials file exists as
"~/.databrickscfg" on Unix, Linux, or macOS
:param filter_apps: filtering criteria of the applications listed in the final STDOUT table
is one of the following (ALL, SPEEDUPS, SAVINGS, TOP_CANDIDATES).
is one of the following (all, speedups, savings, top_candidates).
Note that this filter does not affect the CSV report.
"ALL" means no filter applied. "SPEEDUPS" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "SAVINGS"
"all" means no filter applied. "speedups" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "savings"
lists all the apps that have positive estimated GPU savings except for the apps that
are "Not Applicable". "TOP_CANDIDATES" lists all apps that have unsupported operators
are "Not Applicable". "top_candidates" lists all apps that have unsupported operators
stage duration less than 25% of app duration and speedups greater than 1.3x.
:param gpu_cluster_recommendation: The type of GPU cluster recommendation to generate.
It accepts one of the following ("CLUSTER", "JOB" and the default value "MATCH").
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def qualification(cpu_cluster: str = None,
gpu_cluster: str = None,
tools_jar: str = None,
credentials_file: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
filter_apps: str = QualFilterApp.tostring(QualFilterApp.get_default()),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
estimation_model: str = None,
Expand Down Expand Up @@ -76,12 +76,12 @@ def qualification(cpu_cluster: str = None,
to provide the location of a credential JSON file. The default credentials file exists as
"$HOME/.config/gcloud/application_default_credentials.json"
:param filter_apps: filtering criteria of the applications listed in the final STDOUT table
is one of the following (ALL, SPEEDUPS, savings, top_candidates).
is one of the following (all, speedups, savings, top_candidates).
Note that this filter does not affect the CSV report.
"ALL" means no filter applied. "SPEEDUPS" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "SAVINGS"
"all" means no filter applied. "speedups" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "savings"
lists all the apps that have positive estimated GPU savings except for the apps that
are "Not Applicable". "TOP_CANDIDATES" lists all apps that have unsupported operators
are "Not Applicable". "top_candidates" lists all apps that have unsupported operators
stage duration less than 25% of app duration and speedups greater than 1.3x.
:param gpu_cluster_recommendation: The type of GPU cluster recommendation to generate.
It accepts one of the following ("CLUSTER", "JOB" and the default value "MATCH").
Expand Down
10 changes: 5 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def qualification(cpu_cluster: str = None,
gpu_cluster: str = None,
tools_jar: str = None,
credentials_file: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
filter_apps: str = QualFilterApp.tostring(QualFilterApp.get_default()),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
estimation_model: str = None,
Expand Down Expand Up @@ -78,12 +78,12 @@ def qualification(cpu_cluster: str = None,
to provide the location of a credential JSON file. The default credentials file exists as
"$HOME/.config/gcloud/application_default_credentials.json"
:param filter_apps: filtering criteria of the applications listed in the final STDOUT table
is one of the following (ALL, SPEEDUPS, savings, top_candidates).
is one of the following (all, speedups, savings, top_candidates).
Note that this filter does not affect the CSV report.
"ALL" means no filter applied. "SPEEDUPS" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "SAVINGS"
"all" means no filter applied. "speedups" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "savings"
lists all the apps that have positive estimated GPU savings except for the apps that
are "Not Applicable". "TOP_CANDIDATES" lists all apps that have unsupported operators
are "Not Applicable". "top_candidates" lists all apps that have unsupported operators
stage duration less than 25% of app duration and speedups greater than 1.3x.
:param gpu_cluster_recommendation: The type of GPU cluster recommendation to generate.
It accepts one of the following ("CLUSTER", "JOB" and the default value "MATCH").
Expand Down
10 changes: 5 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def qualification(cpu_cluster: str = None,
remote_folder: str = None,
gpu_cluster: str = None,
tools_jar: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
filter_apps: str = QualFilterApp.tostring(QualFilterApp.get_default()),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
estimation_model: str = None,
Expand Down Expand Up @@ -76,12 +76,12 @@ def qualification(cpu_cluster: str = None,
or remote S3 url. If missing, the wrapper downloads the latest rapids-4-spark-tools_*.jar
from maven repo
:param filter_apps: filtering criteria of the applications listed in the final STDOUT table
is one of the following (ALL, SPEEDUPS, SAVINGS, TOP_CANDIDATES). Default is "SAVINGS".
is one of the following (all, speedups, savings, top_candidates).
Note that this filter does not affect the CSV report.
"ALL" means no filter applied. "SPEEDUPS" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "SAVINGS"
"all" means no filter applied. "speedups" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "savings"
lists all the apps that have positive estimated GPU savings except for the apps that
are "Not Applicable". "TOP_CANDIDATES" lists all apps that have unsupported operators
are "Not Applicable". "top_candidates" lists all apps that have unsupported operators
stage duration less than 25% of app duration and speedups greater than 1.3x.
:param gpu_cluster_recommendation: The type of GPU cluster recommendation to generate.
It accepts one of the following ("CLUSTER", "JOB" and the default value "MATCH").
Expand Down
17 changes: 10 additions & 7 deletions user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def qualification(cpu_cluster: str = None,
eventlogs: str = None,
local_folder: str = None,
tools_jar: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SPEEDUPS),
filter_apps: str = QualFilterApp.tostring(QualFilterApp.get_default()),
target_platform: str = None,
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
Expand All @@ -55,11 +55,14 @@ def qualification(cpu_cluster: str = None,
directory for temporary folders/files. The final output will go into a subdirectory
named `qual-${EXEC_ID}` where `exec_id` is an auto-generated unique identifier of the execution.
:param tools_jar: Path to a bundled jar including RAPIDS tool. The path is a local filesystem path
:param filter_apps: Filtering criteria of the applications listed in the final STDOUT table is one of
the following (`ALL`, `SPEEDUPS`, `TOP_CANDIDATES`). "`ALL`" means no filter applied. "`SPEEDUPS`"
lists all the apps that are either '_Recommended_', or '_Strongly Recommended_' based on speedups.
"`TOP_CANDIDATES`" lists all apps that have unsupported operators stage duration less than 25% of
app duration and speedups greater than 1.3x.
:param filter_apps: filtering criteria of the applications listed in the final STDOUT table
is one of the following (all, speedups, savings, top_candidates).
Note that this filter does not affect the CSV report.
"all" means no filter applied. "speedups" lists all the apps that are either
'Recommended', or 'Strongly Recommended' based on speedups. "savings"
lists all the apps that have positive estimated GPU savings except for the apps that
are "Not Applicable". "top_candidates" lists all apps that have unsupported operators
stage duration less than 25% of app duration and speedups greater than 1.3x.
:param target_platform: Cost savings and speedup recommendation for comparable cluster in target_platform
based on on-premises cluster configuration. Currently only `dataproc` is supported for
target_platform. If not provided, the final report will be limited to GPU speedups only
Expand Down Expand Up @@ -102,7 +105,7 @@ def qualification(cpu_cluster: str = None,
if cpu_cluster is None:
raise RuntimeError('OnPrem\'s cluster property file required to calculate'
'savings for ' + target_platform + ' platform')
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS)
# filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS)
else:
raise RuntimeError(target_platform + ' platform is currently not supported to calculate savings'
' from OnPrem cluster')
Expand Down
14 changes: 8 additions & 6 deletions user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,13 +334,17 @@ class QualifyUserArgModel(ToolUserArgModel):
def init_tool_args(self):
self.p_args['toolArgs']['platform'] = self.platform
self.p_args['toolArgs']['savingsCalculations'] = True
self.p_args['toolArgs']['filterApps'] = self.filter_apps
self.p_args['toolArgs']['targetPlatform'] = self.target_platform
self.p_args['toolArgs']['cpuClusterPrice'] = self.cpu_cluster_price
self.p_args['toolArgs']['estimatedGpuClusterPrice'] = self.estimated_gpu_cluster_price
self.p_args['toolArgs']['cpuDiscount'] = self.cpu_discount
self.p_args['toolArgs']['gpuDiscount'] = self.gpu_discount
self.p_args['toolArgs']['globalDiscount'] = self.global_discount
# check the filter_apps argument
if self.filter_apps is None:
self.p_args['toolArgs']['filterApps'] = QualFilterApp.get_default()
else:
self.p_args['toolArgs']['filterApps'] = self.filter_apps
# check the reshapeType argument
if self.gpu_cluster_recommendation is None:
self.p_args['toolArgs']['gpuClusterRecommendation'] = QualGpuClusterReshapeType.get_default()
Expand Down Expand Up @@ -406,11 +410,9 @@ def build_tools_args(self) -> dict:
'"target_platform" argument to generate cost savings')

# check the filter_apps argument
if self.p_args['toolArgs']['filterApps'] is None:
# set a default filterApps argument to be savings if the cost savings is enabled
if self.p_args['toolArgs']['savingsCalculations']:
self.p_args['toolArgs']['filterApps'] = QualFilterApp.SAVINGS
else:
if not self.p_args['toolArgs']['savingsCalculations']:
# if savingsCalculations is disabled, we cannot use savings filter
if self.p_args['toolArgs']['filterApps'] == QualFilterApp.SAVINGS:
self.p_args['toolArgs']['filterApps'] = QualFilterApp.SPEEDUPS

# finally generate the final values
Expand Down
2 changes: 1 addition & 1 deletion user_tools/src/spark_rapids_tools/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ class QualFilterApp(EnumeratedType):

@classmethod
def get_default(cls):
return cls.SAVINGS
return cls.TOP_CANDIDATES


class QualGpuClusterReshapeType(EnumeratedType):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -66,14 +66,14 @@ def validate_args_w_savings_enabled(tool_name: str, t_args: dict):
if tool_name == 'qualification':
assert t_args['savingsCalculations']
# filterApps should be set to the default filter
assert t_args['filterApps'] == QualFilterApp.SAVINGS
assert t_args['filterApps'] == QualFilterApp.get_default()

@staticmethod
def validate_args_w_savings_disabled(tool_name: str, t_args: dict):
if tool_name == 'qualification':
assert not t_args['savingsCalculations']
# filterApps should not be set to savings when savings calculations are disabled
assert t_args['filterApps'] == QualFilterApp.SPEEDUPS
assert t_args['filterApps'] != QualFilterApp.SAVINGS

@staticmethod
def create_tool_args_should_pass(tool_name: str, platform=None, cluster=None, eventlogs=None):
Expand Down

0 comments on commit 05d392c

Please sign in to comment.