From 25374b106d3263987a11bcd7f36fa264599384e7 Mon Sep 17 00:00:00 2001 From: Nishchay Karle <45297081+NishchayKarle@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:25:01 -0500 Subject: [PATCH] Add usage tracking project name (#3624) Add option to deanonymize usage tracking data by tagging it with a name using parsl.Config.project_name. --- parsl/config.py | 5 +++++ parsl/tests/unit/test_usage_tracking.py | 21 +++++++++++++++++++++ parsl/usage_tracking/usage.py | 8 ++++++++ 3 files changed, 34 insertions(+) diff --git a/parsl/config.py b/parsl/config.py index c3725eccf8..1358e99d28 100644 --- a/parsl/config.py +++ b/parsl/config.py @@ -83,6 +83,9 @@ class Config(RepresentationMixin, UsageInformation): Setting this field to 0 will disable usage tracking. Default (this field is not set): usage tracking is not enabled. Parsl only collects minimal, non personally-identifiable, information used for reporting to our funding agencies. + project_name: str, optional + Option to deanonymize usage tracking data. + If set, this value will be used as the project name in the usage tracking data and placed on the leaderboard. initialize_logging : bool, optional Make DFK optionally not initialize any logging. Log messages will still be passed into the python logging system under the @@ -118,6 +121,7 @@ def __init__(self, max_idletime: float = 120.0, monitoring: Optional[MonitoringHub] = None, usage_tracking: int = 0, + project_name: Optional[str] = None, initialize_logging: bool = True) -> None: executors = tuple(executors or []) @@ -154,6 +158,7 @@ def __init__(self, self.max_idletime = max_idletime self.validate_usage_tracking(usage_tracking) self.usage_tracking = usage_tracking + self.project_name = project_name self.initialize_logging = initialize_logging self.monitoring = monitoring self.std_autopath: Optional[Callable] = std_autopath diff --git a/parsl/tests/unit/test_usage_tracking.py b/parsl/tests/unit/test_usage_tracking.py index 351355811c..1581249cd2 100644 --- a/parsl/tests/unit/test_usage_tracking.py +++ b/parsl/tests/unit/test_usage_tracking.py @@ -43,3 +43,24 @@ def test_invalid_types(level): # we can't instantiate TypeCheckError if we're in typeguard 2.x environment # because it does not exist... so check name using strings. assert ex.type.__name__ in ["TypeCheckError", "TypeError"] + + +@pytest.mark.local +def test_valid_project_name(): + """Test valid project_name.""" + assert ( + Config( + usage_tracking=3, + project_name="unit-test", + ).project_name == "unit-test" + ) + + +@pytest.mark.local +@pytest.mark.parametrize("name", (1, 1.0, True, object())) +def test_invalid_project_name(name): + """Test invalid project_name.""" + with pytest.raises(Exception) as ex: + Config(usage_tracking=3, project_name=name) + + assert ex.type.__name__ in ["TypeCheckError", "TypeError"] diff --git a/parsl/usage_tracking/usage.py b/parsl/usage_tracking/usage.py index 3730fcc464..c22eb529fe 100644 --- a/parsl/usage_tracking/usage.py +++ b/parsl/usage_tracking/usage.py @@ -114,6 +114,7 @@ def __init__(self, dfk, port=50077, sys.version_info.minor, sys.version_info.micro) self.tracking_level = self.check_tracking_level() + self.project_name = self.config.project_name self.start_time = None logger.debug("Tracking level: {}".format(self.tracking_level)) @@ -153,6 +154,9 @@ def construct_start_message(self) -> bytes: 'platform.system': platform.system(), 'tracking_level': int(self.tracking_level)} + if self.project_name: + message['project_name'] = self.project_name + if self.tracking_level >= 2: message['components'] = get_parsl_usage(self.dfk._config) @@ -188,6 +192,10 @@ def construct_end_message(self) -> bytes: 'end': end_time, 'execution_time': end_time - self.start_time, 'components': [dfk_component] + get_parsl_usage(self.dfk._config)} + + if self.project_name: + message['project_name'] = self.project_name + logger.debug(f"Usage tracking end message (unencoded): {message}") return self.encode_message(message)