Skip to content

Commit

Permalink
Improving aggregate_params validation (#290)
Browse files Browse the repository at this point in the history
  • Loading branch information
dvadym authored Jun 3, 2022
1 parent c9ed815 commit 94bacde
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 50 deletions.
82 changes: 44 additions & 38 deletions pipeline_dp/aggregate_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,39 +112,29 @@ def __post_init__(self):
needs_min_max_value = Metrics.SUM in self.metrics \
or Metrics.MEAN in self.metrics \
or Metrics.VARIANCE in self.metrics
if not isinstance(self.max_partitions_contributed,
int) or self.max_partitions_contributed <= 0:
raise ValueError(
"params.max_partitions_contributed must be set "
"to a positive integer")
if not isinstance(self.max_contributions_per_partition,
int) or self.max_contributions_per_partition <= 0:
raise ValueError(
"params.max_contributions_per_partition must be set "
"to a positive integer")
if needs_min_max_value and (self.min_value is None or
self.max_value is None):
raise ValueError(
"params.min_value and params.max_value must be set")
"AggregateParams: min_value and max_value must be set")
if needs_min_max_value and (_not_a_proper_number(self.min_value) or
_not_a_proper_number(self.max_value)):
raise ValueError(
"params.min_value and params.max_value must be both finite numbers"
)
"AggregateParams: min_value and max_value must be both "
"finite numbers")
if needs_min_max_value and self.max_value < self.min_value:
raise ValueError(
"params.max_value must be equal to or greater than params.min_value"
)
"AggregateParams: max_value must be equal to or greater than"
" min_value")
if Metrics.VECTOR_SUM in self.metrics and \
(Metrics.SUM in self.metrics or \
Metrics.MEAN in self.metrics or \
Metrics.VARIANCE in self.metrics):
raise ValueError(
"Vector sum can not be computed together with scalar metrics, like sum, mean etc"
)
"AggregateParams: vector sum can not be computed together "
"with scalar metrics, like sum, mean etc")
if self.contribution_bounds_already_enforced and Metrics.PRIVACY_ID_COUNT in self.metrics:
raise ValueError(
"Cannot calculate PRIVACY_ID_COUNT when "
"AggregateParams: Cannot calculate PRIVACY_ID_COUNT when "
"contribution_bounds_already_enforced is set to True.")
if self.custom_combiners:
logging.warning("Warning: custom combiners are used. This is an "
Expand All @@ -160,23 +150,31 @@ def __post_init__(self):
raise ValueError(
"AggregateParams.public_partitions is deprecated. Please use public_partitions argument in DPEngine.aggregate insead."
)
if self.max_contributions is not None and self.max_contributions > 0:
if self.max_contributions is not None:
_check_is_positive_int(self.max_contributions, "max_contributions")
if ((self.max_partitions_contributed is not None) or
(self.max_contributions_per_partition is not None)):
raise ValueError(
"Only one in params.max_contributions or "
"(params.max_partitions_contributed and "
"params.max_contributions_per_partition) must be set")
else:
if ((self.max_partitions_contributed is None or
self.max_partitions_contributed <= 0) and
(self.max_contributions_per_partition is None or
self.max_contributions_per_partition <= 0)):
"AggregateParams: only one in max_contributions or "
"both max_partitions_contributed and "
"max_contributions_per_partition must be set")
else: # self.max_contributions is None
n_not_none = _count_not_none(self.max_partitions_contributed,
self.max_contributions_per_partition)
if n_not_none == 0:
raise ValueError(
"AggregateParams: either max_contributions must be set or "
"both max_partitions_contributed and "
"max_contributions_per_partition must be set.")
elif n_not_none == 1:
raise ValueError(
"One amongst params.max_contributions or "
"(params.max_partitions_contributed or "
"params.max_contributions_per_partition) must be set to a "
"positive value.")
"AggregateParams: either none or both from "
"max_partitions_contributed and "
" max_contributions_per_partition must be set.")
_check_is_positive_int(self.max_partitions_contributed,
"max_partitions_contributed")
_check_is_positive_int(self.max_contributions_per_partition,
"max_contributions_per_partition")

def __str__(self):
if self.custom_combiners:
Expand Down Expand Up @@ -373,13 +371,21 @@ class PrivacyIdCountParams:
def __post_init__(self):
if self.public_partitions:
raise ValueError(
"PrivacyIdCountParams.public_partitions is deprecated. Please read API documentation for anonymous PrivacyIdCountParams transform."
)
"PrivacyIdCountParams.public_partitions is deprecated. Please "
"read API documentation for anonymous PrivacyIdCountParams "
"transform.")


def _not_a_proper_number(num):
"""
Returns:
true if num is inf or NaN, false otherwise.
"""
def _not_a_proper_number(num: Any) -> bool:
"""Returns true if num is inf or NaN, false otherwise."""
return math.isnan(num) or math.isinf(num)


def _check_is_positive_int(num: Any, field_name: str) -> bool:
if not (isinstance(num, int) and num > 0):
raise ValueError(
f"{field_name} has to be positive integer, but {num} given.")


def _count_not_none(*args):
return sum([1 for arg in args if arg is not None])
2 changes: 2 additions & 0 deletions pipeline_dp/dp_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ def divide_and_round_up(a, b):

def _check_aggregate_params(col, params: pipeline_dp.AggregateParams,
data_extractors: DataExtractors):
if params.max_contributions is not None:
raise NotImplementedError("max_contributions is not supported yet.")
if col is None or not col:
raise ValueError("col must be non-empty")
if params is None:
Expand Down
60 changes: 48 additions & 12 deletions tests/dp_engine_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ def test_user_contribution_bounding_applied(self):
input_col,
max_contributions=max_contributions,
aggregator_fn=DpEngineTest.aggregator_fn))
print(bound_result)
self.assertLen(bound_result, 5)

def test_contribution_bounding_empty_col(self):
Expand Down Expand Up @@ -198,77 +197,113 @@ def test_aggregate_none(self):
with self.assertRaises(Exception):
pipeline_dp.DPEngine(None, None).aggregate(None, None, None)

# TODO: move these tests to aggregate_params_tests.py
@parameterized.named_parameters(
dict(testcase_name='negative max_partitions_contributed',
error_msg='negative max_partitions_contributed',
error_msg='max_partitions_contributed has to be positive integer',
min_value=None,
max_value=None,
max_partitions_contributed=-1,
max_contributions_per_partition=1,
max_contributions=None,
metrics=[pipeline_dp.Metrics.PRIVACY_ID_COUNT]),
dict(testcase_name='negative max_contributions_per_partition',
error_msg='negative max_contributions_per_partition',
error_msg=
'max_contributions_per_partition has to be positive integer',
min_value=None,
max_value=None,
max_partitions_contributed=1,
max_contributions_per_partition=-1,
max_contributions=None,
metrics=[pipeline_dp.Metrics.PRIVACY_ID_COUNT]),
dict(testcase_name='float max_partitions_contributed',
error_msg='float max_partitions_contributed',
error_msg='max_partitions_contributed has to be positive integer',
min_value=None,
max_value=None,
max_partitions_contributed=1.5,
max_contributions_per_partition=1,
max_contributions=None,
metrics=[pipeline_dp.Metrics.PRIVACY_ID_COUNT]),
dict(testcase_name='float max_contributions_per_partition',
error_msg='float max_contributions_per_partition',
error_msg=
'max_contributions_per_partition has to be positive integer',
min_value=None,
max_value=None,
max_partitions_contributed=1,
max_contributions_per_partition=1.5,
max_contributions=None,
metrics=[pipeline_dp.Metrics.PRIVACY_ID_COUNT]),
dict(testcase_name='unspecified min_value SUM',
error_msg='unspecified min_value',
error_msg='min_value and max_value must be set',
min_value=None,
max_value=1,
max_partitions_contributed=1,
max_contributions_per_partition=1,
max_contributions=None,
metrics=[pipeline_dp.Metrics.SUM]),
dict(testcase_name='unspecified max_value SUM',
error_msg='unspecified max_value',
error_msg='min_value and max_value must be set',
min_value=1,
max_value=None,
max_partitions_contributed=1,
max_contributions_per_partition=1,
max_contributions=None,
metrics=[pipeline_dp.Metrics.SUM]),
dict(testcase_name='unspecified min_value MEAN',
error_msg='unspecified min_value',
error_msg='min_value and max_value must be set',
min_value=None,
max_value=1,
max_partitions_contributed=1,
max_contributions_per_partition=1,
max_contributions=None,
metrics=[pipeline_dp.Metrics.MEAN]),
dict(testcase_name='unspecified max_value MEAN',
error_msg='unspecified max_value',
error_msg='min_value and max_value must be set',
min_value=1,
max_value=None,
max_partitions_contributed=1,
max_contributions_per_partition=1,
max_contributions=None,
metrics=[pipeline_dp.Metrics.MEAN]),
dict(testcase_name='min_value > max_value',
error_msg='min_value > max_value',
error_msg='must be equal to or greater',
min_value=2,
max_value=1,
max_partitions_contributed=1,
max_contributions_per_partition=1,
max_contributions=None,
metrics=[pipeline_dp.Metrics.SUM]),
dict(testcase_name='max_contrib and max_partitions are set',
error_msg='only one in max_contributions or',
min_value=0,
max_value=1,
max_partitions_contributed=1,
max_contributions_per_partition=1,
max_contributions=1,
metrics=[pipeline_dp.Metrics.SUM]),
dict(testcase_name='max_partitions_contributed not set',
error_msg='either none or both',
min_value=0,
max_value=1,
max_partitions_contributed=None,
max_contributions_per_partition=1,
max_contributions=None,
metrics=[pipeline_dp.Metrics.SUM]),
dict(testcase_name='contributions not set',
error_msg='either max_contributions must',
min_value=0,
max_value=1,
max_partitions_contributed=None,
max_contributions_per_partition=None,
max_contributions=None,
metrics=[pipeline_dp.Metrics.SUM]),
)
def test_check_invalid_bounding_params(self, error_msg, min_value,
max_value,
max_partitions_contributed,
max_contributions_per_partition,
metrics):
with self.assertRaises(Exception, msg=error_msg):
max_contributions, metrics):
with self.assertRaisesRegex(ValueError, error_msg):
budget_accountant = NaiveBudgetAccountant(total_epsilon=1,
total_delta=1e-10)
engine = pipeline_dp.DPEngine(budget_accountant=budget_accountant,
Expand All @@ -282,6 +317,7 @@ def test_check_invalid_bounding_params(self, error_msg, min_value,
max_contributions_per_partition,
min_value=min_value,
max_value=max_value,
max_contributions=max_contributions,
metrics=metrics), self._get_default_extractors())

def test_check_aggregate_params(self):
Expand Down

0 comments on commit 94bacde

Please sign in to comment.