Implementing VectorSumCombiner #276

Merged: 12 commits, May 20, 2022
4 changes: 4 additions & 0 deletions pipeline_dp/aggregate_params.py
@@ -69,6 +69,8 @@ class AggregateParams:
max_value: Upper bound on each value.
custom_combiners: Warning: experimental! Combiners for computing custom
metrics.
norm_kind: The kind of norm used in the DP computations on vectors.
max_norm: Upper bound on the norm of each vector.
"""

metrics: Iterable[Metrics]
@@ -82,6 +84,8 @@ class AggregateParams:
public_partitions: Any = None # deprecated
noise_kind: NoiseKind = NoiseKind.LAPLACE
custom_combiners: Iterable['CustomCombiner'] = None
norm_kind: NormKind = NormKind.Linf
max_norm: float = None

def __post_init__(self):
if self.low is not None:
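For orientation, a minimal sketch (not part of this diff) of how the two new fields would be set on AggregateParams, mirroring the test helper in tests/combiners_test.py below; the metric list and the omission of other fields such as value bounds are assumptions.

# Sketch only: configure the new per-vector bounds introduced by this PR.
import pipeline_dp
from pipeline_dp.aggregate_params import NormKind

params = pipeline_dp.AggregateParams(
    noise_kind=pipeline_dp.NoiseKind.GAUSSIAN,
    metrics=[pipeline_dp.Metrics.COUNT],  # placeholder metric, as in the tests
    max_partitions_contributed=1,
    max_contributions_per_partition=3,
    norm_kind=NormKind.Linf,  # default added by this PR
    max_norm=3.0)  # upper bound on each vector's norm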
52 changes: 52 additions & 0 deletions pipeline_dp/combiners.py
@@ -20,6 +20,7 @@
from pipeline_dp import budget_accounting
import numpy as np
import collections
from pipeline_dp.aggregate_params import NormKind


class Combiner(abc.ABC):
@@ -148,6 +149,18 @@ def mean_var_params(self):
self.aggregate_params.max_contributions_per_partition,
self.aggregate_params.noise_kind)

@property
def additive_vector_noise_params(self):
return dp_computations.AdditiveVectorNoiseParams(
eps_per_coordinate=self.eps,
delta_per_coordinate=self.delta,
max_norm=self.aggregate_params.max_norm,
l0_sensitivity=self.aggregate_params.max_partitions_contributed,
linf_sensitivity=self.aggregate_params.
max_contributions_per_partition,
norm_kind=self.aggregate_params.norm_kind,
noise_kind=self.aggregate_params.noise_kind)
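For reference, a short sketch (not part of this diff) of how a combiner reaches these noise parameters: CombinerParams pairs a budget MechanismSpec with the AggregateParams above, and this property repackages both into the dataclass consumed by dp_computations.add_noise_vector. The helper _create_mechanism_spec comes from the test file below and is an assumption here.

# Sketch only, assuming `params` from the AggregateParams example above.
mechanism_spec = _create_mechanism_spec(no_noise=False)
combiner_params = CombinerParams(mechanism_spec, params)
noise_params = combiner_params.additive_vector_noise_params
# Field mapping performed by the property:
#   noise_params.l0_sensitivity   == params.max_partitions_contributed
#   noise_params.linf_sensitivity == params.max_contributions_per_partition
#   noise_params.max_norm         == params.max_norm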


class CountCombiner(Combiner):
"""Combiner for computing DP Count.
@@ -520,3 +533,42 @@ def create_compound_combiner_with_custom_combiners(
combiner.set_aggregate_params(aggregate_params)

return CompoundCombiner(custom_combiners, return_named_tuple=False)


class VectorSumCombiner(Combiner):
"""Combiner for computing dp vector sum.

the type of the accumulator is ndarray, which represents sum of the vectors of the same size
in the dataset for which this accumulator is computed.
"""
AccumulatorType = np.ndarray

def __init__(self, params: CombinerParams):
self._params = params

def create_accumulator(self,
values: Iterable[np.ndarray]) -> AccumulatorType:
array_sum = None
for val in values:
if array_sum is None:
array_sum = val
else:
if array_sum.shape != val.shape:
raise TypeError(
f"Shape mismatch: {array_sum.shape} != {val.shape}")
array_sum += val
return array_sum

def merge_accumulators(self, array_sum1: AccumulatorType,
array_sum2: AccumulatorType):
return array_sum1 + array_sum2

def compute_metrics(self, array_sum: AccumulatorType) -> dict:
return {
'array_sum':
dp_computations.add_noise_vector(
array_sum, self._params.additive_vector_noise_params)
}

def metrics_names(self) -> List[str]:
return ['array_sum']
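A minimal end-to-end sketch (not part of this diff) of the combiner's lifecycle, assuming combiner_params is built as in VectorSumCombinerTest below:

combiner = VectorSumCombiner(combiner_params)

# Accumulate the vectors contributed to one partition; all shapes must match.
acc1 = combiner.create_accumulator([np.array([1., 0.]), np.array([0., 2.])])
acc2 = combiner.create_accumulator([np.array([3., 3.])])

# Merge partial accumulators across workers, then release one noisy vector sum.
total = combiner.merge_accumulators(acc1, acc2)  # -> array([4., 5.])
noisy_sum = combiner.compute_metrics(total)['array_sum']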
62 changes: 61 additions & 1 deletion tests/combiners_test.py
@@ -40,7 +40,8 @@ def _create_aggregate_params(max_value: float = 1):
max_partitions_contributed=1,
max_contributions_per_partition=3,
noise_kind=pipeline_dp.NoiseKind.GAUSSIAN,
metrics=[pipeline_dp.Metrics.COUNT])
metrics=[pipeline_dp.Metrics.COUNT],
max_norm=3)


class CreateCompoundCombinersTest(parameterized.TestCase):
@@ -458,5 +459,64 @@ def test_compute_metrics_with_noise(self):
self.assertTrue(np.var(noised_sum) > 1) # check that noise is added


class VectorSumCombinerTest(parameterized.TestCase):

def _create_combiner(self, no_noise):
mechanism_spec = _create_mechanism_spec(no_noise)
aggregate_params = _create_aggregate_params()
params = dp_combiners.CombinerParams(mechanism_spec, aggregate_params)
return dp_combiners.VectorSumCombiner(params)

@parameterized.named_parameters(
dict(testcase_name='no_noise', no_noise=True),
dict(testcase_name='noise', no_noise=False),
)
def test_create_accumulator(self, no_noise):
combiner = self._create_combiner(no_noise)
self.assertEqual(np.array([0.]), combiner.create_accumulator([[0.]]))
self.assertEqual(
np.array([2.]),
combiner.create_accumulator([np.array([1.]),
np.array([1.])]))
# Bounding on values.
#self.assertEqual(2, combiner.create_accumulator([1, 3]))
#self.assertEqual(1, combiner.create_accumulator([0, 3]))

@parameterized.named_parameters(
dict(testcase_name='no_noise', no_noise=True),
dict(testcase_name='noise', no_noise=False),
)
def test_merge_accumulators(self, no_noise):
combiner = self._create_combiner(no_noise)
self.assertEqual(
np.array([0.]),
combiner.merge_accumulators(np.array([0.]), np.array([0.])))
merge_result = combiner.merge_accumulators(np.array([1., 1.]),
                                            np.array([1., 4.]))
self.assertEqual(2., merge_result[0])
self.assertEqual(5., merge_result[1])

def test_compute_metrics_no_noise(self):
combiner = self._create_combiner(no_noise=True)
self.assertAlmostEqual(3,
combiner.compute_metrics(np.array(
[3]))['array_sum'],
delta=1e-5)

def test_compute_metrics_with_noise(self):
combiner = self._create_combiner(no_noise=False)
accumulator = np.array([1, 3])
noisy_values = [
combiner.compute_metrics(accumulator)['array_sum']
for _ in range(1000)
]
# The noise standard deviation is about 1.37, so the mean of 1000 noisy
# samples has a standard deviation of roughly 1.37 / sqrt(1000) ≈ 0.04;
# delta=0.5 leaves a comfortable margin.
mean_array = np.mean(noisy_values, axis=0)
self.assertAlmostEqual(accumulator[0], mean_array[0], delta=0.5)
self.assertAlmostEqual(accumulator[1], mean_array[1], delta=0.5)
self.assertTrue(np.var(noisy_values) > 1) # check that noise is added


if __name__ == '__main__':
absltest.main()