-
-
Notifications
You must be signed in to change notification settings - Fork 77
/
aggregate_params.py
279 lines (239 loc) · 10.8 KB
/
aggregate_params.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# Copyright 2022 OpenMined.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains utility classes used for specifying DP aggregation parameters, noise types, and norms."""
from dataclasses import dataclass
from enum import Enum
from typing import Any, Iterable, Callable, Union
import math
import logging
class Metrics(Enum):
    """Identifiers of the standard metrics an aggregation can compute."""
    COUNT = "count"
    PRIVACY_ID_COUNT = "privacy_id_count"
    SUM = "sum"
    MEAN = "mean"
class NoiseKind(Enum):
    """Kind of noise to use for the DP calculations."""
    LAPLACE = "laplace"
    GAUSSIAN = "gaussian"

    def convert_to_mechanism_type(self):
        """Returns the MechanismType member that corresponds to this kind."""
        # Enum members are singletons, so identity comparison is equivalent
        # to comparing the underlying values.
        if self is NoiseKind.LAPLACE:
            return MechanismType.LAPLACE
        if self is NoiseKind.GAUSSIAN:
            return MechanismType.GAUSSIAN
class MechanismType(Enum):
    """Names of the noise mechanisms.

    NoiseKind.convert_to_mechanism_type() maps noise kinds onto the LAPLACE
    and GAUSSIAN members.
    """
    LAPLACE = "Laplace"
    GAUSSIAN = "Gaussian"
    GENERIC = "Truncated Geometric"
class NormKind(Enum):
    """Kinds of norm: Linf, L0, L1 and L2."""
    Linf = 'linf'
    L0 = 'l0'
    L1 = 'l1'
    L2 = 'l2'
@dataclass
class AggregateParams:
    """Specifies parameters for function DPEngine.aggregate()

    Args:
      metrics: A list of metrics to compute.
      max_partitions_contributed: A bound on the number of partitions to which
        one unit of privacy (e.g., a user) can contribute.
      max_contributions_per_partition: A bound on the number of times one unit
        of privacy (e.g. a user) can contribute to a partition.
      budget_weight: Relative weight of the privacy budget allocated to this
        aggregation.
      low: Deprecated, use min_value. Any non-None value raises ValueError.
      high: Deprecated, use max_value. Any non-None value raises ValueError.
      min_value: Lower bound on each value. Required when SUM or MEAN is
        requested.
      max_value: Upper bound on each value. Required when SUM or MEAN is
        requested.
      public_partitions: A collection of partition keys that will be present
        in the result. Optional. If not provided, partitions will be selected
        in a DP manner.
      noise_kind: The type of noise to use for the DP calculations.
      custom_combiners: Warning: experimental. Combiners for computing custom
        metrics. Can not be used together with standard metrics.

    Raises:
      ValueError: if a deprecated field is set, if a contribution bound is not
        a positive integer, if min_value/max_value are missing, non-finite or
        out of order while SUM or MEAN is requested, or if both metrics and
        custom combiners are given.
    """
    metrics: Iterable[Metrics]
    max_partitions_contributed: int
    max_contributions_per_partition: int
    budget_weight: float = 1
    low: float = None  # deprecated
    high: float = None  # deprecated
    min_value: float = None
    max_value: float = None
    public_partitions: Any = None
    noise_kind: NoiseKind = NoiseKind.LAPLACE
    custom_combiners: Iterable['CustomCombiner'] = None

    def __post_init__(self):
        """Validates the parameters right after dataclass initialization."""
        if self.low is not None:
            raise ValueError(
                "AggregateParams: please use min_value instead of low")
        if self.high is not None:
            raise ValueError(
                "AggregateParams: please use max_value instead of high")

        # The contribution bounds and value bounds are only validated when
        # standard metrics are requested.
        if self.metrics:
            needs_min_max_value = (Metrics.SUM in self.metrics or
                                   Metrics.MEAN in self.metrics)
            if not isinstance(self.max_partitions_contributed,
                              int) or self.max_partitions_contributed <= 0:
                raise ValueError(
                    "params.max_partitions_contributed must be set "
                    "to a positive integer")
            if not isinstance(self.max_contributions_per_partition,
                              int) or self.max_contributions_per_partition <= 0:
                raise ValueError(
                    "params.max_contributions_per_partition must be set "
                    "to a positive integer")
            if needs_min_max_value and (self.min_value is None or
                                        self.max_value is None):
                raise ValueError(
                    "params.min_value and params.max_value must be set")
            if needs_min_max_value and (
                    _not_a_proper_number(self.min_value) or
                    _not_a_proper_number(self.max_value)):
                raise ValueError(
                    "params.min_value and params.max_value must be both finite numbers"
                )
            if needs_min_max_value and self.max_value < self.min_value:
                raise ValueError(
                    "params.max_value must be equal to or greater than params.min_value"
                )

        if self.custom_combiners:
            logging.warning("Warning: custom combiners are used. This is an "
                            "experimental feature. It might not work properly "
                            "and it might be changed or removed without any "
                            "notifications.")
        if self.metrics and self.custom_combiners:
            # TODO(dvadym): after implementation of custom combiners to verify
            # whether this check is required?
            raise ValueError(
                "Custom combiners can not be used with standard metrics")

    def __str__(self):
        """Returns a short description listing the custom combiners or metrics."""
        if self.custom_combiners:
            return f"Custom combiners: {[c.metrics_names() for c in self.custom_combiners]}"
        # __post_init__ accepts metrics=None (e.g. when only custom combiners
        # were intended), so do not fail on it here.
        return f"Metrics: {[m.value for m in (self.metrics or ())]}"
@dataclass
class SelectPartitionsParams:
    """Parameters of differentially-private partition selection.

    Args:
      max_partitions_contributed: Maximum number of partitions per privacy ID.
        The algorithm will drop contributions over this limit. To keep more
        data, this should be a good estimate of the realistic upper bound.
        Significantly over- or under-estimating this may increase the number
        of dropped partitions.
      budget_weight: Relative weight of the privacy budget allocated to
        partition selection.
    """
    max_partitions_contributed: int
    budget_weight: float = 1

    def __str__(self):
        """Returns a fixed label; it does not depend on the field values."""
        label = "Private Partitions"
        return label
@dataclass
class SumParams:
    """Parameters of a differentially-private sum calculation.

    Args:
      max_partitions_contributed: A bound on the number of partitions to which
        one unit of privacy (e.g., a user) can contribute.
      max_contributions_per_partition: A bound on the number of times one unit
        of privacy (e.g. a user) can contribute to a partition.
      min_value: Lower bound on each value.
      max_value: Upper bound on each value.
      partition_extractor: A function which, given an input element, will
        return its partition id.
      value_extractor: A function which, given an input element, will return
        its value.
      low: Deprecated, use min_value. Any non-None value raises ValueError.
      high: Deprecated, use max_value. Any non-None value raises ValueError.
      budget_weight: Relative weight of the privacy budget allocated for this
        operation.
      noise_kind: The type of noise to use for the DP calculations.
      public_partitions: A collection of partition keys that will be present
        in the result. Optional.
    """
    max_partitions_contributed: int
    max_contributions_per_partition: int
    min_value: float
    max_value: float
    partition_extractor: Callable
    value_extractor: Callable
    low: float = None  # deprecated
    high: float = None  # deprecated
    budget_weight: float = 1
    noise_kind: NoiseKind = NoiseKind.LAPLACE
    public_partitions: Union[Iterable, 'PCollection', 'RDD'] = None

    def __post_init__(self):
        """Rejects the deprecated `low` and `high` arguments."""
        low_was_given = self.low is not None
        if low_was_given:
            raise ValueError("SumParams: please use min_value instead of low")

        high_was_given = self.high is not None
        if high_was_given:
            raise ValueError("SumParams: please use max_value instead of high")
@dataclass
class MeanParams:
    """Specifies parameters for differentially-private mean calculation.

    Args:
      max_partitions_contributed: Bounds the number of partitions in which one
        unit of privacy (e.g., a user) can participate.
      max_contributions_per_partition: Bounds the number of times one unit of
        privacy (e.g. a user) can contribute to a partition.
      min_value: Lower bound on a value contributed by a unit of privacy in a
        partition.
      max_value: Upper bound on a value contributed by a unit of privacy in a
        partition.
      partition_extractor: A function for partition id extraction from a
        collection record.
      value_extractor: A function for extraction of the value for which the
        mean will be calculated.
      budget_weight: Relative weight of the privacy budget allocated for this
        operation.
      noise_kind: Kind of noise to use for the DP calculations.
      public_partitions: A collection of partition keys that will be present in
        the result. Optional.
    """
    max_partitions_contributed: int
    max_contributions_per_partition: int
    min_value: float
    max_value: float
    partition_extractor: Callable
    value_extractor: Callable
    budget_weight: float = 1
    noise_kind: NoiseKind = NoiseKind.LAPLACE
    public_partitions: Union[Iterable, 'PCollection', 'RDD'] = None
@dataclass
class CountParams:
    """Specifies parameters for differentially-private count calculation.

    Unlike in the sum and mean parameter classes of this module, noise_kind
    has no default here and is the first positional field.

    Args:
      noise_kind: The type of noise to use for the DP calculations.
      max_partitions_contributed: A bound on the number of partitions to which
        one unit of privacy (e.g., a user) can contribute.
      max_contributions_per_partition: A bound on the number of times one unit
        of privacy (e.g. a user) can contribute to a partition.
      partition_extractor: A function which, given an input element, will
        return its partition id.
      budget_weight: Relative weight of the privacy budget allocated for this
        operation.
      public_partitions: A collection of partition keys that will be present in
        the result. Optional.
    """
    noise_kind: NoiseKind
    max_partitions_contributed: int
    max_contributions_per_partition: int
    partition_extractor: Callable
    budget_weight: float = 1
    public_partitions: Union[Iterable, 'PCollection', 'RDD'] = None
@dataclass
class PrivacyIdCountParams:
    """Specifies parameters for differentially-private privacy id count calculation.

    This class has no max_contributions_per_partition field; noise_kind has no
    default and is the first positional field.

    Args:
      noise_kind: The type of noise to use for the DP calculations.
      max_partitions_contributed: A bound on the number of partitions to which
        one unit of privacy (e.g., a user) can contribute.
      partition_extractor: A function which, given an input element, will
        return its partition id.
      budget_weight: Relative weight of the privacy budget allocated for this
        operation.
      public_partitions: A collection of partition keys that will be present in
        the result. Optional.
    """
    noise_kind: NoiseKind
    max_partitions_contributed: int
    partition_extractor: Callable
    budget_weight: float = 1
    public_partitions: Union[Iterable, 'PCollection', 'RDD'] = None
def _not_a_proper_number(num):
    """Tells whether a number is non-finite.

    Args:
      num: the number to check.

    Returns:
      True if num is inf, -inf or NaN, False otherwise.
    """
    is_finite = math.isfinite(num)
    return not is_finite