Skip to content

Commit

Permalink
Output uniformly random guess at frequency distribution when reach is…
Browse files Browse the repository at this point in the history
… too small (#1498)

The variance calculation of the frequency distribution outputs `NaN` when the reach is zero. Moreover, the estimated variance is not accurate when the reach is impractically small. The solution is to check whether the reach is too small using its confidence interval. If the confidence interval of the reach contains values <= 0, we claim the reach is too small for an accurate variance estimate of the frequency distribution, and output the variance of a uniformly random draw from [0, 1] (i.e. 1/12).
  • Loading branch information
riemanli authored Feb 26, 2024
1 parent ff5757c commit 05abea3
Show file tree
Hide file tree
Showing 4 changed files with 601 additions and 349 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -253,20 +253,32 @@ object LiquidLegions {
sketchParams: LiquidLegionsSketchParams,
collisionResolution: Boolean,
frequencyNoiseVariance: Double,
totalReach: Long,
reachRatio: Double,
frequencyMeasurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams,
): Double {
val (
totalReach: Long,
reachMeasurementVariance: Double,
reachRatio: Double,
frequencyMeasurementParams: FrequencyMeasurementParams,
multiplier: Int) =
relativeFrequencyMeasurementVarianceParams

val expectedRegisterNum =
expectedNumberOfNonDestroyedRegisters(
sketchParams,
collisionResolution,
totalReach,
frequencyMeasurementParams.vidSamplingInterval.width,
)
if (expectedRegisterNum < 1.0) {
return 0.0

// When reach is too small, we have little info to estimate frequency, and thus the estimate of
// relative frequency is equivalent to a uniformly random guess of a probability in [0, 1].
if (
isReachTooSmallForComputingRelativeFrequencyVariance(totalReach, reachMeasurementVariance) ||
expectedRegisterNum < 1.0
) {
return if (frequencyMeasurementParams.maximumFrequency == multiplier) 0.0
else VARIANCE_OF_UNIFORMLY_RANDOM_PROBABILITY
}

val registerNumVariance =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package org.wfanet.measurement.measurementconsumer.stats

import kotlin.math.sqrt
import org.wfanet.measurement.eventdataprovider.noiser.DpParams

/** Noise mechanism enums. */
Expand Down Expand Up @@ -72,6 +73,43 @@ data class FrequencyMeasurementVarianceParams(
val measurementParams: FrequencyMeasurementParams,
)

/**
 * The parameters used to compute the variance of a reach ratio at a certain frequency in a relative
 * frequency measurement.
 *
 * Consumers unpack instances with positional destructuring (`val (totalReach, ...) = params`), so
 * the declaration order of the properties is part of the contract: reordering them would silently
 * rebind same-typed components.
 *
 * @property totalReach The reach value of the measurement. Together with
 *   [reachMeasurementVariance] it decides whether the reach is too small for a meaningful variance
 *   estimate.
 * @property reachMeasurementVariance The variance of the reach measurement; its square root is used
 *   as the reach standard deviation when building the reach confidence interval.
 * @property reachRatio The relative frequency value (share of the reach) at the frequency whose
 *   variance is being computed.
 * @property measurementParams The parameters of the frequency measurement, e.g. its maximum
 *   frequency, differential-privacy parameters and noise mechanism.
 * @property multiplier Histogram-type dependent factor. Callers pass `1` for the plain relative
 *   frequency distribution and `maximumFrequency - frequency + 1` for the k+ distribution.
 */
data class RelativeFrequencyMeasurementVarianceParams(
val totalReach: Long,
val reachMeasurementVariance: Double,
val reachRatio: Double,
val measurementParams: FrequencyMeasurementParams,
val multiplier: Int,
)

/**
 * Scale applied to the reach standard deviation to obtain the half-width of the reach confidence
 * interval used by the "reach too small" test. With 1.96 the interval is the 95% confidence
 * interval, reach_result +/- 1.96 * reach_std; a reach whose interval touches 0 or goes negative is
 * treated as too small for computing variances of relative frequency.
 */
private const val REACH_THRESHOLD_CONSTANT_FOR_RELATIVE_FREQUENCY_VARIANCE = 1.96

/**
 * Variance of a number drawn uniformly at random from [0, 1], which equals 1 / 12
 * (en.wikipedia.org/wiki/Continuous_uniform_distribution).
 */
const val VARIANCE_OF_UNIFORMLY_RANDOM_PROBABILITY = 1.0 / 12.0

/**
 * Tells whether [reach] is too small for computing a relative frequency variance.
 *
 * The reach is deemed too small when the lower end of its confidence interval, i.e. [reach] minus
 * the threshold constant times the square root of [reachVariance], is zero or negative.
 *
 * @param reach the reach value.
 * @param reachVariance the variance of the reach value.
 * @return `true` if the confidence interval lower bound of the reach is <= 0, `false` otherwise.
 */
fun isReachTooSmallForComputingRelativeFrequencyVariance(
  reach: Long,
  reachVariance: Double,
): Boolean {
  val reachStandardDeviation = sqrt(reachVariance)
  val confidenceIntervalHalfWidth =
    REACH_THRESHOLD_CONSTANT_FOR_RELATIVE_FREQUENCY_VARIANCE * reachStandardDeviation
  // Lower bound of the interval is reach - halfWidth; at or below zero means "too small".
  return reach - confidenceIntervalHalfWidth <= 0
}

/** The parameters used to compute the variance of an impression measurement. */
data class ImpressionMeasurementVarianceParams(
val impression: Long,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,26 @@ object VariancesImpl : Variances {
* Different types of frequency histograms have different values of [multiplier].
*/
private fun deterministicFrequencyRelativeVariance(
totalReach: Long,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams
): Double {

val (
totalReach: Long,
reachMeasurementVariance: Double,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int) =
relativeFrequencyMeasurementVarianceParams

// When reach is too small, we have little info to estimate frequency, and thus the estimate of
// relative frequency is equivalent to a uniformly random guess at probability.
if (
isReachTooSmallForComputingRelativeFrequencyVariance(totalReach, reachMeasurementVariance)
) {
return if (measurementParams.maximumFrequency == multiplier) 0.0
else VARIANCE_OF_UNIFORMLY_RANDOM_PROBABILITY
}

val frequencyNoiseVariance: Double =
computeNoiseVariance(measurementParams.dpParams, measurementParams.noiseMechanism)
val varPart1 =
Expand Down Expand Up @@ -257,22 +272,16 @@ object VariancesImpl : Variances {
sketchParams: LiquidLegionsSketchParams,
measurementParams: FrequencyMeasurementParams,
): (
totalReach: Long,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams
) -> Double {
val frequencyNoiseVariance: Double =
computeNoiseVariance(measurementParams.dpParams, measurementParams.noiseMechanism)
return { totalReach, reachRatio, freqParams, multiplier ->
return { relativeFrequencyMeasurementVarianceParams ->
LiquidLegions.liquidLegionsFrequencyRelativeVariance(
sketchParams = sketchParams,
collisionResolution = true,
frequencyNoiseVariance = frequencyNoiseVariance,
totalReach = totalReach,
reachRatio = reachRatio,
frequencyMeasurementParams = freqParams,
multiplier = multiplier,
relativeFrequencyMeasurementVarianceParams = relativeFrequencyMeasurementVarianceParams,
)
}
}
Expand Down Expand Up @@ -326,23 +335,17 @@ object VariancesImpl : Variances {
sketchParams: LiquidLegionsSketchParams,
measurementParams: FrequencyMeasurementParams,
): (
totalReach: Long,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams
) -> Double {
val frequencyNoiseVariance: Double =
computeDistributedNoiseVariance(measurementParams.dpParams, measurementParams.noiseMechanism)

return { totalReach, reachRatio, freqParams, multiplier ->
return { relativeFrequencyMeasurementVarianceParams ->
LiquidLegions.liquidLegionsFrequencyRelativeVariance(
sketchParams = sketchParams,
collisionResolution = false,
frequencyNoiseVariance = frequencyNoiseVariance,
totalReach = totalReach,
reachRatio = reachRatio,
frequencyMeasurementParams = freqParams,
multiplier = multiplier,
relativeFrequencyMeasurementVarianceParams = relativeFrequencyMeasurementVarianceParams,
)
}
}
Expand Down Expand Up @@ -387,10 +390,7 @@ object VariancesImpl : Variances {
params: FrequencyMeasurementVarianceParams,
frequencyRelativeVarianceFun:
(
totalReach: Long,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams
) -> Double,
frequencyCountVarianceFun:
(
Expand All @@ -415,20 +415,26 @@ object VariancesImpl : Variances {
val relativeVariances: Map<Int, Double> =
(1..maximumFrequency).associateWith { frequency ->
frequencyRelativeVarianceFun(
params.totalReach,
params.relativeFrequencyDistribution.getOrDefault(frequency, 0.0),
params.measurementParams,
1,
RelativeFrequencyMeasurementVarianceParams(
params.totalReach,
params.reachMeasurementVariance,
params.relativeFrequencyDistribution.getOrDefault(frequency, 0.0),
params.measurementParams,
1,
)
)
}

val kPlusRelativeVariances: Map<Int, Double> =
(1..maximumFrequency).associateWith { frequency ->
frequencyRelativeVarianceFun(
params.totalReach,
kPlusRelativeFrequencyDistribution.getValue(frequency),
params.measurementParams,
maximumFrequency - frequency + 1,
RelativeFrequencyMeasurementVarianceParams(
params.totalReach,
params.reachMeasurementVariance,
kPlusRelativeFrequencyDistribution.getValue(frequency),
params.measurementParams,
maximumFrequency - frequency + 1,
)
)
}

Expand Down
Loading

0 comments on commit 05abea3

Please sign in to comment.