Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Output uniformly random guess at frequency distribution when reach is too small #1498

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -253,20 +253,32 @@ object LiquidLegions {
sketchParams: LiquidLegionsSketchParams,
collisionResolution: Boolean,
frequencyNoiseVariance: Double,
totalReach: Long,
reachRatio: Double,
frequencyMeasurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams,
): Double {
val (
totalReach: Long,
reachMeasurementVariance: Double,
reachRatio: Double,
frequencyMeasurementParams: FrequencyMeasurementParams,
multiplier: Int) =
relativeFrequencyMeasurementVarianceParams

val expectedRegisterNum =
expectedNumberOfNonDestroyedRegisters(
sketchParams,
collisionResolution,
totalReach,
frequencyMeasurementParams.vidSamplingInterval.width,
)
if (expectedRegisterNum < 1.0) {
return 0.0

// When reach is too small, we have little info to estimate frequency, and thus the estimate of
// relative frequency is equivalent to a uniformly random guess of a probability in [0, 1].
if (
isReachTooSmallForComputingRelativeFrequencyVariance(totalReach, reachMeasurementVariance) ||
expectedRegisterNum < 1.0
) {
return if (frequencyMeasurementParams.maximumFrequency == multiplier) 0.0
else VARIANCE_OF_UNIFORMLY_RANDOM_PROBABILITY
}

val registerNumVariance =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package org.wfanet.measurement.measurementconsumer.stats

import kotlin.math.sqrt
import org.wfanet.measurement.eventdataprovider.noiser.DpParams

/** Noise mechanism enums. */
Expand Down Expand Up @@ -72,6 +73,43 @@ data class FrequencyMeasurementVarianceParams(
val measurementParams: FrequencyMeasurementParams,
)

/**
 * The parameters used to compute the variance of a reach ratio at a certain frequency in a relative
 * frequency measurement.
 *
 * Consumers of this class unpack it with a positional destructuring declaration, so the order of
 * the properties below is part of its contract: reordering them would silently swap the values
 * received by those consumers.
 *
 * @property totalReach The reach result the relative frequency is computed against. Together with
 *   [reachMeasurementVariance] it is used to decide whether the reach is too small for a
 *   meaningful relative frequency variance.
 * @property reachMeasurementVariance The variance of the reach measurement that produced
 *   [totalReach]. Its square root is used as the reach standard deviation.
 * @property reachRatio The relative frequency (a ratio of reach) at the frequency of interest, as
 *   taken from a relative frequency distribution or its k-plus counterpart.
 * @property measurementParams The parameters of the frequency measurement, e.g. its maximum
 *   frequency and its noise configuration.
 * @property multiplier A value that depends on the type of frequency histogram being evaluated.
 *   NOTE(review): the visible callers pass 1 for the per-frequency histogram and
 *   `maximumFrequency - frequency + 1` for the k-plus histogram; confirm against other callers.
 */
data class RelativeFrequencyMeasurementVarianceParams(
  val totalReach: Long,
  val reachMeasurementVariance: Double,
  val reachRatio: Double,
  val measurementParams: FrequencyMeasurementParams,
  val multiplier: Int,
)

/**
 * Number of standard deviations subtracted from a reach result to obtain the lower bound of its 95%
 * confidence interval. The 95% confidence interval = reach_result +/- 1.96 * reach_std.
 *
 * A reach result is considered too small when computing variances of relative frequency if that
 * confidence interval covers 0 or negative values.
 */
private const val REACH_THRESHOLD_CONSTANT_FOR_RELATIVE_FREQUENCY_VARIANCE = 1.96

/**
 * A uniformly random number from [0, 1] has a variance equal to 1 / 12
 * (en.wikipedia.org/wiki/Continuous_uniform_distribution).
 */
const val VARIANCE_OF_UNIFORMLY_RANDOM_PROBABILITY = 1.0 / 12.0

/**
 * Determines if a reach is too small for computing relative frequency variance.
 *
 * A reach is too small when the lower bound of its 95% confidence interval, `reach - 1.96 *
 * sqrt(reachVariance)`, is less than or equal to 0.
 *
 * @param reach the reach result.
 * @param reachVariance the variance of the reach result.
 * @return `true` if [reach] is non-positive or if the confidence interval lower bound is <= 0;
 *   `false` otherwise.
 */
fun isReachTooSmallForComputingRelativeFrequencyVariance(
  reach: Long,
  reachVariance: Double,
): Boolean {
  // A non-positive reach is always too small. Deciding this up front matters when reachVariance is
  // negative or NaN: sqrt() then yields NaN, every comparison against NaN is false, and a reach of
  // 0 would otherwise be reported as large enough.
  if (reach <= 0L) {
    return true
  }

  // For a positive reach, the reach is too small if the confidence interval lower bound <= 0. A NaN
  // lower bound (negative or NaN variance) compares as false here, i.e. the reach is kept.
  val reachConfidenceIntervalLowerBound =
    reach - REACH_THRESHOLD_CONSTANT_FOR_RELATIVE_FREQUENCY_VARIANCE * sqrt(reachVariance)
  return reachConfidenceIntervalLowerBound <= 0.0
}

/** The parameters used to compute the variance of an impression measurement. */
data class ImpressionMeasurementVarianceParams(
val impression: Long,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,26 @@ object VariancesImpl : Variances {
* Different types of frequency histograms have different values of [multiplier].
*/
private fun deterministicFrequencyRelativeVariance(
totalReach: Long,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams
): Double {

val (
totalReach: Long,
reachMeasurementVariance: Double,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int) =
relativeFrequencyMeasurementVarianceParams

// When reach is too small, we have little info to estimate frequency, and thus the estimate of
// relative frequency is equivalent to a uniformly random guess at probability.
if (
isReachTooSmallForComputingRelativeFrequencyVariance(totalReach, reachMeasurementVariance)
) {
return if (measurementParams.maximumFrequency == multiplier) 0.0
else VARIANCE_OF_UNIFORMLY_RANDOM_PROBABILITY
}

val frequencyNoiseVariance: Double =
computeNoiseVariance(measurementParams.dpParams, measurementParams.noiseMechanism)
val varPart1 =
Expand Down Expand Up @@ -257,22 +272,16 @@ object VariancesImpl : Variances {
sketchParams: LiquidLegionsSketchParams,
measurementParams: FrequencyMeasurementParams,
): (
totalReach: Long,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams
) -> Double {
val frequencyNoiseVariance: Double =
computeNoiseVariance(measurementParams.dpParams, measurementParams.noiseMechanism)
return { totalReach, reachRatio, freqParams, multiplier ->
return { relativeFrequencyMeasurementVarianceParams ->
LiquidLegions.liquidLegionsFrequencyRelativeVariance(
sketchParams = sketchParams,
collisionResolution = true,
frequencyNoiseVariance = frequencyNoiseVariance,
totalReach = totalReach,
reachRatio = reachRatio,
frequencyMeasurementParams = freqParams,
multiplier = multiplier,
relativeFrequencyMeasurementVarianceParams = relativeFrequencyMeasurementVarianceParams,
)
}
}
Expand Down Expand Up @@ -326,23 +335,17 @@ object VariancesImpl : Variances {
sketchParams: LiquidLegionsSketchParams,
measurementParams: FrequencyMeasurementParams,
): (
totalReach: Long,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams
) -> Double {
val frequencyNoiseVariance: Double =
computeDistributedNoiseVariance(measurementParams.dpParams, measurementParams.noiseMechanism)

return { totalReach, reachRatio, freqParams, multiplier ->
return { relativeFrequencyMeasurementVarianceParams ->
LiquidLegions.liquidLegionsFrequencyRelativeVariance(
sketchParams = sketchParams,
collisionResolution = false,
frequencyNoiseVariance = frequencyNoiseVariance,
totalReach = totalReach,
reachRatio = reachRatio,
frequencyMeasurementParams = freqParams,
multiplier = multiplier,
relativeFrequencyMeasurementVarianceParams = relativeFrequencyMeasurementVarianceParams,
)
}
}
Expand Down Expand Up @@ -387,10 +390,7 @@ object VariancesImpl : Variances {
params: FrequencyMeasurementVarianceParams,
frequencyRelativeVarianceFun:
(
totalReach: Long,
reachRatio: Double,
measurementParams: FrequencyMeasurementParams,
multiplier: Int,
relativeFrequencyMeasurementVarianceParams: RelativeFrequencyMeasurementVarianceParams
) -> Double,
frequencyCountVarianceFun:
(
Expand All @@ -415,20 +415,26 @@ object VariancesImpl : Variances {
val relativeVariances: Map<Int, Double> =
(1..maximumFrequency).associateWith { frequency ->
frequencyRelativeVarianceFun(
params.totalReach,
params.relativeFrequencyDistribution.getOrDefault(frequency, 0.0),
params.measurementParams,
1,
RelativeFrequencyMeasurementVarianceParams(
params.totalReach,
params.reachMeasurementVariance,
params.relativeFrequencyDistribution.getOrDefault(frequency, 0.0),
params.measurementParams,
1,
)
)
}

val kPlusRelativeVariances: Map<Int, Double> =
(1..maximumFrequency).associateWith { frequency ->
frequencyRelativeVarianceFun(
params.totalReach,
kPlusRelativeFrequencyDistribution.getValue(frequency),
params.measurementParams,
maximumFrequency - frequency + 1,
RelativeFrequencyMeasurementVarianceParams(
params.totalReach,
params.reachMeasurementVariance,
kPlusRelativeFrequencyDistribution.getValue(frequency),
params.measurementParams,
maximumFrequency - frequency + 1,
)
)
}

Expand Down
Loading
Loading