From 4bbe9913abe64b6ee149eb44633ba8ac307f795b Mon Sep 17 00:00:00 2001 From: Ugur Date: Wed, 17 Jan 2024 16:17:03 +0000 Subject: [PATCH 01/13] sampling for synthetic event group spec --- .../dataprovider/SyntheticDataGeneration.kt | 59 ++++- .../simulator_synthetic_data_spec.proto | 6 +- .../SyntheticDataGenerationTest.kt | 238 ++++++++++++++++++ 3 files changed, 296 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index 45db0ec3c94..cd13f66cbb9 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -19,9 +19,11 @@ package org.wfanet.measurement.loadtest.dataprovider import com.google.protobuf.Descriptors.FieldDescriptor import com.google.protobuf.Message import java.time.ZoneOffset +import kotlin.random.Random import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.FieldValue import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SimulatorSyntheticDataSpec import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SyntheticEventGroupSpec +import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SyntheticEventGroupSpec.FrequencySpec.VidRangeSpec import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SyntheticPopulationSpec import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SyntheticPopulationSpec.SubPopulation import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.VidRange @@ -44,7 +46,8 @@ object SyntheticDataGeneration { fun generateEvents( messageInstance: T, populationSpec: SyntheticPopulationSpec, - syntheticEventGroupSpec: SyntheticEventGroupSpec + syntheticEventGroupSpec: SyntheticEventGroupSpec, + randomSeed: Long = 0L, ): Sequence> { val subPopulations = populationSpec.subPopulationsList @@ -52,8 +55,10 @@ object SyntheticDataGeneration { for (dateSpec: SyntheticEventGroupSpec.DateSpec in syntheticEventGroupSpec.dateSpecsList) { val dateProgression = dateSpec.dateRange.toProgression() for (frequencySpec: SyntheticEventGroupSpec.FrequencySpec in dateSpec.frequencySpecsList) { - for (vidRangeSpec: SyntheticEventGroupSpec.FrequencySpec.VidRangeSpec in - frequencySpec.vidRangeSpecsList) { + + check(!frequencySpec.hasOverlaps()) { "The VID ranges should be non-overlapping." } + + for (vidRangeSpec: VidRangeSpec in frequencySpec.vidRangeSpecsList) { val subPopulation: SubPopulation = vidRangeSpec.vidRange.findSubPopulation(subPopulations) ?: throw IllegalArgumentException() @@ -77,9 +82,10 @@ object SyntheticDataGeneration { @Suppress("UNCHECKED_CAST") // Safe per protobuf API. val message = builder.build() as T - for (vid in vidRangeSpec.vidRange.start until vidRangeSpec.vidRange.endExclusive) { - for (date in dateProgression) { - for (i in 0 until frequencySpec.frequency) { + for (date in dateProgression) { + for (i in 0 until frequencySpec.frequency) { + val sampledVids = sampleVids(vidRangeSpec, date.toEpochDay() + randomSeed) + for (vid in sampledVids) { yield(LabeledEvent(date.atStartOfDay().toInstant(ZoneOffset.UTC), vid, message)) } } @@ -90,6 +96,31 @@ object SyntheticDataGeneration { } } + /** + * Returns the sampled Vids from [vidRangeSpec]. Given the same [vidRangeSpec] and [randomSeed], + * returns the same vids. Returns all of the vids if sample size is 0. + */ + private fun sampleVids(vidRangeSpec: VidRangeSpec, randomSeed: Long): Sequence { + val vidRangeSequence = + (vidRangeSpec.vidRange.start until vidRangeSpec.vidRange.endExclusive).asSequence() + if (vidRangeSpec.sampleSize == 0) { + return vidRangeSequence + } + + // This step ensures given the same seed and the same vidRangeSpec, sample vids are the same. + // This is required because the EDP should respond by querying the same data for the same + // requisition. + val random = + Random( + vidRangeSpec.vidRange.start + + vidRangeSpec.vidRange.endExclusive + + vidRangeSpec.sampleSize.toLong() + + randomSeed + ) + + return vidRangeSequence.shuffled(random).take(vidRangeSpec.sampleSize) + } + /** * Returns the [SubPopulation] from a list of [SubPopulation] that contains the [VidRange] in its * range. @@ -154,3 +185,19 @@ object SyntheticDataGeneration { private fun SyntheticEventGroupSpec.DateSpec.DateRange.toProgression(): LocalDateProgression { return start.toLocalDate()..endExclusive.toLocalDate().minusDays(1) } + +private fun SyntheticEventGroupSpec.FrequencySpec.hasOverlaps(): Boolean { + return this.vidRangeSpecsList + .toList() + .flatMap { vidRangeSpec: VidRangeSpec -> + listOf( + RangePoint(vidRangeSpec.vidRange.start, true), + RangePoint(vidRangeSpec.vidRange.endExclusive - 1, false) + ) + } + .sortedBy { it.x } + .zipWithNext { first, second -> first.isStart && second.isStart } + .any { it } +} + +private data class RangePoint(val x: Long, val isStart: Boolean) diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index a878c488935..ccac1175d21 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -107,9 +107,13 @@ message SyntheticEventGroupSpec { // A range of VIDs within a single `SubPopulation`. VidRange vid_range = 1; + // Number of vids sampled uniformly without replacement from vid_range. + // If this is 0, no sampling is done and all the vids in range are taken. + int32 sample_size = 2; + // A map of `non_population_fields` from `SyntheticPopulationSpec` to // their values. - map non_population_field_values = 2; + map non_population_field_values = 3; } // The VID ranges should be non-overlapping sub-ranges of SubPopulations. repeated VidRangeSpec vid_range_specs = 2; diff --git a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt index ca0682f1fd6..5bde627cd4d 100644 --- a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt @@ -249,6 +249,244 @@ class SyntheticDataGenerationTest { assertThat(labeledEvents).containsExactlyElementsIn(expectedTestEvents) } + @Test + fun `generateEvents returns a sequence of sampled events when sample size specified`() { + + val sampleSizeForFreqOne = 2 + val firstsampleSizeForFreqTwo = 5 + val secondSampleSizeForFreqTwo = 10 + + val population = syntheticPopulationSpec { + vidRange = vidRange { + start = 0L + endExclusive = 100L + } + + populationFields += "person.gender" + populationFields += "person.age_group" + + nonPopulationFields += "banner_ad.viewable" + nonPopulationFields += "video_ad.viewed_fraction" + + subPopulations += + SyntheticPopulationSpecKt.subPopulation { + vidSubRange = vidRange { + start = 0L + endExclusive = 50L + } + + populationFieldsValues["person.gender"] = fieldValue { + enumValue = Person.Gender.MALE_VALUE + } + populationFieldsValues["person.age_group"] = fieldValue { + enumValue = Person.AgeGroup.YEARS_18_TO_34_VALUE + } + } + subPopulations += + SyntheticPopulationSpecKt.subPopulation { + vidSubRange = vidRange { + start = 50L + endExclusive = 100L + } + + populationFieldsValues["person.gender"] = fieldValue { + enumValue = Person.Gender.FEMALE_VALUE + } + populationFieldsValues["person.age_group"] = fieldValue { + enumValue = Person.AgeGroup.YEARS_18_TO_34_VALUE + } + } + } + val eventGroupSpec = syntheticEventGroupSpec { + description = "event group 1" + + dateSpecs += + SyntheticEventGroupSpecKt.dateSpec { + dateRange = + SyntheticEventGroupSpecKt.DateSpecKt.dateRange { + start = date { + year = 2023 + month = 6 + day = 27 + } + endExclusive = date { + year = 2023 + month = 6 + day = 28 + } + } + + frequencySpecs += + SyntheticEventGroupSpecKt.frequencySpec { + frequency = 2 + + vidRangeSpecs += + SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + vidRange = vidRange { + start = 0L + endExclusive = 25L + } + + sampleSize = firstsampleSizeForFreqTwo + + nonPopulationFieldValues["banner_ad.viewable"] = fieldValue { boolValue = true } + nonPopulationFieldValues["video_ad.viewed_fraction"] = fieldValue { + doubleValue = 0.5 + } + } + vidRangeSpecs += + SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + vidRange = vidRange { + start = 25L + endExclusive = 50L + } + + sampleSize = secondSampleSizeForFreqTwo + + nonPopulationFieldValues["banner_ad.viewable"] = fieldValue { boolValue = false } + nonPopulationFieldValues["video_ad.viewed_fraction"] = fieldValue { + doubleValue = 0.7 + } + } + } + frequencySpecs += + SyntheticEventGroupSpecKt.frequencySpec { + frequency = 1 + + vidRangeSpecs += + SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + vidRange = vidRange { + start = 50L + endExclusive = 75L + } + + sampleSize = sampleSizeForFreqOne + + nonPopulationFieldValues["banner_ad.viewable"] = fieldValue { boolValue = true } + nonPopulationFieldValues["video_ad.viewed_fraction"] = fieldValue { + doubleValue = 0.8 + } + } + } + } + } + + val labeledEvents: List> = + SyntheticDataGeneration.generateEvents( + TestEvent.getDefaultInstance(), + population, + eventGroupSpec + ) + .toList() + val expectedNumberOfEvents = + sampleSizeForFreqOne + 2 * (firstsampleSizeForFreqTwo + secondSampleSizeForFreqTwo) + assertThat(labeledEvents.size).isEqualTo(expectedNumberOfEvents) + } + + fun `generateEvents fails when overlapping vidRanges exist`() { + val population = syntheticPopulationSpec { + vidRange = vidRange { + start = 0L + endExclusive = 100L + } + + populationFields += "person.gender" + populationFields += "person.age_group" + + nonPopulationFields += "banner_ad.viewable" + nonPopulationFields += "video_ad.viewed_fraction" + + subPopulations += + SyntheticPopulationSpecKt.subPopulation { + vidSubRange = vidRange { + start = 0L + endExclusive = 50L + } + + populationFieldsValues["person.gender"] = fieldValue { + enumValue = Person.Gender.MALE_VALUE + } + populationFieldsValues["person.age_group"] = fieldValue { + enumValue = Person.AgeGroup.YEARS_18_TO_34_VALUE + } + } + subPopulations += + SyntheticPopulationSpecKt.subPopulation { + vidSubRange = vidRange { + start = 50L + endExclusive = 100L + } + + populationFieldsValues["person.gender"] = fieldValue { + enumValue = Person.Gender.FEMALE_VALUE + } + populationFieldsValues["person.age_group"] = fieldValue { + enumValue = Person.AgeGroup.YEARS_18_TO_34_VALUE + } + } + } + val eventGroupSpec = syntheticEventGroupSpec { + description = "event group 1" + + dateSpecs += + SyntheticEventGroupSpecKt.dateSpec { + dateRange = + SyntheticEventGroupSpecKt.DateSpecKt.dateRange { + start = date { + year = 2023 + month = 6 + day = 27 + } + endExclusive = date { + year = 2023 + month = 6 + day = 28 + } + } + + frequencySpecs += + SyntheticEventGroupSpecKt.frequencySpec { + frequency = 2 + + vidRangeSpecs += + SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + vidRange = vidRange { + start = 0L + endExclusive = 25L + } + + nonPopulationFieldValues["banner_ad.viewable"] = fieldValue { boolValue = true } + nonPopulationFieldValues["video_ad.viewed_fraction"] = fieldValue { + doubleValue = 0.5 + } + } + vidRangeSpecs += + SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + vidRange = vidRange { + // 20 is in between 0 and 25, the previous range. + start = 20L + endExclusive = 50L + } + + nonPopulationFieldValues["banner_ad.viewable"] = fieldValue { boolValue = false } + nonPopulationFieldValues["video_ad.viewed_fraction"] = fieldValue { + doubleValue = 0.7 + } + } + } + } + } + + assertFailsWith { + SyntheticDataGeneration.generateEvents( + TestEvent.getDefaultInstance(), + population, + eventGroupSpec + ) + .toList() + } + } + @Test fun `generateEvents returns messages with a Duration field`() { val populationSpec = syntheticPopulationSpec { From 4a3968bcf674a49a969e95ce841d7d76786b1461 Mon Sep 17 00:00:00 2001 From: Ugur Date: Wed, 17 Jan 2024 17:23:09 +0000 Subject: [PATCH 02/13] addressed comments --- .../dataprovider/SyntheticDataGeneration.kt | 38 +++++-------------- .../simulator_synthetic_data_spec.proto | 10 ++--- .../SyntheticDataGenerationTest.kt | 6 ++- 3 files changed, 19 insertions(+), 35 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index cd13f66cbb9..7518d8539dc 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -47,7 +47,7 @@ object SyntheticDataGeneration { messageInstance: T, populationSpec: SyntheticPopulationSpec, syntheticEventGroupSpec: SyntheticEventGroupSpec, - randomSeed: Long = 0L, + random: Random = Random(0L), ): Sequence> { val subPopulations = populationSpec.subPopulationsList @@ -84,7 +84,7 @@ object SyntheticDataGeneration { for (date in dateProgression) { for (i in 0 until frequencySpec.frequency) { - val sampledVids = sampleVids(vidRangeSpec, date.toEpochDay() + randomSeed) + val sampledVids = sampleVids(vidRangeSpec, random) for (vid in sampledVids) { yield(LabeledEvent(date.atStartOfDay().toInstant(ZoneOffset.UTC), vid, message)) } @@ -100,24 +100,13 @@ object SyntheticDataGeneration { * Returns the sampled Vids from [vidRangeSpec]. Given the same [vidRangeSpec] and [randomSeed], * returns the same vids. Returns all of the vids if sample size is 0. */ - private fun sampleVids(vidRangeSpec: VidRangeSpec, randomSeed: Long): Sequence { + private fun sampleVids(vidRangeSpec: VidRangeSpec, random: Random): Sequence { val vidRangeSequence = (vidRangeSpec.vidRange.start until vidRangeSpec.vidRange.endExclusive).asSequence() if (vidRangeSpec.sampleSize == 0) { return vidRangeSequence } - // This step ensures given the same seed and the same vidRangeSpec, sample vids are the same. - // This is required because the EDP should respond by querying the same data for the same - // requisition. - val random = - Random( - vidRangeSpec.vidRange.start + - vidRangeSpec.vidRange.endExclusive + - vidRangeSpec.sampleSize.toLong() + - randomSeed - ) - return vidRangeSequence.shuffled(random).take(vidRangeSpec.sampleSize) } @@ -186,18 +175,11 @@ private fun SyntheticEventGroupSpec.DateSpec.DateRange.toProgression(): LocalDat return start.toLocalDate()..endExclusive.toLocalDate().minusDays(1) } -private fun SyntheticEventGroupSpec.FrequencySpec.hasOverlaps(): Boolean { - return this.vidRangeSpecsList - .toList() - .flatMap { vidRangeSpec: VidRangeSpec -> - listOf( - RangePoint(vidRangeSpec.vidRange.start, true), - RangePoint(vidRangeSpec.vidRange.endExclusive - 1, false) - ) - } - .sortedBy { it.x } - .zipWithNext { first, second -> first.isStart && second.isStart } +// Sort the ranges by their start. If there are any consecutive ranges where +// the previous has a larger end than the latter's start, then there is an overlap +private fun SyntheticEventGroupSpec.FrequencySpec.hasOverlaps() = + vidRangeSpecsList + .map { it.vidRange } + .sortedBy { it.start } + .zipWithNext { first, second -> first.endExclusive > second.start } .any { it } -} - -private data class RangePoint(val x: Long, val isStart: Boolean) diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index ccac1175d21..566f2a0ebd7 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -107,13 +107,13 @@ message SyntheticEventGroupSpec { // A range of VIDs within a single `SubPopulation`. VidRange vid_range = 1; - // Number of vids sampled uniformly without replacement from vid_range. - // If this is 0, no sampling is done and all the vids in range are taken. - int32 sample_size = 2; - // A map of `non_population_fields` from `SyntheticPopulationSpec` to // their values. - map non_population_field_values = 3; + map non_population_field_values = 2; + + // Number of vids sampled uniformly without replacement from vid_range. + // If this is 0, no sampling is done and all the vids in range are taken. + int32 sample_size = 3; } // The VID ranges should be non-overlapping sub-ranges of SubPopulations. repeated VidRangeSpec vid_range_specs = 2; diff --git a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt index 5bde627cd4d..f6e70b4c3c5 100644 --- a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt @@ -21,6 +21,7 @@ import com.google.type.date import java.time.Duration import java.time.LocalDate import java.time.ZoneOffset +import kotlin.random.Random import kotlin.test.assertFailsWith import org.junit.Test import org.junit.runner.RunWith @@ -383,7 +384,7 @@ class SyntheticDataGenerationTest { assertThat(labeledEvents.size).isEqualTo(expectedNumberOfEvents) } - fun `generateEvents fails when overlapping vidRanges exist`() { + fun `generateEvents throws IllegalArgumentException when vid ranges overlap`() { val population = syntheticPopulationSpec { vidRange = vidRange { start = 0L @@ -481,7 +482,8 @@ class SyntheticDataGenerationTest { SyntheticDataGeneration.generateEvents( TestEvent.getDefaultInstance(), population, - eventGroupSpec + eventGroupSpec, + Random(42L), ) .toList() } From 4fe09ebfa8799fefbfa8ec5228ca63dec259b7b6 Mon Sep 17 00:00:00 2001 From: Ugur Date: Wed, 17 Jan 2024 18:12:31 +0000 Subject: [PATCH 03/13] added param comment --- .../loadtest/dataprovider/SyntheticDataGeneration.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index 7518d8539dc..95de279300a 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -42,6 +42,7 @@ object SyntheticDataGeneration { * @param messageInstance an instance of the event message type [T] * @param populationSpec specification of the synthetic population * @param syntheticEventGroupSpec specification of the synthetic event group + * @param random object to be used in sampling vids when sampleSize is provided in specification */ fun generateEvents( messageInstance: T, @@ -176,7 +177,7 @@ private fun SyntheticEventGroupSpec.DateSpec.DateRange.toProgression(): LocalDat } // Sort the ranges by their start. If there are any consecutive ranges where -// the previous has a larger end than the latter's start, then there is an overlap +// the previous has a larger end than the latter's start, then there is an overlap. private fun SyntheticEventGroupSpec.FrequencySpec.hasOverlaps() = vidRangeSpecsList .map { it.vidRange } From cfdf5f51ee6c0c2e78edeeb3d9993d3d8d36726c Mon Sep 17 00:00:00 2001 From: Ugur Date: Wed, 17 Jan 2024 18:23:08 +0000 Subject: [PATCH 04/13] addressed comments --- .../loadtest/dataprovider/SyntheticDataGeneration.kt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index 95de279300a..d8f86dad28f 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -19,6 +19,8 @@ package org.wfanet.measurement.loadtest.dataprovider import com.google.protobuf.Descriptors.FieldDescriptor import com.google.protobuf.Message import java.time.ZoneOffset +import kotlin.math.max +import kotlin.math.min import kotlin.random.Random import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.FieldValue import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SimulatorSyntheticDataSpec @@ -48,7 +50,7 @@ object SyntheticDataGeneration { messageInstance: T, populationSpec: SyntheticPopulationSpec, syntheticEventGroupSpec: SyntheticEventGroupSpec, - random: Random = Random(0L), + random: Random = Random(), ): Sequence> { val subPopulations = populationSpec.subPopulationsList @@ -182,5 +184,8 @@ private fun SyntheticEventGroupSpec.FrequencySpec.hasOverlaps() = vidRangeSpecsList .map { it.vidRange } .sortedBy { it.start } - .zipWithNext { first, second -> first.endExclusive > second.start } - .any { it } + .zipWithNext() + .any { (first, second) -> first.overlaps(second) } + +private fun VidRange.overlaps(other: VidRange) = + max(start, other.start) < min(endExclusive, other.endExclusive) From 0e63840ecde499978c7dc8c0d89fe3e8cb165316 Mon Sep 17 00:00:00 2001 From: Ugur Date: Thu, 25 Jan 2024 16:57:24 +0000 Subject: [PATCH 05/13] addressed comments --- .../dataprovider/SyntheticDataGeneration.kt | 6 ++++- .../simulator_synthetic_data_spec.proto | 14 ++++++++++ .../SyntheticDataGenerationTest.kt | 26 ++++++++++++------- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index d8f86dad28f..e88757924fd 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -50,8 +50,12 @@ object SyntheticDataGeneration { messageInstance: T, populationSpec: SyntheticPopulationSpec, syntheticEventGroupSpec: SyntheticEventGroupSpec, - random: Random = Random(), ): Sequence> { + + check(syntheticEventGroupSpec.rngType == SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM) { + "Expecting KOTLIN_RANDOM rng type, got ${syntheticEventGroupSpec.rngType}" + } + val random = Random(syntheticEventGroupSpec.randomSeed) val subPopulations = populationSpec.subPopulationsList return sequence { diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index 566f2a0ebd7..b7a7589cf27 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -134,4 +134,18 @@ message SyntheticEventGroupSpec { } // `DateSpec`s should describe non-overlapping date ranges. repeated DateSpec date_specs = 2; + + // Random seed to be fed into the random number generator to sample vids. + int64 random_seed = 3; + + // Type of random number generator to sample vids. + enum RngType { + // Default value used if the rng type is omitted. + RNG_TYPE_UNSPECIFIED = 0; + // Signals kotlin.random.Random should be used for sampling. + KOTLIN_RANDOM = 1; + } + + // Activation state of this `Account`. Output-only. + RngType rng_type = 4; } diff --git a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt index f6e70b4c3c5..088dbfd7669 100644 --- a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt @@ -21,11 +21,11 @@ import com.google.type.date import java.time.Duration import java.time.LocalDate import java.time.ZoneOffset -import kotlin.random.Random import kotlin.test.assertFailsWith import org.junit.Test import org.junit.runner.RunWith import org.junit.runners.JUnit4 +import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SyntheticEventGroupSpec import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SyntheticEventGroupSpecKt import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SyntheticPopulationSpecKt import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.fieldValue @@ -87,6 +87,7 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group 1" + rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -190,7 +191,7 @@ class SyntheticDataGenerationTest { SyntheticDataGeneration.generateEvents( TestEvent.getDefaultInstance(), population, - eventGroupSpec + eventGroupSpec, ) .toList() @@ -300,6 +301,8 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group 1" + randomSeed = 42L + rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -376,7 +379,7 @@ class SyntheticDataGenerationTest { SyntheticDataGeneration.generateEvents( TestEvent.getDefaultInstance(), population, - eventGroupSpec + eventGroupSpec, ) .toList() val expectedNumberOfEvents = @@ -428,6 +431,7 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group 1" + rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -483,7 +487,6 @@ class SyntheticDataGenerationTest { TestEvent.getDefaultInstance(), population, eventGroupSpec, - Random(42L), ) .toList() } @@ -509,6 +512,7 @@ class SyntheticDataGenerationTest { } val videoLength = Duration.ofMinutes(5).toProtoDuration() val eventGroupSpec = syntheticEventGroupSpec { + rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { dateRange = @@ -543,7 +547,7 @@ class SyntheticDataGenerationTest { SyntheticDataGeneration.generateEvents( TestEvent.getDefaultInstance(), populationSpec, - eventGroupSpec + eventGroupSpec, ) .map { TestEvent.parseFrom(it.message.toByteString()) } .toList() @@ -583,6 +587,7 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group" + rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -618,7 +623,7 @@ class SyntheticDataGenerationTest { SyntheticDataGeneration.generateEvents( TestEvent.getDefaultInstance(), population, - eventGroupSpec + eventGroupSpec, ) .toList() } @@ -651,6 +656,7 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group" + rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -688,7 +694,7 @@ class SyntheticDataGenerationTest { SyntheticDataGeneration.generateEvents( TestEvent.getDefaultInstance(), population, - eventGroupSpec + eventGroupSpec, ) .toList() } @@ -721,6 +727,7 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group" + rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -758,7 +765,7 @@ class SyntheticDataGenerationTest { SyntheticDataGeneration.generateEvents( TestEvent.getDefaultInstance(), population, - eventGroupSpec + eventGroupSpec, ) .toList() } @@ -791,6 +798,7 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group" + rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -828,7 +836,7 @@ class SyntheticDataGenerationTest { SyntheticDataGeneration.generateEvents( TestEvent.getDefaultInstance(), population, - eventGroupSpec + eventGroupSpec, ) .toList() } From 001cb321496af2645d9062acf73657fce93d9a55 Mon Sep 17 00:00:00 2001 From: Ugur Date: Thu, 25 Jan 2024 19:45:19 +0000 Subject: [PATCH 06/13] addressed comments --- .../loadtest/dataprovider/SyntheticDataGeneration.kt | 1 - .../testing/simulator_synthetic_data_spec.proto | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index e88757924fd..91ffe97980d 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -44,7 +44,6 @@ object SyntheticDataGeneration { * @param messageInstance an instance of the event message type [T] * @param populationSpec specification of the synthetic population * @param syntheticEventGroupSpec specification of the synthetic event group - * @param random object to be used in sampling vids when sampleSize is provided in specification */ fun generateEvents( messageInstance: T, diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index b7a7589cf27..148985a0769 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -136,6 +136,7 @@ message SyntheticEventGroupSpec { repeated DateSpec date_specs = 2; // Random seed to be fed into the random number generator to sample vids. + // Required if any VidRangeSpec specifies a sample_size. int64 random_seed = 3; // Type of random number generator to sample vids. @@ -146,6 +147,6 @@ message SyntheticEventGroupSpec { KOTLIN_RANDOM = 1; } - // Activation state of this `Account`. Output-only. + // Random Number Generator type for this `SyntheticEventGroupSpec`. RngType rng_type = 4; } From 8215f7ad9d1f826a095017304bf796d32a915130 Mon Sep 17 00:00:00 2001 From: Ugur Date: Fri, 26 Jan 2024 18:47:50 +0000 Subject: [PATCH 07/13] addressed comments --- .../dataprovider/SyntheticDataGeneration.kt | 2 +- .../simulator_synthetic_data_spec.proto | 7 +- .../SyntheticDataGenerationTest.kt | 138 +++++++++++++++++- 3 files changed, 141 insertions(+), 6 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index 91ffe97980d..23be8376fbc 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -54,7 +54,6 @@ object SyntheticDataGeneration { check(syntheticEventGroupSpec.rngType == SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM) { "Expecting KOTLIN_RANDOM rng type, got ${syntheticEventGroupSpec.rngType}" } - val random = Random(syntheticEventGroupSpec.randomSeed) val subPopulations = populationSpec.subPopulationsList return sequence { @@ -65,6 +64,7 @@ object SyntheticDataGeneration { check(!frequencySpec.hasOverlaps()) { "The VID ranges should be non-overlapping." } for (vidRangeSpec: VidRangeSpec in frequencySpec.vidRangeSpecsList) { + val random = Random(vidRangeSpec.randomSeed) val subPopulation: SubPopulation = vidRangeSpec.vidRange.findSubPopulation(subPopulations) ?: throw IllegalArgumentException() diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index 148985a0769..43ba0a5b528 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -114,6 +114,9 @@ message SyntheticEventGroupSpec { // Number of vids sampled uniformly without replacement from vid_range. // If this is 0, no sampling is done and all the vids in range are taken. int32 sample_size = 3; + + // Random seed to be fed into the random number generator to sample vids. + int64 random_seed = 4; } // The VID ranges should be non-overlapping sub-ranges of SubPopulations. repeated VidRangeSpec vid_range_specs = 2; @@ -135,10 +138,6 @@ message SyntheticEventGroupSpec { // `DateSpec`s should describe non-overlapping date ranges. repeated DateSpec date_specs = 2; - // Random seed to be fed into the random number generator to sample vids. - // Required if any VidRangeSpec specifies a sample_size. - int64 random_seed = 3; - // Type of random number generator to sample vids. enum RngType { // Default value used if the rng type is omitted. diff --git a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt index 088dbfd7669..2297beddc88 100644 --- a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt @@ -301,7 +301,6 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group 1" - randomSeed = 42L rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += @@ -326,6 +325,7 @@ class SyntheticDataGenerationTest { vidRangeSpecs += SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + randomSeed = 42L vidRange = vidRange { start = 0L endExclusive = 25L @@ -340,6 +340,7 @@ class SyntheticDataGenerationTest { } vidRangeSpecs += SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + randomSeed = 42L vidRange = vidRange { start = 25L endExclusive = 50L @@ -359,6 +360,7 @@ class SyntheticDataGenerationTest { vidRangeSpecs += SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + randomSeed = 42L vidRange = vidRange { start = 50L endExclusive = 75L @@ -387,6 +389,140 @@ class SyntheticDataGenerationTest { assertThat(labeledEvents.size).isEqualTo(expectedNumberOfEvents) } + @Test + fun `generateEvents throws IllegalStateException when RNG is not specified`() { + + val sampleSizeForFreqOne = 2 + val firstsampleSizeForFreqTwo = 5 + val secondSampleSizeForFreqTwo = 10 + + val population = syntheticPopulationSpec { + vidRange = vidRange { + start = 0L + endExclusive = 100L + } + + populationFields += "person.gender" + populationFields += "person.age_group" + + nonPopulationFields += "banner_ad.viewable" + nonPopulationFields += "video_ad.viewed_fraction" + + subPopulations += + SyntheticPopulationSpecKt.subPopulation { + vidSubRange = vidRange { + start = 0L + endExclusive = 50L + } + + populationFieldsValues["person.gender"] = fieldValue { + enumValue = Person.Gender.MALE_VALUE + } + populationFieldsValues["person.age_group"] = fieldValue { + enumValue = Person.AgeGroup.YEARS_18_TO_34_VALUE + } + } + subPopulations += + SyntheticPopulationSpecKt.subPopulation { + vidSubRange = vidRange { + start = 50L + endExclusive = 100L + } + + populationFieldsValues["person.gender"] = fieldValue { + enumValue = Person.Gender.FEMALE_VALUE + } + populationFieldsValues["person.age_group"] = fieldValue { + enumValue = Person.AgeGroup.YEARS_18_TO_34_VALUE + } + } + } + val eventGroupSpec = syntheticEventGroupSpec { + description = "event group 1" + + dateSpecs += + SyntheticEventGroupSpecKt.dateSpec { + dateRange = + SyntheticEventGroupSpecKt.DateSpecKt.dateRange { + start = date { + year = 2023 + month = 6 + day = 27 + } + endExclusive = date { + year = 2023 + month = 6 + day = 28 + } + } + + frequencySpecs += + SyntheticEventGroupSpecKt.frequencySpec { + frequency = 2 + + vidRangeSpecs += + SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + randomSeed = 42L + vidRange = vidRange { + start = 0L + endExclusive = 25L + } + + sampleSize = firstsampleSizeForFreqTwo + + nonPopulationFieldValues["banner_ad.viewable"] = fieldValue { boolValue = true } + nonPopulationFieldValues["video_ad.viewed_fraction"] = fieldValue { + doubleValue = 0.5 + } + } + vidRangeSpecs += + SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + randomSeed = 42L + vidRange = vidRange { + start = 25L + endExclusive = 50L + } + + sampleSize = secondSampleSizeForFreqTwo + + nonPopulationFieldValues["banner_ad.viewable"] = fieldValue { boolValue = false } + nonPopulationFieldValues["video_ad.viewed_fraction"] = fieldValue { + doubleValue = 0.7 + } + } + } + frequencySpecs += + SyntheticEventGroupSpecKt.frequencySpec { + frequency = 1 + + vidRangeSpecs += + SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + randomSeed = 42L + vidRange = vidRange { + start = 50L + endExclusive = 75L + } + + sampleSize = sampleSizeForFreqOne + + nonPopulationFieldValues["banner_ad.viewable"] = fieldValue { boolValue = true } + nonPopulationFieldValues["video_ad.viewed_fraction"] = fieldValue { + doubleValue = 0.8 + } + } + } + } + } + + assertFailsWith { + SyntheticDataGeneration.generateEvents( + TestEvent.getDefaultInstance(), + population, + eventGroupSpec, + ) + } + } + fun `generateEvents throws IllegalArgumentException when vid ranges overlap`() { val population = syntheticPopulationSpec { vidRange = vidRange { From 1f0da41edd93ccc6708cae61b577bb167ca95431 Mon Sep 17 00:00:00 2001 From: Ugur Date: Fri, 26 Jan 2024 20:28:41 +0000 Subject: [PATCH 08/13] added comments --- .../testing/simulator_synthetic_data_spec.proto | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index 43ba0a5b528..d6d8763eccb 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -116,6 +116,7 @@ message SyntheticEventGroupSpec { int32 sample_size = 3; // Random seed to be fed into the random number generator to sample vids. + // Required if this VidRangeSpec specifies a sample_size. int64 random_seed = 4; } // The VID ranges should be non-overlapping sub-ranges of SubPopulations. From 3c92dc8784b2fa169769466960bc7ad1bfbbacba Mon Sep 17 00:00:00 2001 From: Ugur Date: Fri, 26 Jan 2024 20:43:48 +0000 Subject: [PATCH 09/13] only check rng type if sampling is needed --- .../dataprovider/SyntheticDataGeneration.kt | 13 +++++++++++-- .../testing/simulator_synthetic_data_spec.proto | 1 + .../dataprovider/SyntheticDataGenerationTest.kt | 9 +-------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index 23be8376fbc..9e403fc8db9 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -51,9 +51,18 @@ object SyntheticDataGeneration { syntheticEventGroupSpec: SyntheticEventGroupSpec, ): Sequence> { - check(syntheticEventGroupSpec.rngType == SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM) { - "Expecting KOTLIN_RANDOM rng type, got ${syntheticEventGroupSpec.rngType}" + val samplingRequired = + syntheticEventGroupSpec.dateSpecsList + .flatMap { it.frequencySpecsList } + .flatMap { it.vidRangeSpecsList } + .any { it.sampleSize != 0 } + + if (samplingRequired) { + check(syntheticEventGroupSpec.rngType == SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM) { + "Expecting KOTLIN_RANDOM rng type, got ${syntheticEventGroupSpec.rngType}" + } } + val subPopulations = populationSpec.subPopulationsList return sequence { diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index d6d8763eccb..a5190b5c556 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -148,5 +148,6 @@ message SyntheticEventGroupSpec { } // Random Number Generator type for this `SyntheticEventGroupSpec`. + // Required if any VidRangeSpec specifies a sample_size. RngType rng_type = 4; } diff --git a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt index 2297beddc88..5c5a2e7fb2e 100644 --- a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt @@ -87,7 +87,6 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group 1" - rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -390,7 +389,7 @@ class SyntheticDataGenerationTest { } @Test - fun `generateEvents throws IllegalStateException when RNG is not specified`() { + fun `generateEvents throws IllegalStateException for RNG not specified when sampling enabled`() { val sampleSizeForFreqOne = 2 val firstsampleSizeForFreqTwo = 5 @@ -567,7 +566,6 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group 1" - rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -648,7 +646,6 @@ class SyntheticDataGenerationTest { } val videoLength = Duration.ofMinutes(5).toProtoDuration() val eventGroupSpec = syntheticEventGroupSpec { - rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { dateRange = @@ -723,7 +720,6 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group" - rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -792,7 +788,6 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group" - rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -863,7 +858,6 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group" - rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -934,7 +928,6 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group" - rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { From dbe9349214dbe5d6ecb6fe920b384e46e7c1b70e Mon Sep 17 00:00:00 2001 From: Ugur Date: Wed, 31 Jan 2024 17:49:25 +0000 Subject: [PATCH 10/13] addressed comments --- .../dataprovider/SyntheticDataGeneration.kt | 15 ++- .../SyntheticDataGenerationTest.kt | 96 +++++++++++++++++++ 2 files changed, 108 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index 9e403fc8db9..7003445b00e 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -50,12 +50,21 @@ object SyntheticDataGeneration { populationSpec: SyntheticPopulationSpec, syntheticEventGroupSpec: SyntheticEventGroupSpec, ): Sequence> { - - val samplingRequired = + var samplingRequired = false + val vidRangeSpecs = syntheticEventGroupSpec.dateSpecsList .flatMap { it.frequencySpecsList } .flatMap { it.vidRangeSpecsList } - .any { it.sampleSize != 0 } + + for (vidRangeSpec in vidRangeSpecs) { + val vidRangeWidth = vidRangeSpec.vidRange.endExclusive - vidRangeSpec.vidRange.start + check(vidRangeWidth >= vidRangeSpec.sampleSize) { + "all vidRange widths should be larger than sampleSizes" + } + if (vidRangeSpec.sampleSize > 0) { + samplingRequired = true + } + } if (samplingRequired) { check(syntheticEventGroupSpec.rngType == SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM) { diff --git a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt index 5c5a2e7fb2e..5594a9da1ee 100644 --- a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt @@ -388,6 +388,102 @@ class SyntheticDataGenerationTest { assertThat(labeledEvents.size).isEqualTo(expectedNumberOfEvents) } + @Test + fun `generateEvents throws IllegalStateException for sample size larger than vidRange`() { + + val population = syntheticPopulationSpec { + vidRange = vidRange { + start = 0L + endExclusive = 100L + } + + populationFields += "person.gender" + populationFields += "person.age_group" + + nonPopulationFields += "banner_ad.viewable" + nonPopulationFields += "video_ad.viewed_fraction" + + subPopulations += + SyntheticPopulationSpecKt.subPopulation { + vidSubRange = vidRange { + start = 0L + endExclusive = 50L + } + + populationFieldsValues["person.gender"] = fieldValue { + enumValue = Person.Gender.MALE_VALUE + } + populationFieldsValues["person.age_group"] = fieldValue { + enumValue = Person.AgeGroup.YEARS_18_TO_34_VALUE + } + } + subPopulations += + SyntheticPopulationSpecKt.subPopulation { + vidSubRange = vidRange { + start = 50L + endExclusive = 100L + } + + populationFieldsValues["person.gender"] = fieldValue { + enumValue = Person.Gender.FEMALE_VALUE + } + populationFieldsValues["person.age_group"] = fieldValue { + enumValue = Person.AgeGroup.YEARS_18_TO_34_VALUE + } + } + } + val eventGroupSpec = syntheticEventGroupSpec { + description = "event group 1" + rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM + + dateSpecs += + SyntheticEventGroupSpecKt.dateSpec { + dateRange = + SyntheticEventGroupSpecKt.DateSpecKt.dateRange { + start = date { + year = 2023 + month = 6 + day = 27 + } + endExclusive = date { + year = 2023 + month = 6 + day = 28 + } + } + + frequencySpecs += + SyntheticEventGroupSpecKt.frequencySpec { + frequency = 2 + + vidRangeSpecs += + SyntheticEventGroupSpecKt.FrequencySpecKt.vidRangeSpec { + randomSeed = 42L + vidRange = vidRange { + start = 0L + endExclusive = 25L + } + + sampleSize = 50 + + nonPopulationFieldValues["banner_ad.viewable"] = fieldValue { boolValue = true } + nonPopulationFieldValues["video_ad.viewed_fraction"] = fieldValue { + doubleValue = 0.5 + } + } + } + } + } + + assertFailsWith { + SyntheticDataGeneration.generateEvents( + TestEvent.getDefaultInstance(), + population, + eventGroupSpec, + ) + } + } + @Test fun `generateEvents throws IllegalStateException for RNG not specified when sampling enabled`() { From 02d25676e9855a79f4905fd72b29904dd8691e44 Mon Sep 17 00:00:00 2001 From: Ugur Date: Thu, 1 Feb 2024 15:15:40 +0000 Subject: [PATCH 11/13] addressed comments --- .../dataprovider/SyntheticDataGeneration.kt | 14 ++++++++------ .../testing/simulator_synthetic_data_spec.proto | 4 ++-- .../dataprovider/SyntheticDataGenerationTest.kt | 4 ++-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index 7003445b00e..da3eb8a0fa8 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -19,9 +19,10 @@ package org.wfanet.measurement.loadtest.dataprovider import com.google.protobuf.Descriptors.FieldDescriptor import com.google.protobuf.Message import java.time.ZoneOffset +import java.util.Collections +import java.util.Random import kotlin.math.max import kotlin.math.min -import kotlin.random.Random import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.FieldValue import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SimulatorSyntheticDataSpec import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SyntheticEventGroupSpec @@ -67,8 +68,8 @@ object SyntheticDataGeneration { } if (samplingRequired) { - check(syntheticEventGroupSpec.rngType == SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM) { - "Expecting KOTLIN_RANDOM rng type, got ${syntheticEventGroupSpec.rngType}" + check(syntheticEventGroupSpec.rngType == SyntheticEventGroupSpec.RngType.JAVA_UTIL_RANDOM) { + "Expecting JAVA_UTIL_RANDOM rng type, got ${syntheticEventGroupSpec.rngType}" } } @@ -124,14 +125,15 @@ object SyntheticDataGeneration { * Returns the sampled Vids from [vidRangeSpec]. Given the same [vidRangeSpec] and [randomSeed], * returns the same vids. Returns all of the vids if sample size is 0. */ - private fun sampleVids(vidRangeSpec: VidRangeSpec, random: Random): Sequence { + private fun sampleVids(vidRangeSpec: VidRangeSpec, random: Random): List { val vidRangeSequence = - (vidRangeSpec.vidRange.start until vidRangeSpec.vidRange.endExclusive).asSequence() + (vidRangeSpec.vidRange.start until vidRangeSpec.vidRange.endExclusive).toMutableList() if (vidRangeSpec.sampleSize == 0) { return vidRangeSequence } + Collections.shuffle(vidRangeSequence, random) - return vidRangeSequence.shuffled(random).take(vidRangeSpec.sampleSize) + return vidRangeSequence.take(vidRangeSpec.sampleSize) } /** diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index a5190b5c556..df90eb0fe87 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -143,8 +143,8 @@ message SyntheticEventGroupSpec { enum RngType { // Default value used if the rng type is omitted. RNG_TYPE_UNSPECIFIED = 0; - // Signals kotlin.random.Random should be used for sampling. - KOTLIN_RANDOM = 1; + // Signals java.util.Random should be used for sampling. + JAVA_UTIL_RANDOM = 1; } // Random Number Generator type for this `SyntheticEventGroupSpec`. diff --git a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt index 5594a9da1ee..f4e85040fb9 100644 --- a/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGenerationTest.kt @@ -300,7 +300,7 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group 1" - rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM + rngType = SyntheticEventGroupSpec.RngType.JAVA_UTIL_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { @@ -434,7 +434,7 @@ class SyntheticDataGenerationTest { } val eventGroupSpec = syntheticEventGroupSpec { description = "event group 1" - rngType = SyntheticEventGroupSpec.RngType.KOTLIN_RANDOM + rngType = SyntheticEventGroupSpec.RngType.JAVA_UTIL_RANDOM dateSpecs += SyntheticEventGroupSpecKt.dateSpec { From 18ad90914c7dc23c244fda166ac582615d044dfe Mon Sep 17 00:00:00 2001 From: Ugur Date: Thu, 1 Feb 2024 15:18:43 +0000 Subject: [PATCH 12/13] naming correction --- .../loadtest/dataprovider/SyntheticDataGeneration.kt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index da3eb8a0fa8..94be68f2be0 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -126,14 +126,14 @@ object SyntheticDataGeneration { * returns the same vids. Returns all of the vids if sample size is 0. */ private fun sampleVids(vidRangeSpec: VidRangeSpec, random: Random): List { - val vidRangeSequence = + val vidRangeList = (vidRangeSpec.vidRange.start until vidRangeSpec.vidRange.endExclusive).toMutableList() if (vidRangeSpec.sampleSize == 0) { return vidRangeSequence } - Collections.shuffle(vidRangeSequence, random) + Collections.shuffle(vidRangeList, random) - return vidRangeSequence.take(vidRangeSpec.sampleSize) + return vidRangeList.take(vidRangeSpec.sampleSize) } /** From 2aeb8cb080e58f9e5801bb5f0fc246855344149d Mon Sep 17 00:00:00 2001 From: Ugur Date: Thu, 1 Feb 2024 20:09:36 +0000 Subject: [PATCH 13/13] wrapped with kotlin random --- .../dataprovider/SyntheticDataGeneration.kt | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt index 94be68f2be0..67f31374d0c 100644 --- a/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt +++ b/src/main/kotlin/org/wfanet/measurement/loadtest/dataprovider/SyntheticDataGeneration.kt @@ -19,10 +19,11 @@ package org.wfanet.measurement.loadtest.dataprovider import com.google.protobuf.Descriptors.FieldDescriptor import com.google.protobuf.Message import java.time.ZoneOffset -import java.util.Collections import java.util.Random import kotlin.math.max import kotlin.math.min +import kotlin.random.Random as KotlinRandom +import kotlin.random.asKotlinRandom import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.FieldValue import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SimulatorSyntheticDataSpec import org.wfanet.measurement.api.v2alpha.event_group_metadata.testing.SyntheticEventGroupSpec @@ -83,7 +84,7 @@ object SyntheticDataGeneration { check(!frequencySpec.hasOverlaps()) { "The VID ranges should be non-overlapping." } for (vidRangeSpec: VidRangeSpec in frequencySpec.vidRangeSpecsList) { - val random = Random(vidRangeSpec.randomSeed) + val random = Random(vidRangeSpec.randomSeed).asKotlinRandom() val subPopulation: SubPopulation = vidRangeSpec.vidRange.findSubPopulation(subPopulations) ?: throw IllegalArgumentException() @@ -125,15 +126,13 @@ object SyntheticDataGeneration { * Returns the sampled Vids from [vidRangeSpec]. Given the same [vidRangeSpec] and [randomSeed], * returns the same vids. Returns all of the vids if sample size is 0. */ - private fun sampleVids(vidRangeSpec: VidRangeSpec, random: Random): List { - val vidRangeList = - (vidRangeSpec.vidRange.start until vidRangeSpec.vidRange.endExclusive).toMutableList() + private fun sampleVids(vidRangeSpec: VidRangeSpec, random: KotlinRandom): Sequence { + val vidRangeSequence = + (vidRangeSpec.vidRange.start until vidRangeSpec.vidRange.endExclusive).asSequence() if (vidRangeSpec.sampleSize == 0) { return vidRangeSequence } - Collections.shuffle(vidRangeList, random) - - return vidRangeList.take(vidRangeSpec.sampleSize) + return vidRangeSequence.shuffled(random).take(vidRangeSpec.sampleSize) } /**