From e09888eda96421a831861e0c92eba9cf09b9d9f3 Mon Sep 17 00:00:00 2001 From: Joe Corilla <10494069+jcorilla@users.noreply.github.com> Date: Tue, 30 May 2023 21:47:13 +0000 Subject: [PATCH 1/6] Add proto definition for simulator synthetic data specification --- .../event_group_metadata/testing/BUILD.bazel | 26 +++++- .../simulator_synthetic_data_spec.proto | 91 +++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/BUILD.bazel b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/BUILD.bazel index ee7fe26d5cf..a9f2cb43551 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/BUILD.bazel +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/BUILD.bazel @@ -11,7 +11,10 @@ IMPORT_PREFIX = "/src/main/proto" proto_library( name = "test_metadata_messages_proto", - srcs = glob(["*.proto"]), + srcs = [ + "test_metadata_message.proto", + "test_metadata_message_2.proto", + ], strip_import_prefix = IMPORT_PREFIX, deps = [ "@com_google_protobuf//:descriptor_proto", @@ -30,3 +33,24 @@ kt_jvm_proto_library( srcs = [":test_metadata_messages_proto"], deps = [":test_metadata_messages_java_proto"], ) + +proto_library( + name = "simulator_synthetic_data_spec_proto", + srcs = ["simulator_synthetic_data_spec.proto"], + strip_import_prefix = IMPORT_PREFIX, + deps = [ + "@com_google_googleapis//google/type:date_proto", + "@com_google_protobuf//:descriptor_proto", + ], +) + +java_proto_library( + name = "simulator_synthetic_data_spec_java_proto", + deps = [":simulator_synthetic_data_spec_proto"], +) + +kt_jvm_proto_library( + name = "simulator_synthetic_data_spec_kt_jvm_proto", + srcs = [":simulator_synthetic_data_spec_proto"], + deps = [":simulator_synthetic_data_spec_java_proto"], +) diff 
--git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto new file mode 100644 index 00000000000..b71b65161e4 --- /dev/null +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -0,0 +1,91 @@ +// Copyright 2023 The Cross-Media Measurement Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package wfa.measurement.api.v2alpha.event_group_metadata.testing; + +import "google/protobuf/descriptor.proto"; +import "google/type/date.proto"; + +option java_package = "org.wfanet.measurement.api.v2alpha.event_group_metadata.testing"; +option java_multiple_files = true; + +message SimulatorSyntheticDataSpec { + repeated SyntheticEventGroupSpec event_group_spec = 1; + SyntheticPopulationSpec population = 2; +} + +// A sequence of VIDs represented with an inclusive beginning and end. +message VidInterval { + int64 begin = 1; + int64 end = 2; +} + +// The specification of the entire VID space. +message SyntheticPopulationSpec { + VidInterval vid_interval = 1; + + // For Age, Gender, Social Grade ... + // These need to be assigned at the VID level here. When generating a + // SimulatorSyntheticData you need to import the necessary EventTemplate. 
+ repeated google.protobuf.FieldDescriptorProto population_fields = 2; + + // For Device, Location, Duration ... + // These need to be assigned at the impression level in the FrequencySpec. + repeated google.protobuf.FieldDescriptorProto non_population_fields = 3; + + message SubPopulation { + VidInterval vid_sub_interval = 1; + + // A list of the same length as the population field descriptors whose + // values are one of each of the values for the field descriptor + // (e.g. if the first field descriptor is age bucket, then the first + // value in this list is a particular age bucket). This list should be + // unique across subpopulations. + repeated string population_fields_values = 2; + } + repeated SubPopulation sub_populations = 4; +} + +// Each Synthetic event group spec indicates which VID intervals are reached. +message SyntheticEventGroupSpec { + string property_under_measurement = 1; + string description = 2; + + // The VIDs reached with their frequency non-population attributes. + message FrequencySpec { + int64 frequency = 1; + // The VID intervals should be sub-intervals of SubPopulations. + message VidIntervalSpec { + VidInterval vid_intervals = 1; + + // A list of the same length as the non-population field descriptors + // whose values are one of each of the values for the field + // descriptor (e.g. if the first field descriptor is device bucket, + // then the first value in this list is a particular device). + repeated string non_population_field_values = 2; + } + repeated VidIntervalSpec vid_interval_specs = 2; + } + + message DateSpec { + // Date the VIDs were reached. + google.type.Date date = 1; + // Each FrequencySpec must have a unique frequency. 
+ repeated FrequencySpec frequency_specs = 2; + } + + repeated DateSpec date_specs = 3; +} From d588b8f039b9efd43b0a701a0ac9e1b57f59159e Mon Sep 17 00:00:00 2001 From: Joe Corilla <10494069+jcorilla@users.noreply.github.com> Date: Thu, 1 Jun 2023 17:23:23 +0000 Subject: [PATCH 2/6] Change population and nonpopulation fields to string --- .../simulator_synthetic_data_spec.proto | 70 ++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index b71b65161e4..e52a2c16918 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -22,64 +22,70 @@ import "google/type/date.proto"; option java_package = "org.wfanet.measurement.api.v2alpha.event_group_metadata.testing"; option java_multiple_files = true; +// The specification of synthetic EventGroups created from a shared synthetic +// virtual population. message SimulatorSyntheticDataSpec { repeated SyntheticEventGroupSpec event_group_spec = 1; SyntheticPopulationSpec population = 2; } -// A sequence of VIDs represented with an inclusive beginning and end. -message VidInterval { - int64 begin = 1; +// A sequence of VIDs represented with a beginning and exclusive end. +message VidRange { + int64 start = 1; int64 end = 2; } -// The specification of the entire VID space. +// The specification of a synthetic virtual population. message SyntheticPopulationSpec { - VidInterval vid_interval = 1; + VidRange vid_range = 1; - // For Age, Gender, Social Grade ... - // These need to be assigned at the VID level here. When generating a - // SimulatorSyntheticData you need to import the necessary EventTemplate. 
- repeated google.protobuf.FieldDescriptorProto population_fields = 2; + // Set of strings that define population data fields such as Age, Gender, + // and Social Grade. These should conform to a CEL expression syntax (e.g. + // person.age_group). These are assigned at the subpopulation level. + repeated string population_fields = 2; - // For Device, Location, Duration ... - // These need to be assigned at the impression level in the FrequencySpec. - repeated google.protobuf.FieldDescriptorProto non_population_fields = 3; + // Set of strings that define non-population data fields such as Device, + // Location, and Duration. These should conform to a CEL expression syntax + // (e.g. person.age_group). These are assigned at the impression level in the + // FrequencySpec. + repeated string non_population_fields = 3; message SubPopulation { - VidInterval vid_sub_interval = 1; - - // A list of the same length as the population field descriptors whose - // values are one of each of the values for the field descriptor - // (e.g. if the first field descriptor is age bucket, then the first - // value in this list is a particular age bucket). This list should be - // unique across subpopulations. - repeated string population_fields_values = 2; + VidRange vid_sub_range = 1; + + // A map of `population_fields` to their values for each subpopulation. + map<string, string> population_fields_values = 2; } + + // Subpopulations should describe non-overlapping VID ranges. The combinations + // of population field values should be unique across subpopulations. repeated SubPopulation sub_populations = 4; } -// Each Synthetic event group spec indicates which VID intervals are reached. +// The specification of a synthetic EventGroup which describes all impressions +// for specific dates. message SyntheticEventGroupSpec { string property_under_measurement = 1; string description = 2; - // The VIDs reached with their frequency non-population attributes.
+ // The specification of VIDs reached at a specific frequency and their + // non-population attributes. message FrequencySpec { int64 frequency = 1; - // The VID intervals should be sub-intervals of SubPopulations. - message VidIntervalSpec { - VidInterval vid_intervals = 1; - - // A list of the same length as the non-population field descriptors - // whose values are one of each of the values for the field - // descriptor (e.g. if the first field descriptor is device bucket, - // then the first value in this list is a particular device). - repeated string non_population_field_values = 2; + + // The specification of non_population_values for a VID range. + message VidRangeSpec { + VidRange vid_range = 1; + + // A map of `non_population_fields` from `SyntheticPopulationSpec` to + // their values. + map<string, string> non_population_field_values = 2; } - repeated VidIntervalSpec vid_interval_specs = 2; + // The VID ranges should be non-overlapping sub-ranges of SubPopulations. + repeated VidRangeSpec vid_range_specs = 2; } + // The specification for all frequencies reached at a specific date. message DateSpec { // Date the VIDs were reached.
google.type.Date date = 1; From 410033f9381a10fe75231dd09bd2fac5fe0d02cd Mon Sep 17 00:00:00 2001 From: Joe Corilla <10494069+jcorilla@users.noreply.github.com> Date: Thu, 1 Jun 2023 21:45:43 +0000 Subject: [PATCH 3/6] Drop property_under_measurement from synthetic event group spec --- .../testing/simulator_synthetic_data_spec.proto | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index e52a2c16918..599a4b26633 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -65,8 +65,7 @@ message SyntheticPopulationSpec { // The specification of a synthetic EventGroup which describes all impressions // for specific dates. message SyntheticEventGroupSpec { - string property_under_measurement = 1; - string description = 2; + string description = 1; // The specification of VIDs reached at a specific frequency and their // non-population attributes. 
@@ -93,5 +92,5 @@ message SyntheticEventGroupSpec { repeated FrequencySpec frequency_specs = 2; } - repeated DateSpec date_specs = 3; + repeated DateSpec date_specs = 2; } From 387fd2f28114081787cba07b6414eb0042cc8840 Mon Sep 17 00:00:00 2001 From: Joe Corilla <10494069+jcorilla@users.noreply.github.com> Date: Thu, 1 Jun 2023 21:56:26 +0000 Subject: [PATCH 4/6] Remove unused import --- .../api/v2alpha/event_group_metadata/testing/BUILD.bazel | 1 - .../testing/simulator_synthetic_data_spec.proto | 1 - 2 files changed, 2 deletions(-) diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/BUILD.bazel b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/BUILD.bazel index a9f2cb43551..b1ef37a96f5 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/BUILD.bazel +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/BUILD.bazel @@ -40,7 +40,6 @@ proto_library( strip_import_prefix = IMPORT_PREFIX, deps = [ "@com_google_googleapis//google/type:date_proto", - "@com_google_protobuf//:descriptor_proto", ], ) diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index 599a4b26633..f34131e20c2 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -16,7 +16,6 @@ syntax = "proto3"; package wfa.measurement.api.v2alpha.event_group_metadata.testing; -import "google/protobuf/descriptor.proto"; import "google/type/date.proto"; option java_package = "org.wfanet.measurement.api.v2alpha.event_group_metadata.testing"; From 0348c5a8feccc59c9ca186e49e589025822799f7 Mon Sep 17 00:00:00 2001 From: Joe Corilla 
<10494069+jcorilla@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:08:09 +0000 Subject: [PATCH 5/6] Rename VidRange end to specify exclusivity --- .../testing/simulator_synthetic_data_spec.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index f34131e20c2..66959a5b96a 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -31,7 +31,7 @@ message SimulatorSyntheticDataSpec { // A sequence of VIDs represented with a beginning and exclusive end. message VidRange { int64 start = 1; - int64 end = 2; + int64 end_exclusive = 2; } // The specification of a synthetic virtual population.
From 3f780756e7c1a0a08f38db693c41dc641a1278f6 Mon Sep 17 00:00:00 2001 From: Joe Corilla <10494069+jcorilla@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:56:06 +0000 Subject: [PATCH 6/6] Use date range in DateSpec instead of singular dates --- .../testing/simulator_synthetic_data_spec.proto | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto index 66959a5b96a..3f67e2498df 100644 --- a/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto +++ b/src/main/proto/wfa/measurement/api/v2alpha/event_group_metadata/testing/simulator_synthetic_data_spec.proto @@ -83,13 +83,19 @@ message SyntheticEventGroupSpec { repeated VidRangeSpec vid_range_specs = 2; } - // The specification for all frequencies reached at a specific date. + // The specification for all frequencies reached for a specific date range. message DateSpec { - // Date the VIDs were reached. - google.type.Date date = 1; + // A range of `Date`s represented with a beginning and exclusive end. + message DateRange { + google.type.Date start = 1; + google.type.Date end_exclusive = 2; + } + // Dates the VIDs were reached. + DateRange date_range = 1; + // Each FrequencySpec must have a unique frequency. repeated FrequencySpec frequency_specs = 2; } - + // `DateSpec`s should describe non-overlapping date ranges. repeated DateSpec date_specs = 2; }