From 290875a2dcbbee18237500814d2412afa8cca6b8 Mon Sep 17 00:00:00 2001 From: n-h-diaz Date: Thu, 25 Apr 2024 10:49:51 -0700 Subject: [PATCH] Add schema for aggregations PiperOrigin-RevId: 628124823 --- core/dcschema.mcf | 96 ++++++++++++++++++++++++++++++++ core/dcschema_enum_classes.mcf | 6 ++ core/dcschema_enum_instances.mcf | 20 +++++++ 3 files changed, 122 insertions(+) diff --git a/core/dcschema.mcf b/core/dcschema.mcf index 35776a121..1f222f523 100644 --- a/core/dcschema.mcf +++ b/core/dcschema.mcf @@ -3886,3 +3886,99 @@ name: "statisticsCanadaGeographicCode" typeOf: schema:Property domainIncludes: schema:Place description: "Unique codes for categorizing and enumerating the census geographic units of Canada" + +Node: dcid:StatisticalAggregation +typeOf: schema:Class +subClassOf: schema:Intangible +name: "StatisticalAggregation" +description: "Custom aggregation of statistical data in Data Commons, computed by Data Commons. For example, aggregating population with health insurance stats available at county-level to state-level." + +Node: dcid:AggregationSlice +typeOf: schema:Class +subClassOf: schema:Intangible +name: "AggregationSlice" +description: "Filter to a Data Commons StatisticalAggregation consisting of a property and one or more values, which will further slice the output data. For example, the count of earthquakes might be sliced by different magnitude ranges." + +Node: dcid:StatisticalCalculation +typeOf: schema:Class +subClassOf: schema:Intangible +name: "StatisticalCalculation" +description: "Custom calculation in Data Commons, computed by Data Commons. For example, computing the population without health insurance using an exhaustive age breakdown of such population." + +Node: dcid:aggregateProperty +typeOf: schema:Property +domainIncludes: dcs:StatisticalAggregation +rangeIncludes: schema:Property +description: "Property being aggregated in a Data Commons StatisticalAggregation. 
This could be a StatisticalVariable when aggregating across places, or a constraint property when aggregating entities." + +Node: dcid:aggregationMethod +typeOf: schema:Property +domainIncludes: dcs:StatisticalAggregation +rangeIncludes: dcs:StatisticalAggregationMethodEnum +description: "Method by which data is aggregated (minimum, maximum, sum, etc)." + +Node: dcid:aggregationSlice +typeOf: schema:Property +domainIncludes: dcs:StatisticalAggregation +rangeIncludes: dcs:AggregationSlice +description: "Filter to a Data Commons StatisticalAggregation, which will further slice the output data. Output StatisticalVariables will be generated for each slice in the filter." + +Node: dcid:aggregationSliceProperty +typeOf: schema:Property +domainIncludes: dcs:AggregationSlice +rangeIncludes: schema:Property +description: "Property to filter a Data Commons StatisticalAggregation by. Entities will be grouped by this property when computing the aggregation." + +Node: dcid:aggregationSliceValue +typeOf: schema:Property +domainIncludes: dcs:AggregationSlice +rangeIncludes: dcs:Enumeration, dcs:QuantityRange +description: "Value to filter a Data Commons StatisticalAggregation by. Entities will be grouped by this value when computing the aggregation." + +Node: dcid:inputEntity +typeOf: schema:Property +domainIncludes: dcs:StatisticalAggregation +rangeIncludes: schema:Class +description: "Type of input entity to a Data Commons StatisticalAggregation (place type, event type, etc)." + +Node: dcid:inputTimeProperty +typeOf: schema:Property +domainIncludes: dcs:StatisticalAggregation +rangeIncludes: schema:Property +description: "Input property in a Data Commons StatisticalAggregation that will be used to indicate the time. Required for aggregating entities. Output StatVarObservations will use this property to determine their date." 
+ +Node: dcid:linkProperty +typeOf: schema:Property +domainIncludes: dcs:StatisticalAggregation +rangeIncludes: schema:Property +description: "Property linking input to output entities in a Data Commons StatisticalAggregation, such as containment, membership, or hierarchy." + +Node: dcid:outputEntity +typeOf: schema:Property +domainIncludes: dcs:StatisticalAggregation +rangeIncludes: schema:Class +description: "Type of output entity to a Data Commons StatisticalAggregation. The output StatVarObservations will be attached to entities associated with this type." + +Node: dcid:outputTimeGranularity +typeOf: schema:Property +domainIncludes: dcs:StatisticalAggregation +rangeIncludes: dcs:Text +description: "Date pattern (ISO 8601) to match for aggregating by time. Inputs will be grouped by this date pattern, and the output StatVarObservations will have dates of this pattern." + +Node: dcid:inputPropertyExpression +typeOf: schema:Property +domainIncludes: dcs:StatisticalCalculation +rangeIncludes: dcs:Text +description: "Mathematical expression consisting of properties (typically StatisticalVariables) representing a Data Commons StatisticalCalculation. Valid operators include +, -, *, /. For example, (Count_Person_Male + Count_Person_Female)." + +Node: dcid:operatedEntity +typeOf: schema:Property +domainIncludes: dcs:StatisticalCalculation +rangeIncludes: schema:Class +description: "Type of entity for which to perform a Data Commons StatisticalCalculation." + +Node: dcid:outputProperty +typeOf: schema:Property +domainIncludes: dcs:StatisticalCalculation +rangeIncludes: schema:Property +description: "Output property to hold the result of a Data Commons StatisticalCalculation. This is typically a StatisticalVariable." 
diff --git a/core/dcschema_enum_classes.mcf b/core/dcschema_enum_classes.mcf index 1142ae853..fbe9e07a2 100644 --- a/core/dcschema_enum_classes.mcf +++ b/core/dcschema_enum_classes.mcf @@ -586,3 +586,9 @@ typeOf: schema:Class subClassOf: dcs:Enumeration name: "LicenseTypeEnum" description: "Type of data license, for certain common licenses." + +Node: dcid:StatisticalAggregationMethodEnum +typeOf: schema:Class +subClassOf: dcs:Enumeration +name: "StatisticalAggregationMethodEnum" +description: "Enum representing the method by which data is aggregated (minimum, maximum, sum, etc)." diff --git a/core/dcschema_enum_instances.mcf b/core/dcschema_enum_instances.mcf index 153fe78f8..17bb926ff 100644 --- a/core/dcschema_enum_instances.mcf +++ b/core/dcschema_enum_instances.mcf @@ -3043,3 +3043,23 @@ typeOf: dcs:LicenseTypeEnum name: "Open Data Commons Open Database License" description: "Data available under the Open Data Commons Open Database License." url: "https://opendatacommons.org/licenses/odbl/" + +Node: dcid:Count +typeOf: dcs:StatisticalAggregationMethodEnum +name: "Count" +description: "Computes the count of input entities in a Data Commons StatisticalAggregation." + +Node: dcid:Maximum +typeOf: dcs:StatisticalAggregationMethodEnum +name: "Maximum" +description: "Computes the maximum of input values in a Data Commons StatisticalAggregation." + +Node: dcid:Minimum +typeOf: dcs:StatisticalAggregationMethodEnum +name: "Minimum" +description: "Computes the minimum of input values in a Data Commons StatisticalAggregation." + +Node: dcid:Sum +typeOf: dcs:StatisticalAggregationMethodEnum +name: "Sum" +description: "Computes the sum of input values in a Data Commons StatisticalAggregation."