Merge pull request #274 from azavea/feature/cross-year-aggregations

Cross Year Aggregations
azavea · Feb 23, 2017 · 03d1186 · 03d1186
2 parents 366b4a4 + 3414c3d
commit 03d1186
Show file tree

Hide file tree

Showing 10 changed files with 403 additions and 342 deletions.
diff --git a/django/climate_change_api/climate_data/filters.py b/django/climate_change_api/climate_data/filters.py
@@ -16,6 +16,10 @@ class ClimateDataFilterSet(filters.FilterSet):
     models = django_filters.MethodFilter()
     years = django_filters.MethodFilter()
 
+    def __init__(self, *args, **kwargs):
+        self.year_col = kwargs.pop('year_col', 'data_source__year')
+        super(ClimateDataFilterSet, self).__init__(*args, **kwargs)
+
     def filter_models(self, queryset, value):
         """ Filter models based on a comma separated list of names
 
@@ -46,13 +50,16 @@ def filter_years(self, queryset, value):
             for year_range_str in value.split(','):
                 year_range = year_range_str.split(':')
                 if len(year_range) == 2:
-                    start_year = int(year_range[0])
-                    end_year = int(year_range[1])
-                    # AND the year range
-                    year_filters.append((Q(data_source__year__gte=start_year) & Q(data_source__year__lte=end_year)))
+                    # Pair the two years with their comparators, gte and lte respectively
+                    bounds = zip(['gte', 'lte'], year_range)
+                    # Create two Q objects with the proper column, comparator and boundary year
+                    start, end = [Q(**{"%s__%s" % (self.year_col, comparator): year})
+                                  for comparator, year in bounds]
+                    # AND the two bounds together so a year must satisfy both
+                    year_filters.append(start & end)
                 if len(year_range) == 1:
                     year = int(year_range[0])
-                    year_filters.append(Q(data_source__year=year))
+                    year_filters.append(Q(**{self.year_col: year}))
             logger.debug(year_filters)
             # Now OR together all the year filters we've created
             queryset = queryset.filter(reduce(lambda x, y: x | y, year_filters))

diff --git a/django/climate_change_api/indicators/abstract_indicators.py b/django/climate_change_api/indicators/abstract_indicators.py
@@ -1,25 +1,23 @@
 from collections import OrderedDict, defaultdict
 from itertools import groupby
 import re
-from datetime import date, timedelta
 
 from django.db.models import F, Case, When, FloatField, Sum
 from django.db import connection
 
 
 from climate_data.models import ClimateData
-from climate_data.filters import ClimateDataFilterSet
 from .params import IndicatorParams, ThresholdIndicatorParams
 from .serializers import IndicatorSerializer
 from .unit_converters import DaysUnitsMixin, TemperatureConverter, PrecipitationConverter
-from .query_ranges import MonthRangeConfig, QuarterRangeConfig, CustomRangeConfig
+import queryset_generator
 
 
 class Indicator(object):
 
     label = ''
     description = ''
-    valid_aggregations = ('yearly', 'quarterly', 'monthly', 'daily', 'custom')
+    valid_aggregations = ('yearly', 'quarterly', 'monthly', 'offset_yearly', 'custom')
     variables = ClimateData.VARIABLE_CHOICES
 
     # Filters define which rows match our query, conditions limit which rows
@@ -50,6 +48,9 @@ class Indicator(object):
     available_units = (None,)
     parameters = None
 
+    # Keys that identify each value in the rows returned from the database.
+    aggregate_keys = ['agg_key', 'data_source__model']
+
     def __init__(self, city, scenario, parameters=None):
         if not city:
             raise ValueError('Indicator constructor requires a city instance')
@@ -105,45 +106,29 @@ def get_queryset(self):
         by the constructor
 
         """
-        filter_set = ClimateDataFilterSet()
-        queryset = (ClimateData.objects.filter(map_cell=self.city.map_cell)
-                    .filter(data_source__scenario=self.scenario))
-        queryset = filter_set.filter_years(queryset, self.params.years.value)
-        queryset = filter_set.filter_models(queryset, self.params.models.value)
+        # Get the queryset generator for this indicator's time aggregation
+        generator = queryset_generator.get(self.params.time_aggregation.value)
+        key_params = {}
+
+        # The custom range config accepts a user-defined parameter to pick which dates to use
+        if self.params.custom_time_agg.value is not None:
+            key_params['custom_time_agg'] = self.params.custom_time_agg.value
+
+        # Use the queryset generator classes to construct the initial base climate data queryset
+        queryset = generator.create_queryset(
+            years=self.params.years.value,
+            models=self.params.models.value,
+            scenario=self.scenario,
+            key_params=key_params
+        ).filter(
+            map_cell=self.city.map_cell
+        )
 
         if self.filters is not None:
             queryset = queryset.filter(**self.filters)
 
-        # For certain time aggregations, add a field to track which interval a data point is in
-        time_aggregation_configs = {
-            'monthly': MonthRangeConfig,
-            'quarterly': QuarterRangeConfig,
-            'custom': CustomRangeConfig
-        }
-        if self.params.time_aggregation.value in time_aggregation_configs:
-            config = time_aggregation_configs[self.params.time_aggregation.value]
-            params = {}
-
-            # The custom range config accepts a user-defined parameter to pick which dates to use
-            if self.params.custom_time_agg.value is not None:
-                params['custom_time_agg'] = self.params.custom_time_agg.value
-
-            queryset = (queryset
-                        .annotate(interval=config.cases(**params))
-                        .filter(interval__isnull=False))
-
         return queryset
 
-    @property
-    def aggregate_keys(self):
-        return {
-            'daily': ['data_source__year', 'day_of_year', 'data_source__model'],
-            'monthly': ['data_source__year', 'interval', 'data_source__model'],
-            'quarterly': ['data_source__year', 'interval', 'data_source__model'],
-            'custom': ['data_source__year', 'interval', 'data_source__model'],
-            'yearly': ['data_source__year', 'data_source__model']
-        }.get(self.params.time_aggregation.value)
-
     @property
     def expression(self):
         return self.variables[0]
@@ -183,40 +168,13 @@ def collate_results(self, aggregations):
         """ Take results as a series of datapoints and collate them by key
 
         @param aggregations list-of-dicts returned by aggregate method
-        @returns Dict of list of values, keyed by the subclass's key_results implementation
+        @returns Dict of lists of values, keyed by each result's 'agg_key' entry
         """
         results = defaultdict(list)
         for result in aggregations:
-            key = self.key_result(result)
-            results[key].append(result['value'])
+            results[result['agg_key']].append(result['value'])
         return results
 
-    def key_result(self, result):
-        """ Stub function for subclasses to determine how to collate results
-
-        @param result A row of timeseries data generated by aggregate()
-        @returns The value that row should be keyed by in the final response.
-                 Results should be keyed as one of the following based on aggregation:
-                 * YYYY for yearly data
-                 * YYYY-MM for monthly data
-                 * YYYY-MM-DD for daily data
-        """
-        year = result['data_source__year']
-        if self.params.time_aggregation.value == 'yearly':
-            return year
-
-        if self.params.time_aggregation.value == 'daily':
-            day_of_year = result['day_of_year']
-            day = date(year, 1, 1) + timedelta(days=day_of_year-1)
-            return day.isoformat()
-
-        template = {
-            'monthly': '{year}-{int:02d}',
-            'quarterly': '{year}-Q{int:0d}',
-            'custom': '{year}-{int:02d}'
-        }.get(self.params.time_aggregation.value)
-        return template.format(year=year, int=(result['interval']+1))
-
     def calculate(self):
         aggregations = self.aggregate()
         aggregations = self.convert_units(aggregations)
@@ -265,17 +223,17 @@ def get_streaks(self):
         (base_query, base_query_params) = (self.queryset.select_related('data_source')
                                                .query.sql_with_params())
         query = """
-            SELECT year as data_source__year, model_id as data_source__model,
+            SELECT agg_key, model_id as data_source__model,
                    count(*) as length, match
-            FROM (SELECT year, model_id, day_of_year,
+            FROM (SELECT agg_key, model_id, day_of_year,
                          (CASE WHEN {condition} THEN 1 ELSE 0 END) as match,
-                         ROW_NUMBER() OVER(ORDER BY year, model_id, day_of_year) -
+                         ROW_NUMBER() OVER(ORDER BY agg_key, model_id, day_of_year) -
                          ROW_NUMBER() OVER(PARTITION BY CASE WHEN {condition} THEN 1 ELSE 0 END
-                                           ORDER BY year, model_id, day_of_year)
+                                           ORDER BY agg_key, model_id, day_of_year)
                          AS grp
                   FROM ({base_query}) orig_query) groups
-            GROUP BY year, model_id, grp, match
-            ORDER BY year, model_id
+            GROUP BY agg_key, model_id, grp, match
+            ORDER BY agg_key, model_id
         """.format(base_query=base_query, condition=self.raw_condition)
         # First run the query and get a list of dicts with one result per sequence
         with connection.cursor() as cursor:

diff --git a/django/climate_change_api/indicators/params.py b/django/climate_change_api/indicators/params.py
@@ -23,9 +23,9 @@
 
 TIME_AGGREGATION_PARAM_DOCSTRING = ("Time granularity to group data by for result structure. Valid "
                                     "aggregations depend on indicator. Can be 'yearly', "
-                                    "'quarterly', 'monthly', 'daily' or 'custom'. Defaults to "
-                                    "'yearly'. If 'custom', 'custom_time_agg' parameter must be "
-                                    "set.")
+                                    "'offset_yearly', 'quarterly', 'monthly', or 'custom'. "
+                                    "Defaults to 'yearly'. If 'custom', 'custom_time_agg' "
+                                    "parameter must be set.")
 
 UNITS_PARAM_DOCSTRING = ("Units in which to return the data. Defaults to Imperial units (Fahrenheit"
                          " for temperature indicators and inches for precipitation).")

diff --git a/django/climate_change_api/indicators/query_ranges.py b/django/climate_change_api/indicators/query_ranges.py