Skip to content
This repository has been archived by the owner on Aug 28, 2023. It is now read-only.

Commit

Permalink
Merge pull request #274 from azavea/feature/cross-year-aggregations
Browse files Browse the repository at this point in the history
Cross Year Aggregations
  • Loading branch information
rmartz authored Feb 23, 2017
2 parents 366b4a4 + 3414c3d commit 03d1186
Show file tree
Hide file tree
Showing 10 changed files with 403 additions and 342 deletions.
17 changes: 12 additions & 5 deletions django/climate_change_api/climate_data/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ class ClimateDataFilterSet(filters.FilterSet):
models = django_filters.MethodFilter()
years = django_filters.MethodFilter()

# Accepts an optional 'year_col' keyword argument and stores it on the instance
# as self.year_col; when it is not supplied the value is 'data_source__year'.
def __init__(self, *args, **kwargs):
# Pop 'year_col' out of kwargs so it is not forwarded to the parent constructor.
self.year_col = kwargs.pop('year_col', 'data_source__year')
super(ClimateDataFilterSet, self).__init__(*args, **kwargs)

def filter_models(self, queryset, value):
""" Filter models based on a comma separated list of names
Expand Down Expand Up @@ -46,13 +50,16 @@ def filter_years(self, queryset, value):
for year_range_str in value.split(','):
year_range = year_range_str.split(':')
if len(year_range) == 2:
start_year = int(year_range[0])
end_year = int(year_range[1])
# AND the year range
year_filters.append((Q(data_source__year__gte=start_year) & Q(data_source__year__lte=end_year)))
# Pair the two years with their comparators, gte and lte respectively
bounds = zip(['gte', 'lte'], year_range)
# Create two Q objects with the proper column, comparator and boundary year
start, end = [Q(**{"%s__%s" % (self.year_col, comparator): year})
for comparator, year in bounds]
# Combine the two bound checks with a logical AND
year_filters.append(start & end)
if len(year_range) == 1:
year = int(year_range[0])
year_filters.append(Q(data_source__year=year))
year_filters.append(Q(**{self.year_col: year}))
logger.debug(year_filters)
# Now OR together all the year filters we've created
queryset = queryset.filter(reduce(lambda x, y: x | y, year_filters))
Expand Down
102 changes: 30 additions & 72 deletions django/climate_change_api/indicators/abstract_indicators.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
from collections import OrderedDict, defaultdict
from itertools import groupby
import re
from datetime import date, timedelta

from django.db.models import F, Case, When, FloatField, Sum
from django.db import connection


from climate_data.models import ClimateData
from climate_data.filters import ClimateDataFilterSet
from .params import IndicatorParams, ThresholdIndicatorParams
from .serializers import IndicatorSerializer
from .unit_converters import DaysUnitsMixin, TemperatureConverter, PrecipitationConverter
from .query_ranges import MonthRangeConfig, QuarterRangeConfig, CustomRangeConfig
import queryset_generator


class Indicator(object):

label = ''
description = ''
valid_aggregations = ('yearly', 'quarterly', 'monthly', 'daily', 'custom')
valid_aggregations = ('yearly', 'quarterly', 'monthly', 'offset_yearly', 'custom')
variables = ClimateData.VARIABLE_CHOICES

# Filters define which rows match our query, conditions limit which rows
Expand Down Expand Up @@ -50,6 +48,9 @@ class Indicator(object):
available_units = (None,)
parameters = None

# These are the keys that values are identified by when they come from the database.
aggregate_keys = ['agg_key', 'data_source__model']

def __init__(self, city, scenario, parameters=None):
if not city:
raise ValueError('Indicator constructor requires a city instance')
Expand Down Expand Up @@ -105,45 +106,29 @@ def get_queryset(self):
by the constructor
"""
filter_set = ClimateDataFilterSet()
queryset = (ClimateData.objects.filter(map_cell=self.city.map_cell)
.filter(data_source__scenario=self.scenario))
queryset = filter_set.filter_years(queryset, self.params.years.value)
queryset = filter_set.filter_models(queryset, self.params.models.value)
# Get the queryset generator for this indicator's time aggregation
generator = queryset_generator.get(self.params.time_aggregation.value)
key_params = {}

# The custom range config accepts a user-defined parameter to pick which dates to use
if self.params.custom_time_agg.value is not None:
key_params['custom_time_agg'] = self.params.custom_time_agg.value

# Use the queryset generator classes to construct the initial base climate data queryset
queryset = generator.create_queryset(
years=self.params.years.value,
models=self.params.models.value,
scenario=self.scenario,
key_params=key_params
).filter(
map_cell=self.city.map_cell
)

if self.filters is not None:
queryset = queryset.filter(**self.filters)

# For certain time aggregations, add a field to track which interval a data point is in
time_aggregation_configs = {
'monthly': MonthRangeConfig,
'quarterly': QuarterRangeConfig,
'custom': CustomRangeConfig
}
if self.params.time_aggregation.value in time_aggregation_configs:
config = time_aggregation_configs[self.params.time_aggregation.value]
params = {}

# The custom range config accepts a user-defined parameter to pick which dates to use
if self.params.custom_time_agg.value is not None:
params['custom_time_agg'] = self.params.custom_time_agg.value

queryset = (queryset
.annotate(interval=config.cases(**params))
.filter(interval__isnull=False))

return queryset

# Looks up the list of key names for the current time aggregation value
# (self.params.time_aggregation.value).
@property
def aggregate_keys(self):
# dict.get is used without a default, so an aggregation value that has no
# entry in this mapping yields None rather than raising KeyError.
return {
'daily': ['data_source__year', 'day_of_year', 'data_source__model'],
'monthly': ['data_source__year', 'interval', 'data_source__model'],
'quarterly': ['data_source__year', 'interval', 'data_source__model'],
'custom': ['data_source__year', 'interval', 'data_source__model'],
'yearly': ['data_source__year', 'data_source__model']
}.get(self.params.time_aggregation.value)

@property
def expression(self):
return self.variables[0]
Expand Down Expand Up @@ -183,40 +168,13 @@ def collate_results(self, aggregations):
""" Take results as a series of datapoints and collate them by key
@param aggregations list-of-dicts returned by aggregate method
@returns Dict of list of values, keyed by the subclass's key_results implementation
@returns Dict of list of values, keyed by the queryset's agg_key column
"""
results = defaultdict(list)
for result in aggregations:
key = self.key_result(result)
results[key].append(result['value'])
results[result['agg_key']].append(result['value'])
return results

def key_result(self, result):
""" Stub function for subclasses to determine how to collate results
@param result A row of timeseries data generated by aggregate()
@returns The value that row should be keyed by in the final response.
Results should be keyed as one of the following based on aggregation:
* YYYY for yearly data (the year value itself, not a formatted string)
* YYYY-QN for quarterly data
* YYYY-MM for monthly data
* YYYY-MM-DD for daily data
Custom aggregations use the same YYYY-NN template as monthly data.
"""
year = result['data_source__year']
# Yearly results are keyed by the raw year value; no string formatting is applied.
if self.params.time_aggregation.value == 'yearly':
return year

if self.params.time_aggregation.value == 'daily':
day_of_year = result['day_of_year']
# day_of_year is treated as 1-based here: a value of 1 maps to January 1.
day = date(year, 1, 1) + timedelta(days=day_of_year-1)
return day.isoformat()

# NOTE(review): .get() returns None for any aggregation value not listed below,
# in which case the template.format call on the last line would fail.
template = {
'monthly': '{year}-{int:02d}',
'quarterly': '{year}-Q{int:0d}',
'custom': '{year}-{int:02d}'
}.get(self.params.time_aggregation.value)
# One is added to the interval before formatting, so a stored interval of 0
# is rendered as 1 (e.g. '-01' or '-Q1').
return template.format(year=year, int=(result['interval']+1))

def calculate(self):
aggregations = self.aggregate()
aggregations = self.convert_units(aggregations)
Expand Down Expand Up @@ -265,17 +223,17 @@ def get_streaks(self):
(base_query, base_query_params) = (self.queryset.select_related('data_source')
.query.sql_with_params())
query = """
SELECT year as data_source__year, model_id as data_source__model,
SELECT agg_key, model_id as data_source__model,
count(*) as length, match
FROM (SELECT year, model_id, day_of_year,
FROM (SELECT agg_key, model_id, day_of_year,
(CASE WHEN {condition} THEN 1 ELSE 0 END) as match,
ROW_NUMBER() OVER(ORDER BY year, model_id, day_of_year) -
ROW_NUMBER() OVER(ORDER BY agg_key, model_id, day_of_year) -
ROW_NUMBER() OVER(PARTITION BY CASE WHEN {condition} THEN 1 ELSE 0 END
ORDER BY year, model_id, day_of_year)
ORDER BY agg_key, model_id, day_of_year)
AS grp
FROM ({base_query}) orig_query) groups
GROUP BY year, model_id, grp, match
ORDER BY year, model_id
GROUP BY agg_key, model_id, grp, match
ORDER BY agg_key, model_id
""".format(base_query=base_query, condition=self.raw_condition)
# First run the query and get a list of dicts with one result per sequence
with connection.cursor() as cursor:
Expand Down
6 changes: 3 additions & 3 deletions django/climate_change_api/indicators/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@

TIME_AGGREGATION_PARAM_DOCSTRING = ("Time granularity to group data by for result structure. Valid "
"aggregations depend on indicator. Can be 'yearly', "
"'quarterly', 'monthly', 'daily' or 'custom'. Defaults to "
"'yearly'. If 'custom', 'custom_time_agg' parameter must be "
"set.")
"'offset_yearly', 'quarterly', 'monthly', or 'custom'. "
"Defaults to 'yearly'. If 'custom', 'custom_time_agg' "
"parameter must be set.")

UNITS_PARAM_DOCSTRING = ("Units in which to return the data. Defaults to Imperial units (Fahrenheit"
" for temperature indicators and inches for precipitation).")
Expand Down
153 changes: 0 additions & 153 deletions django/climate_change_api/indicators/query_ranges.py

This file was deleted.

Loading

0 comments on commit 03d1186

Please sign in to comment.