Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated Frequency Module #432

Merged
merged 13 commits into from
Mar 23, 2020
Merged
15 changes: 8 additions & 7 deletions server/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from multiprocessing import cpu_count

from services.time_to_close import time_to_close
from services.frequency import frequency
from services.frequency import FrequencyService
from services.pinService import PinService
from services.requestCountsService import RequestCountsService
from services.requestDetailService import RequestDetailService
Expand Down Expand Up @@ -68,16 +68,17 @@ async def timetoclose(request):
return json(data)


@app.route('/requestfrequency')
@app.route('/requestfrequency', methods=["POST"])
@compress.compress()
async def requestfrequency(request):
freq_worker = frequency(app.config['Settings'])
freq_worker = FrequencyService(app.config['Settings'])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now this is using hardcoded values; let's add the logic to pull the parameters from the request object.


data = freq_worker.freq_view_data(service=True,
councils=[],
aggregate=True)
return_data = await freq_worker.get_frequency(startDate='2019-01-01',
endDate='2020-12-31',
ncList=['SHERMAN OAKS NC'],
requestTypes=['Other'])

return json(data)
return json(return_data)


@app.route('/sample-data')
Expand Down
293 changes: 173 additions & 120 deletions server/src/services/frequency.py
Original file line number Diff line number Diff line change
@@ -1,133 +1,186 @@
from configparser import ConfigParser
import sqlalchemy as db
from .dataService import DataService
import pandas as pd
import json


class frequency(object):
class FrequencyService(object):
def __init__(self, config=None, tableName="ingest_staging_table"):
self.config = config
self.dbString = None if not self.config \
else self.config['Database']['DB_CONNECTION_STRING']
self.dataAccess = DataService(config, tableName)

self.table = tableName
self.data = None
pass
async def get_frequency(self,
startDate=None,
endDate=None,
ncList=[],
requestTypes=[],
window='month'):

def freq_view_all(self, serviced=False, aggregate=True):
"""
Returns the request type and associated dates for all data
Sorted by request type, followed by created date,
service date (if applicable), and then closed date
{
"lastPulled": "NOW",
"data": [{
"bucketStartDates": ["2015-01-01", "2015-01-04",
"2015-01-07", "2015-01-10",
"2015-01-13", "2015-01-16"],
"requestTypes": [{
"type": "Homeless Encampment",
"numRequests": [200, 250, 12, 143, 200, 250]
}, {
"type": "Bulky Items",
"numRequests": [2, 25, 682, 333, 444, 666]
}]
}]
}
"""
# Todo: implement condition for serviced date
engine = db.create_engine(self.dbString)

if serviced:
query = "SELECT \
requesttype,\
createddate,\
closeddate,\
servicedate\
FROM %s" % self.table
else:
query = "SELECT \
requesttype,\
createddate,\
closeddate\
FROM %s" % self.table

df = pd.read_sql_query(query, con=engine)

if serviced:
df['servicedate'] = pd.to_datetime(df['servicedate'])

df['closeddate'] = pd.to_datetime(df['closeddate'])
df = df.sort_values(by=['requesttype', 'createddate', 'closeddate'])

return df.to_json(orient="records")
filters = self.dataAccess.standardFilters(
startDate, endDate, ncList, requestTypes)

def freq_aggregate(self, df):
request_counts = df['requesttype'].value_counts()
fields = ['createddate', 'requesttype']

return request_counts.to_json()

def freq_view_data(self,
service=False,
aggregate=True,
councils=[],
startdate="",
enddate=""):
"""
Returns the request type, neighborhood council, created and
closed dates for all data sorted by request type, followed by
neighborhood council #, then created date, and then closed date
Returns serviced date as well if service is set to True
Returns data for all councils if councils=[], otherwise returns data
for only the array of neighborhood council #s
Returns summary data as well if aggregate is set to True
Returns only entries created between startdate and enddate if values
are set for those parameters
Format of startdate and enddate should be a string in
the form 2019-12-01 23:02:05
"""
engine = db.create_engine(self.dbString)

if service:
df = pd.read_sql_query("SELECT\
requesttype,\
createddate,\
closeddate,\
servicedate,\
nc,\
ncname\
FROM %s" % self.table, con=engine)
df['servicedate'] = pd.to_datetime(df['servicedate'])
filteredData = self.dataAccess.query(fields, filters)
df = pd.DataFrame(data=filteredData['data'])

if window == 'month':
numBins = 10
else:
df = pd.read_sql_query("SELECT\
requesttype,\
createddate,\
closeddate,\
nc,\
ncname\
FROM %s" % self.table, con=engine)

df['closeddate'] = pd.to_datetime(df['closeddate'])

if councils != []:
df = df[df.nc.isin(councils)]

if startdate != "":
start = pd.to_datetime(startdate)
df = df[(df['createddate'] >= start)]

if enddate != "":
end = pd.to_datetime(enddate)
df = df[df['createddate'] <= end]

df = df.sort_values(by=['requesttype',
'nc',
'createddate',
'closeddate'])
df_json = json.loads(df.to_json(orient="records"))

if aggregate:
summary = self.freq_aggregate(df)
json_data = []
json_data.append(json.loads(summary))
json_data.append(df_json)
return json_data

return df_json

# Todo: filter by NC at the sql request stage instead of afterwards


if __name__ == "__main__":
freq = frequency()
config = ConfigParser()
config.read("../setting.cfg")
freq.config = config
freq.dbString = config['Database']['DB_CONNECTION_STRING']
freq.freq_view_data(service=True, aggregate=True)
numBins = 12

df['buckets'] = pd.qcut(df['createddate'], q=numBins, precision=0)
bucketStartDates = [str(df['buckets'].unique()[i].left)
for i in range(numBins)]

return [{
'bucketStartDates': bucketStartDates,
'requestTypes': [{
'type': request,
'numRequests': df['buckets'][df['requesttype'] == request]
.value_counts(sort=False).values.tolist()
} for request in requestTypes]
}]

# The following is deprecated; kept for reference
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can remove all of this commented-out code.
If we need to reference it, we can go through the git history. Otherwise it will just make the code messy.


# class frequency(object):
# def __init__(self, config=None, tableName="ingest_staging_table"):
# self.config = config
# self.dbString = None if not self.config \
# else self.config['Database']['DB_CONNECTION_STRING']

# self.table = tableName
# self.data = None
# pass

# The following code is deprecated; saving it in the meantime for reference

# def freq_view_all(self, serviced=False, aggregate=True):
# """
# Returns the request type and associated dates for all data
# Sorted by request type, followed by created date,
# service date (if applicable), and then closed date
# """
# engine = db.create_engine(self.dbString)

# if serviced:
# query = "SELECT \
# requesttype,\
# createddate,\
# closeddate,\
# servicedate\
# FROM %s" % self.table
# else:
# query = "SELECT \
# requesttype,\
# createddate,\
# closeddate\
# FROM %s" % self.table

# df = pd.read_sql_query(query, con=engine)

# if serviced:
# df['servicedate'] = pd.to_datetime(df['servicedate'])

# df['closeddate'] = pd.to_datetime(df['closeddate'])
# df = df.sort_values(by=['requesttype', 'createddate', 'closeddate'])

# return df.to_json(orient="records")

# def freq_aggregate(self, df):
# request_counts = df['requesttype'].value_counts()

# return request_counts.to_json()

# def freq_view_data(self,
# service=False,
# aggregate=True,
# councils=[],
# startdate="",
# enddate=""):
# """
# Returns the request type, neighborhood council, created and
# closed dates for all data sorted by request type, followed by
# neighborhood council #, then created date, and then closed date
# Returns serviced date as well if service is set to True
# Returns data for all councils if councils=[], otherwise returns data
# for only the array of neighborhood council #s
# Returns summary data as well if aggregate is set to True
# Returns only entries created between startdate and enddate if values
# are set for those parameters
# Format of startdate and enddate should be a string in
# the form 2019-12-01 23:02:05
# """
# engine = db.create_engine(self.dbString)

# if service:
# df = pd.read_sql_query("SELECT\
# requesttype,\
# createddate,\
# closeddate,\
# servicedate,\
# nc,\
# ncname\
# FROM %s" % self.table, con=engine)
# df['servicedate'] = pd.to_datetime(df['servicedate'])

# else:
# df = pd.read_sql_query("SELECT\
# requesttype,\
# createddate,\
# closeddate,\
# nc,\
# ncname\
# FROM %s" % self.table, con=engine)

# df['closeddate'] = pd.to_datetime(df['closeddate'])

# if councils != []:
# df = df[df.nc.isin(councils)]

# if startdate != "":
# start = pd.to_datetime(startdate)
# df = df[(df['createddate'] >= start)]

# if enddate != "":
# end = pd.to_datetime(enddate)
# df = df[df['createddate'] <= end]

# df = df.sort_values(by=['requesttype',
# 'nc',
# 'createddate',
# 'closeddate'])
# df_json = json.loads(df.to_json(orient="records"))

# if aggregate:
# summary = self.freq_aggregate(df)
# json_data = []
# json_data.append(json.loads(summary))
# json_data.append(df_json)
# return json_data

# return df_json

# if __name__ == "__main__":
# freq = frequency()
# config = ConfigParser()
# config.read("../setting.cfg")
# freq.config = config
# freq.dbString = config['Database']['DB_CONNECTION_STRING']
# freq.freq_view_data(service=True, aggregate=True)
Loading