From 6c4fb3304d0f482b488a11994b980fde33f34e02 Mon Sep 17 00:00:00 2001 From: Adam Kendis Date: Thu, 7 May 2020 12:41:38 -0700 Subject: [PATCH 01/18] Fixed data vis error resulting from a query with zero results. --- src/components/chartExtras/NumberOfRequests.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/chartExtras/NumberOfRequests.jsx b/src/components/chartExtras/NumberOfRequests.jsx index 00c9435d3..4422c36aa 100644 --- a/src/components/chartExtras/NumberOfRequests.jsx +++ b/src/components/chartExtras/NumberOfRequests.jsx @@ -20,7 +20,7 @@ const NumberOfRequests = ({ ); const mapStateToProps = state => ({ - numRequests: Object.values(state.data.counts.type).reduce((p, c) => p + c, 0), + numRequests: Object.values(state.data.counts.type || {}).reduce((p, c) => p + c, 0), }); export default connect(mapStateToProps)(NumberOfRequests); From d117590d30ee5ba638aee45b2eea8cbcb36fa711 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Fri, 8 May 2020 09:12:59 -0700 Subject: [PATCH 02/18] isolated db in its own module --- server/src/app.py | 2 ++ server/src/services/dataService.py | 16 ++++---------- server/src/services/sqlIngest.py | 29 +++++++++---------------- server/src/utils/database.py | 34 ++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 31 deletions(-) create mode 100644 server/src/utils/database.py diff --git a/server/src/app.py b/server/src/app.py index b45df0500..9762ead5a 100644 --- a/server/src/app.py +++ b/server/src/app.py @@ -21,6 +21,7 @@ from utils.sanic import add_performance_header from utils.redis import cache +from utils.database import db app = Sanic(__name__) CORS(app) @@ -48,6 +49,7 @@ def configure_app(): if app.config['Settings']['Server']['Debug']: add_performance_header(app) cache.config(app.config['Settings']['Redis']) + db.config(app.config['Settings']['Database']) @app.route('/apistatus') diff --git a/server/src/services/dataService.py b/server/src/services/dataService.py index 33f48ddaa..5d4beae1c 100644 --- a/server/src/services/dataService.py +++ b/server/src/services/dataService.py @@ -1,20 +1,12 @@ import datetime import pandas as pd -import sqlalchemy as db -from sqlalchemy.orm import sessionmaker from .databaseOrm import Ingest as Request +from utils.database import db class DataService(object): def __init__(self, config=None, tableName="ingest_staging_table"): - self.config = config - self.dbString = None if not self.config \ - else self.config['Database']['DB_CONNECTION_STRING'] - - self.table = tableName - self.data = None - self.engine = db.create_engine(self.dbString) - self.Session = sessionmaker(bind=self.engine) + pass async def lastPulled(self): # Will represent last time the ingest pipeline ran @@ -63,7 +55,7 @@ def itemQuery(self, requestNumber): if 'id' in fields: fields.remove('id') - session = self.Session() + session = db.Session() record = session \ .query(*fields) \ .filter(Request.srnumber == requestNumber) \ @@ -86,7 +78,7 @@ def query(self, queryItems=[], queryFilters=[], limit=None): selectFields = [getattr(Request, item) for item in queryItems] - session = self.Session() + session = db.Session() records = session \ .query(*selectFields) \ .filter(*queryFilters) \ diff --git a/server/src/services/sqlIngest.py b/server/src/services/sqlIngest.py index 95fead649..1894a765a 100644 --- a/server/src/services/sqlIngest.py +++ b/server/src/services/sqlIngest.py @@ -1,6 +1,4 @@ -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker -from sqlalchemy.sql import text 
+from utils.database import db import time import json from .databaseOrm import Ingest, Base @@ -21,10 +19,7 @@ def end(self): class DataHandler: def __init__(self, config=None): - dbString = config['Database']['DB_CONNECTION_STRING'] - - self.engine = create_engine(dbString) - self.session = sessionmaker(bind=self.engine)() + self.session = db.Session() self.socrata = SocrataClient(config) def __del__(self): @@ -32,8 +27,8 @@ def __del__(self): def resetDatabase(self): log('\nResetting database.') - Base.metadata.drop_all(self.engine) - Base.metadata.create_all(self.engine) + Base.metadata.drop_all(db.engine) + Base.metadata.create_all(db.engine) def fetchData(self, year, offset, limit): log('\tFetching {} rows, offset {}'.format(limit, offset)) @@ -74,12 +69,8 @@ def ingestYear(self, year, limit, querySize): } def cleanTable(self): - def exec_sql(sql): - with self.engine.connect() as conn: - return conn.execute(text(sql)) - def dropDuplicates(table, report): - rows = exec_sql(f""" + rows = db.exec_sql(f""" DELETE FROM {table} a USING {table} b WHERE a.id < b.id AND a.srnumber = b.srnumber; """) @@ -90,7 +81,7 @@ def dropDuplicates(table, report): }) def switchPrimaryKey(table, report): - exec_sql(f""" + db.exec_sql(f""" ALTER TABLE {table} DROP COLUMN id; ALTER TABLE {table} ADD PRIMARY KEY (srnumber); """) @@ -101,7 +92,7 @@ def switchPrimaryKey(table, report): }) def removeInvalidClosedDates(table, report): - result = exec_sql(f""" + result = db.exec_sql(f""" UPDATE {table} SET closeddate = NULL WHERE closeddate::timestamp < createddate::timestamp; @@ -113,7 +104,7 @@ def removeInvalidClosedDates(table, report): }) def setDaysToClose(table, report): - result = exec_sql(f""" + result = db.exec_sql(f""" UPDATE {table} SET _daystoclose = EXTRACT ( EPOCH FROM @@ -128,7 +119,7 @@ def setDaysToClose(table, report): }) def fixNorthWestwood(table, report): - result = exec_sql(f""" + result = db.exec_sql(f""" UPDATE {table} SET nc = 127 WHERE nc = 0 AND ncname = 'NORTH WESTWOOD NC' @@ -140,7 +131,7 @@ def fixNorthWestwood(table, report): }) def fixHistoricCulturalNorth(table, report): - result = exec_sql(f""" + result = db.exec_sql(f""" UPDATE {table} SET nc = 128 WHERE nc = 0 AND ncname = 'HISTORIC CULTURAL NORTH NC' diff --git a/server/src/utils/database.py b/server/src/utils/database.py new file mode 100644 index 000000000..6e65e3c1a --- /dev/null +++ b/server/src/utils/database.py @@ -0,0 +1,34 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from sqlalchemy.sql import text + + +class Database(object): + def __init__(self, verbose=False): + self.verbose = verbose + + def config(self, config): + self.engine = create_engine(config['DB_CONNECTION_STRING']) + self.Session = sessionmaker(bind=self.engine) + + if self.verbose: + self.log_connection_events() + + def exec_sql(self, sql): + with self.engine.connect() as conn: + return conn.execute(text(sql)) + + def log_connection_events(self): + def on_checkout(*args, **kwargs): + print('process id {} checkout'.format(os.getpid()), flush=True) + + def on_checkin(*args, **kwargs): + print('process id {} checkin'.format(os.getpid()), flush=True) + + from sqlalchemy import event + import os + event.listen(self.engine, 'checkout', on_checkout) + event.listen(self.engine, 'checkin', on_checkin) + + +db = Database() From 9f1cfcee09d34f6a14e8017cdd2c2cd3d920c7f9 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Sat, 9 May 2020 15:11:07 -0700 Subject: [PATCH 03/18] added server/src to python path --- server/test/__init__.py 
| 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/server/test/__init__.py b/server/test/__init__.py index e69de29bb..bfcd38b6a 100644 --- a/server/test/__init__.py +++ b/server/test/__init__.py @@ -0,0 +1,7 @@ +import sys +from os.path import dirname, abspath, join + + +# add src directory to path so pytest can find modules +src_dir = join(dirname(abspath(__file__)), '..', 'src') +sys.path.append(src_dir) From bc8f8054d5c328311ad674b72f32e06eb6efd9e5 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Sun, 10 May 2020 06:36:18 -0700 Subject: [PATCH 04/18] removed DataService constructor and updated services that use the DataService --- server/src/services/dataService.py | 6 ++---- server/src/services/frequencyService.py | 4 ++-- server/src/services/heatmapService.py | 2 +- server/src/services/pinClusterService.py | 2 +- server/src/services/pinService.py | 4 ++-- server/src/services/requestCountsService.py | 4 ++-- server/src/services/requestDetailService.py | 4 ++-- server/src/services/timeToCloseService.py | 4 ++-- 8 files changed, 14 insertions(+), 16 deletions(-) diff --git a/server/src/services/dataService.py b/server/src/services/dataService.py index 5d4beae1c..ff0270f47 100644 --- a/server/src/services/dataService.py +++ b/server/src/services/dataService.py @@ -2,12 +2,10 @@ import pandas as pd from .databaseOrm import Ingest as Request from utils.database import db +import sqlalchemy as sql class DataService(object): - def __init__(self, config=None, tableName="ingest_staging_table"): - pass - async def lastPulled(self): # Will represent last time the ingest pipeline ran return datetime.datetime.utcnow() @@ -40,7 +38,7 @@ def comparisonFilters(self, Request.createddate > startDate if startDate else False, Request.createddate < endDate if endDate else False, Request.requesttype.in_(requestTypes), - db.or_(Request.nc.in_(ncList), Request.cd.in_(cdList)) + sql.or_(Request.nc.in_(ncList), Request.cd.in_(cdList)) ] def itemQuery(self, requestNumber): diff --git a/server/src/services/frequencyService.py b/server/src/services/frequencyService.py index b09876d7f..0e06264d2 100644 --- a/server/src/services/frequencyService.py +++ b/server/src/services/frequencyService.py @@ -5,8 +5,8 @@ class FrequencyService(object): - def __init__(self, config=None, tableName="ingest_staging_table"): - self.dataAccess = DataService(config, tableName) + def __init__(self, config=None): + self.dataAccess = DataService() def get_bins(self, startDate, endDate): """ diff --git a/server/src/services/heatmapService.py b/server/src/services/heatmapService.py index ad4a42c69..3b88e17a8 100644 --- a/server/src/services/heatmapService.py +++ b/server/src/services/heatmapService.py @@ -20,7 +20,7 @@ async def get_heatmap(self, filters): fields = ['latitude', 'longitude'] if pins is None: - dataAccess = DataService(self.config) + dataAccess = DataService() filters = dataAccess.standardFilters( filters['startDate'], diff --git a/server/src/services/pinClusterService.py b/server/src/services/pinClusterService.py index 220e3e5c1..5d76c995c 100644 --- a/server/src/services/pinClusterService.py +++ b/server/src/services/pinClusterService.py @@ -20,7 +20,7 @@ def get_pins(self, filters): pins = cache.get(key) if pins is None: - dataAccess = DataService(self.config) + dataAccess = DataService() fields = [ 'srnumber', diff --git a/server/src/services/pinService.py b/server/src/services/pinService.py index 04088367f..101738ff6 100644 --- a/server/src/services/pinService.py +++ b/server/src/services/pinService.py @@ -2,8 
+2,8 @@ class PinService(object): - def __init__(self, config=None, tableName="ingest_staging_table"): - self.dataAccess = DataService(config, tableName) + def __init__(self, config=None): + self.dataAccess = DataService() async def get_base_pins(self, startDate=None, diff --git a/server/src/services/requestCountsService.py b/server/src/services/requestCountsService.py index 730b414de..064c15f4e 100644 --- a/server/src/services/requestCountsService.py +++ b/server/src/services/requestCountsService.py @@ -2,8 +2,8 @@ class RequestCountsService(object): - def __init__(self, config=None, tableName="ingest_staging_table"): - self.dataAccess = DataService(config, tableName) + def __init__(self, config=None): + self.dataAccess = DataService() async def get_req_counts(self, startDate=None, diff --git a/server/src/services/requestDetailService.py b/server/src/services/requestDetailService.py index 877208721..14b04ebbe 100644 --- a/server/src/services/requestDetailService.py +++ b/server/src/services/requestDetailService.py @@ -2,8 +2,8 @@ class RequestDetailService(object): - def __init__(self, config=None, tableName="ingest_staging_table"): - self.dataAccess = DataService(config, tableName) + def __init__(self, config=None): + self.dataAccess = DataService() async def get_request_detail(self, requestNumber=None): """ diff --git a/server/src/services/timeToCloseService.py b/server/src/services/timeToCloseService.py index 0ad727949..a5268cd92 100644 --- a/server/src/services/timeToCloseService.py +++ b/server/src/services/timeToCloseService.py @@ -4,8 +4,8 @@ class TimeToCloseService(object): - def __init__(self, config=None, tableName="ingest_staging_table"): - self.dataAccess = DataService(config, tableName) + def __init__(self, config=None): + self.dataAccess = DataService() def ttc(self, groupField, groupFieldItems, filters): From 6567fbfea642ea3c137fd23e04e5524ebb2a4579 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Sun, 10 May 2020 08:39:59 -0700 Subject: [PATCH 05/18] updated dataservice test --- server/test/test_db_service.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/server/test/test_db_service.py b/server/test/test_db_service.py index bbdfcfde0..3b3681b85 100644 --- a/server/test/test_db_service.py +++ b/server/test/test_db_service.py @@ -1,16 +1,10 @@ from src.services.dataService import DataService -TESTCONFIG = { - "Database": { - "DB_CONNECTION_STRING": "postgresql://testingString/postgresql" - } -} - def test_serviceExists(): # Arrange # Act - data_worker = DataService(TESTCONFIG) + data_worker = DataService() # Assert assert isinstance(data_worker, DataService) @@ -18,7 +12,7 @@ def test_serviceExists(): def test_emptyQuery(): # Arrange queryItems = [] - data_worker = DataService(TESTCONFIG) + data_worker = DataService() # Act result = data_worker.query(queryItems) # Assert @@ -28,7 +22,7 @@ def test_emptyQuery(): def test_nullQuery(): # Arrange queryItems = None - data_worker = DataService(TESTCONFIG) + data_worker = DataService() # Act result = data_worker.query(queryItems) # Assert From db065e3310ff3574fde9e255cd886559aa0c6b52 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Mon, 11 May 2020 05:45:09 -0700 Subject: [PATCH 06/18] removed arg to DataService constructor in /apistatus --- server/src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/app.py b/server/src/app.py index 9762ead5a..0b9e4b1ec 100644 --- a/server/src/app.py +++ b/server/src/app.py @@ -63,7 +63,7 @@ async def healthcheck(request): 
settings['Version']['VER_MINOR'], settings['Version']['VER_PATCH']) - data_worker = DataService(settings) + data_worker = DataService() lastPulled = await data_worker.lastPulled() return json({'currentTime': currentTime, From 6bdc33006d1776a051370c7a6cd6573584a679e6 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Sat, 9 May 2020 11:31:25 -0700 Subject: [PATCH 07/18] converted DataService to pandas --- server/src/services/dataService.py | 107 +++++++++++----------- server/src/services/frequencyService.py | 7 +- server/src/services/timeToCloseService.py | 8 +- 3 files changed, 55 insertions(+), 67 deletions(-) diff --git a/server/src/services/dataService.py b/server/src/services/dataService.py index ff0270f47..5d2b04440 100644 --- a/server/src/services/dataService.py +++ b/server/src/services/dataService.py @@ -1,11 +1,12 @@ import datetime import pandas as pd -from .databaseOrm import Ingest as Request +from .databaseOrm import Ingest from utils.database import db -import sqlalchemy as sql class DataService(object): + default_table = Ingest.__tablename__ + async def lastPulled(self): # Will represent last time the ingest pipeline ran return datetime.datetime.utcnow() @@ -18,12 +19,15 @@ def standardFilters(self, ''' Generates filters for dates, request types, and ncs. ''' - return [ - Request.createddate > startDate if startDate else False, - Request.createddate < endDate if endDate else False, - Request.requesttype.in_(requestTypes), - Request.nc.in_(ncList), - ] + + requestTypes = (', ').join([f"'{rt}'" for rt in requestTypes]) + ncList = (', ').join([str(nc) for nc in ncList]) + return f""" + createddate >= '{startDate}' AND + createddate <= '{endDate}' AND + requesttype IN ({requestTypes}) AND + nc IN ({ncList}) + """ def comparisonFilters(self, startDate=None, @@ -34,14 +38,26 @@ def comparisonFilters(self, ''' Generates filters for the comparison endpoints. ''' - return [ - Request.createddate > startDate if startDate else False, - Request.createddate < endDate if endDate else False, - Request.requesttype.in_(requestTypes), - sql.or_(Request.nc.in_(ncList), Request.cd.in_(cdList)) - ] - - def itemQuery(self, requestNumber): + + requestTypes = (', ').join([f"'{rt}'" for rt in requestTypes]) + if len(ncList) > 0: + ncList = (', ').join([str(nc) for nc in ncList]) + return f""" + createddate >= '{startDate}' AND + createddate <= '{endDate}' AND + requesttype IN ({requestTypes}) AND + nc IN ({ncList}) + """ + else: + cdList = (', ').join([str(cd) for cd in cdList]) + return f""" + createddate >= '{startDate}' AND + createddate <= '{endDate}' AND + requesttype IN ({requestTypes}) AND + cd IN ({cdList}) + """ + + def itemQuery(self, requestNumber, table=default_table): ''' Returns a single request by its requestNumber. 
''' @@ -49,59 +65,38 @@ def itemQuery(self, requestNumber): if not requestNumber or not isinstance(requestNumber, str): return {'Error': 'Missing request number'} - fields = Request.__table__.columns.keys() - if 'id' in fields: - fields.remove('id') + rows = db.exec_sql(f""" + SELECT * FROM {table} + WHERE srnumber = '{requestNumber}' + """) - session = db.Session() - record = session \ - .query(*fields) \ - .filter(Request.srnumber == requestNumber) \ - .first() - session.close() + rows = [dict(row) for row in rows] - if record: - return record._asdict() + if len(rows) > 0: + return rows[0] else: return {'Error': 'Request number not found'} - def query(self, queryItems=[], queryFilters=[], limit=None): - ''' - Returns the specified properties of each request, - after filtering by queryFilters and applying the limit. - ''' + def query(self, fields, filters, table=default_table): + fields = (', ').join(fields) + return pd.read_sql(f""" + SELECT {fields} + FROM {table} + WHERE {filters} + """, db.engine) - if not queryItems or not isinstance(queryItems, list): - return {'Error': 'Missing query items'} - - selectFields = [getattr(Request, item) for item in queryItems] - - session = db.Session() - records = session \ - .query(*selectFields) \ - .filter(*queryFilters) \ - .limit(limit) \ - .all() - session.close() - - return [rec._asdict() for rec in records] - - def aggregateQuery(self, countFields=[], queryFilters=[]): + def aggregateQuery(self, fields, filters, table=default_table): ''' Returns the counts of distinct values in the specified fields, - after filtering by queryFilters. + after filtering. ''' - if not countFields or not isinstance(countFields, list): + if not fields or not isinstance(fields, list): return {'Error': 'Missing count fields'} - filteredData = self.query(countFields, queryFilters) - df = pd.DataFrame(data=filteredData) + df = self.query(fields, filters, table) return [{ 'field': field, 'counts': df.groupby(by=field).size().to_dict() - } for field in countFields if field in df.columns] - - def storedProc(self): - pass + } for field in fields if field in df.columns] diff --git a/server/src/services/frequencyService.py b/server/src/services/frequencyService.py index 0e06264d2..130dab57c 100644 --- a/server/src/services/frequencyService.py +++ b/server/src/services/frequencyService.py @@ -47,12 +47,9 @@ def get_counts(dates, bins): counts, _ = np.histogram(dates, bins=bins) return list(map(int, counts)) - # grab the necessary data from the db + # grab the necessary data from the db and drop nulls fields = [groupField, 'createddate'] - data = self.dataAccess.query(fields, filters) - - # read into a dataframe and drop the nulls - df = pd.DataFrame(data, columns=fields).dropna() + df = self.dataAccess.query(fields, filters).dropna() # convert bins to float so numpy can use them bins_fl = np.array(bins).astype('datetime64[s]').astype('float') diff --git a/server/src/services/timeToCloseService.py b/server/src/services/timeToCloseService.py index a5268cd92..64546684a 100644 --- a/server/src/services/timeToCloseService.py +++ b/server/src/services/timeToCloseService.py @@ -1,4 +1,3 @@ -import pandas as pd import numpy as np from .dataService import DataService @@ -52,12 +51,9 @@ def get_boxplot_stats(arr, C=1.5): 'outlierCount': len(outliers) } - # grab the necessary data from the db + # grab the necessary data from the db and drop nulls fields = [groupField, '_daystoclose'] - data = self.dataAccess.query(fields, filters) - - # read into a dataframe and drop the nulls - dtc_df 
= pd.DataFrame(data, columns=fields).dropna() + dtc_df = self.dataAccess.query(fields, filters).dropna() # group the requests by type and get box plot stats for each type data = dtc_df \ From ad2a7fd515b1533d57707b0ed6af8edfe3fc3e23 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Sun, 10 May 2020 22:09:37 -0700 Subject: [PATCH 08/18] map and vis views --- server/src/services/frequencyService.py | 2 +- server/src/services/heatmapService.py | 2 +- server/src/services/pinClusterService.py | 2 +- server/src/services/requestCountsService.py | 9 ++- server/src/services/sqlIngest.py | 70 ++++++++++++++++++++- server/src/services/timeToCloseService.py | 2 +- 6 files changed, 79 insertions(+), 8 deletions(-) diff --git a/server/src/services/frequencyService.py b/server/src/services/frequencyService.py index 130dab57c..3729612b4 100644 --- a/server/src/services/frequencyService.py +++ b/server/src/services/frequencyService.py @@ -49,7 +49,7 @@ def get_counts(dates, bins): # grab the necessary data from the db and drop nulls fields = [groupField, 'createddate'] - df = self.dataAccess.query(fields, filters).dropna() + df = self.dataAccess.query(fields, filters, table='vis').dropna() # convert bins to float so numpy can use them bins_fl = np.array(bins).astype('datetime64[s]').astype('float') diff --git a/server/src/services/heatmapService.py b/server/src/services/heatmapService.py index 3b88e17a8..0ec1f6f7b 100644 --- a/server/src/services/heatmapService.py +++ b/server/src/services/heatmapService.py @@ -28,7 +28,7 @@ async def get_heatmap(self, filters): filters['requestTypes'], filters['ncList']) - pins = dataAccess.query(fields, filters) + pins = dataAccess.query(fields, filters, table='map') pins = pd.DataFrame(pins, columns=fields) else: pins = pins[fields] diff --git a/server/src/services/pinClusterService.py b/server/src/services/pinClusterService.py index 5d76c995c..cc2bcb431 100644 --- a/server/src/services/pinClusterService.py +++ b/server/src/services/pinClusterService.py @@ -34,7 +34,7 @@ def get_pins(self, filters): filters['requestTypes'], filters['ncList']) - pins = dataAccess.query(fields, filters) + pins = dataAccess.query(fields, filters, table='map') pins = pd.DataFrame(pins, columns=fields) cache.set(key, pins) diff --git a/server/src/services/requestCountsService.py b/server/src/services/requestCountsService.py index 064c15f4e..44bf66ca5 100644 --- a/server/src/services/requestCountsService.py +++ b/server/src/services/requestCountsService.py @@ -38,7 +38,8 @@ async def get_req_counts(self, filters = self.dataAccess.standardFilters( startDate, endDate, requestTypes, ncList) - return self.dataAccess.aggregateQuery(countFields, filters) + return self.dataAccess.aggregateQuery( + countFields, filters, table='vis') async def get_req_counts_comparison(self, startDate=None, @@ -112,10 +113,12 @@ def get_filters(district, items): return self.dataAccess.comparisonFilters(**common) filters = get_filters(set1['district'], set1['list']) - set1data = self.dataAccess.aggregateQuery(countFields, filters) + set1data = self.dataAccess.aggregateQuery( + countFields, filters, table='vis') filters = get_filters(set2['district'], set2['list']) - set2data = self.dataAccess.aggregateQuery(countFields, filters) + set2data = self.dataAccess.aggregateQuery( + countFields, filters, table='vis') return { 'set1': { diff --git a/server/src/services/sqlIngest.py b/server/src/services/sqlIngest.py index 1894a765a..66bec8694 100644 --- a/server/src/services/sqlIngest.py +++ b/server/src/services/sqlIngest.py @@ 
-27,7 +27,9 @@ def __del__(self): def resetDatabase(self): log('\nResetting database.') - Base.metadata.drop_all(db.engine) + db.exec_sql(f""" + DROP TABLE IF EXISTS {Ingest.__tablename__} CASCADE + """) Base.metadata.create_all(db.engine) def fetchData(self, year, offset, limit): @@ -155,6 +157,70 @@ def fixHistoricCulturalNorth(table, report): return report + def createViews(self): + def createMapView(table, report): + rows = db.exec_sql(f""" + CREATE MATERIALIZED VIEW map AS + SELECT + srnumber, + requesttype, + nc, + latitude, + longitude, + createddate + FROM {table} + WHERE + latitude IS NOT NULL AND + longitude IS NOT NULL + WITH DATA; + """) + + db.exec_sql(""" + CREATE INDEX map_nc_index ON map(nc); + CREATE INDEX map_requesttype_index ON map(requesttype); + CREATE INDEX map_createddate_index ON map(createddate); + """) + + report.append({ + 'description': 'create map view', + 'rowsAffected': rows.rowcount + }) + + def createVisView(table, report): + rows = db.exec_sql(f""" + CREATE MATERIALIZED VIEW vis AS + SELECT + requesttype, + requestsource, + nc, + cd, + createddate, + _daystoclose + FROM {table} + WITH DATA; + """) + + db.exec_sql(""" + CREATE INDEX vis_nc_index ON vis(nc); + CREATE INDEX vis_cd_index ON vis(cd); + CREATE INDEX vis_requesttype_index ON vis(requesttype); + CREATE INDEX vis_createddate_index ON vis(createddate); + """) + + report.append({ + 'description': 'create vis view', + 'rowsAffected': rows.rowcount + }) + + log('\nCreating views on ingest table.') + table = Ingest.__tablename__ + report = [] + + createMapView(table, report) + createVisView(table, report) + + return report + async def populateDatabase(self, years=[], limit=None, querySize=None): log('\nPopulating database for years: {}'.format(list(years))) timer = Timer() @@ -167,6 +233,7 @@ async def populateDatabase(self, years=[], limit=None, querySize=None): insertReport.append(inserts) cleanReport = self.cleanTable() + viewsReport = self.createViews() minutes = timer.end() log('\nDone with ingestion after {} minutes.\n'.format(minutes)) @@ -174,6 +241,7 @@ async def populateDatabase(self, years=[], limit=None, querySize=None): report = { 'insertion': insertReport, 'cleaning': cleanReport, + 'views': viewsReport, 'totalMinutesElapsed': minutes } log(json.dumps(report, indent=2)) diff --git a/server/src/services/timeToCloseService.py b/server/src/services/timeToCloseService.py index 64546684a..408bb627f 100644 --- a/server/src/services/timeToCloseService.py +++ b/server/src/services/timeToCloseService.py @@ -53,7 +53,7 @@ def get_boxplot_stats(arr, C=1.5): # grab the necessary data from the db and drop nulls fields = [groupField, '_daystoclose'] - dtc_df = self.dataAccess.query(fields, filters).dropna() + dtc_df = self.dataAccess.query(fields, filters, table='vis').dropna() # group the requests by type and get box plot stats for each type data = dtc_df \ From a9511e28572a2d00ed88152a204a6e34d939e378 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Mon, 11 May 2020 10:47:31 -0700 Subject: [PATCH 09/18] handling null fields or filters in dataservice.query --- server/src/services/dataService.py | 3 +++ server/test/test_db_service.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/server/src/services/dataService.py b/server/src/services/dataService.py index 5d2b04440..14129c1fe 100644 --- a/server/src/services/dataService.py +++ b/server/src/services/dataService.py @@ -78,6 +78,9 @@ def itemQuery(self, requestNumber, table=default_table): return {'Error': 'Request number not 
found'} def query(self, fields, filters, table=default_table): + if not fields or not filters: + return {'Error': 'fields and filters are required'} + fields = (', ').join(fields) return pd.read_sql(f""" SELECT {fields} diff --git a/server/test/test_db_service.py b/server/test/test_db_service.py index 3b3681b85..08bdff5b8 100644 --- a/server/test/test_db_service.py +++ b/server/test/test_db_service.py @@ -12,9 +12,10 @@ def test_serviceExists(): def test_emptyQuery(): # Arrange queryItems = [] + filters = None data_worker = DataService() # Act - result = data_worker.query(queryItems) + result = data_worker.query(queryItems, filters) # Assert assert result['Error'] is not None @@ -22,8 +23,9 @@ def test_emptyQuery(): def test_nullQuery(): # Arrange queryItems = None + filters = None data_worker = DataService() # Act - result = data_worker.query(queryItems) + result = data_worker.query(queryItems, filters) # Assert assert result['Error'] is not None From c3041b569da2c02b159839ca6e0d80321d1df28d Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Mon, 11 May 2020 11:51:46 -0700 Subject: [PATCH 10/18] metadata table with last_pulled populated during ingest --- server/src/services/dataService.py | 5 ++--- server/src/services/sqlIngest.py | 6 +++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/server/src/services/dataService.py b/server/src/services/dataService.py index 14129c1fe..addafa317 100644 --- a/server/src/services/dataService.py +++ b/server/src/services/dataService.py @@ -1,4 +1,3 @@ -import datetime import pandas as pd from .databaseOrm import Ingest from utils.database import db @@ -8,8 +7,8 @@ class DataService(object): default_table = Ingest.__tablename__ async def lastPulled(self): - # Will represent last time the ingest pipeline ran - return datetime.datetime.utcnow() + rows = db.exec_sql('SELECT last_pulled FROM metadata') + return rows.first()[0] def standardFilters(self, startDate=None, diff --git a/server/src/services/sqlIngest.py b/server/src/services/sqlIngest.py index 66bec8694..7f5eb4477 100644 --- a/server/src/services/sqlIngest.py +++ b/server/src/services/sqlIngest.py @@ -28,7 +28,11 @@ def __del__(self): def resetDatabase(self): log('\nResetting database.') db.exec_sql(f""" - DROP TABLE IF EXISTS {Ingest.__tablename__} CASCADE + DROP TABLE IF EXISTS {Ingest.__tablename__} CASCADE; + + DROP TABLE IF EXISTS metadata; + CREATE TABLE metadata AS + SELECT * FROM (VALUES (NOW())) as vals(last_pulled); """) Base.metadata.create_all(db.engine) From 45db20f0f061234c7b149f24ac117f8fc9c50f67 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Sun, 3 May 2020 04:48:25 -0700 Subject: [PATCH 11/18] /visualizations and /comparison endpoints working --- server/src/app.py | 41 +++++ server/src/services/comparisonService.py | 163 +++++++++++++++++++ server/src/services/visualizationsService.py | 53 ++++++ server/src/utils/stats.py | 144 ++++++++++++++++ 4 files changed, 401 insertions(+) create mode 100644 server/src/services/comparisonService.py create mode 100644 server/src/services/visualizationsService.py create mode 100644 server/src/utils/stats.py diff --git a/server/src/app.py b/server/src/app.py index 0b9e4b1ec..f9d130038 100644 --- a/server/src/app.py +++ b/server/src/app.py @@ -8,6 +8,8 @@ from datetime import datetime from multiprocessing import cpu_count +from services.visualizationsService import VisualizationsService +from services.comparisonService import ComparisonService from services.timeToCloseService import TimeToCloseService from 
services.frequencyService import FrequencyService
 from services.pinService import PinService
@@ -78,6 +80,45 @@ async def index(request):
     return json('You hit the index')
 
 
+@app.route('/visualizations', methods=["POST"])
+@compress.compress()
+async def visualizations(request):
+    worker = VisualizationsService()
+
+    postArgs = request.json
+    start = postArgs.get('startDate', None)
+    end = postArgs.get('endDate', None)
+    ncs = postArgs.get('ncList', [])
+    requests = postArgs.get('requestTypes', [])
+
+    data = await worker.visualizations(startDate=start,
+                                       endDate=end,
+                                       requestTypes=requests,
+                                       ncList=ncs)
+    return json(data)
+
+
+@app.route('/comparison/<type>', methods=["POST"])
+@compress.compress()
+async def comparison(request, type):
+    worker = ComparisonService()
+
+    postArgs = request.json
+    startDate = postArgs.get('startDate', None)
+    endDate = postArgs.get('endDate', None)
+    requestTypes = postArgs.get('requestTypes', [])
+    set1 = postArgs.get('set1', None)
+    set2 = postArgs.get('set2', None)
+
+    data = await worker.comparison(type=type,
+                                   startDate=startDate,
+                                   endDate=endDate,
+                                   requestTypes=requestTypes,
+                                   set1=set1,
+                                   set2=set2)
+    return json(data)
+
+
 @app.route('/timetoclose', methods=["POST"])
 @compress.compress()
 async def timetoclose(request):
diff --git a/server/src/services/comparisonService.py b/server/src/services/comparisonService.py
new file mode 100644
index 000000000..131cfcd02
--- /dev/null
+++ b/server/src/services/comparisonService.py
@@ -0,0 +1,163 @@
+from .dataService import DataService
+from utils.stats import box_plots, date_bins, date_histograms, counts
+
+
+class ComparisonService(object):
+    def __init__(self, config=None):
+        self.dataAccess = DataService()
+
+    def frequency_comparison(self,
+                             startDate=None,
+                             endDate=None,
+                             requestTypes=[],
+                             set1={'district': None, 'list': []},
+                             set2={'district': None, 'list': []}):
+
+        def get_data(district, items, bins, start, end):
+            common = {
+                'startDate': start,
+                'endDate': end,
+                'requestTypes': requestTypes
+            }
+
+            if district == 'nc':
+                common['ncList'] = items
+                groupField = 'nc'
+            elif district == 'cc':
+                common['cdList'] = items
+                groupField = 'cd'
+
+            fields = [groupField, 'createddate']
+            filters = self.dataAccess.comparisonFilters(**common)
+            df = self.dataAccess.query(fields, filters, table='vis')
+
+            return date_histograms(
+                df,
+                dateField='createddate',
+                bins=bins,
+                groupField=groupField,
+                groupFieldItems=items)
+
+        bins, start, end = date_bins(startDate, endDate)
+        set1data = get_data(set1['district'], set1['list'], bins, start, end)
+        set2data = get_data(set2['district'], set2['list'], bins, start, end)
+
+        return {
+            'bins': list(bins.astype(str)),
+            'set1': {
+                'district': set1['district'],
+                'counts': set1data
+            },
+            'set2': {
+                'district': set2['district'],
+                'counts': set2data
+            }
+        }
+
+    def ttc_comparison(self,
+                       startDate=None,
+                       endDate=None,
+                       requestTypes=[],
+                       set1={'district': None, 'list': []},
+                       set2={'district': None, 'list': []}):
+
+        def get_data(district, items):
+            common = {
+                'startDate': startDate,
+                'endDate': endDate,
+                'requestTypes': requestTypes
+            }
+
+            if district == 'nc':
+                common['ncList'] = items
+                groupField = 'nc'
+            elif district == 'cc':
+                common['cdList'] = items
+                groupField = 'cd'
+
+            fields = [groupField, '_daystoclose']
+            filters = self.dataAccess.comparisonFilters(**common)
+            df = self.dataAccess.query(fields, filters, table='vis')
+
+            return box_plots(
+                df,
+                plotField='_daystoclose',
+                groupField=groupField,
+                groupFieldItems=items)
+
+        set1data = 
get_data(set1['district'], set1['list']) + set2data = get_data(set2['district'], set2['list']) + + return { + 'set1': { + 'district': set1['district'], + 'data': set1data + }, + 'set2': { + 'district': set2['district'], + 'data': set2data + } + } + + def counts_comparison(self, + startDate=None, + endDate=None, + requestTypes=[], + set1={'district': None, 'list': []}, + set2={'district': None, 'list': []}): + + def get_data(district, items): + common = { + 'startDate': startDate, + 'endDate': endDate, + 'requestTypes': requestTypes + } + + if district == 'nc': + common['ncList'] = items + elif district == 'cc': + common['cdList'] = items + + fields = ['requestsource'] + filters = self.dataAccess.comparisonFilters(**common) + df = self.dataAccess.query(fields, filters, table='vis') + + return counts(df, 'requestsource') + + set1data = get_data(set1['district'], set1['list']) + set2data = get_data(set2['district'], set2['list']) + + return { + 'set1': { + 'district': set1['district'], + 'source': set1data + }, + 'set2': { + 'district': set2['district'], + 'source': set2data + } + } + + async def comparison(self, + type=None, + startDate=None, + endDate=None, + requestTypes=[], + set1={'district': None, 'list': []}, + set2={'district': None, 'list': []}): + + args = { + 'startDate': startDate, + 'endDate': endDate, + 'requestTypes': requestTypes, + 'set1': set1, + 'set2': set2} + + if type == 'frequency': + return self.frequency_comparison(**args) + elif type == 'timetoclose': + return self.ttc_comparison(**args) + elif type == 'counts': + return self.counts_comparison(**args) + else: + return {'Error': 'Unrecognized comparison type'} diff --git a/server/src/services/visualizationsService.py b/server/src/services/visualizationsService.py new file mode 100644 index 000000000..4d9f7c811 --- /dev/null +++ b/server/src/services/visualizationsService.py @@ -0,0 +1,53 @@ +from .dataService import DataService +from utils.stats import box_plots, date_bins, date_histograms, counts + + +class VisualizationsService(object): + def __init__(self, config=None): + self.dataAccess = DataService() + + async def visualizations(self, + startDate=None, + endDate=None, + requestTypes=[], + ncList=[]): + + bins, start, end = date_bins(startDate, endDate) + + fields = [ + 'requesttype', + 'createddate', + '_daystoclose', + 'requestsource'] + + filters = self.dataAccess.standardFilters( + start, end, requestTypes, ncList) + + df = self.dataAccess.query(fields, filters, table='vis') + + inner_df = df.loc[ + (df['createddate'] > startDate) & + (df['createddate'] < endDate)] + + return { + 'frequency': { + 'bins': list(bins.astype(str)), + 'counts': date_histograms( + df, + dateField='createddate', + bins=bins, + groupField='requesttype', + groupFieldItems=requestTypes) + }, + + 'timeToClose': box_plots( + inner_df, + plotField='_daystoclose', + groupField='requesttype', + groupFieldItems=requestTypes), + + 'counts': { + 'type': counts(inner_df, groupField='requesttype'), + 'source': counts(inner_df, groupField='requestsource') + } + } diff --git a/server/src/utils/stats.py b/server/src/utils/stats.py new file mode 100644 index 000000000..82e667d73 --- /dev/null +++ b/server/src/utils/stats.py @@ -0,0 +1,144 @@ +""" +This is a collection of pure statistical functions that support +the visualizations and comparison endpoints. +""" + +import pandas as pd +import numpy as np +import math + + +def box_plot(arr, C=1.5): + """ + Takes a one-dimensional numpy array of floats and generates boxplot + statistics for the data. 
The basic algorithm is standard.
+    See https://en.wikipedia.org/wiki/Box_plot
+
+    The max length of the whiskers is the constant C, multiplied by the
+    interquartile range. This is a common method, although there
+    are others. The default value of C=1.5 is typical when this
+    method is used.
+    See matplotlib.org/3.1.3/api/_as_gen/matplotlib.pyplot.boxplot.html
+    """
+    # calculate first and third quartiles
+    q1 = np.quantile(arr, 0.25)
+    q3 = np.quantile(arr, 0.75)
+
+    # calculate whiskers
+    iqr = q3 - q1
+    whiskerMin = arr[arr >= q1 - C * iqr].min()
+    whiskerMax = arr[arr <= q3 + C * iqr].max()
+
+    # don't let whiskers be inside range q1 -> q3
+    whiskerMin = min([q1, whiskerMin])
+    whiskerMax = max([q3, whiskerMax])
+
+    # calculate outliers
+    minOutliers = arr[arr < whiskerMin]
+    maxOutliers = arr[arr > whiskerMax]
+    outliers = list(np.concatenate((minOutliers, maxOutliers)))
+
+    return {
+        'min': np.min(arr),
+        'q1': q1,
+        'median': np.median(arr),
+        'q3': q3,
+        'max': np.max(arr),
+        'whiskerMin': whiskerMin,
+        'whiskerMax': whiskerMax,
+        'count': len(arr),
+        'outlierCount': len(outliers)
+    }
+
+
+def box_plots(df, plotField, groupField, groupFieldItems):
+    """
+    Returns a dictionary of box plot statistics for the plotField,
+    where the keys are the unique items in the groupField.
+    """
+    # reduce df and drop the nulls
+    df = df[[plotField, groupField]].dropna()
+
+    # group the requests by type and get box plot stats for each type
+    data = df \
+        .groupby(by=groupField) \
+        .apply(lambda df: box_plot(df[plotField].values)) \
+        .to_dict()
+
+    # if no rows exist for a particular item in the groupField,
+    # return a count of 0
+    for item in groupFieldItems:
+        if item not in data.keys():
+            data[item] = {'count': 0}
+
+    return data
+
+
+def date_bins(startDate, endDate):
+    """
+    Takes a date range and returns a list of equal-size date bins that
+    cover the range.
+
+    For ranges of 24 days or less, each bin covers one calendar day.
+
+    For larger ranges, each bin is the largest size such that:
+    (1) the size is a whole number of days (i.e. the bin edges
+    are all at midnight)
+    (2) the number of bins is at least 12.
+
+    Not all date ranges are evenly divisible by a whole number of
+    days, so in cases where they aren't, we move the end date forward
+    so that the last bin is the same size as the rest.
+    """
+    start = pd.to_datetime(startDate)
+    end = pd.to_datetime(endDate) + pd.Timedelta(days=1)
+    diff = (end - start).days
+
+    # calculate size and number of bins
+    bin_size = 1 if diff <= 24 else diff // 12
+    num_bins = math.ceil(diff / bin_size)
+
+    # move the end date forward in cases where the range can't
+    # be evenly divided
+    if diff != num_bins * bin_size:
+        end = start + num_bins * pd.Timedelta(days=bin_size)
+
+    bins = pd.date_range(start, end, freq='{}D'.format(bin_size))
+    return bins, start, end
+
+
+def date_histogram(dates, bins):
+    """ count the number of dates in each date bin """
+    dates = dates.astype('datetime64[s]').astype('float')
+    counts, _ = np.histogram(dates, bins=bins)
+    return list(map(int, counts))
+
+
+def date_histograms(df, dateField, bins, groupField, groupFieldItems):
+    """
+    Returns a dictionary of histograms, where the keys are the unique values
+    in the groupField, and the values are the counts within the dateField.
+ """ + # reduce df and drop the nulls + df = df[[dateField, groupField]].dropna() + + # convert bins to float so numpy can use them + bins_fl = np.array(bins).astype('datetime64[s]').astype('float') + + # count the requests created in each bin + counts = df \ + .groupby(by=groupField) \ + .apply(lambda x: date_histogram(x[dateField].values, bins_fl)) \ + .to_dict() + + # if no rows exist for a particular item in the groupField, + # return all 0's for that item + for item in groupFieldItems: + if item not in counts.keys(): + counts[item] = [0 for bin in bins][:-1] + + return counts + + +def counts(df, groupField): + return df.groupby(by=groupField).size().to_dict() From 1c2bb409174dd36b76e05b7afbed0438bffdea1a Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Tue, 12 May 2020 14:19:45 -0700 Subject: [PATCH 12/18] frontend integration --- src/redux/sagas/comparisonData.js | 22 ++++----------- src/redux/sagas/data.js | 46 ++----------------------------- 2 files changed, 8 insertions(+), 60 deletions(-) diff --git a/src/redux/sagas/comparisonData.js b/src/redux/sagas/comparisonData.js index 97d11eeee..31851bce3 100644 --- a/src/redux/sagas/comparisonData.js +++ b/src/redux/sagas/comparisonData.js @@ -23,27 +23,15 @@ import { const BASE_URL = process.env.DB_URL; function* getCountsComparison(filters) { - const url = `${BASE_URL}/requestcounts-comparison`; + const url = `${BASE_URL}/comparison/counts`; - const { data: { set1, set2 } } = yield call(axios.post, url, { - ...filters, - countFields: ['requestsource'], - }); + const { data } = yield call(axios.post, url, filters); - return { - set1: { - district: set1.district, - source: set1.data.find(d => d.field === 'requestsource')?.counts, - }, - set2: { - district: set2.district, - source: set2.data.find(d => d.field === 'requestsource')?.counts, - }, - }; + return data; } function* getTimeToCloseComparison(filters) { - const url = `${BASE_URL}/timetoclose-comparison`; + const url = `${BASE_URL}/comparison/timetoclose`; const { data } = yield call(axios.post, url, filters); @@ -51,7 +39,7 @@ function* getTimeToCloseComparison(filters) { } function* getFrequencyComparison(filters) { - const url = `${BASE_URL}/requestfrequency-comparison`; + const url = `${BASE_URL}/comparison/frequency`; const { data } = yield call(axios.post, url, filters); diff --git a/src/redux/sagas/data.js b/src/redux/sagas/data.js index da42b9e55..0e3e01b51 100644 --- a/src/redux/sagas/data.js +++ b/src/redux/sagas/data.js @@ -76,54 +76,14 @@ function* fetchPinInfo(srnumber) { /* //// VISUALIZATIONS //// */ -function* fetchCounts(filters) { - const countsUrl = `${BASE_URL}/requestcounts`; - - const { data } = yield call(axios.post, countsUrl, { - ...filters, - countFields: ['requesttype', 'requestsource'], - }); - - return { - type: data.find(d => d.field === 'requesttype')?.counts, - source: data.find(d => d.field === 'requestsource')?.counts, - }; -} - -function* fetchFrequency(filters) { - const frequencyUrl = `${BASE_URL}/requestfrequency`; - - const { data } = yield call(axios.post, frequencyUrl, filters); - - return data; -} - -function* fetchTimeToClose(filters) { - const ttcUrl = `${BASE_URL}/timetoclose`; +function* fetchVisData(filters) { + const visUrl = `${BASE_URL}/visualizations`; - const { data } = yield call(axios.post, ttcUrl, filters); + const { data } = yield call(axios.post, visUrl, filters); return data; } -function* fetchVisData(filters) { - const [ - counts, - frequency, - timeToClose, - ] = yield all([ - call(fetchCounts, filters), - 
call(fetchFrequency, filters),
-    call(fetchTimeToClose, filters),
-  ]);
-
-  return {
-    counts,
-    frequency,
-    timeToClose,
-  };
-}
-
 /* //// OTHER //// */
 
 function* postFeedback(message) {
From f39aaa25224e20404150954153678ac41cb847c0 Mon Sep 17 00:00:00 2001
From: Jake Mensch
Date: Tue, 12 May 2020 17:32:12 -0700
Subject: [PATCH 13/18] deleted unused endpoints and their supporting services

---
 server/src/app.py                           | 196 +++-----------------
 server/src/services/frequencyService.py     | 163 ----------------
 server/src/services/pinService.py           |  35 ----
 server/src/services/requestCountsService.py | 132 -------------
 server/src/services/timeToCloseService.py   | 164 ----------------
 5 files changed, 29 insertions(+), 661 deletions(-)
 delete mode 100644 server/src/services/frequencyService.py
 delete mode 100644 server/src/services/pinService.py
 delete mode 100644 server/src/services/requestCountsService.py
 delete mode 100644 server/src/services/timeToCloseService.py

diff --git a/server/src/app.py b/server/src/app.py
index f9d130038..4fd1e3e68 100644
--- a/server/src/app.py
+++ b/server/src/app.py
@@ -8,15 +8,11 @@
 from datetime import datetime
 from multiprocessing import cpu_count
 
-from services.visualizationsService import VisualizationsService
-from services.comparisonService import ComparisonService
-from services.timeToCloseService import TimeToCloseService
-from services.frequencyService import FrequencyService
-from services.pinService import PinService
 from services.pinClusterService import PinClusterService
 from services.heatmapService import HeatmapService
-from services.requestCountsService import RequestCountsService
 from services.requestDetailService import RequestDetailService
+from services.visualizationsService import VisualizationsService
+from services.comparisonService import ComparisonService
 from services.sqlIngest import DataHandler
 from services.feedbackService import FeedbackService
 from services.dataService import DataService
@@ -80,121 +76,6 @@ async def index(request):
     return json('You hit the index')
 
 
-@app.route('/visualizations', methods=["POST"])
-@compress.compress()
-async def visualizations(request):
-    worker = VisualizationsService()
-
-    postArgs = request.json
-    start = postArgs.get('startDate', None)
-    end = postArgs.get('endDate', None)
-    ncs = postArgs.get('ncList', [])
-    requests = postArgs.get('requestTypes', [])
-
-    data = await worker.visualizations(startDate=start,
-                                       endDate=end,
-                                       requestTypes=requests,
-                                       ncList=ncs)
-    return json(data)
-
-
-@app.route('/comparison/<type>', methods=["POST"])
-@compress.compress()
-async def comparison(request, type):
-    worker = ComparisonService()
-
-    postArgs = request.json
-    startDate = postArgs.get('startDate', None)
-    endDate = postArgs.get('endDate', None)
-    requestTypes = postArgs.get('requestTypes', [])
-    set1 = postArgs.get('set1', None)
-    set2 = postArgs.get('set2', None)
-
-    data = await worker.comparison(type=type,
-                                   startDate=startDate,
-                                   endDate=endDate,
-                                   requestTypes=requestTypes,
-                                   set1=set1,
-                                   set2=set2)
-    return json(data)
-
-
-@app.route('/timetoclose', methods=["POST"])
-@compress.compress()
-async def timetoclose(request):
-    ttc_worker = TimeToCloseService(app.config['Settings'])
-
-    postArgs = request.json
-    start = postArgs.get('startDate', None)
-    end = postArgs.get('endDate', None)
-    ncs = postArgs.get('ncList', [])
-    requests = postArgs.get('requestTypes', [])
-
-    data = await ttc_worker.get_ttc(startDate=start,
-                                    endDate=end,
-                                    ncList=ncs,
-                                    requestTypes=requests)
-    return json(data)
-
-
-@app.route('/timetoclose-comparison', methods=["POST"])
-@compress.compress()
-async def timetoclose_comparison(request):
-    ttc_worker = TimeToCloseService(app.config['Settings'])
-
-    postArgs = request.json
-    startDate = postArgs.get('startDate', None)
-    endDate = postArgs.get('endDate', None)
-    requestTypes = postArgs.get('requestTypes', [])
-    set1 = postArgs.get('set1', None)
-    set2 = postArgs.get('set2', None)
-
-    data = await ttc_worker.get_ttc_comparison(startDate=startDate,
-                                               endDate=endDate,
-                                               requestTypes=requestTypes,
-                                               set1=set1,
-                                               set2=set2)
-    return json(data)
-
-
-@app.route('/requestfrequency', methods=["POST"])
-@compress.compress()
-async def requestfrequency(request):
-    freq_worker = FrequencyService(app.config['Settings'])
-
-    postArgs = request.json
-    startDate = postArgs.get('startDate', None)
-    endDate = postArgs.get('endDate', None)
-    ncList = postArgs.get('ncList', [])
-    requestTypes = postArgs.get('requestTypes', [])
-
-    data = await freq_worker.get_frequency(startDate=startDate,
-                                           endDate=endDate,
-                                           ncList=ncList,
-                                           requestTypes=requestTypes)
-    return json(data)
-
-
-@app.route('/requestfrequency-comparison', methods=["POST"])
-@compress.compress()
-async def requestfrequency_comparison(request):
-    worker = FrequencyService(app.config['Settings'])
-
-    postArgs = request.json
-    startDate = postArgs.get('startDate', None)
-    endDate = postArgs.get('endDate', None)
-    requestTypes = postArgs.get('requestTypes', [])
-    set1 = postArgs.get('set1', None)
-    set2 = postArgs.get('set2', None)
-
-    data = await worker.get_frequency_comparison(startDate=startDate,
-                                                 endDate=endDate,
-                                                 requestTypes=requestTypes,
-                                                 set1=set1,
-                                                 set2=set2)
-    return json(data)
-
-
 @app.route('/ingest', methods=["GET"])
 @compress.compress()
 async def ingest(request):
@@ -252,23 +133,6 @@ async def ingest(request):
     return json(data)
 
 
-@app.route('/pins', methods=["POST"])
-@compress.compress()
-async def pinMap(request):
-    pin_worker = PinService(app.config['Settings'])
-    postArgs = request.json
-    start = postArgs.get('startDate', '2015-01-01')
-    end = postArgs.get('endDate', '2015-12-31 01:01:01')
-    ncs = postArgs.get('ncList', ['SHERMAN OAKS NC'])
-    requests = postArgs.get('requestTypes', ['Bulky Items'])
-
-    return_data = await pin_worker.get_base_pins(startDate=start,
-                                                 endDate=end,
-                                                 ncList=ncs,
-                                                 requestTypes=requests)
-    return json(return_data)
-
-
 @app.route('/pin-clusters', methods=["POST"])
 @compress.compress()
 async def pinClusters(request):
@@ -306,29 +170,36 @@ async def heatmap(request):
     return json(heatmap)
 
 
-@app.route('/requestcounts', methods=["POST"])
+@app.route('/servicerequest/<srnumber>', methods=["GET"])
+async def requestDetails(request, srnumber):
+    detail_worker = RequestDetailService(app.config['Settings'])
+
+    return_data = await detail_worker.get_request_detail(srnumber)
+    return json(return_data)
+
+
+@app.route('/visualizations', methods=["POST"])
 @compress.compress()
-async def requestCounts(request):
-    counts_worker = RequestCountsService(app.config['Settings'])
+async def visualizations(request):
+    worker = VisualizationsService()
+
     postArgs = request.json
     start = postArgs.get('startDate', None)
     end = postArgs.get('endDate', None)
     ncs = postArgs.get('ncList', [])
     requests = postArgs.get('requestTypes', [])
-    countFields = postArgs.get('countFields', [])
 
-    return_data = await counts_worker.get_req_counts(startDate=start,
-                                                     endDate=end,
-                                                     ncList=ncs,
-                                                     requestTypes=requests,
-                                                     countFields=countFields)
-    return json(return_data)
+    data = await 
worker.visualizations(startDate=start,
+                                       endDate=end,
+                                       requestTypes=requests,
+                                       ncList=ncs)
+    return json(data)
 
 
-@app.route('/requestcounts-comparison', methods=["POST"])
+@app.route('/comparison/<type>', methods=["POST"])
 @compress.compress()
-async def requestCountsComparison(request):
-    worker = RequestCountsService(app.config['Settings'])
+async def comparison(request, type):
+    worker = ComparisonService()
 
     postArgs = request.json
     startDate = postArgs.get('startDate', None)
     endDate = postArgs.get('endDate', None)
     requestTypes = postArgs.get('requestTypes', [])
     set1 = postArgs.get('set1', None)
     set2 = postArgs.get('set2', None)
-    countFields = postArgs.get('countFields', [])
-
-    data = await worker.get_req_counts_comparison(startDate=startDate,
-                                                  endDate=endDate,
-                                                  requestTypes=requestTypes,
-                                                  set1=set1,
-                                                  set2=set2,
-                                                  countFields=countFields)
-    return json(data)
-
-
-@app.route('/servicerequest/<srnumber>', methods=["GET"])
-async def requestDetails(request, srnumber):
-    detail_worker = RequestDetailService(app.config['Settings'])
-
-    return_data = await detail_worker.get_request_detail(srnumber)
-    return json(return_data)
+    data = await worker.comparison(type=type,
+                                   startDate=startDate,
+                                   endDate=endDate,
+                                   requestTypes=requestTypes,
+                                   set1=set1,
+                                   set2=set2)
+    return json(data)
 
 
 @app.route('/feedback', methods=["POST"])
diff --git a/server/src/services/frequencyService.py b/server/src/services/frequencyService.py
deleted file mode 100644
index 3729612b4..000000000
--- a/server/src/services/frequencyService.py
+++ /dev/null
@@ -1,163 +0,0 @@
-import pandas as pd
-import numpy as np
-import math
-from .dataService import DataService
-
-
-class FrequencyService(object):
-    def __init__(self, config=None):
-        self.dataAccess = DataService()
-
-    def get_bins(self, startDate, endDate):
-        """
-        Takes a date range a returns a list of equal-size date bins that
-        cover the range.
-
-        For ranges of 24 days or less, each bin covers one calendar day.
-
-        For larger ranges, each bin is the largest size such that:
-        (1) the size is a whole number of days (i.e. the bin edges
-        are all at midnight)
-        (2) the number of bins is at least 12.
-
-        Not all date ranges are evenly divisible by a whole number of
-        days, so in cases where they aren't, we move the end date forward
-        so that the last bin is the same size as the rest.
- """ - start = pd.to_datetime(startDate) - end = pd.to_datetime(endDate) + pd.Timedelta(days=1) - diff = (end - start).days - - # calculate size and number of bins - bin_size = 1 if diff <= 24 else diff // 12 - num_bins = math.ceil(diff / bin_size) - - # move the end date forward in cases where the range can't - # be evenly divided - if diff != num_bins * bin_size: - end = start + num_bins * pd.Timedelta(days=bin_size) - - bins = pd.date_range(start, end, freq='{}D'.format(bin_size)) - return bins, start, end - - def frequency(self, groupField, groupFieldItems, bins, filters): - def get_counts(dates, bins): - """ count the number of dates in each date bin """ - dates = dates.astype('datetime64[s]').astype('float') - counts, _ = np.histogram(dates, bins=bins) - return list(map(int, counts)) - - # grab the necessary data from the db and drop nulls - fields = [groupField, 'createddate'] - df = self.dataAccess.query(fields, filters, table='vis').dropna() - - # convert bins to float so numpy can use them - bins_fl = np.array(bins).astype('datetime64[s]').astype('float') - - # count the requests created in each bin - counts = df \ - .groupby(by=groupField) \ - .apply(lambda x: get_counts(x['createddate'].values, bins_fl)) \ - .to_dict() - - # if no rows exist for a particular item in the groupField, - # return all 0's for that item - for item in groupFieldItems: - if item not in counts.keys(): - counts[item] = [0 for bin in bins][:-1] - - return { - 'bins': list(bins.astype(str)), - 'counts': counts - } - - async def get_frequency(self, - startDate=None, - endDate=None, - requestTypes=[], - ncList=[]): - - """ - Given a date range, covers the range with equal-length date bins, and - counts the number of requests that were created in each date bin. - - Example response if startDate = 01/01/18 and endDate = 03/02/2020 - { - 'bins': [ - "2018-01-01", - "2018-03-08", - "2018-05-13", - "2018-07-18", - "2018-09-22", - "2018-11-27", - "2019-02-01", - "2019-04-08", - "2019-06-13", - "2019-08-18", - "2019-10-23", - "2019-12-28", - "2020-03-03" - ], - 'counts': { - 'Graffiti Removal': [ - 125, 15, 53, 24, 98, 42, - 33, 128, 30, 16, 138, 57 - ], - 'Bulky Items': [ - 1, 1, 2, 3, 5, 8, - 13, 21, 34, 55, 89, 144 - ] - } - } - - Note that the number of bins is one greater than the number of counts, - because the list of bins includes the end date of the final bin. 
- """ - - bins, start, end = self.get_bins(startDate, endDate) - - filters = self.dataAccess.standardFilters( - start, end, requestTypes, ncList) - - return self.frequency('requesttype', requestTypes, bins, filters) - - async def get_frequency_comparison(self, - startDate=None, - endDate=None, - requestTypes=[], - set1={'district': None, 'list': []}, - set2={'district': None, 'list': []}): - - def get_data(district, items, bins, start, end): - common = { - 'startDate': start, - 'endDate': end, - 'requestTypes': requestTypes - } - - if district == 'nc': - common['ncList'] = items - filters = self.dataAccess.comparisonFilters(**common) - return self.frequency('nc', items, bins, filters) - - elif district == 'cc': - common['cdList'] = items - filters = self.dataAccess.comparisonFilters(**common) - return self.frequency('cd', items, bins, filters) - - bins, start, end = self.get_bins(startDate, endDate) - - set1data = get_data(set1['district'], set1['list'], bins, start, end) - set2data = get_data(set2['district'], set2['list'], bins, start, end) - - return { - 'bins': set1data['bins'], - 'set1': { - 'district': set1['district'], - 'counts': set1data['counts'] - }, - 'set2': { - 'district': set2['district'], - 'counts': set2data['counts'] - } - } diff --git a/server/src/services/pinService.py b/server/src/services/pinService.py deleted file mode 100644 index 101738ff6..000000000 --- a/server/src/services/pinService.py +++ /dev/null @@ -1,35 +0,0 @@ -from .dataService import DataService - - -class PinService(object): - def __init__(self, config=None): - self.dataAccess = DataService() - - async def get_base_pins(self, - startDate=None, - endDate=None, - requestTypes=[], - ncList=[]): - """ - Returns the base pin data given times, ncs, and request filters - { - 'data': [ - { - 'srnumber':'String', - 'requesttype': 'String', - 'latitude': 'String', - 'longitude': 'String', - } - ] - } - """ - - items = ['srnumber', - 'requesttype', - 'latitude', - 'longitude'] - - filters = self.dataAccess.standardFilters( - startDate, endDate, requestTypes, ncList) - - return self.dataAccess.query(items, filters) diff --git a/server/src/services/requestCountsService.py b/server/src/services/requestCountsService.py deleted file mode 100644 index 44bf66ca5..000000000 --- a/server/src/services/requestCountsService.py +++ /dev/null @@ -1,132 +0,0 @@ -from .dataService import DataService - - -class RequestCountsService(object): - def __init__(self, config=None): - self.dataAccess = DataService() - - async def get_req_counts(self, - startDate=None, - endDate=None, - requestTypes=[], - ncList=[], - countFields=[]): - """ - For each countField, returns the counts of each distinct value - in that field, given times, ncs, and request filters. - E.g. if countsFields is ['requesttype', 'requestsource'], returns: - [ - { - 'field': 'requesttype', - 'counts': { - 'Graffiti Removal': 'Int', - 'Bulky Items': 'Int', - ... - } - }, - { - 'field': 'requestsource', - 'counts': { - 'Mobile App': 'Int', - 'Driver Self Report': 'Int', - ... 
- } - } - ] - """ - - filters = self.dataAccess.standardFilters( - startDate, endDate, requestTypes, ncList) - - return self.dataAccess.aggregateQuery( - countFields, filters, table='vis') - - async def get_req_counts_comparison(self, - startDate=None, - endDate=None, - requestTypes=[], - set1={'district': None, 'list': []}, - set2={'district': None, 'list': []}, - countFields=[]): - - """ - { - "set1": { - "district": "nc", - "data": [ - { - "field": "requestsource", - "counts": { - "Call": 48, - "Driver Self Report": 68, - "Mobile App": 41, - "Self Service": 41 - } - }, - { - "field": "requesttype", - "counts": { - "Bulky Items": 93, - "Graffiti Removal": 105 - } - } - ] - }, - "set2": { - "district": "cc", - "data": [ - { - "field": "requestsource", - "counts": { - "Call": 572, - "Driver Self Report": 279, - "Email": 2, - "Mobile App": 530, - "Self Service": 159 - } - }, - { - "field": "requesttype", - "counts": { - "Bulky Items": 1053, - "Graffiti Removal": 489 - } - } - ] - } - } - """ - - def get_filters(district, items): - common = { - 'startDate': startDate, - 'endDate': endDate, - 'requestTypes': requestTypes - } - - if district == 'nc': - common['ncList'] = items - return self.dataAccess.comparisonFilters(**common) - - elif district == 'cc': - common['cdList'] = items - return self.dataAccess.comparisonFilters(**common) - - filters = get_filters(set1['district'], set1['list']) - set1data = self.dataAccess.aggregateQuery( - countFields, filters, table='vis') - - filters = get_filters(set2['district'], set2['list']) - set2data = self.dataAccess.aggregateQuery( - countFields, filters, table='vis') - - return { - 'set1': { - 'district': set1['district'], - 'data': set1data - }, - 'set2': { - 'district': set2['district'], - 'data': set2data - } - } diff --git a/server/src/services/timeToCloseService.py b/server/src/services/timeToCloseService.py deleted file mode 100644 index 408bb627f..000000000 --- a/server/src/services/timeToCloseService.py +++ /dev/null @@ -1,164 +0,0 @@ -import numpy as np -from .dataService import DataService - - -class TimeToCloseService(object): - def __init__(self, config=None): - self.dataAccess = DataService() - - def ttc(self, groupField, groupFieldItems, filters): - - def get_boxplot_stats(arr, C=1.5): - """ - Takes a one-dimensional numpy array of floats and generates boxplot - statistics for the data. The basic algorithm is standard. - See https://en.wikipedia.org/wiki/Box_plot - - The max length of the whiskers is the constant C, multiplied by the - interquartile range. This is a common method, although there - are others. The default value of C=1.5 is typical when this - method is used. 
- See matplotlib.org/3.1.3/api/_as_gen/matplotlib.pyplot.boxplot.html - """ - - # calculate first and third quantiles - q1 = np.quantile(arr, 0.25) - q3 = np.quantile(arr, 0.75) - - # calculate whiskers - iqr = q3 - q1 - whiskerMin = arr[arr >= q1 - C * iqr].min() - whiskerMax = arr[arr <= q3 + C * iqr].max() - - # don't let whiskers be inside range q1 -> q3 - whiskerMin = min([q1, whiskerMin]) - whiskerMax = max([q3, whiskerMax]) - - # calculate outliers - minOutliers = arr[arr < whiskerMin] - maxOutliers = arr[arr > whiskerMax] - outliers = list(np.concatenate((minOutliers, maxOutliers))) - - return { - 'min': np.min(arr), - 'q1': q1, - 'median': np.median(arr), - 'q3': q3, - 'max': np.max(arr), - 'whiskerMin': whiskerMin, - 'whiskerMax': whiskerMax, - 'count': len(arr), - 'outlierCount': len(outliers) - } - - # grab the necessary data from the db and drop nulls - fields = [groupField, '_daystoclose'] - dtc_df = self.dataAccess.query(fields, filters, table='vis').dropna() - - # group the requests by type and get box plot stats for each type - data = dtc_df \ - .groupby(by=groupField) \ - .apply(lambda df: get_boxplot_stats(df['_daystoclose'].values)) \ - .to_dict() - - # if no rows exist for a particular item in the groupField, - # return a count of 0 - for item in groupFieldItems: - if item not in data.keys(): - data[item] = {'count': 0} - - return data - - async def get_ttc(self, - startDate=None, - endDate=None, - requestTypes=[], - ncList=[]): - """ - For each requestType, returns the statistics necessary to generate - a boxplot of the number of days it took to close the requests. - - Example response: - { - 'Bulky Items': { - 'min': float, - 'q1': float, - 'median': float, - 'q3': float, - 'max': float, - 'whiskerMin': float, - 'whiskerMax': float, - 'outliers': [float], - 'count': int - } - ... - } - """ - - filters = self.dataAccess.standardFilters( - startDate, endDate, requestTypes, ncList) - - return self.ttc('requesttype', requestTypes, filters) - - async def get_ttc_comparison(self, - startDate=None, - endDate=None, - requestTypes=[], - set1={'district': None, 'list': []}, - set2={'district': None, 'list': []}): - - """ - For each of the two sets, returns the statistics necessary to generate - a boxplot of the number of days it took to close the requests. - - Example response: - { - set1: { - district: 'nc', - data: { - 4: { stats }, - 8: { stats } - ... - } - }, - set2: { - district: 'cc', - data: { - 1: { stats }, - 15: { stats } - ... 
- } - } - } - """ - - def get_data(district, items): - common = { - 'startDate': startDate, - 'endDate': endDate, - 'requestTypes': requestTypes - } - - if district == 'nc': - common['ncList'] = items - filters = self.dataAccess.comparisonFilters(**common) - return self.ttc('nc', items, filters) - - elif district == 'cc': - common['cdList'] = items - filters = self.dataAccess.comparisonFilters(**common) - return self.ttc('cd', items, filters) - - set1data = get_data(set1['district'], set1['list']) - set2data = get_data(set2['district'], set2['list']) - - return { - 'set1': { - 'district': set1['district'], - 'data': set1data - }, - 'set2': { - 'district': set2['district'], - 'data': set2data - } - } From 4423b5a36758beaf0aa6135107cb1a87be648a40 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Tue, 12 May 2020 17:52:44 -0700 Subject: [PATCH 14/18] updated tests --- server/test/test_comparison.py | 14 ++++++++++++++ server/test/test_time_to_close.py | 20 -------------------- server/test/test_visualizations.py | 14 ++++++++++++++ 3 files changed, 28 insertions(+), 20 deletions(-) create mode 100644 server/test/test_comparison.py delete mode 100644 server/test/test_time_to_close.py create mode 100644 server/test/test_visualizations.py diff --git a/server/test/test_comparison.py b/server/test/test_comparison.py new file mode 100644 index 000000000..498dda81f --- /dev/null +++ b/server/test/test_comparison.py @@ -0,0 +1,14 @@ +from src.services.comparisonService import ComparisonService + + +def test_serviceExists(): + # Arrange + testString = 'result' + print(testString) + + # Act + comp_worker = ComparisonService() + print(comp_worker) + + # Assert + assert True diff --git a/server/test/test_time_to_close.py b/server/test/test_time_to_close.py deleted file mode 100644 index df704dfe4..000000000 --- a/server/test/test_time_to_close.py +++ /dev/null @@ -1,20 +0,0 @@ -from src.services.timeToCloseService import TimeToCloseService - -TESTCONFIG = { - "Database": { - "DB_CONNECTION_STRING": "postgresql://testingString/postgresql" - } -} - - -def test_serviceExists(): - # Arrange - testString = 'result' - print(testString) - - # Act - ttc_worker = TimeToCloseService(TESTCONFIG) - print(ttc_worker) - - # Assert - assert True diff --git a/server/test/test_visualizations.py b/server/test/test_visualizations.py new file mode 100644 index 000000000..fb72b4e09 --- /dev/null +++ b/server/test/test_visualizations.py @@ -0,0 +1,14 @@ +from src.services.visualizationsService import VisualizationsService + + +def test_serviceExists(): + # Arrange + testString = 'result' + print(testString) + + # Act + vis_worker = VisualizationsService() + print(vis_worker) + + # Assert + assert True From e6d78b14debe0afcf479bd20cb993702c03228d5 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Tue, 12 May 2020 18:08:46 -0700 Subject: [PATCH 15/18] linting --- src/redux/sagas/data.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/redux/sagas/data.js b/src/redux/sagas/data.js index 0e3e01b51..e263f766d 100644 --- a/src/redux/sagas/data.js +++ b/src/redux/sagas/data.js @@ -5,7 +5,6 @@ import { call, put, select, - all, } from 'redux-saga/effects'; import { COUNCILS } from '@components/common/CONSTANTS'; From 4587ef9c6f8bc22b087007a7e8969a9b1459491e Mon Sep 17 00:00:00 2001 From: Adam Kendis Date: Tue, 12 May 2020 18:34:15 -0700 Subject: [PATCH 16/18] Removed devcheck and basename from App and Router. 
---
 src/App.jsx | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/App.jsx b/src/App.jsx
index 7702615b1..314bcf598 100644
--- a/src/App.jsx
+++ b/src/App.jsx
@@ -11,8 +11,6 @@ import Footer from './components/main/footer/Footer';
 import StaticFooter from './components/main/footer/StaticFooter';
 import { SnapshotRenderer } from './components/export/SnapshotService';
 
-const basename = process.env.NODE_ENV === 'development' ? '/' : process.env.BASE_URL || '/';
-
 const App = ({
   getMetadata,
 }) => {
@@ -21,7 +19,7 @@ const App = ({
   });
 
   return (
-    <Router basename={basename}>
+    <Router>
From f27ccde214e7ba22075e388e71b4b8fe7bb2fec0 Mon Sep 17 00:00:00 2001 From: Adam Kendis Date: Tue, 12 May 2020 18:34:55 -0700 Subject: [PATCH 17/18] Removed BASE_URL from orchestration files. --- .example.env | 1 - .github/workflows/Continuous_Delivery.yml | 2 -- Orchestration/docker-compose-example.yml | 1 - copyEnv.sh | 1 - 4 files changed, 5 deletions(-) diff --git a/.example.env b/.example.env index 44914644c..db13ffc40 100644 --- a/.example.env +++ b/.example.env @@ -1,3 +1,2 @@ REACT_APP_MAPBOX_TOKEN=REDACTED DB_URL=REDACTED -BASE_URL=/311-data \ No newline at end of file diff --git a/.github/workflows/Continuous_Delivery.yml b/.github/workflows/Continuous_Delivery.yml index 01eaf4590..909683365 100644 --- a/.github/workflows/Continuous_Delivery.yml +++ b/.github/workflows/Continuous_Delivery.yml @@ -24,7 +24,6 @@ jobs: run: | echo REACT_APP_MAPBOX_TOKEN=${{ secrets.MAPBOX_TOKEN }} > .env echo DB_URL=${{ secrets.DB_URL }} >> .env - echo BASE_URL=${{ secrets.BASE_URL }} >> .env echo GITHUB_SHA=${{ github.sha }} >> .env - name: Build project run: npm run build @@ -36,7 +35,6 @@ jobs: ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} REACT_APP_MAPBOX_TOKEN: ${{ secrets.MAPBOX_TOKEN }} DB_URL: ${{ secrets.DB_URL }} - BASE_URL: ${{ secrets.BASE_URL }} BASE_BRANCH: master # The branch the action should deploy from. BRANCH: gh-pages # The branch the action should deploy to. FOLDER: dist # The folder the action should deploy. diff --git a/Orchestration/docker-compose-example.yml b/Orchestration/docker-compose-example.yml index cc7ad023c..c88ffb9ca 100644 --- a/Orchestration/docker-compose-example.yml +++ b/Orchestration/docker-compose-example.yml @@ -26,7 +26,6 @@ services: environment: REACT_APP_MAPBOX_TOKEN: REDACTED DB_URL: http://localhost:5000 - BASE_URL: '' ports: - 3000:3000 diff --git a/copyEnv.sh b/copyEnv.sh index c0c797350..ea42c7c42 100644 --- a/copyEnv.sh +++ b/copyEnv.sh @@ -1,5 +1,4 @@ echo REACT_APP_MAPBOX_TOKEN=$REACT_APP_MAPBOX_TOKEN > .env echo DB_URL=$DB_URL >> .env -echo BASE_URL=$BASE_URL >> .env webpack node server.js From 0a676f1b3ccabdb5923e0134ea9cf48a5240c997 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Wed, 13 May 2020 09:26:19 -0700 Subject: [PATCH 18/18] minor tweaks --- server/src/services/visualizationsService.py | 4 ++-- server/src/utils/stats.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/services/visualizationsService.py b/server/src/services/visualizationsService.py index 4d9f7c811..af6355f58 100644 --- a/server/src/services/visualizationsService.py +++ b/server/src/services/visualizationsService.py @@ -26,8 +26,8 @@ async def visualizations(self, df = self.dataAccess.query(fields, filters, table='vis') inner_df = df.loc[ - (df['createddate'] > startDate) & - (df['createddate'] < endDate)] + (df['createddate'] >= startDate) & + (df['createddate'] <= endDate)] return { 'frequency': { diff --git a/server/src/utils/stats.py b/server/src/utils/stats.py index 82e667d73..204473cc8 100644 --- a/server/src/utils/stats.py +++ b/server/src/utils/stats.py @@ -128,7 +128,7 @@ def date_histograms(df, dateField, bins, groupField, groupFieldItems): # count the requests created in each bin counts = df \ .groupby(by=groupField) \ - .apply(lambda x: date_histogram(x[dateField].values, bins_fl)) \ + .apply(lambda df: date_histogram(df[dateField].values, bins_fl)) \ .to_dict() # if no rows exist for a particular item in the groupField,
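
For reference, the binning rule documented in the deleted frequencyService above (and carried into the consolidated stats utilities) can be exercised on its own. The sketch below is illustrative only; it is not part of any patch in this series, and the function name make_bins is hypothetical:

import math
import pandas as pd

def make_bins(startDate, endDate):
    # Cover the inclusive date range with equal-size bins: one-day bins
    # for ranges of 24 days or less, otherwise the largest whole-day bin
    # size that still yields at least 12 bins.
    start = pd.to_datetime(startDate)
    end = pd.to_datetime(endDate) + pd.Timedelta(days=1)
    diff = (end - start).days

    bin_size = 1 if diff <= 24 else diff // 12
    num_bins = math.ceil(diff / bin_size)

    # If the range isn't evenly divisible, push the end forward so the
    # last bin is the same size as the rest.
    if diff != num_bins * bin_size:
        end = start + num_bins * pd.Timedelta(days=bin_size)

    return pd.date_range(start, end, freq='{}D'.format(bin_size))

# A 60-day range gets 5-day bins (60 // 12) and 13 bin edges: one more
# edge than there are counts, as the frequency docstring above notes.
print(make_bins('2020-01-01', '2020-02-29'))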
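
Similarly, the whisker rule described in the deleted timeToCloseService docstring survives in the shared stats utilities. This standalone sketch, again illustrative rather than part of the series, shows how a single extreme value falls outside the C * IQR whiskers:

import numpy as np

def boxplot_stats(arr, C=1.5):
    # First and third quartiles and the interquartile range.
    q1 = np.quantile(arr, 0.25)
    q3 = np.quantile(arr, 0.75)
    iqr = q3 - q1

    # Whiskers reach the most extreme points within C * IQR of the box,
    # clamped so they never sit inside the q1 -> q3 range.
    whiskerMin = min(q1, arr[arr >= q1 - C * iqr].min())
    whiskerMax = max(q3, arr[arr <= q3 + C * iqr].max())

    outliers = arr[(arr < whiskerMin) | (arr > whiskerMax)]
    return {
        'min': float(arr.min()),
        'q1': float(q1),
        'median': float(np.median(arr)),
        'q3': float(q3),
        'max': float(arr.max()),
        'whiskerMin': float(whiskerMin),
        'whiskerMax': float(whiskerMax),
        'count': int(arr.size),
        'outlierCount': int(outliers.size),
    }

daysToClose = np.array([1.0, 2.0, 2.5, 3.0, 3.5, 4.0, 30.0])
print(boxplot_stats(daysToClose))  # 30.0 is the lone outlier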