From d0a4791ec1d5fa3ee93d43e82aff833717e4aa93 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Thu, 23 Apr 2020 14:08:12 -0700 Subject: [PATCH] calculating _daystoclose during ingest; nc code fixes --- server/src/services/databaseOrm.py | 3 +- server/src/services/sqlIngest.py | 46 ++++++++++++++++++++++- server/src/services/timeToCloseService.py | 15 ++------ 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/server/src/services/databaseOrm.py b/server/src/services/databaseOrm.py index 08313ee5f..fe5b012b0 100644 --- a/server/src/services/databaseOrm.py +++ b/server/src/services/databaseOrm.py @@ -25,9 +25,10 @@ class Ingest(Base, Mixin): # dates createddate = Column(DateTime, index=True) + closeddate = Column(DateTime) + _daystoclose = Column(Float(1)) updateddate = Column(DateTime) servicedate = Column(DateTime) - closeddate = Column(DateTime) # about requesttype = Column(String, index=True) diff --git a/server/src/services/sqlIngest.py b/server/src/services/sqlIngest.py index 7a147b6fb..95fead649 100644 --- a/server/src/services/sqlIngest.py +++ b/server/src/services/sqlIngest.py @@ -86,7 +86,7 @@ def dropDuplicates(table, report): report.append({ 'description': 'dropped duplicate rows by srnumber', - 'rows': rows.rowcount + 'rowsAffected': rows.rowcount }) def switchPrimaryKey(table, report): @@ -97,7 +97,7 @@ def switchPrimaryKey(table, report): report.append({ 'description': 'switched primary key column to srnumber', - 'rows': 'N/A' + 'rowsAffected': 'N/A' }) def removeInvalidClosedDates(table, report): @@ -112,6 +112,45 @@ def removeInvalidClosedDates(table, report): 'rowsAffected': result.rowcount }) + def setDaysToClose(table, report): + result = exec_sql(f""" + UPDATE {table} + SET _daystoclose = EXTRACT ( + EPOCH FROM + (closeddate::timestamp - createddate::timestamp) / + (60 * 60 * 24) + ); + """) + + report.append({ + 'description': 'set _daystoclose column', + 'rowsAffected': result.rowcount + }) + + def fixNorthWestwood(table, report): + result = exec_sql(f""" + UPDATE {table} + SET nc = 127 + WHERE nc = 0 AND ncname = 'NORTH WESTWOOD NC' + """) + + report.append({ + 'description': 'fix nc code for North Westwood NC', + 'rowsAffected': result.rowcount + }) + + def fixHistoricCulturalNorth(table, report): + result = exec_sql(f""" + UPDATE {table} + SET nc = 128 + WHERE nc = 0 AND ncname = 'HISTORIC CULTURAL NORTH NC' + """) + + report.append({ + 'description': 'fix nc code for Historic Cultural North NC', + 'rowsAffected': result.rowcount + }) + log('\nCleaning ingest table.') table = Ingest.__tablename__ report = [] @@ -119,6 +158,9 @@ def removeInvalidClosedDates(table, report): dropDuplicates(table, report) switchPrimaryKey(table, report) removeInvalidClosedDates(table, report) + setDaysToClose(table, report) + fixNorthWestwood(table, report) + fixHistoricCulturalNorth(table, report) return report diff --git a/server/src/services/timeToCloseService.py b/server/src/services/timeToCloseService.py index 7e0c0636b..0ad727949 100644 --- a/server/src/services/timeToCloseService.py +++ b/server/src/services/timeToCloseService.py @@ -53,25 +53,16 @@ def get_boxplot_stats(arr, C=1.5): } # grab the necessary data from the db - fields = [groupField, 'createddate', 'closeddate'] + fields = [groupField, '_daystoclose'] data = self.dataAccess.query(fields, filters) # read into a dataframe and drop the nulls - df = pd.DataFrame(data, columns=fields).dropna() - - # generate a new dataframe that contains the number of days it - # takes to close each request, plus the type of request - df['closeddate'] = pd.to_datetime(df['closeddate']) - df['createddate'] = pd.to_datetime(df['createddate']) - df['time-to-close'] = df['closeddate'] - df['createddate'] - df['hours-to-close'] = df['time-to-close'].astype('timedelta64[h]') - df['days-to-close'] = (df['hours-to-close'] / 24).round(2) - dtc_df = df[[groupField, 'days-to-close']] + dtc_df = pd.DataFrame(data, columns=fields).dropna() # group the requests by type and get box plot stats for each type data = dtc_df \ .groupby(by=groupField) \ - .apply(lambda df: get_boxplot_stats(df['days-to-close'].values)) \ + .apply(lambda df: get_boxplot_stats(df['_daystoclose'].values)) \ .to_dict() # if no rows exist for a particular item in the groupField,