diff --git a/server/src/services/databaseOrm.py b/server/src/services/databaseOrm.py
index 22ac385d2..20a67227d 100644
--- a/server/src/services/databaseOrm.py
+++ b/server/src/services/databaseOrm.py
@@ -43,40 +43,40 @@ class Ingest(Base):
     policeprecinct = Column(String)
 
 
-insertFields = {'srnumber': String,
+insertFields = {'srnumber': String(50),
                 'createddate': DateTime,
                 'updateddate': DateTime,
-                'actiontaken': String,
-                'owner': String,
-                'requesttype': String,
-                'status': String,
-                'requestsource': String,
-                'createdbyuserorganization': String,
-                'mobileos': String,
-                'anonymous': String,
-                'assignto': String,
-                'servicedate': String,
-                'closeddate': String,
-                'addressverified': String,
-                'approximateaddress': String,
-                'address': String,
-                'housenumber': String,
-                'direction': String,
-                'streetname': String,
-                'suffix': String,
+                'actiontaken': String(30),
+                'owner': String(10),
+                'requesttype': String(30),
+                'status': String(20),
+                'requestsource': String(30),
+                'createdbyuserorganization': String(16),
+                'mobileos': String(10),
+                'anonymous': String(10),
+                'assignto': String(20),
+                'servicedate': String(30),
+                'closeddate': String(30),
+                'addressverified': String(16),
+                'approximateaddress': String(20),
+                'address': String(100),
+                'housenumber': String(10),
+                'direction': String(10),
+                'streetname': String(30),
+                'suffix': String(6),
                 'zipcode': Integer,
                 'latitude': Float,
                 'longitude': Float,
-                'location': String,
+                'location': String(100),
                 'tbmpage': Integer,
-                'tbmcolumn': String,
+                'tbmcolumn': String(10),
                 'tbmrow': Float,
-                'apc': String,
+                'apc': String(30),
                 'cd': Float,
-                'cdmember': String,
+                'cdmember': String(30),
                 'nc': Float,
-                'ncname': String,
-                'policeprecinct': String}
+                'ncname': String(100),
+                'policeprecinct': String(30)}
 
 
 readFields = {'SRNumber': str,
diff --git a/server/src/services/sqlIngest.py b/server/src/services/sqlIngest.py
index 2bbd340b9..5462ccf0f 100644
--- a/server/src/services/sqlIngest.py
+++ b/server/src/services/sqlIngest.py
@@ -5,7 +5,7 @@
 import numpy as np
 from sodapy import Socrata
 import time
-from . import databaseOrm  # Contains database specs and field definitions
+import databaseOrm  # Contains database specs and field definitions
 
 
 class DataHandler:
@@ -20,6 +20,7 @@ def __init__(self, config=None, configFilePath=None, separator=','):
         self.fields = databaseOrm.tableFields
         self.insertParams = databaseOrm.insertFields
         self.readParams = databaseOrm.readFields
+        self.dialect = None
 
     def loadConfig(self, configFilePath):
         '''Load and parse config data'''
@@ -33,6 +34,7 @@ def loadConfig(self, configFilePath):
         config.read(configFilePath)
         self.config = config
         self.dbString = config['Database']['DB_CONNECTION_STRING']
+        self.dialect = self.dbString.split(':')[0]
         self.token = None if config['Socrata']['TOKEN'] == 'None' \
             else config['Socrata']['TOKEN']
 
@@ -84,19 +86,21 @@ def cleanData(self):
 
     def ingestData(self, ingestMethod='replace'):
         '''Set up connection to database'''
-        print('Inserting data into Postgres instance...')
+        print('Inserting data into %s instance...' % self.dialect)
         ingestTimer = time.time()
         data = self.data.copy()  # shard deepcopy for other endpoint operations
         engine = db.create_engine(self.dbString)
         newColumns = [column.replace(' ', '_').lower() for column in data]
         data.columns = newColumns
         # Ingest data
+        # Schema is same as database in MySQL;
+        # schema here is set to db name in connection string
         data.to_sql("ingest_staging_table",
                     engine,
                     if_exists=ingestMethod,
                     schema='public',
                     index=False,
-                    chunksize=10000,
+                    chunksize=10,
                     dtype=self.insertParams)
         print('\tIngest Complete: %.1f minutes' %
               self.elapsedTimer(ingestTimer))
@@ -171,7 +175,8 @@ def populateFullDatabase(self, yearRange=range(2015, 2021)):
         Default operation is to fetch data from 2015-2020
         !!! Be aware that each fresh import will wipe the
         existing staging table'''
-        print('Performing fresh Postgres population from Socrata data sources')
+        print('Performing fresh %s population from Socrata data sources' %
+              self.dialect)
         tableInit = False
         globalTimer = time.time()
         for y in yearRange:
@@ -238,11 +243,11 @@ def fix_nan_vals(resultDict):
     '''Class DataHandler workflow from initial load to SQL population'''
     loader = DataHandler()
     loader.loadConfig(configFilePath='../settings.cfg')
-    loader.fetchSocrataFull(limit=10000)
+    loader.fetchSocrataFull()
     loader.cleanData()
     loader.ingestData()
-    loader.saveCsvFile('testfile.csv')
-    loader.dumpFilteredCsvFile(dataset="",
-                               startDate='2018-05-01',
-                               requestType='Bulky Items',
-                               councilName='VOICES OF 90037')
+    # loader.saveCsvFile('testfile.csv')
+    # loader.dumpFilteredCsvFile(dataset="",
+    #                            startDate='2018-05-01',
+    #                            requestType='Bulky Items',
+    #                            councilName='VOICES OF 90037')
diff --git a/server/src/settings.example.cfg b/server/src/settings.example.cfg
index 3cb142558..14ec194c8 100644
--- a/server/src/settings.example.cfg
+++ b/server/src/settings.example.cfg
@@ -4,7 +4,7 @@ HOST = 0.0.0.0
 PORT = 5000
 
 [Database]
-DB_CONNECTION_STRING = postgres://REDACTED:REDACTED@localhost:5432/postgres
+DB_CONNECTION_STRING = mysql://REDACTED:REDACTED@localhost:3306/public
 DATA_DIRECTORY = static
 
 [Api]