From 3d41ad50147d48654111d2ec29923c8b586183e0 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Fri, 15 May 2020 09:40:17 -0700 Subject: [PATCH 1/3] converted /ingest endpoint to script --- server/src/app.py | 57 -------------------------------- server/src/ingest.py | 22 ++++++++++++ server/src/services/sqlIngest.py | 2 +- server/src/utils/database.py | 3 ++ 4 files changed, 26 insertions(+), 58 deletions(-) create mode 100644 server/src/ingest.py diff --git a/server/src/app.py b/server/src/app.py index 4fd1e3e68..2e5765869 100644 --- a/server/src/app.py +++ b/server/src/app.py @@ -76,63 +76,6 @@ async def index(request): return json('You hit the index') -@app.route('/ingest', methods=["GET"]) -@compress.compress() -async def ingest(request): - """ - Query parameters: - years: - a comma-separated list of years to import. - Ex. '/ingest?years=2015,2016,2017' - limit: - the max number of records per year - querySize: - the number of records per request to socrata - - Counts: - These are the counts you can expect if you do the full ingest: - - 2015: 237305 - 2016: 952486 - 2017: 1131558 - 2018: 1210075 - 2019: 1308093 - 2020: 319628 (and counting) - - GET https://data.lacity.org/resource/{ID}.json?$select=count(srnumber) - - Hint: - Run /ingest without params to get all socrata data - """ - - # parse params - defaults = app.config['Settings']['Ingestion'] - - years = request.args.get('years', defaults['YEARS']) - limit = request.args.get('limit', defaults['LIMIT']) - querySize = request.args.get('querySize', defaults['QUERY_SIZE']) - - # validate params - current_year = datetime.now().year - allowed_years = [year for year in range(2015, current_year+1)] - years = set([int(year) for year in years.split(',')]) - if not all(year in allowed_years for year in years): - return json({ - 'error': f"'years' param values must be one of {allowed_years}" - }) - - limit = int(limit) - querySize = int(querySize) - querySize = min([limit, querySize]) - - # get data - loader = DataHandler(app.config['Settings']) - data = await loader.populateDatabase(years=years, - limit=limit, - querySize=querySize) - return json(data) - - @app.route('/pin-clusters', methods=["POST"]) @compress.compress() async def pinClusters(request): diff --git a/server/src/ingest.py b/server/src/ingest.py new file mode 100644 index 000000000..627575c93 --- /dev/null +++ b/server/src/ingest.py @@ -0,0 +1,22 @@ +import os +from configparser import ConfigParser +from utils.database import db +from services.sqlIngest import DataHandler + + +if __name__ == '__main__': + config = ConfigParser() + settings_file = os.path.join(os.getcwd(), 'settings.cfg') + config.read(settings_file) + + db.config(config['Database']) + loader = DataHandler(config) + ingestion = config['Ingestion'] + + years = [int(year) for year in ingestion['YEARS'].split(',')] + limit = int(ingestion['LIMIT']) + querySize = int(ingestion['QUERY_SIZE']) + + querySize = min([limit, querySize]) + + loader.populateDatabase(years, limit, querySize) diff --git a/server/src/services/sqlIngest.py b/server/src/services/sqlIngest.py index 7f5eb4477..49a1a5aea 100644 --- a/server/src/services/sqlIngest.py +++ b/server/src/services/sqlIngest.py @@ -225,7 +225,7 @@ def createVisView(table, report): return report - async def populateDatabase(self, years=[], limit=None, querySize=None): + def populateDatabase(self, years=[], limit=None, querySize=None): log('\nPopulating database for years: {}'.format(list(years))) timer = Timer() diff --git a/server/src/utils/database.py b/server/src/utils/database.py index 6e65e3c1a..b24575754 100644 --- a/server/src/utils/database.py +++ b/server/src/utils/database.py @@ -8,6 +8,9 @@ def __init__(self, verbose=False): self.verbose = verbose def config(self, config): + if self.engine is not None: + return + self.engine = create_engine(config['DB_CONNECTION_STRING']) self.Session = sessionmaker(bind=self.engine) From 581736b844d744b0d5f0f9bfb7a0ed4382ebbbd2 Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Fri, 15 May 2020 09:48:34 -0700 Subject: [PATCH 2/3] linting --- server/src/app.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/app.py b/server/src/app.py index 2e5765869..d980e833e 100644 --- a/server/src/app.py +++ b/server/src/app.py @@ -13,7 +13,6 @@ from services.requestDetailService import RequestDetailService from services.visualizationsService import VisualizationsService from services.comparisonService import ComparisonService -from services.sqlIngest import DataHandler from services.feedbackService import FeedbackService from services.dataService import DataService From fe3c0485037ee0aeb16917447722722db7b25aeb Mon Sep 17 00:00:00 2001 From: Jake Mensch Date: Fri, 15 May 2020 10:17:50 -0700 Subject: [PATCH 3/3] tiny fix --- server/src/utils/database.py | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/utils/database.py b/server/src/utils/database.py index b24575754..e5079cbff 100644 --- a/server/src/utils/database.py +++ b/server/src/utils/database.py @@ -6,6 +6,7 @@ class Database(object): def __init__(self, verbose=False): self.verbose = verbose + self.engine = None def config(self, config): if self.engine is not None: