Skip to content

Commit

Permalink
Merge pull request #618 from hackforla/BACK-IngestionScript
Browse files Browse the repository at this point in the history
ingestion script
  • Loading branch information
jmensch1 authored May 15, 2020
2 parents 033c14c + fe3c048 commit 9f09860
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 59 deletions.
58 changes: 0 additions & 58 deletions server/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from services.requestDetailService import RequestDetailService
from services.visualizationsService import VisualizationsService
from services.comparisonService import ComparisonService
from services.sqlIngest import DataHandler
from services.feedbackService import FeedbackService
from services.dataService import DataService

Expand Down Expand Up @@ -76,63 +75,6 @@ async def index(request):
return json('You hit the index')


@app.route('/ingest', methods=["GET"])
@compress.compress()
async def ingest(request):
    """
    Populate the database from socrata and return the result as JSON.

    Query parameters:
        years:
            a comma-separated list of years to import.
            Ex. '/ingest?years=2015,2016,2017'
        limit:
            the max number of records per year
        querySize:
            the number of records per request to socrata

    Any parameter that is omitted falls back to the matching value in the
    'Ingestion' section of the app settings.

    Counts:
        These are the counts you can expect if you do the full ingest:
            2015: 237305
            2016: 952486
            2017: 1131558
            2018: 1210075
            2019: 1308093
            2020: 319628 (and counting)
        GET https://data.lacity.org/resource/{ID}.json?$select=count(srnumber)

    Hint:
        Run /ingest without params to get all socrata data

    Returns:
        A JSON response with whatever populateDatabase returns, or a JSON
        object with an 'error' key when a parameter fails validation.
    """

    # parse params
    defaults = app.config['Settings']['Ingestion']

    years = request.args.get('years', defaults['YEARS'])
    limit = request.args.get('limit', defaults['LIMIT'])
    querySize = request.args.get('querySize', defaults['QUERY_SIZE'])

    # validate params
    current_year = datetime.now().year
    allowed_years = list(range(2015, current_year + 1))
    try:
        years = {int(year) for year in years.split(',')}
    except ValueError:
        # A non-numeric entry (e.g. 'years=abc') is reported the same way as
        # an out-of-range year instead of escaping as an unhandled error.
        years = None

    if years is None or not all(year in allowed_years for year in years):
        return json({
            'error': f"'years' param values must be one of {allowed_years}"
        })

    try:
        limit = int(limit)
        querySize = int(querySize)
    except ValueError:
        return json({
            'error': "'limit' and 'querySize' params must be integers"
        })

    # never request more records per query than the per-year cap
    querySize = min(limit, querySize)

    # get data
    loader = DataHandler(app.config['Settings'])
    data = await loader.populateDatabase(years=years,
                                         limit=limit,
                                         querySize=querySize)
    return json(data)


@app.route('/pin-clusters', methods=["POST"])
@compress.compress()
async def pinClusters(request):
Expand Down
22 changes: 22 additions & 0 deletions server/src/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os
from configparser import ConfigParser
from utils.database import db
from services.sqlIngest import DataHandler


if __name__ == '__main__':
    # Settings are read from settings.cfg in the current working directory,
    # so the script must be launched from the directory that contains it.
    config = ConfigParser()
    settings_file = os.path.join(os.getcwd(), 'settings.cfg')

    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it actually parsed; an empty list means nothing was
    # loaded. Fail here with a clear message rather than with a KeyError on
    # the first section lookup below.
    if not config.read(settings_file):
        raise SystemExit(f'Could not read settings file: {settings_file}')

    db.config(config['Database'])
    loader = DataHandler(config)
    ingestion = config['Ingestion']

    years = [int(year) for year in ingestion['YEARS'].split(',')]
    limit = int(ingestion['LIMIT'])
    # never request more records per query than the per-year cap
    querySize = min(limit, int(ingestion['QUERY_SIZE']))

    loader.populateDatabase(years, limit, querySize)
2 changes: 1 addition & 1 deletion server/src/services/sqlIngest.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def createVisView(table, report):

return report

async def populateDatabase(self, years=[], limit=None, querySize=None):
def populateDatabase(self, years=[], limit=None, querySize=None):
log('\nPopulating database for years: {}'.format(list(years)))
timer = Timer()

Expand Down
4 changes: 4 additions & 0 deletions server/src/utils/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
class Database(object):
# Create a Database handle that has not been connected yet.
# verbose: flag stored on the instance as-is; where it is read is outside this view.
def __init__(self, verbose=False):
self.verbose = verbose
# None marks "no engine created yet"; config() checks this before building one.
self.engine = None

# Build the engine and the session factory from the given settings mapping.
# config: mapping that must provide a 'DB_CONNECTION_STRING' entry.
# Only the first call has an effect: once self.engine is set, later calls
# return immediately without touching the existing engine.
# NOTE(review): create_engine/sessionmaker are imported outside this view —
# presumably SQLAlchemy; confirm. The method body may also continue past the
# lines shown here.
def config(self, config):
if self.engine is not None:
return

self.engine = create_engine(config['DB_CONNECTION_STRING'])
# Session factory bound to the engine created on the line above.
self.Session = sessionmaker(bind=self.engine)

Expand Down

0 comments on commit 9f09860

Please sign in to comment.