Merge pull request #392 from hackforla/dev
Updating master with latest dev
sellnat77 authored Mar 9, 2020
2 parents 403e5de + 1fd966a commit 3525807
Showing 22 changed files with 421 additions and 172 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/blank-issue.md
@@ -7,7 +7,7 @@ assignees: ''

---

#Description
# Description

## Action Items

14 changes: 14 additions & 0 deletions .github/workflows/Publish_Backend_Package.yml
@@ -29,3 +29,17 @@ jobs:
dockerfile: server/Dockerfile
context: server
tags: "latest, ${{github.sha}}"
- name: Login to heroku
env:
HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }}
run: heroku container:login
- name: Build and push heroku
env:
HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }}
run: |
docker tag docker.pkg.github.com/hackforla/311-data/backend:${{github.sha}} registry.heroku.com/hackforla-311-data/web
docker push registry.heroku.com/hackforla-311-data/web
- name: Release
env:
HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }}
run: heroku container:release -a hackforla-311-data web
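
The three new workflow steps above log in to the Heroku container registry, retag and push the image built in GitHub Packages, and release it to the hackforla-311-data app. Purely as an illustration, the same sequence could be driven from a hypothetical local Python helper (not part of this commit) that shells out to the docker and heroku CLIs:

# Hypothetical helper mirroring the workflow's deploy steps; assumes the docker
# and heroku CLIs are installed and HEROKU_API_KEY / GITHUB_SHA are set.
import os
import subprocess

sha = os.environ["GITHUB_SHA"]
source_image = f"docker.pkg.github.com/hackforla/311-data/backend:{sha}"
heroku_image = "registry.heroku.com/hackforla-311-data/web"

subprocess.run(["heroku", "container:login"], check=True)
subprocess.run(["docker", "tag", source_image, heroku_image], check=True)
subprocess.run(["docker", "push", heroku_image], check=True)
subprocess.run(["heroku", "container:release", "-a", "hackforla-311-data", "web"], check=True)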
6 changes: 6 additions & 0 deletions README.md
@@ -26,6 +26,12 @@ Empower Neighborhood Associations to Improve analysis of their initiatives using
* Open Source
* Kanban

## UI/UX Technologies
* Figma
* Google Drive
* Adobe CC
* Miro

## 🎉🎉 Engineers start [here](https://github.com/hackforla/311-data/blob/master/GETTING_STARTED.md)!!!! 🎉🎉🎉

## Resources
4 changes: 3 additions & 1 deletion package.json
@@ -20,14 +20,16 @@
"html-webpack-plugin": "^3.2.0",
"jest": "^24.9.0",
"leaflet": "^1.5.1",
"leaflet.markercluster": "^1.4.1",
"moment": "^2.24.0",
"proptypes": "^1.1.0",
"react": "^16.8.6",
"react-burger-menu": "^2.6.13",
"react-datepicker": "^2.12.1",
"react-dom": "^16.8.6",
"react-leaflet": "^2.4.0",
"react-leaflet-choropleth": "^2.0.0",
"react-leaflet-heatmap-layer": "^2.0.0",
"react-leaflet-markercluster": "^2.0.0-rc3",
"react-redux": "^7.1.3",
"react-test-renderer": "^16.12.0",
"react-tooltip": "^4.0.3",
1 change: 1 addition & 0 deletions public/index.html
@@ -8,6 +8,7 @@
<link rel="stylesheet" href="https://unpkg.com/[email protected]/dist/leaflet.css"
integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="
crossorigin=""/>
<link rel="stylesheet" href="https://unpkg.com/react-leaflet-markercluster/dist/styles.min.css" />
<link href="https://fonts.googleapis.com/css?family=Open+Sans:700|Roboto&display=swap" rel="stylesheet">
<title>311 Data</title>
</head>
88 changes: 88 additions & 0 deletions server/311-Data.postman_collection.json
@@ -0,0 +1,88 @@
{
"info": {
"_postman_id": "16e2823e-b118-4cbe-8007-1dc4119111cd",
"name": "311-Data",
"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
},
"item": [
{
"name": "Bulk_Ingest",
"request": {
"method": "POST",
"header": [
{
"key": "Content-Type",
"name": "Content-Type",
"value": "application/json",
"type": "text"
}
],
"body": {
"mode": "raw",
"raw": "",
"options": {
"raw": {
"language": "json"
}
}
},
"url": {
"raw": "localhost:5000/ingest?years=2019&limit=10000",
"host": [
"localhost"
],
"port": "5000",
"path": [
"ingest"
],
"query": [
{
"key": "years",
"value": "2019"
},
{
"key": "limit",
"value": "10000"
}
]
}
},
"response": []
},
{
"name": "Pinmap",
"request": {
"method": "POST",
"header": [
{
"key": "Content-Type",
"name": "Content-Type",
"value": "application/json",
"type": "text"
}
],
"body": {
"mode": "raw",
"raw": "{\n\t\"startDate\":\"2017-01-01\",\n\t\"endDate\":\"2019-12-31\",\n\t\"ncList\": [\"SUNLAND-TUJUNGA NC\", \"HISTORIC HIGHLAND PARK NC\", \"ALL\"],\n\t\"requestTypes\":[\"Homeless Encampment\"]\n}",
"options": {
"raw": {
"language": "json"
}
}
},
"url": {
"raw": "localhost:5000/pins",
"host": [
"localhost"
],
"port": "5000",
"path": [
"pins"
]
}
},
"response": []
}
],
"protocolProfileBehavior": {}
}
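
The new Postman collection documents two endpoints: Bulk_Ingest (POST /ingest with years and limit query parameters) and Pinmap (POST /pins with a JSON body). A rough equivalent using Python's requests library, assuming the server is running locally on port 5000, might look like:

# Illustrative only; mirrors the requests defined in the Postman collection.
import requests

BASE = "http://localhost:5000"

# Bulk_Ingest: query parameters only, empty body
resp = requests.post(f"{BASE}/ingest", params={"years": "2019", "limit": "10000"})
print(resp.json())

# Pinmap: JSON body with a date range, neighborhood councils, and request types
payload = {
    "startDate": "2017-01-01",
    "endDate": "2019-12-31",
    "ncList": ["SUNLAND-TUJUNGA NC", "HISTORIC HIGHLAND PARK NC", "ALL"],
    "requestTypes": ["Homeless Encampment"],
}
resp = requests.post(f"{BASE}/pins", json=payload)
print(resp.json())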
3 changes: 2 additions & 1 deletion server/Dockerfile
@@ -5,6 +5,7 @@ RUN apt-get update && apt-get install -yq \
gfortran musl-dev

ENV DB_CONNECTION_STRING=REDACTED
ENV PORT=5000
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt
@@ -13,6 +14,6 @@ COPY src/ /app

WORKDIR /app

EXPOSE 5000
EXPOSE $PORT

CMD ["python", "app.py"]
19 changes: 15 additions & 4 deletions server/src/app.py
@@ -20,15 +20,22 @@
compress = Compress()


def environment_overrides():
if os.environ.get('DB_CONNECTION_STRING', None):
app.config['Settings']['Database']['DB_CONNECTION_STRING'] =\
os.environ.get('DB_CONNECTION_STRING')
if os.environ.get('PORT', None):
app.config['Settings']['Server']['PORT'] =\
os.environ.get('PORT')


def configure_app():
# Settings initialization
config = ConfigParser()
settings_file = os.path.join(os.getcwd(), 'settings.cfg')
config.read(settings_file)
app.config['Settings'] = config
if os.environ.get('DB_CONNECTION_STRING', None):
app.config['Settings']['Database']['DB_CONNECTION_STRING'] =\
os.environ.get('DB_CONNECTION_STRING')
environment_overrides()
app.config["STATIC_DIR"] = os.path.join(os.getcwd(), "static")
os.makedirs(os.path.join(app.config["STATIC_DIR"], "temp"), exist_ok=True)

@@ -83,6 +90,8 @@ async def ingest(request):
Ex. '/ingest?years=2015,2016,2017'
"""
current_year = datetime.now().year
querySize = request.args.get("querySize", None)
limit = request.args.get("limit", None)
ALLOWED_YEARS = [year for year in range(2015, current_year+1)]
if not request.args.get("years"):
return json({"error": "'years' parameter is required."})
@@ -91,7 +100,9 @@
return json({"error":
f"'years' param values must be one of {ALLOWED_YEARS}"})
loader = DataHandler(app.config['Settings'])
loader.populateFullDatabase(yearRange=years)
loader.populateFullDatabase(yearRange=years,
querySize=querySize,
limit=limit)
return_data = {'response': 'ingest ok'}
return json(return_data)

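The app.py changes above move environment overrides into their own environment_overrides() helper, so a deploy-time DB_CONNECTION_STRING or PORT takes precedence over settings.cfg, and thread the optional querySize and limit query parameters from /ingest through to populateFullDatabase. A minimal, self-contained sketch of the override pattern (section and key names follow the diff; the sample values are made up):

# Standalone sketch of the override behavior; not the actual app.py module.
import os
from configparser import ConfigParser

config = ConfigParser()
config.read_string("""
[Database]
DB_CONNECTION_STRING = postgresql://localhost/311_data
[Server]
PORT = 5000
""")

# Environment variables, when set, win over the values read from the file.
if os.environ.get('DB_CONNECTION_STRING'):
    config['Database']['DB_CONNECTION_STRING'] = os.environ['DB_CONNECTION_STRING']
if os.environ.get('PORT'):
    config['Server']['PORT'] = os.environ['PORT']

print(config['Server']['PORT'])  # "5000" unless PORT was set in the environment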
4 changes: 2 additions & 2 deletions server/src/services/databaseOrm.py
@@ -59,15 +59,15 @@ class Ingest(Base):
'closeddate': String(30),
'addressverified': String(16),
'approximateaddress': String(20),
'address': String(100),
'address': String(250),
'housenumber': String(10),
'direction': String(10),
'streetname': String(50),
'suffix': String(10),
'zipcode': Integer,
'latitude': Float,
'longitude': Float,
'location': String(100),
'location': String(250),
'tbmpage': Integer,
'tbmcolumn': String(10),
'tbmrow': Float,
59 changes: 36 additions & 23 deletions server/src/services/sqlIngest.py
@@ -20,6 +20,7 @@ def __init__(self, config=None, configFilePath=None, separator=','):
else self.config['Database']['DB_CONNECTION_STRING']
self.token = None if config['Socrata']['TOKEN'] == 'None' \
else config['Socrata']['TOKEN']
self.timeout = int(self.config['Socrata']['TIMEOUT'])
self.filePath = None
self.configFilePath = configFilePath
self.separator = separator
@@ -56,9 +57,12 @@ def cleanData(self):
zipIndex = (data['zipcode'].str.isdigit()) | (data['zipcode'].isna())
data['zipcode'].loc[~zipIndex] = np.nan
# Format dates as datetime (Time intensive)
data['createddate'] = pd.to_datetime(data['createddate'])
data['closeddate'] = pd.to_datetime(data['closeddate'])
data['servicedate'] = pd.to_datetime(data['servicedate'])
if 'createddate' in data.columns:
data['createddate'] = pd.to_datetime(data['createddate'])
if 'closeddate' in data.columns:
data['closeddate'] = pd.to_datetime(data['closeddate'])
if 'servicedate' in data.columns:
data['servicedate'] = pd.to_datetime(data['servicedate'])
data['location'] = data.location.astype(str)
# Check for column consistency
for f in self.fields:
@@ -93,6 +97,7 @@ def ingestData(self, ingestMethod='replace',
schema='public',
index=False,
chunksize=10,
method='multi',
dtype=self.insertParams)
print('\tIngest Complete: %.1f minutes' %
self.elapsedTimer(ingestTimer))
@@ -118,34 +123,43 @@ def saveCsvFile(self, filename):
'''Save contents of self.data to CSV output'''
self.data.to_csv(filename, index=False)

def fetchSocrata(self, year=2019, querySize=20000, pageSize=20000):
def fetchSocrata(self,
year=2019,
querySize=10000,
totalRequestRecords=10**7):
'''Fetch data from Socrata connection and return pandas dataframe'''
# Load config files
print('Retrieving partial Socrata query...')
fetchTimer = time.time()
socrata_domain = self.config['Socrata']['DOMAIN']
socrata_dataset_identifier = self.config['Socrata']['AP' + str(year)]
socrata_token = self.token
# Establish connection to Socrata resource
client = Socrata(socrata_domain, socrata_token)
client.timeout = self.timeout
# Fetch data
# Loop for querying dataset
queryDf = None
for i in range(0, querySize, pageSize):
# print(i + pageSize)
tableInit = False
for i in range(0, totalRequestRecords, querySize):
fetchTimer = time.time()
print('Fetching %d records with offset %d up to a max of %d'
% (querySize, i, totalRequestRecords))
results = client.get(socrata_dataset_identifier,
offset=i,
select="*",
order="updateddate DESC",
limit=querySize)
if not results:
break
tempDf = pd.DataFrame.from_dict(results)
if queryDf is None:
queryDf = tempDf.copy()
self.data = tempDf
self.cleanData()
if not tableInit:
self.ingestData(ingestMethod='replace')
tableInit = True
else:
queryDf = queryDf.append(tempDf)
self.data = queryDf
print('%d records retrieved in %.2f minutes' %
(self.data.shape[0], self.elapsedTimer(fetchTimer)))
self.ingestData(ingestMethod='append')
print('%d records retrieved in %.2f minutes' %
(self.data.shape[0], self.elapsedTimer(fetchTimer)))

def fetchSocrataFull(self, year=2019, limit=10**7):
'''Fetch entirety of dataset via Socrata'''
@@ -162,22 +176,21 @@ def fetchSocrataFull(self, year=2019, limit=10**7):
print('\tDownload Complete: %.1f minutes' %
self.elapsedTimer(downloadTimer))

def populateFullDatabase(self, yearRange=range(2015, 2021)):
def populateFullDatabase(self,
yearRange=range(2015, 2021),
querySize=None,
limit=None):
'''Fetches all data from Socrata to populate database
Default operation is to fetch data from 2015-2020
!!! Be aware that each fresh import will wipe the
existing staging table'''
print('Performing {} population from data source'.format(self.dialect))
tableInit = False
globalTimer = time.time()
for y in yearRange:
self.fetchSocrataFull(year=y)
self.cleanData()
if not tableInit:
self.ingestData(ingestMethod='replace')
tableInit = True
else:
self.ingestData(ingestMethod='append')
self.fetchSocrata(year=y,
querySize=querySize,
totalRequestRecords=limit)

print('All Operations Complete: %.1f minutes' %
self.elapsedTimer(globalTimer))

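The reworked fetchSocrata above replaces the old fetch-everything-then-ingest flow: it pages through the Socrata dataset querySize records at a time, cleaning and writing each page to the database as it arrives, with the first page replacing the staging table and later pages appended. A simplified sketch of that loop shape (fetch_page and ingest are stand-ins for the Socrata client.get() and DataFrame.to_sql() calls):

# Hedged sketch of the paged fetch-and-ingest pattern; not the project's exact code.
def paged_ingest(fetch_page, ingest, query_size=10000, total_request_records=10**7):
    table_initialized = False
    for offset in range(0, total_request_records, query_size):
        results = fetch_page(offset=offset, limit=query_size)
        if not results:
            break                                   # no more records to pull
        if not table_initialized:
            ingest(results, method='replace')       # first page wipes the staging table
            table_initialized = True
        else:
            ingest(results, method='append')        # later pages are appended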
1 change: 1 addition & 0 deletions server/src/settings.example.cfg
@@ -12,6 +12,7 @@ REDACTED = REDACTED

[Socrata]
TOKEN = None
TIMEOUT = 90
DOMAIN = data.lacity.org
AP2020 = rq3b-xjk8
AP2019 = pvft-t768