Switch from python3 (CPython) to pypy3 (PyPy) for speed
anthonyfok committed Oct 22, 2021
1 parent 5f6340a commit 6f84f3d
Showing 3 changed files with 48 additions and 46 deletions.
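The speed claim rests on PyPy's tracing JIT, which mainly benefits the long-running, CPU-bound pure-Python loops in these loader scripts; database and network round-trips are unaffected. A minimal sketch (not part of this commit) for confirming which interpreter a script is running under, mirroring the check added to utils.py below:

# Minimal sketch (assumption: invoked with either python3 or pypy3 on PATH).
import platform

impl = platform.python_implementation()  # "CPython" or "PyPy"
print(f"Running under {impl} {platform.python_version()}")
if impl == "PyPy":
    print("JIT-compiled interpreter: CPU-bound loops should speed up after warm-up.")
else:
    print("CPython: the same script can be launched with pypy3 instead of python3.")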
python/Dockerfile (4 changes: 1 addition & 3 deletions)
@@ -1,4 +1,4 @@
-FROM ghcr.io/opendrr/python-env
+FROM ghcr.io/anthonyfok/python-env:debian-sid-20201012

LABEL maintainer="Joost van Ulden <[email protected]>"

@@ -8,8 +8,6 @@ COPY . .

#RUN chmod +x docker-entrypoint.sh && chmod +x add_data.sh
RUN chmod +x add_data.sh && \
-apt-get update && \
-apt-get install -y dos2unix eatmydata git git-lfs jq time && \
dos2unix add_data.sh
#ENTRYPOINT ["/usr/src/app/docker-entrypoint.sh"]
CMD ./add_data.sh
python/add_data.sh (82 changes: 41 additions & 41 deletions)
@@ -568,7 +568,7 @@ post_process_mh_tables() {

copy_ancillary_tables() {
LOG '## Use python to run \copy from a system call'
-RUN python3 copyAncillaryTables.py
+RUN pypy3 copyAncillaryTables.py
}

post_process_all_tables_update() {
@@ -630,7 +630,7 @@ import_raw_psra_tables() {
# This was only needed when the cHazard data was divided by economic region
# for PT in "${PT_LIST[@]}"; do
# ( cd "cHazard/$PT"
-# RUN python3 /usr/src/app/PSRA_hCurveTableCombine.py --hCurveDir="/usr/src/app/cHazard/$PT/"
+# RUN pypy3 /usr/src/app/PSRA_hCurveTableCombine.py --hCurveDir="/usr/src/app/cHazard/$PT/"
# )
# done

@@ -657,7 +657,7 @@ import_raw_psra_tables() {
RUN merge_csv ebR_*avg_losses-stats_r2.csv "ebR_${PT}_avg_losses-stats_r2.csv"

# Combine source loss tables for runs that were split by economic region or sub-region
-RUN python3 /usr/src/app/PSRA_combineSrcLossTable.py --srcLossDir="/usr/src/app/ebRisk/$PT"
+RUN pypy3 /usr/src/app/PSRA_combineSrcLossTable.py --srcLossDir="/usr/src/app/ebRisk/$PT"
)
done
}
@@ -668,12 +668,12 @@ post_process_psra_tables() {

LOG "## PSRA_1-8"
for PT in "${PT_LIST[@]}"; do
-RUN python3 PSRA_runCreate_tables.py --province="$PT" --sqlScript="psra_1.Create_tables.sql"
-RUN python3 PSRA_copyTables.py --province="$PT"
-RUN python3 PSRA_sqlWrapper.py --province="$PT" --sqlScript="psra_2.Create_table_updates.sql"
-RUN python3 PSRA_sqlWrapper.py --province="$PT" --sqlScript="psra_3.Create_psra_building_all_indicators.sql"
-RUN python3 PSRA_sqlWrapper.py --province="$PT" --sqlScript="psra_4.Create_psra_sauid_all_indicators.sql"
-RUN python3 PSRA_sqlWrapper.py --province="$PT" --sqlScript="psra_5.Create_psra_sauid_references_indicators.sql"
+RUN pypy3 PSRA_runCreate_tables.py --province="$PT" --sqlScript="psra_1.Create_tables.sql"
+RUN pypy3 PSRA_copyTables.py --province="$PT"
+RUN pypy3 PSRA_sqlWrapper.py --province="$PT" --sqlScript="psra_2.Create_table_updates.sql"
+RUN pypy3 PSRA_sqlWrapper.py --province="$PT" --sqlScript="psra_3.Create_psra_building_all_indicators.sql"
+RUN pypy3 PSRA_sqlWrapper.py --province="$PT" --sqlScript="psra_4.Create_psra_sauid_all_indicators.sql"
+RUN pypy3 PSRA_sqlWrapper.py --province="$PT" --sqlScript="psra_5.Create_psra_sauid_references_indicators.sql"
done

RUN run_psql psra_6.Create_psra_merge_into_national_indicators.sql
@@ -698,7 +698,7 @@ import_earthquake_scenarios() {

LOG "## Importing scenario outputs into PostGIS"
for eqscenario in ${EQSCENARIO_LIST[*]}; do
-RUN python3 DSRA_outputs2postgres_lfs.py --dsraModelDir=$DSRA_REPOSITORY --columnsINI=DSRA_outputs2postgres.ini --eqScenario="$eqscenario"
+RUN pypy3 DSRA_outputs2postgres_lfs.py --dsraModelDir=$DSRA_REPOSITORY --columnsINI=DSRA_outputs2postgres.ini --eqScenario="$eqscenario"
done
}

@@ -720,7 +720,7 @@ import_shakemap() {
-L "$DOWNLOAD_URL"

# Run Create_table_shakemap.sql
-RUN python3 DSRA_runCreateTableShakemap.py --shakemapFile="$shakemap_filename"
+RUN pypy3 DSRA_runCreateTableShakemap.py --shakemapFile="$shakemap_filename"
done

# Run Create_table_shakemap_update.sql or Create_table_shakemap_update_ste.sql
@@ -736,11 +736,11 @@ import_shakemap() {
case $SITE in
s)
echo "Site Model"
-RUN python3 DSRA_runCreateTableShakemapUpdate.py --eqScenario="$eqscenario" --exposureAgg="$SITE"
+RUN pypy3 DSRA_runCreateTableShakemapUpdate.py --eqScenario="$eqscenario" --exposureAgg="$SITE"
;;
b)
echo "Building Model"
-RUN python3 DSRA_runCreateTableShakemapUpdate.py --eqScenario="$eqscenario" --exposureAgg="$SITE"
+RUN pypy3 DSRA_runCreateTableShakemapUpdate.py --eqScenario="$eqscenario" --exposureAgg="$SITE"
;;
esac
echo " " # TODO: Find out the purpose of this echo statement
@@ -749,7 +749,7 @@ import_shakemap() {

import_rupture_model() {
LOG "## Importing Rupture Model"
-RUN python3 DSRA_ruptures2postgres.py --dsraRuptureDir="https://github.com/OpenDRR/earthquake-scenarios/tree/master/ruptures"
+RUN pypy3 DSRA_ruptures2postgres.py --dsraRuptureDir="https://github.com/OpenDRR/earthquake-scenarios/tree/master/ruptures"

LOG "## Generating indicator views"
for item in ${EQSCENARIO_LIST_LONGFORM[*]}; do
@@ -760,14 +760,14 @@
case $SITE in
s)
#echo "Site Model"
-RUN python3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=site_level --exposureModel=site
-RUN python3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=building --exposureModel=site
-RUN python3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=sauid --exposureModel=site
+RUN pypy3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=site_level --exposureModel=site
+RUN pypy3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=building --exposureModel=site
+RUN pypy3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=sauid --exposureModel=site
;;
b)
#echo "Building Model"
-RUN python3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=building --exposureModel=building
-RUN python3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=sauid --exposureModel=building
+RUN pypy3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=building --exposureModel=building
+RUN pypy3 DSRA_createRiskProfileIndicators.py --eqScenario="$eqscenario" --aggregation=sauid --exposureModel=building
;;
esac
done
@@ -801,10 +801,10 @@ export_to_elasticsearch() {
# shellcheck disable=SC2154
if [ "$loadPsraModels" = true ]; then
LOG "Creating PSRA indices in Elasticsearch"
-RUN python3 psra_postgres2es.py
-RUN python3 hmaps_postgres2es.py
-RUN python3 uhs_postgres2es.py
-RUN python3 srcLoss_postgres2es.py
+RUN pypy3 psra_postgres2es.py
+RUN pypy3 hmaps_postgres2es.py
+RUN pypy3 uhs_postgres2es.py
+RUN pypy3 srcLoss_postgres2es.py

LOG "Creating PSRA Kibana Index Patterns"
RUN curl -X POST -H "Content-Type: application/json" "${KIBANA_ENDPOINT}/s/gsc-cgc/api/saved_objects/index-pattern/opendrr_psra_indicators_s" -H "kbn-xsrf: true" -d '{ "attributes": { "title":"opendrr_psra_indicators_s"}}'
@@ -819,8 +819,8 @@ export_to_elasticsearch() {
if [[ "$loadDsraScenario" = true ]]; then
for eqscenario in ${EQSCENARIO_LIST[*]}; do
LOG "Creating Elasticsearch indexes for DSRA"
-RUN python3 dsra_postgres2es.py --eqScenario="$eqscenario" --dbview="all_indicators" --idField="building"
-RUN python3 dsra_postgres2es.py --eqScenario="$eqscenario" --dbview="all_indicators" --idField="sauid"
+RUN pypy3 dsra_postgres2es.py --eqScenario="$eqscenario" --dbview="all_indicators" --idField="building"
+RUN pypy3 dsra_postgres2es.py --eqScenario="$eqscenario" --dbview="all_indicators" --idField="sauid"

# LOG "Creating DSRA Kibana Index Patterns"
# Need to develop saved object workflow for automated index pattern generation
@@ -835,7 +835,7 @@ done
# if [[ "$loadHazardThreat" = true ]]; then
# # All Indicators
# LOG "Creating Elasticsearch indexes for Hazard Threat"
-# RUN python3 hazardThreat_postgres2es.py --type="all_indicators" --aggregation="sauid" --geometry=geom_poly --idField="Sauid"
+# RUN pypy3 hazardThreat_postgres2es.py --type="all_indicators" --aggregation="sauid" --geometry=geom_poly --idField="Sauid"

# LOG "Creating Hazard Threat Kibana Index Patterns"
# RUN curl -X POST -H "Content-Type: application/json" "${KIBANA_ENDPOINT}/s/gsc-cgc/api/saved_objects/index-pattern/opendrr_hazard_threat_all_indicators_s" -H "kbn-xsrf: true" -d '{ "attributes": { "title":"opendrr_hazard_threat_all_indicators_s"}}'
@@ -845,8 +845,8 @@ done
# shellcheck disable=SC2154
if [[ $loadPhysicalExposure = true ]]; then
LOG "Creating Elasticsearch indexes for Physical Exposure"
-RUN python3 exposure_postgres2es.py --aggregation="building" --geometry=geom_point
-RUN python3 exposure_postgres2es.py --aggregation="sauid" --geometry=geom_poly
+RUN pypy3 exposure_postgres2es.py --aggregation="building" --geometry=geom_point
+RUN pypy3 exposure_postgres2es.py --aggregation="sauid" --geometry=geom_poly

LOG "Creating Exposure Kibana Index Patterns"
RUN curl -X POST -H "Content-Type: application/json" "${KIBANA_ENDPOINT}/s/gsc-cgc/api/saved_objects/index-pattern/opendrr_nhsl_physical_exposure_all_indicators_s" -H "kbn-xsrf: true" -d '{ "attributes": { "title":"opendrr_nhsl_physical_exposure_all_indicators_s"}}'
@@ -858,7 +858,7 @@ done
# 2021/09/21 DR - Keeping Hazard Threat and Risk Dynamics out of ES for the time being
# if [[ $loadRiskDynamics = true ]]; then
# LOG "Creating Elasticsearch indexes for Risk Dynamics"
-# RUN python3 riskDynamics_postgres2es.py --type="all_indicators" --aggregation="sauid" --geometry=geom_point --idField="ghslID"
+# RUN pypy3 riskDynamics_postgres2es.py --type="all_indicators" --aggregation="sauid" --geometry=geom_point --idField="ghslID"

# LOG "Creating Risk Dynamics Kibana Index Patterns"
# RUN curl -X POST -H "Content-Type: application/json" "${KIBANA_ENDPOINT}/s/gsc-cgc/api/saved_objects/index-pattern/opendrr_nhsl_risk_dynamics_all_indicators" -H "kbn-xsrf: true" -d '{ "attributes": { "title":"opendrr_nhsl_risk_dynamics_all_indicators"}}'
@@ -868,12 +868,12 @@ done
# shellcheck disable=SC2154
if [[ $loadSocialFabric = true ]]; then
LOG "Creating Elasticsearch indexes for Social Fabric"
-RUN python3 socialFabric_postgres2es.py --aggregation="sauid" --geometry=geom_poly --sortfield="Sauid"
-RUN python3 socialFabric_postgres2es.py --aggregation="hexgrid_5km" --geometry=geom --sortfield="gridid_5"
-RUN python3 socialFabric_postgres2es.py --aggregation="hexgrid_10km" --geometry=geom --sortfield="gridid_10"
-RUN python3 socialFabric_postgres2es.py --aggregation="hexgrid_25km" --geometry=geom --sortfield="gridid_25"
-RUN python3 socialFabric_postgres2es.py --aggregation="hexgrid_50km" --geometry=geom --sortfield="gridid_50"
-RUN python3 socialFabric_postgres2es.py --aggregation="hexgrid_100km" --geometry=geom --sortfield="gridid_100"
+RUN pypy3 socialFabric_postgres2es.py --aggregation="sauid" --geometry=geom_poly --sortfield="Sauid"
+RUN pypy3 socialFabric_postgres2es.py --aggregation="hexgrid_5km" --geometry=geom --sortfield="gridid_5"
+RUN pypy3 socialFabric_postgres2es.py --aggregation="hexgrid_10km" --geometry=geom --sortfield="gridid_10"
+RUN pypy3 socialFabric_postgres2es.py --aggregation="hexgrid_25km" --geometry=geom --sortfield="gridid_25"
+RUN pypy3 socialFabric_postgres2es.py --aggregation="hexgrid_50km" --geometry=geom --sortfield="gridid_50"
+RUN pypy3 socialFabric_postgres2es.py --aggregation="hexgrid_100km" --geometry=geom --sortfield="gridid_100"

LOG "Creating Social Fabric Kibana Index Patterns"
RUN curl -X POST -H "Content-Type: application/json" "${KIBANA_ENDPOINT}/s/gsc-cgc/api/saved_objects/index-pattern/opendrr_nhsl_social_fabric_all_indicators_s" -H "kbn-xsrf: true" -d '{ "attributes": { "title":"opendrr_nhsl_social_fabric_all_indicators_s"}}'
@@ -888,12 +888,12 @@ done
# shellcheck disable=SC2154
if [[ $loadHexGrid = true ]]; then
LOG "Creating Elasticsearch indexes for Hexgrids"
-RUN python3 hexgrid_5km_postgres2es.py
-RUN python3 hexgrid_10km_postgres2es.py
-RUN python3 hexgrid_25km_postgres2es.py
-RUN python3 hexgrid_50km_postgres2es.py
-RUN python3 hexgrid_100km_postgres2es.py
-RUN python3 hexgrid_sauid_postgres2es.py
+RUN pypy3 hexgrid_5km_postgres2es.py
+RUN pypy3 hexgrid_10km_postgres2es.py
+RUN pypy3 hexgrid_25km_postgres2es.py
+RUN pypy3 hexgrid_50km_postgres2es.py
+RUN pypy3 hexgrid_100km_postgres2es.py
+RUN pypy3 hexgrid_sauid_postgres2es.py

LOG "Creating HexGrid Kibana Index Patterns"
RUN curl -X POST -H "Content-Type: application/json" "${KIBANA_ENDPOINT}/s/gsc-cgc/api/saved_objects/index-pattern/opendrr_hexgrid_5km" -H "kbn-xsrf: true" -d '{ "attributes": { "title":"opendrr_hexgrid_5km"}}'
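All of the add_data.sh changes above are mechanical: each helper script is still invoked through the same RUN wrapper, and only the interpreter name changes from python3 to pypy3, so every script must import cleanly under PyPy. A hypothetical pre-flight check (module list assumed from utils.py and the Elasticsearch loaders, not taken from this commit):

# Hypothetical smoke test: confirm the pypy3 environment provides the modules
# the loader scripts import, without running the full pipeline.
import importlib.util
import platform

required = ["configparser", "json", "decimal", "elasticsearch"]
required.append("psycopg2cffi" if platform.python_implementation() == "PyPy" else "psycopg2")
missing = [name for name in required if importlib.util.find_spec(name) is None]
print(f"{platform.python_implementation()}: missing modules -> {missing or 'none'}")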
python/utils.py (8 changes: 6 additions & 2 deletions)
@@ -1,4 +1,3 @@
-#!/usr/bin/python3
# =================================================================
# SPDX-License-Identifier: MIT
#
@@ -9,10 +8,15 @@
# =================================================================

import configparser
-import psycopg2
import json
import decimal

+import platform
+if platform.python_implementation() == "PyPy":
+    import psycopg2cffi as psycopg2
+else:
+    import psycopg2

from elasticsearch import Elasticsearch
from elasticsearch import helpers

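The conditional import above is what lets the rest of utils.py keep making plain psycopg2 calls: under PyPy, the CFFI port is simply bound to the familiar name. An alternative worth noting (not what this commit does) is psycopg2cffi's compat shim, which registers the package in sys.modules under the name psycopg2 so downstream imports need no branching at all; a sketch, assuming psycopg2cffi is installed in the PyPy environment:

# Sketch of the psycopg2cffi compat-layer alternative (not used by this commit).
import platform

if platform.python_implementation() == "PyPy":
    from psycopg2cffi import compat
    compat.register()  # registers psycopg2cffi in sys.modules as "psycopg2"

import psycopg2  # resolves to psycopg2cffi under PyPy, to the C psycopg2 under CPython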
