diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index ecbe59578..af6a17b21 100644 --- a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -256,4 +256,13 @@ fields: format: bool - score_name: Percent of population not currently enrolled in college or graduate school label: Percent of residents who are not currently enrolled in higher ed - format: percentage \ No newline at end of file + format: percentage + - score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? + label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? + format: bool + - score_name: Leaky underground storage tanks (percentile) + label: Leaky underground storage tanks (percentile) + format: percentage + - score_name: Leaky underground storage tanks + label: Leaky underground storage tanks + format: float \ No newline at end of file diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index cee48b566..f983ded46 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -153,12 +153,21 @@ sheets: - score_name: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students? label: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students? format: bool + - score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? + label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? 
+ format: bool - score_name: Wastewater discharge (percentile) label: Wastewater discharge (percentile) format: percentage + - score_name: Leaky underground storage tanks (percentile) + label: Leaky underground storage tanks (percentile) + format: percentage - score_name: Wastewater discharge label: Wastewater discharge format: float + - score_name: Leaky underground storage tanks + label: Leaky underground storage tanks + format: float - score_name: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students? label: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students? format: bool diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 59075dbda..94eb77a33 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -195,6 +195,8 @@ + field_names.PERCENTILE_FIELD_SUFFIX: "UF_PFS", field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS", + field_names.UST_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX: "UST_PFS", field_names.M_WATER: "M_WTR", field_names.M_WORKFORCE: "M_WKFC", field_names.M_CLIMATE: "M_CLT", @@ -220,6 +222,7 @@ field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD: "SFLI", field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HWLI", field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "WDLI", + field_names.UST_LOW_INCOME_FIELD: "USTLI", field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD: "DLI", field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD: "ALI", field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HDLI", @@ -242,6 +245,7 @@ field_names.NPL_PCTILE_THRESHOLD: "NPL_ET", field_names.TSDF_PCTILE_THRESHOLD: "TSDF_ET", field_names.WASTEWATER_PCTILE_THRESHOLD: "WD_ET", + field_names.UST_PCTILE_THRESHOLD: "UST_ET", 
field_names.DIABETES_PCTILE_THRESHOLD: "DB_ET", field_names.ASTHMA_PCTILE_THRESHOLD: "A_ET", field_names.HEART_DISEASE_PCTILE_THRESHOLD: "HD_ET", diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index c736e3fc8..4be007916 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -446,6 +446,7 @@ def _prepare_initial_df(self) -> pd.DataFrame: field_names.NPL_FIELD, field_names.WASTEWATER_FIELD, field_names.LEAD_PAINT_FIELD, + field_names.UST_FIELD, field_names.UNDER_5_FIELD, field_names.OVER_64_FIELD, field_names.LINGUISTIC_ISO_FIELD, diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 04bc62e4f..5b7a88af7 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -170,7 +170,7 @@ NPL_FIELD = "Proximity to NPL sites" AIR_TOXICS_CANCER_RISK_FIELD = "Air toxics cancer risk" RESPIRATORY_HAZARD_FIELD = "Respiratory hazard index" -UST_FIELD = "Underground storage tanks" +UST_FIELD = "Leaky underground storage tanks" LOW_INCOME_THRESHOLD = "Exceeds FPL200 threshold" @@ -430,6 +430,8 @@ # Critical Clean Water and Waste Infrastructure WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income?" +UST_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks and is low income?" + # Health Burdens DIABETES_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for diabetes and is low income?" 
@@ -629,6 +631,8 @@ NPL_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for NPL (superfund sites) proximity" TSDF_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for proximity to hazardous waste sites" WASTEWATER_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge" +UST_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks" + DIABETES_PCTILE_THRESHOLD = ( f"Greater than or equal to the {PERCENTILE}th percentile for diabetes" ) diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py index bea5cbed0..081b591bd 100644 --- a/data/data-pipeline/data_pipeline/score/score_narwhal.py +++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py @@ -443,23 +443,42 @@ def _water_factor(self) -> bool: ] >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) - - # Straight copy here in case we add additional water fields. 
- self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[ - field_names.WASTEWATER_PCTILE_THRESHOLD - ].copy() + self.df[field_names.UST_PCTILE_THRESHOLD] = ( + self.df[field_names.UST_FIELD + field_names.PERCENTILE_FIELD_SUFFIX] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD] = ( self.df[field_names.WASTEWATER_PCTILE_THRESHOLD] & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] ) + self.df[field_names.UST_LOW_INCOME_FIELD] = ( + self.df[field_names.UST_PCTILE_THRESHOLD] + & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] + ) + + self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[ + [ + field_names.WASTEWATER_PCTILE_THRESHOLD, + field_names.UST_PCTILE_THRESHOLD, + ] + ].max(axis=1) + self._increment_total_eligibility_exceeded( - [field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD], + [ + field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD, + field_names.UST_LOW_INCOME_FIELD, + ], skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS, ) - return self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD] + return self.df[ + [ + field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD, + field_names.UST_LOW_INCOME_FIELD, + ] + ].any(axis=1) def _health_factor(self) -> bool: # In Xth percentile or above for diabetes (Source: CDC Places)