Skip to content

Commit

Permalink
Emma nechamkin/holc patch (#1742)
Browse files Browse the repository at this point in the history
Removes the HOLC (historic redlining) calculation from the housing factor in score narwhal.
  • Loading branch information
emma-nechamkin authored Jul 15, 2022
1 parent bd395bb commit 859e1ca
Showing 1 changed file with 27 additions and 23 deletions.
50 changes: 27 additions & 23 deletions data/data-pipeline/data_pipeline/score/score_narwhal.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,16 +314,18 @@ def _housing_factor(self) -> bool:
housing_eligibility_columns = [
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED_LOW_INCOME_FIELD,
# The historic redlining field below is NOT included in the housing
# eligibility columns until we get confirmation (removed in a patch).
# field_names.HISTORIC_REDLINING_SCORE_EXCEEDED_LOW_INCOME_FIELD,
]

# design question -- should read in scalar with threshold here instead?
self.df[
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED_LOW_INCOME_FIELD
] = (
self.df[field_names.HISTORIC_REDLINING_SCORE_EXCEEDED]
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
)
# # design question -- should read in scalar with threshold here instead?
# self.df[
# field_names.HISTORIC_REDLINING_SCORE_EXCEEDED_LOW_INCOME_FIELD
# ] = (
# self.df[field_names.HISTORIC_REDLINING_SCORE_EXCEEDED]
# & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
# )

self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD] = (
self.df[
Expand Down Expand Up @@ -639,29 +641,27 @@ def _workforce_factor(self) -> bool:
)
| (
self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD]
& (self.df[field_names.GEOID_TRACT_FIELD].str[:2] != constants.TILES_PUERTO_RICO_FIPS_CODE[0] )
& (
self.df[field_names.GEOID_TRACT_FIELD].str[:2]
!= constants.TILES_PUERTO_RICO_FIPS_CODE[0]
)
)
)

# Use only PR combined criteria for rows with PR FIPS code;
# otherwise use all criteria.
workforce_combined_criteria_for_states = (
(
(
self.df[field_names.GEOID_TRACT_FIELD].str[:2] == constants.TILES_PUERTO_RICO_FIPS_CODE[0]
)
&
self.df[pr_workforce_eligibility_columns].any(axis="columns")
self.df[field_names.GEOID_TRACT_FIELD].str[:2]
== constants.TILES_PUERTO_RICO_FIPS_CODE[0]
)
|
& self.df[pr_workforce_eligibility_columns].any(axis="columns")
) | (
(
(
self.df[field_names.GEOID_TRACT_FIELD].str[:2] != constants.TILES_PUERTO_RICO_FIPS_CODE[0]
)
& self.df[
workforce_eligibility_columns
].any(axis="columns")
self.df[field_names.GEOID_TRACT_FIELD].str[:2]
!= constants.TILES_PUERTO_RICO_FIPS_CODE[0]
)
& self.df[workforce_eligibility_columns].any(axis="columns")
)

self._increment_total_eligibility_exceeded(
Expand Down Expand Up @@ -793,7 +793,10 @@ def _workforce_factor(self) -> bool:
)
| (
self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD]
& ( self.df[field_names.GEOID_TRACT_FIELD].str[:2] != constants.TILES_PUERTO_RICO_FIPS_CODE[0] )
& (
self.df[field_names.GEOID_TRACT_FIELD].str[:2]
!= constants.TILES_PUERTO_RICO_FIPS_CODE[0]
)
)
) | (
## then we calculate just for the island areas
Expand Down Expand Up @@ -859,7 +862,8 @@ def add_columns(self) -> pd.DataFrame:
self.df[field_names.CATEGORY_COUNT] = self.df[factors].sum(axis=1)
self.df[field_names.SCORE_N_COMMUNITIES] = self.df[factors].any(axis=1)
self.df[
field_names.SCORE_N_COMMUNITIES + field_names.PERCENTILE_FIELD_SUFFIX
field_names.SCORE_N_COMMUNITIES
+ field_names.PERCENTILE_FIELD_SUFFIX
] = self.df[field_names.SCORE_N_COMMUNITIES].astype(int)

return self.df

0 comments on commit 859e1ca

Please sign in to comment.