diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 7d218eaca..3bfad7ed3 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -394,9 +394,10 @@ field_names.PERCENT_AGE_UNDER_10: "AGE_10", field_names.PERCENT_AGE_10_TO_64: "AGE_MIDDLE", field_names.PERCENT_AGE_OVER_64: "AGE_OLD", - field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT: "TA_COUNT", + field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK: "TA_COUNT_AK", + field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS: "TA_COUNT_C", field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC", - field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING: "TA_PERC_STR", + field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY: "TA_PERC_FE", } # columns to round floats to 2 decimals diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index e257ee1f4..aec33aa9f 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -488,7 +488,9 @@ def _prepare_initial_df(self) -> pd.DataFrame: field_names.PERCENT_AGE_10_TO_64, field_names.PERCENT_AGE_OVER_64, field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT, - field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT, + field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK, + field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS, + field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY, ] + self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS non_numeric_columns = [ @@ -496,7 +498,6 @@ def _prepare_initial_df(self) -> pd.DataFrame: field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD, field_names.AGRICULTURAL_VALUE_BOOL_FIELD, field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT, - field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING, ] boolean_columns = [ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index 6a7cd9e5a..4ab78f23a 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10_TRACT,Does the tract have at least 35 acres in it?,Contains agricultural value,Names of Tribal areas within Census tract,"Percent of the Census tract that is within Tribal areas, for display",Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Percent of the Census tract that is within Tribal areas,Number of Tribal areas within Census tract,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Tract-level redlining score meets or exceeds 3.25,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Percent of the Census tract that is within Tribal areas (percentile),Number of Tribal areas within Census tract (percentile),Percent Black or African American in 2009 (percentile),Percent American Indian / Alaska Native in 2009 (percentile),Percent Asian in 2009 (percentile),Percent Native Hawaiian or Pacific in 2009 (percentile),Percent two or more races in 2009 (percentile),Percent White in 2009 (percentile),Percent Hispanic or Latino in 2009 (percentile),Percent other races in 2009 (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,At least one climate threshold exceeded,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,At least one housing threshold exceeded,Housing Factor (Definition N),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,"Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?","Is there at least one abandoned mine in this census tract, where missing data is treated as False?",At least one pollution threshold exceeded,Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for wastewater discharge,Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,At least one water threshold exceeded,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?,Greater than or equal to the 90th percentile for unemployment and has low HS attainment?,At least one workforce threshold exceeded,Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition N),Total categories exceeded,Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition N community, including adjacency index tracts" -01073001100,True,True,,,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,21.2475,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,,,0.6445941476491375,70.3,,,False,,True,0.646637250602938,0.2159888182416136,0.6290959230020547,0.2601978513507951,0.8509877263095018,0.726480167252144,0.4790223713964701,0.6190889309231579,0.9653762462133604,0.6970048212990485,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.8649904214559387,0.6038246858588359,0.5972204988211937,0.907050889025138,0.8818107500510934,0.8407248450166905,0.8256687748238973,0.5755269239817877,0.3919915848527349,0.9007380772142316,0.4819400886774089,0.7531091164702065,0.9619657803125694,0.3979128365956526,0.1737455390937601,0.7659646371796258,0.1287706711725437,0.1316846004109441,0.6347599221369092,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846382434272978,0.2153147384849333,0.6144741572412268,,,,0.9349594607528132,0.8950599559730369,,0.5210203309181356,0.4533200613827713,0.4974774332542475,0.8411474612766549,0.2685589820648203,0.6076863237481747,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.0261283146588784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,,,,,,,,,,,0.8218135517196475,0.970728837791148,,4781.0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,0.0,False,0,True,0.8571428571428571,False,False,False -01073001400,True,True,,,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,3.16184976454882,44.7571359825,0.2384615384615384,0.0,42.1254,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,,,0.6278134628440127,71.0,,,False,,True,0.3420576627932471,0.5051386757290068,0.0916310695360657,0.240302951305517,0.8385931477820602,0.9217996672023666,0.6049521425625418,0.7893561645783852,0.9878045311677784,0.8447513262127914,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.9708470169677066,0.5282933267343067,0.5972204988211937,0.907050889025138,0.9704748279855576,0.9380475509230874,0.8390650299616657,0.5827647767060159,0.9563253856942496,0.8799475505569374,0.800191954147291,0.8653347031469666,0.8431412487963854,0.8462609494971342,0.4711122526224721,0.6930355791067373,0.5867081244286861,0.5846423164269493,0.7916756785984643,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166150755981124,0.0865380767537716,0.5590415088078317,,,,0.6917513228236646,0.8737301229199994,,0.6417351573483292,0.3727897363582321,0.885039133873299,0.3366465235813016,0.5569693544162451,0.7880665456580186,0.9840732602732248,0.2486523003016117,0.0982626615533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,,,,,,,,,,,0.8364273002184828,0.9602892118901531,,1946.0,9,True,False,False,True,False,False,True,False,False,True,False,False,True,True,False,True,False,True,True,True,False,True,True,True,True,False,True,True,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,6.0,True,1,True,1.0,True,True,True +GEOID10_TRACT,Does the tract have at least 35 acres in it?,Contains agricultural value,Names of Tribal areas within Census tract,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Percent of the Census tract that is within Tribal areas,Number of Tribal areas within Census tract for Alaska,Number of Tribal areas within Census tract,"Percent of the Census tract that is within Tribal areas, for display",Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Tract-level redlining score meets or exceeds 3.25,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Percent of the Census tract that is within Tribal areas (percentile),Number of Tribal areas within Census tract for Alaska (percentile),Number of Tribal areas within Census tract (percentile),"Percent of the Census tract that is within Tribal areas, for display (percentile)",Percent Black or African American in 2009 (percentile),Percent American Indian / Alaska Native in 2009 (percentile),Percent Asian in 2009 (percentile),Percent Native Hawaiian or Pacific in 2009 (percentile),Percent two or more races in 2009 (percentile),Percent White in 2009 (percentile),Percent Hispanic or Latino in 2009 (percentile),Percent other races in 2009 (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,At least one climate threshold exceeded,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,At least one housing threshold exceeded,Housing Factor (Definition N),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,"Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?","Is there at least one abandoned mine in this census tract, where missing data is treated as False?",At least one pollution threshold exceeded,Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for wastewater discharge,Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,At least one water threshold exceeded,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?,Greater than or equal to the 90th percentile for unemployment and has low HS attainment?,At least one workforce threshold exceeded,Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition N),Total categories exceeded,Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition N community, including adjacency index tracts" +01073001100,True,True,,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,21.2475,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,,,,,0.6445941476491375,70.3,,,False,,True,0.646637250602938,0.2159888182416136,0.6290959230020547,0.2601978513507951,0.8509877263095018,0.726480167252144,0.4790223713964701,0.6190889309231579,0.9653762462133604,0.6970048212990485,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.8649904214559387,0.6038246858588359,0.5972204988211937,0.907050889025138,0.8818107500510934,0.8407248450166905,0.8256687748238973,0.5755269239817877,0.3919915848527349,0.9007380772142316,0.4819400886774089,0.7531091164702065,0.9619657803125694,0.3979128365956526,0.1737455390937601,0.7659646371796258,0.1287706711725437,0.1316846004109441,0.6347599221369092,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846382434272978,0.2153147384849333,0.6144741572412268,,,,0.9349594607528132,0.8950599559730369,,0.5210203309181356,0.4533200613827713,0.4974774332542475,0.8411474612766549,0.2685589820648203,0.6076863237481747,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.0261283146588784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,,,,,,,,,,,,,0.8218135517196475,0.970728837791148,,4781.0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,0.0,False,0,True,0.8571428571428571,False,False,False +01073001400,True,True,,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,3.16184976454882,44.7571359825,0.2384615384615384,0.0,42.1254,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,,,,,0.6278134628440127,71.0,,,False,,True,0.3420576627932471,0.5051386757290068,0.0916310695360657,0.240302951305517,0.8385931477820602,0.9217996672023666,0.6049521425625418,0.7893561645783852,0.9878045311677784,0.8447513262127914,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.9708470169677066,0.5282933267343067,0.5972204988211937,0.907050889025138,0.9704748279855576,0.9380475509230874,0.8390650299616657,0.5827647767060159,0.9563253856942496,0.8799475505569374,0.800191954147291,0.8653347031469666,0.8431412487963854,0.8462609494971342,0.4711122526224721,0.6930355791067373,0.5867081244286861,0.5846423164269493,0.7916756785984643,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166150755981124,0.0865380767537716,0.5590415088078317,,,,0.6917513228236646,0.8737301229199994,,0.6417351573483292,0.3727897363582321,0.885039133873299,0.3366465235813016,0.5569693544162451,0.7880665456580186,0.9840732602732248,0.2486523003016117,0.0982626615533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,,,,,,,,,,,,,0.8364273002184828,0.9602892118901531,,1946.0,9,True,False,False,True,False,False,True,False,False,True,False,False,True,True,False,True,False,True,True,True,False,True,True,True,True,False,True,True,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,6.0,True,1,True,1.0,True,True,True diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index f9baafbf1..0e5ac41b1 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl index ac1ba2bc9..7c5838c69 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index 6a9934c8d..6e314d652 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py b/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py index b64f2cb25..9e6d1bc2f 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py @@ -1,3 +1,5 @@ +from typing import Optional + import geopandas as gpd import numpy as np import pandas as pd @@ -51,12 +53,14 @@ class TribalOverlapETL(ExtractTransformLoad): def __init__(self): self.COLUMNS_TO_KEEP = [ self.GEOID_TRACT_FIELD_NAME, - field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT, + field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK, + field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS, field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT, field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT, - field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING, + field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY, ] + self.OVERALL_TRIBAL_COUNT = "OVERALL_TRIBAL_COUNT" self.output_df: pd.DataFrame self.census_tract_gdf: gpd.GeoDataFrame self.tribal_gdf: gpd.GeoDataFrame @@ -69,40 +73,18 @@ def _create_string_from_list(series: pd.Series) -> str: return ", ".join(str_list) @staticmethod - def _adjust_percentage_to_string(percentage_float: float) -> str: - """Helper method that converts numeric floats to strings based on what-to-show rules. - - What are these rules? - 0. If None, return none - 1. If the percentage is below 1%, produce 'less than 1%' - 2. If the percentage is above 99.95%, produce '100%' - 3. If the percentage is X.00 when rounded to two sig digits, display the integer of the percent - 4. If the percentage has unique significant digits, report two digits - """ - # Rule 0 - if not percentage_float: - # I believe we need to do this because JS will do weird things with a mix-type column? - return "No tribal areas" - # Rule 1 + def _adjust_percentage_for_frontend( + percentage_float: float, + ) -> Optional[float]: + """Round numbers very close to 0 to 0 and very close to 1 to 1 for display""" + if percentage_float is None: + return None if percentage_float < 0.01: - return "less than 1%" - # Rule 2 + return 0.0 if percentage_float > 0.9995: - return "100%" - - rounded_percentage_str = str(round(percentage_float, 4) * 100) - first_digits, last_digits = rounded_percentage_str.split(".") - - # Rule 3 (this is a shorthand because round(4) will truncate repeated 0s) - if last_digits[-1] == "0": - return first_digits + "%" + return 1.0 - # Rule 4 - if last_digits != "00": - return rounded_percentage_str + "%" - - # There is something missing! - raise Exception("Yikes! The string conversion here failed!") + return percentage_float def extract(self) -> None: self.census_tract_gdf = get_tract_geojson() @@ -130,7 +112,7 @@ def transform(self) -> None: tribal_overlap_with_tracts = tribal_overlap_with_tracts.rename( columns={ - field_names.TRIBAL_ID: field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT, + field_names.TRIBAL_ID: self.OVERALL_TRIBAL_COUNT, field_names.TRIBAL_LAND_AREA_NAME: field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT, } ) @@ -245,12 +227,31 @@ def transform(self) -> None: merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT], ) + # Counting tribes in the lower 48 is different from counting in AK, + # so per request by the design and frontend team, we remove all the + # counts outside AK + merged_output_df[ + field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK + ] = np.where( + # In Alaska + (merged_output_df_state_fips_code == "02"), + # Keey the counts + merged_output_df[self.OVERALL_TRIBAL_COUNT], + # Otherwise, null them + None, + ) + + # TODO: Count tribal areas in the lower 48 correctly + merged_output_df[ + field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS + ] = None + # The very final thing we want to do is produce a string for the front end to show # We do this here so that all of the logic is included merged_output_df[ - field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING + field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY ] = merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT].apply( - self._adjust_percentage_to_string + self._adjust_percentage_for_frontend ) self.output_df = merged_output_df diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 82267d1b9..485b1bc8f 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -355,12 +355,17 @@ TRIBAL_LAND_AREA_NAME = "landAreaName" # Tribal overlap variables -COUNT_OF_TRIBAL_AREAS_IN_TRACT = "Number of Tribal areas within Census tract" +COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS = ( + "Number of Tribal areas within Census tract" +) +COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK = ( + "Number of Tribal areas within Census tract for Alaska" +) NAMES_OF_TRIBAL_AREAS_IN_TRACT = "Names of Tribal areas within Census tract" PERCENT_OF_TRIBAL_AREA_IN_TRACT = ( "Percent of the Census tract that is within Tribal areas" ) -PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING = ( +PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY = ( "Percent of the Census tract that is within Tribal areas, for display" )