Skip to content

Commit

Permalink
Change TA_PERC, change TA_COUNT (#1988, #1989)
Browse files Browse the repository at this point in the history
- Make TA_PERC_STR back into a nullable float following the rules
  requested in #1989
- Move TA_COUNT to be TA_COUNT_AK, also add a null TA_COUNT_C for CONUS
  that we can fill in later.
  • Loading branch information
mattbowen-usds committed Oct 6, 2022
1 parent 6505d49 commit 466b0a1
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 45 deletions.
5 changes: 3 additions & 2 deletions data/data-pipeline/data_pipeline/etl/score/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,9 +394,10 @@
field_names.PERCENT_AGE_UNDER_10: "AGE_10",
field_names.PERCENT_AGE_10_TO_64: "AGE_MIDDLE",
field_names.PERCENT_AGE_OVER_64: "AGE_OLD",
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT: "TA_COUNT",
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK: "TA_COUNT_AK",
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS: "TA_COUNT_C",
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC",
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING: "TA_PERC_STR",
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY: "TA_PERC_FE",
}

# columns to round floats to 2 decimals
Expand Down
5 changes: 3 additions & 2 deletions data/data-pipeline/data_pipeline/etl/score/etl_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,15 +488,16 @@ def _prepare_initial_df(self) -> pd.DataFrame:
field_names.PERCENT_AGE_10_TO_64,
field_names.PERCENT_AGE_OVER_64,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
] + self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS

non_numeric_columns = [
self.GEOID_TRACT_FIELD_NAME,
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
]

boolean_columns = [
Expand Down

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Binary file not shown.
73 changes: 37 additions & 36 deletions data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional

import geopandas as gpd
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -51,12 +53,14 @@ class TribalOverlapETL(ExtractTransformLoad):
def __init__(self):
self.COLUMNS_TO_KEEP = [
self.GEOID_TRACT_FIELD_NAME,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
]

self.OVERALL_TRIBAL_COUNT = "OVERALL_TRIBAL_COUNT"
self.output_df: pd.DataFrame
self.census_tract_gdf: gpd.GeoDataFrame
self.tribal_gdf: gpd.GeoDataFrame
Expand All @@ -69,40 +73,18 @@ def _create_string_from_list(series: pd.Series) -> str:
return ", ".join(str_list)

@staticmethod
def _adjust_percentage_to_string(percentage_float: float) -> str:
"""Helper method that converts numeric floats to strings based on what-to-show rules.
What are these rules?
0. If None, return none
1. If the percentage is below 1%, produce 'less than 1%'
2. If the percentage is above 99.95%, produce '100%'
3. If the percentage is X.00 when rounded to two sig digits, display the integer of the percent
4. If the percentage has unique significant digits, report two digits
"""
# Rule 0
if not percentage_float:
# I believe we need to do this because JS will do weird things with a mix-type column?
return "No tribal areas"
# Rule 1
def _adjust_percentage_for_frontend(
percentage_float: float,
) -> Optional[float]:
"""Round numbers very close to 0 to 0 and very close to 1 to 1 for display"""
if percentage_float is None:
return None
if percentage_float < 0.01:
return "less than 1%"
# Rule 2
return 0.0
if percentage_float > 0.9995:
return "100%"

rounded_percentage_str = str(round(percentage_float, 4) * 100)
first_digits, last_digits = rounded_percentage_str.split(".")

# Rule 3 (this is a shorthand because round(4) will truncate repeated 0s)
if last_digits[-1] == "0":
return first_digits + "%"
return 1.0

# Rule 4
if last_digits != "00":
return rounded_percentage_str + "%"

# There is something missing!
raise Exception("Yikes! The string conversion here failed!")
return percentage_float

def extract(self) -> None:
self.census_tract_gdf = get_tract_geojson()
Expand Down Expand Up @@ -130,7 +112,7 @@ def transform(self) -> None:

tribal_overlap_with_tracts = tribal_overlap_with_tracts.rename(
columns={
field_names.TRIBAL_ID: field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
field_names.TRIBAL_ID: self.OVERALL_TRIBAL_COUNT,
field_names.TRIBAL_LAND_AREA_NAME: field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
}
)
Expand Down Expand Up @@ -245,12 +227,31 @@ def transform(self) -> None:
merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT],
)

# Counting tribes in the lower 48 is different from counting in AK,
# so per request by the design and frontend team, we remove all the
# counts outside AK
merged_output_df[
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK
] = np.where(
# In Alaska
(merged_output_df_state_fips_code == "02"),
# Keep the counts
merged_output_df[self.OVERALL_TRIBAL_COUNT],
# Otherwise, null them
None,
)

# TODO: Count tribal areas in the lower 48 correctly
merged_output_df[
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS
] = None

# The very final thing we want to do is produce a display value for the front end to show
# We do this here so that all of the logic is included
merged_output_df[
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY
] = merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT].apply(
self._adjust_percentage_to_string
self._adjust_percentage_for_frontend
)

self.output_df = merged_output_df
9 changes: 7 additions & 2 deletions data/data-pipeline/data_pipeline/score/field_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,12 +355,17 @@
TRIBAL_LAND_AREA_NAME = "landAreaName"

# Tribal overlap variables
COUNT_OF_TRIBAL_AREAS_IN_TRACT = "Number of Tribal areas within Census tract"
COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS = (
"Number of Tribal areas within Census tract"
)
COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK = (
"Number of Tribal areas within Census tract for Alaska"
)
NAMES_OF_TRIBAL_AREAS_IN_TRACT = "Names of Tribal areas within Census tract"
PERCENT_OF_TRIBAL_AREA_IN_TRACT = (
"Percent of the Census tract that is within Tribal areas"
)
PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING = (
PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY = (
"Percent of the Census tract that is within Tribal areas, for display"
)

Expand Down

0 comments on commit 466b0a1

Please sign in to comment.