Change TA_PERC, change TA_COUNT (#1988, #1989)

- Make TA_PERC_STR back into a nullable float following the rules requestsed in #1989 - Move TA_COUNT to be TA_COUNT_AK, also add a null TA_COUNT_C for CONUS that we can fill in later.
usds · Oct 6, 2022 · 466b0a1 · 466b0a1
1 parent 6505d49
commit 466b0a1
Show file tree

Hide file tree

Showing 8 changed files with 53 additions and 45 deletions.
diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -394,9 +394,10 @@
     field_names.PERCENT_AGE_UNDER_10: "AGE_10",
     field_names.PERCENT_AGE_10_TO_64: "AGE_MIDDLE",
     field_names.PERCENT_AGE_OVER_64: "AGE_OLD",
-    field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT: "TA_COUNT",
+    field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK: "TA_COUNT_AK",
+    field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS: "TA_COUNT_C",
     field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC",
-    field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING: "TA_PERC_STR",
+    field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY: "TA_PERC_FE",
 }
 
 # columns to round floats to 2 decimals

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -488,15 +488,16 @@ def _prepare_initial_df(self) -> pd.DataFrame:
             field_names.PERCENT_AGE_10_TO_64,
             field_names.PERCENT_AGE_OVER_64,
             field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
-            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
+            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
         ] + self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS
 
         non_numeric_columns = [
             self.GEOID_TRACT_FIELD_NAME,
             field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
             field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
             field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
-            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
         ]
 
         boolean_columns = [

diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py b/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import geopandas as gpd
 import numpy as np
 import pandas as pd
@@ -51,12 +53,14 @@ class TribalOverlapETL(ExtractTransformLoad):
     def __init__(self):
         self.COLUMNS_TO_KEEP = [
             self.GEOID_TRACT_FIELD_NAME,
-            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
             field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
             field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
-            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
+            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
         ]
 
+        self.OVERALL_TRIBAL_COUNT = "OVERALL_TRIBAL_COUNT"
         self.output_df: pd.DataFrame
         self.census_tract_gdf: gpd.GeoDataFrame
         self.tribal_gdf: gpd.GeoDataFrame
@@ -69,40 +73,18 @@ def _create_string_from_list(series: pd.Series) -> str:
         return ", ".join(str_list)
 
     @staticmethod
-    def _adjust_percentage_to_string(percentage_float: float) -> str:
-        """Helper method that converts numeric floats to strings based on what-to-show rules.
-
-        What are these rules?
-        0. If None, return none
-        1. If the percentage is below 1%, produce 'less than 1%'
-        2. If the percentage is above 99.95%, produce '100%'
-        3. If the percentage is X.00 when rounded to two sig digits, display the integer of the percent
-        4. If the percentage has unique significant digits, report two digits
-        """
-        # Rule 0
-        if not percentage_float:
-            # I believe we need to do this because JS will do weird things with a mix-type column?
-            return "No tribal areas"
-        # Rule 1
+    def _adjust_percentage_for_frontend(
+        percentage_float: float,
+    ) -> Optional[float]:
+        """Round numbers very close to 0 to 0 and very close to 1 to 1 for display"""
+        if percentage_float is None:
+            return None
         if percentage_float < 0.01:
-            return "less than 1%"
-        # Rule 2
+            return 0.0
         if percentage_float > 0.9995:
-            return "100%"
-
-        rounded_percentage_str = str(round(percentage_float, 4) * 100)
-        first_digits, last_digits = rounded_percentage_str.split(".")
-
-        # Rule 3 (this is a shorthand because round(4) will truncate repeated 0s)
-        if last_digits[-1] == "0":
-            return first_digits + "%"
+            return 1.0
 
-        # Rule 4
-        if last_digits != "00":
-            return rounded_percentage_str + "%"
-
-        # There is something missing!
-        raise Exception("Yikes! The string conversion here failed!")
+        return percentage_float
 
     def extract(self) -> None:
         self.census_tract_gdf = get_tract_geojson()
@@ -130,7 +112,7 @@ def transform(self) -> None:
 
         tribal_overlap_with_tracts = tribal_overlap_with_tracts.rename(
             columns={
-                field_names.TRIBAL_ID: field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
+                field_names.TRIBAL_ID: self.OVERALL_TRIBAL_COUNT,
                 field_names.TRIBAL_LAND_AREA_NAME: field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
             }
         )
@@ -245,12 +227,31 @@ def transform(self) -> None:
             merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT],
         )
 
+        # Counting tribes in the lower 48 is different from counting in AK,
+        # so per request by the design and frontend team, we remove all the
+        # counts outside AK
+        merged_output_df[
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK
+        ] = np.where(
+            # In Alaska
+            (merged_output_df_state_fips_code == "02"),
+            # Keey the counts
+            merged_output_df[self.OVERALL_TRIBAL_COUNT],
+            # Otherwise, null them
+            None,
+        )
+
+        # TODO: Count tribal areas in the lower 48 correctly
+        merged_output_df[
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS
+        ] = None
+
         # The very final thing we want to do is produce a string for the front end to show
         # We do this here so that all of the logic is included
         merged_output_df[
-            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING
+            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY
         ] = merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT].apply(
-            self._adjust_percentage_to_string
+            self._adjust_percentage_for_frontend
         )
 
         self.output_df = merged_output_df
diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py
@@ -355,12 +355,17 @@
 TRIBAL_LAND_AREA_NAME = "landAreaName"
 
 # Tribal overlap variables
-COUNT_OF_TRIBAL_AREAS_IN_TRACT = "Number of Tribal areas within Census tract"
+COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS = (
+    "Number of Tribal areas within Census tract"
+)
+COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK = (
+    "Number of Tribal areas within Census tract for Alaska"
+)
 NAMES_OF_TRIBAL_AREAS_IN_TRACT = "Names of Tribal areas within Census tract"
 PERCENT_OF_TRIBAL_AREA_IN_TRACT = (
     "Percent of the Census tract that is within Tribal areas"
 )
-PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING = (
+PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY = (
     "Percent of the Census tract that is within Tribal areas, for display"
 )