Rearrange a little for readability (#1835)

usds · Sep 2, 2022 · 9cd5eb6 · 9cd5eb6
1 parent c0c2064
commit 9cd5eb6
Showing 1 changed file with 9 additions and 4 deletions.
diff --git a/data/data-pipeline/data_pipeline/tests/score/test_output.py b/data/data-pipeline/data_pipeline/tests/score/test_output.py
@@ -255,21 +255,26 @@ def test_data_sources(
     }
 
     for data_source_name, data_source in data_sources.items():
-        final = "_final"
+        final = "final"
         df: pd.DataFrame = final_score_df.merge(
             data_source,
             on=GEOID_TRACT_FIELD_NAME,
             indicator="MERGE",
             suffixes=(final, f"_{data_source_name}"),
             how="left",
         )
+
+        # Make sure we have NAs for any tracts in the final data that aren't
+        # covered in the data source
+        assert np.all(df[df.MERGE == "left_only"][final_columns].isna())
+        df = df[df.MERGE == "both"]
+
+        # Make our lists of columns for later comparison
         core_cols = data_source.columns.intersection(
             final_score_df.columns
         ).drop(GEOID_TRACT_FIELD_NAME)
         data_source_columns = [f"{col}_{data_source_name}" for col in core_cols]
-        final_columns = [f"{col}{final}" for col in core_cols]
-        assert np.all(df[df.MERGE == "left_only"][final_columns].isna())
-        df = df[df.MERGE == "both"]
+        final_columns = [f"{col}_{final}" for col in core_cols]
         assert (
             final_columns
         ), f"No columns from data source show up in final score in source {data_source_name}"