diff --git a/scripts/us_cdc/environmental_health_toxicology/OzoneCensusTractPollution.tmcf b/scripts/us_cdc/environmental_health_toxicology/OzoneCensusTractPollution.tmcf index 7e84ac4a03..7c96dcea72 100644 --- a/scripts/us_cdc/environmental_health_toxicology/OzoneCensusTractPollution.tmcf +++ b/scripts/us_cdc/environmental_health_toxicology/OzoneCensusTractPollution.tmcf @@ -4,7 +4,7 @@ typeOf: dcs:StatVarObservation observationDate: C:OzoneCTPollution->date variableMeasured: dcs:Mean_Concentration_AirPollutant_Ozone observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: dcs:PartsPerBillion value: C:OzoneCTPollution->Value Node: E:OzoneCTPollution->E2 diff --git a/scripts/us_cdc/environmental_health_toxicology/OzoneCountyPollution.tmcf b/scripts/us_cdc/environmental_health_toxicology/OzoneCountyPollution.tmcf index 95c8551eee..2df45e298f 100644 --- a/scripts/us_cdc/environmental_health_toxicology/OzoneCountyPollution.tmcf +++ b/scripts/us_cdc/environmental_health_toxicology/OzoneCountyPollution.tmcf @@ -8,7 +8,7 @@ typeOf: dcs:StatVarObservation observationDate: C:OzoneCountyPollution->date value: C:OzoneCountyPollution->O3_mean_pred observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: dcs:PartsPerBillion variableMeasured: dcs:Mean_Concentration_AirPollutant_Ozone Node: E:OzoneCountyPollution->E2 @@ -17,7 +17,7 @@ typeOf: dcs:StatVarObservation observationDate: C:OzoneCountyPollution->date value: C:OzoneCountyPollution->O3_med_pred observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: dcs:PartsPerBillion variableMeasured: dcs:Median_Concentration_AirPollutant_Ozone Node: E:OzoneCountyPollution->E3 @@ -26,7 +26,7 @@ typeOf: dcs:StatVarObservation observationDate: C:OzoneCountyPollution->date value: C:OzoneCountyPollution->O3_max_pred observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: dcs:PartsPerBillion variableMeasured: dcs:Max_Concentration_AirPollutant_Ozone Node: E:OzoneCountyPollution->E4 @@ -35,5 +35,5 @@ typeOf: dcs:StatVarObservation 
observationDate: C:OzoneCountyPollution->date value: C:OzoneCountyPollution->O3_pop_pred observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: dcs:PartsPerBillion variableMeasured: dcs:PopulationWeighted_Concentration_AirPollutant_Ozone diff --git a/scripts/us_cdc/environmental_health_toxicology/PM25CensusTractPollution.tmcf b/scripts/us_cdc/environmental_health_toxicology/PM25CensusTractPollution.tmcf index 3b97d68a94..5d14798ff1 100644 --- a/scripts/us_cdc/environmental_health_toxicology/PM25CensusTractPollution.tmcf +++ b/scripts/us_cdc/environmental_health_toxicology/PM25CensusTractPollution.tmcf @@ -4,7 +4,7 @@ typeOf: dcs:StatVarObservation observationDate: C:PM25CTPollution->date variableMeasured: dcs:Mean_Concentration_AirPollutant_PM2.5 observationPeriod: "P24H" -unit: μg/m3 +unit: dcs:MicrogramsPerCubicMeter value: C:PM25CTPollution->Value Node: E:PM25CTPollution->E2 diff --git a/scripts/us_cdc/environmental_health_toxicology/PM25CountyPollution.tmcf b/scripts/us_cdc/environmental_health_toxicology/PM25CountyPollution.tmcf index c2a5620056..7636d7ef3f 100644 --- a/scripts/us_cdc/environmental_health_toxicology/PM25CountyPollution.tmcf +++ b/scripts/us_cdc/environmental_health_toxicology/PM25CountyPollution.tmcf @@ -4,7 +4,7 @@ typeOf: dcs:StatVarObservation observationDate: C:PM25CountyPollution->date value: C:PM25CountyPollution->PM25_mean_pred observationPeriod: "P24H" -unit: μg/m3 +unit: dcs:MicrogramsPerCubicMeter variableMeasured: dcs:Mean_Concentration_AirPollutant_PM2.5 Node: E:PM25CountyPollution->E2 @@ -13,7 +13,7 @@ typeOf: dcs:StatVarObservation -observationDate: C:PPM25CountyPollution->date +observationDate: C:PM25CountyPollution->date value: C:PM25CountyPollution->PM25_med_pred observationPeriod: "P24H" -unit: μg/m3 +unit: dcs:MicrogramsPerCubicMeter variableMeasured: dcs:Median_Concentration_AirPollutant_PM2.5 Node: E:PM25CountyPollution->E3 @@ -22,7 +22,7 @@ typeOf: dcs:StatVarObservation observationDate: C:PM25CountyPollution->date value: C:PM25CountyPollution->PM25_max_pred 
observationPeriod: "P24H" -unit: μg/m3 +unit: dcs:MicrogramsPerCubicMeter variableMeasured: dcs:Max_Concentration_AirPollutant_PM2.5 Node: E:PM25CountyPollution->E4 @@ -31,5 +31,5 @@ typeOf: dcs:StatVarObservation observationDate: C:PM25CountyPollution->date value: C:PM25CountyPollution->PM25_pop_pred observationPeriod: "P24H" -unit: μg/m3 +unit: dcs:MicrogramsPerCubicMeter variableMeasured: dcs:PopulationWeighted_Concentration_AirPollutant_PM2.5 diff --git a/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py b/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py index 1c658b3d8a..8d9c4c3811 100644 --- a/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py +++ b/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py @@ -63,6 +63,11 @@ def main(): clean_air_quality_data(file_path, output_file) +# Left-pads a census tract FIPS code (ctfips) with zeros to the given length. +def add_prefix_zero(value, length): + return value.zfill(length) + + def clean_air_quality_data(file_path, output_file): """ Args: @@ -73,12 +78,8 @@ def clean_air_quality_data(file_path, output_file): """ print("Cleaning file...") data = pd.read_csv(file_path) - if "Ozone" in file_path and "County" in file_path: - data["Month"] = data["Month"].map(MONTH_MAP) - data["date"] = pd.to_datetime(data[["Year", "Month", "Day"]], - yearfirst=True) - else: - data["date"] = pd.to_datetime(data["date"], yearfirst=True) + data["date"] = pd.to_datetime(data["date"], yearfirst=True) + if "PM2.5" in file_path: census_tract = "DS_PM" elif "Ozone" in file_path: @@ -93,14 +94,19 @@ def clean_air_quality_data(file_path, output_file): var_name='StatisticalVariable', value_name='Value') data.rename(columns={census_tract + '_stdd': 'Error'}, inplace=True) + max_length = data['ctfips'].astype(str).str.len().max() + data['ctfips'] = data['ctfips'].astype(str).apply( + lambda x: add_prefix_zero(x, max_length)) data["dcid"] = "geoId/" + data["ctfips"].astype(str) data['StatisticalVariable'] = 
data['StatisticalVariable'].map(STATVARS) elif "County" in file_path and "PM" in file_path: - data["countyfips"] = "1200" + data["countyfips"].astype(str) - data["dcid"] = "geoId/" + data["countyfips"].astype(str) + data["statefips"] = data["statefips"].astype(str).str.zfill(2) + data["countyfips"] = data["countyfips"].astype(str).str.zfill(3) + data["dcid"] = "geoId/" + data["statefips"] + data["countyfips"] elif "County" in file_path and "Ozone" in file_path: - data["countyfips"] = "1200" + data["countyfips"].astype(str) - data["dcid"] = "geoId/" + data["countyfips"].astype(str) + data["statefips"] = data["statefips"].astype(str).str.zfill(2) + data["countyfips"] = data["countyfips"].astype(str).str.zfill(3) + data["dcid"] = "geoId/" + data["statefips"] + data["countyfips"] data.to_csv(output_file, float_format='%.6f', index=False) print("Finished cleaning file!")