Skip to content

Commit

Permalink
udc ent 20241202 changes
Browse files Browse the repository at this point in the history
  • Loading branch information
kurus21 committed Dec 2, 2024
1 parent 95d1c33 commit c8bdf3c
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ typeOf: dcs:StatVarObservation
observationDate: C:OzoneCTPollution->date
variableMeasured: dcs:Mean_Concentration_AirPollutant_Ozone
observationPeriod: "P8H"
unit: parts per billion (ppb)
unit: PartsPerBillion
value: C:OzoneCTPollution->Value

Node: E:OzoneCTPollution->E2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ typeOf: dcs:StatVarObservation
observationDate: C:OzoneCountyPollution->date
value: C:OzoneCountyPollution->O3_mean_pred
observationPeriod: "P8H"
unit: parts per billion (ppb)
unit: PartsPerBillion
variableMeasured: dcs:Mean_Concentration_AirPollutant_Ozone

Node: E:OzoneCountyPollution->E2
Expand All @@ -17,7 +17,7 @@ typeOf: dcs:StatVarObservation
observationDate: C:OzoneCountyPollution->date
value: C:OzoneCountyPollution->O3_med_pred
observationPeriod: "P8H"
unit: parts per billion (ppb)
unit: PartsPerBillion
variableMeasured: dcs:Median_Concentration_AirPollutant_Ozone

Node: E:OzoneCountyPollution->E3
Expand All @@ -26,7 +26,7 @@ typeOf: dcs:StatVarObservation
observationDate: C:OzoneCountyPollution->date
value: C:OzoneCountyPollution->O3_max_pred
observationPeriod: "P8H"
unit: parts per billion (ppb)
unit: PartsPerBillion
variableMeasured: dcs:Max_Concentration_AirPollutant_Ozone

Node: E:OzoneCountyPollution->E4
Expand All @@ -35,5 +35,5 @@ typeOf: dcs:StatVarObservation
observationDate: C:OzoneCountyPollution->date
value: C:OzoneCountyPollution->O3_pop_pred
observationPeriod: "P8H"
unit: parts per billion (ppb)
unit: PartsPerBillion
variableMeasured: dcs:PopulationWeighted_Concentration_AirPollutant_Ozone
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ typeOf: dcs:StatVarObservation
observationDate: C:PM25CTPollution->date
variableMeasured: dcs:Mean_Concentration_AirPollutant_PM2.5
observationPeriod: "P24H"
unit: μg/m3
unit: dcs:MicrogramsPerCubicMeter
value: C:PM25CTPollution->Value

Node: E:PM25CTPollution->E2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ typeOf: dcs:StatVarObservation
observationDate: C:PM25CountyPollution->date
value: C:PM25CountyPollution->PM25_mean_pred
observationPeriod: "P24H"
unit: μg/m3
unit: MicrogramsPerCubicMeter
variableMeasured: dcs:Mean_Concentration_AirPollutant_PM2.5

Node: E:PM25CountyPollution->E2
Expand All @@ -13,7 +13,7 @@ typeOf: dcs:StatVarObservation
observationDate: C:PM25CountyPollution->date
value: C:PM25CountyPollution->PM25_med_pred
observationPeriod: "P24H"
unit: μg/m3
unit: MicrogramsPerCubicMeter
variableMeasured: dcs:Median_Concentration_AirPollutant_PM2.5

Node: E:PM25CountyPollution->E3
Expand All @@ -22,7 +22,7 @@ typeOf: dcs:StatVarObservation
observationDate: C:PM25CountyPollution->date
value: C:PM25CountyPollution->PM25_max_pred
observationPeriod: "P24H"
unit: μg/m3
unit: MicrogramsPerCubicMeter
variableMeasured: dcs:Max_Concentration_AirPollutant_PM2.5

Node: E:PM25CountyPollution->E4
Expand All @@ -31,5 +31,5 @@ typeOf: dcs:StatVarObservation
observationDate: C:PM25CountyPollution->date
value: C:PM25CountyPollution->PM25_pop_pred
observationPeriod: "P24H"
unit: μg/m3
unit: MicrogramsPerCubicMeter
variableMeasured: dcs:PopulationWeighted_Concentration_AirPollutant_PM2.5
26 changes: 16 additions & 10 deletions scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ def main():
clean_air_quality_data(file_path, output_file)


def add_prefix_zero(value, length):
    """Left-pad ``value`` with zeros so it is at least ``length`` characters.

    Used while cleaning census-tract data to give every ``ctfips`` code the
    same width, restoring leading zeros that are lost when the codes are
    parsed as numbers.

    Args:
        value: The code to pad. It is converted with ``str()`` first, so
            integer codes are accepted as well as strings.
        length: Minimum width of the returned string.

    Returns:
        The zero-padded string. A value that is already ``length`` characters
        or longer is returned unchanged (as a string).
    """
    # str() makes the helper safe for non-string input; for strings it is a
    # no-op, so existing callers see identical results.
    return str(value).zfill(length)


def clean_air_quality_data(file_path, output_file):
"""
Args:
Expand All @@ -73,12 +78,8 @@ def clean_air_quality_data(file_path, output_file):
"""
print("Cleaning file...")
data = pd.read_csv(file_path)
if "Ozone" in file_path and "County" in file_path:
data["Month"] = data["Month"].map(MONTH_MAP)
data["date"] = pd.to_datetime(data[["Year", "Month", "Day"]],
yearfirst=True)
else:
data["date"] = pd.to_datetime(data["date"], yearfirst=True)
data["date"] = pd.to_datetime(data["date"], yearfirst=True)

if "PM2.5" in file_path:
census_tract = "DS_PM"
elif "Ozone" in file_path:
Expand All @@ -93,14 +94,19 @@ def clean_air_quality_data(file_path, output_file):
var_name='StatisticalVariable',
value_name='Value')
data.rename(columns={census_tract + '_stdd': 'Error'}, inplace=True)
max_length = data['ctfips'].astype(str).str.len().max()
data['ctfips'] = data['ctfips'].astype(str).apply(
lambda x: add_prefix_zero(x, max_length))
data["dcid"] = "geoId/" + data["ctfips"].astype(str)
data['StatisticalVariable'] = data['StatisticalVariable'].map(STATVARS)
elif "County" in file_path and "PM" in file_path:
data["countyfips"] = "1200" + data["countyfips"].astype(str)
data["dcid"] = "geoId/" + data["countyfips"].astype(str)
data["statefips"] = data["statefips"].astype(str).str.zfill(2)
data["countyfips"] = data["countyfips"].astype(str).str.zfill(3)
data["dcid"] = "geoId/" + data["statefips"] + data["countyfips"]
elif "County" in file_path and "Ozone" in file_path:
data["countyfips"] = "1200" + data["countyfips"].astype(str)
data["dcid"] = "geoId/" + data["countyfips"].astype(str)
data["statefips"] = data["statefips"].astype(str).str.zfill(2)
data["countyfips"] = data["countyfips"].astype(str).str.zfill(3)
data["dcid"] = "geoId/" + data["statefips"] + data["countyfips"]
data.to_csv(output_file, float_format='%.6f', index=False)
print("Finished cleaning file!")

Expand Down

0 comments on commit c8bdf3c

Please sign in to comment.