retrieve_res_load.py

import pandas as pd
import geopandas as gpd
from us import states
from tqdm import tqdm

# Root of NREL's End-Use Load Profiles (ResStock/ComStock) data lake on S3.
BASE_URL = (
    "https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock"
    "/end-use-load-profiles-for-us-building-stock")


def create_resstock_url(state_abbr,
                        puma_id,
                        building_type,
                        year=2021,
                        product='resstock',
                        weather_version='tmy3',
                        release=1,
                        ):
    """
    Builds the URL of a ResStock/ComStock timeseries aggregate CSV for a
    single PUMA and building type.
    """
    data_route = (f"/{year}"
                  f"/{product}_{weather_version}_release_{release}"
                  "/timeseries_aggregates/by_puma"
                  f"/state={state_abbr}/")
    file = f"{puma_id.lower()}-{building_type}.csv"
    return BASE_URL + data_route + file
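
# Illustrative call (a sketch; the state, PUMA GISJOIN, and building type below
# are made-up example values, not taken from this script's config):
#   create_resstock_url(state_abbr="NY",
#                       puma_id="G36000100",
#                       building_type="mobile_home")
# returns
#   BASE_URL + "/2021/resstock_tmy3_release_1/timeseries_aggregates"
#              "/by_puma/state=NY/g36000100-mobile_home.csv"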


def create_weather_url(puma_id,
                       year=2021,
                       product='resstock',
                       weather_version='tmy3',
                       release=1,
                       ):
    """
    Builds the URL of a TMY3 weather CSV in the End-Use Load Profiles
    data lake.
    """
    data_route = (f"/{year}"
                  f"/{product}_{weather_version}_release_{release}"
                  "/weather/tmy3/")
    file = f"{puma_id.upper()}_{weather_version}.csv"
    return BASE_URL + data_route + file
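
# Illustrative call (a sketch; "G3600610" is a made-up GISJOIN identifier):
#   create_weather_url(puma_id="G3600610")
# returns
#   BASE_URL + "/2021/resstock_tmy3_release_1/weather/tmy3/G3600610_tmy3.csv"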


def process_weather_timeseries(data_url, tmy=True, weather_year=2018):
    """
    This function processes timeseries weather data from NREL's
    ResStock and ComStock. If the data are from the typical
    meteorological year (TMY), then artificial timestamps will
    be applied.
    """
    df = pd.read_csv(data_url)
    df.columns = ['date_time',
                  'temp_db',
                  'rel_humidity',
                  'wind_speed',
                  'wind_direction',
                  'ghi',
                  'dni',
                  'dhi']
    if tmy:
        timestamps = pd.date_range(start=f'{weather_year}-01-01',
                                   freq='h',
                                   periods=8760)
        df.index = timestamps
    else:
        msg = "Processing non-TMY weather data has not been implemented."
        raise NotImplementedError(msg)
    return df
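
# Illustrative usage (a sketch, reusing the made-up GISJOIN from above):
#   url = create_weather_url(puma_id="G3600610")
#   weather = process_weather_timeseries(url)
# ``weather`` then holds 8760 hourly rows indexed at 2018 timestamps, with
# columns date_time, temp_db, rel_humidity, wind_speed, wind_direction,
# ghi, dni, and dhi.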
if __name__ == "__main__":
time_col = 'timestamp'
elec_col = 'out.electricity.total.energy_consumption'
heat_col = 'out.natural_gas.heating.energy_consumption'
# gather config options
state_name = snakemake.config['state']
state = states.lookup(state_name)
county = snakemake.config['county']
building_opts = snakemake.config['building_data_options']
sector_buildings = building_opts['building_types']
# load spatial lut
lut = pd.read_csv(snakemake.input.spatial_lut)
# get the PUMA ID this method is unstable, since some counties might contain
# multiple PUMAs
region = lut.loc[((lut['state_abbreviation']==state.abbr)
& (lut['resstock_county_id']==f"{state.abbr}, {county.capitalize()} County"))]
county_and_puma = region.loc[:,'nhgis_puma_gisjoin'].unique()[0]
county_gis_join = region.loc[:,'nhgis_county_gisjoin'].unique()[0]
# puma_id = county_and_puma.split(',')[-1].replace(' ','')
print("Retrieving weather data...")
weather_url = create_weather_url(puma_id=county_gis_join)
weather_data = process_weather_timeseries(weather_url)
weather_data.to_csv(snakemake.output.weather)
print("Weather data retrieved.")
outer_pbar = tqdm(list(sector_buildings.keys()), position=0, leave=True)
all_frames = []
for sector in outer_pbar:
outer_pbar.set_description(f"{sector.upper()}")
building_types = sector_buildings[sector]
inner_pbar = tqdm(building_types, position=1, leave=True)
elec_frames = []
heat_frames = []
for bldg_type in inner_pbar:
inner_pbar.set_description(f"Retrieving {bldg_type}")
bldg_url = create_resstock_url(state_abbr=state.abbr,
puma_id=county_and_puma,
building_type=bldg_type)
bldg_df = pd.read_csv(bldg_url,
parse_dates=True,
index_col=time_col)
bldg_df.to_csv(f"data/timeseries/{sector}_{bldg_type}.csv")
num_units = bldg_df['units_represented'].unique()[0]
heat_df = bldg_df[[heat_col]] / num_units
elec_df = bldg_df[[elec_col]] / num_units
elec_df = elec_df.rename(columns={elec_col:bldg_type})
heat_df = heat_df.rename(columns={heat_col:bldg_type})
elec_frames.append(elec_df)
heat_frames.append(heat_df)
elec_ts = pd.concat(elec_frames, axis=1)
elec_ts.to_csv(f"data/timeseries/{sector}_elec_load.csv")
heat_ts = pd.concat(heat_frames, axis=1)
heat_ts.to_csv(f"data/timeseries/{sector}_heat_load.csv")
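
# The ``__main__`` block assumes a Snakemake rule that provides roughly the
# following (a sketch inferred from the reads above; the example values are
# illustrative, not taken from this repository):
#
#   config:
#     state: "New York"            # full name or abbreviation, resolved via us.states.lookup()
#     county: "Tompkins"           # county name without the word "County"
#     building_data_options:
#       building_types:
#         residential: ["mobile_home", "single-family_detached", ...]
#   input:
#     spatial_lut: CSV with state_abbreviation, resstock_county_id,
#                  nhgis_puma_gisjoin, and nhgis_county_gisjoin columns
#   output:
#     weather: path where the processed weather timeseries is written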