-
Notifications
You must be signed in to change notification settings - Fork 28
/
dataset.py
87 lines (64 loc) · 2.32 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import warnings
import orca
import pandas as pd
import assumptions
import utils
# Suppress the PerformanceWarning category exposed by pandas' pytables module
# for the whole process, so the warning does not clutter the output.
warnings.filterwarnings(
    action='ignore',
    category=pd.io.pytables.PerformanceWarning,
)
@orca.table('jobs', cache=True)
def jobs(store):
    """Provide the cached orca table 'jobs'.

    Reads the 'jobs' entry from ``store`` and returns whatever
    ``utils.fill_nas_from_config('jobs', ...)`` produces for it
    (presumably the same frame with missing values filled according to the
    project configuration -- confirm against ``utils``).
    """
    return utils.fill_nas_from_config('jobs', store['jobs'])
@orca.table('buildings', cache=True)
def buildings(store):
    """Provide the cached orca table 'buildings'.

    Reads the 'buildings' entry from ``store``, keeps only the rows whose
    ``building_type_id`` is greater than 0 and at most 14, and returns the
    result of ``utils.fill_nas_from_config('buildings', ...)`` applied to
    the remaining rows.
    """
    raw = store['buildings']
    type_id = raw.building_type_id
    # valid building types are 1..14 inclusive; everything else is dropped
    in_range = (type_id > 0) & (type_id <= 14)
    return utils.fill_nas_from_config('buildings', raw[in_range])
@orca.table('households', cache=True)
def households(store):
    """Provide the cached orca table 'households', read unchanged from ``store``."""
    return store['households']
@orca.table('parcels', cache=True)
def parcels(store):
    """Provide the cached orca table 'parcels', read unchanged from ``store``."""
    return store['parcels']
# these are shapes - "zones" in the bay area
@orca.table('zones', cache=True)
def zones(store):
    """Provide the cached orca table 'zones', read unchanged from ``store``."""
    return store['zones']
# starts with the same underlying shapefile, but is used later in the simulation
@orca.table('zones_prices', cache=True)
def zones_prices(store):
    """Provide the cached orca table 'zones_prices'.

    The data is read from the 'zones' entry of ``store`` (there is no
    separate 'zones_prices' entry); it is registered under its own table
    name.
    """
    return store['zones']
# this is the mapping of parcels to zoning attributes
@orca.table('zoning_for_parcels', cache=True)
def zoning_for_parcels(store):
    """Provide the cached orca table 'zoning_for_parcels'.

    Reads the 'zoning_for_parcels' entry from ``store`` and removes rows
    that repeat a value in the 'parcel' column, so the returned frame is
    indexed by 'parcel' with one row per parcel.
    """
    # Move the index into regular columns so 'parcel' can be de-duplicated.
    flat = store['zoning_for_parcels'].reset_index()
    one_row_per_parcel = flat.drop_duplicates(subset='parcel')
    return one_row_per_parcel.set_index('parcel')
# this is the actual zoning
@orca.table('zoning', cache=True)
def zoning(store):
    """Provide the cached orca table 'zoning', read unchanged from ``store``."""
    return store['zoning']
# Zoning for use in the "baseline" scenario.
# The underlying data comes from the HDF5 store.
@orca.table('zoning_baseline', cache=True)
def zoning_baseline(zoning, zoning_for_parcels):
    """Provide the cached orca table 'zoning_baseline'.

    Joins the parcel-to-zoning mapping with the zoning table: each row of
    ``zoning_for_parcels`` is matched, through its 'zoning' column, to the
    row of ``zoning`` whose index holds the same value.  Both arguments
    must offer a ``to_frame()`` method returning a DataFrame.
    """
    parcel_zoning = zoning_for_parcels.to_frame()
    zoning_attributes = zoning.to_frame()
    return parcel_zoning.merge(
        zoning_attributes,
        left_on='zoning',
        right_index=True,
    )
# Register the relationships between tables with orca.  Each entry reads
# (cast table, onto table, column): the index of the cast table is matched
# against the named column of the onto table.
for _cast, _onto, _onto_column in (
    ('zones', 'homesales', 'zone_id'),
    ('zones', 'costar', 'zone_id'),
    ('zones', 'apartments', 'zone_id'),
    ('zones', 'buildings', 'zone_id'),
    ('zones_prices', 'buildings', 'zone_id'),
    ('parcels', 'buildings', 'parcel_id'),
    ('buildings', 'households', 'building_id'),
    ('buildings', 'jobs', 'building_id'),
):
    orca.broadcast(_cast, _onto, cast_index=True, onto_on=_onto_column)