-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel_project.py
198 lines (169 loc) · 8.93 KB
/
model_project.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
import numpy as np
from census import Census
from us import states
import pandas as pd
import requests
import zipfile
import os
RELATIVE_PATH = f'./data/'
def df_oep(fipscode):
dataframes = []
for year in [2015,2016,2017,2018,2019,2020,2021,2022,2023]:
try:
if year in [2015,2016,2017]:
# Obtén los datos de población por condado para el año actual
#print(year)
path = f'{RELATIVE_PATH}{year}_OEP_County-Level_Public_Use_File.zip'
file = f'{year}_OEP_County-Level_Public_Use_File.xlsx'
sheet = "(1) Consumer Type"
archive = zipfile.ZipFile(path, 'r')
xlfile = archive.open(file)
if year == 2016:
df2 = pd.read_excel(xlfile,sheet_name= sheet)[['County FIPS Code','State ',
'Total Number of Consumers Who Have Selected a Marketplace Plan']].rename(columns={'Total Number of Consumers Who Have Selected a Marketplace Plan': 'Cnsmr',
'State ': 'State'})
df2['yrs'] = f'yrs_{year}'
df2 = df2.iloc[:-1,:]
else:
df2 = pd.read_excel(xlfile,sheet_name= sheet)[['County FIPS Code','State',
'Total Number of Consumers Who Have Selected a Marketplace Plan']].rename(columns={'Total Number of Consumers Who Have Selected a Marketplace Plan': 'Cnsmr'})
df2['yrs'] = f'yrs_{year}'
df2 = df2.iloc[:-1,:]
if year in [2018]:
# Obtén los datos de población por condado para el año actual
#print(year)
path = f'{RELATIVE_PATH}OE{year}_County_PUF_20180404.zip'
file = f'OE{year}_County_PUF_20180404.xlsx'
sheet = "(1) Enrollment Status"
archive = zipfile.ZipFile(path, 'r')
xlfile = archive.open(file)
df2 = pd.read_excel(xlfile,sheet_name= sheet)[['County FIPS Code', 'State',
'Total Number of Consumers Who Have Selected an Exchange Plan']].rename(columns={'Total Number of Consumers Who Have Selected an Exchange Plan': 'Cnsmr'})
df2['yrs'] = f'yrs_{year}'
df2 = df2.iloc[:-1,:]
elif year in [2019,2022]:
#print(year)
path = f'{RELATIVE_PATH}{year} OEP County-Level Public Use File.zip'
file = f'{year} OEP County-Level Public Use File.xlsx'
sheet = "(1) Enrollment Status"
archive = zipfile.ZipFile(path, 'r')
xlfile = archive.open(file)
if year == 2019:
df2 = pd.read_excel(xlfile,sheet_name= sheet)[['County FIPS Code','State',
'Total Number of Consumers Who Have Selected an Exchange Plan']].rename(columns={'Total Number of Consumers Who Have Selected an Exchange Plan': 'Cnsmr'})
df2['yrs'] = f'yrs_{year}'
df2 = df2.iloc[:-1,:]
else:
df2 = pd.read_excel(xlfile,sheet_name= sheet)[['County FIPS Code','State',
'Number of Consumers with a Marketplace Plan Selection']].rename(columns={'Number of Consumers with a Marketplace Plan Selection': 'Cnsmr'})
df2['yrs'] = f'yrs_{year}'
df2 = df2.iloc[:-1,:]
elif year in [2020]:
#print(year)
path = f'{RELATIVE_PATH}{year} OEP County-Level Public Use File.zip'
file = f'{year} OEP County-Level Public Use File.csv'
sheet = "2020 OEP County-Level Public Us"
archive = zipfile.ZipFile(path, 'r')
xlfile = archive.open(file)
df2 = pd.read_csv(xlfile)[['Cnty_FIPS_Cd','State_Abrvtn','Cnsmr']].rename(columns={'Cnty_FIPS_Cd':'County FIPS Code','State_Abrvtn': 'State'})
df2['yrs'] = f'yrs_{year}'
df2 = df2.iloc[:-1,:]
elif year in [2023]:
#print(year)
path = f'{RELATIVE_PATH}{year} OEP County-Level Public Use File.zip'
file = f'{year} OEP County-Level Public Use File.xlsx'
sheet = "(1) by Enrollment Status"
archive = zipfile.ZipFile(path, 'r')
xlfile = archive.open(file)
df2 = pd.read_excel(xlfile,sheet_name= sheet)[['County FIPS Code', 'State',
'Number of Consumers with a Marketplace Plan Selection']].rename(columns={'Number of Consumers with a Marketplace Plan Selection': 'Cnsmr'})
df2['yrs'] = f'yrs_{year}'
df2 = df2.iloc[:-1,:]
elif year in [2021]:
#print(year)
path = f'{RELATIVE_PATH}{year} OEP County-Level Public Use File_0.zip'
file = f'{year} OEP County-Level Public Use File.csv'
archive = zipfile.ZipFile(path, 'r')
xlfile = archive.open(file)
df2 = pd.read_csv(xlfile)[['County_FIPS_Cd','State_Abrvtn','Cnsmr']].rename(columns={'County_FIPS_Cd':'County FIPS Code','State_Abrvtn': 'State'})
df2['yrs'] = f'yrs_{year}'
df2 = df2.iloc[:-1,:]
except Exception as e:
# Manejo de la excepción sin detener el ciclo
print("Ocurrió un error:", str(e))
df = pd.DataFrame(df2)
dataframes.append(df)
df_final = pd.concat(dataframes, axis=0)
df_final['yrs'] = df_final['yrs'].str[-4:]
df_final = df_final[df_final['County FIPS Code'] == fipscode]
return df_final
def population(fipscode):
df_pop1 = pd.read_csv(f'{RELATIVE_PATH}co-est2020__.csv',encoding='latin-1').iloc[:,[3,4,5,8,9,10,11,12,13,14,15,16,17,18,19]]
df_pop2 = pd.read_csv(f'{RELATIVE_PATH}co-est2023-alldata.csv',encoding='latin-1').iloc[:,[3,4,5,9,10,11]]
df_pop1['COUNTY'] = df_pop1['COUNTY'].apply(lambda x: f'00{x}' if 1 <= x <= 9
else f'0{x}' if 10 <= x <= 99
else x)
df_pop1['STATE'] = df_pop1['STATE'].apply(lambda x: f'0{x}' if 1 <= x <= 9
else x)
df_pop1['County FIPS Code'] = df_pop1['STATE'].astype(str) +''+df_pop1['COUNTY'].astype(str)
df_pop1 = pd.melt(
df_pop1,
id_vars=["County FIPS Code", "STNAME"],
value_vars=['POPESTIMATE2015','POPESTIMATE2016','POPESTIMATE2017','POPESTIMATE2018','POPESTIMATE2019',
'POPESTIMATE042020'],
var_name="yrs",
value_name="Population",
)
df_pop1['yrs'] = df_pop1['yrs'].str[-4:]
df_pop2['COUNTY'] = df_pop2['COUNTY'].apply(lambda x: f'00{x}' if 1 <= x <= 9
else f'0{x}' if 10 <= x <= 99
else x)
df_pop2['STATE'] = df_pop2['STATE'].apply(lambda x: f'0{x}' if 1 <= x <= 9
else x)
df_pop2['County FIPS Code'] = df_pop2['STATE'].astype(str) +''+df_pop2['COUNTY'].astype(str)
df_pop2 = pd.melt(
df_pop2,
id_vars=["County FIPS Code", "STNAME"],
value_vars=['POPESTIMATE2021','POPESTIMATE2022','POPESTIMATE2023'],
var_name="yrs",
value_name="Population",
)
df_pop2['yrs'] = df_pop2['yrs'].str[-4:]
df_pop = pd.concat([df_pop1,df_pop2], axis=0)
df_maximos = df_pop.groupby(['STNAME','yrs']).agg({'Population':'max'}).reset_index()
df_pop = df_pop.merge(df_maximos, on=['STNAME','yrs','Population'], how='left', indicator=True)
df_pop = df_pop.query('_merge != "both"')[['County FIPS Code','STNAME','yrs','Population']]
df_pop = df_pop[df_pop['County FIPS Code'] == fipscode]
return df_pop
### this function build dataframe use on the model
def build_df(fipscode):
x = population(fipscode)[['yrs','Population']]
y = df_oep(fipscode)[['yrs','Cnsmr']]
y['Cnsmr'] = pd.to_numeric(y['Cnsmr'].astype(str).str.replace(',', ''))
df_model = pd.merge(x, y, on='yrs')
return df_model
def build_onnx_model(fipscode, forecast, county):
df_reg = build_df(fipscode)
## Ajustar el modelo a los datos del dataframe df_reg
model = LinearRegression()
x = np.array(df_reg['Population']).reshape(-1, 1)
y = df_reg['Cnsmr']
model.fit(x, y)
## Dato para realizar el pronóstico
x_proy = np.array([forecast]).reshape(-1, 1)
model.predict(X = x_proy)
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
# Define the initial types for the ONNX model
initial_type = [('float_input', FloatTensorType([None, x.shape[1]]))]
# Convert the scikit-learn model to ONNX
onnx_model = convert_sklearn(model, initial_types=initial_type)
# Save the ONNX model to a file
with open(os.path.join('onnx', f'{county}_county.onnx'), "wb") as f:
f.write(onnx_model.SerializeToString())
build_onnx_model('12007', 30010, 'bradford')
build_onnx_model('12047', 12910, 'hamilton')
build_onnx_model('12075', 48370, 'levy')
build_onnx_model('12131', 89541, 'walton')