-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEx2.py
202 lines (178 loc) · 8.38 KB
/
Ex2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits,load_iris
from sklearn.datasets import fetch_openml
from utils import *
from collections import defaultdict
import time
from concurrent.futures import ThreadPoolExecutor
import functools
def main():
def process_algo(params, names, perc):
print("--------- ",names)
corr, rmse = Algo(params[0], **params[1])
np.savetxt(f'{names}_{perc}_corr.txt', corr, fmt='%d')
#np.savetxt(f'{names}_{perc}_rmse.txt', rmse, fmt='%d')
# f = open(f'{names}_{perc}_corr.txt','w')
# f.write(corr)
# f.close()
f = open(f'{names}_{perc}_rmse.txt','w')
f.write(rmse)
f.close()
return [rmse,corr]
def Algo(func,**kwargs):
impute = func(**kwargs)
if impute.shape == kwargs['df'].shape:
# corr = impute.corr()
try:
corr = np.corrcoef(impute.drop('label',axis=1).T)
except:
corr = np.corrcoef(impute.T)
else:
corr = impute # in case dpers
# rmse_cu = covariance_rmse(corr_gt, corr)
# rmse_moi = rmse_ignore_nan(corr_gt.to_numpy(), corr.to_numpy())
rmse_moi = rmse_ignore_nan(corr_gt, corr)
# return corr,rmse_cu,rmse_moi
return corr,rmse_moi
# percs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
percs = [0.4]
# exps = ['Iris', 'Digits','MNIST']
# Types = ['randomly_tabular','randomly_img','monotone_img']
exps = ['MNIST']
Types = ['monotone_img']
start = time.time()
for exp,tpe in zip(exps,Types):
pers_verfied = []
rmse_vis = defaultdict(list)
for perc in tqdm(percs):
# if perc in [0.1, 0.2, 0.3] and tpe == 'monotone_img':
# continue
# if perc == 0.6 and tpe in ['randomly_tabular','randomly_img']:
# continue
if exp == 'Digits':
# ---------------- digits----------------------
digits = load_digits()
df = pd.DataFrame(digits.data)
df['target'] = digits.target
new_df = missing(df, Type=tpe, perc=perc)
new_df['target'] = digits.target
new_df = apply_normalize(new_df)
corr_gt = df.corr()
# print(sum(new_df.isna().sum()))
# ---------------- digits----------------------
elif exp == 'Iris':
# ---------------- iris ----------------------
data = load_iris()
df = pd.DataFrame(data= np.c_[data['data'], data['target']],
columns= data['feature_names'] + ['target'])
new_df = missing(df,Type=tpe,perc=perc)
n_sample = df.shape[0] * perc
new_df = apply_normalize(new_df)
corr_gt = df.corr()
# ---------------- MNIST ----------------------
elif exp == 'MNIST':
mnist = fetch_openml('mnist_784')
# Split the data into features (X) and labels (y)
X = mnist.data
y = mnist.target
df = pd.DataFrame(X)
new_df = missing(df,Type=tpe,perc=perc,perc_del=0.5, im_width=28,im_height=28)
new_df = apply_normalize(new_df)
df['label'] = y
# new_df['label'] = y
corr_gt = np.corrcoef(df.drop('label',axis=1).T)
# ---------------- MNIST ----------------------
all = defaultdict(list)
Algo_impute = {
'MICE': [impute_with_mice, dict(df=new_df, n_iterations = 1)],
'MissForest': [impute_with_missforest, dict(df=new_df, n_estimators = 1)],
'ALS': [soft_impute_als_df, dict(df=new_df)],
'PCA': [impute_with_pca, dict(df=new_df)],
'KNN': [impute_with_knn, dict(df=new_df)],
'EM' : [impute_with_EM, dict(df=new_df)],
'MeanImpute' : [impute_with_meanImpute, dict(df=new_df)],
'Dpers': [corr_with_dpers, dict(df=new_df)],
'GAIN': [gain_impute, dict(df=new_df, hint_probability=0.9, batch_size=64, epochs=1)],
'GINN': [ginn_imputate, dict(df=apply_normalize(df), perc=perc,Type=tpe, epochs=1)] # GINN have randomly in his mask
}
# for names, params in tqdm(Algo_impute.items()):
# print("--------- ",names)
# # corr, rmse1,rmse2 = Algo(params[0], **params[1])
# # all[names].append([[rmse1,rmse2],corr])
# corr, rmse = Algo(params[0], **params[1])
# np.savetxt(f'{names}_{perc}_corr.txt', corr, fmt='%d')
# np.savetxt(f'{names}_{perc}_rmse.txt', rmse, fmt='%d')
# all[names].append([rmse,corr])
with ThreadPoolExecutor(max_workers=4) as executor:
futures = []
for names, params in tqdm(Algo_impute.items()):
future = executor.submit(functools.partial(process_algo, params, names, perc))
futures.append(future)
results = []
for future in tqdm(futures):
result = future.result()
results.append(result)
all[names].append(result)
pers_verfied.append(perc)
# Create a figure with two subplots
fig, axs = plt.subplots(ncols=5,nrows=2, figsize=(22, 8))
# Plot each correlation matrix on the appropriate subplot
for i, each in enumerate(all):
matrix = all[each][0][1]
# rmse_vis1[each].append(all[each][0][0][0])
# rmse_vis2[each].append(all[each][0][0][1])
rmse_vis[each].append(all[each][0][0])
row = i // 5
col = i % 5
if col == 4:
im = sns.heatmap(matrix, cmap='coolwarm', cbar=False, ax=axs[row, col], vmin=-1, vmax=1)
else:
im = sns.heatmap(matrix, cmap='coolwarm', cbar=False, ax=axs[row, col], vmin=-1, vmax=1)
axs[row, col].set_title(f'{each}')
cbar_ax = fig.add_axes([0.92, 0.1, 0.02, 0.8])
fig.colorbar(im.get_children()[0], cax=cbar_ax)
# Set the label for the colorbar
axs[0, 0].set_ylabel('Correlation', fontsize=14)
# Add a title for the entire figure
fig.suptitle('Correlation Matrices', fontsize=16)
# Adjust the spacing between subplots
plt.subplots_adjust(wspace=0.1, hspace=0.1)
# Hide the ticks and labels on all subplots
for ax in axs.flat:
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
# Show the figure
plt.savefig(f'experiments/{exp}_final_{perc}.png',dpi=600)
# plt.show()
print(rmse_vis)
with open('rmse_vis.txt', 'w') as f:
for key, value in rmse_vis.items():
f.write(f"{key}: {value}\n")
# print(rmse_vis2)
fig, ax = plt.subplots(figsize=(22, 15))
# Loop through each key and value in the dictionary
for key, value in rmse_vis.items():
# Plot the line chart
ax.plot(pers_verfied, value, label=key)
# for key, value in rmse_vis2.items():
# # Plot the line chart
# ax.plot(percs, value, label=key+"Moi")
# Add x-axis and y-axis labels
ax.set_xlabel('Perc')
ax.set_ylabel('Values')
# Add a title
ax.set_title(f'{exp} RMSE Line Chart')
# Add a legend
ax.legend()
# Show the plot
plt.savefig(f'experiments/rmse_{exp}.png')
# plt.show()
# plt.plot(rmse_vis)
print('Finished Time : ',time.time()-start)
if __name__ == "__main__":
main()