-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset_pca.py
39 lines (31 loc) · 1.13 KB
/
dataset_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
"""
PCA with all components on published dataset
"""
import numpy as np
from sklearn.decomposition import PCA
# A single estimator is reused for every dataset: each fit_transform() call
# refits it on the data it is given, and the fixed integer seed keeps solvers
# that use randomness reproducible between runs. After the loop below, `pca`
# holds the fit of the last dataset processed.
pca = PCA(random_state=41)

# One entry per embedding matrix:
#   (progress message, input .npy file, output path handed to np.save).
# np.save appends ".npy" when the output path lacks that extension, so the
# results are written to e.g. "data/full_txt_embeddings_pca.npy".
_EMBEDDING_SETS = (
    (
        "Full TXT embeddings",
        "data/texts_embeddings.npy",
        "data/full_txt_embeddings_pca",
    ),
    (
        "Multimodal TXT embeddings",
        "data/multimodal_txt_noft.npy",
        "data/multimodal_txt_noft_pca",
    ),
    (
        "Multimodal TXT embeddings for 80% of IMG",
        "data/multimodal_txt_ft.npy",
        "data/multimodal_txt_ft_pca",
    ),
    (
        "Full IMG embeddings",
        "data/multimodal_img_noft.npy",
        "data/multimodal_img_noft_pca",
    ),
    (
        "80% IMG embeddings after finetuning",
        "data/multimodal_img_ft.npy",
        "data/multimodal_img_ft_pca",
    ),
)


def _project_and_save(label, src_path, dst_path):
    """Load one embedding matrix, PCA-transform it and save the result.

    Args:
        label: Progress message printed before the work starts.
        src_path: Path of the ``.npy`` file holding the input matrix.
        dst_path: Output path passed to ``np.save``.
    """
    print(label)
    # The arrays stay local to this call, so they can be released before the
    # next dataset is loaded instead of all results being held until exit.
    np.save(dst_path, pca.fit_transform(np.load(src_path)))


for _label, _src_path, _dst_path in _EMBEDDING_SETS:
    _project_and_save(_label, _src_path, _dst_path)