Previsão de classes de Ransomware
Nome da Família ID
-----------------------------------------
Goodware 0
Critroni 1
CryptLocker 2
CryptoWall 3
KOLLAH 4
Kovter 5
Locker 6
MATSNU 7
PGPCODER 8
Reveton 9
TeslaCrypt 10
Trojan-Ransom 11
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from keras.models import Sequential
from sklearn import preprocessing
from keras.layers import Dense
from sklearn import metrics
import pandasql as ps
from utilsIA import *
import pandas as pd
import numpy as np
import itertools
Leitura do dataset
# Load the feature table from the Excel workbook (path is relative to the
# working directory).
path = 'Ransoware.xlsx'
dataset = pd.read_excel(path)
# Bare expression: renders the frame when executed as a notebook cell.
dataset
Ransomware
Classe
4
6
8
9
10
11
12
14
...
29079
29218
29287
29758
29769
29770
29796
29903
30200
30285
0
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
1
0
0
0
1
1
1
0
0
1
0
0
0
0
0
0
0
...
0
0
0
0
1
0
0
0
0
0
2
1
4
0
0
0
0
0
0
0
0
...
0
0
0
0
1
0
0
0
0
0
3
0
0
1
1
1
1
1
0
1
0
...
0
0
0
0
0
0
0
0
0
0
4
0
0
0
1
0
1
1
0
0
0
...
0
0
0
0
0
0
0
0
0
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1519
1
4
0
1
0
0
0
0
0
0
...
0
0
0
0
1
0
0
0
1
1
1520
0
0
1
1
1
0
1
0
1
1
...
0
0
0
0
0
0
0
0
0
0
1521
0
0
1
1
1
0
0
0
0
0
...
0
0
0
0
1
0
1
0
0
0
1522
1
4
0
0
0
0
0
0
0
0
...
1
0
0
0
1
0
0
0
0
0
1523
0
0
1
1
1
0
1
0
1
0
...
0
0
0
0
0
0
0
0
0
0
1524 rows × 931 columns
# Load the feature table from the Excel workbook (path is relative to the
# working directory).
path = 'Ransoware.xlsx'
dataset = pd.read_excel(path)
# Bare expression: renders the frame when executed as a notebook cell.
dataset
Ransomware | Classe | 4 | 6 | 8 | 9 | 10 | 11 | 12 | 14 | ... | 29079 | 29218 | 29287 | 29758 | 29769 | 29770 | 29796 | 29903 | 30200 | 30285 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 |
1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
2 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1519 | 1 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 |
1520 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1521 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
1522 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
1523 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1524 rows × 931 columns
# Positional split of the raw table followed by PCA dimensionality reduction.
# PCA is fitted on the first `train_row` rows only; the remaining rows are
# projected with the already-fitted components.
train_row = 900
column_label = 1  # position of the 'Classe' column
column_init = 2   # position of the first feature column
# Number of feature columns. Derived from the table instead of the previous
# hard-coded 929 so the slices below keep covering every feature column if the
# spreadsheet gains or loses columns (931 total columns -> 929, as before).
columns = dataset.shape[1] - column_init

feature_cols = slice(column_init, column_init + columns)
train_label = dataset.iloc[:train_row, column_label]
train_data = dataset.iloc[:train_row, feature_cols]
# The tail rows are re-indexed from 0 so they no longer carry the original
# row numbers.
test_label = dataset.iloc[train_row:, column_label].reset_index(drop=True)
test_data = dataset.iloc[train_row:, feature_cols].reset_index(drop=True)

n_comp = 165
pca = PCA(n_components=n_comp)
X_train_pca = pca.fit_transform(train_data.values)
X_test_pca = pca.transform(test_data.values)
# Fraction of the training variance retained by the kept components.
print(f'Representação: {pca.explained_variance_ratio_.sum()}')
Representação: 0.9715453985161124
# Wrap the PCA outputs in DataFrames and stack them back into one table,
# train rows first, then the remaining rows.
X_train_pca = pd.DataFrame(X_train_pca)
X_test_pca = pd.DataFrame(X_test_pca)
# reset_index() without drop=True keeps each frame's old row numbers in an
# 'index' column (0..899, then 0..623 in the output); that column is removed
# by a later `df.drop('index', ...)`, so the two statements must stay in sync.
df = pd.concat([X_train_pca, X_test_pca]).reset_index()
# Assignment aligns on the fresh 0..N-1 index, which matches `dataset` row
# order because the rows were concatenated in their original order.
df['Classe'] = dataset['Classe']
# Bare expression: renders the frame when executed as a notebook cell.
df
index | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | ... | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | Classe | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | -4.821125 | -0.487440 | -2.201017 | 1.152447 | -0.033623 | 1.352974 | -0.578600 | 0.059564 | 0.038959 | ... | -0.053499 | 0.311921 | -0.050478 | -0.117299 | 0.066474 | -0.203876 | 0.246825 | -0.010021 | -0.282411 | 0 |
1 | 1 | -3.116892 | -0.320086 | -0.468679 | 0.077390 | -0.733340 | -0.059672 | 0.296803 | -0.416835 | -0.452211 | ... | -0.268512 | 0.186507 | 0.044608 | 0.237524 | 0.141487 | -0.163366 | -0.133308 | -0.139559 | 0.031824 | 0 |
2 | 2 | -4.817464 | -0.726339 | -2.223047 | 0.763878 | 0.408207 | 0.848962 | -0.412907 | 0.157133 | 0.103711 | ... | 0.115069 | -0.030130 | 0.102367 | 0.148219 | -0.342858 | 0.129300 | -0.067159 | -0.014187 | 0.124248 | 4 |
3 | 3 | 5.331648 | -2.676628 | 1.265138 | 0.893076 | 1.153713 | -0.109961 | 0.198922 | -0.561980 | -0.057770 | ... | 0.036511 | -0.175244 | -0.323986 | -0.024850 | 0.179646 | -0.095332 | -0.081275 | -0.192800 | 0.104840 | 0 |
4 | 4 | 4.054361 | 1.525786 | -0.874177 | -0.684646 | -0.014614 | -0.708375 | 0.977450 | 0.126336 | -0.524389 | ... | -0.058353 | -0.232046 | -0.426571 | 0.095166 | -0.060333 | -0.154573 | -0.096402 | -0.048692 | 0.468370 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1519 | 619 | -3.667744 | -0.287221 | -0.377622 | -0.285508 | -0.173065 | -0.097861 | -0.031989 | -0.025919 | 0.053390 | ... | -0.033192 | 0.723736 | -0.231849 | -0.136856 | -0.090843 | 0.277584 | -0.120122 | -0.174936 | -0.394245 | 4 |
1520 | 620 | 1.207262 | 4.874456 | 3.782525 | 1.782034 | 0.379592 | -0.096894 | -0.562963 | 1.003049 | -3.265090 | ... | -0.010782 | -0.362701 | -0.555329 | 0.299275 | 0.414574 | 0.270008 | -0.037364 | -0.070711 | 0.199022 | 0 |
1521 | 621 | 3.961603 | 1.436517 | -1.136199 | -0.921292 | -0.807843 | 0.733954 | 1.622033 | -2.877845 | 0.001690 | ... | -0.072145 | -0.226455 | 0.478052 | 0.026402 | 0.172154 | 0.158334 | -0.144294 | -0.308613 | -0.207756 | 0 |
1522 | 622 | -4.470373 | -0.806404 | -1.547585 | 0.073598 | 0.697804 | 0.348658 | -0.494289 | 0.044260 | 0.074465 | ... | -0.149596 | -0.241636 | 0.059691 | 0.157608 | 0.282329 | 0.006757 | -0.122828 | -0.280303 | -0.022284 | 4 |
1523 | 623 | 5.259862 | -3.403548 | 0.995541 | 1.227109 | 0.961682 | 0.304587 | -0.072982 | -0.855472 | 0.098563 | ... | 0.031373 | 0.019168 | -0.094154 | 0.007101 | -0.000693 | 0.014536 | -0.059940 | -0.031743 | 0.070975 | 0 |
1524 rows × 167 columns
# Remove every sample of classes 4 (KOLLAH), 8 (PGPCODER) and 10 (TeslaCrypt)
# in a single pass. NOTE(review): the reason for excluding these families is
# not stated in this file -- presumably too few samples; confirm.
excluded_classes = [4, 8, 10]
df.drop(df.index[df['Classe'].isin(excluded_classes)], inplace=True)
# Drop the helper column left behind by reset_index(). The axis is given by
# keyword (`columns=`): passing it positionally (`df.drop('index', 1, ...)`)
# raises a FutureWarning on pandas 1.x and a TypeError on pandas 2.x.
df.drop(columns='index', inplace=True)
# Bare expression: renders the frame when executed as a notebook cell.
df
C:\Users\Edno\AppData\Local\Temp/ipykernel_5244/4213288135.py:4: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only
df.drop('index', 1, inplace=True)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | Classe | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -4.821125 | -0.487440 | -2.201017 | 1.152447 | -0.033623 | 1.352974 | -0.578600 | 0.059564 | 0.038959 | 0.154728 | ... | -0.053499 | 0.311921 | -0.050478 | -0.117299 | 0.066474 | -0.203876 | 0.246825 | -0.010021 | -0.282411 | 0 |
1 | -3.116892 | -0.320086 | -0.468679 | 0.077390 | -0.733340 | -0.059672 | 0.296803 | -0.416835 | -0.452211 | 0.099741 | ... | -0.268512 | 0.186507 | 0.044608 | 0.237524 | 0.141487 | -0.163366 | -0.133308 | -0.139559 | 0.031824 | 0 |
3 | 5.331648 | -2.676628 | 1.265138 | 0.893076 | 1.153713 | -0.109961 | 0.198922 | -0.561980 | -0.057770 | 1.013530 | ... | 0.036511 | -0.175244 | -0.323986 | -0.024850 | 0.179646 | -0.095332 | -0.081275 | -0.192800 | 0.104840 | 0 |
4 | 4.054361 | 1.525786 | -0.874177 | -0.684646 | -0.014614 | -0.708375 | 0.977450 | 0.126336 | -0.524389 | -0.025725 | ... | -0.058353 | -0.232046 | -0.426571 | 0.095166 | -0.060333 | -0.154573 | -0.096402 | -0.048692 | 0.468370 | 0 |
5 | 1.761522 | 1.777536 | 1.389631 | -0.734398 | -1.769632 | -1.707925 | 0.010637 | 1.006003 | -1.032105 | 0.677926 | ... | -0.066147 | -0.053367 | -0.184165 | -0.156341 | 0.059520 | -0.043059 | -0.013471 | -0.092569 | 0.110692 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1516 | 5.471322 | 1.039605 | -4.132083 | 0.309695 | -2.036437 | 0.087284 | 0.457468 | -0.841350 | -0.999483 | -1.548910 | ... | 0.175220 | -0.015662 | -0.196201 | -0.293100 | -0.004059 | -0.188156 | 0.085008 | 0.043024 | 0.112271 | 0 |
1517 | 5.177134 | -3.400939 | 1.034620 | 1.240165 | 0.937027 | 0.308304 | -0.070532 | -0.924902 | 0.166870 | 1.175099 | ... | 0.038146 | -0.034900 | -0.006361 | -0.103692 | -0.046950 | 0.016334 | -0.074340 | -0.039927 | -0.009890 | 0 |
1520 | 1.207262 | 4.874456 | 3.782525 | 1.782034 | 0.379592 | -0.096894 | -0.562963 | 1.003049 | -3.265090 | 4.992138 | ... | -0.010782 | -0.362701 | -0.555329 | 0.299275 | 0.414574 | 0.270008 | -0.037364 | -0.070711 | 0.199022 | 0 |
1521 | 3.961603 | 1.436517 | -1.136199 | -0.921292 | -0.807843 | 0.733954 | 1.622033 | -2.877845 | 0.001690 | 2.131293 | ... | -0.072145 | -0.226455 | 0.478052 | 0.026402 | 0.172154 | 0.158334 | -0.144294 | -0.308613 | -0.207756 | 0 |
1523 | 5.259862 | -3.403548 | 0.995541 | 1.227109 | 0.961682 | 0.304587 | -0.072982 | -0.855472 | 0.098563 | 1.115860 | ... | 0.031373 | 0.019168 | -0.094154 | 0.007101 | -0.000693 | 0.014536 | -0.059940 | -0.031743 | 0.070975 | 0 |
1489 rows × 166 columns
# Count the remaining rows per class, largest class first.
# The query names `df` directly in its FROM clause; presumably pandasql
# resolves it from the surrounding Python scope -- verify against pandasql docs.
# NOTE(review): DISTINCT is redundant here because GROUP BY already yields one
# row per class; left unchanged since the query text is runtime data.
q1 = """
SELECT distinct count(*) as Quantidade, Classe
FROM df
group by Classe
order by Quantidade desc"""
ps.sqldf(q1)
Quantidade | Classe | |
---|---|---|
0 | 942 | 0 |
1 | 107 | 2 |
2 | 97 | 6 |
3 | 90 | 9 |
4 | 64 | 5 |
5 | 59 | 7 |
6 | 50 | 1 |
7 | 46 | 3 |
8 | 34 | 11 |
Redução da quantidade de registros da categoria '0' para diminuir a chance de
enviesamento do modelo em favor desta classe
# Cap the class sizes with the project helper (presumably provided by
# `from utilsIA import *` -- verify). With the limit 107, the printed counts
# show class 0 going from 942 rows to 107 while the other classes keep their
# sizes; 107 is the size of the second-largest class.
df_balanced = ProcessingData.balancedData(df, 'Classe', 107)
# Uncomment to skip balancing and train on the full table:
#df_balanced = df
# NOTE(review): the same call is repeated below. Whether balancedData samples
# randomly is not visible from this file, so the repeat is left untouched.
df_balanced = ProcessingData.balancedData(df, 'Classe', 107)
#df_balanced = df
# Print the per-class row counts after balancing.
ProcessingData.showLabelsQtd(df_balanced, 'Classe')
Quantidade de itens nas 9 categorias:
0 107
2 107
6 97
9 90
5 64
7 59
1 50
3 46
11 34
Name: Classe, dtype: int64
#Colocando a coluna 'Classe' na primeira posição
col = ['Classe'] + [c for c in range(n_comp)]
df_balanced = df_balanced[col]
df_balanced.head()
Classe | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | ... | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1112 | 0 | 5.050851 | -3.437326 | 0.880903 | 1.284616 | 1.039539 | 0.332499 | -0.112654 | -0.929544 | 0.185957 | ... | -0.003749 | 0.007185 | -0.091705 | 0.030639 | -0.184883 | -0.014107 | 0.023129 | -0.065869 | -0.118227 | 0.052917 |
904 | 0 | -4.744892 | -1.090630 | -1.894263 | 0.623459 | 0.556025 | 0.847031 | -0.458654 | 0.282898 | 0.140678 | ... | -0.006809 | 0.061832 | 0.089955 | -0.009707 | 0.050636 | -0.008242 | 0.017653 | 0.002273 | 0.099682 | -0.081503 |
1018 | 0 | -0.894416 | 1.380315 | 0.817547 | 0.487481 | -3.652643 | -0.308384 | 0.959425 | 0.111086 | -0.468956 | ... | 0.020760 | -0.028250 | 0.168435 | -0.041256 | -0.425442 | 0.265058 | 0.060733 | 0.246020 | 0.026194 | -0.033660 |
294 | 0 | 5.585781 | -3.418598 | 1.011767 | 1.184437 | 1.192173 | 0.112080 | -0.042108 | -0.783191 | -0.261409 | ... | 0.015675 | -0.016180 | -0.023752 | 0.092484 | -0.102187 | -0.064161 | 0.023080 | 0.000156 | -0.029936 | -0.090316 |
1355 | 0 | 4.773488 | 0.741391 | -3.426230 | 0.476568 | -1.845094 | -0.454420 | 1.943682 | -1.763280 | -0.361983 | ... | -0.238522 | -0.164905 | 0.178833 | 0.062664 | 0.089606 | -0.098259 | 0.022611 | 0.108016 | -0.008297 | 0.028628 |
5 rows × 166 columns
Aplicação do One-Hot Encoding
# One-hot encode the class labels and extract the feature matrix.
# The original repeated these statements verbatim, fitting the encoder twice
# with identical data; a single pass yields the same variables.
class_column = df_balanced['Classe']
# Number of distinct classes left after filtering and balancing.
n_classes = class_column.astype(int).nunique()
# Every column except the leading 'Classe' column is a feature.
datas = df_balanced.iloc[:, 1:].values
# OneHotEncoder expects a 2-D input: one single-element row per sample.
labels = [[la] for la in class_column.to_list()]
enc = preprocessing.OneHotEncoder()
# fit_transform == fit followed by transform on the same data; the sparse
# result is densified for Keras.
labels_hot = enc.fit_transform(labels).toarray()
Separando os dados para teste e treino
# 70/30 train/test split with a fixed seed so the partition is reproducible.
# The original ran this split twice back to back and then wrapped y_train and
# y_test in np.array(); `labels_hot` is already a NumPy array, so the split
# returns arrays and both the repeat and the copies were redundant.
# NOTE(review): the original carried a commented-out `stratify=labels`, so the
# split is NOT stratified; with the smallest class at 34 rows the per-class
# share in the test set can vary -- consider re-enabling stratification.
X_train, X_test, y_train, y_test = train_test_split(
    datas, labels_hot, test_size=.3, random_state=0
)
Modelo da rede neural utilizada
# Fully connected classifier: `n_comp` PCA components in, four ReLU hidden
# layers, and one softmax unit per class out.
model = Sequential([
    Dense(380, activation='relu', input_dim=n_comp),
    Dense(160, activation='relu'),
    Dense(90, activation='relu'),
    Dense(45, activation='relu'),
    Dense(n_classes, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop after 25 epochs without a val_accuracy improvement, and keep the
# weights of the best epoch on disk.
# NOTE(review): validation_data is the test split, so the checkpoint is
# selected on the same data that is later used to report accuracy.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=25)
checkpoint = ModelCheckpoint(filepath='best_model.h5', monitor='val_accuracy', save_best_only=True)
callbacks = [early_stopping, checkpoint]
history = model.fit(
    X_train,
    y_train,
    epochs=200,
    callbacks=callbacks,
    validation_data=(X_test, y_test),
    verbose=1,
)
# Restore the best checkpoint rather than the weights of the last epoch.
model.load_weights('best_model.h5')
Epoch 1/200
15/15 [==============================] - 1s 23ms/step - loss: 1.9844 - accuracy: 0.2910 - val_loss: 1.7298 - val_accuracy: 0.4162
Epoch 2/200
15/15 [==============================] - 0s 9ms/step - loss: 1.4077 - accuracy: 0.6105 - val_loss: 1.2256 - val_accuracy: 0.5888
Epoch 3/200
15/15 [==============================] - 0s 10ms/step - loss: 0.9652 - accuracy: 0.7068 - val_loss: 1.0074 - val_accuracy: 0.6853
Epoch 4/200
15/15 [==============================] - 0s 10ms/step - loss: 0.7094 - accuracy: 0.7746 - val_loss: 0.8882 - val_accuracy: 0.7157
Epoch 5/200
15/15 [==============================] - 0s 9ms/step - loss: 0.5448 - accuracy: 0.8184 - val_loss: 0.7992 - val_accuracy: 0.7665
Epoch 6/200
15/15 [==============================] - 0s 9ms/step - loss: 0.3985 - accuracy: 0.8884 - val_loss: 0.7419 - val_accuracy: 0.7716
Epoch 7/200
15/15 [==============================] - 0s 5ms/step - loss: 0.3295 - accuracy: 0.9015 - val_loss: 0.7838 - val_accuracy: 0.7513
Epoch 8/200
15/15 [==============================] - 0s 9ms/step - loss: 0.2932 - accuracy: 0.9103 - val_loss: 0.7610 - val_accuracy: 0.8122
Epoch 9/200
15/15 [==============================] - 0s 5ms/step - loss: 0.2505 - accuracy: 0.9212 - val_loss: 0.7309 - val_accuracy: 0.7766
Epoch 10/200
15/15 [==============================] - 0s 9ms/step - loss: 0.2223 - accuracy: 0.9344 - val_loss: 0.7284 - val_accuracy: 0.8173
Epoch 11/200
15/15 [==============================] - 0s 9ms/step - loss: 0.1786 - accuracy: 0.9475 - val_loss: 0.7725 - val_accuracy: 0.8223
Epoch 12/200
15/15 [==============================] - 0s 6ms/step - loss: 0.1751 - accuracy: 0.9453 - val_loss: 0.7342 - val_accuracy: 0.8020
Epoch 13/200
15/15 [==============================] - 0s 9ms/step - loss: 0.1385 - accuracy: 0.9562 - val_loss: 0.7827 - val_accuracy: 0.8325
Epoch 14/200
15/15 [==============================] - 0s 5ms/step - loss: 0.1194 - accuracy: 0.9606 - val_loss: 0.7634 - val_accuracy: 0.8274
Epoch 15/200
15/15 [==============================] - 0s 5ms/step - loss: 0.1201 - accuracy: 0.9628 - val_loss: 0.7973 - val_accuracy: 0.8173
Epoch 16/200
15/15 [==============================] - 0s 5ms/step - loss: 0.1045 - accuracy: 0.9606 - val_loss: 0.8194 - val_accuracy: 0.8223
Epoch 17/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0962 - accuracy: 0.9781 - val_loss: 0.8739 - val_accuracy: 0.8223
Epoch 18/200
15/15 [==============================] - 0s 5ms/step - loss: 0.1099 - accuracy: 0.9628 - val_loss: 0.8371 - val_accuracy: 0.8325
Epoch 19/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0886 - accuracy: 0.9759 - val_loss: 0.8321 - val_accuracy: 0.8223
Epoch 20/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0760 - accuracy: 0.9759 - val_loss: 0.8440 - val_accuracy: 0.8325
Epoch 21/200
15/15 [==============================] - 0s 9ms/step - loss: 0.0788 - accuracy: 0.9803 - val_loss: 0.8539 - val_accuracy: 0.8376
Epoch 22/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0632 - accuracy: 0.9825 - val_loss: 0.8903 - val_accuracy: 0.8173
Epoch 23/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0817 - accuracy: 0.9716 - val_loss: 0.8795 - val_accuracy: 0.8274
Epoch 24/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0654 - accuracy: 0.9825 - val_loss: 0.9905 - val_accuracy: 0.8274
Epoch 25/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0876 - accuracy: 0.9716 - val_loss: 0.8827 - val_accuracy: 0.8325
Epoch 26/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0677 - accuracy: 0.9825 - val_loss: 0.9454 - val_accuracy: 0.8173
Epoch 27/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0775 - accuracy: 0.9759 - val_loss: 0.9085 - val_accuracy: 0.8274
Epoch 28/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0712 - accuracy: 0.9781 - val_loss: 0.9352 - val_accuracy: 0.8325
Epoch 29/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0671 - accuracy: 0.9825 - val_loss: 0.9524 - val_accuracy: 0.8122
Epoch 30/200
15/15 [==============================] - 0s 6ms/step - loss: 0.0605 - accuracy: 0.9825 - val_loss: 0.9559 - val_accuracy: 0.8223
Epoch 31/200
15/15 [==============================] - 0s 7ms/step - loss: 0.0775 - accuracy: 0.9781 - val_loss: 0.9911 - val_accuracy: 0.8274
Epoch 32/200
15/15 [==============================] - 0s 6ms/step - loss: 0.0886 - accuracy: 0.9716 - val_loss: 0.9290 - val_accuracy: 0.8325
Epoch 33/200
15/15 [==============================] - 0s 7ms/step - loss: 0.0729 - accuracy: 0.9803 - val_loss: 0.9835 - val_accuracy: 0.8274
Epoch 34/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0556 - accuracy: 0.9847 - val_loss: 0.9801 - val_accuracy: 0.8325
Epoch 35/200
15/15 [==============================] - 0s 6ms/step - loss: 0.0586 - accuracy: 0.9825 - val_loss: 0.9715 - val_accuracy: 0.8173
Epoch 36/200
15/15 [==============================] - 0s 6ms/step - loss: 0.0566 - accuracy: 0.9803 - val_loss: 0.9987 - val_accuracy: 0.8325
Epoch 37/200
15/15 [==============================] - 0s 7ms/step - loss: 0.0683 - accuracy: 0.9781 - val_loss: 0.9747 - val_accuracy: 0.8173
Epoch 38/200
15/15 [==============================] - 0s 8ms/step - loss: 0.0949 - accuracy: 0.9716 - val_loss: 1.0825 - val_accuracy: 0.8274
Epoch 39/200
15/15 [==============================] - 0s 7ms/step - loss: 0.0746 - accuracy: 0.9672 - val_loss: 0.9609 - val_accuracy: 0.8325
Epoch 40/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0505 - accuracy: 0.9847 - val_loss: 1.0257 - val_accuracy: 0.8223
Epoch 41/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0544 - accuracy: 0.9847 - val_loss: 0.9983 - val_accuracy: 0.8376
Epoch 42/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0465 - accuracy: 0.9847 - val_loss: 1.0335 - val_accuracy: 0.8223
Epoch 43/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0485 - accuracy: 0.9847 - val_loss: 1.0488 - val_accuracy: 0.8223
Epoch 44/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0454 - accuracy: 0.9847 - val_loss: 1.0534 - val_accuracy: 0.8376
Epoch 45/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0509 - accuracy: 0.9803 - val_loss: 1.0727 - val_accuracy: 0.8274
Epoch 46/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0494 - accuracy: 0.9847 - val_loss: 1.0572 - val_accuracy: 0.8274
# Fully connected classifier: `n_comp` PCA components in, four ReLU hidden
# layers, and one softmax unit per class out.
model = Sequential([
    Dense(380, activation='relu', input_dim=n_comp),
    Dense(160, activation='relu'),
    Dense(90, activation='relu'),
    Dense(45, activation='relu'),
    Dense(n_classes, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop after 25 epochs without a val_accuracy improvement, and keep the
# weights of the best epoch on disk.
# NOTE(review): validation_data is the test split, so the checkpoint is
# selected on the same data that is later used to report accuracy.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=25)
checkpoint = ModelCheckpoint(filepath='best_model.h5', monitor='val_accuracy', save_best_only=True)
callbacks = [early_stopping, checkpoint]
history = model.fit(
    X_train,
    y_train,
    epochs=200,
    callbacks=callbacks,
    validation_data=(X_test, y_test),
    verbose=1,
)
# Restore the best checkpoint rather than the weights of the last epoch.
model.load_weights('best_model.h5')
Epoch 1/200
15/15 [==============================] - 1s 23ms/step - loss: 1.9844 - accuracy: 0.2910 - val_loss: 1.7298 - val_accuracy: 0.4162
Epoch 2/200
15/15 [==============================] - 0s 9ms/step - loss: 1.4077 - accuracy: 0.6105 - val_loss: 1.2256 - val_accuracy: 0.5888
Epoch 3/200
15/15 [==============================] - 0s 10ms/step - loss: 0.9652 - accuracy: 0.7068 - val_loss: 1.0074 - val_accuracy: 0.6853
Epoch 4/200
15/15 [==============================] - 0s 10ms/step - loss: 0.7094 - accuracy: 0.7746 - val_loss: 0.8882 - val_accuracy: 0.7157
Epoch 5/200
15/15 [==============================] - 0s 9ms/step - loss: 0.5448 - accuracy: 0.8184 - val_loss: 0.7992 - val_accuracy: 0.7665
Epoch 6/200
15/15 [==============================] - 0s 9ms/step - loss: 0.3985 - accuracy: 0.8884 - val_loss: 0.7419 - val_accuracy: 0.7716
Epoch 7/200
15/15 [==============================] - 0s 5ms/step - loss: 0.3295 - accuracy: 0.9015 - val_loss: 0.7838 - val_accuracy: 0.7513
Epoch 8/200
15/15 [==============================] - 0s 9ms/step - loss: 0.2932 - accuracy: 0.9103 - val_loss: 0.7610 - val_accuracy: 0.8122
Epoch 9/200
15/15 [==============================] - 0s 5ms/step - loss: 0.2505 - accuracy: 0.9212 - val_loss: 0.7309 - val_accuracy: 0.7766
Epoch 10/200
15/15 [==============================] - 0s 9ms/step - loss: 0.2223 - accuracy: 0.9344 - val_loss: 0.7284 - val_accuracy: 0.8173
Epoch 11/200
15/15 [==============================] - 0s 9ms/step - loss: 0.1786 - accuracy: 0.9475 - val_loss: 0.7725 - val_accuracy: 0.8223
Epoch 12/200
15/15 [==============================] - 0s 6ms/step - loss: 0.1751 - accuracy: 0.9453 - val_loss: 0.7342 - val_accuracy: 0.8020
Epoch 13/200
15/15 [==============================] - 0s 9ms/step - loss: 0.1385 - accuracy: 0.9562 - val_loss: 0.7827 - val_accuracy: 0.8325
Epoch 14/200
15/15 [==============================] - 0s 5ms/step - loss: 0.1194 - accuracy: 0.9606 - val_loss: 0.7634 - val_accuracy: 0.8274
Epoch 15/200
15/15 [==============================] - 0s 5ms/step - loss: 0.1201 - accuracy: 0.9628 - val_loss: 0.7973 - val_accuracy: 0.8173
Epoch 16/200
15/15 [==============================] - 0s 5ms/step - loss: 0.1045 - accuracy: 0.9606 - val_loss: 0.8194 - val_accuracy: 0.8223
Epoch 17/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0962 - accuracy: 0.9781 - val_loss: 0.8739 - val_accuracy: 0.8223
Epoch 18/200
15/15 [==============================] - 0s 5ms/step - loss: 0.1099 - accuracy: 0.9628 - val_loss: 0.8371 - val_accuracy: 0.8325
Epoch 19/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0886 - accuracy: 0.9759 - val_loss: 0.8321 - val_accuracy: 0.8223
Epoch 20/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0760 - accuracy: 0.9759 - val_loss: 0.8440 - val_accuracy: 0.8325
Epoch 21/200
15/15 [==============================] - 0s 9ms/step - loss: 0.0788 - accuracy: 0.9803 - val_loss: 0.8539 - val_accuracy: 0.8376
Epoch 22/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0632 - accuracy: 0.9825 - val_loss: 0.8903 - val_accuracy: 0.8173
Epoch 23/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0817 - accuracy: 0.9716 - val_loss: 0.8795 - val_accuracy: 0.8274
Epoch 24/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0654 - accuracy: 0.9825 - val_loss: 0.9905 - val_accuracy: 0.8274
Epoch 25/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0876 - accuracy: 0.9716 - val_loss: 0.8827 - val_accuracy: 0.8325
Epoch 26/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0677 - accuracy: 0.9825 - val_loss: 0.9454 - val_accuracy: 0.8173
Epoch 27/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0775 - accuracy: 0.9759 - val_loss: 0.9085 - val_accuracy: 0.8274
Epoch 28/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0712 - accuracy: 0.9781 - val_loss: 0.9352 - val_accuracy: 0.8325
Epoch 29/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0671 - accuracy: 0.9825 - val_loss: 0.9524 - val_accuracy: 0.8122
Epoch 30/200
15/15 [==============================] - 0s 6ms/step - loss: 0.0605 - accuracy: 0.9825 - val_loss: 0.9559 - val_accuracy: 0.8223
Epoch 31/200
15/15 [==============================] - 0s 7ms/step - loss: 0.0775 - accuracy: 0.9781 - val_loss: 0.9911 - val_accuracy: 0.8274
Epoch 32/200
15/15 [==============================] - 0s 6ms/step - loss: 0.0886 - accuracy: 0.9716 - val_loss: 0.9290 - val_accuracy: 0.8325
Epoch 33/200
15/15 [==============================] - 0s 7ms/step - loss: 0.0729 - accuracy: 0.9803 - val_loss: 0.9835 - val_accuracy: 0.8274
Epoch 34/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0556 - accuracy: 0.9847 - val_loss: 0.9801 - val_accuracy: 0.8325
Epoch 35/200
15/15 [==============================] - 0s 6ms/step - loss: 0.0586 - accuracy: 0.9825 - val_loss: 0.9715 - val_accuracy: 0.8173
Epoch 36/200
15/15 [==============================] - 0s 6ms/step - loss: 0.0566 - accuracy: 0.9803 - val_loss: 0.9987 - val_accuracy: 0.8325
Epoch 37/200
15/15 [==============================] - 0s 7ms/step - loss: 0.0683 - accuracy: 0.9781 - val_loss: 0.9747 - val_accuracy: 0.8173
Epoch 38/200
15/15 [==============================] - 0s 8ms/step - loss: 0.0949 - accuracy: 0.9716 - val_loss: 1.0825 - val_accuracy: 0.8274
Epoch 39/200
15/15 [==============================] - 0s 7ms/step - loss: 0.0746 - accuracy: 0.9672 - val_loss: 0.9609 - val_accuracy: 0.8325
Epoch 40/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0505 - accuracy: 0.9847 - val_loss: 1.0257 - val_accuracy: 0.8223
Epoch 41/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0544 - accuracy: 0.9847 - val_loss: 0.9983 - val_accuracy: 0.8376
Epoch 42/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0465 - accuracy: 0.9847 - val_loss: 1.0335 - val_accuracy: 0.8223
Epoch 43/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0485 - accuracy: 0.9847 - val_loss: 1.0488 - val_accuracy: 0.8223
Epoch 44/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0454 - accuracy: 0.9847 - val_loss: 1.0534 - val_accuracy: 0.8376
Epoch 45/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0509 - accuracy: 0.9803 - val_loss: 1.0727 - val_accuracy: 0.8274
Epoch 46/200
15/15 [==============================] - 0s 5ms/step - loss: 0.0494 - accuracy: 0.9847 - val_loss: 1.0572 - val_accuracy: 0.8274
def plot_confusion_matrix(y_test, y_pred, labels, normalize=False):
    """Compute and draw the confusion matrix of ``y_pred`` against ``y_test``.

    Args:
        y_test: True class of each sample.
        y_pred: Predicted class of each sample.
        labels: Tick labels for both axes, one per class.
        normalize: When true, each row is divided by its total so the cells
            show per-true-class fractions instead of raw counts.

    The figure is shown with ``plt.show()``; nothing is returned.

    NOTE(review): ``plt`` is not among this file's visible imports; presumably
    it is supplied by ``from utilsIA import *`` -- verify.
    """
    cm = confusion_matrix(y_test, y_pred)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class that is predicted but never occurs in y_test has an all-zero
        # row. Dividing it by its total would produce NaN cells and a NaN
        # colour threshold, so such rows are left at 0 instead.
        cm = np.divide(
            cm,
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.gcf().set_size_inches(17, 11)
    plt.title('Confusion matrix')
    plt.colorbar()
    tick_marks = np.arange(len(labels))
    plt.xticks(tick_marks, labels, rotation=45)
    plt.yticks(tick_marks, labels)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
def evaluate_model(model, history, X_test, y_test):
    """Print the model's accuracy on the given data and plot its training curves.

    Calls ``model.evaluate`` and prints the second returned score as a
    percentage, then draws two side-by-side panels from ``history.history``:
    accuracy / val_accuracy and loss / val_loss.
    """
    banner = '========================================='
    accuracy_pct = model.evaluate(X_test, y_test, verbose=0)[1] * 100
    print(banner)
    print("|| Accuracy: %.2f%%" % accuracy_pct)
    print(banner)

    fig, axs = plt.subplots(1, 2, figsize=(12, 6))
    panels = (('accuracy', "Accuracy"), ('loss', "Model- Loss"))
    for ax, (metric, title) in zip(axs, panels):
        # Training curve first, validation curve second, matching the legend.
        ax.plot(history.history[metric])
        ax.plot(history.history['val_' + metric])
        ax.set_title(title)
        ax.legend(['Training', 'Validation'])
    fig.tight_layout()
# Report accuracy and plot the learning curves.
# NOTE(review): X_test/y_test were also passed as validation_data to model.fit
# and the restored checkpoint was selected on val_accuracy, so this figure is
# measured on the same data that was used for model selection.
evaluate_model(model, history, X_test, y_test)
=========================================
|| Accuracy: 83.76%
=========================================
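OBS: Os valores numéricos das classes não correspondem mais aos apresentados no início do arquivo: as classes 4, 8 e 10 foram removidas e o OneHotEncoder reindexa as classes restantes de 0 a 8, em ordem crescente do ID original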
def find_class(probl):
    """Return the index of the largest entry of ``probl`` and its value in percent.

    Args:
        probl: 1-D sequence of scores, e.g. one row of softmax probabilities
            or a one-hot label row.

    Returns:
        ``(label, percent)`` where ``label`` is the position of the maximum
        (the first one when several entries tie) and ``percent`` is that
        maximum times 100, rounded to one decimal place.
    """
    probl = np.asarray(probl)
    # argmax always yields exactly one index. The previous
    # int(np.where(probl == max)[0]) raised TypeError whenever the maximum
    # occurred more than once, and relied on converting a 1-element array to
    # a scalar, which recent NumPy versions deprecate.
    label = int(np.argmax(probl))
    return label, round(probl.max() * 100, 1)
# Class scores for every test sample.
prediction_proba = model.predict(X_test)
# Collapse each score row / one-hot row to the index of its largest entry.
prediction_cat = [find_class(c)[0] for c in prediction_proba]
y_test_label = [find_class(c)[0] for c in y_test]
# Tick labels are the encoder's categories (the original class IDs); the
# final True requests the row-normalized matrix.
plot_confusion_matrix(y_test_label, prediction_cat, enc.categories_[0], True)
Normalized confusion matrix
def find_class(probl):
    """Return the index of the largest entry of ``probl`` and its value in percent.

    Args:
        probl: 1-D sequence of scores, e.g. one row of softmax probabilities
            or a one-hot label row.

    Returns:
        ``(label, percent)`` where ``label`` is the position of the maximum
        (the first one when several entries tie) and ``percent`` is that
        maximum times 100, rounded to one decimal place.
    """
    probl = np.asarray(probl)
    # argmax always yields exactly one index. The previous
    # int(np.where(probl == max)[0]) raised TypeError whenever the maximum
    # occurred more than once, and relied on converting a 1-element array to
    # a scalar, which recent NumPy versions deprecate.
    label = int(np.argmax(probl))
    return label, round(probl.max() * 100, 1)
# Class scores for every test sample.
prediction_proba = model.predict(X_test)
# Collapse each score row / one-hot row to the index of its largest entry.
prediction_cat = [find_class(c)[0] for c in prediction_proba]
y_test_label = [find_class(c)[0] for c in y_test]
# Tick labels are the encoder's categories (the original class IDs); the
# final True requests the row-normalized matrix.
plot_confusion_matrix(y_test_label, prediction_cat, enc.categories_[0], True)
Normalized confusion matrix
Geralmente, a sensibilidade e a especificidade são características difíceis de conciliar, isto é, é complicado aumentar
a sensibilidade e a especificidade de um teste ao mesmo tempo. As curvas ROC (receiver operating characteristic curve)
são uma forma de representar a relação, normalmente antagónica, entre a sensibilidade e a especificidade de um teste
diagnóstico quantitativo, ao longo de um contínuo de valores de "cutoff point".
# Multi-class ROC analysis: one curve per class plus micro- and macro-averages.
#
# The original cell read `fpr`, `tpr`, `roc_auc` and `lw` without defining
# them anywhere in this file (NameError on a fresh run). They are built here
# from the one-hot `y_test` and the softmax scores in `prediction_proba`.
lw = 2  # line width shared by the per-class curves and the chance diagonal
fpr, tpr, roc_auc = {}, {}, {}

# One-vs-rest curve for each one-hot column.
for i in range(n_classes):
    fpr[i], tpr[i], _ = metrics.roc_curve(y_test[:, i], prediction_proba[:, i])
    roc_auc[i] = metrics.auc(fpr[i], tpr[i])

# Micro-average: flatten every (sample, class) pair into one binary problem.
fpr["micro"], tpr["micro"], _ = metrics.roc_curve(
    y_test.ravel(), prediction_proba.ravel()
)
roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"])

# Macro-average: first aggregate all false positive rates ...
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
# ... then interpolate every per-class curve at those points ...
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
# ... and finally average them and compute the AUC.
mean_tpr /= n_classes
fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = metrics.auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves.
# Legend fixes: "Cruva" was a typo, and the averaged curves were labelled
# "Média-mínima"/"Média-máxima" although they are the micro- and macro-average.
plt.figure(figsize=(12, 8))
plt.plot(
    fpr["micro"],
    tpr["micro"],
    label="Micro-média da curva ROC (area = {0:0.2f})".format(roc_auc["micro"]),
    color="deeppink",
    linestyle=":",
    linewidth=4,
)
plt.plot(
    fpr["macro"],
    tpr["macro"],
    label="Macro-média da curva ROC (area = {0:0.2f})".format(roc_auc["macro"]),
    color="navy",
    linestyle=":",
    linewidth=4,
)
# `i` is the one-hot column index, not the original family ID.
for i in range(n_classes):
    plt.plot(
        fpr[i],
        tpr[i],
        lw=lw,
        label="Curva ROC da classe {0} (area = {1:0.2f})".format(i, roc_auc[i]),
    )
# Diagonal reference line of a random classifier.
plt.plot([0, 1], [0, 1], "k--", lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel("Taxa de Falso Positivo")
plt.ylabel("Taxa de Verdadeiro Positivo")
plt.title("Curva ROC por classes")
plt.legend(loc="lower right")
plt.show()