-
Notifications
You must be signed in to change notification settings - Fork 0
/
encrypted_traffic_lstm.py
166 lines (147 loc) · 5.82 KB
/
encrypted_traffic_lstm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""
This program reads a preexisting hdf5 file containing information from the
original directory of network packet data.
"""
import os
import csv
import keras
import random
import sklearn
import h5py as h5
import numpy as np
import pandas as pd
from random import shuffle
from collections import Counter
from keras.utils import np_utils
from keras.utils import to_categorical
from keras.models import Sequential, Model
from sklearn.metrics import confusion_matrix
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Dense, Flatten, GlobalAveragePooling1D, Dropout, Activation, TimeDistributed, LSTM, Input
"""
************************************
Variables to change prior to running
************************************
"""
# time_concat = 1 -> run with time concatenation
# time_concat = 0 -> run without time concatenation
time_concat = 1
num_classes = 7
sfCutOff = 10 #number of timestamps per TimeDistribution
#Network hyperparameters
epochs = 4
learningRate = .0001
batch_size = 10
"""
************************************
"""
# Set seeds for reproducibility (Python's and NumPy's global generators).
random.seed(3)
np.random.seed(1337)

# Increase the print size of pandas output: None removes the limit entirely.
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000
try:
    # pandas 1.0 deprecated negative values for this option in favour of
    # None, and current releases reject them outright.
    pd.set_option('display.max_colwidth', None)  # or 199
except ValueError:
    # Older pandas validates this option as an int and only understands the
    # legacy -1 sentinel for "unlimited".
    pd.set_option('display.max_colwidth', -1)
"""
Defines the 1D DCNN model using TimeDistributed
This model uses a concatenation layer to add in the time data for lstm
"""
def get_model_with_time():
activation = 'relu'
main_input = Input(shape = X_train.shape[1:], name='main_input')
auxiliary_input = Input(shape = time_train.shape[1:], name='aux_input')
x = TimeDistributed(Conv1D(256, (2), padding='same', strides=1, name = "con1"))(main_input)
x = TimeDistributed(MaxPooling1D())(x)
x = TimeDistributed(Conv1D(128,(2), strides=1, activation=activation, name = "con2"))(x)
x = TimeDistributed(MaxPooling1D())(x)
x = TimeDistributed(Flatten())(x)
x = TimeDistributed(Dense(64))(x)
x = TimeDistributed(Dense(64))(x)
x = TimeDistributed(Dense(14, activation=activation, name="den3"))(x)
print(x)
print(auxiliary_input)
x = keras.layers.concatenate([x, auxiliary_input], axis=2)
print(x)
x = LSTM(50, return_sequences=False, dropout=0.5)(x)
main_output = Dense(num_classes, activation = 'sigmoid', name='main_output')(x)
model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, main_output])
return model
"""
Defines the 1D DCNN model using TimeDistributed
This model does not use time data, only the packets
"""
def get_model_without_time():
activation = 'relu'
main_input = Input(shape = X_train.shape[1:], name='main_input')
x = TimeDistributed(Conv1D(256, (2), padding='same', strides=1, name = "con1"))(main_input)
x = TimeDistributed(MaxPooling1D())(x)
x = TimeDistributed(Conv1D(128,(2), strides=1, activation=activation, name = "con2"))(x)
x = TimeDistributed(MaxPooling1D())(x)
x = TimeDistributed(Flatten())(x)
x = TimeDistributed(Dense(64))(x)
x = TimeDistributed(Dense(64))(x)
x = TimeDistributed(Dense(14, activation=activation, name="den3"))(x)
x = LSTM(50, return_sequences=False, dropout=0.5)(x)
main_output = Dense(num_classes, activation = 'sigmoid', name='main_output')(x)
model = Model(inputs=main_input, outputs=main_output)
return model
def run_with_time():
    """
    Train, save and evaluate the model variant that uses the time input.

    Builds the network with get_model_with_time(), loads weights by layer
    name from 'my_model_weights.h5', trains on the module-level training
    arrays, saves the trained model to 'benchmark-model.h5' and prints the
    evaluation result on the validation arrays.
    """
    print("==========================================")
    print("RUNNING WITH TIME DATA")
    print("==========================================")
    model = get_model_with_time()
    # by_name=True only loads layers whose names match those in the file.
    model.load_weights('my_model_weights.h5', by_name=True)
    # NOTE(review): the 'rmsprop' string selects the optimizer's defaults, so
    # the module-level learningRate is not applied. Also, labels are one-hot
    # while the loss is binary_crossentropy; confirm that the reported
    # 'accuracy' is the metric you intend.
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    # Targets are passed twice because the model lists its output twice.
    model.fit([X_train, time_train], [y_train, y_train], epochs=epochs, batch_size=batch_size, verbose=1)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    benchmark_model_name = 'benchmark-model.h5'
    model.save(benchmark_model_name)
    print(model.evaluate([X_valid, time_valid], [y_valid, y_valid]))
def run_without_time():
    """
    Train, save and evaluate the model variant that uses only the packets.

    Builds the network with get_model_without_time(), loads weights by layer
    name from 'weights_large_cnn.h5', trains on the module-level training
    arrays, saves the trained model to 'benchmark-model.h5' and prints the
    evaluation result on the validation arrays.
    """
    print("==========================================")
    print("RUNNING WITHOUT TIME DATA")
    print("==========================================")
    model = get_model_without_time()
    # by_name=True only loads layers whose names match those in the file.
    model.load_weights('weights_large_cnn.h5', by_name=True)
    # NOTE(review): the 'rmsprop' string selects the optimizer's defaults, so
    # the module-level learningRate is not applied. Also, labels are one-hot
    # while the loss is binary_crossentropy; confirm that the reported
    # 'accuracy' is the metric you intend.
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    benchmark_model_name = 'benchmark-model.h5'
    model.save(benchmark_model_name)
    print(model.evaluate(X_valid, y_valid))
    # Disabled confusion-matrix sketch kept from the original.
    # NOTE(review): y_valid is one-hot at this point, so it would presumably
    # need converting to class indices before being passed on; verify.
    # y_pred = model.predict(X_valid)
    # y_pred = (y_pred > 0.5)
    # cm = confusion_matrix(y_valid, y_pred)
    # print(cm)
"""
Load all data from hdf5 file and assign it to training and validation sets
"""
with h5.File('/Users/brycekroencke/Documents/TrafficClassification/Project Related Files/trafficData_lstm.hdf5', 'r') as f:
X_train = f["X_train"][:]
y_train = f["y_train"][:]
time_data = f["time"][:]
print(time_data[0])
X_train, y_train, time_data = sklearn.utils.shuffle(X_train, y_train, time_data, random_state = 0)
X_valid = X_train[:50]
y_valid = y_train[:50]
X_train = X_train[50:]
y_train = y_train[50:]
time_train = time_data[50:]
time_valid = time_data[:50]
X_train = np.array(X_train)
X_valid = np.array(X_valid)
y_train = np.array(y_train)
y_valid = np.array(y_valid)
y_train = np_utils.to_categorical(y_train, num_classes)
y_valid = np_utils.to_categorical(y_valid, num_classes)
X_train = np.expand_dims(X_train, axis=3)
X_valid = np.expand_dims(X_valid, axis=3)
time_train = np.expand_dims(time_train, axis=3)
time_valid = np.expand_dims(time_valid, axis=3)
print(X_train.shape)
print(time_train.shape)
if time_concat == 1:
run_with_time()
else:
run_without_time()