-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
Copy pathpytorch_simple_fullynet.py
164 lines (130 loc) · 5.49 KB
/
pytorch_simple_fullynet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""
A simple walkthrough of how to code a fully connected neural network
using the PyTorch library. For demonstration we train it on the very
common MNIST dataset of handwritten digits. In this code we go through
how to create the network as well as initialize a loss function, optimizer,
check accuracy and more.
Programmed by Aladdin Persson
* 2020-04-08: Initial coding
* 2021-03-24: Added more detailed comments also removed part of
check_accuracy which would only work specifically on MNIST.
* 2022-09-23: Updated with more detailed comments, docstrings to functions, and checked code still functions as intended.
"""
# Imports
import torch
import torch.nn.functional as F # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets # Standard datasets
import torchvision.transforms as transforms # Transformations we can perform on our dataset for augmentation
from torch import optim # For optimizers like SGD, Adam, etc.
from torch import nn # All neural network modules
from torch.utils.data import (
DataLoader,
) # Gives easier dataset managment by creating mini batches etc.
from tqdm import tqdm # For nice progress bar!
# Here we create our simple neural network. For more details here we are subclassing and
# inheriting from nn.Module, this is the most general way to create your networks and
# allows for more flexibility. I encourage you to also check out nn.Sequential which
# would be easier to use in this scenario but I wanted to show you something that
# "always" works and is a general approach.
class NN(nn.Module):
def __init__(self, input_size, num_classes):
"""
Here we define the layers of the network. We create two fully connected layers
Parameters:
input_size: the size of the input, in this case 784 (28x28)
num_classes: the number of classes we want to predict, in this case 10 (0-9)
"""
super(NN, self).__init__()
# Our first linear layer take input_size, in this case 784 nodes to 50
# and our second linear layer takes 50 to the num_classes we have, in
# this case 10.
self.fc1 = nn.Linear(input_size, 50)
self.fc2 = nn.Linear(50, num_classes)
def forward(self, x):
"""
x here is the mnist images and we run it through fc1, fc2 that we created above.
we also add a ReLU activation function in between and for that (since it has no parameters)
I recommend using nn.functional (F)
Parameters:
x: mnist images
Returns:
out: the output of the network
"""
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
# Set device cuda for GPU if it's available otherwise run on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hyperparameters
input_size = 784
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 3
# Load Data
train_dataset = datasets.MNIST(
root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = datasets.MNIST(
root="dataset/", train=False, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
# Initialize network
model = NN(input_size=input_size, num_classes=num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Train Network
for epoch in range(num_epochs):
for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
# Get data to cuda if possible
data = data.to(device=device)
targets = targets.to(device=device)
# Get to correct shape
data = data.reshape(data.shape[0], -1)
# Forward
scores = model(data)
loss = criterion(scores, targets)
# Backward
optimizer.zero_grad()
loss.backward()
# Gradient descent or adam step
optimizer.step()
# Check accuracy on training & test to see how good our model
def check_accuracy(loader, model):
"""
Check accuracy of our trained model given a loader and a model
Parameters:
loader: torch.utils.data.DataLoader
A loader for the dataset you want to check accuracy on
model: nn.Module
The model you want to check accuracy on
Returns:
acc: float
The accuracy of the model on the dataset given by the loader
"""
num_correct = 0
num_samples = 0
model.eval()
# We don't need to keep track of gradients here so we wrap it in torch.no_grad()
with torch.no_grad():
# Loop through the data
for x, y in loader:
# Move data to device
x = x.to(device=device)
y = y.to(device=device)
# Get to correct shape
x = x.reshape(x.shape[0], -1)
# Forward pass
scores = model(x)
_, predictions = scores.max(1)
# Check how many we got correct
num_correct += (predictions == y).sum()
# Keep track of number of samples
num_samples += predictions.size(0)
model.train()
return num_correct / num_samples
# Check accuracy on training & test to see how good our model
print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:.2f}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)*100:.2f}")