#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 23 08:26:04 2024
@author: jaggu
"""
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
# Load the Iris dataset
iris = load_iris()
X = iris.data # Feature matrix (4 features per sample)
y = iris.target # Target vector (class labels for each sample)
# Normalize the feature matrix to have zero mean and unit variance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X) # Apply scaling to the features
# Convert the target vector into one-hot encoded format
encoder = OneHotEncoder()
y_one_hot = encoder.fit_transform(y.reshape(-1, 1)).toarray() # Reshape y and apply one-hot encoding
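# For reference, the three class labels map to one-hot rows:
# 0 -> [1, 0, 0], 1 -> [0, 1, 0], 2 -> [0, 0, 1]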
# Split the dataset into training and test sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_one_hot, test_size=0.2, random_state=42)
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        """
        Initialize the neural network with weights and biases.
        Parameters:
        input_size (int): Number of input features
        hidden_size (int): Number of neurons in the hidden layer
        output_size (int): Number of output classes (number of neurons in the output layer)
        """
        # Initialize weights and biases for the hidden and output layers
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01  # Weights for input to hidden layer
        self.b1 = np.zeros((1, hidden_size))  # Biases for hidden layer
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01  # Weights for hidden to output layer
        self.b2 = np.zeros((1, output_size))  # Biases for output layer
    def relu(self, z):
        """
        Apply the ReLU activation function.
        Parameters:
        z (array): Input array to the activation function
        Returns:
        array: Result of applying ReLU activation function
        """
        return np.maximum(0, z)
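    # For example, relu(np.array([-2.0, 0.0, 3.5])) returns array([0., 0., 3.5]).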
    def softmax(self, z):
        """
        Apply the softmax activation function.
        Parameters:
        z (array): Input array to the activation function
        Returns:
        array: Result of applying softmax activation function
        """
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))  # Numerical stability improvement
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)
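    # For example, softmax([[1.0, 2.0, 3.0]]) is roughly [[0.090, 0.245, 0.665]].
    # Subtracting the row max first leaves the result unchanged, because
    # exp(z - c) / sum(exp(z - c)) == exp(z) / sum(exp(z)), but avoids overflow.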
    def forward(self, X):
        """
        Perform forward propagation through the network.
        Parameters:
        X (array): Input features
        Returns:
        array: Predicted probabilities for each class
        """
        self.z1 = np.dot(X, self.W1) + self.b1  # Linear combination for hidden layer
        self.a1 = self.relu(self.z1)  # Apply ReLU activation
        self.z2 = np.dot(self.a1, self.W2) + self.b2  # Linear combination for output layer
        self.a2 = self.softmax(self.z2)  # Apply softmax activation
        return self.a2
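    # With the sizes used below (4 features, 10 hidden units, 3 classes),
    # shapes flow as X (m, 4) -> z1/a1 (m, 10) -> z2/a2 (m, 3).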
    def compute_loss(self, y_true, y_pred):
        """
        Compute the cross-entropy loss between the true and predicted values.
        Parameters:
        y_true (array): True one-hot encoded labels
        y_pred (array): Predicted probabilities
        Returns:
        float: Loss value
        """
        m = y_true.shape[0]  # Number of samples
        # The small epsilon inside the log guards against log(0) if a predicted
        # probability underflows to zero
        log_likelihood = -np.log(y_pred[np.arange(m), np.argmax(y_true, axis=1)] + 1e-12)
        return np.sum(log_likelihood) / m  # Average loss
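    # The loss above is L = -(1/m) * sum_i log(p_i), where p_i is the predicted
    # probability of sample i's true class; e.g. predicting 0.9 for the true
    # class contributes -log(0.9) ≈ 0.105 to the sum.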
    def backward(self, X, y_true, y_pred, learning_rate=0.01):
        """
        Perform backward propagation to update the weights and biases.
        Parameters:
        X (array): Input features
        y_true (array): True one-hot encoded labels
        y_pred (array): Predicted probabilities
        learning_rate (float): Learning rate for weight updates
        """
        m = y_true.shape[0]  # Number of samples
        # Compute gradients for the output layer
        dz2 = y_pred - y_true  # Gradient of loss w.r.t. z2
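        # (This compact form comes from combining the softmax Jacobian with the
        # cross-entropy gradient: for softmax outputs a2 and one-hot targets y,
        # dL/dz2 simplifies to a2 - y.)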
        dW2 = np.dot(self.a1.T, dz2) / m  # Gradient of loss w.r.t. W2
        db2 = np.sum(dz2, axis=0, keepdims=True) / m  # Gradient of loss w.r.t. b2 (keepdims keeps the (1, n) bias shape)
        # Compute gradients for the hidden layer
        dz1 = np.dot(dz2, self.W2.T) * (self.a1 > 0)  # Gradient of loss w.r.t. z1 (ReLU derivative)
        dW1 = np.dot(X.T, dz1) / m  # Gradient of loss w.r.t. W1
        db1 = np.sum(dz1, axis=0, keepdims=True) / m  # Gradient of loss w.r.t. b1
        # Update weights and biases
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
    def train(self, X_train, y_train, epochs=1000, learning_rate=0.01):
        """
        Train the neural network using gradient descent.
        Parameters:
        X_train (array): Training features
        y_train (array): Training labels
        epochs (int): Number of training epochs
        learning_rate (float): Learning rate for weight updates
        """
        for epoch in range(epochs):
            y_pred = self.forward(X_train)  # Forward pass
            loss = self.compute_loss(y_train, y_pred)  # Compute loss
            self.backward(X_train, y_train, y_pred, learning_rate)  # Backward pass
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss:.4f}')  # Print loss every 100 epochs
    def predict(self, X):
        """
        Predict the class labels for given input features.
        Parameters:
        X (array): Input features
        Returns:
        array: Predicted class labels
        """
        y_pred = self.forward(X)  # Forward pass
        return np.argmax(y_pred, axis=1)  # Return index of highest probability class
# Initialize the neural network with input size, hidden layer size, and output size
input_size = X_train.shape[1] # Number of features
hidden_size = 10 # Number of neurons in hidden layer
output_size = y_train.shape[1] # Number of classes
nn = NeuralNetwork(input_size, hidden_size, output_size) # Create neural network instance
# Train the neural network
nn.train(X_train, y_train, epochs=1001, learning_rate=0.01)
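# A minimal finite-difference gradient check (an illustrative sketch; the helper
# name grad_check, the entries checked, and the 1e-5 step size are choices of
# this example, not part of the original script). It compares the analytic dW2
# from the backprop equations against a numerical slope of the loss.
def grad_check(net, X, y, eps=1e-5):
    y_pred = net.forward(X)
    analytic = np.dot(net.a1.T, y_pred - y) / X.shape[0]  # same dW2 as in backward()
    for i, j in [(0, 0), (1, 2)]:  # spot-check two weight entries
        old = net.W2[i, j]
        net.W2[i, j] = old + eps
        loss_plus = net.compute_loss(y, net.forward(X))
        net.W2[i, j] = old - eps
        loss_minus = net.compute_loss(y, net.forward(X))
        net.W2[i, j] = old  # restore the original weight
        numeric = (loss_plus - loss_minus) / (2 * eps)
        print(f'dW2[{i},{j}]: analytic={analytic[i, j]:.6f}, numeric={numeric:.6f}')
grad_check(nn, X_train[:5], y_train[:5])  # the two values should agree closely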
# Predict on the test set and compute accuracy
y_test_pred = nn.predict(X_test) # Predict labels for the test set
y_test_true = np.argmax(y_test, axis=1) # Convert one-hot encoded test labels to single class labels
# Calculate accuracy
accuracy = np.mean(y_test_pred == y_test_true) # Compute accuracy
print(f'Test Accuracy: {accuracy * 100:.2f}%') # Print test accuracy
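# A quick usage sketch: classify one new flower. The measurements below are the
# first row of the Iris data (a setosa), reused here purely as an example; any
# new sample must pass through the same fitted scaler before prediction.
sample = np.array([[5.1, 3.5, 1.4, 0.2]])  # sepal length/width, petal length/width (cm)
sample_scaled = scaler.transform(sample)
predicted_class = nn.predict(sample_scaled)[0]
print(f'Predicted species: {iris.target_names[predicted_class]}')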