kernel_network.jl
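# A minimal single-layer (2-input, 2-output) sigmoid network trained with hand-written
# CUDA.jl kernels: one kernel for the forward pass, one for the gradients of a
# mean-squared-error loss, and plain gradient-descent parameter updates on the host.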
using CUDA
# Scalar sigmoid, written without broadcasting so it stays in Float32 when called
# element-wise inside a kernel.
function sigmoid(x)
    return one(x) / (one(x) + exp(-x))
end

function sigmoid_derivative(x)
    s = sigmoid(x)
    return s * (one(x) - s)
end
# Forward pass: thread i computes one output neuron,
# z[i] = sum_j weights[i, j] * input[j] + biases[i] and output[i] = sigmoid(z[i]).
function forward_pass_kernel!(input, weights, biases, output, z, N)
    i = threadIdx().x
    if i <= N
        accumulator = 0.0f0
        for j in axes(weights, 2)
            accumulator += weights[i, j] * input[j]
        end
        z[i] = accumulator + biases[i]
        output[i] = sigmoid(z[i])
    end
    return nothing
end
# Backward pass: thread i computes the gradient of the squared error
# (output[i] - target[i])^2 with respect to row i of the weights and bias i.
# The parameter updates themselves are applied on the host afterwards.
function backward_pass_kernel!(input, output, z, target, weights, biases, d_weights, d_biases, learning_rate, N)
    i = threadIdx().x
    if i <= N
        d_loss_output = 2.0f0 * (output[i] - target[i])
        d_output_z = sigmoid_derivative(z[i])
        d_loss_z = d_loss_output * d_output_z
        for j in axes(input, 1)
            d_weights[i, j] = d_loss_z * input[j]
        end
        d_biases[i] = d_loss_z
    end
    return nothing
end
# Plain gradient-descent updates, applied as broadcasts on the device arrays.
function update_weights!(weights, d_weights, learning_rate)
    weights .-= learning_rate .* d_weights
end

function update_biases!(biases, d_biases, learning_rate)
    biases .-= learning_rate .* d_biases
end
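# mse_loss is called in the training loop below but is not defined anywhere in this
# file; a minimal definition, assumed from the 2 * (output - target) gradient used in
# the backward kernel (mean squared error over the output vector):
mse_loss(output, target) = sum((output .- target) .^ 2) / length(output)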
# Disallow scalar indexing of device arrays so accidental host-side loops fail loudly.
CUDA.allowscalar(false)

# Problem setup: a 2-input, 2-output layer with a fixed input and target.
input = CUDA.fill(0.5f0, 2, 1)
weights = CUDA.rand(2, 2)
biases = CUDA.rand(2, 1)
target = CUDA.fill(0.3f0, 2, 1)
d_weights = CUDA.zeros(2, 2)
d_biases = CUDA.zeros(2, 1)
output = CUDA.zeros(2, 1)
z = CUDA.zeros(2, 1)
learning_rate = 0.1f0
epochs = 1000
N = size(weights, 1)  # number of output neurons, one thread per neuron
for epoch in 1:epochs
    # One training step: forward pass, gradient computation, then parameter updates.
    @cuda threads=N forward_pass_kernel!(input, weights, biases, output, z, N)
    @cuda threads=N backward_pass_kernel!(input, output, z, target, weights, biases, d_weights, d_biases, learning_rate, N)
    current_loss = mse_loss(output, target)
    update_weights!(weights, d_weights, learning_rate)
    update_biases!(biases, d_biases, learning_rate)
    if epoch % 100 == 0
        println("epoch $epoch, loss: $current_loss")
    end
end
# Final forward pass with the trained parameters; copy the result back to the host.
@cuda threads=N forward_pass_kernel!(input, weights, biases, output, z, N)
final_output = Array(output)
println("final output: ", final_output)