Attention body support for blas / eigen / mkl #1852

Merged
merged 49 commits on Mar 29, 2023

Commits (49)
f2ba92a  update some shared files for attention body (borg323, Dec 6, 2022)
f091f5e  update default net (#1804) (borg323, Dec 4, 2022)
ad04529  option to select onnx opset (#1803) (borg323, Dec 4, 2022)
2782b94  Add 'simple' time manager (#1764) (trre123, Dec 4, 2022)
b218726  Backendbench assistant with TC-dependent output. (#1546) (zz4032, Dec 4, 2022)
402da93  In smooth TM, use piggybank when it seems like the bestmove can be ov… (mooskagh, Dec 4, 2022)
8774cae  only use thread binding with blas backend (#1797) (borg323, Dec 4, 2022)
a3646ea  Fix apparent "mate blindness" in endgame due to instamove logic (#1742) (Naphthalin, Dec 4, 2022)
2feda7c  initial dfrc support (#1684) (borg323, Dec 4, 2022)
2efcd4f  Fix error in mha scaling in encoder layers. (#1808) (almaudoh, Dec 7, 2022)
eeeab14  add probability for early termination of selfplay openings (#1681) (borg323, Dec 8, 2022)
bb23800  update some shared files for smolgen (borg323, Dec 12, 2022)
c80a4b7  add swish activation (borg323, Dec 16, 2022)
45965dd  Persistent L2 cache opt for cuda backend (#1815) (ankan-ban, Dec 17, 2022)
a127abd  fix undefined behavior (#1817) (borg323, Dec 18, 2022)
f4a918a  set MADV_RANDOM for syzygy mmap (#1818) (borg323, Dec 18, 2022)
6a77459  Fix softmax cuda (#1822) (almaudoh, Dec 25, 2022)
662648a  set cache_opt default to false (#1823) (borg323, Dec 26, 2022)
a5de019  fix onnx final mlh activation (#1825) (borg323, Dec 27, 2022)
c41ba16  inline fp16 conversions (#1824) (borg323, Dec 28, 2022)
9ff390b  onnx squeeze cleanup (#1826) (borg323, Dec 28, 2022)
ec887c5  update submodule (borg323, Dec 29, 2022)
825d0d0  update for new net.proto (borg323, Jan 14, 2023)
155c54a  ispc alternatives for attention policy parts (borg323, Feb 7, 2023)
cab4c04  remove mha transpose (borg323, Feb 9, 2023)
ce14608  ispc softmax (borg323, Feb 9, 2023)
bd8aafe  important typo fix (borg323, Feb 10, 2023)
8d67b1f  refactor blas encoder (borg323, Feb 11, 2023)
a96c4c0  blas performance fix (borg323, Feb 11, 2023)
ffd6bba  Attention body support to blas. (almaudoh, Feb 20, 2023)
919ef8e  Minor bug fixes (thanks @borg). (almaudoh, Feb 20, 2023)
8ebb222  Comment fix. (almaudoh, Feb 20, 2023)
ce2c2e7  Add check for add_gating. (almaudoh, Feb 20, 2023)
d299d01  Comment fix. (almaudoh, Feb 20, 2023)
def48b5  Comment fix. (almaudoh, Feb 20, 2023)
a0f978d  Merge branch 'master' into blas_ab (borg323, Feb 20, 2023)
cc202bf  code formatting (borg323, Feb 20, 2023)
add3c45  assorted fixes (borg323, Feb 21, 2023)
e70462d  Minor cs fix (almaudoh, Feb 21, 2023)
53f73de  Add hack to fix nets with wrong proto. (almaudoh, Mar 11, 2023)
971e33d  Fix bug in smolgen for small nets. (almaudoh, Mar 21, 2023)
1243861  batch blas smolgen (borg323, Feb 21, 2023)
b356308  Merge pull request #5 from borg323/blas_ab (almaudoh, Mar 22, 2023)
d011548  Minor nits. (almaudoh, Mar 22, 2023)
c73e5d2  Replace 64 with kSquares (almaudoh, Mar 22, 2023)
143b9cc  Blas performance improvements (#6) (borg323, Mar 23, 2023)
f17861c  Merge remote-tracking branch 'upstream/master' into blas_ab (almaudoh, Mar 28, 2023)
9a71851  Remove duplicate code, fix proto changes. (almaudoh, Mar 28, 2023)
1d327af  Remove missed import (almaudoh, Mar 28, 2023)
29 changes: 21 additions & 8 deletions src/neural/blas/encoder.h
@@ -30,15 +30,22 @@ namespace lczero {
 
 void LayerNorm2DWithSkipConnection(const size_t batch_size,
                                    const size_t channels, float* data,
-                                   const float* skip, const float* gammas,
-                                   const float* betas, float epsilon) {
+                                   const float alpha, const float* skip,
+                                   const float* gammas, const float* betas,
+                                   float epsilon) {
   for (size_t i = 0; i < batch_size; i++) {
 #ifndef USE_ISPC
     // Mean taken in dimension C.
     float mean = 0;
-    for (size_t c = 0; c < channels; ++c) {
-      data[i * channels + c] += skip[i * channels + c];
-      mean += data[i * channels + c];
+    if (skip != nullptr) {
+      for (size_t c = 0; c < channels; ++c) {
+        data[i * channels + c] += alpha * skip[i * channels + c];
+        mean += data[i * channels + c];
+      }
+    } else {
+      for (size_t c = 0; c < channels; ++c) {
+        mean += data[i * channels + c];
+      }
     }
     mean /= channels;
 
@@ -57,9 +64,15 @@ void LayerNorm2DWithSkipConnection(const size_t batch_size,
           betas[c] + gammas[c] * (data[i * channels + c] - mean) * den;
     }
 #else
-    ispc::LayerNorm2DWithSkipConnection(channels, data + i * channels,
-                                        skip + i * channels, gammas, betas,
-                                        epsilon);
+    if (skip != nullptr) {
+      ispc::LayerNorm2DWithSkipConnection(channels, data + i * channels, alpha,
+                                          skip + i * channels, gammas, betas,
+                                          epsilon);
+    } else {
+      ispc::LayerNorm2DWithSkipConnection(channels, data + i * channels, 0.0f,
+                                          nullptr, gammas, betas, epsilon);
+    }
+
 #endif
   }
 }
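For readers following the change: the new alpha parameter scales the skip input before it is added, and a null skip pointer now means "normalize only". A minimal scalar sketch of the semantics, for illustration rather than the actual lc0 code, assuming den is the usual 1 / sqrt(var + epsilon) layer-norm denominator:

#include <cmath>
#include <cstddef>

// Illustrative sketch: per-position layer norm over channels with an
// optional, alpha-scaled skip connection (mirrors the new signature above).
void LayerNormSketch(size_t channels, float* data, float alpha,
                     const float* skip,  // may be nullptr
                     const float* gammas, const float* betas, float epsilon) {
  float mean = 0.0f;
  for (size_t c = 0; c < channels; ++c) {
    if (skip != nullptr) data[c] += alpha * skip[c];
    mean += data[c];
  }
  mean /= channels;

  float var = 0.0f;
  for (size_t c = 0; c < channels; ++c) {
    var += (data[c] - mean) * (data[c] - mean);
  }
  var /= channels;

  const float den = 1.0f / std::sqrt(var + epsilon);
  for (size_t c = 0; c < channels; ++c) {
    data[c] = betas[c] + gammas[c] * (data[c] - mean) * den;
  }
}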
8 changes: 6 additions & 2 deletions src/neural/blas/fully_connected_layer.cc
@@ -103,7 +103,9 @@ void FullyConnectedLayer<false>::Forward1D(
                 outputs,            // C
                 (int)output_size);  // ldc, leading rank of C
   }
-  ApplyBias(batch_size, output_size, biases, activation, outputs);
+  if (biases != nullptr) {
+    ApplyBias(batch_size, output_size, biases, activation, outputs);
+  }
 }
 
 template <>
@@ -134,7 +136,9 @@ void FullyConnectedLayer<true>::Forward1D(
             .transpose() *
         ConstEigenMatrixMap<float>(inputs, input_size, batch_size);
   }
-  ApplyBias(batch_size, output_size, biases, activation, outputs);
+  if (biases != nullptr) {
+    ApplyBias(batch_size, output_size, biases, activation, outputs);
+  }
 }
 
 template <>
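The guard around ApplyBias makes the bias term optional: when biases is nullptr the layer output is simply the raw GEMM result, with neither bias nor activation applied. A rough sketch of the guarded step follows; ApplyBiasSketch and its ReLU-only activation are assumptions for illustration, not lc0's actual helper:

#include <algorithm>
#include <cstddef>

// Hypothetical bias-plus-activation step; skipped entirely when biases == nullptr,
// matching the behavior introduced by the diff above.
void ApplyBiasSketch(size_t batch_size, size_t output_size,
                     const float* biases,  // may be nullptr
                     bool relu, float* outputs) {
  if (biases == nullptr) return;
  for (size_t b = 0; b < batch_size; ++b) {
    for (size_t o = 0; o < output_size; ++o) {
      float v = outputs[b * output_size + o] + biases[o];
      outputs[b * output_size + o] = relu ? std::max(v, 0.0f) : v;
    }
  }
}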
37 changes: 27 additions & 10 deletions src/neural/blas/layer_norm.ispc
@@ -18,6 +18,7 @@
 
 export void LayerNorm2DWithSkipConnection(uniform const size_t channels,
                                           uniform float data[],
+                                          const uniform float alpha,
                                           const uniform float skip[],
                                           const uniform float gammas[],
                                           const uniform float betas[],
@@ -27,23 +28,39 @@ export void LayerNorm2DWithSkipConnection(uniform const size_t channels,
   // One pass mean and variance taken in dimension C. Uses shifted variance calculation.
   float imean = 0;
   float ivar = 0;
-  float k = data[0] + skip[0];
-  foreach (c = 0 ... channels) {
-    float t = data[c] + skip[c];
-    data[c] = t;
-    t -= k;
-    imean += t;
-    ivar += t * t;
+  float k = data[0];
+  if (skip != NULL) {
+    k += alpha * skip[0];
+    foreach (c = 0 ... channels) {
+      float t = data[c] + alpha * skip[c];
+      data[c] = t;
+      t -= k;
+      imean += t;
+      ivar += t * t;
+    }
+  } else {
+    foreach (c = 0 ... channels) {
+      float t = data[c];
+      t -= k;
+      imean += t;
+      ivar += t * t;
+    }
   }
   float mean = reduce_add(imean) / channels;
   float var = (reduce_add(ivar) - channels * mean * mean) / channels;
   mean += k;
 #else
   // Mean taken in dimension C.
   float imean = 0;
-  foreach (c = 0 ... channels) {
-    data[c] += skip[c];
-    imean += data[c];
+  if (skip != NULL) {
+    foreach (c = 0 ... channels) {
+      data[c] += alpha * skip[c];
+      imean += data[c];
+    }
+  } else {
+    foreach (c = 0 ... channels) {
+      imean += data[c];
+    }
   }
   float mean = reduce_add(imean) / channels;
 
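The ISPC kernel keeps the one-pass "shifted variance" form noted in its comment: every value is offset by k (the first element, now including the alpha-scaled skip) before accumulating the sum and the sum of squares, which keeps the accumulators small. A scalar C++ sketch of the same identity, Var(x) = E[(x - k)^2] - (E[x] - k)^2, purely for illustration:

#include <cstddef>

// One-pass mean/variance with a shift k, mirroring the kernel above.
// Uses the identity Var(x) = E[(x - k)^2] - (E[x] - k)^2 for any constant k.
void ShiftedMeanVar(const float* data, size_t n, float* mean_out, float* var_out) {
  const float k = data[0];  // a shift close to the data range
  float sum = 0.0f, sum_sq = 0.0f;
  for (size_t i = 0; i < n; ++i) {
    const float t = data[i] - k;
    sum += t;
    sum_sq += t * t;
  }
  const float mean_shifted = sum / n;                    // E[x] - k
  *var_out = sum_sq / n - mean_shifted * mean_shifted;   // Var(x)
  *mean_out = mean_shifted + k;                          // E[x]
}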