Avoid Ampere misalignment issue

marian-nmt · May 17, 2021 · 8b818b7 · 8b818b7
1 parent 49e379b
commit 8b818b7
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -41,6 +41,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Broken links to MNIST data sets
 
 ### Changed
+- Set REQUIRED_BIAS_ALIGNMENT = 16 in tensors/gpu/prod.cpp to avoid memory-misalignment on certain Ampere GPUs.
 - For BUILD_ARCH != native enable all intrinsics types by default, can be disabled like this: -DCOMPILE_AVX512=off
 - Moved FBGEMM pointer to commit c258054 for gcc 9.3+ fix
 - Change compile options a la -DCOMPILE_CUDA_SM35 to -DCOMPILE_KEPLER, -DCOMPILE_MAXWELL,

diff --git a/regression-tests b/regression-tests
diff --git a/src/tensors/gpu/prod.cpp b/src/tensors/gpu/prod.cpp
@@ -22,7 +22,7 @@ namespace gpu {
 // It seems that the bias must be 8 byte aligned for the cublasLt epilogue to work. Therefore,
 // if the bias pointer is not 8 byte aligned, we do a normal matmul in cublasLt and invoke a 
 // custom epilogue kernel.
-static constexpr int REQUIRED_BIAS_ALIGNMENT = 8;  
+static constexpr int REQUIRED_BIAS_ALIGNMENT = 16; // @TODO: MJD: changed this to 16 to avoid alignment error on A100. Seems to work fine.
 
 // Used to set preferences for cublasLt to filter out algos if matrices do not meet default 256 byte alignment
 int getAlignmentUpTo256(const void *ptr) {
+0 −3		tests/training/features/lr-decay/.gitignore
+0 −25		tests/training/features/lr-decay/lrdecay_stalled.expected
+0 −23		tests/training/features/lr-decay/lrdecay_stalled_all.expected
+0 −22		tests/training/features/lr-decay/lrdecay_stalled_any.expected
+0 −6		tests/training/features/lr-decay/setup.sh
+0 −43		tests/training/features/lr-decay/test_lr_decay_stalled.sh
+0 −43		tests/training/features/lr-decay/test_lr_decay_stalled_all.sh
+0 −43		tests/training/features/lr-decay/test_lr_decay_stalled_any.sh
+0 −9		tests/training/features/lr-decay/valid_script.sh
+0 −4		tests/training/restoring/validation/test_adding_validator_after_restart.sh
+0 −4		tests/training/restoring/validation/test_restoring_newbest_validators.sh
+0 −4		tests/training/restoring/validation/test_restoring_stalled_validators.sh
+0 −4		tests/training/restoring/validation/test_restoring_validation.sh
+0 −4		tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
+1 −1		tests/training/restoring/validation/test_valid_reset_stalled.sh
+0 −4		tests/training/validation/.gitignore
+0 −18		tests/training/validation/stop_on_1st.expected
+0 −18		tests/training/validation/stop_on_all.expected
+0 −14		tests/training/validation/stop_on_any.expected
+0 −9		tests/training/validation/stop_on_script.sh
+0 −42		tests/training/validation/test_early_stopping_on_1st.sh
+0 −42		tests/training/validation/test_early_stopping_on_all.sh
+0 −42		tests/training/validation/test_early_stopping_on_any.sh
+0 −4		tests/training/validation/test_empty_valid_set_lm.sh
+0 −4		tests/training/validation/test_empty_valid_set_source.sh
+0 −4		tests/training/validation/test_empty_valid_set_target.sh
+0 −4		tests/training/validation/test_empty_valid_sets.sh
+0 −4		tests/training/validation/test_final_validation_after_batches.sh
+0 −4		tests/training/validation/test_final_validation_after_batches_match.sh
+0 −4		tests/training/validation/test_final_validation_after_epochs.sh
+0 −1		tests/training/validation/test_templated_valid_bleu_output.sh
+0 −1		tests/training/validation/test_templated_valid_translation_output.sh