
Commit

Fix optimizer variable names mismatch introduced by gradient accumulation
guillaumekln committed Nov 30, 2018
1 parent f6641f1 commit ff38e89
Showing 2 changed files with 7 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -21,6 +21,7 @@ OpenNMT-tf follows [semantic versioning 2.0.0](https://semver.org/). The API cov
 * Checkpoint utilities now save a relative path instead of absolute in the generated checkpoint state
 * Fix error on missing configuration fields that should be optional
 * Fix error on gradient accumulation in TensorFlow versions <= 1.9
+* Fix optimizer variable names mismatch introduced by gradient accumulation which prevented continuing from an existing checkpoint trained without gradient accumulation
 
 ## [1.14.1](https://github.com/OpenNMT/OpenNMT-tf/releases/tag/v1.14.1) (2018-11-28)
 
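The changelog entry above describes the practical symptom: a checkpoint written before this change stores optimizer variables under different names than the graph built with (or without) gradient accumulation expects, so training cannot resume from it. As a quick way to see what a given checkpoint actually contains, the snippet below lists the stored variable names. It is not part of this commit; it assumes the TF 1.x API used by OpenNMT-tf at the time, and "model_dir" is a hypothetical training directory.

```python
import tensorflow as tf  # TF 1.x

# "model_dir" is a placeholder for an existing training directory.
checkpoint_path = tf.train.latest_checkpoint("model_dir")

# Print every variable name (and shape) stored in the checkpoint so it can be
# compared with the variable names the current graph creates; a mismatch in
# the optimizer variable names is what blocked resuming training.
for name, shape in tf.train.list_variables(checkpoint_path):
    print(name, shape)
```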
7 changes: 6 additions & 1 deletion opennmt/utils/optim.py
@@ -223,7 +223,12 @@ def _accum_grads(accum_fn=tf.assign_add, apply_gradients=False):
       update_ops.append(accum_fn(accum_grad, grad))
     with tf.control_dependencies(update_ops):
       if apply_gradients:
-        return optimizer.apply_gradients(accum_grads_and_vars, global_step=global_step)
+        # Override the current name scope to create the optimizer slot variables
+        # in the same scope as if the optimizer was called outside of tf.cond.
+        # This is needed to ensure we can continue from a model trained without
+        # gradient accumulation (and vice-versa).
+        with tf.name_scope("%s/" % tf.get_variable_scope().name):
+          return optimizer.apply_gradients(accum_grads_and_vars, global_step=global_step)
       else:
         return tf.no_op()
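The inline comment added above is the heart of the fix: calling optimizer.apply_gradients inside a tf.cond branch places the optimizer's variables under the branch's name scope, so their names no longer match those of a model trained without accumulation. Re-entering the enclosing variable scope's name scope restores the original names, because a name passed to tf.name_scope with a trailing "/" is reused verbatim rather than nested. Below is a minimal standalone sketch of that mechanism, not taken from the repository, assuming a TF 1.x graph and using a variable named like Adam's beta1_power purely for illustration.

```python
import tensorflow as tf  # TF 1.x graph mode, matching the API used in optim.py

with tf.variable_scope("optim"):
    # Stand-in for the name scope that tf.cond opens around its branches.
    with tf.name_scope("cond_1"):
        inside = tf.Variable(0.0, name="beta1_power")

        # Same trick as the commit: re-enter the variable scope's own name
        # scope (the trailing "/" makes it absolute), so variables created
        # here get the same names as if created outside the branch.
        with tf.name_scope("%s/" % tf.get_variable_scope().name):
            pinned = tf.Variable(0.0, name="beta1_power")

print(inside.name)  # optim/cond_1/beta1_power:0
print(pinned.name)  # optim/beta1_power:0
```

Because checkpoint restore matches variables by name, only the second form lines up with a checkpoint produced without gradient accumulation (and vice-versa).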

