Adding Python wrapper for the adam operator #5021

Merged (3 commits) on Oct 24, 2017

158 changes: 153 additions & 5 deletions python/paddle/v2/framework/optimizer.py
@@ -1,7 +1,9 @@
import paddle.v2.framework.framework as framework
from collections import defaultdict

__all__ = ['SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer']
__all__ = [
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer'
]


class Optimizer(object):
@@ -43,6 +45,19 @@ def _create_accumulators(self, block, parameters):
"""
pass

def _finish_update(self, block):
"""Finish any custom updates needed
before completing an optimization step

Args:
block: the block in which the loss variable is present

Returns:
list of finish ops or None
"""
pass

def _add_accumulator(self, block, name, param, dtype=None, fill_value=0.0):
"""Utility function to add an accumulator for a parameter

@@ -137,15 +152,17 @@ def create_optimization_pass(self, parameters_and_grads, loss):
parameters_and_grads: a list of (variable, gradient) pair to update.

Returns:
optmization_op_list: a list of optimization operator that will update
parameter using gradient.
return_op_list: a list of operators that will complete one step of
optimization. This will include parameter update ops, global step
update ops and any other custom ops required by subclasses to manage
their internal state.
"""
# This is a default implementation of create_optimization_pass that
# can be shared by most optimizers. This implementation assumes that
# the subclass will implement the _append_optimize_op method and the
# _initialize_tensors method. The subclass can extend the
# _create_accumulators method if it needs to create accumulators
# for parameters.
# for parameters and extend _finish_update method to add custom ops.

# Create any accumulators
self._create_accumulators(loss.block,
@@ -160,7 +177,17 @@ def create_optimization_pass(self, parameters_and_grads, loss):
param_and_grad)
optimize_ops.append(optimize_op)

return optimize_ops
# Returned list of ops can include more ops in addition
# to optimization ops
return_ops = optimize_ops

# Get custom finish ops for subclasses
# FIXME: Need to fix this once we figure out how to handle dependencies
finish_ops = self._finish_update(loss.block)
if finish_ops is not None:
return_ops += finish_ops

return return_ops

def minimize(self, loss, parameter_list=None, no_grad_set=None):
"""Add operations to minimize `loss` by updating `parameter_list`.
@@ -329,3 +356,124 @@ def _append_optimize_op(self, block, param_and_grad):
attrs={"epsilon": self._epsilon})

return adagrad_op


class AdamOptimizer(Optimizer):
"""Implements the Adam Optimizer
"""
_moment1_acc_str = "moment1"
_moment2_acc_str = "moment2"

def __init__(self,
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
assert epsilon is not None
super(AdamOptimizer, self).__init__()
self.type = "adam"
self._learning_rate = learning_rate
self._beta1 = beta1
self._beta2 = beta2
self._epsilon = epsilon

def _initialize_tensors(self, block):
assert isinstance(block, framework.Block)
lr_shape = [1]
# create a variable for learning_rate
self._lr = block.create_var(
dtype="float32", shape=lr_shape, lod_level=0)

# create an op to init the learning_rate
# FIXME: Fix when Initialization design has been implemented
# https://github.com/PaddlePaddle/Paddle/pull/4852
block.append_op(
type="fill_constant",
outputs={"Out": self._lr},
attrs={"shape": lr_shape,
"value": self._learning_rate})

def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)

global_block = block.program.global_block()
# Create beta1 and beta2 power tensors
beta_shape = [1]
# Create variables for beta1 and beta2 powers
self._beta1_pow_acc = global_block.create_var(
dtype="float32", shape=beta_shape, lod_level=0)
self._beta2_pow_acc = global_block.create_var(
dtype="float32", shape=beta_shape, lod_level=0)

# Initialize beta1 and beta2 power accumulators
# FIXME: Fix when Initialization design has been implemented
# https://github.com/PaddlePaddle/Paddle/pull/4852
global_block.append_op(
type="fill_constant",
outputs={"Out": self._beta1_pow_acc},
attrs={"shape": beta_shape,
"value": self._beta1})
global_block.append_op(
type="fill_constant",
outputs={"Out": self._beta2_pow_acc},
attrs={"shape": beta_shape,
"value": self._beta2})

# Create accumulator tensors for first and second moments
for p in parameters:
self._add_accumulator(block, self._moment1_acc_str, p, 'float32')
self._add_accumulator(block, self._moment2_acc_str, p, 'float32')

def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)

moment1 = self._get_accumulator(self._moment1_acc_str,
param_and_grad[0])
moment2 = self._get_accumulator(self._moment2_acc_str,
param_and_grad[0])
# create the adam optimize op
adam_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"LearningRate": self._lr,
"Moment1": moment1,
"Moment2": moment2,
"Beta1Pow": self._beta1_pow_acc,
"Beta2Pow": self._beta2_pow_acc
},
outputs={
"ParamOut": param_and_grad[0],
"Moment1Out": moment1,
"Moment2Out": moment2
},
attrs={
"beta1": self._beta1,
"beta2": self._beta2,
"epsilon": self._epsilon
})

return adam_op

def _finish_update(self, block):
"""Update Beta1 and Beta2 Power accumulators
"""
assert isinstance(block, framework.Block)
global_block = block.program.global_block()
scale_beta1 = global_block.append_op(
type="scale",
inputs={"X": self._beta1_pow_acc},
outputs={"Out": self._beta1_pow_acc},
attrs={"scale": self._beta1})

scale_beta2 = global_block.append_op(
type="scale",
inputs={"X": self._beta2_pow_acc},
outputs={"Out": self._beta2_pow_acc},
attrs={"scale": self._beta2})

return [scale_beta1, scale_beta2]
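
For reference, the wrapper above only wires up the operator's inputs, outputs, and attributes; the numerical update itself is performed by the C++ adam operator. A minimal NumPy sketch of the standard Adam rule that the operator is expected to implement (illustrative only, not the operator's actual code) looks like this:

import numpy as np

def adam_step(param, grad, moment1, moment2, beta1_pow, beta2_pow,
              lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """One Adam update for a single parameter, spelled out in NumPy."""
    # Biased first and second moment estimates.
    moment1 = beta1 * moment1 + (1 - beta1) * grad
    moment2 = beta2 * moment2 + (1 - beta2) * np.square(grad)
    # Bias correction uses beta1**t and beta2**t, which the wrapper keeps
    # in the Beta1Pow and Beta2Pow accumulators.
    moment1_hat = moment1 / (1 - beta1_pow)
    moment2_hat = moment2 / (1 - beta2_pow)
    param = param - lr * moment1_hat / (np.sqrt(moment2_hat) + epsilon)
    return param, moment1, moment2

Because _create_accumulators fills Beta1Pow and Beta2Pow with beta1 and beta2 rather than 1.0, they already hold beta**1 at the first step; the two scale ops appended by _finish_update then advance them to beta**t for every subsequent step.
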
49 changes: 49 additions & 0 deletions python/paddle/v2/framework/tests/test_optimizer.py
@@ -110,5 +110,54 @@ def test_adagrad_optimizer(self):
self.assertTrue(mul_x.name in moment_acc)


class TestAdamOptimizer(unittest.TestCase):
class MockAdam(optimizer.AdamOptimizer):
def get_accumulators(self):
return self._accumulators

def get_moment1_str(self):
return self._moment1_acc_str

def get_moment2_str(self):
return self._moment2_acc_str

def test_adam_optimizer(self):
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
type="mul",
inputs={"X": mul_x,
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
adam_optimizer = self.MockAdam(
learning_rate=0.01, beta1=0.9, beta2=0.999)
params_grads = adam_optimizer.create_backward_pass(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out)
self.assertEqual(len(opts), 3)
Contributor: Why is the length of opts equal to 3 here?

Contributor Author: This is because the opts will include 3 ops (a stricter op-type check is sketched after the list):

  1. Adam op for the parameter
  2. scale op that advances the beta1 power accumulator (beta1**t)
  3. scale op that advances the beta2 power accumulator (beta2**t)
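
A stricter check along these lines would also pass, assuming the finish ops are always appended after the parameter-update op, as the new create_optimization_pass does (a sketch only, not part of this change):

self.assertEqual([op.type for op in opts], ["adam", "scale", "scale"])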

adam_op = opts[0]
self.assertEqual(adam_op.type, "adam")

# Check accumulators
accumulators = adam_optimizer.get_accumulators()
self.assertEqual(len(accumulators), 2)
self.assertTrue(adam_optimizer.get_moment1_str() in accumulators)
self.assertTrue(adam_optimizer.get_moment2_str() in accumulators)
moment1_acc = accumulators[adam_optimizer.get_moment1_str()]
moment2_acc = accumulators[adam_optimizer.get_moment2_str()]
self.assertEqual(len(moment1_acc), 1)
self.assertEqual(len(moment2_acc), 1)
self.assertTrue(mul_x.name in moment1_acc)
self.assertTrue(mul_x.name in moment2_acc)


if __name__ == '__main__':
unittest.main()
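
For context, here is a rough sketch of driving the new optimizer through minimize() instead of the two explicit passes used in the test. The mul network mirrors the hypothetical setup above, and minimize() is assumed to chain create_backward_pass() and create_optimization_pass() as in the base Optimizer class; whether this runs as-is depends on the framework state at this commit.

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer

program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
    dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
    dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
    dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
    type="mul",
    inputs={"X": mul_x, "Y": mul_y},
    outputs={"Out": mul_out},
    attrs={"x_num_col_dims": 1})

adam = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
# After this call the block should contain the backward ops, one adam op for
# mul.x, and the two scale ops that _finish_update adds for the beta powers.
adam.minimize(mul_out)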