Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/quick start api #1126

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 34 additions & 42 deletions demo/mnist/api_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,32 @@

The user api could be simpler and carefully designed.
"""
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
import paddle.trainer.PyDataProvider2 as dp
import numpy as np
import random

import paddle.v2 as paddle

from mnist_util import read_from_mnist
from paddle.trainer_config_helpers import *


def optimizer_config():
settings(
paddle.config.settings(
learning_rate=1e-4,
learning_method=AdamOptimizer(),
learning_method=paddle.config.AdamOptimizer(),
batch_size=1000,
model_average=ModelAverage(average_window=0.5),
regularization=L2Regularization(rate=0.5))
model_average=paddle.config.ModelAverage(average_window=0.5),
regularization=paddle.config.L2Regularization(rate=0.5))


def network_config():
imgs = data_layer(name='pixel', size=784)
hidden1 = fc_layer(input=imgs, size=200)
hidden2 = fc_layer(input=hidden1, size=200)
inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
cost = classification_cost(
input=inference, label=data_layer(
imgs = paddle.config.data_layer(name='pixel', size=784)
hidden1 = paddle.config.fc_layer(input=imgs, size=200)
hidden2 = paddle.config.fc_layer(input=hidden1, size=200)
inference = paddle.config.fc_layer(
input=hidden2, size=10, act=paddle.config.SoftmaxActivation())
cost = paddle.config.classification_cost(
input=inference, label=paddle.config.data_layer(
name='label', size=10))
outputs(cost)


def init_parameter(network):
assert isinstance(network, api.GradientMachine)
for each_param in network.getParameters():
assert isinstance(each_param, api.Parameter)
array_size = len(each_param)
array = np.random.uniform(-1.0, 1.0, array_size).astype('float32')
each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array)
paddle.config.outputs(cost)


def generator_to_batch(generator, batch_size):
Expand Down Expand Up @@ -73,42 +63,44 @@ def input_order_converter(generator):


def main():
api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores
paddle.raw.initPaddle("-use_gpu=false",
"-trainer_count=4") # use 4 cpu cores

# get enable_types for each optimizer.
# enable_types = [value, gradient, momentum, etc]
# For each optimizer(SGD, Adam), GradientMachine should enable different
# buffers.
opt_config_proto = parse_optimizer_config(optimizer_config)
opt_config = api.OptimizationConfig.createFromProto(opt_config_proto)
_temp_optimizer_ = api.ParameterOptimizer.create(opt_config)
opt_config_proto = paddle.config.parse_optimizer(optimizer_config)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里还是不要依赖把 optimzier_config 定义成一个函数。因为这里其实应该不需要protobuf吧。

opt_config = paddle.raw.OptimizationConfig.createFromProto(opt_config_proto)
_temp_optimizer_ = paddle.raw.ParameterOptimizer.create(opt_config)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里需要定义一个temporary variable,不容易理解为什么需要它。

enable_types = _temp_optimizer_.getParameterTypes()

# Create Simple Gradient Machine.
model_config = parse_network_config(network_config)
m = api.GradientMachine.createFromConfigProto(
model_config, api.CREATE_MODE_NORMAL, enable_types)
model_config = paddle.config.parse_network(network_config)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

类似的,model_config一定要是protobuf吗?

m = paddle.raw.GradientMachine.createFromConfigProto(
model_config, paddle.raw.CREATE_MODE_NORMAL, enable_types)

# This type check is not useful. Only enable type hint in IDE.
# Such as PyCharm
assert isinstance(m, api.GradientMachine)
assert isinstance(m, paddle.raw.GradientMachine)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

如果上面的代码是 m = paddle.gradient_mahcine.new(...) ,那么这里不需要这一行,也可以让用户了明白 m 是一个 gradient machine了。


# Initialize Parameter by numpy.
init_parameter(network=m)
m.randParameters()

# Create Local Updater. Local means not run in cluster.
# For a cluster training, here we can change to createRemoteUpdater
# in future.
updater = api.ParameterUpdater.createLocalUpdater(opt_config)
assert isinstance(updater, api.ParameterUpdater)
updater = paddle.raw.ParameterUpdater.createLocalUpdater(opt_config)
assert isinstance(updater, paddle.raw.ParameterUpdater)

# Initialize ParameterUpdater.
updater.init(m)

# DataProvider Converter is a utility convert Python Object to Paddle C++
# Input. The input format is as same as Paddle's DataProvider.
converter = DataProviderConverter(
input_types=[dp.dense_vector(784), dp.integer_value(10)])
converter = paddle.data.DataProviderConverter(input_types=[
paddle.data.dense_vector(784), paddle.data.integer_value(10)
])

train_file = './data/raw_data/train'
test_file = './data/raw_data/t10k'
Expand All @@ -130,7 +122,7 @@ def main():

# outArgs is Neural Network forward result. Here is not useful, just passed
# to gradient_machine.forward
outArgs = api.Arguments.createArguments(0)
outArgs = paddle.raw.Arguments.createArguments(0)

for pass_id in xrange(2): # we train 2 passes.
updater.startPass()
Expand Down Expand Up @@ -178,7 +170,7 @@ def main():
test_data_generator = input_order_converter(read_from_mnist(test_file))
for data_batch in generator_to_batch(test_data_generator, 512):
# in testing stage, only forward is needed.
m.forward(converter(data_batch), outArgs, api.PASS_TEST)
m.forward(converter(data_batch), outArgs, paddle.raw.PASS_TEST)
m.eval(test_evaluator)

# print error rate for test data set
Expand All @@ -189,8 +181,8 @@ def main():
updater.catchUpWith()
params = m.getParameters()
for each_param in params:
assert isinstance(each_param, api.Parameter)
value = each_param.getBuf(api.PARAMETER_VALUE)
assert isinstance(each_param, paddle.raw.Parameter)
value = each_param.getBuf(paddle.raw.PARAMETER_VALUE)
value = value.copyToNumpyArray()

# Here, we could save parameter to every where you want
Expand Down
3 changes: 3 additions & 0 deletions demo/quick_start/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ data/pred.txt
dataprovider_copy_1.py
train.log
output
*.w0
*.wbias
*.pkl
242 changes: 242 additions & 0 deletions demo/quick_start/api_train_gm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
import random
import cPickle
import os
import paddle.v2 as paddle


class FileReader(object):
    """Batched reader for labeled-sentence text files.

    Each input line has the form ``label\tsentence``; the sentence is
    tokenized on whitespace and mapped through ``word_dict`` (words not in
    the dictionary are dropped). Parsed data is cached to a sibling
    ``.pkl`` file so later runs can load the pickle directly.

    :type word_dict: dict
    :type __pool__: list
    """

    def __init__(self, word_dict, filename, batch_size, should_shuffle=True):
        # ``word_dict`` may be a path to a dictionary file or an
        # already-loaded {word: id} mapping.
        if isinstance(word_dict, basestring):
            self.word_dict = FileReader.read_from_dict(word_dict)
        else:
            self.word_dict = word_dict
        self.__should_shuffle__ = should_shuffle
        self.__batch_size__ = batch_size

        self.__pool__ = self.load_all_data(filename)
        self.__idx__ = 0

    def load_all_data(self, filename):
        """Load all (word_ids, label) samples from a ``.txt`` or ``.pkl`` file.

        A ``.txt`` file is parsed line by line and the result is cached to
        a sibling ``.pkl`` file; a ``.pkl`` file is unpickled directly.

        :raise ValueError: if the filename has an unsupported extension.
        """

        def __mapper__(line):
            label, sentence = line.split('\t')
            label = int(label)
            # Drop words that are missing from the dictionary.
            word_ids = filter(lambda x: x is not None,
                              map(lambda x: self.word_dict.get(x, None),
                                  sentence.split()))
            return word_ids, label

        if filename[-3:] == 'txt':
            with open(filename, 'r') as f:
                ret_val = map(__mapper__, f)
            # Cache the parsed samples next to the source file.
            with open("%s.pkl" % filename[:-4], 'wb') as f:
                cPickle.dump(ret_val, f, cPickle.HIGHEST_PROTOCOL)
            return ret_val
        elif filename[-3:] == 'pkl':
            with open(filename, 'rb') as f:
                return cPickle.load(f)
        else:
            # Previously this fell through and returned None, which made
            # iteration fail later with a confusing TypeError.
            raise ValueError(
                "Unsupported data file %s; expected .txt or .pkl" % filename)

    def __iter__(self):
        self.reset()
        return self

    def reset(self):
        """Rewind to the first batch, reshuffling the pool if enabled."""
        if self.__should_shuffle__:
            random.shuffle(self.__pool__)
        self.__idx__ = 0

    def next(self):
        """Return the next batch of samples, or raise StopIteration."""
        if self.__idx__ < len(self.__pool__):
            end = min(self.__idx__ + self.__batch_size__, len(self.__pool__))
            start = self.__idx__
            self.__idx__ = end
            return self.__pool__[start:end]
        else:
            raise StopIteration()

    # Python 3 iterator protocol alias (harmless under Python 2).
    __next__ = next

    @staticmethod
    def read_from_dict(fn):
        """Load a {word: id} dict from file ``fn``, caching it as ``fn.pkl``.

        The dictionary file has one word per line; the id is the line index.
        """
        if os.path.exists(fn + '.pkl'):
            with open(fn + '.pkl', 'rb') as f:
                return cPickle.load(f)
        else:
            ret_val = dict()
            with open(fn, 'r') as f:
                for i, line in enumerate(f):
                    w = line.split()[0]
                    ret_val[w] = i
            with open(fn + '.pkl', 'wb') as f:
                cPickle.dump(ret_val, f, cPickle.HIGHEST_PROTOCOL)
            return ret_val


def optimizer_config():
    """Declare the optimization settings: RMSProp, lr=1e-4, batch size 1."""
    hyper_params = dict(
        batch_size=1,
        learning_rate=1e-4,
        learning_method=paddle.config.RMSPropOptimizer())
    paddle.config.settings(**hyper_params)


def bow_config(dict_size):
    """Return a network-config callable for a bag-of-words classifier.

    :param dict_size: vocabulary size, i.e. width of the input layer.
    """

    def __impl__():
        # Input: sparse bag-of-words vector over the vocabulary.
        words = paddle.config.data_layer(name='sentence', size=dict_size)
        # One softmax layer predicting one of two classes; sparse updates
        # because the input vector is sparse.
        prediction = paddle.config.fc_layer(
            input=words,
            size=2,
            act=paddle.config.SoftmaxActivation(),
            param_attr=paddle.config.ParamAttr(sparse_update=True))
        label = paddle.config.data_layer(name='label', size=2)
        loss = paddle.config.classification_cost(
            input=prediction, label=label)
        paddle.config.outputs(loss)

    return __impl__


def swap_batch(batch):
    """Yield every (a, b) pair of *batch* with its elements swapped.

    Used to reorder (sentence, label) samples when the network expects
    its inputs in the opposite order.
    """
    for first, second in batch:
        yield second, first


def main():
    """Train a bag-of-words sentiment classifier with the legacy raw API.

    NOTE: this demo is Python 2 (print statements, xrange) and drives the
    low-level paddle.raw swig bindings directly.
    """
    print 'Loading data into memory'
    # Prefer the cached pickle files when present; fall back to raw text.
    train_file_name = './data/train.pkl' if os.path.exists(
        './data/train.pkl') else './data/train.txt'

    test_file_name = './data/test.pkl' if os.path.exists(
        './data/test.pkl') else './data/test.txt'

    train_reader = FileReader(
        "./data/dict.txt", filename=train_file_name, batch_size=1024)
    # Reuse the training dictionary so word ids match between the two sets.
    test_reader = FileReader(
        train_reader.word_dict, filename=test_file_name, batch_size=1024)

    print 'Done.'

    paddle.raw.initPaddle('--use_gpu=0', '--trainer_count=3')

    # Parse the optimizer settings into a protobuf, then wrap it in the
    # C++ OptimizationConfig object the raw API consumes.
    optimizer_proto = paddle.config.parse_optimizer(
        optimizer_conf=optimizer_config)
    optimizer_conf = paddle.raw.OptimizationConfig.createFromProto(
        optimizer_proto)
    # A throwaway optimizer is created only to query which parameter
    # buffer types (value, gradient, momentum, ...) the GradientMachine
    # must allocate for this optimization method.
    __tmp_optimizer__ = paddle.raw.ParameterOptimizer.create(optimizer_conf)
    assert isinstance(__tmp_optimizer__, paddle.raw.ParameterOptimizer)
    enable_types = __tmp_optimizer__.getParameterTypes()

    model_proto = paddle.config.parse_network(
        network_conf=bow_config(len(train_reader.word_dict)))

    for param in model_proto.parameters:
        if param.sparse_remote_update:
            # Disable sparse *remote* update when training locally.
            param.sparse_remote_update = False

    gradient_machine = paddle.raw.GradientMachine.createFromConfigProto(
        model_proto, paddle.raw.CREATE_MODE_NORMAL, enable_types)
    assert isinstance(gradient_machine, paddle.raw.GradientMachine)
    gradient_machine.randParameters()

    # Local (single-node) parameter updater.
    updater = paddle.raw.ParameterUpdater.createLocalUpdater(optimizer_conf)
    assert isinstance(updater, paddle.raw.ParameterUpdater)

    # Build the data converter in the network's own input order, which may
    # differ from the (sentence, label) order produced by FileReader.
    input_order = model_proto.input_layer_names
    input_types = {
        'sentence':
        paddle.data.sparse_binary_vector(len(train_reader.word_dict)),
        'label': paddle.data.integer_value(2)
    }

    tmp = []
    for each in input_order:
        tmp.append(input_types[each])

    input_types = tmp

    converter = paddle.data.DataProviderConverter(input_types=input_types)

    # If the network wants (label, sentence) instead of (sentence, label),
    # swap each sample on the fly while feeding batches.
    input_order_for_data = ['sentence', 'label']
    switcher = None
    if input_order_for_data != input_order:
        switcher = swap_batch

    updater.init(gradient_machine)

    gradient_machine.start()

    train_evaluator = gradient_machine.makeEvaluator()
    test_evaluator = gradient_machine.makeEvaluator()
    assert isinstance(train_evaluator, paddle.raw.Evaluator)
    assert isinstance(test_evaluator, paddle.raw.Evaluator)

    # Print training metrics every this many batches.
    train_evaluate_period = 100

    # Forward results land here; only the summed cost is read back.
    out_args = paddle.raw.Arguments.createArguments(0)
    assert isinstance(out_args, paddle.raw.Arguments)
    for pass_id in xrange(10):
        updater.startPass()
        for batch_id, data_batch in enumerate(train_reader):
            if switcher is not None:
                data_batch = switcher(data_batch)

            updater.startBatch(len(data_batch))

            in_args = converter(data_batch)

            if batch_id % train_evaluate_period == 0:
                train_evaluator.start()

            gradient_machine.forwardBackward(in_args, out_args,
                                             paddle.raw.PASS_TRAIN)

            gradient_machine.eval(train_evaluator)

            # Average cost over the batch.
            cost = out_args.sumCosts() / len(data_batch)

            if batch_id % train_evaluate_period == 0:
                print 'Pass=%d Batch=%d Cost=%f' % (pass_id, batch_id,
                                                    cost), train_evaluator
                train_evaluator.finish()

            # NOTE(review): the evaluator was already fed above and may
            # just have been finish()ed -- this second eval looks
            # redundant; confirm against upstream before relying on it.
            gradient_machine.eval(train_evaluator)

            # Apply the accumulated gradients parameter by parameter.
            for each_param in gradient_machine.getParameters():
                updater.update(each_param)

            updater.finishBatch(cost)

        print 'Pass=%d Batch=%d Cost=%f' % (pass_id, batch_id,
                                            cost), train_evaluator
        # Make sure all parameters are fully updated before testing.
        updater.catchUpWith()

        test_evaluator.start()
        for data_batch in test_reader:
            if switcher is not None:
                data_batch = switcher(data_batch)

            # Testing only needs the forward pass.
            in_args = converter(data_batch)
            gradient_machine.forward(in_args, out_args, paddle.raw.PASS_TEST)
            gradient_machine.eval(test_evaluator)

        print 'Test Pass=%d' % pass_id, test_evaluator

        # Snapshot every parameter after each pass.
        print 'Saving parameters.'
        for param in gradient_machine.getParameters():
            assert isinstance(param, paddle.raw.Parameter)
            save_name = "%d_%s" % (pass_id, param.getName())
            param.save(save_name)
        print 'Done.'

        test_evaluator.finish()

        updater.finishPass()
    gradient_machine.finish()


# Script entry point: run training when executed directly.
if __name__ == '__main__':
    main()
Loading