Commit

follow comments
qingqing01 committed Oct 31, 2016
1 parent 61e21c3 commit 27e89df
Showing 11 changed files with 61 additions and 69 deletions.
18 changes: 7 additions & 11 deletions paddle/gserver/layers/ConcatenateLayer.cpp
@@ -98,7 +98,7 @@ void ConcatenateLayer::backward(const UpdateCallback& callback) {
class ConcatenateLayer2 : public Layer {
public:
explicit ConcatenateLayer2(const LayerConfig& config) :
Layer(config), sharedBias_(false) {}
Layer(config) {}

~ConcatenateLayer2() {}

@@ -141,9 +141,7 @@ bool ConcatenateLayer2::init(const LayerMap& layerMap,

/* initialize biases_ */
if (biasParameter_.get() != NULL) {
if (config_.has_shared_biases()) {
sharedBias_ = config_.shared_biases();
}
sharedBias_ = config_.shared_biases();
size_t psize = config_.bias_size();
biases_ = std::unique_ptr<Weight>(new Weight(1, psize, biasParameter_));
}
@@ -173,7 +171,7 @@ void ConcatenateLayer2::forward(PassType passType) {
}

/* add the bias-vector */
if (biases_.get() != NULL) {
if (biases_) {
REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str());
output_.value->addBias(*(biases_->getW()), 1, sharedBias_);
}
@@ -190,18 +188,16 @@ void ConcatenateLayer2::backward(const UpdateCallback& callback) {
backwardActivation();
}

AsyncGpuBlock block;
if (biases_ && biases_->getWGrad()) {
REGISTER_TIMER_INFO("Concat2BpBiasTimer", getName().c_str());
biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_);
biases_->getParameterPtr()->incUpdate(callback);
}

{
AsyncGpuBlock block;
for (size_t i = 0; i != inputLayers_.size(); ++i) {
if (projections_[i]) {
projections_[i]->backward(callback);
}
for (size_t i = 0; i != inputLayers_.size(); ++i) {
if (projections_[i]) {
projections_[i]->backward(callback);
}
}
}
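
The updated backward() above confines the AsyncGpuBlock guard to a nested scope
around the projection backward calls, leaving the bias-gradient update outside
the guarded region. A minimal sketch of the RAII scoping idiom involved, using
a hypothetical ScopedGuard in place of PaddlePaddle's actual AsyncGpuBlock
(illustration only, not part of this commit):

    #include <cstdio>
    #include <functional>
    #include <vector>

    struct ScopedGuard {
      ScopedGuard() { std::printf("enter async region\n"); }   // scope entry
      ~ScopedGuard() { std::printf("leave async region\n"); }  // scope exit
    };

    void backwardSketch(const std::vector<std::function<void()>>& projBackward) {
      // ... bias-gradient work would run here, outside the guarded region ...
      {
        ScopedGuard block;            // guard constructed on entering the block
        for (const auto& fn : projBackward) {
          if (fn) fn();               // projection backward calls
        }
      }                               // guard destroyed here, ending the region
    }
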
12 changes: 8 additions & 4 deletions paddle/gserver/layers/ConvBaseLayer.cpp
@@ -62,10 +62,14 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
}

size_t ConvBaseLayer::calOutputSize() {
imgSizeH_.clear();
imgSizeW_.clear();
outputH_.clear();
outputW_.clear();
auto clearAndReserve = [this](IntV* vec) {
vec->clear();
vec->reserve(this->inputLayers_.size());
};
clearAndReserve(&imgSizeH_);
clearAndReserve(&imgSizeW_);
clearAndReserve(&outputH_);
clearAndReserve(&outputW_);
size_t layerSize = 0;
for (size_t i = 0; i < inputLayers_.size(); i++) {
imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight());
7 changes: 6 additions & 1 deletion paddle/gserver/layers/ConvBaseLayer.h
@@ -27,7 +27,6 @@ class ConvBaseLayer : public Layer {
protected:
typedef std::vector<int> IntV;


/// The number of filters.
int numFilters_;
/// The x dimension of the padding.
@@ -79,6 +78,12 @@ class ConvBaseLayer : public Layer {
explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {}

virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

/**
 * imgSizeH_ and imgSizeW_ are set in this function according to the previous
 * input layers. It then calculates outputH_ and outputW_ and sets them into
 * the output argument.
*/
virtual size_t calOutputSize();

Weight& getWeight(int idx) { return *weights_[idx]; }
39 changes: 20 additions & 19 deletions paddle/gserver/layers/ConvProjection.cpp
@@ -18,25 +18,11 @@ limitations under the License. */

namespace paddle {

static ThreadLocal<std::vector<MemoryHandle*>> convMem_;
static __thread bool convMemInit = false;
void* getSpaceBytes(size_t size) {
if (!convMemInit) {
int numDevices = hl_get_device_count();
convMem_.get()->resize(numDevices);
convMemInit = true;
}

int devId = hl_get_device();
MemoryHandle** localMem = &(*convMem_.get())[devId];
if (NULL == *localMem || size > (*localMem)->getAllocSize()) {
*localMem = new GpuMemoryHandle(size);
}
return (*localMem)->getBuf();
}

REGISTER_PROJECTION(conv, ConvProjection);

ThreadLocalD<std::vector<MemoryHandle*>> ConvProjection::convMem_;

ConvProjection::ConvProjection(const ProjectionConfig& config,
ParameterPtr parameter, bool useGpu)
: Projection(config, parameter, useGpu) {
@@ -48,8 +34,6 @@ ConvProjection::ConvProjection(const ProjectionConfig& config,
size_t height = filterH_ * filterW_ * channels_ / groups_;
size_t width = numFilters_;
weight_.reset(new Weight(height, width, parameter));


weightOffset_ = height * width / groups_;
}

@@ -108,6 +92,7 @@ void ConvProjection::reshapeTensorDesc(int batchSize) {
// for example, in the case of layer ConcatenateLayer2 with two
// ConvProjection, the stride is the output_size of layer ConcatenateLayer2.
// So the calculation of nStride is different from CudnnConvLayer.
// In fact, only "nStride = out_->value->getStride()" is ok.
size_t nStride = numFilters_ * outputH_ * outputW_;
if (out_->value->isContiguous()) {
CHECK_EQ(nStride, out_->value->getWidth());
@@ -120,7 +105,8 @@ }
}

void ConvProjection::reshape(int batchSize) {
calOutputSize();
size_t width = calOutputSize();
CHECK_EQ(width, out_->value->getWidth());

isSelectAlgo_ = (batchSize == batchNum_);
batchNum_ = batchSize;
@@ -201,6 +187,21 @@ void ConvProjection::backward(const UpdateCallback& callback) {
weight_->getParameterPtr()->incUpdate(callback);
}

void* ConvProjection::getSpaceBytes(size_t size) {
std::vector<MemoryHandle*>& convMem = *convMem_;
if (convMem.empty()) {
int numDevices = hl_get_device_count();
convMem.resize(numDevices);
}

int devId = hl_get_device();
MemoryHandle** localMem = &(convMem[devId]);
if (NULL == *localMem || size > (*localMem)->getAllocSize()) {
*localMem = new GpuMemoryHandle(size);
}
return (*localMem)->getBuf();
}

ConvProjection::~ConvProjection() {
hl_destroy_tensor_descriptor(inputDesc_);
hl_destroy_tensor_descriptor(outputDesc_);
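
getSpaceBytes() above keeps one grow-only workspace buffer per GPU device,
cached in a per-thread vector indexed by device id, and reallocates only when a
larger workspace is requested. A self-contained sketch of that caching pattern
(Buffer, deviceId and deviceCount are plain C++ placeholders for MemoryHandle
and the hl_* device queries; illustration only, not part of this commit):

    #include <memory>
    #include <vector>

    // Placeholder for GpuMemoryHandle: owns a raw byte buffer of a given size.
    struct Buffer {
      explicit Buffer(size_t n) : size(n), data(new char[n]) {}
      size_t size;
      std::unique_ptr<char[]> data;
    };

    // Grow-only workspace cache: one slot per device, one cache per thread,
    // mirroring the shape of ConvProjection::getSpaceBytes.
    void* getWorkspace(int deviceId, int deviceCount, size_t bytes) {
      thread_local std::vector<std::unique_ptr<Buffer>> cache;
      if (cache.empty()) {
        cache.resize(deviceCount);      // lazily size the cache once per thread
      }
      auto& slot = cache[deviceId];
      if (!slot || bytes > slot->size) {
        slot.reset(new Buffer(bytes));  // reallocate only when too small
      }
      return slot->data.get();
    }

Because the cache is per-thread, concurrent workers never contend on it, and
the buffer is reused across passes as long as the requested size does not grow.
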
15 changes: 9 additions & 6 deletions paddle/gserver/layers/ConvProjection.h
@@ -46,7 +46,7 @@ class ConvProjection : public Projection {
return (imageSize - filterSize + 2 * padding) / stride + 1;
}

void calOutputSize() {
size_t calOutputSize() {
imageH_ = in_->getFrameHeight();
imageW_ = in_->getFrameWidth();
if (imageH_ == 0) imageH_ = configImgH_;
@@ -59,8 +59,11 @@

inputOffset_ = (channels_ / groups_) * imageH_ * imageW_;
outputOffset_ = (numFilters_ / groups_) * outputH_ * outputW_;
return outputH_ * outputW_ * numFilters_;
}

static void* getSpaceBytes(size_t size);

/// imageH_ and imageW_ is calculated from the input layer.
int imageH_, imageW_;
/// configImgH_ and configImgW_ is obtained from config.
@@ -87,13 +90,13 @@ class ConvProjection : public Projection {
/// Cudnn tensor descriptor for a convolution operation.
hl_convolution_descriptor convDesc_;

/// Save the algorithm for forward convolution, which is obtained by cudnn
/// Record the algorithm for forward convolution, which is obtained by cudnn
/// api to search the best suited algorithm.
int fwdAlgo_;
/// Save the algorithm for computing convolution gradient with respect to
/// Record the algorithm for computing convolution gradient with respect to
/// filter coefficients.
int bwdFilterAlgo_;
/// Save the algorithm for computing convolution gradient with respect to
/// Record the algorithm for computing convolution gradient with respect to
/// the output.
int bwdDataAlgo_;
/// Amount of GPU memory needed as workspace to be able to execute a
@@ -108,15 +111,15 @@
/// Size of total work space.
size_t workSpaceInBytes_;

/// Is or not select conv algorihtm.
/// Whether to call cuDNN api to choose conv algorithm.
bool isSelectAlgo_;
/// batchNum is used to record batch size. If the batch size is changed,
/// the selection algorithm will be called.
int batchNum_;

bool bias_;

std::unique_ptr<Weight> weight_;
static ThreadLocalD<std::vector<MemoryHandle*>> convMem_;
};

} // namespace paddle
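
As a quick check of the output-size formula quoted near the top of this header,
(imageSize - filterSize + 2 * padding) / stride + 1, here is a tiny worked
example with hypothetical geometry (illustration only, not part of this commit):

    #include <cstdio>

    // Same formula as ConvProjection's outputSize() helper.
    static int outputSize(int imageSize, int filterSize, int padding, int stride) {
      return (imageSize - filterSize + 2 * padding) / stride + 1;
    }

    int main() {
      std::printf("%d\n", outputSize(32, 5, 2, 1));  // (32 - 5 + 4) / 1 + 1 = 32
      std::printf("%d\n", outputSize(32, 5, 2, 2));  // (32 - 5 + 4) / 2 + 1 = 16
      return 0;
    }
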
3 changes: 2 additions & 1 deletion paddle/gserver/layers/CudnnConvLayer.cpp
@@ -27,6 +27,7 @@ bool CudnnConvLayer::init(const LayerMap &layerMap,

CHECK_EQ(inputLayers_.size(), parameters_.size());
projections_.reserve(inputLayers_.size());
projConf_.reserve(inputLayers_.size());

numFilters_ = config_.num_filters();
CHECK(config_.shared_biases());
@@ -100,7 +101,7 @@ void CudnnConvLayer::backward(const UpdateCallback &callback) {
}

CudnnConvLayer::~CudnnConvLayer() {
if (biases_.get()) {
if (biases_) {
hl_destroy_tensor_descriptor(biasDesc_);
hl_destroy_tensor_descriptor(outputDesc_);
}
4 changes: 1 addition & 3 deletions paddle/gserver/layers/MixedLayer.cpp
@@ -44,9 +44,7 @@ bool MixedLayer::init(const LayerMap& layerMap,

/* initialize biases_ */
if (biasParameter_.get() != NULL) {
if (config_.has_shared_biases()) {
sharedBias_ = config_.shared_biases();
}
sharedBias_ = config_.shared_biases();
size_t psize = config_.bias_size();
biases_ = std::unique_ptr<Weight>(
new Weight(1, psize, biasParameter_));
9 changes: 2 additions & 7 deletions paddle/trainer/TrainerBenchmark.cpp
@@ -24,19 +24,14 @@ P_DEFINE_bool(feed_data, false, "Wether to read data from DataProvider.");

namespace paddle {


void Trainer::time() {
srand(config_->getConfig().start_pass() + 1);
dataProvider_->reset();
startTrain();

this->stats_->reset();
trainerInternal_.getParameterUpdater()->startPass();
evaluator_->start();

trainerInternal_.getGradientMachine()->start(*config_, dataProvider_);
DataBatch dataBatch;
int32_t batchSize = config_->getOptConfig().batch_size();

int32_t num = dataProvider_->getNextBatch(batchSize, &dataBatch);
CHECK_EQ(num, batchSize) << "The sample number is less than batch size "
<< num << " != " << batchSize;
@@ -70,7 +65,7 @@ void Trainer::time() {
globalStat.printSegTimerStatus();
globalStat.reset();

trainerInternal_.getGradientMachine()->finish();
finishTrain();
}

} // namespace paddle
2 changes: 1 addition & 1 deletion proto/ModelConfig.proto.m4
@@ -255,7 +255,7 @@ sinclude(`ModelConfigLayer.proto.m4')
// (which is how convnets are usually trained). Setting this to
// false will untie the biases, yielding a separate bias for
// every location at which the filter is applied.
optional bool shared_biases = 8;
optional bool shared_biases = 8 [default = false];

// Valid values are ones that divide the area of the output
// grid in this convolutional layer. For example if this layer
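
To make the shared_biases semantics above concrete: with shared biases a
convolutional layer keeps one bias per filter, while untied biases keep one per
filter per output location. A small illustrative calculation with hypothetical
sizes (not taken from this commit):

    #include <cstdio>

    int main() {
      const int numFilters = 32, outputH = 28, outputW = 28;
      std::printf("shared biases : %d\n", numFilters);                      // 32
      std::printf("untied biases : %d\n", numFilters * outputH * outputW);  // 25088
      return 0;
    }
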
20 changes: 4 additions & 16 deletions python/paddle/trainer/config_parser.py
@@ -668,9 +668,6 @@ def calc_bias_size(self):

def calc_parameter_dims(self, input_size, output_size):
return None
# or [self.proj_conf.conv_conf.channels *
# self.proj_conf.conv_conf.filter_size * self.proj_conf.conv_conf.filter_size_y,
# self.config.num_filters]


# Define a operator for mixed layer
@@ -2569,18 +2566,13 @@ def __init__(
record_operator_conf = self.config.operator_confs.add()
record_operator_conf.CopyFrom(operator_conf)

shared_biases=None

psize = self.config.size
if isinstance(self.inputs[0], ConvProjection):
shared_biases = True
self.config.shared_biases = True
psize = 0
for input in self.inputs:
psize += input.calc_bias_size()

if shared_biases is not None:
self.config.shared_biases = shared_biases

self.config.bias_size = psize
self.create_bias_parameter(bias, psize)

@@ -2632,7 +2624,8 @@ def __init__(
for input_index in xrange(len(self.inputs) - 1):
input = self.inputs[input_index + 1]
config_assert(isinstance(input, ConvProjection),
"All the inputs of ConcatenateLayer2 should be ConvProjection.")
"The first input of ConcatenateLayer2 is ConvProjection, "
"the other inputs should also be ConvProjection.")

size = 0
for input_index in xrange(len(self.inputs)):
@@ -2659,18 +2652,13 @@
input.proj_conf.output_size)
self.create_input_parameter(input_index, psize, dims)

shared_biases=None

psize = self.config.size
if isinstance(self.inputs[0], ConvProjection):
shared_biases = True
self.config.shared_biases = True
psize = 0
for input in self.inputs:
psize += input.calc_bias_size()

if shared_biases is not None:
self.config.shared_biases = shared_biases

self.config.bias_size = psize
self.create_bias_parameter(bias, psize)

1 change: 1 addition & 0 deletions python/paddle/trainer_config_helpers/networks.py
@@ -895,6 +895,7 @@ def simple_gru2(input,
"""
simple_gru2 is the same with simple_gru, but using grumemory instead
Please see grumemory in layers.py for more detail about the maths.
simple_gru2 is faster than simple_gru.
The example usage is:
