From 01d65e227ce2fbeff405fa0a68d1cca9f12aa6e7 Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Fri, 23 Sep 2016 17:33:39 -0700 Subject: [PATCH 1/3] Correctly handling multiple inputs and integer inputs for recurrent_group --- paddle/cuda/src/hl_cuda_cublas.cc | 4 +- .../RecurrentGradientMachine.cpp | 41 +++++++++++-------- paddle/gserver/layers/AgentLayer.cpp | 34 +++++++-------- paddle/gserver/tests/CMakeLists.txt | 1 - paddle/gserver/tests/sequence_nest_rnn.conf | 11 ++--- paddle/gserver/tests/sequence_rnn.conf | 7 ++-- .../tests/test_RecurrentGradientMachine.cpp | 22 ++++++---- paddle/parameter/Argument.cpp | 13 ++++-- paddle/parameter/Argument.h | 4 +- .../paddle/trainer_config_helpers/layers.py | 4 +- 10 files changed, 79 insertions(+), 62 deletions(-) diff --git a/paddle/cuda/src/hl_cuda_cublas.cc b/paddle/cuda/src/hl_cuda_cublas.cc index 445279fa01034..dc109487ded20 100644 --- a/paddle/cuda/src/hl_cuda_cublas.cc +++ b/paddle/cuda/src/hl_cuda_cublas.cc @@ -217,7 +217,7 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa, } else { LOG(FATAL) << "parameter transa error!"; } - CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS); + CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat); CHECK_SYNC("hl_matrix_mul failed"); } @@ -266,7 +266,7 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans, LOG(FATAL) << "parameter transa error!"; } - CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS); + CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat); CHECK_SYNC("hl_matrix_mul_vector"); } diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index bee82faa5fca8..fc38bca3c403b 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -497,20 +497,21 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, int idSize = 0; // connect in_links for (size_t j = 0; j < inFrameLines_.size(); ++j) { + Info& info = info_[shareInlinkInfo ? 0 : j]; // idSize denotes the sum number of tokens in each length i - idSize = info_[j].idIndex[i + 1] - info_[j].idIndex[i]; + idSize = info.idIndex[i + 1] - info.idIndex[i]; InFrameLine inFrameLine = inFrameLines_[j]; auto scatterAgent = dynamic_cast(inFrameLine.agents[i].get()); scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer, - inFrameLine.outArg, info_[j].allIds, - info_[j].idIndex[i], idSize); + inFrameLine.outArg, info.allIds, + info.idIndex[i], idSize); if (hasSubseq) { // size: the length of subsequence int size = - info_[j].seqStartPosIndex[i + 1] - info_[j].seqStartPosIndex[i]; - scatterAgent->setSequenceStartPositions(info_[j].sequenceStartPositions, - info_[j].seqStartPosIndex[i], + info.seqStartPosIndex[i + 1] - info.seqStartPosIndex[i]; + scatterAgent->setSequenceStartPositions(info.sequenceStartPositions, + info.seqStartPosIndex[i], size); } } @@ -744,16 +745,24 @@ void RecurrentGradientMachine::selectRowsOneTime(LayerPtr layer, const IVectorPtr& allIds, Argument* arg, PassType passType) { - const MatrixPtr& realV = layer->getOutputValue(); - int height = realV->getHeight(); - int width = realV->getWidth(); - Matrix::resizeOrCreate(arg->value, height, width, /* trans */ false, useGpu_); - arg->value->zeroMem(); - arg->value->selectRows(*realV, *allIds); - if (passType != PASS_TEST) { - Matrix::resizeOrCreate(arg->grad, height, width, /* trans */ false, - useGpu_); - arg->grad->zeroMem(); + Argument& src = layer->getOutput(); + if (src.value) { + const MatrixPtr& realV = src.value; + int height = realV->getHeight(); + int width = realV->getWidth(); + Matrix::resizeOrCreate( + arg->value, height, width, /* trans */ false, useGpu_); + arg->value->zeroMem(); + arg->value->selectRows(*realV, *allIds); + if (passType != PASS_TEST) { + Matrix::resizeOrCreate(arg->grad, height, width, /* trans */ false, + useGpu_); + arg->grad->zeroMem(); + } + } + if (src.ids) { + IVector::resizeOrCreate(arg->ids, src.ids->getSize(), useGpu_); + arg->ids->selectFrom(*src.ids, *allIds); } } diff --git a/paddle/gserver/layers/AgentLayer.cpp b/paddle/gserver/layers/AgentLayer.cpp index c1bef18ed38af..9fa8698f8a611 100644 --- a/paddle/gserver/layers/AgentLayer.cpp +++ b/paddle/gserver/layers/AgentLayer.cpp @@ -139,22 +139,17 @@ void ScatterAgentLayer::forward(PassType passType) { Layer::forward(passType); CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); - if (realLayer_->getOutput().ids) { // ids scatter - IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_); - output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_); - } else { // value scatter - int width = this->getSize(); - if (realOutArg_.value) { - output_.subArgFrom(realOutArg_, /* offset */ idIndex_ * width, idSize_, - width, useGpu_); - } else { // used in generation - int height = ids_->getSize(); - resetOutput(height, width); - - const MatrixPtr& outV = getOutputValue(); - const MatrixPtr& realV = realLayer_->getOutputValue(); - outV->selectRows(*realV, *ids_); - } + int width = this->getSize(); + if (realOutArg_.value || realOutArg_.ids) { + output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, + width, useGpu_); + } else { // used in generation + int height = ids_->getSize(); + resetOutput(height, width); + + const MatrixPtr& outV = getOutputValue(); + const MatrixPtr& realV = realLayer_->getOutputValue(); + outV->selectRows(*realV, *ids_); } } @@ -213,18 +208,17 @@ void SequenceGatherAgentLayer::forward(PassType passType) { void SequenceScatterAgentLayer::forward(PassType passType) { Layer::forward(passType); CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); - CHECK(!realLayer_->getOutput().ids) << "Not supported"; const Argument& input = realLayer_->getOutput(); - CHECK_EQ(input.value->getWidth(), this->getSize()); + CHECK_EQ(realLayer_->getSize(), this->getSize()); int width = this->getSize(); AsyncGpuBlock asyncGpuBlock; REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str()); - if (realOutArg_.value) { + if (realOutArg_.value || realOutArg_.ids) { CHECK(realOutArg_.sequenceStartPositions); - output_.subArgFrom(realOutArg_, /* offset */ idIndex_ * width, idSize_, + output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_, /* trans */ false, /* seqFlag */ true, /* seqStart */ seqStartPosIndex_, /* seqSize */ numSequences_); diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 129f10fac114d..ff2abf7697317 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -56,7 +56,6 @@ add_test(NAME test_RecurrentGradientMachine COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python:${PROJ_ROOT}/paddle/gserver/tests ${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine - --use_gpu=false WORKING_DIRECTORY ${PROJ_ROOT}/paddle) add_unittest_without_exec(test_NetworkCompare diff --git a/paddle/gserver/tests/sequence_nest_rnn.conf b/paddle/gserver/tests/sequence_nest_rnn.conf index 62b8c5d072d7b..e01b3f8e7aa5c 100644 --- a/paddle/gserver/tests/sequence_nest_rnn.conf +++ b/paddle/gserver/tests/sequence_nest_rnn.conf @@ -36,13 +36,14 @@ emb = embedding_layer(input=data, size=word_dim) # This hierachical RNN is designed to be equivalent to the simple RNN in # sequence_rnn.conf -def outer_step(x): +def outer_step(wid, x): outer_mem = memory(name="outer_rnn_state", size=hidden_dim) - def inner_step(y): + def inner_step(y, wid): + z = embedding_layer(input=wid, size=word_dim) inner_mem = memory(name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem) - out = fc_layer(input=[y, inner_mem], + out = fc_layer(input=[y, z, inner_mem], size=hidden_dim, act=TanhActivation(), bias_attr=True, @@ -52,7 +53,7 @@ def outer_step(x): inner_rnn_output = recurrent_group( step=inner_step, name="inner", - input=x) + input=[x, wid]) last = last_seq(input=inner_rnn_output, name="outer_rnn_state") # "return last" should also work. But currently RecurrentGradientMachine @@ -64,7 +65,7 @@ def outer_step(x): out = recurrent_group( name="outer", step=outer_step, - input=SubsequenceInput(emb)) + input=[SubsequenceInput(data), SubsequenceInput(emb)]) rep = last_seq(input=out) prob = fc_layer(size=label_dim, diff --git a/paddle/gserver/tests/sequence_rnn.conf b/paddle/gserver/tests/sequence_rnn.conf index 3294c2c3fc431..968621cab59be 100644 --- a/paddle/gserver/tests/sequence_rnn.conf +++ b/paddle/gserver/tests/sequence_rnn.conf @@ -33,9 +33,10 @@ data = data_layer(name="word", size=dict_dim) emb = embedding_layer(input=data, size=word_dim) -def step(y): +def step(y, wid): + z = embedding_layer(input=wid, size=word_dim) mem = memory(name="rnn_state", size=hidden_dim) - out = fc_layer(input=[y, mem], + out = fc_layer(input=[y, z, mem], size=hidden_dim, act=TanhActivation(), bias_attr=True, @@ -45,7 +46,7 @@ def step(y): out = recurrent_group( name="rnn", step=step, - input=emb) + input=[emb, data]) rep = last_seq(input=out) prob = fc_layer(size=label_dim, diff --git a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp index b73fdd18abf35..af8e5b0ad3361 100644 --- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp +++ b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp @@ -92,7 +92,11 @@ void CalCost(const string& conf, const string& dir, real* cost, rmDir(dir.c_str()); } -void test(const string& conf1, const string& conf2, double eps) { +void test(const string& conf1, const string& conf2, double eps, bool useGpu) { + if (!paddle::version::isWithGpu() && useGpu) { + return; + } + FLAGS_use_gpu = useGpu; int num_passes = 5; real* cost1 = new real[num_passes]; const string dir1 = "gserver/tests/t1"; @@ -113,15 +117,19 @@ void test(const string& conf1, const string& conf2, double eps) { } TEST(RecurrentGradientMachine, HasSubSequence) { - test("gserver/tests/sequence_layer_group.conf", - "gserver/tests/sequence_nest_layer_group.conf", - 1e-5); + for (bool useGpu : {false, true}) { + test("gserver/tests/sequence_layer_group.conf", + "gserver/tests/sequence_nest_layer_group.conf", + 1e-5, useGpu); + } } TEST(RecurrentGradientMachine, rnn) { - test("gserver/tests/sequence_rnn.conf", - "gserver/tests/sequence_nest_rnn.conf", - 0); + for (bool useGpu : {false, true}) { + test("gserver/tests/sequence_rnn.conf", + "gserver/tests/sequence_nest_rnn.conf", + 1e-6, useGpu); + } } diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 0ca56b29b39b3..42c74661d2b2c 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -554,11 +554,16 @@ void Argument::degradeSequence(const Argument& input, bool useGpu) { void Argument::subArgFrom(const Argument& input, size_t offset, size_t height, size_t width, bool useGpu, bool trans, bool seqFlag, size_t seqStart, size_t seqSize) { - value = Matrix::create(input.value->getData() + offset, height, width, trans, - useGpu); + if (input.value) { + value = Matrix::create(input.value->getData() + offset * width, + height, width, trans, useGpu); + } + if (input.ids) { + ids = IVector::create(input.ids->getData() + offset, height, useGpu); + } if (input.grad) { - grad = Matrix::create(input.grad->getData() + offset, height, width, trans, - useGpu); + grad = Matrix::create(input.grad->getData() + offset * width, + height, width, trans, useGpu); } if (seqFlag) { sequenceStartPositions = std::make_shared( diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 81cd117fc45cf..81ff9029bc4c8 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -177,11 +177,11 @@ struct Argument { } /** - * @brief (value, grad, sequenceStartPositions) of output are subset of + * @brief (value, ids, grad, sequenceStartPositions) of output are subset of * input. Note that, output share the same memory of input. * * @param input[in] input - * @param offset[in] offset of input.value + * @param offset[in] offset in terms of rows * @param height[in] height of output.value * @param width[in] width of output.value * @param useGpu[in] diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 8b7cabf2fad50..cd3b1dcdf21a7 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -215,7 +215,7 @@ def check_input(input): """ if isinstance(input, LayerOutput): - return [LayerOutput] + return [input] assert isinstance(input, list) for inp in input: assert isinstance(inp, LayerOutput) @@ -761,7 +761,7 @@ def print_layer(input, name=None): :type input: LayerOutput|list|tuple :return: No return """ - check_input(input) + input = check_input(input) Layer( name=name, From db90d979728f45b30b9e330561243de87a78a247 Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Sat, 24 Sep 2016 00:06:49 -0700 Subject: [PATCH 2/3] Fix ScatterAgentLayer for generation --- paddle/gserver/layers/AgentLayer.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/paddle/gserver/layers/AgentLayer.cpp b/paddle/gserver/layers/AgentLayer.cpp index 9fa8698f8a611..056e9568852ac 100644 --- a/paddle/gserver/layers/AgentLayer.cpp +++ b/paddle/gserver/layers/AgentLayer.cpp @@ -144,12 +144,18 @@ void ScatterAgentLayer::forward(PassType passType) { output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_); } else { // used in generation - int height = ids_->getSize(); - resetOutput(height, width); + if (realLayer_->getOutput().ids) { + IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_); + output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_); + } + if (realLayer_->getOutput().value) { + int height = ids_->getSize(); + resetOutput(height, width); - const MatrixPtr& outV = getOutputValue(); - const MatrixPtr& realV = realLayer_->getOutputValue(); - outV->selectRows(*realV, *ids_); + const MatrixPtr& outV = getOutputValue(); + const MatrixPtr& realV = realLayer_->getOutputValue(); + outV->selectRows(*realV, *ids_); + } } } From 1659d3e1e8bcb0ba573306a9aa1e2f3ad95895a4 Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Mon, 26 Sep 2016 13:39:07 -0700 Subject: [PATCH 3/3] Revert sequence_(nest)_rnn.conf --- paddle/gserver/tests/sequence_nest_rnn.conf | 11 ++- .../tests/sequence_nest_rnn_multi_input.conf | 77 +++++++++++++++++++ paddle/gserver/tests/sequence_rnn.conf | 7 +- .../tests/sequence_rnn_multi_input.conf | 58 ++++++++++++++ .../tests/test_RecurrentGradientMachine.cpp | 7 ++ 5 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 paddle/gserver/tests/sequence_nest_rnn_multi_input.conf create mode 100644 paddle/gserver/tests/sequence_rnn_multi_input.conf diff --git a/paddle/gserver/tests/sequence_nest_rnn.conf b/paddle/gserver/tests/sequence_nest_rnn.conf index e01b3f8e7aa5c..62b8c5d072d7b 100644 --- a/paddle/gserver/tests/sequence_nest_rnn.conf +++ b/paddle/gserver/tests/sequence_nest_rnn.conf @@ -36,14 +36,13 @@ emb = embedding_layer(input=data, size=word_dim) # This hierachical RNN is designed to be equivalent to the simple RNN in # sequence_rnn.conf -def outer_step(wid, x): +def outer_step(x): outer_mem = memory(name="outer_rnn_state", size=hidden_dim) - def inner_step(y, wid): - z = embedding_layer(input=wid, size=word_dim) + def inner_step(y): inner_mem = memory(name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem) - out = fc_layer(input=[y, z, inner_mem], + out = fc_layer(input=[y, inner_mem], size=hidden_dim, act=TanhActivation(), bias_attr=True, @@ -53,7 +52,7 @@ def outer_step(wid, x): inner_rnn_output = recurrent_group( step=inner_step, name="inner", - input=[x, wid]) + input=x) last = last_seq(input=inner_rnn_output, name="outer_rnn_state") # "return last" should also work. But currently RecurrentGradientMachine @@ -65,7 +64,7 @@ def outer_step(wid, x): out = recurrent_group( name="outer", step=outer_step, - input=[SubsequenceInput(data), SubsequenceInput(emb)]) + input=SubsequenceInput(emb)) rep = last_seq(input=out) prob = fc_layer(size=label_dim, diff --git a/paddle/gserver/tests/sequence_nest_rnn_multi_input.conf b/paddle/gserver/tests/sequence_nest_rnn_multi_input.conf new file mode 100644 index 0000000000000..e01b3f8e7aa5c --- /dev/null +++ b/paddle/gserver/tests/sequence_nest_rnn_multi_input.conf @@ -0,0 +1,77 @@ +#edit-mode: -*- python -*- +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers import * + +######################## data source ################################ +define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', + test_list=None, + module='rnn_data_provider', + obj='process_subseq') + + +settings(batch_size=2, learning_rate=0.01) +######################## network configure ################################ +dict_dim = 10 +word_dim = 8 +hidden_dim = 8 +label_dim = 3 + +data = data_layer(name="word", size=dict_dim) + +emb = embedding_layer(input=data, size=word_dim) + +# This hierachical RNN is designed to be equivalent to the simple RNN in +# sequence_rnn.conf + +def outer_step(wid, x): + outer_mem = memory(name="outer_rnn_state", size=hidden_dim) + def inner_step(y, wid): + z = embedding_layer(input=wid, size=word_dim) + inner_mem = memory(name="inner_rnn_state", + size=hidden_dim, + boot_layer=outer_mem) + out = fc_layer(input=[y, z, inner_mem], + size=hidden_dim, + act=TanhActivation(), + bias_attr=True, + name="inner_rnn_state") + return out + + inner_rnn_output = recurrent_group( + step=inner_step, + name="inner", + input=[x, wid]) + last = last_seq(input=inner_rnn_output, name="outer_rnn_state") + + # "return last" should also work. But currently RecurrentGradientMachine + # does not handle it correctly. Current implementation requires that + # all the out links are from sequences. However, it does not report error + # when the out links are not sequences. + return inner_rnn_output + +out = recurrent_group( + name="outer", + step=outer_step, + input=[SubsequenceInput(data), SubsequenceInput(emb)]) + +rep = last_seq(input=out) +prob = fc_layer(size=label_dim, + input=rep, + act=SoftmaxActivation(), + bias_attr=True) + +outputs(classification_cost(input=prob, + label=data_layer(name="label", size=label_dim))) diff --git a/paddle/gserver/tests/sequence_rnn.conf b/paddle/gserver/tests/sequence_rnn.conf index 968621cab59be..3294c2c3fc431 100644 --- a/paddle/gserver/tests/sequence_rnn.conf +++ b/paddle/gserver/tests/sequence_rnn.conf @@ -33,10 +33,9 @@ data = data_layer(name="word", size=dict_dim) emb = embedding_layer(input=data, size=word_dim) -def step(y, wid): - z = embedding_layer(input=wid, size=word_dim) +def step(y): mem = memory(name="rnn_state", size=hidden_dim) - out = fc_layer(input=[y, z, mem], + out = fc_layer(input=[y, mem], size=hidden_dim, act=TanhActivation(), bias_attr=True, @@ -46,7 +45,7 @@ def step(y, wid): out = recurrent_group( name="rnn", step=step, - input=[emb, data]) + input=emb) rep = last_seq(input=out) prob = fc_layer(size=label_dim, diff --git a/paddle/gserver/tests/sequence_rnn_multi_input.conf b/paddle/gserver/tests/sequence_rnn_multi_input.conf new file mode 100644 index 0000000000000..968621cab59be --- /dev/null +++ b/paddle/gserver/tests/sequence_rnn_multi_input.conf @@ -0,0 +1,58 @@ +#edit-mode: -*- python -*- +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers import * + +######################## data source ################################ +define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', + test_list=None, + module='rnn_data_provider', + obj='process_seq') + + +settings(batch_size=2, learning_rate=0.01) +######################## network configure ################################ +dict_dim = 10 +word_dim = 8 +hidden_dim = 8 +label_dim = 3 + +data = data_layer(name="word", size=dict_dim) + +emb = embedding_layer(input=data, size=word_dim) + +def step(y, wid): + z = embedding_layer(input=wid, size=word_dim) + mem = memory(name="rnn_state", size=hidden_dim) + out = fc_layer(input=[y, z, mem], + size=hidden_dim, + act=TanhActivation(), + bias_attr=True, + name="rnn_state") + return out + +out = recurrent_group( + name="rnn", + step=step, + input=[emb, data]) + +rep = last_seq(input=out) +prob = fc_layer(size=label_dim, + input=rep, + act=SoftmaxActivation(), + bias_attr=True) + +outputs(classification_cost(input=prob, + label=data_layer(name="label", size=label_dim))) diff --git a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp index af8e5b0ad3361..550df0a31844e 100644 --- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp +++ b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp @@ -132,6 +132,13 @@ TEST(RecurrentGradientMachine, rnn) { } } +TEST(RecurrentGradientMachine, rnn_multi_input) { + for (bool useGpu : {false, true}) { + test("gserver/tests/sequence_rnn_multi_input.conf", + "gserver/tests/sequence_nest_rnn_multi_input.conf", + 1e-6, useGpu); + } +} int main(int argc, char** argv) { if (paddle::version::isWithPyDataProvider()) {