From 7259479378be9d7601ceb8f7bcb37424da546242 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Sat, 10 Sep 2016 11:18:07 +0800
Subject: [PATCH 1/3] fix CUDNN_VERSION for backward of CudnnBatchNormLayer

---
 paddle/gserver/layers/CudnnBatchNormLayer.cpp | 27 ++++++++++---------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp
index e1762e8d360de..cef8772fc254f 100644
--- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp
+++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp
@@ -114,27 +114,30 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
   } else {
     create(tmpBiasGrad_, 1, channels_, &betaGrad);
   }
-#if CUDNN_VERSION < 5000
+
   // because of the different api of cudnn v4 and v5.
-  if (weight_->getWGrad()) {
-    create(tmpWGrad_, 1, channels_, &gammaGrad);
-  }
-  if (biases_ && biases_->getWGrad()) {
-    create(tmpBiasGrad_, 1, channels_, &betaGrad);
+  if (hl_get_cudnn_lib_version() < 5000) {
+    if (weight_->getWGrad()) {
+      create(tmpWGrad_, 1, channels_, &gammaGrad);
+    }
+    if (biases_ && biases_->getWGrad()) {
+      create(tmpBiasGrad_, 1, channels_, &betaGrad);
+    }
   }
-#endif
+
   hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad, ioDesc_, inGrad,
                          bnParamDesc_, gamma, gammaGrad, betaGrad,
                          EPS, savedMean, savedInvVar);
 
-#if CUDNN_VERSION < 5000
   // because of the different api of cudnn v4 and v5.
-  if (weight_->getWGrad() && biases_->getWGrad()) {
-    weight_->getWGrad()->add(*tmpWGrad_);
-    biases_->getWGrad()->add(*tmpBiasGrad_);
+  if (hl_get_cudnn_lib_version() < 5000) {
+    if (weight_->getWGrad() && biases_->getWGrad()) {
+      weight_->getWGrad()->add(*tmpWGrad_);
+      biases_->getWGrad()->add(*tmpBiasGrad_);
+    }
   }
-#endif
+
   {
     REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
     biases_->getParameterPtr()->incUpdate(callback);
" << "But cudnn lib version is " << g_cudnn_lib_version; #endif } @@ -1039,7 +1039,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, real *estimatedMean, real *estimatedInvVar, double epsilon) { -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc); cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc); cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc); @@ -1053,7 +1053,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, CHECK_SYNC("hl_batch_norm_forward_inference failed"); #else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " + LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. " << "But cudnn lib version is " << g_cudnn_lib_version; #endif } @@ -1071,7 +1071,7 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, double epsilon, real *savedMean, real *savedInvVar) { -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 if ((NULL != savedMean && NULL == savedInvVar) || (NULL == savedMean && NULL != savedInvVar)) { LOG(FATAL) << "savedMean and savedVar can be NULL " @@ -1087,16 +1087,14 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward( t_resource.cudnn_handle, mode, &alpha, &beta, -#if CUDNN_VERSION >= 5000 &alpha, &beta, -#endif xDesc, input, dyDesc, outGrad, dxDesc, inGrad, bnDesc, scale, scaleGrad, biasGrad, epsilon, savedMean, savedInvVar)); CHECK_SYNC("hl_batch_norm_backward failed"); #else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " + LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. " << "But cudnn lib version is " << g_cudnn_lib_version; #endif } diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index cef8772fc254f..3c6d13b0bf92e 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -115,29 +115,11 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { create(tmpBiasGrad_, 1, channels_, &betaGrad); } - // because of the different api of cudnn v4 and v5. - if (hl_get_cudnn_lib_version() < 5000) { - if (weight_->getWGrad()) { - create(tmpWGrad_, 1, channels_, &gammaGrad); - } - if (biases_ && biases_->getWGrad()) { - create(tmpBiasGrad_, 1, channels_, &betaGrad); - } - } - hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad, ioDesc_, inGrad, bnParamDesc_, gamma, gammaGrad, betaGrad, EPS, savedMean, savedInvVar); - // because of the different api of cudnn v4 and v5. - if (hl_get_cudnn_lib_version() < 5000) { - if (weight_->getWGrad() && biases_->getWGrad()) { - weight_->getWGrad()->add(*tmpWGrad_); - biases_->getWGrad()->add(*tmpBiasGrad_); - } - } - { REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); biases_->getParameterPtr()->incUpdate(callback); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b26a63e7f3c1d..3656d9e7d8242 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1614,7 +1614,7 @@ def __init__( # Also based on cudnn version. 
From 8cdd84abb7f502d4103d5c1bcb1fe31ee076955f Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Wed, 14 Sep 2016 14:46:54 +0800
Subject: [PATCH 3/3] Add some comments for CTC layer and fix CTC evaluators,
 also add interface test

---
 doc/build/contribute_to_paddle.md               |  4 ++--
 .../paddle/trainer_config_helpers/evaluators.py | 10 +++++++---
 python/paddle/trainer_config_helpers/layers.py  | 15 +++++++++++++--
 .../tests/layers_test_config.py                 |  9 +++++++++
 4 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/doc/build/contribute_to_paddle.md b/doc/build/contribute_to_paddle.md
index b3d5fa7c9ff5f..10d5d86311333 100644
--- a/doc/build/contribute_to_paddle.md
+++ b/doc/build/contribute_to_paddle.md
@@ -25,7 +25,7 @@ repo or just head straight to the command line:
 
 ```shell
 # Clone your fork to your local machine
-git clone git@github.com:USERNAME/paddle.git
+git clone git@github.com:USERNAME/Paddle.git
 ```
 
 Then you can start to develop.
@@ -52,7 +52,7 @@ To do this, you'll need to add a remote at first:
 # see the current configured remote repository
 git remote -v
 # add upstream repository
-git remote add upstream https://github.com/paddle/paddle.git
+git remote add upstream https://github.com/baidu/Paddle.git
 # verify the new upstream
 git remote -v
 ```
diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py
index 956bedadd75e5..179a3a053a961 100644
--- a/python/paddle/trainer_config_helpers/evaluators.py
+++ b/python/paddle/trainer_config_helpers/evaluators.py
@@ -94,7 +94,7 @@ def evaluator_base(
         Batch=200 samples=20000 AvgCost=0.679655 CurrentCost=0.662179 Eval:
         classification_error_evaluator=0.4486
         CurrentEval: ErrorRate=0.3964
-    
+
     :param input: Input layers, an object of LayerOutput or a list of
                   LayerOutput.
     :type input: list|LayerOutput
@@ -296,6 +296,7 @@ def precision_recall_evaluator(
 @wrap_name_default()
 def ctc_error_evaluator(
         input,
+        label,
         name=None,
         ):
     """
@@ -305,16 +306,19 @@ def ctc_error_evaluator(
     This evaluator is to calculate sequence-to-sequence edit distance.
 
     The simple usage is:
 
     .. code-block:: python
 
-       eval = ctc_error_evaluator(input)
+       eval = ctc_error_evaluator(input=input, label=lbl)
 
     :param name: Evaluator name.
     :type name: None|basestring
     :param input: Input Layer.
     :type input: LayerOutput
+    :param label: input label, which is a data_layer.
+    :type label: LayerOutput
     """
     evaluator_base(name=name,
                    type="ctc_edit_distance",
-                   input=input)
+                   input=input,
+                   label=label)
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index f3f0077f9798f..47eadf5d04e50 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -2944,7 +2944,7 @@ def linear_comb_layer(weights, vectors, size, name=None):
 
     .. math::
 
-       z = x^T Y
+       z = x^\mathrm{T} Y
 
     In this formula:
     - :math:`x`: weights
@@ -3064,6 +3064,17 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
     classification task. That is, for sequence labeling problems where the
     alignment between the inputs and the target labels is unknown.
 
+    More details can be found by referring to `Connectionist Temporal
+    Classification: Labelling Unsegmented Sequence Data with Recurrent
+    Neural Networks `_
+
+    Note:
+        Considering the 'blank' label needed by CTC, you need to use
+        (num_classes + 1) as the input size, where num_classes is the number
+        of real categories and 'blank' is the last category index. So the
+        size of the 'input' layer, such as an fc_layer with softmax
+        activation, should be num_classes + 1, and the size of ctc_layer
+        should also be num_classes + 1.
+
     The simple usage:
 
     .. code-block:: python
@@ -3077,7 +3088,7 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
     :type input: LayerOutput
     :param label: The data layer of label with variable length.
     :type label: LayerOutput
-    :param size: category numbers.
+    :param size: the number of categories + 1.
     :type size: int
     :param name: The name of this layer, which can not be specified.
     :type name: string|None
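The Note added to the ctc_layer docstring is easiest to read as a config
fragment. The sketch below spells out the num_classes + 1 convention using
only helpers that appear in this patch; it is not part of the patch, and the
layer names and sizes are invented for illustration. The layers_test_config.py
hunk that follows exercises the same pattern:

from paddle.trainer_config_helpers import *

num_classes = 26                                  # invented label count
feat = data_layer(name="feat", size=120)          # per-timestep features
lbl = data_layer(name="lbl", size=num_classes)    # target transcription

# Reserve one extra output unit, the trailing index num_classes, for the
# 'blank' label that CTC introduces.
prob = fc_layer(input=feat,
                size=num_classes + 1,
                act=SoftmaxActivation())

# ctc_layer must be sized identically to its softmax input.
ctc = ctc_layer(input=prob, label=lbl, size=num_classes + 1)

# The fixed evaluator now takes the label explicitly (see evaluators.py
# above).
ctc_eval = ctc_error_evaluator(input=ctc, label=lbl)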
diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
index ec171fc6013f4..d479fb263fb66 100644
--- a/python/paddle/trainer_config_helpers/tests/layers_test_config.py
+++ b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
@@ -34,6 +34,15 @@
 
 outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
 
+# for ctc
+tmp = fc_layer(input=x1,
+               size=num_classes + 1,
+               act=SoftmaxActivation())
+ctc = ctc_layer(input=tmp,
+                label=y,
+                size=num_classes + 1)
+ctc_eval = ctc_error_evaluator(input=ctc, label=y)
+
 settings(
     batch_size=10,
     learning_rate=2e-3,