diff --git a/paddle/fluid/operators/fused/fused_seq_tensor_op.cc b/paddle/fluid/operators/fused/fused_seq_tensor_op.cc
index 5ca2ec345f10e..7430d0d32ca37 100644
--- a/paddle/fluid/operators/fused/fused_seq_tensor_op.cc
+++ b/paddle/fluid/operators/fused/fused_seq_tensor_op.cc
@@ -47,10 +47,10 @@ class FusedSeqTensorOp : public framework::OperatorWithKernel {
       ad_slot_num, 0,
       platform::errors::InvalidArgument(
           "ad_slot_num [%ld] <= 0", ad_slot_num));
-  PADDLE_ENFORCE_LT(
+  PADDLE_ENFORCE_LE(
       ad_slot_offset, slot_num - 1,
       platform::errors::InvalidArgument(
-          "ad_slot_num [%ld] > slot_num - 1 [%ld]", ad_slot_offset, slot_num));
+          "ad_slot_offset [%ld] > slot_num - 1 [%ld]", ad_slot_offset, slot_num));
   PADDLE_ENFORCE_GE(
       ad_slot_offset, 0,
       platform::errors::InvalidArgument(
diff --git a/paddle/fluid/operators/fused/fused_seq_tensor_op.cu b/paddle/fluid/operators/fused/fused_seq_tensor_op.cu
index d2fdf364d731d..8210cd43808c3 100644
--- a/paddle/fluid/operators/fused/fused_seq_tensor_op.cu
+++ b/paddle/fluid/operators/fused/fused_seq_tensor_op.cu
@@ -145,8 +145,8 @@ __device__ void warpReduce(volatile T* cache, int tid) {
 #define THREAD_PER_BLOCK 128
 
 template <typename T>
-__global__ void reduce_sum_max_length(const T* input,  // 1
-                                      T* mask_output,  // mask
+__global__ void reduce_sum_max_length(const T* input,
+                                      T* mask_output,
                                       const size_t batch_count,
                                       const size_t ins_num,
                                       const size_t slot_num,
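
Two hedged notes on the hunks above.

First, the LT-to-LE change matters exactly at the boundary ad_slot_offset == slot_num - 1, the last valid slot index. The standalone sketch below is illustrative only, not Paddle code; the variable names are borrowed from the check to show why the old comparison rejected a valid offset:

#include <cassert>

int main() {
  const long slot_num = 4;
  const long ad_slot_offset = 3;  // last valid slot index

  // Old check (PADDLE_ENFORCE_LT): requires ad_slot_offset < slot_num - 1,
  // i.e. 3 < 3, which fails and wrongly rejects the last slot.
  assert(!(ad_slot_offset < slot_num - 1));

  // New check (PADDLE_ENFORCE_LE): requires ad_slot_offset <= slot_num - 1,
  // i.e. 3 <= 3, which passes.
  assert(ad_slot_offset <= slot_num - 1);
  return 0;
}

Second, the .cu hunk only drops stray trailing comments from the reduce_sum_max_length signature, but its header mentions the warpReduce helper. Below is a minimal sketch of the classic volatile-cache warp reduction that a helper with this signature conventionally implements; this is an assumption for context, not the Paddle implementation, and on Volta and newer GPUs shuffle intrinsics or __syncwarp() are preferred because lockstep execution within a warp is no longer guaranteed:

template <typename T>
__device__ void warpReduce(volatile T* cache, int tid) {
  // Final 32 lanes of a block-level tree reduction: within one warp the
  // strides 32..1 can be applied back to back without __syncthreads().
  cache[tid] += cache[tid + 32];
  cache[tid] += cache[tid + 16];
  cache[tid] += cache[tid + 8];
  cache[tid] += cache[tid + 4];
  cache[tid] += cache[tid + 2];
  cache[tid] += cache[tid + 1];
}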