modified reduce_max reduce_min reduce_prod for higher_performance and fix a bug in reduce_op.cuh #32974

Merged Jun 22, 2021 · 43 commits (changes shown from 30 commits)

Commits
7d58b91
Merge pull request #1 from PaddlePaddle/develop
AnnaTrainingG Mar 25, 2021
1021e08
Merge pull request #2 from PaddlePaddle/develop
AnnaTrainingG Mar 29, 2021
43f53fe
Merge pull request #3 from PaddlePaddle/develop
AnnaTrainingG Apr 19, 2021
d25ab26
Merge pull request #4 from PaddlePaddle/develop
AnnaTrainingG May 7, 2021
a244f18
max_min_prod_all_any
May 18, 2021
af4db5d
Update reduce_any_op.cu
AnnaTrainingG May 24, 2021
d804066
modified
AnnaTrainingG May 24, 2021
c7826e8
Merge branch 'reduce_max_min_prod_all_any' of https://github.com/niul…
AnnaTrainingG May 24, 2021
6ea9e9a
copyright
AnnaTrainingG May 24, 2021
8c8717f
Merge pull request #5 from PaddlePaddle/develop
AnnaTrainingG May 25, 2021
ff0a6e9
modified and {} for loop
AnnaTrainingG May 25, 2021
7ddaf91
max_min_prod_all_any
May 18, 2021
a43af7d
Update reduce_any_op.cu
AnnaTrainingG May 24, 2021
0a70b82
modified
AnnaTrainingG May 24, 2021
c91b26b
copyright
AnnaTrainingG May 24, 2021
54651e0
modified and {} for loop
AnnaTrainingG May 25, 2021
37fbd4c
Merge branch 'reduce_max_min_prod_all_any' of https://github.com/niul…
May 25, 2021
35411f7
add notes for reduce_op.cuh
May 25, 2021
8cea954
update
May 25, 2021
a719c3c
update
May 25, 2021
2e8ad8f
update
May 25, 2021
a60b90a
fix a bug in reduce_Op.cuh
AnnaTrainingG May 27, 2021
4bd9644
reset reduce_any and reduce_all
May 28, 2021
bf701a2
delete __forceinline__ in reduce_functor_op.h
May 31, 2021
6174b50
from DEVICE to HOSTTDEVICE
May 31, 2021
59c32d6
add DataBound struct for reduce_max and reduce_min
AnnaTrainingG Jun 1, 2021
790173a
Update reduce_functor_op.h
AnnaTrainingG Jun 1, 2021
8700894
update TensorReduceFunc
AnnaTrainingG Jun 2, 2021
9e32b0f
add reduce_functor_op.h pragma once
Jun 3, 2021
17dcaf8
update BOUND and kMaxTHread
AnnaTrainingG Jun 7, 2021
cb2b619
modified max min prod for cu.h
AnnaTrainingG Jun 9, 2021
6541ffb
update for struct
AnnaTrainingG Jun 9, 2021
719e435
code style reduce_op.cu.h
AnnaTrainingG Jun 9, 2021
5045a49
device to HOSTDEVICE
AnnaTrainingG Jun 10, 2021
a5dedb1
Merge branch 'reduce_max_min_prod_all_any' of https://github.com/niul…
AnnaTrainingG Jun 10, 2021
fb69e3d
ReduceCudaKernel
AnnaTrainingG Jun 15, 2021
24633a5
Merge pull request #15 from PaddlePaddle/develop
AnnaTrainingG Jun 15, 2021
b841b34
REDUCE_SPLIT_BOUNDARY
AnnaTrainingG Jun 15, 2021
1fda4d5
Update reduce_op.cu.h
AnnaTrainingG Jun 15, 2021
c85ca05
rename reduceTensorFunctor
AnnaTrainingG Jun 16, 2021
9cc8ac3
rename TensorReduceFunc
AnnaTrainingG Jun 16, 2021
140779d
delete HOSTDEVICE
AnnaTrainingG Jun 17, 2021
fa3411c
add left_num * grid.z * grid.y
AnnaTrainingG Jun 17, 2021
56 changes: 48 additions & 8 deletions paddle/fluid/operators/reduce_ops/reduce_functor_op.h
@@ -13,19 +13,45 @@ See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <string>
#include <vector>

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/fluid/platform/macros.h"
#include <float.h>
#include <limits.h>
#include <math.h>
#include <cstdint>

namespace paddle {
namespace operators {

template <typename T>
struct DataBound {
static inline T max() { return static_cast<T>(FLT_MAX); }
static inline T min() { return static_cast<T>(-FLT_MAX); }
};

template <>
struct DataBound<float> {
static inline float max() { return FLT_MAX; }
static inline float min() { return -FLT_MAX; }
};

template <>
struct DataBound<double> {
static inline double max() { return DBL_MAX; }
static inline double min() { return -DBL_MAX; }
};

template <>
struct DataBound<int32_t> {
static inline int32_t max() { return INT32_MAX; }
static inline int32_t min() { return INT32_MIN; }
};

template <>
struct DataBound<int64_t> {
static inline int64_t max() { return INT64_MAX; }
static inline int64_t min() { return INT64_MIN; }
};

template <typename T>
struct CustomMin {
__device__ __forceinline__ T operator()(const T &a, const T &b) const {
@@ -54,5 +80,19 @@ struct CustomMul {
}
};

template <typename T>
struct CustomLogicalOr {
__device__ __forceinline__ T operator()(const T &a, const T &b) const {
return b || a;
}
};

template <typename T>
struct CustomLogicalAnd {
__device__ __forceinline__ T operator()(const T &a, const T &b) const {
return b && a;
}
};

} // namespace operators
} // namespace paddle
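
The DataBound specializations above supply the identity element that seeds each reduction: -FLT_MAX / -DBL_MAX / INT32_MIN / INT64_MIN for a max-reduce, and the corresponding maxima for a min-reduce, so the initial accumulator can never beat a real input element. A minimal host-side sketch of that idea (standalone and illustrative only — this is not the PR's TensorReduceFunc; DataBound and the functor shape are copied from the diff, Reduce and main are hypothetical):

#include <float.h>
#include <cstdio>

template <typename T>
struct DataBound {
  static inline T max() { return static_cast<T>(FLT_MAX); }
  static inline T min() { return static_cast<T>(-FLT_MAX); }
};

template <typename T>
struct CustomMax {
  T operator()(const T& a, const T& b) const { return a > b ? a : b; }
};

// Reference reduction seeded with the identity element: reducer(init, x) == x
// for every x in the type's range, so the seed can never win.
template <typename T, typename Reducer>
T Reduce(const T* data, int n, T init, Reducer reducer) {
  T acc = init;
  for (int i = 0; i < n; ++i) acc = reducer(acc, data[i]);
  return acc;
}

int main() {
  float data[4] = {-3.f, -7.f, -0.5f, -2.f};
  // Seeding with 0 instead of -FLT_MAX would return 0 here -- wrong for
  // all-negative input; DataBound<float>::min() gives the correct -0.5.
  float m = Reduce(data, 4, DataBound<float>::min(), CustomMax<float>());
  std::printf("max = %f\n", m);
  return 0;
}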
54 changes: 43 additions & 11 deletions paddle/fluid/operators/reduce_ops/reduce_max_op.cu
@@ -12,14 +12,46 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"

REGISTER_OP_CUDA_KERNEL(reduce_max,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
float, ops::MaxFunctor>,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
double, ops::MaxFunctor>,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
int, ops::MaxFunctor>,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
int64_t, ops::MaxFunctor>);
#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.cuh"
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"

namespace paddle {
namespace operators {

template <typename T>
class ReduceMaxKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
bool reduce_all = context.Attr<bool>("reduce_all");
auto* input = context.Input<Tensor>("X");
auto* output = context.Output<Tensor>("Out");

auto dims = context.Attr<std::vector<int>>("dim");
bool keep_dim = context.Attr<bool>("keep_dim");

std::vector<int> reduce_dims;
if (reduce_all) {
reduce_dims.resize(input->dims().size());
for (int i = 0; i < reduce_dims.size(); ++i) {
reduce_dims[i] = i;
}
} else {
for (auto e : dims) {
reduce_dims.push_back(e >= 0 ? e : e + input->dims().size());
}
}

auto stream = context.cuda_device_context().stream();
TensorReduceFunc<T, T, CustomMax<T>, detail::IdentityFunctor<T>>(
*input, output, reduce_dims, DataBound<T>::min(), CustomMax<T>(),
detail::IdentityFunctor<T>(), detail::IdentityFunctor<T>(), stream);
}
};

} // namespace operators
} // namespace paddle

REGISTER_OP_CUDA_KERNEL(reduce_max, ops::ReduceMaxKernel<float>,
ops::ReduceMaxKernel<double>, ops::ReduceMaxKernel<int>,
ops::ReduceMaxKernel<int64_t>);
54 changes: 43 additions & 11 deletions paddle/fluid/operators/reduce_ops/reduce_min_op.cu
@@ -12,14 +12,46 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"

REGISTER_OP_CUDA_KERNEL(reduce_min,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
float, ops::MinFunctor>,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
double, ops::MinFunctor>,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
int, ops::MinFunctor>,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
int64_t, ops::MinFunctor>);
#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.cuh"
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"

namespace paddle {
namespace operators {

template <typename T>
class ReduceMinKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
bool reduce_all = context.Attr<bool>("reduce_all");
auto* input = context.Input<Tensor>("X");
auto* output = context.Output<Tensor>("Out");

auto dims = context.Attr<std::vector<int>>("dim");
bool keep_dim = context.Attr<bool>("keep_dim");

std::vector<int> reduce_dims;
if (reduce_all) {
reduce_dims.resize(input->dims().size());
for (int i = 0; i < reduce_dims.size(); ++i) {
reduce_dims[i] = i;
}
} else {
for (auto e : dims) {
reduce_dims.push_back(e >= 0 ? e : e + input->dims().size());
}
}

auto stream = context.cuda_device_context().stream();
TensorReduceFunc<T, T, CustomMin<T>, detail::IdentityFunctor<T>>(
*input, output, reduce_dims, DataBound<T>::max(), CustomMin<T>(),
detail::IdentityFunctor<T>(), detail::IdentityFunctor<T>(), stream);
}
};

} // namespace operators
} // namespace paddle

REGISTER_OP_CUDA_KERNEL(reduce_min, ops::ReduceMinKernel<float>,
ops::ReduceMinKernel<double>, ops::ReduceMinKernel<int>,
ops::ReduceMinKernel<int64_t>);