[bf16] add bf16 kernel: dropout & reshape & slice (#39395)
* add dropout

* add reshape

* add slice

* refine slice unittest

* refine slice unittest

* add cpu bf16 kernel
zhangbo9674 committed Feb 10, 2022
1 parent 14ed2f5 commit e8ac7fc
Showing 8 changed files with 109 additions and 11 deletions.
8 changes: 6 additions & 2 deletions paddle/fluid/operators/dropout_op.cc
@@ -179,8 +179,12 @@ REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker,
REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad);
REGISTER_OP_CPU_KERNEL(
dropout, ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, float>,
ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, double>);
ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, double>,
ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext,
paddle::platform::bfloat16>);
REGISTER_OP_CPU_KERNEL(
dropout_grad,
ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, double>);
ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::DropoutGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::bfloat16>);
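
A minimal usage sketch (not part of this diff) of the CPU bf16 dropout path registered above; it assumes paddle.cast accepts 'bfloat16' in this build and that dygraph dropout dispatches to the newly registered bfloat16 CPUDropoutKernel.

import paddle

# Hypothetical sketch: run dropout on a bfloat16 tensor on CPU.
# Assumes 'bfloat16' is accepted by paddle.cast on this device.
paddle.set_device('cpu')
x = paddle.cast(paddle.rand([32, 64]), 'bfloat16')
y = paddle.nn.functional.dropout(x, p=0.5, training=True)
print(y.dtype)  # expected: paddle.bfloat16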
3 changes: 3 additions & 0 deletions paddle/fluid/operators/dropout_op.cu
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/dropout_impl.cu.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/float16.h"

namespace paddle {
@@ -84,8 +85,10 @@ namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
dropout, ops::GPUDropoutKernel<plat::CUDADeviceContext, float>,
ops::GPUDropoutKernel<plat::CUDADeviceContext, plat::float16>,
ops::GPUDropoutKernel<plat::CUDADeviceContext, plat::bfloat16>,
ops::GPUDropoutKernel<plat::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
dropout_grad, ops::GPUDropoutGradKernel<plat::CUDADeviceContext, float>,
ops::GPUDropoutGradKernel<plat::CUDADeviceContext, plat::float16>,
ops::GPUDropoutGradKernel<plat::CUDADeviceContext, plat::bfloat16>,
ops::GPUDropoutGradKernel<plat::CUDADeviceContext, double>);
11 changes: 7 additions & 4 deletions paddle/fluid/operators/reshape_op.cc
@@ -698,27 +698,30 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
ops::ReshapeKernel, int, ops::ReshapeKernel,
uint8_t, ops::ReshapeKernel, int64_t,
ops::ReshapeKernel, plat::float16,
ops::ReshapeKernel, plat::bfloat16,
ops::ReshapeKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
double, ops::ReshapeGradKernel, int,
ops::ReshapeGradKernel, int64_t,
ops::ReshapeGradKernel, uint8_t,
ops::ReshapeGradKernel, plat::float16,

ops::ReshapeGradKernel, plat::bfloat16,
ops::ReshapeGradKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
ops::ReshapeKernel, int, ops::ReshapeKernel,
uint8_t, ops::ReshapeKernel, int64_t,
ops::ReshapeKernel, plat::float16,
ops::ReshapeKernel, bool, ops::ReshapeKernel,
plat::complex<float>, ops::ReshapeKernel,
plat::complex<double>, ops::ReshapeKernel);
plat::complex<double>, ops::ReshapeKernel,
plat::bfloat16, ops::ReshapeKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(
reshape2_grad, float, ops::ReshapeGradKernel, double,
ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, uint8_t,
ops::ReshapeGradKernel, int64_t, ops::ReshapeGradKernel, plat::float16,
ops::ReshapeGradKernel, bool, ops::ReshapeGradKernel, plat::complex<float>,
ops::ReshapeGradKernel, plat::complex<double>, ops::ReshapeGradKernel);
ops::ReshapeGradKernel, plat::complex<double>, ops::ReshapeGradKernel,
plat::bfloat16, ops::ReshapeGradKernel);

REGISTER_OP_CUDA_KERNEL_FUNCTOR(
reshape2_grad_grad, float, ops::ReshapeDoubleGradKernel, double,
@@ -727,7 +730,7 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(
plat::float16, ops::ReshapeDoubleGradKernel, bool,
ops::ReshapeDoubleGradKernel, plat::complex<float>,
ops::ReshapeDoubleGradKernel, plat::complex<double>,
ops::ReshapeDoubleGradKernel);
ops::ReshapeDoubleGradKernel, plat::bfloat16, ops::ReshapeDoubleGradKernel);
#endif

#ifdef PADDLE_WITH_XPU
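A hedged sketch (hypothetical, not in the diff) of the reshape path: reshape never touches element values, so bf16 support is only a matter of registering the existing ReshapeKernel for plat::bfloat16, as the CUDA hunks above do.

import paddle

# Hypothetical sketch: reshape a bfloat16 tensor and check that dtype and
# shape are preserved. Assumes a bf16 reshape kernel is registered for the
# device in use (the hunks above add the CUDA registrations).
x = paddle.cast(paddle.rand([2, 60]), 'bfloat16')
y = paddle.reshape(x, [12, 10])
assert list(y.shape) == [12, 10] and y.dtype == x.dtype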
12 changes: 10 additions & 2 deletions paddle/fluid/operators/slice_op.cc
@@ -442,7 +442,9 @@ REGISTER_OP_CPU_KERNEL(
ops::SliceKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::SliceKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
paddle::platform::complex<double>>,
ops::SliceKernel<paddle::platform::CPUDeviceContext,
paddle::platform::bfloat16>);

REGISTER_OP_CPU_KERNEL(
slice_grad, ops::SliceGradKernel<paddle::platform::CPUDeviceContext, bool>,
@@ -453,7 +455,9 @@ REGISTER_OP_CPU_KERNEL(
ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
paddle::platform::complex<double>>,
ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::bfloat16>);

REGISTER_OP_CUDA_KERNEL(
slice, ops::SliceKernel<paddle::platform::CUDADeviceContext, bool>,
@@ -463,6 +467,8 @@ REGISTER_OP_CUDA_KERNEL(
ops::SliceKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::SliceKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>,
ops::SliceKernel<paddle::platform::CUDADeviceContext,
paddle::platform::bfloat16>,
ops::SliceKernel<paddle::platform::CUDADeviceContext,
paddle::platform::complex<float>>,
ops::SliceKernel<paddle::platform::CUDADeviceContext,
@@ -476,6 +482,8 @@ REGISTER_OP_CUDA_KERNEL(
ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::bfloat16>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::complex<float>>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
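A hedged sketch (hypothetical) of the slice path, mirroring the unit test further below; basic tensor indexing in dygraph typically lowers to the slice op, so this should reach the bf16 kernels registered above on a supported device.

import paddle

# Hypothetical sketch: slice a bfloat16 tensor along several axes.
# Assumes a bf16 slice kernel is registered for the device in use.
x = paddle.cast(paddle.rand([3, 4, 5, 6]), 'bfloat16')
y = x[-3:3, 0:100, :, 2:-1]
assert y.dtype == x.dtype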
2 changes: 2 additions & 0 deletions paddle/pten/kernels/funcs/eigen/pad.cc
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/kernels/funcs/eigen/eigen_function.h"

@@ -61,6 +62,7 @@ INSTANTIATION(EigenPad, int);
INSTANTIATION(EigenPad, int64_t);
INSTANTIATION(EigenPad, float);
INSTANTIATION(EigenPad, double);
INSTANTIATION(EigenPad, dtype::bfloat16);
INSTANTIATION(EigenPad, dtype::complex<float>);
INSTANTIATION(EigenPad, dtype::complex<double>);
#undef INSTANTIATION
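The extra EigenPad instantiation for dtype::bfloat16 is most likely needed by the slice gradient: the gradient of a slice places dOut back at the sliced positions of a zero tensor shaped like the input, which is exactly a pad. A plain numpy illustration of that relationship (illustrative only, not the kernel code):

import numpy as np

# Gradient of y = x[-3:3, 0:100, :, 2:-1] with respect to x: scatter dOut back
# into a zero tensor of x's shape and leave everything else zero -- a padding op.
x = np.random.random([3, 4, 5, 6]).astype(np.float32)
d_out = np.ones_like(x[-3:3, 0:100, :, 2:-1])
d_x = np.zeros_like(x)
d_x[-3:3, 0:100, :, 2:-1] = d_out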
23 changes: 22 additions & 1 deletion python/paddle/fluid/tests/unittests/test_dropout_op.py
@@ -17,7 +17,7 @@
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci
from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16
import paddle
import paddle.static as static
import paddle.fluid as fluid
@@ -233,6 +233,27 @@ def init_test_case(self):
self.fix_seed = False


class TestBF16DropoutOp(OpTest):
def setUp(self):
self.op_type = "dropout"
self.dtype = np.uint16

x = np.random.random((32, 64)).astype("float32")
self.inputs = {'X': convert_float_to_uint16(x)}
self.attrs = {'dropout_prob': 1.0, 'fix_seed': True, 'is_test': False}
self.outputs = {
'Out':
convert_float_to_uint16(np.zeros((32, 64)).astype('float32')),
'Mask': np.zeros((32, 64)).astype('uint8')
}

def test_check_output(self):
self.check_output()

def test_check_grad_normal(self):
self.check_grad(['X'], 'Out')


class TestDropoutOpWithSeedOnCPUPlace(unittest.TestCase):
def test_seed_cpu_place(self):
paddle.enable_static()
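The tests store bf16 data as np.uint16 because a bfloat16 value is just the upper half of the corresponding float32 bit pattern. A minimal sketch of what convert_float_to_uint16 conceptually does (plain truncation here; the real op_test helper may round to nearest even):

import numpy as np

def float32_to_bf16_bits(x):
    # Keep the high 16 bits of each float32 -- exactly the bfloat16 bit
    # pattern -- and store them as uint16, matching the tests' self.dtype.
    return (np.asarray(x, dtype=np.float32).view(np.uint32) >> 16).astype(np.uint16)

bits = float32_to_bf16_bits(np.random.random((32, 64)).astype("float32"))
assert bits.dtype == np.uint16 and bits.shape == (32, 64)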
30 changes: 29 additions & 1 deletion python/paddle/fluid/tests/unittests/test_reshape_op.py
@@ -17,11 +17,12 @@
import unittest
import numpy as np

from op_test import OpTest
from op_test import OpTest, convert_float_to_uint16
import paddle
import paddle.fluid as fluid
from paddle.fluid import compiler
from paddle.static import Program, program_guard
import paddle.fluid.core as core


# situation 1: have shape( list, no tensor), no actual shape(Tensor)
@@ -48,6 +49,33 @@ def test_check_grad(self):
self.check_grad(["X"], "Out")


class TestReshapeBF16Op(OpTest):
def setUp(self):
self.init_data()
self.op_type = "reshape2"
self.dtype = np.uint16
x = np.random.random(self.ori_shape).astype("float32")
out = x.reshape(self.infered_shape)
self.inputs = {"X": convert_float_to_uint16(x)}
self.attrs = {"shape": self.new_shape}
self.outputs = {
"Out": convert_float_to_uint16(out),
'XShape': convert_float_to_uint16(
np.random.random(self.ori_shape).astype("float32"))
}

def init_data(self):
self.ori_shape = (2, 60)
self.new_shape = (12, 10)
self.infered_shape = (12, 10)

def test_check_output(self):
self.check_output(no_check_set=['XShape'])

def test_check_grad(self):
self.check_grad(["X"], "Out")


class TestReshapeOpDimInfer1(TestReshapeOp):
def init_data(self):
self.ori_shape = (5, 25)
31 changes: 30 additions & 1 deletion python/paddle/fluid/tests/unittests/test_slice_op.py
@@ -17,7 +17,7 @@
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
from op_test import OpTest, convert_float_to_uint16
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle
@@ -484,6 +484,35 @@ def test_check_grad_normal(self):
numeric_grad_delta=0.5)


class TestBF16(OpTest):
def setUp(self):
self.op_type = "slice"
self.config()
self.inputs = {'Input': convert_float_to_uint16(self.input)}
self.outputs = {'Out': convert_float_to_uint16(self.out)}
self.attrs = {
'axes': self.axes,
'starts': self.starts,
'ends': self.ends,
'infer_flags': self.infer_flags
}

def config(self):
self.dtype = np.uint16
self.input = np.random.random([3, 4, 5, 6]).astype(np.float32)
self.starts = [-3, 0, 2]
self.ends = [3, 100, -1]
self.axes = [0, 1, 3]
self.out = self.input[-3:3, 0:100, :, 2:-1]
self.infer_flags = [1, 1, 1]

def test_check_output(self):
self.check_output()

def test_check_grad_normal(self):
self.check_grad(['Input'], 'Out')


# Test python API
class TestSliceAPI(unittest.TestCase):
def test_1(self):
