Skip to content

Commit

Permalink
add unittest for new matmul_v2 kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
zyfncg committed Oct 28, 2021
1 parent 0fb60d0 commit 7758f14
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 11 deletions.
25 changes: 16 additions & 9 deletions paddle/fluid/operators/matmul_v2_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ limitations under the License. */
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h"

// only can include the headers in paddle/pten/api dirs
#include "paddle/pten/api/include/core.h"
#include "paddle/pten/api/include/linalg.h"
#include "paddle/pten/hapi/lib/utils/tensor_utils.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include "paddle/fluid/operators/reduce_ops/cub_reduce.h"
#endif
Expand Down Expand Up @@ -380,15 +385,17 @@ class MatMulV2Kernel : public framework::OpKernel<T> {
auto* Out = ctx.Output<Tensor>("Out");
bool trans_x = ctx.Attr<bool>("trans_x");
bool trans_y = ctx.Attr<bool>("trans_y");
PADDLE_ENFORCE_NE(framework::product(X->dims()), 0,
platform::errors::InvalidArgument(
"The Input(X) dims size must not be equal 0,"
" but reviced dims size is 0. "));
PADDLE_ENFORCE_NE(framework::product(Y->dims()), 0,
platform::errors::InvalidArgument(
"The Input(Y) dims size must not be equal 0,"
" but reviced dims size is 0. "));
MatMulFunction<DeviceContext, T>(X, Y, Out, trans_x, trans_y, ctx);

auto& dev_ctx = ctx.device_context<DeviceContext>();
Out->mutable_data<T>(X->place());

auto pt_x = paddle::experimental::MakePtenDenseTensor(*X);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*Y);
auto pt_out = paddle::experimental::MakePtenDenseTensor(*Out);

// call new kernel
pten::Matmul<T>(dev_ctx, *pt_x.get(), *pt_y.get(), trans_x, trans_y,
pt_out.get());
}
};

Expand Down
2 changes: 2 additions & 0 deletions paddle/pten/hapi/lib/linalg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ Tensor matmul(const Tensor& x,
auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
kernel_context.EmplaceBackInput(dense_x);
kernel_context.EmplaceBackInput(dense_y);
kernel_context.EmplaceBackAttr(transpose_x);
kernel_context.EmplaceBackAttr(transpose_y);
// TODO(chenweihang): add transform impl

// 4. InferShape
Expand Down
2 changes: 2 additions & 0 deletions paddle/pten/kernels/cuda/linalg.cu
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ void Matmul(const CUDAContext& dev_ctx,

PT_REGISTER_MODULE(LinalgCUDA);

using float16 = paddle::platform::float16;
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;

Expand All @@ -76,5 +77,6 @@ PT_REGISTER_KERNEL("matmul_v2",
pten::Matmul,
float,
double,
float16,
complex64,
complex128) {}
1 change: 0 additions & 1 deletion paddle/pten/kernels/functions/math/matmul_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,6 @@ void MatMulFunction(const DeviceContext& dev_ctx,
x_broadcast_dims.data(),
y_broadcast_dims.data(),
out_broadcast_dims.data());

out_broadcast_dims[ndim - 2] = M;
out_broadcast_dims[ndim - 1] = N;

Expand Down
1 change: 1 addition & 0 deletions paddle/pten/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ cc_test(dense_tensor_test SRCS dense_tensor_test.cc DEPS dense_tensor)
cc_test(kernel_factory_test SRCS kernel_factory_test.cc DEPS kernel_factory)
cc_test(test_mean_api SRCS test_mean_api.cc DEPS math_api pten_hapi_utils)
cc_test(test_dot_api SRCS test_dot_api.cc DEPS linalg_api pten_hapi_utils)
cc_test(test_matmul_api SRCS test_matmul_api.cc DEPS linalg_api pten_hapi_utils)
cc_test(test_fill_api SRCS test_fill_api.cc DEPS creation_api pten_hapi_utils)
cc_test(test_copy_api SRCS test_copy_api.cc DEPS utils_cpu pten_hapi_utils)
cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS utils_cpu manipulation_api pten_hapi_utils)
1 change: 0 additions & 1 deletion paddle/pten/tests/test_fill_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ using DDim = paddle::framework::DDim;

// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, full_like) {
// 1. create tensor
// 1. create tensor
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
Expand Down
157 changes: 157 additions & 0 deletions paddle/pten/tests/test_matmul_api.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <gtest/gtest.h>
#include <memory>

#include "paddle/pten/hapi/include/linalg.h"

#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/hapi/lib/utils/allocator.h"
#include "paddle/pten/kernels/cuda/utils.h"

PT_DECLARE_MODULE(LinalgCPU);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_MODULE(LinalgCUDA);
#endif

namespace framework = paddle::framework;
using DDim = paddle::framework::DDim;

TEST(API, matmul_cpu) {
  // Build two 3x3 float32 host tensors (x filled with 1, y filled with 2)
  // and check that matmul(x, y) produces a 3x3 tensor where every element
  // equals 6 (sum over k of 1 * 2, with k = 3).
  // 1. create tensor
  const auto allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());

  auto dense_x = std::make_shared<pten::DenseTensor>(
      allocator,
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));
  auto dense_y = std::make_shared<pten::DenseTensor>(
      allocator,
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));

  auto* x_data = dense_x->mutable_data<float>();
  auto* y_data = dense_y->mutable_data<float>();
  constexpr size_t kNumel = 9;
  for (size_t i = 0; i < kNumel; ++i) {
    x_data[i] = 1.0;
    y_data[i] = 2.0;
  }
  // Expected result: every output element is 6.
  const std::vector<float> expected(kNumel, 6.0);

  paddle::experimental::Tensor x(dense_x);
  paddle::experimental::Tensor y(dense_y);

  // 2. test API
  auto out = paddle::experimental::matmul(x, y, false, false);

  // 3. check result
  ASSERT_EQ(out.shape().size(), 2);
  ASSERT_EQ(out.shape()[0], 3);
  ASSERT_EQ(out.shape()[1], 3);
  ASSERT_EQ(out.numel(), 9);
  ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
  ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(out.initialized(), true);

  auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out.impl());
  const float* out_data = dense_out->data<float>();
  for (size_t i = 0; i < kNumel; ++i) {
    ASSERT_NEAR(expected[i], out_data[i], 1e-6f);
  }
}

// This test requires a CUDA/HIP device context: on CPU-only builds
// DeviceContextPool::GetByPlace(CUDAPlace) does not compile (see the
// PR-CI-Windows-OPENBLAS failure for this commit). Guard it the same way
// as the PT_DECLARE_MODULE(LinalgCUDA) declaration above.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST(API, matmul_cuda) {
  // Fill two 3x3 host tensors, copy them to the device, run matmul on the
  // GPU, copy the result back, and compare against the expected all-6s.
  // Prepare CPU Dense Tensor
  const auto alloc_cpu =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());
  auto ref_x = std::make_shared<pten::DenseTensor>(
      alloc_cpu,
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));

  auto* ref_x_data = ref_x->mutable_data<float>();

  auto ref_y = std::make_shared<pten::DenseTensor>(
      alloc_cpu,
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));
  auto* ref_y_data = ref_y->mutable_data<float>();

  for (size_t i = 0; i < 9; ++i) {
    ref_x_data[i] = 1.0;
    ref_y_data[i] = 2.0;
  }
  // Each output element is sum over k of 1 * 2 = 6.
  std::vector<float> sum(9, 6.0);

  // 1. create tensor
  const auto alloc_cuda =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CUDAPlace());
  auto dense_x = std::make_shared<pten::DenseTensor>(
      alloc_cuda,
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));

  auto dense_y = std::make_shared<pten::DenseTensor>(
      alloc_cuda,
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));

  auto& pool = paddle::platform::DeviceContextPool::Instance();
  auto place = paddle::platform::CUDAPlace();
  auto* dev_ctx = pool.GetByPlace(place);

  // Host-to-device copies of the input data.
  pten::Copy(*dev_ctx, *ref_x.get(), dense_x.get());
  pten::Copy(*dev_ctx, *ref_y.get(), dense_y.get());

  paddle::experimental::Tensor x(dense_x);
  paddle::experimental::Tensor y(dense_y);

  // 2. test API
  auto out = paddle::experimental::matmul(x, y, false, false);

  // 3. check result
  ASSERT_EQ(out.shape().size(), 2);
  ASSERT_EQ(out.shape()[0], 3);
  ASSERT_EQ(out.shape()[1], 3);
  ASSERT_EQ(out.numel(), 9);
  ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
  ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(out.initialized(), true);

  auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out.impl());

  auto ref_out = std::make_shared<pten::DenseTensor>(
      alloc_cpu,
      pten::DenseTensorMeta(
          pten::DataType::FLOAT32, out.shape(), pten::DataLayout::NCHW));

  // Device-to-host copy of the result; wait for the stream so the host
  // buffer is fully populated before it is read below.
  pten::Copy(*dev_ctx, *dense_out.get(), ref_out.get());
  dev_ctx->Wait();

  for (size_t i = 0; i < 9; i++) {
    ASSERT_NEAR(sum[i], ref_out->data<float>()[i], 1e-6f);
  }
}
#endif

1 comment on commit 7758f14

@paddle-bot-old
Copy link

@paddle-bot-old paddle-bot-old bot commented on 7758f14 Oct 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🕵️ CI failures summary

🔍 PR: #36844 Commit ID: 7758f14 contains failed CI.

🔹 Failed: PR-CI-APPROVAL

approve_failed
2021-10-28 20:27:30 长度:5 [application/octet-stream]
2021-10-28 20:27:30 正在保存至: “bk.txt”
2021-10-28 20:27:30 0K 100% 2.96M=0s
2021-10-28 20:27:30 2021-10-28 20:27:30 (2.96 MB/s) - 已保存 “bk.txt” [5/5])
2021-10-28 20:27:38 ****************
2021-10-28 20:27:38 0. You must have one RD (lanxianghit (Recommend), phlrain or luotao1) approval for changing the FLAGS, which manages the environment variables.
2021-10-28 20:27:38 1. You must have Dianhai approval for change 20+ files or add than 1000+ lines of content.
2021-10-28 20:27:38 2. You must have one RD (XiaoguangHu01,chenwhql,zhiqiu,Xreki,luotao1) approval for paddle/fluid/framework/operator.h, which manages the underlying code for fluid.
2021-10-28 20:27:38 3. You must have one RD (zhiqiu (Recommend) , phlrain) approval for the changes of paddle/fluid/pybind/op_function_generator.cc, which manages the logic of automatic generating op functions for dygraph.
2021-10-28 20:27:38 4. You must have one RD (Avin0323(Recommend) or zhouwei25 or wanghuancoder or luotao1) approval for modifying unity_build_rule.cmake which the rules of Unity Build.
2021-10-28 20:27:38 There are 5 approved errors.
2021-10-28 20:27:38 ****************
2021-10-28 20:27:38 + EXCODE=6
2021-10-28 20:27:38 + echo 'EXCODE: 6'
2021-10-28 20:27:38 EXCODE: 6
2021-10-28 20:27:38 + echo 'ipipe_log_param_EXCODE: 6'
2021-10-28 20:27:38 ipipe_log_param_EXCODE: 6
2021-10-28 20:27:38 + exit 6

🔹 Failed: PR-CI-Mac-Python3

build_failed
2021-10-28 20:52:36 [ 30%] Linking CXX static library libgraph.a
2021-10-28 20:52:36 [ 30%] Built target graph
2021-10-28 20:52:36 [ 30%] Built target program_processing_test
2021-10-28 20:52:36 make: *** [all] Error 2
2021-10-28 20:52:36 + build_error=2
2021-10-28 20:52:36 + collect_ccache_hits
2021-10-28 20:52:36 ++ ccache -s
2021-10-28 20:52:36 ++ grep 'cache hit rate'
2021-10-28 20:52:36 ++ awk '{print $4}'
2021-10-28 20:52:36 + rate=100.00
2021-10-28 20:52:36 + echo 'ccache hit rate: 100.00%'
2021-10-28 20:52:36 ccache hit rate: 100.00%
2021-10-28 20:52:36 + echo 'ipipe_log_param_Ccache_Hit_Rate: 100.00%'
2021-10-28 20:52:36 + '[' 2 '!=' 0 ']'
2021-10-28 20:52:36 + exit 7
2021-10-28 20:52:36 EXCODE: 7
2021-10-28 20:52:36 ipipe_log_param_EXCODE: 7
2021-10-28 20:52:36 Sorry, build failed.
2021-10-28 20:52:36 + exit 7

🔹 Failed: PR-CI-Windows-OPENBLAS

build_failed
2021-10-28 20:56:44          C:\home\workspace\Paddle\paddle\pten\tests\test_matmul_api.cc(125): error C2672: 'paddle::platform::DeviceContextPool::GetByPlace': no matching overloaded function found [C:\home\workspace\Paddle\build\paddle\pten\tests\test_matmul_api.vcxproj]
2021-10-28 20:56:44 C:\home\workspace\Paddle\paddle\pten\tests\test_matmul_api.cc(125): error C2893: Failed to specialize function template 'const DefaultDeviceContextType::TYPE *paddle::platform::DeviceContextPool::GetByPlace(const Place &)' [C:\home\workspace\Paddle\build\paddle\pten\tests\test_matmul_api.vcxproj]
2021-10-28 20:56:44 C:\home\workspace\Paddle\paddle\pten\tests\test_matmul_api.cc(127): error C3536: 'dev_ctx': cannot be used before it is initialized [C:\home\workspace\Paddle\build\paddle\pten\tests\test_matmul_api.vcxproj]
2021-10-28 20:56:44 C:\home\workspace\Paddle\paddle\pten\tests\test_matmul_api.cc(127): error C2100: illegal indirection [C:\home\workspace\Paddle\build\paddle\pten\tests\test_matmul_api.vcxproj]
2021-10-28 20:56:44 C:\home\workspace\Paddle\paddle\pten\tests\test_matmul_api.cc(128): error C2100: illegal indirection [C:\home\workspace\Paddle\build\paddle\pten\tests\test_matmul_api.vcxproj]
2021-10-28 20:56:44 C:\home\workspace\Paddle\paddle\pten\tests\test_matmul_api.cc(152): error C2100: illegal indirection [C:\home\workspace\Paddle\build\paddle\pten\tests\test_matmul_api.vcxproj]
2021-10-28 20:56:44 796 Warning(s)
2021-10-28 20:56:44 10 Error(s)
2021-10-28 20:56:44 Time Elapsed 00:29:51.53
2021-10-28 20:56:44 7
2021-10-28 20:56:44 Build Paddle failed, will exit
2021-10-28 20:56:44 EXCODE: 7

Please sign in to comment.