Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Browse files Browse the repository at this point in the history
… nearest_interp
  • Loading branch information
xingjing1 committed Sep 14, 2021
2 parents 58e2159 + 0f74188 commit d7b884d
Show file tree
Hide file tree
Showing 607 changed files with 42,472 additions and 6,755 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,9 @@ option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
# Optional build switches; every option in this group defaults to OFF.
option(WITH_MUSL "Compile with musl libc instead of glibc" OFF)
option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)
option(WITH_STRIP "Strip so files of Whl packages" OFF)
# Release-packaging strategies (cubin package / backup JIT package).
option(NEW_RELEASE_CUBIN "PaddlePaddle next-level release strategy for pypi cubin package" OFF)
option(NEW_RELEASE_JIT "PaddlePaddle next-level release strategy for backup jit package" OFF)
# Enables the int64 kernels for Ascend NPU builds.
option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU" OFF)

# PY_VERSION
if(NOT PY_VERSION)
Expand Down
8 changes: 8 additions & 0 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ if(WITH_TESTING)
add_definitions(-DPADDLE_WITH_TESTING)
endif(WITH_TESTING)

# Pass the inference-API-test switch to the compiler as a preprocessor macro.
if(WITH_INFERENCE_API_TEST)
  add_definitions(-DPADDLE_WITH_INFERENCE_API_TEST)
endif()

# The profiler macro is a "disable" flag: it is only emitted when
# WITH_PROFILER is turned off.
if(NOT WITH_PROFILER)
  add_definitions(-DPADDLE_DISABLE_PROFILER)
endif()
Expand Down Expand Up @@ -86,6 +90,10 @@ if(WITH_ASCEND_CL)
add_definitions(-DPADDLE_WITH_ASCEND_CL)
endif()

# Add the -DPADDLE_WITH_ASCEND_INT64 compile definition when the
# WITH_ASCEND_INT64 option is enabled.
if(WITH_ASCEND_INT64)
add_definitions(-DPADDLE_WITH_ASCEND_INT64)
endif()

if(WITH_XPU)
message(STATUS "Compile with XPU!")
add_definitions(-DPADDLE_WITH_XPU)
Expand Down
20 changes: 19 additions & 1 deletion cmake/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,22 @@ if(NOT WITH_GPU)
endif()


if (WITH_NV_JETSON)
if(WITH_NV_JETSON)
add_definitions(-DWITH_NV_JETSON)
set(paddle_known_gpu_archs "53 62 72")
set(paddle_known_gpu_archs10 "53 62 72")
elseif(NEW_RELEASE_CUBIN)
message("Using New Release Strategy - Cubin Packge")
add_definitions(-DNEW_RELEASE_CUBIN)
set(paddle_known_gpu_archs "35 37 50 52 60 61 70 75 80 86")
set(paddle_known_gpu_archs10 "50 60 70 75")
set(paddle_known_gpu_archs11 "60 70 75 80")
elseif(NEW_RELEASE_JIT)
message("Using New Release Strategy - JIT Packge")
add_definitions(-DNEW_RELEASE_JIT)
set(paddle_known_gpu_archs "35 37 50 52 60 61 70 75 80 86")
set(paddle_known_gpu_archs10 "35 50 60 70 75")
set(paddle_known_gpu_archs11 "35 50 60 70 75 80")
else()
set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80")
set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
Expand Down Expand Up @@ -130,11 +142,17 @@ function(select_nvcc_arch_flags out_variable)
set(cuda_arch_bin ${CUDA_ARCH_BIN})
endif()

# remove dots from the architecture strings
string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
string(REGEX REPLACE "\\." "" cuda_arch_ptx "${CUDA_ARCH_PTX}")

# NEW_RELEASE_JIT: move every entry of cuda_arch_bin into cuda_arch_ptx and
# leave cuda_arch_bin empty. This must run AFTER cuda_arch_ptx has been
# assigned from CUDA_ARCH_PTX above; doing it earlier lets that assignment
# overwrite the merged value while cuda_arch_bin has already been cleared.
# The explicit space keeps the last PTX entry and the first BIN entry from
# fusing into a single number (e.g. "80" + "35 50" -> "8035 50").
if(NEW_RELEASE_JIT)
  set(cuda_arch_ptx "${cuda_arch_ptx} ${cuda_arch_bin}")
  set(cuda_arch_bin "")
endif()

# convert to lists
string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")

list(REMOVE_DUPLICATES cuda_arch_bin)
list(REMOVE_DUPLICATES cuda_arch_ptx)

Expand Down
2 changes: 1 addition & 1 deletion cmake/external/python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
OUTPUT_VARIABLE _PYTHON_VALUES
ERROR_VARIABLE _PYTHON_ERROR_VALUE)

if(NOT _PYTHON_SUCCESS MATCHES 0)
if(NOT _PYTHON_SUCCESS EQUAL 0)
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ ELSE ()
ENDIF()

SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210818")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210909")
SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
Expand Down
4 changes: 4 additions & 0 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ function(cc_test_run TARGET_NAME)
if (APPLE)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 20)
endif()
elseif(WITH_TESTING AND NOT TEST ${TARGET_NAME})
add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E echo CI skip ${TARGET_NAME}.)
endif()
endfunction()

Expand Down Expand Up @@ -459,6 +461,8 @@ function(cc_test TARGET_NAME)
COMMAND ${TARGET_NAME}
ARGS ${cc_test_ARGS})
endif()
elseif(WITH_TESTING AND NOT TEST ${TARGET_NAME})
add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E echo CI skip ${TARGET_NAME}.)
endif()
endfunction(cc_test)

Expand Down
2 changes: 2 additions & 0 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ function(op_library TARGET)
list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc")
list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
list(REMOVE_ITEM hip_srcs "matrix_rank_op.cu")
list(REMOVE_ITEM hip_srcs "svd_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS}
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -354,10 +354,10 @@ cc_library(executor_cache SRCS executor_cache.cc DEPS parallel_executor)
if(WITH_PSCORE)
get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS)
cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS
conditional_block_op executor ${RPC_DEPS})
conditional_block_op executor gloo_wrapper ${RPC_DEPS})
else()
cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS
conditional_block_op executor)
conditional_block_op executor gloo_wrapper)
endif()
cc_library(prune SRCS prune.cc DEPS framework_proto boost)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
Expand Down
22 changes: 21 additions & 1 deletion paddle/fluid/framework/block_desc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,27 @@ void BlockDesc::MoveFrom(BlockDesc *block) {
}
ops_.clear();
for (const auto &src_op : block->ops_) {
AppendOp()->CopyFrom(*src_op);
auto *dst_op = AppendOp();
dst_op->CopyFrom(*src_op);
for (const auto &pair : src_op->GetAttrMap()) {
const auto &attr_name = pair.first;
const auto &attr_value = pair.second;
auto attr_type = static_cast<proto::AttrType>(attr_value.which() - 1);
if (attr_type == proto::AttrType::BLOCK) {
auto block_id = BOOST_GET_CONST(BlockDesc *, attr_value)->ID();
dst_op->SetBlockAttr(attr_name, prog_->MutableBlock(block_id));
VLOG(10) << "Set block attr " << attr_name << " id " << block_id;
} else if (attr_type == proto::AttrType::BLOCKS) {
auto old_blocks = BOOST_GET_CONST(std::vector<BlockDesc *>, attr_value);
std::vector<BlockDesc *> new_blocks;
new_blocks.reserve(old_blocks.size());
for (auto *b : old_blocks) {
VLOG(10) << "Set block attr " << attr_name << " id " << b->ID();
new_blocks.push_back(prog_->MutableBlock(b->ID()));
}
dst_op->SetBlocksAttr(attr_name, new_blocks);
}
}
}
need_update_ = true;
Flush();
Expand Down
182 changes: 157 additions & 25 deletions paddle/fluid/framework/data_feed.cc
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,11 @@ bool InMemoryDataFeed<T>::Start() {
output_channel_->Write(std::move(data));
}
#endif
if (batch_offsets_.size() > 0) {
VLOG(3) << "batch_size offsets: " << batch_offsets_.size();
enable_heterps_ = true;
this->offset_index_ = 0;
}
this->finish_start_ = true;
return true;
}
Expand All @@ -265,34 +270,64 @@ template <typename T>
int InMemoryDataFeed<T>::Next() {
#ifdef _LINUX
this->CheckStart();
CHECK(output_channel_ != nullptr);
CHECK(consume_channel_ != nullptr);
VLOG(3) << "output_channel_ size=" << output_channel_->Size()
<< ", consume_channel_ size=" << consume_channel_->Size()
<< ", thread_id=" << thread_id_;
int index = 0;
T instance;
std::vector<T> ins_vec;
ins_vec.reserve(this->default_batch_size_);
while (index < this->default_batch_size_) {
if (output_channel_->Size() == 0) {
break;
if (!enable_heterps_) {
CHECK(output_channel_ != nullptr);
CHECK(consume_channel_ != nullptr);
VLOG(3) << "output_channel_ size=" << output_channel_->Size()
<< ", consume_channel_ size=" << consume_channel_->Size()
<< ", thread_id=" << thread_id_;
int index = 0;
T instance;
std::vector<T> ins_vec;
ins_vec.reserve(this->default_batch_size_);
while (index < this->default_batch_size_) {
if (output_channel_->Size() == 0) {
break;
}
output_channel_->Get(instance);
ins_vec.push_back(instance);
++index;
consume_channel_->Put(std::move(instance));
}
this->batch_size_ = index;
VLOG(3) << "batch_size_=" << this->batch_size_
<< ", thread_id=" << thread_id_;
if (this->batch_size_ != 0) {
PutToFeedVec(ins_vec);
} else {
VLOG(3) << "finish reading, output_channel_ size="
<< output_channel_->Size()
<< ", consume_channel_ size=" << consume_channel_->Size()
<< ", thread_id=" << thread_id_;
}
output_channel_->Get(instance);
ins_vec.push_back(instance);
++index;
consume_channel_->Put(std::move(instance));
}
this->batch_size_ = index;
VLOG(3) << "batch_size_=" << this->batch_size_
<< ", thread_id=" << thread_id_;
if (this->batch_size_ != 0) {
PutToFeedVec(ins_vec);
} else {
VLOG(3) << "finish reading, output_channel_ size="
<< output_channel_->Size()
<< ", consume_channel_ size=" << consume_channel_->Size()
VLOG(3) << "enable heter NEXT: " << offset_index_
<< " batch_offsets: " << batch_offsets_.size();
if (offset_index_ >= batch_offsets_.size()) {
VLOG(3) << "offset_index: " << offset_index_
<< " batch_offsets: " << batch_offsets_.size();
return 0;
}
auto& batch = batch_offsets_[offset_index_++];
this->batch_size_ = batch.second;
VLOG(3) << "batch_size_=" << this->batch_size_
<< ", thread_id=" << thread_id_;
if (this->batch_size_ != 0) {
PutToFeedVec(&records_[batch.first], this->batch_size_);
} else {
VLOG(3) << "finish reading for heterps, batch size zero, thread_id="
<< thread_id_;
}
/*
if (offset_index_ == batch_offsets_.size() - 1) {
std::vector<Record> data;
output_channel_->ReadAll(data);
consume_channel_->Write(std::move(data));
}
*/
VLOG(3) << "#15 enable heter NEXT: " << offset_index_
<< " batch_offsets: " << batch_offsets_.size()
<< " baych_size: " << this->batch_size_;
}
return this->batch_size_;
#else
Expand Down Expand Up @@ -1141,6 +1176,103 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstance(Record* instance) {
return false;
}

// Packs `num` consecutive Record instances, starting at `ins_vec`, into the
// per-slot feed tensors held in feed_vec_.
//
// Pointer + count counterpart of the PutToFeedVec(const std::vector<Record>&)
// overload. The whole body is guarded by _LINUX; on other platforms the
// function does nothing.
//
// Steps:
//   1. reset the per-slot staging buffers and offset lists;
//   2. append every instance's feasigns to its slot buffer and record one
//      offset per instance per used slot;
//   3. for each used slot, copy the staged values into the feed tensor and set
//      its LoD / shape according to input_type_ and use_slots_is_dense_.
void MultiSlotInMemoryDataFeed::PutToFeedVec(const Record* ins_vec, int num) {
#ifdef _LINUX
// Step 1: clear staging buffers; every offset list restarts as {0}.
for (size_t i = 0; i < batch_float_feasigns_.size(); ++i) {
batch_float_feasigns_[i].clear();
batch_uint64_feasigns_[i].clear();
offset_[i].clear();
offset_[i].push_back(0);
}
ins_content_vec_.clear();
ins_content_vec_.reserve(num);
ins_id_vec_.clear();
ins_id_vec_.reserve(num);
// Step 2: walk the instances in order.
for (int i = 0; i < num; ++i) {
auto& r = ins_vec[i];
ins_id_vec_.push_back(r.ins_id_);
ins_content_vec_.push_back(r.content_);
// Append each feasign to the buffer of the slot it names and flag that slot
// as seen for this instance.
for (auto& item : r.float_feasigns_) {
batch_float_feasigns_[item.slot()].push_back(item.sign().float_feasign_);
visit_[item.slot()] = true;
}
for (auto& item : r.uint64_feasigns_) {
batch_uint64_feasigns_[item.slot()].push_back(
item.sign().uint64_feasign_);
visit_[item.slot()] = true;
}
// NOTE(review): all_slots_type_ is indexed here with the use_slots_ position
// j, while the buffers above are indexed with item.slot() - presumably both
// refer to the same slot numbering; confirm.
for (size_t j = 0; j < use_slots_.size(); ++j) {
const auto& type = all_slots_type_[j];
if (visit_[j]) {
// Slot had data for this instance; clear the flag for the next instance.
visit_[j] = false;
} else {
// fill slot value with default value 0
if (type[0] == 'f') { // float
batch_float_feasigns_[j].push_back(0.0);
} else if (type[0] == 'u') { // uint64
batch_uint64_feasigns_[j].push_back(0);
}
}
// get offset of this ins in this slot
// (the running size of the slot buffer after this instance was appended)
if (type[0] == 'f') { // float
offset_[j].push_back(batch_float_feasigns_[j].size());
} else if (type[0] == 'u') { // uint64
offset_[j].push_back(batch_uint64_feasigns_[j].size());
}
}
}

// Step 3: materialise each used slot into its feed tensor.
for (size_t i = 0; i < use_slots_.size(); ++i) {
// Slots without a bound feed tensor are skipped.
if (feed_vec_[i] == nullptr) {
continue;
}
// The last offset equals the number of values staged for this slot.
int total_instance = offset_[i].back();
const auto& type = all_slots_type_[i];
if (type[0] == 'f') { // float
float* feasign = batch_float_feasigns_[i].data();
float* tensor_ptr =
feed_vec_[i]->mutable_data<float>({total_instance, 1}, this->place_);
CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(float));
} else if (type[0] == 'u') { // uint64
// no uint64_t type in paddlepaddle
// so the uint64 values are handed to an int64 tensor of the same byte size.
uint64_t* feasign = batch_uint64_feasigns_[i].data();
int64_t* tensor_ptr = feed_vec_[i]->mutable_data<int64_t>(
{total_instance, 1}, this->place_);
CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(int64_t));
}
// slot_offset aliases offset_[i]; the assignment further down therefore
// rewrites the member, not a local copy.
auto& slot_offset = offset_[i];
if (this->input_type_ == 0) {
// input_type_ 0: the per-instance offsets are used directly as the LoD.
LoD data_lod{slot_offset};
feed_vec_[i]->set_lod(data_lod);
} else if (this->input_type_ == 1) {
if (!use_slots_is_dense_[i]) {
// Non-dense slot with input_type_ 1: the offset list must hold exactly two
// entries (the leading 0 plus one instance offset). It is then replaced by
// 0, 1, ..., max_size so that every value gets its own LoD entry.
std::vector<size_t> tmp_offset;
PADDLE_ENFORCE_EQ(slot_offset.size(), 2,
platform::errors::InvalidArgument(
"In batch reader, the sparse tensor lod size "
"must be 2, but received %d.",
slot_offset.size()));
const auto& max_size = slot_offset[1];
tmp_offset.reserve(max_size + 1);
for (unsigned int k = 0; k <= max_size; k++) {
tmp_offset.emplace_back(k);
}
slot_offset = tmp_offset;
LoD data_lod{slot_offset};
feed_vec_[i]->set_lod(data_lod);
}
}
if (use_slots_is_dense_[i]) {
// Dense slot: when an inductive dimension is configured (index != -1) it is
// derived from the number of staged values, then the tensor is resized to
// the slot's shape.
if (inductive_shape_index_[i] != -1) {
use_slots_shape_[i][inductive_shape_index_[i]] =
total_instance / total_dims_without_inductive_[i];
}
feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i]));
}
}
#endif
}

void MultiSlotInMemoryDataFeed::PutToFeedVec(
const std::vector<Record>& ins_vec) {
#ifdef _LINUX
Expand Down
Loading

1 comment on commit d7b884d

@paddle-bot-old
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Congratulation! Your pull request passed all required CI. You could ask reviewer(s) to approve and merge. 🎉

Please sign in to comment.