Commit 8d2a862

Merge remote-tracking branch 'upstream/develop' into save_load/jit-func
hbwx24 committed Apr 26, 2021
2 parents a2c6960 + 8fec3c6 commit 8d2a862
Showing 404 changed files with 20,212 additions and 4,734 deletions.
36 changes: 23 additions & 13 deletions CMakeLists.txt
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License

cmake_minimum_required(VERSION 3.15)
cmake_minimum_required(VERSION 3.10)
cmake_policy(VERSION 3.10)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
@@ -22,9 +22,6 @@ include(system)

project(paddle CXX C)

include(init)
include(generic) # simplify cmake module

# enable language CUDA
# TODO(Shibo Tao): remove find_package(CUDA) completely.
find_package(CUDA QUIET)
@@ -34,10 +31,14 @@ option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF)
option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
# NOTE(zhiqiu): WITH_ASCEND_CL can be compiled on x86_64, so we can set WITH_ASCEND=OFF and WITH_ASCEND_CL=ON
# to develop some acl related functionality on x86
option(WITH_ASCEND_CL "Compile PaddlePaddle with ASCEND CL" ${WITH_ASCEND})
option(WITH_ASCEND_CXX11 "Compile PaddlePaddle with ASCEND and CXX11 ABI" OFF)
# Note(zhouwei): it uses the options above, so it is placed here
include(init)
include(generic) # simplify cmake module
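A minimal sketch of why the ordering matters, assuming init.cmake/generic.cmake branch on the options above (their behavior is not shown in this diff):

    # If include(generic) ran before option(), generic.cmake would see an
    # undefined WITH_ASCEND_CL; declaring options first makes the flag visible.
    option(WITH_ASCEND_CL "Compile PaddlePaddle with ASCEND CL" OFF)
    include(generic)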

if (WITH_GPU AND WITH_XPU)
message(FATAL_ERROR "Error when compiling with GPU and XPU at the same time")
endif()
@@ -65,7 +66,7 @@ if(WITH_MUSL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy")
endif()

if(WITH_ASCEND AND NOT WITH_ASCEND_CXX11)
if(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11)
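# NOTE: _GLIBCXX_USE_CXX11_ABI=0 selects the pre-C++11 ABI, presumably to
# link against prebuilt Ascend libraries built without the new ABI.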
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
endif()

@@ -103,9 +104,11 @@ if(WIN32)
endif()
endforeach(flag_var)
endif()

# NOTE(Avin0323): A lower parallel count results in faster compilation.
math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")

# NOTE(zhouwei25): temporarily change MP to 1 to reduce CPU & memory utilization
set(PROCESS_MAX 1)
#math(EXPR PROCESS_MAX "${CPU_CORES} * 1 / 2")
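PROCESS_MAX presumably feeds MSVC's /MP switch in the loop below; a hedged sketch of that consumption (the exact wiring sits outside this hunk):

    # Assumed consumer of PROCESS_MAX: cap MSVC's parallel compile jobs.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP${PROCESS_MAX}")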

# Windows builds turn off warnings and use parallel compiling.
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
@@ -133,6 +136,9 @@ if(WIN32)

foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS)
set(${flag_var} "${${flag_var}} /ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221")
if(MSVC_STATIC_CRT)
set(${flag_var} "${${flag_var}} /NODEFAULTLIB:MSVCRT.LIB")
endif()
endforeach(flag_var)

if (WITH_WIN_DUMP_DBG)
@@ -182,7 +188,6 @@ option(WITH_PSLIB "Compile with pslib support" OFF)
option(WITH_BOX_PS "Compile with box_ps support" OFF)
option(WITH_XBYAK "Compile with xbyak support" ON)
option(WITH_CONTRIB "Compile the third-party contribution" OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE})
option(WITH_HETERPS "Compile with heterps" OFF)
option(WITH_INFERENCE_API_TEST "Test fluid inference C++ high-level api interface" OFF)
@@ -199,6 +204,7 @@ option(WITH_SW "Compile PaddlePaddle with sw support" OFF)
option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
option(WITH_MUSL "Compile with musl libc instead of glibc" OFF)
option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)
option(WITH_STRIP "Strip the .so files of whl packages" OFF)

# PY_VERSION
if(NOT PY_VERSION)
@@ -259,9 +265,6 @@ endif()

if(WITH_BRPC_RDMA)
message(STATUS "Use brpc with rdma.")
if(WITH_GRPC)
message(FATAL_ERROR "Can't use grpc with brpc rdma.")
endif()
if(NOT WITH_DISTRIBUTE)
message(FATAL_ERROR "Can't use brpc rdma in no distribute env.")
endif()
@@ -366,6 +369,13 @@ else()
message(WARNING "In inference mode, some specific optimizations will be applied. Turn on the ON_INFER flag only when building inference_lib.")
endif()

if(WITH_STRIP)
find_program(STRIP_PATH strip)
if(NOT STRIP_PATH OR NOT LINUX)
set(WITH_STRIP OFF CACHE STRING "Command strip is only used on Linux when it exists." FORCE)
endif()
endif()
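This hunk only validates WITH_STRIP; the consumer lives elsewhere in the build. A minimal sketch, assuming the flag drives a post-build strip of shared libraries (target name hypothetical):

    if(WITH_STRIP)
      add_custom_command(TARGET some_paddle_lib POST_BUILD
        COMMAND ${STRIP_PATH} $<TARGET_FILE:some_paddle_lib>
        COMMENT "Strip symbols to shrink the wheel")
    endif()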

add_subdirectory(paddle)
if(WITH_PYTHON)
add_subdirectory(python)
4 changes: 0 additions & 4 deletions cmake/configure.cmake
@@ -177,10 +177,6 @@ if(WITH_HETERPS)
add_definitions(-DPADDLE_WITH_HETERPS)
endif()

if(WITH_GRPC)
add_definitions(-DPADDLE_WITH_GRPC)
endif(WITH_GRPC)

if(WITH_BRPC_RDMA)
add_definitions(-DPADDLE_WITH_BRPC_RDMA)
endif(WITH_BRPC_RDMA)
8 changes: 6 additions & 2 deletions cmake/external/ascend.cmake
@@ -26,7 +26,8 @@ if(EXISTS ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include/graph/ascend_str
add_definitions(-DPADDLE_WITH_ASCEND_STRING)
endif()

if(WITH_ASCEND)

if(WITH_ASCEND OR WITH_ASCEND_CL)
set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64)
set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common)
set(ASCEND_DRIVER_SHARE_DIR ${ASCEND_DIR}/driver/lib64/share)
@@ -49,7 +50,6 @@ if(WITH_ASCEND)
INCLUDE_DIRECTORIES(${ATLAS_RUNTIME_INC_DIR})



ADD_LIBRARY(ascend_ge SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET ascend_ge PROPERTY IMPORTED_LOCATION ${atlas_ge_runner_lib})

@@ -65,6 +65,7 @@ endif()
if(WITH_ASCEND_CL)
set(ASCEND_CL_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64)

set(ascend_hccl_lib ${ASCEND_CL_DIR}/libhccl.so)
set(ascendcl_lib ${ASCEND_CL_DIR}/libascendcl.so)
set(acl_op_compiler_lib ${ASCEND_CL_DIR}/libacl_op_compiler.so)
set(FWKACLLIB_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include)
@@ -78,6 +79,9 @@ if(WITH_ASCEND_CL)
ADD_LIBRARY(ascendcl SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET ascendcl PROPERTY IMPORTED_LOCATION ${ascendcl_lib})

ADD_LIBRARY(ascend_hccl SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET ascend_hccl PROPERTY IMPORTED_LOCATION ${ascend_hccl_lib})

ADD_LIBRARY(acl_op_compiler SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET acl_op_compiler PROPERTY IMPORTED_LOCATION ${acl_op_compiler_lib})
add_custom_target(extern_ascend_cl DEPENDS ascendcl acl_op_compiler)
19 changes: 15 additions & 4 deletions cmake/generic.cmake
@@ -447,9 +447,20 @@ function(cc_test TARGET_NAME)
cc_test_build(${TARGET_NAME}
SRCS ${cc_test_SRCS}
DEPS ${cc_test_DEPS})
cc_test_run(${TARGET_NAME}
COMMAND ${TARGET_NAME}
ARGS ${cc_test_ARGS})
# we don't test hcom ops, because they need complex configuration
# with more than one machine
if(NOT ("${TARGET_NAME}" STREQUAL "c_broadcast_op_npu_test" OR
"${TARGET_NAME}" STREQUAL "c_allreduce_sum_op_npu_test" OR
"${TARGET_NAME}" STREQUAL "c_allreduce_max_op_npu_test" OR
"${TARGET_NAME}" STREQUAL "c_reducescatter_op_npu_test" OR
"${TARGET_NAME}" STREQUAL "c_allgather_op_npu_test" OR
"${TARGET_NAME}" STREQUAL "send_v2_op_npu_test" OR
"${TARGET_NAME}" STREQUAL "c_reduce_sum_op_npu_test" OR
"${TARGET_NAME}" STREQUAL "recv_v2_op_npu_test"))
cc_test_run(${TARGET_NAME}
COMMAND ${TARGET_NAME}
ARGS ${cc_test_ARGS})
endif()
endif()
endfunction(cc_test)
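The chain of STREQUAL comparisons works, but the same exclusion could be written with a list; an equivalent sketch (not what the commit ships):

    # Keep the skipped multi-machine NPU tests in one list and use IN_LIST.
    set(NPU_MULTI_MACHINE_TESTS
        c_broadcast_op_npu_test c_allreduce_sum_op_npu_test
        c_allreduce_max_op_npu_test c_reducescatter_op_npu_test
        c_allgather_op_npu_test send_v2_op_npu_test
        c_reduce_sum_op_npu_test recv_v2_op_npu_test)
    if(NOT ("${TARGET_NAME}" IN_LIST NPU_MULTI_MACHINE_TESTS))
      cc_test_run(${TARGET_NAME} COMMAND ${TARGET_NAME} ARGS ${cc_test_ARGS})
    endif()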

@@ -807,7 +818,7 @@ function(py_test TARGET_NAME)
${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()

if (WIN32)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
endif()
4 changes: 2 additions & 2 deletions cmake/inference_lib.cmake
@@ -211,11 +211,11 @@ set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
if(WIN32)
set(paddle_inference_c_lib $<TARGET_FILE_DIR:paddle_inference_c>/paddle_inference_c.*)
else(WIN32)
set(paddle_inference_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_inference_c.*)
set(paddle_inference_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi_exp/libpaddle_inference_c.*)
endif(WIN32)

copy(inference_lib_dist
SRCS ${src_dir}/inference/capi/paddle_c_api.h ${paddle_inference_c_lib}
SRCS ${src_dir}/inference/capi_exp/pd_*.h ${paddle_inference_c_lib}
DSTS ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/lib)

# fluid library for both train and inference
4 changes: 2 additions & 2 deletions cmake/init.cmake
@@ -18,10 +18,10 @@ if(NOT WIN32)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG")
else()
# It has not been used now, it can specify CUDA compile flag manually,
# It can specify CUDA compile flag manually,
# its use is to remove /Zi to reduce GPU static library size. But it's dangerous
# because CUDA will be updated by nvidia, then errors will occur.
# Now, it's used in CUDA:[10.0, 10.2]
# Now, it's only used in VS2015 + CUDA:[10.0, 10.2]
set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props)
endif()

1 change: 0 additions & 1 deletion cmake/paddle_win.props
@@ -88,4 +88,3 @@ set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
</ClCompile>
</ItemDefinitionGroup>
</Project>

14 changes: 7 additions & 7 deletions cmake/third_party.cmake
@@ -29,9 +29,9 @@ set(third_party_deps)
# 2. REPOSITORY: specify git REPOSITORY of 3rd party
# 3. TAG: specify git tag/branch/commitID of 3rd party
# 4. DIR: overwrite the original SOURCE_DIR when cache directory
#
# The function returns 1 PARENT_SCOPE variable:
# - ${TARGET}_DOWNLOAD_CMD: Simply place "${TARGET}_DOWNLOAD_CMD" in ExternalProject_Add,
# and you no longer need to set any download steps in ExternalProject_Add.
# For example:
# Cache_third_party(${TARGET}
@@ -52,7 +52,7 @@ FUNCTION(cache_third_party TARGET)
SET(${TARGET_NAME}_DOWNLOAD_CMD
GIT_REPOSITORY ${cache_third_party_REPOSITORY})
IF(cache_third_party_TAG)
LIST(APPEND ${TARGET_NAME}_DOWNLOAD_CMD
GIT_TAG ${cache_third_party_TAG})
ENDIF()
ELSEIF(cache_third_party_URL)
@@ -130,7 +130,7 @@ ENDFUNCTION()
# Correct flags on different platforms (WIN/MAC) and print warning messages
if (APPLE)
if(WITH_MKL)
MESSAGE(WARNING
"Mac is not supported with MKL in Paddle yet. Force WITH_MKL=OFF.")
set(WITH_MKL OFF CACHE STRING "Disable MKL for building on mac" FORCE)
endif()
@@ -141,7 +141,7 @@ if(WIN32 OR APPLE)
SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE)

if(WITH_LIBXSMM)
MESSAGE(WARNING
"Windows, Mac are not supported with libxsmm in Paddle yet."
"Force WITH_LIBXSMM=OFF")
SET(WITH_LIBXSMM OFF CACHE STRING "Disable LIBXSMM in Windows and MacOS" FORCE)
@@ -276,7 +276,7 @@ endif(WITH_BOX_PS)

if(WITH_ASCEND OR WITH_ASCEND_CL)
include(external/ascend)
if(WITH_ASCEND)
if(WITH_ASCEND OR WITH_ASCEND_CL)
list(APPEND third_party_deps extern_ascend)
endif()
if(WITH_ASCEND_CL)
@@ -290,7 +290,7 @@ if (WITH_PSCORE)

include(external/leveldb)
list(APPEND third_party_deps extern_leveldb)

include(external/brpc)
list(APPEND third_party_deps extern_brpc)

84 changes: 84 additions & 0 deletions go/demo/mobilenet_c_exp.cc
@@ -0,0 +1,84 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <pd_inference_api.h>
#include <stdio.h>
#include <stdlib.h>

void ReadData(float* data, int size);

int main(int argc, char* argv[]) {
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, "data/model/__model__", "data/model/__params__");
PD_ConfigDisableGlogInfo(config);

PD_Predictor* predictor = PD_PredictorCreate(config);
// config has been destroyed inside PD_PredictorCreate
config = NULL;

int input_num = PD_PredictorGetInputNum(predictor);
printf("Input num: %d\n", input_num);
int output_num = PD_PredictorGetOutputNum(predictor);
printf("Output num: %d\n", output_num);

PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
PD_Tensor* input_tensor =
PD_PredictorGetInputHandle(predictor, input_names->data[0]);
PD_OneDimArrayCstrDestroy(input_names);
input_names = NULL;

int32_t shape[] = {1, 3, 300, 300};
float* data = (float*)malloc(sizeof(float) * 1 * 3 * 300 * 300); // NOLINT
ReadData(data, 1 * 3 * 300 * 300); // NOLINT
PD_TensorReshape(input_tensor, 4, shape);
PD_TensorCopyFromCpuFloat(input_tensor, data);
free(data);
data = NULL;
PD_PredictorRun(predictor);

PD_OneDimArrayCstr* output_names = PD_PredictorGetOutputNames(predictor);
PD_Tensor* output_tensor =
PD_PredictorGetOutputHandle(predictor, output_names->data[0]);
PD_OneDimArrayCstrDestroy(output_names);
output_names = nullptr;

PD_OneDimArrayInt32* out_shape = PD_TensorGetShape(output_tensor);
int32_t size = 1;
for (size_t index = 0; index < out_shape->size; ++index) {
size = size * out_shape->data[index];
}
PD_OneDimArrayInt32Destroy(out_shape);
out_shape = NULL;

data = (float*)malloc(sizeof(float) * size); // NOLINT
PD_TensorCopyToCpuFloat(output_tensor, data);
free(data);
data = NULL;

PD_TensorDestroy(output_tensor);
output_tensor = NULL;
PD_TensorDestroy(input_tensor);
input_tensor = NULL;
PD_PredictorDestroy(predictor);
predictor = NULL;

return 0;
}

void ReadData(float* data, int n) {
FILE* fp = fopen("data/data.txt", "r");
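// Demo-only I/O: fopen/fscanf results are intentionally left unchecked.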
for (int i = 0; i < n; i++) {
fscanf(fp, "%f", &data[i]);
}
fclose(fp);
}
2 changes: 1 addition & 1 deletion paddle/extension.h
@@ -15,4 +15,4 @@ limitations under the License. */
#pragma once

// All paddle apis in C++ frontend
#include "paddle/fluid/extension/include/ext_all.h"
#include "paddle/extension/include/ext_all.h"
12 changes: 0 additions & 12 deletions paddle/fluid/distributed/service/graph_py_service.h
@@ -54,19 +54,7 @@ class GraphPyService {
std::vector<std::string> table_feat_conf_feat_dtype;
std::vector<int32_t> table_feat_conf_feat_shape;

// std::thread *server_thread, *client_thread;

// std::shared_ptr<paddle::distributed::PSServer> pserver_ptr;

// std::shared_ptr<paddle::distributed::PSClient> worker_ptr;

public:
// std::shared_ptr<paddle::distributed::PSServer> get_ps_server() {
// return pserver_ptr;
// }
// std::shared_ptr<paddle::distributed::PSClient> get_ps_client() {
// return worker_ptr;
// }
int get_shard_num() { return shard_num; }
void set_shard_num(int shard_num) { this->shard_num = shard_num; }
void GetDownpourSparseTableProto(
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/table/common_graph_table.cc
@@ -171,7 +171,7 @@ int32_t GraphTable::load_nodes(const std::string &path, std::string node_type) {

int32_t GraphTable::load_edges(const std::string &path, bool reverse_edge) {
auto paths = paddle::string::split_string<std::string>(path, ";");
int count = 0;
int64_t count = 0;
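// int64_t: edge counts on large graphs can overflow a 32-bit int.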
std::string sample_type = "random";
bool is_weighted = false;
int valid_count = 0;