From f1996bcf57a0927688b26e4d21281f9a1c8a6e60 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 13 Nov 2017 10:26:36 +0000 Subject: [PATCH 1/4] Fix bug in MergeModel.cpp. --- paddle/trainer/MergeModel.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/trainer/MergeModel.cpp b/paddle/trainer/MergeModel.cpp index f3cfd9f97fea8..56c38015fb239 100644 --- a/paddle/trainer/MergeModel.cpp +++ b/paddle/trainer/MergeModel.cpp @@ -27,6 +27,9 @@ using namespace paddle; // NOLINT using namespace std; // NOLINT int main(int argc, char** argv) { + initMain(argc, argv); + initPython(argc, argv); + if (FLAGS_model_dir.empty() || FLAGS_config_file.empty() || FLAGS_model_file.empty()) { LOG(INFO) << "Usage: ./paddle_merge_model --model_dir=pass-00000 " @@ -34,9 +37,6 @@ int main(int argc, char** argv) { return 0; } - initMain(argc, argv); - initPython(argc, argv); - string confFile = FLAGS_config_file; #ifndef PADDLE_WITH_CUDA FLAGS_use_gpu = false; From 0cc1b6cfe5c57a44ce36f50252a898bc82ff191a Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 20 Nov 2017 05:03:03 +0000 Subject: [PATCH 2/4] Add a c-api interface to initialize the thread environment of Paddle and add a GPU example. 
--- paddle/capi/Main.cpp | 12 ++ paddle/capi/Matrix.cpp | 6 +- paddle/capi/error.h | 17 +++ .../multi_thread/CMakeLists.txt | 29 ++++- .../model_inference/multi_thread/main.c | 3 + .../model_inference/multi_thread/main_gpu.c | 106 ++++++++++++++++++ paddle/capi/main.h | 5 + paddle/capi/matrix.h | 8 +- 8 files changed, 175 insertions(+), 11 deletions(-) create mode 100644 paddle/capi/examples/model_inference/multi_thread/main_gpu.c diff --git a/paddle/capi/Main.cpp b/paddle/capi/Main.cpp index bb8249a5511c0..85296db9d7d61 100644 --- a/paddle/capi/Main.cpp +++ b/paddle/capi/Main.cpp @@ -43,4 +43,16 @@ paddle_error paddle_init(int argc, char** argv) { isInit = true; return kPD_NO_ERROR; } + +paddle_error paddle_init_thread() { + static bool isInit = false; + if (isInit) return kPD_NO_ERROR; + + if (FLAGS_use_gpu) { + hl_init(FLAGS_gpu_id); + } + + isInit = true; + return kPD_NO_ERROR; +} } diff --git a/paddle/capi/Matrix.cpp b/paddle/capi/Matrix.cpp index d5b55e1c95f24..cbacd1fb71c14 100644 --- a/paddle/capi/Matrix.cpp +++ b/paddle/capi/Matrix.cpp @@ -40,7 +40,7 @@ paddle_error paddle_matrix_destroy(paddle_matrix mat) { paddle_error paddle_matrix_set_row(paddle_matrix mat, uint64_t rowID, paddle_real* rowArray) { - if (mat == nullptr) return kPD_NULLPTR; + if (mat == nullptr || rowArray == nullptr) return kPD_NULLPTR; auto ptr = cast(mat); if (ptr->mat == nullptr) return kPD_NULLPTR; if (rowID >= ptr->mat->getHeight()) return kPD_OUT_OF_RANGE; @@ -55,7 +55,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat, } PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat, - paddle_real* value) { + paddle_real* value) { if (mat == nullptr || value == nullptr) return kPD_NULLPTR; auto ptr = cast(mat); if (ptr->mat == nullptr) return kPD_NULLPTR; @@ -75,7 +75,7 @@ PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat, } PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat, - paddle_real* result) { + paddle_real* result) { if (mat == nullptr || 
result == nullptr) return kPD_NULLPTR; auto ptr = cast(mat); if (ptr->mat == nullptr) return kPD_NULLPTR; diff --git a/paddle/capi/error.h b/paddle/capi/error.h index 44d8c2040d1aa..85853c202a1ff 100644 --- a/paddle/capi/error.h +++ b/paddle/capi/error.h @@ -27,4 +27,21 @@ typedef enum { kPD_UNDEFINED_ERROR = -1, } paddle_error; +static const char* paddle_error_string(paddle_error err) { + switch (err) { + case kPD_NULLPTR: + return "nullptr error"; + case kPD_OUT_OF_RANGE: + return "out of range error"; + case kPD_PROTOBUF_ERROR: + return "protobuf error"; + case kPD_NOT_SUPPORTED: + return "not supported error"; + case kPD_UNDEFINED_ERROR: + return "undefined error"; + default: + return ""; + } +} + #endif diff --git a/paddle/capi/examples/model_inference/multi_thread/CMakeLists.txt b/paddle/capi/examples/model_inference/multi_thread/CMakeLists.txt index 98e411ddc02a4..2fc8debddedea 100644 --- a/paddle/capi/examples/model_inference/multi_thread/CMakeLists.txt +++ b/paddle/capi/examples/model_inference/multi_thread/CMakeLists.txt @@ -1,8 +1,29 @@ project(multi_thread) cmake_minimum_required(VERSION 2.8) -aux_source_directory(. 
SRC_LIST) -add_executable(${PROJECT_NAME} ${SRC_LIST}) + find_package (Threads) + +if(NOT PADDLE_ROOT) + set(PADDLE_ROOT $ENV{PADDLE_ROOT} CACHE PATH "Paddle Path") +endif() +if(PADDLE_ROOT) + include_directories(${PADDLE_ROOT}/include) + link_directories(${PADDLE_ROOT}/lib) +endif() + +set(CPU_SRCS main.c) +add_executable(${PROJECT_NAME} ${CPU_SRCS}) set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99) -target_link_libraries(${PROJECT_NAME} -lpaddle_capi_shared - ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${PROJECT_NAME} + -lpaddle_capi_shared + ${CMAKE_THREAD_LIBS_INIT}) + +find_package(CUDA QUIET) +if(CUDA_FOUND) + set(GPU_SRCS main_gpu.c) + cuda_add_executable(${PROJECT_NAME}_gpu ${GPU_SRCS}) + set_property(TARGET ${PROJECT_NAME}_gpu PROPERTY C_STANDARD 99) + target_link_libraries(${PROJECT_NAME}_gpu + -lpaddle_capi_shared + ${CMAKE_THREAD_LIBS_INIT}) +endif(CUDA_FOUND) diff --git a/paddle/capi/examples/model_inference/multi_thread/main.c b/paddle/capi/examples/model_inference/multi_thread/main.c index d7675cd80a52f..17eda85e64b59 100644 --- a/paddle/capi/examples/model_inference/multi_thread/main.c +++ b/paddle/capi/examples/model_inference/multi_thread/main.c @@ -10,6 +10,9 @@ pthread_mutex_t mutex; void* thread_main(void* gm_ptr) { + // Initialize the thread environment of Paddle. + CHECK(paddle_init_thread()); + paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr); paddle_arguments in_args = paddle_arguments_create_none(); // Create input matrix. 
diff --git a/paddle/capi/examples/model_inference/multi_thread/main_gpu.c b/paddle/capi/examples/model_inference/multi_thread/main_gpu.c new file mode 100644 index 0000000000000..63f2a9eeb09a0 --- /dev/null +++ b/paddle/capi/examples/model_inference/multi_thread/main_gpu.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include "../common/common.h" + +#define CONFIG_BIN "./trainer_config.bin" +#define NUM_THREAD 4 +#define NUM_ITER 1000 + +pthread_mutex_t mutex; + +void* thread_main(void* gm_ptr) { + // Initialize the thread environment of Paddle. + CHECK(paddle_init_thread()); + + paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr); + // Create input arguments. + paddle_arguments in_args = paddle_arguments_create_none(); + // Create input matrix. + paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1, + /* size */ 784, + /* useGPU */ true); + // Create output arguments. + paddle_arguments out_args = paddle_arguments_create_none(); + // Create output matrix. + paddle_matrix prob = paddle_matrix_create_none(); + + // CPU buffer to cache the input and output. + paddle_real* cpu_input = (paddle_real*)malloc(784 * sizeof(paddle_real)); + paddle_real* cpu_output = (paddle_real*)malloc(10 * sizeof(paddle_real)); + for (int iter = 0; iter < NUM_ITER; ++iter) { + // There is only one input of this network. 
+ CHECK(paddle_arguments_resize(in_args, 1)); + CHECK(paddle_arguments_set_value(in_args, 0, mat)); + + for (int i = 0; i < 784; ++i) { + cpu_input[i] = rand() / ((float)RAND_MAX); + } + CHECK(paddle_matrix_set_value(mat, cpu_input)); + + CHECK(paddle_gradient_machine_forward(machine, + in_args, + out_args, + /* isTrain */ false)); + + CHECK(paddle_arguments_get_value(out_args, 0, prob)); + CHECK(paddle_matrix_get_value(prob, cpu_output)); + + pthread_mutex_lock(&mutex); + printf("Prob: "); + for (int i = 0; i < 10; ++i) { + printf("%.2f ", cpu_output[i]); + } + printf("\n"); + pthread_mutex_unlock(&mutex); + } + + CHECK(paddle_matrix_destroy(prob)); + CHECK(paddle_arguments_destroy(out_args)); + CHECK(paddle_matrix_destroy(mat)); + CHECK(paddle_arguments_destroy(in_args)); + CHECK(paddle_gradient_machine_destroy(machine)); + + free(cpu_input); + free(cpu_output); + + return NULL; +} + +int main() { + // Initialize Paddle + char* argv[] = {"--use_gpu=True"}; + CHECK(paddle_init(1, (char**)argv)); + + // Reading config binary file. It is generated by `convert_protobin.sh` + long size; + void* buf = read_config(CONFIG_BIN, &size); + + // Create a gradient machine for inference. + paddle_gradient_machine machine; + CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size)); + CHECK(paddle_gradient_machine_randomize_param(machine)); + + // Loading parameter. Uncomment the following line and change the directory. 
+ // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine, + // "./some_where_to_params")); + srand(time(0)); + pthread_mutex_init(&mutex, NULL); + + pthread_t threads[NUM_THREAD]; + + for (int i = 0; i < NUM_THREAD; ++i) { + paddle_gradient_machine thread_local_machine; + CHECK(paddle_gradient_machine_create_shared_param( + machine, buf, size, &thread_local_machine)); + pthread_create(&threads[i], NULL, thread_main, thread_local_machine); + } + + for (int i = 0; i < NUM_THREAD; ++i) { + pthread_join(threads[i], NULL); + } + + pthread_mutex_destroy(&mutex); + + return 0; +} diff --git a/paddle/capi/main.h b/paddle/capi/main.h index 893ebcbd58dd2..20eb36f3080fc 100644 --- a/paddle/capi/main.h +++ b/paddle/capi/main.h @@ -26,6 +26,11 @@ extern "C" { */ PD_API paddle_error paddle_init(int argc, char** argv); +/** + * Initialize the thread environment of Paddle. + */ +PD_API paddle_error paddle_init_thread(); + #ifdef __cplusplus } #endif diff --git a/paddle/capi/matrix.h b/paddle/capi/matrix.h index 01b8bad2ee9f5..8cc3e0034e058 100644 --- a/paddle/capi/matrix.h +++ b/paddle/capi/matrix.h @@ -79,7 +79,7 @@ PD_API paddle_error paddle_matrix_set_row(paddle_matrix mat, * @note value should contain enough element of data to init the mat */ PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat, - paddle_real* value); + paddle_real* value); /** * @brief PDMatGetRow Get raw row buffer from matrix @@ -93,14 +93,14 @@ PD_API paddle_error paddle_matrix_get_row(paddle_matrix mat, paddle_real** rawRowBuffer); /** - * @brief copy data from the matrix + * @brief copy data from the matrix * @param [in] mat Target matrix - * @param [out] result pointer to store the matrix data + * @param [out] result pointer to store the matrix data * @return paddle_error * @note the space of the result should allocated before invoke this API */ PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat, - paddle_real* result); + paddle_real* result); /** * @brief PDMatCreateNone 
Create None Matrix * @return From ee5df622c9cf63c0d25a794068333156a8e6e8e6 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 20 Nov 2017 07:56:52 +0000 Subject: [PATCH 3/4] Add some notes for paddle_init_thread and move the implementation of paddle_error_string into a .cpp file. --- paddle/capi/Main.cpp | 5 --- paddle/capi/error.cpp | 32 +++++++++++++++++++ paddle/capi/error.h | 22 ++++--------- .../model_inference/multi_thread/main.c | 3 -- paddle/capi/main.h | 1 + 5 files changed, 39 insertions(+), 24 deletions(-) create mode 100644 paddle/capi/error.cpp diff --git a/paddle/capi/Main.cpp b/paddle/capi/Main.cpp index 6f932af16ded1..c038789340033 100644 --- a/paddle/capi/Main.cpp +++ b/paddle/capi/Main.cpp @@ -45,14 +45,9 @@ paddle_error paddle_init(int argc, char** argv) { } paddle_error paddle_init_thread() { - static __thread bool isInit = false; - if (isInit) return kPD_NO_ERROR; - if (FLAGS_use_gpu) { hl_init(FLAGS_gpu_id); } - - isInit = true; return kPD_NO_ERROR; } } diff --git a/paddle/capi/error.cpp b/paddle/capi/error.cpp new file mode 100644 index 0000000000000..169b65f921043 --- /dev/null +++ b/paddle/capi/error.cpp @@ -0,0 +1,32 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "error.h" + +const char* paddle_error_string(paddle_error err) { + switch (err) { + case kPD_NULLPTR: + return "nullptr error"; + case kPD_OUT_OF_RANGE: + return "out of range error"; + case kPD_PROTOBUF_ERROR: + return "protobuf error"; + case kPD_NOT_SUPPORTED: + return "not supported error"; + case kPD_UNDEFINED_ERROR: + return "undefined error"; + default: + return ""; + } +} diff --git a/paddle/capi/error.h b/paddle/capi/error.h index 85853c202a1ff..9d9d0ed63a527 100644 --- a/paddle/capi/error.h +++ b/paddle/capi/error.h @@ -15,6 +15,8 @@ limitations under the License. */ #ifndef __PADDLE_CAPI_ERROR_H__ #define __PADDLE_CAPI_ERROR_H__ +#include "config.h" + /** * Error Type for Paddle API. */ @@ -27,21 +29,9 @@ typedef enum { kPD_UNDEFINED_ERROR = -1, } paddle_error; -static const char* paddle_error_string(paddle_error err) { - switch (err) { - case kPD_NULLPTR: - return "nullptr error"; - case kPD_OUT_OF_RANGE: - return "out of range error"; - case kPD_PROTOBUF_ERROR: - return "protobuf error"; - case kPD_NOT_SUPPORTED: - return "not supported error"; - case kPD_UNDEFINED_ERROR: - return "undefined error"; - default: - return ""; - } -} +/** + * Error string for Paddle API. + */ +PD_API const char* paddle_error_string(paddle_error err); #endif diff --git a/paddle/capi/examples/model_inference/multi_thread/main.c b/paddle/capi/examples/model_inference/multi_thread/main.c index 17eda85e64b59..d7675cd80a52f 100644 --- a/paddle/capi/examples/model_inference/multi_thread/main.c +++ b/paddle/capi/examples/model_inference/multi_thread/main.c @@ -10,9 +10,6 @@ pthread_mutex_t mutex; void* thread_main(void* gm_ptr) { - // Initialize the thread environment of Paddle. - CHECK(paddle_init_thread()); - paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr); paddle_arguments in_args = paddle_arguments_create_none(); // Create input matrix. 
diff --git a/paddle/capi/main.h b/paddle/capi/main.h index 20eb36f3080fc..ffa4caa05a3b4 100644 --- a/paddle/capi/main.h +++ b/paddle/capi/main.h @@ -28,6 +28,7 @@ PD_API paddle_error paddle_init(int argc, char** argv); /** * Initialize the thread environment of Paddle. + * @note it is requisite for GPU runs but optional for CPU runs. */ PD_API paddle_error paddle_init_thread(); From 68f6b80acdb7ed7abf3ce56d17d759d12183c266 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 8 Dec 2017 02:44:53 +0000 Subject: [PATCH 4/4] Add some comments. --- .../examples/model_inference/multi_thread/main_gpu.c | 9 ++++++++- paddle/capi/main.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/paddle/capi/examples/model_inference/multi_thread/main_gpu.c b/paddle/capi/examples/model_inference/multi_thread/main_gpu.c index 63f2a9eeb09a0..6fd376e0d1a2f 100644 --- a/paddle/capi/examples/model_inference/multi_thread/main_gpu.c +++ b/paddle/capi/examples/model_inference/multi_thread/main_gpu.c @@ -9,6 +9,13 @@ pthread_mutex_t mutex; +/* + * @brief It is a simple inference example that runs multi-threads on a GPU. + * Each thread holds its own local gradient_machine but shares the same + * parameters. + * If you want to run on different GPUs, you need to launch + * multi-processes or set trainer_count > 1. + */ void* thread_main(void* gm_ptr) { // Initialize the thread environment of Paddle. CHECK(paddle_init_thread()); @@ -29,7 +36,7 @@ void* thread_main(void* gm_ptr) { paddle_real* cpu_input = (paddle_real*)malloc(784 * sizeof(paddle_real)); paddle_real* cpu_output = (paddle_real*)malloc(10 * sizeof(paddle_real)); for (int iter = 0; iter < NUM_ITER; ++iter) { - // There is only one input of this network. + // There is only one input layer of this network. 
CHECK(paddle_arguments_resize(in_args, 1)); CHECK(paddle_arguments_set_value(in_args, 0, mat)); diff --git a/paddle/capi/main.h b/paddle/capi/main.h index ffa4caa05a3b4..99c4e8428dbaa 100644 --- a/paddle/capi/main.h +++ b/paddle/capi/main.h @@ -29,6 +29,7 @@ PD_API paddle_error paddle_init(int argc, char** argv); /** * Initialize the thread environment of Paddle. * @note it is requisite for GPU runs but optional for CPU runs. + * For GPU runs, all threads will run on the same GPU devices. */ PD_API paddle_error paddle_init_thread();