Add a c-api interface to initialize the thread environment of Paddle #5773
Changes from 6 commits
@@ -43,4 +43,11 @@ paddle_error paddle_init(int argc, char** argv) {
  isInit = true;
  return kPD_NO_ERROR;
}

paddle_error paddle_init_thread() {
  if (FLAGS_use_gpu) {
    hl_init(FLAGS_gpu_id);
  }
  return kPD_NO_ERROR;
}
}

Review comment (on the hl_init call): hl_init will set the t_resource.device to -1, is it a bug?
@@ -0,0 +1,32 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "error.h"

const char* paddle_error_string(paddle_error err) {
  switch (err) {
    case kPD_NULLPTR:
      return "nullptr error";
    case kPD_OUT_OF_RANGE:
      return "out of range error";
    case kPD_PROTOBUF_ERROR:
      return "protobuf error";
    case kPD_NOT_SUPPORTED:
      return "not supported error";
    case kPD_UNDEFINED_ERROR:
      return "undefined error";
    default:
      return "";
  }
}
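The examples in this PR exercise the C-API through a CHECK macro defined in examples/model_inference/common/common.h. As a hedged sketch, a helper in that spirit built on the new paddle_error_string could look like the following (the macro actually shipped in common.h may differ):

#include <stdio.h>
#include <stdlib.h>
#include <paddle/capi.h>

/* Sketch only: abort with a readable message when a C-API call fails.
 * Assumes paddle_error_string is exported through paddle/capi.h. */
#define CHECK(stmt)                                               \
  do {                                                            \
    paddle_error err__ = (stmt);                                  \
    if (err__ != kPD_NO_ERROR) {                                  \
      fprintf(stderr, "%s:%d: paddle error: %s\n",                \
              __FILE__, __LINE__, paddle_error_string(err__));    \
      exit(1);                                                    \
    }                                                             \
  } while (0)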
@@ -1,8 +1,29 @@
 project(multi_thread)
 cmake_minimum_required(VERSION 2.8)
-aux_source_directory(. SRC_LIST)
-add_executable(${PROJECT_NAME} ${SRC_LIST})

 find_package (Threads)

+if(NOT PADDLE_ROOT)
+  set(PADDLE_ROOT $ENV{PADDLE_ROOT} CACHE PATH "Paddle Path")
+endif()
+if(PADDLE_ROOT)
+  include_directories(${PADDLE_ROOT}/include)
+  link_directories(${PADDLE_ROOT}/lib)
+endif()
+
+set(CPU_SRCS main.c)
+add_executable(${PROJECT_NAME} ${CPU_SRCS})
+set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99)
-target_link_libraries(${PROJECT_NAME} -lpaddle_capi_shared
-                      ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${PROJECT_NAME}
+                      -lpaddle_capi_shared
+                      ${CMAKE_THREAD_LIBS_INIT})
+
+find_package(CUDA QUIET)
+if(CUDA_FOUND)
+  set(GPU_SRCS main_gpu.c)
+  cuda_add_executable(${PROJECT_NAME}_gpu ${GPU_SRCS})
+  set_property(TARGET ${PROJECT_NAME}_gpu PROPERTY C_STANDARD 99)
+  target_link_libraries(${PROJECT_NAME}_gpu
+                        -lpaddle_capi_shared
+                        ${CMAKE_THREAD_LIBS_INIT})
+endif(CUDA_FOUND)
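Assuming a Paddle installation whose include/ and lib/ directories match the layout referenced above, configuring and building this example could look like the following (the install path is illustrative):

mkdir build && cd build
cmake -DPADDLE_ROOT=/path/to/paddle/install ..
make   # builds multi_thread, plus multi_thread_gpu when CUDA is found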
@@ -0,0 +1,106 @@
#include <paddle/capi.h>
#include <pthread.h>
#include <time.h>
#include "../common/common.h"

Review comment: Add some comments about this example. For example, this is an inference implementation where multiple threads share a GPU.
Author reply: Done.

#define CONFIG_BIN "./trainer_config.bin"
#define NUM_THREAD 4
#define NUM_ITER 1000

pthread_mutex_t mutex;

void* thread_main(void* gm_ptr) {
  // Initialize the thread environment of Paddle.
  CHECK(paddle_init_thread());

  paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr);
  // Create input arguments.
  paddle_arguments in_args = paddle_arguments_create_none();
  // Create input matrix.
  paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1,
                                           /* size */ 784,
                                           /* useGPU */ true);
  // Create output arguments.
  paddle_arguments out_args = paddle_arguments_create_none();
  // Create output matrix.
  paddle_matrix prob = paddle_matrix_create_none();

  // CPU buffers to cache the input and output.
  paddle_real* cpu_input = (paddle_real*)malloc(784 * sizeof(paddle_real));
  paddle_real* cpu_output = (paddle_real*)malloc(10 * sizeof(paddle_real));
  for (int iter = 0; iter < NUM_ITER; ++iter) {
    // There is only one input of this network.
    CHECK(paddle_arguments_resize(in_args, 1));
    CHECK(paddle_arguments_set_value(in_args, 0, mat));

    for (int i = 0; i < 784; ++i) {
      cpu_input[i] = rand() / ((float)RAND_MAX);
    }
    CHECK(paddle_matrix_set_value(mat, cpu_input));

    CHECK(paddle_gradient_machine_forward(machine,
                                          in_args,
                                          out_args,
                                          /* isTrain */ false));

    CHECK(paddle_arguments_get_value(out_args, 0, prob));
    CHECK(paddle_matrix_get_value(prob, cpu_output));

    pthread_mutex_lock(&mutex);
    printf("Prob: ");
    for (int i = 0; i < 10; ++i) {
      printf("%.2f ", cpu_output[i]);
    }
    printf("\n");
    pthread_mutex_unlock(&mutex);
  }

  CHECK(paddle_matrix_destroy(prob));
  CHECK(paddle_arguments_destroy(out_args));
  CHECK(paddle_matrix_destroy(mat));
  CHECK(paddle_arguments_destroy(in_args));
  CHECK(paddle_gradient_machine_destroy(machine));

  free(cpu_input);
  free(cpu_output);

  return NULL;
}

int main() {
  // Initialize Paddle.
  char* argv[] = {"--use_gpu=True"};
  CHECK(paddle_init(1, (char**)argv));

  // Read the config binary file. It is generated by `convert_protobin.sh`.
  long size;
  void* buf = read_config(CONFIG_BIN, &size);

  // Create a gradient machine for inference.
  paddle_gradient_machine machine;
  CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size));
  CHECK(paddle_gradient_machine_randomize_param(machine));

  // Loading parameters. Uncomment the following line and change the directory.
  // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine,
  //                                                "./some_where_to_params"));
  srand(time(0));
  pthread_mutex_init(&mutex, NULL);

  pthread_t threads[NUM_THREAD];

  for (int i = 0; i < NUM_THREAD; ++i) {
    paddle_gradient_machine thread_local_machine;
    CHECK(paddle_gradient_machine_create_shared_param(
        machine, buf, size, &thread_local_machine));
    pthread_create(&threads[i], NULL, thread_main, thread_local_machine);
  }

  for (int i = 0; i < NUM_THREAD; ++i) {
    pthread_join(threads[i], NULL);
  }

  pthread_mutex_destroy(&mutex);

  return 0;
}
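As the comment in main() notes, ./trainer_config.bin comes from convert_protobin.sh. Assuming the conventions of the sibling model_inference examples, a run could look like this (the script's exact arguments are not shown in this diff):

sh convert_protobin.sh   # produce ./trainer_config.bin from the model config
./multi_thread_gpu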
@@ -26,6 +26,12 @@ extern "C" {
 */
PD_API paddle_error paddle_init(int argc, char** argv);

/**
 * Initialize the thread environment of Paddle.
 * @note It is required for GPU runs but optional for CPU runs.
 */
PD_API paddle_error paddle_init_thread();

Review comment: Since some existing CPU users do not use this interface, the docs need to state that only GPU runs require it.
Author reply: Done.

#ifdef __cplusplus
}
#endif
Review comment: Need to add a device_id argument. Users may want to initialize the thread to a different device environment.

Reply: If we add a device_id, another question is: when the user invokes the paddle_matrix_create API, which GPU is the matrix on? There is no device id in the paddle_matrix_create API.

Reply: Maybe we need to wrap the hl_get_device interface.

Reply: I tried to add device_id and set it to different values. The binary failed:

Reply: This problem is because the thread shares the same parameter model with the main thread. In this case, you need to pass in the same device id as the main thread.

Reply: I think I know what you mean. In the current mode, the Parameters are shared among multiple threads, but each thread has its own network. If each thread wants to run on a different GPU, it should have its own network + Parameters. Then the Parameters cannot be shared anymore, and we should not use the interface paddle_gradient_machine_create_shared_param, but directly use paddle_gradient_machine_create_for_inference and paddle_gradient_machine_load_parameter_from_disk instead.

Reply (translated from Chinese): paddle_gradient_machine_create_shared_param shares parameters and cannot support running on different GPUs. Either use trainer_count > 1, or call paddle_gradient_machine_create_for_inference and paddle_gradient_machine_load_parameter_from_disk separately in each thread.
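To make the last point concrete, here is a minimal sketch (not part of this PR) of a worker in which each thread builds its own network and loads its own copy of the parameters, so nothing is shared across threads. The worker_args struct and the "./params" path are assumptions for illustration; note that the merged paddle_init_thread takes no device_id, so every thread still initializes the gpu_id given at startup.

typedef struct {
  void* conf_buf;  /* protobin buffer read in main(), as in main_gpu.c */
  long conf_size;
} worker_args;     /* hypothetical helper struct, not in the PR */

void* independent_thread_main(void* p) {
  worker_args* args = (worker_args*)p;

  // Per-thread GPU initialization, same as in the shared-param example.
  CHECK(paddle_init_thread());

  // Each thread creates its own network...
  paddle_gradient_machine machine;
  CHECK(paddle_gradient_machine_create_for_inference(
      &machine, args->conf_buf, (int)args->conf_size));

  // ...and loads its own parameters instead of calling
  // paddle_gradient_machine_create_shared_param. "./params" is a
  // placeholder directory.
  CHECK(paddle_gradient_machine_load_parameter_from_disk(machine, "./params"));

  // Run forward passes exactly as in thread_main() above.

  CHECK(paddle_gradient_machine_destroy(machine));
  return NULL;
}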