Skip to content

Commit

Permalink
Merge branch 'paddlebox' into paddlebox
Browse files Browse the repository at this point in the history
  • Loading branch information
tiancaitzp committed Oct 19, 2023
2 parents 39187f9 + e738f10 commit 5cf096e
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 0 deletions.
3 changes: 3 additions & 0 deletions paddle/fluid/framework/boxps_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,9 @@ void BoxPSWorker::SyncParam(void) {
TensorScaleValue(place_, param_sync_, &param_sync_, scale);
PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamSynchronize(stream));
#elif defined(PADDLE_WITH_XPU_BKCL) || defined(PADDLE_WITH_XPU)
// Other dense ops use the default stream, so we need to wait for their computation to finish before calling bkcl_all_reduce.
xpu_wait(0);

PADDLE_ENFORCE_EQ(
bkcl_all_reduce(comm->comm(),
sendbuff,
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/operators/collective/c_allreduce_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,9 @@ class CAllReduceOpXPUKernel : public framework::OpKernel<T> {
"Invalid reduce type: %d", red_type));
}

// Other dense ops use the default stream, so we need to wait for their computation to finish before calling bkcl_all_reduce.
xpu_wait(0);

PADDLE_ENFORCE_EQ(
bkcl_all_reduce(comm->comm(),
sendbuff,
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/operators/collective/c_mixallgather_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,10 @@ class CMixAllGatherOpXPUKernel : public framework::OpKernel<T> {
#ifdef TRACE_PROFILE
TRACE_SCOPE_START("bkcl_all_reduce", xpu_wait(stream));
#endif

// Other dense ops use the default stream, so we need to wait for their computation to finish before calling bkcl_all_reduce.
xpu_wait(0);

PADDLE_ENFORCE_EQ(
bkcl_all_reduce(comm->comm(),
recvbuff,
Expand Down

0 comments on commit 5cf096e

Please sign in to comment.