From 2c1ed9b8d4e8392c37f32d360f85ccb44d20156f Mon Sep 17 00:00:00 2001 From: liuyuhui Date: Sat, 1 May 2021 14:13:00 +0800 Subject: [PATCH] [Kunlun]fix multi xpu dygraph hang, test=kunlun (#32662) (#32696) --- paddle/fluid/imperative/reducer.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc index a92704ce447dc..bf479e0d797ca 100644 --- a/paddle/fluid/imperative/reducer.cc +++ b/paddle/fluid/imperative/reducer.cc @@ -762,10 +762,11 @@ void Reducer::MarkGroupReady(size_t group_index) { // TODO(liuyuhui): Add try catch to deal with exception later, // otherwise the main thread will continue to run when an exception is // thrown in comm_pool_. - comm_pool_->enqueue([&] { + auto next_group = next_group_; + comm_pool_->enqueue([this, run_order, next_group, &group] { auto dev_id = BOOST_GET_CONST(platform::XPUPlace, place_).device; platform::SetXPUDeviceId(dev_id); - FusedAllReduceSchedule(run_order, group, next_group_); + FusedAllReduceSchedule(run_order, group, next_group); { std::lock_guard lock(mutex_); comm_op_count_ -= 1; // lock