diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h b/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h
index f211e15b13e28..9531d74adbaa4 100644
--- a/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h
+++ b/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h
@@ -449,16 +449,18 @@ int HeterComm<KeyType, ValType, GradType>::get_index_by_devid(int devid) {
 template <typename KeyType, typename ValType, typename GradType>
 void HeterComm<KeyType, ValType, GradType>::set_sparse_sgd(
     const OptimizerConfig& optimizer_config) {
-  for (auto& table : tables_) {
-    table->set_sparse_sgd(optimizer_config);
+  for (int i = 0; i < resource_->total_device(); ++i) {
+    AnyDeviceGuard guard(resource_->dev_id(i));
+    ptr_tables_[i]->set_sparse_sgd(optimizer_config);
   }
 }
 
 template <typename KeyType, typename ValType, typename GradType>
 void HeterComm<KeyType, ValType, GradType>::set_embedx_sgd(
     const OptimizerConfig& optimizer_config) {
-  for (auto& table : tables_) {
-    table->set_embedx_sgd(optimizer_config);
+  for (int i = 0; i < resource_->total_device(); ++i) {
+    AnyDeviceGuard guard(resource_->dev_id(i));
+    ptr_tables_[i]->set_embedx_sgd(optimizer_config);
   }
 }
 
diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
index 2f445d9bffe5a..83a6c48d3bac6 100644
--- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
+++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
@@ -585,6 +585,8 @@ void PSGPUWrapper::BuildGPUTask(std::shared_ptr<HeterContext> gpu_task) {
   HeterPs_ = HeterPsBase::get_instance(size_max, resource_, feature_value_accessor_, optimizer_type_);
 #ifdef PADDLE_WITH_CUDA
   HeterPs_->set_nccl_comm_and_size(inner_comms_, inter_comms_, node_size_);
+  HeterPs_->set_sparse_sgd(optimizer_config_);
+  HeterPs_->set_embedx_sgd(optimizer_config_);
 #endif
   auto build_dynamic_mf_func = [this, &gpu_task](int i, int j) {
     this->HeterPs_->set_multi_mf_dim(multi_mf_dim_, max_mf_dim_);
diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu
index cd14cfa5d3437..d04da131e98ef 100644
--- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu
+++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu
@@ -318,11 +318,9 @@ void PSGPUWrapper::SetSparseSGD(float nonclk_coeff, float clk_coeff,
                                 float min_bound, float max_bound,
                                 float learning_rate, float initial_g2sum,
                                 float initial_range, float beta1_decay_rate,
                                 float beta2_decay_rate, float ada_epsilon) {
-  OptimizerConfig optimizer_config;
-  optimizer_config.set_sparse_sgd(nonclk_coeff, clk_coeff, min_bound, max_bound,
+  optimizer_config_.set_sparse_sgd(nonclk_coeff, clk_coeff, min_bound, max_bound,
                                   learning_rate, initial_g2sum, initial_range,
                                   beta1_decay_rate, beta2_decay_rate, ada_epsilon);
-  HeterPs_->set_sparse_sgd(optimizer_config);
 }
 
 void PSGPUWrapper::SetEmbedxSGD(float mf_create_thresholds,
@@ -330,12 +328,10 @@ void PSGPUWrapper::SetEmbedxSGD(float mf_create_thresholds,
                                 float mf_initial_range, float mf_min_bound,
                                 float mf_max_bound, float mf_beta1_decay_rate,
                                 float mf_beta2_decay_rate, float mf_ada_epsilon) {
-  OptimizerConfig optimizer_config;
-  optimizer_config.set_embedx_sgd(mf_create_thresholds, mf_learning_rate,
+  optimizer_config_.set_embedx_sgd(mf_create_thresholds, mf_learning_rate,
                                   mf_initial_g2sum, mf_initial_range,
                                   mf_min_bound, mf_max_bound, mf_beta1_decay_rate,
                                   mf_beta2_decay_rate, mf_ada_epsilon);
-  HeterPs_->set_embedx_sgd(optimizer_config);
 }
 
 }  // end namespace framework
diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.h b/paddle/fluid/framework/fleet/ps_gpu_wrapper.h
index 6b52f6965b221..be8546c366994 100644
--- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.h
+++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.h
@@ -319,8 +319,8 @@ class PSGPUWrapper {
     config["embedx_dim"] = sparse_table_accessor.embedx_dim();
     config["nonclk_coeff"] = sparse_table_accessor_parameter.nonclk_coeff();
     config["clk_coeff"] = sparse_table_accessor_parameter.click_coeff();
-
-
+    config["mf_create_thresholds"] = sparse_table_accessor.embedx_threshold();
+
     if (accessor_class == "CtrDymfAccessor") {
       // optimizer config for embed_w and embedx
       add_sparse_optimizer(config, sparse_table_accessor.embed_sgd_param());
@@ -348,13 +348,13 @@ class PSGPUWrapper {
                              ? 10.0
                              : config["max_bound"];
     float learning_rate = (config.find("learning_rate") == config.end())
-                              ? 1.0
+                              ? 0.05
                               : config["learning_rate"];
     float initial_g2sum = (config.find("initial_g2sum") == config.end())
-                              ? 1.0
+                              ? 3.0
                               : config["initial_g2sum"];
     float initial_range = (config.find("initial_range") == config.end())
-                              ? 1.0
+                              ? 1e-4
                               : config["initial_range"];
     float beta1_decay_rate = (config.find("beta1_decay_rate") == config.end())
                                  ? 0.9
@@ -371,19 +371,19 @@ class PSGPUWrapper {
                                        ? static_cast<float>(1.0)
                                        : config["mf_create_thresholds"];
     float mf_learning_rate = (config.find("mf_learning_rate") == config.end())
-                                 ? 1.0
+                                 ? 0.05
                                  : config["mf_learning_rate"];
     float mf_initial_g2sum = (config.find("mf_initial_g2sum") == config.end())
-                                 ? 1.0
+                                 ? 3.0
                                  : config["mf_initial_g2sum"];
     float mf_initial_range = (config.find("mf_initial_range") == config.end())
-                                 ? 1.0
+                                 ? 1e-4
                                  : config["mf_initial_range"];
     float mf_min_bound = (config.find("mf_min_bound") == config.end())
-                             ? 1.0
+                             ? -10.0
                              : config["mf_min_bound"];
     float mf_max_bound = (config.find("mf_max_bound") == config.end())
-                             ? 1.0
+                             ? 10.0
                              : config["mf_max_bound"];
     float mf_beta1_decay_rate = (config.find("mf_beta1_decay_rate") == config.end())
                                     ? 0.9
@@ -394,20 +394,14 @@ class PSGPUWrapper {
     float mf_ada_epsilon = (config.find("mf_ada_epsilon") == config.end())
                                ? 1e-8
                                : config["mf_ada_epsilon"];
-    for (size_t i = 0; i < heter_devices_.size(); i++) {
-#ifdef PADDLE_WITH_CUDA
-      PADDLE_ENFORCE_GPU_SUCCESS(cudaSetDevice(heter_devices_[i]));
-#elif defined(PADDLE_WITH_XPU_KP)
-      PADDLE_ENFORCE_XPU_SUCCESS(xpu_set_device(heter_devices_[i]));
-#endif
-      this->SetSparseSGD(nonclk_coeff, clk_coeff, min_bound, max_bound,
-                         learning_rate, initial_g2sum, initial_range,
-                         beta1_decay_rate, beta2_decay_rate, ada_epsilon);
-      this->SetEmbedxSGD(mf_create_thresholds, mf_learning_rate,
-                         mf_initial_g2sum, mf_initial_range, mf_min_bound,
-                         mf_max_bound, mf_beta1_decay_rate, mf_beta2_decay_rate,
-                         mf_ada_epsilon);
-    }
+
+    this->SetSparseSGD(nonclk_coeff, clk_coeff, min_bound, max_bound,
+                       learning_rate, initial_g2sum, initial_range,
+                       beta1_decay_rate, beta2_decay_rate, ada_epsilon);
+    this->SetEmbedxSGD(mf_create_thresholds, mf_learning_rate,
+                       mf_initial_g2sum, mf_initial_range, mf_min_bound,
+                       mf_max_bound, mf_beta1_decay_rate, mf_beta2_decay_rate,
+                       mf_ada_epsilon);
 
     // set optimizer type(naive,adagrad,std_adagrad,adam,share_adam)
     optimizer_type_ = (config.find("optimizer_type") == config.end())
@@ -630,7 +624,7 @@ class PSGPUWrapper {
   bool running_ = false;
   std::vector<std::shared_ptr<paddle::framework::ThreadPool>> pull_thread_pool_;
   std::vector<std::shared_ptr<paddle::framework::ThreadPool>> hbm_thread_pool_;
-
+  OptimizerConfig optimizer_config_;
 
  protected:
   static bool is_initialized_;
 };
diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.kps b/paddle/fluid/framework/fleet/ps_gpu_wrapper.kps
index 3503ce72c9eee..f1084dc4d758b 100644
--- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.kps
+++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.kps
@@ -258,31 +258,6 @@ void PSGPUWrapper::CopyForPush(const paddle::platform::Place& place,
   xpu_wait(stream);
 }
 
-void PSGPUWrapper::SetSparseSGD(float nonclk_coeff, float clk_coeff,
-                                float min_bound, float max_bound,
-                                float learning_rate, float initial_g2sum,
-                                float initial_range, float beta1_decay_rate,
-                                float beta2_decay_rate, float ada_epsilon) {
-  OptimizerConfig optimizer_config;
-  optimizer_config.set_sparse_sgd(nonclk_coeff, clk_coeff, min_bound, max_bound,
-                                  learning_rate, initial_g2sum, initial_range,
-                                  beta1_decay_rate, beta2_decay_rate, ada_epsilon);
-  HeterPs_->set_sparse_sgd(optimizer_config);
-}
-
-void PSGPUWrapper::SetEmbedxSGD(float mf_create_thresholds,
-                                float mf_learning_rate, float mf_initial_g2sum,
-                                float mf_initial_range, float mf_min_bound,
-                                float mf_max_bound, float mf_beta1_decay_rate,
-                                float mf_beta2_decay_rate, float mf_ada_epsilon) {
-  OptimizerConfig optimizer_config;
-  optimizer_config.set_embedx_sgd(mf_create_thresholds, mf_learning_rate,
-                                  mf_initial_g2sum, mf_initial_range,
-                                  mf_min_bound, mf_max_bound,mf_beta1_decay_rate,
-                                  mf_beta2_decay_rate, mf_ada_epsilon);
-  HeterPs_->set_embedx_sgd(optimizer_config);
-}
-
 }  // end namespace framework
 }  // end namespace paddle
 #endif
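
Note on the call order this patch establishes: PSGPUWrapper now caches the parsed SGD settings in an optimizer_config_ member (SetSparseSGD/SetEmbedxSGD only fill it), and BuildGPUTask pushes that cached config to HeterPs_ once the instance exists, after which HeterComm applies it to every device's ptr_tables_ under AnyDeviceGuard; presumably this is what removes the per-device loop in InitializeGPUServer. The C++ below is a minimal, self-contained sketch of that flow only; OptimizerConfig, PtrTable, HeterCommSketch and PSGPUWrapperSketch are simplified hypothetical stand-ins, not the real Paddle classes.

// sketch.cc - models "cache config first, apply per device after the PS is built"
#include <cstdio>
#include <memory>
#include <vector>

struct OptimizerConfig {        // stand-in for paddle's OptimizerConfig
  float learning_rate = 0.05f;
};

struct PtrTable {               // stand-in for one entry of ptr_tables_
  void set_sparse_sgd(const OptimizerConfig& c) {
    std::printf("table configured, lr=%f\n", c.learning_rate);
  }
};

class HeterCommSketch {         // mirrors HeterComm::set_sparse_sgd after the patch
 public:
  explicit HeterCommSketch(int n_dev) : ptr_tables_(n_dev) {}
  void set_sparse_sgd(const OptimizerConfig& cfg) {
    for (size_t i = 0; i < ptr_tables_.size(); ++i) {
      // AnyDeviceGuard guard(dev_id(i));  // device switch elided in this sketch
      ptr_tables_[i].set_sparse_sgd(cfg);
    }
  }
 private:
  std::vector<PtrTable> ptr_tables_;
};

class PSGPUWrapperSketch {      // mirrors the new caching behaviour of PSGPUWrapper
 public:
  // InitializeGPUServer -> SetSparseSGD now only records the config.
  void SetSparseSGD(float lr) { optimizer_config_.learning_rate = lr; }
  // BuildGPUTask applies the cached config once the PS instance exists.
  void BuildGPUTask(int n_dev) {
    heter_ps_ = std::make_unique<HeterCommSketch>(n_dev);
    heter_ps_->set_sparse_sgd(optimizer_config_);
  }
 private:
  OptimizerConfig optimizer_config_;
  std::unique_ptr<HeterCommSketch> heter_ps_;
};

int main() {
  PSGPUWrapperSketch w;
  w.SetSparseSGD(0.05f);  // config parsed before the PS instance is built
  w.BuildGPUTask(2);      // then applied to every device table exactly once
}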